xref: /openbmc/qemu/target/ppc/kvm.c (revision 3f53bc61)
1 /*
2  * PowerPC implementation of KVM hooks
3  *
4  * Copyright IBM Corp. 2007
5  * Copyright (C) 2011 Freescale Semiconductor, Inc.
6  *
7  * Authors:
8  *  Jerone Young <jyoung5@us.ibm.com>
9  *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10  *  Hollis Blanchard <hollisb@us.ibm.com>
11  *
12  * This work is licensed under the terms of the GNU GPL, version 2 or later.
13  * See the COPYING file in the top-level directory.
14  *
15  */
16 
17 #include "qemu/osdep.h"
18 #include <dirent.h>
19 #include <sys/ioctl.h>
20 #include <sys/vfs.h>
21 
22 #include <linux/kvm.h>
23 
24 #include "qemu-common.h"
25 #include "qemu/error-report.h"
26 #include "cpu.h"
27 #include "cpu-models.h"
28 #include "qemu/timer.h"
29 #include "sysemu/sysemu.h"
30 #include "sysemu/hw_accel.h"
31 #include "kvm_ppc.h"
32 #include "sysemu/cpus.h"
33 #include "sysemu/device_tree.h"
34 #include "mmu-hash64.h"
35 
36 #include "hw/sysbus.h"
37 #include "hw/ppc/spapr.h"
38 #include "hw/ppc/spapr_vio.h"
39 #include "hw/ppc/spapr_cpu_core.h"
40 #include "hw/ppc/ppc.h"
41 #include "sysemu/watchdog.h"
42 #include "trace.h"
43 #include "exec/gdbstub.h"
44 #include "exec/memattrs.h"
45 #include "exec/ram_addr.h"
46 #include "sysemu/hostmem.h"
47 #include "qemu/cutils.h"
48 #include "qemu/mmap-alloc.h"
49 #if defined(TARGET_PPC64)
50 #include "hw/ppc/spapr_cpu_core.h"
51 #endif
52 
53 //#define DEBUG_KVM
54 
55 #ifdef DEBUG_KVM
56 #define DPRINTF(fmt, ...) \
57     do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
58 #else
59 #define DPRINTF(fmt, ...) \
60     do { } while (0)
61 #endif
62 
63 #define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"
64 
65 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
66     KVM_CAP_LAST_INFO
67 };
68 
69 static int cap_interrupt_unset = false;
70 static int cap_interrupt_level = false;
71 static int cap_segstate;
72 static int cap_booke_sregs;
73 static int cap_ppc_smt;
74 static int cap_ppc_rma;
75 static int cap_spapr_tce;
76 static int cap_spapr_multitce;
77 static int cap_spapr_vfio;
78 static int cap_hior;
79 static int cap_one_reg;
80 static int cap_epr;
81 static int cap_ppc_watchdog;
82 static int cap_papr;
83 static int cap_htab_fd;
84 static int cap_fixup_hcalls;
85 static int cap_htm;             /* Hardware transactional memory support */
86 
87 static uint32_t debug_inst_opcode;
88 
89 /* XXX We have a race condition where we actually have a level triggered
90  *     interrupt, but the infrastructure can't expose that yet, so the guest
91  *     takes it but ignores it, goes to sleep and never gets notified that there's
92  *     still an interrupt pending.
93  *
94  *     As a quick workaround, let's just wake up again 20 ms after we injected
95  *     an interrupt. That way we can ensure that we're always reinjecting
96  *     interrupts in case the guest swallowed them.
97  */
98 static QEMUTimer *idle_timer;
99 
100 static void kvm_kick_cpu(void *opaque)
101 {
102     PowerPCCPU *cpu = opaque;
103 
104     qemu_cpu_kick(CPU(cpu));
105 }
106 
107 /* Check whether we are running with KVM-PR (instead of KVM-HV).  This
108  * should only be used for fallback tests - generally we should use
109  * explicit capabilities for the features we want, rather than
110  * assuming what is/isn't available depending on the KVM variant. */
111 static bool kvmppc_is_pr(KVMState *ks)
112 {
113     /* Assume KVM-PR if the GET_PVINFO capability is available */
114     return kvm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
115 }
116 
117 static int kvm_ppc_register_host_cpu_type(void);
118 
119 int kvm_arch_init(MachineState *ms, KVMState *s)
120 {
121     cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
122     cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
123     cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
124     cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
125     cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
126     cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
127     cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
128     cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
129     cap_spapr_vfio = false;
130     cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
131     cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
132     cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
133     cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
134     /* Note: we don't set cap_papr here, because this capability is
135      * only activated after this by kvmppc_set_papr() */
136     cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
137     cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
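    /* KVM_CAP_PPC_HTM is queried on the VM fd (kvm_vm_check_extension) rather
     * than on /dev/kvm, presumably because its availability can differ per VM
     * type (HV vs PR). */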
138     cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
139 
140     if (!cap_interrupt_level) {
141         fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
142                         "VM to stall at times!\n");
143     }
144 
145     kvm_ppc_register_host_cpu_type();
146 
147     return 0;
148 }
149 
150 int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
151 {
152     return 0;
153 }
154 
155 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
156 {
157     CPUPPCState *cenv = &cpu->env;
158     CPUState *cs = CPU(cpu);
159     struct kvm_sregs sregs;
160     int ret;
161 
162     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
163         /* What we're really trying to say is "if we're on BookE, we use
164            the native PVR for now". This is the only sane way to check
165            it though, so we potentially mislead users into thinking they
166            can run BookE guests on BookS. Let's hope nobody dares to try :) */
167         return 0;
168     } else {
169         if (!cap_segstate) {
170             fprintf(stderr, "kvm error: missing PVR setting capability\n");
171             return -ENOSYS;
172         }
173     }
174 
175     ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
176     if (ret) {
177         return ret;
178     }
179 
180     sregs.pvr = cenv->spr[SPR_PVR];
181     return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
182 }
183 
184 /* Set up a shared TLB array with KVM */
185 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
186 {
187     CPUPPCState *env = &cpu->env;
188     CPUState *cs = CPU(cpu);
189     struct kvm_book3e_206_tlb_params params = {};
190     struct kvm_config_tlb cfg = {};
191     unsigned int entries = 0;
192     int ret, i;
193 
194     if (!kvm_enabled() ||
195         !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
196         return 0;
197     }
198 
199     assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
200 
201     for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
202         params.tlb_sizes[i] = booke206_tlb_size(env, i);
203         params.tlb_ways[i] = booke206_tlb_ways(env, i);
204         entries += params.tlb_sizes[i];
205     }
206 
207     assert(entries == env->nb_tlb);
208     assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
209 
210     env->tlb_dirty = true;
211 
212     cfg.array = (uintptr_t)env->tlb.tlbm;
213     cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
214     cfg.params = (uintptr_t)&params;
215     cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
216 
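    /* Enabling KVM_CAP_SW_TLB passes the address of QEMU's MAS-format TLB
     * array to the kernel so guest TLB contents can be exchanged through it;
     * QEMU flags modified entries via KVM_DIRTY_TLB (see kvm_sw_tlb_put()). */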
217     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
218     if (ret < 0) {
219         fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
220                 __func__, strerror(-ret));
221         return ret;
222     }
223 
224     env->kvm_sw_tlb = true;
225     return 0;
226 }
227 
228 
229 #if defined(TARGET_PPC64)
230 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
231                                        struct kvm_ppc_smmu_info *info)
232 {
233     CPUPPCState *env = &cpu->env;
234     CPUState *cs = CPU(cpu);
235 
236     memset(info, 0, sizeof(*info));
237 
238     /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so we
239      * need to "guess" what the supported page sizes are.
240      *
241      * For that to work we make a few assumptions:
242      *
243      * - Check whether we are running "PR" KVM which only supports 4K
244      *   and 16M pages, but supports them regardless of the backing
245      *   store characteristics. We also don't support 1T segments.
246      *
247      *   This is safe as if HV KVM ever supports that capability or PR
248      *   KVM grows support for more page/segment sizes, those versions
249      *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
250      *   will not hit this fallback
251      *
252      * - Else we are running HV KVM. This means we only support page
253      *   sizes that fit in the backing store. Additionally we only
254      *   advertise 64K pages if the processor is ARCH 2.06 and we assume
255      *   P7 encodings for the SLB and hash table. Here too, we assume
256      *   support for any newer processor will mean a kernel that
257      *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
258      *   this fallback.
259      */
260     if (kvmppc_is_pr(cs->kvm_state)) {
261         /* No flags */
262         info->flags = 0;
263         info->slb_size = 64;
264 
265         /* Standard 4k base page size segment */
266         info->sps[0].page_shift = 12;
267         info->sps[0].slb_enc = 0;
268         info->sps[0].enc[0].page_shift = 12;
269         info->sps[0].enc[0].pte_enc = 0;
270 
271         /* Standard 16M large page size segment */
272         info->sps[1].page_shift = 24;
273         info->sps[1].slb_enc = SLB_VSID_L;
274         info->sps[1].enc[0].page_shift = 24;
275         info->sps[1].enc[0].pte_enc = 0;
276     } else {
277         int i = 0;
278 
279         /* HV KVM has backing store size restrictions */
280         info->flags = KVM_PPC_PAGE_SIZES_REAL;
281 
282         if (env->mmu_model & POWERPC_MMU_1TSEG) {
283             info->flags |= KVM_PPC_1T_SEGMENTS;
284         }
285 
286         if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 ||
287            POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) {
288             info->slb_size = 32;
289         } else {
290             info->slb_size = 64;
291         }
292 
293         /* Standard 4k base page size segment */
294         info->sps[i].page_shift = 12;
295         info->sps[i].slb_enc = 0;
296         info->sps[i].enc[0].page_shift = 12;
297         info->sps[i].enc[0].pte_enc = 0;
298         i++;
299 
300         /* 64K on MMU 2.06 and later */
301         if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 ||
302             POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) {
303             info->sps[i].page_shift = 16;
304             info->sps[i].slb_enc = 0x110;
305             info->sps[i].enc[0].page_shift = 16;
306             info->sps[i].enc[0].pte_enc = 1;
307             i++;
308         }
309 
310         /* Standard 16M large page size segment */
311         info->sps[i].page_shift = 24;
312         info->sps[i].slb_enc = SLB_VSID_L;
313         info->sps[i].enc[0].page_shift = 24;
314         info->sps[i].enc[0].pte_enc = 0;
315     }
316 }
317 
318 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
319 {
320     CPUState *cs = CPU(cpu);
321     int ret;
322 
323     if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
324         ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
325         if (ret == 0) {
326             return;
327         }
328     }
329 
330     kvm_get_fallback_smmu_info(cpu, info);
331 }
332 
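/* Under KVM_PPC_PAGE_SIZES_REAL (the HV case above), a guest page size is only
 * usable if it fits within the page size backing guest RAM; otherwise any
 * size is acceptable. */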
333 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
334 {
335     if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
336         return true;
337     }
338 
339     return (1ul << shift) <= rampgsize;
340 }
341 
342 static long max_cpu_page_size;
343 
344 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
345 {
346     static struct kvm_ppc_smmu_info smmu_info;
347     static bool has_smmu_info;
348     CPUPPCState *env = &cpu->env;
349     int iq, ik, jq, jk;
350     bool has_64k_pages = false;
351 
352     /* We only handle page sizes for 64-bit server guests for now */
353     if (!(env->mmu_model & POWERPC_MMU_64)) {
354         return;
355     }
356 
357     /* Collect MMU info from kernel if not already */
358     if (!has_smmu_info) {
359         kvm_get_smmu_info(cpu, &smmu_info);
360         has_smmu_info = true;
361     }
362 
363     if (!max_cpu_page_size) {
364         max_cpu_page_size = qemu_getrampagesize();
365     }
366 
367     /* Convert to QEMU form */
368     memset(&env->sps, 0, sizeof(env->sps));
369 
370     /* If we have HV KVM, we need to forbid CI large pages if our
371      * host page size is smaller than 64K.
372      */
373     if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) {
374         env->ci_large_pages = getpagesize() >= 0x10000;
375     }
376 
377     /*
378      * XXX This loop should be an entry wide AND of the capabilities that
379      *     the selected CPU has with the capabilities that KVM supports.
380      */
381     for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
382         struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
383         struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
384 
385         if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
386                                  ksps->page_shift)) {
387             continue;
388         }
389         qsps->page_shift = ksps->page_shift;
390         qsps->slb_enc = ksps->slb_enc;
391         for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
392             if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
393                                      ksps->enc[jk].page_shift)) {
394                 continue;
395             }
396             if (ksps->enc[jk].page_shift == 16) {
397                 has_64k_pages = true;
398             }
399             qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
400             qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
401             if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
402                 break;
403             }
404         }
405         if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
406             break;
407         }
408     }
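    /* Adopt the SLB size reported (or guessed) above, and drop CPU features
     * (1T segments, 64k pages) that KVM did not advertise. */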
409     env->slb_nr = smmu_info.slb_size;
410     if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
411         env->mmu_model &= ~POWERPC_MMU_1TSEG;
412     }
413     if (!has_64k_pages) {
414         env->mmu_model &= ~POWERPC_MMU_64K;
415     }
416 }
417 
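/* Check that the page size of the given memory backend is at least as large as
 * the biggest page size the guest CPU may use (see kvm_fixup_page_sizes()). */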
418 bool kvmppc_is_mem_backend_page_size_ok(char *obj_path)
419 {
420     Object *mem_obj = object_resolve_path(obj_path, NULL);
421     char *mempath = object_property_get_str(mem_obj, "mem-path", NULL);
422     long pagesize;
423 
424     if (mempath) {
425         pagesize = qemu_mempath_getpagesize(mempath);
426     } else {
427         pagesize = getpagesize();
428     }
429 
430     return pagesize >= max_cpu_page_size;
431 }
432 
433 #else /* defined (TARGET_PPC64) */
434 
435 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
436 {
437 }
438 
439 bool kvmppc_is_mem_backend_page_size_ok(char *obj_path)
440 {
441     return true;
442 }
443 
444 #endif /* !defined (TARGET_PPC64) */
445 
446 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
447 {
448     return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
449 }
450 
451 /* e500 supports 2 h/w breakpoints and 2 watchpoints.
452  * book3s supports only 1 watchpoint, so an array size
453  * of 4 is sufficient for now.
454  */
455 #define MAX_HW_BKPTS 4
456 
457 static struct HWBreakpoint {
458     target_ulong addr;
459     int type;
460 } hw_debug_points[MAX_HW_BKPTS];
461 
462 static CPUWatchpoint hw_watchpoint;
463 
464 /* By default no hardware breakpoints or watchpoints are supported */
465 static int max_hw_breakpoint;
466 static int max_hw_watchpoint;
467 static int nb_hw_breakpoint;
468 static int nb_hw_watchpoint;
469 
470 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
471 {
472     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
473         max_hw_breakpoint = 2;
474         max_hw_watchpoint = 2;
475     }
476 
477     if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
478         fprintf(stderr, "Error initializing h/w breakpoints\n");
479         return;
480     }
481 }
482 
483 int kvm_arch_init_vcpu(CPUState *cs)
484 {
485     PowerPCCPU *cpu = POWERPC_CPU(cs);
486     CPUPPCState *cenv = &cpu->env;
487     int ret;
488 
489     /* Gather server mmu info from KVM and update the CPU state */
490     kvm_fixup_page_sizes(cpu);
491 
492     /* Synchronize sregs with kvm */
493     ret = kvm_arch_sync_sregs(cpu);
494     if (ret) {
495         if (ret == -EINVAL) {
496             error_report("Register sync failed... If you're using kvm-hv.ko,"
497                          " only \"-cpu host\" is possible");
498         }
499         return ret;
500     }
501 
502     idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
503 
504     switch (cenv->mmu_model) {
505     case POWERPC_MMU_BOOKE206:
506         /* This target supports access to KVM's guest TLB */
507         ret = kvm_booke206_tlb_init(cpu);
508         break;
509     case POWERPC_MMU_2_07:
510         if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
511             /* KVM-HV provides transactional memory on POWER8 even without
512              * the KVM_CAP_PPC_HTM extension, so enable it here instead. */
513             cap_htm = true;
514         }
515         break;
516     default:
517         break;
518     }
519 
520     kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
521     kvmppc_hw_debug_points_init(cenv);
522 
523     return ret;
524 }
525 
526 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
527 {
528     CPUPPCState *env = &cpu->env;
529     CPUState *cs = CPU(cpu);
530     struct kvm_dirty_tlb dirty_tlb;
531     unsigned char *bitmap;
532     int ret;
533 
534     if (!env->kvm_sw_tlb) {
535         return;
536     }
537 
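    /* Mark every TLB entry dirty so KVM re-reads the whole shared array. */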
538     bitmap = g_malloc((env->nb_tlb + 7) / 8);
539     memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
540 
541     dirty_tlb.bitmap = (uintptr_t)bitmap;
542     dirty_tlb.num_dirty = env->nb_tlb;
543 
544     ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
545     if (ret) {
546         fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
547                 __func__, strerror(-ret));
548     }
549 
550     g_free(bitmap);
551 }
552 
553 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
554 {
555     PowerPCCPU *cpu = POWERPC_CPU(cs);
556     CPUPPCState *env = &cpu->env;
557     union {
558         uint32_t u32;
559         uint64_t u64;
560     } val;
561     struct kvm_one_reg reg = {
562         .id = id,
563         .addr = (uintptr_t) &val,
564     };
565     int ret;
566 
567     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
568     if (ret != 0) {
569         trace_kvm_failed_spr_get(spr, strerror(errno));
570     } else {
571         switch (id & KVM_REG_SIZE_MASK) {
572         case KVM_REG_SIZE_U32:
573             env->spr[spr] = val.u32;
574             break;
575 
576         case KVM_REG_SIZE_U64:
577             env->spr[spr] = val.u64;
578             break;
579 
580         default:
581             /* Don't handle this size yet */
582             abort();
583         }
584     }
585 }
586 
587 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
588 {
589     PowerPCCPU *cpu = POWERPC_CPU(cs);
590     CPUPPCState *env = &cpu->env;
591     union {
592         uint32_t u32;
593         uint64_t u64;
594     } val;
595     struct kvm_one_reg reg = {
596         .id = id,
597         .addr = (uintptr_t) &val,
598     };
599     int ret;
600 
601     switch (id & KVM_REG_SIZE_MASK) {
602     case KVM_REG_SIZE_U32:
603         val.u32 = env->spr[spr];
604         break;
605 
606     case KVM_REG_SIZE_U64:
607         val.u64 = env->spr[spr];
608         break;
609 
610     default:
611         /* Don't handle this size yet */
612         abort();
613     }
614 
615     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
616     if (ret != 0) {
617         trace_kvm_failed_spr_set(spr, strerror(errno));
618     }
619 }
620 
621 static int kvm_put_fp(CPUState *cs)
622 {
623     PowerPCCPU *cpu = POWERPC_CPU(cs);
624     CPUPPCState *env = &cpu->env;
625     struct kvm_one_reg reg;
626     int i;
627     int ret;
628 
629     if (env->insns_flags & PPC_FLOAT) {
630         uint64_t fpscr = env->fpscr;
631         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
632 
633         reg.id = KVM_REG_PPC_FPSCR;
634         reg.addr = (uintptr_t)&fpscr;
635         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
636         if (ret < 0) {
637             DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
638             return ret;
639         }
640 
641         for (i = 0; i < 32; i++) {
642             uint64_t vsr[2];
643 
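            /* FPR i architecturally occupies doubleword 0 (the most
             * significant half) of VSR i; the two halves are laid out below
             * in host order for the 128-bit ONE_REG transfer. */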
644 #ifdef HOST_WORDS_BIGENDIAN
645             vsr[0] = float64_val(env->fpr[i]);
646             vsr[1] = env->vsr[i];
647 #else
648             vsr[0] = env->vsr[i];
649             vsr[1] = float64_val(env->fpr[i]);
650 #endif
651             reg.addr = (uintptr_t) &vsr;
652             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
653 
654             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
655             if (ret < 0) {
656                 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
657                         i, strerror(errno));
658                 return ret;
659             }
660         }
661     }
662 
663     if (env->insns_flags & PPC_ALTIVEC) {
664         reg.id = KVM_REG_PPC_VSCR;
665         reg.addr = (uintptr_t)&env->vscr;
666         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
667         if (ret < 0) {
668             DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
669             return ret;
670         }
671 
672         for (i = 0; i < 32; i++) {
673             reg.id = KVM_REG_PPC_VR(i);
674             reg.addr = (uintptr_t)&env->avr[i];
675             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
676             if (ret < 0) {
677                 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
678                 return ret;
679             }
680         }
681     }
682 
683     return 0;
684 }
685 
686 static int kvm_get_fp(CPUState *cs)
687 {
688     PowerPCCPU *cpu = POWERPC_CPU(cs);
689     CPUPPCState *env = &cpu->env;
690     struct kvm_one_reg reg;
691     int i;
692     int ret;
693 
694     if (env->insns_flags & PPC_FLOAT) {
695         uint64_t fpscr;
696         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
697 
698         reg.id = KVM_REG_PPC_FPSCR;
699         reg.addr = (uintptr_t)&fpscr;
700         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
701         if (ret < 0) {
702             DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
703             return ret;
704         } else {
705             env->fpscr = fpscr;
706         }
707 
708         for (i = 0; i < 32; i++) {
709             uint64_t vsr[2];
710 
711             reg.addr = (uintptr_t) &vsr;
712             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
713 
714             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
715             if (ret < 0) {
716                 DPRINTF("Unable to get %s%d from KVM: %s\n",
717                         vsx ? "VSR" : "FPR", i, strerror(errno));
718                 return ret;
719             } else {
720 #ifdef HOST_WORDS_BIGENDIAN
721                 env->fpr[i] = vsr[0];
722                 if (vsx) {
723                     env->vsr[i] = vsr[1];
724                 }
725 #else
726                 env->fpr[i] = vsr[1];
727                 if (vsx) {
728                     env->vsr[i] = vsr[0];
729                 }
730 #endif
731             }
732         }
733     }
734 
735     if (env->insns_flags & PPC_ALTIVEC) {
736         reg.id = KVM_REG_PPC_VSCR;
737         reg.addr = (uintptr_t)&env->vscr;
738         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
739         if (ret < 0) {
740             DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
741             return ret;
742         }
743 
744         for (i = 0; i < 32; i++) {
745             reg.id = KVM_REG_PPC_VR(i);
746             reg.addr = (uintptr_t)&env->avr[i];
747             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
748             if (ret < 0) {
749                 DPRINTF("Unable to get VR%d from KVM: %s\n",
750                         i, strerror(errno));
751                 return ret;
752             }
753         }
754     }
755 
756     return 0;
757 }
758 
759 #if defined(TARGET_PPC64)
760 static int kvm_get_vpa(CPUState *cs)
761 {
762     PowerPCCPU *cpu = POWERPC_CPU(cs);
763     CPUPPCState *env = &cpu->env;
764     struct kvm_one_reg reg;
765     int ret;
766 
767     reg.id = KVM_REG_PPC_VPA_ADDR;
768     reg.addr = (uintptr_t)&env->vpa_addr;
769     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
770     if (ret < 0) {
771         DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
772         return ret;
773     }
774 
775     assert((uintptr_t)&env->slb_shadow_size
776            == ((uintptr_t)&env->slb_shadow_addr + 8));
777     reg.id = KVM_REG_PPC_VPA_SLB;
778     reg.addr = (uintptr_t)&env->slb_shadow_addr;
779     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
780     if (ret < 0) {
781         DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
782                 strerror(errno));
783         return ret;
784     }
785 
786     assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
787     reg.id = KVM_REG_PPC_VPA_DTL;
788     reg.addr = (uintptr_t)&env->dtl_addr;
789     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
790     if (ret < 0) {
791         DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
792                 strerror(errno));
793         return ret;
794     }
795 
796     return 0;
797 }
798 
799 static int kvm_put_vpa(CPUState *cs)
800 {
801     PowerPCCPU *cpu = POWERPC_CPU(cs);
802     CPUPPCState *env = &cpu->env;
803     struct kvm_one_reg reg;
804     int ret;
805 
806     /* SLB shadow or DTL can't be registered unless a master VPA is
807      * registered.  That means when restoring state, if a VPA *is*
808      * registered, we need to set that up first.  If not, we need to
809      * deregister the others before deregistering the master VPA */
810     assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
811 
812     if (env->vpa_addr) {
813         reg.id = KVM_REG_PPC_VPA_ADDR;
814         reg.addr = (uintptr_t)&env->vpa_addr;
815         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
816         if (ret < 0) {
817             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
818             return ret;
819         }
820     }
821 
822     assert((uintptr_t)&env->slb_shadow_size
823            == ((uintptr_t)&env->slb_shadow_addr + 8));
824     reg.id = KVM_REG_PPC_VPA_SLB;
825     reg.addr = (uintptr_t)&env->slb_shadow_addr;
826     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
827     if (ret < 0) {
828         DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
829         return ret;
830     }
831 
832     assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
833     reg.id = KVM_REG_PPC_VPA_DTL;
834     reg.addr = (uintptr_t)&env->dtl_addr;
835     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
836     if (ret < 0) {
837         DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
838                 strerror(errno));
839         return ret;
840     }
841 
842     if (!env->vpa_addr) {
843         reg.id = KVM_REG_PPC_VPA_ADDR;
844         reg.addr = (uintptr_t)&env->vpa_addr;
845         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
846         if (ret < 0) {
847             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
848             return ret;
849         }
850     }
851 
852     return 0;
853 }
854 #endif /* TARGET_PPC64 */
855 
856 int kvmppc_put_books_sregs(PowerPCCPU *cpu)
857 {
858     CPUPPCState *env = &cpu->env;
859     struct kvm_sregs sregs;
860     int i;
861 
862     sregs.pvr = env->spr[SPR_PVR];
863 
864     sregs.u.s.sdr1 = env->spr[SPR_SDR1];
865 
866     /* Sync SLB */
867 #ifdef TARGET_PPC64
868     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
869         sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
870         if (env->slb[i].esid & SLB_ESID_V) {
871             sregs.u.s.ppc64.slb[i].slbe |= i;
872         }
873         sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
874     }
875 #endif
876 
877     /* Sync SRs */
878     for (i = 0; i < 16; i++) {
879         sregs.u.s.ppc32.sr[i] = env->sr[i];
880     }
881 
882     /* Sync BATs */
883     for (i = 0; i < 8; i++) {
884         /* Beware. We have to swap upper and lower bits here */
885         sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
886             | env->DBAT[1][i];
887         sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
888             | env->IBAT[1][i];
889     }
890 
891     return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
892 }
893 
894 int kvm_arch_put_registers(CPUState *cs, int level)
895 {
896     PowerPCCPU *cpu = POWERPC_CPU(cs);
897     CPUPPCState *env = &cpu->env;
898     struct kvm_regs regs;
899     int ret;
900     int i;
901 
902     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
903     if (ret < 0) {
904         return ret;
905     }
906 
907     regs.ctr = env->ctr;
908     regs.lr  = env->lr;
909     regs.xer = cpu_read_xer(env);
910     regs.msr = env->msr;
911     regs.pc = env->nip;
912 
913     regs.srr0 = env->spr[SPR_SRR0];
914     regs.srr1 = env->spr[SPR_SRR1];
915 
916     regs.sprg0 = env->spr[SPR_SPRG0];
917     regs.sprg1 = env->spr[SPR_SPRG1];
918     regs.sprg2 = env->spr[SPR_SPRG2];
919     regs.sprg3 = env->spr[SPR_SPRG3];
920     regs.sprg4 = env->spr[SPR_SPRG4];
921     regs.sprg5 = env->spr[SPR_SPRG5];
922     regs.sprg6 = env->spr[SPR_SPRG6];
923     regs.sprg7 = env->spr[SPR_SPRG7];
924 
925     regs.pid = env->spr[SPR_BOOKE_PID];
926 
927     for (i = 0; i < 32; i++)
928         regs.gpr[i] = env->gpr[i];
929 
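    /* Reassemble the 32-bit CR image from the eight 4-bit crf[] fields;
     * crf[0] is CR field 0, i.e. the most significant nibble. */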
930     regs.cr = 0;
931     for (i = 0; i < 8; i++) {
932         regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
933     }
934 
935     ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
936     if (ret < 0)
937         return ret;
938 
939     kvm_put_fp(cs);
940 
941     if (env->tlb_dirty) {
942         kvm_sw_tlb_put(cpu);
943         env->tlb_dirty = false;
944     }
945 
946     if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
947         ret = kvmppc_put_books_sregs(cpu);
948         if (ret < 0) {
949             return ret;
950         }
951     }
952 
953     if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
954         kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
955     }
956 
957     if (cap_one_reg) {
958         int i;
959 
960         /* We deliberately ignore errors here: for kernels which have
961          * the ONE_REG calls but don't support the specific
962          * registers, there's a reasonable chance things will still
963          * work, at least until we try to migrate. */
964         for (i = 0; i < 1024; i++) {
965             uint64_t id = env->spr_cb[i].one_reg_id;
966 
967             if (id != 0) {
968                 kvm_put_one_spr(cs, id, i);
969             }
970         }
971 
972 #ifdef TARGET_PPC64
973         if (msr_ts) {
974             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
975                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
976             }
977             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
978                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
979             }
980             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
981             kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
982             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
983             kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
984             kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
985             kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
986             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
987             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
988             kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
989             kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
990         }
991 
992         if (cap_papr) {
993             if (kvm_put_vpa(cs) < 0) {
994                 DPRINTF("Warning: Unable to set VPA information to KVM\n");
995             }
996         }
997 
998         kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
999 #endif /* TARGET_PPC64 */
1000     }
1001 
1002     return ret;
1003 }
1004 
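/* BookE exception vectors are IVPR (base) + IVORn (offset); keep QEMU's
 * excp_vectors[] consistent whenever an IVOR is updated from KVM. */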
1005 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1006 {
1007      env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1008 }
1009 
1010 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1011 {
1012     CPUPPCState *env = &cpu->env;
1013     struct kvm_sregs sregs;
1014     int ret;
1015 
1016     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1017     if (ret < 0) {
1018         return ret;
1019     }
1020 
1021     if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1022         env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1023         env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1024         env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1025         env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1026         env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1027         env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1028         env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1029         env->spr[SPR_DECR] = sregs.u.e.dec;
1030         env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1031         env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1032         env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1033     }
1034 
1035     if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1036         env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1037         env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1038         env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1039         env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1040         env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1041     }
1042 
1043     if (sregs.u.e.features & KVM_SREGS_E_64) {
1044         env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1045     }
1046 
1047     if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1048         env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1049     }
1050 
1051     if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1052         env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1053         kvm_sync_excp(env, POWERPC_EXCP_CRITICAL,  SPR_BOOKE_IVOR0);
1054         env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1055         kvm_sync_excp(env, POWERPC_EXCP_MCHECK,  SPR_BOOKE_IVOR1);
1056         env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1057         kvm_sync_excp(env, POWERPC_EXCP_DSI,  SPR_BOOKE_IVOR2);
1058         env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1059         kvm_sync_excp(env, POWERPC_EXCP_ISI,  SPR_BOOKE_IVOR3);
1060         env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1061         kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL,  SPR_BOOKE_IVOR4);
1062         env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1063         kvm_sync_excp(env, POWERPC_EXCP_ALIGN,  SPR_BOOKE_IVOR5);
1064         env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1065         kvm_sync_excp(env, POWERPC_EXCP_PROGRAM,  SPR_BOOKE_IVOR6);
1066         env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1067         kvm_sync_excp(env, POWERPC_EXCP_FPU,  SPR_BOOKE_IVOR7);
1068         env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1069         kvm_sync_excp(env, POWERPC_EXCP_SYSCALL,  SPR_BOOKE_IVOR8);
1070         env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1071         kvm_sync_excp(env, POWERPC_EXCP_APU,  SPR_BOOKE_IVOR9);
1072         env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1073         kvm_sync_excp(env, POWERPC_EXCP_DECR,  SPR_BOOKE_IVOR10);
1074         env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1075         kvm_sync_excp(env, POWERPC_EXCP_FIT,  SPR_BOOKE_IVOR11);
1076         env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1077         kvm_sync_excp(env, POWERPC_EXCP_WDT,  SPR_BOOKE_IVOR12);
1078         env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1079         kvm_sync_excp(env, POWERPC_EXCP_DTLB,  SPR_BOOKE_IVOR13);
1080         env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1081         kvm_sync_excp(env, POWERPC_EXCP_ITLB,  SPR_BOOKE_IVOR14);
1082         env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1083         kvm_sync_excp(env, POWERPC_EXCP_DEBUG,  SPR_BOOKE_IVOR15);
1084 
1085         if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1086             env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1087             kvm_sync_excp(env, POWERPC_EXCP_SPEU,  SPR_BOOKE_IVOR32);
1088             env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1089             kvm_sync_excp(env, POWERPC_EXCP_EFPDI,  SPR_BOOKE_IVOR33);
1090             env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1091             kvm_sync_excp(env, POWERPC_EXCP_EFPRI,  SPR_BOOKE_IVOR34);
1092         }
1093 
1094         if (sregs.u.e.features & KVM_SREGS_E_PM) {
1095             env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1096             kvm_sync_excp(env, POWERPC_EXCP_EPERFM,  SPR_BOOKE_IVOR35);
1097         }
1098 
1099         if (sregs.u.e.features & KVM_SREGS_E_PC) {
1100             env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1101             kvm_sync_excp(env, POWERPC_EXCP_DOORI,  SPR_BOOKE_IVOR36);
1102             env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1103             kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1104         }
1105     }
1106 
1107     if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1108         env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1109         env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1110         env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1111         env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1112         env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1113         env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1114         env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1115         env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1116         env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1117         env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1118     }
1119 
1120     if (sregs.u.e.features & KVM_SREGS_EXP) {
1121         env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1122     }
1123 
1124     if (sregs.u.e.features & KVM_SREGS_E_PD) {
1125         env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1126         env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1127     }
1128 
1129     if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1130         env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1131         env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1132         env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1133 
1134         if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1135             env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1136             env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1137         }
1138     }
1139 
1140     return 0;
1141 }
1142 
1143 static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1144 {
1145     CPUPPCState *env = &cpu->env;
1146     struct kvm_sregs sregs;
1147     int ret;
1148     int i;
1149 
1150     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1151     if (ret < 0) {
1152         return ret;
1153     }
1154 
1155     if (!cpu->vhyp) {
1156         ppc_store_sdr1(env, sregs.u.s.sdr1);
1157     }
1158 
1159     /* Sync SLB */
1160 #ifdef TARGET_PPC64
1161     /*
1162      * The packed SLB array we get from KVM_GET_SREGS only contains
1163      * information about valid entries. So we flush our internal copy
1164      * to get rid of stale ones, then put all valid SLB entries back
1165      * in.
1166      */
1167     memset(env->slb, 0, sizeof(env->slb));
1168     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1169         target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1170         target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1171         /*
1172          * Only restore valid entries
1173          */
1174         if (rb & SLB_ESID_V) {
1175             ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1176         }
1177     }
1178 #endif
1179 
1180     /* Sync SRs */
1181     for (i = 0; i < 16; i++) {
1182         env->sr[i] = sregs.u.s.ppc32.sr[i];
1183     }
1184 
1185     /* Sync BATs */
1186     for (i = 0; i < 8; i++) {
1187         env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1188         env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1189         env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1190         env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1191     }
1192 
1193     return 0;
1194 }
1195 
1196 int kvm_arch_get_registers(CPUState *cs)
1197 {
1198     PowerPCCPU *cpu = POWERPC_CPU(cs);
1199     CPUPPCState *env = &cpu->env;
1200     struct kvm_regs regs;
1201     uint32_t cr;
1202     int i, ret;
1203 
1204     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1205     if (ret < 0)
1206         return ret;
1207 
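    /* Split the 32-bit CR image back into the eight 4-bit crf[] fields. */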
1208     cr = regs.cr;
1209     for (i = 7; i >= 0; i--) {
1210         env->crf[i] = cr & 15;
1211         cr >>= 4;
1212     }
1213 
1214     env->ctr = regs.ctr;
1215     env->lr = regs.lr;
1216     cpu_write_xer(env, regs.xer);
1217     env->msr = regs.msr;
1218     env->nip = regs.pc;
1219 
1220     env->spr[SPR_SRR0] = regs.srr0;
1221     env->spr[SPR_SRR1] = regs.srr1;
1222 
1223     env->spr[SPR_SPRG0] = regs.sprg0;
1224     env->spr[SPR_SPRG1] = regs.sprg1;
1225     env->spr[SPR_SPRG2] = regs.sprg2;
1226     env->spr[SPR_SPRG3] = regs.sprg3;
1227     env->spr[SPR_SPRG4] = regs.sprg4;
1228     env->spr[SPR_SPRG5] = regs.sprg5;
1229     env->spr[SPR_SPRG6] = regs.sprg6;
1230     env->spr[SPR_SPRG7] = regs.sprg7;
1231 
1232     env->spr[SPR_BOOKE_PID] = regs.pid;
1233 
1234     for (i = 0; i < 32; i++)
1235         env->gpr[i] = regs.gpr[i];
1236 
1237     kvm_get_fp(cs);
1238 
1239     if (cap_booke_sregs) {
1240         ret = kvmppc_get_booke_sregs(cpu);
1241         if (ret < 0) {
1242             return ret;
1243         }
1244     }
1245 
1246     if (cap_segstate) {
1247         ret = kvmppc_get_books_sregs(cpu);
1248         if (ret < 0) {
1249             return ret;
1250         }
1251     }
1252 
1253     if (cap_hior) {
1254         kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1255     }
1256 
1257     if (cap_one_reg) {
1258         int i;
1259 
1260         /* We deliberately ignore errors here: for kernels which have
1261          * the ONE_REG calls but don't support the specific
1262          * registers, there's a reasonable chance things will still
1263          * work, at least until we try to migrate. */
1264         for (i = 0; i < 1024; i++) {
1265             uint64_t id = env->spr_cb[i].one_reg_id;
1266 
1267             if (id != 0) {
1268                 kvm_get_one_spr(cs, id, i);
1269             }
1270         }
1271 
1272 #ifdef TARGET_PPC64
1273         if (msr_ts) {
1274             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1275                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1276             }
1277             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1278                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1279             }
1280             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1281             kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1282             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1283             kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1284             kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1285             kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1286             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1287             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1288             kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1289             kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1290         }
1291 
1292         if (cap_papr) {
1293             if (kvm_get_vpa(cs) < 0) {
1294                 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1295             }
1296         }
1297 
1298         kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1299 #endif
1300     }
1301 
1302     return 0;
1303 }
1304 
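/* Forward the external interrupt pin to KVM. Only PPC_INTERRUPT_EXT is
 * handled here, and level-triggered set/unset needs both the UNSET_IRQ and
 * IRQ_LEVEL capabilities; otherwise this is a no-op. */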
1305 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1306 {
1307     unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1308 
1309     if (irq != PPC_INTERRUPT_EXT) {
1310         return 0;
1311     }
1312 
1313     if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1314         return 0;
1315     }
1316 
1317     kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1318 
1319     return 0;
1320 }
1321 
1322 #if defined(TARGET_PPCEMB)
1323 #define PPC_INPUT_INT PPC40x_INPUT_INT
1324 #elif defined(TARGET_PPC64)
1325 #define PPC_INPUT_INT PPC970_INPUT_INT
1326 #else
1327 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1328 #endif
1329 
1330 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1331 {
1332     PowerPCCPU *cpu = POWERPC_CPU(cs);
1333     CPUPPCState *env = &cpu->env;
1334     int r;
1335     unsigned irq;
1336 
1337     qemu_mutex_lock_iothread();
1338 
1339     /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1340      * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1341     if (!cap_interrupt_level &&
1342         run->ready_for_interrupt_injection &&
1343         (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1344         (env->irq_input_state & (1<<PPC_INPUT_INT)))
1345     {
1346         /* For now KVM disregards the 'irq' argument. However, in the
1347          * future KVM could cache it in-kernel to avoid a heavyweight exit
1348          * when reading the UIC.
1349          */
1350         irq = KVM_INTERRUPT_SET;
1351 
1352         DPRINTF("injected interrupt %d\n", irq);
1353         r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1354         if (r < 0) {
1355             printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1356         }
1357 
1358         /* Always wake up soon in case the interrupt was level based */
1359         timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1360                        (NANOSECONDS_PER_SECOND / 50));
1361     }
1362 
1363     /* We don't know if there are more interrupts pending after this. However,
1364      * the guest will return to userspace in the course of handling this one
1365      * anyway, so we will get a chance to deliver the rest. */
1366 
1367     qemu_mutex_unlock_iothread();
1368 }
1369 
1370 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1371 {
1372     return MEMTXATTRS_UNSPECIFIED;
1373 }
1374 
1375 int kvm_arch_process_async_events(CPUState *cs)
1376 {
1377     return cs->halted;
1378 }
1379 
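/* The guest idled (KVM_EXIT_HLT); only actually halt the vCPU if external
 * interrupts are enabled (MSR_EE) and nothing is already pending, otherwise
 * just re-enter the guest. */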
1380 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1381 {
1382     CPUState *cs = CPU(cpu);
1383     CPUPPCState *env = &cpu->env;
1384 
1385     if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1386         cs->halted = 1;
1387         cs->exception_index = EXCP_HLT;
1388     }
1389 
1390     return 0;
1391 }
1392 
1393 /* map dcr access to existing qemu dcr emulation */
1394 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1395 {
1396     if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1397         fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1398 
1399     return 0;
1400 }
1401 
1402 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1403 {
1404     if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1405         fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1406 
1407     return 0;
1408 }
1409 
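/* Software breakpoints: save the original instruction in the breakpoint
 * structure and patch in the trap opcode that KVM reported via
 * KVM_REG_PPC_DEBUG_INST. */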
1410 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1411 {
1412     /* Mixed endian case is not handled */
1413     uint32_t sc = debug_inst_opcode;
1414 
1415     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1416                             sizeof(sc), 0) ||
1417         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1418         return -EINVAL;
1419     }
1420 
1421     return 0;
1422 }
1423 
1424 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1425 {
1426     uint32_t sc;
1427 
1428     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1429         sc != debug_inst_opcode ||
1430         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1431                             sizeof(sc), 1)) {
1432         return -EINVAL;
1433     }
1434 
1435     return 0;
1436 }
1437 
1438 static int find_hw_breakpoint(target_ulong addr, int type)
1439 {
1440     int n;
1441 
1442     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1443            <= ARRAY_SIZE(hw_debug_points));
1444 
1445     for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1446         if (hw_debug_points[n].addr == addr &&
1447              hw_debug_points[n].type == type) {
1448             return n;
1449         }
1450     }
1451 
1452     return -1;
1453 }
1454 
1455 static int find_hw_watchpoint(target_ulong addr, int *flag)
1456 {
1457     int n;
1458 
1459     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1460     if (n >= 0) {
1461         *flag = BP_MEM_ACCESS;
1462         return n;
1463     }
1464 
1465     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1466     if (n >= 0) {
1467         *flag = BP_MEM_WRITE;
1468         return n;
1469     }
1470 
1471     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1472     if (n >= 0) {
1473         *flag = BP_MEM_READ;
1474         return n;
1475     }
1476 
1477     return -1;
1478 }
1479 
1480 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1481                                   target_ulong len, int type)
1482 {
1483     if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1484         return -ENOBUFS;
1485     }
1486 
1487     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1488     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1489 
1490     switch (type) {
1491     case GDB_BREAKPOINT_HW:
1492         if (nb_hw_breakpoint >= max_hw_breakpoint) {
1493             return -ENOBUFS;
1494         }
1495 
1496         if (find_hw_breakpoint(addr, type) >= 0) {
1497             return -EEXIST;
1498         }
1499 
1500         nb_hw_breakpoint++;
1501         break;
1502 
1503     case GDB_WATCHPOINT_WRITE:
1504     case GDB_WATCHPOINT_READ:
1505     case GDB_WATCHPOINT_ACCESS:
1506         if (nb_hw_watchpoint >= max_hw_watchpoint) {
1507             return -ENOBUFS;
1508         }
1509 
1510         if (find_hw_breakpoint(addr, type) >= 0) {
1511             return -EEXIST;
1512         }
1513 
1514         nb_hw_watchpoint++;
1515         break;
1516 
1517     default:
1518         return -ENOSYS;
1519     }
1520 
1521     return 0;
1522 }
1523 
1524 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1525                                   target_ulong len, int type)
1526 {
1527     int n;
1528 
1529     n = find_hw_breakpoint(addr, type);
1530     if (n < 0) {
1531         return -ENOENT;
1532     }
1533 
1534     switch (type) {
1535     case GDB_BREAKPOINT_HW:
1536         nb_hw_breakpoint--;
1537         break;
1538 
1539     case GDB_WATCHPOINT_WRITE:
1540     case GDB_WATCHPOINT_READ:
1541     case GDB_WATCHPOINT_ACCESS:
1542         nb_hw_watchpoint--;
1543         break;
1544 
1545     default:
1546         return -ENOSYS;
1547     }
1548     hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1549 
1550     return 0;
1551 }
1552 
1553 void kvm_arch_remove_all_hw_breakpoints(void)
1554 {
1555     nb_hw_breakpoint = nb_hw_watchpoint = 0;
1556 }
1557 
1558 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1559 {
1560     int n;
1561 
1562     /* Software Breakpoint updates */
1563     if (kvm_sw_breakpoints_active(cs)) {
1564         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1565     }
1566 
1567     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1568            <= ARRAY_SIZE(hw_debug_points));
1569     assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1570 
1571     if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1572         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1573         memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
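        /* Translate the GDB breakpoint/watchpoint types into the
         * KVMPPC_DEBUG_* flags understood by the kernel. */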
1574         for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1575             switch (hw_debug_points[n].type) {
1576             case GDB_BREAKPOINT_HW:
1577                 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1578                 break;
1579             case GDB_WATCHPOINT_WRITE:
1580                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1581                 break;
1582             case GDB_WATCHPOINT_READ:
1583                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1584                 break;
1585             case GDB_WATCHPOINT_ACCESS:
1586                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1587                                         KVMPPC_DEBUG_WATCH_READ;
1588                 break;
1589             default:
1590                 cpu_abort(cs, "Unsupported breakpoint type\n");
1591             }
1592             dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1593         }
1594     }
1595 }
1596 
1597 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1598 {
1599     CPUState *cs = CPU(cpu);
1600     CPUPPCState *env = &cpu->env;
1601     struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1602     int handle = 0;
1603     int n;
1604     int flag = 0;
1605 
1606     if (cs->singlestep_enabled) {
1607         handle = 1;
1608     } else if (arch_info->status) {
1609         if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1610             if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1611                 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1612                 if (n >= 0) {
1613                     handle = 1;
1614                 }
1615             } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1616                                             KVMPPC_DEBUG_WATCH_WRITE)) {
1617                 n = find_hw_watchpoint(arch_info->address,  &flag);
1618                 if (n >= 0) {
1619                     handle = 1;
1620                     cs->watchpoint_hit = &hw_watchpoint;
1621                     hw_watchpoint.vaddr = hw_debug_points[n].addr;
1622                     hw_watchpoint.flags = flag;
1623                 }
1624             }
1625         }
1626     } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1627         handle = 1;
1628     } else {
1629         /* QEMU is not able to handle this debug exception, so inject a
1630          * program exception into the guest;
1631          * yes, a program exception, NOT a debug exception !!
1632          * When QEMU is using the debug resources, debug exceptions must
1633          * always be enabled. To achieve this we set MSR_DE and also set
1634          * MSRP_DEP so the guest cannot change MSR_DE.
1635          * When emulating debug resources for the guest we want the guest
1636          * to control MSR_DE (enable/disable the debug interrupt on demand).
1637          * Supporting both configurations at once is NOT possible.
1638          * So the result is that we cannot share debug resources
1639          * between QEMU and the guest on BOOKE architecture.
1640          * In the current design QEMU gets priority over the guest,
1641          * which means that if QEMU is using the debug resources then the
1642          * guest cannot use them;
1643          * for software breakpoints QEMU uses a privileged instruction,
1644          * so there is no way we can end up here because the guest set a
1645          * debug exception: the only possibility is that the guest executed
1646          * a privileged / illegal instruction, and that is why we are
1647          * injecting a program interrupt.
1648          */
1649 
1650         cpu_synchronize_state(cs);
1651         /* env->nip is the PC, so advance it by 4 before calling
1652          * ppc_cpu_do_interrupt(), which sets srr0 = env->nip - 4.
1653          */
1654         env->nip += 4;
1655         cs->exception_index = POWERPC_EXCP_PROGRAM;
1656         env->error_code = POWERPC_EXCP_INVAL;
1657         ppc_cpu_do_interrupt(cs);
1658     }
1659 
1660     return handle;
1661 }
1662 
1663 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1664 {
1665     PowerPCCPU *cpu = POWERPC_CPU(cs);
1666     CPUPPCState *env = &cpu->env;
1667     int ret;
1668 
1669     qemu_mutex_lock_iothread();
1670 
1671     switch (run->exit_reason) {
1672     case KVM_EXIT_DCR:
1673         if (run->dcr.is_write) {
1674             DPRINTF("handle dcr write\n");
1675             ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1676         } else {
1677             DPRINTF("handle dcr read\n");
1678             ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1679         }
1680         break;
1681     case KVM_EXIT_HLT:
1682         DPRINTF("handle halt\n");
1683         ret = kvmppc_handle_halt(cpu);
1684         break;
1685 #if defined(TARGET_PPC64)
1686     case KVM_EXIT_PAPR_HCALL:
1687         DPRINTF("handle PAPR hypercall\n");
1688         run->papr_hcall.ret = spapr_hypercall(cpu,
1689                                               run->papr_hcall.nr,
1690                                               run->papr_hcall.args);
1691         ret = 0;
1692         break;
1693 #endif
1694     case KVM_EXIT_EPR:
1695         DPRINTF("handle epr\n");
1696         run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1697         ret = 0;
1698         break;
1699     case KVM_EXIT_WATCHDOG:
1700         DPRINTF("handle watchdog expiry\n");
1701         watchdog_perform_action();
1702         ret = 0;
1703         break;
1704 
1705     case KVM_EXIT_DEBUG:
1706         DPRINTF("handle debug exception\n");
1707         if (kvm_handle_debug(cpu, run)) {
1708             ret = EXCP_DEBUG;
1709             break;
1710         }
1711         /* re-enter, this exception was guest-internal */
1712         ret = 0;
1713         break;
1714 
1715     default:
1716         fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1717         ret = -1;
1718         break;
1719     }
1720 
1721     qemu_mutex_unlock_iothread();
1722     return ret;
1723 }
1724 
1725 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1726 {
1727     CPUState *cs = CPU(cpu);
1728     uint32_t bits = tsr_bits;
1729     struct kvm_one_reg reg = {
1730         .id = KVM_REG_PPC_OR_TSR,
1731         .addr = (uintptr_t) &bits,
1732     };
1733 
1734     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1735 }
1736 
1737 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1738 {
1739 
1740     CPUState *cs = CPU(cpu);
1741     uint32_t bits = tsr_bits;
1742     struct kvm_one_reg reg = {
1743         .id = KVM_REG_PPC_CLEAR_TSR,
1744         .addr = (uintptr_t) &bits,
1745     };
1746 
1747     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1748 }
1749 
1750 int kvmppc_set_tcr(PowerPCCPU *cpu)
1751 {
1752     CPUState *cs = CPU(cpu);
1753     CPUPPCState *env = &cpu->env;
1754     uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1755 
1756     struct kvm_one_reg reg = {
1757         .id = KVM_REG_PPC_TCR,
1758         .addr = (uintptr_t) &tcr,
1759     };
1760 
1761     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1762 }
1763 
1764 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1765 {
1766     CPUState *cs = CPU(cpu);
1767     int ret;
1768 
1769     if (!kvm_enabled()) {
1770         return -1;
1771     }
1772 
1773     if (!cap_ppc_watchdog) {
1774         printf("warning: KVM does not support watchdog\n");
1775         return -1;
1776     }
1777 
1778     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1779     if (ret < 0) {
1780         fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1781                 __func__, strerror(-ret));
1782         return ret;
1783     }
1784 
1785     return ret;
1786 }
1787 
1788 static int read_cpuinfo(const char *field, char *value, int len)
1789 {
1790     FILE *f;
1791     int ret = -1;
1792     int field_len = strlen(field);
1793     char line[512];
1794 
1795     f = fopen("/proc/cpuinfo", "r");
1796     if (!f) {
1797         return -1;
1798     }
1799 
1800     do {
1801         if (!fgets(line, sizeof(line), f)) {
1802             break;
1803         }
1804         if (!strncmp(line, field, field_len)) {
1805             pstrcpy(value, len, line);
1806             ret = 0;
1807             break;
1808         }
1809     } while (*line);
1810 
1811     fclose(f);
1812 
1813     return ret;
1814 }
1815 
1816 uint32_t kvmppc_get_tbfreq(void)
1817 {
1818     char line[512];
1819     char *ns;
1820     uint32_t retval = NANOSECONDS_PER_SECOND;
1821 
1822     if (read_cpuinfo("timebase", line, sizeof(line))) {
1823         return retval;
1824     }
1825 
1826     if (!(ns = strchr(line, ':'))) {
1827         return retval;
1828     }
1829 
1830     ns++;
1831 
1832     return atoi(ns);
1833 }
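
/*
 * Illustrative note (not part of the original code): on a Power host,
 * /proc/cpuinfo typically carries a line such as
 *
 *     timebase        : 512000000
 *
 * read_cpuinfo("timebase", ...) above copies that whole line, and
 * kvmppc_get_tbfreq() then parses the number after the ':', roughly:
 *
 *     char line[512];
 *     if (!read_cpuinfo("timebase", line, sizeof(line))) {
 *         uint32_t tbfreq = atoi(strchr(line, ':') + 1);  // e.g. 512000000
 *     }
 *
 * The 512000000 value is only an example; hosts may report other
 * frequencies, and NANOSECONDS_PER_SECOND is returned as a fallback
 * when the line cannot be found or parsed.
 */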
1834 
1835 bool kvmppc_get_host_serial(char **value)
1836 {
1837     return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
1838                                NULL);
1839 }
1840 
1841 bool kvmppc_get_host_model(char **value)
1842 {
1843     return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
1844 }
1845 
1846 /* Try to find a device tree node for a CPU with clock-frequency property */
1847 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1848 {
1849     struct dirent *dirp;
1850     DIR *dp;
1851 
1852     if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1853         printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1854         return -1;
1855     }
1856 
1857     buf[0] = '\0';
1858     while ((dirp = readdir(dp)) != NULL) {
1859         FILE *f;
1860         snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1861                  dirp->d_name);
1862         f = fopen(buf, "r");
1863         if (f) {
1864             snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1865             fclose(f);
1866             break;
1867         }
1868         buf[0] = '\0';
1869     }
1870     closedir(dp);
1871     if (buf[0] == '\0') {
1872         printf("Unknown host!\n");
1873         return -1;
1874     }
1875 
1876     return 0;
1877 }
1878 
1879 static uint64_t kvmppc_read_int_dt(const char *filename)
1880 {
1881     union {
1882         uint32_t v32;
1883         uint64_t v64;
1884     } u;
1885     FILE *f;
1886     int len;
1887 
1888     f = fopen(filename, "rb");
1889     if (!f) {
1890         return -1;
1891     }
1892 
1893     len = fread(&u, 1, sizeof(u), f);
1894     fclose(f);
1895     switch (len) {
1896     case 4:
1897         /* property is a 32-bit quantity */
1898         return be32_to_cpu(u.v32);
1899     case 8:
1900         return be64_to_cpu(u.v64);
1901     }
1902 
1903     return 0;
1904 }
1905 
1906 /* Read a CPU node property from the host device tree that's a single
1907  * integer (32-bit or 64-bit).  Returns 0 if anything goes wrong
1908  * (can't find or open the property, or doesn't understand the
1909  * format) */
1910 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1911 {
1912     char buf[PATH_MAX], *tmp;
1913     uint64_t val;
1914 
1915     if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1916         return -1;
1917     }
1918 
1919     tmp = g_strdup_printf("%s/%s", buf, propname);
1920     val = kvmppc_read_int_dt(tmp);
1921     g_free(tmp);
1922 
1923     return val;
1924 }
1925 
1926 uint64_t kvmppc_get_clockfreq(void)
1927 {
1928     return kvmppc_read_int_cpu_dt("clock-frequency");
1929 }
1930 
1931 uint32_t kvmppc_get_vmx(void)
1932 {
1933     return kvmppc_read_int_cpu_dt("ibm,vmx");
1934 }
1935 
1936 uint32_t kvmppc_get_dfp(void)
1937 {
1938     return kvmppc_read_int_cpu_dt("ibm,dfp");
1939 }
1940 
1941 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1942 {
1943     PowerPCCPU *cpu = ppc_env_get_cpu(env);
1944     CPUState *cs = CPU(cpu);
1945 
1946     if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1947         !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1948         return 0;
1949     }
1950 
1951     return 1;
1952 }
1953 
1954 int kvmppc_get_hasidle(CPUPPCState *env)
1955 {
1956     struct kvm_ppc_pvinfo pvinfo;
1957 
1958     if (!kvmppc_get_pvinfo(env, &pvinfo) &&
1959         (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
1960         return 1;
1961     }
1962 
1963     return 0;
1964 }
1965 
1966 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
1967 {
1968     uint32_t *hc = (uint32_t*)buf;
1969     struct kvm_ppc_pvinfo pvinfo;
1970 
1971     if (!kvmppc_get_pvinfo(env, &pvinfo)) {
1972         memcpy(buf, pvinfo.hcall, buf_len);
1973         return 0;
1974     }
1975 
1976     /*
1977      * Fallback to always fail hypercalls regardless of endianness:
1978      *
1979      *     tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
1980      *     li r3, -1
1981      *     b .+8       (becomes nop in wrong endian)
1982      *     bswap32(li r3, -1)
1983      */
1984 
1985     hc[0] = cpu_to_be32(0x08000048);
1986     hc[1] = cpu_to_be32(0x3860ffff);
1987     hc[2] = cpu_to_be32(0x48000008);
1988     hc[3] = cpu_to_be32(bswap32(0x3860ffff));
1989 
1990     return 1;
1991 }
1992 
1993 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
1994 {
1995     return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
1996 }
1997 
1998 void kvmppc_enable_logical_ci_hcalls(void)
1999 {
2000     /*
2001      * FIXME: it would be nice to detect the case where we are using
2002      * a device that requires the in-kernel implementation of these
2003      * hcalls but the kernel lacks it, and to produce a warning in
2004      * that case.
2005      */
2006     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
2007     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
2008 }
2009 
2010 void kvmppc_enable_set_mode_hcall(void)
2011 {
2012     kvmppc_enable_hcall(kvm_state, H_SET_MODE);
2013 }
2014 
2015 void kvmppc_enable_clear_ref_mod_hcalls(void)
2016 {
2017     kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
2018     kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
2019 }
2020 
2021 void kvmppc_set_papr(PowerPCCPU *cpu)
2022 {
2023     CPUState *cs = CPU(cpu);
2024     int ret;
2025 
2026     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2027     if (ret) {
2028         error_report("This vCPU type or KVM version does not support PAPR");
2029         exit(1);
2030     }
2031 
2032     /* Update the capability flag so we sync the right information
2033      * with kvm */
2034     cap_papr = 1;
2035 }
2036 
2037 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr)
2038 {
2039     return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr);
2040 }
2041 
2042 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2043 {
2044     CPUState *cs = CPU(cpu);
2045     int ret;
2046 
2047     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2048     if (ret && mpic_proxy) {
2049         error_report("This KVM version does not support EPR");
2050         exit(1);
2051     }
2052 }
2053 
2054 int kvmppc_smt_threads(void)
2055 {
2056     return cap_ppc_smt ? cap_ppc_smt : 1;
2057 }
2058 
2059 #ifdef TARGET_PPC64
2060 off_t kvmppc_alloc_rma(void **rma)
2061 {
2062     off_t size;
2063     int fd;
2064     struct kvm_allocate_rma ret;
2065 
2066     /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
2067      * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
2068      *                      not necessary on this hardware
2069      * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
2070      *
2071      * FIXME: We should allow the user to force contiguous RMA
2072      * allocation in the cap_ppc_rma==1 case.
2073      */
2074     if (cap_ppc_rma < 2) {
2075         return 0;
2076     }
2077 
2078     fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
2079     if (fd < 0) {
2080         fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
2081                 strerror(errno));
2082         return -1;
2083     }
2084 
2085     size = MIN(ret.rma_size, 256ul << 20);
2086 
2087     *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2088     if (*rma == MAP_FAILED) {
2089         fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
2090         return -1;
2091     }
2092 
2093     return size;
2094 }
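
/*
 * Hypothetical caller sketch (illustration only, not the actual board
 * code): kvmppc_alloc_rma() above returns 0 when a contiguous RMA is
 * not required (or not supported), -1 on error, and otherwise the size
 * of the region it has just mapped at *rma:
 *
 *     void *rma;
 *     off_t rma_size = kvmppc_alloc_rma(&rma);
 *
 *     if (rma_size < 0) {
 *         // allocation or mapping failed
 *     } else if (rma_size == 0) {
 *         // no special RMA needed; ordinary guest RAM is fine
 *     } else {
 *         // use [rma, rma + rma_size) as the guest's real mode area
 *     }
 */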
2095 
2096 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2097 {
2098     struct kvm_ppc_smmu_info info;
2099     long rampagesize, best_page_shift;
2100     int i;
2101 
2102     if (cap_ppc_rma >= 2) {
2103         return current_size;
2104     }
2105 
2106     /* Find the largest hardware supported page size that's less than
2107      * or equal to the (logical) backing page size of guest RAM */
2108     kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
2109     rampagesize = qemu_getrampagesize();
2110     best_page_shift = 0;
2111 
2112     for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2113         struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2114 
2115         if (!sps->page_shift) {
2116             continue;
2117         }
2118 
2119         if ((sps->page_shift > best_page_shift)
2120             && ((1UL << sps->page_shift) <= rampagesize)) {
2121             best_page_shift = sps->page_shift;
2122         }
2123     }
2124 
2125     return MIN(current_size,
2126                1ULL << (best_page_shift + hash_shift - 7));
2127 }
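
/*
 * Worked example for the cap above (a purely arithmetic reading of the
 * expression): with 64 KiB backing pages (best_page_shift == 16) and a
 * 16 MiB hash table (hash_shift == 24), the RMA is limited to
 *
 *     1ULL << (16 + 24 - 7) == 8 GiB
 *
 * i.e. (HPT size / 128) * page size, since 1 << (hash_shift - 7) is the
 * hash table size divided by 128.  kvmppc_rma_size() returns the
 * smaller of this cap and current_size.
 */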
2128 #endif
2129 
2130 bool kvmppc_spapr_use_multitce(void)
2131 {
2132     return cap_spapr_multitce;
2133 }
2134 
2135 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
2136                               bool need_vfio)
2137 {
2138     struct kvm_create_spapr_tce args = {
2139         .liobn = liobn,
2140         .window_size = window_size,
2141     };
2142     long len;
2143     int fd;
2144     void *table;
2145 
2146     /* Must set fd to -1 so we don't try to munmap when called for
2147      * destroying the table, which the upper layers -will- do
2148      */
2149     *pfd = -1;
2150     if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2151         return NULL;
2152     }
2153 
2154     fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2155     if (fd < 0) {
2156         fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2157                 liobn);
2158         return NULL;
2159     }
2160 
2161     len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
2162     /* FIXME: round this up to page size */
2163 
2164     table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2165     if (table == MAP_FAILED) {
2166         fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2167                 liobn);
2168         close(fd);
2169         return NULL;
2170     }
2171 
2172     *pfd = fd;
2173     return table;
2174 }
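
/*
 * Sizing note (illustrative, assuming the usual 4 KiB SPAPR TCE page
 * size): the table mapped above holds one uint64_t TCE per IOMMU page,
 * so a 1 GiB DMA window needs
 *
 *     (1 GiB / 4 KiB) * 8 bytes == 2 MiB
 *
 * of table, which is the 'len' computed from window_size in
 * kvmppc_create_spapr_tce().
 */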
2175 
2176 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
2177 {
2178     long len;
2179 
2180     if (fd < 0) {
2181         return -1;
2182     }
2183 
2184     len = nb_table * sizeof(uint64_t);
2185     if ((munmap(table, len) < 0) ||
2186         (close(fd) < 0)) {
2187         fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
2188                 strerror(errno));
2189         /* Leak the table */
2190     }
2191 
2192     return 0;
2193 }
2194 
2195 int kvmppc_reset_htab(int shift_hint)
2196 {
2197     uint32_t shift = shift_hint;
2198 
2199     if (!kvm_enabled()) {
2200         /* Full emulation, tell caller to allocate htab itself */
2201         return 0;
2202     }
2203     if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2204         int ret;
2205         ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2206         if (ret == -ENOTTY) {
2207             /* At least some versions of PR KVM advertise the
2208              * capability, but don't implement the ioctl().  Oops.
2209              * Return 0 so that we allocate the htab in qemu, as is
2210              * correct for PR. */
2211             return 0;
2212         } else if (ret < 0) {
2213             return ret;
2214         }
2215         return shift;
2216     }
2217 
2218     /* We have a kernel that predates the htab reset calls.  For PR
2219      * KVM we need to allocate the htab ourselves; an HV KVM of this
2220      * era has already allocated a fixed 16MB hash table. */
2221     if (kvmppc_is_pr(kvm_state)) {
2222         /* PR - tell caller to allocate htab */
2223         return 0;
2224     } else {
2225         /* HV - assume 16MB kernel allocated htab */
2226         return 24;
2227     }
2228 }
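
/*
 * Summary of the return convention above: 0 tells the caller to
 * allocate the hash table itself (full emulation or PR KVM); a positive
 * value N means the kernel owns a 2^N byte HPT (e.g. 24 -> 16 MiB); a
 * negative value is an error from the KVM_PPC_ALLOCATE_HTAB ioctl.
 */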
2229 
2230 static inline uint32_t mfpvr(void)
2231 {
2232     uint32_t pvr;
2233 
2234     asm ("mfpvr %0"
2235          : "=r"(pvr));
2236     return pvr;
2237 }
2238 
2239 static void alter_insns(uint64_t *word, uint64_t flags, bool on)
2240 {
2241     if (on) {
2242         *word |= flags;
2243     } else {
2244         *word &= ~flags;
2245     }
2246 }
2247 
2248 static void kvmppc_host_cpu_initfn(Object *obj)
2249 {
2250     assert(kvm_enabled());
2251 }
2252 
2253 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2254 {
2255     DeviceClass *dc = DEVICE_CLASS(oc);
2256     PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2257     uint32_t vmx = kvmppc_get_vmx();
2258     uint32_t dfp = kvmppc_get_dfp();
2259     uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2260     uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2261 
2262     /* Now fix up the class with information we can query from the host */
2263     pcc->pvr = mfpvr();
2264 
2265     if (vmx != -1) {
2266         /* Only override when we know what the host supports */
2267         alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
2268         alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
2269     }
2270     if (dfp != -1) {
2271         /* Only override when we know what the host supports */
2272         alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
2273     }
2274 
2275     if (dcache_size != -1) {
2276         pcc->l1_dcache_size = dcache_size;
2277     }
2278 
2279     if (icache_size != -1) {
2280         pcc->l1_icache_size = icache_size;
2281     }
2282 
2283     /* Reason: kvmppc_host_cpu_initfn() dies when !kvm_enabled() */
2284     dc->cannot_destroy_with_object_finalize_yet = true;
2285 }
2286 
2287 bool kvmppc_has_cap_epr(void)
2288 {
2289     return cap_epr;
2290 }
2291 
2292 bool kvmppc_has_cap_htab_fd(void)
2293 {
2294     return cap_htab_fd;
2295 }
2296 
2297 bool kvmppc_has_cap_fixup_hcalls(void)
2298 {
2299     return cap_fixup_hcalls;
2300 }
2301 
2302 bool kvmppc_has_cap_htm(void)
2303 {
2304     return cap_htm;
2305 }
2306 
2307 static PowerPCCPUClass *ppc_cpu_get_family_class(PowerPCCPUClass *pcc)
2308 {
2309     ObjectClass *oc = OBJECT_CLASS(pcc);
2310 
2311     while (oc && !object_class_is_abstract(oc)) {
2312         oc = object_class_get_parent(oc);
2313     }
2314     assert(oc);
2315 
2316     return POWERPC_CPU_CLASS(oc);
2317 }
2318 
2319 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
2320 {
2321     uint32_t host_pvr = mfpvr();
2322     PowerPCCPUClass *pvr_pcc;
2323 
2324     pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2325     if (pvr_pcc == NULL) {
2326         pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2327     }
2328 
2329     return pvr_pcc;
2330 }
2331 
2332 static int kvm_ppc_register_host_cpu_type(void)
2333 {
2334     TypeInfo type_info = {
2335         .name = TYPE_HOST_POWERPC_CPU,
2336         .instance_init = kvmppc_host_cpu_initfn,
2337         .class_init = kvmppc_host_cpu_class_init,
2338     };
2339     PowerPCCPUClass *pvr_pcc;
2340     DeviceClass *dc;
2341     int i;
2342 
2343     pvr_pcc = kvm_ppc_get_host_cpu_class();
2344     if (pvr_pcc == NULL) {
2345         return -1;
2346     }
2347     type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2348     type_register(&type_info);
2349 
2350 #if defined(TARGET_PPC64)
2351     type_info.name = g_strdup_printf("%s-"TYPE_SPAPR_CPU_CORE, "host");
2352     type_info.parent = TYPE_SPAPR_CPU_CORE;
2353     type_info.instance_size = sizeof(sPAPRCPUCore);
2354     type_info.instance_init = NULL;
2355     type_info.class_init = spapr_cpu_core_class_init;
2356     type_info.class_data = (void *) "host";
2357     type_register(&type_info);
2358     g_free((void *)type_info.name);
2359 #endif
2360 
2361     /*
2362      * Update generic CPU family class alias (e.g. on a POWER8NVL host,
2363      * we want "POWER8" to be a "family" alias that points to the current
2364      * host CPU type, too)
2365      */
2366     dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc));
2367     for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) {
2368         if (strcmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) {
2369             ObjectClass *oc = OBJECT_CLASS(pvr_pcc);
2370             char *suffix;
2371 
2372             ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc));
2373             suffix = strstr(ppc_cpu_aliases[i].model, "-"TYPE_POWERPC_CPU);
2374             if (suffix) {
2375                 *suffix = 0;
2376             }
2377             ppc_cpu_aliases[i].oc = oc;
2378             break;
2379         }
2380     }
2381 
2382     return 0;
2383 }
2384 
2385 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2386 {
2387     struct kvm_rtas_token_args args = {
2388         .token = token,
2389     };
2390 
2391     if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2392         return -ENOENT;
2393     }
2394 
2395     strncpy(args.name, function, sizeof(args.name));
2396 
2397     return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2398 }
2399 
2400 int kvmppc_get_htab_fd(bool write)
2401 {
2402     struct kvm_get_htab_fd s = {
2403         .flags = write ? KVM_GET_HTAB_WRITE : 0,
2404         .start_index = 0,
2405     };
2406 
2407     if (!cap_htab_fd) {
2408         fprintf(stderr, "KVM version doesn't support saving the hash table\n");
2409         return -1;
2410     }
2411 
2412     return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2413 }
2414 
2415 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2416 {
2417     int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2418     uint8_t buf[bufsize];
2419     ssize_t rc;
2420 
2421     do {
2422         rc = read(fd, buf, bufsize);
2423         if (rc < 0) {
2424             fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2425                     strerror(errno));
2426             return rc;
2427         } else if (rc) {
2428             uint8_t *buffer = buf;
2429             ssize_t n = rc;
2430             while (n) {
2431                 struct kvm_get_htab_header *head =
2432                     (struct kvm_get_htab_header *) buffer;
2433                 size_t chunksize = sizeof(*head) +
2434                      HASH_PTE_SIZE_64 * head->n_valid;
2435 
2436                 qemu_put_be32(f, head->index);
2437                 qemu_put_be16(f, head->n_valid);
2438                 qemu_put_be16(f, head->n_invalid);
2439                 qemu_put_buffer(f, (void *)(head + 1),
2440                                 HASH_PTE_SIZE_64 * head->n_valid);
2441 
2442                 buffer += chunksize;
2443                 n -= chunksize;
2444             }
2445         }
2446     } while ((rc != 0)
2447              && ((max_ns < 0)
2448                  || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2449 
2450     return (rc == 0) ? 1 : 0;
2451 }
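
/*
 * Stream format used by kvmppc_save_htab() above and
 * kvmppc_load_htab_chunk() below (see struct kvm_get_htab_header in
 * linux/kvm.h): each chunk is a small header followed by the valid
 * HPTEs it describes,
 *
 *     __u32 index        first HPTE index covered by this chunk
 *     __u16 n_valid      number of valid HPTEs that follow
 *     __u16 n_invalid    number of invalid HPTEs (no data follows)
 *     n_valid * HASH_PTE_SIZE_64 bytes of HPTE contents
 *
 * so a chunk occupies sizeof(header) + n_valid * HASH_PTE_SIZE_64
 * bytes, matching the 'chunksize' computed in both functions.
 */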
2452 
2453 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2454                            uint16_t n_valid, uint16_t n_invalid)
2455 {
2456     struct kvm_get_htab_header *buf;
2457     size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
2458     ssize_t rc;
2459 
2460     buf = alloca(chunksize);
2461     buf->index = index;
2462     buf->n_valid = n_valid;
2463     buf->n_invalid = n_invalid;
2464 
2465     qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
2466 
2467     rc = write(fd, buf, chunksize);
2468     if (rc < 0) {
2469         fprintf(stderr, "Error writing KVM hash table: %s\n",
2470                 strerror(errno));
2471         return rc;
2472     }
2473     if (rc != chunksize) {
2474         /* We should never get a short write on a single chunk */
2475         fprintf(stderr, "Short write, restoring KVM hash table\n");
2476         return -1;
2477     }
2478     return 0;
2479 }
2480 
2481 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2482 {
2483     return true;
2484 }
2485 
2486 void kvm_arch_init_irq_routing(KVMState *s)
2487 {
2488 }
2489 
2490 void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n)
2491 {
2492     struct kvm_get_htab_fd ghf = {
2493         .flags = 0,
2494         .start_index = ptex,
2495     };
2496     int fd, rc;
2497     int i;
2498 
2499     fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2500     if (fd < 0) {
2501         hw_error("kvmppc_read_hptes: Unable to open HPT fd");
2502     }
2503 
2504     i = 0;
2505     while (i < n) {
2506         struct kvm_get_htab_header *hdr;
2507         int m = n < HPTES_PER_GROUP ? n : HPTES_PER_GROUP;
2508         char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64];
2509 
2510         rc = read(fd, buf, sizeof(buf));
2511         if (rc < 0) {
2512             hw_error("kvmppc_read_hptes: Unable to read HPTEs");
2513         }
2514 
2515         hdr = (struct kvm_get_htab_header *)buf;
2516         while ((i < n) && ((char *)hdr < (buf + rc))) {
2517             int invalid = hdr->n_invalid;
2518 
2519             if (hdr->index != (ptex + i)) {
2520                 hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32
2521                          " != (%"HWADDR_PRIu" + %d)", hdr->index, ptex, i);
2522             }
2523 
2524             memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * hdr->n_valid);
2525             i += hdr->n_valid;
2526 
2527             if ((n - i) < invalid) {
2528                 invalid = n - i;
2529             }
2530             memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64);
2531             i += hdr->n_invalid;
2532 
2533             hdr = (struct kvm_get_htab_header *)
2534                 ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid);
2535         }
2536     }
2537 
2538     close(fd);
2539 }
2540 
2541 void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1)
2542 {
2543     int fd, rc;
2544     struct kvm_get_htab_fd ghf;
2545     struct {
2546         struct kvm_get_htab_header hdr;
2547         uint64_t pte0;
2548         uint64_t pte1;
2549     } buf;
2550 
2551     ghf.flags = 0;
2552     ghf.start_index = 0;     /* Ignored */
2553     fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2554     if (fd < 0) {
2555         hw_error("kvmppc_write_hpte: Unable to open HPT fd");
2556     }
2557 
2558     buf.hdr.n_valid = 1;
2559     buf.hdr.n_invalid = 0;
2560     buf.hdr.index = ptex;
2561     buf.pte0 = cpu_to_be64(pte0);
2562     buf.pte1 = cpu_to_be64(pte1);
2563 
2564     rc = write(fd, &buf, sizeof(buf));
2565     if (rc != sizeof(buf)) {
2566         hw_error("kvmppc_write_hpte: Unable to update KVM HPT");
2567     }
2568     close(fd);
2569 }
2570 
2571 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2572                              uint64_t address, uint32_t data, PCIDevice *dev)
2573 {
2574     return 0;
2575 }
2576 
2577 int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
2578                                 int vector, PCIDevice *dev)
2579 {
2580     return 0;
2581 }
2582 
2583 int kvm_arch_release_virq_post(int virq)
2584 {
2585     return 0;
2586 }
2587 
2588 int kvm_arch_msi_data_to_gsi(uint32_t data)
2589 {
2590     return data & 0xffff;
2591 }
2592 
2593 int kvmppc_enable_hwrng(void)
2594 {
2595     if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2596         return -1;
2597     }
2598 
2599     return kvmppc_enable_hcall(kvm_state, H_RANDOM);
2600 }
2601