xref: /openbmc/qemu/target/ppc/kvm.c (revision d36f7de8)
1 /*
2  * PowerPC implementation of KVM hooks
3  *
4  * Copyright IBM Corp. 2007
5  * Copyright (C) 2011 Freescale Semiconductor, Inc.
6  *
7  * Authors:
8  *  Jerone Young <jyoung5@us.ibm.com>
9  *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10  *  Hollis Blanchard <hollisb@us.ibm.com>
11  *
12  * This work is licensed under the terms of the GNU GPL, version 2 or later.
13  * See the COPYING file in the top-level directory.
14  *
15  */
16 
17 #include "qemu/osdep.h"
18 #include <dirent.h>
19 #include <sys/ioctl.h>
20 #include <sys/vfs.h>
21 
22 #include <linux/kvm.h>
23 
24 #include "qemu-common.h"
25 #include "qapi/error.h"
26 #include "qemu/error-report.h"
27 #include "cpu.h"
28 #include "cpu-models.h"
29 #include "qemu/timer.h"
30 #include "sysemu/sysemu.h"
31 #include "sysemu/hw_accel.h"
32 #include "kvm_ppc.h"
33 #include "sysemu/cpus.h"
34 #include "sysemu/device_tree.h"
35 #include "mmu-hash64.h"
36 
37 #include "hw/sysbus.h"
38 #include "hw/ppc/spapr.h"
39 #include "hw/ppc/spapr_vio.h"
40 #include "hw/ppc/spapr_cpu_core.h"
41 #include "hw/ppc/ppc.h"
42 #include "sysemu/watchdog.h"
43 #include "trace.h"
44 #include "exec/gdbstub.h"
45 #include "exec/memattrs.h"
46 #include "exec/ram_addr.h"
47 #include "sysemu/hostmem.h"
48 #include "qemu/cutils.h"
49 #include "qemu/mmap-alloc.h"
50 #include "elf.h"
51 #include "sysemu/kvm_int.h"
52 
/* Uncomment the next line to get debug traces on stderr. */
/* #define DEBUG_KVM */

/*
 * DPRINTF(): printf-style debug trace.  Writes to stderr when DEBUG_KVM
 * is defined and expands to an empty statement otherwise (the arguments
 * are then not evaluated).
 */
#if defined(DEBUG_KVM)
#define DPRINTF(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define DPRINTF(fmt, ...) \
    do { } while (0)
#endif

/*
 * Directory below /proc/device-tree; presumably where the host CPU nodes
 * are looked up (its users are outside this part of the file).
 */
#define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"
64 
65 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
66     KVM_CAP_LAST_INFO
67 };
68 
/*
 * Cached KVM capability probe results.  All of them start out as zero
 * (static storage); most are assigned in kvm_arch_init().
 */

/* Interrupt delivery */
static int cap_interrupt_unset;
static int cap_interrupt_level;

/* Register state access */
static int cap_segstate;
static int cap_booke_sregs;

/* SMT */
static int cap_ppc_smt;
static int cap_ppc_smt_possible;

/* sPAPR TCE tables */
static int cap_spapr_tce;
static int cap_spapr_tce_64;
static int cap_spapr_multitce;
static int cap_spapr_vfio;

static int cap_hior;
static int cap_one_reg;
static int cap_epr;
static int cap_ppc_watchdog;
static int cap_papr;
static int cap_htab_fd;
static int cap_fixup_hcalls;
static int cap_htm;             /* Hardware transactional memory support */
static int cap_mmu_radix;
static int cap_mmu_hash_v3;
static int cap_resize_hpt;
static int cap_ppc_pvr_compat;
static int cap_ppc_safe_cache;
static int cap_ppc_safe_bounds_check;
static int cap_ppc_safe_indirect_branch;

/* Value of KVM_REG_PPC_DEBUG_INST, read in kvm_arch_init_vcpu() */
static uint32_t debug_inst_opcode;
96 
97 /* XXX We have a race condition where we actually have a level triggered
98  *     interrupt, but the infrastructure can't expose that yet, so the guest
99  *     takes but ignores it, goes to sleep and never gets notified that there's
100  *     still an interrupt pending.
101  *
102  *     As a quick workaround, let's just wake up again 20 ms after we injected
103  *     an interrupt. That way we can assure that we're always reinjecting
104  *     interrupts in case the guest swallowed them.
105  */
106 static QEMUTimer *idle_timer;
107 
108 static void kvm_kick_cpu(void *opaque)
109 {
110     PowerPCCPU *cpu = opaque;
111 
112     qemu_cpu_kick(CPU(cpu));
113 }
114 
115 /* Check whether we are running with KVM-PR (instead of KVM-HV).  This
116  * should only be used for fallback tests - generally we should use
117  * explicit capabilities for the features we want, rather than
118  * assuming what is/isn't available depending on the KVM variant. */
119 static bool kvmppc_is_pr(KVMState *ks)
120 {
121     /* Assume KVM-PR if the GET_PVINFO capability is available */
122     return kvm_vm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
123 }
124 
125 static int kvm_ppc_register_host_cpu_type(MachineState *ms);
126 static void kvmppc_get_cpu_characteristics(KVMState *s);
127 
128 int kvm_arch_init(MachineState *ms, KVMState *s)
129 {
130     cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
131     cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
132     cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
133     cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
134     cap_ppc_smt_possible = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT_POSSIBLE);
135     cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
136     cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64);
137     cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
138     cap_spapr_vfio = kvm_vm_check_extension(s, KVM_CAP_SPAPR_TCE_VFIO);
139     cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
140     cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
141     cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
142     cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
143     /* Note: we don't set cap_papr here, because this capability is
144      * only activated after this by kvmppc_set_papr() */
145     cap_htab_fd = kvm_vm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
146     cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
147     cap_ppc_smt = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT);
148     cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
149     cap_mmu_radix = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_RADIX);
150     cap_mmu_hash_v3 = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_HASH_V3);
151     cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT);
152     kvmppc_get_cpu_characteristics(s);
153     /*
154      * Note: setting it to false because there is not such capability
155      * in KVM at this moment.
156      *
157      * TODO: call kvm_vm_check_extension() with the right capability
158      * after the kernel starts implementing it.*/
159     cap_ppc_pvr_compat = false;
160 
161     if (!cap_interrupt_level) {
162         fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
163                         "VM to stall at times!\n");
164     }
165 
166     kvm_ppc_register_host_cpu_type(ms);
167 
168     return 0;
169 }
170 
171 int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
172 {
173     return 0;
174 }
175 
176 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
177 {
178     CPUPPCState *cenv = &cpu->env;
179     CPUState *cs = CPU(cpu);
180     struct kvm_sregs sregs;
181     int ret;
182 
183     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
184         /* What we're really trying to say is "if we're on BookE, we use
185            the native PVR for now". This is the only sane way to check
186            it though, so we potentially confuse users that they can run
187            BookE guests on BookS. Let's hope nobody dares enough :) */
188         return 0;
189     } else {
190         if (!cap_segstate) {
191             fprintf(stderr, "kvm error: missing PVR setting capability\n");
192             return -ENOSYS;
193         }
194     }
195 
196     ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
197     if (ret) {
198         return ret;
199     }
200 
201     sregs.pvr = cenv->spr[SPR_PVR];
202     return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
203 }
204 
205 /* Set up a shared TLB array with KVM */
206 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
207 {
208     CPUPPCState *env = &cpu->env;
209     CPUState *cs = CPU(cpu);
210     struct kvm_book3e_206_tlb_params params = {};
211     struct kvm_config_tlb cfg = {};
212     unsigned int entries = 0;
213     int ret, i;
214 
215     if (!kvm_enabled() ||
216         !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
217         return 0;
218     }
219 
220     assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
221 
222     for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
223         params.tlb_sizes[i] = booke206_tlb_size(env, i);
224         params.tlb_ways[i] = booke206_tlb_ways(env, i);
225         entries += params.tlb_sizes[i];
226     }
227 
228     assert(entries == env->nb_tlb);
229     assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
230 
231     env->tlb_dirty = true;
232 
233     cfg.array = (uintptr_t)env->tlb.tlbm;
234     cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
235     cfg.params = (uintptr_t)&params;
236     cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
237 
238     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
239     if (ret < 0) {
240         fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
241                 __func__, strerror(-ret));
242         return ret;
243     }
244 
245     env->kvm_sw_tlb = true;
246     return 0;
247 }
248 
249 
250 #if defined(TARGET_PPC64)
251 static void kvm_get_smmu_info(struct kvm_ppc_smmu_info *info, Error **errp)
252 {
253     int ret;
254 
255     assert(kvm_state != NULL);
256 
257     if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
258         error_setg(errp, "KVM doesn't expose the MMU features it supports");
259         error_append_hint(errp, "Consider switching to a newer KVM\n");
260         return;
261     }
262 
263     ret = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_SMMU_INFO, info);
264     if (ret == 0) {
265         return;
266     }
267 
268     error_setg_errno(errp, -ret,
269                      "KVM failed to provide the MMU features it supports");
270 }
271 
272 struct ppc_radix_page_info *kvm_get_radix_page_info(void)
273 {
274     KVMState *s = KVM_STATE(current_machine->accelerator);
275     struct ppc_radix_page_info *radix_page_info;
276     struct kvm_ppc_rmmu_info rmmu_info;
277     int i;
278 
279     if (!kvm_check_extension(s, KVM_CAP_PPC_MMU_RADIX)) {
280         return NULL;
281     }
282     if (kvm_vm_ioctl(s, KVM_PPC_GET_RMMU_INFO, &rmmu_info)) {
283         return NULL;
284     }
285     radix_page_info = g_malloc0(sizeof(*radix_page_info));
286     radix_page_info->count = 0;
287     for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) {
288         if (rmmu_info.ap_encodings[i]) {
289             radix_page_info->entries[i] = rmmu_info.ap_encodings[i];
290             radix_page_info->count++;
291         }
292     }
293     return radix_page_info;
294 }
295 
296 target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu,
297                                      bool radix, bool gtse,
298                                      uint64_t proc_tbl)
299 {
300     CPUState *cs = CPU(cpu);
301     int ret;
302     uint64_t flags = 0;
303     struct kvm_ppc_mmuv3_cfg cfg = {
304         .process_table = proc_tbl,
305     };
306 
307     if (radix) {
308         flags |= KVM_PPC_MMUV3_RADIX;
309     }
310     if (gtse) {
311         flags |= KVM_PPC_MMUV3_GTSE;
312     }
313     cfg.flags = flags;
314     ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_CONFIGURE_V3_MMU, &cfg);
315     switch (ret) {
316     case 0:
317         return H_SUCCESS;
318     case -EINVAL:
319         return H_PARAMETER;
320     case -ENODEV:
321         return H_NOT_AVAILABLE;
322     default:
323         return H_HARDWARE;
324     }
325 }
326 
327 bool kvmppc_hpt_needs_host_contiguous_pages(void)
328 {
329     static struct kvm_ppc_smmu_info smmu_info;
330 
331     if (!kvm_enabled()) {
332         return false;
333     }
334 
335     kvm_get_smmu_info(&smmu_info, &error_fatal);
336     return !!(smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL);
337 }
338 
339 void kvm_check_mmu(PowerPCCPU *cpu, Error **errp)
340 {
341     struct kvm_ppc_smmu_info smmu_info;
342     int iq, ik, jq, jk;
343     Error *local_err = NULL;
344 
345     /* For now, we only have anything to check on hash64 MMUs */
346     if (!cpu->hash64_opts || !kvm_enabled()) {
347         return;
348     }
349 
350     kvm_get_smmu_info(&smmu_info, &local_err);
351     if (local_err) {
352         error_propagate(errp, local_err);
353         return;
354     }
355 
356     if (ppc_hash64_has(cpu, PPC_HASH64_1TSEG)
357         && !(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
358         error_setg(errp,
359                    "KVM does not support 1TiB segments which guest expects");
360         return;
361     }
362 
363     if (smmu_info.slb_size < cpu->hash64_opts->slb_size) {
364         error_setg(errp, "KVM only supports %u SLB entries, but guest needs %u",
365                    smmu_info.slb_size, cpu->hash64_opts->slb_size);
366         return;
367     }
368 
369     /*
370      * Verify that every pagesize supported by the cpu model is
371      * supported by KVM with the same encodings
372      */
373     for (iq = 0; iq < ARRAY_SIZE(cpu->hash64_opts->sps); iq++) {
374         PPCHash64SegmentPageSizes *qsps = &cpu->hash64_opts->sps[iq];
375         struct kvm_ppc_one_seg_page_size *ksps;
376 
377         for (ik = 0; ik < ARRAY_SIZE(smmu_info.sps); ik++) {
378             if (qsps->page_shift == smmu_info.sps[ik].page_shift) {
379                 break;
380             }
381         }
382         if (ik >= ARRAY_SIZE(smmu_info.sps)) {
383             error_setg(errp, "KVM doesn't support for base page shift %u",
384                        qsps->page_shift);
385             return;
386         }
387 
388         ksps = &smmu_info.sps[ik];
389         if (ksps->slb_enc != qsps->slb_enc) {
390             error_setg(errp,
391 "KVM uses SLB encoding 0x%x for page shift %u, but guest expects 0x%x",
392                        ksps->slb_enc, ksps->page_shift, qsps->slb_enc);
393             return;
394         }
395 
396         for (jq = 0; jq < ARRAY_SIZE(qsps->enc); jq++) {
397             for (jk = 0; jk < ARRAY_SIZE(ksps->enc); jk++) {
398                 if (qsps->enc[jq].page_shift == ksps->enc[jk].page_shift) {
399                     break;
400                 }
401             }
402 
403             if (jk >= ARRAY_SIZE(ksps->enc)) {
404                 error_setg(errp, "KVM doesn't support page shift %u/%u",
405                            qsps->enc[jq].page_shift, qsps->page_shift);
406                 return;
407             }
408             if (qsps->enc[jq].pte_enc != ksps->enc[jk].pte_enc) {
409                 error_setg(errp,
410 "KVM uses PTE encoding 0x%x for page shift %u/%u, but guest expects 0x%x",
411                            ksps->enc[jk].pte_enc, qsps->enc[jq].page_shift,
412                            qsps->page_shift, qsps->enc[jq].pte_enc);
413                 return;
414             }
415         }
416     }
417 
418     if (ppc_hash64_has(cpu, PPC_HASH64_CI_LARGEPAGE)) {
419         /* Mostly what guest pagesizes we can use are related to the
420          * host pages used to map guest RAM, which is handled in the
421          * platform code. Cache-Inhibited largepages (64k) however are
422          * used for I/O, so if they're mapped to the host at all it
423          * will be a normal mapping, not a special hugepage one used
424          * for RAM. */
425         if (getpagesize() < 0x10000) {
426             error_setg(errp,
427                        "KVM can't supply 64kiB CI pages, which guest expects");
428         }
429     }
430 }
#endif /* TARGET_PPC64 */
432 
433 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
434 {
435     return POWERPC_CPU(cpu)->vcpu_id;
436 }
437 
438 /* e500 supports 2 h/w breakpoint and 2 watchpoint.
439  * book3s supports only 1 watchpoint, so array size
440  * of 4 is sufficient for now.
441  */
442 #define MAX_HW_BKPTS 4
443 
444 static struct HWBreakpoint {
445     target_ulong addr;
446     int type;
447 } hw_debug_points[MAX_HW_BKPTS];
448 
449 static CPUWatchpoint hw_watchpoint;
450 
451 /* Default there is no breakpoint and watchpoint supported */
452 static int max_hw_breakpoint;
453 static int max_hw_watchpoint;
454 static int nb_hw_breakpoint;
455 static int nb_hw_watchpoint;
456 
457 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
458 {
459     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
460         max_hw_breakpoint = 2;
461         max_hw_watchpoint = 2;
462     }
463 
464     if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
465         fprintf(stderr, "Error initializing h/w breakpoints\n");
466         return;
467     }
468 }
469 
470 int kvm_arch_init_vcpu(CPUState *cs)
471 {
472     PowerPCCPU *cpu = POWERPC_CPU(cs);
473     CPUPPCState *cenv = &cpu->env;
474     int ret;
475 
476     /* Synchronize sregs with kvm */
477     ret = kvm_arch_sync_sregs(cpu);
478     if (ret) {
479         if (ret == -EINVAL) {
480             error_report("Register sync failed... If you're using kvm-hv.ko,"
481                          " only \"-cpu host\" is possible");
482         }
483         return ret;
484     }
485 
486     idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
487 
488     switch (cenv->mmu_model) {
489     case POWERPC_MMU_BOOKE206:
490         /* This target supports access to KVM's guest TLB */
491         ret = kvm_booke206_tlb_init(cpu);
492         break;
493     case POWERPC_MMU_2_07:
494         if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
495             /* KVM-HV has transactional memory on POWER8 also without the
496              * KVM_CAP_PPC_HTM extension, so enable it here instead as
497              * long as it's availble to userspace on the host. */
498             if (qemu_getauxval(AT_HWCAP2) & PPC_FEATURE2_HAS_HTM) {
499                 cap_htm = true;
500             }
501         }
502         break;
503     default:
504         break;
505     }
506 
507     kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
508     kvmppc_hw_debug_points_init(cenv);
509 
510     return ret;
511 }
512 
513 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
514 {
515     CPUPPCState *env = &cpu->env;
516     CPUState *cs = CPU(cpu);
517     struct kvm_dirty_tlb dirty_tlb;
518     unsigned char *bitmap;
519     int ret;
520 
521     if (!env->kvm_sw_tlb) {
522         return;
523     }
524 
525     bitmap = g_malloc((env->nb_tlb + 7) / 8);
526     memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
527 
528     dirty_tlb.bitmap = (uintptr_t)bitmap;
529     dirty_tlb.num_dirty = env->nb_tlb;
530 
531     ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
532     if (ret) {
533         fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
534                 __func__, strerror(-ret));
535     }
536 
537     g_free(bitmap);
538 }
539 
540 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
541 {
542     PowerPCCPU *cpu = POWERPC_CPU(cs);
543     CPUPPCState *env = &cpu->env;
544     union {
545         uint32_t u32;
546         uint64_t u64;
547     } val;
548     struct kvm_one_reg reg = {
549         .id = id,
550         .addr = (uintptr_t) &val,
551     };
552     int ret;
553 
554     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
555     if (ret != 0) {
556         trace_kvm_failed_spr_get(spr, strerror(errno));
557     } else {
558         switch (id & KVM_REG_SIZE_MASK) {
559         case KVM_REG_SIZE_U32:
560             env->spr[spr] = val.u32;
561             break;
562 
563         case KVM_REG_SIZE_U64:
564             env->spr[spr] = val.u64;
565             break;
566 
567         default:
568             /* Don't handle this size yet */
569             abort();
570         }
571     }
572 }
573 
574 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
575 {
576     PowerPCCPU *cpu = POWERPC_CPU(cs);
577     CPUPPCState *env = &cpu->env;
578     union {
579         uint32_t u32;
580         uint64_t u64;
581     } val;
582     struct kvm_one_reg reg = {
583         .id = id,
584         .addr = (uintptr_t) &val,
585     };
586     int ret;
587 
588     switch (id & KVM_REG_SIZE_MASK) {
589     case KVM_REG_SIZE_U32:
590         val.u32 = env->spr[spr];
591         break;
592 
593     case KVM_REG_SIZE_U64:
594         val.u64 = env->spr[spr];
595         break;
596 
597     default:
598         /* Don't handle this size yet */
599         abort();
600     }
601 
602     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
603     if (ret != 0) {
604         trace_kvm_failed_spr_set(spr, strerror(errno));
605     }
606 }
607 
608 static int kvm_put_fp(CPUState *cs)
609 {
610     PowerPCCPU *cpu = POWERPC_CPU(cs);
611     CPUPPCState *env = &cpu->env;
612     struct kvm_one_reg reg;
613     int i;
614     int ret;
615 
616     if (env->insns_flags & PPC_FLOAT) {
617         uint64_t fpscr = env->fpscr;
618         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
619 
620         reg.id = KVM_REG_PPC_FPSCR;
621         reg.addr = (uintptr_t)&fpscr;
622         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
623         if (ret < 0) {
624             DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
625             return ret;
626         }
627 
628         for (i = 0; i < 32; i++) {
629             uint64_t vsr[2];
630 
631 #ifdef HOST_WORDS_BIGENDIAN
632             vsr[0] = float64_val(env->fpr[i]);
633             vsr[1] = env->vsr[i];
634 #else
635             vsr[0] = env->vsr[i];
636             vsr[1] = float64_val(env->fpr[i]);
637 #endif
638             reg.addr = (uintptr_t) &vsr;
639             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
640 
641             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
642             if (ret < 0) {
643                 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
644                         i, strerror(errno));
645                 return ret;
646             }
647         }
648     }
649 
650     if (env->insns_flags & PPC_ALTIVEC) {
651         reg.id = KVM_REG_PPC_VSCR;
652         reg.addr = (uintptr_t)&env->vscr;
653         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
654         if (ret < 0) {
655             DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
656             return ret;
657         }
658 
659         for (i = 0; i < 32; i++) {
660             reg.id = KVM_REG_PPC_VR(i);
661             reg.addr = (uintptr_t)&env->avr[i];
662             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
663             if (ret < 0) {
664                 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
665                 return ret;
666             }
667         }
668     }
669 
670     return 0;
671 }
672 
673 static int kvm_get_fp(CPUState *cs)
674 {
675     PowerPCCPU *cpu = POWERPC_CPU(cs);
676     CPUPPCState *env = &cpu->env;
677     struct kvm_one_reg reg;
678     int i;
679     int ret;
680 
681     if (env->insns_flags & PPC_FLOAT) {
682         uint64_t fpscr;
683         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
684 
685         reg.id = KVM_REG_PPC_FPSCR;
686         reg.addr = (uintptr_t)&fpscr;
687         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
688         if (ret < 0) {
689             DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
690             return ret;
691         } else {
692             env->fpscr = fpscr;
693         }
694 
695         for (i = 0; i < 32; i++) {
696             uint64_t vsr[2];
697 
698             reg.addr = (uintptr_t) &vsr;
699             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
700 
701             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
702             if (ret < 0) {
703                 DPRINTF("Unable to get %s%d from KVM: %s\n",
704                         vsx ? "VSR" : "FPR", i, strerror(errno));
705                 return ret;
706             } else {
707 #ifdef HOST_WORDS_BIGENDIAN
708                 env->fpr[i] = vsr[0];
709                 if (vsx) {
710                     env->vsr[i] = vsr[1];
711                 }
712 #else
713                 env->fpr[i] = vsr[1];
714                 if (vsx) {
715                     env->vsr[i] = vsr[0];
716                 }
717 #endif
718             }
719         }
720     }
721 
722     if (env->insns_flags & PPC_ALTIVEC) {
723         reg.id = KVM_REG_PPC_VSCR;
724         reg.addr = (uintptr_t)&env->vscr;
725         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
726         if (ret < 0) {
727             DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
728             return ret;
729         }
730 
731         for (i = 0; i < 32; i++) {
732             reg.id = KVM_REG_PPC_VR(i);
733             reg.addr = (uintptr_t)&env->avr[i];
734             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
735             if (ret < 0) {
736                 DPRINTF("Unable to get VR%d from KVM: %s\n",
737                         i, strerror(errno));
738                 return ret;
739             }
740         }
741     }
742 
743     return 0;
744 }
745 
746 #if defined(TARGET_PPC64)
747 static int kvm_get_vpa(CPUState *cs)
748 {
749     PowerPCCPU *cpu = POWERPC_CPU(cs);
750     sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu);
751     struct kvm_one_reg reg;
752     int ret;
753 
754     reg.id = KVM_REG_PPC_VPA_ADDR;
755     reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
756     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
757     if (ret < 0) {
758         DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
759         return ret;
760     }
761 
762     assert((uintptr_t)&spapr_cpu->slb_shadow_size
763            == ((uintptr_t)&spapr_cpu->slb_shadow_addr + 8));
764     reg.id = KVM_REG_PPC_VPA_SLB;
765     reg.addr = (uintptr_t)&spapr_cpu->slb_shadow_addr;
766     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
767     if (ret < 0) {
768         DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
769                 strerror(errno));
770         return ret;
771     }
772 
773     assert((uintptr_t)&spapr_cpu->dtl_size
774            == ((uintptr_t)&spapr_cpu->dtl_addr + 8));
775     reg.id = KVM_REG_PPC_VPA_DTL;
776     reg.addr = (uintptr_t)&spapr_cpu->dtl_addr;
777     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
778     if (ret < 0) {
779         DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
780                 strerror(errno));
781         return ret;
782     }
783 
784     return 0;
785 }
786 
787 static int kvm_put_vpa(CPUState *cs)
788 {
789     PowerPCCPU *cpu = POWERPC_CPU(cs);
790     sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu);
791     struct kvm_one_reg reg;
792     int ret;
793 
794     /* SLB shadow or DTL can't be registered unless a master VPA is
795      * registered.  That means when restoring state, if a VPA *is*
796      * registered, we need to set that up first.  If not, we need to
797      * deregister the others before deregistering the master VPA */
798     assert(spapr_cpu->vpa_addr
799            || !(spapr_cpu->slb_shadow_addr || spapr_cpu->dtl_addr));
800 
801     if (spapr_cpu->vpa_addr) {
802         reg.id = KVM_REG_PPC_VPA_ADDR;
803         reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
804         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
805         if (ret < 0) {
806             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
807             return ret;
808         }
809     }
810 
811     assert((uintptr_t)&spapr_cpu->slb_shadow_size
812            == ((uintptr_t)&spapr_cpu->slb_shadow_addr + 8));
813     reg.id = KVM_REG_PPC_VPA_SLB;
814     reg.addr = (uintptr_t)&spapr_cpu->slb_shadow_addr;
815     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
816     if (ret < 0) {
817         DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
818         return ret;
819     }
820 
821     assert((uintptr_t)&spapr_cpu->dtl_size
822            == ((uintptr_t)&spapr_cpu->dtl_addr + 8));
823     reg.id = KVM_REG_PPC_VPA_DTL;
824     reg.addr = (uintptr_t)&spapr_cpu->dtl_addr;
825     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
826     if (ret < 0) {
827         DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
828                 strerror(errno));
829         return ret;
830     }
831 
832     if (!spapr_cpu->vpa_addr) {
833         reg.id = KVM_REG_PPC_VPA_ADDR;
834         reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
835         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
836         if (ret < 0) {
837             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
838             return ret;
839         }
840     }
841 
842     return 0;
843 }
844 #endif /* TARGET_PPC64 */
845 
846 int kvmppc_put_books_sregs(PowerPCCPU *cpu)
847 {
848     CPUPPCState *env = &cpu->env;
849     struct kvm_sregs sregs;
850     int i;
851 
852     sregs.pvr = env->spr[SPR_PVR];
853 
854     if (cpu->vhyp) {
855         PPCVirtualHypervisorClass *vhc =
856             PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
857         sregs.u.s.sdr1 = vhc->encode_hpt_for_kvm_pr(cpu->vhyp);
858     } else {
859         sregs.u.s.sdr1 = env->spr[SPR_SDR1];
860     }
861 
862     /* Sync SLB */
863 #ifdef TARGET_PPC64
864     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
865         sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
866         if (env->slb[i].esid & SLB_ESID_V) {
867             sregs.u.s.ppc64.slb[i].slbe |= i;
868         }
869         sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
870     }
871 #endif
872 
873     /* Sync SRs */
874     for (i = 0; i < 16; i++) {
875         sregs.u.s.ppc32.sr[i] = env->sr[i];
876     }
877 
878     /* Sync BATs */
879     for (i = 0; i < 8; i++) {
880         /* Beware. We have to swap upper and lower bits here */
881         sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
882             | env->DBAT[1][i];
883         sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
884             | env->IBAT[1][i];
885     }
886 
887     return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
888 }
889 
890 int kvm_arch_put_registers(CPUState *cs, int level)
891 {
892     PowerPCCPU *cpu = POWERPC_CPU(cs);
893     CPUPPCState *env = &cpu->env;
894     struct kvm_regs regs;
895     int ret;
896     int i;
897 
898     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
899     if (ret < 0) {
900         return ret;
901     }
902 
903     regs.ctr = env->ctr;
904     regs.lr  = env->lr;
905     regs.xer = cpu_read_xer(env);
906     regs.msr = env->msr;
907     regs.pc = env->nip;
908 
909     regs.srr0 = env->spr[SPR_SRR0];
910     regs.srr1 = env->spr[SPR_SRR1];
911 
912     regs.sprg0 = env->spr[SPR_SPRG0];
913     regs.sprg1 = env->spr[SPR_SPRG1];
914     regs.sprg2 = env->spr[SPR_SPRG2];
915     regs.sprg3 = env->spr[SPR_SPRG3];
916     regs.sprg4 = env->spr[SPR_SPRG4];
917     regs.sprg5 = env->spr[SPR_SPRG5];
918     regs.sprg6 = env->spr[SPR_SPRG6];
919     regs.sprg7 = env->spr[SPR_SPRG7];
920 
921     regs.pid = env->spr[SPR_BOOKE_PID];
922 
923     for (i = 0;i < 32; i++)
924         regs.gpr[i] = env->gpr[i];
925 
926     regs.cr = 0;
927     for (i = 0; i < 8; i++) {
928         regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
929     }
930 
931     ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
932     if (ret < 0)
933         return ret;
934 
935     kvm_put_fp(cs);
936 
937     if (env->tlb_dirty) {
938         kvm_sw_tlb_put(cpu);
939         env->tlb_dirty = false;
940     }
941 
942     if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
943         ret = kvmppc_put_books_sregs(cpu);
944         if (ret < 0) {
945             return ret;
946         }
947     }
948 
949     if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
950         kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
951     }
952 
953     if (cap_one_reg) {
954         int i;
955 
956         /* We deliberately ignore errors here, for kernels which have
957          * the ONE_REG calls, but don't support the specific
958          * registers, there's a reasonable chance things will still
959          * work, at least until we try to migrate. */
960         for (i = 0; i < 1024; i++) {
961             uint64_t id = env->spr_cb[i].one_reg_id;
962 
963             if (id != 0) {
964                 kvm_put_one_spr(cs, id, i);
965             }
966         }
967 
968 #ifdef TARGET_PPC64
969         if (msr_ts) {
970             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
971                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
972             }
973             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
974                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
975             }
976             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
977             kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
978             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
979             kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
980             kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
981             kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
982             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
983             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
984             kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
985             kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
986         }
987 
988         if (cap_papr) {
989             if (kvm_put_vpa(cs) < 0) {
990                 DPRINTF("Warning: Unable to set VPA information to KVM\n");
991             }
992         }
993 
994         kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
995 #endif /* TARGET_PPC64 */
996     }
997 
998     return ret;
999 }
1000 
1001 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1002 {
1003      env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1004 }
1005 
/*
 * Read the BookE special registers from KVM (KVM_GET_SREGS) and copy them
 * into env->spr[].  Each group of fields is only copied when the matching
 * bit is set in sregs.u.e.features (or, for the Freescale block, when
 * impl_id says so).  IVOR values are additionally folded into
 * env->excp_vectors[] through kvm_sync_excp().
 *
 * Returns 0 on success, or the negative value returned by the ioctl.
 */
static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    struct kvm_sregs sregs;
    int ret;

    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
    if (ret < 0) {
        return ret;
    }

    /* Base register set; the 64-bit time base is split into TBL/TBU */
    if (sregs.u.e.features & KVM_SREGS_E_BASE) {
        env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
        env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
        env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
        env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
        env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
        env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
        env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
        env->spr[SPR_DECR] = sregs.u.e.dec;
        env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
        env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
        env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
    }

    if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
        env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
        env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
        env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
        env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
        env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
    }

    if (sregs.u.e.features & KVM_SREGS_E_64) {
        env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
    }

    if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
        env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
    }

    /*
     * Interrupt vector offsets: store each IVORn, then recompute the
     * corresponding exception vector (IVORn + IVPR).
     */
    if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
        env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
        kvm_sync_excp(env, POWERPC_EXCP_CRITICAL,  SPR_BOOKE_IVOR0);
        env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
        kvm_sync_excp(env, POWERPC_EXCP_MCHECK,  SPR_BOOKE_IVOR1);
        env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
        kvm_sync_excp(env, POWERPC_EXCP_DSI,  SPR_BOOKE_IVOR2);
        env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
        kvm_sync_excp(env, POWERPC_EXCP_ISI,  SPR_BOOKE_IVOR3);
        env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
        kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL,  SPR_BOOKE_IVOR4);
        env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
        kvm_sync_excp(env, POWERPC_EXCP_ALIGN,  SPR_BOOKE_IVOR5);
        env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
        kvm_sync_excp(env, POWERPC_EXCP_PROGRAM,  SPR_BOOKE_IVOR6);
        env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
        kvm_sync_excp(env, POWERPC_EXCP_FPU,  SPR_BOOKE_IVOR7);
        env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
        kvm_sync_excp(env, POWERPC_EXCP_SYSCALL,  SPR_BOOKE_IVOR8);
        env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
        kvm_sync_excp(env, POWERPC_EXCP_APU,  SPR_BOOKE_IVOR9);
        env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
        kvm_sync_excp(env, POWERPC_EXCP_DECR,  SPR_BOOKE_IVOR10);
        env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
        kvm_sync_excp(env, POWERPC_EXCP_FIT,  SPR_BOOKE_IVOR11);
        env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
        kvm_sync_excp(env, POWERPC_EXCP_WDT,  SPR_BOOKE_IVOR12);
        env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
        kvm_sync_excp(env, POWERPC_EXCP_DTLB,  SPR_BOOKE_IVOR13);
        env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
        kvm_sync_excp(env, POWERPC_EXCP_ITLB,  SPR_BOOKE_IVOR14);
        env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
        kvm_sync_excp(env, POWERPC_EXCP_DEBUG,  SPR_BOOKE_IVOR15);

        /* IVOR32..34 come from ivor_high[0..2] when the SPE bit is set */
        if (sregs.u.e.features & KVM_SREGS_E_SPE) {
            env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
            kvm_sync_excp(env, POWERPC_EXCP_SPEU,  SPR_BOOKE_IVOR32);
            env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
            kvm_sync_excp(env, POWERPC_EXCP_EFPDI,  SPR_BOOKE_IVOR33);
            env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
            kvm_sync_excp(env, POWERPC_EXCP_EFPRI,  SPR_BOOKE_IVOR34);
        }

        /* IVOR35 comes from ivor_high[3] when the PM bit is set */
        if (sregs.u.e.features & KVM_SREGS_E_PM) {
            env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
            kvm_sync_excp(env, POWERPC_EXCP_EPERFM,  SPR_BOOKE_IVOR35);
        }

        /* IVOR36/37 come from ivor_high[4..5] when the PC bit is set */
        if (sregs.u.e.features & KVM_SREGS_E_PC) {
            env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
            kvm_sync_excp(env, POWERPC_EXCP_DOORI,  SPR_BOOKE_IVOR36);
            env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
            kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
        }
    }

    /* MMU assist registers; mas7_3 packs MAS7 (high word) and MAS3 (low) */
    if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
        env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
        env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
        env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
        env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
        env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
        env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
        env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
        env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
        env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
        env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
    }

    if (sregs.u.e.features & KVM_SREGS_EXP) {
        env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
    }

    if (sregs.u.e.features & KVM_SREGS_E_PD) {
        env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
        env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
    }

    /* Freescale implementation-specific registers */
    if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
        env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
        env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
        env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;

        if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
            env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
            env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
        }
    }

    return 0;
}
1138 
1139 static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1140 {
1141     CPUPPCState *env = &cpu->env;
1142     struct kvm_sregs sregs;
1143     int ret;
1144     int i;
1145 
1146     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1147     if (ret < 0) {
1148         return ret;
1149     }
1150 
1151     if (!cpu->vhyp) {
1152         ppc_store_sdr1(env, sregs.u.s.sdr1);
1153     }
1154 
1155     /* Sync SLB */
1156 #ifdef TARGET_PPC64
1157     /*
1158      * The packed SLB array we get from KVM_GET_SREGS only contains
1159      * information about valid entries. So we flush our internal copy
1160      * to get rid of stale ones, then put all valid SLB entries back
1161      * in.
1162      */
1163     memset(env->slb, 0, sizeof(env->slb));
1164     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1165         target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1166         target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1167         /*
1168          * Only restore valid entries
1169          */
1170         if (rb & SLB_ESID_V) {
1171             ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1172         }
1173     }
1174 #endif
1175 
1176     /* Sync SRs */
1177     for (i = 0; i < 16; i++) {
1178         env->sr[i] = sregs.u.s.ppc32.sr[i];
1179     }
1180 
1181     /* Sync BATs */
1182     for (i = 0; i < 8; i++) {
1183         env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1184         env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1185         env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1186         env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1187     }
1188 
1189     return 0;
1190 }
1191 
1192 int kvm_arch_get_registers(CPUState *cs)
1193 {
1194     PowerPCCPU *cpu = POWERPC_CPU(cs);
1195     CPUPPCState *env = &cpu->env;
1196     struct kvm_regs regs;
1197     uint32_t cr;
1198     int i, ret;
1199 
1200     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1201     if (ret < 0)
1202         return ret;
1203 
1204     cr = regs.cr;
1205     for (i = 7; i >= 0; i--) {
1206         env->crf[i] = cr & 15;
1207         cr >>= 4;
1208     }
1209 
1210     env->ctr = regs.ctr;
1211     env->lr = regs.lr;
1212     cpu_write_xer(env, regs.xer);
1213     env->msr = regs.msr;
1214     env->nip = regs.pc;
1215 
1216     env->spr[SPR_SRR0] = regs.srr0;
1217     env->spr[SPR_SRR1] = regs.srr1;
1218 
1219     env->spr[SPR_SPRG0] = regs.sprg0;
1220     env->spr[SPR_SPRG1] = regs.sprg1;
1221     env->spr[SPR_SPRG2] = regs.sprg2;
1222     env->spr[SPR_SPRG3] = regs.sprg3;
1223     env->spr[SPR_SPRG4] = regs.sprg4;
1224     env->spr[SPR_SPRG5] = regs.sprg5;
1225     env->spr[SPR_SPRG6] = regs.sprg6;
1226     env->spr[SPR_SPRG7] = regs.sprg7;
1227 
1228     env->spr[SPR_BOOKE_PID] = regs.pid;
1229 
1230     for (i = 0;i < 32; i++)
1231         env->gpr[i] = regs.gpr[i];
1232 
1233     kvm_get_fp(cs);
1234 
1235     if (cap_booke_sregs) {
1236         ret = kvmppc_get_booke_sregs(cpu);
1237         if (ret < 0) {
1238             return ret;
1239         }
1240     }
1241 
1242     if (cap_segstate) {
1243         ret = kvmppc_get_books_sregs(cpu);
1244         if (ret < 0) {
1245             return ret;
1246         }
1247     }
1248 
1249     if (cap_hior) {
1250         kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1251     }
1252 
1253     if (cap_one_reg) {
1254         int i;
1255 
1256         /* We deliberately ignore errors here, for kernels which have
1257          * the ONE_REG calls, but don't support the specific
1258          * registers, there's a reasonable chance things will still
1259          * work, at least until we try to migrate. */
1260         for (i = 0; i < 1024; i++) {
1261             uint64_t id = env->spr_cb[i].one_reg_id;
1262 
1263             if (id != 0) {
1264                 kvm_get_one_spr(cs, id, i);
1265             }
1266         }
1267 
1268 #ifdef TARGET_PPC64
1269         if (msr_ts) {
1270             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1271                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1272             }
1273             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1274                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1275             }
1276             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1277             kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1278             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1279             kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1280             kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1281             kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1282             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1283             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1284             kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1285             kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1286         }
1287 
1288         if (cap_papr) {
1289             if (kvm_get_vpa(cs) < 0) {
1290                 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1291             }
1292         }
1293 
1294         kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1295 #endif
1296     }
1297 
1298     return 0;
1299 }
1300 
1301 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1302 {
1303     unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1304 
1305     if (irq != PPC_INTERRUPT_EXT) {
1306         return 0;
1307     }
1308 
1309     if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1310         return 0;
1311     }
1312 
1313     kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1314 
1315     return 0;
1316 }
1317 
/* Select which core input pin constant PPC_INPUT_INT stands for, depending
 * on the build target.  kvm_arch_pre_run() tests this bit in
 * env->irq_input_state before injecting an interrupt. */
#if defined(TARGET_PPCEMB)
#define PPC_INPUT_INT PPC40x_INPUT_INT
#elif defined(TARGET_PPC64)
#define PPC_INPUT_INT PPC970_INPUT_INT
#else
#define PPC_INPUT_INT PPC6xx_INPUT_INT
#endif
1325 
1326 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1327 {
1328     PowerPCCPU *cpu = POWERPC_CPU(cs);
1329     CPUPPCState *env = &cpu->env;
1330     int r;
1331     unsigned irq;
1332 
1333     qemu_mutex_lock_iothread();
1334 
1335     /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1336      * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1337     if (!cap_interrupt_level &&
1338         run->ready_for_interrupt_injection &&
1339         (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1340         (env->irq_input_state & (1<<PPC_INPUT_INT)))
1341     {
1342         /* For now KVM disregards the 'irq' argument. However, in the
1343          * future KVM could cache it in-kernel to avoid a heavyweight exit
1344          * when reading the UIC.
1345          */
1346         irq = KVM_INTERRUPT_SET;
1347 
1348         DPRINTF("injected interrupt %d\n", irq);
1349         r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1350         if (r < 0) {
1351             printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1352         }
1353 
1354         /* Always wake up soon in case the interrupt was level based */
1355         timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1356                        (NANOSECONDS_PER_SECOND / 50));
1357     }
1358 
1359     /* We don't know if there are more interrupts pending after this. However,
1360      * the guest will return to userspace in the course of handling this one
1361      * anyways, so we will get a chance to deliver the rest. */
1362 
1363     qemu_mutex_unlock_iothread();
1364 }
1365 
/* Hook run after the vCPU leaves the guest.  Nothing is done for PowerPC;
 * both arguments are unused and MEMTXATTRS_UNSPECIFIED is returned. */
MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
{
    return MEMTXATTRS_UNSPECIFIED;
}
1370 
/* Report the current value of cs->halted to the caller; no events are
 * processed here. */
int kvm_arch_process_async_events(CPUState *cs)
{
    return cs->halted;
}
1375 
1376 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1377 {
1378     CPUState *cs = CPU(cpu);
1379     CPUPPCState *env = &cpu->env;
1380 
1381     if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1382         cs->halted = 1;
1383         cs->exception_index = EXCP_HLT;
1384     }
1385 
1386     return 0;
1387 }
1388 
1389 /* map dcr access to existing qemu dcr emulation */
1390 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1391 {
1392     if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1393         fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1394 
1395     return 0;
1396 }
1397 
1398 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1399 {
1400     if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1401         fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1402 
1403     return 0;
1404 }
1405 
1406 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1407 {
1408     /* Mixed endian case is not handled */
1409     uint32_t sc = debug_inst_opcode;
1410 
1411     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1412                             sizeof(sc), 0) ||
1413         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1414         return -EINVAL;
1415     }
1416 
1417     return 0;
1418 }
1419 
1420 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1421 {
1422     uint32_t sc;
1423 
1424     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1425         sc != debug_inst_opcode ||
1426         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1427                             sizeof(sc), 1)) {
1428         return -EINVAL;
1429     }
1430 
1431     return 0;
1432 }
1433 
1434 static int find_hw_breakpoint(target_ulong addr, int type)
1435 {
1436     int n;
1437 
1438     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1439            <= ARRAY_SIZE(hw_debug_points));
1440 
1441     for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1442         if (hw_debug_points[n].addr == addr &&
1443              hw_debug_points[n].type == type) {
1444             return n;
1445         }
1446     }
1447 
1448     return -1;
1449 }
1450 
1451 static int find_hw_watchpoint(target_ulong addr, int *flag)
1452 {
1453     int n;
1454 
1455     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1456     if (n >= 0) {
1457         *flag = BP_MEM_ACCESS;
1458         return n;
1459     }
1460 
1461     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1462     if (n >= 0) {
1463         *flag = BP_MEM_WRITE;
1464         return n;
1465     }
1466 
1467     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1468     if (n >= 0) {
1469         *flag = BP_MEM_READ;
1470         return n;
1471     }
1472 
1473     return -1;
1474 }
1475 
1476 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1477                                   target_ulong len, int type)
1478 {
1479     if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1480         return -ENOBUFS;
1481     }
1482 
1483     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1484     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1485 
1486     switch (type) {
1487     case GDB_BREAKPOINT_HW:
1488         if (nb_hw_breakpoint >= max_hw_breakpoint) {
1489             return -ENOBUFS;
1490         }
1491 
1492         if (find_hw_breakpoint(addr, type) >= 0) {
1493             return -EEXIST;
1494         }
1495 
1496         nb_hw_breakpoint++;
1497         break;
1498 
1499     case GDB_WATCHPOINT_WRITE:
1500     case GDB_WATCHPOINT_READ:
1501     case GDB_WATCHPOINT_ACCESS:
1502         if (nb_hw_watchpoint >= max_hw_watchpoint) {
1503             return -ENOBUFS;
1504         }
1505 
1506         if (find_hw_breakpoint(addr, type) >= 0) {
1507             return -EEXIST;
1508         }
1509 
1510         nb_hw_watchpoint++;
1511         break;
1512 
1513     default:
1514         return -ENOSYS;
1515     }
1516 
1517     return 0;
1518 }
1519 
1520 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1521                                   target_ulong len, int type)
1522 {
1523     int n;
1524 
1525     n = find_hw_breakpoint(addr, type);
1526     if (n < 0) {
1527         return -ENOENT;
1528     }
1529 
1530     switch (type) {
1531     case GDB_BREAKPOINT_HW:
1532         nb_hw_breakpoint--;
1533         break;
1534 
1535     case GDB_WATCHPOINT_WRITE:
1536     case GDB_WATCHPOINT_READ:
1537     case GDB_WATCHPOINT_ACCESS:
1538         nb_hw_watchpoint--;
1539         break;
1540 
1541     default:
1542         return -ENOSYS;
1543     }
1544     hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1545 
1546     return 0;
1547 }
1548 
1549 void kvm_arch_remove_all_hw_breakpoints(void)
1550 {
1551     nb_hw_breakpoint = nb_hw_watchpoint = 0;
1552 }
1553 
1554 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1555 {
1556     int n;
1557 
1558     /* Software Breakpoint updates */
1559     if (kvm_sw_breakpoints_active(cs)) {
1560         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1561     }
1562 
1563     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1564            <= ARRAY_SIZE(hw_debug_points));
1565     assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1566 
1567     if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1568         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1569         memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1570         for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1571             switch (hw_debug_points[n].type) {
1572             case GDB_BREAKPOINT_HW:
1573                 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1574                 break;
1575             case GDB_WATCHPOINT_WRITE:
1576                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1577                 break;
1578             case GDB_WATCHPOINT_READ:
1579                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1580                 break;
1581             case GDB_WATCHPOINT_ACCESS:
1582                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1583                                         KVMPPC_DEBUG_WATCH_READ;
1584                 break;
1585             default:
1586                 cpu_abort(cs, "Unsupported breakpoint type\n");
1587             }
1588             dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1589         }
1590     }
1591 }
1592 
/*
 * Decide what to do with a KVM_EXIT_DEBUG exit.
 *
 * Returns 1 when the exit belongs to QEMU's own debugging (single-step, a
 * registered hardware breakpoint/watchpoint, or a known software
 * breakpoint) and 0 otherwise.  When the exit has no debug status and no
 * software breakpoint matches, a program interrupt is injected into the
 * guest before returning 0.
 */
static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
{
    CPUState *cs = CPU(cpu);
    CPUPPCState *env = &cpu->env;
    struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
    int handle = 0;
    int n;
    int flag = 0;

    if (cs->singlestep_enabled) {
        handle = 1;
    } else if (arch_info->status) {
        /* Hardware debug event: only ours if it matches a registered point */
        if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
            if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
                n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
                if (n >= 0) {
                    handle = 1;
                }
            } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
                                            KVMPPC_DEBUG_WATCH_WRITE)) {
                n = find_hw_watchpoint(arch_info->address,  &flag);
                if (n >= 0) {
                    handle = 1;
                    /* Record which watchpoint fired and how it was hit */
                    cs->watchpoint_hit = &hw_watchpoint;
                    hw_watchpoint.vaddr = hw_debug_points[n].addr;
                    hw_watchpoint.flags = flag;
                }
            }
        }
    } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
        handle = 1;
    } else {
        /* QEMU is not able to handle debug exception, so inject
         * program exception to guest;
         * Yes program exception NOT debug exception !!
         * When QEMU is using debug resources then debug exception must
         * be always set. To achieve this we set MSR_DE and also set
         * MSRP_DEP so guest cannot change MSR_DE.
         * When emulating debug resource for guest we want guest
         * to control MSR_DE (enable/disable debug interrupt on need).
         * Supporting both configurations are NOT possible.
         * So the result is that we cannot share debug resources
         * between QEMU and Guest on BOOKE architecture.
         * In the current design QEMU gets the priority over guest,
         * this means that if QEMU is using debug resources then guest
         * cannot use them;
         * For software breakpoint QEMU uses a privileged instruction;
         * So there cannot be any reason that we are here for guest
         * set debug exception, only possibility is guest executed a
         * privileged / illegal instruction and that's why we are
         * injecting a program interrupt.
         */

        cpu_synchronize_state(cs);
        /* env->nip is PC, so increment this by 4 to use
         * ppc_cpu_do_interrupt(), which set srr0 = env->nip - 4.
         */
        env->nip += 4;
        cs->exception_index = POWERPC_EXCP_PROGRAM;
        env->error_code = POWERPC_EXCP_INVAL;
        ppc_cpu_do_interrupt(cs);
    }

    return handle;
}
1658 
1659 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1660 {
1661     PowerPCCPU *cpu = POWERPC_CPU(cs);
1662     CPUPPCState *env = &cpu->env;
1663     int ret;
1664 
1665     qemu_mutex_lock_iothread();
1666 
1667     switch (run->exit_reason) {
1668     case KVM_EXIT_DCR:
1669         if (run->dcr.is_write) {
1670             DPRINTF("handle dcr write\n");
1671             ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1672         } else {
1673             DPRINTF("handle dcr read\n");
1674             ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1675         }
1676         break;
1677     case KVM_EXIT_HLT:
1678         DPRINTF("handle halt\n");
1679         ret = kvmppc_handle_halt(cpu);
1680         break;
1681 #if defined(TARGET_PPC64)
1682     case KVM_EXIT_PAPR_HCALL:
1683         DPRINTF("handle PAPR hypercall\n");
1684         run->papr_hcall.ret = spapr_hypercall(cpu,
1685                                               run->papr_hcall.nr,
1686                                               run->papr_hcall.args);
1687         ret = 0;
1688         break;
1689 #endif
1690     case KVM_EXIT_EPR:
1691         DPRINTF("handle epr\n");
1692         run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1693         ret = 0;
1694         break;
1695     case KVM_EXIT_WATCHDOG:
1696         DPRINTF("handle watchdog expiry\n");
1697         watchdog_perform_action();
1698         ret = 0;
1699         break;
1700 
1701     case KVM_EXIT_DEBUG:
1702         DPRINTF("handle debug exception\n");
1703         if (kvm_handle_debug(cpu, run)) {
1704             ret = EXCP_DEBUG;
1705             break;
1706         }
1707         /* re-enter, this exception was guest-internal */
1708         ret = 0;
1709         break;
1710 
1711     default:
1712         fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1713         ret = -1;
1714         break;
1715     }
1716 
1717     qemu_mutex_unlock_iothread();
1718     return ret;
1719 }
1720 
1721 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1722 {
1723     CPUState *cs = CPU(cpu);
1724     uint32_t bits = tsr_bits;
1725     struct kvm_one_reg reg = {
1726         .id = KVM_REG_PPC_OR_TSR,
1727         .addr = (uintptr_t) &bits,
1728     };
1729 
1730     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1731 }
1732 
1733 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1734 {
1735 
1736     CPUState *cs = CPU(cpu);
1737     uint32_t bits = tsr_bits;
1738     struct kvm_one_reg reg = {
1739         .id = KVM_REG_PPC_CLEAR_TSR,
1740         .addr = (uintptr_t) &bits,
1741     };
1742 
1743     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1744 }
1745 
1746 int kvmppc_set_tcr(PowerPCCPU *cpu)
1747 {
1748     CPUState *cs = CPU(cpu);
1749     CPUPPCState *env = &cpu->env;
1750     uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1751 
1752     struct kvm_one_reg reg = {
1753         .id = KVM_REG_PPC_TCR,
1754         .addr = (uintptr_t) &tcr,
1755     };
1756 
1757     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1758 }
1759 
1760 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1761 {
1762     CPUState *cs = CPU(cpu);
1763     int ret;
1764 
1765     if (!kvm_enabled()) {
1766         return -1;
1767     }
1768 
1769     if (!cap_ppc_watchdog) {
1770         printf("warning: KVM does not support watchdog");
1771         return -1;
1772     }
1773 
1774     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1775     if (ret < 0) {
1776         fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1777                 __func__, strerror(-ret));
1778         return ret;
1779     }
1780 
1781     return ret;
1782 }
1783 
/*
 * Find the first line of /proc/cpuinfo that starts with @field and copy it
 * (the whole line) into @value, a buffer of @len bytes, with pstrcpy().
 *
 * Returns 0 when such a line was found, -1 when the file cannot be opened
 * or no line matches.
 */
static int read_cpuinfo(const char *field, char *value, int len)
{
    char line[512];
    int field_len = strlen(field);
    int ret = -1;
    FILE *f = fopen("/proc/cpuinfo", "r");

    if (!f) {
        return -1;
    }

    while (fgets(line, sizeof(line), f)) {
        if (strncmp(line, field, field_len) == 0) {
            pstrcpy(value, len, line);
            ret = 0;
            break;
        }
        /* Stop scanning if fgets() handed back an empty string */
        if (!*line) {
            break;
        }
    }

    fclose(f);

    return ret;
}
1811 
1812 uint32_t kvmppc_get_tbfreq(void)
1813 {
1814     char line[512];
1815     char *ns;
1816     uint32_t retval = NANOSECONDS_PER_SECOND;
1817 
1818     if (read_cpuinfo("timebase", line, sizeof(line))) {
1819         return retval;
1820     }
1821 
1822     if (!(ns = strchr(line, ':'))) {
1823         return retval;
1824     }
1825 
1826     ns++;
1827 
1828     return atoi(ns);
1829 }
1830 
/* Load the contents of /proc/device-tree/system-id into *@value via
 * g_file_get_contents() and return that call's result.
 * NOTE(review): on success *@value is presumably a newly allocated buffer
 * the caller must g_free() - confirm against the GLib documentation. */
bool kvmppc_get_host_serial(char **value)
{
    return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
                               NULL);
}
1836 
/* Load the contents of /proc/device-tree/model into *@value via
 * g_file_get_contents() and return that call's result.
 * NOTE(review): on success *@value is presumably a newly allocated buffer
 * the caller must g_free() - confirm against the GLib documentation. */
bool kvmppc_get_host_model(char **value)
{
    return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
}
1841 
1842 /* Try to find a device tree node for a CPU with clock-frequency property */
1843 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1844 {
1845     struct dirent *dirp;
1846     DIR *dp;
1847 
1848     if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1849         printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1850         return -1;
1851     }
1852 
1853     buf[0] = '\0';
1854     while ((dirp = readdir(dp)) != NULL) {
1855         FILE *f;
1856         snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1857                  dirp->d_name);
1858         f = fopen(buf, "r");
1859         if (f) {
1860             snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1861             fclose(f);
1862             break;
1863         }
1864         buf[0] = '\0';
1865     }
1866     closedir(dp);
1867     if (buf[0] == '\0') {
1868         printf("Unknown host!\n");
1869         return -1;
1870     }
1871 
1872     return 0;
1873 }
1874 
/*
 * Read a big-endian integer from the file @filename.
 *
 * A file of exactly 4 bytes is decoded as a 32-bit value, one of 8 bytes
 * (or more, since only 8 are read) as a 64-bit value.  Returns
 * (uint64_t)-1 when the file cannot be opened and 0 for any other length.
 */
static uint64_t kvmppc_read_int_dt(const char *filename)
{
    union {
        uint32_t v32;
        uint64_t v64;
    } u;
    int len;
    FILE *f = fopen(filename, "rb");

    if (!f) {
        return -1;
    }

    len = fread(&u, 1, sizeof(u), f);
    fclose(f);

    if (len == 4) {
        /* property is a 32-bit quantity */
        return be32_to_cpu(u.v32);
    }
    if (len == 8) {
        return be64_to_cpu(u.v64);
    }

    return 0;
}
1901 
1902 /* Read a CPU node property from the host device tree that's a single
1903  * integer (32-bit or 64-bit).  Returns 0 if anything goes wrong
1904  * (can't find or open the property, or doesn't understand the
1905  * format) */
1906 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1907 {
1908     char buf[PATH_MAX], *tmp;
1909     uint64_t val;
1910 
1911     if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1912         return -1;
1913     }
1914 
1915     tmp = g_strdup_printf("%s/%s", buf, propname);
1916     val = kvmppc_read_int_dt(tmp);
1917     g_free(tmp);
1918 
1919     return val;
1920 }
1921 
/*
 * Return the "clock-frequency" property of the host CPU device tree node,
 * exactly as produced by kvmppc_read_int_cpu_dt().
 */
uint64_t kvmppc_get_clockfreq(void)
{
    uint64_t freq = kvmppc_read_int_cpu_dt("clock-frequency");

    return freq;
}
1926 
1927 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1928  {
1929      PowerPCCPU *cpu = ppc_env_get_cpu(env);
1930      CPUState *cs = CPU(cpu);
1931 
1932     if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1933         !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1934         return 0;
1935     }
1936 
1937     return 1;
1938 }
1939 
1940 int kvmppc_get_hasidle(CPUPPCState *env)
1941 {
1942     struct kvm_ppc_pvinfo pvinfo;
1943 
1944     if (!kvmppc_get_pvinfo(env, &pvinfo) &&
1945         (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
1946         return 1;
1947     }
1948 
1949     return 0;
1950 }
1951 
1952 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
1953 {
1954     uint32_t *hc = (uint32_t*)buf;
1955     struct kvm_ppc_pvinfo pvinfo;
1956 
1957     if (!kvmppc_get_pvinfo(env, &pvinfo)) {
1958         memcpy(buf, pvinfo.hcall, buf_len);
1959         return 0;
1960     }
1961 
1962     /*
1963      * Fallback to always fail hypercalls regardless of endianness:
1964      *
1965      *     tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
1966      *     li r3, -1
1967      *     b .+8       (becomes nop in wrong endian)
1968      *     bswap32(li r3, -1)
1969      */
1970 
1971     hc[0] = cpu_to_be32(0x08000048);
1972     hc[1] = cpu_to_be32(0x3860ffff);
1973     hc[2] = cpu_to_be32(0x48000008);
1974     hc[3] = cpu_to_be32(bswap32(0x3860ffff));
1975 
1976     return 1;
1977 }
1978 
1979 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
1980 {
1981     return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
1982 }
1983 
1984 void kvmppc_enable_logical_ci_hcalls(void)
1985 {
1986     /*
1987      * FIXME: it would be nice if we could detect the cases where
1988      * we're using a device which requires the in kernel
1989      * implementation of these hcalls, but the kernel lacks them and
1990      * produce a warning.
1991      */
1992     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
1993     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
1994 }
1995 
1996 void kvmppc_enable_set_mode_hcall(void)
1997 {
1998     kvmppc_enable_hcall(kvm_state, H_SET_MODE);
1999 }
2000 
2001 void kvmppc_enable_clear_ref_mod_hcalls(void)
2002 {
2003     kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
2004     kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
2005 }
2006 
2007 void kvmppc_set_papr(PowerPCCPU *cpu)
2008 {
2009     CPUState *cs = CPU(cpu);
2010     int ret;
2011 
2012     if (!kvm_enabled()) {
2013         return;
2014     }
2015 
2016     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2017     if (ret) {
2018         error_report("This vCPU type or KVM version does not support PAPR");
2019         exit(1);
2020     }
2021 
2022     /* Update the capability flag so we sync the right information
2023      * with kvm */
2024     cap_papr = 1;
2025 }
2026 
2027 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr)
2028 {
2029     return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr);
2030 }
2031 
2032 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2033 {
2034     CPUState *cs = CPU(cpu);
2035     int ret;
2036 
2037     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2038     if (ret && mpic_proxy) {
2039         error_report("This KVM version does not support EPR");
2040         exit(1);
2041     }
2042 }
2043 
2044 int kvmppc_smt_threads(void)
2045 {
2046     return cap_ppc_smt ? cap_ppc_smt : 1;
2047 }
2048 
2049 int kvmppc_set_smt_threads(int smt)
2050 {
2051     int ret;
2052 
2053     ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_SMT, 0, smt, 0);
2054     if (!ret) {
2055         cap_ppc_smt = smt;
2056     }
2057     return ret;
2058 }
2059 
2060 void kvmppc_hint_smt_possible(Error **errp)
2061 {
2062     int i;
2063     GString *g;
2064     char *s;
2065 
2066     assert(kvm_enabled());
2067     if (cap_ppc_smt_possible) {
2068         g = g_string_new("Available VSMT modes:");
2069         for (i = 63; i >= 0; i--) {
2070             if ((1UL << i) & cap_ppc_smt_possible) {
2071                 g_string_append_printf(g, " %lu", (1UL << i));
2072             }
2073         }
2074         s = g_string_free(g, false);
2075         error_append_hint(errp, "%s.\n", s);
2076         g_free(s);
2077     } else {
2078         error_append_hint(errp,
2079                           "This KVM seems to be too old to support VSMT.\n");
2080     }
2081 }
2082 
2083 
2084 #ifdef TARGET_PPC64
2085 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2086 {
2087     struct kvm_ppc_smmu_info info;
2088     long rampagesize, best_page_shift;
2089     int i;
2090 
2091     /* Find the largest hardware supported page size that's less than
2092      * or equal to the (logical) backing page size of guest RAM */
2093     kvm_get_smmu_info(&info, &error_fatal);
2094     rampagesize = qemu_getrampagesize();
2095     best_page_shift = 0;
2096 
2097     for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2098         struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2099 
2100         if (!sps->page_shift) {
2101             continue;
2102         }
2103 
2104         if ((sps->page_shift > best_page_shift)
2105             && ((1UL << sps->page_shift) <= rampagesize)) {
2106             best_page_shift = sps->page_shift;
2107         }
2108     }
2109 
2110     return MIN(current_size,
2111                1ULL << (best_page_shift + hash_shift - 7));
2112 }
2113 #endif
2114 
2115 bool kvmppc_spapr_use_multitce(void)
2116 {
2117     return cap_spapr_multitce;
2118 }
2119 
2120 int kvmppc_spapr_enable_inkernel_multitce(void)
2121 {
2122     int ret;
2123 
2124     ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2125                             H_PUT_TCE_INDIRECT, 1);
2126     if (!ret) {
2127         ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2128                                 H_STUFF_TCE, 1);
2129     }
2130 
2131     return ret;
2132 }
2133 
2134 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
2135                               uint64_t bus_offset, uint32_t nb_table,
2136                               int *pfd, bool need_vfio)
2137 {
2138     long len;
2139     int fd;
2140     void *table;
2141 
2142     /* Must set fd to -1 so we don't try to munmap when called for
2143      * destroying the table, which the upper layers -will- do
2144      */
2145     *pfd = -1;
2146     if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2147         return NULL;
2148     }
2149 
2150     if (cap_spapr_tce_64) {
2151         struct kvm_create_spapr_tce_64 args = {
2152             .liobn = liobn,
2153             .page_shift = page_shift,
2154             .offset = bus_offset >> page_shift,
2155             .size = nb_table,
2156             .flags = 0
2157         };
2158         fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE_64, &args);
2159         if (fd < 0) {
2160             fprintf(stderr,
2161                     "KVM: Failed to create TCE64 table for liobn 0x%x\n",
2162                     liobn);
2163             return NULL;
2164         }
2165     } else if (cap_spapr_tce) {
2166         uint64_t window_size = (uint64_t) nb_table << page_shift;
2167         struct kvm_create_spapr_tce args = {
2168             .liobn = liobn,
2169             .window_size = window_size,
2170         };
2171         if ((window_size != args.window_size) || bus_offset) {
2172             return NULL;
2173         }
2174         fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2175         if (fd < 0) {
2176             fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2177                     liobn);
2178             return NULL;
2179         }
2180     } else {
2181         return NULL;
2182     }
2183 
2184     len = nb_table * sizeof(uint64_t);
2185     /* FIXME: round this up to page size */
2186 
2187     table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2188     if (table == MAP_FAILED) {
2189         fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2190                 liobn);
2191         close(fd);
2192         return NULL;
2193     }
2194 
2195     *pfd = fd;
2196     return table;
2197 }
2198 
/*
 * Unmap a TCE table of @nb_table 64-bit entries and close its @fd.
 *
 * Returns -1 when @fd is negative.  Otherwise returns 0, even if munmap()
 * or close() fails; such a failure is only reported on stderr.  close()
 * is not attempted when munmap() fails.
 */
int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
{
    long len;

    if (fd < 0) {
        return -1;
    }

    len = nb_table * sizeof(uint64_t);
    if ((munmap(table, len) >= 0) && (close(fd) >= 0)) {
        return 0;
    }

    fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
            strerror(errno));
    /* Leak the table */

    return 0;
}
2217 
2218 int kvmppc_reset_htab(int shift_hint)
2219 {
2220     uint32_t shift = shift_hint;
2221 
2222     if (!kvm_enabled()) {
2223         /* Full emulation, tell caller to allocate htab itself */
2224         return 0;
2225     }
2226     if (kvm_vm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2227         int ret;
2228         ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2229         if (ret == -ENOTTY) {
2230             /* At least some versions of PR KVM advertise the
2231              * capability, but don't implement the ioctl().  Oops.
2232              * Return 0 so that we allocate the htab in qemu, as is
2233              * correct for PR. */
2234             return 0;
2235         } else if (ret < 0) {
2236             return ret;
2237         }
2238         return shift;
2239     }
2240 
2241     /* We have a kernel that predates the htab reset calls.  For PR
2242      * KVM, we need to allocate the htab ourselves, for an HV KVM of
2243      * this era, it has allocated a 16MB fixed size hash table already. */
2244     if (kvmppc_is_pr(kvm_state)) {
2245         /* PR - tell caller to allocate htab */
2246         return 0;
2247     } else {
2248         /* HV - assume 16MB kernel allocated htab */
2249         return 24;
2250     }
2251 }
2252 
2253 static inline uint32_t mfpvr(void)
2254 {
2255     uint32_t pvr;
2256 
2257     asm ("mfpvr %0"
2258          : "=r"(pvr));
2259     return pvr;
2260 }
2261 
/*
 * Set (when @on is true) or clear (when @on is false) the bits given by
 * @flags in *@word.
 */
static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    if (!on) {
        *word &= ~flags;
        return;
    }

    *word |= flags;
}
2270 
2271 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2272 {
2273     PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2274     uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2275     uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2276 
2277     /* Now fix up the class with information we can query from the host */
2278     pcc->pvr = mfpvr();
2279 
2280     alter_insns(&pcc->insns_flags, PPC_ALTIVEC,
2281                 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_ALTIVEC);
2282     alter_insns(&pcc->insns_flags2, PPC2_VSX,
2283                 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_VSX);
2284     alter_insns(&pcc->insns_flags2, PPC2_DFP,
2285                 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_DFP);
2286 
2287     if (dcache_size != -1) {
2288         pcc->l1_dcache_size = dcache_size;
2289     }
2290 
2291     if (icache_size != -1) {
2292         pcc->l1_icache_size = icache_size;
2293     }
2294 
2295 #if defined(TARGET_PPC64)
2296     pcc->radix_page_info = kvm_get_radix_page_info();
2297 
2298     if ((pcc->pvr & 0xffffff00) == CPU_POWERPC_POWER9_DD1) {
2299         /*
2300          * POWER9 DD1 has some bugs which make it not really ISA 3.00
2301          * compliant.  More importantly, advertising ISA 3.00
2302          * architected mode may prevent guests from activating
2303          * necessary DD1 workarounds.
2304          */
2305         pcc->pcr_supported &= ~(PCR_COMPAT_3_00 | PCR_COMPAT_2_07
2306                                 | PCR_COMPAT_2_06 | PCR_COMPAT_2_05);
2307     }
2308 #endif /* defined(TARGET_PPC64) */
2309 }
2310 
2311 bool kvmppc_has_cap_epr(void)
2312 {
2313     return cap_epr;
2314 }
2315 
2316 bool kvmppc_has_cap_fixup_hcalls(void)
2317 {
2318     return cap_fixup_hcalls;
2319 }
2320 
2321 bool kvmppc_has_cap_htm(void)
2322 {
2323     return cap_htm;
2324 }
2325 
2326 bool kvmppc_has_cap_mmu_radix(void)
2327 {
2328     return cap_mmu_radix;
2329 }
2330 
2331 bool kvmppc_has_cap_mmu_hash_v3(void)
2332 {
2333     return cap_mmu_hash_v3;
2334 }
2335 
/*
 * Returns true when, on a TARGET_PPC64 build, the host PVR masked with
 * CPU_POWERPC_POWER_SERVER_MASK equals one of the POWER8E, POWER8NVL or
 * POWER8 base values.  Always false on other builds.
 */
static bool kvmppc_power8_host(void)
{
#ifdef TARGET_PPC64
    uint32_t base_pvr = CPU_POWERPC_POWER_SERVER_MASK & mfpvr();

    if (base_pvr == CPU_POWERPC_POWER8E_BASE) {
        return true;
    }
    if (base_pvr == CPU_POWERPC_POWER8NVL_BASE) {
        return true;
    }
    if (base_pvr == CPU_POWERPC_POWER8_BASE) {
        return true;
    }
#endif /* TARGET_PPC64 */
    return false;
}
2349 
2350 static int parse_cap_ppc_safe_cache(struct kvm_ppc_cpu_char c)
2351 {
2352     bool l1d_thread_priv_req = !kvmppc_power8_host();
2353 
2354     if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_L1D_FLUSH_PR) {
2355         return 2;
2356     } else if ((!l1d_thread_priv_req ||
2357                 c.character & c.character_mask & H_CPU_CHAR_L1D_THREAD_PRIV) &&
2358                (c.character & c.character_mask
2359                 & (H_CPU_CHAR_L1D_FLUSH_ORI30 | H_CPU_CHAR_L1D_FLUSH_TRIG2))) {
2360         return 1;
2361     }
2362 
2363     return 0;
2364 }
2365 
2366 static int parse_cap_ppc_safe_bounds_check(struct kvm_ppc_cpu_char c)
2367 {
2368     if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_BNDS_CHK_SPEC_BAR) {
2369         return 2;
2370     } else if (c.character & c.character_mask & H_CPU_CHAR_SPEC_BAR_ORI31) {
2371         return 1;
2372     }
2373 
2374     return 0;
2375 }
2376 
2377 static int parse_cap_ppc_safe_indirect_branch(struct kvm_ppc_cpu_char c)
2378 {
2379     if (c.character & c.character_mask & H_CPU_CHAR_CACHE_COUNT_DIS) {
2380         return  SPAPR_CAP_FIXED_CCD;
2381     } else if (c.character & c.character_mask & H_CPU_CHAR_BCCTRL_SERIALISED) {
2382         return SPAPR_CAP_FIXED_IBS;
2383     }
2384 
2385     return 0;
2386 }
2387 
2388 static void kvmppc_get_cpu_characteristics(KVMState *s)
2389 {
2390     struct kvm_ppc_cpu_char c;
2391     int ret;
2392 
2393     /* Assume broken */
2394     cap_ppc_safe_cache = 0;
2395     cap_ppc_safe_bounds_check = 0;
2396     cap_ppc_safe_indirect_branch = 0;
2397 
2398     ret = kvm_vm_check_extension(s, KVM_CAP_PPC_GET_CPU_CHAR);
2399     if (!ret) {
2400         return;
2401     }
2402     ret = kvm_vm_ioctl(s, KVM_PPC_GET_CPU_CHAR, &c);
2403     if (ret < 0) {
2404         return;
2405     }
2406 
2407     cap_ppc_safe_cache = parse_cap_ppc_safe_cache(c);
2408     cap_ppc_safe_bounds_check = parse_cap_ppc_safe_bounds_check(c);
2409     cap_ppc_safe_indirect_branch = parse_cap_ppc_safe_indirect_branch(c);
2410 }
2411 
2412 int kvmppc_get_cap_safe_cache(void)
2413 {
2414     return cap_ppc_safe_cache;
2415 }
2416 
2417 int kvmppc_get_cap_safe_bounds_check(void)
2418 {
2419     return cap_ppc_safe_bounds_check;
2420 }
2421 
2422 int kvmppc_get_cap_safe_indirect_branch(void)
2423 {
2424     return cap_ppc_safe_indirect_branch;
2425 }
2426 
2427 bool kvmppc_has_cap_spapr_vfio(void)
2428 {
2429     return cap_spapr_vfio;
2430 }
2431 
2432 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
2433 {
2434     uint32_t host_pvr = mfpvr();
2435     PowerPCCPUClass *pvr_pcc;
2436 
2437     pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2438     if (pvr_pcc == NULL) {
2439         pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2440     }
2441 
2442     return pvr_pcc;
2443 }
2444 
2445 static int kvm_ppc_register_host_cpu_type(MachineState *ms)
2446 {
2447     TypeInfo type_info = {
2448         .name = TYPE_HOST_POWERPC_CPU,
2449         .class_init = kvmppc_host_cpu_class_init,
2450     };
2451     MachineClass *mc = MACHINE_GET_CLASS(ms);
2452     PowerPCCPUClass *pvr_pcc;
2453     ObjectClass *oc;
2454     DeviceClass *dc;
2455     int i;
2456 
2457     pvr_pcc = kvm_ppc_get_host_cpu_class();
2458     if (pvr_pcc == NULL) {
2459         return -1;
2460     }
2461     type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2462     type_register(&type_info);
2463     if (object_dynamic_cast(OBJECT(ms), TYPE_SPAPR_MACHINE)) {
2464         /* override TCG default cpu type with 'host' cpu model */
2465         mc->default_cpu_type = TYPE_HOST_POWERPC_CPU;
2466     }
2467 
2468     oc = object_class_by_name(type_info.name);
2469     g_assert(oc);
2470 
2471     /*
2472      * Update generic CPU family class alias (e.g. on a POWER8NVL host,
2473      * we want "POWER8" to be a "family" alias that points to the current
2474      * host CPU type, too)
2475      */
2476     dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc));
2477     for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) {
2478         if (strcasecmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) {
2479             char *suffix;
2480 
2481             ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc));
2482             suffix = strstr(ppc_cpu_aliases[i].model, POWERPC_CPU_TYPE_SUFFIX);
2483             if (suffix) {
2484                 *suffix = 0;
2485             }
2486             break;
2487         }
2488     }
2489 
2490     return 0;
2491 }
2492 
2493 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2494 {
2495     struct kvm_rtas_token_args args = {
2496         .token = token,
2497     };
2498 
2499     if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2500         return -ENOENT;
2501     }
2502 
2503     strncpy(args.name, function, sizeof(args.name));
2504 
2505     return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2506 }
2507 
2508 int kvmppc_get_htab_fd(bool write, uint64_t index, Error **errp)
2509 {
2510     struct kvm_get_htab_fd s = {
2511         .flags = write ? KVM_GET_HTAB_WRITE : 0,
2512         .start_index = index,
2513     };
2514     int ret;
2515 
2516     if (!cap_htab_fd) {
2517         error_setg(errp, "KVM version doesn't support %s the HPT",
2518                    write ? "writing" : "reading");
2519         return -ENOTSUP;
2520     }
2521 
2522     ret = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2523     if (ret < 0) {
2524         error_setg(errp, "Unable to open fd for %s HPT %s KVM: %s",
2525                    write ? "writing" : "reading", write ? "to" : "from",
2526                    strerror(errno));
2527         return -errno;
2528     }
2529 
2530     return ret;
2531 }
2532 
2533 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2534 {
2535     int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2536     uint8_t buf[bufsize];
2537     ssize_t rc;
2538 
2539     do {
2540         rc = read(fd, buf, bufsize);
2541         if (rc < 0) {
2542             fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2543                     strerror(errno));
2544             return rc;
2545         } else if (rc) {
2546             uint8_t *buffer = buf;
2547             ssize_t n = rc;
2548             while (n) {
2549                 struct kvm_get_htab_header *head =
2550                     (struct kvm_get_htab_header *) buffer;
2551                 size_t chunksize = sizeof(*head) +
2552                      HASH_PTE_SIZE_64 * head->n_valid;
2553 
2554                 qemu_put_be32(f, head->index);
2555                 qemu_put_be16(f, head->n_valid);
2556                 qemu_put_be16(f, head->n_invalid);
2557                 qemu_put_buffer(f, (void *)(head + 1),
2558                                 HASH_PTE_SIZE_64 * head->n_valid);
2559 
2560                 buffer += chunksize;
2561                 n -= chunksize;
2562             }
2563         }
2564     } while ((rc != 0)
2565              && ((max_ns < 0)
2566                  || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2567 
2568     return (rc == 0) ? 1 : 0;
2569 }
2570 
2571 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2572                            uint16_t n_valid, uint16_t n_invalid)
2573 {
2574     struct kvm_get_htab_header *buf;
2575     size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
2576     ssize_t rc;
2577 
2578     buf = alloca(chunksize);
2579     buf->index = index;
2580     buf->n_valid = n_valid;
2581     buf->n_invalid = n_invalid;
2582 
2583     qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
2584 
2585     rc = write(fd, buf, chunksize);
2586     if (rc < 0) {
2587         fprintf(stderr, "Error writing KVM hash table: %s\n",
2588                 strerror(errno));
2589         return rc;
2590     }
2591     if (rc != chunksize) {
2592         /* We should never get a short write on a single chunk */
2593         fprintf(stderr, "Short write, restoring KVM hash table\n");
2594         return -1;
2595     }
2596     return 0;
2597 }
2598 
2599 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2600 {
2601     return true;
2602 }
2603 
2604 void kvm_arch_init_irq_routing(KVMState *s)
2605 {
2606 }
2607 
2608 void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n)
2609 {
2610     int fd, rc;
2611     int i;
2612 
2613     fd = kvmppc_get_htab_fd(false, ptex, &error_abort);
2614 
2615     i = 0;
2616     while (i < n) {
2617         struct kvm_get_htab_header *hdr;
2618         int m = n < HPTES_PER_GROUP ? n : HPTES_PER_GROUP;
2619         char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64];
2620 
2621         rc = read(fd, buf, sizeof(buf));
2622         if (rc < 0) {
2623             hw_error("kvmppc_read_hptes: Unable to read HPTEs");
2624         }
2625 
2626         hdr = (struct kvm_get_htab_header *)buf;
2627         while ((i < n) && ((char *)hdr < (buf + rc))) {
2628             int invalid = hdr->n_invalid, valid = hdr->n_valid;
2629 
2630             if (hdr->index != (ptex + i)) {
2631                 hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32
2632                          " != (%"HWADDR_PRIu" + %d", hdr->index, ptex, i);
2633             }
2634 
2635             if (n - i < valid) {
2636                 valid = n - i;
2637             }
2638             memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * valid);
2639             i += valid;
2640 
2641             if ((n - i) < invalid) {
2642                 invalid = n - i;
2643             }
2644             memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64);
2645             i += invalid;
2646 
2647             hdr = (struct kvm_get_htab_header *)
2648                 ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid);
2649         }
2650     }
2651 
2652     close(fd);
2653 }
2654 
2655 void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1)
2656 {
2657     int fd, rc;
2658     struct {
2659         struct kvm_get_htab_header hdr;
2660         uint64_t pte0;
2661         uint64_t pte1;
2662     } buf;
2663 
2664     fd = kvmppc_get_htab_fd(true, 0 /* Ignored */, &error_abort);
2665 
2666     buf.hdr.n_valid = 1;
2667     buf.hdr.n_invalid = 0;
2668     buf.hdr.index = ptex;
2669     buf.pte0 = cpu_to_be64(pte0);
2670     buf.pte1 = cpu_to_be64(pte1);
2671 
2672     rc = write(fd, &buf, sizeof(buf));
2673     if (rc != sizeof(buf)) {
2674         hw_error("kvmppc_write_hpte: Unable to update KVM HPT");
2675     }
2676     close(fd);
2677 }
2678 
2679 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2680                              uint64_t address, uint32_t data, PCIDevice *dev)
2681 {
2682     return 0;
2683 }
2684 
2685 int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
2686                                 int vector, PCIDevice *dev)
2687 {
2688     return 0;
2689 }
2690 
/* Always returns 0; @virq is not examined. */
int kvm_arch_release_virq_post(int virq)
{
    (void)virq;

    return 0;
}
2695 
/* Return the low 16 bits of @data. */
int kvm_arch_msi_data_to_gsi(uint32_t data)
{
    uint32_t low16 = data & 0xffff;

    return low16;
}
2700 
2701 int kvmppc_enable_hwrng(void)
2702 {
2703     if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2704         return -1;
2705     }
2706 
2707     return kvmppc_enable_hcall(kvm_state, H_RANDOM);
2708 }
2709 
2710 void kvmppc_check_papr_resize_hpt(Error **errp)
2711 {
2712     if (!kvm_enabled()) {
2713         return; /* No KVM, we're good */
2714     }
2715 
2716     if (cap_resize_hpt) {
2717         return; /* Kernel has explicit support, we're good */
2718     }
2719 
2720     /* Otherwise fallback on looking for PR KVM */
2721     if (kvmppc_is_pr(kvm_state)) {
2722         return;
2723     }
2724 
2725     error_setg(errp,
2726                "Hash page table resizing not available with this KVM version");
2727 }
2728 
2729 int kvmppc_resize_hpt_prepare(PowerPCCPU *cpu, target_ulong flags, int shift)
2730 {
2731     CPUState *cs = CPU(cpu);
2732     struct kvm_ppc_resize_hpt rhpt = {
2733         .flags = flags,
2734         .shift = shift,
2735     };
2736 
2737     if (!cap_resize_hpt) {
2738         return -ENOSYS;
2739     }
2740 
2741     return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_PREPARE, &rhpt);
2742 }
2743 
2744 int kvmppc_resize_hpt_commit(PowerPCCPU *cpu, target_ulong flags, int shift)
2745 {
2746     CPUState *cs = CPU(cpu);
2747     struct kvm_ppc_resize_hpt rhpt = {
2748         .flags = flags,
2749         .shift = shift,
2750     };
2751 
2752     if (!cap_resize_hpt) {
2753         return -ENOSYS;
2754     }
2755 
2756     return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_COMMIT, &rhpt);
2757 }
2758 
2759 /*
2760  * This is a helper function to detect a post migration scenario
2761  * in which a guest, running as KVM-HV, freezes in cpu_post_load because
2762  * the guest kernel can't handle a PVR value other than the actual host
2763  * PVR in KVM_SET_SREGS, even if pvr_match() returns true.
2764  *
2765  * If we don't have cap_ppc_pvr_compat and we're not running in PR
2766  * (so, we're HV), return true. The workaround itself is done in
2767  * cpu_post_load.
2768  *
2769  * The order here is important: we'll only check for KVM PR as a
2770  * fallback if the guest kernel can't handle the situation itself.
2771  * We need to avoid as much as possible querying the running KVM type
2772  * in QEMU level.
2773  */
2774 bool kvmppc_pvr_workaround_required(PowerPCCPU *cpu)
2775 {
2776     CPUState *cs = CPU(cpu);
2777 
2778     if (!kvm_enabled()) {
2779         return false;
2780     }
2781 
2782     if (cap_ppc_pvr_compat) {
2783         return false;
2784     }
2785 
2786     return !kvmppc_is_pr(cs->kvm_state);
2787 }
2788