xref: /openbmc/qemu/target/ppc/kvm.c (revision 438c78da)
1 /*
2  * PowerPC implementation of KVM hooks
3  *
4  * Copyright IBM Corp. 2007
5  * Copyright (C) 2011 Freescale Semiconductor, Inc.
6  *
7  * Authors:
8  *  Jerone Young <jyoung5@us.ibm.com>
9  *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10  *  Hollis Blanchard <hollisb@us.ibm.com>
11  *
12  * This work is licensed under the terms of the GNU GPL, version 2 or later.
13  * See the COPYING file in the top-level directory.
14  *
15  */
16 
17 #include "qemu/osdep.h"
18 #include <dirent.h>
19 #include <sys/ioctl.h>
20 #include <sys/vfs.h>
21 
22 #include <linux/kvm.h>
23 
24 #include "qemu-common.h"
25 #include "qapi/error.h"
26 #include "qemu/error-report.h"
27 #include "cpu.h"
28 #include "cpu-models.h"
29 #include "qemu/timer.h"
30 #include "sysemu/sysemu.h"
31 #include "sysemu/hw_accel.h"
32 #include "kvm_ppc.h"
33 #include "sysemu/cpus.h"
34 #include "sysemu/device_tree.h"
35 #include "mmu-hash64.h"
36 
37 #include "hw/sysbus.h"
38 #include "hw/ppc/spapr.h"
39 #include "hw/ppc/spapr_vio.h"
40 #include "hw/ppc/spapr_cpu_core.h"
41 #include "hw/ppc/ppc.h"
42 #include "sysemu/watchdog.h"
43 #include "trace.h"
44 #include "exec/gdbstub.h"
45 #include "exec/memattrs.h"
46 #include "exec/ram_addr.h"
47 #include "sysemu/hostmem.h"
48 #include "qemu/cutils.h"
49 #include "qemu/mmap-alloc.h"
50 #include "elf.h"
51 #include "sysemu/kvm_int.h"
52 
53 //#define DEBUG_KVM
54 
55 #ifdef DEBUG_KVM
56 #define DPRINTF(fmt, ...) \
57     do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
58 #else
59 #define DPRINTF(fmt, ...) \
60     do { } while (0)
61 #endif
62 
63 #define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"
64 
65 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
66     KVM_CAP_LAST_INFO
67 };
68 
69 static int cap_interrupt_unset = false;
70 static int cap_interrupt_level = false;
71 static int cap_segstate;
72 static int cap_booke_sregs;
73 static int cap_ppc_smt;
74 static int cap_ppc_smt_possible;
75 static int cap_spapr_tce;
76 static int cap_spapr_tce_64;
77 static int cap_spapr_multitce;
78 static int cap_spapr_vfio;
79 static int cap_hior;
80 static int cap_one_reg;
81 static int cap_epr;
82 static int cap_ppc_watchdog;
83 static int cap_papr;
84 static int cap_htab_fd;
85 static int cap_fixup_hcalls;
86 static int cap_htm;             /* Hardware transactional memory support */
87 static int cap_mmu_radix;
88 static int cap_mmu_hash_v3;
89 static int cap_resize_hpt;
90 static int cap_ppc_pvr_compat;
91 static int cap_ppc_safe_cache;
92 static int cap_ppc_safe_bounds_check;
93 static int cap_ppc_safe_indirect_branch;
94 
95 static uint32_t debug_inst_opcode;
96 
97 /* XXX We have a race condition where we actually have a level triggered
98  *     interrupt, but the infrastructure can't expose that yet, so the guest
99  *     takes it but ignores it, goes to sleep and never gets notified that there's
100  *     still an interrupt pending.
101  *
102  *     As a quick workaround, let's just wake up again 20 ms after we injected
103  *     an interrupt. That way we can ensure that we're always reinjecting
104  *     interrupts in case the guest swallowed them.
105  */
106 static QEMUTimer *idle_timer;
107 
108 static void kvm_kick_cpu(void *opaque)
109 {
110     PowerPCCPU *cpu = opaque;
111 
112     qemu_cpu_kick(CPU(cpu));
113 }
114 
115 /* Check whether we are running with KVM-PR (instead of KVM-HV).  This
116  * should only be used for fallback tests - generally we should use
117  * explicit capabilities for the features we want, rather than
118  * assuming what is/isn't available depending on the KVM variant. */
119 static bool kvmppc_is_pr(KVMState *ks)
120 {
121     /* Assume KVM-PR if the GET_PVINFO capability is available */
122     return kvm_vm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
123 }
124 
125 static int kvm_ppc_register_host_cpu_type(MachineState *ms);
126 static void kvmppc_get_cpu_characteristics(KVMState *s);
127 
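/*
 * Probe the KVM capabilities we care about once at accelerator init time,
 * cache them in the cap_* globals above, and register the host CPU type.
 */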
128 int kvm_arch_init(MachineState *ms, KVMState *s)
129 {
130     cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
131     cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
132     cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
133     cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
134     cap_ppc_smt_possible = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT_POSSIBLE);
135     cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
136     cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64);
137     cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
138     cap_spapr_vfio = kvm_vm_check_extension(s, KVM_CAP_SPAPR_TCE_VFIO);
139     cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
140     cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
141     cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
142     cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
143     /* Note: we don't set cap_papr here, because this capability is
144      * only activated later by kvmppc_set_papr() */
145     cap_htab_fd = kvm_vm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
146     cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
147     cap_ppc_smt = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT);
148     cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
149     cap_mmu_radix = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_RADIX);
150     cap_mmu_hash_v3 = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_HASH_V3);
151     cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT);
152     kvmppc_get_cpu_characteristics(s);
153     /*
154      * Note: setting it to false because there is no such capability
155      * in KVM at the moment.
156      *
157      * TODO: call kvm_vm_check_extension() with the right capability
158      * after the kernel starts implementing it. */
159     cap_ppc_pvr_compat = false;
160 
161     if (!cap_interrupt_level) {
162         fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
163                         "VM to stall at times!\n");
164     }
165 
166     kvm_ppc_register_host_cpu_type(ms);
167 
168     return 0;
169 }
170 
171 int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
172 {
173     return 0;
174 }
175 
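/*
 * Tell KVM which PVR the guest uses: fetch the current sregs, patch in the
 * PVR from the register file and write them back. BookE guests always run
 * with the native PVR, so there is nothing to do for them.
 */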
176 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
177 {
178     CPUPPCState *cenv = &cpu->env;
179     CPUState *cs = CPU(cpu);
180     struct kvm_sregs sregs;
181     int ret;
182 
183     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
184         /* What we're really trying to say is "if we're on BookE, we use
185            the native PVR for now". This is the only sane way to check it,
186            though it may mislead users into thinking they can run BookE
187            guests on BookS hosts. Let's hope nobody dares to try :) */
188         return 0;
189     } else {
190         if (!cap_segstate) {
191             fprintf(stderr, "kvm error: missing PVR setting capability\n");
192             return -ENOSYS;
193         }
194     }
195 
196     ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
197     if (ret) {
198         return ret;
199     }
200 
201     sregs.pvr = cenv->spr[SPR_PVR];
202     return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
203 }
204 
205 /* Set up a shared TLB array with KVM */
206 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
207 {
208     CPUPPCState *env = &cpu->env;
209     CPUState *cs = CPU(cpu);
210     struct kvm_book3e_206_tlb_params params = {};
211     struct kvm_config_tlb cfg = {};
212     unsigned int entries = 0;
213     int ret, i;
214 
215     if (!kvm_enabled() ||
216         !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
217         return 0;
218     }
219 
220     assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
221 
222     for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
223         params.tlb_sizes[i] = booke206_tlb_size(env, i);
224         params.tlb_ways[i] = booke206_tlb_ways(env, i);
225         entries += params.tlb_sizes[i];
226     }
227 
228     assert(entries == env->nb_tlb);
229     assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
230 
231     env->tlb_dirty = true;
232 
233     cfg.array = (uintptr_t)env->tlb.tlbm;
234     cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
235     cfg.params = (uintptr_t)&params;
236     cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
237 
238     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
239     if (ret < 0) {
240         fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
241                 __func__, strerror(-ret));
242         return ret;
243     }
244 
245     env->kvm_sw_tlb = true;
246     return 0;
247 }
248 
249 
250 #if defined(TARGET_PPC64)
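/*
 * Fetch the host MMU capabilities (segment and page size encodings) from
 * KVM via KVM_PPC_GET_SMMU_INFO, setting an error if the capability or the
 * ioctl is unavailable.
 */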
251 static void kvm_get_smmu_info(struct kvm_ppc_smmu_info *info, Error **errp)
252 {
253     int ret;
254 
255     assert(kvm_state != NULL);
256 
257     if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
258         error_setg(errp, "KVM doesn't expose the MMU features it supports");
259         error_append_hint(errp, "Consider switching to a newer KVM\n");
260         return;
261     }
262 
263     ret = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_SMMU_INFO, info);
264     if (ret == 0) {
265         return;
266     }
267 
268     error_setg_errno(errp, -ret,
269                      "KVM failed to provide the MMU features it supports");
270 }
271 
272 struct ppc_radix_page_info *kvm_get_radix_page_info(void)
273 {
274     KVMState *s = KVM_STATE(current_machine->accelerator);
275     struct ppc_radix_page_info *radix_page_info;
276     struct kvm_ppc_rmmu_info rmmu_info;
277     int i;
278 
279     if (!kvm_check_extension(s, KVM_CAP_PPC_MMU_RADIX)) {
280         return NULL;
281     }
282     if (kvm_vm_ioctl(s, KVM_PPC_GET_RMMU_INFO, &rmmu_info)) {
283         return NULL;
284     }
285     radix_page_info = g_malloc0(sizeof(*radix_page_info));
286     radix_page_info->count = 0;
287     for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) {
288         if (rmmu_info.ap_encodings[i]) {
289             radix_page_info->entries[i] = rmmu_info.ap_encodings[i];
290             radix_page_info->count++;
291         }
292     }
293     return radix_page_info;
294 }
295 
296 target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu,
297                                      bool radix, bool gtse,
298                                      uint64_t proc_tbl)
299 {
300     CPUState *cs = CPU(cpu);
301     int ret;
302     uint64_t flags = 0;
303     struct kvm_ppc_mmuv3_cfg cfg = {
304         .process_table = proc_tbl,
305     };
306 
307     if (radix) {
308         flags |= KVM_PPC_MMUV3_RADIX;
309     }
310     if (gtse) {
311         flags |= KVM_PPC_MMUV3_GTSE;
312     }
313     cfg.flags = flags;
314     ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_CONFIGURE_V3_MMU, &cfg);
315     switch (ret) {
316     case 0:
317         return H_SUCCESS;
318     case -EINVAL:
319         return H_PARAMETER;
320     case -ENODEV:
321         return H_NOT_AVAILABLE;
322     default:
323         return H_HARDWARE;
324     }
325 }
326 
327 bool kvmppc_hpt_needs_host_contiguous_pages(void)
328 {
329     static struct kvm_ppc_smmu_info smmu_info;
330 
331     if (!kvm_enabled()) {
332         return false;
333     }
334 
335     kvm_get_smmu_info(&smmu_info, &error_fatal);
336     return !!(smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL);
337 }
338 
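/*
 * Check that every segment and page size the guest CPU model relies on is
 * also supported by KVM with the same SLB/PTE encodings, and report an
 * error through errp otherwise.
 */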
339 void kvm_check_mmu(PowerPCCPU *cpu, Error **errp)
340 {
341     struct kvm_ppc_smmu_info smmu_info;
342     int iq, ik, jq, jk;
343     Error *local_err = NULL;
344 
345     /* For now, we only have anything to check on hash64 MMUs */
346     if (!cpu->hash64_opts || !kvm_enabled()) {
347         return;
348     }
349 
350     kvm_get_smmu_info(&smmu_info, &local_err);
351     if (local_err) {
352         error_propagate(errp, local_err);
353         return;
354     }
355 
356     if (ppc_hash64_has(cpu, PPC_HASH64_1TSEG)
357         && !(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
358         error_setg(errp,
359                    "KVM does not support 1TiB segments which guest expects");
360         return;
361     }
362 
363     if (smmu_info.slb_size < cpu->hash64_opts->slb_size) {
364         error_setg(errp, "KVM only supports %u SLB entries, but guest needs %u",
365                    smmu_info.slb_size, cpu->hash64_opts->slb_size);
366         return;
367     }
368 
369     /*
370      * Verify that every pagesize supported by the cpu model is
371      * supported by KVM with the same encodings
372      */
373     for (iq = 0; iq < ARRAY_SIZE(cpu->hash64_opts->sps); iq++) {
374         PPCHash64SegmentPageSizes *qsps = &cpu->hash64_opts->sps[iq];
375         struct kvm_ppc_one_seg_page_size *ksps;
376 
377         for (ik = 0; ik < ARRAY_SIZE(smmu_info.sps); ik++) {
378             if (qsps->page_shift == smmu_info.sps[ik].page_shift) {
379                 break;
380             }
381         }
382         if (ik >= ARRAY_SIZE(smmu_info.sps)) {
383             error_setg(errp, "KVM doesn't support base page shift %u",
384                        qsps->page_shift);
385             return;
386         }
387 
388         ksps = &smmu_info.sps[ik];
389         if (ksps->slb_enc != qsps->slb_enc) {
390             error_setg(errp,
391 "KVM uses SLB encoding 0x%x for page shift %u, but guest expects 0x%x",
392                        ksps->slb_enc, ksps->page_shift, qsps->slb_enc);
393             return;
394         }
395 
396         for (jq = 0; jq < ARRAY_SIZE(qsps->enc); jq++) {
397             for (jk = 0; jk < ARRAY_SIZE(ksps->enc); jk++) {
398                 if (qsps->enc[jq].page_shift == ksps->enc[jk].page_shift) {
399                     break;
400                 }
401             }
402 
403             if (jk >= ARRAY_SIZE(ksps->enc)) {
404                 error_setg(errp, "KVM doesn't support page shift %u/%u",
405                            qsps->enc[jq].page_shift, qsps->page_shift);
406                 return;
407             }
408             if (qsps->enc[jq].pte_enc != ksps->enc[jk].pte_enc) {
409                 error_setg(errp,
410 "KVM uses PTE encoding 0x%x for page shift %u/%u, but guest expects 0x%x",
411                            ksps->enc[jk].pte_enc, qsps->enc[jq].page_shift,
412                            qsps->page_shift, qsps->enc[jq].pte_enc);
413                 return;
414             }
415         }
416     }
417 
418     if (ppc_hash64_has(cpu, PPC_HASH64_CI_LARGEPAGE)) {
419         /* Mostly, the guest page sizes we can use are determined by the
420          * host pages used to map guest RAM, which is handled in the
421          * platform code. Cache-inhibited largepages (64k) however are
422          * used for I/O, so if they're mapped to the host at all it
423          * will be a normal mapping, not a special hugepage one used
424          * for RAM. */
425         if (getpagesize() < 0x10000) {
426             error_setg(errp,
427                        "KVM can't supply 64kiB CI pages, which guest expects");
428         }
429     }
430 }
431 #endif /* defined(TARGET_PPC64) */
432 
433 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
434 {
435     return POWERPC_CPU(cpu)->vcpu_id;
436 }
437 
438 /* e500 supports 2 h/w breakpoints and 2 watchpoints.
439  * book3s supports only 1 watchpoint, so an array size
440  * of 4 is sufficient for now.
441  */
442 #define MAX_HW_BKPTS 4
443 
444 static struct HWBreakpoint {
445     target_ulong addr;
446     int type;
447 } hw_debug_points[MAX_HW_BKPTS];
448 
449 static CPUWatchpoint hw_watchpoint;
450 
451 /* By default, no hardware breakpoints or watchpoints are supported */
452 static int max_hw_breakpoint;
453 static int max_hw_watchpoint;
454 static int nb_hw_breakpoint;
455 static int nb_hw_watchpoint;
456 
457 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
458 {
459     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
460         max_hw_breakpoint = 2;
461         max_hw_watchpoint = 2;
462     }
463 
464     if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
465         fprintf(stderr, "Error initializing h/w breakpoints\n");
466         return;
467     }
468 }
469 
470 int kvm_arch_init_vcpu(CPUState *cs)
471 {
472     PowerPCCPU *cpu = POWERPC_CPU(cs);
473     CPUPPCState *cenv = &cpu->env;
474     int ret;
475 
476     /* Synchronize sregs with KVM */
477     ret = kvm_arch_sync_sregs(cpu);
478     if (ret) {
479         if (ret == -EINVAL) {
480             error_report("Register sync failed... If you're using kvm-hv.ko,"
481                          " only \"-cpu host\" is possible");
482         }
483         return ret;
484     }
485 
486     idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
487 
488     switch (cenv->mmu_model) {
489     case POWERPC_MMU_BOOKE206:
490         /* This target supports access to KVM's guest TLB */
491         ret = kvm_booke206_tlb_init(cpu);
492         break;
493     case POWERPC_MMU_2_07:
494         if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
495             /* KVM-HV has transactional memory on POWER8 even without the
496              * KVM_CAP_PPC_HTM extension, so enable it here instead, as
497              * long as it's available to userspace on the host. */
498             if (qemu_getauxval(AT_HWCAP2) & PPC_FEATURE2_HAS_HTM) {
499                 cap_htm = true;
500             }
501         }
502         break;
503     default:
504         break;
505     }
506 
507     kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
508     kvmppc_hw_debug_points_init(cenv);
509 
510     return ret;
511 }
512 
513 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
514 {
515     CPUPPCState *env = &cpu->env;
516     CPUState *cs = CPU(cpu);
517     struct kvm_dirty_tlb dirty_tlb;
518     unsigned char *bitmap;
519     int ret;
520 
521     if (!env->kvm_sw_tlb) {
522         return;
523     }
524 
525     bitmap = g_malloc((env->nb_tlb + 7) / 8);
526     memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
527 
528     dirty_tlb.bitmap = (uintptr_t)bitmap;
529     dirty_tlb.num_dirty = env->nb_tlb;
530 
531     ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
532     if (ret) {
533         fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
534                 __func__, strerror(-ret));
535     }
536 
537     g_free(bitmap);
538 }
539 
540 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
541 {
542     PowerPCCPU *cpu = POWERPC_CPU(cs);
543     CPUPPCState *env = &cpu->env;
544     union {
545         uint32_t u32;
546         uint64_t u64;
547     } val;
548     struct kvm_one_reg reg = {
549         .id = id,
550         .addr = (uintptr_t) &val,
551     };
552     int ret;
553 
554     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
555     if (ret != 0) {
556         trace_kvm_failed_spr_get(spr, strerror(errno));
557     } else {
558         switch (id & KVM_REG_SIZE_MASK) {
559         case KVM_REG_SIZE_U32:
560             env->spr[spr] = val.u32;
561             break;
562 
563         case KVM_REG_SIZE_U64:
564             env->spr[spr] = val.u64;
565             break;
566 
567         default:
568             /* Don't handle this size yet */
569             abort();
570         }
571     }
572 }
573 
574 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
575 {
576     PowerPCCPU *cpu = POWERPC_CPU(cs);
577     CPUPPCState *env = &cpu->env;
578     union {
579         uint32_t u32;
580         uint64_t u64;
581     } val;
582     struct kvm_one_reg reg = {
583         .id = id,
584         .addr = (uintptr_t) &val,
585     };
586     int ret;
587 
588     switch (id & KVM_REG_SIZE_MASK) {
589     case KVM_REG_SIZE_U32:
590         val.u32 = env->spr[spr];
591         break;
592 
593     case KVM_REG_SIZE_U64:
594         val.u64 = env->spr[spr];
595         break;
596 
597     default:
598         /* Don't handle this size yet */
599         abort();
600     }
601 
602     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
603     if (ret != 0) {
604         trace_kvm_failed_spr_set(spr, strerror(errno));
605     }
606 }
607 
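/*
 * Push the QEMU-side FPSCR, FPR/VSR and Altivec state into KVM one register
 * at a time via KVM_SET_ONE_REG; the FPR/VSR halves are laid out according
 * to host endianness.
 */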
608 static int kvm_put_fp(CPUState *cs)
609 {
610     PowerPCCPU *cpu = POWERPC_CPU(cs);
611     CPUPPCState *env = &cpu->env;
612     struct kvm_one_reg reg;
613     int i;
614     int ret;
615 
616     if (env->insns_flags & PPC_FLOAT) {
617         uint64_t fpscr = env->fpscr;
618         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
619 
620         reg.id = KVM_REG_PPC_FPSCR;
621         reg.addr = (uintptr_t)&fpscr;
622         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
623         if (ret < 0) {
624             DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
625             return ret;
626         }
627 
628         for (i = 0; i < 32; i++) {
629             uint64_t vsr[2];
630 
631 #ifdef HOST_WORDS_BIGENDIAN
632             vsr[0] = float64_val(env->fpr[i]);
633             vsr[1] = env->vsr[i];
634 #else
635             vsr[0] = env->vsr[i];
636             vsr[1] = float64_val(env->fpr[i]);
637 #endif
638             reg.addr = (uintptr_t) &vsr;
639             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
640 
641             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
642             if (ret < 0) {
643                 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
644                         i, strerror(errno));
645                 return ret;
646             }
647         }
648     }
649 
650     if (env->insns_flags & PPC_ALTIVEC) {
651         reg.id = KVM_REG_PPC_VSCR;
652         reg.addr = (uintptr_t)&env->vscr;
653         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
654         if (ret < 0) {
655             DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
656             return ret;
657         }
658 
659         for (i = 0; i < 32; i++) {
660             reg.id = KVM_REG_PPC_VR(i);
661             reg.addr = (uintptr_t)&env->avr[i];
662             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
663             if (ret < 0) {
664                 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
665                 return ret;
666             }
667         }
668     }
669 
670     return 0;
671 }
672 
673 static int kvm_get_fp(CPUState *cs)
674 {
675     PowerPCCPU *cpu = POWERPC_CPU(cs);
676     CPUPPCState *env = &cpu->env;
677     struct kvm_one_reg reg;
678     int i;
679     int ret;
680 
681     if (env->insns_flags & PPC_FLOAT) {
682         uint64_t fpscr;
683         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
684 
685         reg.id = KVM_REG_PPC_FPSCR;
686         reg.addr = (uintptr_t)&fpscr;
687         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
688         if (ret < 0) {
689             DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
690             return ret;
691         } else {
692             env->fpscr = fpscr;
693         }
694 
695         for (i = 0; i < 32; i++) {
696             uint64_t vsr[2];
697 
698             reg.addr = (uintptr_t) &vsr;
699             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
700 
701             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
702             if (ret < 0) {
703                 DPRINTF("Unable to get %s%d from KVM: %s\n",
704                         vsx ? "VSR" : "FPR", i, strerror(errno));
705                 return ret;
706             } else {
707 #ifdef HOST_WORDS_BIGENDIAN
708                 env->fpr[i] = vsr[0];
709                 if (vsx) {
710                     env->vsr[i] = vsr[1];
711                 }
712 #else
713                 env->fpr[i] = vsr[1];
714                 if (vsx) {
715                     env->vsr[i] = vsr[0];
716                 }
717 #endif
718             }
719         }
720     }
721 
722     if (env->insns_flags & PPC_ALTIVEC) {
723         reg.id = KVM_REG_PPC_VSCR;
724         reg.addr = (uintptr_t)&env->vscr;
725         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
726         if (ret < 0) {
727             DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
728             return ret;
729         }
730 
731         for (i = 0; i < 32; i++) {
732             reg.id = KVM_REG_PPC_VR(i);
733             reg.addr = (uintptr_t)&env->avr[i];
734             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
735             if (ret < 0) {
736                 DPRINTF("Unable to get VR%d from KVM: %s\n",
737                         i, strerror(errno));
738                 return ret;
739             }
740         }
741     }
742 
743     return 0;
744 }
745 
746 #if defined(TARGET_PPC64)
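/*
 * Read the PAPR Virtual Processor Area registration state (VPA address,
 * SLB shadow and dispatch trace log) back from KVM.
 */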
747 static int kvm_get_vpa(CPUState *cs)
748 {
749     PowerPCCPU *cpu = POWERPC_CPU(cs);
750     sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu);
751     struct kvm_one_reg reg;
752     int ret;
753 
754     reg.id = KVM_REG_PPC_VPA_ADDR;
755     reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
756     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
757     if (ret < 0) {
758         DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
759         return ret;
760     }
761 
762     assert((uintptr_t)&spapr_cpu->slb_shadow_size
763            == ((uintptr_t)&spapr_cpu->slb_shadow_addr + 8));
764     reg.id = KVM_REG_PPC_VPA_SLB;
765     reg.addr = (uintptr_t)&spapr_cpu->slb_shadow_addr;
766     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
767     if (ret < 0) {
768         DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
769                 strerror(errno));
770         return ret;
771     }
772 
773     assert((uintptr_t)&spapr_cpu->dtl_size
774            == ((uintptr_t)&spapr_cpu->dtl_addr + 8));
775     reg.id = KVM_REG_PPC_VPA_DTL;
776     reg.addr = (uintptr_t)&spapr_cpu->dtl_addr;
777     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
778     if (ret < 0) {
779         DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
780                 strerror(errno));
781         return ret;
782     }
783 
784     return 0;
785 }
786 
787 static int kvm_put_vpa(CPUState *cs)
788 {
789     PowerPCCPU *cpu = POWERPC_CPU(cs);
790     sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu);
791     struct kvm_one_reg reg;
792     int ret;
793 
794     /* SLB shadow or DTL can't be registered unless a master VPA is
795      * registered.  That means when restoring state, if a VPA *is*
796      * registered, we need to set that up first.  If not, we need to
797      * deregister the others before deregistering the master VPA. */
798     assert(spapr_cpu->vpa_addr
799            || !(spapr_cpu->slb_shadow_addr || spapr_cpu->dtl_addr));
800 
801     if (spapr_cpu->vpa_addr) {
802         reg.id = KVM_REG_PPC_VPA_ADDR;
803         reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
804         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
805         if (ret < 0) {
806             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
807             return ret;
808         }
809     }
810 
811     assert((uintptr_t)&spapr_cpu->slb_shadow_size
812            == ((uintptr_t)&spapr_cpu->slb_shadow_addr + 8));
813     reg.id = KVM_REG_PPC_VPA_SLB;
814     reg.addr = (uintptr_t)&spapr_cpu->slb_shadow_addr;
815     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
816     if (ret < 0) {
817         DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
818         return ret;
819     }
820 
821     assert((uintptr_t)&spapr_cpu->dtl_size
822            == ((uintptr_t)&spapr_cpu->dtl_addr + 8));
823     reg.id = KVM_REG_PPC_VPA_DTL;
824     reg.addr = (uintptr_t)&spapr_cpu->dtl_addr;
825     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
826     if (ret < 0) {
827         DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
828                 strerror(errno));
829         return ret;
830     }
831 
832     if (!spapr_cpu->vpa_addr) {
833         reg.id = KVM_REG_PPC_VPA_ADDR;
834         reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
835         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
836         if (ret < 0) {
837             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
838             return ret;
839         }
840     }
841 
842     return 0;
843 }
844 #endif /* TARGET_PPC64 */
845 
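/*
 * Write the Book3S segment state (PVR, SDR1, SLB, segment registers and
 * BATs) from the QEMU CPU state into KVM.
 */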
846 int kvmppc_put_books_sregs(PowerPCCPU *cpu)
847 {
848     CPUPPCState *env = &cpu->env;
849     struct kvm_sregs sregs;
850     int i;
851 
852     sregs.pvr = env->spr[SPR_PVR];
853 
854     if (cpu->vhyp) {
855         PPCVirtualHypervisorClass *vhc =
856             PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
857         sregs.u.s.sdr1 = vhc->encode_hpt_for_kvm_pr(cpu->vhyp);
858     } else {
859         sregs.u.s.sdr1 = env->spr[SPR_SDR1];
860     }
861 
862     /* Sync SLB */
863 #ifdef TARGET_PPC64
864     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
865         sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
866         if (env->slb[i].esid & SLB_ESID_V) {
867             sregs.u.s.ppc64.slb[i].slbe |= i;
868         }
869         sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
870     }
871 #endif
872 
873     /* Sync SRs */
874     for (i = 0; i < 16; i++) {
875         sregs.u.s.ppc32.sr[i] = env->sr[i];
876     }
877 
878     /* Sync BATs */
879     for (i = 0; i < 8; i++) {
880         /* Beware. We have to swap the upper and lower 32-bit halves here */
881         sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
882             | env->DBAT[1][i];
883         sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
884             | env->IBAT[1][i];
885     }
886 
887     return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
888 }
889 
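/*
 * Sync the QEMU vCPU state (GPRs, SPRs, FP/VMX, sregs and, on PPC64, TM
 * state and the VPA) into KVM before the vCPU runs.
 */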
890 int kvm_arch_put_registers(CPUState *cs, int level)
891 {
892     PowerPCCPU *cpu = POWERPC_CPU(cs);
893     CPUPPCState *env = &cpu->env;
894     struct kvm_regs regs;
895     int ret;
896     int i;
897 
898     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
899     if (ret < 0) {
900         return ret;
901     }
902 
903     regs.ctr = env->ctr;
904     regs.lr  = env->lr;
905     regs.xer = cpu_read_xer(env);
906     regs.msr = env->msr;
907     regs.pc = env->nip;
908 
909     regs.srr0 = env->spr[SPR_SRR0];
910     regs.srr1 = env->spr[SPR_SRR1];
911 
912     regs.sprg0 = env->spr[SPR_SPRG0];
913     regs.sprg1 = env->spr[SPR_SPRG1];
914     regs.sprg2 = env->spr[SPR_SPRG2];
915     regs.sprg3 = env->spr[SPR_SPRG3];
916     regs.sprg4 = env->spr[SPR_SPRG4];
917     regs.sprg5 = env->spr[SPR_SPRG5];
918     regs.sprg6 = env->spr[SPR_SPRG6];
919     regs.sprg7 = env->spr[SPR_SPRG7];
920 
921     regs.pid = env->spr[SPR_BOOKE_PID];
922 
923     for (i = 0; i < 32; i++)
924         regs.gpr[i] = env->gpr[i];
925 
926     regs.cr = 0;
927     for (i = 0; i < 8; i++) {
928         regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
929     }
930 
931     ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
932     if (ret < 0)
933         return ret;
934 
935     kvm_put_fp(cs);
936 
937     if (env->tlb_dirty) {
938         kvm_sw_tlb_put(cpu);
939         env->tlb_dirty = false;
940     }
941 
942     if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
943         ret = kvmppc_put_books_sregs(cpu);
944         if (ret < 0) {
945             return ret;
946         }
947     }
948 
949     if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
950         kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
951     }
952 
953     if (cap_one_reg) {
954         int i;
955 
956         /* We deliberately ignore errors here: for kernels which have
957          * the ONE_REG calls but don't support the specific
958          * registers, there's a reasonable chance things will still
959          * work, at least until we try to migrate. */
960         for (i = 0; i < 1024; i++) {
961             uint64_t id = env->spr_cb[i].one_reg_id;
962 
963             if (id != 0) {
964                 kvm_put_one_spr(cs, id, i);
965             }
966         }
967 
968 #ifdef TARGET_PPC64
969         if (msr_ts) {
970             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
971                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
972             }
973             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
974                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
975             }
976             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
977             kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
978             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
979             kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
980             kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
981             kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
982             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
983             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
984             kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
985             kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
986         }
987 
988         if (cap_papr) {
989             if (kvm_put_vpa(cs) < 0) {
990                 DPRINTF("Warning: Unable to set VPA information to KVM\n");
991             }
992         }
993 
994         kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
995 #endif /* TARGET_PPC64 */
996     }
997 
998     return ret;
999 }
1000 
1001 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1002 {
1003     env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1004 }
1005 
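/*
 * Read the BookE special registers back from KVM, honouring the feature
 * bits advertised in sregs.u.e.features so we only consume state the
 * kernel actually provided.
 */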
1006 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1007 {
1008     CPUPPCState *env = &cpu->env;
1009     struct kvm_sregs sregs;
1010     int ret;
1011 
1012     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1013     if (ret < 0) {
1014         return ret;
1015     }
1016 
1017     if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1018         env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1019         env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1020         env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1021         env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1022         env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1023         env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1024         env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1025         env->spr[SPR_DECR] = sregs.u.e.dec;
1026         env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1027         env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1028         env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1029     }
1030 
1031     if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1032         env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1033         env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1034         env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1035         env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1036         env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1037     }
1038 
1039     if (sregs.u.e.features & KVM_SREGS_E_64) {
1040         env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1041     }
1042 
1043     if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1044         env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1045     }
1046 
1047     if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1048         env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1049         kvm_sync_excp(env, POWERPC_EXCP_CRITICAL,  SPR_BOOKE_IVOR0);
1050         env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1051         kvm_sync_excp(env, POWERPC_EXCP_MCHECK,  SPR_BOOKE_IVOR1);
1052         env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1053         kvm_sync_excp(env, POWERPC_EXCP_DSI,  SPR_BOOKE_IVOR2);
1054         env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1055         kvm_sync_excp(env, POWERPC_EXCP_ISI,  SPR_BOOKE_IVOR3);
1056         env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1057         kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL,  SPR_BOOKE_IVOR4);
1058         env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1059         kvm_sync_excp(env, POWERPC_EXCP_ALIGN,  SPR_BOOKE_IVOR5);
1060         env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1061         kvm_sync_excp(env, POWERPC_EXCP_PROGRAM,  SPR_BOOKE_IVOR6);
1062         env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1063         kvm_sync_excp(env, POWERPC_EXCP_FPU,  SPR_BOOKE_IVOR7);
1064         env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1065         kvm_sync_excp(env, POWERPC_EXCP_SYSCALL,  SPR_BOOKE_IVOR8);
1066         env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1067         kvm_sync_excp(env, POWERPC_EXCP_APU,  SPR_BOOKE_IVOR9);
1068         env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1069         kvm_sync_excp(env, POWERPC_EXCP_DECR,  SPR_BOOKE_IVOR10);
1070         env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1071         kvm_sync_excp(env, POWERPC_EXCP_FIT,  SPR_BOOKE_IVOR11);
1072         env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1073         kvm_sync_excp(env, POWERPC_EXCP_WDT,  SPR_BOOKE_IVOR12);
1074         env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1075         kvm_sync_excp(env, POWERPC_EXCP_DTLB,  SPR_BOOKE_IVOR13);
1076         env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1077         kvm_sync_excp(env, POWERPC_EXCP_ITLB,  SPR_BOOKE_IVOR14);
1078         env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1079         kvm_sync_excp(env, POWERPC_EXCP_DEBUG,  SPR_BOOKE_IVOR15);
1080 
1081         if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1082             env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1083             kvm_sync_excp(env, POWERPC_EXCP_SPEU,  SPR_BOOKE_IVOR32);
1084             env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1085             kvm_sync_excp(env, POWERPC_EXCP_EFPDI,  SPR_BOOKE_IVOR33);
1086             env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1087             kvm_sync_excp(env, POWERPC_EXCP_EFPRI,  SPR_BOOKE_IVOR34);
1088         }
1089 
1090         if (sregs.u.e.features & KVM_SREGS_E_PM) {
1091             env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1092             kvm_sync_excp(env, POWERPC_EXCP_EPERFM,  SPR_BOOKE_IVOR35);
1093         }
1094 
1095         if (sregs.u.e.features & KVM_SREGS_E_PC) {
1096             env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1097             kvm_sync_excp(env, POWERPC_EXCP_DOORI,  SPR_BOOKE_IVOR36);
1098             env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1099             kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1100         }
1101     }
1102 
1103     if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1104         env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1105         env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1106         env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1107         env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1108         env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1109         env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1110         env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1111         env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1112         env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1113         env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1114     }
1115 
1116     if (sregs.u.e.features & KVM_SREGS_EXP) {
1117         env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1118     }
1119 
1120     if (sregs.u.e.features & KVM_SREGS_E_PD) {
1121         env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1122         env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1123     }
1124 
1125     if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1126         env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1127         env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1128         env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1129 
1130         if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1131             env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1132             env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1133         }
1134     }
1135 
1136     return 0;
1137 }
1138 
1139 static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1140 {
1141     CPUPPCState *env = &cpu->env;
1142     struct kvm_sregs sregs;
1143     int ret;
1144     int i;
1145 
1146     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1147     if (ret < 0) {
1148         return ret;
1149     }
1150 
1151     if (!cpu->vhyp) {
1152         ppc_store_sdr1(env, sregs.u.s.sdr1);
1153     }
1154 
1155     /* Sync SLB */
1156 #ifdef TARGET_PPC64
1157     /*
1158      * The packed SLB array we get from KVM_GET_SREGS only contains
1159      * information about valid entries. So we flush our internal copy
1160      * to get rid of stale ones, then put all valid SLB entries back
1161      * in.
1162      */
1163     memset(env->slb, 0, sizeof(env->slb));
1164     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1165         target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1166         target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1167         /*
1168          * Only restore valid entries
1169          */
1170         if (rb & SLB_ESID_V) {
1171             ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1172         }
1173     }
1174 #endif
1175 
1176     /* Sync SRs */
1177     for (i = 0; i < 16; i++) {
1178         env->sr[i] = sregs.u.s.ppc32.sr[i];
1179     }
1180 
1181     /* Sync BATs */
1182     for (i = 0; i < 8; i++) {
1183         env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1184         env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1185         env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1186         env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1187     }
1188 
1189     return 0;
1190 }
1191 
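/*
 * Counterpart of kvm_arch_put_registers(): pull the vCPU state out of KVM
 * back into the QEMU-side CPUPPCState.
 */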
1192 int kvm_arch_get_registers(CPUState *cs)
1193 {
1194     PowerPCCPU *cpu = POWERPC_CPU(cs);
1195     CPUPPCState *env = &cpu->env;
1196     struct kvm_regs regs;
1197     uint32_t cr;
1198     int i, ret;
1199 
1200     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1201     if (ret < 0)
1202         return ret;
1203 
1204     cr = regs.cr;
1205     for (i = 7; i >= 0; i--) {
1206         env->crf[i] = cr & 15;
1207         cr >>= 4;
1208     }
1209 
1210     env->ctr = regs.ctr;
1211     env->lr = regs.lr;
1212     cpu_write_xer(env, regs.xer);
1213     env->msr = regs.msr;
1214     env->nip = regs.pc;
1215 
1216     env->spr[SPR_SRR0] = regs.srr0;
1217     env->spr[SPR_SRR1] = regs.srr1;
1218 
1219     env->spr[SPR_SPRG0] = regs.sprg0;
1220     env->spr[SPR_SPRG1] = regs.sprg1;
1221     env->spr[SPR_SPRG2] = regs.sprg2;
1222     env->spr[SPR_SPRG3] = regs.sprg3;
1223     env->spr[SPR_SPRG4] = regs.sprg4;
1224     env->spr[SPR_SPRG5] = regs.sprg5;
1225     env->spr[SPR_SPRG6] = regs.sprg6;
1226     env->spr[SPR_SPRG7] = regs.sprg7;
1227 
1228     env->spr[SPR_BOOKE_PID] = regs.pid;
1229 
1230     for (i = 0; i < 32; i++)
1231         env->gpr[i] = regs.gpr[i];
1232 
1233     kvm_get_fp(cs);
1234 
1235     if (cap_booke_sregs) {
1236         ret = kvmppc_get_booke_sregs(cpu);
1237         if (ret < 0) {
1238             return ret;
1239         }
1240     }
1241 
1242     if (cap_segstate) {
1243         ret = kvmppc_get_books_sregs(cpu);
1244         if (ret < 0) {
1245             return ret;
1246         }
1247     }
1248 
1249     if (cap_hior) {
1250         kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1251     }
1252 
1253     if (cap_one_reg) {
1254         int i;
1255 
1256         /* We deliberately ignore errors here: for kernels which have
1257          * the ONE_REG calls but don't support the specific
1258          * registers, there's a reasonable chance things will still
1259          * work, at least until we try to migrate. */
1260         for (i = 0; i < 1024; i++) {
1261             uint64_t id = env->spr_cb[i].one_reg_id;
1262 
1263             if (id != 0) {
1264                 kvm_get_one_spr(cs, id, i);
1265             }
1266         }
1267 
1268 #ifdef TARGET_PPC64
1269         if (msr_ts) {
1270             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1271                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1272             }
1273             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1274                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1275             }
1276             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1277             kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1278             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1279             kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1280             kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1281             kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1282             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1283             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1284             kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1285             kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1286         }
1287 
1288         if (cap_papr) {
1289             if (kvm_get_vpa(cs) < 0) {
1290                 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1291             }
1292         }
1293 
1294         kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1295 #endif
1296     }
1297 
1298     return 0;
1299 }
1300 
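/*
 * Raise or lower the external interrupt line of a vCPU through the
 * KVM_INTERRUPT ioctl, when level-triggered interrupt support is available.
 */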
1301 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1302 {
1303     unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1304 
1305     if (irq != PPC_INTERRUPT_EXT) {
1306         return 0;
1307     }
1308 
1309     if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1310         return 0;
1311     }
1312 
1313     kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1314 
1315     return 0;
1316 }
1317 
1318 #if defined(TARGET_PPC64)
1319 #define PPC_INPUT_INT PPC970_INPUT_INT
1320 #else
1321 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1322 #endif
1323 
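/*
 * Inject a pending external interrupt before entering the guest when the
 * kernel lacks KVM_CAP_PPC_IRQ_LEVEL, and re-arm the idle timer as a
 * workaround for lost level-triggered interrupts (see the comment above
 * idle_timer).
 */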
1324 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1325 {
1326     PowerPCCPU *cpu = POWERPC_CPU(cs);
1327     CPUPPCState *env = &cpu->env;
1328     int r;
1329     unsigned irq;
1330 
1331     qemu_mutex_lock_iothread();
1332 
1333     /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1334      * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1335     if (!cap_interrupt_level &&
1336         run->ready_for_interrupt_injection &&
1337         (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1338         (env->irq_input_state & (1<<PPC_INPUT_INT)))
1339     {
1340         /* For now KVM disregards the 'irq' argument. However, in the
1341          * future KVM could cache it in-kernel to avoid a heavyweight exit
1342          * when reading the UIC.
1343          */
1344         irq = KVM_INTERRUPT_SET;
1345 
1346         DPRINTF("injected interrupt %d\n", irq);
1347         r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1348         if (r < 0) {
1349             printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1350         }
1351 
1352         /* Always wake up soon in case the interrupt was level based */
1353         timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1354                        (NANOSECONDS_PER_SECOND / 50));
1355     }
1356 
1357     /* We don't know if there are more interrupts pending after this. However,
1358      * the guest will return to userspace in the course of handling this one
1359      * anyway, so we will get a chance to deliver the rest. */
1360 
1361     qemu_mutex_unlock_iothread();
1362 }
1363 
1364 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1365 {
1366     return MEMTXATTRS_UNSPECIFIED;
1367 }
1368 
1369 int kvm_arch_process_async_events(CPUState *cs)
1370 {
1371     return cs->halted;
1372 }
1373 
1374 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1375 {
1376     CPUState *cs = CPU(cpu);
1377     CPUPPCState *env = &cpu->env;
1378 
1379     if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1380         cs->halted = 1;
1381         cs->exception_index = EXCP_HLT;
1382     }
1383 
1384     return 0;
1385 }
1386 
1387 /* Map DCR accesses to the existing QEMU DCR emulation */
1388 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1389 {
1390     if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1391         fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1392 
1393     return 0;
1394 }
1395 
1396 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1397 {
1398     if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1399         fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1400 
1401     return 0;
1402 }
1403 
1404 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1405 {
1406     /* The mixed-endian case is not handled */
1407     uint32_t sc = debug_inst_opcode;
1408 
1409     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1410                             sizeof(sc), 0) ||
1411         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1412         return -EINVAL;
1413     }
1414 
1415     return 0;
1416 }
1417 
1418 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1419 {
1420     uint32_t sc;
1421 
1422     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1423         sc != debug_inst_opcode ||
1424         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1425                             sizeof(sc), 1)) {
1426         return -EINVAL;
1427     }
1428 
1429     return 0;
1430 }
1431 
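/* Return the index of a registered h/w debug point matching addr and type,
 * or -1 if there is none. */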
1432 static int find_hw_breakpoint(target_ulong addr, int type)
1433 {
1434     int n;
1435 
1436     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1437            <= ARRAY_SIZE(hw_debug_points));
1438 
1439     for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1440         if (hw_debug_points[n].addr == addr &&
1441              hw_debug_points[n].type == type) {
1442             return n;
1443         }
1444     }
1445 
1446     return -1;
1447 }
1448 
1449 static int find_hw_watchpoint(target_ulong addr, int *flag)
1450 {
1451     int n;
1452 
1453     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1454     if (n >= 0) {
1455         *flag = BP_MEM_ACCESS;
1456         return n;
1457     }
1458 
1459     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1460     if (n >= 0) {
1461         *flag = BP_MEM_WRITE;
1462         return n;
1463     }
1464 
1465     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1466     if (n >= 0) {
1467         *flag = BP_MEM_READ;
1468         return n;
1469     }
1470 
1471     return -1;
1472 }
1473 
1474 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1475                                   target_ulong len, int type)
1476 {
1477     if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1478         return -ENOBUFS;
1479     }
1480 
1481     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1482     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1483 
1484     switch (type) {
1485     case GDB_BREAKPOINT_HW:
1486         if (nb_hw_breakpoint >= max_hw_breakpoint) {
1487             return -ENOBUFS;
1488         }
1489 
1490         if (find_hw_breakpoint(addr, type) >= 0) {
1491             return -EEXIST;
1492         }
1493 
1494         nb_hw_breakpoint++;
1495         break;
1496 
1497     case GDB_WATCHPOINT_WRITE:
1498     case GDB_WATCHPOINT_READ:
1499     case GDB_WATCHPOINT_ACCESS:
1500         if (nb_hw_watchpoint >= max_hw_watchpoint) {
1501             return -ENOBUFS;
1502         }
1503 
1504         if (find_hw_breakpoint(addr, type) >= 0) {
1505             return -EEXIST;
1506         }
1507 
1508         nb_hw_watchpoint++;
1509         break;
1510 
1511     default:
1512         return -ENOSYS;
1513     }
1514 
1515     return 0;
1516 }
1517 
1518 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1519                                   target_ulong len, int type)
1520 {
1521     int n;
1522 
1523     n = find_hw_breakpoint(addr, type);
1524     if (n < 0) {
1525         return -ENOENT;
1526     }
1527 
1528     switch (type) {
1529     case GDB_BREAKPOINT_HW:
1530         nb_hw_breakpoint--;
1531         break;
1532 
1533     case GDB_WATCHPOINT_WRITE:
1534     case GDB_WATCHPOINT_READ:
1535     case GDB_WATCHPOINT_ACCESS:
1536         nb_hw_watchpoint--;
1537         break;
1538 
1539     default:
1540         return -ENOSYS;
1541     }
1542     hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1543 
1544     return 0;
1545 }
1546 
1547 void kvm_arch_remove_all_hw_breakpoints(void)
1548 {
1549     nb_hw_breakpoint = nb_hw_watchpoint = 0;
1550 }
1551 
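/* Translate QEMU's registered software and hardware debug points into the
 * kvm_guest_debug structure passed to KVM_SET_GUEST_DEBUG. */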
1552 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1553 {
1554     int n;
1555 
1556     /* Software Breakpoint updates */
1557     if (kvm_sw_breakpoints_active(cs)) {
1558         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1559     }
1560 
1561     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1562            <= ARRAY_SIZE(hw_debug_points));
1563     assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1564 
1565     if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1566         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1567         memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1568         for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1569             switch (hw_debug_points[n].type) {
1570             case GDB_BREAKPOINT_HW:
1571                 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1572                 break;
1573             case GDB_WATCHPOINT_WRITE:
1574                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1575                 break;
1576             case GDB_WATCHPOINT_READ:
1577                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1578                 break;
1579             case GDB_WATCHPOINT_ACCESS:
1580                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1581                                         KVMPPC_DEBUG_WATCH_READ;
1582                 break;
1583             default:
1584                 cpu_abort(cs, "Unsupported breakpoint type\n");
1585             }
1586             dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1587         }
1588     }
1589 }
1590 
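/*
 * Decide whether a KVM_EXIT_DEBUG exit belongs to QEMU (single-stepping or
 * a breakpoint/watchpoint we registered) or must be reflected into the
 * guest as a program interrupt.
 */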
1591 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1592 {
1593     CPUState *cs = CPU(cpu);
1594     CPUPPCState *env = &cpu->env;
1595     struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1596     int handle = 0;
1597     int n;
1598     int flag = 0;
1599 
1600     if (cs->singlestep_enabled) {
1601         handle = 1;
1602     } else if (arch_info->status) {
1603         if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1604             if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1605                 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1606                 if (n >= 0) {
1607                     handle = 1;
1608                 }
1609             } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1610                                             KVMPPC_DEBUG_WATCH_WRITE)) {
1611                 n = find_hw_watchpoint(arch_info->address,  &flag);
1612                 if (n >= 0) {
1613                     handle = 1;
1614                     cs->watchpoint_hit = &hw_watchpoint;
1615                     hw_watchpoint.vaddr = hw_debug_points[n].addr;
1616                     hw_watchpoint.flags = flag;
1617                 }
1618             }
1619         }
1620     } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1621         handle = 1;
1622     } else {
1623         /* QEMU is not able to handle this debug exception, so inject a
1624          * program exception into the guest;
1625          * yes, a program exception, NOT a debug exception!
1626          * While QEMU is using the debug resources, the debug exception
1627          * must always be enabled. To achieve this we set MSR_DE and also
1628          * set MSRP_DEP so the guest cannot change MSR_DE.
1629          * When emulating debug resources for the guest we want the guest
1630          * to control MSR_DE (enable/disable the debug interrupt on demand).
1631          * Supporting both configurations at once is NOT possible.
1632          * So the result is that we cannot share debug resources
1633          * between QEMU and the guest on BookE architectures.
1634          * In the current design QEMU gets priority over the guest,
1635          * which means that if QEMU is using the debug resources then the
1636          * guest cannot use them;
1637          * for software breakpoints QEMU uses a privileged instruction,
1638          * so there is no way we can be here because the guest set a
1639          * debug exception; the only possibility is that the guest executed
1640          * a privileged / illegal instruction, and that's why we are
1641          * injecting a program interrupt.
1642          */
1643 
1644         cpu_synchronize_state(cs);
1645         /* env->nip is the PC, so increment it by 4 before calling
1646          * ppc_cpu_do_interrupt(), which sets srr0 = env->nip - 4.
1647          */
1648         env->nip += 4;
1649         cs->exception_index = POWERPC_EXCP_PROGRAM;
1650         env->error_code = POWERPC_EXCP_INVAL;
1651         ppc_cpu_do_interrupt(cs);
1652     }
1653 
1654     return handle;
1655 }
1656 
1657 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1658 {
1659     PowerPCCPU *cpu = POWERPC_CPU(cs);
1660     CPUPPCState *env = &cpu->env;
1661     int ret;
1662 
1663     qemu_mutex_lock_iothread();
1664 
1665     switch (run->exit_reason) {
1666     case KVM_EXIT_DCR:
1667         if (run->dcr.is_write) {
1668             DPRINTF("handle dcr write\n");
1669             ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1670         } else {
1671             DPRINTF("handle dcr read\n");
1672             ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1673         }
1674         break;
1675     case KVM_EXIT_HLT:
1676         DPRINTF("handle halt\n");
1677         ret = kvmppc_handle_halt(cpu);
1678         break;
1679 #if defined(TARGET_PPC64)
1680     case KVM_EXIT_PAPR_HCALL:
1681         DPRINTF("handle PAPR hypercall\n");
1682         run->papr_hcall.ret = spapr_hypercall(cpu,
1683                                               run->papr_hcall.nr,
1684                                               run->papr_hcall.args);
1685         ret = 0;
1686         break;
1687 #endif
1688     case KVM_EXIT_EPR:
1689         DPRINTF("handle epr\n");
1690         run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1691         ret = 0;
1692         break;
1693     case KVM_EXIT_WATCHDOG:
1694         DPRINTF("handle watchdog expiry\n");
1695         watchdog_perform_action();
1696         ret = 0;
1697         break;
1698 
1699     case KVM_EXIT_DEBUG:
1700         DPRINTF("handle debug exception\n");
1701         if (kvm_handle_debug(cpu, run)) {
1702             ret = EXCP_DEBUG;
1703             break;
1704         }
1705         /* re-enter, this exception was guest-internal */
1706         ret = 0;
1707         break;
1708 
1709     default:
1710         fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1711         ret = -1;
1712         break;
1713     }
1714 
1715     qemu_mutex_unlock_iothread();
1716     return ret;
1717 }
1718 
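/* The next three helpers push BookE timer state into KVM through the
 * one-reg interface: KVM_REG_PPC_OR_TSR ORs bits into the guest TSR,
 * KVM_REG_PPC_CLEAR_TSR clears them, and KVM_REG_PPC_TCR sets the TCR
 * from env->spr[SPR_BOOKE_TCR]. */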
1719 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1720 {
1721     CPUState *cs = CPU(cpu);
1722     uint32_t bits = tsr_bits;
1723     struct kvm_one_reg reg = {
1724         .id = KVM_REG_PPC_OR_TSR,
1725         .addr = (uintptr_t) &bits,
1726     };
1727 
1728     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1729 }
1730 
1731 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1732 {
1733 
1734     CPUState *cs = CPU(cpu);
1735     uint32_t bits = tsr_bits;
1736     struct kvm_one_reg reg = {
1737         .id = KVM_REG_PPC_CLEAR_TSR,
1738         .addr = (uintptr_t) &bits,
1739     };
1740 
1741     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1742 }
1743 
1744 int kvmppc_set_tcr(PowerPCCPU *cpu)
1745 {
1746     CPUState *cs = CPU(cpu);
1747     CPUPPCState *env = &cpu->env;
1748     uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1749 
1750     struct kvm_one_reg reg = {
1751         .id = KVM_REG_PPC_TCR,
1752         .addr = (uintptr_t) &tcr,
1753     };
1754 
1755     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1756 }
1757 
1758 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1759 {
1760     CPUState *cs = CPU(cpu);
1761     int ret;
1762 
1763     if (!kvm_enabled()) {
1764         return -1;
1765     }
1766 
1767     if (!cap_ppc_watchdog) {
1768         printf("warning: KVM does not support watchdog\n");
1769         return -1;
1770     }
1771 
1772     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1773     if (ret < 0) {
1774         fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1775                 __func__, strerror(-ret));
1776         return ret;
1777     }
1778 
1779     return ret;
1780 }
1781 
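/* Copy the first /proc/cpuinfo line that starts with @field into @value
 * (truncated to @len bytes).  Returns 0 on success, -1 if the field is
 * not found or the file cannot be opened. */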
1782 static int read_cpuinfo(const char *field, char *value, int len)
1783 {
1784     FILE *f;
1785     int ret = -1;
1786     int field_len = strlen(field);
1787     char line[512];
1788 
1789     f = fopen("/proc/cpuinfo", "r");
1790     if (!f) {
1791         return -1;
1792     }
1793 
1794     do {
1795         if (!fgets(line, sizeof(line), f)) {
1796             break;
1797         }
1798         if (!strncmp(line, field, field_len)) {
1799             pstrcpy(value, len, line);
1800             ret = 0;
1801             break;
1802         }
1803     } while (*line);
1804 
1805     fclose(f);
1806 
1807     return ret;
1808 }
1809 
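/* Host timebase frequency, parsed from the "timebase" line of
 * /proc/cpuinfo.  Falls back to NANOSECONDS_PER_SECOND (i.e. a 1 GHz
 * timebase) if the line is missing or cannot be parsed. */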
1810 uint32_t kvmppc_get_tbfreq(void)
1811 {
1812     char line[512];
1813     char *ns;
1814     uint32_t retval = NANOSECONDS_PER_SECOND;
1815 
1816     if (read_cpuinfo("timebase", line, sizeof(line))) {
1817         return retval;
1818     }
1819 
1820     if (!(ns = strchr(line, ':'))) {
1821         return retval;
1822     }
1823 
1824     ns++;
1825 
1826     return atoi(ns);
1827 }
1828 
1829 bool kvmppc_get_host_serial(char **value)
1830 {
1831     return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
1832                                NULL);
1833 }
1834 
1835 bool kvmppc_get_host_model(char **value)
1836 {
1837     return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
1838 }
1839 
1840 /* Try to find a device tree node for a CPU with clock-frequency property */
1841 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1842 {
1843     struct dirent *dirp;
1844     DIR *dp;
1845 
1846     if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1847         printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1848         return -1;
1849     }
1850 
1851     buf[0] = '\0';
1852     while ((dirp = readdir(dp)) != NULL) {
1853         FILE *f;
1854         snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1855                  dirp->d_name);
1856         f = fopen(buf, "r");
1857         if (f) {
1858             snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1859             fclose(f);
1860             break;
1861         }
1862         buf[0] = '\0';
1863     }
1864     closedir(dp);
1865     if (buf[0] == '\0') {
1866         printf("Unknown host!\n");
1867         return -1;
1868     }
1869 
1870     return 0;
1871 }
1872 
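/* Read a host device tree property file holding a single big-endian
 * integer.  Returns the value converted to host endianness, -1 if the
 * file cannot be opened, or 0 if the length is neither 4 nor 8 bytes. */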
1873 static uint64_t kvmppc_read_int_dt(const char *filename)
1874 {
1875     union {
1876         uint32_t v32;
1877         uint64_t v64;
1878     } u;
1879     FILE *f;
1880     int len;
1881 
1882     f = fopen(filename, "rb");
1883     if (!f) {
1884         return -1;
1885     }
1886 
1887     len = fread(&u, 1, sizeof(u), f);
1888     fclose(f);
1889     switch (len) {
1890     case 4:
1891         /* property is a 32-bit quantity */
1892         return be32_to_cpu(u.v32);
1893     case 8:
1894         return be64_to_cpu(u.v64);
1895     }
1896 
1897     return 0;
1898 }
1899 
1900 /* Read a CPU node property from the host device tree that's a single
1901  * integer (32-bit or 64-bit).  Returns 0 if anything goes wrong
1902  * (can't find or open the property, or doesn't understand the
1903  * format) */
1904 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1905 {
1906     char buf[PATH_MAX], *tmp;
1907     uint64_t val;
1908 
1909     if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1910         return -1;
1911     }
1912 
1913     tmp = g_strdup_printf("%s/%s", buf, propname);
1914     val = kvmppc_read_int_dt(tmp);
1915     g_free(tmp);
1916 
1917     return val;
1918 }
1919 
1920 uint64_t kvmppc_get_clockfreq(void)
1921 {
1922     return kvmppc_read_int_cpu_dt("clock-frequency");
1923 }
1924 
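/* Fetch KVM's paravirt info (hypercall instruction sequence and flags).
 * Note the inverted convention: returns 0 on success, 1 if the extension
 * is missing or the ioctl fails. */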
1925 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1926 {
1927     PowerPCCPU *cpu = ppc_env_get_cpu(env);
1928     CPUState *cs = CPU(cpu);
1929 
1930     if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1931         !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1932         return 0;
1933     }
1934 
1935     return 1;
1936 }
1937 
1938 int kvmppc_get_hasidle(CPUPPCState *env)
1939 {
1940     struct kvm_ppc_pvinfo pvinfo;
1941 
1942     if (!kvmppc_get_pvinfo(env, &pvinfo) &&
1943         (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
1944         return 1;
1945     }
1946 
1947     return 0;
1948 }
1949 
1950 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
1951 {
1952     uint32_t *hc = (uint32_t *)buf;
1953     struct kvm_ppc_pvinfo pvinfo;
1954 
1955     if (!kvmppc_get_pvinfo(env, &pvinfo)) {
1956         memcpy(buf, pvinfo.hcall, buf_len);
1957         return 0;
1958     }
1959 
1960     /*
1961      * Fallback to always fail hypercalls regardless of endianness:
1962      *
1963      *     tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
1964      *     li r3, -1
1965      *     b .+8       (becomes nop in wrong endian)
1966      *     bswap32(li r3, -1)
1967      */
1968 
1969     hc[0] = cpu_to_be32(0x08000048);
1970     hc[1] = cpu_to_be32(0x3860ffff);
1971     hc[2] = cpu_to_be32(0x48000008);
1972     hc[3] = cpu_to_be32(bswap32(0x3860ffff));
1973 
1974     return 1;
1975 }
1976 
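/* Ask KVM to handle the given sPAPR hcall in the kernel
 * (KVM_CAP_PPC_ENABLE_HCALL with args: hcall number, enable = 1). */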
1977 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
1978 {
1979     return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
1980 }
1981 
1982 void kvmppc_enable_logical_ci_hcalls(void)
1983 {
1984     /*
1985      * FIXME: it would be nice to detect the case where we're using
1986      * a device that requires the in-kernel implementation of these
1987      * hcalls but the kernel lacks it, and to produce a warning in
1988      * that case.
1989      */
1990     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
1991     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
1992 }
1993 
1994 void kvmppc_enable_set_mode_hcall(void)
1995 {
1996     kvmppc_enable_hcall(kvm_state, H_SET_MODE);
1997 }
1998 
1999 void kvmppc_enable_clear_ref_mod_hcalls(void)
2000 {
2001     kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
2002     kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
2003 }
2004 
2005 void kvmppc_set_papr(PowerPCCPU *cpu)
2006 {
2007     CPUState *cs = CPU(cpu);
2008     int ret;
2009 
2010     if (!kvm_enabled()) {
2011         return;
2012     }
2013 
2014     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2015     if (ret) {
2016         error_report("This vCPU type or KVM version does not support PAPR");
2017         exit(1);
2018     }
2019 
2020     /* Update the capability flag so we sync the right information
2021      * with kvm */
2022     cap_papr = 1;
2023 }
2024 
2025 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr)
2026 {
2027     return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr);
2028 }
2029 
2030 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2031 {
2032     CPUState *cs = CPU(cpu);
2033     int ret;
2034 
2035     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2036     if (ret && mpic_proxy) {
2037         error_report("This KVM version does not support EPR");
2038         exit(1);
2039     }
2040 }
2041 
2042 int kvmppc_smt_threads(void)
2043 {
2044     return cap_ppc_smt ? cap_ppc_smt : 1;
2045 }
2046 
2047 int kvmppc_set_smt_threads(int smt)
2048 {
2049     int ret;
2050 
2051     ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_SMT, 0, smt, 0);
2052     if (!ret) {
2053         cap_ppc_smt = smt;
2054     }
2055     return ret;
2056 }
2057 
2058 void kvmppc_hint_smt_possible(Error **errp)
2059 {
2060     int i;
2061     GString *g;
2062     char *s;
2063 
2064     assert(kvm_enabled());
2065     if (cap_ppc_smt_possible) {
2066         g = g_string_new("Available VSMT modes:");
2067         for (i = 63; i >= 0; i--) {
2068             if ((1UL << i) & cap_ppc_smt_possible) {
2069                 g_string_append_printf(g, " %lu", (1UL << i));
2070             }
2071         }
2072         s = g_string_free(g, false);
2073         error_append_hint(errp, "%s.\n", s);
2074         g_free(s);
2075     } else {
2076         error_append_hint(errp,
2077                           "This KVM seems to be too old to support VSMT.\n");
2078     }
2079 }
2080 
2081 
2082 #ifdef TARGET_PPC64
2083 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2084 {
2085     struct kvm_ppc_smmu_info info;
2086     long rampagesize, best_page_shift;
2087     int i;
2088 
2089     /* Find the largest hardware supported page size that's less than
2090      * or equal to the (logical) backing page size of guest RAM */
2091     kvm_get_smmu_info(&info, &error_fatal);
2092     rampagesize = qemu_getrampagesize();
2093     best_page_shift = 0;
2094 
2095     for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2096         struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2097 
2098         if (!sps->page_shift) {
2099             continue;
2100         }
2101 
2102         if ((sps->page_shift > best_page_shift)
2103             && ((1UL << sps->page_shift) <= rampagesize)) {
2104             best_page_shift = sps->page_shift;
2105         }
2106     }
2107 
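    /* The clamp below limits the RMA to
     * 2^(best_page_shift + hash_shift - 7) bytes, i.e. (HPT bytes / 128)
     * pages: presumably so that the HPT can provide at least one
     * 128-byte PTEG per base page of the RMA. */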
2108     return MIN(current_size,
2109                1ULL << (best_page_shift + hash_shift - 7));
2110 }
2111 #endif
2112 
2113 bool kvmppc_spapr_use_multitce(void)
2114 {
2115     return cap_spapr_multitce;
2116 }
2117 
2118 int kvmppc_spapr_enable_inkernel_multitce(void)
2119 {
2120     int ret;
2121 
2122     ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2123                             H_PUT_TCE_INDIRECT, 1);
2124     if (!ret) {
2125         ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2126                                 H_STUFF_TCE, 1);
2127     }
2128 
2129     return ret;
2130 }
2131 
2132 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
2133                               uint64_t bus_offset, uint32_t nb_table,
2134                               int *pfd, bool need_vfio)
2135 {
2136     long len;
2137     int fd;
2138     void *table;
2139 
2140     /* Must set fd to -1 so we don't try to munmap when called for
2141      * destroying the table, which the upper layers -will- do
2142      */
2143     *pfd = -1;
2144     if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2145         return NULL;
2146     }
2147 
2148     if (cap_spapr_tce_64) {
2149         struct kvm_create_spapr_tce_64 args = {
2150             .liobn = liobn,
2151             .page_shift = page_shift,
2152             .offset = bus_offset >> page_shift,
2153             .size = nb_table,
2154             .flags = 0
2155         };
2156         fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE_64, &args);
2157         if (fd < 0) {
2158             fprintf(stderr,
2159                     "KVM: Failed to create TCE64 table for liobn 0x%x\n",
2160                     liobn);
2161             return NULL;
2162         }
2163     } else if (cap_spapr_tce) {
2164         uint64_t window_size = (uint64_t) nb_table << page_shift;
2165         struct kvm_create_spapr_tce args = {
2166             .liobn = liobn,
2167             .window_size = window_size,
2168         };
2169         if ((window_size != args.window_size) || bus_offset) {
2170             return NULL;
2171         }
2172         fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2173         if (fd < 0) {
2174             fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2175                     liobn);
2176             return NULL;
2177         }
2178     } else {
2179         return NULL;
2180     }
2181 
2182     len = nb_table * sizeof(uint64_t);
2183     /* FIXME: round this up to page size */
2184 
2185     table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2186     if (table == MAP_FAILED) {
2187         fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2188                 liobn);
2189         close(fd);
2190         return NULL;
2191     }
2192 
2193     *pfd = fd;
2194     return table;
2195 }
2196 
2197 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
2198 {
2199     long len;
2200 
2201     if (fd < 0) {
2202         return -1;
2203     }
2204 
2205     len = nb_table * sizeof(uint64_t);
2206     if ((munmap(table, len) < 0) ||
2207         (close(fd) < 0)) {
2208         fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
2209                 strerror(errno));
2210         /* Leak the table */
2211     }
2212 
2213     return 0;
2214 }
2215 
2216 int kvmppc_reset_htab(int shift_hint)
2217 {
2218     uint32_t shift = shift_hint;
2219 
2220     if (!kvm_enabled()) {
2221         /* Full emulation, tell caller to allocate htab itself */
2222         return 0;
2223     }
2224     if (kvm_vm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2225         int ret;
2226         ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2227         if (ret == -ENOTTY) {
2228             /* At least some versions of PR KVM advertise the
2229              * capability, but don't implement the ioctl().  Oops.
2230              * Return 0 so that we allocate the htab in qemu, as is
2231              * correct for PR. */
2232             return 0;
2233         } else if (ret < 0) {
2234             return ret;
2235         }
2236         return shift;
2237     }
2238 
2239     /* We have a kernel that predates the htab reset calls.  For PR
2240      * KVM, we need to allocate the htab ourselves, for an HV KVM of
2241      * this era, it has allocated a 16MB fixed size hash table already. */
2242     if (kvmppc_is_pr(kvm_state)) {
2243         /* PR - tell caller to allocate htab */
2244         return 0;
2245     } else {
2246         /* HV - assume 16MB kernel allocated htab */
2247         return 24;
2248     }
2249 }
2250 
2251 static inline uint32_t mfpvr(void)
2252 {
2253     uint32_t pvr;
2254 
2255     asm ("mfpvr %0"
2256          : "=r"(pvr));
2257     return pvr;
2258 }
2259 
2260 static void alter_insns(uint64_t *word, uint64_t flags, bool on)
2261 {
2262     if (on) {
2263         *word |= flags;
2264     } else {
2265         *word &= ~flags;
2266     }
2267 }
2268 
2269 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2270 {
2271     PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2272     uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2273     uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2274 
2275     /* Now fix up the class with information we can query from the host */
2276     pcc->pvr = mfpvr();
2277 
2278     alter_insns(&pcc->insns_flags, PPC_ALTIVEC,
2279                 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_ALTIVEC);
2280     alter_insns(&pcc->insns_flags2, PPC2_VSX,
2281                 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_VSX);
2282     alter_insns(&pcc->insns_flags2, PPC2_DFP,
2283                 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_DFP);
2284 
2285     if (dcache_size != -1) {
2286         pcc->l1_dcache_size = dcache_size;
2287     }
2288 
2289     if (icache_size != -1) {
2290         pcc->l1_icache_size = icache_size;
2291     }
2292 
2293 #if defined(TARGET_PPC64)
2294     pcc->radix_page_info = kvm_get_radix_page_info();
2295 
2296     if ((pcc->pvr & 0xffffff00) == CPU_POWERPC_POWER9_DD1) {
2297         /*
2298          * POWER9 DD1 has some bugs which make it not really ISA 3.00
2299          * compliant.  More importantly, advertising ISA 3.00
2300          * architected mode may prevent guests from activating
2301          * necessary DD1 workarounds.
2302          */
2303         pcc->pcr_supported &= ~(PCR_COMPAT_3_00 | PCR_COMPAT_2_07
2304                                 | PCR_COMPAT_2_06 | PCR_COMPAT_2_05);
2305     }
2306 #endif /* defined(TARGET_PPC64) */
2307 }
2308 
2309 bool kvmppc_has_cap_epr(void)
2310 {
2311     return cap_epr;
2312 }
2313 
2314 bool kvmppc_has_cap_fixup_hcalls(void)
2315 {
2316     return cap_fixup_hcalls;
2317 }
2318 
2319 bool kvmppc_has_cap_htm(void)
2320 {
2321     return cap_htm;
2322 }
2323 
2324 bool kvmppc_has_cap_mmu_radix(void)
2325 {
2326     return cap_mmu_radix;
2327 }
2328 
2329 bool kvmppc_has_cap_mmu_hash_v3(void)
2330 {
2331     return cap_mmu_hash_v3;
2332 }
2333 
2334 static bool kvmppc_power8_host(void)
2335 {
2336     bool ret = false;
2337 #ifdef TARGET_PPC64
2338     {
2339         uint32_t base_pvr = CPU_POWERPC_POWER_SERVER_MASK & mfpvr();
2340         ret = (base_pvr == CPU_POWERPC_POWER8E_BASE) ||
2341               (base_pvr == CPU_POWERPC_POWER8NVL_BASE) ||
2342               (base_pvr == CPU_POWERPC_POWER8_BASE);
2343     }
2344 #endif /* TARGET_PPC64 */
2345     return ret;
2346 }
2347 
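/* The parse_cap_ppc_safe_* helpers below translate the character/behaviour
 * bits returned by KVM_PPC_GET_CPU_CHAR into the sPAPR capability
 * convention (0 = broken, 1 = workaround available, 2 = fixed); the
 * indirect-branch variant returns the SPAPR_CAP_FIXED_CCD/IBS values
 * directly. */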
2348 static int parse_cap_ppc_safe_cache(struct kvm_ppc_cpu_char c)
2349 {
2350     bool l1d_thread_priv_req = !kvmppc_power8_host();
2351 
2352     if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_L1D_FLUSH_PR) {
2353         return 2;
2354     } else if ((!l1d_thread_priv_req ||
2355                 c.character & c.character_mask & H_CPU_CHAR_L1D_THREAD_PRIV) &&
2356                (c.character & c.character_mask
2357                 & (H_CPU_CHAR_L1D_FLUSH_ORI30 | H_CPU_CHAR_L1D_FLUSH_TRIG2))) {
2358         return 1;
2359     }
2360 
2361     return 0;
2362 }
2363 
2364 static int parse_cap_ppc_safe_bounds_check(struct kvm_ppc_cpu_char c)
2365 {
2366     if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_BNDS_CHK_SPEC_BAR) {
2367         return 2;
2368     } else if (c.character & c.character_mask & H_CPU_CHAR_SPEC_BAR_ORI31) {
2369         return 1;
2370     }
2371 
2372     return 0;
2373 }
2374 
2375 static int parse_cap_ppc_safe_indirect_branch(struct kvm_ppc_cpu_char c)
2376 {
2377     if (c.character & c.character_mask & H_CPU_CHAR_CACHE_COUNT_DIS) {
2378         return  SPAPR_CAP_FIXED_CCD;
2379     } else if (c.character & c.character_mask & H_CPU_CHAR_BCCTRL_SERIALISED) {
2380         return SPAPR_CAP_FIXED_IBS;
2381     }
2382 
2383     return 0;
2384 }
2385 
2386 static void kvmppc_get_cpu_characteristics(KVMState *s)
2387 {
2388     struct kvm_ppc_cpu_char c;
2389     int ret;
2390 
2391     /* Assume broken */
2392     cap_ppc_safe_cache = 0;
2393     cap_ppc_safe_bounds_check = 0;
2394     cap_ppc_safe_indirect_branch = 0;
2395 
2396     ret = kvm_vm_check_extension(s, KVM_CAP_PPC_GET_CPU_CHAR);
2397     if (!ret) {
2398         return;
2399     }
2400     ret = kvm_vm_ioctl(s, KVM_PPC_GET_CPU_CHAR, &c);
2401     if (ret < 0) {
2402         return;
2403     }
2404 
2405     cap_ppc_safe_cache = parse_cap_ppc_safe_cache(c);
2406     cap_ppc_safe_bounds_check = parse_cap_ppc_safe_bounds_check(c);
2407     cap_ppc_safe_indirect_branch = parse_cap_ppc_safe_indirect_branch(c);
2408 }
2409 
2410 int kvmppc_get_cap_safe_cache(void)
2411 {
2412     return cap_ppc_safe_cache;
2413 }
2414 
2415 int kvmppc_get_cap_safe_bounds_check(void)
2416 {
2417     return cap_ppc_safe_bounds_check;
2418 }
2419 
2420 int kvmppc_get_cap_safe_indirect_branch(void)
2421 {
2422     return cap_ppc_safe_indirect_branch;
2423 }
2424 
2425 bool kvmppc_has_cap_spapr_vfio(void)
2426 {
2427     return cap_spapr_vfio;
2428 }
2429 
2430 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
2431 {
2432     uint32_t host_pvr = mfpvr();
2433     PowerPCCPUClass *pvr_pcc;
2434 
2435     pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2436     if (pvr_pcc == NULL) {
2437         pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2438     }
2439 
2440     return pvr_pcc;
2441 }
2442 
2443 static int kvm_ppc_register_host_cpu_type(MachineState *ms)
2444 {
2445     TypeInfo type_info = {
2446         .name = TYPE_HOST_POWERPC_CPU,
2447         .class_init = kvmppc_host_cpu_class_init,
2448     };
2449     MachineClass *mc = MACHINE_GET_CLASS(ms);
2450     PowerPCCPUClass *pvr_pcc;
2451     ObjectClass *oc;
2452     DeviceClass *dc;
2453     int i;
2454 
2455     pvr_pcc = kvm_ppc_get_host_cpu_class();
2456     if (pvr_pcc == NULL) {
2457         return -1;
2458     }
2459     type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2460     type_register(&type_info);
2461     if (object_dynamic_cast(OBJECT(ms), TYPE_SPAPR_MACHINE)) {
2462         /* override TCG default cpu type with 'host' cpu model */
2463         mc->default_cpu_type = TYPE_HOST_POWERPC_CPU;
2464     }
2465 
2466     oc = object_class_by_name(type_info.name);
2467     g_assert(oc);
2468 
2469     /*
2470      * Update generic CPU family class alias (e.g. on a POWER8NVL host,
2471      * we want "POWER8" to be a "family" alias that points to the current
2472      * host CPU type, too)
2473      */
2474     dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc));
2475     for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) {
2476         if (strcasecmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) {
2477             char *suffix;
2478 
2479             ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc));
2480             suffix = strstr(ppc_cpu_aliases[i].model, POWERPC_CPU_TYPE_SUFFIX);
2481             if (suffix) {
2482                 *suffix = 0;
2483             }
2484             break;
2485         }
2486     }
2487 
2488     return 0;
2489 }
2490 
2491 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2492 {
2493     struct kvm_rtas_token_args args = {
2494         .token = token,
2495     };
2496 
2497     if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2498         return -ENOENT;
2499     }
2500 
2501     strncpy(args.name, function, sizeof(args.name));
2502 
2503     return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2504 }
2505 
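/* Open a KVM file descriptor for streaming the guest hash page table,
 * either for reading or for writing it back, starting at HPTE index
 * @index.  Returns the fd, or a negative error with @errp set. */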
2506 int kvmppc_get_htab_fd(bool write, uint64_t index, Error **errp)
2507 {
2508     struct kvm_get_htab_fd s = {
2509         .flags = write ? KVM_GET_HTAB_WRITE : 0,
2510         .start_index = index,
2511     };
2512     int ret;
2513 
2514     if (!cap_htab_fd) {
2515         error_setg(errp, "KVM version doesn't support %s the HPT",
2516                    write ? "writing" : "reading");
2517         return -ENOTSUP;
2518     }
2519 
2520     ret = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2521     if (ret < 0) {
2522         error_setg(errp, "Unable to open fd for %s HPT %s KVM: %s",
2523                    write ? "writing" : "reading", write ? "to" : "from",
2524                    strerror(errno));
2525         return -errno;
2526     }
2527 
2528     return ret;
2529 }
2530 
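/* Drain HPT chunks from the KVM HTAB fd into the migration stream.  Each
 * chunk is a struct kvm_get_htab_header followed by n_valid entries of
 * HASH_PTE_SIZE_64 bytes.  Returns 1 once the fd reports EOF (table fully
 * read), 0 if we stopped early because max_ns elapsed, or the negative
 * read() result on error. */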
2531 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2532 {
2533     int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2534     uint8_t buf[bufsize];
2535     ssize_t rc;
2536 
2537     do {
2538         rc = read(fd, buf, bufsize);
2539         if (rc < 0) {
2540             fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2541                     strerror(errno));
2542             return rc;
2543         } else if (rc) {
2544             uint8_t *buffer = buf;
2545             ssize_t n = rc;
2546             while (n) {
2547                 struct kvm_get_htab_header *head =
2548                     (struct kvm_get_htab_header *) buffer;
2549                 size_t chunksize = sizeof(*head) +
2550                      HASH_PTE_SIZE_64 * head->n_valid;
2551 
2552                 qemu_put_be32(f, head->index);
2553                 qemu_put_be16(f, head->n_valid);
2554                 qemu_put_be16(f, head->n_invalid);
2555                 qemu_put_buffer(f, (void *)(head + 1),
2556                                 HASH_PTE_SIZE_64 * head->n_valid);
2557 
2558                 buffer += chunksize;
2559                 n -= chunksize;
2560             }
2561         }
2562     } while ((rc != 0)
2563              && ((max_ns < 0)
2564                  || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2565 
2566     return (rc == 0) ? 1 : 0;
2567 }
2568 
2569 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2570                            uint16_t n_valid, uint16_t n_invalid)
2571 {
2572     struct kvm_get_htab_header *buf;
2573     size_t chunksize = sizeof(*buf) + n_valid * HASH_PTE_SIZE_64;
2574     ssize_t rc;
2575 
2576     buf = alloca(chunksize);
2577     buf->index = index;
2578     buf->n_valid = n_valid;
2579     buf->n_invalid = n_invalid;
2580 
2581     qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64 * n_valid);
2582 
2583     rc = write(fd, buf, chunksize);
2584     if (rc < 0) {
2585         fprintf(stderr, "Error writing KVM hash table: %s\n",
2586                 strerror(errno));
2587         return rc;
2588     }
2589     if (rc != chunksize) {
2590         /* We should never get a short write on a single chunk */
2591         fprintf(stderr, "Short write, restoring KVM hash table\n");
2592         return -1;
2593     }
2594     return 0;
2595 }
2596 
2597 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2598 {
2599     return true;
2600 }
2601 
2602 void kvm_arch_init_irq_routing(KVMState *s)
2603 {
2604 }
2605 
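/* Read @n HPTEs starting at index @ptex from the KVM HTAB fd into @hptes.
 * Valid entries are copied as-is; ranges the kernel reports as invalid
 * are zero-filled in the destination. */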
2606 void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n)
2607 {
2608     int fd, rc;
2609     int i;
2610 
2611     fd = kvmppc_get_htab_fd(false, ptex, &error_abort);
2612 
2613     i = 0;
2614     while (i < n) {
2615         struct kvm_get_htab_header *hdr;
2616         int m = n < HPTES_PER_GROUP ? n : HPTES_PER_GROUP;
2617         char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64];
2618 
2619         rc = read(fd, buf, sizeof(buf));
2620         if (rc < 0) {
2621             hw_error("kvmppc_read_hptes: Unable to read HPTEs");
2622         }
2623 
2624         hdr = (struct kvm_get_htab_header *)buf;
2625         while ((i < n) && ((char *)hdr < (buf + rc))) {
2626             int invalid = hdr->n_invalid, valid = hdr->n_valid;
2627 
2628             if (hdr->index != (ptex + i)) {
2629                 hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32
2630                          " != (%"HWADDR_PRIu" + %d)", hdr->index, ptex, i);
2631             }
2632 
2633             if (n - i < valid) {
2634                 valid = n - i;
2635             }
2636             memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * valid);
2637             i += valid;
2638 
2639             if ((n - i) < invalid) {
2640                 invalid = n - i;
2641             }
2642             memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64);
2643             i += invalid;
2644 
2645             hdr = (struct kvm_get_htab_header *)
2646                 ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid);
2647         }
2648     }
2649 
2650     close(fd);
2651 }
2652 
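/* Update a single HPTE in the KVM-managed hash table by writing a
 * one-entry chunk (header plus big-endian pte0/pte1) to the HTAB fd. */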
2653 void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1)
2654 {
2655     int fd, rc;
2656     struct {
2657         struct kvm_get_htab_header hdr;
2658         uint64_t pte0;
2659         uint64_t pte1;
2660     } buf;
2661 
2662     fd = kvmppc_get_htab_fd(true, 0 /* Ignored */, &error_abort);
2663 
2664     buf.hdr.n_valid = 1;
2665     buf.hdr.n_invalid = 0;
2666     buf.hdr.index = ptex;
2667     buf.pte0 = cpu_to_be64(pte0);
2668     buf.pte1 = cpu_to_be64(pte1);
2669 
2670     rc = write(fd, &buf, sizeof(buf));
2671     if (rc != sizeof(buf)) {
2672         hw_error("kvmppc_write_hpte: Unable to update KVM HPT");
2673     }
2674     close(fd);
2675 }
2676 
2677 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2678                              uint64_t address, uint32_t data, PCIDevice *dev)
2679 {
2680     return 0;
2681 }
2682 
2683 int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
2684                                 int vector, PCIDevice *dev)
2685 {
2686     return 0;
2687 }
2688 
2689 int kvm_arch_release_virq_post(int virq)
2690 {
2691     return 0;
2692 }
2693 
2694 int kvm_arch_msi_data_to_gsi(uint32_t data)
2695 {
2696     return data & 0xffff;
2697 }
2698 
2699 int kvmppc_enable_hwrng(void)
2700 {
2701     if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2702         return -1;
2703     }
2704 
2705     return kvmppc_enable_hcall(kvm_state, H_RANDOM);
2706 }
2707 
2708 void kvmppc_check_papr_resize_hpt(Error **errp)
2709 {
2710     if (!kvm_enabled()) {
2711         return; /* No KVM, we're good */
2712     }
2713 
2714     if (cap_resize_hpt) {
2715         return; /* Kernel has explicit support, we're good */
2716     }
2717 
2718     /* Otherwise fallback on looking for PR KVM */
2719     if (kvmppc_is_pr(kvm_state)) {
2720         return;
2721     }
2722 
2723     error_setg(errp,
2724                "Hash page table resizing not available with this KVM version");
2725 }
2726 
2727 int kvmppc_resize_hpt_prepare(PowerPCCPU *cpu, target_ulong flags, int shift)
2728 {
2729     CPUState *cs = CPU(cpu);
2730     struct kvm_ppc_resize_hpt rhpt = {
2731         .flags = flags,
2732         .shift = shift,
2733     };
2734 
2735     if (!cap_resize_hpt) {
2736         return -ENOSYS;
2737     }
2738 
2739     return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_PREPARE, &rhpt);
2740 }
2741 
2742 int kvmppc_resize_hpt_commit(PowerPCCPU *cpu, target_ulong flags, int shift)
2743 {
2744     CPUState *cs = CPU(cpu);
2745     struct kvm_ppc_resize_hpt rhpt = {
2746         .flags = flags,
2747         .shift = shift,
2748     };
2749 
2750     if (!cap_resize_hpt) {
2751         return -ENOSYS;
2752     }
2753 
2754     return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_COMMIT, &rhpt);
2755 }
2756 
2757 /*
2758  * This is a helper function to detect a post migration scenario
2759  * in which a guest, running as KVM-HV, freezes in cpu_post_load because
2760  * the guest kernel can't handle a PVR value other than the actual host
2761  * PVR in KVM_SET_SREGS, even if pvr_match() returns true.
2762  *
2763  * If we don't have cap_ppc_pvr_compat and we're not running in PR
2764  * (so, we're HV), return true. The workaround itself is done in
2765  * cpu_post_load.
2766  *
2767  * The order here is important: we'll only check for KVM PR as a
2768  * fallback if the guest kernel can't handle the situation itself.
2769  * We want to avoid querying the running KVM type from QEMU as much
2770  * as possible.
2771  */
2772 bool kvmppc_pvr_workaround_required(PowerPCCPU *cpu)
2773 {
2774     CPUState *cs = CPU(cpu);
2775 
2776     if (!kvm_enabled()) {
2777         return false;
2778     }
2779 
2780     if (cap_ppc_pvr_compat) {
2781         return false;
2782     }
2783 
2784     return !kvmppc_is_pr(cs->kvm_state);
2785 }
2786 
2787 void kvmppc_set_reg_ppc_online(PowerPCCPU *cpu, unsigned int online)
2788 {
2789     CPUState *cs = CPU(cpu);
2790 
2791     if (kvm_enabled()) {
2792         kvm_set_one_reg(cs, KVM_REG_PPC_ONLINE, &online);
2793     }
2794 }
2795