xref: /openbmc/qemu/target/ppc/kvm.c (revision ac6dd31e)
1 /*
2  * PowerPC implementation of KVM hooks
3  *
4  * Copyright IBM Corp. 2007
5  * Copyright (C) 2011 Freescale Semiconductor, Inc.
6  *
7  * Authors:
8  *  Jerone Young <jyoung5@us.ibm.com>
9  *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10  *  Hollis Blanchard <hollisb@us.ibm.com>
11  *
12  * This work is licensed under the terms of the GNU GPL, version 2 or later.
13  * See the COPYING file in the top-level directory.
14  *
15  */
16 
17 #include "qemu/osdep.h"
18 #include <dirent.h>
19 #include <sys/ioctl.h>
20 #include <sys/vfs.h>
21 
22 #include <linux/kvm.h>
23 
24 #include "qemu-common.h"
25 #include "qapi/error.h"
26 #include "qemu/error-report.h"
27 #include "cpu.h"
28 #include "cpu-models.h"
29 #include "qemu/timer.h"
30 #include "sysemu/sysemu.h"
31 #include "sysemu/hw_accel.h"
32 #include "kvm_ppc.h"
33 #include "sysemu/cpus.h"
34 #include "sysemu/device_tree.h"
35 #include "mmu-hash64.h"
36 
37 #include "hw/sysbus.h"
38 #include "hw/ppc/spapr.h"
39 #include "hw/ppc/spapr_vio.h"
40 #include "hw/ppc/spapr_cpu_core.h"
41 #include "hw/ppc/ppc.h"
42 #include "sysemu/watchdog.h"
43 #include "trace.h"
44 #include "exec/gdbstub.h"
45 #include "exec/memattrs.h"
46 #include "exec/ram_addr.h"
47 #include "sysemu/hostmem.h"
48 #include "qemu/cutils.h"
49 #include "qemu/mmap-alloc.h"
50 #include "elf.h"
51 #include "sysemu/kvm_int.h"
52 
53 //#define DEBUG_KVM
54 
55 #ifdef DEBUG_KVM
56 #define DPRINTF(fmt, ...) \
57     do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
58 #else
59 #define DPRINTF(fmt, ...) \
60     do { } while (0)
61 #endif
62 
63 #define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"
64 
65 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
66     KVM_CAP_LAST_INFO
67 };
68 
69 static int cap_interrupt_unset = false;
70 static int cap_interrupt_level = false;
71 static int cap_segstate;
72 static int cap_booke_sregs;
73 static int cap_ppc_smt;
74 static int cap_ppc_smt_possible;
75 static int cap_spapr_tce;
76 static int cap_spapr_tce_64;
77 static int cap_spapr_multitce;
78 static int cap_spapr_vfio;
79 static int cap_hior;
80 static int cap_one_reg;
81 static int cap_epr;
82 static int cap_ppc_watchdog;
83 static int cap_papr;
84 static int cap_htab_fd;
85 static int cap_fixup_hcalls;
86 static int cap_htm;             /* Hardware transactional memory support */
87 static int cap_mmu_radix;
88 static int cap_mmu_hash_v3;
89 static int cap_resize_hpt;
90 static int cap_ppc_pvr_compat;
91 static int cap_ppc_safe_cache;
92 static int cap_ppc_safe_bounds_check;
93 static int cap_ppc_safe_indirect_branch;
94 static int cap_ppc_nested_kvm_hv;
95 
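/* Software breakpoint instruction provided by KVM (KVM_REG_PPC_DEBUG_INST),
 * used when inserting and removing gdbstub software breakpoints. */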
96 static uint32_t debug_inst_opcode;
97 
98 /* XXX We have a race condition where we actually have a level triggered
99  *     interrupt, but the infrastructure can't expose that yet, so the guest
100  *     takes the interrupt but ignores it, goes to sleep and never gets
101  *     notified that there's still an interrupt pending.
102  *
103  *     As a quick workaround, let's just wake up again 20 ms after we injected
104  *     an interrupt. That way we can ensure that we're always reinjecting
105  *     interrupts in case the guest swallowed them.
106  */
107 static QEMUTimer *idle_timer;
108 
109 static void kvm_kick_cpu(void *opaque)
110 {
111     PowerPCCPU *cpu = opaque;
112 
113     qemu_cpu_kick(CPU(cpu));
114 }
115 
116 /* Check whether we are running with KVM-PR (instead of KVM-HV).  This
117  * should only be used for fallback tests - generally we should use
118  * explicit capabilities for the features we want, rather than
119  * assuming what is/isn't available depending on the KVM variant. */
120 static bool kvmppc_is_pr(KVMState *ks)
121 {
122     /* Assume KVM-PR if the GET_PVINFO capability is available */
123     return kvm_vm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
124 }
125 
126 static int kvm_ppc_register_host_cpu_type(MachineState *ms);
127 static void kvmppc_get_cpu_characteristics(KVMState *s);
128 
129 int kvm_arch_init(MachineState *ms, KVMState *s)
130 {
131     cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
132     cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
133     cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
134     cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
135     cap_ppc_smt_possible = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT_POSSIBLE);
136     cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
137     cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64);
138     cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
139     cap_spapr_vfio = kvm_vm_check_extension(s, KVM_CAP_SPAPR_TCE_VFIO);
140     cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
141     cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
142     cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
143     cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
144     /* Note: we don't set cap_papr here, because this capability is
145      * only activated later, by kvmppc_set_papr() */
146     cap_htab_fd = kvm_vm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
147     cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
148     cap_ppc_smt = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT);
149     cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
150     cap_mmu_radix = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_RADIX);
151     cap_mmu_hash_v3 = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_HASH_V3);
152     cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT);
153     kvmppc_get_cpu_characteristics(s);
154     cap_ppc_nested_kvm_hv = kvm_vm_check_extension(s, KVM_CAP_PPC_NESTED_HV);
155     /*
156      * Note: setting it to false because there is no such capability
157      * in KVM at the moment.
158      *
159      * TODO: call kvm_vm_check_extension() with the right capability
160      * after the kernel starts implementing it. */
161     cap_ppc_pvr_compat = false;
162 
163     if (!cap_interrupt_level) {
164         fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
165                         "VM to stall at times!\n");
166     }
167 
168     kvm_ppc_register_host_cpu_type(ms);
169 
170     return 0;
171 }
172 
173 int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
174 {
175     return 0;
176 }
177 
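/* Tell KVM the guest's PVR via KVM_SET_SREGS. BookE keeps the native PVR,
 * so this is a no-op there. */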
178 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
179 {
180     CPUPPCState *cenv = &cpu->env;
181     CPUState *cs = CPU(cpu);
182     struct kvm_sregs sregs;
183     int ret;
184 
185     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
186         /* What we're really trying to say is "if we're on BookE, we use
187            the native PVR for now". This is the only sane way to check
188            it though, so we may mislead users into thinking they can run
189            BookE guests on BookS. Let's hope nobody dares enough :) */
190         return 0;
191     } else {
192         if (!cap_segstate) {
193             fprintf(stderr, "kvm error: missing PVR setting capability\n");
194             return -ENOSYS;
195         }
196     }
197 
198     ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
199     if (ret) {
200         return ret;
201     }
202 
203     sregs.pvr = cenv->spr[SPR_PVR];
204     return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
205 }
206 
207 /* Set up a shared TLB array with KVM */
208 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
209 {
210     CPUPPCState *env = &cpu->env;
211     CPUState *cs = CPU(cpu);
212     struct kvm_book3e_206_tlb_params params = {};
213     struct kvm_config_tlb cfg = {};
214     unsigned int entries = 0;
215     int ret, i;
216 
217     if (!kvm_enabled() ||
218         !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
219         return 0;
220     }
221 
222     assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
223 
224     for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
225         params.tlb_sizes[i] = booke206_tlb_size(env, i);
226         params.tlb_ways[i] = booke206_tlb_ways(env, i);
227         entries += params.tlb_sizes[i];
228     }
229 
230     assert(entries == env->nb_tlb);
231     assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
232 
233     env->tlb_dirty = true;
234 
235     cfg.array = (uintptr_t)env->tlb.tlbm;
236     cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
237     cfg.params = (uintptr_t)&params;
238     cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
239 
240     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
241     if (ret < 0) {
242         fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
243                 __func__, strerror(-ret));
244         return ret;
245     }
246 
247     env->kvm_sw_tlb = true;
248     return 0;
249 }
250 
251 
252 #if defined(TARGET_PPC64)
253 static void kvm_get_smmu_info(struct kvm_ppc_smmu_info *info, Error **errp)
254 {
255     int ret;
256 
257     assert(kvm_state != NULL);
258 
259     if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
260         error_setg(errp, "KVM doesn't expose the MMU features it supports");
261         error_append_hint(errp, "Consider switching to a newer KVM\n");
262         return;
263     }
264 
265     ret = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_SMMU_INFO, info);
266     if (ret == 0) {
267         return;
268     }
269 
270     error_setg_errno(errp, -ret,
271                      "KVM failed to provide the MMU features it supports");
272 }
273 
274 struct ppc_radix_page_info *kvm_get_radix_page_info(void)
275 {
276     KVMState *s = KVM_STATE(current_machine->accelerator);
277     struct ppc_radix_page_info *radix_page_info;
278     struct kvm_ppc_rmmu_info rmmu_info;
279     int i;
280 
281     if (!kvm_check_extension(s, KVM_CAP_PPC_MMU_RADIX)) {
282         return NULL;
283     }
284     if (kvm_vm_ioctl(s, KVM_PPC_GET_RMMU_INFO, &rmmu_info)) {
285         return NULL;
286     }
287     radix_page_info = g_malloc0(sizeof(*radix_page_info));
288     radix_page_info->count = 0;
289     for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) {
290         if (rmmu_info.ap_encodings[i]) {
291             radix_page_info->entries[i] = rmmu_info.ap_encodings[i];
292             radix_page_info->count++;
293         }
294     }
295     return radix_page_info;
296 }
297 
298 target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu,
299                                      bool radix, bool gtse,
300                                      uint64_t proc_tbl)
301 {
302     CPUState *cs = CPU(cpu);
303     int ret;
304     uint64_t flags = 0;
305     struct kvm_ppc_mmuv3_cfg cfg = {
306         .process_table = proc_tbl,
307     };
308 
309     if (radix) {
310         flags |= KVM_PPC_MMUV3_RADIX;
311     }
312     if (gtse) {
313         flags |= KVM_PPC_MMUV3_GTSE;
314     }
315     cfg.flags = flags;
316     ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_CONFIGURE_V3_MMU, &cfg);
317     switch (ret) {
318     case 0:
319         return H_SUCCESS;
320     case -EINVAL:
321         return H_PARAMETER;
322     case -ENODEV:
323         return H_NOT_AVAILABLE;
324     default:
325         return H_HARDWARE;
326     }
327 }
328 
329 bool kvmppc_hpt_needs_host_contiguous_pages(void)
330 {
331     static struct kvm_ppc_smmu_info smmu_info;
332 
333     if (!kvm_enabled()) {
334         return false;
335     }
336 
337     kvm_get_smmu_info(&smmu_info, &error_fatal);
338     return !!(smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL);
339 }
340 
341 void kvm_check_mmu(PowerPCCPU *cpu, Error **errp)
342 {
343     struct kvm_ppc_smmu_info smmu_info;
344     int iq, ik, jq, jk;
345     Error *local_err = NULL;
346 
347     /* For now, we only have anything to check on hash64 MMUs */
348     if (!cpu->hash64_opts || !kvm_enabled()) {
349         return;
350     }
351 
352     kvm_get_smmu_info(&smmu_info, &local_err);
353     if (local_err) {
354         error_propagate(errp, local_err);
355         return;
356     }
357 
358     if (ppc_hash64_has(cpu, PPC_HASH64_1TSEG)
359         && !(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
360         error_setg(errp,
361                    "KVM does not support 1TiB segments which the guest expects");
362         return;
363     }
364 
365     if (smmu_info.slb_size < cpu->hash64_opts->slb_size) {
366         error_setg(errp, "KVM only supports %u SLB entries, but guest needs %u",
367                    smmu_info.slb_size, cpu->hash64_opts->slb_size);
368         return;
369     }
370 
371     /*
372      * Verify that every pagesize supported by the cpu model is
373      * supported by KVM with the same encodings
374      */
375     for (iq = 0; iq < ARRAY_SIZE(cpu->hash64_opts->sps); iq++) {
376         PPCHash64SegmentPageSizes *qsps = &cpu->hash64_opts->sps[iq];
377         struct kvm_ppc_one_seg_page_size *ksps;
378 
379         for (ik = 0; ik < ARRAY_SIZE(smmu_info.sps); ik++) {
380             if (qsps->page_shift == smmu_info.sps[ik].page_shift) {
381                 break;
382             }
383         }
384         if (ik >= ARRAY_SIZE(smmu_info.sps)) {
385             error_setg(errp, "KVM doesn't support base page shift %u",
386                        qsps->page_shift);
387             return;
388         }
389 
390         ksps = &smmu_info.sps[ik];
391         if (ksps->slb_enc != qsps->slb_enc) {
392             error_setg(errp,
393 "KVM uses SLB encoding 0x%x for page shift %u, but guest expects 0x%x",
394                        ksps->slb_enc, ksps->page_shift, qsps->slb_enc);
395             return;
396         }
397 
398         for (jq = 0; jq < ARRAY_SIZE(qsps->enc); jq++) {
399             for (jk = 0; jk < ARRAY_SIZE(ksps->enc); jk++) {
400                 if (qsps->enc[jq].page_shift == ksps->enc[jk].page_shift) {
401                     break;
402                 }
403             }
404 
405             if (jk >= ARRAY_SIZE(ksps->enc)) {
406                 error_setg(errp, "KVM doesn't support page shift %u/%u",
407                            qsps->enc[jq].page_shift, qsps->page_shift);
408                 return;
409             }
410             if (qsps->enc[jq].pte_enc != ksps->enc[jk].pte_enc) {
411                 error_setg(errp,
412 "KVM uses PTE encoding 0x%x for page shift %u/%u, but guest expects 0x%x",
413                            ksps->enc[jk].pte_enc, qsps->enc[jq].page_shift,
414                            qsps->page_shift, qsps->enc[jq].pte_enc);
415                 return;
416             }
417         }
418     }
419 
420     if (ppc_hash64_has(cpu, PPC_HASH64_CI_LARGEPAGE)) {
421         /* Most of the guest pagesizes we can use are determined by the
422          * host pages used to map guest RAM, which is handled in the
423          * platform code. Cache-Inhibited largepages (64k), however, are
424          * used for I/O, so if they're mapped to the host at all it
425          * will be a normal mapping, not a special hugepage one used
426          * for RAM. */
427         if (getpagesize() < 0x10000) {
428             error_setg(errp,
429                        "KVM can't supply 64kiB CI pages, which the guest expects");
430         }
431     }
432 }
433 #endif /* defined(TARGET_PPC64) */
434 
435 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
436 {
437     return POWERPC_CPU(cpu)->vcpu_id;
438 }
439 
440 /* e500 supports 2 h/w breakpoints and 2 watchpoints.
441  * book3s supports only 1 watchpoint, so an array size
442  * of 4 is sufficient for now.
443  */
444 #define MAX_HW_BKPTS 4
445 
446 static struct HWBreakpoint {
447     target_ulong addr;
448     int type;
449 } hw_debug_points[MAX_HW_BKPTS];
450 
451 static CPUWatchpoint hw_watchpoint;
452 
453 /* By default no hardware breakpoints or watchpoints are supported */
454 static int max_hw_breakpoint;
455 static int max_hw_watchpoint;
456 static int nb_hw_breakpoint;
457 static int nb_hw_watchpoint;
458 
459 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
460 {
461     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
462         max_hw_breakpoint = 2;
463         max_hw_watchpoint = 2;
464     }
465 
466     if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
467         fprintf(stderr, "Error initializing h/w breakpoints\n");
468         return;
469     }
470 }
471 
472 int kvm_arch_init_vcpu(CPUState *cs)
473 {
474     PowerPCCPU *cpu = POWERPC_CPU(cs);
475     CPUPPCState *cenv = &cpu->env;
476     int ret;
477 
478     /* Synchronize sregs with kvm */
479     ret = kvm_arch_sync_sregs(cpu);
480     if (ret) {
481         if (ret == -EINVAL) {
482             error_report("Register sync failed... If you're using kvm-hv.ko,"
483                          " only \"-cpu host\" is possible");
484         }
485         return ret;
486     }
487 
488     idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
489 
490     switch (cenv->mmu_model) {
491     case POWERPC_MMU_BOOKE206:
492         /* This target supports access to KVM's guest TLB */
493         ret = kvm_booke206_tlb_init(cpu);
494         break;
495     case POWERPC_MMU_2_07:
496         if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
497             /* KVM-HV has transactional memory on POWER8 even without the
498              * KVM_CAP_PPC_HTM extension, so enable it here instead as
499              * long as it's available to userspace on the host. */
500             if (qemu_getauxval(AT_HWCAP2) & PPC_FEATURE2_HAS_HTM) {
501                 cap_htm = true;
502             }
503         }
504         break;
505     default:
506         break;
507     }
508 
509     kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
510     kvmppc_hw_debug_points_init(cenv);
511 
512     return ret;
513 }
514 
515 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
516 {
517     CPUPPCState *env = &cpu->env;
518     CPUState *cs = CPU(cpu);
519     struct kvm_dirty_tlb dirty_tlb;
520     unsigned char *bitmap;
521     int ret;
522 
523     if (!env->kvm_sw_tlb) {
524         return;
525     }
526 
527     bitmap = g_malloc((env->nb_tlb + 7) / 8);
528     memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
529 
530     dirty_tlb.bitmap = (uintptr_t)bitmap;
531     dirty_tlb.num_dirty = env->nb_tlb;
532 
533     ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
534     if (ret) {
535         fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
536                 __func__, strerror(-ret));
537     }
538 
539     g_free(bitmap);
540 }
541 
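/* Read a single SPR from KVM via KVM_GET_ONE_REG and store it in env->spr[] */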
542 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
543 {
544     PowerPCCPU *cpu = POWERPC_CPU(cs);
545     CPUPPCState *env = &cpu->env;
546     union {
547         uint32_t u32;
548         uint64_t u64;
549     } val;
550     struct kvm_one_reg reg = {
551         .id = id,
552         .addr = (uintptr_t) &val,
553     };
554     int ret;
555 
556     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
557     if (ret != 0) {
558         trace_kvm_failed_spr_get(spr, strerror(errno));
559     } else {
560         switch (id & KVM_REG_SIZE_MASK) {
561         case KVM_REG_SIZE_U32:
562             env->spr[spr] = val.u32;
563             break;
564 
565         case KVM_REG_SIZE_U64:
566             env->spr[spr] = val.u64;
567             break;
568 
569         default:
570             /* Don't handle this size yet */
571             abort();
572         }
573     }
574 }
575 
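/* Write a single SPR from env->spr[] to KVM via KVM_SET_ONE_REG */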
576 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
577 {
578     PowerPCCPU *cpu = POWERPC_CPU(cs);
579     CPUPPCState *env = &cpu->env;
580     union {
581         uint32_t u32;
582         uint64_t u64;
583     } val;
584     struct kvm_one_reg reg = {
585         .id = id,
586         .addr = (uintptr_t) &val,
587     };
588     int ret;
589 
590     switch (id & KVM_REG_SIZE_MASK) {
591     case KVM_REG_SIZE_U32:
592         val.u32 = env->spr[spr];
593         break;
594 
595     case KVM_REG_SIZE_U64:
596         val.u64 = env->spr[spr];
597         break;
598 
599     default:
600         /* Don't handle this size yet */
601         abort();
602     }
603 
604     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
605     if (ret != 0) {
606         trace_kvm_failed_spr_set(spr, strerror(errno));
607     }
608 }
609 
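/* Push FPSCR, the FP/VSX registers and the Altivec state to KVM,
 * one ONE_REG ioctl per register. */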
610 static int kvm_put_fp(CPUState *cs)
611 {
612     PowerPCCPU *cpu = POWERPC_CPU(cs);
613     CPUPPCState *env = &cpu->env;
614     struct kvm_one_reg reg;
615     int i;
616     int ret;
617 
618     if (env->insns_flags & PPC_FLOAT) {
619         uint64_t fpscr = env->fpscr;
620         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
621 
622         reg.id = KVM_REG_PPC_FPSCR;
623         reg.addr = (uintptr_t)&fpscr;
624         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
625         if (ret < 0) {
626             DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
627             return ret;
628         }
629 
630         for (i = 0; i < 32; i++) {
631             uint64_t vsr[2];
632             uint64_t *fpr = cpu_fpr_ptr(&cpu->env, i);
633             uint64_t *vsrl = cpu_vsrl_ptr(&cpu->env, i);
634 
635 #ifdef HOST_WORDS_BIGENDIAN
636             vsr[0] = float64_val(*fpr);
637             vsr[1] = *vsrl;
638 #else
639             vsr[0] = *vsrl;
640             vsr[1] = float64_val(*fpr);
641 #endif
642             reg.addr = (uintptr_t) &vsr;
643             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
644 
645             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
646             if (ret < 0) {
647                 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
648                         i, strerror(errno));
649                 return ret;
650             }
651         }
652     }
653 
654     if (env->insns_flags & PPC_ALTIVEC) {
655         reg.id = KVM_REG_PPC_VSCR;
656         reg.addr = (uintptr_t)&env->vscr;
657         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
658         if (ret < 0) {
659             DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
660             return ret;
661         }
662 
663         for (i = 0; i < 32; i++) {
664             reg.id = KVM_REG_PPC_VR(i);
665             reg.addr = (uintptr_t)cpu_avr_ptr(env, i);
666             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
667             if (ret < 0) {
668                 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
669                 return ret;
670             }
671         }
672     }
673 
674     return 0;
675 }
676 
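/* Fetch FPSCR, the FP/VSX registers and the Altivec state from KVM */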
677 static int kvm_get_fp(CPUState *cs)
678 {
679     PowerPCCPU *cpu = POWERPC_CPU(cs);
680     CPUPPCState *env = &cpu->env;
681     struct kvm_one_reg reg;
682     int i;
683     int ret;
684 
685     if (env->insns_flags & PPC_FLOAT) {
686         uint64_t fpscr;
687         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
688 
689         reg.id = KVM_REG_PPC_FPSCR;
690         reg.addr = (uintptr_t)&fpscr;
691         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
692         if (ret < 0) {
693             DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
694             return ret;
695         } else {
696             env->fpscr = fpscr;
697         }
698 
699         for (i = 0; i < 32; i++) {
700             uint64_t vsr[2];
701             uint64_t *fpr = cpu_fpr_ptr(&cpu->env, i);
702             uint64_t *vsrl = cpu_vsrl_ptr(&cpu->env, i);
703 
704             reg.addr = (uintptr_t) &vsr;
705             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
706 
707             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
708             if (ret < 0) {
709                 DPRINTF("Unable to get %s%d from KVM: %s\n",
710                         vsx ? "VSR" : "FPR", i, strerror(errno));
711                 return ret;
712             } else {
713 #ifdef HOST_WORDS_BIGENDIAN
714                 *fpr = vsr[0];
715                 if (vsx) {
716                     *vsrl = vsr[1];
717                 }
718 #else
719                 *fpr = vsr[1];
720                 if (vsx) {
721                     *vsrl = vsr[0];
722                 }
723 #endif
724             }
725         }
726     }
727 
728     if (env->insns_flags & PPC_ALTIVEC) {
729         reg.id = KVM_REG_PPC_VSCR;
730         reg.addr = (uintptr_t)&env->vscr;
731         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
732         if (ret < 0) {
733             DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
734             return ret;
735         }
736 
737         for (i = 0; i < 32; i++) {
738             reg.id = KVM_REG_PPC_VR(i);
739             reg.addr = (uintptr_t)cpu_avr_ptr(env, i);
740             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
741             if (ret < 0) {
742                 DPRINTF("Unable to get VR%d from KVM: %s\n",
743                         i, strerror(errno));
744                 return ret;
745             }
746         }
747     }
748 
749     return 0;
750 }
751 
752 #if defined(TARGET_PPC64)
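/* Read the sPAPR Virtual Processor Area registration state (VPA address,
 * SLB shadow and dispatch trace log) from KVM. */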
753 static int kvm_get_vpa(CPUState *cs)
754 {
755     PowerPCCPU *cpu = POWERPC_CPU(cs);
756     sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu);
757     struct kvm_one_reg reg;
758     int ret;
759 
760     reg.id = KVM_REG_PPC_VPA_ADDR;
761     reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
762     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
763     if (ret < 0) {
764         DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
765         return ret;
766     }
767 
768     assert((uintptr_t)&spapr_cpu->slb_shadow_size
769            == ((uintptr_t)&spapr_cpu->slb_shadow_addr + 8));
770     reg.id = KVM_REG_PPC_VPA_SLB;
771     reg.addr = (uintptr_t)&spapr_cpu->slb_shadow_addr;
772     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
773     if (ret < 0) {
774         DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
775                 strerror(errno));
776         return ret;
777     }
778 
779     assert((uintptr_t)&spapr_cpu->dtl_size
780            == ((uintptr_t)&spapr_cpu->dtl_addr + 8));
781     reg.id = KVM_REG_PPC_VPA_DTL;
782     reg.addr = (uintptr_t)&spapr_cpu->dtl_addr;
783     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
784     if (ret < 0) {
785         DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
786                 strerror(errno));
787         return ret;
788     }
789 
790     return 0;
791 }
792 
793 static int kvm_put_vpa(CPUState *cs)
794 {
795     PowerPCCPU *cpu = POWERPC_CPU(cs);
796     sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu);
797     struct kvm_one_reg reg;
798     int ret;
799 
800     /* SLB shadow or DTL can't be registered unless a master VPA is
801      * registered.  That means when restoring state, if a VPA *is*
802      * registered, we need to set that up first.  If not, we need to
803      * deregister the others before deregistering the master VPA */
804     assert(spapr_cpu->vpa_addr
805            || !(spapr_cpu->slb_shadow_addr || spapr_cpu->dtl_addr));
806 
807     if (spapr_cpu->vpa_addr) {
808         reg.id = KVM_REG_PPC_VPA_ADDR;
809         reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
810         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
811         if (ret < 0) {
812             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
813             return ret;
814         }
815     }
816 
817     assert((uintptr_t)&spapr_cpu->slb_shadow_size
818            == ((uintptr_t)&spapr_cpu->slb_shadow_addr + 8));
819     reg.id = KVM_REG_PPC_VPA_SLB;
820     reg.addr = (uintptr_t)&spapr_cpu->slb_shadow_addr;
821     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
822     if (ret < 0) {
823         DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
824         return ret;
825     }
826 
827     assert((uintptr_t)&spapr_cpu->dtl_size
828            == ((uintptr_t)&spapr_cpu->dtl_addr + 8));
829     reg.id = KVM_REG_PPC_VPA_DTL;
830     reg.addr = (uintptr_t)&spapr_cpu->dtl_addr;
831     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
832     if (ret < 0) {
833         DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
834                 strerror(errno));
835         return ret;
836     }
837 
838     if (!spapr_cpu->vpa_addr) {
839         reg.id = KVM_REG_PPC_VPA_ADDR;
840         reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
841         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
842         if (ret < 0) {
843             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
844             return ret;
845         }
846     }
847 
848     return 0;
849 }
850 #endif /* TARGET_PPC64 */
851 
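/* Push the book3s state (PVR, SDR1, SLB, segment registers and BATs)
 * to KVM via KVM_SET_SREGS. */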
852 int kvmppc_put_books_sregs(PowerPCCPU *cpu)
853 {
854     CPUPPCState *env = &cpu->env;
855     struct kvm_sregs sregs;
856     int i;
857 
858     sregs.pvr = env->spr[SPR_PVR];
859 
860     if (cpu->vhyp) {
861         PPCVirtualHypervisorClass *vhc =
862             PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
863         sregs.u.s.sdr1 = vhc->encode_hpt_for_kvm_pr(cpu->vhyp);
864     } else {
865         sregs.u.s.sdr1 = env->spr[SPR_SDR1];
866     }
867 
868     /* Sync SLB */
869 #ifdef TARGET_PPC64
870     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
871         sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
872         if (env->slb[i].esid & SLB_ESID_V) {
873             sregs.u.s.ppc64.slb[i].slbe |= i;
874         }
875         sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
876     }
877 #endif
878 
879     /* Sync SRs */
880     for (i = 0; i < 16; i++) {
881         sregs.u.s.ppc32.sr[i] = env->sr[i];
882     }
883 
884     /* Sync BATs */
885     for (i = 0; i < 8; i++) {
886         /* Beware. We have to swap the upper and lower 32-bit halves here */
887         sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
888             | env->DBAT[1][i];
889         sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
890             | env->IBAT[1][i];
891     }
892 
893     return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
894 }
895 
896 int kvm_arch_put_registers(CPUState *cs, int level)
897 {
898     PowerPCCPU *cpu = POWERPC_CPU(cs);
899     CPUPPCState *env = &cpu->env;
900     struct kvm_regs regs;
901     int ret;
902     int i;
903 
904     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
905     if (ret < 0) {
906         return ret;
907     }
908 
909     regs.ctr = env->ctr;
910     regs.lr  = env->lr;
911     regs.xer = cpu_read_xer(env);
912     regs.msr = env->msr;
913     regs.pc = env->nip;
914 
915     regs.srr0 = env->spr[SPR_SRR0];
916     regs.srr1 = env->spr[SPR_SRR1];
917 
918     regs.sprg0 = env->spr[SPR_SPRG0];
919     regs.sprg1 = env->spr[SPR_SPRG1];
920     regs.sprg2 = env->spr[SPR_SPRG2];
921     regs.sprg3 = env->spr[SPR_SPRG3];
922     regs.sprg4 = env->spr[SPR_SPRG4];
923     regs.sprg5 = env->spr[SPR_SPRG5];
924     regs.sprg6 = env->spr[SPR_SPRG6];
925     regs.sprg7 = env->spr[SPR_SPRG7];
926 
927     regs.pid = env->spr[SPR_BOOKE_PID];
928 
929     for (i = 0; i < 32; i++)
930         regs.gpr[i] = env->gpr[i];
931 
932     regs.cr = 0;
933     for (i = 0; i < 8; i++) {
934         regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
935     }
936 
937     ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
938     if (ret < 0)
939         return ret;
940 
941     kvm_put_fp(cs);
942 
943     if (env->tlb_dirty) {
944         kvm_sw_tlb_put(cpu);
945         env->tlb_dirty = false;
946     }
947 
948     if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
949         ret = kvmppc_put_books_sregs(cpu);
950         if (ret < 0) {
951             return ret;
952         }
953     }
954 
955     if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
956         kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
957     }
958 
959     if (cap_one_reg) {
960         int i;
961 
962         /* We deliberately ignore errors here: for kernels which have
963          * the ONE_REG calls but don't support the specific
964          * registers, there's a reasonable chance things will still
965          * work, at least until we try to migrate. */
966         for (i = 0; i < 1024; i++) {
967             uint64_t id = env->spr_cb[i].one_reg_id;
968 
969             if (id != 0) {
970                 kvm_put_one_spr(cs, id, i);
971             }
972         }
973 
974 #ifdef TARGET_PPC64
975         if (msr_ts) {
976             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
977                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
978             }
979             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
980                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
981             }
982             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
983             kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
984             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
985             kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
986             kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
987             kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
988             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
989             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
990             kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
991             kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
992         }
993 
994         if (cap_papr) {
995             if (kvm_put_vpa(cs) < 0) {
996                 DPRINTF("Warning: Unable to set VPA information to KVM\n");
997             }
998         }
999 
1000         kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1001 #endif /* TARGET_PPC64 */
1002     }
1003 
1004     return ret;
1005 }
1006 
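/* Recompute a guest exception vector from the BookE IVORn and IVPR SPRs */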
1007 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1008 {
1009      env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1010 }
1011 
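/* Fetch the BookE special registers that KVM advertises and update the
 * corresponding SPRs and exception vectors. */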
1012 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1013 {
1014     CPUPPCState *env = &cpu->env;
1015     struct kvm_sregs sregs;
1016     int ret;
1017 
1018     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1019     if (ret < 0) {
1020         return ret;
1021     }
1022 
1023     if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1024         env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1025         env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1026         env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1027         env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1028         env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1029         env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1030         env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1031         env->spr[SPR_DECR] = sregs.u.e.dec;
1032         env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1033         env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1034         env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1035     }
1036 
1037     if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1038         env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1039         env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1040         env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1041         env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1042         env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1043     }
1044 
1045     if (sregs.u.e.features & KVM_SREGS_E_64) {
1046         env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1047     }
1048 
1049     if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1050         env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1051     }
1052 
1053     if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1054         env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1055         kvm_sync_excp(env, POWERPC_EXCP_CRITICAL,  SPR_BOOKE_IVOR0);
1056         env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1057         kvm_sync_excp(env, POWERPC_EXCP_MCHECK,  SPR_BOOKE_IVOR1);
1058         env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1059         kvm_sync_excp(env, POWERPC_EXCP_DSI,  SPR_BOOKE_IVOR2);
1060         env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1061         kvm_sync_excp(env, POWERPC_EXCP_ISI,  SPR_BOOKE_IVOR3);
1062         env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1063         kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL,  SPR_BOOKE_IVOR4);
1064         env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1065         kvm_sync_excp(env, POWERPC_EXCP_ALIGN,  SPR_BOOKE_IVOR5);
1066         env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1067         kvm_sync_excp(env, POWERPC_EXCP_PROGRAM,  SPR_BOOKE_IVOR6);
1068         env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1069         kvm_sync_excp(env, POWERPC_EXCP_FPU,  SPR_BOOKE_IVOR7);
1070         env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1071         kvm_sync_excp(env, POWERPC_EXCP_SYSCALL,  SPR_BOOKE_IVOR8);
1072         env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1073         kvm_sync_excp(env, POWERPC_EXCP_APU,  SPR_BOOKE_IVOR9);
1074         env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1075         kvm_sync_excp(env, POWERPC_EXCP_DECR,  SPR_BOOKE_IVOR10);
1076         env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1077         kvm_sync_excp(env, POWERPC_EXCP_FIT,  SPR_BOOKE_IVOR11);
1078         env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1079         kvm_sync_excp(env, POWERPC_EXCP_WDT,  SPR_BOOKE_IVOR12);
1080         env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1081         kvm_sync_excp(env, POWERPC_EXCP_DTLB,  SPR_BOOKE_IVOR13);
1082         env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1083         kvm_sync_excp(env, POWERPC_EXCP_ITLB,  SPR_BOOKE_IVOR14);
1084         env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1085         kvm_sync_excp(env, POWERPC_EXCP_DEBUG,  SPR_BOOKE_IVOR15);
1086 
1087         if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1088             env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1089             kvm_sync_excp(env, POWERPC_EXCP_SPEU,  SPR_BOOKE_IVOR32);
1090             env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1091             kvm_sync_excp(env, POWERPC_EXCP_EFPDI,  SPR_BOOKE_IVOR33);
1092             env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1093             kvm_sync_excp(env, POWERPC_EXCP_EFPRI,  SPR_BOOKE_IVOR34);
1094         }
1095 
1096         if (sregs.u.e.features & KVM_SREGS_E_PM) {
1097             env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1098             kvm_sync_excp(env, POWERPC_EXCP_EPERFM,  SPR_BOOKE_IVOR35);
1099         }
1100 
1101         if (sregs.u.e.features & KVM_SREGS_E_PC) {
1102             env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1103             kvm_sync_excp(env, POWERPC_EXCP_DOORI,  SPR_BOOKE_IVOR36);
1104             env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1105             kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1106         }
1107     }
1108 
1109     if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1110         env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1111         env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1112         env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1113         env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1114         env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1115         env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1116         env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1117         env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1118         env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1119         env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1120     }
1121 
1122     if (sregs.u.e.features & KVM_SREGS_EXP) {
1123         env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1124     }
1125 
1126     if (sregs.u.e.features & KVM_SREGS_E_PD) {
1127         env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1128         env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1129     }
1130 
1131     if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1132         env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1133         env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1134         env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1135 
1136         if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1137             env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1138             env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1139         }
1140     }
1141 
1142     return 0;
1143 }
1144 
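/* Fetch the book3s MMU state (SDR1, SLB, segment registers and BATs)
 * from KVM via KVM_GET_SREGS. */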
1145 static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1146 {
1147     CPUPPCState *env = &cpu->env;
1148     struct kvm_sregs sregs;
1149     int ret;
1150     int i;
1151 
1152     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1153     if (ret < 0) {
1154         return ret;
1155     }
1156 
1157     if (!cpu->vhyp) {
1158         ppc_store_sdr1(env, sregs.u.s.sdr1);
1159     }
1160 
1161     /* Sync SLB */
1162 #ifdef TARGET_PPC64
1163     /*
1164      * The packed SLB array we get from KVM_GET_SREGS only contains
1165      * information about valid entries. So we flush our internal copy
1166      * to get rid of stale ones, then put all valid SLB entries back
1167      * in.
1168      */
1169     memset(env->slb, 0, sizeof(env->slb));
1170     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1171         target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1172         target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1173         /*
1174          * Only restore valid entries
1175          */
1176         if (rb & SLB_ESID_V) {
1177             ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1178         }
1179     }
1180 #endif
1181 
1182     /* Sync SRs */
1183     for (i = 0; i < 16; i++) {
1184         env->sr[i] = sregs.u.s.ppc32.sr[i];
1185     }
1186 
1187     /* Sync BATs */
1188     for (i = 0; i < 8; i++) {
1189         env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1190         env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1191         env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1192         env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1193     }
1194 
1195     return 0;
1196 }
1197 
1198 int kvm_arch_get_registers(CPUState *cs)
1199 {
1200     PowerPCCPU *cpu = POWERPC_CPU(cs);
1201     CPUPPCState *env = &cpu->env;
1202     struct kvm_regs regs;
1203     uint32_t cr;
1204     int i, ret;
1205 
1206     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1207     if (ret < 0)
1208         return ret;
1209 
1210     cr = regs.cr;
1211     for (i = 7; i >= 0; i--) {
1212         env->crf[i] = cr & 15;
1213         cr >>= 4;
1214     }
1215 
1216     env->ctr = regs.ctr;
1217     env->lr = regs.lr;
1218     cpu_write_xer(env, regs.xer);
1219     env->msr = regs.msr;
1220     env->nip = regs.pc;
1221 
1222     env->spr[SPR_SRR0] = regs.srr0;
1223     env->spr[SPR_SRR1] = regs.srr1;
1224 
1225     env->spr[SPR_SPRG0] = regs.sprg0;
1226     env->spr[SPR_SPRG1] = regs.sprg1;
1227     env->spr[SPR_SPRG2] = regs.sprg2;
1228     env->spr[SPR_SPRG3] = regs.sprg3;
1229     env->spr[SPR_SPRG4] = regs.sprg4;
1230     env->spr[SPR_SPRG5] = regs.sprg5;
1231     env->spr[SPR_SPRG6] = regs.sprg6;
1232     env->spr[SPR_SPRG7] = regs.sprg7;
1233 
1234     env->spr[SPR_BOOKE_PID] = regs.pid;
1235 
1236     for (i = 0; i < 32; i++)
1237         env->gpr[i] = regs.gpr[i];
1238 
1239     kvm_get_fp(cs);
1240 
1241     if (cap_booke_sregs) {
1242         ret = kvmppc_get_booke_sregs(cpu);
1243         if (ret < 0) {
1244             return ret;
1245         }
1246     }
1247 
1248     if (cap_segstate) {
1249         ret = kvmppc_get_books_sregs(cpu);
1250         if (ret < 0) {
1251             return ret;
1252         }
1253     }
1254 
1255     if (cap_hior) {
1256         kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1257     }
1258 
1259     if (cap_one_reg) {
1260         int i;
1261 
1262         /* We deliberately ignore errors here: for kernels which have
1263          * the ONE_REG calls but don't support the specific
1264          * registers, there's a reasonable chance things will still
1265          * work, at least until we try to migrate. */
1266         for (i = 0; i < 1024; i++) {
1267             uint64_t id = env->spr_cb[i].one_reg_id;
1268 
1269             if (id != 0) {
1270                 kvm_get_one_spr(cs, id, i);
1271             }
1272         }
1273 
1274 #ifdef TARGET_PPC64
1275         if (msr_ts) {
1276             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1277                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1278             }
1279             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1280                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1281             }
1282             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1283             kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1284             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1285             kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1286             kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1287             kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1288             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1289             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1290             kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1291             kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1292         }
1293 
1294         if (cap_papr) {
1295             if (kvm_get_vpa(cs) < 0) {
1296                 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1297             }
1298         }
1299 
1300         kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1301 #endif
1302     }
1303 
1304     return 0;
1305 }
1306 
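/* Raise or lower the guest's external interrupt line via KVM_INTERRUPT
 * (only effective when KVM supports level-triggered IRQs). */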
1307 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1308 {
1309     unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1310 
1311     if (irq != PPC_INTERRUPT_EXT) {
1312         return 0;
1313     }
1314 
1315     if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1316         return 0;
1317     }
1318 
1319     kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1320 
1321     return 0;
1322 }
1323 
1324 #if defined(TARGET_PPC64)
1325 #define PPC_INPUT_INT PPC970_INPUT_INT
1326 #else
1327 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1328 #endif
1329 
1330 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1331 {
1332     PowerPCCPU *cpu = POWERPC_CPU(cs);
1333     CPUPPCState *env = &cpu->env;
1334     int r;
1335     unsigned irq;
1336 
1337     qemu_mutex_lock_iothread();
1338 
1339     /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1340      * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1341     if (!cap_interrupt_level &&
1342         run->ready_for_interrupt_injection &&
1343         (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1344         (env->irq_input_state & (1<<PPC_INPUT_INT)))
1345     {
1346         /* For now KVM disregards the 'irq' argument. However, in the
1347          * future KVM could cache it in-kernel to avoid a heavyweight exit
1348          * when reading the UIC.
1349          */
1350         irq = KVM_INTERRUPT_SET;
1351 
1352         DPRINTF("injected interrupt %d\n", irq);
1353         r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1354         if (r < 0) {
1355             printf("cpu %d failed to inject interrupt %x\n", cs->cpu_index, irq);
1356         }
1357 
1358         /* Always wake up soon in case the interrupt was level based */
1359         timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1360                        (NANOSECONDS_PER_SECOND / 50));
1361     }
1362 
1363     /* We don't know if there are more interrupts pending after this. However,
1364      * the guest will return to userspace in the course of handling this one
1365      * anyway, so we will get a chance to deliver the rest. */
1366 
1367     qemu_mutex_unlock_iothread();
1368 }
1369 
1370 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1371 {
1372     return MEMTXATTRS_UNSPECIFIED;
1373 }
1374 
1375 int kvm_arch_process_async_events(CPUState *cs)
1376 {
1377     return cs->halted;
1378 }
1379 
1380 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1381 {
1382     CPUState *cs = CPU(cpu);
1383     CPUPPCState *env = &cpu->env;
1384 
1385     if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1386         cs->halted = 1;
1387         cs->exception_index = EXCP_HLT;
1388     }
1389 
1390     return 0;
1391 }
1392 
1393 /* Map DCR accesses onto the existing QEMU DCR emulation */
1394 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1395 {
1396     if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1397         fprintf(stderr, "Read from unhandled DCR (0x%x)\n", dcrn);
1398 
1399     return 0;
1400 }
1401 
1402 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1403 {
1404     if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1405         fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1406 
1407     return 0;
1408 }
1409 
1410 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1411 {
1412     /* Mixed endian case is not handled */
1413     uint32_t sc = debug_inst_opcode;
1414 
1415     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1416                             sizeof(sc), 0) ||
1417         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1418         return -EINVAL;
1419     }
1420 
1421     return 0;
1422 }
1423 
1424 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1425 {
1426     uint32_t sc;
1427 
1428     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1429         sc != debug_inst_opcode ||
1430         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1431                             sizeof(sc), 1)) {
1432         return -EINVAL;
1433     }
1434 
1435     return 0;
1436 }
1437 
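/* Look up a hardware breakpoint/watchpoint slot by address and type;
 * returns its index in hw_debug_points[], or -1 if not found. */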
1438 static int find_hw_breakpoint(target_ulong addr, int type)
1439 {
1440     int n;
1441 
1442     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1443            <= ARRAY_SIZE(hw_debug_points));
1444 
1445     for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1446         if (hw_debug_points[n].addr == addr &&
1447              hw_debug_points[n].type == type) {
1448             return n;
1449         }
1450     }
1451 
1452     return -1;
1453 }
1454 
1455 static int find_hw_watchpoint(target_ulong addr, int *flag)
1456 {
1457     int n;
1458 
1459     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1460     if (n >= 0) {
1461         *flag = BP_MEM_ACCESS;
1462         return n;
1463     }
1464 
1465     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1466     if (n >= 0) {
1467         *flag = BP_MEM_WRITE;
1468         return n;
1469     }
1470 
1471     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1472     if (n >= 0) {
1473         *flag = BP_MEM_READ;
1474         return n;
1475     }
1476 
1477     return -1;
1478 }
1479 
1480 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1481                                   target_ulong len, int type)
1482 {
1483     if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1484         return -ENOBUFS;
1485     }
1486 
1487     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1488     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1489 
1490     switch (type) {
1491     case GDB_BREAKPOINT_HW:
1492         if (nb_hw_breakpoint >= max_hw_breakpoint) {
1493             return -ENOBUFS;
1494         }
1495 
1496         if (find_hw_breakpoint(addr, type) >= 0) {
1497             return -EEXIST;
1498         }
1499 
1500         nb_hw_breakpoint++;
1501         break;
1502 
1503     case GDB_WATCHPOINT_WRITE:
1504     case GDB_WATCHPOINT_READ:
1505     case GDB_WATCHPOINT_ACCESS:
1506         if (nb_hw_watchpoint >= max_hw_watchpoint) {
1507             return -ENOBUFS;
1508         }
1509 
1510         if (find_hw_breakpoint(addr, type) >= 0) {
1511             return -EEXIST;
1512         }
1513 
1514         nb_hw_watchpoint++;
1515         break;
1516 
1517     default:
1518         return -ENOSYS;
1519     }
1520 
1521     return 0;
1522 }
1523 
1524 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1525                                   target_ulong len, int type)
1526 {
1527     int n;
1528 
1529     n = find_hw_breakpoint(addr, type);
1530     if (n < 0) {
1531         return -ENOENT;
1532     }
1533 
1534     switch (type) {
1535     case GDB_BREAKPOINT_HW:
1536         nb_hw_breakpoint--;
1537         break;
1538 
1539     case GDB_WATCHPOINT_WRITE:
1540     case GDB_WATCHPOINT_READ:
1541     case GDB_WATCHPOINT_ACCESS:
1542         nb_hw_watchpoint--;
1543         break;
1544 
1545     default:
1546         return -ENOSYS;
1547     }
1548     hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1549 
1550     return 0;
1551 }
1552 
1553 void kvm_arch_remove_all_hw_breakpoints(void)
1554 {
1555     nb_hw_breakpoint = nb_hw_watchpoint = 0;
1556 }
1557 
1558 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1559 {
1560     int n;
1561 
1562     /* Software Breakpoint updates */
1563     if (kvm_sw_breakpoints_active(cs)) {
1564         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1565     }
1566 
1567     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1568            <= ARRAY_SIZE(hw_debug_points));
1569     assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1570 
1571     if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1572         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1573         memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1574         for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1575             switch (hw_debug_points[n].type) {
1576             case GDB_BREAKPOINT_HW:
1577                 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1578                 break;
1579             case GDB_WATCHPOINT_WRITE:
1580                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1581                 break;
1582             case GDB_WATCHPOINT_READ:
1583                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1584                 break;
1585             case GDB_WATCHPOINT_ACCESS:
1586                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1587                                         KVMPPC_DEBUG_WATCH_READ;
1588                 break;
1589             default:
1590                 cpu_abort(cs, "Unsupported breakpoint type\n");
1591             }
1592             dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1593         }
1594     }
1595 }
1596 
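/* Decide whether a KVM_EXIT_DEBUG exit was caused by QEMU's own debug
 * resources (gdbstub breakpoints, watchpoints or single step); returns 1
 * if so, otherwise 0 after reflecting the event back to the guest as a
 * program exception where appropriate. */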
1597 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1598 {
1599     CPUState *cs = CPU(cpu);
1600     CPUPPCState *env = &cpu->env;
1601     struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1602     int handle = 0;
1603     int n;
1604     int flag = 0;
1605 
1606     if (cs->singlestep_enabled) {
1607         handle = 1;
1608     } else if (arch_info->status) {
1609         if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1610             if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1611                 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1612                 if (n >= 0) {
1613                     handle = 1;
1614                 }
1615             } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1616                                             KVMPPC_DEBUG_WATCH_WRITE)) {
1617                 n = find_hw_watchpoint(arch_info->address,  &flag);
1618                 if (n >= 0) {
1619                     handle = 1;
1620                     cs->watchpoint_hit = &hw_watchpoint;
1621                     hw_watchpoint.vaddr = hw_debug_points[n].addr;
1622                     hw_watchpoint.flags = flag;
1623                 }
1624             }
1625         }
1626     } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1627         handle = 1;
1628     } else {
1629         /* QEMU is not able to handle debug exception, so inject
1630         /* QEMU is not able to handle this debug exception, so inject
1631          * a program exception into the guest;
1632          * Yes, a program exception, NOT a debug exception !!
1633          * When QEMU is using the debug resources, the debug exception
1634          * must always be set. To achieve this we set MSR_DE and also
1635          * set MSRP_DEP so the guest cannot change MSR_DE.
1636          * When emulating debug resources for the guest, we want the
1637          * guest to control MSR_DE (enable/disable the debug interrupt
1638          * on demand).
1639          * Supporting both configurations at once is NOT possible,
1640          * so we cannot share debug resources between QEMU and the
1641          * guest on the BookE architecture. In the current design QEMU
1642          * gets priority over the guest: if QEMU is using the debug
1643          * resources then the guest cannot use them.
1644          * For software breakpoints QEMU uses a privileged instruction,
1645          * so there is no way we got here because the guest raised a
1646          * debug exception; the only possibility is that the guest
1647          * executed a privileged / illegal instruction, and that is why
1648          * we are injecting a program interrupt.
1649          */
1650         cpu_synchronize_state(cs);
1651         /* env->nip is PC, so increment this by 4 to use
1652          * ppc_cpu_do_interrupt(), which sets srr0 = env->nip - 4.
1653          */
1654         env->nip += 4;
1655         cs->exception_index = POWERPC_EXCP_PROGRAM;
1656         env->error_code = POWERPC_EXCP_INVAL;
1657         ppc_cpu_do_interrupt(cs);
1658     }
1659 
1660     return handle;
1661 }
1662 
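/* Top-level dispatch for KVM exits on PPC.  Returns 0 to re-enter the
 * guest, EXCP_DEBUG to hand control to the debugger, or a negative
 * value for exit reasons we do not know how to handle.
 */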
1663 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1664 {
1665     PowerPCCPU *cpu = POWERPC_CPU(cs);
1666     CPUPPCState *env = &cpu->env;
1667     int ret;
1668 
1669     qemu_mutex_lock_iothread();
1670 
1671     switch (run->exit_reason) {
1672     case KVM_EXIT_DCR:
1673         if (run->dcr.is_write) {
1674             DPRINTF("handle dcr write\n");
1675             ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1676         } else {
1677             DPRINTF("handle dcr read\n");
1678             ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1679         }
1680         break;
1681     case KVM_EXIT_HLT:
1682         DPRINTF("handle halt\n");
1683         ret = kvmppc_handle_halt(cpu);
1684         break;
1685 #if defined(TARGET_PPC64)
1686     case KVM_EXIT_PAPR_HCALL:
1687         DPRINTF("handle PAPR hypercall\n");
1688         run->papr_hcall.ret = spapr_hypercall(cpu,
1689                                               run->papr_hcall.nr,
1690                                               run->papr_hcall.args);
1691         ret = 0;
1692         break;
1693 #endif
1694     case KVM_EXIT_EPR:
1695         DPRINTF("handle epr\n");
1696         run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1697         ret = 0;
1698         break;
1699     case KVM_EXIT_WATCHDOG:
1700         DPRINTF("handle watchdog expiry\n");
1701         watchdog_perform_action();
1702         ret = 0;
1703         break;
1704 
1705     case KVM_EXIT_DEBUG:
1706         DPRINTF("handle debug exception\n");
1707         if (kvm_handle_debug(cpu, run)) {
1708             ret = EXCP_DEBUG;
1709             break;
1710         }
1711         /* re-enter, this exception was guest-internal */
1712         ret = 0;
1713         break;
1714 
1715     default:
1716         fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1717         ret = -1;
1718         break;
1719     }
1720 
1721     qemu_mutex_unlock_iothread();
1722     return ret;
1723 }
1724 
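/* The BookE Timer Status Register lives in the kernel when the
 * in-kernel timers/watchdog are used, so it cannot simply be written
 * through env->spr.  The two helpers below use the ONE_REG
 * pseudo-registers KVM_REG_PPC_OR_TSR and KVM_REG_PPC_CLEAR_TSR to OR
 * bits into, respectively clear bits from, the guest's TSR.
 *
 * Illustrative use only (not a call made in this file):
 *     kvmppc_clear_tsr_bits(cpu, TSR_ENW | TSR_WIS);
 */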
1725 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1726 {
1727     CPUState *cs = CPU(cpu);
1728     uint32_t bits = tsr_bits;
1729     struct kvm_one_reg reg = {
1730         .id = KVM_REG_PPC_OR_TSR,
1731         .addr = (uintptr_t) &bits,
1732     };
1733 
1734     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1735 }
1736 
1737 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1738 {
1739 
1740     CPUState *cs = CPU(cpu);
1741     uint32_t bits = tsr_bits;
1742     struct kvm_one_reg reg = {
1743         .id = KVM_REG_PPC_CLEAR_TSR,
1744         .addr = (uintptr_t) &bits,
1745     };
1746 
1747     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1748 }
1749 
1750 int kvmppc_set_tcr(PowerPCCPU *cpu)
1751 {
1752     CPUState *cs = CPU(cpu);
1753     CPUPPCState *env = &cpu->env;
1754     uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1755 
1756     struct kvm_one_reg reg = {
1757         .id = KVM_REG_PPC_TCR,
1758         .addr = (uintptr_t) &tcr,
1759     };
1760 
1761     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1762 }
1763 
1764 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1765 {
1766     CPUState *cs = CPU(cpu);
1767     int ret;
1768 
1769     if (!kvm_enabled()) {
1770         return -1;
1771     }
1772 
1773     if (!cap_ppc_watchdog) {
1774         printf("warning: KVM does not support watchdog\n");
1775         return -1;
1776     }
1777 
1778     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1779     if (ret < 0) {
1780         fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1781                 __func__, strerror(-ret));
1782         return ret;
1783     }
1784 
1785     return ret;
1786 }
1787 
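/* Copy the first line of /proc/cpuinfo that starts with 'field' into
 * 'value' (at most 'len' bytes).  Returns 0 on success, -1 if the field
 * is not present or the file cannot be opened.
 */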
1788 static int read_cpuinfo(const char *field, char *value, int len)
1789 {
1790     FILE *f;
1791     int ret = -1;
1792     int field_len = strlen(field);
1793     char line[512];
1794 
1795     f = fopen("/proc/cpuinfo", "r");
1796     if (!f) {
1797         return -1;
1798     }
1799 
1800     do {
1801         if (!fgets(line, sizeof(line), f)) {
1802             break;
1803         }
1804         if (!strncmp(line, field, field_len)) {
1805             pstrcpy(value, len, line);
1806             ret = 0;
1807             break;
1808         }
1809     } while (*line);
1810 
1811     fclose(f);
1812 
1813     return ret;
1814 }
1815 
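/* Parse the timebase frequency from the "timebase" line of
 * /proc/cpuinfo (e.g. "timebase : 512000000" on POWER hosts).  Falls
 * back to NANOSECONDS_PER_SECOND if the line cannot be found or parsed.
 */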
1816 uint32_t kvmppc_get_tbfreq(void)
1817 {
1818     char line[512];
1819     char *ns;
1820     uint32_t retval = NANOSECONDS_PER_SECOND;
1821 
1822     if (read_cpuinfo("timebase", line, sizeof(line))) {
1823         return retval;
1824     }
1825 
1826     if (!(ns = strchr(line, ':'))) {
1827         return retval;
1828     }
1829 
1830     ns++;
1831 
1832     return atoi(ns);
1833 }
1834 
1835 bool kvmppc_get_host_serial(char **value)
1836 {
1837     return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
1838                                NULL);
1839 }
1840 
1841 bool kvmppc_get_host_model(char **value)
1842 {
1843     return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
1844 }
1845 
1846 /* Try to find a device tree node for a CPU with clock-frequency property */
1847 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1848 {
1849     struct dirent *dirp;
1850     DIR *dp;
1851 
1852     if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1853         printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1854         return -1;
1855     }
1856 
1857     buf[0] = '\0';
1858     while ((dirp = readdir(dp)) != NULL) {
1859         FILE *f;
1860         snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1861                  dirp->d_name);
1862         f = fopen(buf, "r");
1863         if (f) {
1864             snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1865             fclose(f);
1866             break;
1867         }
1868         buf[0] = '\0';
1869     }
1870     closedir(dp);
1871     if (buf[0] == '\0') {
1872         printf("Unknown host!\n");
1873         return -1;
1874     }
1875 
1876     return 0;
1877 }
1878 
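/* Read a device tree property file containing a single big-endian
 * integer.  Returns the value for 4- or 8-byte properties, 0 for any
 * other length, and -1 (all ones) if the file cannot be opened.
 */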
1879 static uint64_t kvmppc_read_int_dt(const char *filename)
1880 {
1881     union {
1882         uint32_t v32;
1883         uint64_t v64;
1884     } u;
1885     FILE *f;
1886     int len;
1887 
1888     f = fopen(filename, "rb");
1889     if (!f) {
1890         return -1;
1891     }
1892 
1893     len = fread(&u, 1, sizeof(u), f);
1894     fclose(f);
1895     switch (len) {
1896     case 4:
1897         /* property is a 32-bit quantity */
1898         return be32_to_cpu(u.v32);
1899     case 8:
1900         return be64_to_cpu(u.v64);
1901     }
1902 
1903     return 0;
1904 }
1905 
1906 /* Read a CPU node property from the host device tree that's a single
1907  * integer (32-bit or 64-bit).  Returns 0 if the property length is not
1908  * understood, or -1 (all ones) if the CPU node or the property cannot
1909  * be found or opened. */
1910 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1911 {
1912     char buf[PATH_MAX], *tmp;
1913     uint64_t val;
1914 
1915     if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1916         return -1;
1917     }
1918 
1919     tmp = g_strdup_printf("%s/%s", buf, propname);
1920     val = kvmppc_read_int_dt(tmp);
1921     g_free(tmp);
1922 
1923     return val;
1924 }
1925 
1926 uint64_t kvmppc_get_clockfreq(void)
1927 {
1928     return kvmppc_read_int_cpu_dt("clock-frequency");
1929 }
1930 
1931 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1932 {
1933     PowerPCCPU *cpu = ppc_env_get_cpu(env);
1934     CPUState *cs = CPU(cpu);
1935 
1936     if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1937         !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1938         return 0;
1939     }
1940 
1941     return 1;
1942 }
1943 
1944 int kvmppc_get_hasidle(CPUPPCState *env)
1945 {
1946     struct kvm_ppc_pvinfo pvinfo;
1947 
1948     if (!kvmppc_get_pvinfo(env, &pvinfo) &&
1949         (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
1950         return 1;
1951     }
1952 
1953     return 0;
1954 }
1955 
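/* Fill 'buf' with the instruction sequence a guest should use to make a
 * KVM hypercall.  If the kernel supplies one via KVM_PPC_GET_PVINFO it
 * is copied verbatim and 0 is returned; otherwise a fallback sequence
 * that always fails the hypercall (returning -1 in r3 for either guest
 * endianness) is written and 1 is returned.
 */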
1956 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
1957 {
1958     uint32_t *hc = (uint32_t *)buf;
1959     struct kvm_ppc_pvinfo pvinfo;
1960 
1961     if (!kvmppc_get_pvinfo(env, &pvinfo)) {
1962         memcpy(buf, pvinfo.hcall, buf_len);
1963         return 0;
1964     }
1965 
1966     /*
1967      * Fallback to always fail hypercalls regardless of endianness:
1968      *
1969      *     tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
1970      *     li r3, -1
1971      *     b .+8       (becomes nop in wrong endian)
1972      *     bswap32(li r3, -1)
1973      */
1974 
1975     hc[0] = cpu_to_be32(0x08000048);
1976     hc[1] = cpu_to_be32(0x3860ffff);
1977     hc[2] = cpu_to_be32(0x48000008);
1978     hc[3] = cpu_to_be32(bswap32(0x3860ffff));
1979 
1980     return 1;
1981 }
1982 
1983 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
1984 {
1985     return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
1986 }
1987 
1988 void kvmppc_enable_logical_ci_hcalls(void)
1989 {
1990     /*
1991      * FIXME: it would be nice if we could detect the cases where
1992      * we're using a device which requires the in kernel
1993      * implementation of these hcalls, but the kernel lacks them and
1994      * produce a warning.
1995      */
1996     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
1997     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
1998 }
1999 
2000 void kvmppc_enable_set_mode_hcall(void)
2001 {
2002     kvmppc_enable_hcall(kvm_state, H_SET_MODE);
2003 }
2004 
2005 void kvmppc_enable_clear_ref_mod_hcalls(void)
2006 {
2007     kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
2008     kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
2009 }
2010 
2011 void kvmppc_set_papr(PowerPCCPU *cpu)
2012 {
2013     CPUState *cs = CPU(cpu);
2014     int ret;
2015 
2016     if (!kvm_enabled()) {
2017         return;
2018     }
2019 
2020     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2021     if (ret) {
2022         error_report("This vCPU type or KVM version does not support PAPR");
2023         exit(1);
2024     }
2025 
2026     /* Update the capability flag so we sync the right information
2027      * with kvm */
2028     cap_papr = 1;
2029 }
2030 
2031 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr)
2032 {
2033     return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr);
2034 }
2035 
2036 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2037 {
2038     CPUState *cs = CPU(cpu);
2039     int ret;
2040 
2041     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2042     if (ret && mpic_proxy) {
2043         error_report("This KVM version does not support EPR");
2044         exit(1);
2045     }
2046 }
2047 
2048 int kvmppc_smt_threads(void)
2049 {
2050     return cap_ppc_smt ? cap_ppc_smt : 1;
2051 }
2052 
2053 int kvmppc_set_smt_threads(int smt)
2054 {
2055     int ret;
2056 
2057     ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_SMT, 0, smt, 0);
2058     if (!ret) {
2059         cap_ppc_smt = smt;
2060     }
2061     return ret;
2062 }
2063 
2064 void kvmppc_hint_smt_possible(Error **errp)
2065 {
2066     int i;
2067     GString *g;
2068     char *s;
2069 
2070     assert(kvm_enabled());
2071     if (cap_ppc_smt_possible) {
2072         g = g_string_new("Available VSMT modes:");
2073         for (i = 63; i >= 0; i--) {
2074             if ((1UL << i) & cap_ppc_smt_possible) {
2075                 g_string_append_printf(g, " %lu", (1UL << i));
2076             }
2077         }
2078         s = g_string_free(g, false);
2079         error_append_hint(errp, "%s.\n", s);
2080         g_free(s);
2081     } else {
2082         error_append_hint(errp,
2083                           "This KVM seems to be too old to support VSMT.\n");
2084     }
2085 }
2086 
2087 
2088 #ifdef TARGET_PPC64
2089 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2090 {
2091     struct kvm_ppc_smmu_info info;
2092     long rampagesize, best_page_shift;
2093     int i;
2094 
2095     /* Find the largest hardware supported page size that's less than
2096      * or equal to the (logical) backing page size of guest RAM */
2097     kvm_get_smmu_info(&info, &error_fatal);
2098     rampagesize = qemu_getrampagesize();
2099     best_page_shift = 0;
2100 
2101     for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2102         struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2103 
2104         if (!sps->page_shift) {
2105             continue;
2106         }
2107 
2108         if ((sps->page_shift > best_page_shift)
2109             && ((1UL << sps->page_shift) <= rampagesize)) {
2110             best_page_shift = sps->page_shift;
2111         }
2112     }
2113 
2114     return MIN(current_size,
2115                1ULL << (best_page_shift + hash_shift - 7));
2116 }
2117 #endif
2118 
2119 bool kvmppc_spapr_use_multitce(void)
2120 {
2121     return cap_spapr_multitce;
2122 }
2123 
2124 int kvmppc_spapr_enable_inkernel_multitce(void)
2125 {
2126     int ret;
2127 
2128     ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2129                             H_PUT_TCE_INDIRECT, 1);
2130     if (!ret) {
2131         ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2132                                 H_STUFF_TCE, 1);
2133     }
2134 
2135     return ret;
2136 }
2137 
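/* Create an in-kernel TCE table for the given LIOBN and mmap() it into
 * QEMU.  On success the table fd is returned through *pfd along with
 * the mapping; on any failure *pfd stays -1 and NULL is returned, which
 * tells the caller to fall back to a userspace-managed table.
 */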
2138 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
2139                               uint64_t bus_offset, uint32_t nb_table,
2140                               int *pfd, bool need_vfio)
2141 {
2142     long len;
2143     int fd;
2144     void *table;
2145 
2146     /* Must set fd to -1 so we don't try to munmap when called for
2147      * destroying the table, which the upper layers -will- do
2148      */
2149     *pfd = -1;
2150     if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2151         return NULL;
2152     }
2153 
2154     if (cap_spapr_tce_64) {
2155         struct kvm_create_spapr_tce_64 args = {
2156             .liobn = liobn,
2157             .page_shift = page_shift,
2158             .offset = bus_offset >> page_shift,
2159             .size = nb_table,
2160             .flags = 0
2161         };
2162         fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE_64, &args);
2163         if (fd < 0) {
2164             fprintf(stderr,
2165                     "KVM: Failed to create TCE64 table for liobn 0x%x\n",
2166                     liobn);
2167             return NULL;
2168         }
2169     } else if (cap_spapr_tce) {
2170         uint64_t window_size = (uint64_t) nb_table << page_shift;
2171         struct kvm_create_spapr_tce args = {
2172             .liobn = liobn,
2173             .window_size = window_size,
2174         };
2175         if ((window_size != args.window_size) || bus_offset) {
2176             return NULL;
2177         }
2178         fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2179         if (fd < 0) {
2180             fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2181                     liobn);
2182             return NULL;
2183         }
2184     } else {
2185         return NULL;
2186     }
2187 
2188     len = nb_table * sizeof(uint64_t);
2189     /* FIXME: round this up to page size */
2190 
2191     table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2192     if (table == MAP_FAILED) {
2193         fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2194                 liobn);
2195         close(fd);
2196         return NULL;
2197     }
2198 
2199     *pfd = fd;
2200     return table;
2201 }
2202 
2203 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
2204 {
2205     long len;
2206 
2207     if (fd < 0) {
2208         return -1;
2209     }
2210 
2211     len = nb_table * sizeof(uint64_t);
2212     if ((munmap(table, len) < 0) ||
2213         (close(fd) < 0)) {
2214         fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
2215                 strerror(errno));
2216         /* Leak the table */
2217     }
2218 
2219     return 0;
2220 }
2221 
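/* Ask the kernel to (re)allocate the guest hash page table.  Returns a
 * positive shift (log2 of the HPT size) if the kernel manages the
 * table, 0 if QEMU must allocate it itself (full emulation or PR KVM),
 * or a negative errno on failure.
 */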
2222 int kvmppc_reset_htab(int shift_hint)
2223 {
2224     uint32_t shift = shift_hint;
2225 
2226     if (!kvm_enabled()) {
2227         /* Full emulation, tell caller to allocate htab itself */
2228         return 0;
2229     }
2230     if (kvm_vm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2231         int ret;
2232         ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2233         if (ret == -ENOTTY) {
2234             /* At least some versions of PR KVM advertise the
2235              * capability, but don't implement the ioctl().  Oops.
2236              * Return 0 so that we allocate the htab in qemu, as is
2237              * correct for PR. */
2238             return 0;
2239         } else if (ret < 0) {
2240             return ret;
2241         }
2242         return shift;
2243     }
2244 
2245     /* We have a kernel that predates the htab reset calls.  For PR
2246      * KVM, we need to allocate the htab ourselves, for an HV KVM of
2247      * this era, it has allocated a 16MB fixed size hash table already. */
2248     if (kvmppc_is_pr(kvm_state)) {
2249         /* PR - tell caller to allocate htab */
2250         return 0;
2251     } else {
2252         /* HV - assume 16MB kernel allocated htab */
2253         return 24;
2254     }
2255 }
2256 
2257 static inline uint32_t mfpvr(void)
2258 {
2259     uint32_t pvr;
2260 
2261     asm ("mfpvr %0"
2262          : "=r"(pvr));
2263     return pvr;
2264 }
2265 
2266 static void alter_insns(uint64_t *word, uint64_t flags, bool on)
2267 {
2268     if (on) {
2269         *word |= flags;
2270     } else {
2271         *word &= ~flags;
2272     }
2273 }
2274 
2275 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2276 {
2277     PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2278     uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2279     uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2280 
2281     /* Now fix up the class with information we can query from the host */
2282     pcc->pvr = mfpvr();
2283 
2284     alter_insns(&pcc->insns_flags, PPC_ALTIVEC,
2285                 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_ALTIVEC);
2286     alter_insns(&pcc->insns_flags2, PPC2_VSX,
2287                 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_VSX);
2288     alter_insns(&pcc->insns_flags2, PPC2_DFP,
2289                 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_DFP);
2290 
2291     if (dcache_size != -1) {
2292         pcc->l1_dcache_size = dcache_size;
2293     }
2294 
2295     if (icache_size != -1) {
2296         pcc->l1_icache_size = icache_size;
2297     }
2298 
2299 #if defined(TARGET_PPC64)
2300     pcc->radix_page_info = kvm_get_radix_page_info();
2301 
2302     if ((pcc->pvr & 0xffffff00) == CPU_POWERPC_POWER9_DD1) {
2303         /*
2304          * POWER9 DD1 has some bugs which make it not really ISA 3.00
2305          * compliant.  More importantly, advertising ISA 3.00
2306          * architected mode may prevent guests from activating
2307          * necessary DD1 workarounds.
2308          */
2309         pcc->pcr_supported &= ~(PCR_COMPAT_3_00 | PCR_COMPAT_2_07
2310                                 | PCR_COMPAT_2_06 | PCR_COMPAT_2_05);
2311     }
2312 #endif /* defined(TARGET_PPC64) */
2313 }
2314 
2315 bool kvmppc_has_cap_epr(void)
2316 {
2317     return cap_epr;
2318 }
2319 
2320 bool kvmppc_has_cap_fixup_hcalls(void)
2321 {
2322     return cap_fixup_hcalls;
2323 }
2324 
2325 bool kvmppc_has_cap_htm(void)
2326 {
2327     return cap_htm;
2328 }
2329 
2330 bool kvmppc_has_cap_mmu_radix(void)
2331 {
2332     return cap_mmu_radix;
2333 }
2334 
2335 bool kvmppc_has_cap_mmu_hash_v3(void)
2336 {
2337     return cap_mmu_hash_v3;
2338 }
2339 
2340 static bool kvmppc_power8_host(void)
2341 {
2342     bool ret = false;
2343 #ifdef TARGET_PPC64
2344     {
2345         uint32_t base_pvr = CPU_POWERPC_POWER_SERVER_MASK & mfpvr();
2346         ret = (base_pvr == CPU_POWERPC_POWER8E_BASE) ||
2347               (base_pvr == CPU_POWERPC_POWER8NVL_BASE) ||
2348               (base_pvr == CPU_POWERPC_POWER8_BASE);
2349     }
2350 #endif /* TARGET_PPC64 */
2351     return ret;
2352 }
2353 
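/* The parse_cap_ppc_safe_*() helpers below translate the
 * character/behaviour bits returned by KVM_PPC_GET_CPU_CHAR into the
 * small integers used by the spapr capability code: 0 means no
 * protection is available, larger values indicate progressively better
 * mitigation (a workaround instruction sequence, or a fix that needs
 * no guest action).
 */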
2354 static int parse_cap_ppc_safe_cache(struct kvm_ppc_cpu_char c)
2355 {
2356     bool l1d_thread_priv_req = !kvmppc_power8_host();
2357 
2358     if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_L1D_FLUSH_PR) {
2359         return 2;
2360     } else if ((!l1d_thread_priv_req ||
2361                 c.character & c.character_mask & H_CPU_CHAR_L1D_THREAD_PRIV) &&
2362                (c.character & c.character_mask
2363                 & (H_CPU_CHAR_L1D_FLUSH_ORI30 | H_CPU_CHAR_L1D_FLUSH_TRIG2))) {
2364         return 1;
2365     }
2366 
2367     return 0;
2368 }
2369 
2370 static int parse_cap_ppc_safe_bounds_check(struct kvm_ppc_cpu_char c)
2371 {
2372     if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_BNDS_CHK_SPEC_BAR) {
2373         return 2;
2374     } else if (c.character & c.character_mask & H_CPU_CHAR_SPEC_BAR_ORI31) {
2375         return 1;
2376     }
2377 
2378     return 0;
2379 }
2380 
2381 static int parse_cap_ppc_safe_indirect_branch(struct kvm_ppc_cpu_char c)
2382 {
2383     if (c.character & c.character_mask & H_CPU_CHAR_CACHE_COUNT_DIS) {
2384         return  SPAPR_CAP_FIXED_CCD;
2385     } else if (c.character & c.character_mask & H_CPU_CHAR_BCCTRL_SERIALISED) {
2386         return SPAPR_CAP_FIXED_IBS;
2387     }
2388 
2389     return 0;
2390 }
2391 
2392 static void kvmppc_get_cpu_characteristics(KVMState *s)
2393 {
2394     struct kvm_ppc_cpu_char c;
2395     int ret;
2396 
2397     /* Assume broken */
2398     cap_ppc_safe_cache = 0;
2399     cap_ppc_safe_bounds_check = 0;
2400     cap_ppc_safe_indirect_branch = 0;
2401 
2402     ret = kvm_vm_check_extension(s, KVM_CAP_PPC_GET_CPU_CHAR);
2403     if (!ret) {
2404         return;
2405     }
2406     ret = kvm_vm_ioctl(s, KVM_PPC_GET_CPU_CHAR, &c);
2407     if (ret < 0) {
2408         return;
2409     }
2410 
2411     cap_ppc_safe_cache = parse_cap_ppc_safe_cache(c);
2412     cap_ppc_safe_bounds_check = parse_cap_ppc_safe_bounds_check(c);
2413     cap_ppc_safe_indirect_branch = parse_cap_ppc_safe_indirect_branch(c);
2414 }
2415 
2416 int kvmppc_get_cap_safe_cache(void)
2417 {
2418     return cap_ppc_safe_cache;
2419 }
2420 
2421 int kvmppc_get_cap_safe_bounds_check(void)
2422 {
2423     return cap_ppc_safe_bounds_check;
2424 }
2425 
2426 int kvmppc_get_cap_safe_indirect_branch(void)
2427 {
2428     return cap_ppc_safe_indirect_branch;
2429 }
2430 
2431 bool kvmppc_has_cap_nested_kvm_hv(void)
2432 {
2433     return !!cap_ppc_nested_kvm_hv;
2434 }
2435 
2436 int kvmppc_set_cap_nested_kvm_hv(int enable)
2437 {
2438     return kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_NESTED_HV, 0, enable);
2439 }
2440 
2441 bool kvmppc_has_cap_spapr_vfio(void)
2442 {
2443     return cap_spapr_vfio;
2444 }
2445 
2446 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
2447 {
2448     uint32_t host_pvr = mfpvr();
2449     PowerPCCPUClass *pvr_pcc;
2450 
2451     pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2452     if (pvr_pcc == NULL) {
2453         pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2454     }
2455 
2456     return pvr_pcc;
2457 }
2458 
2459 static int kvm_ppc_register_host_cpu_type(MachineState *ms)
2460 {
2461     TypeInfo type_info = {
2462         .name = TYPE_HOST_POWERPC_CPU,
2463         .class_init = kvmppc_host_cpu_class_init,
2464     };
2465     MachineClass *mc = MACHINE_GET_CLASS(ms);
2466     PowerPCCPUClass *pvr_pcc;
2467     ObjectClass *oc;
2468     DeviceClass *dc;
2469     int i;
2470 
2471     pvr_pcc = kvm_ppc_get_host_cpu_class();
2472     if (pvr_pcc == NULL) {
2473         return -1;
2474     }
2475     type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2476     type_register(&type_info);
2477     if (object_dynamic_cast(OBJECT(ms), TYPE_SPAPR_MACHINE)) {
2478         /* override TCG default cpu type with 'host' cpu model */
2479         mc->default_cpu_type = TYPE_HOST_POWERPC_CPU;
2480     }
2481 
2482     oc = object_class_by_name(type_info.name);
2483     g_assert(oc);
2484 
2485     /*
2486      * Update generic CPU family class alias (e.g. on a POWER8NVL host,
2487      * we want "POWER8" to be a "family" alias that points to the current
2488      * host CPU type, too)
2489      */
2490     dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc));
2491     for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) {
2492         if (strcasecmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) {
2493             char *suffix;
2494 
2495             ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc));
2496             suffix = strstr(ppc_cpu_aliases[i].model, POWERPC_CPU_TYPE_SUFFIX);
2497             if (suffix) {
2498                 *suffix = 0;
2499             }
2500             break;
2501         }
2502     }
2503 
2504     return 0;
2505 }
2506 
2507 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2508 {
2509     struct kvm_rtas_token_args args = {
2510         .token = token,
2511     };
2512 
2513     if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2514         return -ENOENT;
2515     }
2516 
2517     strncpy(args.name, function, sizeof(args.name));
2518 
2519     return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2520 }
2521 
2522 int kvmppc_get_htab_fd(bool write, uint64_t index, Error **errp)
2523 {
2524     struct kvm_get_htab_fd s = {
2525         .flags = write ? KVM_GET_HTAB_WRITE : 0,
2526         .start_index = index,
2527     };
2528     int ret;
2529 
2530     if (!cap_htab_fd) {
2531         error_setg(errp, "KVM version doesn't support %s the HPT",
2532                    write ? "writing" : "reading");
2533         return -ENOTSUP;
2534     }
2535 
2536     ret = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2537     if (ret < 0) {
2538         error_setg(errp, "Unable to open fd for %s HPT %s KVM: %s",
2539                    write ? "writing" : "reading", write ? "to" : "from",
2540                    strerror(errno));
2541         return -errno;
2542     }
2543 
2544     return ret;
2545 }
2546 
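/* Stream HPT chunks from the KVM HTAB fd into the migration stream.
 * Each chunk read from the fd is a kvm_get_htab_header (index, n_valid,
 * n_invalid) followed by n_valid HPTEs, and is forwarded in the same
 * shape.  Returns 1 once the fd reports EOF (the whole table has been
 * sent), or 0 if the max_ns time budget expired with data still
 * pending.
 */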
2547 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2548 {
2549     int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2550     uint8_t buf[bufsize];
2551     ssize_t rc;
2552 
2553     do {
2554         rc = read(fd, buf, bufsize);
2555         if (rc < 0) {
2556             fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2557                     strerror(errno));
2558             return rc;
2559         } else if (rc) {
2560             uint8_t *buffer = buf;
2561             ssize_t n = rc;
2562             while (n) {
2563                 struct kvm_get_htab_header *head =
2564                     (struct kvm_get_htab_header *) buffer;
2565                 size_t chunksize = sizeof(*head) +
2566                      HASH_PTE_SIZE_64 * head->n_valid;
2567 
2568                 qemu_put_be32(f, head->index);
2569                 qemu_put_be16(f, head->n_valid);
2570                 qemu_put_be16(f, head->n_invalid);
2571                 qemu_put_buffer(f, (void *)(head + 1),
2572                                 HASH_PTE_SIZE_64 * head->n_valid);
2573 
2574                 buffer += chunksize;
2575                 n -= chunksize;
2576             }
2577         }
2578     } while ((rc != 0)
2579              && ((max_ns < 0)
2580                  || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2581 
2582     return (rc == 0) ? 1 : 0;
2583 }
2584 
2585 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2586                            uint16_t n_valid, uint16_t n_invalid)
2587 {
2588     struct kvm_get_htab_header *buf;
2589     size_t chunksize = sizeof(*buf) + n_valid * HASH_PTE_SIZE_64;
2590     ssize_t rc;
2591 
2592     buf = alloca(chunksize);
2593     buf->index = index;
2594     buf->n_valid = n_valid;
2595     buf->n_invalid = n_invalid;
2596 
2597     qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64 * n_valid);
2598 
2599     rc = write(fd, buf, chunksize);
2600     if (rc < 0) {
2601         fprintf(stderr, "Error writing KVM hash table: %s\n",
2602                 strerror(errno));
2603         return rc;
2604     }
2605     if (rc != chunksize) {
2606         /* We should never get a short write on a single chunk */
2607         fprintf(stderr, "Short write, restoring KVM hash table\n");
2608         return -1;
2609     }
2610     return 0;
2611 }
2612 
2613 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2614 {
2615     return true;
2616 }
2617 
2618 void kvm_arch_init_irq_routing(KVMState *s)
2619 {
2620 }
2621 
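/* Read 'n' HPTEs starting at 'ptex' through the HTAB fd into 'hptes'.
 * Ranges the kernel reports as invalid are zero-filled in the
 * destination so the caller always gets n fully initialised entries.
 */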
2622 void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n)
2623 {
2624     int fd, rc;
2625     int i;
2626 
2627     fd = kvmppc_get_htab_fd(false, ptex, &error_abort);
2628 
2629     i = 0;
2630     while (i < n) {
2631         struct kvm_get_htab_header *hdr;
2632         int m = n < HPTES_PER_GROUP ? n : HPTES_PER_GROUP;
2633         char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64];
2634 
2635         rc = read(fd, buf, sizeof(buf));
2636         if (rc < 0) {
2637             hw_error("kvmppc_read_hptes: Unable to read HPTEs");
2638         }
2639 
2640         hdr = (struct kvm_get_htab_header *)buf;
2641         while ((i < n) && ((char *)hdr < (buf + rc))) {
2642             int invalid = hdr->n_invalid, valid = hdr->n_valid;
2643 
2644             if (hdr->index != (ptex + i)) {
2645                 hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32
2646                          " != (%"HWADDR_PRIu" + %d", hdr->index, ptex, i);
2647             }
2648 
2649             if (n - i < valid) {
2650                 valid = n - i;
2651             }
2652             memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * valid);
2653             i += valid;
2654 
2655             if ((n - i) < invalid) {
2656                 invalid = n - i;
2657             }
2658             memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64);
2659             i += invalid;
2660 
2661             hdr = (struct kvm_get_htab_header *)
2662                 ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid);
2663         }
2664     }
2665 
2666     close(fd);
2667 }
2668 
2669 void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1)
2670 {
2671     int fd, rc;
2672     struct {
2673         struct kvm_get_htab_header hdr;
2674         uint64_t pte0;
2675         uint64_t pte1;
2676     } buf;
2677 
2678     fd = kvmppc_get_htab_fd(true, 0 /* Ignored */, &error_abort);
2679 
2680     buf.hdr.n_valid = 1;
2681     buf.hdr.n_invalid = 0;
2682     buf.hdr.index = ptex;
2683     buf.pte0 = cpu_to_be64(pte0);
2684     buf.pte1 = cpu_to_be64(pte1);
2685 
2686     rc = write(fd, &buf, sizeof(buf));
2687     if (rc != sizeof(buf)) {
2688         hw_error("kvmppc_write_hpte: Unable to update KVM HPT");
2689     }
2690     close(fd);
2691 }
2692 
2693 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2694                              uint64_t address, uint32_t data, PCIDevice *dev)
2695 {
2696     return 0;
2697 }
2698 
2699 int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
2700                                 int vector, PCIDevice *dev)
2701 {
2702     return 0;
2703 }
2704 
2705 int kvm_arch_release_virq_post(int virq)
2706 {
2707     return 0;
2708 }
2709 
2710 int kvm_arch_msi_data_to_gsi(uint32_t data)
2711 {
2712     return data & 0xffff;
2713 }
2714 
2715 int kvmppc_enable_hwrng(void)
2716 {
2717     if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2718         return -1;
2719     }
2720 
2721     return kvmppc_enable_hcall(kvm_state, H_RANDOM);
2722 }
2723 
2724 void kvmppc_check_papr_resize_hpt(Error **errp)
2725 {
2726     if (!kvm_enabled()) {
2727         return; /* No KVM, we're good */
2728     }
2729 
2730     if (cap_resize_hpt) {
2731         return; /* Kernel has explicit support, we're good */
2732     }
2733 
2734     /* Otherwise fallback on looking for PR KVM */
2735     if (kvmppc_is_pr(kvm_state)) {
2736         return;
2737     }
2738 
2739     error_setg(errp,
2740                "Hash page table resizing not available with this KVM version");
2741 }
2742 
2743 int kvmppc_resize_hpt_prepare(PowerPCCPU *cpu, target_ulong flags, int shift)
2744 {
2745     CPUState *cs = CPU(cpu);
2746     struct kvm_ppc_resize_hpt rhpt = {
2747         .flags = flags,
2748         .shift = shift,
2749     };
2750 
2751     if (!cap_resize_hpt) {
2752         return -ENOSYS;
2753     }
2754 
2755     return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_PREPARE, &rhpt);
2756 }
2757 
2758 int kvmppc_resize_hpt_commit(PowerPCCPU *cpu, target_ulong flags, int shift)
2759 {
2760     CPUState *cs = CPU(cpu);
2761     struct kvm_ppc_resize_hpt rhpt = {
2762         .flags = flags,
2763         .shift = shift,
2764     };
2765 
2766     if (!cap_resize_hpt) {
2767         return -ENOSYS;
2768     }
2769 
2770     return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_COMMIT, &rhpt);
2771 }
2772 
2773 /*
2774  * This is a helper function to detect a post migration scenario
2775  * in which a guest, running as KVM-HV, freezes in cpu_post_load because
2776  * the guest kernel can't handle a PVR value other than the actual host
2777  * PVR in KVM_SET_SREGS, even if pvr_match() returns true.
2778  *
2779  * If we don't have cap_ppc_pvr_compat and we're not running in PR
2780  * (so, we're HV), return true. The workaround itself is done in
2781  * cpu_post_load.
2782  *
2783  * The order here is important: we'll only check for KVM PR as a
2784  * fallback if the guest kernel can't handle the situation itself.
2785  * We need to avoid as much as possible querying the running KVM type
2786  * in QEMU level.
2787  */
2788 bool kvmppc_pvr_workaround_required(PowerPCCPU *cpu)
2789 {
2790     CPUState *cs = CPU(cpu);
2791 
2792     if (!kvm_enabled()) {
2793         return false;
2794     }
2795 
2796     if (cap_ppc_pvr_compat) {
2797         return false;
2798     }
2799 
2800     return !kvmppc_is_pr(cs->kvm_state);
2801 }
2802 
2803 void kvmppc_set_reg_ppc_online(PowerPCCPU *cpu, unsigned int online)
2804 {
2805     CPUState *cs = CPU(cpu);
2806 
2807     if (kvm_enabled()) {
2808         kvm_set_one_reg(cs, KVM_REG_PPC_ONLINE, &online);
2809     }
2810 }
2811