xref: /openbmc/qemu/target/ppc/kvm.c (revision 666952ea7c12c4c44282a3b00b817509008df215)
1 /*
2  * PowerPC implementation of KVM hooks
3  *
4  * Copyright IBM Corp. 2007
5  * Copyright (C) 2011 Freescale Semiconductor, Inc.
6  *
7  * Authors:
8  *  Jerone Young <jyoung5@us.ibm.com>
9  *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10  *  Hollis Blanchard <hollisb@us.ibm.com>
11  *
12  * This work is licensed under the terms of the GNU GPL, version 2 or later.
13  * See the COPYING file in the top-level directory.
14  *
15  */
16 
17 #include "qemu/osdep.h"
18 #include <dirent.h>
19 #include <sys/ioctl.h>
20 #include <sys/vfs.h>
21 
22 #include <linux/kvm.h>
23 
24 #include "qemu-common.h"
25 #include "qapi/error.h"
26 #include "qemu/error-report.h"
27 #include "cpu.h"
28 #include "cpu-models.h"
29 #include "qemu/timer.h"
30 #include "sysemu/sysemu.h"
31 #include "sysemu/hw_accel.h"
32 #include "kvm_ppc.h"
33 #include "sysemu/cpus.h"
34 #include "sysemu/device_tree.h"
35 #include "mmu-hash64.h"
36 
37 #include "hw/sysbus.h"
38 #include "hw/ppc/spapr.h"
39 #include "hw/ppc/spapr_cpu_core.h"
40 #include "hw/ppc/ppc.h"
41 #include "sysemu/watchdog.h"
42 #include "trace.h"
43 #include "exec/gdbstub.h"
44 #include "exec/memattrs.h"
45 #include "exec/ram_addr.h"
46 #include "sysemu/hostmem.h"
47 #include "qemu/cutils.h"
48 #include "qemu/mmap-alloc.h"
49 #include "elf.h"
50 #include "sysemu/kvm_int.h"
51 
52 #define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"
53 
54 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
55     KVM_CAP_LAST_INFO
56 };
57 
58 static int cap_interrupt_unset;
59 static int cap_interrupt_level;
60 static int cap_segstate;
61 static int cap_booke_sregs;
62 static int cap_ppc_smt;
63 static int cap_ppc_smt_possible;
64 static int cap_spapr_tce;
65 static int cap_spapr_tce_64;
66 static int cap_spapr_multitce;
67 static int cap_spapr_vfio;
68 static int cap_hior;
69 static int cap_one_reg;
70 static int cap_epr;
71 static int cap_ppc_watchdog;
72 static int cap_papr;
73 static int cap_htab_fd;
74 static int cap_fixup_hcalls;
75 static int cap_htm;             /* Hardware transactional memory support */
76 static int cap_mmu_radix;
77 static int cap_mmu_hash_v3;
78 static int cap_xive;
79 static int cap_resize_hpt;
80 static int cap_ppc_pvr_compat;
81 static int cap_ppc_safe_cache;
82 static int cap_ppc_safe_bounds_check;
83 static int cap_ppc_safe_indirect_branch;
84 static int cap_ppc_count_cache_flush_assist;
85 static int cap_ppc_nested_kvm_hv;
86 static int cap_large_decr;
87 
88 static uint32_t debug_inst_opcode;
89 
90 /*
91  * XXX We have a race condition where we actually have a level triggered
92  *     interrupt, but the infrastructure can't expose that yet, so the guest
93  *     takes but ignores it, goes to sleep and never gets notified that there's
94  *     still an interrupt pending.
95  *
96  *     As a quick workaround, let's just wake up again 20 ms after we injected
97  *     an interrupt. That way we can assure that we're always reinjecting
98  *     interrupts in case the guest swallowed them.
99  */
100 static QEMUTimer *idle_timer;
101 
102 static void kvm_kick_cpu(void *opaque)
103 {
104     PowerPCCPU *cpu = opaque;
105 
106     qemu_cpu_kick(CPU(cpu));
107 }
108 
109 /*
110  * Check whether we are running with KVM-PR (instead of KVM-HV).  This
111  * should only be used for fallback tests - generally we should use
112  * explicit capabilities for the features we want, rather than
113  * assuming what is/isn't available depending on the KVM variant.
114  */
115 static bool kvmppc_is_pr(KVMState *ks)
116 {
117     /* Assume KVM-PR if the GET_PVINFO capability is available */
118     return kvm_vm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
119 }
120 
121 static int kvm_ppc_register_host_cpu_type(MachineState *ms);
122 static void kvmppc_get_cpu_characteristics(KVMState *s);
123 static int kvmppc_get_dec_bits(void);
124 
125 int kvm_arch_init(MachineState *ms, KVMState *s)
126 {
127     cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
128     cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
129     cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
130     cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
131     cap_ppc_smt_possible = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT_POSSIBLE);
132     cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
133     cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64);
134     cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
135     cap_spapr_vfio = kvm_vm_check_extension(s, KVM_CAP_SPAPR_TCE_VFIO);
136     cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
137     cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
138     cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
139     cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
140     /*
141      * Note: we don't set cap_papr here, because this capability is
142      * only activated after this by kvmppc_set_papr()
143      */
144     cap_htab_fd = kvm_vm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
145     cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
146     cap_ppc_smt = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT);
147     cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
148     cap_mmu_radix = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_RADIX);
149     cap_mmu_hash_v3 = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_HASH_V3);
150     cap_xive = kvm_vm_check_extension(s, KVM_CAP_PPC_IRQ_XIVE);
151     cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT);
152     kvmppc_get_cpu_characteristics(s);
153     cap_ppc_nested_kvm_hv = kvm_vm_check_extension(s, KVM_CAP_PPC_NESTED_HV);
154     cap_large_decr = kvmppc_get_dec_bits();
155     /*
156      * Note: setting it to false because there is not such capability
157      * in KVM at this moment.
158      *
159      * TODO: call kvm_vm_check_extension() with the right capability
160      * after the kernel starts implementing it.
161      */
162     cap_ppc_pvr_compat = false;
163 
164     if (!cap_interrupt_level) {
165         fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
166                         "VM to stall at times!\n");
167     }
168 
169     kvm_ppc_register_host_cpu_type(ms);
170 
171     return 0;
172 }
173 
174 int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
175 {
176     return 0;
177 }
178 
179 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
180 {
181     CPUPPCState *cenv = &cpu->env;
182     CPUState *cs = CPU(cpu);
183     struct kvm_sregs sregs;
184     int ret;
185 
186     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
187         /*
188          * What we're really trying to say is "if we're on BookE, we
189          * use the native PVR for now". This is the only sane way to
190          * check it though, so we potentially confuse users that they
191          * can run BookE guests on BookS. Let's hope nobody dares
192          * enough :)
193          */
194         return 0;
195     } else {
196         if (!cap_segstate) {
197             fprintf(stderr, "kvm error: missing PVR setting capability\n");
198             return -ENOSYS;
199         }
200     }
201 
202     ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
203     if (ret) {
204         return ret;
205     }
206 
207     sregs.pvr = cenv->spr[SPR_PVR];
208     return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
209 }
210 
211 /* Set up a shared TLB array with KVM */
212 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
213 {
214     CPUPPCState *env = &cpu->env;
215     CPUState *cs = CPU(cpu);
216     struct kvm_book3e_206_tlb_params params = {};
217     struct kvm_config_tlb cfg = {};
218     unsigned int entries = 0;
219     int ret, i;
220 
221     if (!kvm_enabled() ||
222         !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
223         return 0;
224     }
225 
226     assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
227 
228     for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
229         params.tlb_sizes[i] = booke206_tlb_size(env, i);
230         params.tlb_ways[i] = booke206_tlb_ways(env, i);
231         entries += params.tlb_sizes[i];
232     }
233 
234     assert(entries == env->nb_tlb);
235     assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
236 
237     env->tlb_dirty = true;
238 
239     cfg.array = (uintptr_t)env->tlb.tlbm;
240     cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
241     cfg.params = (uintptr_t)&params;
242     cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
243 
244     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
245     if (ret < 0) {
246         fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
247                 __func__, strerror(-ret));
248         return ret;
249     }
250 
251     env->kvm_sw_tlb = true;
252     return 0;
253 }
254 
255 
256 #if defined(TARGET_PPC64)
257 static void kvm_get_smmu_info(struct kvm_ppc_smmu_info *info, Error **errp)
258 {
259     int ret;
260 
261     assert(kvm_state != NULL);
262 
263     if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
264         error_setg(errp, "KVM doesn't expose the MMU features it supports");
265         error_append_hint(errp, "Consider switching to a newer KVM\n");
266         return;
267     }
268 
269     ret = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_SMMU_INFO, info);
270     if (ret == 0) {
271         return;
272     }
273 
274     error_setg_errno(errp, -ret,
275                      "KVM failed to provide the MMU features it supports");
276 }
277 
278 struct ppc_radix_page_info *kvm_get_radix_page_info(void)
279 {
280     KVMState *s = KVM_STATE(current_machine->accelerator);
281     struct ppc_radix_page_info *radix_page_info;
282     struct kvm_ppc_rmmu_info rmmu_info;
283     int i;
284 
285     if (!kvm_check_extension(s, KVM_CAP_PPC_MMU_RADIX)) {
286         return NULL;
287     }
288     if (kvm_vm_ioctl(s, KVM_PPC_GET_RMMU_INFO, &rmmu_info)) {
289         return NULL;
290     }
291     radix_page_info = g_malloc0(sizeof(*radix_page_info));
292     radix_page_info->count = 0;
293     for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) {
294         if (rmmu_info.ap_encodings[i]) {
295             radix_page_info->entries[i] = rmmu_info.ap_encodings[i];
296             radix_page_info->count++;
297         }
298     }
299     return radix_page_info;
300 }
301 
302 target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu,
303                                      bool radix, bool gtse,
304                                      uint64_t proc_tbl)
305 {
306     CPUState *cs = CPU(cpu);
307     int ret;
308     uint64_t flags = 0;
309     struct kvm_ppc_mmuv3_cfg cfg = {
310         .process_table = proc_tbl,
311     };
312 
313     if (radix) {
314         flags |= KVM_PPC_MMUV3_RADIX;
315     }
316     if (gtse) {
317         flags |= KVM_PPC_MMUV3_GTSE;
318     }
319     cfg.flags = flags;
320     ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_CONFIGURE_V3_MMU, &cfg);
321     switch (ret) {
322     case 0:
323         return H_SUCCESS;
324     case -EINVAL:
325         return H_PARAMETER;
326     case -ENODEV:
327         return H_NOT_AVAILABLE;
328     default:
329         return H_HARDWARE;
330     }
331 }
332 
333 bool kvmppc_hpt_needs_host_contiguous_pages(void)
334 {
335     static struct kvm_ppc_smmu_info smmu_info;
336 
337     if (!kvm_enabled()) {
338         return false;
339     }
340 
341     kvm_get_smmu_info(&smmu_info, &error_fatal);
342     return !!(smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL);
343 }
344 
345 void kvm_check_mmu(PowerPCCPU *cpu, Error **errp)
346 {
347     struct kvm_ppc_smmu_info smmu_info;
348     int iq, ik, jq, jk;
349     Error *local_err = NULL;
350 
351     /* For now, we only have anything to check on hash64 MMUs */
352     if (!cpu->hash64_opts || !kvm_enabled()) {
353         return;
354     }
355 
356     kvm_get_smmu_info(&smmu_info, &local_err);
357     if (local_err) {
358         error_propagate(errp, local_err);
359         return;
360     }
361 
362     if (ppc_hash64_has(cpu, PPC_HASH64_1TSEG)
363         && !(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
364         error_setg(errp,
365                    "KVM does not support 1TiB segments which guest expects");
366         return;
367     }
368 
369     if (smmu_info.slb_size < cpu->hash64_opts->slb_size) {
370         error_setg(errp, "KVM only supports %u SLB entries, but guest needs %u",
371                    smmu_info.slb_size, cpu->hash64_opts->slb_size);
372         return;
373     }
374 
375     /*
376      * Verify that every pagesize supported by the cpu model is
377      * supported by KVM with the same encodings
378      */
379     for (iq = 0; iq < ARRAY_SIZE(cpu->hash64_opts->sps); iq++) {
380         PPCHash64SegmentPageSizes *qsps = &cpu->hash64_opts->sps[iq];
381         struct kvm_ppc_one_seg_page_size *ksps;
382 
383         for (ik = 0; ik < ARRAY_SIZE(smmu_info.sps); ik++) {
384             if (qsps->page_shift == smmu_info.sps[ik].page_shift) {
385                 break;
386             }
387         }
388         if (ik >= ARRAY_SIZE(smmu_info.sps)) {
389             error_setg(errp, "KVM doesn't support for base page shift %u",
390                        qsps->page_shift);
391             return;
392         }
393 
394         ksps = &smmu_info.sps[ik];
395         if (ksps->slb_enc != qsps->slb_enc) {
396             error_setg(errp,
397 "KVM uses SLB encoding 0x%x for page shift %u, but guest expects 0x%x",
398                        ksps->slb_enc, ksps->page_shift, qsps->slb_enc);
399             return;
400         }
401 
402         for (jq = 0; jq < ARRAY_SIZE(qsps->enc); jq++) {
403             for (jk = 0; jk < ARRAY_SIZE(ksps->enc); jk++) {
404                 if (qsps->enc[jq].page_shift == ksps->enc[jk].page_shift) {
405                     break;
406                 }
407             }
408 
409             if (jk >= ARRAY_SIZE(ksps->enc)) {
410                 error_setg(errp, "KVM doesn't support page shift %u/%u",
411                            qsps->enc[jq].page_shift, qsps->page_shift);
412                 return;
413             }
414             if (qsps->enc[jq].pte_enc != ksps->enc[jk].pte_enc) {
415                 error_setg(errp,
416 "KVM uses PTE encoding 0x%x for page shift %u/%u, but guest expects 0x%x",
417                            ksps->enc[jk].pte_enc, qsps->enc[jq].page_shift,
418                            qsps->page_shift, qsps->enc[jq].pte_enc);
419                 return;
420             }
421         }
422     }
423 
424     if (ppc_hash64_has(cpu, PPC_HASH64_CI_LARGEPAGE)) {
425         /*
426          * Mostly what guest pagesizes we can use are related to the
427          * host pages used to map guest RAM, which is handled in the
428          * platform code. Cache-Inhibited largepages (64k) however are
429          * used for I/O, so if they're mapped to the host at all it
430          * will be a normal mapping, not a special hugepage one used
431          * for RAM.
432          */
433         if (getpagesize() < 0x10000) {
434             error_setg(errp,
435                        "KVM can't supply 64kiB CI pages, which guest expects");
436         }
437     }
438 }
#endif /* defined(TARGET_PPC64) */
440 
441 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
442 {
443     return POWERPC_CPU(cpu)->vcpu_id;
444 }
445 
/*
 * e500 supports 2 h/w breakpoints and 2 watchpoints.  book3s supports
 * only 1 watchpoint, so an array size of 4 is sufficient for now.
 */
450 #define MAX_HW_BKPTS 4
451 
452 static struct HWBreakpoint {
453     target_ulong addr;
454     int type;
455 } hw_debug_points[MAX_HW_BKPTS];
456 
457 static CPUWatchpoint hw_watchpoint;
458 
/* By default, no hardware breakpoints or watchpoints are supported */
460 static int max_hw_breakpoint;
461 static int max_hw_watchpoint;
462 static int nb_hw_breakpoint;
463 static int nb_hw_watchpoint;
464 
465 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
466 {
467     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
468         max_hw_breakpoint = 2;
469         max_hw_watchpoint = 2;
470     }
471 
472     if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
473         fprintf(stderr, "Error initializing h/w breakpoints\n");
474         return;
475     }
476 }
477 
478 int kvm_arch_init_vcpu(CPUState *cs)
479 {
480     PowerPCCPU *cpu = POWERPC_CPU(cs);
481     CPUPPCState *cenv = &cpu->env;
482     int ret;
483 
484     /* Synchronize sregs with kvm */
485     ret = kvm_arch_sync_sregs(cpu);
486     if (ret) {
487         if (ret == -EINVAL) {
488             error_report("Register sync failed... If you're using kvm-hv.ko,"
489                          " only \"-cpu host\" is possible");
490         }
491         return ret;
492     }
493 
494     idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
495 
496     switch (cenv->mmu_model) {
497     case POWERPC_MMU_BOOKE206:
498         /* This target supports access to KVM's guest TLB */
499         ret = kvm_booke206_tlb_init(cpu);
500         break;
501     case POWERPC_MMU_2_07:
502         if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
503             /*
504              * KVM-HV has transactional memory on POWER8 also without
505              * the KVM_CAP_PPC_HTM extension, so enable it here
506              * instead as long as it's availble to userspace on the
507              * host.
508              */
509             if (qemu_getauxval(AT_HWCAP2) & PPC_FEATURE2_HAS_HTM) {
510                 cap_htm = true;
511             }
512         }
513         break;
514     default:
515         break;
516     }
517 
518     kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
519     kvmppc_hw_debug_points_init(cenv);
520 
521     return ret;
522 }
523 
524 int kvm_arch_destroy_vcpu(CPUState *cs)
525 {
526     return 0;
527 }
528 
529 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
530 {
531     CPUPPCState *env = &cpu->env;
532     CPUState *cs = CPU(cpu);
533     struct kvm_dirty_tlb dirty_tlb;
534     unsigned char *bitmap;
535     int ret;
536 
537     if (!env->kvm_sw_tlb) {
538         return;
539     }
540 
541     bitmap = g_malloc((env->nb_tlb + 7) / 8);
542     memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
543 
544     dirty_tlb.bitmap = (uintptr_t)bitmap;
545     dirty_tlb.num_dirty = env->nb_tlb;
546 
547     ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
548     if (ret) {
549         fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
550                 __func__, strerror(-ret));
551     }
552 
553     g_free(bitmap);
554 }
555 
556 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
557 {
558     PowerPCCPU *cpu = POWERPC_CPU(cs);
559     CPUPPCState *env = &cpu->env;
560     union {
561         uint32_t u32;
562         uint64_t u64;
563     } val;
564     struct kvm_one_reg reg = {
565         .id = id,
566         .addr = (uintptr_t) &val,
567     };
568     int ret;
569 
570     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
571     if (ret != 0) {
572         trace_kvm_failed_spr_get(spr, strerror(errno));
573     } else {
574         switch (id & KVM_REG_SIZE_MASK) {
575         case KVM_REG_SIZE_U32:
576             env->spr[spr] = val.u32;
577             break;
578 
579         case KVM_REG_SIZE_U64:
580             env->spr[spr] = val.u64;
581             break;
582 
583         default:
584             /* Don't handle this size yet */
585             abort();
586         }
587     }
588 }
589 
590 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
591 {
592     PowerPCCPU *cpu = POWERPC_CPU(cs);
593     CPUPPCState *env = &cpu->env;
594     union {
595         uint32_t u32;
596         uint64_t u64;
597     } val;
598     struct kvm_one_reg reg = {
599         .id = id,
600         .addr = (uintptr_t) &val,
601     };
602     int ret;
603 
604     switch (id & KVM_REG_SIZE_MASK) {
605     case KVM_REG_SIZE_U32:
606         val.u32 = env->spr[spr];
607         break;
608 
609     case KVM_REG_SIZE_U64:
610         val.u64 = env->spr[spr];
611         break;
612 
613     default:
614         /* Don't handle this size yet */
615         abort();
616     }
617 
618     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
619     if (ret != 0) {
620         trace_kvm_failed_spr_set(spr, strerror(errno));
621     }
622 }
623 
624 static int kvm_put_fp(CPUState *cs)
625 {
626     PowerPCCPU *cpu = POWERPC_CPU(cs);
627     CPUPPCState *env = &cpu->env;
628     struct kvm_one_reg reg;
629     int i;
630     int ret;
631 
632     if (env->insns_flags & PPC_FLOAT) {
633         uint64_t fpscr = env->fpscr;
634         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
635 
636         reg.id = KVM_REG_PPC_FPSCR;
637         reg.addr = (uintptr_t)&fpscr;
638         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
639         if (ret < 0) {
640             trace_kvm_failed_fpscr_set(strerror(errno));
641             return ret;
642         }
643 
644         for (i = 0; i < 32; i++) {
645             uint64_t vsr[2];
646             uint64_t *fpr = cpu_fpr_ptr(&cpu->env, i);
647             uint64_t *vsrl = cpu_vsrl_ptr(&cpu->env, i);
648 
649 #ifdef HOST_WORDS_BIGENDIAN
650             vsr[0] = float64_val(*fpr);
651             vsr[1] = *vsrl;
652 #else
653             vsr[0] = *vsrl;
654             vsr[1] = float64_val(*fpr);
655 #endif
656             reg.addr = (uintptr_t) &vsr;
657             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
658 
659             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
660             if (ret < 0) {
661                 trace_kvm_failed_fp_set(vsx ? "VSR" : "FPR", i,
662                                         strerror(errno));
663                 return ret;
664             }
665         }
666     }
667 
668     if (env->insns_flags & PPC_ALTIVEC) {
669         reg.id = KVM_REG_PPC_VSCR;
670         reg.addr = (uintptr_t)&env->vscr;
671         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
672         if (ret < 0) {
673             trace_kvm_failed_vscr_set(strerror(errno));
674             return ret;
675         }
676 
677         for (i = 0; i < 32; i++) {
678             reg.id = KVM_REG_PPC_VR(i);
679             reg.addr = (uintptr_t)cpu_avr_ptr(env, i);
680             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
681             if (ret < 0) {
682                 trace_kvm_failed_vr_set(i, strerror(errno));
683                 return ret;
684             }
685         }
686     }
687 
688     return 0;
689 }
690 
691 static int kvm_get_fp(CPUState *cs)
692 {
693     PowerPCCPU *cpu = POWERPC_CPU(cs);
694     CPUPPCState *env = &cpu->env;
695     struct kvm_one_reg reg;
696     int i;
697     int ret;
698 
699     if (env->insns_flags & PPC_FLOAT) {
700         uint64_t fpscr;
701         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
702 
703         reg.id = KVM_REG_PPC_FPSCR;
704         reg.addr = (uintptr_t)&fpscr;
705         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
706         if (ret < 0) {
707             trace_kvm_failed_fpscr_get(strerror(errno));
708             return ret;
709         } else {
710             env->fpscr = fpscr;
711         }
712 
713         for (i = 0; i < 32; i++) {
714             uint64_t vsr[2];
715             uint64_t *fpr = cpu_fpr_ptr(&cpu->env, i);
716             uint64_t *vsrl = cpu_vsrl_ptr(&cpu->env, i);
717 
718             reg.addr = (uintptr_t) &vsr;
719             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
720 
721             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
722             if (ret < 0) {
723                 trace_kvm_failed_fp_get(vsx ? "VSR" : "FPR", i,
724                                         strerror(errno));
725                 return ret;
726             } else {
727 #ifdef HOST_WORDS_BIGENDIAN
728                 *fpr = vsr[0];
729                 if (vsx) {
730                     *vsrl = vsr[1];
731                 }
732 #else
733                 *fpr = vsr[1];
734                 if (vsx) {
735                     *vsrl = vsr[0];
736                 }
737 #endif
738             }
739         }
740     }
741 
742     if (env->insns_flags & PPC_ALTIVEC) {
743         reg.id = KVM_REG_PPC_VSCR;
744         reg.addr = (uintptr_t)&env->vscr;
745         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
746         if (ret < 0) {
747             trace_kvm_failed_vscr_get(strerror(errno));
748             return ret;
749         }
750 
751         for (i = 0; i < 32; i++) {
752             reg.id = KVM_REG_PPC_VR(i);
753             reg.addr = (uintptr_t)cpu_avr_ptr(env, i);
754             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
755             if (ret < 0) {
756                 trace_kvm_failed_vr_get(i, strerror(errno));
757                 return ret;
758             }
759         }
760     }
761 
762     return 0;
763 }
764 
765 #if defined(TARGET_PPC64)
766 static int kvm_get_vpa(CPUState *cs)
767 {
768     PowerPCCPU *cpu = POWERPC_CPU(cs);
769     SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu);
770     struct kvm_one_reg reg;
771     int ret;
772 
773     reg.id = KVM_REG_PPC_VPA_ADDR;
774     reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
775     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
776     if (ret < 0) {
777         trace_kvm_failed_vpa_addr_get(strerror(errno));
778         return ret;
779     }
780 
781     assert((uintptr_t)&spapr_cpu->slb_shadow_size
782            == ((uintptr_t)&spapr_cpu->slb_shadow_addr + 8));
783     reg.id = KVM_REG_PPC_VPA_SLB;
784     reg.addr = (uintptr_t)&spapr_cpu->slb_shadow_addr;
785     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
786     if (ret < 0) {
787         trace_kvm_failed_slb_get(strerror(errno));
788         return ret;
789     }
790 
791     assert((uintptr_t)&spapr_cpu->dtl_size
792            == ((uintptr_t)&spapr_cpu->dtl_addr + 8));
793     reg.id = KVM_REG_PPC_VPA_DTL;
794     reg.addr = (uintptr_t)&spapr_cpu->dtl_addr;
795     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
796     if (ret < 0) {
797         trace_kvm_failed_dtl_get(strerror(errno));
798         return ret;
799     }
800 
801     return 0;
802 }
803 
804 static int kvm_put_vpa(CPUState *cs)
805 {
806     PowerPCCPU *cpu = POWERPC_CPU(cs);
807     SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu);
808     struct kvm_one_reg reg;
809     int ret;
810 
811     /*
812      * SLB shadow or DTL can't be registered unless a master VPA is
813      * registered.  That means when restoring state, if a VPA *is*
814      * registered, we need to set that up first.  If not, we need to
815      * deregister the others before deregistering the master VPA
816      */
817     assert(spapr_cpu->vpa_addr
818            || !(spapr_cpu->slb_shadow_addr || spapr_cpu->dtl_addr));
819 
820     if (spapr_cpu->vpa_addr) {
821         reg.id = KVM_REG_PPC_VPA_ADDR;
822         reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
823         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
824         if (ret < 0) {
825             trace_kvm_failed_vpa_addr_set(strerror(errno));
826             return ret;
827         }
828     }
829 
830     assert((uintptr_t)&spapr_cpu->slb_shadow_size
831            == ((uintptr_t)&spapr_cpu->slb_shadow_addr + 8));
832     reg.id = KVM_REG_PPC_VPA_SLB;
833     reg.addr = (uintptr_t)&spapr_cpu->slb_shadow_addr;
834     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
835     if (ret < 0) {
836         trace_kvm_failed_slb_set(strerror(errno));
837         return ret;
838     }
839 
840     assert((uintptr_t)&spapr_cpu->dtl_size
841            == ((uintptr_t)&spapr_cpu->dtl_addr + 8));
842     reg.id = KVM_REG_PPC_VPA_DTL;
843     reg.addr = (uintptr_t)&spapr_cpu->dtl_addr;
844     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
845     if (ret < 0) {
846         trace_kvm_failed_dtl_set(strerror(errno));
847         return ret;
848     }
849 
850     if (!spapr_cpu->vpa_addr) {
851         reg.id = KVM_REG_PPC_VPA_ADDR;
852         reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
853         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
854         if (ret < 0) {
855             trace_kvm_failed_null_vpa_addr_set(strerror(errno));
856             return ret;
857         }
858     }
859 
860     return 0;
861 }
862 #endif /* TARGET_PPC64 */
863 
864 int kvmppc_put_books_sregs(PowerPCCPU *cpu)
865 {
866     CPUPPCState *env = &cpu->env;
867     struct kvm_sregs sregs;
868     int i;
869 
870     sregs.pvr = env->spr[SPR_PVR];
871 
872     if (cpu->vhyp) {
873         PPCVirtualHypervisorClass *vhc =
874             PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
875         sregs.u.s.sdr1 = vhc->encode_hpt_for_kvm_pr(cpu->vhyp);
876     } else {
877         sregs.u.s.sdr1 = env->spr[SPR_SDR1];
878     }
879 
880     /* Sync SLB */
881 #ifdef TARGET_PPC64
882     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
883         sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
884         if (env->slb[i].esid & SLB_ESID_V) {
885             sregs.u.s.ppc64.slb[i].slbe |= i;
886         }
887         sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
888     }
889 #endif
890 
891     /* Sync SRs */
892     for (i = 0; i < 16; i++) {
893         sregs.u.s.ppc32.sr[i] = env->sr[i];
894     }
895 
896     /* Sync BATs */
897     for (i = 0; i < 8; i++) {
898         /* Beware. We have to swap upper and lower bits here */
899         sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
900             | env->DBAT[1][i];
901         sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
902             | env->IBAT[1][i];
903     }
904 
905     return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
906 }
907 
908 int kvm_arch_put_registers(CPUState *cs, int level)
909 {
910     PowerPCCPU *cpu = POWERPC_CPU(cs);
911     CPUPPCState *env = &cpu->env;
912     struct kvm_regs regs;
913     int ret;
914     int i;
915 
916     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
917     if (ret < 0) {
918         return ret;
919     }
920 
921     regs.ctr = env->ctr;
922     regs.lr  = env->lr;
923     regs.xer = cpu_read_xer(env);
924     regs.msr = env->msr;
925     regs.pc = env->nip;
926 
927     regs.srr0 = env->spr[SPR_SRR0];
928     regs.srr1 = env->spr[SPR_SRR1];
929 
930     regs.sprg0 = env->spr[SPR_SPRG0];
931     regs.sprg1 = env->spr[SPR_SPRG1];
932     regs.sprg2 = env->spr[SPR_SPRG2];
933     regs.sprg3 = env->spr[SPR_SPRG3];
934     regs.sprg4 = env->spr[SPR_SPRG4];
935     regs.sprg5 = env->spr[SPR_SPRG5];
936     regs.sprg6 = env->spr[SPR_SPRG6];
937     regs.sprg7 = env->spr[SPR_SPRG7];
938 
939     regs.pid = env->spr[SPR_BOOKE_PID];
940 
941     for (i = 0; i < 32; i++) {
942         regs.gpr[i] = env->gpr[i];
943     }
944 
945     regs.cr = 0;
946     for (i = 0; i < 8; i++) {
947         regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
948     }
949 
950     ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
951     if (ret < 0) {
952         return ret;
953     }
954 
955     kvm_put_fp(cs);
956 
957     if (env->tlb_dirty) {
958         kvm_sw_tlb_put(cpu);
959         env->tlb_dirty = false;
960     }
961 
962     if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
963         ret = kvmppc_put_books_sregs(cpu);
964         if (ret < 0) {
965             return ret;
966         }
967     }
968 
969     if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
970         kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
971     }
972 
973     if (cap_one_reg) {
974         int i;
975 
976         /*
977          * We deliberately ignore errors here, for kernels which have
978          * the ONE_REG calls, but don't support the specific
979          * registers, there's a reasonable chance things will still
980          * work, at least until we try to migrate.
981          */
982         for (i = 0; i < 1024; i++) {
983             uint64_t id = env->spr_cb[i].one_reg_id;
984 
985             if (id != 0) {
986                 kvm_put_one_spr(cs, id, i);
987             }
988         }
989 
990 #ifdef TARGET_PPC64
991         if (msr_ts) {
992             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
993                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
994             }
995             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
996                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
997             }
998             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
999             kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1000             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1001             kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1002             kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1003             kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1004             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1005             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1006             kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1007             kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1008         }
1009 
1010         if (cap_papr) {
1011             if (kvm_put_vpa(cs) < 0) {
1012                 trace_kvm_failed_put_vpa();
1013             }
1014         }
1015 
1016         kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1017 #endif /* TARGET_PPC64 */
1018     }
1019 
1020     return ret;
1021 }
1022 
1023 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1024 {
1025      env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1026 }
1027 
1028 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1029 {
1030     CPUPPCState *env = &cpu->env;
1031     struct kvm_sregs sregs;
1032     int ret;
1033 
1034     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1035     if (ret < 0) {
1036         return ret;
1037     }
1038 
1039     if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1040         env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1041         env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1042         env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1043         env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1044         env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1045         env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1046         env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1047         env->spr[SPR_DECR] = sregs.u.e.dec;
1048         env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1049         env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1050         env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1051     }
1052 
1053     if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1054         env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1055         env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1056         env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1057         env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1058         env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1059     }
1060 
1061     if (sregs.u.e.features & KVM_SREGS_E_64) {
1062         env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1063     }
1064 
1065     if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1066         env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1067     }
1068 
1069     if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1070         env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1071         kvm_sync_excp(env, POWERPC_EXCP_CRITICAL,  SPR_BOOKE_IVOR0);
1072         env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1073         kvm_sync_excp(env, POWERPC_EXCP_MCHECK,  SPR_BOOKE_IVOR1);
1074         env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1075         kvm_sync_excp(env, POWERPC_EXCP_DSI,  SPR_BOOKE_IVOR2);
1076         env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1077         kvm_sync_excp(env, POWERPC_EXCP_ISI,  SPR_BOOKE_IVOR3);
1078         env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1079         kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL,  SPR_BOOKE_IVOR4);
1080         env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1081         kvm_sync_excp(env, POWERPC_EXCP_ALIGN,  SPR_BOOKE_IVOR5);
1082         env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1083         kvm_sync_excp(env, POWERPC_EXCP_PROGRAM,  SPR_BOOKE_IVOR6);
1084         env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1085         kvm_sync_excp(env, POWERPC_EXCP_FPU,  SPR_BOOKE_IVOR7);
1086         env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1087         kvm_sync_excp(env, POWERPC_EXCP_SYSCALL,  SPR_BOOKE_IVOR8);
1088         env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1089         kvm_sync_excp(env, POWERPC_EXCP_APU,  SPR_BOOKE_IVOR9);
1090         env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1091         kvm_sync_excp(env, POWERPC_EXCP_DECR,  SPR_BOOKE_IVOR10);
1092         env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1093         kvm_sync_excp(env, POWERPC_EXCP_FIT,  SPR_BOOKE_IVOR11);
1094         env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1095         kvm_sync_excp(env, POWERPC_EXCP_WDT,  SPR_BOOKE_IVOR12);
1096         env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1097         kvm_sync_excp(env, POWERPC_EXCP_DTLB,  SPR_BOOKE_IVOR13);
1098         env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1099         kvm_sync_excp(env, POWERPC_EXCP_ITLB,  SPR_BOOKE_IVOR14);
1100         env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1101         kvm_sync_excp(env, POWERPC_EXCP_DEBUG,  SPR_BOOKE_IVOR15);
1102 
1103         if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1104             env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1105             kvm_sync_excp(env, POWERPC_EXCP_SPEU,  SPR_BOOKE_IVOR32);
1106             env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1107             kvm_sync_excp(env, POWERPC_EXCP_EFPDI,  SPR_BOOKE_IVOR33);
1108             env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1109             kvm_sync_excp(env, POWERPC_EXCP_EFPRI,  SPR_BOOKE_IVOR34);
1110         }
1111 
1112         if (sregs.u.e.features & KVM_SREGS_E_PM) {
1113             env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1114             kvm_sync_excp(env, POWERPC_EXCP_EPERFM,  SPR_BOOKE_IVOR35);
1115         }
1116 
1117         if (sregs.u.e.features & KVM_SREGS_E_PC) {
1118             env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1119             kvm_sync_excp(env, POWERPC_EXCP_DOORI,  SPR_BOOKE_IVOR36);
1120             env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1121             kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1122         }
1123     }
1124 
1125     if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1126         env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1127         env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1128         env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1129         env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1130         env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1131         env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1132         env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1133         env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1134         env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1135         env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1136     }
1137 
1138     if (sregs.u.e.features & KVM_SREGS_EXP) {
1139         env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1140     }
1141 
1142     if (sregs.u.e.features & KVM_SREGS_E_PD) {
1143         env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1144         env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1145     }
1146 
1147     if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1148         env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1149         env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1150         env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1151 
1152         if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1153             env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1154             env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1155         }
1156     }
1157 
1158     return 0;
1159 }
1160 
1161 static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1162 {
1163     CPUPPCState *env = &cpu->env;
1164     struct kvm_sregs sregs;
1165     int ret;
1166     int i;
1167 
1168     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1169     if (ret < 0) {
1170         return ret;
1171     }
1172 
1173     if (!cpu->vhyp) {
1174         ppc_store_sdr1(env, sregs.u.s.sdr1);
1175     }
1176 
1177     /* Sync SLB */
1178 #ifdef TARGET_PPC64
1179     /*
1180      * The packed SLB array we get from KVM_GET_SREGS only contains
1181      * information about valid entries. So we flush our internal copy
1182      * to get rid of stale ones, then put all valid SLB entries back
1183      * in.
1184      */
1185     memset(env->slb, 0, sizeof(env->slb));
1186     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1187         target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1188         target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1189         /*
1190          * Only restore valid entries
1191          */
1192         if (rb & SLB_ESID_V) {
1193             ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1194         }
1195     }
1196 #endif
1197 
1198     /* Sync SRs */
1199     for (i = 0; i < 16; i++) {
1200         env->sr[i] = sregs.u.s.ppc32.sr[i];
1201     }
1202 
1203     /* Sync BATs */
1204     for (i = 0; i < 8; i++) {
1205         env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1206         env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1207         env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1208         env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1209     }
1210 
1211     return 0;
1212 }
1213 
1214 int kvm_arch_get_registers(CPUState *cs)
1215 {
1216     PowerPCCPU *cpu = POWERPC_CPU(cs);
1217     CPUPPCState *env = &cpu->env;
1218     struct kvm_regs regs;
1219     uint32_t cr;
1220     int i, ret;
1221 
1222     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1223     if (ret < 0) {
1224         return ret;
1225     }
1226 
1227     cr = regs.cr;
1228     for (i = 7; i >= 0; i--) {
1229         env->crf[i] = cr & 15;
1230         cr >>= 4;
1231     }
1232 
1233     env->ctr = regs.ctr;
1234     env->lr = regs.lr;
1235     cpu_write_xer(env, regs.xer);
1236     env->msr = regs.msr;
1237     env->nip = regs.pc;
1238 
1239     env->spr[SPR_SRR0] = regs.srr0;
1240     env->spr[SPR_SRR1] = regs.srr1;
1241 
1242     env->spr[SPR_SPRG0] = regs.sprg0;
1243     env->spr[SPR_SPRG1] = regs.sprg1;
1244     env->spr[SPR_SPRG2] = regs.sprg2;
1245     env->spr[SPR_SPRG3] = regs.sprg3;
1246     env->spr[SPR_SPRG4] = regs.sprg4;
1247     env->spr[SPR_SPRG5] = regs.sprg5;
1248     env->spr[SPR_SPRG6] = regs.sprg6;
1249     env->spr[SPR_SPRG7] = regs.sprg7;
1250 
1251     env->spr[SPR_BOOKE_PID] = regs.pid;
1252 
1253     for (i = 0; i < 32; i++) {
1254         env->gpr[i] = regs.gpr[i];
1255     }
1256 
1257     kvm_get_fp(cs);
1258 
1259     if (cap_booke_sregs) {
1260         ret = kvmppc_get_booke_sregs(cpu);
1261         if (ret < 0) {
1262             return ret;
1263         }
1264     }
1265 
1266     if (cap_segstate) {
1267         ret = kvmppc_get_books_sregs(cpu);
1268         if (ret < 0) {
1269             return ret;
1270         }
1271     }
1272 
1273     if (cap_hior) {
1274         kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1275     }
1276 
1277     if (cap_one_reg) {
1278         int i;
1279 
1280         /*
1281          * We deliberately ignore errors here, for kernels which have
1282          * the ONE_REG calls, but don't support the specific
1283          * registers, there's a reasonable chance things will still
1284          * work, at least until we try to migrate.
1285          */
1286         for (i = 0; i < 1024; i++) {
1287             uint64_t id = env->spr_cb[i].one_reg_id;
1288 
1289             if (id != 0) {
1290                 kvm_get_one_spr(cs, id, i);
1291             }
1292         }
1293 
1294 #ifdef TARGET_PPC64
1295         if (msr_ts) {
1296             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1297                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1298             }
1299             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1300                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1301             }
1302             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1303             kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1304             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1305             kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1306             kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1307             kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1308             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1309             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1310             kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1311             kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1312         }
1313 
1314         if (cap_papr) {
1315             if (kvm_get_vpa(cs) < 0) {
1316                 trace_kvm_failed_get_vpa();
1317             }
1318         }
1319 
1320         kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1321 #endif
1322     }
1323 
1324     return 0;
1325 }
1326 
1327 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1328 {
1329     unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1330 
1331     if (irq != PPC_INTERRUPT_EXT) {
1332         return 0;
1333     }
1334 
1335     if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1336         return 0;
1337     }
1338 
1339     kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1340 
1341     return 0;
1342 }
1343 
1344 #if defined(TARGET_PPC64)
1345 #define PPC_INPUT_INT PPC970_INPUT_INT
1346 #else
1347 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1348 #endif
1349 
1350 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1351 {
1352     PowerPCCPU *cpu = POWERPC_CPU(cs);
1353     CPUPPCState *env = &cpu->env;
1354     int r;
1355     unsigned irq;
1356 
1357     qemu_mutex_lock_iothread();
1358 
1359     /*
1360      * PowerPC QEMU tracks the various core input pins (interrupt,
1361      * critical interrupt, reset, etc) in PPC-specific
1362      * env->irq_input_state.
1363      */
1364     if (!cap_interrupt_level &&
1365         run->ready_for_interrupt_injection &&
1366         (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1367         (env->irq_input_state & (1 << PPC_INPUT_INT)))
1368     {
1369         /*
1370          * For now KVM disregards the 'irq' argument. However, in the
1371          * future KVM could cache it in-kernel to avoid a heavyweight
1372          * exit when reading the UIC.
1373          */
1374         irq = KVM_INTERRUPT_SET;
1375 
1376         trace_kvm_injected_interrupt(irq);
1377         r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1378         if (r < 0) {
1379             printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1380         }
1381 
1382         /* Always wake up soon in case the interrupt was level based */
1383         timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1384                        (NANOSECONDS_PER_SECOND / 50));
1385     }
1386 
1387     /*
1388      * We don't know if there are more interrupts pending after
1389      * this. However, the guest will return to userspace in the course
1390      * of handling this one anyways, so we will get a chance to
1391      * deliver the rest.
1392      */
1393 
1394     qemu_mutex_unlock_iothread();
1395 }
1396 
1397 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1398 {
1399     return MEMTXATTRS_UNSPECIFIED;
1400 }
1401 
1402 int kvm_arch_process_async_events(CPUState *cs)
1403 {
1404     return cs->halted;
1405 }
1406 
1407 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1408 {
1409     CPUState *cs = CPU(cpu);
1410     CPUPPCState *env = &cpu->env;
1411 
1412     if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1413         cs->halted = 1;
1414         cs->exception_index = EXCP_HLT;
1415     }
1416 
1417     return 0;
1418 }
1419 
1420 /* map dcr access to existing qemu dcr emulation */
1421 static int kvmppc_handle_dcr_read(CPUPPCState *env,
1422                                   uint32_t dcrn, uint32_t *data)
1423 {
1424     if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0) {
1425         fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1426     }
1427 
1428     return 0;
1429 }
1430 
1431 static int kvmppc_handle_dcr_write(CPUPPCState *env,
1432                                    uint32_t dcrn, uint32_t data)
1433 {
1434     if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0) {
1435         fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1436     }
1437 
1438     return 0;
1439 }
1440 
1441 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1442 {
1443     /* Mixed endian case is not handled */
1444     uint32_t sc = debug_inst_opcode;
1445 
1446     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1447                             sizeof(sc), 0) ||
1448         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1449         return -EINVAL;
1450     }
1451 
1452     return 0;
1453 }
1454 
1455 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1456 {
1457     uint32_t sc;
1458 
1459     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1460         sc != debug_inst_opcode ||
1461         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1462                             sizeof(sc), 1)) {
1463         return -EINVAL;
1464     }
1465 
1466     return 0;
1467 }
1468 
1469 static int find_hw_breakpoint(target_ulong addr, int type)
1470 {
1471     int n;
1472 
1473     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1474            <= ARRAY_SIZE(hw_debug_points));
1475 
1476     for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1477         if (hw_debug_points[n].addr == addr &&
1478              hw_debug_points[n].type == type) {
1479             return n;
1480         }
1481     }
1482 
1483     return -1;
1484 }
1485 
1486 static int find_hw_watchpoint(target_ulong addr, int *flag)
1487 {
1488     int n;
1489 
1490     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1491     if (n >= 0) {
1492         *flag = BP_MEM_ACCESS;
1493         return n;
1494     }
1495 
1496     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1497     if (n >= 0) {
1498         *flag = BP_MEM_WRITE;
1499         return n;
1500     }
1501 
1502     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1503     if (n >= 0) {
1504         *flag = BP_MEM_READ;
1505         return n;
1506     }
1507 
1508     return -1;
1509 }
1510 
1511 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1512                                   target_ulong len, int type)
1513 {
1514     if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1515         return -ENOBUFS;
1516     }
1517 
1518     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1519     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1520 
1521     switch (type) {
1522     case GDB_BREAKPOINT_HW:
1523         if (nb_hw_breakpoint >= max_hw_breakpoint) {
1524             return -ENOBUFS;
1525         }
1526 
1527         if (find_hw_breakpoint(addr, type) >= 0) {
1528             return -EEXIST;
1529         }
1530 
1531         nb_hw_breakpoint++;
1532         break;
1533 
1534     case GDB_WATCHPOINT_WRITE:
1535     case GDB_WATCHPOINT_READ:
1536     case GDB_WATCHPOINT_ACCESS:
1537         if (nb_hw_watchpoint >= max_hw_watchpoint) {
1538             return -ENOBUFS;
1539         }
1540 
1541         if (find_hw_breakpoint(addr, type) >= 0) {
1542             return -EEXIST;
1543         }
1544 
1545         nb_hw_watchpoint++;
1546         break;
1547 
1548     default:
1549         return -ENOSYS;
1550     }
1551 
1552     return 0;
1553 }
1554 
1555 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1556                                   target_ulong len, int type)
1557 {
1558     int n;
1559 
1560     n = find_hw_breakpoint(addr, type);
1561     if (n < 0) {
1562         return -ENOENT;
1563     }
1564 
1565     switch (type) {
1566     case GDB_BREAKPOINT_HW:
1567         nb_hw_breakpoint--;
1568         break;
1569 
1570     case GDB_WATCHPOINT_WRITE:
1571     case GDB_WATCHPOINT_READ:
1572     case GDB_WATCHPOINT_ACCESS:
1573         nb_hw_watchpoint--;
1574         break;
1575 
1576     default:
1577         return -ENOSYS;
1578     }
1579     hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1580 
1581     return 0;
1582 }
1583 
1584 void kvm_arch_remove_all_hw_breakpoints(void)
1585 {
1586     nb_hw_breakpoint = nb_hw_watchpoint = 0;
1587 }
1588 
1589 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1590 {
1591     int n;
1592 
1593     /* Software Breakpoint updates */
1594     if (kvm_sw_breakpoints_active(cs)) {
1595         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1596     }
1597 
1598     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1599            <= ARRAY_SIZE(hw_debug_points));
1600     assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1601 
1602     if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1603         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1604         memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1605         for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1606             switch (hw_debug_points[n].type) {
1607             case GDB_BREAKPOINT_HW:
1608                 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1609                 break;
1610             case GDB_WATCHPOINT_WRITE:
1611                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1612                 break;
1613             case GDB_WATCHPOINT_READ:
1614                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1615                 break;
1616             case GDB_WATCHPOINT_ACCESS:
1617                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1618                                         KVMPPC_DEBUG_WATCH_READ;
1619                 break;
1620             default:
1621                 cpu_abort(cs, "Unsupported breakpoint type\n");
1622             }
1623             dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1624         }
1625     }
1626 }
1627 
1628 static int kvm_handle_hw_breakpoint(CPUState *cs,
1629                                     struct kvm_debug_exit_arch *arch_info)
1630 {
1631     int handle = 0;
1632     int n;
1633     int flag = 0;
1634 
1635     if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1636         if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1637             n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1638             if (n >= 0) {
1639                 handle = 1;
1640             }
1641         } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1642                                         KVMPPC_DEBUG_WATCH_WRITE)) {
1643             n = find_hw_watchpoint(arch_info->address,  &flag);
1644             if (n >= 0) {
1645                 handle = 1;
1646                 cs->watchpoint_hit = &hw_watchpoint;
1647                 hw_watchpoint.vaddr = hw_debug_points[n].addr;
1648                 hw_watchpoint.flags = flag;
1649             }
1650         }
1651     }
1652     return handle;
1653 }
1654 
/*
 * Debug exit taken while single-stepping. Always returns 1, which
 * kvm_handle_debug() passes on to its caller as "handled".
 */
static int kvm_handle_singlestep(void)
{
    const int handled = 1;

    return handled;
}
1659 
/*
 * Debug exit caused by one of QEMU's software breakpoints. Always
 * returns 1, which kvm_handle_debug() passes on to its caller as
 * "handled".
 */
static int kvm_handle_sw_breakpoint(void)
{
    const int handled = 1;

    return handled;
}
1664 
1665 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1666 {
1667     CPUState *cs = CPU(cpu);
1668     CPUPPCState *env = &cpu->env;
1669     struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1670 
1671     if (cs->singlestep_enabled) {
1672         return kvm_handle_singlestep();
1673     }
1674 
1675     if (arch_info->status) {
1676         return kvm_handle_hw_breakpoint(cs, arch_info);
1677     }
1678 
1679     if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1680         return kvm_handle_sw_breakpoint();
1681     }
1682 
1683     /*
1684      * QEMU is not able to handle debug exception, so inject
1685      * program exception to guest;
1686      * Yes program exception NOT debug exception !!
1687      * When QEMU is using debug resources then debug exception must
1688      * be always set. To achieve this we set MSR_DE and also set
1689      * MSRP_DEP so guest cannot change MSR_DE.
1690      * When emulating debug resource for guest we want guest
1691      * to control MSR_DE (enable/disable debug interrupt on need).
1692      * Supporting both configurations are NOT possible.
1693      * So the result is that we cannot share debug resources
1694      * between QEMU and Guest on BOOKE architecture.
1695      * In the current design QEMU gets the priority over guest,
1696      * this means that if QEMU is using debug resources then guest
1697      * cannot use them;
1698      * For software breakpoint QEMU uses a privileged instruction;
1699      * So there cannot be any reason that we are here for guest
1700      * set debug exception, only possibility is guest executed a
1701      * privileged / illegal instruction and that's why we are
1702      * injecting a program interrupt.
1703      */
1704     cpu_synchronize_state(cs);
1705     /*
1706      * env->nip is PC, so increment this by 4 to use
1707      * ppc_cpu_do_interrupt(), which set srr0 = env->nip - 4.
1708      */
1709     env->nip += 4;
1710     cs->exception_index = POWERPC_EXCP_PROGRAM;
1711     env->error_code = POWERPC_EXCP_INVAL;
1712     ppc_cpu_do_interrupt(cs);
1713 
1714     return 0;
1715 }
1716 
1717 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1718 {
1719     PowerPCCPU *cpu = POWERPC_CPU(cs);
1720     CPUPPCState *env = &cpu->env;
1721     int ret;
1722 
1723     qemu_mutex_lock_iothread();
1724 
1725     switch (run->exit_reason) {
1726     case KVM_EXIT_DCR:
1727         if (run->dcr.is_write) {
1728             trace_kvm_handle_dcr_write();
1729             ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1730         } else {
1731             trace_kvm_handle_dcr_read();
1732             ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1733         }
1734         break;
1735     case KVM_EXIT_HLT:
1736         trace_kvm_handle_halt();
1737         ret = kvmppc_handle_halt(cpu);
1738         break;
1739 #if defined(TARGET_PPC64)
1740     case KVM_EXIT_PAPR_HCALL:
1741         trace_kvm_handle_papr_hcall();
1742         run->papr_hcall.ret = spapr_hypercall(cpu,
1743                                               run->papr_hcall.nr,
1744                                               run->papr_hcall.args);
1745         ret = 0;
1746         break;
1747 #endif
1748     case KVM_EXIT_EPR:
1749         trace_kvm_handle_epr();
1750         run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1751         ret = 0;
1752         break;
1753     case KVM_EXIT_WATCHDOG:
1754         trace_kvm_handle_watchdog_expiry();
1755         watchdog_perform_action();
1756         ret = 0;
1757         break;
1758 
1759     case KVM_EXIT_DEBUG:
1760         trace_kvm_handle_debug_exception();
1761         if (kvm_handle_debug(cpu, run)) {
1762             ret = EXCP_DEBUG;
1763             break;
1764         }
1765         /* re-enter, this exception was guest-internal */
1766         ret = 0;
1767         break;
1768 
1769     default:
1770         fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1771         ret = -1;
1772         break;
1773     }
1774 
1775     qemu_mutex_unlock_iothread();
1776     return ret;
1777 }
1778 
1779 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1780 {
1781     CPUState *cs = CPU(cpu);
1782     uint32_t bits = tsr_bits;
1783     struct kvm_one_reg reg = {
1784         .id = KVM_REG_PPC_OR_TSR,
1785         .addr = (uintptr_t) &bits,
1786     };
1787 
1788     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1789 }
1790 
1791 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1792 {
1793 
1794     CPUState *cs = CPU(cpu);
1795     uint32_t bits = tsr_bits;
1796     struct kvm_one_reg reg = {
1797         .id = KVM_REG_PPC_CLEAR_TSR,
1798         .addr = (uintptr_t) &bits,
1799     };
1800 
1801     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1802 }
1803 
1804 int kvmppc_set_tcr(PowerPCCPU *cpu)
1805 {
1806     CPUState *cs = CPU(cpu);
1807     CPUPPCState *env = &cpu->env;
1808     uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1809 
1810     struct kvm_one_reg reg = {
1811         .id = KVM_REG_PPC_TCR,
1812         .addr = (uintptr_t) &tcr,
1813     };
1814 
1815     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1816 }
1817 
1818 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1819 {
1820     CPUState *cs = CPU(cpu);
1821     int ret;
1822 
1823     if (!kvm_enabled()) {
1824         return -1;
1825     }
1826 
1827     if (!cap_ppc_watchdog) {
1828         printf("warning: KVM does not support watchdog");
1829         return -1;
1830     }
1831 
1832     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1833     if (ret < 0) {
1834         fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1835                 __func__, strerror(-ret));
1836         return ret;
1837     }
1838 
1839     return ret;
1840 }
1841 
/*
 * Find the first line of /proc/cpuinfo that starts with @field and copy
 * it (the whole line, bounded by @len) into @value.
 *
 * Returns 0 when a matching line was found, -1 when the file cannot be
 * opened or no line matches.
 */
static int read_cpuinfo(const char *field, char *value, int len)
{
    char line[512];
    int field_len = strlen(field);
    int ret = -1;
    FILE *f = fopen("/proc/cpuinfo", "r");

    if (!f) {
        return -1;
    }

    while (fgets(line, sizeof(line), f)) {
        if (strncmp(line, field, field_len) == 0) {
            pstrcpy(value, len, line);
            ret = 0;
            break;
        }
        if (!*line) {
            /* An empty buffer ends the scan */
            break;
        }
    }

    fclose(f);

    return ret;
}
1869 
1870 uint32_t kvmppc_get_tbfreq(void)
1871 {
1872     char line[512];
1873     char *ns;
1874     uint32_t retval = NANOSECONDS_PER_SECOND;
1875 
1876     if (read_cpuinfo("timebase", line, sizeof(line))) {
1877         return retval;
1878     }
1879 
1880     ns = strchr(line, ':');
1881     if (!ns) {
1882         return retval;
1883     }
1884 
1885     ns++;
1886 
1887     return atoi(ns);
1888 }
1889 
/*
 * Read the contents of /proc/device-tree/system-id into a newly
 * allocated buffer stored in *@value. Returns the result of
 * g_file_get_contents(), i.e. true on success.
 */
bool kvmppc_get_host_serial(char **value)
{
    static const char path[] = "/proc/device-tree/system-id";

    return g_file_get_contents(path, value, NULL, NULL);
}
1895 
/*
 * Read the contents of /proc/device-tree/model into a newly allocated
 * buffer stored in *@value. Returns the result of
 * g_file_get_contents(), i.e. true on success.
 */
bool kvmppc_get_host_model(char **value)
{
    static const char path[] = "/proc/device-tree/model";

    return g_file_get_contents(path, value, NULL, NULL);
}
1900 
1901 /* Try to find a device tree node for a CPU with clock-frequency property */
1902 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1903 {
1904     struct dirent *dirp;
1905     DIR *dp;
1906 
1907     dp = opendir(PROC_DEVTREE_CPU);
1908     if (!dp) {
1909         printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1910         return -1;
1911     }
1912 
1913     buf[0] = '\0';
1914     while ((dirp = readdir(dp)) != NULL) {
1915         FILE *f;
1916         snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1917                  dirp->d_name);
1918         f = fopen(buf, "r");
1919         if (f) {
1920             snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1921             fclose(f);
1922             break;
1923         }
1924         buf[0] = '\0';
1925     }
1926     closedir(dp);
1927     if (buf[0] == '\0') {
1928         printf("Unknown host!\n");
1929         return -1;
1930     }
1931 
1932     return 0;
1933 }
1934 
/*
 * Read a big-endian integer from the file named by filename.
 *
 * At most 8 bytes are read.  If 4 bytes were read the value is treated
 * as 32-bit, if 8 bytes were read as 64-bit; both are converted to host
 * byte order.  Returns (uint64_t)-1 if the file cannot be opened and 0
 * for any other read length.
 */
static uint64_t kvmppc_read_int_dt(const char *filename)
{
    union {
        uint32_t v32;
        uint64_t v64;
    } raw;
    FILE *prop = fopen(filename, "rb");
    int nread;

    if (prop == NULL) {
        return -1;
    }

    nread = fread(&raw, 1, sizeof(raw), prop);
    fclose(prop);

    if (nread == 4) {
        /* property is a 32-bit quantity */
        return be32_to_cpu(raw.v32);
    }
    if (nread == 8) {
        return be64_to_cpu(raw.v64);
    }

    return 0;
}
1961 
1962 /*
1963  * Read a CPU node property from the host device tree that's a single
1964  * integer (32-bit or 64-bit).  Returns 0 if anything goes wrong
1965  * (can't find or open the property, or doesn't understand the format)
1966  */
1967 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1968 {
1969     char buf[PATH_MAX], *tmp;
1970     uint64_t val;
1971 
1972     if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1973         return -1;
1974     }
1975 
1976     tmp = g_strdup_printf("%s/%s", buf, propname);
1977     val = kvmppc_read_int_dt(tmp);
1978     g_free(tmp);
1979 
1980     return val;
1981 }
1982 
/* Return the "clock-frequency" property of the host CPU device tree node */
uint64_t kvmppc_get_clockfreq(void)
{
    uint64_t freq = kvmppc_read_int_cpu_dt("clock-frequency");

    return freq;
}
1987 
/*
 * Return the "ibm,dec-bits" property of the host CPU node, or 0 when
 * the value read is not positive once converted to int.
 */
static int kvmppc_get_dec_bits(void)
{
    int dec_bits = kvmppc_read_int_cpu_dt("ibm,dec-bits");

    return (dec_bits > 0) ? dec_bits : 0;
}
1997 
1998 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1999 {
2000     CPUState *cs = env_cpu(env);
2001 
2002     if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
2003         !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
2004         return 0;
2005     }
2006 
2007     return 1;
2008 }
2009 
2010 int kvmppc_get_hasidle(CPUPPCState *env)
2011 {
2012     struct kvm_ppc_pvinfo pvinfo;
2013 
2014     if (!kvmppc_get_pvinfo(env, &pvinfo) &&
2015         (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
2016         return 1;
2017     }
2018 
2019     return 0;
2020 }
2021 
2022 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
2023 {
2024     uint32_t *hc = (uint32_t *)buf;
2025     struct kvm_ppc_pvinfo pvinfo;
2026 
2027     if (!kvmppc_get_pvinfo(env, &pvinfo)) {
2028         memcpy(buf, pvinfo.hcall, buf_len);
2029         return 0;
2030     }
2031 
2032     /*
2033      * Fallback to always fail hypercalls regardless of endianness:
2034      *
2035      *     tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
2036      *     li r3, -1
2037      *     b .+8       (becomes nop in wrong endian)
2038      *     bswap32(li r3, -1)
2039      */
2040 
2041     hc[0] = cpu_to_be32(0x08000048);
2042     hc[1] = cpu_to_be32(0x3860ffff);
2043     hc[2] = cpu_to_be32(0x48000008);
2044     hc[3] = cpu_to_be32(bswap32(0x3860ffff));
2045 
2046     return 1;
2047 }
2048 
2049 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
2050 {
2051     return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
2052 }
2053 
2054 void kvmppc_enable_logical_ci_hcalls(void)
2055 {
2056     /*
2057      * FIXME: it would be nice if we could detect the cases where
2058      * we're using a device which requires the in kernel
2059      * implementation of these hcalls, but the kernel lacks them and
2060      * produce a warning.
2061      */
2062     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
2063     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
2064 }
2065 
2066 void kvmppc_enable_set_mode_hcall(void)
2067 {
2068     kvmppc_enable_hcall(kvm_state, H_SET_MODE);
2069 }
2070 
2071 void kvmppc_enable_clear_ref_mod_hcalls(void)
2072 {
2073     kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
2074     kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
2075 }
2076 
2077 void kvmppc_enable_h_page_init(void)
2078 {
2079     kvmppc_enable_hcall(kvm_state, H_PAGE_INIT);
2080 }
2081 
2082 void kvmppc_set_papr(PowerPCCPU *cpu)
2083 {
2084     CPUState *cs = CPU(cpu);
2085     int ret;
2086 
2087     if (!kvm_enabled()) {
2088         return;
2089     }
2090 
2091     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2092     if (ret) {
2093         error_report("This vCPU type or KVM version does not support PAPR");
2094         exit(1);
2095     }
2096 
2097     /*
2098      * Update the capability flag so we sync the right information
2099      * with kvm
2100      */
2101     cap_papr = 1;
2102 }
2103 
2104 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr)
2105 {
2106     return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr);
2107 }
2108 
2109 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2110 {
2111     CPUState *cs = CPU(cpu);
2112     int ret;
2113 
2114     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2115     if (ret && mpic_proxy) {
2116         error_report("This KVM version does not support EPR");
2117         exit(1);
2118     }
2119 }
2120 
2121 int kvmppc_smt_threads(void)
2122 {
2123     return cap_ppc_smt ? cap_ppc_smt : 1;
2124 }
2125 
2126 int kvmppc_set_smt_threads(int smt)
2127 {
2128     int ret;
2129 
2130     ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_SMT, 0, smt, 0);
2131     if (!ret) {
2132         cap_ppc_smt = smt;
2133     }
2134     return ret;
2135 }
2136 
2137 void kvmppc_hint_smt_possible(Error **errp)
2138 {
2139     int i;
2140     GString *g;
2141     char *s;
2142 
2143     assert(kvm_enabled());
2144     if (cap_ppc_smt_possible) {
2145         g = g_string_new("Available VSMT modes:");
2146         for (i = 63; i >= 0; i--) {
2147             if ((1UL << i) & cap_ppc_smt_possible) {
2148                 g_string_append_printf(g, " %lu", (1UL << i));
2149             }
2150         }
2151         s = g_string_free(g, false);
2152         error_append_hint(errp, "%s.\n", s);
2153         g_free(s);
2154     } else {
2155         error_append_hint(errp,
2156                           "This KVM seems to be too old to support VSMT.\n");
2157     }
2158 }
2159 
2160 
2161 #ifdef TARGET_PPC64
2162 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2163 {
2164     struct kvm_ppc_smmu_info info;
2165     long rampagesize, best_page_shift;
2166     int i;
2167 
2168     /*
2169      * Find the largest hardware supported page size that's less than
2170      * or equal to the (logical) backing page size of guest RAM
2171      */
2172     kvm_get_smmu_info(&info, &error_fatal);
2173     rampagesize = qemu_minrampagesize();
2174     best_page_shift = 0;
2175 
2176     for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2177         struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2178 
2179         if (!sps->page_shift) {
2180             continue;
2181         }
2182 
2183         if ((sps->page_shift > best_page_shift)
2184             && ((1UL << sps->page_shift) <= rampagesize)) {
2185             best_page_shift = sps->page_shift;
2186         }
2187     }
2188 
2189     return MIN(current_size,
2190                1ULL << (best_page_shift + hash_shift - 7));
2191 }
2192 #endif
2193 
2194 bool kvmppc_spapr_use_multitce(void)
2195 {
2196     return cap_spapr_multitce;
2197 }
2198 
2199 int kvmppc_spapr_enable_inkernel_multitce(void)
2200 {
2201     int ret;
2202 
2203     ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2204                             H_PUT_TCE_INDIRECT, 1);
2205     if (!ret) {
2206         ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2207                                 H_STUFF_TCE, 1);
2208     }
2209 
2210     return ret;
2211 }
2212 
2213 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
2214                               uint64_t bus_offset, uint32_t nb_table,
2215                               int *pfd, bool need_vfio)
2216 {
2217     long len;
2218     int fd;
2219     void *table;
2220 
2221     /*
2222      * Must set fd to -1 so we don't try to munmap when called for
2223      * destroying the table, which the upper layers -will- do
2224      */
2225     *pfd = -1;
2226     if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2227         return NULL;
2228     }
2229 
2230     if (cap_spapr_tce_64) {
2231         struct kvm_create_spapr_tce_64 args = {
2232             .liobn = liobn,
2233             .page_shift = page_shift,
2234             .offset = bus_offset >> page_shift,
2235             .size = nb_table,
2236             .flags = 0
2237         };
2238         fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE_64, &args);
2239         if (fd < 0) {
2240             fprintf(stderr,
2241                     "KVM: Failed to create TCE64 table for liobn 0x%x\n",
2242                     liobn);
2243             return NULL;
2244         }
2245     } else if (cap_spapr_tce) {
2246         uint64_t window_size = (uint64_t) nb_table << page_shift;
2247         struct kvm_create_spapr_tce args = {
2248             .liobn = liobn,
2249             .window_size = window_size,
2250         };
2251         if ((window_size != args.window_size) || bus_offset) {
2252             return NULL;
2253         }
2254         fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2255         if (fd < 0) {
2256             fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2257                     liobn);
2258             return NULL;
2259         }
2260     } else {
2261         return NULL;
2262     }
2263 
2264     len = nb_table * sizeof(uint64_t);
2265     /* FIXME: round this up to page size */
2266 
2267     table = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
2268     if (table == MAP_FAILED) {
2269         fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2270                 liobn);
2271         close(fd);
2272         return NULL;
2273     }
2274 
2275     *pfd = fd;
2276     return table;
2277 }
2278 
/*
 * Unmap a TCE table of nb_table 64-bit entries and close its
 * descriptor.
 *
 * Returns -1 without doing anything when fd is negative.  Otherwise
 * returns 0, even if munmap() or close() fails; such a failure is only
 * reported on stderr.  close() is not attempted when munmap() fails.
 */
int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
{
    long len;

    if (fd < 0) {
        return -1;
    }

    len = nb_table * sizeof(uint64_t);
    if ((munmap(table, len) < 0) ||
        (close(fd) < 0)) {
        /* Terminate the message with a newline like the other reports */
        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s\n",
                strerror(errno));
        /* Leak the table */
    }

    return 0;
}
2297 
2298 int kvmppc_reset_htab(int shift_hint)
2299 {
2300     uint32_t shift = shift_hint;
2301 
2302     if (!kvm_enabled()) {
2303         /* Full emulation, tell caller to allocate htab itself */
2304         return 0;
2305     }
2306     if (kvm_vm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2307         int ret;
2308         ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2309         if (ret == -ENOTTY) {
2310             /*
2311              * At least some versions of PR KVM advertise the
2312              * capability, but don't implement the ioctl().  Oops.
2313              * Return 0 so that we allocate the htab in qemu, as is
2314              * correct for PR.
2315              */
2316             return 0;
2317         } else if (ret < 0) {
2318             return ret;
2319         }
2320         return shift;
2321     }
2322 
2323     /*
2324      * We have a kernel that predates the htab reset calls.  For PR
2325      * KVM, we need to allocate the htab ourselves, for an HV KVM of
2326      * this era, it has allocated a 16MB fixed size hash table
2327      * already.
2328      */
2329     if (kvmppc_is_pr(kvm_state)) {
2330         /* PR - tell caller to allocate htab */
2331         return 0;
2332     } else {
2333         /* HV - assume 16MB kernel allocated htab */
2334         return 24;
2335     }
2336 }
2337 
2338 static inline uint32_t mfpvr(void)
2339 {
2340     uint32_t pvr;
2341 
2342     asm ("mfpvr %0"
2343          : "=r"(pvr));
2344     return pvr;
2345 }
2346 
/* Set (on == true) or clear (on == false) the bits of flags in *word */
static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    uint64_t current = *word;

    *word = on ? (current | flags) : (current & ~flags);
}
2355 
2356 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2357 {
2358     PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2359     uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2360     uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2361 
2362     /* Now fix up the class with information we can query from the host */
2363     pcc->pvr = mfpvr();
2364 
2365     alter_insns(&pcc->insns_flags, PPC_ALTIVEC,
2366                 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_ALTIVEC);
2367     alter_insns(&pcc->insns_flags2, PPC2_VSX,
2368                 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_VSX);
2369     alter_insns(&pcc->insns_flags2, PPC2_DFP,
2370                 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_DFP);
2371 
2372     if (dcache_size != -1) {
2373         pcc->l1_dcache_size = dcache_size;
2374     }
2375 
2376     if (icache_size != -1) {
2377         pcc->l1_icache_size = icache_size;
2378     }
2379 
2380 #if defined(TARGET_PPC64)
2381     pcc->radix_page_info = kvm_get_radix_page_info();
2382 
2383     if ((pcc->pvr & 0xffffff00) == CPU_POWERPC_POWER9_DD1) {
2384         /*
2385          * POWER9 DD1 has some bugs which make it not really ISA 3.00
2386          * compliant.  More importantly, advertising ISA 3.00
2387          * architected mode may prevent guests from activating
2388          * necessary DD1 workarounds.
2389          */
2390         pcc->pcr_supported &= ~(PCR_COMPAT_3_00 | PCR_COMPAT_2_07
2391                                 | PCR_COMPAT_2_06 | PCR_COMPAT_2_05);
2392     }
2393 #endif /* defined(TARGET_PPC64) */
2394 }
2395 
2396 bool kvmppc_has_cap_epr(void)
2397 {
2398     return cap_epr;
2399 }
2400 
2401 bool kvmppc_has_cap_fixup_hcalls(void)
2402 {
2403     return cap_fixup_hcalls;
2404 }
2405 
2406 bool kvmppc_has_cap_htm(void)
2407 {
2408     return cap_htm;
2409 }
2410 
2411 bool kvmppc_has_cap_mmu_radix(void)
2412 {
2413     return cap_mmu_radix;
2414 }
2415 
2416 bool kvmppc_has_cap_mmu_hash_v3(void)
2417 {
2418     return cap_mmu_hash_v3;
2419 }
2420 
/*
 * Return true when the host PVR, masked with
 * CPU_POWERPC_POWER_SERVER_MASK, matches one of the POWER8E, POWER8NVL
 * or POWER8 base values.  Always false unless TARGET_PPC64 is defined.
 */
static bool kvmppc_power8_host(void)
{
#ifdef TARGET_PPC64
    uint32_t base_pvr = CPU_POWERPC_POWER_SERVER_MASK & mfpvr();

    if (base_pvr == CPU_POWERPC_POWER8E_BASE) {
        return true;
    }
    if (base_pvr == CPU_POWERPC_POWER8NVL_BASE) {
        return true;
    }
    if (base_pvr == CPU_POWERPC_POWER8_BASE) {
        return true;
    }
#endif /* TARGET_PPC64 */
    return false;
}
2434 
2435 static int parse_cap_ppc_safe_cache(struct kvm_ppc_cpu_char c)
2436 {
2437     bool l1d_thread_priv_req = !kvmppc_power8_host();
2438 
2439     if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_L1D_FLUSH_PR) {
2440         return 2;
2441     } else if ((!l1d_thread_priv_req ||
2442                 c.character & c.character_mask & H_CPU_CHAR_L1D_THREAD_PRIV) &&
2443                (c.character & c.character_mask
2444                 & (H_CPU_CHAR_L1D_FLUSH_ORI30 | H_CPU_CHAR_L1D_FLUSH_TRIG2))) {
2445         return 1;
2446     }
2447 
2448     return 0;
2449 }
2450 
2451 static int parse_cap_ppc_safe_bounds_check(struct kvm_ppc_cpu_char c)
2452 {
2453     if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_BNDS_CHK_SPEC_BAR) {
2454         return 2;
2455     } else if (c.character & c.character_mask & H_CPU_CHAR_SPEC_BAR_ORI31) {
2456         return 1;
2457     }
2458 
2459     return 0;
2460 }
2461 
2462 static int parse_cap_ppc_safe_indirect_branch(struct kvm_ppc_cpu_char c)
2463 {
2464     if ((~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_FLUSH_COUNT_CACHE) &&
2465         (~c.character & c.character_mask & H_CPU_CHAR_CACHE_COUNT_DIS) &&
2466         (~c.character & c.character_mask & H_CPU_CHAR_BCCTRL_SERIALISED)) {
2467         return SPAPR_CAP_FIXED_NA;
2468     } else if (c.behaviour & c.behaviour_mask & H_CPU_BEHAV_FLUSH_COUNT_CACHE) {
2469         return SPAPR_CAP_WORKAROUND;
2470     } else if (c.character & c.character_mask & H_CPU_CHAR_CACHE_COUNT_DIS) {
2471         return  SPAPR_CAP_FIXED_CCD;
2472     } else if (c.character & c.character_mask & H_CPU_CHAR_BCCTRL_SERIALISED) {
2473         return SPAPR_CAP_FIXED_IBS;
2474     }
2475 
2476     return 0;
2477 }
2478 
2479 static int parse_cap_ppc_count_cache_flush_assist(struct kvm_ppc_cpu_char c)
2480 {
2481     if (c.character & c.character_mask & H_CPU_CHAR_BCCTR_FLUSH_ASSIST) {
2482         return 1;
2483     }
2484     return 0;
2485 }
2486 
2487 bool kvmppc_has_cap_xive(void)
2488 {
2489     return cap_xive;
2490 }
2491 
2492 static void kvmppc_get_cpu_characteristics(KVMState *s)
2493 {
2494     struct kvm_ppc_cpu_char c;
2495     int ret;
2496 
2497     /* Assume broken */
2498     cap_ppc_safe_cache = 0;
2499     cap_ppc_safe_bounds_check = 0;
2500     cap_ppc_safe_indirect_branch = 0;
2501 
2502     ret = kvm_vm_check_extension(s, KVM_CAP_PPC_GET_CPU_CHAR);
2503     if (!ret) {
2504         return;
2505     }
2506     ret = kvm_vm_ioctl(s, KVM_PPC_GET_CPU_CHAR, &c);
2507     if (ret < 0) {
2508         return;
2509     }
2510 
2511     cap_ppc_safe_cache = parse_cap_ppc_safe_cache(c);
2512     cap_ppc_safe_bounds_check = parse_cap_ppc_safe_bounds_check(c);
2513     cap_ppc_safe_indirect_branch = parse_cap_ppc_safe_indirect_branch(c);
2514     cap_ppc_count_cache_flush_assist =
2515         parse_cap_ppc_count_cache_flush_assist(c);
2516 }
2517 
2518 int kvmppc_get_cap_safe_cache(void)
2519 {
2520     return cap_ppc_safe_cache;
2521 }
2522 
2523 int kvmppc_get_cap_safe_bounds_check(void)
2524 {
2525     return cap_ppc_safe_bounds_check;
2526 }
2527 
2528 int kvmppc_get_cap_safe_indirect_branch(void)
2529 {
2530     return cap_ppc_safe_indirect_branch;
2531 }
2532 
2533 int kvmppc_get_cap_count_cache_flush_assist(void)
2534 {
2535     return cap_ppc_count_cache_flush_assist;
2536 }
2537 
2538 bool kvmppc_has_cap_nested_kvm_hv(void)
2539 {
2540     return !!cap_ppc_nested_kvm_hv;
2541 }
2542 
2543 int kvmppc_set_cap_nested_kvm_hv(int enable)
2544 {
2545     return kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_NESTED_HV, 0, enable);
2546 }
2547 
2548 bool kvmppc_has_cap_spapr_vfio(void)
2549 {
2550     return cap_spapr_vfio;
2551 }
2552 
2553 int kvmppc_get_cap_large_decr(void)
2554 {
2555     return cap_large_decr;
2556 }
2557 
2558 int kvmppc_enable_cap_large_decr(PowerPCCPU *cpu, int enable)
2559 {
2560     CPUState *cs = CPU(cpu);
2561     uint64_t lpcr;
2562 
2563     kvm_get_one_reg(cs, KVM_REG_PPC_LPCR_64, &lpcr);
2564     /* Do we need to modify the LPCR? */
2565     if (!!(lpcr & LPCR_LD) != !!enable) {
2566         if (enable) {
2567             lpcr |= LPCR_LD;
2568         } else {
2569             lpcr &= ~LPCR_LD;
2570         }
2571         kvm_set_one_reg(cs, KVM_REG_PPC_LPCR_64, &lpcr);
2572         kvm_get_one_reg(cs, KVM_REG_PPC_LPCR_64, &lpcr);
2573 
2574         if (!!(lpcr & LPCR_LD) != !!enable) {
2575             return -1;
2576         }
2577     }
2578 
2579     return 0;
2580 }
2581 
2582 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
2583 {
2584     uint32_t host_pvr = mfpvr();
2585     PowerPCCPUClass *pvr_pcc;
2586 
2587     pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2588     if (pvr_pcc == NULL) {
2589         pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2590     }
2591 
2592     return pvr_pcc;
2593 }
2594 
2595 static int kvm_ppc_register_host_cpu_type(MachineState *ms)
2596 {
2597     TypeInfo type_info = {
2598         .name = TYPE_HOST_POWERPC_CPU,
2599         .class_init = kvmppc_host_cpu_class_init,
2600     };
2601     MachineClass *mc = MACHINE_GET_CLASS(ms);
2602     PowerPCCPUClass *pvr_pcc;
2603     ObjectClass *oc;
2604     DeviceClass *dc;
2605     int i;
2606 
2607     pvr_pcc = kvm_ppc_get_host_cpu_class();
2608     if (pvr_pcc == NULL) {
2609         return -1;
2610     }
2611     type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2612     type_register(&type_info);
2613     if (object_dynamic_cast(OBJECT(ms), TYPE_SPAPR_MACHINE)) {
2614         /* override TCG default cpu type with 'host' cpu model */
2615         mc->default_cpu_type = TYPE_HOST_POWERPC_CPU;
2616     }
2617 
2618     oc = object_class_by_name(type_info.name);
2619     g_assert(oc);
2620 
2621     /*
2622      * Update generic CPU family class alias (e.g. on a POWER8NVL host,
2623      * we want "POWER8" to be a "family" alias that points to the current
2624      * host CPU type, too)
2625      */
2626     dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc));
2627     for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) {
2628         if (strcasecmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) {
2629             char *suffix;
2630 
2631             ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc));
2632             suffix = strstr(ppc_cpu_aliases[i].model, POWERPC_CPU_TYPE_SUFFIX);
2633             if (suffix) {
2634                 *suffix = 0;
2635             }
2636             break;
2637         }
2638     }
2639 
2640     return 0;
2641 }
2642 
2643 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2644 {
2645     struct kvm_rtas_token_args args = {
2646         .token = token,
2647     };
2648 
2649     if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2650         return -ENOENT;
2651     }
2652 
2653     strncpy(args.name, function, sizeof(args.name));
2654 
2655     return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2656 }
2657 
2658 int kvmppc_get_htab_fd(bool write, uint64_t index, Error **errp)
2659 {
2660     struct kvm_get_htab_fd s = {
2661         .flags = write ? KVM_GET_HTAB_WRITE : 0,
2662         .start_index = index,
2663     };
2664     int ret;
2665 
2666     if (!cap_htab_fd) {
2667         error_setg(errp, "KVM version doesn't support %s the HPT",
2668                    write ? "writing" : "reading");
2669         return -ENOTSUP;
2670     }
2671 
2672     ret = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2673     if (ret < 0) {
2674         error_setg(errp, "Unable to open fd for %s HPT %s KVM: %s",
2675                    write ? "writing" : "reading", write ? "to" : "from",
2676                    strerror(errno));
2677         return -errno;
2678     }
2679 
2680     return ret;
2681 }
2682 
2683 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2684 {
2685     int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2686     uint8_t buf[bufsize];
2687     ssize_t rc;
2688 
2689     do {
2690         rc = read(fd, buf, bufsize);
2691         if (rc < 0) {
2692             fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2693                     strerror(errno));
2694             return rc;
2695         } else if (rc) {
2696             uint8_t *buffer = buf;
2697             ssize_t n = rc;
2698             while (n) {
2699                 struct kvm_get_htab_header *head =
2700                     (struct kvm_get_htab_header *) buffer;
2701                 size_t chunksize = sizeof(*head) +
2702                      HASH_PTE_SIZE_64 * head->n_valid;
2703 
2704                 qemu_put_be32(f, head->index);
2705                 qemu_put_be16(f, head->n_valid);
2706                 qemu_put_be16(f, head->n_invalid);
2707                 qemu_put_buffer(f, (void *)(head + 1),
2708                                 HASH_PTE_SIZE_64 * head->n_valid);
2709 
2710                 buffer += chunksize;
2711                 n -= chunksize;
2712             }
2713         }
2714     } while ((rc != 0)
2715              && ((max_ns < 0) ||
2716                  ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2717 
2718     return (rc == 0) ? 1 : 0;
2719 }
2720 
2721 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2722                            uint16_t n_valid, uint16_t n_invalid)
2723 {
2724     struct kvm_get_htab_header *buf;
2725     size_t chunksize = sizeof(*buf) + n_valid * HASH_PTE_SIZE_64;
2726     ssize_t rc;
2727 
2728     buf = alloca(chunksize);
2729     buf->index = index;
2730     buf->n_valid = n_valid;
2731     buf->n_invalid = n_invalid;
2732 
2733     qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64 * n_valid);
2734 
2735     rc = write(fd, buf, chunksize);
2736     if (rc < 0) {
2737         fprintf(stderr, "Error writing KVM hash table: %s\n",
2738                 strerror(errno));
2739         return rc;
2740     }
2741     if (rc != chunksize) {
2742         /* We should never get a short write on a single chunk */
2743         fprintf(stderr, "Short write, restoring KVM hash table\n");
2744         return -1;
2745     }
2746     return 0;
2747 }
2748 
2749 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2750 {
2751     return true;
2752 }
2753 
2754 void kvm_arch_init_irq_routing(KVMState *s)
2755 {
2756 }
2757 
2758 void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n)
2759 {
2760     int fd, rc;
2761     int i;
2762 
2763     fd = kvmppc_get_htab_fd(false, ptex, &error_abort);
2764 
2765     i = 0;
2766     while (i < n) {
2767         struct kvm_get_htab_header *hdr;
2768         int m = n < HPTES_PER_GROUP ? n : HPTES_PER_GROUP;
2769         char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64];
2770 
2771         rc = read(fd, buf, sizeof(buf));
2772         if (rc < 0) {
2773             hw_error("kvmppc_read_hptes: Unable to read HPTEs");
2774         }
2775 
2776         hdr = (struct kvm_get_htab_header *)buf;
2777         while ((i < n) && ((char *)hdr < (buf + rc))) {
2778             int invalid = hdr->n_invalid, valid = hdr->n_valid;
2779 
2780             if (hdr->index != (ptex + i)) {
2781                 hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32
2782                          " != (%"HWADDR_PRIu" + %d", hdr->index, ptex, i);
2783             }
2784 
2785             if (n - i < valid) {
2786                 valid = n - i;
2787             }
2788             memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * valid);
2789             i += valid;
2790 
2791             if ((n - i) < invalid) {
2792                 invalid = n - i;
2793             }
2794             memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64);
2795             i += invalid;
2796 
2797             hdr = (struct kvm_get_htab_header *)
2798                 ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid);
2799         }
2800     }
2801 
2802     close(fd);
2803 }
2804 
2805 void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1)
2806 {
2807     int fd, rc;
2808     struct {
2809         struct kvm_get_htab_header hdr;
2810         uint64_t pte0;
2811         uint64_t pte1;
2812     } buf;
2813 
2814     fd = kvmppc_get_htab_fd(true, 0 /* Ignored */, &error_abort);
2815 
2816     buf.hdr.n_valid = 1;
2817     buf.hdr.n_invalid = 0;
2818     buf.hdr.index = ptex;
2819     buf.pte0 = cpu_to_be64(pte0);
2820     buf.pte1 = cpu_to_be64(pte1);
2821 
2822     rc = write(fd, &buf, sizeof(buf));
2823     if (rc != sizeof(buf)) {
2824         hw_error("kvmppc_write_hpte: Unable to update KVM HPT");
2825     }
2826     close(fd);
2827 }
2828 
2829 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2830                              uint64_t address, uint32_t data, PCIDevice *dev)
2831 {
2832     return 0;
2833 }
2834 
2835 int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
2836                                 int vector, PCIDevice *dev)
2837 {
2838     return 0;
2839 }
2840 
/* Always returns 0; virq is not examined */
int kvm_arch_release_virq_post(int virq)
{
    const int ret = 0;

    return ret;
}
2845 
/* Return the low 16 bits of the MSI data word */
int kvm_arch_msi_data_to_gsi(uint32_t data)
{
    const uint32_t gsi_mask = 0xffff;

    return data & gsi_mask;
}
2850 
2851 int kvmppc_enable_hwrng(void)
2852 {
2853     if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2854         return -1;
2855     }
2856 
2857     return kvmppc_enable_hcall(kvm_state, H_RANDOM);
2858 }
2859 
2860 void kvmppc_check_papr_resize_hpt(Error **errp)
2861 {
2862     if (!kvm_enabled()) {
2863         return; /* No KVM, we're good */
2864     }
2865 
2866     if (cap_resize_hpt) {
2867         return; /* Kernel has explicit support, we're good */
2868     }
2869 
2870     /* Otherwise fallback on looking for PR KVM */
2871     if (kvmppc_is_pr(kvm_state)) {
2872         return;
2873     }
2874 
2875     error_setg(errp,
2876                "Hash page table resizing not available with this KVM version");
2877 }
2878 
2879 int kvmppc_resize_hpt_prepare(PowerPCCPU *cpu, target_ulong flags, int shift)
2880 {
2881     CPUState *cs = CPU(cpu);
2882     struct kvm_ppc_resize_hpt rhpt = {
2883         .flags = flags,
2884         .shift = shift,
2885     };
2886 
2887     if (!cap_resize_hpt) {
2888         return -ENOSYS;
2889     }
2890 
2891     return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_PREPARE, &rhpt);
2892 }
2893 
2894 int kvmppc_resize_hpt_commit(PowerPCCPU *cpu, target_ulong flags, int shift)
2895 {
2896     CPUState *cs = CPU(cpu);
2897     struct kvm_ppc_resize_hpt rhpt = {
2898         .flags = flags,
2899         .shift = shift,
2900     };
2901 
2902     if (!cap_resize_hpt) {
2903         return -ENOSYS;
2904     }
2905 
2906     return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_COMMIT, &rhpt);
2907 }
2908 
2909 /*
2910  * This is a helper function to detect a post migration scenario
2911  * in which a guest, running as KVM-HV, freezes in cpu_post_load because
2912  * the guest kernel can't handle a PVR value other than the actual host
2913  * PVR in KVM_SET_SREGS, even if pvr_match() returns true.
2914  *
2915  * If we don't have cap_ppc_pvr_compat and we're not running in PR
2916  * (so, we're HV), return true. The workaround itself is done in
2917  * cpu_post_load.
2918  *
2919  * The order here is important: we'll only check for KVM PR as a
2920  * fallback if the guest kernel can't handle the situation itself.
2921  * We need to avoid as much as possible querying the running KVM type
2922  * in QEMU level.
2923  */
2924 bool kvmppc_pvr_workaround_required(PowerPCCPU *cpu)
2925 {
2926     CPUState *cs = CPU(cpu);
2927 
2928     if (!kvm_enabled()) {
2929         return false;
2930     }
2931 
2932     if (cap_ppc_pvr_compat) {
2933         return false;
2934     }
2935 
2936     return !kvmppc_is_pr(cs->kvm_state);
2937 }
2938 
2939 void kvmppc_set_reg_ppc_online(PowerPCCPU *cpu, unsigned int online)
2940 {
2941     CPUState *cs = CPU(cpu);
2942 
2943     if (kvm_enabled()) {
2944         kvm_set_one_reg(cs, KVM_REG_PPC_ONLINE, &online);
2945     }
2946 }
2947