xref: /openbmc/qemu/target/ppc/kvm.c (revision 200280af0e19bfaeb9431eb0ee1ee2d8bf8d3a0a)
1 /*
2  * PowerPC implementation of KVM hooks
3  *
4  * Copyright IBM Corp. 2007
5  * Copyright (C) 2011 Freescale Semiconductor, Inc.
6  *
7  * Authors:
8  *  Jerone Young <jyoung5@us.ibm.com>
9  *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10  *  Hollis Blanchard <hollisb@us.ibm.com>
11  *
12  * This work is licensed under the terms of the GNU GPL, version 2 or later.
13  * See the COPYING file in the top-level directory.
14  *
15  */
16 
17 #include "qemu/osdep.h"
18 #include <dirent.h>
19 #include <sys/ioctl.h>
20 #include <sys/vfs.h>
21 
22 #include <linux/kvm.h>
23 
24 #include "qemu-common.h"
25 #include "qapi/error.h"
26 #include "qemu/error-report.h"
27 #include "cpu.h"
28 #include "cpu-models.h"
29 #include "qemu/timer.h"
30 #include "sysemu/sysemu.h"
31 #include "sysemu/hw_accel.h"
32 #include "kvm_ppc.h"
33 #include "sysemu/cpus.h"
34 #include "sysemu/device_tree.h"
35 #include "mmu-hash64.h"
36 
37 #include "hw/sysbus.h"
38 #include "hw/ppc/spapr.h"
39 #include "hw/ppc/spapr_cpu_core.h"
40 #include "hw/ppc/ppc.h"
41 #include "sysemu/watchdog.h"
42 #include "trace.h"
43 #include "exec/gdbstub.h"
44 #include "exec/memattrs.h"
45 #include "exec/ram_addr.h"
46 #include "sysemu/hostmem.h"
47 #include "qemu/cutils.h"
48 #include "qemu/mmap-alloc.h"
49 #include "elf.h"
50 #include "sysemu/kvm_int.h"
51 
52 //#define DEBUG_KVM
53 
/*
 * DPRINTF(): printf-style debug output on stderr.  Unless DEBUG_KVM is
 * defined the macro expands to an empty statement and its arguments are
 * never evaluated.
 */
#ifndef DEBUG_KVM
#define DPRINTF(fmt, ...) \
    do { } while (0)
#else
#define DPRINTF(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#endif
61 
62 #define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"
63 
64 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
65     KVM_CAP_LAST_INFO
66 };
67 
68 static int cap_interrupt_unset = false;
69 static int cap_interrupt_level = false;
70 static int cap_segstate;
71 static int cap_booke_sregs;
72 static int cap_ppc_smt;
73 static int cap_ppc_smt_possible;
74 static int cap_spapr_tce;
75 static int cap_spapr_tce_64;
76 static int cap_spapr_multitce;
77 static int cap_spapr_vfio;
78 static int cap_hior;
79 static int cap_one_reg;
80 static int cap_epr;
81 static int cap_ppc_watchdog;
82 static int cap_papr;
83 static int cap_htab_fd;
84 static int cap_fixup_hcalls;
85 static int cap_htm;             /* Hardware transactional memory support */
86 static int cap_mmu_radix;
87 static int cap_mmu_hash_v3;
88 static int cap_resize_hpt;
89 static int cap_ppc_pvr_compat;
90 static int cap_ppc_safe_cache;
91 static int cap_ppc_safe_bounds_check;
92 static int cap_ppc_safe_indirect_branch;
93 static int cap_ppc_nested_kvm_hv;
94 
95 static uint32_t debug_inst_opcode;
96 
97 /* XXX We have a race condition where we actually have a level triggered
98  *     interrupt, but the infrastructure can't expose that yet, so the guest
99  *     takes but ignores it, goes to sleep and never gets notified that there's
100  *     still an interrupt pending.
101  *
102  *     As a quick workaround, let's just wake up again 20 ms after we injected
103  *     an interrupt. That way we can assure that we're always reinjecting
104  *     interrupts in case the guest swallowed them.
105  */
106 static QEMUTimer *idle_timer;
107 
108 static void kvm_kick_cpu(void *opaque)
109 {
110     PowerPCCPU *cpu = opaque;
111 
112     qemu_cpu_kick(CPU(cpu));
113 }
114 
115 /* Check whether we are running with KVM-PR (instead of KVM-HV).  This
116  * should only be used for fallback tests - generally we should use
117  * explicit capabilities for the features we want, rather than
118  * assuming what is/isn't available depending on the KVM variant. */
119 static bool kvmppc_is_pr(KVMState *ks)
120 {
121     /* Assume KVM-PR if the GET_PVINFO capability is available */
122     return kvm_vm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
123 }
124 
125 static int kvm_ppc_register_host_cpu_type(MachineState *ms);
126 static void kvmppc_get_cpu_characteristics(KVMState *s);
127 
128 int kvm_arch_init(MachineState *ms, KVMState *s)
129 {
130     cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
131     cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
132     cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
133     cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
134     cap_ppc_smt_possible = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT_POSSIBLE);
135     cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
136     cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64);
137     cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
138     cap_spapr_vfio = kvm_vm_check_extension(s, KVM_CAP_SPAPR_TCE_VFIO);
139     cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
140     cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
141     cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
142     cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
143     /* Note: we don't set cap_papr here, because this capability is
144      * only activated after this by kvmppc_set_papr() */
145     cap_htab_fd = kvm_vm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
146     cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
147     cap_ppc_smt = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT);
148     cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
149     cap_mmu_radix = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_RADIX);
150     cap_mmu_hash_v3 = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_HASH_V3);
151     cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT);
152     kvmppc_get_cpu_characteristics(s);
153     cap_ppc_nested_kvm_hv = kvm_vm_check_extension(s, KVM_CAP_PPC_NESTED_HV);
154     /*
155      * Note: setting it to false because there is not such capability
156      * in KVM at this moment.
157      *
158      * TODO: call kvm_vm_check_extension() with the right capability
159      * after the kernel starts implementing it.*/
160     cap_ppc_pvr_compat = false;
161 
162     if (!cap_interrupt_level) {
163         fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
164                         "VM to stall at times!\n");
165     }
166 
167     kvm_ppc_register_host_cpu_type(ms);
168 
169     return 0;
170 }
171 
172 int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
173 {
174     return 0;
175 }
176 
177 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
178 {
179     CPUPPCState *cenv = &cpu->env;
180     CPUState *cs = CPU(cpu);
181     struct kvm_sregs sregs;
182     int ret;
183 
184     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
185         /* What we're really trying to say is "if we're on BookE, we use
186            the native PVR for now". This is the only sane way to check
187            it though, so we potentially confuse users that they can run
188            BookE guests on BookS. Let's hope nobody dares enough :) */
189         return 0;
190     } else {
191         if (!cap_segstate) {
192             fprintf(stderr, "kvm error: missing PVR setting capability\n");
193             return -ENOSYS;
194         }
195     }
196 
197     ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
198     if (ret) {
199         return ret;
200     }
201 
202     sregs.pvr = cenv->spr[SPR_PVR];
203     return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
204 }
205 
206 /* Set up a shared TLB array with KVM */
207 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
208 {
209     CPUPPCState *env = &cpu->env;
210     CPUState *cs = CPU(cpu);
211     struct kvm_book3e_206_tlb_params params = {};
212     struct kvm_config_tlb cfg = {};
213     unsigned int entries = 0;
214     int ret, i;
215 
216     if (!kvm_enabled() ||
217         !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
218         return 0;
219     }
220 
221     assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
222 
223     for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
224         params.tlb_sizes[i] = booke206_tlb_size(env, i);
225         params.tlb_ways[i] = booke206_tlb_ways(env, i);
226         entries += params.tlb_sizes[i];
227     }
228 
229     assert(entries == env->nb_tlb);
230     assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
231 
232     env->tlb_dirty = true;
233 
234     cfg.array = (uintptr_t)env->tlb.tlbm;
235     cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
236     cfg.params = (uintptr_t)&params;
237     cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
238 
239     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
240     if (ret < 0) {
241         fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
242                 __func__, strerror(-ret));
243         return ret;
244     }
245 
246     env->kvm_sw_tlb = true;
247     return 0;
248 }
249 
250 
251 #if defined(TARGET_PPC64)
252 static void kvm_get_smmu_info(struct kvm_ppc_smmu_info *info, Error **errp)
253 {
254     int ret;
255 
256     assert(kvm_state != NULL);
257 
258     if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
259         error_setg(errp, "KVM doesn't expose the MMU features it supports");
260         error_append_hint(errp, "Consider switching to a newer KVM\n");
261         return;
262     }
263 
264     ret = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_SMMU_INFO, info);
265     if (ret == 0) {
266         return;
267     }
268 
269     error_setg_errno(errp, -ret,
270                      "KVM failed to provide the MMU features it supports");
271 }
272 
273 struct ppc_radix_page_info *kvm_get_radix_page_info(void)
274 {
275     KVMState *s = KVM_STATE(current_machine->accelerator);
276     struct ppc_radix_page_info *radix_page_info;
277     struct kvm_ppc_rmmu_info rmmu_info;
278     int i;
279 
280     if (!kvm_check_extension(s, KVM_CAP_PPC_MMU_RADIX)) {
281         return NULL;
282     }
283     if (kvm_vm_ioctl(s, KVM_PPC_GET_RMMU_INFO, &rmmu_info)) {
284         return NULL;
285     }
286     radix_page_info = g_malloc0(sizeof(*radix_page_info));
287     radix_page_info->count = 0;
288     for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) {
289         if (rmmu_info.ap_encodings[i]) {
290             radix_page_info->entries[i] = rmmu_info.ap_encodings[i];
291             radix_page_info->count++;
292         }
293     }
294     return radix_page_info;
295 }
296 
297 target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu,
298                                      bool radix, bool gtse,
299                                      uint64_t proc_tbl)
300 {
301     CPUState *cs = CPU(cpu);
302     int ret;
303     uint64_t flags = 0;
304     struct kvm_ppc_mmuv3_cfg cfg = {
305         .process_table = proc_tbl,
306     };
307 
308     if (radix) {
309         flags |= KVM_PPC_MMUV3_RADIX;
310     }
311     if (gtse) {
312         flags |= KVM_PPC_MMUV3_GTSE;
313     }
314     cfg.flags = flags;
315     ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_CONFIGURE_V3_MMU, &cfg);
316     switch (ret) {
317     case 0:
318         return H_SUCCESS;
319     case -EINVAL:
320         return H_PARAMETER;
321     case -ENODEV:
322         return H_NOT_AVAILABLE;
323     default:
324         return H_HARDWARE;
325     }
326 }
327 
328 bool kvmppc_hpt_needs_host_contiguous_pages(void)
329 {
330     static struct kvm_ppc_smmu_info smmu_info;
331 
332     if (!kvm_enabled()) {
333         return false;
334     }
335 
336     kvm_get_smmu_info(&smmu_info, &error_fatal);
337     return !!(smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL);
338 }
339 
340 void kvm_check_mmu(PowerPCCPU *cpu, Error **errp)
341 {
342     struct kvm_ppc_smmu_info smmu_info;
343     int iq, ik, jq, jk;
344     Error *local_err = NULL;
345 
346     /* For now, we only have anything to check on hash64 MMUs */
347     if (!cpu->hash64_opts || !kvm_enabled()) {
348         return;
349     }
350 
351     kvm_get_smmu_info(&smmu_info, &local_err);
352     if (local_err) {
353         error_propagate(errp, local_err);
354         return;
355     }
356 
357     if (ppc_hash64_has(cpu, PPC_HASH64_1TSEG)
358         && !(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
359         error_setg(errp,
360                    "KVM does not support 1TiB segments which guest expects");
361         return;
362     }
363 
364     if (smmu_info.slb_size < cpu->hash64_opts->slb_size) {
365         error_setg(errp, "KVM only supports %u SLB entries, but guest needs %u",
366                    smmu_info.slb_size, cpu->hash64_opts->slb_size);
367         return;
368     }
369 
370     /*
371      * Verify that every pagesize supported by the cpu model is
372      * supported by KVM with the same encodings
373      */
374     for (iq = 0; iq < ARRAY_SIZE(cpu->hash64_opts->sps); iq++) {
375         PPCHash64SegmentPageSizes *qsps = &cpu->hash64_opts->sps[iq];
376         struct kvm_ppc_one_seg_page_size *ksps;
377 
378         for (ik = 0; ik < ARRAY_SIZE(smmu_info.sps); ik++) {
379             if (qsps->page_shift == smmu_info.sps[ik].page_shift) {
380                 break;
381             }
382         }
383         if (ik >= ARRAY_SIZE(smmu_info.sps)) {
384             error_setg(errp, "KVM doesn't support for base page shift %u",
385                        qsps->page_shift);
386             return;
387         }
388 
389         ksps = &smmu_info.sps[ik];
390         if (ksps->slb_enc != qsps->slb_enc) {
391             error_setg(errp,
392 "KVM uses SLB encoding 0x%x for page shift %u, but guest expects 0x%x",
393                        ksps->slb_enc, ksps->page_shift, qsps->slb_enc);
394             return;
395         }
396 
397         for (jq = 0; jq < ARRAY_SIZE(qsps->enc); jq++) {
398             for (jk = 0; jk < ARRAY_SIZE(ksps->enc); jk++) {
399                 if (qsps->enc[jq].page_shift == ksps->enc[jk].page_shift) {
400                     break;
401                 }
402             }
403 
404             if (jk >= ARRAY_SIZE(ksps->enc)) {
405                 error_setg(errp, "KVM doesn't support page shift %u/%u",
406                            qsps->enc[jq].page_shift, qsps->page_shift);
407                 return;
408             }
409             if (qsps->enc[jq].pte_enc != ksps->enc[jk].pte_enc) {
410                 error_setg(errp,
411 "KVM uses PTE encoding 0x%x for page shift %u/%u, but guest expects 0x%x",
412                            ksps->enc[jk].pte_enc, qsps->enc[jq].page_shift,
413                            qsps->page_shift, qsps->enc[jq].pte_enc);
414                 return;
415             }
416         }
417     }
418 
419     if (ppc_hash64_has(cpu, PPC_HASH64_CI_LARGEPAGE)) {
420         /* Mostly what guest pagesizes we can use are related to the
421          * host pages used to map guest RAM, which is handled in the
422          * platform code. Cache-Inhibited largepages (64k) however are
423          * used for I/O, so if they're mapped to the host at all it
424          * will be a normal mapping, not a special hugepage one used
425          * for RAM. */
426         if (getpagesize() < 0x10000) {
427             error_setg(errp,
428                        "KVM can't supply 64kiB CI pages, which guest expects");
429         }
430     }
431 }
#endif /* defined(TARGET_PPC64) */
433 
434 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
435 {
436     return POWERPC_CPU(cpu)->vcpu_id;
437 }
438 
439 /* e500 supports 2 h/w breakpoint and 2 watchpoint.
440  * book3s supports only 1 watchpoint, so array size
441  * of 4 is sufficient for now.
442  */
443 #define MAX_HW_BKPTS 4
444 
445 static struct HWBreakpoint {
446     target_ulong addr;
447     int type;
448 } hw_debug_points[MAX_HW_BKPTS];
449 
450 static CPUWatchpoint hw_watchpoint;
451 
452 /* Default there is no breakpoint and watchpoint supported */
453 static int max_hw_breakpoint;
454 static int max_hw_watchpoint;
455 static int nb_hw_breakpoint;
456 static int nb_hw_watchpoint;
457 
458 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
459 {
460     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
461         max_hw_breakpoint = 2;
462         max_hw_watchpoint = 2;
463     }
464 
465     if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
466         fprintf(stderr, "Error initializing h/w breakpoints\n");
467         return;
468     }
469 }
470 
471 int kvm_arch_init_vcpu(CPUState *cs)
472 {
473     PowerPCCPU *cpu = POWERPC_CPU(cs);
474     CPUPPCState *cenv = &cpu->env;
475     int ret;
476 
477     /* Synchronize sregs with kvm */
478     ret = kvm_arch_sync_sregs(cpu);
479     if (ret) {
480         if (ret == -EINVAL) {
481             error_report("Register sync failed... If you're using kvm-hv.ko,"
482                          " only \"-cpu host\" is possible");
483         }
484         return ret;
485     }
486 
487     idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
488 
489     switch (cenv->mmu_model) {
490     case POWERPC_MMU_BOOKE206:
491         /* This target supports access to KVM's guest TLB */
492         ret = kvm_booke206_tlb_init(cpu);
493         break;
494     case POWERPC_MMU_2_07:
495         if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
496             /* KVM-HV has transactional memory on POWER8 also without the
497              * KVM_CAP_PPC_HTM extension, so enable it here instead as
498              * long as it's availble to userspace on the host. */
499             if (qemu_getauxval(AT_HWCAP2) & PPC_FEATURE2_HAS_HTM) {
500                 cap_htm = true;
501             }
502         }
503         break;
504     default:
505         break;
506     }
507 
508     kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
509     kvmppc_hw_debug_points_init(cenv);
510 
511     return ret;
512 }
513 
514 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
515 {
516     CPUPPCState *env = &cpu->env;
517     CPUState *cs = CPU(cpu);
518     struct kvm_dirty_tlb dirty_tlb;
519     unsigned char *bitmap;
520     int ret;
521 
522     if (!env->kvm_sw_tlb) {
523         return;
524     }
525 
526     bitmap = g_malloc((env->nb_tlb + 7) / 8);
527     memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
528 
529     dirty_tlb.bitmap = (uintptr_t)bitmap;
530     dirty_tlb.num_dirty = env->nb_tlb;
531 
532     ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
533     if (ret) {
534         fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
535                 __func__, strerror(-ret));
536     }
537 
538     g_free(bitmap);
539 }
540 
541 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
542 {
543     PowerPCCPU *cpu = POWERPC_CPU(cs);
544     CPUPPCState *env = &cpu->env;
545     union {
546         uint32_t u32;
547         uint64_t u64;
548     } val;
549     struct kvm_one_reg reg = {
550         .id = id,
551         .addr = (uintptr_t) &val,
552     };
553     int ret;
554 
555     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
556     if (ret != 0) {
557         trace_kvm_failed_spr_get(spr, strerror(errno));
558     } else {
559         switch (id & KVM_REG_SIZE_MASK) {
560         case KVM_REG_SIZE_U32:
561             env->spr[spr] = val.u32;
562             break;
563 
564         case KVM_REG_SIZE_U64:
565             env->spr[spr] = val.u64;
566             break;
567 
568         default:
569             /* Don't handle this size yet */
570             abort();
571         }
572     }
573 }
574 
575 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
576 {
577     PowerPCCPU *cpu = POWERPC_CPU(cs);
578     CPUPPCState *env = &cpu->env;
579     union {
580         uint32_t u32;
581         uint64_t u64;
582     } val;
583     struct kvm_one_reg reg = {
584         .id = id,
585         .addr = (uintptr_t) &val,
586     };
587     int ret;
588 
589     switch (id & KVM_REG_SIZE_MASK) {
590     case KVM_REG_SIZE_U32:
591         val.u32 = env->spr[spr];
592         break;
593 
594     case KVM_REG_SIZE_U64:
595         val.u64 = env->spr[spr];
596         break;
597 
598     default:
599         /* Don't handle this size yet */
600         abort();
601     }
602 
603     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
604     if (ret != 0) {
605         trace_kvm_failed_spr_set(spr, strerror(errno));
606     }
607 }
608 
609 static int kvm_put_fp(CPUState *cs)
610 {
611     PowerPCCPU *cpu = POWERPC_CPU(cs);
612     CPUPPCState *env = &cpu->env;
613     struct kvm_one_reg reg;
614     int i;
615     int ret;
616 
617     if (env->insns_flags & PPC_FLOAT) {
618         uint64_t fpscr = env->fpscr;
619         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
620 
621         reg.id = KVM_REG_PPC_FPSCR;
622         reg.addr = (uintptr_t)&fpscr;
623         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
624         if (ret < 0) {
625             DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
626             return ret;
627         }
628 
629         for (i = 0; i < 32; i++) {
630             uint64_t vsr[2];
631             uint64_t *fpr = cpu_fpr_ptr(&cpu->env, i);
632             uint64_t *vsrl = cpu_vsrl_ptr(&cpu->env, i);
633 
634 #ifdef HOST_WORDS_BIGENDIAN
635             vsr[0] = float64_val(*fpr);
636             vsr[1] = *vsrl;
637 #else
638             vsr[0] = *vsrl;
639             vsr[1] = float64_val(*fpr);
640 #endif
641             reg.addr = (uintptr_t) &vsr;
642             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
643 
644             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
645             if (ret < 0) {
646                 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
647                         i, strerror(errno));
648                 return ret;
649             }
650         }
651     }
652 
653     if (env->insns_flags & PPC_ALTIVEC) {
654         reg.id = KVM_REG_PPC_VSCR;
655         reg.addr = (uintptr_t)&env->vscr;
656         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
657         if (ret < 0) {
658             DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
659             return ret;
660         }
661 
662         for (i = 0; i < 32; i++) {
663             reg.id = KVM_REG_PPC_VR(i);
664             reg.addr = (uintptr_t)cpu_avr_ptr(env, i);
665             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
666             if (ret < 0) {
667                 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
668                 return ret;
669             }
670         }
671     }
672 
673     return 0;
674 }
675 
676 static int kvm_get_fp(CPUState *cs)
677 {
678     PowerPCCPU *cpu = POWERPC_CPU(cs);
679     CPUPPCState *env = &cpu->env;
680     struct kvm_one_reg reg;
681     int i;
682     int ret;
683 
684     if (env->insns_flags & PPC_FLOAT) {
685         uint64_t fpscr;
686         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
687 
688         reg.id = KVM_REG_PPC_FPSCR;
689         reg.addr = (uintptr_t)&fpscr;
690         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
691         if (ret < 0) {
692             DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
693             return ret;
694         } else {
695             env->fpscr = fpscr;
696         }
697 
698         for (i = 0; i < 32; i++) {
699             uint64_t vsr[2];
700             uint64_t *fpr = cpu_fpr_ptr(&cpu->env, i);
701             uint64_t *vsrl = cpu_vsrl_ptr(&cpu->env, i);
702 
703             reg.addr = (uintptr_t) &vsr;
704             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
705 
706             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
707             if (ret < 0) {
708                 DPRINTF("Unable to get %s%d from KVM: %s\n",
709                         vsx ? "VSR" : "FPR", i, strerror(errno));
710                 return ret;
711             } else {
712 #ifdef HOST_WORDS_BIGENDIAN
713                 *fpr = vsr[0];
714                 if (vsx) {
715                     *vsrl = vsr[1];
716                 }
717 #else
718                 *fpr = vsr[1];
719                 if (vsx) {
720                     *vsrl = vsr[0];
721                 }
722 #endif
723             }
724         }
725     }
726 
727     if (env->insns_flags & PPC_ALTIVEC) {
728         reg.id = KVM_REG_PPC_VSCR;
729         reg.addr = (uintptr_t)&env->vscr;
730         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
731         if (ret < 0) {
732             DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
733             return ret;
734         }
735 
736         for (i = 0; i < 32; i++) {
737             reg.id = KVM_REG_PPC_VR(i);
738             reg.addr = (uintptr_t)cpu_avr_ptr(env, i);
739             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
740             if (ret < 0) {
741                 DPRINTF("Unable to get VR%d from KVM: %s\n",
742                         i, strerror(errno));
743                 return ret;
744             }
745         }
746     }
747 
748     return 0;
749 }
750 
751 #if defined(TARGET_PPC64)
752 static int kvm_get_vpa(CPUState *cs)
753 {
754     PowerPCCPU *cpu = POWERPC_CPU(cs);
755     sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu);
756     struct kvm_one_reg reg;
757     int ret;
758 
759     reg.id = KVM_REG_PPC_VPA_ADDR;
760     reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
761     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
762     if (ret < 0) {
763         DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
764         return ret;
765     }
766 
767     assert((uintptr_t)&spapr_cpu->slb_shadow_size
768            == ((uintptr_t)&spapr_cpu->slb_shadow_addr + 8));
769     reg.id = KVM_REG_PPC_VPA_SLB;
770     reg.addr = (uintptr_t)&spapr_cpu->slb_shadow_addr;
771     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
772     if (ret < 0) {
773         DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
774                 strerror(errno));
775         return ret;
776     }
777 
778     assert((uintptr_t)&spapr_cpu->dtl_size
779            == ((uintptr_t)&spapr_cpu->dtl_addr + 8));
780     reg.id = KVM_REG_PPC_VPA_DTL;
781     reg.addr = (uintptr_t)&spapr_cpu->dtl_addr;
782     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
783     if (ret < 0) {
784         DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
785                 strerror(errno));
786         return ret;
787     }
788 
789     return 0;
790 }
791 
792 static int kvm_put_vpa(CPUState *cs)
793 {
794     PowerPCCPU *cpu = POWERPC_CPU(cs);
795     sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu);
796     struct kvm_one_reg reg;
797     int ret;
798 
799     /* SLB shadow or DTL can't be registered unless a master VPA is
800      * registered.  That means when restoring state, if a VPA *is*
801      * registered, we need to set that up first.  If not, we need to
802      * deregister the others before deregistering the master VPA */
803     assert(spapr_cpu->vpa_addr
804            || !(spapr_cpu->slb_shadow_addr || spapr_cpu->dtl_addr));
805 
806     if (spapr_cpu->vpa_addr) {
807         reg.id = KVM_REG_PPC_VPA_ADDR;
808         reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
809         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
810         if (ret < 0) {
811             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
812             return ret;
813         }
814     }
815 
816     assert((uintptr_t)&spapr_cpu->slb_shadow_size
817            == ((uintptr_t)&spapr_cpu->slb_shadow_addr + 8));
818     reg.id = KVM_REG_PPC_VPA_SLB;
819     reg.addr = (uintptr_t)&spapr_cpu->slb_shadow_addr;
820     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
821     if (ret < 0) {
822         DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
823         return ret;
824     }
825 
826     assert((uintptr_t)&spapr_cpu->dtl_size
827            == ((uintptr_t)&spapr_cpu->dtl_addr + 8));
828     reg.id = KVM_REG_PPC_VPA_DTL;
829     reg.addr = (uintptr_t)&spapr_cpu->dtl_addr;
830     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
831     if (ret < 0) {
832         DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
833                 strerror(errno));
834         return ret;
835     }
836 
837     if (!spapr_cpu->vpa_addr) {
838         reg.id = KVM_REG_PPC_VPA_ADDR;
839         reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
840         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
841         if (ret < 0) {
842             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
843             return ret;
844         }
845     }
846 
847     return 0;
848 }
849 #endif /* TARGET_PPC64 */
850 
851 int kvmppc_put_books_sregs(PowerPCCPU *cpu)
852 {
853     CPUPPCState *env = &cpu->env;
854     struct kvm_sregs sregs;
855     int i;
856 
857     sregs.pvr = env->spr[SPR_PVR];
858 
859     if (cpu->vhyp) {
860         PPCVirtualHypervisorClass *vhc =
861             PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
862         sregs.u.s.sdr1 = vhc->encode_hpt_for_kvm_pr(cpu->vhyp);
863     } else {
864         sregs.u.s.sdr1 = env->spr[SPR_SDR1];
865     }
866 
867     /* Sync SLB */
868 #ifdef TARGET_PPC64
869     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
870         sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
871         if (env->slb[i].esid & SLB_ESID_V) {
872             sregs.u.s.ppc64.slb[i].slbe |= i;
873         }
874         sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
875     }
876 #endif
877 
878     /* Sync SRs */
879     for (i = 0; i < 16; i++) {
880         sregs.u.s.ppc32.sr[i] = env->sr[i];
881     }
882 
883     /* Sync BATs */
884     for (i = 0; i < 8; i++) {
885         /* Beware. We have to swap upper and lower bits here */
886         sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
887             | env->DBAT[1][i];
888         sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
889             | env->IBAT[1][i];
890     }
891 
892     return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
893 }
894 
895 int kvm_arch_put_registers(CPUState *cs, int level)
896 {
897     PowerPCCPU *cpu = POWERPC_CPU(cs);
898     CPUPPCState *env = &cpu->env;
899     struct kvm_regs regs;
900     int ret;
901     int i;
902 
903     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
904     if (ret < 0) {
905         return ret;
906     }
907 
908     regs.ctr = env->ctr;
909     regs.lr  = env->lr;
910     regs.xer = cpu_read_xer(env);
911     regs.msr = env->msr;
912     regs.pc = env->nip;
913 
914     regs.srr0 = env->spr[SPR_SRR0];
915     regs.srr1 = env->spr[SPR_SRR1];
916 
917     regs.sprg0 = env->spr[SPR_SPRG0];
918     regs.sprg1 = env->spr[SPR_SPRG1];
919     regs.sprg2 = env->spr[SPR_SPRG2];
920     regs.sprg3 = env->spr[SPR_SPRG3];
921     regs.sprg4 = env->spr[SPR_SPRG4];
922     regs.sprg5 = env->spr[SPR_SPRG5];
923     regs.sprg6 = env->spr[SPR_SPRG6];
924     regs.sprg7 = env->spr[SPR_SPRG7];
925 
926     regs.pid = env->spr[SPR_BOOKE_PID];
927 
928     for (i = 0;i < 32; i++)
929         regs.gpr[i] = env->gpr[i];
930 
931     regs.cr = 0;
932     for (i = 0; i < 8; i++) {
933         regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
934     }
935 
936     ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
937     if (ret < 0)
938         return ret;
939 
940     kvm_put_fp(cs);
941 
942     if (env->tlb_dirty) {
943         kvm_sw_tlb_put(cpu);
944         env->tlb_dirty = false;
945     }
946 
947     if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
948         ret = kvmppc_put_books_sregs(cpu);
949         if (ret < 0) {
950             return ret;
951         }
952     }
953 
954     if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
955         kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
956     }
957 
958     if (cap_one_reg) {
959         int i;
960 
961         /* We deliberately ignore errors here, for kernels which have
962          * the ONE_REG calls, but don't support the specific
963          * registers, there's a reasonable chance things will still
964          * work, at least until we try to migrate. */
965         for (i = 0; i < 1024; i++) {
966             uint64_t id = env->spr_cb[i].one_reg_id;
967 
968             if (id != 0) {
969                 kvm_put_one_spr(cs, id, i);
970             }
971         }
972 
973 #ifdef TARGET_PPC64
974         if (msr_ts) {
975             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
976                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
977             }
978             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
979                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
980             }
981             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
982             kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
983             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
984             kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
985             kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
986             kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
987             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
988             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
989             kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
990             kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
991         }
992 
993         if (cap_papr) {
994             if (kvm_put_vpa(cs) < 0) {
995                 DPRINTF("Warning: Unable to set VPA information to KVM\n");
996             }
997         }
998 
999         kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1000 #endif /* TARGET_PPC64 */
1001     }
1002 
1003     return ret;
1004 }
1005 
1006 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1007 {
1008      env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1009 }
1010 
1011 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1012 {
1013     CPUPPCState *env = &cpu->env;
1014     struct kvm_sregs sregs;
1015     int ret;
1016 
1017     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1018     if (ret < 0) {
1019         return ret;
1020     }
1021 
1022     if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1023         env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1024         env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1025         env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1026         env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1027         env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1028         env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1029         env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1030         env->spr[SPR_DECR] = sregs.u.e.dec;
1031         env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1032         env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1033         env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1034     }
1035 
1036     if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1037         env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1038         env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1039         env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1040         env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1041         env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1042     }
1043 
1044     if (sregs.u.e.features & KVM_SREGS_E_64) {
1045         env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1046     }
1047 
1048     if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1049         env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1050     }
1051 
1052     if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1053         env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1054         kvm_sync_excp(env, POWERPC_EXCP_CRITICAL,  SPR_BOOKE_IVOR0);
1055         env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1056         kvm_sync_excp(env, POWERPC_EXCP_MCHECK,  SPR_BOOKE_IVOR1);
1057         env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1058         kvm_sync_excp(env, POWERPC_EXCP_DSI,  SPR_BOOKE_IVOR2);
1059         env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1060         kvm_sync_excp(env, POWERPC_EXCP_ISI,  SPR_BOOKE_IVOR3);
1061         env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1062         kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL,  SPR_BOOKE_IVOR4);
1063         env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1064         kvm_sync_excp(env, POWERPC_EXCP_ALIGN,  SPR_BOOKE_IVOR5);
1065         env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1066         kvm_sync_excp(env, POWERPC_EXCP_PROGRAM,  SPR_BOOKE_IVOR6);
1067         env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1068         kvm_sync_excp(env, POWERPC_EXCP_FPU,  SPR_BOOKE_IVOR7);
1069         env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1070         kvm_sync_excp(env, POWERPC_EXCP_SYSCALL,  SPR_BOOKE_IVOR8);
1071         env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1072         kvm_sync_excp(env, POWERPC_EXCP_APU,  SPR_BOOKE_IVOR9);
1073         env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1074         kvm_sync_excp(env, POWERPC_EXCP_DECR,  SPR_BOOKE_IVOR10);
1075         env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1076         kvm_sync_excp(env, POWERPC_EXCP_FIT,  SPR_BOOKE_IVOR11);
1077         env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1078         kvm_sync_excp(env, POWERPC_EXCP_WDT,  SPR_BOOKE_IVOR12);
1079         env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1080         kvm_sync_excp(env, POWERPC_EXCP_DTLB,  SPR_BOOKE_IVOR13);
1081         env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1082         kvm_sync_excp(env, POWERPC_EXCP_ITLB,  SPR_BOOKE_IVOR14);
1083         env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1084         kvm_sync_excp(env, POWERPC_EXCP_DEBUG,  SPR_BOOKE_IVOR15);
1085 
1086         if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1087             env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1088             kvm_sync_excp(env, POWERPC_EXCP_SPEU,  SPR_BOOKE_IVOR32);
1089             env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1090             kvm_sync_excp(env, POWERPC_EXCP_EFPDI,  SPR_BOOKE_IVOR33);
1091             env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1092             kvm_sync_excp(env, POWERPC_EXCP_EFPRI,  SPR_BOOKE_IVOR34);
1093         }
1094 
1095         if (sregs.u.e.features & KVM_SREGS_E_PM) {
1096             env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1097             kvm_sync_excp(env, POWERPC_EXCP_EPERFM,  SPR_BOOKE_IVOR35);
1098         }
1099 
1100         if (sregs.u.e.features & KVM_SREGS_E_PC) {
1101             env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1102             kvm_sync_excp(env, POWERPC_EXCP_DOORI,  SPR_BOOKE_IVOR36);
1103             env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1104             kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1105         }
1106     }
1107 
1108     if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1109         env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1110         env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1111         env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1112         env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1113         env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1114         env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1115         env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1116         env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1117         env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1118         env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1119     }
1120 
1121     if (sregs.u.e.features & KVM_SREGS_EXP) {
1122         env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1123     }
1124 
1125     if (sregs.u.e.features & KVM_SREGS_E_PD) {
1126         env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1127         env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1128     }
1129 
1130     if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1131         env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1132         env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1133         env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1134 
1135         if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1136             env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1137             env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1138         }
1139     }
1140 
1141     return 0;
1142 }
1143 
1144 static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1145 {
1146     CPUPPCState *env = &cpu->env;
1147     struct kvm_sregs sregs;
1148     int ret;
1149     int i;
1150 
1151     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1152     if (ret < 0) {
1153         return ret;
1154     }
1155 
1156     if (!cpu->vhyp) {
1157         ppc_store_sdr1(env, sregs.u.s.sdr1);
1158     }
1159 
1160     /* Sync SLB */
1161 #ifdef TARGET_PPC64
1162     /*
1163      * The packed SLB array we get from KVM_GET_SREGS only contains
1164      * information about valid entries. So we flush our internal copy
1165      * to get rid of stale ones, then put all valid SLB entries back
1166      * in.
1167      */
1168     memset(env->slb, 0, sizeof(env->slb));
1169     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1170         target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1171         target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1172         /*
1173          * Only restore valid entries
1174          */
1175         if (rb & SLB_ESID_V) {
1176             ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1177         }
1178     }
1179 #endif
1180 
1181     /* Sync SRs */
1182     for (i = 0; i < 16; i++) {
1183         env->sr[i] = sregs.u.s.ppc32.sr[i];
1184     }
1185 
1186     /* Sync BATs */
1187     for (i = 0; i < 8; i++) {
1188         env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1189         env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1190         env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1191         env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1192     }
1193 
1194     return 0;
1195 }
1196 
1197 int kvm_arch_get_registers(CPUState *cs)
1198 {
1199     PowerPCCPU *cpu = POWERPC_CPU(cs);
1200     CPUPPCState *env = &cpu->env;
1201     struct kvm_regs regs;
1202     uint32_t cr;
1203     int i, ret;
1204 
1205     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1206     if (ret < 0)
1207         return ret;
1208 
1209     cr = regs.cr;
1210     for (i = 7; i >= 0; i--) {
1211         env->crf[i] = cr & 15;
1212         cr >>= 4;
1213     }
1214 
1215     env->ctr = regs.ctr;
1216     env->lr = regs.lr;
1217     cpu_write_xer(env, regs.xer);
1218     env->msr = regs.msr;
1219     env->nip = regs.pc;
1220 
1221     env->spr[SPR_SRR0] = regs.srr0;
1222     env->spr[SPR_SRR1] = regs.srr1;
1223 
1224     env->spr[SPR_SPRG0] = regs.sprg0;
1225     env->spr[SPR_SPRG1] = regs.sprg1;
1226     env->spr[SPR_SPRG2] = regs.sprg2;
1227     env->spr[SPR_SPRG3] = regs.sprg3;
1228     env->spr[SPR_SPRG4] = regs.sprg4;
1229     env->spr[SPR_SPRG5] = regs.sprg5;
1230     env->spr[SPR_SPRG6] = regs.sprg6;
1231     env->spr[SPR_SPRG7] = regs.sprg7;
1232 
1233     env->spr[SPR_BOOKE_PID] = regs.pid;
1234 
1235     for (i = 0;i < 32; i++)
1236         env->gpr[i] = regs.gpr[i];
1237 
1238     kvm_get_fp(cs);
1239 
1240     if (cap_booke_sregs) {
1241         ret = kvmppc_get_booke_sregs(cpu);
1242         if (ret < 0) {
1243             return ret;
1244         }
1245     }
1246 
1247     if (cap_segstate) {
1248         ret = kvmppc_get_books_sregs(cpu);
1249         if (ret < 0) {
1250             return ret;
1251         }
1252     }
1253 
1254     if (cap_hior) {
1255         kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1256     }
1257 
1258     if (cap_one_reg) {
1259         int i;
1260 
1261         /* We deliberately ignore errors here, for kernels which have
1262          * the ONE_REG calls, but don't support the specific
1263          * registers, there's a reasonable chance things will still
1264          * work, at least until we try to migrate. */
1265         for (i = 0; i < 1024; i++) {
1266             uint64_t id = env->spr_cb[i].one_reg_id;
1267 
1268             if (id != 0) {
1269                 kvm_get_one_spr(cs, id, i);
1270             }
1271         }
1272 
1273 #ifdef TARGET_PPC64
1274         if (msr_ts) {
1275             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1276                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1277             }
1278             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1279                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1280             }
1281             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1282             kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1283             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1284             kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1285             kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1286             kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1287             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1288             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1289             kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1290             kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1291         }
1292 
1293         if (cap_papr) {
1294             if (kvm_get_vpa(cs) < 0) {
1295                 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1296             }
1297         }
1298 
1299         kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1300 #endif
1301     }
1302 
1303     return 0;
1304 }
1305 
1306 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1307 {
1308     unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1309 
1310     if (irq != PPC_INTERRUPT_EXT) {
1311         return 0;
1312     }
1313 
1314     if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1315         return 0;
1316     }
1317 
1318     kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1319 
1320     return 0;
1321 }
1322 
1323 #if defined(TARGET_PPC64)
1324 #define PPC_INPUT_INT PPC970_INPUT_INT
1325 #else
1326 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1327 #endif
1328 
1329 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1330 {
1331     PowerPCCPU *cpu = POWERPC_CPU(cs);
1332     CPUPPCState *env = &cpu->env;
1333     int r;
1334     unsigned irq;
1335 
1336     qemu_mutex_lock_iothread();
1337 
1338     /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1339      * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1340     if (!cap_interrupt_level &&
1341         run->ready_for_interrupt_injection &&
1342         (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1343         (env->irq_input_state & (1<<PPC_INPUT_INT)))
1344     {
1345         /* For now KVM disregards the 'irq' argument. However, in the
1346          * future KVM could cache it in-kernel to avoid a heavyweight exit
1347          * when reading the UIC.
1348          */
1349         irq = KVM_INTERRUPT_SET;
1350 
1351         DPRINTF("injected interrupt %d\n", irq);
1352         r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1353         if (r < 0) {
1354             printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1355         }
1356 
1357         /* Always wake up soon in case the interrupt was level based */
1358         timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1359                        (NANOSECONDS_PER_SECOND / 50));
1360     }
1361 
1362     /* We don't know if there are more interrupts pending after this. However,
1363      * the guest will return to userspace in the course of handling this one
1364      * anyways, so we will get a chance to deliver the rest. */
1365 
1366     qemu_mutex_unlock_iothread();
1367 }
1368 
1369 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1370 {
1371     return MEMTXATTRS_UNSPECIFIED;
1372 }
1373 
1374 int kvm_arch_process_async_events(CPUState *cs)
1375 {
1376     return cs->halted;
1377 }
1378 
1379 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1380 {
1381     CPUState *cs = CPU(cpu);
1382     CPUPPCState *env = &cpu->env;
1383 
1384     if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1385         cs->halted = 1;
1386         cs->exception_index = EXCP_HLT;
1387     }
1388 
1389     return 0;
1390 }
1391 
1392 /* map dcr access to existing qemu dcr emulation */
1393 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1394 {
1395     if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1396         fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1397 
1398     return 0;
1399 }
1400 
1401 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1402 {
1403     if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1404         fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1405 
1406     return 0;
1407 }
1408 
1409 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1410 {
1411     /* Mixed endian case is not handled */
1412     uint32_t sc = debug_inst_opcode;
1413 
1414     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1415                             sizeof(sc), 0) ||
1416         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1417         return -EINVAL;
1418     }
1419 
1420     return 0;
1421 }
1422 
1423 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1424 {
1425     uint32_t sc;
1426 
1427     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1428         sc != debug_inst_opcode ||
1429         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1430                             sizeof(sc), 1)) {
1431         return -EINVAL;
1432     }
1433 
1434     return 0;
1435 }
1436 
1437 static int find_hw_breakpoint(target_ulong addr, int type)
1438 {
1439     int n;
1440 
1441     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1442            <= ARRAY_SIZE(hw_debug_points));
1443 
1444     for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1445         if (hw_debug_points[n].addr == addr &&
1446              hw_debug_points[n].type == type) {
1447             return n;
1448         }
1449     }
1450 
1451     return -1;
1452 }
1453 
1454 static int find_hw_watchpoint(target_ulong addr, int *flag)
1455 {
1456     int n;
1457 
1458     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1459     if (n >= 0) {
1460         *flag = BP_MEM_ACCESS;
1461         return n;
1462     }
1463 
1464     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1465     if (n >= 0) {
1466         *flag = BP_MEM_WRITE;
1467         return n;
1468     }
1469 
1470     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1471     if (n >= 0) {
1472         *flag = BP_MEM_READ;
1473         return n;
1474     }
1475 
1476     return -1;
1477 }
1478 
1479 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1480                                   target_ulong len, int type)
1481 {
1482     if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1483         return -ENOBUFS;
1484     }
1485 
1486     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1487     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1488 
1489     switch (type) {
1490     case GDB_BREAKPOINT_HW:
1491         if (nb_hw_breakpoint >= max_hw_breakpoint) {
1492             return -ENOBUFS;
1493         }
1494 
1495         if (find_hw_breakpoint(addr, type) >= 0) {
1496             return -EEXIST;
1497         }
1498 
1499         nb_hw_breakpoint++;
1500         break;
1501 
1502     case GDB_WATCHPOINT_WRITE:
1503     case GDB_WATCHPOINT_READ:
1504     case GDB_WATCHPOINT_ACCESS:
1505         if (nb_hw_watchpoint >= max_hw_watchpoint) {
1506             return -ENOBUFS;
1507         }
1508 
1509         if (find_hw_breakpoint(addr, type) >= 0) {
1510             return -EEXIST;
1511         }
1512 
1513         nb_hw_watchpoint++;
1514         break;
1515 
1516     default:
1517         return -ENOSYS;
1518     }
1519 
1520     return 0;
1521 }
1522 
1523 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1524                                   target_ulong len, int type)
1525 {
1526     int n;
1527 
1528     n = find_hw_breakpoint(addr, type);
1529     if (n < 0) {
1530         return -ENOENT;
1531     }
1532 
1533     switch (type) {
1534     case GDB_BREAKPOINT_HW:
1535         nb_hw_breakpoint--;
1536         break;
1537 
1538     case GDB_WATCHPOINT_WRITE:
1539     case GDB_WATCHPOINT_READ:
1540     case GDB_WATCHPOINT_ACCESS:
1541         nb_hw_watchpoint--;
1542         break;
1543 
1544     default:
1545         return -ENOSYS;
1546     }
1547     hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1548 
1549     return 0;
1550 }
1551 
1552 void kvm_arch_remove_all_hw_breakpoints(void)
1553 {
1554     nb_hw_breakpoint = nb_hw_watchpoint = 0;
1555 }
1556 
1557 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1558 {
1559     int n;
1560 
1561     /* Software Breakpoint updates */
1562     if (kvm_sw_breakpoints_active(cs)) {
1563         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1564     }
1565 
1566     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1567            <= ARRAY_SIZE(hw_debug_points));
1568     assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1569 
1570     if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1571         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1572         memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1573         for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1574             switch (hw_debug_points[n].type) {
1575             case GDB_BREAKPOINT_HW:
1576                 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1577                 break;
1578             case GDB_WATCHPOINT_WRITE:
1579                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1580                 break;
1581             case GDB_WATCHPOINT_READ:
1582                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1583                 break;
1584             case GDB_WATCHPOINT_ACCESS:
1585                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1586                                         KVMPPC_DEBUG_WATCH_READ;
1587                 break;
1588             default:
1589                 cpu_abort(cs, "Unsupported breakpoint type\n");
1590             }
1591             dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1592         }
1593     }
1594 }
1595 
1596 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1597 {
1598     CPUState *cs = CPU(cpu);
1599     CPUPPCState *env = &cpu->env;
1600     struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1601     int handle = 0;
1602     int n;
1603     int flag = 0;
1604 
1605     if (cs->singlestep_enabled) {
1606         handle = 1;
1607     } else if (arch_info->status) {
1608         if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1609             if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1610                 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1611                 if (n >= 0) {
1612                     handle = 1;
1613                 }
1614             } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1615                                             KVMPPC_DEBUG_WATCH_WRITE)) {
1616                 n = find_hw_watchpoint(arch_info->address,  &flag);
1617                 if (n >= 0) {
1618                     handle = 1;
1619                     cs->watchpoint_hit = &hw_watchpoint;
1620                     hw_watchpoint.vaddr = hw_debug_points[n].addr;
1621                     hw_watchpoint.flags = flag;
1622                 }
1623             }
1624         }
1625     } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1626         handle = 1;
1627     } else {
1628         /* QEMU is not able to handle debug exception, so inject
1629          * program exception to guest;
1630          * Yes program exception NOT debug exception !!
1631          * When QEMU is using debug resources then debug exception must
1632          * be always set. To achieve this we set MSR_DE and also set
1633          * MSRP_DEP so guest cannot change MSR_DE.
1634          * When emulating debug resource for guest we want guest
1635          * to control MSR_DE (enable/disable debug interrupt on need).
1636          * Supporting both configurations are NOT possible.
1637          * So the result is that we cannot share debug resources
1638          * between QEMU and Guest on BOOKE architecture.
1639          * In the current design QEMU gets the priority over guest,
1640          * this means that if QEMU is using debug resources then guest
1641          * cannot use them;
1642          * For software breakpoint QEMU uses a privileged instruction;
1643          * So there cannot be any reason that we are here for guest
1644          * set debug exception, only possibility is guest executed a
1645          * privileged / illegal instruction and that's why we are
1646          * injecting a program interrupt.
1647          */
1648 
1649         cpu_synchronize_state(cs);
1650         /* env->nip is PC, so increment this by 4 to use
1651          * ppc_cpu_do_interrupt(), which set srr0 = env->nip - 4.
1652          */
1653         env->nip += 4;
1654         cs->exception_index = POWERPC_EXCP_PROGRAM;
1655         env->error_code = POWERPC_EXCP_INVAL;
1656         ppc_cpu_do_interrupt(cs);
1657     }
1658 
1659     return handle;
1660 }
1661 
1662 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1663 {
1664     PowerPCCPU *cpu = POWERPC_CPU(cs);
1665     CPUPPCState *env = &cpu->env;
1666     int ret;
1667 
1668     qemu_mutex_lock_iothread();
1669 
1670     switch (run->exit_reason) {
1671     case KVM_EXIT_DCR:
1672         if (run->dcr.is_write) {
1673             DPRINTF("handle dcr write\n");
1674             ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1675         } else {
1676             DPRINTF("handle dcr read\n");
1677             ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1678         }
1679         break;
1680     case KVM_EXIT_HLT:
1681         DPRINTF("handle halt\n");
1682         ret = kvmppc_handle_halt(cpu);
1683         break;
1684 #if defined(TARGET_PPC64)
1685     case KVM_EXIT_PAPR_HCALL:
1686         DPRINTF("handle PAPR hypercall\n");
1687         run->papr_hcall.ret = spapr_hypercall(cpu,
1688                                               run->papr_hcall.nr,
1689                                               run->papr_hcall.args);
1690         ret = 0;
1691         break;
1692 #endif
1693     case KVM_EXIT_EPR:
1694         DPRINTF("handle epr\n");
1695         run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1696         ret = 0;
1697         break;
1698     case KVM_EXIT_WATCHDOG:
1699         DPRINTF("handle watchdog expiry\n");
1700         watchdog_perform_action();
1701         ret = 0;
1702         break;
1703 
1704     case KVM_EXIT_DEBUG:
1705         DPRINTF("handle debug exception\n");
1706         if (kvm_handle_debug(cpu, run)) {
1707             ret = EXCP_DEBUG;
1708             break;
1709         }
1710         /* re-enter, this exception was guest-internal */
1711         ret = 0;
1712         break;
1713 
1714     default:
1715         fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1716         ret = -1;
1717         break;
1718     }
1719 
1720     qemu_mutex_unlock_iothread();
1721     return ret;
1722 }
1723 
1724 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1725 {
1726     CPUState *cs = CPU(cpu);
1727     uint32_t bits = tsr_bits;
1728     struct kvm_one_reg reg = {
1729         .id = KVM_REG_PPC_OR_TSR,
1730         .addr = (uintptr_t) &bits,
1731     };
1732 
1733     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1734 }
1735 
1736 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1737 {
1738 
1739     CPUState *cs = CPU(cpu);
1740     uint32_t bits = tsr_bits;
1741     struct kvm_one_reg reg = {
1742         .id = KVM_REG_PPC_CLEAR_TSR,
1743         .addr = (uintptr_t) &bits,
1744     };
1745 
1746     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1747 }
1748 
1749 int kvmppc_set_tcr(PowerPCCPU *cpu)
1750 {
1751     CPUState *cs = CPU(cpu);
1752     CPUPPCState *env = &cpu->env;
1753     uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1754 
1755     struct kvm_one_reg reg = {
1756         .id = KVM_REG_PPC_TCR,
1757         .addr = (uintptr_t) &tcr,
1758     };
1759 
1760     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1761 }
1762 
1763 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1764 {
1765     CPUState *cs = CPU(cpu);
1766     int ret;
1767 
1768     if (!kvm_enabled()) {
1769         return -1;
1770     }
1771 
1772     if (!cap_ppc_watchdog) {
1773         printf("warning: KVM does not support watchdog");
1774         return -1;
1775     }
1776 
1777     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1778     if (ret < 0) {
1779         fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1780                 __func__, strerror(-ret));
1781         return ret;
1782     }
1783 
1784     return ret;
1785 }
1786 
/*
 * Scan /proc/cpuinfo for the first line that begins with @field and copy
 * that whole line into @value (at most @len bytes, via pstrcpy()).
 *
 * Returns 0 when a matching line was found, -1 when the file cannot be
 * opened or no line matches.
 */
static int read_cpuinfo(const char *field, char *value, int len)
{
    char line[512];
    int field_len = strlen(field);
    int found = -1;
    FILE *fp = fopen("/proc/cpuinfo", "r");

    if (!fp) {
        return -1;
    }

    while (fgets(line, sizeof(line), fp)) {
        if (strncmp(line, field, field_len) == 0) {
            pstrcpy(value, len, line);
            found = 0;
            break;
        }
        /* An empty buffer also ends the scan. */
        if (!*line) {
            break;
        }
    }

    fclose(fp);

    return found;
}
1814 
1815 uint32_t kvmppc_get_tbfreq(void)
1816 {
1817     char line[512];
1818     char *ns;
1819     uint32_t retval = NANOSECONDS_PER_SECOND;
1820 
1821     if (read_cpuinfo("timebase", line, sizeof(line))) {
1822         return retval;
1823     }
1824 
1825     if (!(ns = strchr(line, ':'))) {
1826         return retval;
1827     }
1828 
1829     ns++;
1830 
1831     return atoi(ns);
1832 }
1833 
/*
 * Read the contents of /proc/device-tree/system-id into a buffer stored
 * in *@value.
 *
 * Returns the result of g_file_get_contents().
 */
bool kvmppc_get_host_serial(char **value)
{
    static const char path[] = "/proc/device-tree/system-id";

    return g_file_get_contents(path, value, NULL, NULL);
}
1839 
/*
 * Read the contents of /proc/device-tree/model into a buffer stored in
 * *@value.
 *
 * Returns the result of g_file_get_contents().
 */
bool kvmppc_get_host_model(char **value)
{
    static const char path[] = "/proc/device-tree/model";

    return g_file_get_contents(path, value, NULL, NULL);
}
1844 
1845 /* Try to find a device tree node for a CPU with clock-frequency property */
1846 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1847 {
1848     struct dirent *dirp;
1849     DIR *dp;
1850 
1851     if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1852         printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1853         return -1;
1854     }
1855 
1856     buf[0] = '\0';
1857     while ((dirp = readdir(dp)) != NULL) {
1858         FILE *f;
1859         snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1860                  dirp->d_name);
1861         f = fopen(buf, "r");
1862         if (f) {
1863             snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1864             fclose(f);
1865             break;
1866         }
1867         buf[0] = '\0';
1868     }
1869     closedir(dp);
1870     if (buf[0] == '\0') {
1871         printf("Unknown host!\n");
1872         return -1;
1873     }
1874 
1875     return 0;
1876 }
1877 
/*
 * Read a big-endian integer from the file @filename.
 *
 * Up to 8 bytes are read.  A 4-byte file is decoded as a 32-bit value and
 * an 8-byte file as a 64-bit value.  Returns (uint64_t)-1 when the file
 * cannot be opened and 0 when the number of bytes read is neither 4 nor 8.
 */
static uint64_t kvmppc_read_int_dt(const char *filename)
{
    union {
        uint32_t v32;
        uint64_t v64;
    } raw;
    FILE *fp;
    int nread;

    fp = fopen(filename, "rb");
    if (!fp) {
        return -1;
    }

    nread = fread(&raw, 1, sizeof(raw), fp);
    fclose(fp);

    if (nread == 4) {
        /* property is a 32-bit quantity */
        return be32_to_cpu(raw.v32);
    }
    if (nread == 8) {
        return be64_to_cpu(raw.v64);
    }

    return 0;
}
1904 
1905 /* Read a CPU node property from the host device tree that's a single
1906  * integer (32-bit or 64-bit).  Returns 0 if anything goes wrong
1907  * (can't find or open the property, or doesn't understand the
1908  * format) */
1909 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1910 {
1911     char buf[PATH_MAX], *tmp;
1912     uint64_t val;
1913 
1914     if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1915         return -1;
1916     }
1917 
1918     tmp = g_strdup_printf("%s/%s", buf, propname);
1919     val = kvmppc_read_int_dt(tmp);
1920     g_free(tmp);
1921 
1922     return val;
1923 }
1924 
/*
 * Return the "clock-frequency" property of the host CPU node, as read by
 * kvmppc_read_int_cpu_dt().
 */
uint64_t kvmppc_get_clockfreq(void)
{
    uint64_t freq = kvmppc_read_int_cpu_dt("clock-frequency");

    return freq;
}
1929 
1930 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1931  {
1932      PowerPCCPU *cpu = ppc_env_get_cpu(env);
1933      CPUState *cs = CPU(cpu);
1934 
1935     if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1936         !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1937         return 0;
1938     }
1939 
1940     return 1;
1941 }
1942 
1943 int kvmppc_get_hasidle(CPUPPCState *env)
1944 {
1945     struct kvm_ppc_pvinfo pvinfo;
1946 
1947     if (!kvmppc_get_pvinfo(env, &pvinfo) &&
1948         (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
1949         return 1;
1950     }
1951 
1952     return 0;
1953 }
1954 
1955 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
1956 {
1957     uint32_t *hc = (uint32_t*)buf;
1958     struct kvm_ppc_pvinfo pvinfo;
1959 
1960     if (!kvmppc_get_pvinfo(env, &pvinfo)) {
1961         memcpy(buf, pvinfo.hcall, buf_len);
1962         return 0;
1963     }
1964 
1965     /*
1966      * Fallback to always fail hypercalls regardless of endianness:
1967      *
1968      *     tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
1969      *     li r3, -1
1970      *     b .+8       (becomes nop in wrong endian)
1971      *     bswap32(li r3, -1)
1972      */
1973 
1974     hc[0] = cpu_to_be32(0x08000048);
1975     hc[1] = cpu_to_be32(0x3860ffff);
1976     hc[2] = cpu_to_be32(0x48000008);
1977     hc[3] = cpu_to_be32(bswap32(0x3860ffff));
1978 
1979     return 1;
1980 }
1981 
1982 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
1983 {
1984     return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
1985 }
1986 
1987 void kvmppc_enable_logical_ci_hcalls(void)
1988 {
1989     /*
1990      * FIXME: it would be nice if we could detect the cases where
1991      * we're using a device which requires the in kernel
1992      * implementation of these hcalls, but the kernel lacks them and
1993      * produce a warning.
1994      */
1995     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
1996     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
1997 }
1998 
1999 void kvmppc_enable_set_mode_hcall(void)
2000 {
2001     kvmppc_enable_hcall(kvm_state, H_SET_MODE);
2002 }
2003 
2004 void kvmppc_enable_clear_ref_mod_hcalls(void)
2005 {
2006     kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
2007     kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
2008 }
2009 
2010 void kvmppc_set_papr(PowerPCCPU *cpu)
2011 {
2012     CPUState *cs = CPU(cpu);
2013     int ret;
2014 
2015     if (!kvm_enabled()) {
2016         return;
2017     }
2018 
2019     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2020     if (ret) {
2021         error_report("This vCPU type or KVM version does not support PAPR");
2022         exit(1);
2023     }
2024 
2025     /* Update the capability flag so we sync the right information
2026      * with kvm */
2027     cap_papr = 1;
2028 }
2029 
2030 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr)
2031 {
2032     return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr);
2033 }
2034 
2035 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2036 {
2037     CPUState *cs = CPU(cpu);
2038     int ret;
2039 
2040     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2041     if (ret && mpic_proxy) {
2042         error_report("This KVM version does not support EPR");
2043         exit(1);
2044     }
2045 }
2046 
2047 int kvmppc_smt_threads(void)
2048 {
2049     return cap_ppc_smt ? cap_ppc_smt : 1;
2050 }
2051 
2052 int kvmppc_set_smt_threads(int smt)
2053 {
2054     int ret;
2055 
2056     ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_SMT, 0, smt, 0);
2057     if (!ret) {
2058         cap_ppc_smt = smt;
2059     }
2060     return ret;
2061 }
2062 
2063 void kvmppc_hint_smt_possible(Error **errp)
2064 {
2065     int i;
2066     GString *g;
2067     char *s;
2068 
2069     assert(kvm_enabled());
2070     if (cap_ppc_smt_possible) {
2071         g = g_string_new("Available VSMT modes:");
2072         for (i = 63; i >= 0; i--) {
2073             if ((1UL << i) & cap_ppc_smt_possible) {
2074                 g_string_append_printf(g, " %lu", (1UL << i));
2075             }
2076         }
2077         s = g_string_free(g, false);
2078         error_append_hint(errp, "%s.\n", s);
2079         g_free(s);
2080     } else {
2081         error_append_hint(errp,
2082                           "This KVM seems to be too old to support VSMT.\n");
2083     }
2084 }
2085 
2086 
#ifdef TARGET_PPC64
/*
 * Return the smaller of @current_size and
 * 2^(best_page_shift + @hash_shift - 7), where best_page_shift is the
 * largest page shift listed in the KVM SMMU info whose page size does not
 * exceed qemu_getrampagesize() (0 if there is none).
 */
uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
{
    struct kvm_ppc_smmu_info info;
    long rampagesize;
    long best_page_shift = 0;
    int idx;

    /* Find the largest hardware supported page size that's less than
     * or equal to the (logical) backing page size of guest RAM */
    kvm_get_smmu_info(&info, &error_fatal);
    rampagesize = qemu_getrampagesize();

    for (idx = 0; idx < KVM_PPC_PAGE_SIZES_MAX_SZ; idx++) {
        struct kvm_ppc_one_seg_page_size *sps = &info.sps[idx];

        /* Skip unused slots, non-improvements and oversized pages */
        if (!sps->page_shift
            || sps->page_shift <= best_page_shift
            || (1UL << sps->page_shift) > rampagesize) {
            continue;
        }

        best_page_shift = sps->page_shift;
    }

    return MIN(current_size,
               1ULL << (best_page_shift + hash_shift - 7));
}
#endif
2117 
2118 bool kvmppc_spapr_use_multitce(void)
2119 {
2120     return cap_spapr_multitce;
2121 }
2122 
2123 int kvmppc_spapr_enable_inkernel_multitce(void)
2124 {
2125     int ret;
2126 
2127     ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2128                             H_PUT_TCE_INDIRECT, 1);
2129     if (!ret) {
2130         ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2131                                 H_STUFF_TCE, 1);
2132     }
2133 
2134     return ret;
2135 }
2136 
2137 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
2138                               uint64_t bus_offset, uint32_t nb_table,
2139                               int *pfd, bool need_vfio)
2140 {
2141     long len;
2142     int fd;
2143     void *table;
2144 
2145     /* Must set fd to -1 so we don't try to munmap when called for
2146      * destroying the table, which the upper layers -will- do
2147      */
2148     *pfd = -1;
2149     if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2150         return NULL;
2151     }
2152 
2153     if (cap_spapr_tce_64) {
2154         struct kvm_create_spapr_tce_64 args = {
2155             .liobn = liobn,
2156             .page_shift = page_shift,
2157             .offset = bus_offset >> page_shift,
2158             .size = nb_table,
2159             .flags = 0
2160         };
2161         fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE_64, &args);
2162         if (fd < 0) {
2163             fprintf(stderr,
2164                     "KVM: Failed to create TCE64 table for liobn 0x%x\n",
2165                     liobn);
2166             return NULL;
2167         }
2168     } else if (cap_spapr_tce) {
2169         uint64_t window_size = (uint64_t) nb_table << page_shift;
2170         struct kvm_create_spapr_tce args = {
2171             .liobn = liobn,
2172             .window_size = window_size,
2173         };
2174         if ((window_size != args.window_size) || bus_offset) {
2175             return NULL;
2176         }
2177         fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2178         if (fd < 0) {
2179             fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2180                     liobn);
2181             return NULL;
2182         }
2183     } else {
2184         return NULL;
2185     }
2186 
2187     len = nb_table * sizeof(uint64_t);
2188     /* FIXME: round this up to page size */
2189 
2190     table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2191     if (table == MAP_FAILED) {
2192         fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2193                 liobn);
2194         close(fd);
2195         return NULL;
2196     }
2197 
2198     *pfd = fd;
2199     return table;
2200 }
2201 
/*
 * Unmap a TCE table of @nb_table 64-bit entries and close its @fd.
 *
 * Returns -1 when @fd is negative (nothing is unmapped in that case) and
 * 0 otherwise.  A failure of munmap() or close() is only reported on
 * stderr; if munmap() fails, close() is not attempted.
 */
int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
{
    long len;

    if (fd < 0) {
        return -1;
    }

    len = nb_table * sizeof(uint64_t);
    if ((munmap(table, len) < 0) ||
        (close(fd) < 0)) {
        /* Newline-terminated like the other diagnostics in this file */
        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s\n",
                strerror(errno));
        /* Leak the table */
    }

    return 0;
}
2220 
2221 int kvmppc_reset_htab(int shift_hint)
2222 {
2223     uint32_t shift = shift_hint;
2224 
2225     if (!kvm_enabled()) {
2226         /* Full emulation, tell caller to allocate htab itself */
2227         return 0;
2228     }
2229     if (kvm_vm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2230         int ret;
2231         ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2232         if (ret == -ENOTTY) {
2233             /* At least some versions of PR KVM advertise the
2234              * capability, but don't implement the ioctl().  Oops.
2235              * Return 0 so that we allocate the htab in qemu, as is
2236              * correct for PR. */
2237             return 0;
2238         } else if (ret < 0) {
2239             return ret;
2240         }
2241         return shift;
2242     }
2243 
2244     /* We have a kernel that predates the htab reset calls.  For PR
2245      * KVM, we need to allocate the htab ourselves, for an HV KVM of
2246      * this era, it has allocated a 16MB fixed size hash table already. */
2247     if (kvmppc_is_pr(kvm_state)) {
2248         /* PR - tell caller to allocate htab */
2249         return 0;
2250     } else {
2251         /* HV - assume 16MB kernel allocated htab */
2252         return 24;
2253     }
2254 }
2255 
2256 static inline uint32_t mfpvr(void)
2257 {
2258     uint32_t pvr;
2259 
2260     asm ("mfpvr %0"
2261          : "=r"(pvr));
2262     return pvr;
2263 }
2264 
/* Set (@on true) or clear (@on false) the bits of @flags in *@word. */
static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    *word = on ? (*word | flags) : (*word & ~flags);
}
2273 
2274 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2275 {
2276     PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2277     uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2278     uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2279 
2280     /* Now fix up the class with information we can query from the host */
2281     pcc->pvr = mfpvr();
2282 
2283     alter_insns(&pcc->insns_flags, PPC_ALTIVEC,
2284                 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_ALTIVEC);
2285     alter_insns(&pcc->insns_flags2, PPC2_VSX,
2286                 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_VSX);
2287     alter_insns(&pcc->insns_flags2, PPC2_DFP,
2288                 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_DFP);
2289 
2290     if (dcache_size != -1) {
2291         pcc->l1_dcache_size = dcache_size;
2292     }
2293 
2294     if (icache_size != -1) {
2295         pcc->l1_icache_size = icache_size;
2296     }
2297 
2298 #if defined(TARGET_PPC64)
2299     pcc->radix_page_info = kvm_get_radix_page_info();
2300 
2301     if ((pcc->pvr & 0xffffff00) == CPU_POWERPC_POWER9_DD1) {
2302         /*
2303          * POWER9 DD1 has some bugs which make it not really ISA 3.00
2304          * compliant.  More importantly, advertising ISA 3.00
2305          * architected mode may prevent guests from activating
2306          * necessary DD1 workarounds.
2307          */
2308         pcc->pcr_supported &= ~(PCR_COMPAT_3_00 | PCR_COMPAT_2_07
2309                                 | PCR_COMPAT_2_06 | PCR_COMPAT_2_05);
2310     }
2311 #endif /* defined(TARGET_PPC64) */
2312 }
2313 
2314 bool kvmppc_has_cap_epr(void)
2315 {
2316     return cap_epr;
2317 }
2318 
2319 bool kvmppc_has_cap_fixup_hcalls(void)
2320 {
2321     return cap_fixup_hcalls;
2322 }
2323 
2324 bool kvmppc_has_cap_htm(void)
2325 {
2326     return cap_htm;
2327 }
2328 
2329 bool kvmppc_has_cap_mmu_radix(void)
2330 {
2331     return cap_mmu_radix;
2332 }
2333 
2334 bool kvmppc_has_cap_mmu_hash_v3(void)
2335 {
2336     return cap_mmu_hash_v3;
2337 }
2338 
/*
 * Return true if the host PVR, masked with
 * CPU_POWERPC_POWER_SERVER_MASK, equals one of the POWER8E, POWER8NVL or
 * POWER8 base values.  Always false when TARGET_PPC64 is not defined.
 */
static bool kvmppc_power8_host(void)
{
#ifdef TARGET_PPC64
    uint32_t base_pvr = CPU_POWERPC_POWER_SERVER_MASK & mfpvr();

    if (base_pvr == CPU_POWERPC_POWER8E_BASE) {
        return true;
    }
    if (base_pvr == CPU_POWERPC_POWER8NVL_BASE) {
        return true;
    }
    if (base_pvr == CPU_POWERPC_POWER8_BASE) {
        return true;
    }
#endif /* TARGET_PPC64 */
    return false;
}
2352 
2353 static int parse_cap_ppc_safe_cache(struct kvm_ppc_cpu_char c)
2354 {
2355     bool l1d_thread_priv_req = !kvmppc_power8_host();
2356 
2357     if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_L1D_FLUSH_PR) {
2358         return 2;
2359     } else if ((!l1d_thread_priv_req ||
2360                 c.character & c.character_mask & H_CPU_CHAR_L1D_THREAD_PRIV) &&
2361                (c.character & c.character_mask
2362                 & (H_CPU_CHAR_L1D_FLUSH_ORI30 | H_CPU_CHAR_L1D_FLUSH_TRIG2))) {
2363         return 1;
2364     }
2365 
2366     return 0;
2367 }
2368 
2369 static int parse_cap_ppc_safe_bounds_check(struct kvm_ppc_cpu_char c)
2370 {
2371     if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_BNDS_CHK_SPEC_BAR) {
2372         return 2;
2373     } else if (c.character & c.character_mask & H_CPU_CHAR_SPEC_BAR_ORI31) {
2374         return 1;
2375     }
2376 
2377     return 0;
2378 }
2379 
2380 static int parse_cap_ppc_safe_indirect_branch(struct kvm_ppc_cpu_char c)
2381 {
2382     if (c.character & c.character_mask & H_CPU_CHAR_CACHE_COUNT_DIS) {
2383         return  SPAPR_CAP_FIXED_CCD;
2384     } else if (c.character & c.character_mask & H_CPU_CHAR_BCCTRL_SERIALISED) {
2385         return SPAPR_CAP_FIXED_IBS;
2386     }
2387 
2388     return 0;
2389 }
2390 
2391 static void kvmppc_get_cpu_characteristics(KVMState *s)
2392 {
2393     struct kvm_ppc_cpu_char c;
2394     int ret;
2395 
2396     /* Assume broken */
2397     cap_ppc_safe_cache = 0;
2398     cap_ppc_safe_bounds_check = 0;
2399     cap_ppc_safe_indirect_branch = 0;
2400 
2401     ret = kvm_vm_check_extension(s, KVM_CAP_PPC_GET_CPU_CHAR);
2402     if (!ret) {
2403         return;
2404     }
2405     ret = kvm_vm_ioctl(s, KVM_PPC_GET_CPU_CHAR, &c);
2406     if (ret < 0) {
2407         return;
2408     }
2409 
2410     cap_ppc_safe_cache = parse_cap_ppc_safe_cache(c);
2411     cap_ppc_safe_bounds_check = parse_cap_ppc_safe_bounds_check(c);
2412     cap_ppc_safe_indirect_branch = parse_cap_ppc_safe_indirect_branch(c);
2413 }
2414 
2415 int kvmppc_get_cap_safe_cache(void)
2416 {
2417     return cap_ppc_safe_cache;
2418 }
2419 
2420 int kvmppc_get_cap_safe_bounds_check(void)
2421 {
2422     return cap_ppc_safe_bounds_check;
2423 }
2424 
2425 int kvmppc_get_cap_safe_indirect_branch(void)
2426 {
2427     return cap_ppc_safe_indirect_branch;
2428 }
2429 
2430 bool kvmppc_has_cap_nested_kvm_hv(void)
2431 {
2432     return !!cap_ppc_nested_kvm_hv;
2433 }
2434 
2435 int kvmppc_set_cap_nested_kvm_hv(int enable)
2436 {
2437     return kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_NESTED_HV, 0, enable);
2438 }
2439 
2440 bool kvmppc_has_cap_spapr_vfio(void)
2441 {
2442     return cap_spapr_vfio;
2443 }
2444 
2445 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
2446 {
2447     uint32_t host_pvr = mfpvr();
2448     PowerPCCPUClass *pvr_pcc;
2449 
2450     pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2451     if (pvr_pcc == NULL) {
2452         pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2453     }
2454 
2455     return pvr_pcc;
2456 }
2457 
2458 static int kvm_ppc_register_host_cpu_type(MachineState *ms)
2459 {
2460     TypeInfo type_info = {
2461         .name = TYPE_HOST_POWERPC_CPU,
2462         .class_init = kvmppc_host_cpu_class_init,
2463     };
2464     MachineClass *mc = MACHINE_GET_CLASS(ms);
2465     PowerPCCPUClass *pvr_pcc;
2466     ObjectClass *oc;
2467     DeviceClass *dc;
2468     int i;
2469 
2470     pvr_pcc = kvm_ppc_get_host_cpu_class();
2471     if (pvr_pcc == NULL) {
2472         return -1;
2473     }
2474     type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2475     type_register(&type_info);
2476     if (object_dynamic_cast(OBJECT(ms), TYPE_SPAPR_MACHINE)) {
2477         /* override TCG default cpu type with 'host' cpu model */
2478         mc->default_cpu_type = TYPE_HOST_POWERPC_CPU;
2479     }
2480 
2481     oc = object_class_by_name(type_info.name);
2482     g_assert(oc);
2483 
2484     /*
2485      * Update generic CPU family class alias (e.g. on a POWER8NVL host,
2486      * we want "POWER8" to be a "family" alias that points to the current
2487      * host CPU type, too)
2488      */
2489     dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc));
2490     for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) {
2491         if (strcasecmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) {
2492             char *suffix;
2493 
2494             ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc));
2495             suffix = strstr(ppc_cpu_aliases[i].model, POWERPC_CPU_TYPE_SUFFIX);
2496             if (suffix) {
2497                 *suffix = 0;
2498             }
2499             break;
2500         }
2501     }
2502 
2503     return 0;
2504 }
2505 
2506 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2507 {
2508     struct kvm_rtas_token_args args = {
2509         .token = token,
2510     };
2511 
2512     if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2513         return -ENOENT;
2514     }
2515 
2516     strncpy(args.name, function, sizeof(args.name));
2517 
2518     return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2519 }
2520 
2521 int kvmppc_get_htab_fd(bool write, uint64_t index, Error **errp)
2522 {
2523     struct kvm_get_htab_fd s = {
2524         .flags = write ? KVM_GET_HTAB_WRITE : 0,
2525         .start_index = index,
2526     };
2527     int ret;
2528 
2529     if (!cap_htab_fd) {
2530         error_setg(errp, "KVM version doesn't support %s the HPT",
2531                    write ? "writing" : "reading");
2532         return -ENOTSUP;
2533     }
2534 
2535     ret = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2536     if (ret < 0) {
2537         error_setg(errp, "Unable to open fd for %s HPT %s KVM: %s",
2538                    write ? "writing" : "reading", write ? "to" : "from",
2539                    strerror(errno));
2540         return -errno;
2541     }
2542 
2543     return ret;
2544 }
2545 
2546 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2547 {
2548     int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2549     uint8_t buf[bufsize];
2550     ssize_t rc;
2551 
2552     do {
2553         rc = read(fd, buf, bufsize);
2554         if (rc < 0) {
2555             fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2556                     strerror(errno));
2557             return rc;
2558         } else if (rc) {
2559             uint8_t *buffer = buf;
2560             ssize_t n = rc;
2561             while (n) {
2562                 struct kvm_get_htab_header *head =
2563                     (struct kvm_get_htab_header *) buffer;
2564                 size_t chunksize = sizeof(*head) +
2565                      HASH_PTE_SIZE_64 * head->n_valid;
2566 
2567                 qemu_put_be32(f, head->index);
2568                 qemu_put_be16(f, head->n_valid);
2569                 qemu_put_be16(f, head->n_invalid);
2570                 qemu_put_buffer(f, (void *)(head + 1),
2571                                 HASH_PTE_SIZE_64 * head->n_valid);
2572 
2573                 buffer += chunksize;
2574                 n -= chunksize;
2575             }
2576         }
2577     } while ((rc != 0)
2578              && ((max_ns < 0)
2579                  || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2580 
2581     return (rc == 0) ? 1 : 0;
2582 }
2583 
2584 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2585                            uint16_t n_valid, uint16_t n_invalid)
2586 {
2587     struct kvm_get_htab_header *buf;
2588     size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
2589     ssize_t rc;
2590 
2591     buf = alloca(chunksize);
2592     buf->index = index;
2593     buf->n_valid = n_valid;
2594     buf->n_invalid = n_invalid;
2595 
2596     qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
2597 
2598     rc = write(fd, buf, chunksize);
2599     if (rc < 0) {
2600         fprintf(stderr, "Error writing KVM hash table: %s\n",
2601                 strerror(errno));
2602         return rc;
2603     }
2604     if (rc != chunksize) {
2605         /* We should never get a short write on a single chunk */
2606         fprintf(stderr, "Short write, restoring KVM hash table\n");
2607         return -1;
2608     }
2609     return 0;
2610 }
2611 
2612 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2613 {
2614     return true;
2615 }
2616 
2617 void kvm_arch_init_irq_routing(KVMState *s)
2618 {
2619 }
2620 
/*
 * Read @n hash page table entries, starting at index @ptex, from KVM
 * into @hptes.
 *
 * A read-only HTAB fd positioned at @ptex is opened for the duration of
 * the call (failure to open aborts via &error_abort).  The data read
 * from it is a sequence of records, each a struct kvm_get_htab_header
 * followed by n_valid entries; the n_invalid entries a record announces
 * are not present in the data and are zero-filled in @hptes.
 *
 * A read error or an unexpected record index ends in hw_error().
 */
void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n)
{
    int fd, rc;
    int i;

    fd = kvmppc_get_htab_fd(false, ptex, &error_abort);

    /* i counts the entries of hptes that have been filled in so far */
    i = 0;
    while (i < n) {
        struct kvm_get_htab_header *hdr;
        /* Room for one header plus at most HPTES_PER_GROUP entries */
        int m = n < HPTES_PER_GROUP ? n : HPTES_PER_GROUP;
        char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64];

        /*
         * NOTE(review): if read() returns 0 the inner loop below does not
         * run and i is unchanged, so this outer loop would repeat the
         * read indefinitely -- confirm the fd never reports 0 here.
         */
        rc = read(fd, buf, sizeof(buf));
        if (rc < 0) {
            hw_error("kvmppc_read_hptes: Unable to read HPTEs");
        }

        /* Walk the records contained in the rc bytes just read */
        hdr = (struct kvm_get_htab_header *)buf;
        while ((i < n) && ((char *)hdr < (buf + rc))) {
            int invalid = hdr->n_invalid, valid = hdr->n_valid;

            /* Records must arrive in order, without gaps */
            if (hdr->index != (ptex + i)) {
                hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32
                         " != (%"HWADDR_PRIu" + %d", hdr->index, ptex, i);
            }

            /* Never copy more entries than the caller asked for */
            if (n - i < valid) {
                valid = n - i;
            }
            memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * valid);
            i += valid;

            /* Invalid entries carry no data: represent them as zeroes */
            if ((n - i) < invalid) {
                invalid = n - i;
            }
            memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64);
            i += invalid;

            /*
             * Step to the next record using the unclamped n_valid from
             * the header, i.e. past all entries stored in this record.
             */
            hdr = (struct kvm_get_htab_header *)
                ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid);
        }
    }

    close(fd);
}
2667 
2668 void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1)
2669 {
2670     int fd, rc;
2671     struct {
2672         struct kvm_get_htab_header hdr;
2673         uint64_t pte0;
2674         uint64_t pte1;
2675     } buf;
2676 
2677     fd = kvmppc_get_htab_fd(true, 0 /* Ignored */, &error_abort);
2678 
2679     buf.hdr.n_valid = 1;
2680     buf.hdr.n_invalid = 0;
2681     buf.hdr.index = ptex;
2682     buf.pte0 = cpu_to_be64(pte0);
2683     buf.pte1 = cpu_to_be64(pte1);
2684 
2685     rc = write(fd, &buf, sizeof(buf));
2686     if (rc != sizeof(buf)) {
2687         hw_error("kvmppc_write_hpte: Unable to update KVM HPT");
2688     }
2689     close(fd);
2690 }
2691 
2692 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2693                              uint64_t address, uint32_t data, PCIDevice *dev)
2694 {
2695     return 0;
2696 }
2697 
2698 int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
2699                                 int vector, PCIDevice *dev)
2700 {
2701     return 0;
2702 }
2703 
/* Post-release virq hook: nothing is done and 0 is returned. */
int kvm_arch_release_virq_post(int virq)
{
    const int rc = 0;

    return rc;
}
2708 
/* Return the low 16 bits of the MSI @data value. */
int kvm_arch_msi_data_to_gsi(uint32_t data)
{
    uint32_t low16 = data & 0xffff;

    return low16;
}
2713 
2714 int kvmppc_enable_hwrng(void)
2715 {
2716     if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2717         return -1;
2718     }
2719 
2720     return kvmppc_enable_hcall(kvm_state, H_RANDOM);
2721 }
2722 
2723 void kvmppc_check_papr_resize_hpt(Error **errp)
2724 {
2725     if (!kvm_enabled()) {
2726         return; /* No KVM, we're good */
2727     }
2728 
2729     if (cap_resize_hpt) {
2730         return; /* Kernel has explicit support, we're good */
2731     }
2732 
2733     /* Otherwise fallback on looking for PR KVM */
2734     if (kvmppc_is_pr(kvm_state)) {
2735         return;
2736     }
2737 
2738     error_setg(errp,
2739                "Hash page table resizing not available with this KVM version");
2740 }
2741 
2742 int kvmppc_resize_hpt_prepare(PowerPCCPU *cpu, target_ulong flags, int shift)
2743 {
2744     CPUState *cs = CPU(cpu);
2745     struct kvm_ppc_resize_hpt rhpt = {
2746         .flags = flags,
2747         .shift = shift,
2748     };
2749 
2750     if (!cap_resize_hpt) {
2751         return -ENOSYS;
2752     }
2753 
2754     return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_PREPARE, &rhpt);
2755 }
2756 
2757 int kvmppc_resize_hpt_commit(PowerPCCPU *cpu, target_ulong flags, int shift)
2758 {
2759     CPUState *cs = CPU(cpu);
2760     struct kvm_ppc_resize_hpt rhpt = {
2761         .flags = flags,
2762         .shift = shift,
2763     };
2764 
2765     if (!cap_resize_hpt) {
2766         return -ENOSYS;
2767     }
2768 
2769     return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_COMMIT, &rhpt);
2770 }
2771 
2772 /*
2773  * This is a helper function to detect a post migration scenario
2774  * in which a guest, running as KVM-HV, freezes in cpu_post_load because
2775  * the guest kernel can't handle a PVR value other than the actual host
2776  * PVR in KVM_SET_SREGS, even if pvr_match() returns true.
2777  *
2778  * If we don't have cap_ppc_pvr_compat and we're not running in PR
2779  * (so, we're HV), return true. The workaround itself is done in
2780  * cpu_post_load.
2781  *
2782  * The order here is important: we'll only check for KVM PR as a
2783  * fallback if the guest kernel can't handle the situation itself.
2784  * We need to avoid as much as possible querying the running KVM type
2785  * in QEMU level.
2786  */
2787 bool kvmppc_pvr_workaround_required(PowerPCCPU *cpu)
2788 {
2789     CPUState *cs = CPU(cpu);
2790 
2791     if (!kvm_enabled()) {
2792         return false;
2793     }
2794 
2795     if (cap_ppc_pvr_compat) {
2796         return false;
2797     }
2798 
2799     return !kvmppc_is_pr(cs->kvm_state);
2800 }
2801 
2802 void kvmppc_set_reg_ppc_online(PowerPCCPU *cpu, unsigned int online)
2803 {
2804     CPUState *cs = CPU(cpu);
2805 
2806     if (kvm_enabled()) {
2807         kvm_set_one_reg(cs, KVM_REG_PPC_ONLINE, &online);
2808     }
2809 }
2810