xref: /openbmc/qemu/target/ppc/kvm.c (revision bc5c4f21)
1 /*
2  * PowerPC implementation of KVM hooks
3  *
4  * Copyright IBM Corp. 2007
5  * Copyright (C) 2011 Freescale Semiconductor, Inc.
6  *
7  * Authors:
8  *  Jerone Young <jyoung5@us.ibm.com>
9  *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10  *  Hollis Blanchard <hollisb@us.ibm.com>
11  *
12  * This work is licensed under the terms of the GNU GPL, version 2 or later.
13  * See the COPYING file in the top-level directory.
14  *
15  */
16 
17 #include "qemu/osdep.h"
18 #include <dirent.h>
19 #include <sys/ioctl.h>
20 #include <sys/vfs.h>
21 
22 #include <linux/kvm.h>
23 
24 #include "qemu-common.h"
25 #include "qemu/error-report.h"
26 #include "cpu.h"
27 #include "cpu-models.h"
28 #include "qemu/timer.h"
29 #include "sysemu/sysemu.h"
30 #include "sysemu/hw_accel.h"
31 #include "sysemu/numa.h"
32 #include "kvm_ppc.h"
33 #include "sysemu/cpus.h"
34 #include "sysemu/device_tree.h"
35 #include "mmu-hash64.h"
36 
37 #include "hw/sysbus.h"
38 #include "hw/ppc/spapr.h"
39 #include "hw/ppc/spapr_vio.h"
40 #include "hw/ppc/spapr_cpu_core.h"
41 #include "hw/ppc/ppc.h"
42 #include "sysemu/watchdog.h"
43 #include "trace.h"
44 #include "exec/gdbstub.h"
45 #include "exec/memattrs.h"
46 #include "sysemu/hostmem.h"
47 #include "qemu/cutils.h"
48 #if defined(TARGET_PPC64)
49 #include "hw/ppc/spapr_cpu_core.h"
50 #endif
51 
52 //#define DEBUG_KVM
53 
54 #ifdef DEBUG_KVM
55 #define DPRINTF(fmt, ...) \
56     do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
57 #else
58 #define DPRINTF(fmt, ...) \
59     do { } while (0)
60 #endif
61 
62 #define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"
63 
64 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
65     KVM_CAP_LAST_INFO
66 };
67 
68 static int cap_interrupt_unset = false;
69 static int cap_interrupt_level = false;
70 static int cap_segstate;
71 static int cap_booke_sregs;
72 static int cap_ppc_smt;
73 static int cap_ppc_rma;
74 static int cap_spapr_tce;
75 static int cap_spapr_multitce;
76 static int cap_spapr_vfio;
77 static int cap_hior;
78 static int cap_one_reg;
79 static int cap_epr;
80 static int cap_ppc_watchdog;
81 static int cap_papr;
82 static int cap_htab_fd;
83 static int cap_fixup_hcalls;
84 static int cap_htm;             /* Hardware transactional memory support */
85 
86 static uint32_t debug_inst_opcode;
87 
88 /* XXX We have a race condition where we actually have a level triggered
89  *     interrupt, but the infrastructure can't expose that yet, so the guest
90  *     takes it but ignores it, goes to sleep and never gets notified that there's
91  *     still an interrupt pending.
92  *
93  *     As a quick workaround, let's just wake up again 20 ms after we injected
94  *     an interrupt. That way we can ensure that we're always reinjecting
95  *     interrupts in case the guest swallowed them.
96  */
97 static QEMUTimer *idle_timer;
98 
99 static void kvm_kick_cpu(void *opaque)
100 {
101     PowerPCCPU *cpu = opaque;
102 
103     qemu_cpu_kick(CPU(cpu));
104 }
105 
106 /* Check whether we are running with KVM-PR (instead of KVM-HV).  This
107  * should only be used for fallback tests - generally we should use
108  * explicit capabilities for the features we want, rather than
109  * assuming what is/isn't available depending on the KVM variant. */
110 static bool kvmppc_is_pr(KVMState *ks)
111 {
112     /* Assume KVM-PR if the GET_PVINFO capability is available */
113     return kvm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
114 }
115 
116 static int kvm_ppc_register_host_cpu_type(void);
117 
118 int kvm_arch_init(MachineState *ms, KVMState *s)
119 {
120     cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
121     cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
122     cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
123     cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
124     cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
125     cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
126     cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
127     cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
128     cap_spapr_vfio = false;
129     cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
130     cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
131     cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
132     cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
133     /* Note: we don't set cap_papr here, because this capability is
134      * only activated later by kvmppc_set_papr() */
135     cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
136     cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
137     cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
138 
139     if (!cap_interrupt_level) {
140         fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
141                         "VM to stall at times!\n");
142     }
143 
144     kvm_ppc_register_host_cpu_type();
145 
146     return 0;
147 }
148 
149 int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
150 {
151     return 0;
152 }
153 
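/* Tell KVM which PVR the guest is using: read the current SREGS, update
 * the pvr field and write them back.  BookE is skipped (see the comment
 * below); Book3S requires the SEGSTATE capability. */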
154 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
155 {
156     CPUPPCState *cenv = &cpu->env;
157     CPUState *cs = CPU(cpu);
158     struct kvm_sregs sregs;
159     int ret;
160 
161     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
162         /* What we're really trying to say is "if we're on BookE, we use
163            the native PVR for now". This is the only sane way to check
164            it though, even if it may mislead users into thinking they can
165            run BookE guests on BookS. Let's hope nobody dares to try :) */
166         return 0;
167     } else {
168         if (!cap_segstate) {
169             fprintf(stderr, "kvm error: missing PVR setting capability\n");
170             return -ENOSYS;
171         }
172     }
173 
174     ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
175     if (ret) {
176         return ret;
177     }
178 
179     sregs.pvr = cenv->spr[SPR_PVR];
180     return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
181 }
182 
183 /* Set up a shared TLB array with KVM */
184 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
185 {
186     CPUPPCState *env = &cpu->env;
187     CPUState *cs = CPU(cpu);
188     struct kvm_book3e_206_tlb_params params = {};
189     struct kvm_config_tlb cfg = {};
190     unsigned int entries = 0;
191     int ret, i;
192 
193     if (!kvm_enabled() ||
194         !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
195         return 0;
196     }
197 
198     assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
199 
200     for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
201         params.tlb_sizes[i] = booke206_tlb_size(env, i);
202         params.tlb_ways[i] = booke206_tlb_ways(env, i);
203         entries += params.tlb_sizes[i];
204     }
205 
206     assert(entries == env->nb_tlb);
207     assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
208 
209     env->tlb_dirty = true;
210 
211     cfg.array = (uintptr_t)env->tlb.tlbm;
212     cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
213     cfg.params = (uintptr_t)&params;
214     cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
215 
216     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
217     if (ret < 0) {
218         fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
219                 __func__, strerror(-ret));
220         return ret;
221     }
222 
223     env->kvm_sw_tlb = true;
224     return 0;
225 }
226 
227 
228 #if defined(TARGET_PPC64)
229 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
230                                        struct kvm_ppc_smmu_info *info)
231 {
232     CPUPPCState *env = &cpu->env;
233     CPUState *cs = CPU(cpu);
234 
235     memset(info, 0, sizeof(*info));
236 
237     /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so we
238      * need to "guess" what the supported page sizes are.
239      *
240      * For that to work we make a few assumptions:
241      *
242      * - Check whether we are running "PR" KVM which only supports 4K
243      *   and 16M pages, but supports them regardless of the backing
244      *   store characteristics. We also don't support 1T segments.
245      *
246      *   This is safe because if HV KVM ever supports that capability or
247      *   PR KVM grows support for more page/segment sizes, those versions
248      *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
249      *   will not hit this fallback.
250      *
251      * - Else we are running HV KVM. This means we only support page
252      *   sizes that fit in the backing store. Additionally we only
253      *   advertise 64K pages if the processor is ARCH 2.06 and we assume
254      *   P7 encodings for the SLB and hash table. Here too, we assume
255      *   support for any newer processor will mean a kernel that
256      *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
257      *   this fallback.
258      */
259     if (kvmppc_is_pr(cs->kvm_state)) {
260         /* No flags */
261         info->flags = 0;
262         info->slb_size = 64;
263 
264         /* Standard 4k base page size segment */
265         info->sps[0].page_shift = 12;
266         info->sps[0].slb_enc = 0;
267         info->sps[0].enc[0].page_shift = 12;
268         info->sps[0].enc[0].pte_enc = 0;
269 
270         /* Standard 16M large page size segment */
271         info->sps[1].page_shift = 24;
272         info->sps[1].slb_enc = SLB_VSID_L;
273         info->sps[1].enc[0].page_shift = 24;
274         info->sps[1].enc[0].pte_enc = 0;
275     } else {
276         int i = 0;
277 
278         /* HV KVM has backing store size restrictions */
279         info->flags = KVM_PPC_PAGE_SIZES_REAL;
280 
281         if (env->mmu_model & POWERPC_MMU_1TSEG) {
282             info->flags |= KVM_PPC_1T_SEGMENTS;
283         }
284 
285         if (env->mmu_model == POWERPC_MMU_2_06 ||
286             env->mmu_model == POWERPC_MMU_2_07) {
287             info->slb_size = 32;
288         } else {
289             info->slb_size = 64;
290         }
291 
292         /* Standard 4k base page size segment */
293         info->sps[i].page_shift = 12;
294         info->sps[i].slb_enc = 0;
295         info->sps[i].enc[0].page_shift = 12;
296         info->sps[i].enc[0].pte_enc = 0;
297         i++;
298 
299         /* 64K on MMU 2.06 and later */
300         if (env->mmu_model == POWERPC_MMU_2_06 ||
301             env->mmu_model == POWERPC_MMU_2_07) {
302             info->sps[i].page_shift = 16;
303             info->sps[i].slb_enc = 0x110;
304             info->sps[i].enc[0].page_shift = 16;
305             info->sps[i].enc[0].pte_enc = 1;
306             i++;
307         }
308 
309         /* Standard 16M large page size segment */
310         info->sps[i].page_shift = 24;
311         info->sps[i].slb_enc = SLB_VSID_L;
312         info->sps[i].enc[0].page_shift = 24;
313         info->sps[i].enc[0].pte_enc = 0;
314     }
315 }
316 
317 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
318 {
319     CPUState *cs = CPU(cpu);
320     int ret;
321 
322     if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
323         ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
324         if (ret == 0) {
325             return;
326         }
327     }
328 
329     kvm_get_fallback_smmu_info(cpu, info);
330 }
331 
332 static long gethugepagesize(const char *mem_path)
333 {
334     struct statfs fs;
335     int ret;
336 
337     do {
338         ret = statfs(mem_path, &fs);
339     } while (ret != 0 && errno == EINTR);
340 
341     if (ret != 0) {
342         fprintf(stderr, "Couldn't statfs() memory path: %s\n",
343                 strerror(errno));
344         exit(1);
345     }
346 
347 #define HUGETLBFS_MAGIC       0x958458f6
348 
349     if (fs.f_type != HUGETLBFS_MAGIC) {
350         /* Explicit mempath, but it's ordinary pages */
351         return getpagesize();
352     }
353 
354     /* It's a hugetlbfs mount, return the huge page size */
355     return fs.f_bsize;
356 }
357 
358 /*
359  * FIXME TOCTTOU: this iterates over memory backends' mem-path, which
360  * may or may not name the same files / on the same filesystem now as
361  * when we actually open and map them.  Iterate over the file
362  * descriptors instead, and use qemu_fd_getpagesize().
363  */
364 static int find_max_supported_pagesize(Object *obj, void *opaque)
365 {
366     char *mem_path;
367     long *hpsize_min = opaque;
368 
369     if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) {
370         mem_path = object_property_get_str(obj, "mem-path", NULL);
371         if (mem_path) {
372             long hpsize = gethugepagesize(mem_path);
373             if (hpsize < *hpsize_min) {
374                 *hpsize_min = hpsize;
375             }
376         } else {
377             *hpsize_min = getpagesize();
378         }
379     }
380 
381     return 0;
382 }
383 
384 static long getrampagesize(void)
385 {
386     long hpsize = LONG_MAX;
387     long mainrampagesize;
388     Object *memdev_root;
389 
390     if (mem_path) {
391         mainrampagesize = gethugepagesize(mem_path);
392     } else {
393         mainrampagesize = getpagesize();
394     }
395 
396     /* It's possible we have memory-backend objects with
397      * hugepage-backed RAM. These may get mapped into system
398      * address space via -numa parameters or memory hotplug
399      * hooks. We want to take these into account, but we
400      * also want to make sure these supported hugepage
401      * sizes are applicable across the entire range of memory
402      * we may boot from, so we take the min across all
403      * backends, and assume normal pages in cases where a
404      * backend isn't backed by hugepages.
405      */
406     memdev_root = object_resolve_path("/objects", NULL);
407     if (memdev_root) {
408         object_child_foreach(memdev_root, find_max_supported_pagesize, &hpsize);
409     }
410     if (hpsize == LONG_MAX) {
411         /* No additional memory regions found ==> Report main RAM page size */
412         return mainrampagesize;
413     }
414 
415     /* If NUMA is disabled or the NUMA nodes are not backed with a
416      * memory-backend, then there is at least one node using "normal" RAM,
417      * so if its page size is smaller we have to report that size instead.
418      */
419     if (hpsize > mainrampagesize &&
420         (nb_numa_nodes == 0 || numa_info[0].node_memdev == NULL)) {
421         static bool warned;
422         if (!warned) {
423             error_report("Huge page support disabled (n/a for main memory).");
424             warned = true;
425         }
426         return mainrampagesize;
427     }
428 
429     return hpsize;
430 }
431 
432 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
433 {
434     if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
435         return true;
436     }
437 
438     return (1ul << shift) <= rampgsize;
439 }
440 
441 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
442 {
443     static struct kvm_ppc_smmu_info smmu_info;
444     static bool has_smmu_info;
445     CPUPPCState *env = &cpu->env;
446     long rampagesize;
447     int iq, ik, jq, jk;
448     bool has_64k_pages = false;
449 
450     /* We only handle page sizes for 64-bit server guests for now */
451     if (!(env->mmu_model & POWERPC_MMU_64)) {
452         return;
453     }
454 
455     /* Collect MMU info from kernel if not already */
456     if (!has_smmu_info) {
457         kvm_get_smmu_info(cpu, &smmu_info);
458         has_smmu_info = true;
459     }
460 
461     rampagesize = getrampagesize();
462 
463     /* Convert to QEMU form */
464     memset(&env->sps, 0, sizeof(env->sps));
465 
466     /* If we have HV KVM, we need to forbid CI large pages if our
467      * host page size is smaller than 64K.
468      */
469     if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) {
470         env->ci_large_pages = getpagesize() >= 0x10000;
471     }
472 
473     /*
474      * XXX This loop should be an entry-wide AND of the capabilities that
475      *     the selected CPU has with the capabilities that KVM supports.
476      */
477     for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
478         struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
479         struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
480 
481         if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
482                                  ksps->page_shift)) {
483             continue;
484         }
485         qsps->page_shift = ksps->page_shift;
486         qsps->slb_enc = ksps->slb_enc;
487         for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
488             if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
489                                      ksps->enc[jk].page_shift)) {
490                 continue;
491             }
492             if (ksps->enc[jk].page_shift == 16) {
493                 has_64k_pages = true;
494             }
495             qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
496             qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
497             if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
498                 break;
499             }
500         }
501         if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
502             break;
503         }
504     }
505     env->slb_nr = smmu_info.slb_size;
506     if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
507         env->mmu_model &= ~POWERPC_MMU_1TSEG;
508     }
509     if (!has_64k_pages) {
510         env->mmu_model &= ~POWERPC_MMU_64K;
511     }
512 }
513 #else /* defined (TARGET_PPC64) */
514 
515 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
516 {
517 }
518 
519 #endif /* !defined (TARGET_PPC64) */
520 
521 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
522 {
523     return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
524 }
525 
526 /* e500 supports 2 h/w breakpoints and 2 watchpoints.
527  * book3s supports only 1 watchpoint, so an array size
528  * of 4 is sufficient for now.
529  */
530 #define MAX_HW_BKPTS 4
531 
532 static struct HWBreakpoint {
533     target_ulong addr;
534     int type;
535 } hw_debug_points[MAX_HW_BKPTS];
536 
537 static CPUWatchpoint hw_watchpoint;
538 
539 /* By default no breakpoints or watchpoints are supported */
540 static int max_hw_breakpoint;
541 static int max_hw_watchpoint;
542 static int nb_hw_breakpoint;
543 static int nb_hw_watchpoint;
544 
545 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
546 {
547     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
548         max_hw_breakpoint = 2;
549         max_hw_watchpoint = 2;
550     }
551 
552     if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
553         fprintf(stderr, "Error initializing h/w breakpoints\n");
554         return;
555     }
556 }
557 
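/* Per-vCPU initialization: fix up the supported page sizes, push the PVR
 * to KVM, create the idle-kick timer, do MMU-model specific setup and
 * cache the debug instruction opcode used for software breakpoints. */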
558 int kvm_arch_init_vcpu(CPUState *cs)
559 {
560     PowerPCCPU *cpu = POWERPC_CPU(cs);
561     CPUPPCState *cenv = &cpu->env;
562     int ret;
563 
564     /* Gather server mmu info from KVM and update the CPU state */
565     kvm_fixup_page_sizes(cpu);
566 
567     /* Synchronize sregs with kvm */
568     ret = kvm_arch_sync_sregs(cpu);
569     if (ret) {
570         if (ret == -EINVAL) {
571             error_report("Register sync failed... If you're using kvm-hv.ko,"
572                          " only \"-cpu host\" is possible");
573         }
574         return ret;
575     }
576 
577     idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
578 
579     switch (cenv->mmu_model) {
580     case POWERPC_MMU_BOOKE206:
581         /* This target supports access to KVM's guest TLB */
582         ret = kvm_booke206_tlb_init(cpu);
583         break;
584     case POWERPC_MMU_2_07:
585         if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
586             /* KVM-HV has transactional memory on POWER8 even without the
587              * KVM_CAP_PPC_HTM extension, so enable it here instead. */
588             cap_htm = true;
589         }
590         break;
591     default:
592         break;
593     }
594 
595     kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
596     kvmppc_hw_debug_points_init(cenv);
597 
598     return ret;
599 }
600 
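/* Push QEMU's shadow TLB to KVM by marking every entry dirty, so that
 * KVM_DIRTY_TLB re-reads the whole array shared via KVM_CAP_SW_TLB. */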
601 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
602 {
603     CPUPPCState *env = &cpu->env;
604     CPUState *cs = CPU(cpu);
605     struct kvm_dirty_tlb dirty_tlb;
606     unsigned char *bitmap;
607     int ret;
608 
609     if (!env->kvm_sw_tlb) {
610         return;
611     }
612 
613     bitmap = g_malloc((env->nb_tlb + 7) / 8);
614     memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
615 
616     dirty_tlb.bitmap = (uintptr_t)bitmap;
617     dirty_tlb.num_dirty = env->nb_tlb;
618 
619     ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
620     if (ret) {
621         fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
622                 __func__, strerror(-ret));
623     }
624 
625     g_free(bitmap);
626 }
627 
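/* kvm_get_one_spr()/kvm_put_one_spr() transfer a single SPR through the
 * ONE_REG interface; the 32/64-bit access width is encoded in the id. */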
628 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
629 {
630     PowerPCCPU *cpu = POWERPC_CPU(cs);
631     CPUPPCState *env = &cpu->env;
632     union {
633         uint32_t u32;
634         uint64_t u64;
635     } val;
636     struct kvm_one_reg reg = {
637         .id = id,
638         .addr = (uintptr_t) &val,
639     };
640     int ret;
641 
642     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
643     if (ret != 0) {
644         trace_kvm_failed_spr_get(spr, strerror(errno));
645     } else {
646         switch (id & KVM_REG_SIZE_MASK) {
647         case KVM_REG_SIZE_U32:
648             env->spr[spr] = val.u32;
649             break;
650 
651         case KVM_REG_SIZE_U64:
652             env->spr[spr] = val.u64;
653             break;
654 
655         default:
656             /* Don't handle this size yet */
657             abort();
658         }
659     }
660 }
661 
662 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
663 {
664     PowerPCCPU *cpu = POWERPC_CPU(cs);
665     CPUPPCState *env = &cpu->env;
666     union {
667         uint32_t u32;
668         uint64_t u64;
669     } val;
670     struct kvm_one_reg reg = {
671         .id = id,
672         .addr = (uintptr_t) &val,
673     };
674     int ret;
675 
676     switch (id & KVM_REG_SIZE_MASK) {
677     case KVM_REG_SIZE_U32:
678         val.u32 = env->spr[spr];
679         break;
680 
681     case KVM_REG_SIZE_U64:
682         val.u64 = env->spr[spr];
683         break;
684 
685     default:
686         /* Don't handle this size yet */
687         abort();
688     }
689 
690     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
691     if (ret != 0) {
692         trace_kvm_failed_spr_set(spr, strerror(errno));
693     }
694 }
695 
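/* Synchronize the FP/VSX and Altivec state with KVM.  Each VSR is
 * transferred as a pair of doublewords whose order depends on the host
 * endianness, as handled below. */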
696 static int kvm_put_fp(CPUState *cs)
697 {
698     PowerPCCPU *cpu = POWERPC_CPU(cs);
699     CPUPPCState *env = &cpu->env;
700     struct kvm_one_reg reg;
701     int i;
702     int ret;
703 
704     if (env->insns_flags & PPC_FLOAT) {
705         uint64_t fpscr = env->fpscr;
706         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
707 
708         reg.id = KVM_REG_PPC_FPSCR;
709         reg.addr = (uintptr_t)&fpscr;
710         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
711         if (ret < 0) {
712             DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
713             return ret;
714         }
715 
716         for (i = 0; i < 32; i++) {
717             uint64_t vsr[2];
718 
719 #ifdef HOST_WORDS_BIGENDIAN
720             vsr[0] = float64_val(env->fpr[i]);
721             vsr[1] = env->vsr[i];
722 #else
723             vsr[0] = env->vsr[i];
724             vsr[1] = float64_val(env->fpr[i]);
725 #endif
726             reg.addr = (uintptr_t) &vsr;
727             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
728 
729             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
730             if (ret < 0) {
731                 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
732                         i, strerror(errno));
733                 return ret;
734             }
735         }
736     }
737 
738     if (env->insns_flags & PPC_ALTIVEC) {
739         reg.id = KVM_REG_PPC_VSCR;
740         reg.addr = (uintptr_t)&env->vscr;
741         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
742         if (ret < 0) {
743             DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
744             return ret;
745         }
746 
747         for (i = 0; i < 32; i++) {
748             reg.id = KVM_REG_PPC_VR(i);
749             reg.addr = (uintptr_t)&env->avr[i];
750             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
751             if (ret < 0) {
752                 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
753                 return ret;
754             }
755         }
756     }
757 
758     return 0;
759 }
760 
761 static int kvm_get_fp(CPUState *cs)
762 {
763     PowerPCCPU *cpu = POWERPC_CPU(cs);
764     CPUPPCState *env = &cpu->env;
765     struct kvm_one_reg reg;
766     int i;
767     int ret;
768 
769     if (env->insns_flags & PPC_FLOAT) {
770         uint64_t fpscr;
771         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
772 
773         reg.id = KVM_REG_PPC_FPSCR;
774         reg.addr = (uintptr_t)&fpscr;
775         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
776         if (ret < 0) {
777             DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
778             return ret;
779         } else {
780             env->fpscr = fpscr;
781         }
782 
783         for (i = 0; i < 32; i++) {
784             uint64_t vsr[2];
785 
786             reg.addr = (uintptr_t) &vsr;
787             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
788 
789             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
790             if (ret < 0) {
791                 DPRINTF("Unable to get %s%d from KVM: %s\n",
792                         vsx ? "VSR" : "FPR", i, strerror(errno));
793                 return ret;
794             } else {
795 #ifdef HOST_WORDS_BIGENDIAN
796                 env->fpr[i] = vsr[0];
797                 if (vsx) {
798                     env->vsr[i] = vsr[1];
799                 }
800 #else
801                 env->fpr[i] = vsr[1];
802                 if (vsx) {
803                     env->vsr[i] = vsr[0];
804                 }
805 #endif
806             }
807         }
808     }
809 
810     if (env->insns_flags & PPC_ALTIVEC) {
811         reg.id = KVM_REG_PPC_VSCR;
812         reg.addr = (uintptr_t)&env->vscr;
813         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
814         if (ret < 0) {
815             DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
816             return ret;
817         }
818 
819         for (i = 0; i < 32; i++) {
820             reg.id = KVM_REG_PPC_VR(i);
821             reg.addr = (uintptr_t)&env->avr[i];
822             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
823             if (ret < 0) {
824                 DPRINTF("Unable to get VR%d from KVM: %s\n",
825                         i, strerror(errno));
826                 return ret;
827             }
828         }
829     }
830 
831     return 0;
832 }
833 
834 #if defined(TARGET_PPC64)
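/* Accessors for the PAPR Virtual Processor Area, SLB shadow buffer and
 * dispatch trace log.  The SLB shadow and DTL ONE_REGs each transfer an
 * (address, size) pair, so the asserts below check that QEMU keeps those
 * fields adjacent in CPUPPCState. */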
835 static int kvm_get_vpa(CPUState *cs)
836 {
837     PowerPCCPU *cpu = POWERPC_CPU(cs);
838     CPUPPCState *env = &cpu->env;
839     struct kvm_one_reg reg;
840     int ret;
841 
842     reg.id = KVM_REG_PPC_VPA_ADDR;
843     reg.addr = (uintptr_t)&env->vpa_addr;
844     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
845     if (ret < 0) {
846         DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
847         return ret;
848     }
849 
850     assert((uintptr_t)&env->slb_shadow_size
851            == ((uintptr_t)&env->slb_shadow_addr + 8));
852     reg.id = KVM_REG_PPC_VPA_SLB;
853     reg.addr = (uintptr_t)&env->slb_shadow_addr;
854     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
855     if (ret < 0) {
856         DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
857                 strerror(errno));
858         return ret;
859     }
860 
861     assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
862     reg.id = KVM_REG_PPC_VPA_DTL;
863     reg.addr = (uintptr_t)&env->dtl_addr;
864     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
865     if (ret < 0) {
866         DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
867                 strerror(errno));
868         return ret;
869     }
870 
871     return 0;
872 }
873 
874 static int kvm_put_vpa(CPUState *cs)
875 {
876     PowerPCCPU *cpu = POWERPC_CPU(cs);
877     CPUPPCState *env = &cpu->env;
878     struct kvm_one_reg reg;
879     int ret;
880 
881     /* SLB shadow or DTL can't be registered unless a master VPA is
882      * registered.  That means when restoring state, if a VPA *is*
883      * registered, we need to set that up first.  If not, we need to
884      * deregister the others before deregistering the master VPA */
885     assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
886 
887     if (env->vpa_addr) {
888         reg.id = KVM_REG_PPC_VPA_ADDR;
889         reg.addr = (uintptr_t)&env->vpa_addr;
890         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
891         if (ret < 0) {
892             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
893             return ret;
894         }
895     }
896 
897     assert((uintptr_t)&env->slb_shadow_size
898            == ((uintptr_t)&env->slb_shadow_addr + 8));
899     reg.id = KVM_REG_PPC_VPA_SLB;
900     reg.addr = (uintptr_t)&env->slb_shadow_addr;
901     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
902     if (ret < 0) {
903         DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
904         return ret;
905     }
906 
907     assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
908     reg.id = KVM_REG_PPC_VPA_DTL;
909     reg.addr = (uintptr_t)&env->dtl_addr;
910     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
911     if (ret < 0) {
912         DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
913                 strerror(errno));
914         return ret;
915     }
916 
917     if (!env->vpa_addr) {
918         reg.id = KVM_REG_PPC_VPA_ADDR;
919         reg.addr = (uintptr_t)&env->vpa_addr;
920         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
921         if (ret < 0) {
922             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
923             return ret;
924         }
925     }
926 
927     return 0;
928 }
929 #endif /* TARGET_PPC64 */
930 
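/* Push the Book3S segment state (PVR, SDR1, SLB, segment registers and
 * BATs) to KVM in a single KVM_SET_SREGS call. */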
931 int kvmppc_put_books_sregs(PowerPCCPU *cpu)
932 {
933     CPUPPCState *env = &cpu->env;
934     struct kvm_sregs sregs;
935     int i;
936 
937     sregs.pvr = env->spr[SPR_PVR];
938 
939     sregs.u.s.sdr1 = env->spr[SPR_SDR1];
940 
941     /* Sync SLB */
942 #ifdef TARGET_PPC64
943     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
944         sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
945         if (env->slb[i].esid & SLB_ESID_V) {
946             sregs.u.s.ppc64.slb[i].slbe |= i;
947         }
948         sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
949     }
950 #endif
951 
952     /* Sync SRs */
953     for (i = 0; i < 16; i++) {
954         sregs.u.s.ppc32.sr[i] = env->sr[i];
955     }
956 
957     /* Sync BATs */
958     for (i = 0; i < 8; i++) {
959         /* Beware. We have to swap upper and lower bits here */
960         sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
961             | env->DBAT[1][i];
962         sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
963             | env->IBAT[1][i];
964     }
965 
966     return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
967 }
968 
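/* Write the vCPU state (GPRs, CR, SPRs, FP/VSX/Altivec, sregs, TM state,
 * VPA, ...) back into KVM.  'level' controls how much of the rarely
 * changing state is synchronized. */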
969 int kvm_arch_put_registers(CPUState *cs, int level)
970 {
971     PowerPCCPU *cpu = POWERPC_CPU(cs);
972     CPUPPCState *env = &cpu->env;
973     struct kvm_regs regs;
974     int ret;
975     int i;
976 
977     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
978     if (ret < 0) {
979         return ret;
980     }
981 
982     regs.ctr = env->ctr;
983     regs.lr  = env->lr;
984     regs.xer = cpu_read_xer(env);
985     regs.msr = env->msr;
986     regs.pc = env->nip;
987 
988     regs.srr0 = env->spr[SPR_SRR0];
989     regs.srr1 = env->spr[SPR_SRR1];
990 
991     regs.sprg0 = env->spr[SPR_SPRG0];
992     regs.sprg1 = env->spr[SPR_SPRG1];
993     regs.sprg2 = env->spr[SPR_SPRG2];
994     regs.sprg3 = env->spr[SPR_SPRG3];
995     regs.sprg4 = env->spr[SPR_SPRG4];
996     regs.sprg5 = env->spr[SPR_SPRG5];
997     regs.sprg6 = env->spr[SPR_SPRG6];
998     regs.sprg7 = env->spr[SPR_SPRG7];
999 
1000     regs.pid = env->spr[SPR_BOOKE_PID];
1001 
1002     for (i = 0; i < 32; i++)
1003         regs.gpr[i] = env->gpr[i];
1004 
1005     regs.cr = 0;
1006     for (i = 0; i < 8; i++) {
1007         regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
1008     }
1009 
1010     ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
1011     if (ret < 0)
1012         return ret;
1013 
1014     kvm_put_fp(cs);
1015 
1016     if (env->tlb_dirty) {
1017         kvm_sw_tlb_put(cpu);
1018         env->tlb_dirty = false;
1019     }
1020 
1021     if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
1022         ret = kvmppc_put_books_sregs(cpu);
1023         if (ret < 0) {
1024             return ret;
1025         }
1026     }
1027 
1028     if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
1029         kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1030     }
1031 
1032     if (cap_one_reg) {
1033         int i;
1034 
1035         /* We deliberately ignore errors here: for kernels which have
1036          * the ONE_REG calls but don't support the specific
1037          * registers, there's a reasonable chance things will still
1038          * work, at least until we try to migrate. */
1039         for (i = 0; i < 1024; i++) {
1040             uint64_t id = env->spr_cb[i].one_reg_id;
1041 
1042             if (id != 0) {
1043                 kvm_put_one_spr(cs, id, i);
1044             }
1045         }
1046 
1047 #ifdef TARGET_PPC64
1048         if (msr_ts) {
1049             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1050                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1051             }
1052             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1053                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1054             }
1055             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1056             kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1057             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1058             kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1059             kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1060             kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1061             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1062             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1063             kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1064             kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1065         }
1066 
1067         if (cap_papr) {
1068             if (kvm_put_vpa(cs) < 0) {
1069                 DPRINTF("Warning: Unable to set VPA information to KVM\n");
1070             }
1071         }
1072 
1073         kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1074 #endif /* TARGET_PPC64 */
1075     }
1076 
1077     return ret;
1078 }
1079 
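/* BookE exception vectors are computed as IVPR + IVORn; keep QEMU's
 * excp_vectors[] in sync whenever an IVOR is refreshed from KVM. */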
1080 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1081 {
1082      env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1083 }
1084 
1085 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1086 {
1087     CPUPPCState *env = &cpu->env;
1088     struct kvm_sregs sregs;
1089     int ret;
1090 
1091     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1092     if (ret < 0) {
1093         return ret;
1094     }
1095 
1096     if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1097         env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1098         env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1099         env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1100         env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1101         env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1102         env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1103         env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1104         env->spr[SPR_DECR] = sregs.u.e.dec;
1105         env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1106         env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1107         env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1108     }
1109 
1110     if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1111         env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1112         env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1113         env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1114         env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1115         env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1116     }
1117 
1118     if (sregs.u.e.features & KVM_SREGS_E_64) {
1119         env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1120     }
1121 
1122     if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1123         env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1124     }
1125 
1126     if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1127         env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1128         kvm_sync_excp(env, POWERPC_EXCP_CRITICAL,  SPR_BOOKE_IVOR0);
1129         env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1130         kvm_sync_excp(env, POWERPC_EXCP_MCHECK,  SPR_BOOKE_IVOR1);
1131         env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1132         kvm_sync_excp(env, POWERPC_EXCP_DSI,  SPR_BOOKE_IVOR2);
1133         env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1134         kvm_sync_excp(env, POWERPC_EXCP_ISI,  SPR_BOOKE_IVOR3);
1135         env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1136         kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL,  SPR_BOOKE_IVOR4);
1137         env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1138         kvm_sync_excp(env, POWERPC_EXCP_ALIGN,  SPR_BOOKE_IVOR5);
1139         env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1140         kvm_sync_excp(env, POWERPC_EXCP_PROGRAM,  SPR_BOOKE_IVOR6);
1141         env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1142         kvm_sync_excp(env, POWERPC_EXCP_FPU,  SPR_BOOKE_IVOR7);
1143         env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1144         kvm_sync_excp(env, POWERPC_EXCP_SYSCALL,  SPR_BOOKE_IVOR8);
1145         env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1146         kvm_sync_excp(env, POWERPC_EXCP_APU,  SPR_BOOKE_IVOR9);
1147         env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1148         kvm_sync_excp(env, POWERPC_EXCP_DECR,  SPR_BOOKE_IVOR10);
1149         env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1150         kvm_sync_excp(env, POWERPC_EXCP_FIT,  SPR_BOOKE_IVOR11);
1151         env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1152         kvm_sync_excp(env, POWERPC_EXCP_WDT,  SPR_BOOKE_IVOR12);
1153         env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1154         kvm_sync_excp(env, POWERPC_EXCP_DTLB,  SPR_BOOKE_IVOR13);
1155         env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1156         kvm_sync_excp(env, POWERPC_EXCP_ITLB,  SPR_BOOKE_IVOR14);
1157         env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1158         kvm_sync_excp(env, POWERPC_EXCP_DEBUG,  SPR_BOOKE_IVOR15);
1159 
1160         if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1161             env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1162             kvm_sync_excp(env, POWERPC_EXCP_SPEU,  SPR_BOOKE_IVOR32);
1163             env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1164             kvm_sync_excp(env, POWERPC_EXCP_EFPDI,  SPR_BOOKE_IVOR33);
1165             env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1166             kvm_sync_excp(env, POWERPC_EXCP_EFPRI,  SPR_BOOKE_IVOR34);
1167         }
1168 
1169         if (sregs.u.e.features & KVM_SREGS_E_PM) {
1170             env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1171             kvm_sync_excp(env, POWERPC_EXCP_EPERFM,  SPR_BOOKE_IVOR35);
1172         }
1173 
1174         if (sregs.u.e.features & KVM_SREGS_E_PC) {
1175             env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1176             kvm_sync_excp(env, POWERPC_EXCP_DOORI,  SPR_BOOKE_IVOR36);
1177             env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1178             kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1179         }
1180     }
1181 
1182     if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1183         env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1184         env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1185         env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1186         env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1187         env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1188         env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1189         env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1190         env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1191         env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1192         env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1193     }
1194 
1195     if (sregs.u.e.features & KVM_SREGS_EXP) {
1196         env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1197     }
1198 
1199     if (sregs.u.e.features & KVM_SREGS_E_PD) {
1200         env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1201         env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1202     }
1203 
1204     if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1205         env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1206         env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1207         env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1208 
1209         if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1210             env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1211             env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1212         }
1213     }
1214 
1215     return 0;
1216 }
1217 
1218 static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1219 {
1220     CPUPPCState *env = &cpu->env;
1221     struct kvm_sregs sregs;
1222     int ret;
1223     int i;
1224 
1225     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1226     if (ret < 0) {
1227         return ret;
1228     }
1229 
1230     if (!env->external_htab) {
1231         ppc_store_sdr1(env, sregs.u.s.sdr1);
1232     }
1233 
1234     /* Sync SLB */
1235 #ifdef TARGET_PPC64
1236     /*
1237      * The packed SLB array we get from KVM_GET_SREGS only contains
1238      * information about valid entries. So we flush our internal copy
1239      * to get rid of stale ones, then put all valid SLB entries back
1240      * in.
1241      */
1242     memset(env->slb, 0, sizeof(env->slb));
1243     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1244         target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1245         target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1246         /*
1247          * Only restore valid entries
1248          */
1249         if (rb & SLB_ESID_V) {
1250             ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1251         }
1252     }
1253 #endif
1254 
1255     /* Sync SRs */
1256     for (i = 0; i < 16; i++) {
1257         env->sr[i] = sregs.u.s.ppc32.sr[i];
1258     }
1259 
1260     /* Sync BATs */
1261     for (i = 0; i < 8; i++) {
1262         env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1263         env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1264         env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1265         env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1266     }
1267 
1268     return 0;
1269 }
1270 
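/* Counterpart of kvm_arch_put_registers(): fetch the full vCPU state
 * from KVM into QEMU's CPUPPCState. */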
1271 int kvm_arch_get_registers(CPUState *cs)
1272 {
1273     PowerPCCPU *cpu = POWERPC_CPU(cs);
1274     CPUPPCState *env = &cpu->env;
1275     struct kvm_regs regs;
1276     uint32_t cr;
1277     int i, ret;
1278 
1279     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1280     if (ret < 0)
1281         return ret;
1282 
1283     cr = regs.cr;
1284     for (i = 7; i >= 0; i--) {
1285         env->crf[i] = cr & 15;
1286         cr >>= 4;
1287     }
1288 
1289     env->ctr = regs.ctr;
1290     env->lr = regs.lr;
1291     cpu_write_xer(env, regs.xer);
1292     env->msr = regs.msr;
1293     env->nip = regs.pc;
1294 
1295     env->spr[SPR_SRR0] = regs.srr0;
1296     env->spr[SPR_SRR1] = regs.srr1;
1297 
1298     env->spr[SPR_SPRG0] = regs.sprg0;
1299     env->spr[SPR_SPRG1] = regs.sprg1;
1300     env->spr[SPR_SPRG2] = regs.sprg2;
1301     env->spr[SPR_SPRG3] = regs.sprg3;
1302     env->spr[SPR_SPRG4] = regs.sprg4;
1303     env->spr[SPR_SPRG5] = regs.sprg5;
1304     env->spr[SPR_SPRG6] = regs.sprg6;
1305     env->spr[SPR_SPRG7] = regs.sprg7;
1306 
1307     env->spr[SPR_BOOKE_PID] = regs.pid;
1308 
1309     for (i = 0; i < 32; i++)
1310         env->gpr[i] = regs.gpr[i];
1311 
1312     kvm_get_fp(cs);
1313 
1314     if (cap_booke_sregs) {
1315         ret = kvmppc_get_booke_sregs(cpu);
1316         if (ret < 0) {
1317             return ret;
1318         }
1319     }
1320 
1321     if (cap_segstate) {
1322         ret = kvmppc_get_books_sregs(cpu);
1323         if (ret < 0) {
1324             return ret;
1325         }
1326     }
1327 
1328     if (cap_hior) {
1329         kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1330     }
1331 
1332     if (cap_one_reg) {
1333         int i;
1334 
1335         /* We deliberately ignore errors here: for kernels which have
1336          * the ONE_REG calls but don't support the specific
1337          * registers, there's a reasonable chance things will still
1338          * work, at least until we try to migrate. */
1339         for (i = 0; i < 1024; i++) {
1340             uint64_t id = env->spr_cb[i].one_reg_id;
1341 
1342             if (id != 0) {
1343                 kvm_get_one_spr(cs, id, i);
1344             }
1345         }
1346 
1347 #ifdef TARGET_PPC64
1348         if (msr_ts) {
1349             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1350                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1351             }
1352             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1353                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1354             }
1355             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1356             kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1357             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1358             kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1359             kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1360             kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1361             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1362             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1363             kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1364             kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1365         }
1366 
1367         if (cap_papr) {
1368             if (kvm_get_vpa(cs) < 0) {
1369                 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1370             }
1371         }
1372 
1373         kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1374 #endif
1375     }
1376 
1377     return 0;
1378 }
1379 
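/* Assert or deassert the guest's external interrupt pin via the
 * KVM_INTERRUPT ioctl.  This is only done when KVM advertises both the
 * level and unset interrupt capabilities; otherwise it is a no-op. */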
1380 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1381 {
1382     unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1383 
1384     if (irq != PPC_INTERRUPT_EXT) {
1385         return 0;
1386     }
1387 
1388     if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1389         return 0;
1390     }
1391 
1392     kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1393 
1394     return 0;
1395 }
1396 
1397 #if defined(TARGET_PPCEMB)
1398 #define PPC_INPUT_INT PPC40x_INPUT_INT
1399 #elif defined(TARGET_PPC64)
1400 #define PPC_INPUT_INT PPC970_INPUT_INT
1401 #else
1402 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1403 #endif
1404 
1405 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1406 {
1407     PowerPCCPU *cpu = POWERPC_CPU(cs);
1408     CPUPPCState *env = &cpu->env;
1409     int r;
1410     unsigned irq;
1411 
1412     qemu_mutex_lock_iothread();
1413 
1414     /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1415      * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1416     if (!cap_interrupt_level &&
1417         run->ready_for_interrupt_injection &&
1418         (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1419         (env->irq_input_state & (1<<PPC_INPUT_INT)))
1420     {
1421         /* For now KVM disregards the 'irq' argument. However, in the
1422          * future KVM could cache it in-kernel to avoid a heavyweight exit
1423          * when reading the UIC.
1424          */
1425         irq = KVM_INTERRUPT_SET;
1426 
1427         DPRINTF("injected interrupt %d\n", irq);
1428         r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1429         if (r < 0) {
1430             printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1431         }
1432 
1433         /* Always wake up soon in case the interrupt was level based */
1434         timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1435                        (NANOSECONDS_PER_SECOND / 50));
1436     }
1437 
1438     /* We don't know if there are more interrupts pending after this. However,
1439      * the guest will return to userspace in the course of handling this one
1440      * anyway, so we will get a chance to deliver the rest. */
1441 
1442     qemu_mutex_unlock_iothread();
1443 }
1444 
1445 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1446 {
1447     return MEMTXATTRS_UNSPECIFIED;
1448 }
1449 
1450 int kvm_arch_process_async_events(CPUState *cs)
1451 {
1452     return cs->halted;
1453 }
1454 
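/* Put the vCPU to sleep on a halt exit if external interrupts are
 * enabled (MSR_EE) and none is currently pending. */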
1455 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1456 {
1457     CPUState *cs = CPU(cpu);
1458     CPUPPCState *env = &cpu->env;
1459 
1460     if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1461         cs->halted = 1;
1462         cs->exception_index = EXCP_HLT;
1463     }
1464 
1465     return 0;
1466 }
1467 
1468 /* map dcr access to existing qemu dcr emulation */
1469 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1470 {
1471     if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1472         fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1473 
1474     return 0;
1475 }
1476 
1477 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1478 {
1479     if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1480         fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1481 
1482     return 0;
1483 }
1484 
1485 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1486 {
1487     /* Mixed endian case is not handled */
1488     uint32_t sc = debug_inst_opcode;
1489 
1490     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1491                             sizeof(sc), 0) ||
1492         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1493         return -EINVAL;
1494     }
1495 
1496     return 0;
1497 }
1498 
1499 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1500 {
1501     uint32_t sc;
1502 
1503     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1504         sc != debug_inst_opcode ||
1505         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1506                             sizeof(sc), 1)) {
1507         return -EINVAL;
1508     }
1509 
1510     return 0;
1511 }
1512 
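/* Look up a hardware breakpoint or watchpoint in hw_debug_points[] by
 * address and type (find_hw_watchpoint() also reports the matching
 * BP_MEM_* access flag).  Returns the slot index, or -1 if not found. */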
1513 static int find_hw_breakpoint(target_ulong addr, int type)
1514 {
1515     int n;
1516 
1517     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1518            <= ARRAY_SIZE(hw_debug_points));
1519 
1520     for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1521         if (hw_debug_points[n].addr == addr &&
1522              hw_debug_points[n].type == type) {
1523             return n;
1524         }
1525     }
1526 
1527     return -1;
1528 }
1529 
1530 static int find_hw_watchpoint(target_ulong addr, int *flag)
1531 {
1532     int n;
1533 
1534     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1535     if (n >= 0) {
1536         *flag = BP_MEM_ACCESS;
1537         return n;
1538     }
1539 
1540     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1541     if (n >= 0) {
1542         *flag = BP_MEM_WRITE;
1543         return n;
1544     }
1545 
1546     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1547     if (n >= 0) {
1548         *flag = BP_MEM_READ;
1549         return n;
1550     }
1551 
1552     return -1;
1553 }
1554 
1555 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1556                                   target_ulong len, int type)
1557 {
1558     if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1559         return -ENOBUFS;
1560     }
1561 
1562     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1563     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1564 
1565     switch (type) {
1566     case GDB_BREAKPOINT_HW:
1567         if (nb_hw_breakpoint >= max_hw_breakpoint) {
1568             return -ENOBUFS;
1569         }
1570 
1571         if (find_hw_breakpoint(addr, type) >= 0) {
1572             return -EEXIST;
1573         }
1574 
1575         nb_hw_breakpoint++;
1576         break;
1577 
1578     case GDB_WATCHPOINT_WRITE:
1579     case GDB_WATCHPOINT_READ:
1580     case GDB_WATCHPOINT_ACCESS:
1581         if (nb_hw_watchpoint >= max_hw_watchpoint) {
1582             return -ENOBUFS;
1583         }
1584 
1585         if (find_hw_breakpoint(addr, type) >= 0) {
1586             return -EEXIST;
1587         }
1588 
1589         nb_hw_watchpoint++;
1590         break;
1591 
1592     default:
1593         return -ENOSYS;
1594     }
1595 
1596     return 0;
1597 }
1598 
1599 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1600                                   target_ulong len, int type)
1601 {
1602     int n;
1603 
1604     n = find_hw_breakpoint(addr, type);
1605     if (n < 0) {
1606         return -ENOENT;
1607     }
1608 
1609     switch (type) {
1610     case GDB_BREAKPOINT_HW:
1611         nb_hw_breakpoint--;
1612         break;
1613 
1614     case GDB_WATCHPOINT_WRITE:
1615     case GDB_WATCHPOINT_READ:
1616     case GDB_WATCHPOINT_ACCESS:
1617         nb_hw_watchpoint--;
1618         break;
1619 
1620     default:
1621         return -ENOSYS;
1622     }
1623     hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1624 
1625     return 0;
1626 }
1627 
1628 void kvm_arch_remove_all_hw_breakpoints(void)
1629 {
1630     nb_hw_breakpoint = nb_hw_watchpoint = 0;
1631 }
1632 
1633 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1634 {
1635     int n;
1636 
1637     /* Software Breakpoint updates */
1638     if (kvm_sw_breakpoints_active(cs)) {
1639         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1640     }
1641 
1642     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1643            <= ARRAY_SIZE(hw_debug_points));
1644     assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1645 
1646     if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1647         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1648         memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1649         for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1650             switch (hw_debug_points[n].type) {
1651             case GDB_BREAKPOINT_HW:
1652                 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1653                 break;
1654             case GDB_WATCHPOINT_WRITE:
1655                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1656                 break;
1657             case GDB_WATCHPOINT_READ:
1658                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1659                 break;
1660             case GDB_WATCHPOINT_ACCESS:
1661                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1662                                         KVMPPC_DEBUG_WATCH_READ;
1663                 break;
1664             default:
1665                 cpu_abort(cs, "Unsupported breakpoint type\n");
1666             }
1667             dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1668         }
1669     }
1670 }
1671 
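/* Decide what to do with a KVM_EXIT_DEBUG exit.  Returns non-zero if
 * the exception belongs to QEMU (single-step, or a software/hardware
 * breakpoint or watchpoint we registered) and should be reported to
 * the debugger as EXCP_DEBUG; otherwise returns 0, injecting a
 * program interrupt into the guest when the exit was not caused by
 * any debug facility QEMU knows about (see the comment below). */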
1672 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1673 {
1674     CPUState *cs = CPU(cpu);
1675     CPUPPCState *env = &cpu->env;
1676     struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1677     int handle = 0;
1678     int n;
1679     int flag = 0;
1680 
1681     if (cs->singlestep_enabled) {
1682         handle = 1;
1683     } else if (arch_info->status) {
1684         if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1685             if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1686                 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1687                 if (n >= 0) {
1688                     handle = 1;
1689                 }
1690             } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1691                                             KVMPPC_DEBUG_WATCH_WRITE)) {
1692                 n = find_hw_watchpoint(arch_info->address,  &flag);
1693                 if (n >= 0) {
1694                     handle = 1;
1695                     cs->watchpoint_hit = &hw_watchpoint;
1696                     hw_watchpoint.vaddr = hw_debug_points[n].addr;
1697                     hw_watchpoint.flags = flag;
1698                 }
1699             }
1700         }
1701     } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1702         handle = 1;
1703     } else {
1704         /* QEMU is not able to handle this debug exception, so inject
1705          * a program exception into the guest; yes, a program exception,
1706          * NOT a debug exception!
1707          * While QEMU is using the debug resources, debug exceptions
1708          * must always be enabled.  To achieve this we set MSR_DE and
1709          * also set MSRP_DEP so the guest cannot change MSR_DE.
1710          * When emulating debug resources for the guest we want the
1711          * guest to control MSR_DE (enable/disable debug interrupts as
1712          * needed).  Supporting both configurations at once is NOT
1713          * possible, so debug resources cannot be shared between QEMU
1714          * and the guest on the BookE architecture.
1715          * In the current design QEMU gets priority over the guest: if
1716          * QEMU is using the debug resources then the guest cannot use
1717          * them.
1718          * For software breakpoints QEMU uses a privileged instruction,
1719          * so the only way we can end up here without QEMU having set a
1720          * debug exception is if the guest executed a privileged or
1721          * illegal instruction, and that is why we inject a program
1722          * interrupt.
1723          */
1724 
1725         cpu_synchronize_state(cs);
1726         /* env->nip is PC, so increment this by 4 to use
1727          * ppc_cpu_do_interrupt(), which sets srr0 = env->nip - 4.
1728          */
1729         env->nip += 4;
1730         cs->exception_index = POWERPC_EXCP_PROGRAM;
1731         env->error_code = POWERPC_EXCP_INVAL;
1732         ppc_cpu_do_interrupt(cs);
1733     }
1734 
1735     return handle;
1736 }
1737 
1738 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1739 {
1740     PowerPCCPU *cpu = POWERPC_CPU(cs);
1741     CPUPPCState *env = &cpu->env;
1742     int ret;
1743 
1744     qemu_mutex_lock_iothread();
1745 
1746     switch (run->exit_reason) {
1747     case KVM_EXIT_DCR:
1748         if (run->dcr.is_write) {
1749             DPRINTF("handle dcr write\n");
1750             ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1751         } else {
1752             DPRINTF("handle dcr read\n");
1753             ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1754         }
1755         break;
1756     case KVM_EXIT_HLT:
1757         DPRINTF("handle halt\n");
1758         ret = kvmppc_handle_halt(cpu);
1759         break;
1760 #if defined(TARGET_PPC64)
1761     case KVM_EXIT_PAPR_HCALL:
1762         DPRINTF("handle PAPR hypercall\n");
1763         run->papr_hcall.ret = spapr_hypercall(cpu,
1764                                               run->papr_hcall.nr,
1765                                               run->papr_hcall.args);
1766         ret = 0;
1767         break;
1768 #endif
1769     case KVM_EXIT_EPR:
1770         DPRINTF("handle epr\n");
1771         run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1772         ret = 0;
1773         break;
1774     case KVM_EXIT_WATCHDOG:
1775         DPRINTF("handle watchdog expiry\n");
1776         watchdog_perform_action();
1777         ret = 0;
1778         break;
1779 
1780     case KVM_EXIT_DEBUG:
1781         DPRINTF("handle debug exception\n");
1782         if (kvm_handle_debug(cpu, run)) {
1783             ret = EXCP_DEBUG;
1784             break;
1785         }
1786         /* re-enter, this exception was guest-internal */
1787         ret = 0;
1788         break;
1789 
1790     default:
1791         fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1792         ret = -1;
1793         break;
1794     }
1795 
1796     qemu_mutex_unlock_iothread();
1797     return ret;
1798 }
1799 
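/* BookE timer helpers: ask KVM to OR bits into, or clear bits from,
 * the guest's Timer Status Register, and to reload the Timer Control
 * Register from env, using the ONE_REG interface. */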
1800 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1801 {
1802     CPUState *cs = CPU(cpu);
1803     uint32_t bits = tsr_bits;
1804     struct kvm_one_reg reg = {
1805         .id = KVM_REG_PPC_OR_TSR,
1806         .addr = (uintptr_t) &bits,
1807     };
1808 
1809     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1810 }
1811 
1812 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1813 {
1814 
1815     CPUState *cs = CPU(cpu);
1816     uint32_t bits = tsr_bits;
1817     struct kvm_one_reg reg = {
1818         .id = KVM_REG_PPC_CLEAR_TSR,
1819         .addr = (uintptr_t) &bits,
1820     };
1821 
1822     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1823 }
1824 
1825 int kvmppc_set_tcr(PowerPCCPU *cpu)
1826 {
1827     CPUState *cs = CPU(cpu);
1828     CPUPPCState *env = &cpu->env;
1829     uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1830 
1831     struct kvm_one_reg reg = {
1832         .id = KVM_REG_PPC_TCR,
1833         .addr = (uintptr_t) &tcr,
1834     };
1835 
1836     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1837 }
1838 
1839 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1840 {
1841     CPUState *cs = CPU(cpu);
1842     int ret;
1843 
1844     if (!kvm_enabled()) {
1845         return -1;
1846     }
1847 
1848     if (!cap_ppc_watchdog) {
1849         printf("warning: KVM does not support watchdog\n");
1850         return -1;
1851     }
1852 
1853     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1854     if (ret < 0) {
1855         fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1856                 __func__, strerror(-ret));
1857         return ret;
1858     }
1859 
1860     return ret;
1861 }
1862 
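/* Look for a line starting with @field in /proc/cpuinfo and copy the
 * whole matching line (including the field name) into @value.
 * Returns 0 on success, -1 if the file can't be read or the field is
 * not present. */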
1863 static int read_cpuinfo(const char *field, char *value, int len)
1864 {
1865     FILE *f;
1866     int ret = -1;
1867     int field_len = strlen(field);
1868     char line[512];
1869 
1870     f = fopen("/proc/cpuinfo", "r");
1871     if (!f) {
1872         return -1;
1873     }
1874 
1875     do {
1876         if (!fgets(line, sizeof(line), f)) {
1877             break;
1878         }
1879         if (!strncmp(line, field, field_len)) {
1880             pstrcpy(value, len, line);
1881             ret = 0;
1882             break;
1883         }
1884     } while (*line);
1885 
1886     fclose(f);
1887 
1888     return ret;
1889 }
1890 
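/* Parse the host timebase frequency from the "timebase" line of
 * /proc/cpuinfo, falling back to NANOSECONDS_PER_SECOND (i.e. a 1GHz
 * timebase) when it can't be determined. */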
1891 uint32_t kvmppc_get_tbfreq(void)
1892 {
1893     char line[512];
1894     char *ns;
1895     uint32_t retval = NANOSECONDS_PER_SECOND;
1896 
1897     if (read_cpuinfo("timebase", line, sizeof(line))) {
1898         return retval;
1899     }
1900 
1901     if (!(ns = strchr(line, ':'))) {
1902         return retval;
1903     }
1904 
1905     ns++;
1906 
1907     return atoi(ns);
1908 }
1909 
1910 bool kvmppc_get_host_serial(char **value)
1911 {
1912     return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
1913                                NULL);
1914 }
1915 
1916 bool kvmppc_get_host_model(char **value)
1917 {
1918     return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
1919 }
1920 
1921 /* Try to find a device tree node for a CPU with clock-frequency property */
1922 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1923 {
1924     struct dirent *dirp;
1925     DIR *dp;
1926 
1927     if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1928         printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1929         return -1;
1930     }
1931 
1932     buf[0] = '\0';
1933     while ((dirp = readdir(dp)) != NULL) {
1934         FILE *f;
1935         snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1936                  dirp->d_name);
1937         f = fopen(buf, "r");
1938         if (f) {
1939             snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1940             fclose(f);
1941             break;
1942         }
1943         buf[0] = '\0';
1944     }
1945     closedir(dp);
1946     if (buf[0] == '\0') {
1947         printf("Unknown host!\n");
1948         return -1;
1949     }
1950 
1951     return 0;
1952 }
1953 
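/* Read a single big-endian integer property (32-bit or 64-bit) from a
 * device tree node exposed under /proc.  Returns -1 if the file can't
 * be opened, or 0 if the property size isn't understood. */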
1954 static uint64_t kvmppc_read_int_dt(const char *filename)
1955 {
1956     union {
1957         uint32_t v32;
1958         uint64_t v64;
1959     } u;
1960     FILE *f;
1961     int len;
1962 
1963     f = fopen(filename, "rb");
1964     if (!f) {
1965         return -1;
1966     }
1967 
1968     len = fread(&u, 1, sizeof(u), f);
1969     fclose(f);
1970     switch (len) {
1971     case 4:
1972         /* property is a 32-bit quantity */
1973         return be32_to_cpu(u.v32);
1974     case 8:
1975         return be64_to_cpu(u.v64);
1976     }
1977 
1978     return 0;
1979 }
1980 
1981 /* Read a CPU node property from the host device tree that's a single
1982  * integer (32-bit or 64-bit).  Returns -1 if the node or property
1983  * can't be found or opened, and 0 if the value's format isn't
1984  * understood. */
1985 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1986 {
1987     char buf[PATH_MAX], *tmp;
1988     uint64_t val;
1989 
1990     if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1991         return -1;
1992     }
1993 
1994     tmp = g_strdup_printf("%s/%s", buf, propname);
1995     val = kvmppc_read_int_dt(tmp);
1996     g_free(tmp);
1997 
1998     return val;
1999 }
2000 
2001 uint64_t kvmppc_get_clockfreq(void)
2002 {
2003     return kvmppc_read_int_cpu_dt("clock-frequency");
2004 }
2005 
2006 uint32_t kvmppc_get_vmx(void)
2007 {
2008     return kvmppc_read_int_cpu_dt("ibm,vmx");
2009 }
2010 
2011 uint32_t kvmppc_get_dfp(void)
2012 {
2013     return kvmppc_read_int_cpu_dt("ibm,dfp");
2014 }
2015 
2016 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
2017 {
2018     PowerPCCPU *cpu = ppc_env_get_cpu(env);
2019     CPUState *cs = CPU(cpu);
2020 
2021     if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
2022         !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
2023         return 0;
2024     }
2025 
2026     return 1;
2027 }
2028 
2029 int kvmppc_get_hasidle(CPUPPCState *env)
2030 {
2031     struct kvm_ppc_pvinfo pvinfo;
2032 
2033     if (!kvmppc_get_pvinfo(env, &pvinfo) &&
2034         (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
2035         return 1;
2036     }
2037 
2038     return 0;
2039 }
2040 
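/* Copy the kernel-provided hypercall instruction sequence into @buf.
 * If the kernel doesn't supply one, fill in a sequence that makes
 * every hypercall fail with -1 regardless of guest endianness (see
 * the instruction-by-instruction comment below) and return 1. */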
2041 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
2042 {
2043     uint32_t *hc = (uint32_t *)buf;
2044     struct kvm_ppc_pvinfo pvinfo;
2045 
2046     if (!kvmppc_get_pvinfo(env, &pvinfo)) {
2047         memcpy(buf, pvinfo.hcall, buf_len);
2048         return 0;
2049     }
2050 
2051     /*
2052      * Fallback to always fail hypercalls regardless of endianness:
2053      *
2054      *     tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
2055      *     li r3, -1
2056      *     b .+8       (becomes nop in wrong endian)
2057      *     bswap32(li r3, -1)
2058      */
2059 
2060     hc[0] = cpu_to_be32(0x08000048);
2061     hc[1] = cpu_to_be32(0x3860ffff);
2062     hc[2] = cpu_to_be32(0x48000008);
2063     hc[3] = cpu_to_be32(bswap32(0x3860ffff));
2064 
2065     return 1;
2066 }
2067 
2068 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
2069 {
2070     return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
2071 }
2072 
2073 void kvmppc_enable_logical_ci_hcalls(void)
2074 {
2075     /*
2076      * FIXME: it would be nice if we could detect the cases where
2077      * we're using a device which requires the in-kernel
2078      * implementation of these hcalls but the kernel lacks them, and
2079      * produce a warning in that case.
2080      */
2081     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
2082     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
2083 }
2084 
2085 void kvmppc_enable_set_mode_hcall(void)
2086 {
2087     kvmppc_enable_hcall(kvm_state, H_SET_MODE);
2088 }
2089 
2090 void kvmppc_enable_clear_ref_mod_hcalls(void)
2091 {
2092     kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
2093     kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
2094 }
2095 
2096 void kvmppc_set_papr(PowerPCCPU *cpu)
2097 {
2098     CPUState *cs = CPU(cpu);
2099     int ret;
2100 
2101     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2102     if (ret) {
2103         error_report("This vCPU type or KVM version does not support PAPR");
2104         exit(1);
2105     }
2106 
2107     /* Update the capability flag so we sync the right information
2108      * with kvm */
2109     cap_papr = 1;
2110 }
2111 
2112 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr)
2113 {
2114     return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr);
2115 }
2116 
2117 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2118 {
2119     CPUState *cs = CPU(cpu);
2120     int ret;
2121 
2122     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2123     if (ret && mpic_proxy) {
2124         error_report("This KVM version does not support EPR");
2125         exit(1);
2126     }
2127 }
2128 
2129 int kvmppc_smt_threads(void)
2130 {
2131     return cap_ppc_smt ? cap_ppc_smt : 1;
2132 }
2133 
2134 #ifdef TARGET_PPC64
2135 off_t kvmppc_alloc_rma(void **rma)
2136 {
2137     off_t size;
2138     int fd;
2139     struct kvm_allocate_rma ret;
2140 
2141     /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
2142      * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
2143      *                      not necessary on this hardware
2144      * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
2145      *
2146      * FIXME: We should allow the user to force contiguous RMA
2147      * allocation in the cap_ppc_rma==1 case.
2148      */
2149     if (cap_ppc_rma < 2) {
2150         return 0;
2151     }
2152 
2153     fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
2154     if (fd < 0) {
2155         fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
2156                 strerror(errno));
2157         return -1;
2158     }
2159 
2160     size = MIN(ret.rma_size, 256ul << 20);
2161 
2162     *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2163     if (*rma == MAP_FAILED) {
2164         fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
2165         return -1;
2166     }
2167 
2168     return size;
2169 }
2170 
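/* Work out how large an RMA we can actually use.  The limit applied
 * below is the number of PTEGs in the hash table (2^(hash_shift - 7),
 * since each PTEG is 128 bytes) multiplied by the largest host page
 * size that is no bigger than the backing page size of guest RAM. */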
2171 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2172 {
2173     struct kvm_ppc_smmu_info info;
2174     long rampagesize, best_page_shift;
2175     int i;
2176 
2177     if (cap_ppc_rma >= 2) {
2178         return current_size;
2179     }
2180 
2181     /* Find the largest hardware supported page size that's less than
2182      * or equal to the (logical) backing page size of guest RAM */
2183     kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
2184     rampagesize = getrampagesize();
2185     best_page_shift = 0;
2186 
2187     for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2188         struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2189 
2190         if (!sps->page_shift) {
2191             continue;
2192         }
2193 
2194         if ((sps->page_shift > best_page_shift)
2195             && ((1UL << sps->page_shift) <= rampagesize)) {
2196             best_page_shift = sps->page_shift;
2197         }
2198     }
2199 
2200     return MIN(current_size,
2201                1ULL << (best_page_shift + hash_shift - 7));
2202 }
2203 #endif
2204 
2205 bool kvmppc_spapr_use_multitce(void)
2206 {
2207     return cap_spapr_multitce;
2208 }
2209 
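/* Try to create an in-kernel TCE table for @liobn and mmap() it into
 * QEMU.  On success, *pfd gets the table fd and the mapping is
 * returned; on failure (or if the kernel lacks the capability, or
 * VFIO support is needed but missing) NULL is returned and *pfd is
 * set to -1 so the caller can fall back to a userspace table. */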
2210 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
2211                               bool need_vfio)
2212 {
2213     struct kvm_create_spapr_tce args = {
2214         .liobn = liobn,
2215         .window_size = window_size,
2216     };
2217     long len;
2218     int fd;
2219     void *table;
2220 
2221     /* Must set fd to -1 so we don't try to munmap when called for
2222      * destroying the table, which the upper layers -will- do
2223      */
2224     *pfd = -1;
2225     if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2226         return NULL;
2227     }
2228 
2229     fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2230     if (fd < 0) {
2231         fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2232                 liobn);
2233         return NULL;
2234     }
2235 
2236     len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
2237     /* FIXME: round this up to page size */
2238 
2239     table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2240     if (table == MAP_FAILED) {
2241         fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2242                 liobn);
2243         close(fd);
2244         return NULL;
2245     }
2246 
2247     *pfd = fd;
2248     return table;
2249 }
2250 
2251 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
2252 {
2253     long len;
2254 
2255     if (fd < 0) {
2256         return -1;
2257     }
2258 
2259     len = nb_table * sizeof(uint64_t);
2260     if ((munmap(table, len) < 0) ||
2261         (close(fd) < 0)) {
2262         fprintf(stderr, "KVM: Unexpected error removing TCE table: %s\n",
2263                 strerror(errno));
2264         /* Leak the table */
2265     }
2266 
2267     return 0;
2268 }
2269 
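/* (Re)allocate the guest hash page table.  Returns 0 if QEMU should
 * allocate the HPT itself (full emulation or PR KVM), a shift value
 * if the kernel has allocated (or already owns) an HPT of 2^shift
 * bytes, or a negative errno on failure. */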
2270 int kvmppc_reset_htab(int shift_hint)
2271 {
2272     uint32_t shift = shift_hint;
2273 
2274     if (!kvm_enabled()) {
2275         /* Full emulation, tell caller to allocate htab itself */
2276         return 0;
2277     }
2278     if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2279         int ret;
2280         ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2281         if (ret == -ENOTTY) {
2282             /* At least some versions of PR KVM advertise the
2283              * capability, but don't implement the ioctl().  Oops.
2284              * Return 0 so that we allocate the htab in qemu, as is
2285              * correct for PR. */
2286             return 0;
2287         } else if (ret < 0) {
2288             return ret;
2289         }
2290         return shift;
2291     }
2292 
2293     /* We have a kernel that predates the htab reset calls.  For PR
2294      * KVM, we need to allocate the htab ourselves; an HV KVM of this
2295      * era has already allocated a fixed 16MB hash table. */
2296     if (kvmppc_is_pr(kvm_state)) {
2297         /* PR - tell caller to allocate htab */
2298         return 0;
2299     } else {
2300         /* HV - assume 16MB kernel allocated htab */
2301         return 24;
2302     }
2303 }
2304 
2305 static inline uint32_t mfpvr(void)
2306 {
2307     uint32_t pvr;
2308 
2309     asm ("mfpvr %0"
2310          : "=r"(pvr));
2311     return pvr;
2312 }
2313 
2314 static void alter_insns(uint64_t *word, uint64_t flags, bool on)
2315 {
2316     if (on) {
2317         *word |= flags;
2318     } else {
2319         *word &= ~flags;
2320     }
2321 }
2322 
2323 static void kvmppc_host_cpu_initfn(Object *obj)
2324 {
2325     assert(kvm_enabled());
2326 }
2327 
2328 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2329 {
2330     DeviceClass *dc = DEVICE_CLASS(oc);
2331     PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2332     uint32_t vmx = kvmppc_get_vmx();
2333     uint32_t dfp = kvmppc_get_dfp();
2334     uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2335     uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2336 
2337     /* Now fix up the class with information we can query from the host */
2338     pcc->pvr = mfpvr();
2339 
2340     if (vmx != -1) {
2341         /* Only override when we know what the host supports */
2342         alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
2343         alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
2344     }
2345     if (dfp != -1) {
2346         /* Only override when we know what the host supports */
2347         alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
2348     }
2349 
2350     if (dcache_size != -1) {
2351         pcc->l1_dcache_size = dcache_size;
2352     }
2353 
2354     if (icache_size != -1) {
2355         pcc->l1_icache_size = icache_size;
2356     }
2357 
2358     /* Reason: kvmppc_host_cpu_initfn() dies when !kvm_enabled() */
2359     dc->cannot_destroy_with_object_finalize_yet = true;
2360 }
2361 
2362 bool kvmppc_has_cap_epr(void)
2363 {
2364     return cap_epr;
2365 }
2366 
2367 bool kvmppc_has_cap_htab_fd(void)
2368 {
2369     return cap_htab_fd;
2370 }
2371 
2372 bool kvmppc_has_cap_fixup_hcalls(void)
2373 {
2374     return cap_fixup_hcalls;
2375 }
2376 
2377 bool kvmppc_has_cap_htm(void)
2378 {
2379     return cap_htm;
2380 }
2381 
2382 static PowerPCCPUClass *ppc_cpu_get_family_class(PowerPCCPUClass *pcc)
2383 {
2384     ObjectClass *oc = OBJECT_CLASS(pcc);
2385 
2386     while (oc && !object_class_is_abstract(oc)) {
2387         oc = object_class_get_parent(oc);
2388     }
2389     assert(oc);
2390 
2391     return POWERPC_CPU_CLASS(oc);
2392 }
2393 
2394 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
2395 {
2396     uint32_t host_pvr = mfpvr();
2397     PowerPCCPUClass *pvr_pcc;
2398 
2399     pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2400     if (pvr_pcc == NULL) {
2401         pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2402     }
2403 
2404     return pvr_pcc;
2405 }
2406 
2407 static int kvm_ppc_register_host_cpu_type(void)
2408 {
2409     TypeInfo type_info = {
2410         .name = TYPE_HOST_POWERPC_CPU,
2411         .instance_init = kvmppc_host_cpu_initfn,
2412         .class_init = kvmppc_host_cpu_class_init,
2413     };
2414     PowerPCCPUClass *pvr_pcc;
2415     DeviceClass *dc;
2416     int i;
2417 
2418     pvr_pcc = kvm_ppc_get_host_cpu_class();
2419     if (pvr_pcc == NULL) {
2420         return -1;
2421     }
2422     type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2423     type_register(&type_info);
2424 
2425 #if defined(TARGET_PPC64)
2426     type_info.name = g_strdup_printf("%s-"TYPE_SPAPR_CPU_CORE, "host");
2427     type_info.parent = TYPE_SPAPR_CPU_CORE;
2428     type_info.instance_size = sizeof(sPAPRCPUCore);
2429     type_info.instance_init = NULL;
2430     type_info.class_init = spapr_cpu_core_class_init;
2431     type_info.class_data = (void *) "host";
2432     type_register(&type_info);
2433     g_free((void *)type_info.name);
2434 #endif
2435 
2436     /*
2437      * Update generic CPU family class alias (e.g. on a POWER8NVL host,
2438      * we want "POWER8" to be a "family" alias that points to the current
2439      * host CPU type, too)
2440      */
2441     dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc));
2442     for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) {
2443         if (strcmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) {
2444             ObjectClass *oc = OBJECT_CLASS(pvr_pcc);
2445             char *suffix;
2446 
2447             ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc));
2448             suffix = strstr(ppc_cpu_aliases[i].model, "-"TYPE_POWERPC_CPU);
2449             if (suffix) {
2450                 *suffix = 0;
2451             }
2452             ppc_cpu_aliases[i].oc = oc;
2453             break;
2454         }
2455     }
2456 
2457     return 0;
2458 }
2459 
2460 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2461 {
2462     struct kvm_rtas_token_args args = {
2463         .token = token,
2464     };
2465 
2466     if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2467         return -ENOENT;
2468     }
2469 
2470     strncpy(args.name, function, sizeof(args.name));
2471 
2472     return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2473 }
2474 
2475 int kvmppc_get_htab_fd(bool write)
2476 {
2477     struct kvm_get_htab_fd s = {
2478         .flags = write ? KVM_GET_HTAB_WRITE : 0,
2479         .start_index = 0,
2480     };
2481 
2482     if (!cap_htab_fd) {
2483         fprintf(stderr, "KVM version doesn't support saving the hash table\n");
2484         return -1;
2485     }
2486 
2487     return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2488 }
2489 
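/* Drain HPT entries from the kernel's HTAB fd into the migration
 * stream: each chunk is written as a be32 index, be16 n_valid and
 * be16 n_invalid header followed by the valid HPTEs.  Stop when the
 * fd reports no more entries or when max_ns has elapsed (unless
 * max_ns is negative).  Returns 1 when the table has been fully
 * drained, 0 if we ran out of time. */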
2490 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2491 {
2492     int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2493     uint8_t buf[bufsize];
2494     ssize_t rc;
2495 
2496     do {
2497         rc = read(fd, buf, bufsize);
2498         if (rc < 0) {
2499             fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2500                     strerror(errno));
2501             return rc;
2502         } else if (rc) {
2503             uint8_t *buffer = buf;
2504             ssize_t n = rc;
2505             while (n) {
2506                 struct kvm_get_htab_header *head =
2507                     (struct kvm_get_htab_header *) buffer;
2508                 size_t chunksize = sizeof(*head) +
2509                      HASH_PTE_SIZE_64 * head->n_valid;
2510 
2511                 qemu_put_be32(f, head->index);
2512                 qemu_put_be16(f, head->n_valid);
2513                 qemu_put_be16(f, head->n_invalid);
2514                 qemu_put_buffer(f, (void *)(head + 1),
2515                                 HASH_PTE_SIZE_64 * head->n_valid);
2516 
2517                 buffer += chunksize;
2518                 n -= chunksize;
2519             }
2520         }
2521     } while ((rc != 0)
2522              && ((max_ns < 0)
2523                  || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2524 
2525     return (rc == 0) ? 1 : 0;
2526 }
2527 
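/* Read one chunk of HPT entries from the migration stream and push it
 * to the kernel through the HTAB fd.  Returns 0 on success or a
 * negative value on a failed or short write. */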
2528 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2529                            uint16_t n_valid, uint16_t n_invalid)
2530 {
2531     struct kvm_get_htab_header *buf;
2532     size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
2533     ssize_t rc;
2534 
2535     buf = alloca(chunksize);
2536     buf->index = index;
2537     buf->n_valid = n_valid;
2538     buf->n_invalid = n_invalid;
2539 
2540     qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
2541 
2542     rc = write(fd, buf, chunksize);
2543     if (rc < 0) {
2544         fprintf(stderr, "Error writing KVM hash table: %s\n",
2545                 strerror(errno));
2546         return rc;
2547     }
2548     if (rc != chunksize) {
2549         /* We should never get a short write on a single chunk */
2550         fprintf(stderr, "Short write, restoring KVM hash table\n");
2551         return -1;
2552     }
2553     return 0;
2554 }
2555 
2556 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2557 {
2558     return true;
2559 }
2560 
2561 int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
2562 {
2563     return 1;
2564 }
2565 
2566 int kvm_arch_on_sigbus(int code, void *addr)
2567 {
2568     return 1;
2569 }
2570 
2571 void kvm_arch_init_irq_routing(KVMState *s)
2572 {
2573 }
2574 
2575 struct kvm_get_htab_buf {
2576     struct kvm_get_htab_header header;
2577     /*
2578      * We require one extra byte for read
2579      */
2580     target_ulong hpte[(HPTES_PER_GROUP * 2) + 1];
2581 };
2582 
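/* Fetch the HPTE group starting at @pte_index through a temporary
 * HTAB fd.  Returns a token (really a pointer into a heap-allocated
 * kvm_get_htab_buf) to be released with kvmppc_hash64_free_pteg(),
 * or 0 on failure. */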
2583 uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index)
2584 {
2585     int htab_fd;
2586     struct kvm_get_htab_fd ghf;
2587     struct kvm_get_htab_buf  *hpte_buf;
2588 
2589     ghf.flags = 0;
2590     ghf.start_index = pte_index;
2591     htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2592     if (htab_fd < 0) {
2593         goto error_out;
2594     }
2595 
2596     hpte_buf = g_malloc0(sizeof(*hpte_buf));
2597     /*
2598      * Read the hpte group
2599      */
2600     if (read(htab_fd, hpte_buf, sizeof(*hpte_buf)) < 0) {
2601         goto out_close;
2602     }
2603 
2604     close(htab_fd);
2605     return (uint64_t)(uintptr_t) hpte_buf->hpte;
2606 
2607 out_close:
2608     g_free(hpte_buf);
2609     close(htab_fd);
2610 error_out:
2611     return 0;
2612 }
2613 
2614 void kvmppc_hash64_free_pteg(uint64_t token)
2615 {
2616     struct kvm_get_htab_buf *htab_buf;
2617 
2618     htab_buf = container_of((void *)(uintptr_t) token, struct kvm_get_htab_buf,
2619                             hpte);
2620     g_free(htab_buf);
2621     return;
2622 }
2623 
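/* Write a single HPTE (pte0/pte1 at @pte_index) into the guest hash
 * table through a temporary HTAB fd; errors are silently ignored. */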
2624 void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong pte_index,
2625                              target_ulong pte0, target_ulong pte1)
2626 {
2627     int htab_fd;
2628     struct kvm_get_htab_fd ghf;
2629     struct kvm_get_htab_buf hpte_buf;
2630 
2631     ghf.flags = 0;
2632     ghf.start_index = 0;     /* Ignored */
2633     htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2634     if (htab_fd < 0) {
2635         goto error_out;
2636     }
2637 
2638     hpte_buf.header.n_valid = 1;
2639     hpte_buf.header.n_invalid = 0;
2640     hpte_buf.header.index = pte_index;
2641     hpte_buf.hpte[0] = pte0;
2642     hpte_buf.hpte[1] = pte1;
2643     /*
2644      * Write the hpte entry.
2645      * CAUTION: write() has the warn_unused_result attribute. Hence we
2646      * need to check the return value even though we don't act on it.
2647      */
2648     if (write(htab_fd, &hpte_buf, sizeof(hpte_buf)) < 0) {
2649         goto out_close;
2650     }
2651 
2652 out_close:
2653     close(htab_fd);
2654     return;
2655 
2656 error_out:
2657     return;
2658 }
2659 
2660 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2661                              uint64_t address, uint32_t data, PCIDevice *dev)
2662 {
2663     return 0;
2664 }
2665 
2666 int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
2667                                 int vector, PCIDevice *dev)
2668 {
2669     return 0;
2670 }
2671 
2672 int kvm_arch_release_virq_post(int virq)
2673 {
2674     return 0;
2675 }
2676 
2677 int kvm_arch_msi_data_to_gsi(uint32_t data)
2678 {
2679     return data & 0xffff;
2680 }
2681 
2682 int kvmppc_enable_hwrng(void)
2683 {
2684     if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2685         return -1;
2686     }
2687 
2688     return kvmppc_enable_hcall(kvm_state, H_RANDOM);
2689 }
2690