xref: /openbmc/qemu/target/ppc/kvm.c (revision 39164c13)
1 /*
2  * PowerPC implementation of KVM hooks
3  *
4  * Copyright IBM Corp. 2007
5  * Copyright (C) 2011 Freescale Semiconductor, Inc.
6  *
7  * Authors:
8  *  Jerone Young <jyoung5@us.ibm.com>
9  *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10  *  Hollis Blanchard <hollisb@us.ibm.com>
11  *
12  * This work is licensed under the terms of the GNU GPL, version 2 or later.
13  * See the COPYING file in the top-level directory.
14  *
15  */
16 
17 #include "qemu/osdep.h"
18 #include <dirent.h>
19 #include <sys/ioctl.h>
20 #include <sys/vfs.h>
21 
22 #include <linux/kvm.h>
23 
24 #include "qemu-common.h"
25 #include "qemu/error-report.h"
26 #include "cpu.h"
27 #include "cpu-models.h"
28 #include "qemu/timer.h"
29 #include "sysemu/sysemu.h"
30 #include "sysemu/hw_accel.h"
31 #include "sysemu/numa.h"
32 #include "kvm_ppc.h"
33 #include "sysemu/cpus.h"
34 #include "sysemu/device_tree.h"
35 #include "mmu-hash64.h"
36 
37 #include "hw/sysbus.h"
38 #include "hw/ppc/spapr.h"
39 #include "hw/ppc/spapr_vio.h"
40 #include "hw/ppc/spapr_cpu_core.h"
41 #include "hw/ppc/ppc.h"
42 #include "sysemu/watchdog.h"
43 #include "trace.h"
44 #include "exec/gdbstub.h"
45 #include "exec/memattrs.h"
46 #include "sysemu/hostmem.h"
47 #include "qemu/cutils.h"
48 #if defined(TARGET_PPC64)
49 #include "hw/ppc/spapr_cpu_core.h"
50 #endif
51 
52 //#define DEBUG_KVM
53 
54 #ifdef DEBUG_KVM
55 #define DPRINTF(fmt, ...) \
56     do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
57 #else
58 #define DPRINTF(fmt, ...) \
59     do { } while (0)
60 #endif
61 
62 #define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"
63 
64 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
65     KVM_CAP_LAST_INFO
66 };
67 
68 static int cap_interrupt_unset = false;
69 static int cap_interrupt_level = false;
70 static int cap_segstate;
71 static int cap_booke_sregs;
72 static int cap_ppc_smt;
73 static int cap_ppc_rma;
74 static int cap_spapr_tce;
75 static int cap_spapr_multitce;
76 static int cap_spapr_vfio;
77 static int cap_hior;
78 static int cap_one_reg;
79 static int cap_epr;
80 static int cap_ppc_watchdog;
81 static int cap_papr;
82 static int cap_htab_fd;
83 static int cap_fixup_hcalls;
84 static int cap_htm;             /* Hardware transactional memory support */
85 
86 static uint32_t debug_inst_opcode;
87 
88 /* XXX We have a race condition where we actually have a level triggered
89  *     interrupt, but the infrastructure can't expose that yet, so the guest
90  *     takes it but ignores it, goes to sleep and never gets notified that there's
91  *     still an interrupt pending.
92  *
93  *     As a quick workaround, let's just wake up again 20 ms after we injected
94  *     an interrupt. That way we can ensure that we're always reinjecting
95  *     interrupts in case the guest swallowed them.
96  */
97 static QEMUTimer *idle_timer;
98 
99 static void kvm_kick_cpu(void *opaque)
100 {
101     PowerPCCPU *cpu = opaque;
102 
103     qemu_cpu_kick(CPU(cpu));
104 }
105 
106 /* Check whether we are running with KVM-PR (instead of KVM-HV).  This
107  * should only be used for fallback tests - generally we should use
108  * explicit capabilities for the features we want, rather than
109  * assuming what is/isn't available depending on the KVM variant. */
110 static bool kvmppc_is_pr(KVMState *ks)
111 {
112     /* Assume KVM-PR if the GET_PVINFO capability is available */
113     return kvm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
114 }
115 
116 static int kvm_ppc_register_host_cpu_type(void);
117 
118 int kvm_arch_init(MachineState *ms, KVMState *s)
119 {
120     cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
121     cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
122     cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
123     cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
124     cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
125     cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
126     cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
127     cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
128     cap_spapr_vfio = false;
129     cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
130     cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
131     cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
132     cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
133     /* Note: we don't set cap_papr here, because this capability is
134      * only activated later, by kvmppc_set_papr() */
135     cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
136     cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
137     cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
138 
139     if (!cap_interrupt_level) {
140         fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
141                         "VM to stall at times!\n");
142     }
143 
144     kvm_ppc_register_host_cpu_type();
145 
146     return 0;
147 }
148 
149 int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
150 {
151     return 0;
152 }
153 
154 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
155 {
156     CPUPPCState *cenv = &cpu->env;
157     CPUState *cs = CPU(cpu);
158     struct kvm_sregs sregs;
159     int ret;
160 
161     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
162         /* What we're really trying to say is "if we're on BookE, we use
163            the native PVR for now". This is the only sane way to check
164            it, though it may mislead users into thinking they can run
165            BookE guests on BookS. Let's hope nobody dares to try :) */
166         return 0;
167     } else {
168         if (!cap_segstate) {
169             fprintf(stderr, "kvm error: missing PVR setting capability\n");
170             return -ENOSYS;
171         }
172     }
173 
174     ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
175     if (ret) {
176         return ret;
177     }
178 
179     sregs.pvr = cenv->spr[SPR_PVR];
180     return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
181 }
182 
183 /* Set up a shared TLB array with KVM */
184 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
185 {
186     CPUPPCState *env = &cpu->env;
187     CPUState *cs = CPU(cpu);
188     struct kvm_book3e_206_tlb_params params = {};
189     struct kvm_config_tlb cfg = {};
190     unsigned int entries = 0;
191     int ret, i;
192 
193     if (!kvm_enabled() ||
194         !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
195         return 0;
196     }
197 
198     assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
199 
200     for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
201         params.tlb_sizes[i] = booke206_tlb_size(env, i);
202         params.tlb_ways[i] = booke206_tlb_ways(env, i);
203         entries += params.tlb_sizes[i];
204     }
205 
206     assert(entries == env->nb_tlb);
207     assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
208 
209     env->tlb_dirty = true;
210 
211     cfg.array = (uintptr_t)env->tlb.tlbm;
212     cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
213     cfg.params = (uintptr_t)&params;
214     cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
215 
216     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
217     if (ret < 0) {
218         fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
219                 __func__, strerror(-ret));
220         return ret;
221     }
222 
223     env->kvm_sw_tlb = true;
224     return 0;
225 }
226 
227 
228 #if defined(TARGET_PPC64)
229 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
230                                        struct kvm_ppc_smmu_info *info)
231 {
232     CPUPPCState *env = &cpu->env;
233     CPUState *cs = CPU(cpu);
234 
235     memset(info, 0, sizeof(*info));
236 
237     /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
238      * need to "guess" what the supported page sizes are.
239      *
240      * For that to work we make a few assumptions:
241      *
242      * - Check whether we are running "PR" KVM which only supports 4K
243      *   and 16M pages, but supports them regardless of the backing
244      *   store characteristics. We also don't support 1T segments.
245      *
246      *   This is safe as if HV KVM ever supports that capability or PR
247      *   KVM grows support for more page/segment sizes, those versions
248      *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
249      *   will not hit this fallback
250      *
251      * - Else we are running HV KVM. This means we only support page
252      *   sizes that fit in the backing store. Additionally we only
253      *   advertise 64K pages if the processor is ARCH 2.06 and we assume
254      *   P7 encodings for the SLB and hash table. Here too, we assume
255      *   support for any newer processor will mean a kernel that
256      *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
257      *   this fallback.
258      */
259     if (kvmppc_is_pr(cs->kvm_state)) {
260         /* No flags */
261         info->flags = 0;
262         info->slb_size = 64;
263 
264         /* Standard 4k base page size segment */
265         info->sps[0].page_shift = 12;
266         info->sps[0].slb_enc = 0;
267         info->sps[0].enc[0].page_shift = 12;
268         info->sps[0].enc[0].pte_enc = 0;
269 
270         /* Standard 16M large page size segment */
271         info->sps[1].page_shift = 24;
272         info->sps[1].slb_enc = SLB_VSID_L;
273         info->sps[1].enc[0].page_shift = 24;
274         info->sps[1].enc[0].pte_enc = 0;
275     } else {
276         int i = 0;
277 
278         /* HV KVM has backing store size restrictions */
279         info->flags = KVM_PPC_PAGE_SIZES_REAL;
280 
281         if (env->mmu_model & POWERPC_MMU_1TSEG) {
282             info->flags |= KVM_PPC_1T_SEGMENTS;
283         }
284 
285         if (env->mmu_model == POWERPC_MMU_2_06 ||
286             env->mmu_model == POWERPC_MMU_2_07) {
287             info->slb_size = 32;
288         } else {
289             info->slb_size = 64;
290         }
291 
292         /* Standard 4k base page size segment */
293         info->sps[i].page_shift = 12;
294         info->sps[i].slb_enc = 0;
295         info->sps[i].enc[0].page_shift = 12;
296         info->sps[i].enc[0].pte_enc = 0;
297         i++;
298 
299         /* 64K on MMU 2.06 and later */
300         if (env->mmu_model == POWERPC_MMU_2_06 ||
301             env->mmu_model == POWERPC_MMU_2_07) {
302             info->sps[i].page_shift = 16;
303             info->sps[i].slb_enc = 0x110;
304             info->sps[i].enc[0].page_shift = 16;
305             info->sps[i].enc[0].pte_enc = 1;
306             i++;
307         }
308 
309         /* Standard 16M large page size segment */
310         info->sps[i].page_shift = 24;
311         info->sps[i].slb_enc = SLB_VSID_L;
312         info->sps[i].enc[0].page_shift = 24;
313         info->sps[i].enc[0].pte_enc = 0;
314     }
315 }
316 
317 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
318 {
319     CPUState *cs = CPU(cpu);
320     int ret;
321 
322     if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
323         ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
324         if (ret == 0) {
325             return;
326         }
327     }
328 
329     kvm_get_fallback_smmu_info(cpu, info);
330 }
331 
332 static long gethugepagesize(const char *mem_path)
333 {
334     struct statfs fs;
335     int ret;
336 
337     do {
338         ret = statfs(mem_path, &fs);
339     } while (ret != 0 && errno == EINTR);
340 
341     if (ret != 0) {
342         fprintf(stderr, "Couldn't statfs() memory path: %s\n",
343                 strerror(errno));
344         exit(1);
345     }
346 
347 #define HUGETLBFS_MAGIC       0x958458f6
348 
349     if (fs.f_type != HUGETLBFS_MAGIC) {
350         /* Explicit mempath, but it's ordinary pages */
351         return getpagesize();
352     }
353 
354     /* It's hugetlbfs, so return the huge page size */
355     return fs.f_bsize;
356 }
357 
358 /*
359  * FIXME TOCTTOU: this iterates over memory backends' mem-path, which
360  * may or may not name the same files / on the same filesystem now as
361  * when we actually open and map them.  Iterate over the file
362  * descriptors instead, and use qemu_fd_getpagesize().
363  */
364 static int find_max_supported_pagesize(Object *obj, void *opaque)
365 {
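    /*
     * Note: despite the "max" in the name, this callback tracks the
     * smallest page size seen across the memory backends, since the
     * smallest backing page size is what limits the guest page sizes
     * we can advertise.
     */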
366     char *mem_path;
367     long *hpsize_min = opaque;
368 
369     if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) {
370         mem_path = object_property_get_str(obj, "mem-path", NULL);
371         if (mem_path) {
372             long hpsize = gethugepagesize(mem_path);
373             if (hpsize < *hpsize_min) {
374                 *hpsize_min = hpsize;
375             }
376         } else {
377             *hpsize_min = getpagesize();
378         }
379     }
380 
381     return 0;
382 }
383 
384 static long getrampagesize(void)
385 {
386     long hpsize = LONG_MAX;
387     long mainrampagesize;
388     Object *memdev_root;
389 
390     if (mem_path) {
391         mainrampagesize = gethugepagesize(mem_path);
392     } else {
393         mainrampagesize = getpagesize();
394     }
395 
396     /* It's possible we have memory-backend objects with
397      * hugepage-backed RAM. These may get mapped into the system
398      * address space via -numa parameters or memory hotplug
399      * hooks. We want to take these into account, but we
400      * also want to make sure the supported hugepage
401      * sizes are applicable across the entire range of memory
402      * we may boot from, so we take the minimum across all
403      * backends and assume normal pages where a
404      * backend isn't backed by hugepages.
405      */
406     memdev_root = object_resolve_path("/objects", NULL);
407     if (memdev_root) {
408         object_child_foreach(memdev_root, find_max_supported_pagesize, &hpsize);
409     }
410     if (hpsize == LONG_MAX) {
411         /* No additional memory regions found ==> Report main RAM page size */
412         return mainrampagesize;
413     }
414 
415     /* If NUMA is disabled or the NUMA nodes are not backed with a
416      * memory-backend, then there is at least one node using "normal" RAM,
417      * so if its page size is smaller we have to report that size instead.
418      */
419     if (hpsize > mainrampagesize &&
420         (nb_numa_nodes == 0 || numa_info[0].node_memdev == NULL)) {
421         static bool warned;
422         if (!warned) {
423             error_report("Huge page support disabled (n/a for main memory).");
424             warned = true;
425         }
426         return mainrampagesize;
427     }
428 
429     return hpsize;
430 }
431 
432 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
433 {
434     if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
435         return true;
436     }
437 
438     return (1ul << shift) <= rampgsize;
439 }
440 
441 static long max_cpu_page_size;
442 
443 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
444 {
445     static struct kvm_ppc_smmu_info smmu_info;
446     static bool has_smmu_info;
447     CPUPPCState *env = &cpu->env;
448     int iq, ik, jq, jk;
449     bool has_64k_pages = false;
450 
451     /* We only handle page sizes for 64-bit server guests for now */
452     if (!(env->mmu_model & POWERPC_MMU_64)) {
453         return;
454     }
455 
456     /* Collect MMU info from kernel if not already */
457     if (!has_smmu_info) {
458         kvm_get_smmu_info(cpu, &smmu_info);
459         has_smmu_info = true;
460     }
461 
462     if (!max_cpu_page_size) {
463         max_cpu_page_size = getrampagesize();
464     }
465 
466     /* Convert to QEMU form */
467     memset(&env->sps, 0, sizeof(env->sps));
468 
469     /* If we have HV KVM, we need to forbid CI large pages if our
470      * host page size is smaller than 64K.
471      */
472     if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) {
473         env->ci_large_pages = getpagesize() >= 0x10000;
474     }
475 
476     /*
477      * XXX This loop should be an entry wide AND of the capabilities that
478      *     the selected CPU has with the capabilities that KVM supports.
479      */
480     for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
481         struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
482         struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
483 
484         if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
485                                  ksps->page_shift)) {
486             continue;
487         }
488         qsps->page_shift = ksps->page_shift;
489         qsps->slb_enc = ksps->slb_enc;
490         for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
491             if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
492                                      ksps->enc[jk].page_shift)) {
493                 continue;
494             }
495             if (ksps->enc[jk].page_shift == 16) {
496                 has_64k_pages = true;
497             }
498             qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
499             qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
500             if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
501                 break;
502             }
503         }
504         if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
505             break;
506         }
507     }
508     env->slb_nr = smmu_info.slb_size;
509     if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
510         env->mmu_model &= ~POWERPC_MMU_1TSEG;
511     }
512     if (!has_64k_pages) {
513         env->mmu_model &= ~POWERPC_MMU_64K;
514     }
515 }
516 
517 bool kvmppc_is_mem_backend_page_size_ok(char *obj_path)
518 {
519     Object *mem_obj = object_resolve_path(obj_path, NULL);
520     char *mempath = object_property_get_str(mem_obj, "mem-path", NULL);
521     long pagesize;
522 
523     if (mempath) {
524         pagesize = gethugepagesize(mempath);
525     } else {
526         pagesize = getpagesize();
527     }
528 
529     return pagesize >= max_cpu_page_size;
530 }
531 
532 #else /* defined (TARGET_PPC64) */
533 
534 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
535 {
536 }
537 
538 bool kvmppc_is_mem_backend_page_size_ok(char *obj_path)
539 {
540     return true;
541 }
542 
543 #endif /* !defined (TARGET_PPC64) */
544 
545 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
546 {
547     return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
548 }
549 
550 /* e500 supports 2 h/w breakpoints and 2 watchpoints.
551  * book3s supports only 1 watchpoint, so array size
552  * of 4 is sufficient for now.
553  */
554 #define MAX_HW_BKPTS 4
555 
556 static struct HWBreakpoint {
557     target_ulong addr;
558     int type;
559 } hw_debug_points[MAX_HW_BKPTS];
560 
561 static CPUWatchpoint hw_watchpoint;
562 
563 /* By default, no breakpoints or watchpoints are supported */
564 static int max_hw_breakpoint;
565 static int max_hw_watchpoint;
566 static int nb_hw_breakpoint;
567 static int nb_hw_watchpoint;
568 
569 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
570 {
571     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
572         max_hw_breakpoint = 2;
573         max_hw_watchpoint = 2;
574     }
575 
576     if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
577         fprintf(stderr, "Error initializing h/w breakpoints\n");
578         return;
579     }
580 }
581 
582 int kvm_arch_init_vcpu(CPUState *cs)
583 {
584     PowerPCCPU *cpu = POWERPC_CPU(cs);
585     CPUPPCState *cenv = &cpu->env;
586     int ret;
587 
588     /* Gather server mmu info from KVM and update the CPU state */
589     kvm_fixup_page_sizes(cpu);
590 
591     /* Synchronize sregs with kvm */
592     ret = kvm_arch_sync_sregs(cpu);
593     if (ret) {
594         if (ret == -EINVAL) {
595             error_report("Register sync failed... If you're using kvm-hv.ko,"
596                          " only \"-cpu host\" is possible");
597         }
598         return ret;
599     }
600 
601     idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
602 
603     switch (cenv->mmu_model) {
604     case POWERPC_MMU_BOOKE206:
605         /* This target supports access to KVM's guest TLB */
606         ret = kvm_booke206_tlb_init(cpu);
607         break;
608     case POWERPC_MMU_2_07:
609         if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
610             /* KVM-HV has transactional memory on POWER8 even without the
611              * KVM_CAP_PPC_HTM extension, so enable it here instead. */
612             cap_htm = true;
613         }
614         break;
615     default:
616         break;
617     }
618 
619     kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
620     kvmppc_hw_debug_points_init(cenv);
621 
622     return ret;
623 }
624 
625 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
626 {
627     CPUPPCState *env = &cpu->env;
628     CPUState *cs = CPU(cpu);
629     struct kvm_dirty_tlb dirty_tlb;
630     unsigned char *bitmap;
631     int ret;
632 
633     if (!env->kvm_sw_tlb) {
634         return;
635     }
636 
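    /* Flag every TLB entry as dirty so KVM re-reads the entire shared array */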
637     bitmap = g_malloc((env->nb_tlb + 7) / 8);
638     memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
639 
640     dirty_tlb.bitmap = (uintptr_t)bitmap;
641     dirty_tlb.num_dirty = env->nb_tlb;
642 
643     ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
644     if (ret) {
645         fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
646                 __func__, strerror(-ret));
647     }
648 
649     g_free(bitmap);
650 }
651 
652 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
653 {
654     PowerPCCPU *cpu = POWERPC_CPU(cs);
655     CPUPPCState *env = &cpu->env;
656     union {
657         uint32_t u32;
658         uint64_t u64;
659     } val;
660     struct kvm_one_reg reg = {
661         .id = id,
662         .addr = (uintptr_t) &val,
663     };
664     int ret;
665 
666     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
667     if (ret != 0) {
668         trace_kvm_failed_spr_get(spr, strerror(errno));
669     } else {
670         switch (id & KVM_REG_SIZE_MASK) {
671         case KVM_REG_SIZE_U32:
672             env->spr[spr] = val.u32;
673             break;
674 
675         case KVM_REG_SIZE_U64:
676             env->spr[spr] = val.u64;
677             break;
678 
679         default:
680             /* Don't handle this size yet */
681             abort();
682         }
683     }
684 }
685 
686 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
687 {
688     PowerPCCPU *cpu = POWERPC_CPU(cs);
689     CPUPPCState *env = &cpu->env;
690     union {
691         uint32_t u32;
692         uint64_t u64;
693     } val;
694     struct kvm_one_reg reg = {
695         .id = id,
696         .addr = (uintptr_t) &val,
697     };
698     int ret;
699 
700     switch (id & KVM_REG_SIZE_MASK) {
701     case KVM_REG_SIZE_U32:
702         val.u32 = env->spr[spr];
703         break;
704 
705     case KVM_REG_SIZE_U64:
706         val.u64 = env->spr[spr];
707         break;
708 
709     default:
710         /* Don't handle this size yet */
711         abort();
712     }
713 
714     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
715     if (ret != 0) {
716         trace_kvm_failed_spr_set(spr, strerror(errno));
717     }
718 }
719 
720 static int kvm_put_fp(CPUState *cs)
721 {
722     PowerPCCPU *cpu = POWERPC_CPU(cs);
723     CPUPPCState *env = &cpu->env;
724     struct kvm_one_reg reg;
725     int i;
726     int ret;
727 
728     if (env->insns_flags & PPC_FLOAT) {
729         uint64_t fpscr = env->fpscr;
730         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
731 
732         reg.id = KVM_REG_PPC_FPSCR;
733         reg.addr = (uintptr_t)&fpscr;
734         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
735         if (ret < 0) {
736             DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
737             return ret;
738         }
739 
740         for (i = 0; i < 32; i++) {
741             uint64_t vsr[2];
742 
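            /* vsr[] is the 128-bit image handed to KVM: one doubleword is the
             * FPR, the other the VSX second doubleword; the ordering below
             * depends on host endianness. */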
743 #ifdef HOST_WORDS_BIGENDIAN
744             vsr[0] = float64_val(env->fpr[i]);
745             vsr[1] = env->vsr[i];
746 #else
747             vsr[0] = env->vsr[i];
748             vsr[1] = float64_val(env->fpr[i]);
749 #endif
750             reg.addr = (uintptr_t) &vsr;
751             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
752 
753             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
754             if (ret < 0) {
755                 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
756                         i, strerror(errno));
757                 return ret;
758             }
759         }
760     }
761 
762     if (env->insns_flags & PPC_ALTIVEC) {
763         reg.id = KVM_REG_PPC_VSCR;
764         reg.addr = (uintptr_t)&env->vscr;
765         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
766         if (ret < 0) {
767             DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
768             return ret;
769         }
770 
771         for (i = 0; i < 32; i++) {
772             reg.id = KVM_REG_PPC_VR(i);
773             reg.addr = (uintptr_t)&env->avr[i];
774             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
775             if (ret < 0) {
776                 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
777                 return ret;
778             }
779         }
780     }
781 
782     return 0;
783 }
784 
785 static int kvm_get_fp(CPUState *cs)
786 {
787     PowerPCCPU *cpu = POWERPC_CPU(cs);
788     CPUPPCState *env = &cpu->env;
789     struct kvm_one_reg reg;
790     int i;
791     int ret;
792 
793     if (env->insns_flags & PPC_FLOAT) {
794         uint64_t fpscr;
795         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
796 
797         reg.id = KVM_REG_PPC_FPSCR;
798         reg.addr = (uintptr_t)&fpscr;
799         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
800         if (ret < 0) {
801             DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
802             return ret;
803         } else {
804             env->fpscr = fpscr;
805         }
806 
807         for (i = 0; i < 32; i++) {
808             uint64_t vsr[2];
809 
810             reg.addr = (uintptr_t) &vsr;
811             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
812 
813             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
814             if (ret < 0) {
815                 DPRINTF("Unable to get %s%d from KVM: %s\n",
816                         vsx ? "VSR" : "FPR", i, strerror(errno));
817                 return ret;
818             } else {
819 #ifdef HOST_WORDS_BIGENDIAN
820                 env->fpr[i] = vsr[0];
821                 if (vsx) {
822                     env->vsr[i] = vsr[1];
823                 }
824 #else
825                 env->fpr[i] = vsr[1];
826                 if (vsx) {
827                     env->vsr[i] = vsr[0];
828                 }
829 #endif
830             }
831         }
832     }
833 
834     if (env->insns_flags & PPC_ALTIVEC) {
835         reg.id = KVM_REG_PPC_VSCR;
836         reg.addr = (uintptr_t)&env->vscr;
837         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
838         if (ret < 0) {
839             DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
840             return ret;
841         }
842 
843         for (i = 0; i < 32; i++) {
844             reg.id = KVM_REG_PPC_VR(i);
845             reg.addr = (uintptr_t)&env->avr[i];
846             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
847             if (ret < 0) {
848                 DPRINTF("Unable to get VR%d from KVM: %s\n",
849                         i, strerror(errno));
850                 return ret;
851             }
852         }
853     }
854 
855     return 0;
856 }
857 
858 #if defined(TARGET_PPC64)
859 static int kvm_get_vpa(CPUState *cs)
860 {
861     PowerPCCPU *cpu = POWERPC_CPU(cs);
862     CPUPPCState *env = &cpu->env;
863     struct kvm_one_reg reg;
864     int ret;
865 
866     reg.id = KVM_REG_PPC_VPA_ADDR;
867     reg.addr = (uintptr_t)&env->vpa_addr;
868     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
869     if (ret < 0) {
870         DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
871         return ret;
872     }
873 
874     assert((uintptr_t)&env->slb_shadow_size
875            == ((uintptr_t)&env->slb_shadow_addr + 8));
876     reg.id = KVM_REG_PPC_VPA_SLB;
877     reg.addr = (uintptr_t)&env->slb_shadow_addr;
878     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
879     if (ret < 0) {
880         DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
881                 strerror(errno));
882         return ret;
883     }
884 
885     assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
886     reg.id = KVM_REG_PPC_VPA_DTL;
887     reg.addr = (uintptr_t)&env->dtl_addr;
888     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
889     if (ret < 0) {
890         DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
891                 strerror(errno));
892         return ret;
893     }
894 
895     return 0;
896 }
897 
898 static int kvm_put_vpa(CPUState *cs)
899 {
900     PowerPCCPU *cpu = POWERPC_CPU(cs);
901     CPUPPCState *env = &cpu->env;
902     struct kvm_one_reg reg;
903     int ret;
904 
905     /* SLB shadow or DTL can't be registered unless a master VPA is
906      * registered.  That means when restoring state, if a VPA *is*
907      * registered, we need to set that up first.  If not, we need to
908      * deregister the others before deregistering the master VPA */
909     assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
910 
911     if (env->vpa_addr) {
912         reg.id = KVM_REG_PPC_VPA_ADDR;
913         reg.addr = (uintptr_t)&env->vpa_addr;
914         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
915         if (ret < 0) {
916             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
917             return ret;
918         }
919     }
920 
921     assert((uintptr_t)&env->slb_shadow_size
922            == ((uintptr_t)&env->slb_shadow_addr + 8));
923     reg.id = KVM_REG_PPC_VPA_SLB;
924     reg.addr = (uintptr_t)&env->slb_shadow_addr;
925     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
926     if (ret < 0) {
927         DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
928         return ret;
929     }
930 
931     assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
932     reg.id = KVM_REG_PPC_VPA_DTL;
933     reg.addr = (uintptr_t)&env->dtl_addr;
934     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
935     if (ret < 0) {
936         DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
937                 strerror(errno));
938         return ret;
939     }
940 
941     if (!env->vpa_addr) {
942         reg.id = KVM_REG_PPC_VPA_ADDR;
943         reg.addr = (uintptr_t)&env->vpa_addr;
944         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
945         if (ret < 0) {
946             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
947             return ret;
948         }
949     }
950 
951     return 0;
952 }
953 #endif /* TARGET_PPC64 */
954 
955 int kvmppc_put_books_sregs(PowerPCCPU *cpu)
956 {
957     CPUPPCState *env = &cpu->env;
958     struct kvm_sregs sregs;
959     int i;
960 
961     sregs.pvr = env->spr[SPR_PVR];
962 
963     sregs.u.s.sdr1 = env->spr[SPR_SDR1];
964 
965     /* Sync SLB */
966 #ifdef TARGET_PPC64
967     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
968         sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
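        /* Valid entries also carry their SLB index in the low bits of slbe */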
969         if (env->slb[i].esid & SLB_ESID_V) {
970             sregs.u.s.ppc64.slb[i].slbe |= i;
971         }
972         sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
973     }
974 #endif
975 
976     /* Sync SRs */
977     for (i = 0; i < 16; i++) {
978         sregs.u.s.ppc32.sr[i] = env->sr[i];
979     }
980 
981     /* Sync BATs */
982     for (i = 0; i < 8; i++) {
983         /* Beware. We have to swap upper and lower bits here */
984         sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
985             | env->DBAT[1][i];
986         sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
987             | env->IBAT[1][i];
988     }
989 
990     return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
991 }
992 
993 int kvm_arch_put_registers(CPUState *cs, int level)
994 {
995     PowerPCCPU *cpu = POWERPC_CPU(cs);
996     CPUPPCState *env = &cpu->env;
997     struct kvm_regs regs;
998     int ret;
999     int i;
1000 
1001     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1002     if (ret < 0) {
1003         return ret;
1004     }
1005 
1006     regs.ctr = env->ctr;
1007     regs.lr  = env->lr;
1008     regs.xer = cpu_read_xer(env);
1009     regs.msr = env->msr;
1010     regs.pc = env->nip;
1011 
1012     regs.srr0 = env->spr[SPR_SRR0];
1013     regs.srr1 = env->spr[SPR_SRR1];
1014 
1015     regs.sprg0 = env->spr[SPR_SPRG0];
1016     regs.sprg1 = env->spr[SPR_SPRG1];
1017     regs.sprg2 = env->spr[SPR_SPRG2];
1018     regs.sprg3 = env->spr[SPR_SPRG3];
1019     regs.sprg4 = env->spr[SPR_SPRG4];
1020     regs.sprg5 = env->spr[SPR_SPRG5];
1021     regs.sprg6 = env->spr[SPR_SPRG6];
1022     regs.sprg7 = env->spr[SPR_SPRG7];
1023 
1024     regs.pid = env->spr[SPR_BOOKE_PID];
1025 
1026     for (i = 0; i < 32; i++)
1027         regs.gpr[i] = env->gpr[i];
1028 
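    /* Fold the eight 4-bit CR fields into the single 32-bit image KVM expects,
     * with CR0 in the most significant nibble */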
1029     regs.cr = 0;
1030     for (i = 0; i < 8; i++) {
1031         regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
1032     }
1033 
1034     ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
1035     if (ret < 0)
1036         return ret;
1037 
1038     kvm_put_fp(cs);
1039 
1040     if (env->tlb_dirty) {
1041         kvm_sw_tlb_put(cpu);
1042         env->tlb_dirty = false;
1043     }
1044 
1045     if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
1046         ret = kvmppc_put_books_sregs(cpu);
1047         if (ret < 0) {
1048             return ret;
1049         }
1050     }
1051 
1052     if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
1053         kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1054     }
1055 
1056     if (cap_one_reg) {
1057         int i;
1058 
1059         /* We deliberately ignore errors here: for kernels which have
1060          * the ONE_REG calls but don't support the specific
1061          * registers, there's a reasonable chance things will still
1062          * work, at least until we try to migrate. */
1063         for (i = 0; i < 1024; i++) {
1064             uint64_t id = env->spr_cb[i].one_reg_id;
1065 
1066             if (id != 0) {
1067                 kvm_put_one_spr(cs, id, i);
1068             }
1069         }
1070 
1071 #ifdef TARGET_PPC64
1072         if (msr_ts) {
1073             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1074                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1075             }
1076             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1077                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1078             }
1079             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1080             kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1081             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1082             kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1083             kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1084             kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1085             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1086             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1087             kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1088             kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1089         }
1090 
1091         if (cap_papr) {
1092             if (kvm_put_vpa(cs) < 0) {
1093                 DPRINTF("Warning: Unable to set VPA information to KVM\n");
1094             }
1095         }
1096 
1097         kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1098 #endif /* TARGET_PPC64 */
1099     }
1100 
1101     return ret;
1102 }
1103 
1104 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1105 {
1106     env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1107 }
1108 
1109 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1110 {
1111     CPUPPCState *env = &cpu->env;
1112     struct kvm_sregs sregs;
1113     int ret;
1114 
1115     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1116     if (ret < 0) {
1117         return ret;
1118     }
1119 
1120     if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1121         env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1122         env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1123         env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1124         env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1125         env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1126         env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1127         env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1128         env->spr[SPR_DECR] = sregs.u.e.dec;
1129         env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1130         env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1131         env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1132     }
1133 
1134     if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1135         env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1136         env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1137         env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1138         env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1139         env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1140     }
1141 
1142     if (sregs.u.e.features & KVM_SREGS_E_64) {
1143         env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1144     }
1145 
1146     if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1147         env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1148     }
1149 
1150     if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1151         env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1152         kvm_sync_excp(env, POWERPC_EXCP_CRITICAL,  SPR_BOOKE_IVOR0);
1153         env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1154         kvm_sync_excp(env, POWERPC_EXCP_MCHECK,  SPR_BOOKE_IVOR1);
1155         env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1156         kvm_sync_excp(env, POWERPC_EXCP_DSI,  SPR_BOOKE_IVOR2);
1157         env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1158         kvm_sync_excp(env, POWERPC_EXCP_ISI,  SPR_BOOKE_IVOR3);
1159         env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1160         kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL,  SPR_BOOKE_IVOR4);
1161         env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1162         kvm_sync_excp(env, POWERPC_EXCP_ALIGN,  SPR_BOOKE_IVOR5);
1163         env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1164         kvm_sync_excp(env, POWERPC_EXCP_PROGRAM,  SPR_BOOKE_IVOR6);
1165         env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1166         kvm_sync_excp(env, POWERPC_EXCP_FPU,  SPR_BOOKE_IVOR7);
1167         env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1168         kvm_sync_excp(env, POWERPC_EXCP_SYSCALL,  SPR_BOOKE_IVOR8);
1169         env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1170         kvm_sync_excp(env, POWERPC_EXCP_APU,  SPR_BOOKE_IVOR9);
1171         env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1172         kvm_sync_excp(env, POWERPC_EXCP_DECR,  SPR_BOOKE_IVOR10);
1173         env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1174         kvm_sync_excp(env, POWERPC_EXCP_FIT,  SPR_BOOKE_IVOR11);
1175         env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1176         kvm_sync_excp(env, POWERPC_EXCP_WDT,  SPR_BOOKE_IVOR12);
1177         env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1178         kvm_sync_excp(env, POWERPC_EXCP_DTLB,  SPR_BOOKE_IVOR13);
1179         env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1180         kvm_sync_excp(env, POWERPC_EXCP_ITLB,  SPR_BOOKE_IVOR14);
1181         env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1182         kvm_sync_excp(env, POWERPC_EXCP_DEBUG,  SPR_BOOKE_IVOR15);
1183 
1184         if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1185             env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1186             kvm_sync_excp(env, POWERPC_EXCP_SPEU,  SPR_BOOKE_IVOR32);
1187             env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1188             kvm_sync_excp(env, POWERPC_EXCP_EFPDI,  SPR_BOOKE_IVOR33);
1189             env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1190             kvm_sync_excp(env, POWERPC_EXCP_EFPRI,  SPR_BOOKE_IVOR34);
1191         }
1192 
1193         if (sregs.u.e.features & KVM_SREGS_E_PM) {
1194             env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1195             kvm_sync_excp(env, POWERPC_EXCP_EPERFM,  SPR_BOOKE_IVOR35);
1196         }
1197 
1198         if (sregs.u.e.features & KVM_SREGS_E_PC) {
1199             env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1200             kvm_sync_excp(env, POWERPC_EXCP_DOORI,  SPR_BOOKE_IVOR36);
1201             env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1202             kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1203         }
1204     }
1205 
1206     if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1207         env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1208         env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1209         env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1210         env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1211         env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1212         env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1213         env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1214         env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1215         env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1216         env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1217     }
1218 
1219     if (sregs.u.e.features & KVM_SREGS_EXP) {
1220         env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1221     }
1222 
1223     if (sregs.u.e.features & KVM_SREGS_E_PD) {
1224         env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1225         env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1226     }
1227 
1228     if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1229         env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1230         env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1231         env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1232 
1233         if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1234             env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1235             env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1236         }
1237     }
1238 
1239     return 0;
1240 }
1241 
1242 static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1243 {
1244     CPUPPCState *env = &cpu->env;
1245     struct kvm_sregs sregs;
1246     int ret;
1247     int i;
1248 
1249     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1250     if (ret < 0) {
1251         return ret;
1252     }
1253 
1254     if (!env->external_htab) {
1255         ppc_store_sdr1(env, sregs.u.s.sdr1);
1256     }
1257 
1258     /* Sync SLB */
1259 #ifdef TARGET_PPC64
1260     /*
1261      * The packed SLB array we get from KVM_GET_SREGS only contains
1262      * information about valid entries. So we flush our internal copy
1263      * to get rid of stale ones, then put all valid SLB entries back
1264      * in.
1265      */
1266     memset(env->slb, 0, sizeof(env->slb));
1267     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1268         target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1269         target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1270         /*
1271          * Only restore valid entries
1272          */
1273         if (rb & SLB_ESID_V) {
1274             ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1275         }
1276     }
1277 #endif
1278 
1279     /* Sync SRs */
1280     for (i = 0; i < 16; i++) {
1281         env->sr[i] = sregs.u.s.ppc32.sr[i];
1282     }
1283 
1284     /* Sync BATs */
1285     for (i = 0; i < 8; i++) {
1286         env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1287         env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1288         env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1289         env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1290     }
1291 
1292     return 0;
1293 }
1294 
1295 int kvm_arch_get_registers(CPUState *cs)
1296 {
1297     PowerPCCPU *cpu = POWERPC_CPU(cs);
1298     CPUPPCState *env = &cpu->env;
1299     struct kvm_regs regs;
1300     uint32_t cr;
1301     int i, ret;
1302 
1303     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1304     if (ret < 0)
1305         return ret;
1306 
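    /* Split KVM's 32-bit CR image back into QEMU's eight 4-bit crf[] fields */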
1307     cr = regs.cr;
1308     for (i = 7; i >= 0; i--) {
1309         env->crf[i] = cr & 15;
1310         cr >>= 4;
1311     }
1312 
1313     env->ctr = regs.ctr;
1314     env->lr = regs.lr;
1315     cpu_write_xer(env, regs.xer);
1316     env->msr = regs.msr;
1317     env->nip = regs.pc;
1318 
1319     env->spr[SPR_SRR0] = regs.srr0;
1320     env->spr[SPR_SRR1] = regs.srr1;
1321 
1322     env->spr[SPR_SPRG0] = regs.sprg0;
1323     env->spr[SPR_SPRG1] = regs.sprg1;
1324     env->spr[SPR_SPRG2] = regs.sprg2;
1325     env->spr[SPR_SPRG3] = regs.sprg3;
1326     env->spr[SPR_SPRG4] = regs.sprg4;
1327     env->spr[SPR_SPRG5] = regs.sprg5;
1328     env->spr[SPR_SPRG6] = regs.sprg6;
1329     env->spr[SPR_SPRG7] = regs.sprg7;
1330 
1331     env->spr[SPR_BOOKE_PID] = regs.pid;
1332 
1333     for (i = 0; i < 32; i++)
1334         env->gpr[i] = regs.gpr[i];
1335 
1336     kvm_get_fp(cs);
1337 
1338     if (cap_booke_sregs) {
1339         ret = kvmppc_get_booke_sregs(cpu);
1340         if (ret < 0) {
1341             return ret;
1342         }
1343     }
1344 
1345     if (cap_segstate) {
1346         ret = kvmppc_get_books_sregs(cpu);
1347         if (ret < 0) {
1348             return ret;
1349         }
1350     }
1351 
1352     if (cap_hior) {
1353         kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1354     }
1355 
1356     if (cap_one_reg) {
1357         int i;
1358 
1359         /* We deliberately ignore errors here: for kernels which have
1360          * the ONE_REG calls but don't support the specific
1361          * registers, there's a reasonable chance things will still
1362          * work, at least until we try to migrate. */
1363         for (i = 0; i < 1024; i++) {
1364             uint64_t id = env->spr_cb[i].one_reg_id;
1365 
1366             if (id != 0) {
1367                 kvm_get_one_spr(cs, id, i);
1368             }
1369         }
1370 
1371 #ifdef TARGET_PPC64
1372         if (msr_ts) {
1373             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1374                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1375             }
1376             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1377                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1378             }
1379             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1380             kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1381             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1382             kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1383             kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1384             kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1385             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1386             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1387             kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1388             kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1389         }
1390 
1391         if (cap_papr) {
1392             if (kvm_get_vpa(cs) < 0) {
1393                 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1394             }
1395         }
1396 
1397         kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1398 #endif
1399     }
1400 
1401     return 0;
1402 }
1403 
1404 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1405 {
1406     unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1407 
1408     if (irq != PPC_INTERRUPT_EXT) {
1409         return 0;
1410     }
1411 
1412     if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1413         return 0;
1414     }
1415 
1416     kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1417 
1418     return 0;
1419 }
1420 
1421 #if defined(TARGET_PPCEMB)
1422 #define PPC_INPUT_INT PPC40x_INPUT_INT
1423 #elif defined(TARGET_PPC64)
1424 #define PPC_INPUT_INT PPC970_INPUT_INT
1425 #else
1426 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1427 #endif
1428 
1429 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1430 {
1431     PowerPCCPU *cpu = POWERPC_CPU(cs);
1432     CPUPPCState *env = &cpu->env;
1433     int r;
1434     unsigned irq;
1435 
1436     qemu_mutex_lock_iothread();
1437 
1438     /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1439      * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1440     if (!cap_interrupt_level &&
1441         run->ready_for_interrupt_injection &&
1442         (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1443         (env->irq_input_state & (1<<PPC_INPUT_INT)))
1444     {
1445         /* For now KVM disregards the 'irq' argument. However, in the
1446          * future KVM could cache it in-kernel to avoid a heavyweight exit
1447          * when reading the UIC.
1448          */
1449         irq = KVM_INTERRUPT_SET;
1450 
1451         DPRINTF("injected interrupt %d\n", irq);
1452         r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1453         if (r < 0) {
1454             printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1455         }
1456 
1457         /* Always wake up soon in case the interrupt was level based */
1458         timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1459                        (NANOSECONDS_PER_SECOND / 50));
1460     }
1461 
1462     /* We don't know if there are more interrupts pending after this. However,
1463      * the guest will return to userspace in the course of handling this one
1464      * anyway, so we will get a chance to deliver the rest. */
1465 
1466     qemu_mutex_unlock_iothread();
1467 }
1468 
1469 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1470 {
1471     return MEMTXATTRS_UNSPECIFIED;
1472 }
1473 
1474 int kvm_arch_process_async_events(CPUState *cs)
1475 {
1476     return cs->halted;
1477 }
1478 
1479 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1480 {
1481     CPUState *cs = CPU(cpu);
1482     CPUPPCState *env = &cpu->env;
1483 
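    /* Only go idle if external interrupts are enabled (MSR_EE) and none is pending */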
1484     if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1485         cs->halted = 1;
1486         cs->exception_index = EXCP_HLT;
1487     }
1488 
1489     return 0;
1490 }
1491 
1492 /* Map DCR accesses to the existing QEMU DCR emulation */
1493 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1494 {
1495     if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1496         fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1497 
1498     return 0;
1499 }
1500 
1501 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1502 {
1503     if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1504         fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1505 
1506     return 0;
1507 }
1508 
1509 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1510 {
1511     /* Mixed endian case is not handled */
1512     uint32_t sc = debug_inst_opcode;
1513 
1514     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1515                             sizeof(sc), 0) ||
1516         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1517         return -EINVAL;
1518     }
1519 
1520     return 0;
1521 }
1522 
1523 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1524 {
1525     uint32_t sc;
1526 
1527     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1528         sc != debug_inst_opcode ||
1529         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1530                             sizeof(sc), 1)) {
1531         return -EINVAL;
1532     }
1533 
1534     return 0;
1535 }
1536 
1537 static int find_hw_breakpoint(target_ulong addr, int type)
1538 {
1539     int n;
1540 
1541     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1542            <= ARRAY_SIZE(hw_debug_points));
1543 
1544     for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1545         if (hw_debug_points[n].addr == addr &&
1546              hw_debug_points[n].type == type) {
1547             return n;
1548         }
1549     }
1550 
1551     return -1;
1552 }
1553 
1554 static int find_hw_watchpoint(target_ulong addr, int *flag)
1555 {
1556     int n;
1557 
1558     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1559     if (n >= 0) {
1560         *flag = BP_MEM_ACCESS;
1561         return n;
1562     }
1563 
1564     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1565     if (n >= 0) {
1566         *flag = BP_MEM_WRITE;
1567         return n;
1568     }
1569 
1570     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1571     if (n >= 0) {
1572         *flag = BP_MEM_READ;
1573         return n;
1574     }
1575 
1576     return -1;
1577 }
1578 
1579 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1580                                   target_ulong len, int type)
1581 {
1582     if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1583         return -ENOBUFS;
1584     }
1585 
1586     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1587     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1588 
1589     switch (type) {
1590     case GDB_BREAKPOINT_HW:
1591         if (nb_hw_breakpoint >= max_hw_breakpoint) {
1592             return -ENOBUFS;
1593         }
1594 
1595         if (find_hw_breakpoint(addr, type) >= 0) {
1596             return -EEXIST;
1597         }
1598 
1599         nb_hw_breakpoint++;
1600         break;
1601 
1602     case GDB_WATCHPOINT_WRITE:
1603     case GDB_WATCHPOINT_READ:
1604     case GDB_WATCHPOINT_ACCESS:
1605         if (nb_hw_watchpoint >= max_hw_watchpoint) {
1606             return -ENOBUFS;
1607         }
1608 
1609         if (find_hw_breakpoint(addr, type) >= 0) {
1610             return -EEXIST;
1611         }
1612 
1613         nb_hw_watchpoint++;
1614         break;
1615 
1616     default:
1617         return -ENOSYS;
1618     }
1619 
1620     return 0;
1621 }
1622 
1623 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1624                                   target_ulong len, int type)
1625 {
1626     int n;
1627 
1628     n = find_hw_breakpoint(addr, type);
1629     if (n < 0) {
1630         return -ENOENT;
1631     }
1632 
1633     switch (type) {
1634     case GDB_BREAKPOINT_HW:
1635         nb_hw_breakpoint--;
1636         break;
1637 
1638     case GDB_WATCHPOINT_WRITE:
1639     case GDB_WATCHPOINT_READ:
1640     case GDB_WATCHPOINT_ACCESS:
1641         nb_hw_watchpoint--;
1642         break;
1643 
1644     default:
1645         return -ENOSYS;
1646     }
1647     hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1648 
1649     return 0;
1650 }
1651 
1652 void kvm_arch_remove_all_hw_breakpoints(void)
1653 {
1654     nb_hw_breakpoint = nb_hw_watchpoint = 0;
1655 }
1656 
1657 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1658 {
1659     int n;
1660 
1661     /* Software Breakpoint updates */
1662     if (kvm_sw_breakpoints_active(cs)) {
1663         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1664     }
1665 
1666     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1667            <= ARRAY_SIZE(hw_debug_points));
1668     assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1669 
1670     if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1671         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1672         memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1673         for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1674             switch (hw_debug_points[n].type) {
1675             case GDB_BREAKPOINT_HW:
1676                 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1677                 break;
1678             case GDB_WATCHPOINT_WRITE:
1679                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1680                 break;
1681             case GDB_WATCHPOINT_READ:
1682                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1683                 break;
1684             case GDB_WATCHPOINT_ACCESS:
1685                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1686                                         KVMPPC_DEBUG_WATCH_READ;
1687                 break;
1688             default:
1689                 cpu_abort(cs, "Unsupported breakpoint type\n");
1690             }
1691             dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1692         }
1693     }
1694 }
1695 
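/* Handle a KVM_EXIT_DEBUG exit.  Returns non-zero if the debug exception
 * belongs to QEMU (single-step or one of our break/watchpoints), zero if
 * the guest has to deal with it. */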
1696 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1697 {
1698     CPUState *cs = CPU(cpu);
1699     CPUPPCState *env = &cpu->env;
1700     struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1701     int handle = 0;
1702     int n;
1703     int flag = 0;
1704 
1705     if (cs->singlestep_enabled) {
1706         handle = 1;
1707     } else if (arch_info->status) {
1708         if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1709             if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1710                 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1711                 if (n >= 0) {
1712                     handle = 1;
1713                 }
1714             } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1715                                             KVMPPC_DEBUG_WATCH_WRITE)) {
1716                 n = find_hw_watchpoint(arch_info->address, &flag);
1717                 if (n >= 0) {
1718                     handle = 1;
1719                     cs->watchpoint_hit = &hw_watchpoint;
1720                     hw_watchpoint.vaddr = hw_debug_points[n].addr;
1721                     hw_watchpoint.flags = flag;
1722                 }
1723             }
1724         }
1725     } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1726         handle = 1;
1727     } else {
1728         /* QEMU is not able to handle this debug exception, so inject
1729          * a program exception into the guest;
1730          * yes, a program exception and NOT a debug exception!
1731          * While QEMU is using the debug resources, debug exceptions
1732          * must always be enabled. To achieve this we set MSR_DE and
1733          * also set MSRP_DEP so the guest cannot change MSR_DE.
1734          * When emulating debug resources for the guest we want the
1735          * guest to control MSR_DE (enable/disable the debug interrupt
1736          * as needed).  Supporting both configurations at once is NOT
1737          * possible, so debug resources cannot be shared between QEMU
1738          * and the guest on the BookE architecture.
1739          * In the current design QEMU takes priority over the guest:
1740          * if QEMU is using the debug resources then the guest cannot
1741          * use them.
1742          * For software breakpoints QEMU uses a privileged instruction,
1743          * so there is no way we can get here because the guest set a
1744          * debug exception; the only possibility is that the guest
1745          * executed a privileged / illegal instruction, and that is
1746          * why we inject a program interrupt.
1747          */
1748 
1749         cpu_synchronize_state(cs);
1750         /* env->nip is PC, so increment this by 4 to use
1751          * ppc_cpu_do_interrupt(), which sets srr0 = env->nip - 4.
1752          */
1753         env->nip += 4;
1754         cs->exception_index = POWERPC_EXCP_PROGRAM;
1755         env->error_code = POWERPC_EXCP_INVAL;
1756         ppc_cpu_do_interrupt(cs);
1757     }
1758 
1759     return handle;
1760 }
1761 
1762 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1763 {
1764     PowerPCCPU *cpu = POWERPC_CPU(cs);
1765     CPUPPCState *env = &cpu->env;
1766     int ret;
1767 
1768     qemu_mutex_lock_iothread();
1769 
1770     switch (run->exit_reason) {
1771     case KVM_EXIT_DCR:
1772         if (run->dcr.is_write) {
1773             DPRINTF("handle dcr write\n");
1774             ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1775         } else {
1776             DPRINTF("handle dcr read\n");
1777             ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1778         }
1779         break;
1780     case KVM_EXIT_HLT:
1781         DPRINTF("handle halt\n");
1782         ret = kvmppc_handle_halt(cpu);
1783         break;
1784 #if defined(TARGET_PPC64)
1785     case KVM_EXIT_PAPR_HCALL:
1786         DPRINTF("handle PAPR hypercall\n");
1787         run->papr_hcall.ret = spapr_hypercall(cpu,
1788                                               run->papr_hcall.nr,
1789                                               run->papr_hcall.args);
1790         ret = 0;
1791         break;
1792 #endif
1793     case KVM_EXIT_EPR:
1794         DPRINTF("handle epr\n");
1795         run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1796         ret = 0;
1797         break;
1798     case KVM_EXIT_WATCHDOG:
1799         DPRINTF("handle watchdog expiry\n");
1800         watchdog_perform_action();
1801         ret = 0;
1802         break;
1803 
1804     case KVM_EXIT_DEBUG:
1805         DPRINTF("handle debug exception\n");
1806         if (kvm_handle_debug(cpu, run)) {
1807             ret = EXCP_DEBUG;
1808             break;
1809         }
1810         /* re-enter, this exception was guest-internal */
1811         ret = 0;
1812         break;
1813 
1814     default:
1815         fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1816         ret = -1;
1817         break;
1818     }
1819 
1820     qemu_mutex_unlock_iothread();
1821     return ret;
1822 }
1823 
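/* Set bits in the guest's BookE Timer Status Register via the
 * KVM_REG_PPC_OR_TSR one-reg interface. */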
1824 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1825 {
1826     CPUState *cs = CPU(cpu);
1827     uint32_t bits = tsr_bits;
1828     struct kvm_one_reg reg = {
1829         .id = KVM_REG_PPC_OR_TSR,
1830         .addr = (uintptr_t) &bits,
1831     };
1832 
1833     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1834 }
1835 
1836 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1837 {
1838 
1839     CPUState *cs = CPU(cpu);
1840     uint32_t bits = tsr_bits;
1841     struct kvm_one_reg reg = {
1842         .id = KVM_REG_PPC_CLEAR_TSR,
1843         .addr = (uintptr_t) &bits,
1844     };
1845 
1846     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1847 }
1848 
1849 int kvmppc_set_tcr(PowerPCCPU *cpu)
1850 {
1851     CPUState *cs = CPU(cpu);
1852     CPUPPCState *env = &cpu->env;
1853     uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1854 
1855     struct kvm_one_reg reg = {
1856         .id = KVM_REG_PPC_TCR,
1857         .addr = (uintptr_t) &tcr,
1858     };
1859 
1860     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1861 }
1862 
1863 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1864 {
1865     CPUState *cs = CPU(cpu);
1866     int ret;
1867 
1868     if (!kvm_enabled()) {
1869         return -1;
1870     }
1871 
1872     if (!cap_ppc_watchdog) {
1873         printf("warning: KVM does not support watchdog\n");
1874         return -1;
1875     }
1876 
1877     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1878     if (ret < 0) {
1879         fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1880                 __func__, strerror(-ret));
1881         return ret;
1882     }
1883 
1884     return ret;
1885 }
1886 
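/* Look for a line starting with 'field' in /proc/cpuinfo and copy it
 * (truncated to 'len' bytes) into 'value'.  Returns 0 on success. */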
1887 static int read_cpuinfo(const char *field, char *value, int len)
1888 {
1889     FILE *f;
1890     int ret = -1;
1891     int field_len = strlen(field);
1892     char line[512];
1893 
1894     f = fopen("/proc/cpuinfo", "r");
1895     if (!f) {
1896         return -1;
1897     }
1898 
1899     do {
1900         if (!fgets(line, sizeof(line), f)) {
1901             break;
1902         }
1903         if (!strncmp(line, field, field_len)) {
1904             pstrcpy(value, len, line);
1905             ret = 0;
1906             break;
1907         }
1908     } while (*line);
1909 
1910     fclose(f);
1911 
1912     return ret;
1913 }
1914 
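/* Return the host timebase frequency advertised in /proc/cpuinfo,
 * falling back to 1GHz if it cannot be determined. */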
1915 uint32_t kvmppc_get_tbfreq(void)
1916 {
1917     char line[512];
1918     char *ns;
1919     uint32_t retval = NANOSECONDS_PER_SECOND;
1920 
1921     if (read_cpuinfo("timebase", line, sizeof(line))) {
1922         return retval;
1923     }
1924 
1925     if (!(ns = strchr(line, ':'))) {
1926         return retval;
1927     }
1928 
1929     ns++;
1930 
1931     return atoi(ns);
1932 }
1933 
1934 bool kvmppc_get_host_serial(char **value)
1935 {
1936     return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
1937                                NULL);
1938 }
1939 
1940 bool kvmppc_get_host_model(char **value)
1941 {
1942     return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
1943 }
1944 
1945 /* Try to find a device tree node for a CPU with clock-frequency property */
1946 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1947 {
1948     struct dirent *dirp;
1949     DIR *dp;
1950 
1951     if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1952         printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1953         return -1;
1954     }
1955 
1956     buf[0] = '\0';
1957     while ((dirp = readdir(dp)) != NULL) {
1958         FILE *f;
1959         snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1960                  dirp->d_name);
1961         f = fopen(buf, "r");
1962         if (f) {
1963             snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1964             fclose(f);
1965             break;
1966         }
1967         buf[0] = '\0';
1968     }
1969     closedir(dp);
1970     if (buf[0] == '\0') {
1971         printf("Unknown host!\n");
1972         return -1;
1973     }
1974 
1975     return 0;
1976 }
1977 
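/* Read a single integer property (32-bit or 64-bit, big-endian) from a
 * host device tree file and convert it to host byte order. */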
1978 static uint64_t kvmppc_read_int_dt(const char *filename)
1979 {
1980     union {
1981         uint32_t v32;
1982         uint64_t v64;
1983     } u;
1984     FILE *f;
1985     int len;
1986 
1987     f = fopen(filename, "rb");
1988     if (!f) {
1989         return -1;
1990     }
1991 
1992     len = fread(&u, 1, sizeof(u), f);
1993     fclose(f);
1994     switch (len) {
1995     case 4:
1996         /* property is a 32-bit quantity */
1997         return be32_to_cpu(u.v32);
1998     case 8:
1999         return be64_to_cpu(u.v64);
2000     }
2001 
2002     return 0;
2003 }
2004 
2005 /* Read a CPU node property from the host device tree that's a single
2006  * integer (32-bit or 64-bit).  Returns -1 if the property cannot
2007  * be found or opened, and 0 if the property's format is not
2008  * understood. */
2009 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
2010 {
2011     char buf[PATH_MAX], *tmp;
2012     uint64_t val;
2013 
2014     if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
2015         return -1;
2016     }
2017 
2018     tmp = g_strdup_printf("%s/%s", buf, propname);
2019     val = kvmppc_read_int_dt(tmp);
2020     g_free(tmp);
2021 
2022     return val;
2023 }
2024 
2025 uint64_t kvmppc_get_clockfreq(void)
2026 {
2027     return kvmppc_read_int_cpu_dt("clock-frequency");
2028 }
2029 
2030 uint32_t kvmppc_get_vmx(void)
2031 {
2032     return kvmppc_read_int_cpu_dt("ibm,vmx");
2033 }
2034 
2035 uint32_t kvmppc_get_dfp(void)
2036 {
2037     return kvmppc_read_int_cpu_dt("ibm,dfp");
2038 }
2039 
2040 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
2041 {
2042     PowerPCCPU *cpu = ppc_env_get_cpu(env);
2043     CPUState *cs = CPU(cpu);
2044 
2045     if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
2046         !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
2047         return 0;
2048     }
2049 
2050     return 1;
2051 }
2052 
2053 int kvmppc_get_hasidle(CPUPPCState *env)
2054 {
2055     struct kvm_ppc_pvinfo pvinfo;
2056 
2057     if (!kvmppc_get_pvinfo(env, &pvinfo) &&
2058         (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
2059         return 1;
2060     }
2061 
2062     return 0;
2063 }
2064 
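/* Copy the hypercall instruction sequence advertised by KVM into 'buf'.
 * If the kernel provides no pvinfo, fall back to a sequence that makes
 * every hypercall fail (return -1) regardless of guest endianness. */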
2065 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
2066 {
2067     uint32_t *hc = (uint32_t *)buf;
2068     struct kvm_ppc_pvinfo pvinfo;
2069 
2070     if (!kvmppc_get_pvinfo(env, &pvinfo)) {
2071         memcpy(buf, pvinfo.hcall, buf_len);
2072         return 0;
2073     }
2074 
2075     /*
2076      * Fallback to always fail hypercalls regardless of endianness:
2077      *
2078      *     tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
2079      *     li r3, -1
2080      *     b .+8       (becomes nop in wrong endian)
2081      *     bswap32(li r3, -1)
2082      */
2083 
2084     hc[0] = cpu_to_be32(0x08000048);
2085     hc[1] = cpu_to_be32(0x3860ffff);
2086     hc[2] = cpu_to_be32(0x48000008);
2087     hc[3] = cpu_to_be32(bswap32(0x3860ffff));
2088 
2089     return 1;
2090 }
2091 
2092 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
2093 {
2094     return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
2095 }
2096 
2097 void kvmppc_enable_logical_ci_hcalls(void)
2098 {
2099     /*
2100      * FIXME: it would be nice if we could detect the cases where
2101      * we're using a device which requires the in-kernel
2102      * implementation of these hcalls but the kernel lacks it, and
2103      * produce a warning in that case.
2104      */
2105     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
2106     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
2107 }
2108 
2109 void kvmppc_enable_set_mode_hcall(void)
2110 {
2111     kvmppc_enable_hcall(kvm_state, H_SET_MODE);
2112 }
2113 
2114 void kvmppc_enable_clear_ref_mod_hcalls(void)
2115 {
2116     kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
2117     kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
2118 }
2119 
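/* Switch the vCPU into PAPR (pseries) mode; fatal if the host kernel or
 * vCPU type does not support it. */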
2120 void kvmppc_set_papr(PowerPCCPU *cpu)
2121 {
2122     CPUState *cs = CPU(cpu);
2123     int ret;
2124 
2125     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2126     if (ret) {
2127         error_report("This vCPU type or KVM version does not support PAPR");
2128         exit(1);
2129     }
2130 
2131     /* Update the capability flag so we sync the right information
2132      * with kvm */
2133     cap_papr = 1;
2134 }
2135 
2136 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr)
2137 {
2138     return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr);
2139 }
2140 
2141 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2142 {
2143     CPUState *cs = CPU(cpu);
2144     int ret;
2145 
2146     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2147     if (ret && mpic_proxy) {
2148         error_report("This KVM version does not support EPR");
2149         exit(1);
2150     }
2151 }
2152 
2153 int kvmppc_smt_threads(void)
2154 {
2155     return cap_ppc_smt ? cap_ppc_smt : 1;
2156 }
2157 
2158 #ifdef TARGET_PPC64
2159 off_t kvmppc_alloc_rma(void **rma)
2160 {
2161     off_t size;
2162     int fd;
2163     struct kvm_allocate_rma ret;
2164 
2165     /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
2166      * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
2167      *                      not necessary on this hardware
2168      * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
2169      *
2170      * FIXME: We should allow the user to force contiguous RMA
2171      * allocation in the cap_ppc_rma==1 case.
2172      */
2173     if (cap_ppc_rma < 2) {
2174         return 0;
2175     }
2176 
2177     fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
2178     if (fd < 0) {
2179         fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
2180                 strerror(errno));
2181         return -1;
2182     }
2183 
2184     size = MIN(ret.rma_size, 256ul << 20);
2185 
2186     *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2187     if (*rma == MAP_FAILED) {
2188         fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
2189         return -1;
2190     }
2191 
2192     return size;
2193 }
2194 
2195 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2196 {
2197     struct kvm_ppc_smmu_info info;
2198     long rampagesize, best_page_shift;
2199     int i;
2200 
2201     if (cap_ppc_rma >= 2) {
2202         return current_size;
2203     }
2204 
2205     /* Find the largest hardware supported page size that's less than
2206      * or equal to the (logical) backing page size of guest RAM */
2207     kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
2208     rampagesize = getrampagesize();
2209     best_page_shift = 0;
2210 
2211     for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2212         struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2213 
2214         if (!sps->page_shift) {
2215             continue;
2216         }
2217 
2218         if ((sps->page_shift > best_page_shift)
2219             && ((1UL << sps->page_shift) <= rampagesize)) {
2220             best_page_shift = sps->page_shift;
2221         }
2222     }
2223 
2224     return MIN(current_size,
2225                1ULL << (best_page_shift + hash_shift - 7));
2226 }
2227 #endif
2228 
2229 bool kvmppc_spapr_use_multitce(void)
2230 {
2231     return cap_spapr_multitce;
2232 }
2233 
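/* Create an in-kernel TCE table for the given LIOBN and mmap it into
 * QEMU's address space.  Returns NULL (and *pfd = -1) if the in-kernel
 * acceleration is unavailable or the setup fails. */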
2234 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
2235                               bool need_vfio)
2236 {
2237     struct kvm_create_spapr_tce args = {
2238         .liobn = liobn,
2239         .window_size = window_size,
2240     };
2241     long len;
2242     int fd;
2243     void *table;
2244 
2245     /* Must set fd to -1 so we don't try to munmap when called for
2246      * destroying the table, which the upper layers -will- do
2247      */
2248     *pfd = -1;
2249     if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2250         return NULL;
2251     }
2252 
2253     fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2254     if (fd < 0) {
2255         fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2256                 liobn);
2257         return NULL;
2258     }
2259 
2260     len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
2261     /* FIXME: round this up to page size */
2262 
2263     table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2264     if (table == MAP_FAILED) {
2265         fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2266                 liobn);
2267         close(fd);
2268         return NULL;
2269     }
2270 
2271     *pfd = fd;
2272     return table;
2273 }
2274 
2275 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
2276 {
2277     long len;
2278 
2279     if (fd < 0) {
2280         return -1;
2281     }
2282 
2283     len = nb_table * sizeof(uint64_t);
2284     if ((munmap(table, len) < 0) ||
2285         (close(fd) < 0)) {
2286         fprintf(stderr, "KVM: Unexpected error removing TCE table: %s\n",
2287                 strerror(errno));
2288         /* Leak the table */
2289     }
2290 
2291     return 0;
2292 }
2293 
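/* Ask KVM to allocate or reset the guest hash page table.  Returns the
 * shift of the kernel-managed table, 0 if QEMU must allocate the HTAB
 * itself, or a negative errno on failure. */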
2294 int kvmppc_reset_htab(int shift_hint)
2295 {
2296     uint32_t shift = shift_hint;
2297 
2298     if (!kvm_enabled()) {
2299         /* Full emulation, tell caller to allocate htab itself */
2300         return 0;
2301     }
2302     if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2303         int ret;
2304         ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2305         if (ret == -ENOTTY) {
2306             /* At least some versions of PR KVM advertise the
2307              * capability, but don't implement the ioctl().  Oops.
2308              * Return 0 so that we allocate the htab in qemu, as is
2309              * correct for PR. */
2310             return 0;
2311         } else if (ret < 0) {
2312             return ret;
2313         }
2314         return shift;
2315     }
2316 
2317     /* We have a kernel that predates the htab reset calls.  For PR
2318      * KVM, we need to allocate the htab ourselves; an HV KVM of this
2319      * era will already have allocated a 16MB fixed-size hash table. */
2320     if (kvmppc_is_pr(kvm_state)) {
2321         /* PR - tell caller to allocate htab */
2322         return 0;
2323     } else {
2324         /* HV - assume 16MB kernel allocated htab */
2325         return 24;
2326     }
2327 }
2328 
2329 static inline uint32_t mfpvr(void)
2330 {
2331     uint32_t pvr;
2332 
2333     asm ("mfpvr %0"
2334          : "=r"(pvr));
2335     return pvr;
2336 }
2337 
2338 static void alter_insns(uint64_t *word, uint64_t flags, bool on)
2339 {
2340     if (on) {
2341         *word |= flags;
2342     } else {
2343         *word &= ~flags;
2344     }
2345 }
2346 
2347 static void kvmppc_host_cpu_initfn(Object *obj)
2348 {
2349     assert(kvm_enabled());
2350 }
2351 
2352 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2353 {
2354     DeviceClass *dc = DEVICE_CLASS(oc);
2355     PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2356     uint32_t vmx = kvmppc_get_vmx();
2357     uint32_t dfp = kvmppc_get_dfp();
2358     uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2359     uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2360 
2361     /* Now fix up the class with information we can query from the host */
2362     pcc->pvr = mfpvr();
2363 
2364     if (vmx != -1) {
2365         /* Only override when we know what the host supports */
2366         alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
2367         alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
2368     }
2369     if (dfp != -1) {
2370         /* Only override when we know what the host supports */
2371         alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
2372     }
2373 
2374     if (dcache_size != -1) {
2375         pcc->l1_dcache_size = dcache_size;
2376     }
2377 
2378     if (icache_size != -1) {
2379         pcc->l1_icache_size = icache_size;
2380     }
2381 
2382     /* Reason: kvmppc_host_cpu_initfn() dies when !kvm_enabled() */
2383     dc->cannot_destroy_with_object_finalize_yet = true;
2384 }
2385 
2386 bool kvmppc_has_cap_epr(void)
2387 {
2388     return cap_epr;
2389 }
2390 
2391 bool kvmppc_has_cap_htab_fd(void)
2392 {
2393     return cap_htab_fd;
2394 }
2395 
2396 bool kvmppc_has_cap_fixup_hcalls(void)
2397 {
2398     return cap_fixup_hcalls;
2399 }
2400 
2401 bool kvmppc_has_cap_htm(void)
2402 {
2403     return cap_htm;
2404 }
2405 
2406 static PowerPCCPUClass *ppc_cpu_get_family_class(PowerPCCPUClass *pcc)
2407 {
2408     ObjectClass *oc = OBJECT_CLASS(pcc);
2409 
2410     while (oc && !object_class_is_abstract(oc)) {
2411         oc = object_class_get_parent(oc);
2412     }
2413     assert(oc);
2414 
2415     return POWERPC_CPU_CLASS(oc);
2416 }
2417 
2418 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
2419 {
2420     uint32_t host_pvr = mfpvr();
2421     PowerPCCPUClass *pvr_pcc;
2422 
2423     pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2424     if (pvr_pcc == NULL) {
2425         pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2426     }
2427 
2428     return pvr_pcc;
2429 }
2430 
2431 static int kvm_ppc_register_host_cpu_type(void)
2432 {
2433     TypeInfo type_info = {
2434         .name = TYPE_HOST_POWERPC_CPU,
2435         .instance_init = kvmppc_host_cpu_initfn,
2436         .class_init = kvmppc_host_cpu_class_init,
2437     };
2438     PowerPCCPUClass *pvr_pcc;
2439     DeviceClass *dc;
2440     int i;
2441 
2442     pvr_pcc = kvm_ppc_get_host_cpu_class();
2443     if (pvr_pcc == NULL) {
2444         return -1;
2445     }
2446     type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2447     type_register(&type_info);
2448 
2449 #if defined(TARGET_PPC64)
2450     type_info.name = g_strdup_printf("%s-"TYPE_SPAPR_CPU_CORE, "host");
2451     type_info.parent = TYPE_SPAPR_CPU_CORE;
2452     type_info.instance_size = sizeof(sPAPRCPUCore);
2453     type_info.instance_init = NULL;
2454     type_info.class_init = spapr_cpu_core_class_init;
2455     type_info.class_data = (void *) "host";
2456     type_register(&type_info);
2457     g_free((void *)type_info.name);
2458 #endif
2459 
2460     /*
2461      * Update generic CPU family class alias (e.g. on a POWER8NVL host,
2462      * we want "POWER8" to be a "family" alias that points to the current
2463      * host CPU type, too)
2464      */
2465     dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc));
2466     for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) {
2467         if (strcmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) {
2468             ObjectClass *oc = OBJECT_CLASS(pvr_pcc);
2469             char *suffix;
2470 
2471             ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc));
2472             suffix = strstr(ppc_cpu_aliases[i].model, "-"TYPE_POWERPC_CPU);
2473             if (suffix) {
2474                 *suffix = 0;
2475             }
2476             ppc_cpu_aliases[i].oc = oc;
2477             break;
2478         }
2479     }
2480 
2481     return 0;
2482 }
2483 
2484 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2485 {
2486     struct kvm_rtas_token_args args = {
2487         .token = token,
2488     };
2489 
2490     if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2491         return -ENOENT;
2492     }
2493 
2494     strncpy(args.name, function, sizeof(args.name));
2495 
2496     return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2497 }
2498 
2499 int kvmppc_get_htab_fd(bool write)
2500 {
2501     struct kvm_get_htab_fd s = {
2502         .flags = write ? KVM_GET_HTAB_WRITE : 0,
2503         .start_index = 0,
2504     };
2505 
2506     if (!cap_htab_fd) {
2507         fprintf(stderr, "KVM version doesn't support saving the hash table\n");
2508         return -1;
2509     }
2510 
2511     return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2512 }
2513 
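/* Stream hash table entries from the kernel HTAB fd into the migration
 * stream, stopping when the fd is drained or (if max_ns >= 0) after
 * roughly max_ns nanoseconds.  Returns 1 when the table is complete,
 * 0 if more remains to be sent, negative on a read error. */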
2514 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2515 {
2516     int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2517     uint8_t buf[bufsize];
2518     ssize_t rc;
2519 
2520     do {
2521         rc = read(fd, buf, bufsize);
2522         if (rc < 0) {
2523             fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2524                     strerror(errno));
2525             return rc;
2526         } else if (rc) {
2527             uint8_t *buffer = buf;
2528             ssize_t n = rc;
2529             while (n) {
2530                 struct kvm_get_htab_header *head =
2531                     (struct kvm_get_htab_header *) buffer;
2532                 size_t chunksize = sizeof(*head) +
2533                      HASH_PTE_SIZE_64 * head->n_valid;
2534 
2535                 qemu_put_be32(f, head->index);
2536                 qemu_put_be16(f, head->n_valid);
2537                 qemu_put_be16(f, head->n_invalid);
2538                 qemu_put_buffer(f, (void *)(head + 1),
2539                                 HASH_PTE_SIZE_64 * head->n_valid);
2540 
2541                 buffer += chunksize;
2542                 n -= chunksize;
2543             }
2544         }
2545     } while ((rc != 0)
2546              && ((max_ns < 0)
2547                  || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2548 
2549     return (rc == 0) ? 1 : 0;
2550 }
2551 
2552 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2553                            uint16_t n_valid, uint16_t n_invalid)
2554 {
2555     struct kvm_get_htab_header *buf;
2556     size_t chunksize = sizeof(*buf) + n_valid * HASH_PTE_SIZE_64;
2557     ssize_t rc;
2558 
2559     buf = alloca(chunksize);
2560     buf->index = index;
2561     buf->n_valid = n_valid;
2562     buf->n_invalid = n_invalid;
2563 
2564     qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64 * n_valid);
2565 
2566     rc = write(fd, buf, chunksize);
2567     if (rc < 0) {
2568         fprintf(stderr, "Error writing KVM hash table: %s\n",
2569                 strerror(errno));
2570         return rc;
2571     }
2572     if (rc != chunksize) {
2573         /* We should never get a short write on a single chunk */
2574         fprintf(stderr, "Short write, restoring KVM hash table\n");
2575         return -1;
2576     }
2577     return 0;
2578 }
2579 
2580 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2581 {
2582     return true;
2583 }
2584 
2585 int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
2586 {
2587     return 1;
2588 }
2589 
2590 int kvm_arch_on_sigbus(int code, void *addr)
2591 {
2592     return 1;
2593 }
2594 
2595 void kvm_arch_init_irq_routing(KVMState *s)
2596 {
2597 }
2598 
2599 struct kvm_get_htab_buf {
2600     struct kvm_get_htab_header header;
2601     /*
2602      * We require one extra entry for the read
2603      */
2604     target_ulong hpte[(HPTES_PER_GROUP * 2) + 1];
2605 };
2606 
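/* Read one HPTE group starting at pte_index via the kernel HTAB fd.
 * Returns a token (really a pointer to the buffered PTEs) to be released
 * with kvmppc_hash64_free_pteg(), or 0 on error. */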
2607 uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index)
2608 {
2609     int htab_fd;
2610     struct kvm_get_htab_fd ghf;
2611     struct kvm_get_htab_buf  *hpte_buf;
2612 
2613     ghf.flags = 0;
2614     ghf.start_index = pte_index;
2615     htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2616     if (htab_fd < 0) {
2617         goto error_out;
2618     }
2619 
2620     hpte_buf = g_malloc0(sizeof(*hpte_buf));
2621     /*
2622      * Read the hpte group
2623      */
2624     if (read(htab_fd, hpte_buf, sizeof(*hpte_buf)) < 0) {
2625         goto out_close;
2626     }
2627 
2628     close(htab_fd);
2629     return (uint64_t)(uintptr_t) hpte_buf->hpte;
2630 
2631 out_close:
2632     g_free(hpte_buf);
2633     close(htab_fd);
2634 error_out:
2635     return 0;
2636 }
2637 
2638 void kvmppc_hash64_free_pteg(uint64_t token)
2639 {
2640     struct kvm_get_htab_buf *htab_buf;
2641 
2642     htab_buf = container_of((void *)(uintptr_t) token, struct kvm_get_htab_buf,
2643                             hpte);
2644     g_free(htab_buf);
2645     return;
2646 }
2647 
2648 void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong pte_index,
2649                              target_ulong pte0, target_ulong pte1)
2650 {
2651     int htab_fd;
2652     struct kvm_get_htab_fd ghf;
2653     struct kvm_get_htab_buf hpte_buf;
2654 
2655     ghf.flags = 0;
2656     ghf.start_index = 0;     /* Ignored */
2657     htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2658     if (htab_fd < 0) {
2659         goto error_out;
2660     }
2661 
2662     hpte_buf.header.n_valid = 1;
2663     hpte_buf.header.n_invalid = 0;
2664     hpte_buf.header.index = pte_index;
2665     hpte_buf.hpte[0] = pte0;
2666     hpte_buf.hpte[1] = pte1;
2667     /*
2668      * Write the hpte entry.
2669      * CAUTION: write() has the warn_unused_result attribute. Hence we
2670      * need to check the return value, even though we do nothing.
2671      */
2672     if (write(htab_fd, &hpte_buf, sizeof(hpte_buf)) < 0) {
2673         goto out_close;
2674     }
2675 
2676 out_close:
2677     close(htab_fd);
2678     return;
2679 
2680 error_out:
2681     return;
2682 }
2683 
2684 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2685                              uint64_t address, uint32_t data, PCIDevice *dev)
2686 {
2687     return 0;
2688 }
2689 
2690 int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
2691                                 int vector, PCIDevice *dev)
2692 {
2693     return 0;
2694 }
2695 
2696 int kvm_arch_release_virq_post(int virq)
2697 {
2698     return 0;
2699 }
2700 
2701 int kvm_arch_msi_data_to_gsi(uint32_t data)
2702 {
2703     return data & 0xffff;
2704 }
2705 
2706 int kvmppc_enable_hwrng(void)
2707 {
2708     if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2709         return -1;
2710     }
2711 
2712     return kvmppc_enable_hcall(kvm_state, H_RANDOM);
2713 }
2714