xref: /openbmc/qemu/target/ppc/kvm.c (revision c39f95dc)
1 /*
2  * PowerPC implementation of KVM hooks
3  *
4  * Copyright IBM Corp. 2007
5  * Copyright (C) 2011 Freescale Semiconductor, Inc.
6  *
7  * Authors:
8  *  Jerone Young <jyoung5@us.ibm.com>
9  *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10  *  Hollis Blanchard <hollisb@us.ibm.com>
11  *
12  * This work is licensed under the terms of the GNU GPL, version 2 or later.
13  * See the COPYING file in the top-level directory.
14  *
15  */
16 
17 #include "qemu/osdep.h"
18 #include <dirent.h>
19 #include <sys/ioctl.h>
20 #include <sys/vfs.h>
21 
22 #include <linux/kvm.h>
23 
24 #include "qemu-common.h"
25 #include "qapi/error.h"
26 #include "qemu/error-report.h"
27 #include "cpu.h"
28 #include "cpu-models.h"
29 #include "qemu/timer.h"
30 #include "sysemu/sysemu.h"
31 #include "sysemu/hw_accel.h"
32 #include "kvm_ppc.h"
33 #include "sysemu/cpus.h"
34 #include "sysemu/device_tree.h"
35 #include "mmu-hash64.h"
36 
37 #include "hw/sysbus.h"
38 #include "hw/ppc/spapr.h"
39 #include "hw/ppc/spapr_vio.h"
40 #include "hw/ppc/spapr_cpu_core.h"
41 #include "hw/ppc/ppc.h"
42 #include "sysemu/watchdog.h"
43 #include "trace.h"
44 #include "exec/gdbstub.h"
45 #include "exec/memattrs.h"
46 #include "exec/ram_addr.h"
47 #include "sysemu/hostmem.h"
48 #include "qemu/cutils.h"
49 #include "qemu/mmap-alloc.h"
50 #if defined(TARGET_PPC64)
51 #include "hw/ppc/spapr_cpu_core.h"
52 #endif
53 #include "elf.h"
54 #include "sysemu/kvm_int.h"
55 
56 //#define DEBUG_KVM
57 
58 #ifdef DEBUG_KVM
59 #define DPRINTF(fmt, ...) \
60     do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
61 #else
62 #define DPRINTF(fmt, ...) \
63     do { } while (0)
64 #endif
65 
66 #define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"
67 
68 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
69     KVM_CAP_LAST_INFO
70 };
71 
72 static int cap_interrupt_unset = false;
73 static int cap_interrupt_level = false;
74 static int cap_segstate;
75 static int cap_booke_sregs;
76 static int cap_ppc_smt;
77 static int cap_ppc_smt_possible;
78 static int cap_ppc_rma;
79 static int cap_spapr_tce;
80 static int cap_spapr_tce_64;
81 static int cap_spapr_multitce;
82 static int cap_spapr_vfio;
83 static int cap_hior;
84 static int cap_one_reg;
85 static int cap_epr;
86 static int cap_ppc_watchdog;
87 static int cap_papr;
88 static int cap_htab_fd;
89 static int cap_fixup_hcalls;
90 static int cap_htm;             /* Hardware transactional memory support */
91 static int cap_mmu_radix;
92 static int cap_mmu_hash_v3;
93 static int cap_resize_hpt;
94 static int cap_ppc_pvr_compat;
95 
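/* Software breakpoint opcode reported by KVM (KVM_REG_PPC_DEBUG_INST); fetched
 * in kvm_arch_init_vcpu() and used by the sw breakpoint insert/remove helpers. */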
96 static uint32_t debug_inst_opcode;
97 
98 /* XXX We have a race condition where we actually have a level triggered
99  *     interrupt, but the infrastructure can't expose that yet, so the guest
100  *     takes but ignores it, goes to sleep and never gets notified that there's
101  *     still an interrupt pending.
102  *
103  *     As a quick workaround, let's just wake up again 20 ms after we injected
104  *     an interrupt. That way we can ensure that we're always reinjecting
105  *     interrupts in case the guest swallowed them.
106  */
107 static QEMUTimer *idle_timer;
108 
109 static void kvm_kick_cpu(void *opaque)
110 {
111     PowerPCCPU *cpu = opaque;
112 
113     qemu_cpu_kick(CPU(cpu));
114 }
115 
116 /* Check whether we are running with KVM-PR (instead of KVM-HV).  This
117  * should only be used for fallback tests - generally we should use
118  * explicit capabilities for the features we want, rather than
119  * assuming what is/isn't available depending on the KVM variant. */
120 static bool kvmppc_is_pr(KVMState *ks)
121 {
122     /* Assume KVM-PR if the GET_PVINFO capability is available */
123     return kvm_vm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
124 }
125 
126 static int kvm_ppc_register_host_cpu_type(MachineState *ms);
127 
128 int kvm_arch_init(MachineState *ms, KVMState *s)
129 {
130     cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
131     cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
132     cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
133     cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
134     cap_ppc_smt_possible = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT_POSSIBLE);
135     cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
136     cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
137     cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64);
138     cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
139     cap_spapr_vfio = false;
140     cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
141     cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
142     cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
143     cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
144     /* Note: we don't set cap_papr here, because this capability is
145      * only activated later, by kvmppc_set_papr() */
146     cap_htab_fd = kvm_vm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
147     cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
148     cap_ppc_smt = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT);
149     cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
150     cap_mmu_radix = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_RADIX);
151     cap_mmu_hash_v3 = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_HASH_V3);
152     cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT);
153     /*
154      * Note: setting it to false because there is no such capability
155      * in KVM at this moment.
156      *
157      * TODO: call kvm_vm_check_extension() with the right capability
158      * after the kernel starts implementing it. */
159     cap_ppc_pvr_compat = false;
160 
161     if (!cap_interrupt_level) {
162         fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
163                         "VM to stall at times!\n");
164     }
165 
166     kvm_ppc_register_host_cpu_type(ms);
167 
168     return 0;
169 }
170 
171 int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
172 {
173     return 0;
174 }
175 
176 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
177 {
178     CPUPPCState *cenv = &cpu->env;
179     CPUState *cs = CPU(cpu);
180     struct kvm_sregs sregs;
181     int ret;
182 
183     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
184         /* What we're really trying to say is "if we're on BookE, we use
185            the native PVR for now". This is the only sane way to check
186            it though, so we may mislead users into thinking they can run
187            BookE guests on BookS. Let's hope nobody dares to try :) */
188         return 0;
189     } else {
190         if (!cap_segstate) {
191             fprintf(stderr, "kvm error: missing PVR setting capability\n");
192             return -ENOSYS;
193         }
194     }
195 
196     ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
197     if (ret) {
198         return ret;
199     }
200 
201     sregs.pvr = cenv->spr[SPR_PVR];
202     return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
203 }
204 
205 /* Set up a shared TLB array with KVM */
206 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
207 {
208     CPUPPCState *env = &cpu->env;
209     CPUState *cs = CPU(cpu);
210     struct kvm_book3e_206_tlb_params params = {};
211     struct kvm_config_tlb cfg = {};
212     unsigned int entries = 0;
213     int ret, i;
214 
215     if (!kvm_enabled() ||
216         !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
217         return 0;
218     }
219 
220     assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
221 
222     for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
223         params.tlb_sizes[i] = booke206_tlb_size(env, i);
224         params.tlb_ways[i] = booke206_tlb_ways(env, i);
225         entries += params.tlb_sizes[i];
226     }
227 
228     assert(entries == env->nb_tlb);
229     assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
230 
231     env->tlb_dirty = true;
232 
233     cfg.array = (uintptr_t)env->tlb.tlbm;
234     cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
235     cfg.params = (uintptr_t)&params;
236     cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
237 
238     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
239     if (ret < 0) {
240         fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
241                 __func__, strerror(-ret));
242         return ret;
243     }
244 
245     env->kvm_sw_tlb = true;
246     return 0;
247 }
248 
249 
250 #if defined(TARGET_PPC64)
251 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
252                                        struct kvm_ppc_smmu_info *info)
253 {
254     CPUPPCState *env = &cpu->env;
255     CPUState *cs = CPU(cpu);
256 
257     memset(info, 0, sizeof(*info));
258 
259     /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
260      * need to "guess" what the supported page sizes are.
261      *
262      * For that to work we make a few assumptions:
263      *
264      * - Check whether we are running "PR" KVM which only supports 4K
265      *   and 16M pages, but supports them regardless of the backing
266      *   store characteristics. We also don't support 1T segments.
267      *
268      *   This is safe because if HV KVM ever supports that capability or PR
269      *   KVM grows support for more page/segment sizes, those versions
270      *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
271      *   will not hit this fallback.
272      *
273      * - Else we are running HV KVM. This means we only support page
274      *   sizes that fit in the backing store. Additionally we only
275      *   advertise 64K pages if the processor is ARCH 2.06 and we assume
276      *   P7 encodings for the SLB and hash table. Here too, we assume
277      *   support for any newer processor will mean a kernel that
278      *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
279      *   this fallback.
280      */
281     if (kvmppc_is_pr(cs->kvm_state)) {
282         /* No flags */
283         info->flags = 0;
284         info->slb_size = 64;
285 
286         /* Standard 4k base page size segment */
287         info->sps[0].page_shift = 12;
288         info->sps[0].slb_enc = 0;
289         info->sps[0].enc[0].page_shift = 12;
290         info->sps[0].enc[0].pte_enc = 0;
291 
292         /* Standard 16M large page size segment */
293         info->sps[1].page_shift = 24;
294         info->sps[1].slb_enc = SLB_VSID_L;
295         info->sps[1].enc[0].page_shift = 24;
296         info->sps[1].enc[0].pte_enc = 0;
297     } else {
298         int i = 0;
299 
300         /* HV KVM has backing store size restrictions */
301         info->flags = KVM_PPC_PAGE_SIZES_REAL;
302 
303         if (env->mmu_model & POWERPC_MMU_1TSEG) {
304             info->flags |= KVM_PPC_1T_SEGMENTS;
305         }
306 
307         if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 ||
308            POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) {
309             info->slb_size = 32;
310         } else {
311             info->slb_size = 64;
312         }
313 
314         /* Standard 4k base page size segment */
315         info->sps[i].page_shift = 12;
316         info->sps[i].slb_enc = 0;
317         info->sps[i].enc[0].page_shift = 12;
318         info->sps[i].enc[0].pte_enc = 0;
319         i++;
320 
321         /* 64K on MMU 2.06 and later */
322         if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 ||
323             POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) {
324             info->sps[i].page_shift = 16;
325             info->sps[i].slb_enc = 0x110;
326             info->sps[i].enc[0].page_shift = 16;
327             info->sps[i].enc[0].pte_enc = 1;
328             i++;
329         }
330 
331         /* Standard 16M large page size segment */
332         info->sps[i].page_shift = 24;
333         info->sps[i].slb_enc = SLB_VSID_L;
334         info->sps[i].enc[0].page_shift = 24;
335         info->sps[i].enc[0].pte_enc = 0;
336     }
337 }
338 
339 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
340 {
341     CPUState *cs = CPU(cpu);
342     int ret;
343 
344     if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
345         ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
346         if (ret == 0) {
347             return;
348         }
349     }
350 
351     kvm_get_fallback_smmu_info(cpu, info);
352 }
353 
354 struct ppc_radix_page_info *kvm_get_radix_page_info(void)
355 {
356     KVMState *s = KVM_STATE(current_machine->accelerator);
357     struct ppc_radix_page_info *radix_page_info;
358     struct kvm_ppc_rmmu_info rmmu_info;
359     int i;
360 
361     if (!kvm_check_extension(s, KVM_CAP_PPC_MMU_RADIX)) {
362         return NULL;
363     }
364     if (kvm_vm_ioctl(s, KVM_PPC_GET_RMMU_INFO, &rmmu_info)) {
365         return NULL;
366     }
367     radix_page_info = g_malloc0(sizeof(*radix_page_info));
368     radix_page_info->count = 0;
369     for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) {
370         if (rmmu_info.ap_encodings[i]) {
371             radix_page_info->entries[i] = rmmu_info.ap_encodings[i];
372             radix_page_info->count++;
373         }
374     }
375     return radix_page_info;
376 }
377 
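/* Configure the guest MMU mode (radix vs. hash, GTSE) via KVM and translate
 * the ioctl result into the H_* return codes used by the hcall path. */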
378 target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu,
379                                      bool radix, bool gtse,
380                                      uint64_t proc_tbl)
381 {
382     CPUState *cs = CPU(cpu);
383     int ret;
384     uint64_t flags = 0;
385     struct kvm_ppc_mmuv3_cfg cfg = {
386         .process_table = proc_tbl,
387     };
388 
389     if (radix) {
390         flags |= KVM_PPC_MMUV3_RADIX;
391     }
392     if (gtse) {
393         flags |= KVM_PPC_MMUV3_GTSE;
394     }
395     cfg.flags = flags;
396     ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_CONFIGURE_V3_MMU, &cfg);
397     switch (ret) {
398     case 0:
399         return H_SUCCESS;
400     case -EINVAL:
401         return H_PARAMETER;
402     case -ENODEV:
403         return H_NOT_AVAILABLE;
404     default:
405         return H_HARDWARE;
406     }
407 }
408 
409 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
410 {
411     if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
412         return true;
413     }
414 
415     return (1ul << shift) <= rampgsize;
416 }
417 
418 static long max_cpu_page_size;
419 
420 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
421 {
422     static struct kvm_ppc_smmu_info smmu_info;
423     static bool has_smmu_info;
424     CPUPPCState *env = &cpu->env;
425     int iq, ik, jq, jk;
426     bool has_64k_pages = false;
427 
428     /* We only handle page sizes for 64-bit server guests for now */
429     if (!(env->mmu_model & POWERPC_MMU_64)) {
430         return;
431     }
432 
433     /* Collect MMU info from kernel if not already */
434     if (!has_smmu_info) {
435         kvm_get_smmu_info(cpu, &smmu_info);
436         has_smmu_info = true;
437     }
438 
439     if (!max_cpu_page_size) {
440         max_cpu_page_size = qemu_getrampagesize();
441     }
442 
443     /* Convert to QEMU form */
444     memset(&env->sps, 0, sizeof(env->sps));
445 
446     /* If we have HV KVM, we need to forbid CI large pages if our
447      * host page size is smaller than 64K.
448      */
449     if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) {
450         env->ci_large_pages = getpagesize() >= 0x10000;
451     }
452 
453     /*
454      * XXX This loop should be an entry-wide AND of the capabilities that
455      *     the selected CPU has with the capabilities that KVM supports.
456      */
457     for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
458         struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
459         struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
460 
461         if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
462                                  ksps->page_shift)) {
463             continue;
464         }
465         qsps->page_shift = ksps->page_shift;
466         qsps->slb_enc = ksps->slb_enc;
467         for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
468             if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
469                                      ksps->enc[jk].page_shift)) {
470                 continue;
471             }
472             if (ksps->enc[jk].page_shift == 16) {
473                 has_64k_pages = true;
474             }
475             qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
476             qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
477             if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
478                 break;
479             }
480         }
481         if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
482             break;
483         }
484     }
485     env->slb_nr = smmu_info.slb_size;
486     if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
487         env->mmu_model &= ~POWERPC_MMU_1TSEG;
488     }
489     if (!has_64k_pages) {
490         env->mmu_model &= ~POWERPC_MMU_64K;
491     }
492 }
493 
494 bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path)
495 {
496     Object *mem_obj = object_resolve_path(obj_path, NULL);
497     char *mempath = object_property_get_str(mem_obj, "mem-path", NULL);
498     long pagesize;
499 
500     if (mempath) {
501         pagesize = qemu_mempath_getpagesize(mempath);
502         g_free(mempath);
503     } else {
504         pagesize = getpagesize();
505     }
506 
507     return pagesize >= max_cpu_page_size;
508 }
509 
510 #else /* defined (TARGET_PPC64) */
511 
512 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
513 {
514 }
515 
516 bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path)
517 {
518     return true;
519 }
520 
521 #endif /* !defined (TARGET_PPC64) */
522 
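/* Use the CPU's pre-assigned vcpu_id directly as the KVM vcpu id */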
523 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
524 {
525     return POWERPC_CPU(cpu)->vcpu_id;
526 }
527 
528 /* e500 supports 2 h/w breakpoints and 2 watchpoints.
529  * book3s supports only 1 watchpoint, so an array size
530  * of 4 is sufficient for now.
531  */
532 #define MAX_HW_BKPTS 4
533 
534 static struct HWBreakpoint {
535     target_ulong addr;
536     int type;
537 } hw_debug_points[MAX_HW_BKPTS];
538 
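/* Scratch watchpoint used to report a hardware watchpoint hit back to the
 * core debug code (see kvm_handle_debug()) */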
539 static CPUWatchpoint hw_watchpoint;
540 
541 /* By default, no hardware breakpoints or watchpoints are supported */
542 static int max_hw_breakpoint;
543 static int max_hw_watchpoint;
544 static int nb_hw_breakpoint;
545 static int nb_hw_watchpoint;
546 
547 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
548 {
549     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
550         max_hw_breakpoint = 2;
551         max_hw_watchpoint = 2;
552     }
553 
554     if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
555         fprintf(stderr, "Error initializing h/w breakpoints\n");
556         return;
557     }
558 }
559 
560 int kvm_arch_init_vcpu(CPUState *cs)
561 {
562     PowerPCCPU *cpu = POWERPC_CPU(cs);
563     CPUPPCState *cenv = &cpu->env;
564     int ret;
565 
566     /* Gather server mmu info from KVM and update the CPU state */
567     kvm_fixup_page_sizes(cpu);
568 
569     /* Synchronize sregs with kvm */
570     ret = kvm_arch_sync_sregs(cpu);
571     if (ret) {
572         if (ret == -EINVAL) {
573             error_report("Register sync failed... If you're using kvm-hv.ko,"
574                          " only \"-cpu host\" is possible");
575         }
576         return ret;
577     }
578 
579     idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
580 
581     switch (cenv->mmu_model) {
582     case POWERPC_MMU_BOOKE206:
583         /* This target supports access to KVM's guest TLB */
584         ret = kvm_booke206_tlb_init(cpu);
585         break;
586     case POWERPC_MMU_2_07:
587         if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
588             /* KVM-HV has transactional memory on POWER8 even without the
589              * KVM_CAP_PPC_HTM extension, so enable it here instead, as
590              * long as it's available to userspace on the host. */
591             if (qemu_getauxval(AT_HWCAP2) & PPC_FEATURE2_HAS_HTM) {
592                 cap_htm = true;
593             }
594         }
595         break;
596     default:
597         break;
598     }
599 
600     kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
601     kvmppc_hw_debug_points_init(cenv);
602 
603     return ret;
604 }
605 
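/* Push the complete shadow TLB back to KVM by marking every entry dirty */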
606 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
607 {
608     CPUPPCState *env = &cpu->env;
609     CPUState *cs = CPU(cpu);
610     struct kvm_dirty_tlb dirty_tlb;
611     unsigned char *bitmap;
612     int ret;
613 
614     if (!env->kvm_sw_tlb) {
615         return;
616     }
617 
618     bitmap = g_malloc((env->nb_tlb + 7) / 8);
619     memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
620 
621     dirty_tlb.bitmap = (uintptr_t)bitmap;
622     dirty_tlb.num_dirty = env->nb_tlb;
623 
624     ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
625     if (ret) {
626         fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
627                 __func__, strerror(-ret));
628     }
629 
630     g_free(bitmap);
631 }
632 
633 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
634 {
635     PowerPCCPU *cpu = POWERPC_CPU(cs);
636     CPUPPCState *env = &cpu->env;
637     union {
638         uint32_t u32;
639         uint64_t u64;
640     } val;
641     struct kvm_one_reg reg = {
642         .id = id,
643         .addr = (uintptr_t) &val,
644     };
645     int ret;
646 
647     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
648     if (ret != 0) {
649         trace_kvm_failed_spr_get(spr, strerror(errno));
650     } else {
651         switch (id & KVM_REG_SIZE_MASK) {
652         case KVM_REG_SIZE_U32:
653             env->spr[spr] = val.u32;
654             break;
655 
656         case KVM_REG_SIZE_U64:
657             env->spr[spr] = val.u64;
658             break;
659 
660         default:
661             /* Don't handle this size yet */
662             abort();
663         }
664     }
665 }
666 
667 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
668 {
669     PowerPCCPU *cpu = POWERPC_CPU(cs);
670     CPUPPCState *env = &cpu->env;
671     union {
672         uint32_t u32;
673         uint64_t u64;
674     } val;
675     struct kvm_one_reg reg = {
676         .id = id,
677         .addr = (uintptr_t) &val,
678     };
679     int ret;
680 
681     switch (id & KVM_REG_SIZE_MASK) {
682     case KVM_REG_SIZE_U32:
683         val.u32 = env->spr[spr];
684         break;
685 
686     case KVM_REG_SIZE_U64:
687         val.u64 = env->spr[spr];
688         break;
689 
690     default:
691         /* Don't handle this size yet */
692         abort();
693     }
694 
695     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
696     if (ret != 0) {
697         trace_kvm_failed_spr_set(spr, strerror(errno));
698     }
699 }
700 
701 static int kvm_put_fp(CPUState *cs)
702 {
703     PowerPCCPU *cpu = POWERPC_CPU(cs);
704     CPUPPCState *env = &cpu->env;
705     struct kvm_one_reg reg;
706     int i;
707     int ret;
708 
709     if (env->insns_flags & PPC_FLOAT) {
710         uint64_t fpscr = env->fpscr;
711         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
712 
713         reg.id = KVM_REG_PPC_FPSCR;
714         reg.addr = (uintptr_t)&fpscr;
715         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
716         if (ret < 0) {
717             DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
718             return ret;
719         }
720 
721         for (i = 0; i < 32; i++) {
722             uint64_t vsr[2];
723 
724 #ifdef HOST_WORDS_BIGENDIAN
725             vsr[0] = float64_val(env->fpr[i]);
726             vsr[1] = env->vsr[i];
727 #else
728             vsr[0] = env->vsr[i];
729             vsr[1] = float64_val(env->fpr[i]);
730 #endif
731             reg.addr = (uintptr_t) &vsr;
732             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
733 
734             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
735             if (ret < 0) {
736                 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
737                         i, strerror(errno));
738                 return ret;
739             }
740         }
741     }
742 
743     if (env->insns_flags & PPC_ALTIVEC) {
744         reg.id = KVM_REG_PPC_VSCR;
745         reg.addr = (uintptr_t)&env->vscr;
746         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
747         if (ret < 0) {
748             DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
749             return ret;
750         }
751 
752         for (i = 0; i < 32; i++) {
753             reg.id = KVM_REG_PPC_VR(i);
754             reg.addr = (uintptr_t)&env->avr[i];
755             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
756             if (ret < 0) {
757                 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
758                 return ret;
759             }
760         }
761     }
762 
763     return 0;
764 }
765 
766 static int kvm_get_fp(CPUState *cs)
767 {
768     PowerPCCPU *cpu = POWERPC_CPU(cs);
769     CPUPPCState *env = &cpu->env;
770     struct kvm_one_reg reg;
771     int i;
772     int ret;
773 
774     if (env->insns_flags & PPC_FLOAT) {
775         uint64_t fpscr;
776         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
777 
778         reg.id = KVM_REG_PPC_FPSCR;
779         reg.addr = (uintptr_t)&fpscr;
780         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
781         if (ret < 0) {
782             DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
783             return ret;
784         } else {
785             env->fpscr = fpscr;
786         }
787 
788         for (i = 0; i < 32; i++) {
789             uint64_t vsr[2];
790 
791             reg.addr = (uintptr_t) &vsr;
792             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
793 
794             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
795             if (ret < 0) {
796                 DPRINTF("Unable to get %s%d from KVM: %s\n",
797                         vsx ? "VSR" : "FPR", i, strerror(errno));
798                 return ret;
799             } else {
800 #ifdef HOST_WORDS_BIGENDIAN
801                 env->fpr[i] = vsr[0];
802                 if (vsx) {
803                     env->vsr[i] = vsr[1];
804                 }
805 #else
806                 env->fpr[i] = vsr[1];
807                 if (vsx) {
808                     env->vsr[i] = vsr[0];
809                 }
810 #endif
811             }
812         }
813     }
814 
815     if (env->insns_flags & PPC_ALTIVEC) {
816         reg.id = KVM_REG_PPC_VSCR;
817         reg.addr = (uintptr_t)&env->vscr;
818         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
819         if (ret < 0) {
820             DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
821             return ret;
822         }
823 
824         for (i = 0; i < 32; i++) {
825             reg.id = KVM_REG_PPC_VR(i);
826             reg.addr = (uintptr_t)&env->avr[i];
827             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
828             if (ret < 0) {
829                 DPRINTF("Unable to get VR%d from KVM: %s\n",
830                         i, strerror(errno));
831                 return ret;
832             }
833         }
834     }
835 
836     return 0;
837 }
838 
839 #if defined(TARGET_PPC64)
840 static int kvm_get_vpa(CPUState *cs)
841 {
842     PowerPCCPU *cpu = POWERPC_CPU(cs);
843     CPUPPCState *env = &cpu->env;
844     struct kvm_one_reg reg;
845     int ret;
846 
847     reg.id = KVM_REG_PPC_VPA_ADDR;
848     reg.addr = (uintptr_t)&env->vpa_addr;
849     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
850     if (ret < 0) {
851         DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
852         return ret;
853     }
854 
855     assert((uintptr_t)&env->slb_shadow_size
856            == ((uintptr_t)&env->slb_shadow_addr + 8));
857     reg.id = KVM_REG_PPC_VPA_SLB;
858     reg.addr = (uintptr_t)&env->slb_shadow_addr;
859     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
860     if (ret < 0) {
861         DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
862                 strerror(errno));
863         return ret;
864     }
865 
866     assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
867     reg.id = KVM_REG_PPC_VPA_DTL;
868     reg.addr = (uintptr_t)&env->dtl_addr;
869     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
870     if (ret < 0) {
871         DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
872                 strerror(errno));
873         return ret;
874     }
875 
876     return 0;
877 }
878 
879 static int kvm_put_vpa(CPUState *cs)
880 {
881     PowerPCCPU *cpu = POWERPC_CPU(cs);
882     CPUPPCState *env = &cpu->env;
883     struct kvm_one_reg reg;
884     int ret;
885 
886     /* SLB shadow or DTL can't be registered unless a master VPA is
887      * registered.  That means when restoring state, if a VPA *is*
888      * registered, we need to set that up first.  If not, we need to
889      * deregister the others before deregistering the master VPA */
890     assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
891 
892     if (env->vpa_addr) {
893         reg.id = KVM_REG_PPC_VPA_ADDR;
894         reg.addr = (uintptr_t)&env->vpa_addr;
895         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
896         if (ret < 0) {
897             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
898             return ret;
899         }
900     }
901 
902     assert((uintptr_t)&env->slb_shadow_size
903            == ((uintptr_t)&env->slb_shadow_addr + 8));
904     reg.id = KVM_REG_PPC_VPA_SLB;
905     reg.addr = (uintptr_t)&env->slb_shadow_addr;
906     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
907     if (ret < 0) {
908         DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
909         return ret;
910     }
911 
912     assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
913     reg.id = KVM_REG_PPC_VPA_DTL;
914     reg.addr = (uintptr_t)&env->dtl_addr;
915     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
916     if (ret < 0) {
917         DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
918                 strerror(errno));
919         return ret;
920     }
921 
922     if (!env->vpa_addr) {
923         reg.id = KVM_REG_PPC_VPA_ADDR;
924         reg.addr = (uintptr_t)&env->vpa_addr;
925         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
926         if (ret < 0) {
927             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
928             return ret;
929         }
930     }
931 
932     return 0;
933 }
934 #endif /* TARGET_PPC64 */
935 
936 int kvmppc_put_books_sregs(PowerPCCPU *cpu)
937 {
938     CPUPPCState *env = &cpu->env;
939     struct kvm_sregs sregs;
940     int i;
941 
942     sregs.pvr = env->spr[SPR_PVR];
943 
944     if (cpu->vhyp) {
945         PPCVirtualHypervisorClass *vhc =
946             PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
947         sregs.u.s.sdr1 = vhc->encode_hpt_for_kvm_pr(cpu->vhyp);
948     } else {
949         sregs.u.s.sdr1 = env->spr[SPR_SDR1];
950     }
951 
952     /* Sync SLB */
953 #ifdef TARGET_PPC64
954     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
955         sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
956         if (env->slb[i].esid & SLB_ESID_V) {
957             sregs.u.s.ppc64.slb[i].slbe |= i;
958         }
959         sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
960     }
961 #endif
962 
963     /* Sync SRs */
964     for (i = 0; i < 16; i++) {
965         sregs.u.s.ppc32.sr[i] = env->sr[i];
966     }
967 
968     /* Sync BATs */
969     for (i = 0; i < 8; i++) {
970         /* Beware. We have to swap the upper and lower 32-bit halves here */
971         sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
972             | env->DBAT[1][i];
973         sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
974             | env->IBAT[1][i];
975     }
976 
977     return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
978 }
979 
980 int kvm_arch_put_registers(CPUState *cs, int level)
981 {
982     PowerPCCPU *cpu = POWERPC_CPU(cs);
983     CPUPPCState *env = &cpu->env;
984     struct kvm_regs regs;
985     int ret;
986     int i;
987 
988     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
989     if (ret < 0) {
990         return ret;
991     }
992 
993     regs.ctr = env->ctr;
994     regs.lr  = env->lr;
995     regs.xer = cpu_read_xer(env);
996     regs.msr = env->msr;
997     regs.pc = env->nip;
998 
999     regs.srr0 = env->spr[SPR_SRR0];
1000     regs.srr1 = env->spr[SPR_SRR1];
1001 
1002     regs.sprg0 = env->spr[SPR_SPRG0];
1003     regs.sprg1 = env->spr[SPR_SPRG1];
1004     regs.sprg2 = env->spr[SPR_SPRG2];
1005     regs.sprg3 = env->spr[SPR_SPRG3];
1006     regs.sprg4 = env->spr[SPR_SPRG4];
1007     regs.sprg5 = env->spr[SPR_SPRG5];
1008     regs.sprg6 = env->spr[SPR_SPRG6];
1009     regs.sprg7 = env->spr[SPR_SPRG7];
1010 
1011     regs.pid = env->spr[SPR_BOOKE_PID];
1012 
1013     for (i = 0; i < 32; i++)
1014         regs.gpr[i] = env->gpr[i];
1015 
1016     regs.cr = 0;
1017     for (i = 0; i < 8; i++) {
1018         regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
1019     }
1020 
1021     ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
1022     if (ret < 0)
1023         return ret;
1024 
1025     kvm_put_fp(cs);
1026 
1027     if (env->tlb_dirty) {
1028         kvm_sw_tlb_put(cpu);
1029         env->tlb_dirty = false;
1030     }
1031 
1032     if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
1033         ret = kvmppc_put_books_sregs(cpu);
1034         if (ret < 0) {
1035             return ret;
1036         }
1037     }
1038 
1039     if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
1040         kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1041     }
1042 
1043     if (cap_one_reg) {
1044         int i;
1045 
1046         /* We deliberately ignore errors here: for kernels which have
1047          * the ONE_REG calls but don't support the specific
1048          * registers, there's a reasonable chance things will still
1049          * work, at least until we try to migrate. */
1050         for (i = 0; i < 1024; i++) {
1051             uint64_t id = env->spr_cb[i].one_reg_id;
1052 
1053             if (id != 0) {
1054                 kvm_put_one_spr(cs, id, i);
1055             }
1056         }
1057 
1058 #ifdef TARGET_PPC64
1059         if (msr_ts) {
1060             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1061                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1062             }
1063             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1064                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1065             }
1066             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1067             kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1068             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1069             kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1070             kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1071             kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1072             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1073             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1074             kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1075             kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1076         }
1077 
1078         if (cap_papr) {
1079             if (kvm_put_vpa(cs) < 0) {
1080                 DPRINTF("Warning: Unable to set VPA information to KVM\n");
1081             }
1082         }
1083 
1084         kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1085 #endif /* TARGET_PPC64 */
1086     }
1087 
1088     return ret;
1089 }
1090 
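/* Recompute a BookE exception vector from the given IVOR SPR and the IVPR base */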
1091 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1092 {
1093      env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1094 }
1095 
1096 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1097 {
1098     CPUPPCState *env = &cpu->env;
1099     struct kvm_sregs sregs;
1100     int ret;
1101 
1102     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1103     if (ret < 0) {
1104         return ret;
1105     }
1106 
1107     if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1108         env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1109         env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1110         env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1111         env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1112         env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1113         env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1114         env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1115         env->spr[SPR_DECR] = sregs.u.e.dec;
1116         env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1117         env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1118         env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1119     }
1120 
1121     if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1122         env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1123         env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1124         env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1125         env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1126         env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1127     }
1128 
1129     if (sregs.u.e.features & KVM_SREGS_E_64) {
1130         env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1131     }
1132 
1133     if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1134         env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1135     }
1136 
1137     if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1138         env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1139         kvm_sync_excp(env, POWERPC_EXCP_CRITICAL,  SPR_BOOKE_IVOR0);
1140         env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1141         kvm_sync_excp(env, POWERPC_EXCP_MCHECK,  SPR_BOOKE_IVOR1);
1142         env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1143         kvm_sync_excp(env, POWERPC_EXCP_DSI,  SPR_BOOKE_IVOR2);
1144         env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1145         kvm_sync_excp(env, POWERPC_EXCP_ISI,  SPR_BOOKE_IVOR3);
1146         env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1147         kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL,  SPR_BOOKE_IVOR4);
1148         env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1149         kvm_sync_excp(env, POWERPC_EXCP_ALIGN,  SPR_BOOKE_IVOR5);
1150         env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1151         kvm_sync_excp(env, POWERPC_EXCP_PROGRAM,  SPR_BOOKE_IVOR6);
1152         env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1153         kvm_sync_excp(env, POWERPC_EXCP_FPU,  SPR_BOOKE_IVOR7);
1154         env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1155         kvm_sync_excp(env, POWERPC_EXCP_SYSCALL,  SPR_BOOKE_IVOR8);
1156         env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1157         kvm_sync_excp(env, POWERPC_EXCP_APU,  SPR_BOOKE_IVOR9);
1158         env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1159         kvm_sync_excp(env, POWERPC_EXCP_DECR,  SPR_BOOKE_IVOR10);
1160         env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1161         kvm_sync_excp(env, POWERPC_EXCP_FIT,  SPR_BOOKE_IVOR11);
1162         env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1163         kvm_sync_excp(env, POWERPC_EXCP_WDT,  SPR_BOOKE_IVOR12);
1164         env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1165         kvm_sync_excp(env, POWERPC_EXCP_DTLB,  SPR_BOOKE_IVOR13);
1166         env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1167         kvm_sync_excp(env, POWERPC_EXCP_ITLB,  SPR_BOOKE_IVOR14);
1168         env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1169         kvm_sync_excp(env, POWERPC_EXCP_DEBUG,  SPR_BOOKE_IVOR15);
1170 
1171         if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1172             env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1173             kvm_sync_excp(env, POWERPC_EXCP_SPEU,  SPR_BOOKE_IVOR32);
1174             env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1175             kvm_sync_excp(env, POWERPC_EXCP_EFPDI,  SPR_BOOKE_IVOR33);
1176             env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1177             kvm_sync_excp(env, POWERPC_EXCP_EFPRI,  SPR_BOOKE_IVOR34);
1178         }
1179 
1180         if (sregs.u.e.features & KVM_SREGS_E_PM) {
1181             env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1182             kvm_sync_excp(env, POWERPC_EXCP_EPERFM,  SPR_BOOKE_IVOR35);
1183         }
1184 
1185         if (sregs.u.e.features & KVM_SREGS_E_PC) {
1186             env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1187             kvm_sync_excp(env, POWERPC_EXCP_DOORI,  SPR_BOOKE_IVOR36);
1188             env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1189             kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1190         }
1191     }
1192 
1193     if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1194         env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1195         env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1196         env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1197         env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1198         env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1199         env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1200         env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1201         env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1202         env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1203         env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1204     }
1205 
1206     if (sregs.u.e.features & KVM_SREGS_EXP) {
1207         env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1208     }
1209 
1210     if (sregs.u.e.features & KVM_SREGS_E_PD) {
1211         env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1212         env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1213     }
1214 
1215     if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1216         env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1217         env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1218         env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1219 
1220         if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1221             env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1222             env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1223         }
1224     }
1225 
1226     return 0;
1227 }
1228 
1229 static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1230 {
1231     CPUPPCState *env = &cpu->env;
1232     struct kvm_sregs sregs;
1233     int ret;
1234     int i;
1235 
1236     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1237     if (ret < 0) {
1238         return ret;
1239     }
1240 
1241     if (!cpu->vhyp) {
1242         ppc_store_sdr1(env, sregs.u.s.sdr1);
1243     }
1244 
1245     /* Sync SLB */
1246 #ifdef TARGET_PPC64
1247     /*
1248      * The packed SLB array we get from KVM_GET_SREGS only contains
1249      * information about valid entries. So we flush our internal copy
1250      * to get rid of stale ones, then put all valid SLB entries back
1251      * in.
1252      */
1253     memset(env->slb, 0, sizeof(env->slb));
1254     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1255         target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1256         target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1257         /*
1258          * Only restore valid entries
1259          */
1260         if (rb & SLB_ESID_V) {
1261             ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1262         }
1263     }
1264 #endif
1265 
1266     /* Sync SRs */
1267     for (i = 0; i < 16; i++) {
1268         env->sr[i] = sregs.u.s.ppc32.sr[i];
1269     }
1270 
1271     /* Sync BATs */
1272     for (i = 0; i < 8; i++) {
1273         env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1274         env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1275         env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1276         env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1277     }
1278 
1279     return 0;
1280 }
1281 
1282 int kvm_arch_get_registers(CPUState *cs)
1283 {
1284     PowerPCCPU *cpu = POWERPC_CPU(cs);
1285     CPUPPCState *env = &cpu->env;
1286     struct kvm_regs regs;
1287     uint32_t cr;
1288     int i, ret;
1289 
1290     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1291     if (ret < 0)
1292         return ret;
1293 
1294     cr = regs.cr;
1295     for (i = 7; i >= 0; i--) {
1296         env->crf[i] = cr & 15;
1297         cr >>= 4;
1298     }
1299 
1300     env->ctr = regs.ctr;
1301     env->lr = regs.lr;
1302     cpu_write_xer(env, regs.xer);
1303     env->msr = regs.msr;
1304     env->nip = regs.pc;
1305 
1306     env->spr[SPR_SRR0] = regs.srr0;
1307     env->spr[SPR_SRR1] = regs.srr1;
1308 
1309     env->spr[SPR_SPRG0] = regs.sprg0;
1310     env->spr[SPR_SPRG1] = regs.sprg1;
1311     env->spr[SPR_SPRG2] = regs.sprg2;
1312     env->spr[SPR_SPRG3] = regs.sprg3;
1313     env->spr[SPR_SPRG4] = regs.sprg4;
1314     env->spr[SPR_SPRG5] = regs.sprg5;
1315     env->spr[SPR_SPRG6] = regs.sprg6;
1316     env->spr[SPR_SPRG7] = regs.sprg7;
1317 
1318     env->spr[SPR_BOOKE_PID] = regs.pid;
1319 
1320     for (i = 0; i < 32; i++)
1321         env->gpr[i] = regs.gpr[i];
1322 
1323     kvm_get_fp(cs);
1324 
1325     if (cap_booke_sregs) {
1326         ret = kvmppc_get_booke_sregs(cpu);
1327         if (ret < 0) {
1328             return ret;
1329         }
1330     }
1331 
1332     if (cap_segstate) {
1333         ret = kvmppc_get_books_sregs(cpu);
1334         if (ret < 0) {
1335             return ret;
1336         }
1337     }
1338 
1339     if (cap_hior) {
1340         kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1341     }
1342 
1343     if (cap_one_reg) {
1344         int i;
1345 
1346         /* We deliberately ignore errors here: for kernels which have
1347          * the ONE_REG calls but don't support the specific
1348          * registers, there's a reasonable chance things will still
1349          * work, at least until we try to migrate. */
1350         for (i = 0; i < 1024; i++) {
1351             uint64_t id = env->spr_cb[i].one_reg_id;
1352 
1353             if (id != 0) {
1354                 kvm_get_one_spr(cs, id, i);
1355             }
1356         }
1357 
1358 #ifdef TARGET_PPC64
1359         if (msr_ts) {
1360             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1361                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1362             }
1363             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1364                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1365             }
1366             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1367             kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1368             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1369             kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1370             kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1371             kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1372             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1373             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1374             kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1375             kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1376         }
1377 
1378         if (cap_papr) {
1379             if (kvm_get_vpa(cs) < 0) {
1380                 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1381             }
1382         }
1383 
1384         kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1385 #endif
1386     }
1387 
1388     return 0;
1389 }
1390 
1391 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1392 {
1393     unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1394 
1395     if (irq != PPC_INTERRUPT_EXT) {
1396         return 0;
1397     }
1398 
1399     if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1400         return 0;
1401     }
1402 
1403     kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1404 
1405     return 0;
1406 }
1407 
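/* Pick the external interrupt input pin matching the emulated core family */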
1408 #if defined(TARGET_PPCEMB)
1409 #define PPC_INPUT_INT PPC40x_INPUT_INT
1410 #elif defined(TARGET_PPC64)
1411 #define PPC_INPUT_INT PPC970_INPUT_INT
1412 #else
1413 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1414 #endif
1415 
1416 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1417 {
1418     PowerPCCPU *cpu = POWERPC_CPU(cs);
1419     CPUPPCState *env = &cpu->env;
1420     int r;
1421     unsigned irq;
1422 
1423     qemu_mutex_lock_iothread();
1424 
1425     /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1426      * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1427     if (!cap_interrupt_level &&
1428         run->ready_for_interrupt_injection &&
1429         (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1430         (env->irq_input_state & (1<<PPC_INPUT_INT)))
1431     {
1432         /* For now KVM disregards the 'irq' argument. However, in the
1433          * future KVM could cache it in-kernel to avoid a heavyweight exit
1434          * when reading the UIC.
1435          */
1436         irq = KVM_INTERRUPT_SET;
1437 
1438         DPRINTF("injected interrupt %d\n", irq);
1439         r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1440         if (r < 0) {
1441             printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1442         }
1443 
1444         /* Always wake up soon in case the interrupt was level based */
1445         timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1446                        (NANOSECONDS_PER_SECOND / 50));
1447     }
1448 
1449     /* We don't know if there are more interrupts pending after this. However,
1450      * the guest will return to userspace in the course of handling this one
1451      * anyway, so we will get a chance to deliver the rest. */
1452 
1453     qemu_mutex_unlock_iothread();
1454 }
1455 
1456 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1457 {
1458     return MEMTXATTRS_UNSPECIFIED;
1459 }
1460 
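/* A non-zero return tells the generic KVM loop to skip KVM_RUN; report the
 * halted state so a halted vcpu is not entered. */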
1461 int kvm_arch_process_async_events(CPUState *cs)
1462 {
1463     return cs->halted;
1464 }
1465 
1466 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1467 {
1468     CPUState *cs = CPU(cpu);
1469     CPUPPCState *env = &cpu->env;
1470 
1471     if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1472         cs->halted = 1;
1473         cs->exception_index = EXCP_HLT;
1474     }
1475 
1476     return 0;
1477 }
1478 
1479 /* Map DCR accesses to the existing QEMU DCR emulation */
1480 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1481 {
1482     if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1483         fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1484 
1485     return 0;
1486 }
1487 
1488 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1489 {
1490     if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1491         fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1492 
1493     return 0;
1494 }
1495 
1496 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1497 {
1498     /* Mixed endian case is not handled */
1499     uint32_t sc = debug_inst_opcode;
1500 
1501     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1502                             sizeof(sc), 0) ||
1503         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1504         return -EINVAL;
1505     }
1506 
1507     return 0;
1508 }
1509 
1510 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1511 {
1512     uint32_t sc;
1513 
1514     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1515         sc != debug_inst_opcode ||
1516         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1517                             sizeof(sc), 1)) {
1518         return -EINVAL;
1519     }
1520 
1521     return 0;
1522 }
1523 
1524 static int find_hw_breakpoint(target_ulong addr, int type)
1525 {
1526     int n;
1527 
1528     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1529            <= ARRAY_SIZE(hw_debug_points));
1530 
1531     for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1532         if (hw_debug_points[n].addr == addr &&
1533              hw_debug_points[n].type == type) {
1534             return n;
1535         }
1536     }
1537 
1538     return -1;
1539 }
1540 
1541 static int find_hw_watchpoint(target_ulong addr, int *flag)
1542 {
1543     int n;
1544 
1545     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1546     if (n >= 0) {
1547         *flag = BP_MEM_ACCESS;
1548         return n;
1549     }
1550 
1551     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1552     if (n >= 0) {
1553         *flag = BP_MEM_WRITE;
1554         return n;
1555     }
1556 
1557     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1558     if (n >= 0) {
1559         *flag = BP_MEM_READ;
1560         return n;
1561     }
1562 
1563     return -1;
1564 }
1565 
1566 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1567                                   target_ulong len, int type)
1568 {
1569     if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1570         return -ENOBUFS;
1571     }
1572 
1573     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1574     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1575 
1576     switch (type) {
1577     case GDB_BREAKPOINT_HW:
1578         if (nb_hw_breakpoint >= max_hw_breakpoint) {
1579             return -ENOBUFS;
1580         }
1581 
1582         if (find_hw_breakpoint(addr, type) >= 0) {
1583             return -EEXIST;
1584         }
1585 
1586         nb_hw_breakpoint++;
1587         break;
1588 
1589     case GDB_WATCHPOINT_WRITE:
1590     case GDB_WATCHPOINT_READ:
1591     case GDB_WATCHPOINT_ACCESS:
1592         if (nb_hw_watchpoint >= max_hw_watchpoint) {
1593             return -ENOBUFS;
1594         }
1595 
1596         if (find_hw_breakpoint(addr, type) >= 0) {
1597             return -EEXIST;
1598         }
1599 
1600         nb_hw_watchpoint++;
1601         break;
1602 
1603     default:
1604         return -ENOSYS;
1605     }
1606 
1607     return 0;
1608 }
1609 
1610 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1611                                   target_ulong len, int type)
1612 {
1613     int n;
1614 
1615     n = find_hw_breakpoint(addr, type);
1616     if (n < 0) {
1617         return -ENOENT;
1618     }
1619 
1620     switch (type) {
1621     case GDB_BREAKPOINT_HW:
1622         nb_hw_breakpoint--;
1623         break;
1624 
1625     case GDB_WATCHPOINT_WRITE:
1626     case GDB_WATCHPOINT_READ:
1627     case GDB_WATCHPOINT_ACCESS:
1628         nb_hw_watchpoint--;
1629         break;
1630 
1631     default:
1632         return -ENOSYS;
1633     }
1634     hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1635 
1636     return 0;
1637 }
1638 
1639 void kvm_arch_remove_all_hw_breakpoints(void)
1640 {
1641     nb_hw_breakpoint = nb_hw_watchpoint = 0;
1642 }
1643 
1644 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1645 {
1646     int n;
1647 
1648     /* Software Breakpoint updates */
1649     if (kvm_sw_breakpoints_active(cs)) {
1650         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1651     }
1652 
1653     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1654            <= ARRAY_SIZE(hw_debug_points));
1655     assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1656 
1657     if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1658         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1659         memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1660         for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1661             switch (hw_debug_points[n].type) {
1662             case GDB_BREAKPOINT_HW:
1663                 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1664                 break;
1665             case GDB_WATCHPOINT_WRITE:
1666                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1667                 break;
1668             case GDB_WATCHPOINT_READ:
1669                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1670                 break;
1671             case GDB_WATCHPOINT_ACCESS:
1672                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1673                                         KVMPPC_DEBUG_WATCH_READ;
1674                 break;
1675             default:
1676                 cpu_abort(cs, "Unsupported breakpoint type\n");
1677             }
1678             dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1679         }
1680     }
1681 }
1682 
1683 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1684 {
1685     CPUState *cs = CPU(cpu);
1686     CPUPPCState *env = &cpu->env;
1687     struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1688     int handle = 0;
1689     int n;
1690     int flag = 0;
1691 
1692     if (cs->singlestep_enabled) {
1693         handle = 1;
1694     } else if (arch_info->status) {
1695         if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1696             if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1697                 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1698                 if (n >= 0) {
1699                     handle = 1;
1700                 }
1701             } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1702                                             KVMPPC_DEBUG_WATCH_WRITE)) {
1703                 n = find_hw_watchpoint(arch_info->address, &flag);
1704                 if (n >= 0) {
1705                     handle = 1;
1706                     cs->watchpoint_hit = &hw_watchpoint;
1707                     hw_watchpoint.vaddr = hw_debug_points[n].addr;
1708                     hw_watchpoint.flags = flag;
1709                 }
1710             }
1711         }
1712     } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1713         handle = 1;
1714     } else {
1715         /* QEMU cannot handle this debug exception, so inject a program
1716          * exception into the guest instead; yes, a program exception,
1717          * NOT a debug exception!
1718          * While QEMU itself is using the debug resources, debug
1719          * exceptions must always be delivered to QEMU.  To achieve this
1720          * we set MSR_DE and also MSRP_DEP so the guest cannot change
1721          * MSR_DE.
1722          * When emulating debug resources for the guest, the guest must
1723          * control MSR_DE (enabling/disabling the debug interrupt on
1724          * demand).  Supporting both configurations at once is NOT
1725          * possible, so debug resources cannot be shared between QEMU
1726          * and the guest on BookE.
1727          * In the current design QEMU takes priority over the guest: if
1728          * QEMU is using the debug resources, the guest cannot use them.
1729          * For software breakpoints QEMU uses a privileged instruction,
1730          * so we cannot be here because the guest raised a debug
1731          * exception; the only possibility is that the guest executed a
1732          * privileged or illegal instruction, which is why we inject a
1733          * program interrupt.
1734          */
1735 
1736         cpu_synchronize_state(cs);
1737         /* env->nip is PC, so increment this by 4 to use
1738          * ppc_cpu_do_interrupt(), which sets srr0 = env->nip - 4.
1739          */
1740         env->nip += 4;
1741         cs->exception_index = POWERPC_EXCP_PROGRAM;
1742         env->error_code = POWERPC_EXCP_INVAL;
1743         ppc_cpu_do_interrupt(cs);
1744     }
1745 
1746     return handle;
1747 }
1748 
1749 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1750 {
1751     PowerPCCPU *cpu = POWERPC_CPU(cs);
1752     CPUPPCState *env = &cpu->env;
1753     int ret;
1754 
1755     qemu_mutex_lock_iothread();
1756 
1757     switch (run->exit_reason) {
1758     case KVM_EXIT_DCR:
1759         if (run->dcr.is_write) {
1760             DPRINTF("handle dcr write\n");
1761             ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1762         } else {
1763             DPRINTF("handle dcr read\n");
1764             ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1765         }
1766         break;
1767     case KVM_EXIT_HLT:
1768         DPRINTF("handle halt\n");
1769         ret = kvmppc_handle_halt(cpu);
1770         break;
1771 #if defined(TARGET_PPC64)
1772     case KVM_EXIT_PAPR_HCALL:
1773         DPRINTF("handle PAPR hypercall\n");
1774         run->papr_hcall.ret = spapr_hypercall(cpu,
1775                                               run->papr_hcall.nr,
1776                                               run->papr_hcall.args);
1777         ret = 0;
1778         break;
1779 #endif
1780     case KVM_EXIT_EPR:
1781         DPRINTF("handle epr\n");
1782         run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1783         ret = 0;
1784         break;
1785     case KVM_EXIT_WATCHDOG:
1786         DPRINTF("handle watchdog expiry\n");
1787         watchdog_perform_action();
1788         ret = 0;
1789         break;
1790 
1791     case KVM_EXIT_DEBUG:
1792         DPRINTF("handle debug exception\n");
1793         if (kvm_handle_debug(cpu, run)) {
1794             ret = EXCP_DEBUG;
1795             break;
1796         }
1797         /* re-enter, this exception was guest-internal */
1798         ret = 0;
1799         break;
1800 
1801     default:
1802         fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1803         ret = -1;
1804         break;
1805     }
1806 
1807     qemu_mutex_unlock_iothread();
1808     return ret;
1809 }
1810 
1811 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1812 {
1813     CPUState *cs = CPU(cpu);
1814     uint32_t bits = tsr_bits;
1815     struct kvm_one_reg reg = {
1816         .id = KVM_REG_PPC_OR_TSR,
1817         .addr = (uintptr_t) &bits,
1818     };
1819 
1820     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1821 }
1822 
1823 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1824 {
1825 
1826     CPUState *cs = CPU(cpu);
1827     uint32_t bits = tsr_bits;
1828     struct kvm_one_reg reg = {
1829         .id = KVM_REG_PPC_CLEAR_TSR,
1830         .addr = (uintptr_t) &bits,
1831     };
1832 
1833     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1834 }
1835 
1836 int kvmppc_set_tcr(PowerPCCPU *cpu)
1837 {
1838     CPUState *cs = CPU(cpu);
1839     CPUPPCState *env = &cpu->env;
1840     uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1841 
1842     struct kvm_one_reg reg = {
1843         .id = KVM_REG_PPC_TCR,
1844         .addr = (uintptr_t) &tcr,
1845     };
1846 
1847     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1848 }
1849 
1850 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1851 {
1852     CPUState *cs = CPU(cpu);
1853     int ret;
1854 
1855     if (!kvm_enabled()) {
1856         return -1;
1857     }
1858 
1859     if (!cap_ppc_watchdog) {
1860         printf("warning: KVM does not support watchdog\n");
1861         return -1;
1862     }
1863 
1864     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1865     if (ret < 0) {
1866         fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1867                 __func__, strerror(-ret));
1868         return ret;
1869     }
1870 
1871     return ret;
1872 }
1873 
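/*
 * Illustrative sketch only, not an actual call site: a BookE timer model
 * would typically pair the helpers above roughly as below, first enabling
 * the in-kernel watchdog, then pushing SPR_BOOKE_TCR with kvmppc_set_tcr()
 * and mirroring guest TSR updates with kvmppc_or_tsr_bits() and
 * kvmppc_clear_tsr_bits().  "bits" is just a placeholder for whatever TSR
 * mask is being changed.
 *
 *     if (kvmppc_booke_watchdog_enable(cpu) == 0) {
 *         kvmppc_set_tcr(cpu);
 *         kvmppc_or_tsr_bits(cpu, bits);
 *         kvmppc_clear_tsr_bits(cpu, bits);
 *     }
 */
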
1874 static int read_cpuinfo(const char *field, char *value, int len)
1875 {
1876     FILE *f;
1877     int ret = -1;
1878     int field_len = strlen(field);
1879     char line[512];
1880 
1881     f = fopen("/proc/cpuinfo", "r");
1882     if (!f) {
1883         return -1;
1884     }
1885 
1886     do {
1887         if (!fgets(line, sizeof(line), f)) {
1888             break;
1889         }
1890         if (!strncmp(line, field, field_len)) {
1891             pstrcpy(value, len, line);
1892             ret = 0;
1893             break;
1894         }
1895     } while (*line);
1896 
1897     fclose(f);
1898 
1899     return ret;
1900 }
1901 
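/*
 * kvmppc_get_tbfreq() below relies on read_cpuinfo() handing back the raw
 * "timebase" line from /proc/cpuinfo, which on a ppc host typically looks
 * something like this (spacing and value vary by machine):
 *
 *     timebase        : 512000000
 *
 * The number after the ':' is the timebase frequency in Hz; if the line is
 * missing or unparsable we fall back to NANOSECONDS_PER_SECOND.
 */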
1902 uint32_t kvmppc_get_tbfreq(void)
1903 {
1904     char line[512];
1905     char *ns;
1906     uint32_t retval = NANOSECONDS_PER_SECOND;
1907 
1908     if (read_cpuinfo("timebase", line, sizeof(line))) {
1909         return retval;
1910     }
1911 
1912     if (!(ns = strchr(line, ':'))) {
1913         return retval;
1914     }
1915 
1916     ns++;
1917 
1918     return atoi(ns);
1919 }
1920 
1921 bool kvmppc_get_host_serial(char **value)
1922 {
1923     return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
1924                                NULL);
1925 }
1926 
1927 bool kvmppc_get_host_model(char **value)
1928 {
1929     return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
1930 }
1931 
1932 /* Try to find a device tree node for a CPU with clock-frequency property */
1933 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1934 {
1935     struct dirent *dirp;
1936     DIR *dp;
1937 
1938     if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1939         printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1940         return -1;
1941     }
1942 
1943     buf[0] = '\0';
1944     while ((dirp = readdir(dp)) != NULL) {
1945         FILE *f;
1946         snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1947                  dirp->d_name);
1948         f = fopen(buf, "r");
1949         if (f) {
1950             snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1951             fclose(f);
1952             break;
1953         }
1954         buf[0] = '\0';
1955     }
1956     closedir(dp);
1957     if (buf[0] == '\0') {
1958         printf("Unknown host!\n");
1959         return -1;
1960     }
1961 
1962     return 0;
1963 }
1964 
1965 static uint64_t kvmppc_read_int_dt(const char *filename)
1966 {
1967     union {
1968         uint32_t v32;
1969         uint64_t v64;
1970     } u;
1971     FILE *f;
1972     int len;
1973 
1974     f = fopen(filename, "rb");
1975     if (!f) {
1976         return -1;
1977     }
1978 
1979     len = fread(&u, 1, sizeof(u), f);
1980     fclose(f);
1981     switch (len) {
1982     case 4:
1983         /* property is a 32-bit quantity */
1984         return be32_to_cpu(u.v32);
1985     case 8:
1986         return be64_to_cpu(u.v64);
1987     }
1988 
1989     return 0;
1990 }
1991 
1992 /* Read a CPU node property from the host device tree that's a single
1993  * integer (32-bit or 64-bit).  Returns -1 if the node or property
1994  * can't be found or opened, and 0 if the property format isn't
1995  * understood. */
1996 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1997 {
1998     char buf[PATH_MAX], *tmp;
1999     uint64_t val;
2000 
2001     if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
2002         return -1;
2003     }
2004 
2005     tmp = g_strdup_printf("%s/%s", buf, propname);
2006     val = kvmppc_read_int_dt(tmp);
2007     g_free(tmp);
2008 
2009     return val;
2010 }
2011 
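/*
 * The getters below are thin wrappers around kvmppc_read_int_cpu_dt().
 * For example, kvmppc_get_clockfreq() ends up reading a host path such as
 * the following (the CPU node name is host-dependent and only shown for
 * illustration):
 *
 *     /proc/device-tree/cpus/PowerPC,POWER8@0/clock-frequency
 *
 * and returns the big-endian property value as a host-endian integer.
 */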
2012 uint64_t kvmppc_get_clockfreq(void)
2013 {
2014     return kvmppc_read_int_cpu_dt("clock-frequency");
2015 }
2016 
2017 uint32_t kvmppc_get_vmx(void)
2018 {
2019     return kvmppc_read_int_cpu_dt("ibm,vmx");
2020 }
2021 
2022 uint32_t kvmppc_get_dfp(void)
2023 {
2024     return kvmppc_read_int_cpu_dt("ibm,dfp");
2025 }
2026 
2027 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
2028 {
2029     PowerPCCPU *cpu = ppc_env_get_cpu(env);
2030     CPUState *cs = CPU(cpu);
2031 
2032     if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
2033         !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
2034         return 0;
2035     }
2036 
2037     return 1;
2038 }
2039 
2040 int kvmppc_get_hasidle(CPUPPCState *env)
2041 {
2042     struct kvm_ppc_pvinfo pvinfo;
2043 
2044     if (!kvmppc_get_pvinfo(env, &pvinfo) &&
2045         (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
2046         return 1;
2047     }
2048 
2049     return 0;
2050 }
2051 
2052 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
2053 {
2054     uint32_t *hc = (uint32_t *)buf;
2055     struct kvm_ppc_pvinfo pvinfo;
2056 
2057     if (!kvmppc_get_pvinfo(env, &pvinfo)) {
2058         memcpy(buf, pvinfo.hcall, buf_len);
2059         return 0;
2060     }
2061 
2062     /*
2063      * Fall back to hypercalls that always fail, regardless of endianness:
2064      *
2065      *     tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
2066      *     li r3, -1
2067      *     b .+8       (becomes nop in wrong endian)
2068      *     bswap32(li r3, -1)
2069      */
2070 
2071     hc[0] = cpu_to_be32(0x08000048);
2072     hc[1] = cpu_to_be32(0x3860ffff);
2073     hc[2] = cpu_to_be32(0x48000008);
2074     hc[3] = cpu_to_be32(bswap32(0x3860ffff));
2075 
2076     return 1;
2077 }
2078 
2079 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
2080 {
2081     return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
2082 }
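
/*
 * In the call above, the last two arguments of kvm_vm_enable_cap() are the
 * hypercall number and an enable flag (1 to enable in-kernel handling of
 * that hcall).  The wrappers below just enable specific hcalls; for
 * instance, kvmppc_enable_set_mode_hcall() is effectively:
 *
 *     kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
 *                       H_SET_MODE, 1);
 */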
2083 
2084 void kvmppc_enable_logical_ci_hcalls(void)
2085 {
2086     /*
2087      * FIXME: it would be nice to detect the case where we're using a
2088      * device that requires the in-kernel implementation of these
2089      * hcalls but the kernel lacks it, and to produce a warning in
2090      * that case.
2091      */
2092     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
2093     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
2094 }
2095 
2096 void kvmppc_enable_set_mode_hcall(void)
2097 {
2098     kvmppc_enable_hcall(kvm_state, H_SET_MODE);
2099 }
2100 
2101 void kvmppc_enable_clear_ref_mod_hcalls(void)
2102 {
2103     kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
2104     kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
2105 }
2106 
2107 void kvmppc_set_papr(PowerPCCPU *cpu)
2108 {
2109     CPUState *cs = CPU(cpu);
2110     int ret;
2111 
2112     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2113     if (ret) {
2114         error_report("This vCPU type or KVM version does not support PAPR");
2115         exit(1);
2116     }
2117 
2118     /* Update the capability flag so we sync the right information
2119      * with kvm */
2120     cap_papr = 1;
2121 }
2122 
2123 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr)
2124 {
2125     return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr);
2126 }
2127 
2128 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2129 {
2130     CPUState *cs = CPU(cpu);
2131     int ret;
2132 
2133     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2134     if (ret && mpic_proxy) {
2135         error_report("This KVM version does not support EPR");
2136         exit(1);
2137     }
2138 }
2139 
2140 int kvmppc_smt_threads(void)
2141 {
2142     return cap_ppc_smt ? cap_ppc_smt : 1;
2143 }
2144 
2145 int kvmppc_set_smt_threads(int smt)
2146 {
2147     int ret;
2148 
2149     ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_SMT, 0, smt, 0);
2150     if (!ret) {
2151         cap_ppc_smt = smt;
2152     }
2153     return ret;
2154 }
2155 
2156 void kvmppc_hint_smt_possible(Error **errp)
2157 {
2158     int i;
2159     GString *g;
2160     char *s;
2161 
2162     assert(kvm_enabled());
2163     if (cap_ppc_smt_possible) {
2164         g = g_string_new("Available VSMT modes:");
2165         for (i = 63; i >= 0; i--) {
2166             if ((1UL << i) & cap_ppc_smt_possible) {
2167                 g_string_append_printf(g, " %lu", (1UL << i));
2168             }
2169         }
2170         s = g_string_free(g, false);
2171         error_append_hint(errp, "%s.\n", s);
2172         g_free(s);
2173     } else {
2174         error_append_hint(errp,
2175                           "This KVM seems to be too old to support VSMT.\n");
2176     }
2177 }
2178 
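/*
 * cap_ppc_smt_possible is interpreted above as a bitmap in which bit n set
 * means "VSMT mode 2^n is available".  As a purely illustrative value, a
 * mask of 0x0f would yield the hint "Available VSMT modes: 8 4 2 1."
 */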
2179 
2180 #ifdef TARGET_PPC64
2181 off_t kvmppc_alloc_rma(void **rma)
2182 {
2183     off_t size;
2184     int fd;
2185     struct kvm_allocate_rma ret;
2186 
2187     /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
2188      * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
2189      *                      not necessary on this hardware
2190      * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
2191      *
2192      * FIXME: We should allow the user to force contiguous RMA
2193      * allocation in the cap_ppc_rma==1 case.
2194      */
2195     if (cap_ppc_rma < 2) {
2196         return 0;
2197     }
2198 
2199     fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
2200     if (fd < 0) {
2201         fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
2202                 strerror(errno));
2203         return -1;
2204     }
2205 
2206     size = MIN(ret.rma_size, 256ul << 20);
2207 
2208     *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2209     if (*rma == MAP_FAILED) {
2210         fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
2211         return -1;
2212     }
2213 
2214     return size;
2215 }
2216 
2217 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2218 {
2219     struct kvm_ppc_smmu_info info;
2220     long rampagesize, best_page_shift;
2221     int i;
2222 
2223     if (cap_ppc_rma >= 2) {
2224         return current_size;
2225     }
2226 
2227     /* Find the largest hardware supported page size that's less than
2228      * or equal to the (logical) backing page size of guest RAM */
2229     kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
2230     rampagesize = qemu_getrampagesize();
2231     best_page_shift = 0;
2232 
2233     for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2234         struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2235 
2236         if (!sps->page_shift) {
2237             continue;
2238         }
2239 
2240         if ((sps->page_shift > best_page_shift)
2241             && ((1UL << sps->page_shift) <= rampagesize)) {
2242             best_page_shift = sps->page_shift;
2243         }
2244     }
2245 
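    /*
     * The HPT is 2^hash_shift bytes, i.e. 2^(hash_shift - 7) 128-byte
     * PTEGs, so the bound below allows roughly one PTEG per RMA page.
     * Worked example with illustrative numbers: 4 KiB backing pages
     * (best_page_shift == 12) and a 16 MiB HPT (hash_shift == 24) give a
     * cap of 1ULL << (12 + 24 - 7) = 512 MiB.
     */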
2246     return MIN(current_size,
2247                1ULL << (best_page_shift + hash_shift - 7));
2248 }
2249 #endif
2250 
2251 bool kvmppc_spapr_use_multitce(void)
2252 {
2253     return cap_spapr_multitce;
2254 }
2255 
2256 int kvmppc_spapr_enable_inkernel_multitce(void)
2257 {
2258     int ret;
2259 
2260     ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2261                             H_PUT_TCE_INDIRECT, 1);
2262     if (!ret) {
2263         ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2264                                 H_STUFF_TCE, 1);
2265     }
2266 
2267     return ret;
2268 }
2269 
2270 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
2271                               uint64_t bus_offset, uint32_t nb_table,
2272                               int *pfd, bool need_vfio)
2273 {
2274     long len;
2275     int fd;
2276     void *table;
2277 
2278     /* Must set fd to -1 so we don't try to munmap when called for
2279      * destroying the table, which the upper layers -will- do
2280      */
2281     *pfd = -1;
2282     if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2283         return NULL;
2284     }
2285 
2286     if (cap_spapr_tce_64) {
2287         struct kvm_create_spapr_tce_64 args = {
2288             .liobn = liobn,
2289             .page_shift = page_shift,
2290             .offset = bus_offset >> page_shift,
2291             .size = nb_table,
2292             .flags = 0
2293         };
2294         fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE_64, &args);
2295         if (fd < 0) {
2296             fprintf(stderr,
2297                     "KVM: Failed to create TCE64 table for liobn 0x%x\n",
2298                     liobn);
2299             return NULL;
2300         }
2301     } else if (cap_spapr_tce) {
2302         uint64_t window_size = (uint64_t) nb_table << page_shift;
2303         struct kvm_create_spapr_tce args = {
2304             .liobn = liobn,
2305             .window_size = window_size,
2306         };
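        /*
         * The legacy ioctl only takes a 32-bit window_size and can only
         * map a window starting at bus offset 0, so if the requested
         * window was truncated by the assignment above, or a non-zero
         * offset was asked for, return NULL and let the caller fall back
         * to a userspace-managed table.
         */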
2307         if ((window_size != args.window_size) || bus_offset) {
2308             return NULL;
2309         }
2310         fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2311         if (fd < 0) {
2312             fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2313                     liobn);
2314             return NULL;
2315         }
2316     } else {
2317         return NULL;
2318     }
2319 
2320     len = nb_table * sizeof(uint64_t);
2321     /* FIXME: round this up to page size */
2322 
2323     table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2324     if (table == MAP_FAILED) {
2325         fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2326                 liobn);
2327         close(fd);
2328         return NULL;
2329     }
2330 
2331     *pfd = fd;
2332     return table;
2333 }
2334 
2335 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
2336 {
2337     long len;
2338 
2339     if (fd < 0) {
2340         return -1;
2341     }
2342 
2343     len = nb_table * sizeof(uint64_t);
2344     if ((munmap(table, len) < 0) ||
2345         (close(fd) < 0)) {
2346         fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
2347                 strerror(errno));
2348         /* Leak the table */
2349     }
2350 
2351     return 0;
2352 }
2353 
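/*
 * Negotiate HPT allocation with the kernel.  Returns the log2 of the hash
 * table size the kernel has allocated (for instance 24 for the legacy
 * fixed 16MB HV table), 0 if the caller (QEMU) must allocate the HPT
 * itself, or a negative errno on failure.
 */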
2354 int kvmppc_reset_htab(int shift_hint)
2355 {
2356     uint32_t shift = shift_hint;
2357 
2358     if (!kvm_enabled()) {
2359         /* Full emulation, tell caller to allocate htab itself */
2360         return 0;
2361     }
2362     if (kvm_vm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2363         int ret;
2364         ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2365         if (ret == -ENOTTY) {
2366             /* At least some versions of PR KVM advertise the
2367              * capability, but don't implement the ioctl().  Oops.
2368              * Return 0 so that we allocate the htab in qemu, as is
2369              * correct for PR. */
2370             return 0;
2371         } else if (ret < 0) {
2372             return ret;
2373         }
2374         return shift;
2375     }
2376 
2377     /* We have a kernel that predates the htab reset calls.  For PR
2378      * KVM, we need to allocate the htab ourselves; an HV KVM of this
2379      * era will have allocated a fixed 16MB hash table already. */
2380     if (kvmppc_is_pr(kvm_state)) {
2381         /* PR - tell caller to allocate htab */
2382         return 0;
2383     } else {
2384         /* HV - assume 16MB kernel allocated htab */
2385         return 24;
2386     }
2387 }
2388 
2389 static inline uint32_t mfpvr(void)
2390 {
2391     uint32_t pvr;
2392 
2393     asm ("mfpvr %0"
2394          : "=r"(pvr));
2395     return pvr;
2396 }
2397 
2398 static void alter_insns(uint64_t *word, uint64_t flags, bool on)
2399 {
2400     if (on) {
2401         *word |= flags;
2402     } else {
2403         *word &= ~flags;
2404     }
2405 }
2406 
2407 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2408 {
2409     PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2410     uint32_t vmx = kvmppc_get_vmx();
2411     uint32_t dfp = kvmppc_get_dfp();
2412     uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2413     uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2414 
2415     /* Now fix up the class with information we can query from the host */
2416     pcc->pvr = mfpvr();
2417 
2418     if (vmx != -1) {
2419         /* Only override when we know what the host supports */
2420         alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
2421         alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
2422     }
2423     if (dfp != -1) {
2424         /* Only override when we know what the host supports */
2425         alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
2426     }
2427 
2428     if (dcache_size != -1) {
2429         pcc->l1_dcache_size = dcache_size;
2430     }
2431 
2432     if (icache_size != -1) {
2433         pcc->l1_icache_size = icache_size;
2434     }
2435 
2436 #if defined(TARGET_PPC64)
2437     pcc->radix_page_info = kvm_get_radix_page_info();
2438 
2439     if ((pcc->pvr & 0xffffff00) == CPU_POWERPC_POWER9_DD1) {
2440         /*
2441          * POWER9 DD1 has some bugs which make it not really ISA 3.00
2442          * compliant.  More importantly, advertising ISA 3.00
2443          * architected mode may prevent guests from activating
2444          * necessary DD1 workarounds.
2445          */
2446         pcc->pcr_supported &= ~(PCR_COMPAT_3_00 | PCR_COMPAT_2_07
2447                                 | PCR_COMPAT_2_06 | PCR_COMPAT_2_05);
2448     }
2449 #endif /* defined(TARGET_PPC64) */
2450 }
2451 
2452 bool kvmppc_has_cap_epr(void)
2453 {
2454     return cap_epr;
2455 }
2456 
2457 bool kvmppc_has_cap_fixup_hcalls(void)
2458 {
2459     return cap_fixup_hcalls;
2460 }
2461 
2462 bool kvmppc_has_cap_htm(void)
2463 {
2464     return cap_htm;
2465 }
2466 
2467 bool kvmppc_has_cap_mmu_radix(void)
2468 {
2469     return cap_mmu_radix;
2470 }
2471 
2472 bool kvmppc_has_cap_mmu_hash_v3(void)
2473 {
2474     return cap_mmu_hash_v3;
2475 }
2476 
2477 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
2478 {
2479     uint32_t host_pvr = mfpvr();
2480     PowerPCCPUClass *pvr_pcc;
2481 
2482     pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2483     if (pvr_pcc == NULL) {
2484         pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2485     }
2486 
2487     return pvr_pcc;
2488 }
2489 
2490 static int kvm_ppc_register_host_cpu_type(MachineState *ms)
2491 {
2492     TypeInfo type_info = {
2493         .name = TYPE_HOST_POWERPC_CPU,
2494         .class_init = kvmppc_host_cpu_class_init,
2495     };
2496     MachineClass *mc = MACHINE_GET_CLASS(ms);
2497     PowerPCCPUClass *pvr_pcc;
2498     ObjectClass *oc;
2499     DeviceClass *dc;
2500     int i;
2501 
2502     pvr_pcc = kvm_ppc_get_host_cpu_class();
2503     if (pvr_pcc == NULL) {
2504         return -1;
2505     }
2506     type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2507     type_register(&type_info);
2508     if (object_dynamic_cast(OBJECT(ms), TYPE_SPAPR_MACHINE)) {
2509         /* override TCG default cpu type with 'host' cpu model */
2510         mc->default_cpu_type = TYPE_HOST_POWERPC_CPU;
2511     }
2512 
2513     oc = object_class_by_name(type_info.name);
2514     g_assert(oc);
2515 
2516     /*
2517      * Update generic CPU family class alias (e.g. on a POWER8NVL host,
2518      * we want "POWER8" to be a "family" alias that points to the current
2519      * host CPU type, too)
2520      */
2521     dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc));
2522     for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) {
2523         if (strcasecmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) {
2524             char *suffix;
2525 
2526             ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc));
2527             suffix = strstr(ppc_cpu_aliases[i].model, POWERPC_CPU_TYPE_SUFFIX);
2528             if (suffix) {
2529                 *suffix = 0;
2530             }
2531             break;
2532         }
2533     }
2534 
2535     return 0;
2536 }
2537 
2538 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2539 {
2540     struct kvm_rtas_token_args args = {
2541         .token = token,
2542     };
2543 
2544     if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2545         return -ENOENT;
2546     }
2547 
2548     strncpy(args.name, function, sizeof(args.name));
2549 
2550     return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2551 }
2552 
2553 int kvmppc_get_htab_fd(bool write, uint64_t index, Error **errp)
2554 {
2555     struct kvm_get_htab_fd s = {
2556         .flags = write ? KVM_GET_HTAB_WRITE : 0,
2557         .start_index = index,
2558     };
2559     int ret;
2560 
2561     if (!cap_htab_fd) {
2562         error_setg(errp, "KVM version doesn't support %s the HPT",
2563                    write ? "writing" : "reading");
2564         return -ENOTSUP;
2565     }
2566 
2567     ret = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2568     if (ret < 0) {
2569         error_setg(errp, "Unable to open fd for %s HPT %s KVM: %s",
2570                    write ? "writing" : "reading", write ? "to" : "from",
2571                    strerror(errno));
2572         return -errno;
2573     }
2574 
2575     return ret;
2576 }
2577 
2578 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2579 {
2580     int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2581     uint8_t buf[bufsize];
2582     ssize_t rc;
2583 
2584     do {
2585         rc = read(fd, buf, bufsize);
2586         if (rc < 0) {
2587             fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2588                     strerror(errno));
2589             return rc;
2590         } else if (rc) {
2591             uint8_t *buffer = buf;
2592             ssize_t n = rc;
2593             while (n) {
2594                 struct kvm_get_htab_header *head =
2595                     (struct kvm_get_htab_header *) buffer;
2596                 size_t chunksize = sizeof(*head) +
2597                      HASH_PTE_SIZE_64 * head->n_valid;
2598 
2599                 qemu_put_be32(f, head->index);
2600                 qemu_put_be16(f, head->n_valid);
2601                 qemu_put_be16(f, head->n_invalid);
2602                 qemu_put_buffer(f, (void *)(head + 1),
2603                                 HASH_PTE_SIZE_64 * head->n_valid);
2604 
2605                 buffer += chunksize;
2606                 n -= chunksize;
2607             }
2608         }
2609     } while ((rc != 0)
2610              && ((max_ns < 0)
2611                  || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2612 
2613     return (rc == 0) ? 1 : 0;
2614 }
2615 
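/*
 * The stream written above (and consumed via kvmppc_load_htab_chunk()
 * below) is a sequence of records mirroring struct kvm_get_htab_header
 * plus its payload:
 *
 *     be32 index       first HPTE index covered by the record
 *     be16 n_valid     number of valid HPTEs whose contents follow
 *     be16 n_invalid   number of invalidated HPTEs after those
 *     n_valid * HASH_PTE_SIZE_64 bytes of raw HPTE data
 */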
2616 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2617                            uint16_t n_valid, uint16_t n_invalid)
2618 {
2619     struct kvm_get_htab_header *buf;
2620     size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
2621     ssize_t rc;
2622 
2623     buf = alloca(chunksize);
2624     buf->index = index;
2625     buf->n_valid = n_valid;
2626     buf->n_invalid = n_invalid;
2627 
2628     qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
2629 
2630     rc = write(fd, buf, chunksize);
2631     if (rc < 0) {
2632         fprintf(stderr, "Error writing KVM hash table: %s\n",
2633                 strerror(errno));
2634         return rc;
2635     }
2636     if (rc != chunksize) {
2637         /* We should never get a short write on a single chunk */
2638         fprintf(stderr, "Short write, restoring KVM hash table\n");
2639         return -1;
2640     }
2641     return 0;
2642 }
2643 
2644 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2645 {
2646     return true;
2647 }
2648 
2649 void kvm_arch_init_irq_routing(KVMState *s)
2650 {
2651 }
2652 
2653 void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n)
2654 {
2655     int fd, rc;
2656     int i;
2657 
2658     fd = kvmppc_get_htab_fd(false, ptex, &error_abort);
2659 
2660     i = 0;
2661     while (i < n) {
2662         struct kvm_get_htab_header *hdr;
2663         int m = n < HPTES_PER_GROUP ? n : HPTES_PER_GROUP;
2664         char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64];
2665 
2666         rc = read(fd, buf, sizeof(buf));
2667         if (rc < 0) {
2668             hw_error("kvmppc_read_hptes: Unable to read HPTEs");
2669         }
2670 
2671         hdr = (struct kvm_get_htab_header *)buf;
2672         while ((i < n) && ((char *)hdr < (buf + rc))) {
2673             int invalid = hdr->n_invalid;
2674 
2675             if (hdr->index != (ptex + i)) {
2676                 hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32
2677                          " != (%"HWADDR_PRIu" + %d)", hdr->index, ptex, i);
2678             }
2679 
2680             memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * hdr->n_valid);
2681             i += hdr->n_valid;
2682 
2683             if ((n - i) < invalid) {
2684                 invalid = n - i;
2685             }
2686             memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64);
2687             i += hdr->n_invalid;
2688 
2689             hdr = (struct kvm_get_htab_header *)
2690                 ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid);
2691         }
2692     }
2693 
2694     close(fd);
2695 }
2696 
2697 void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1)
2698 {
2699     int fd, rc;
2700     struct {
2701         struct kvm_get_htab_header hdr;
2702         uint64_t pte0;
2703         uint64_t pte1;
2704     } buf;
2705 
2706     fd = kvmppc_get_htab_fd(true, 0 /* Ignored */, &error_abort);
2707 
2708     buf.hdr.n_valid = 1;
2709     buf.hdr.n_invalid = 0;
2710     buf.hdr.index = ptex;
2711     buf.pte0 = cpu_to_be64(pte0);
2712     buf.pte1 = cpu_to_be64(pte1);
2713 
2714     rc = write(fd, &buf, sizeof(buf));
2715     if (rc != sizeof(buf)) {
2716         hw_error("kvmppc_write_hpte: Unable to update KVM HPT");
2717     }
2718     close(fd);
2719 }
2720 
2721 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2722                              uint64_t address, uint32_t data, PCIDevice *dev)
2723 {
2724     return 0;
2725 }
2726 
2727 int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
2728                                 int vector, PCIDevice *dev)
2729 {
2730     return 0;
2731 }
2732 
2733 int kvm_arch_release_virq_post(int virq)
2734 {
2735     return 0;
2736 }
2737 
2738 int kvm_arch_msi_data_to_gsi(uint32_t data)
2739 {
2740     return data & 0xffff;
2741 }
2742 
2743 int kvmppc_enable_hwrng(void)
2744 {
2745     if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2746         return -1;
2747     }
2748 
2749     return kvmppc_enable_hcall(kvm_state, H_RANDOM);
2750 }
2751 
2752 void kvmppc_check_papr_resize_hpt(Error **errp)
2753 {
2754     if (!kvm_enabled()) {
2755         return; /* No KVM, we're good */
2756     }
2757 
2758     if (cap_resize_hpt) {
2759         return; /* Kernel has explicit support, we're good */
2760     }
2761 
2762     /* Otherwise fallback on looking for PR KVM */
2763     if (kvmppc_is_pr(kvm_state)) {
2764         return;
2765     }
2766 
2767     error_setg(errp,
2768                "Hash page table resizing not available with this KVM version");
2769 }
2770 
2771 int kvmppc_resize_hpt_prepare(PowerPCCPU *cpu, target_ulong flags, int shift)
2772 {
2773     CPUState *cs = CPU(cpu);
2774     struct kvm_ppc_resize_hpt rhpt = {
2775         .flags = flags,
2776         .shift = shift,
2777     };
2778 
2779     if (!cap_resize_hpt) {
2780         return -ENOSYS;
2781     }
2782 
2783     return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_PREPARE, &rhpt);
2784 }
2785 
2786 int kvmppc_resize_hpt_commit(PowerPCCPU *cpu, target_ulong flags, int shift)
2787 {
2788     CPUState *cs = CPU(cpu);
2789     struct kvm_ppc_resize_hpt rhpt = {
2790         .flags = flags,
2791         .shift = shift,
2792     };
2793 
2794     if (!cap_resize_hpt) {
2795         return -ENOSYS;
2796     }
2797 
2798     return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_COMMIT, &rhpt);
2799 }
2800 
2801 /*
2802  * This is a helper function to detect a post migration scenario
2803  * in which a guest, running as KVM-HV, freezes in cpu_post_load because
2804  * the guest kernel can't handle a PVR value other than the actual host
2805  * PVR in KVM_SET_SREGS, even if pvr_match() returns true.
2806  *
2807  * If we don't have cap_ppc_pvr_compat and we're not running in PR
2808  * (so, we're HV), return true. The workaround itself is done in
2809  * cpu_post_load.
2810  *
2811  * The order here is important: we'll only check for KVM PR as a
2812  * fallback if the guest kernel can't handle the situation itself.
2813  * We want to avoid querying the running KVM type at the QEMU level
2814  * as much as possible.
2815  */
2816 bool kvmppc_pvr_workaround_required(PowerPCCPU *cpu)
2817 {
2818     CPUState *cs = CPU(cpu);
2819 
2820     if (!kvm_enabled()) {
2821         return false;
2822     }
2823 
2824     if (cap_ppc_pvr_compat) {
2825         return false;
2826     }
2827 
2828     return !kvmppc_is_pr(cs->kvm_state);
2829 }
2830