xref: /openbmc/qemu/target/ppc/kvm.c (revision 80adf54e)
1 /*
2  * PowerPC implementation of KVM hooks
3  *
4  * Copyright IBM Corp. 2007
5  * Copyright (C) 2011 Freescale Semiconductor, Inc.
6  *
7  * Authors:
8  *  Jerone Young <jyoung5@us.ibm.com>
9  *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10  *  Hollis Blanchard <hollisb@us.ibm.com>
11  *
12  * This work is licensed under the terms of the GNU GPL, version 2 or later.
13  * See the COPYING file in the top-level directory.
14  *
15  */
16 
17 #include "qemu/osdep.h"
18 #include <dirent.h>
19 #include <sys/ioctl.h>
20 #include <sys/vfs.h>
21 
22 #include <linux/kvm.h>
23 
24 #include "qemu-common.h"
25 #include "qapi/error.h"
26 #include "qemu/error-report.h"
27 #include "cpu.h"
28 #include "cpu-models.h"
29 #include "qemu/timer.h"
30 #include "sysemu/sysemu.h"
31 #include "sysemu/hw_accel.h"
32 #include "kvm_ppc.h"
33 #include "sysemu/cpus.h"
34 #include "sysemu/device_tree.h"
35 #include "mmu-hash64.h"
36 
37 #include "hw/sysbus.h"
38 #include "hw/ppc/spapr.h"
39 #include "hw/ppc/spapr_vio.h"
40 #include "hw/ppc/spapr_cpu_core.h"
41 #include "hw/ppc/ppc.h"
42 #include "sysemu/watchdog.h"
43 #include "trace.h"
44 #include "exec/gdbstub.h"
45 #include "exec/memattrs.h"
46 #include "exec/ram_addr.h"
47 #include "sysemu/hostmem.h"
48 #include "qemu/cutils.h"
49 #include "qemu/mmap-alloc.h"
50 #if defined(TARGET_PPC64)
51 #include "hw/ppc/spapr_cpu_core.h"
52 #endif
53 #include "elf.h"
54 #include "sysemu/kvm_int.h"
55 
56 //#define DEBUG_KVM
57 
58 #ifdef DEBUG_KVM
59 #define DPRINTF(fmt, ...) \
60     do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
61 #else
62 #define DPRINTF(fmt, ...) \
63     do { } while (0)
64 #endif
65 
66 #define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"
67 
68 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
69     KVM_CAP_LAST_INFO
70 };
71 
72 static int cap_interrupt_unset = false;
73 static int cap_interrupt_level = false;
74 static int cap_segstate;
75 static int cap_booke_sregs;
76 static int cap_ppc_smt;
77 static int cap_ppc_rma;
78 static int cap_spapr_tce;
79 static int cap_spapr_tce_64;
80 static int cap_spapr_multitce;
81 static int cap_spapr_vfio;
82 static int cap_hior;
83 static int cap_one_reg;
84 static int cap_epr;
85 static int cap_ppc_watchdog;
86 static int cap_papr;
87 static int cap_htab_fd;
88 static int cap_fixup_hcalls;
89 static int cap_htm;             /* Hardware transactional memory support */
90 static int cap_mmu_radix;
91 static int cap_mmu_hash_v3;
92 static int cap_resize_hpt;
93 
94 static uint32_t debug_inst_opcode;
95 
96 /* XXX We have a race condition where we actually have a level triggered
97  *     interrupt, but the infrastructure can't expose that yet, so the guest
98  *     takes but ignores it, goes to sleep and never gets notified that there's
99  *     still an interrupt pending.
100  *
101  *     As a quick workaround, let's just wake up again 20 ms after we injected
102      * an interrupt. That way we can ensure that we're always reinjecting
103  *     interrupts in case the guest swallowed them.
104  */
105 static QEMUTimer *idle_timer;
106 
107 static void kvm_kick_cpu(void *opaque)
108 {
109     PowerPCCPU *cpu = opaque;
110 
111     qemu_cpu_kick(CPU(cpu));
112 }
113 
114 /* Check whether we are running with KVM-PR (instead of KVM-HV).  This
115  * should only be used for fallback tests - generally we should use
116  * explicit capabilities for the features we want, rather than
117  * assuming what is/isn't available depending on the KVM variant. */
118 static bool kvmppc_is_pr(KVMState *ks)
119 {
120     /* Assume KVM-PR if the GET_PVINFO capability is available */
121     return kvm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
122 }
123 
124 static int kvm_ppc_register_host_cpu_type(void);
125 
126 int kvm_arch_init(MachineState *ms, KVMState *s)
127 {
128     cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
129     cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
130     cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
131     cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
132     cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
133     cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
134     cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
135     cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64);
136     cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
137     cap_spapr_vfio = false;
138     cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
139     cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
140     cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
141     cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
142     /* Note: we don't set cap_papr here, because this capability is
143      * only activated later, by kvmppc_set_papr() */
144     cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
145     cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
146     cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
147     cap_mmu_radix = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_RADIX);
148     cap_mmu_hash_v3 = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_HASH_V3);
149     cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT);
150 
151     if (!cap_interrupt_level) {
152         fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
153                         "VM to stall at times!\n");
154     }
155 
156     kvm_ppc_register_host_cpu_type();
157 
158     return 0;
159 }
160 
161 int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
162 {
163     return 0;
164 }
165 
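/*
 * Tell KVM which PVR the guest should see.  BookE keeps the native (host)
 * PVR; Book3S requires the SEGSTATE capability and goes through the sregs
 * interface.
 */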
166 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
167 {
168     CPUPPCState *cenv = &cpu->env;
169     CPUState *cs = CPU(cpu);
170     struct kvm_sregs sregs;
171     int ret;
172 
173     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
174         /* What we're really trying to say is "if we're on BookE, we use
175            the native PVR for now". This is the only sane way to check
176            it though, so we potentially confuse users into thinking they
177            can run BookE guests on BookS. Let's hope nobody dares to try :) */
178         return 0;
179     } else {
180         if (!cap_segstate) {
181             fprintf(stderr, "kvm error: missing PVR setting capability\n");
182             return -ENOSYS;
183         }
184     }
185 
186     ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
187     if (ret) {
188         return ret;
189     }
190 
191     sregs.pvr = cenv->spr[SPR_PVR];
192     return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
193 }
194 
195 /* Set up a shared TLB array with KVM */
196 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
197 {
198     CPUPPCState *env = &cpu->env;
199     CPUState *cs = CPU(cpu);
200     struct kvm_book3e_206_tlb_params params = {};
201     struct kvm_config_tlb cfg = {};
202     unsigned int entries = 0;
203     int ret, i;
204 
205     if (!kvm_enabled() ||
206         !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
207         return 0;
208     }
209 
210     assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
211 
212     for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
213         params.tlb_sizes[i] = booke206_tlb_size(env, i);
214         params.tlb_ways[i] = booke206_tlb_ways(env, i);
215         entries += params.tlb_sizes[i];
216     }
217 
218     assert(entries == env->nb_tlb);
219     assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
220 
221     env->tlb_dirty = true;
222 
223     cfg.array = (uintptr_t)env->tlb.tlbm;
224     cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
225     cfg.params = (uintptr_t)&params;
226     cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
227 
228     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
229     if (ret < 0) {
230         fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
231                 __func__, strerror(-ret));
232         return ret;
233     }
234 
235     env->kvm_sw_tlb = true;
236     return 0;
237 }
238 
239 
240 #if defined(TARGET_PPC64)
241 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
242                                        struct kvm_ppc_smmu_info *info)
243 {
244     CPUPPCState *env = &cpu->env;
245     CPUState *cs = CPU(cpu);
246 
247     memset(info, 0, sizeof(*info));
248 
249     /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
250      * need to "guess" what the supported page sizes are.
251      *
252      * For that to work we make a few assumptions:
253      *
254      * - Check whether we are running "PR" KVM which only supports 4K
255      *   and 16M pages, but supports them regardless of the backing
256      *   store characteristics. We also don't support 1T segments.
257      *
258      *   This is safe because if HV KVM ever supports that capability or PR
259      *   KVM grows support for more page/segment sizes, those versions
260      *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
261      *   will not hit this fallback
262      *
263      * - Else we are running HV KVM. This means we only support page
264      *   sizes that fit in the backing store. Additionally we only
265      *   advertise 64K pages if the processor is ARCH 2.06 and we assume
266      *   P7 encodings for the SLB and hash table. Here too, we assume
267      *   support for any newer processor will mean a kernel that
268      *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
269      *   this fallback.
270      */
271     if (kvmppc_is_pr(cs->kvm_state)) {
272         /* No flags */
273         info->flags = 0;
274         info->slb_size = 64;
275 
276         /* Standard 4k base page size segment */
277         info->sps[0].page_shift = 12;
278         info->sps[0].slb_enc = 0;
279         info->sps[0].enc[0].page_shift = 12;
280         info->sps[0].enc[0].pte_enc = 0;
281 
282         /* Standard 16M large page size segment */
283         info->sps[1].page_shift = 24;
284         info->sps[1].slb_enc = SLB_VSID_L;
285         info->sps[1].enc[0].page_shift = 24;
286         info->sps[1].enc[0].pte_enc = 0;
287     } else {
288         int i = 0;
289 
290         /* HV KVM has backing store size restrictions */
291         info->flags = KVM_PPC_PAGE_SIZES_REAL;
292 
293         if (env->mmu_model & POWERPC_MMU_1TSEG) {
294             info->flags |= KVM_PPC_1T_SEGMENTS;
295         }
296 
297         if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 ||
298            POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) {
299             info->slb_size = 32;
300         } else {
301             info->slb_size = 64;
302         }
303 
304         /* Standard 4k base page size segment */
305         info->sps[i].page_shift = 12;
306         info->sps[i].slb_enc = 0;
307         info->sps[i].enc[0].page_shift = 12;
308         info->sps[i].enc[0].pte_enc = 0;
309         i++;
310 
311         /* 64K on MMU 2.06 and later */
312         if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 ||
313             POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) {
314             info->sps[i].page_shift = 16;
315             info->sps[i].slb_enc = 0x110;
316             info->sps[i].enc[0].page_shift = 16;
317             info->sps[i].enc[0].pte_enc = 1;
318             i++;
319         }
320 
321         /* Standard 16M large page size segment */
322         info->sps[i].page_shift = 24;
323         info->sps[i].slb_enc = SLB_VSID_L;
324         info->sps[i].enc[0].page_shift = 24;
325         info->sps[i].enc[0].pte_enc = 0;
326     }
327 }
328 
329 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
330 {
331     CPUState *cs = CPU(cpu);
332     int ret;
333 
334     if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
335         ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
336         if (ret == 0) {
337             return;
338         }
339     }
340 
341     kvm_get_fallback_smmu_info(cpu, info);
342 }
343 
344 struct ppc_radix_page_info *kvm_get_radix_page_info(void)
345 {
346     KVMState *s = KVM_STATE(current_machine->accelerator);
347     struct ppc_radix_page_info *radix_page_info;
348     struct kvm_ppc_rmmu_info rmmu_info;
349     int i;
350 
351     if (!kvm_check_extension(s, KVM_CAP_PPC_MMU_RADIX)) {
352         return NULL;
353     }
354     if (kvm_vm_ioctl(s, KVM_PPC_GET_RMMU_INFO, &rmmu_info)) {
355         return NULL;
356     }
357     radix_page_info = g_malloc0(sizeof(*radix_page_info));
358     radix_page_info->count = 0;
359     for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) {
360         if (rmmu_info.ap_encodings[i]) {
361             radix_page_info->entries[i] = rmmu_info.ap_encodings[i];
362             radix_page_info->count++;
363         }
364     }
365     return radix_page_info;
366 }
367 
368 target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu,
369                                      bool radix, bool gtse,
370                                      uint64_t proc_tbl)
371 {
372     CPUState *cs = CPU(cpu);
373     int ret;
374     uint64_t flags = 0;
375     struct kvm_ppc_mmuv3_cfg cfg = {
376         .process_table = proc_tbl,
377     };
378 
379     if (radix) {
380         flags |= KVM_PPC_MMUV3_RADIX;
381     }
382     if (gtse) {
383         flags |= KVM_PPC_MMUV3_GTSE;
384     }
385     cfg.flags = flags;
386     ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_CONFIGURE_V3_MMU, &cfg);
387     switch (ret) {
388     case 0:
389         return H_SUCCESS;
390     case -EINVAL:
391         return H_PARAMETER;
392     case -ENODEV:
393         return H_NOT_AVAILABLE;
394     default:
395         return H_HARDWARE;
396     }
397 }
398 
399 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
400 {
401     if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
402         return true;
403     }
404 
405     return (1ul << shift) <= rampgsize;
406 }
407 
408 static long max_cpu_page_size;
409 
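/*
 * Build the table of segment/page sizes that QEMU advertises to the guest:
 * take the MMU info reported by KVM (or the fallback guess above), drop
 * anything the RAM backend's page size cannot back, and store the result in
 * env->sps.  1T segment and 64K page support are also trimmed when the
 * kernel does not report them.
 */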
410 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
411 {
412     static struct kvm_ppc_smmu_info smmu_info;
413     static bool has_smmu_info;
414     CPUPPCState *env = &cpu->env;
415     int iq, ik, jq, jk;
416     bool has_64k_pages = false;
417 
418     /* We only handle page sizes for 64-bit server guests for now */
419     if (!(env->mmu_model & POWERPC_MMU_64)) {
420         return;
421     }
422 
423     /* Collect MMU info from kernel if not already */
424     if (!has_smmu_info) {
425         kvm_get_smmu_info(cpu, &smmu_info);
426         has_smmu_info = true;
427     }
428 
429     if (!max_cpu_page_size) {
430         max_cpu_page_size = qemu_getrampagesize();
431     }
432 
433     /* Convert to QEMU form */
434     memset(&env->sps, 0, sizeof(env->sps));
435 
436     /* If we have HV KVM, we need to forbid CI large pages if our
437      * host page size is smaller than 64K.
438      */
439     if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) {
440         env->ci_large_pages = getpagesize() >= 0x10000;
441     }
442 
443     /*
444      * XXX This loop should be an entry wide AND of the capabilities that
445      *     the selected CPU has with the capabilities that KVM supports.
446      */
447     for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
448         struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
449         struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
450 
451         if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
452                                  ksps->page_shift)) {
453             continue;
454         }
455         qsps->page_shift = ksps->page_shift;
456         qsps->slb_enc = ksps->slb_enc;
457         for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
458             if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
459                                      ksps->enc[jk].page_shift)) {
460                 continue;
461             }
462             if (ksps->enc[jk].page_shift == 16) {
463                 has_64k_pages = true;
464             }
465             qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
466             qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
467             if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
468                 break;
469             }
470         }
471         if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
472             break;
473         }
474     }
475     env->slb_nr = smmu_info.slb_size;
476     if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
477         env->mmu_model &= ~POWERPC_MMU_1TSEG;
478     }
479     if (!has_64k_pages) {
480         env->mmu_model &= ~POWERPC_MMU_64K;
481     }
482 }
483 
484 bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path)
485 {
486     Object *mem_obj = object_resolve_path(obj_path, NULL);
487     char *mempath = object_property_get_str(mem_obj, "mem-path", NULL);
488     long pagesize;
489 
490     if (mempath) {
491         pagesize = qemu_mempath_getpagesize(mempath);
492         g_free(mempath);
493     } else {
494         pagesize = getpagesize();
495     }
496 
497     return pagesize >= max_cpu_page_size;
498 }
499 
500 #else /* defined (TARGET_PPC64) */
501 
502 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
503 {
504 }
505 
506 bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path)
507 {
508     return true;
509 }
510 
511 #endif /* !defined (TARGET_PPC64) */
512 
513 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
514 {
515     return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
516 }
517 
518 /* e500 supports 2 h/w breakpoints and 2 watchpoints.
519  * book3s supports only 1 watchpoint, so array size
520  * of 4 is sufficient for now.
521  */
522 #define MAX_HW_BKPTS 4
523 
524 static struct HWBreakpoint {
525     target_ulong addr;
526     int type;
527 } hw_debug_points[MAX_HW_BKPTS];
528 
529 static CPUWatchpoint hw_watchpoint;
530 
531 /* By default, no breakpoints or watchpoints are supported */
532 static int max_hw_breakpoint;
533 static int max_hw_watchpoint;
534 static int nb_hw_breakpoint;
535 static int nb_hw_watchpoint;
536 
537 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
538 {
539     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
540         max_hw_breakpoint = 2;
541         max_hw_watchpoint = 2;
542     }
543 
544     if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
545         fprintf(stderr, "Error initializing h/w breakpoints\n");
546         return;
547     }
548 }
549 
550 int kvm_arch_init_vcpu(CPUState *cs)
551 {
552     PowerPCCPU *cpu = POWERPC_CPU(cs);
553     CPUPPCState *cenv = &cpu->env;
554     int ret;
555 
556     /* Gather server mmu info from KVM and update the CPU state */
557     kvm_fixup_page_sizes(cpu);
558 
559     /* Synchronize sregs with kvm */
560     ret = kvm_arch_sync_sregs(cpu);
561     if (ret) {
562         if (ret == -EINVAL) {
563             error_report("Register sync failed... If you're using kvm-hv.ko,"
564                          " only \"-cpu host\" is possible");
565         }
566         return ret;
567     }
568 
569     idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
570 
571     switch (cenv->mmu_model) {
572     case POWERPC_MMU_BOOKE206:
573         /* This target supports access to KVM's guest TLB */
574         ret = kvm_booke206_tlb_init(cpu);
575         break;
576     case POWERPC_MMU_2_07:
577         if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
578             /* KVM-HV has transactional memory on POWER8 even without the
579              * KVM_CAP_PPC_HTM extension, so enable it here instead as
580              * long as it's available to userspace on the host. */
581             if (qemu_getauxval(AT_HWCAP2) & PPC_FEATURE2_HAS_HTM) {
582                 cap_htm = true;
583             }
584         }
585         break;
586     default:
587         break;
588     }
589 
590     kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
591     kvmppc_hw_debug_points_init(cenv);
592 
593     return ret;
594 }
595 
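/*
 * Flush the complete shadow TLB back to KVM: mark every entry dirty so the
 * kernel re-reads the whole array we share with it.
 */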
596 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
597 {
598     CPUPPCState *env = &cpu->env;
599     CPUState *cs = CPU(cpu);
600     struct kvm_dirty_tlb dirty_tlb;
601     unsigned char *bitmap;
602     int ret;
603 
604     if (!env->kvm_sw_tlb) {
605         return;
606     }
607 
608     bitmap = g_malloc((env->nb_tlb + 7) / 8);
609     memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
610 
611     dirty_tlb.bitmap = (uintptr_t)bitmap;
612     dirty_tlb.num_dirty = env->nb_tlb;
613 
614     ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
615     if (ret) {
616         fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
617                 __func__, strerror(-ret));
618     }
619 
620     g_free(bitmap);
621 }
622 
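/*
 * Read one SPR from KVM through the ONE_REG interface and store it in
 * env->spr[].  The register width is taken from the ONE_REG id; only 32-bit
 * and 64-bit SPRs are handled.
 */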
623 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
624 {
625     PowerPCCPU *cpu = POWERPC_CPU(cs);
626     CPUPPCState *env = &cpu->env;
627     union {
628         uint32_t u32;
629         uint64_t u64;
630     } val;
631     struct kvm_one_reg reg = {
632         .id = id,
633         .addr = (uintptr_t) &val,
634     };
635     int ret;
636 
637     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
638     if (ret != 0) {
639         trace_kvm_failed_spr_get(spr, strerror(errno));
640     } else {
641         switch (id & KVM_REG_SIZE_MASK) {
642         case KVM_REG_SIZE_U32:
643             env->spr[spr] = val.u32;
644             break;
645 
646         case KVM_REG_SIZE_U64:
647             env->spr[spr] = val.u64;
648             break;
649 
650         default:
651             /* Don't handle this size yet */
652             abort();
653         }
654     }
655 }
656 
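/* Write one SPR from env->spr[] to KVM through the ONE_REG interface. */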
657 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
658 {
659     PowerPCCPU *cpu = POWERPC_CPU(cs);
660     CPUPPCState *env = &cpu->env;
661     union {
662         uint32_t u32;
663         uint64_t u64;
664     } val;
665     struct kvm_one_reg reg = {
666         .id = id,
667         .addr = (uintptr_t) &val,
668     };
669     int ret;
670 
671     switch (id & KVM_REG_SIZE_MASK) {
672     case KVM_REG_SIZE_U32:
673         val.u32 = env->spr[spr];
674         break;
675 
676     case KVM_REG_SIZE_U64:
677         val.u64 = env->spr[spr];
678         break;
679 
680     default:
681         /* Don't handle this size yet */
682         abort();
683     }
684 
685     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
686     if (ret != 0) {
687         trace_kvm_failed_spr_set(spr, strerror(errno));
688     }
689 }
690 
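/*
 * Push the floating point/VSX and Altivec state to KVM.  With VSX present
 * the full 128-bit VSRs go out via KVM_REG_PPC_VSR(); otherwise only the
 * 64-bit FPRs are written via KVM_REG_PPC_FPR().
 */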
691 static int kvm_put_fp(CPUState *cs)
692 {
693     PowerPCCPU *cpu = POWERPC_CPU(cs);
694     CPUPPCState *env = &cpu->env;
695     struct kvm_one_reg reg;
696     int i;
697     int ret;
698 
699     if (env->insns_flags & PPC_FLOAT) {
700         uint64_t fpscr = env->fpscr;
701         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
702 
703         reg.id = KVM_REG_PPC_FPSCR;
704         reg.addr = (uintptr_t)&fpscr;
705         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
706         if (ret < 0) {
707             DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
708             return ret;
709         }
710 
711         for (i = 0; i < 32; i++) {
712             uint64_t vsr[2];
713 
714 #ifdef HOST_WORDS_BIGENDIAN
715             vsr[0] = float64_val(env->fpr[i]);
716             vsr[1] = env->vsr[i];
717 #else
718             vsr[0] = env->vsr[i];
719             vsr[1] = float64_val(env->fpr[i]);
720 #endif
721             reg.addr = (uintptr_t) &vsr;
722             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
723 
724             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
725             if (ret < 0) {
726                 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
727                         i, strerror(errno));
728                 return ret;
729             }
730         }
731     }
732 
733     if (env->insns_flags & PPC_ALTIVEC) {
734         reg.id = KVM_REG_PPC_VSCR;
735         reg.addr = (uintptr_t)&env->vscr;
736         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
737         if (ret < 0) {
738             DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
739             return ret;
740         }
741 
742         for (i = 0; i < 32; i++) {
743             reg.id = KVM_REG_PPC_VR(i);
744             reg.addr = (uintptr_t)&env->avr[i];
745             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
746             if (ret < 0) {
747                 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
748                 return ret;
749             }
750         }
751     }
752 
753     return 0;
754 }
755 
756 static int kvm_get_fp(CPUState *cs)
757 {
758     PowerPCCPU *cpu = POWERPC_CPU(cs);
759     CPUPPCState *env = &cpu->env;
760     struct kvm_one_reg reg;
761     int i;
762     int ret;
763 
764     if (env->insns_flags & PPC_FLOAT) {
765         uint64_t fpscr;
766         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
767 
768         reg.id = KVM_REG_PPC_FPSCR;
769         reg.addr = (uintptr_t)&fpscr;
770         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
771         if (ret < 0) {
772             DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
773             return ret;
774         } else {
775             env->fpscr = fpscr;
776         }
777 
778         for (i = 0; i < 32; i++) {
779             uint64_t vsr[2];
780 
781             reg.addr = (uintptr_t) &vsr;
782             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
783 
784             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
785             if (ret < 0) {
786                 DPRINTF("Unable to get %s%d from KVM: %s\n",
787                         vsx ? "VSR" : "FPR", i, strerror(errno));
788                 return ret;
789             } else {
790 #ifdef HOST_WORDS_BIGENDIAN
791                 env->fpr[i] = vsr[0];
792                 if (vsx) {
793                     env->vsr[i] = vsr[1];
794                 }
795 #else
796                 env->fpr[i] = vsr[1];
797                 if (vsx) {
798                     env->vsr[i] = vsr[0];
799                 }
800 #endif
801             }
802         }
803     }
804 
805     if (env->insns_flags & PPC_ALTIVEC) {
806         reg.id = KVM_REG_PPC_VSCR;
807         reg.addr = (uintptr_t)&env->vscr;
808         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
809         if (ret < 0) {
810             DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
811             return ret;
812         }
813 
814         for (i = 0; i < 32; i++) {
815             reg.id = KVM_REG_PPC_VR(i);
816             reg.addr = (uintptr_t)&env->avr[i];
817             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
818             if (ret < 0) {
819                 DPRINTF("Unable to get VR%d from KVM: %s\n",
820                         i, strerror(errno));
821                 return ret;
822             }
823         }
824     }
825 
826     return 0;
827 }
828 
829 #if defined(TARGET_PPC64)
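/*
 * Fetch the PAPR Virtual Processor Area, SLB shadow buffer and dispatch
 * trace log registration state from KVM.
 */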
830 static int kvm_get_vpa(CPUState *cs)
831 {
832     PowerPCCPU *cpu = POWERPC_CPU(cs);
833     CPUPPCState *env = &cpu->env;
834     struct kvm_one_reg reg;
835     int ret;
836 
837     reg.id = KVM_REG_PPC_VPA_ADDR;
838     reg.addr = (uintptr_t)&env->vpa_addr;
839     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
840     if (ret < 0) {
841         DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
842         return ret;
843     }
844 
845     assert((uintptr_t)&env->slb_shadow_size
846            == ((uintptr_t)&env->slb_shadow_addr + 8));
847     reg.id = KVM_REG_PPC_VPA_SLB;
848     reg.addr = (uintptr_t)&env->slb_shadow_addr;
849     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
850     if (ret < 0) {
851         DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
852                 strerror(errno));
853         return ret;
854     }
855 
856     assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
857     reg.id = KVM_REG_PPC_VPA_DTL;
858     reg.addr = (uintptr_t)&env->dtl_addr;
859     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
860     if (ret < 0) {
861         DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
862                 strerror(errno));
863         return ret;
864     }
865 
866     return 0;
867 }
868 
869 static int kvm_put_vpa(CPUState *cs)
870 {
871     PowerPCCPU *cpu = POWERPC_CPU(cs);
872     CPUPPCState *env = &cpu->env;
873     struct kvm_one_reg reg;
874     int ret;
875 
876     /* SLB shadow or DTL can't be registered unless a master VPA is
877      * registered.  That means when restoring state, if a VPA *is*
878      * registered, we need to set that up first.  If not, we need to
879      * deregister the others before deregistering the master VPA */
880     assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
881 
882     if (env->vpa_addr) {
883         reg.id = KVM_REG_PPC_VPA_ADDR;
884         reg.addr = (uintptr_t)&env->vpa_addr;
885         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
886         if (ret < 0) {
887             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
888             return ret;
889         }
890     }
891 
892     assert((uintptr_t)&env->slb_shadow_size
893            == ((uintptr_t)&env->slb_shadow_addr + 8));
894     reg.id = KVM_REG_PPC_VPA_SLB;
895     reg.addr = (uintptr_t)&env->slb_shadow_addr;
896     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
897     if (ret < 0) {
898         DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
899         return ret;
900     }
901 
902     assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
903     reg.id = KVM_REG_PPC_VPA_DTL;
904     reg.addr = (uintptr_t)&env->dtl_addr;
905     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
906     if (ret < 0) {
907         DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
908                 strerror(errno));
909         return ret;
910     }
911 
912     if (!env->vpa_addr) {
913         reg.id = KVM_REG_PPC_VPA_ADDR;
914         reg.addr = (uintptr_t)&env->vpa_addr;
915         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
916         if (ret < 0) {
917             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
918             return ret;
919         }
920     }
921 
922     return 0;
923 }
924 #endif /* TARGET_PPC64 */
925 
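/*
 * Push the Book3S segment state (PVR, SDR1, SLB entries, segment registers
 * and BATs) to KVM via KVM_SET_SREGS.
 */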
926 int kvmppc_put_books_sregs(PowerPCCPU *cpu)
927 {
928     CPUPPCState *env = &cpu->env;
929     struct kvm_sregs sregs;
930     int i;
931 
932     sregs.pvr = env->spr[SPR_PVR];
933 
934     sregs.u.s.sdr1 = env->spr[SPR_SDR1];
935 
936     /* Sync SLB */
937 #ifdef TARGET_PPC64
938     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
939         sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
940         if (env->slb[i].esid & SLB_ESID_V) {
941             sregs.u.s.ppc64.slb[i].slbe |= i;
942         }
943         sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
944     }
945 #endif
946 
947     /* Sync SRs */
948     for (i = 0; i < 16; i++) {
949         sregs.u.s.ppc32.sr[i] = env->sr[i];
950     }
951 
952     /* Sync BATs */
953     for (i = 0; i < 8; i++) {
954         /* Beware. We have to swap the upper and lower 32-bit halves here */
955         sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
956             | env->DBAT[1][i];
957         sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
958             | env->IBAT[1][i];
959     }
960 
961     return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
962 }
963 
964 int kvm_arch_put_registers(CPUState *cs, int level)
965 {
966     PowerPCCPU *cpu = POWERPC_CPU(cs);
967     CPUPPCState *env = &cpu->env;
968     struct kvm_regs regs;
969     int ret;
970     int i;
971 
972     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
973     if (ret < 0) {
974         return ret;
975     }
976 
977     regs.ctr = env->ctr;
978     regs.lr  = env->lr;
979     regs.xer = cpu_read_xer(env);
980     regs.msr = env->msr;
981     regs.pc = env->nip;
982 
983     regs.srr0 = env->spr[SPR_SRR0];
984     regs.srr1 = env->spr[SPR_SRR1];
985 
986     regs.sprg0 = env->spr[SPR_SPRG0];
987     regs.sprg1 = env->spr[SPR_SPRG1];
988     regs.sprg2 = env->spr[SPR_SPRG2];
989     regs.sprg3 = env->spr[SPR_SPRG3];
990     regs.sprg4 = env->spr[SPR_SPRG4];
991     regs.sprg5 = env->spr[SPR_SPRG5];
992     regs.sprg6 = env->spr[SPR_SPRG6];
993     regs.sprg7 = env->spr[SPR_SPRG7];
994 
995     regs.pid = env->spr[SPR_BOOKE_PID];
996 
997     for (i = 0; i < 32; i++)
998         regs.gpr[i] = env->gpr[i];
999 
1000     regs.cr = 0;
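    /* Pack the eight 4-bit CR fields into one 32-bit word: crf[0] ends up
     * in bits 31..28, crf[7] in bits 3..0. */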
1001     for (i = 0; i < 8; i++) {
1002         regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
1003     }
1004 
1005     ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
1006     if (ret < 0)
1007         return ret;
1008 
1009     kvm_put_fp(cs);
1010 
1011     if (env->tlb_dirty) {
1012         kvm_sw_tlb_put(cpu);
1013         env->tlb_dirty = false;
1014     }
1015 
1016     if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
1017         ret = kvmppc_put_books_sregs(cpu);
1018         if (ret < 0) {
1019             return ret;
1020         }
1021     }
1022 
1023     if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
1024         kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1025     }
1026 
1027     if (cap_one_reg) {
1028         int i;
1029 
1030         /* We deliberately ignore errors here; for kernels which have
1031          * the ONE_REG calls but don't support the specific
1032          * registers, there's a reasonable chance things will still
1033          * work, at least until we try to migrate. */
1034         for (i = 0; i < 1024; i++) {
1035             uint64_t id = env->spr_cb[i].one_reg_id;
1036 
1037             if (id != 0) {
1038                 kvm_put_one_spr(cs, id, i);
1039             }
1040         }
1041 
1042 #ifdef TARGET_PPC64
1043         if (msr_ts) {
1044             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1045                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1046             }
1047             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1048                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1049             }
1050             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1051             kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1052             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1053             kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1054             kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1055             kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1056             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1057             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1058             kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1059             kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1060         }
1061 
1062         if (cap_papr) {
1063             if (kvm_put_vpa(cs) < 0) {
1064                 DPRINTF("Warning: Unable to set VPA information to KVM\n");
1065             }
1066         }
1067 
1068         kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1069 #endif /* TARGET_PPC64 */
1070     }
1071 
1072     return ret;
1073 }
1074 
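/* Recompute a BookE exception vector from its IVOR offset and the IVPR base. */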
1075 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1076 {
1077      env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1078     env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1079 
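/*
 * Pull the BookE special registers exposed through KVM_GET_SREGS back into
 * env->spr[], feature group by feature group.
 */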
1080 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1081 {
1082     CPUPPCState *env = &cpu->env;
1083     struct kvm_sregs sregs;
1084     int ret;
1085 
1086     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1087     if (ret < 0) {
1088         return ret;
1089     }
1090 
1091     if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1092         env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1093         env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1094         env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1095         env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1096         env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1097         env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1098         env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1099         env->spr[SPR_DECR] = sregs.u.e.dec;
1100         env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1101         env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1102         env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1103     }
1104 
1105     if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1106         env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1107         env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1108         env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1109         env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1110         env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1111     }
1112 
1113     if (sregs.u.e.features & KVM_SREGS_E_64) {
1114         env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1115     }
1116 
1117     if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1118         env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1119     }
1120 
1121     if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1122         env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1123         kvm_sync_excp(env, POWERPC_EXCP_CRITICAL,  SPR_BOOKE_IVOR0);
1124         env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1125         kvm_sync_excp(env, POWERPC_EXCP_MCHECK,  SPR_BOOKE_IVOR1);
1126         env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1127         kvm_sync_excp(env, POWERPC_EXCP_DSI,  SPR_BOOKE_IVOR2);
1128         env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1129         kvm_sync_excp(env, POWERPC_EXCP_ISI,  SPR_BOOKE_IVOR3);
1130         env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1131         kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL,  SPR_BOOKE_IVOR4);
1132         env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1133         kvm_sync_excp(env, POWERPC_EXCP_ALIGN,  SPR_BOOKE_IVOR5);
1134         env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1135         kvm_sync_excp(env, POWERPC_EXCP_PROGRAM,  SPR_BOOKE_IVOR6);
1136         env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1137         kvm_sync_excp(env, POWERPC_EXCP_FPU,  SPR_BOOKE_IVOR7);
1138         env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1139         kvm_sync_excp(env, POWERPC_EXCP_SYSCALL,  SPR_BOOKE_IVOR8);
1140         env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1141         kvm_sync_excp(env, POWERPC_EXCP_APU,  SPR_BOOKE_IVOR9);
1142         env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1143         kvm_sync_excp(env, POWERPC_EXCP_DECR,  SPR_BOOKE_IVOR10);
1144         env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1145         kvm_sync_excp(env, POWERPC_EXCP_FIT,  SPR_BOOKE_IVOR11);
1146         env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1147         kvm_sync_excp(env, POWERPC_EXCP_WDT,  SPR_BOOKE_IVOR12);
1148         env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1149         kvm_sync_excp(env, POWERPC_EXCP_DTLB,  SPR_BOOKE_IVOR13);
1150         env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1151         kvm_sync_excp(env, POWERPC_EXCP_ITLB,  SPR_BOOKE_IVOR14);
1152         env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1153         kvm_sync_excp(env, POWERPC_EXCP_DEBUG,  SPR_BOOKE_IVOR15);
1154 
1155         if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1156             env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1157             kvm_sync_excp(env, POWERPC_EXCP_SPEU,  SPR_BOOKE_IVOR32);
1158             env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1159             kvm_sync_excp(env, POWERPC_EXCP_EFPDI,  SPR_BOOKE_IVOR33);
1160             env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1161             kvm_sync_excp(env, POWERPC_EXCP_EFPRI,  SPR_BOOKE_IVOR34);
1162         }
1163 
1164         if (sregs.u.e.features & KVM_SREGS_E_PM) {
1165             env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1166             kvm_sync_excp(env, POWERPC_EXCP_EPERFM,  SPR_BOOKE_IVOR35);
1167         }
1168 
1169         if (sregs.u.e.features & KVM_SREGS_E_PC) {
1170             env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1171             kvm_sync_excp(env, POWERPC_EXCP_DOORI,  SPR_BOOKE_IVOR36);
1172             env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1173             kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1174         }
1175     }
1176 
1177     if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1178         env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1179         env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1180         env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1181         env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1182         env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1183         env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1184         env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1185         env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1186         env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1187         env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1188     }
1189 
1190     if (sregs.u.e.features & KVM_SREGS_EXP) {
1191         env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1192     }
1193 
1194     if (sregs.u.e.features & KVM_SREGS_E_PD) {
1195         env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1196         env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1197     }
1198 
1199     if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1200         env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1201         env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1202         env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1203 
1204         if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1205             env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1206             env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1207         }
1208     }
1209 
1210     return 0;
1211 }
1212 
1213 static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1214 {
1215     CPUPPCState *env = &cpu->env;
1216     struct kvm_sregs sregs;
1217     int ret;
1218     int i;
1219 
1220     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1221     if (ret < 0) {
1222         return ret;
1223     }
1224 
1225     if (!cpu->vhyp) {
1226         ppc_store_sdr1(env, sregs.u.s.sdr1);
1227     }
1228 
1229     /* Sync SLB */
1230 #ifdef TARGET_PPC64
1231     /*
1232      * The packed SLB array we get from KVM_GET_SREGS only contains
1233      * information about valid entries. So we flush our internal copy
1234      * to get rid of stale ones, then put all valid SLB entries back
1235      * in.
1236      */
1237     memset(env->slb, 0, sizeof(env->slb));
1238     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1239         target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1240         target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1241         /*
1242          * Only restore valid entries
1243          */
1244         if (rb & SLB_ESID_V) {
1245             ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1246         }
1247     }
1248 #endif
1249 
1250     /* Sync SRs */
1251     for (i = 0; i < 16; i++) {
1252         env->sr[i] = sregs.u.s.ppc32.sr[i];
1253     }
1254 
1255     /* Sync BATs */
1256     for (i = 0; i < 8; i++) {
1257         env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1258         env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1259         env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1260         env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1261     }
1262 
1263     return 0;
1264 }
1265 
1266 int kvm_arch_get_registers(CPUState *cs)
1267 {
1268     PowerPCCPU *cpu = POWERPC_CPU(cs);
1269     CPUPPCState *env = &cpu->env;
1270     struct kvm_regs regs;
1271     uint32_t cr;
1272     int i, ret;
1273 
1274     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1275     if (ret < 0)
1276         return ret;
1277 
1278     cr = regs.cr;
1279     for (i = 7; i >= 0; i--) {
1280         env->crf[i] = cr & 15;
1281         cr >>= 4;
1282     }
1283 
1284     env->ctr = regs.ctr;
1285     env->lr = regs.lr;
1286     cpu_write_xer(env, regs.xer);
1287     env->msr = regs.msr;
1288     env->nip = regs.pc;
1289 
1290     env->spr[SPR_SRR0] = regs.srr0;
1291     env->spr[SPR_SRR1] = regs.srr1;
1292 
1293     env->spr[SPR_SPRG0] = regs.sprg0;
1294     env->spr[SPR_SPRG1] = regs.sprg1;
1295     env->spr[SPR_SPRG2] = regs.sprg2;
1296     env->spr[SPR_SPRG3] = regs.sprg3;
1297     env->spr[SPR_SPRG4] = regs.sprg4;
1298     env->spr[SPR_SPRG5] = regs.sprg5;
1299     env->spr[SPR_SPRG6] = regs.sprg6;
1300     env->spr[SPR_SPRG7] = regs.sprg7;
1301 
1302     env->spr[SPR_BOOKE_PID] = regs.pid;
1303 
1304     for (i = 0; i < 32; i++)
1305         env->gpr[i] = regs.gpr[i];
1306 
1307     kvm_get_fp(cs);
1308 
1309     if (cap_booke_sregs) {
1310         ret = kvmppc_get_booke_sregs(cpu);
1311         if (ret < 0) {
1312             return ret;
1313         }
1314     }
1315 
1316     if (cap_segstate) {
1317         ret = kvmppc_get_books_sregs(cpu);
1318         if (ret < 0) {
1319             return ret;
1320         }
1321     }
1322 
1323     if (cap_hior) {
1324         kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1325     }
1326 
1327     if (cap_one_reg) {
1328         int i;
1329 
1330         /* We deliberately ignore errors here; for kernels which have
1331          * the ONE_REG calls but don't support the specific
1332          * registers, there's a reasonable chance things will still
1333          * work, at least until we try to migrate. */
1334         for (i = 0; i < 1024; i++) {
1335             uint64_t id = env->spr_cb[i].one_reg_id;
1336 
1337             if (id != 0) {
1338                 kvm_get_one_spr(cs, id, i);
1339             }
1340         }
1341 
1342 #ifdef TARGET_PPC64
1343         if (msr_ts) {
1344             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1345                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1346             }
1347             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1348                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1349             }
1350             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1351             kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1352             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1353             kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1354             kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1355             kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1356             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1357             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1358             kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1359             kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1360         }
1361 
1362         if (cap_papr) {
1363             if (kvm_get_vpa(cs) < 0) {
1364                 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1365             }
1366         }
1367 
1368         kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1369 #endif
1370     }
1371 
1372     return 0;
1373 }
1374 
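/*
 * Forward a change on the external interrupt line to KVM.  Only the
 * external interrupt is handled here, and only when the kernel supports
 * level-triggered injection.
 */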
1375 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1376 {
1377     unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1378 
1379     if (irq != PPC_INTERRUPT_EXT) {
1380         return 0;
1381     }
1382 
1383     if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1384         return 0;
1385     }
1386 
1387     kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1388 
1389     return 0;
1390 }
1391 
1392 #if defined(TARGET_PPCEMB)
1393 #define PPC_INPUT_INT PPC40x_INPUT_INT
1394 #elif defined(TARGET_PPC64)
1395 #define PPC_INPUT_INT PPC970_INPUT_INT
1396 #else
1397 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1398 #endif
1399 
1400 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1401 {
1402     PowerPCCPU *cpu = POWERPC_CPU(cs);
1403     CPUPPCState *env = &cpu->env;
1404     int r;
1405     unsigned irq;
1406 
1407     qemu_mutex_lock_iothread();
1408 
1409     /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1410      * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1411     if (!cap_interrupt_level &&
1412         run->ready_for_interrupt_injection &&
1413         (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1414         (env->irq_input_state & (1<<PPC_INPUT_INT)))
1415     {
1416         /* For now KVM disregards the 'irq' argument. However, in the
1417          * future KVM could cache it in-kernel to avoid a heavyweight exit
1418          * when reading the UIC.
1419          */
1420         irq = KVM_INTERRUPT_SET;
1421 
1422         DPRINTF("injected interrupt %d\n", irq);
1423         r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1424         if (r < 0) {
1425             printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1426         }
1427 
1428         /* Always wake up soon in case the interrupt was level based */
1429         timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1430                        (NANOSECONDS_PER_SECOND / 50));
1431     }
1432 
1433     /* We don't know if there are more interrupts pending after this. However,
1434      * the guest will return to userspace in the course of handling this one
1435      * anyway, so we will get a chance to deliver the rest. */
1436 
1437     qemu_mutex_unlock_iothread();
1438 }
1439 
1440 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1441 {
1442     return MEMTXATTRS_UNSPECIFIED;
1443 }
1444 
1445 int kvm_arch_process_async_events(CPUState *cs)
1446 {
1447     return cs->halted;
1448 }
1449 
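/*
 * KVM_EXIT_HLT: put the vCPU to sleep unless a hard interrupt is already
 * pending or external interrupts are disabled (MSR_EE clear).
 */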
1450 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1451 {
1452     CPUState *cs = CPU(cpu);
1453     CPUPPCState *env = &cpu->env;
1454 
1455     if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1456         cs->halted = 1;
1457         cs->exception_index = EXCP_HLT;
1458     }
1459 
1460     return 0;
1461 }
1462 
1463 /* map dcr access to existing qemu dcr emulation */
1464 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1465 {
1466     if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1467         fprintf(stderr, "Read from unhandled DCR (0x%x)\n", dcrn);
1468 
1469     return 0;
1470 }
1471 
1472 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1473 {
1474     if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1475         fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1476 
1477     return 0;
1478 }
1479 
1480 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1481 {
1482     /* Mixed endian case is not handled */
1483     uint32_t sc = debug_inst_opcode;
1484 
1485     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1486                             sizeof(sc), 0) ||
1487         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1488         return -EINVAL;
1489     }
1490 
1491     return 0;
1492 }
1493 
1494 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1495 {
1496     uint32_t sc;
1497 
1498     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1499         sc != debug_inst_opcode ||
1500         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1501                             sizeof(sc), 1)) {
1502         return -EINVAL;
1503     }
1504 
1505     return 0;
1506 }
1507 
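/*
 * Return the index of the registered hardware breakpoint/watchpoint that
 * matches the given address and GDB type, or -1 if there is none.
 */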
1508 static int find_hw_breakpoint(target_ulong addr, int type)
1509 {
1510     int n;
1511 
1512     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1513            <= ARRAY_SIZE(hw_debug_points));
1514 
1515     for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1516         if (hw_debug_points[n].addr == addr &&
1517              hw_debug_points[n].type == type) {
1518             return n;
1519         }
1520     }
1521 
1522     return -1;
1523 }
1524 
1525 static int find_hw_watchpoint(target_ulong addr, int *flag)
1526 {
1527     int n;
1528 
1529     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1530     if (n >= 0) {
1531         *flag = BP_MEM_ACCESS;
1532         return n;
1533     }
1534 
1535     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1536     if (n >= 0) {
1537         *flag = BP_MEM_WRITE;
1538         return n;
1539     }
1540 
1541     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1542     if (n >= 0) {
1543         *flag = BP_MEM_READ;
1544         return n;
1545     }
1546 
1547     return -1;
1548 }
1549 
1550 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1551                                   target_ulong len, int type)
1552 {
1553     if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1554         return -ENOBUFS;
1555     }
1556 
1557     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1558     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1559 
1560     switch (type) {
1561     case GDB_BREAKPOINT_HW:
1562         if (nb_hw_breakpoint >= max_hw_breakpoint) {
1563             return -ENOBUFS;
1564         }
1565 
1566         if (find_hw_breakpoint(addr, type) >= 0) {
1567             return -EEXIST;
1568         }
1569 
1570         nb_hw_breakpoint++;
1571         break;
1572 
1573     case GDB_WATCHPOINT_WRITE:
1574     case GDB_WATCHPOINT_READ:
1575     case GDB_WATCHPOINT_ACCESS:
1576         if (nb_hw_watchpoint >= max_hw_watchpoint) {
1577             return -ENOBUFS;
1578         }
1579 
1580         if (find_hw_breakpoint(addr, type) >= 0) {
1581             return -EEXIST;
1582         }
1583 
1584         nb_hw_watchpoint++;
1585         break;
1586 
1587     default:
1588         return -ENOSYS;
1589     }
1590 
1591     return 0;
1592 }
1593 
1594 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1595                                   target_ulong len, int type)
1596 {
1597     int n;
1598 
1599     n = find_hw_breakpoint(addr, type);
1600     if (n < 0) {
1601         return -ENOENT;
1602     }
1603 
1604     switch (type) {
1605     case GDB_BREAKPOINT_HW:
1606         nb_hw_breakpoint--;
1607         break;
1608 
1609     case GDB_WATCHPOINT_WRITE:
1610     case GDB_WATCHPOINT_READ:
1611     case GDB_WATCHPOINT_ACCESS:
1612         nb_hw_watchpoint--;
1613         break;
1614 
1615     default:
1616         return -ENOSYS;
1617     }
1618     hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1619 
1620     return 0;
1621 }
1622 
1623 void kvm_arch_remove_all_hw_breakpoints(void)
1624 {
1625     nb_hw_breakpoint = nb_hw_watchpoint = 0;
1626 }
1627 
1628 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1629 {
1630     int n;
1631 
1632     /* Software Breakpoint updates */
1633     if (kvm_sw_breakpoints_active(cs)) {
1634         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1635     }
1636 
1637     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1638            <= ARRAY_SIZE(hw_debug_points));
1639     assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1640 
1641     if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1642         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1643         memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1644         for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1645             switch (hw_debug_points[n].type) {
1646             case GDB_BREAKPOINT_HW:
1647                 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1648                 break;
1649             case GDB_WATCHPOINT_WRITE:
1650                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1651                 break;
1652             case GDB_WATCHPOINT_READ:
1653                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1654                 break;
1655             case GDB_WATCHPOINT_ACCESS:
1656                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1657                                         KVMPPC_DEBUG_WATCH_READ;
1658                 break;
1659             default:
1660                 cpu_abort(cs, "Unsupported breakpoint type\n");
1661             }
1662             dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1663         }
1664     }
1665 }
1666 
1667 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1668 {
1669     CPUState *cs = CPU(cpu);
1670     CPUPPCState *env = &cpu->env;
1671     struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1672     int handle = 0;
1673     int n;
1674     int flag = 0;
1675 
1676     if (cs->singlestep_enabled) {
1677         handle = 1;
1678     } else if (arch_info->status) {
1679         if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1680             if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1681                 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1682                 if (n >= 0) {
1683                     handle = 1;
1684                 }
1685             } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1686                                             KVMPPC_DEBUG_WATCH_WRITE)) {
1687                 n = find_hw_watchpoint(arch_info->address, &flag);
1688                 if (n >= 0) {
1689                     handle = 1;
1690                     cs->watchpoint_hit = &hw_watchpoint;
1691                     hw_watchpoint.vaddr = hw_debug_points[n].addr;
1692                     hw_watchpoint.flags = flag;
1693                 }
1694             }
1695         }
1696     } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1697         handle = 1;
1698     } else {
1699         /* QEMU cannot handle this debug exception, so inject a
1700          * program exception into the guest;
1701          * yes, a program exception, NOT a debug exception !!
1702          * While QEMU owns the debug resources, debug exceptions must
1703          * always be enabled.  To achieve this we set MSR_DE and also
1704          * set MSRP_DEP so the guest cannot change MSR_DE.
1705          * When emulating debug resources for the guest, we instead
1706          * want the guest to control MSR_DE (enabling/disabling the
1707          * debug interrupt as needed).
1708          * Supporting both configurations at once is NOT possible, so
1709          * debug resources cannot be shared between QEMU and the guest
1710          * on the BookE architecture.  In the current design QEMU gets
1711          * priority over the guest: if QEMU is using the debug
1712          * resources, the guest cannot use them.
1713          * For software breakpoints QEMU uses a privileged instruction,
1714          * so we can never get here because of a guest-set debug
1715          * exception; the only possibility is that the guest executed
1716          * a privileged / illegal instruction, which is why we inject
1717          * a program interrupt.
1718          */
1719 
1720         cpu_synchronize_state(cs);
1721         /* env->nip is the PC, so increment it by 4 before calling
1722          * ppc_cpu_do_interrupt(), which sets srr0 = env->nip - 4.
1723          */
1724         env->nip += 4;
1725         cs->exception_index = POWERPC_EXCP_PROGRAM;
1726         env->error_code = POWERPC_EXCP_INVAL;
1727         ppc_cpu_do_interrupt(cs);
1728     }
1729 
1730     return handle;
1731 }
1732 
1733 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1734 {
1735     PowerPCCPU *cpu = POWERPC_CPU(cs);
1736     CPUPPCState *env = &cpu->env;
1737     int ret;
1738 
1739     qemu_mutex_lock_iothread();
1740 
1741     switch (run->exit_reason) {
1742     case KVM_EXIT_DCR:
1743         if (run->dcr.is_write) {
1744             DPRINTF("handle dcr write\n");
1745             ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1746         } else {
1747             DPRINTF("handle dcr read\n");
1748             ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1749         }
1750         break;
1751     case KVM_EXIT_HLT:
1752         DPRINTF("handle halt\n");
1753         ret = kvmppc_handle_halt(cpu);
1754         break;
1755 #if defined(TARGET_PPC64)
1756     case KVM_EXIT_PAPR_HCALL:
1757         DPRINTF("handle PAPR hypercall\n");
1758         run->papr_hcall.ret = spapr_hypercall(cpu,
1759                                               run->papr_hcall.nr,
1760                                               run->papr_hcall.args);
1761         ret = 0;
1762         break;
1763 #endif
1764     case KVM_EXIT_EPR:
1765         DPRINTF("handle epr\n");
1766         run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1767         ret = 0;
1768         break;
1769     case KVM_EXIT_WATCHDOG:
1770         DPRINTF("handle watchdog expiry\n");
1771         watchdog_perform_action();
1772         ret = 0;
1773         break;
1774 
1775     case KVM_EXIT_DEBUG:
1776         DPRINTF("handle debug exception\n");
1777         if (kvm_handle_debug(cpu, run)) {
1778             ret = EXCP_DEBUG;
1779             break;
1780         }
1781         /* re-enter, this exception was guest-internal */
1782         ret = 0;
1783         break;
1784 
1785     default:
1786         fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1787         ret = -1;
1788         break;
1789     }
1790 
1791     qemu_mutex_unlock_iothread();
1792     return ret;
1793 }
1794 
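/* The next few helpers manipulate the BookE timer registers (TSR, the
 * Timer Status Register, and TCR, the Timer Control Register) through
 * KVM's ONE_REG interface; they are used by the BookE timer/watchdog
 * emulation below. */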
1795 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1796 {
1797     CPUState *cs = CPU(cpu);
1798     uint32_t bits = tsr_bits;
1799     struct kvm_one_reg reg = {
1800         .id = KVM_REG_PPC_OR_TSR,
1801         .addr = (uintptr_t) &bits,
1802     };
1803 
1804     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1805 }
1806 
1807 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1808 {
1809 
1810     CPUState *cs = CPU(cpu);
1811     uint32_t bits = tsr_bits;
1812     struct kvm_one_reg reg = {
1813         .id = KVM_REG_PPC_CLEAR_TSR,
1814         .addr = (uintptr_t) &bits,
1815     };
1816 
1817     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1818 }
1819 
1820 int kvmppc_set_tcr(PowerPCCPU *cpu)
1821 {
1822     CPUState *cs = CPU(cpu);
1823     CPUPPCState *env = &cpu->env;
1824     uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1825 
1826     struct kvm_one_reg reg = {
1827         .id = KVM_REG_PPC_TCR,
1828         .addr = (uintptr_t) &tcr,
1829     };
1830 
1831     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1832 }
1833 
1834 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1835 {
1836     CPUState *cs = CPU(cpu);
1837     int ret;
1838 
1839     if (!kvm_enabled()) {
1840         return -1;
1841     }
1842 
1843     if (!cap_ppc_watchdog) {
1844         printf("warning: KVM does not support watchdog\n");
1845         return -1;
1846     }
1847 
1848     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1849     if (ret < 0) {
1850         fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1851                 __func__, strerror(-ret));
1852         return ret;
1853     }
1854 
1855     return ret;
1856 }
1857 
1858 static int read_cpuinfo(const char *field, char *value, int len)
1859 {
1860     FILE *f;
1861     int ret = -1;
1862     int field_len = strlen(field);
1863     char line[512];
1864 
1865     f = fopen("/proc/cpuinfo", "r");
1866     if (!f) {
1867         return -1;
1868     }
1869 
1870     do {
1871         if (!fgets(line, sizeof(line), f)) {
1872             break;
1873         }
1874         if (!strncmp(line, field, field_len)) {
1875             pstrcpy(value, len, line);
1876             ret = 0;
1877             break;
1878         }
1879     } while (*line);
1880 
1881     fclose(f);
1882 
1883     return ret;
1884 }
1885 
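/* Extract the host timebase frequency from /proc/cpuinfo.  On POWER
 * hosts the relevant line typically looks like "timebase : 512000000";
 * if it cannot be found or parsed we fall back to
 * NANOSECONDS_PER_SECOND. */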
1886 uint32_t kvmppc_get_tbfreq(void)
1887 {
1888     char line[512];
1889     char *ns;
1890     uint32_t retval = NANOSECONDS_PER_SECOND;
1891 
1892     if (read_cpuinfo("timebase", line, sizeof(line))) {
1893         return retval;
1894     }
1895 
1896     if (!(ns = strchr(line, ':'))) {
1897         return retval;
1898     }
1899 
1900     ns++;
1901 
1902     return atoi(ns);
1903 }
1904 
1905 bool kvmppc_get_host_serial(char **value)
1906 {
1907     return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
1908                                NULL);
1909 }
1910 
1911 bool kvmppc_get_host_model(char **value)
1912 {
1913     return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
1914 }
1915 
1916 /* Try to find a device tree node for a CPU with clock-frequency property */
1917 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1918 {
1919     struct dirent *dirp;
1920     DIR *dp;
1921 
1922     if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1923         printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1924         return -1;
1925     }
1926 
1927     buf[0] = '\0';
1928     while ((dirp = readdir(dp)) != NULL) {
1929         FILE *f;
1930         snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1931                  dirp->d_name);
1932         f = fopen(buf, "r");
1933         if (f) {
1934             snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1935             fclose(f);
1936             break;
1937         }
1938         buf[0] = '\0';
1939     }
1940     closedir(dp);
1941     if (buf[0] == '\0') {
1942         printf("Unknown host!\n");
1943         return -1;
1944     }
1945 
1946     return 0;
1947 }
1948 
1949 static uint64_t kvmppc_read_int_dt(const char *filename)
1950 {
1951     union {
1952         uint32_t v32;
1953         uint64_t v64;
1954     } u;
1955     FILE *f;
1956     int len;
1957 
1958     f = fopen(filename, "rb");
1959     if (!f) {
1960         return -1;
1961     }
1962 
1963     len = fread(&u, 1, sizeof(u), f);
1964     fclose(f);
1965     switch (len) {
1966     case 4:
1967         /* property is a 32-bit quantity */
1968         return be32_to_cpu(u.v32);
1969     case 8:
1970         return be64_to_cpu(u.v64);
1971     }
1972 
1973     return 0;
1974 }
1975 
1976 /* Read a CPU node property from the host device tree that's a single
1977  * integer (32-bit or 64-bit).  Returns -1 if the property can't be
1978  * found or opened, and 0 if the value isn't in a format we
1979  * understand. */
1980 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1981 {
1982     char buf[PATH_MAX], *tmp;
1983     uint64_t val;
1984 
1985     if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1986         return -1;
1987     }
1988 
1989     tmp = g_strdup_printf("%s/%s", buf, propname);
1990     val = kvmppc_read_int_dt(tmp);
1991     g_free(tmp);
1992 
1993     return val;
1994 }
1995 
1996 uint64_t kvmppc_get_clockfreq(void)
1997 {
1998     return kvmppc_read_int_cpu_dt("clock-frequency");
1999 }
2000 
2001 uint32_t kvmppc_get_vmx(void)
2002 {
2003     return kvmppc_read_int_cpu_dt("ibm,vmx");
2004 }
2005 
2006 uint32_t kvmppc_get_dfp(void)
2007 {
2008     return kvmppc_read_int_cpu_dt("ibm,dfp");
2009 }
2010 
2011 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
2012 {
2013     PowerPCCPU *cpu = ppc_env_get_cpu(env);
2014     CPUState *cs = CPU(cpu);
2015 
2016     if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
2017         !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
2018         return 0;
2019     }
2020 
2021     return 1;
2022 }
2023 
2024 int kvmppc_get_hasidle(CPUPPCState *env)
2025 {
2026     struct kvm_ppc_pvinfo pvinfo;
2027 
2028     if (!kvmppc_get_pvinfo(env, &pvinfo) &&
2029         (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
2030         return 1;
2031     }
2032 
2033     return 0;
2034 }
2035 
2036 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
2037 {
2038     uint32_t *hc = (uint32_t *)buf;
2039     struct kvm_ppc_pvinfo pvinfo;
2040 
2041     if (!kvmppc_get_pvinfo(env, &pvinfo)) {
2042         memcpy(buf, pvinfo.hcall, buf_len);
2043         return 0;
2044     }
2045 
2046     /*
2047      * Fall back to a hypercall sequence that always fails, regardless
2047      * of endianness:
2048      *
2049      *     tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
2050      *     li r3, -1
2051      *     b .+8       (becomes nop in wrong endian)
2052      *     bswap32(li r3, -1)
2053      */
2054 
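    /* The same sequence, as instruction encodings (stored big-endian):
     *   0x08000048  tdi 0,r0,72
     *   0x3860ffff  li  r3, -1
     *   0x48000008  b   .+8
     * plus the byte-swapped "li r3, -1" as the final word. */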
2055     hc[0] = cpu_to_be32(0x08000048);
2056     hc[1] = cpu_to_be32(0x3860ffff);
2057     hc[2] = cpu_to_be32(0x48000008);
2058     hc[3] = cpu_to_be32(bswap32(0x3860ffff));
2059 
2060     return 1;
2061 }
2062 
2063 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
2064 {
2065     return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
2066 }
2067 
2068 void kvmppc_enable_logical_ci_hcalls(void)
2069 {
2070     /*
2071      * FIXME: it would be nice if we could detect the cases where
2072      * we're using a device which requires the in-kernel
2073      * implementation of these hcalls but the kernel lacks it, and
2074      * produce a warning in that case.
2075      */
2076     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
2077     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
2078 }
2079 
2080 void kvmppc_enable_set_mode_hcall(void)
2081 {
2082     kvmppc_enable_hcall(kvm_state, H_SET_MODE);
2083 }
2084 
2085 void kvmppc_enable_clear_ref_mod_hcalls(void)
2086 {
2087     kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
2088     kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
2089 }
2090 
2091 void kvmppc_set_papr(PowerPCCPU *cpu)
2092 {
2093     CPUState *cs = CPU(cpu);
2094     int ret;
2095 
2096     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2097     if (ret) {
2098         error_report("This vCPU type or KVM version does not support PAPR");
2099         exit(1);
2100     }
2101 
2102     /* Update the capability flag so we sync the right information
2103      * with kvm */
2104     cap_papr = 1;
2105 }
2106 
2107 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr)
2108 {
2109     return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr);
2110 }
2111 
2112 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2113 {
2114     CPUState *cs = CPU(cpu);
2115     int ret;
2116 
2117     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2118     if (ret && mpic_proxy) {
2119         error_report("This KVM version does not support EPR");
2120         exit(1);
2121     }
2122 }
2123 
2124 int kvmppc_smt_threads(void)
2125 {
2126     return cap_ppc_smt ? cap_ppc_smt : 1;
2127 }
2128 
2129 #ifdef TARGET_PPC64
2130 off_t kvmppc_alloc_rma(void **rma)
2131 {
2132     off_t size;
2133     int fd;
2134     struct kvm_allocate_rma ret;
2135 
2136     /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
2137      * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
2138      *                      not necessary on this hardware
2139      * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
2140      *
2141      * FIXME: We should allow the user to force contiguous RMA
2142      * allocation in the cap_ppc_rma==1 case.
2143      */
2144     if (cap_ppc_rma < 2) {
2145         return 0;
2146     }
2147 
2148     fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
2149     if (fd < 0) {
2150         fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
2151                 strerror(errno));
2152         return -1;
2153     }
2154 
2155     size = MIN(ret.rma_size, 256ul << 20);
2156 
2157     *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2158     if (*rma == MAP_FAILED) {
2159         fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
2160         return -1;
2161     }
2162 
2163     return size;
2164 }
2165 
2166 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2167 {
2168     struct kvm_ppc_smmu_info info;
2169     long rampagesize, best_page_shift;
2170     int i;
2171 
2172     if (cap_ppc_rma >= 2) {
2173         return current_size;
2174     }
2175 
2176     /* Find the largest hardware supported page size that's less than
2177      * or equal to the (logical) backing page size of guest RAM */
2178     kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
2179     rampagesize = qemu_getrampagesize();
2180     best_page_shift = 0;
2181 
2182     for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2183         struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2184 
2185         if (!sps->page_shift) {
2186             continue;
2187         }
2188 
2189         if ((sps->page_shift > best_page_shift)
2190             && ((1UL << sps->page_shift) <= rampagesize)) {
2191             best_page_shift = sps->page_shift;
2192         }
2193     }
2194 
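    /* The limit below equals (1ULL << best_page_shift) * (1ULL << hash_shift)
     * / 128, i.e. the chosen page size times the HPT size divided by 128,
     * folded into a single shift. */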
2195     return MIN(current_size,
2196                1ULL << (best_page_shift + hash_shift - 7));
2197 }
2198 #endif
2199 
2200 bool kvmppc_spapr_use_multitce(void)
2201 {
2202     return cap_spapr_multitce;
2203 }
2204 
2205 int kvmppc_spapr_enable_inkernel_multitce(void)
2206 {
2207     int ret;
2208 
2209     ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2210                             H_PUT_TCE_INDIRECT, 1);
2211     if (!ret) {
2212         ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2213                                 H_STUFF_TCE, 1);
2214     }
2215 
2216     return ret;
2217 }
2218 
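/* Create an in-kernel TCE (DMA translation) table for the given LIOBN,
 * preferring the 64-bit ioctl when available and falling back to the
 * legacy one.  Returns the mmap()ed table with its fd in *pfd, or NULL
 * (with *pfd == -1) to make the caller fall back to a userspace table. */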
2219 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
2220                               uint64_t bus_offset, uint32_t nb_table,
2221                               int *pfd, bool need_vfio)
2222 {
2223     long len;
2224     int fd;
2225     void *table;
2226 
2227     /* Must set fd to -1 so we don't try to munmap when called for
2228      * destroying the table, which the upper layers -will- do
2229      */
2230     *pfd = -1;
2231     if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2232         return NULL;
2233     }
2234 
2235     if (cap_spapr_tce_64) {
2236         struct kvm_create_spapr_tce_64 args = {
2237             .liobn = liobn,
2238             .page_shift = page_shift,
2239             .offset = bus_offset >> page_shift,
2240             .size = nb_table,
2241             .flags = 0
2242         };
2243         fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE_64, &args);
2244         if (fd < 0) {
2245             fprintf(stderr,
2246                     "KVM: Failed to create TCE64 table for liobn 0x%x\n",
2247                     liobn);
2248             return NULL;
2249         }
2250     } else if (cap_spapr_tce) {
2251         uint64_t window_size = (uint64_t) nb_table << page_shift;
2252         struct kvm_create_spapr_tce args = {
2253             .liobn = liobn,
2254             .window_size = window_size,
2255         };
2256         if ((window_size != args.window_size) || bus_offset) {
2257             return NULL;
2258         }
2259         fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2260         if (fd < 0) {
2261             fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2262                     liobn);
2263             return NULL;
2264         }
2265     } else {
2266         return NULL;
2267     }
2268 
2269     len = nb_table * sizeof(uint64_t);
2270     /* FIXME: round this up to page size */
2271 
2272     table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2273     if (table == MAP_FAILED) {
2274         fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2275                 liobn);
2276         close(fd);
2277         return NULL;
2278     }
2279 
2280     *pfd = fd;
2281     return table;
2282 }
2283 
2284 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
2285 {
2286     long len;
2287 
2288     if (fd < 0) {
2289         return -1;
2290     }
2291 
2292     len = nb_table * sizeof(uint64_t);
2293     if ((munmap(table, len) < 0) ||
2294         (close(fd) < 0)) {
2295         fprintf(stderr, "KVM: Unexpected error removing TCE table: %s\n",
2296                 strerror(errno));
2297         /* Leak the table */
2298     }
2299 
2300     return 0;
2301 }
2302 
2303 int kvmppc_reset_htab(int shift_hint)
2304 {
2305     uint32_t shift = shift_hint;
2306 
2307     if (!kvm_enabled()) {
2308         /* Full emulation, tell caller to allocate htab itself */
2309         return 0;
2310     }
2311     if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2312         int ret;
2313         ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2314         if (ret == -ENOTTY) {
2315             /* At least some versions of PR KVM advertise the
2316              * capability, but don't implement the ioctl().  Oops.
2317              * Return 0 so that we allocate the htab in qemu, as is
2318              * correct for PR. */
2319             return 0;
2320         } else if (ret < 0) {
2321             return ret;
2322         }
2323         return shift;
2324     }
2325 
2326     /* We have a kernel that predates the htab reset calls.  For PR
2327      * KVM we need to allocate the htab ourselves; an HV KVM of this
2328      * era has already allocated a fixed 16MB hash table. */
2329     if (kvmppc_is_pr(kvm_state)) {
2330         /* PR - tell caller to allocate htab */
2331         return 0;
2332     } else {
2333         /* HV - assume 16MB kernel allocated htab */
2334         return 24;
2335     }
2336 }
2337 
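/* Read the host Processor Version Register directly with the mfpvr
 * instruction. */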
2338 static inline uint32_t mfpvr(void)
2339 {
2340     uint32_t pvr;
2341 
2342     asm ("mfpvr %0"
2343          : "=r"(pvr));
2344     return pvr;
2345 }
2346 
2347 static void alter_insns(uint64_t *word, uint64_t flags, bool on)
2348 {
2349     if (on) {
2350         *word |= flags;
2351     } else {
2352         *word &= ~flags;
2353     }
2354 }
2355 
2356 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2357 {
2358     PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2359     uint32_t vmx = kvmppc_get_vmx();
2360     uint32_t dfp = kvmppc_get_dfp();
2361     uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2362     uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2363 
2364     /* Now fix up the class with information we can query from the host */
2365     pcc->pvr = mfpvr();
2366 
2367     if (vmx != -1) {
2368         /* Only override when we know what the host supports */
2369         alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
2370         alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
2371     }
2372     if (dfp != -1) {
2373         /* Only override when we know what the host supports */
2374         alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
2375     }
2376 
2377     if (dcache_size != -1) {
2378         pcc->l1_dcache_size = dcache_size;
2379     }
2380 
2381     if (icache_size != -1) {
2382         pcc->l1_icache_size = icache_size;
2383     }
2384 
2385 #if defined(TARGET_PPC64)
2386     pcc->radix_page_info = kvm_get_radix_page_info();
2387 
2388     if ((pcc->pvr & 0xffffff00) == CPU_POWERPC_POWER9_DD1) {
2389         /*
2390          * POWER9 DD1 has some bugs which make it not really ISA 3.00
2391          * compliant.  More importantly, advertising ISA 3.00
2392          * architected mode may prevent guests from activating
2393          * necessary DD1 workarounds.
2394          */
2395         pcc->pcr_supported &= ~(PCR_COMPAT_3_00 | PCR_COMPAT_2_07
2396                                 | PCR_COMPAT_2_06 | PCR_COMPAT_2_05);
2397     }
2398 #endif /* defined(TARGET_PPC64) */
2399 }
2400 
2401 bool kvmppc_has_cap_epr(void)
2402 {
2403     return cap_epr;
2404 }
2405 
2406 bool kvmppc_has_cap_htab_fd(void)
2407 {
2408     return cap_htab_fd;
2409 }
2410 
2411 bool kvmppc_has_cap_fixup_hcalls(void)
2412 {
2413     return cap_fixup_hcalls;
2414 }
2415 
2416 bool kvmppc_has_cap_htm(void)
2417 {
2418     return cap_htm;
2419 }
2420 
2421 bool kvmppc_has_cap_mmu_radix(void)
2422 {
2423     return cap_mmu_radix;
2424 }
2425 
2426 bool kvmppc_has_cap_mmu_hash_v3(void)
2427 {
2428     return cap_mmu_hash_v3;
2429 }
2430 
2431 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
2432 {
2433     uint32_t host_pvr = mfpvr();
2434     PowerPCCPUClass *pvr_pcc;
2435 
2436     pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2437     if (pvr_pcc == NULL) {
2438         pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2439     }
2440 
2441     return pvr_pcc;
2442 }
2443 
2444 static int kvm_ppc_register_host_cpu_type(void)
2445 {
2446     TypeInfo type_info = {
2447         .name = TYPE_HOST_POWERPC_CPU,
2448         .class_init = kvmppc_host_cpu_class_init,
2449     };
2450     PowerPCCPUClass *pvr_pcc;
2451     ObjectClass *oc;
2452     DeviceClass *dc;
2453     int i;
2454 
2455     pvr_pcc = kvm_ppc_get_host_cpu_class();
2456     if (pvr_pcc == NULL) {
2457         return -1;
2458     }
2459     type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2460     type_register(&type_info);
2461 
2462     oc = object_class_by_name(type_info.name);
2463     g_assert(oc);
2464 
2465 #if defined(TARGET_PPC64)
2466     type_info.name = g_strdup_printf("%s-"TYPE_SPAPR_CPU_CORE, "host");
2467     type_info.parent = TYPE_SPAPR_CPU_CORE;
2468     type_info.instance_size = sizeof(sPAPRCPUCore);
2469     type_info.instance_init = NULL;
2470     type_info.class_init = spapr_cpu_core_class_init;
2471     type_info.class_data = (void *) "host";
2472     type_register(&type_info);
2473     g_free((void *)type_info.name);
2474 #endif
2475 
2476     /*
2477      * Update generic CPU family class alias (e.g. on a POWER8NVL host,
2478      * we want "POWER8" to be a "family" alias that points to the current
2479      * host CPU type, too)
2480      */
2481     dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc));
2482     for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) {
2483         if (strcmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) {
2484             char *suffix;
2485 
2486             ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc));
2487             suffix = strstr(ppc_cpu_aliases[i].model, "-"TYPE_POWERPC_CPU);
2488             if (suffix) {
2489                 *suffix = 0;
2490             }
2491             ppc_cpu_aliases[i].oc = oc;
2492             break;
2493         }
2494     }
2495 
2496     return 0;
2497 }
2498 
2499 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2500 {
2501     struct kvm_rtas_token_args args = {
2502         .token = token,
2503     };
2504 
2505     if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2506         return -ENOENT;
2507     }
2508 
2509     strncpy(args.name, function, sizeof(args.name));
2510 
2511     return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2512 }
2513 
2514 int kvmppc_get_htab_fd(bool write)
2515 {
2516     struct kvm_get_htab_fd s = {
2517         .flags = write ? KVM_GET_HTAB_WRITE : 0,
2518         .start_index = 0,
2519     };
2520 
2521     if (!cap_htab_fd) {
2522         fprintf(stderr, "KVM version doesn't support saving the hash table\n");
2523         return -1;
2524     }
2525 
2526     return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2527 }
2528 
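/* Stream the guest hash page table out to the migration stream.  The
 * kernel HTAB fd yields a sequence of kvm_get_htab_header chunks, each
 * followed by n_valid HPTEs of HASH_PTE_SIZE_64 bytes; the header fields
 * are written in big-endian form, followed by the raw HPTE data. */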
2529 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2530 {
2531     int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2532     uint8_t buf[bufsize];
2533     ssize_t rc;
2534 
2535     do {
2536         rc = read(fd, buf, bufsize);
2537         if (rc < 0) {
2538             fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2539                     strerror(errno));
2540             return rc;
2541         } else if (rc) {
2542             uint8_t *buffer = buf;
2543             ssize_t n = rc;
2544             while (n) {
2545                 struct kvm_get_htab_header *head =
2546                     (struct kvm_get_htab_header *) buffer;
2547                 size_t chunksize = sizeof(*head) +
2548                      HASH_PTE_SIZE_64 * head->n_valid;
2549 
2550                 qemu_put_be32(f, head->index);
2551                 qemu_put_be16(f, head->n_valid);
2552                 qemu_put_be16(f, head->n_invalid);
2553                 qemu_put_buffer(f, (void *)(head + 1),
2554                                 HASH_PTE_SIZE_64 * head->n_valid);
2555 
2556                 buffer += chunksize;
2557                 n -= chunksize;
2558             }
2559         }
2560     } while ((rc != 0)
2561              && ((max_ns < 0)
2562                  || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2563 
2564     return (rc == 0) ? 1 : 0;
2565 }
2566 
2567 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2568                            uint16_t n_valid, uint16_t n_invalid)
2569 {
2570     struct kvm_get_htab_header *buf;
2571     size_t chunksize = sizeof(*buf) + n_valid * HASH_PTE_SIZE_64;
2572     ssize_t rc;
2573 
2574     buf = alloca(chunksize);
2575     buf->index = index;
2576     buf->n_valid = n_valid;
2577     buf->n_invalid = n_invalid;
2578 
2579     qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64 * n_valid);
2580 
2581     rc = write(fd, buf, chunksize);
2582     if (rc < 0) {
2583         fprintf(stderr, "Error writing KVM hash table: %s\n",
2584                 strerror(errno));
2585         return rc;
2586     }
2587     if (rc != chunksize) {
2588         /* We should never get a short write on a single chunk */
2589         fprintf(stderr, "Short write, restoring KVM hash table\n");
2590         return -1;
2591     }
2592     return 0;
2593 }
2594 
2595 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2596 {
2597     return true;
2598 }
2599 
2600 void kvm_arch_init_irq_routing(KVMState *s)
2601 {
2602 }
2603 
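/* Read n HPTEs starting at index ptex through the kernel HTAB fd,
 * copying valid entries into hptes[] and zero-filling the ranges the
 * kernel reports as invalid. */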
2604 void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n)
2605 {
2606     struct kvm_get_htab_fd ghf = {
2607         .flags = 0,
2608         .start_index = ptex,
2609     };
2610     int fd, rc;
2611     int i;
2612 
2613     fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2614     if (fd < 0) {
2615         hw_error("kvmppc_read_hptes: Unable to open HPT fd");
2616     }
2617 
2618     i = 0;
2619     while (i < n) {
2620         struct kvm_get_htab_header *hdr;
2621         int m = n < HPTES_PER_GROUP ? n : HPTES_PER_GROUP;
2622         char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64];
2623 
2624         rc = read(fd, buf, sizeof(buf));
2625         if (rc < 0) {
2626             hw_error("kvmppc_read_hptes: Unable to read HPTEs");
2627         }
2628 
2629         hdr = (struct kvm_get_htab_header *)buf;
2630         while ((i < n) && ((char *)hdr < (buf + rc))) {
2631             int invalid = hdr->n_invalid;
2632 
2633             if (hdr->index != (ptex + i)) {
2634                 hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32
2635                          " != (%"HWADDR_PRIu" + %d)", hdr->index, ptex, i);
2636             }
2637 
2638             memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * hdr->n_valid);
2639             i += hdr->n_valid;
2640 
2641             if ((n - i) < invalid) {
2642                 invalid = n - i;
2643             }
2644             memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64);
2645             i += hdr->n_invalid;
2646 
2647             hdr = (struct kvm_get_htab_header *)
2648                 ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid);
2649         }
2650     }
2651 
2652     close(fd);
2653 }
2654 
2655 void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1)
2656 {
2657     int fd, rc;
2658     struct kvm_get_htab_fd ghf;
2659     struct {
2660         struct kvm_get_htab_header hdr;
2661         uint64_t pte0;
2662         uint64_t pte1;
2663     } buf;
2664 
2665     ghf.flags = 0;
2666     ghf.start_index = 0;     /* Ignored */
2667     fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2668     if (fd < 0) {
2669         hw_error("kvmppc_write_hpte: Unable to open HPT fd");
2670     }
2671 
2672     buf.hdr.n_valid = 1;
2673     buf.hdr.n_invalid = 0;
2674     buf.hdr.index = ptex;
2675     buf.pte0 = cpu_to_be64(pte0);
2676     buf.pte1 = cpu_to_be64(pte1);
2677 
2678     rc = write(fd, &buf, sizeof(buf));
2679     if (rc != sizeof(buf)) {
2680         hw_error("kvmppc_write_hpte: Unable to update KVM HPT");
2681     }
2682     close(fd);
2683 }
2684 
2685 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2686                              uint64_t address, uint32_t data, PCIDevice *dev)
2687 {
2688     return 0;
2689 }
2690 
2691 int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
2692                                 int vector, PCIDevice *dev)
2693 {
2694     return 0;
2695 }
2696 
2697 int kvm_arch_release_virq_post(int virq)
2698 {
2699     return 0;
2700 }
2701 
2702 int kvm_arch_msi_data_to_gsi(uint32_t data)
2703 {
2704     return data & 0xffff;
2705 }
2706 
2707 int kvmppc_enable_hwrng(void)
2708 {
2709     if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2710         return -1;
2711     }
2712 
2713     return kvmppc_enable_hcall(kvm_state, H_RANDOM);
2714 }
2715 
2716 void kvmppc_check_papr_resize_hpt(Error **errp)
2717 {
2718     if (!kvm_enabled()) {
2719         return; /* No KVM, we're good */
2720     }
2721 
2722     if (cap_resize_hpt) {
2723         return; /* Kernel has explicit support, we're good */
2724     }
2725 
2726     /* Otherwise fallback on looking for PR KVM */
2727     if (kvmppc_is_pr(kvm_state)) {
2728         return;
2729     }
2730 
2731     error_setg(errp,
2732                "Hash page table resizing not available with this KVM version");
2733 }
2734 
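/* Hash page table resizing is a two-step operation: the new size (shift)
 * is first "prepared" and later "committed".  Both wrappers simply
 * forward to the corresponding KVM ioctls and fail with -ENOSYS when the
 * capability is absent. */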
2735 int kvmppc_resize_hpt_prepare(PowerPCCPU *cpu, target_ulong flags, int shift)
2736 {
2737     CPUState *cs = CPU(cpu);
2738     struct kvm_ppc_resize_hpt rhpt = {
2739         .flags = flags,
2740         .shift = shift,
2741     };
2742 
2743     if (!cap_resize_hpt) {
2744         return -ENOSYS;
2745     }
2746 
2747     return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_PREPARE, &rhpt);
2748 }
2749 
2750 int kvmppc_resize_hpt_commit(PowerPCCPU *cpu, target_ulong flags, int shift)
2751 {
2752     CPUState *cs = CPU(cpu);
2753     struct kvm_ppc_resize_hpt rhpt = {
2754         .flags = flags,
2755         .shift = shift,
2756     };
2757 
2758     if (!cap_resize_hpt) {
2759         return -ENOSYS;
2760     }
2761 
2762     return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_COMMIT, &rhpt);
2763 }
2764 
2765 static void kvmppc_pivot_hpt_cpu(CPUState *cs, run_on_cpu_data arg)
2766 {
2767     target_ulong sdr1 = arg.target_ptr;
2768     PowerPCCPU *cpu = POWERPC_CPU(cs);
2769     CPUPPCState *env = &cpu->env;
2770 
2771     /* This is just for the benefit of PR KVM */
2772     cpu_synchronize_state(cs);
2773     env->spr[SPR_SDR1] = sdr1;
2774     if (kvmppc_put_books_sregs(cpu) < 0) {
2775         error_report("Unable to update SDR1 in KVM");
2776         exit(1);
2777     }
2778 }
2779 
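/* Point every vCPU's SDR1 (the hash table base/size register) at the new
 * hash page table.  The per-CPU helper above pushes the updated sregs
 * into KVM, which mainly matters for PR KVM. */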
2780 void kvmppc_update_sdr1(target_ulong sdr1)
2781 {
2782     CPUState *cs;
2783 
2784     CPU_FOREACH(cs) {
2785         run_on_cpu(cs, kvmppc_pivot_hpt_cpu, RUN_ON_CPU_TARGET_PTR(sdr1));
2786     }
2787 }
2788