xref: /openbmc/qemu/target/ppc/kvm.c (revision 864a2178d4c014a217cacf76e42b818fe9feb1d4)
1 /*
2  * PowerPC implementation of KVM hooks
3  *
4  * Copyright IBM Corp. 2007
5  * Copyright (C) 2011 Freescale Semiconductor, Inc.
6  *
7  * Authors:
8  *  Jerone Young <jyoung5@us.ibm.com>
9  *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10  *  Hollis Blanchard <hollisb@us.ibm.com>
11  *
12  * This work is licensed under the terms of the GNU GPL, version 2 or later.
13  * See the COPYING file in the top-level directory.
14  *
15  */
16 
17 #include "qemu/osdep.h"
18 #include <dirent.h>
19 #include <sys/ioctl.h>
20 #include <sys/vfs.h>
21 
22 #include <linux/kvm.h>
23 
24 #include "qemu-common.h"
25 #include "qemu/error-report.h"
26 #include "cpu.h"
27 #include "cpu-models.h"
28 #include "qemu/timer.h"
29 #include "sysemu/sysemu.h"
30 #include "sysemu/hw_accel.h"
31 #include "kvm_ppc.h"
32 #include "sysemu/cpus.h"
33 #include "sysemu/device_tree.h"
34 #include "mmu-hash64.h"
35 
36 #include "hw/sysbus.h"
37 #include "hw/ppc/spapr.h"
38 #include "hw/ppc/spapr_vio.h"
39 #include "hw/ppc/spapr_cpu_core.h"
40 #include "hw/ppc/ppc.h"
41 #include "sysemu/watchdog.h"
42 #include "trace.h"
43 #include "exec/gdbstub.h"
44 #include "exec/memattrs.h"
45 #include "exec/ram_addr.h"
46 #include "sysemu/hostmem.h"
47 #include "qemu/cutils.h"
48 #include "qemu/mmap-alloc.h"
49 #if defined(TARGET_PPC64)
50 #include "hw/ppc/spapr_cpu_core.h"
51 #endif
52 #include "elf.h"
53 #include "sysemu/kvm_int.h"
54 
/* Uncomment the following define to make DPRINTF() print to stderr. */
//#define DEBUG_KVM

#ifdef DEBUG_KVM
/* Debug build: forward the format string and its arguments to fprintf(). */
#define DPRINTF(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
/* Default build: DPRINTF() expands to an empty statement, so its arguments
 * are neither evaluated nor checked against the format string. */
#define DPRINTF(fmt, ...) \
    do { } while (0)
#endif

/* Location of the "cpus" directory of the device tree exported under /proc. */
#define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"
66 
/* Capability table exported to the generic KVM code.  It contains nothing
 * but the KVM_CAP_LAST_INFO entry. */
const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
    KVM_CAP_LAST_INFO
};

/* Cached capability probe results.  Unless noted otherwise they are filled
 * in by kvm_arch_init(). */
static int cap_interrupt_unset = false;
static int cap_interrupt_level = false;
static int cap_segstate;
static int cap_booke_sregs;
static int cap_ppc_smt;
static int cap_ppc_rma;
static int cap_spapr_tce;
static int cap_spapr_tce_64;
static int cap_spapr_multitce;
static int cap_spapr_vfio;      /* Forced to false by kvm_arch_init() */
static int cap_hior;
static int cap_one_reg;
static int cap_epr;
static int cap_ppc_watchdog;
static int cap_papr;            /* Not probed by kvm_arch_init() */
static int cap_htab_fd;
static int cap_fixup_hcalls;
static int cap_htm;             /* Hardware transactional memory support */
static int cap_mmu_radix;
static int cap_mmu_hash_v3;

/* Value read from KVM_REG_PPC_DEBUG_INST in kvm_arch_init_vcpu(). */
static uint32_t debug_inst_opcode;
93 
/* XXX We have a race condition where we actually have a level triggered
 *     interrupt, but the infrastructure can't expose that yet, so the guest
 *     takes but ignores it, goes to sleep and never gets notified that there's
 *     still an interrupt pending.
 *
 *     As a quick workaround, let's just wake up again 20 ms after we injected
 *     an interrupt. That way we can ensure that we're always reinjecting
 *     interrupts in case the guest swallowed them.
 *
 *     The timer is created in kvm_arch_init_vcpu() with kvm_kick_cpu() as
 *     its callback.
 */
static QEMUTimer *idle_timer;
104 
105 static void kvm_kick_cpu(void *opaque)
106 {
107     PowerPCCPU *cpu = opaque;
108 
109     qemu_cpu_kick(CPU(cpu));
110 }
111 
112 /* Check whether we are running with KVM-PR (instead of KVM-HV).  This
113  * should only be used for fallback tests - generally we should use
114  * explicit capabilities for the features we want, rather than
115  * assuming what is/isn't available depending on the KVM variant. */
116 static bool kvmppc_is_pr(KVMState *ks)
117 {
118     /* Assume KVM-PR if the GET_PVINFO capability is available */
119     return kvm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
120 }
121 
122 static int kvm_ppc_register_host_cpu_type(void);
123 
124 int kvm_arch_init(MachineState *ms, KVMState *s)
125 {
126     cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
127     cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
128     cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
129     cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
130     cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
131     cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
132     cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
133     cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64);
134     cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
135     cap_spapr_vfio = false;
136     cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
137     cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
138     cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
139     cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
140     /* Note: we don't set cap_papr here, because this capability is
141      * only activated after this by kvmppc_set_papr() */
142     cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
143     cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
144     cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
145     cap_mmu_radix = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_RADIX);
146     cap_mmu_hash_v3 = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_HASH_V3);
147 
148     if (!cap_interrupt_level) {
149         fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
150                         "VM to stall at times!\n");
151     }
152 
153     kvm_ppc_register_host_cpu_type();
154 
155     return 0;
156 }
157 
158 int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
159 {
160     return 0;
161 }
162 
163 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
164 {
165     CPUPPCState *cenv = &cpu->env;
166     CPUState *cs = CPU(cpu);
167     struct kvm_sregs sregs;
168     int ret;
169 
170     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
171         /* What we're really trying to say is "if we're on BookE, we use
172            the native PVR for now". This is the only sane way to check
173            it though, so we potentially confuse users that they can run
174            BookE guests on BookS. Let's hope nobody dares enough :) */
175         return 0;
176     } else {
177         if (!cap_segstate) {
178             fprintf(stderr, "kvm error: missing PVR setting capability\n");
179             return -ENOSYS;
180         }
181     }
182 
183     ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
184     if (ret) {
185         return ret;
186     }
187 
188     sregs.pvr = cenv->spr[SPR_PVR];
189     return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
190 }
191 
192 /* Set up a shared TLB array with KVM */
193 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
194 {
195     CPUPPCState *env = &cpu->env;
196     CPUState *cs = CPU(cpu);
197     struct kvm_book3e_206_tlb_params params = {};
198     struct kvm_config_tlb cfg = {};
199     unsigned int entries = 0;
200     int ret, i;
201 
202     if (!kvm_enabled() ||
203         !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
204         return 0;
205     }
206 
207     assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
208 
209     for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
210         params.tlb_sizes[i] = booke206_tlb_size(env, i);
211         params.tlb_ways[i] = booke206_tlb_ways(env, i);
212         entries += params.tlb_sizes[i];
213     }
214 
215     assert(entries == env->nb_tlb);
216     assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
217 
218     env->tlb_dirty = true;
219 
220     cfg.array = (uintptr_t)env->tlb.tlbm;
221     cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
222     cfg.params = (uintptr_t)&params;
223     cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
224 
225     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
226     if (ret < 0) {
227         fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
228                 __func__, strerror(-ret));
229         return ret;
230     }
231 
232     env->kvm_sw_tlb = true;
233     return 0;
234 }
235 
236 
237 #if defined(TARGET_PPC64)
238 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
239                                        struct kvm_ppc_smmu_info *info)
240 {
241     CPUPPCState *env = &cpu->env;
242     CPUState *cs = CPU(cpu);
243 
244     memset(info, 0, sizeof(*info));
245 
246     /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
247      * need to "guess" what the supported page sizes are.
248      *
249      * For that to work we make a few assumptions:
250      *
251      * - Check whether we are running "PR" KVM which only supports 4K
252      *   and 16M pages, but supports them regardless of the backing
253      *   store characteritics. We also don't support 1T segments.
254      *
255      *   This is safe as if HV KVM ever supports that capability or PR
256      *   KVM grows supports for more page/segment sizes, those versions
257      *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
258      *   will not hit this fallback
259      *
260      * - Else we are running HV KVM. This means we only support page
261      *   sizes that fit in the backing store. Additionally we only
262      *   advertize 64K pages if the processor is ARCH 2.06 and we assume
263      *   P7 encodings for the SLB and hash table. Here too, we assume
264      *   support for any newer processor will mean a kernel that
265      *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
266      *   this fallback.
267      */
268     if (kvmppc_is_pr(cs->kvm_state)) {
269         /* No flags */
270         info->flags = 0;
271         info->slb_size = 64;
272 
273         /* Standard 4k base page size segment */
274         info->sps[0].page_shift = 12;
275         info->sps[0].slb_enc = 0;
276         info->sps[0].enc[0].page_shift = 12;
277         info->sps[0].enc[0].pte_enc = 0;
278 
279         /* Standard 16M large page size segment */
280         info->sps[1].page_shift = 24;
281         info->sps[1].slb_enc = SLB_VSID_L;
282         info->sps[1].enc[0].page_shift = 24;
283         info->sps[1].enc[0].pte_enc = 0;
284     } else {
285         int i = 0;
286 
287         /* HV KVM has backing store size restrictions */
288         info->flags = KVM_PPC_PAGE_SIZES_REAL;
289 
290         if (env->mmu_model & POWERPC_MMU_1TSEG) {
291             info->flags |= KVM_PPC_1T_SEGMENTS;
292         }
293 
294         if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 ||
295            POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) {
296             info->slb_size = 32;
297         } else {
298             info->slb_size = 64;
299         }
300 
301         /* Standard 4k base page size segment */
302         info->sps[i].page_shift = 12;
303         info->sps[i].slb_enc = 0;
304         info->sps[i].enc[0].page_shift = 12;
305         info->sps[i].enc[0].pte_enc = 0;
306         i++;
307 
308         /* 64K on MMU 2.06 and later */
309         if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 ||
310             POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) {
311             info->sps[i].page_shift = 16;
312             info->sps[i].slb_enc = 0x110;
313             info->sps[i].enc[0].page_shift = 16;
314             info->sps[i].enc[0].pte_enc = 1;
315             i++;
316         }
317 
318         /* Standard 16M large page size segment */
319         info->sps[i].page_shift = 24;
320         info->sps[i].slb_enc = SLB_VSID_L;
321         info->sps[i].enc[0].page_shift = 24;
322         info->sps[i].enc[0].pte_enc = 0;
323     }
324 }
325 
326 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
327 {
328     CPUState *cs = CPU(cpu);
329     int ret;
330 
331     if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
332         ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
333         if (ret == 0) {
334             return;
335         }
336     }
337 
338     kvm_get_fallback_smmu_info(cpu, info);
339 }
340 
341 struct ppc_radix_page_info *kvm_get_radix_page_info(void)
342 {
343     KVMState *s = KVM_STATE(current_machine->accelerator);
344     struct ppc_radix_page_info *radix_page_info;
345     struct kvm_ppc_rmmu_info rmmu_info;
346     int i;
347 
348     if (!kvm_check_extension(s, KVM_CAP_PPC_MMU_RADIX)) {
349         return NULL;
350     }
351     if (kvm_vm_ioctl(s, KVM_PPC_GET_RMMU_INFO, &rmmu_info)) {
352         return NULL;
353     }
354     radix_page_info = g_malloc0(sizeof(*radix_page_info));
355     radix_page_info->count = 0;
356     for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) {
357         if (rmmu_info.ap_encodings[i]) {
358             radix_page_info->entries[i] = rmmu_info.ap_encodings[i];
359             radix_page_info->count++;
360         }
361     }
362     return radix_page_info;
363 }
364 
365 target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu,
366                                      bool radix, bool gtse,
367                                      uint64_t proc_tbl)
368 {
369     CPUState *cs = CPU(cpu);
370     int ret;
371     uint64_t flags = 0;
372     struct kvm_ppc_mmuv3_cfg cfg = {
373         .process_table = proc_tbl,
374     };
375 
376     if (radix) {
377         flags |= KVM_PPC_MMUV3_RADIX;
378     }
379     if (gtse) {
380         flags |= KVM_PPC_MMUV3_GTSE;
381     }
382     cfg.flags = flags;
383     ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_CONFIGURE_V3_MMU, &cfg);
384     switch (ret) {
385     case 0:
386         return H_SUCCESS;
387     case -EINVAL:
388         return H_PARAMETER;
389     case -ENODEV:
390         return H_NOT_AVAILABLE;
391     default:
392         return H_HARDWARE;
393     }
394 }
395 
/*
 * Tell whether a page of (1 << @shift) bytes may be offered to the guest.
 *
 * Without KVM_PPC_PAGE_SIZES_REAL in @flags every size is accepted;
 * with it, the page must not be larger than @rampgsize.
 */
static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
{
    bool limited_by_backing = (flags & KVM_PPC_PAGE_SIZES_REAL) != 0;

    return !limited_by_backing || (1ul << shift) <= rampgsize;
}
404 
405 static long max_cpu_page_size;
406 
407 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
408 {
409     static struct kvm_ppc_smmu_info smmu_info;
410     static bool has_smmu_info;
411     CPUPPCState *env = &cpu->env;
412     int iq, ik, jq, jk;
413     bool has_64k_pages = false;
414 
415     /* We only handle page sizes for 64-bit server guests for now */
416     if (!(env->mmu_model & POWERPC_MMU_64)) {
417         return;
418     }
419 
420     /* Collect MMU info from kernel if not already */
421     if (!has_smmu_info) {
422         kvm_get_smmu_info(cpu, &smmu_info);
423         has_smmu_info = true;
424     }
425 
426     if (!max_cpu_page_size) {
427         max_cpu_page_size = qemu_getrampagesize();
428     }
429 
430     /* Convert to QEMU form */
431     memset(&env->sps, 0, sizeof(env->sps));
432 
433     /* If we have HV KVM, we need to forbid CI large pages if our
434      * host page size is smaller than 64K.
435      */
436     if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) {
437         env->ci_large_pages = getpagesize() >= 0x10000;
438     }
439 
440     /*
441      * XXX This loop should be an entry wide AND of the capabilities that
442      *     the selected CPU has with the capabilities that KVM supports.
443      */
444     for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
445         struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
446         struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
447 
448         if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
449                                  ksps->page_shift)) {
450             continue;
451         }
452         qsps->page_shift = ksps->page_shift;
453         qsps->slb_enc = ksps->slb_enc;
454         for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
455             if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
456                                      ksps->enc[jk].page_shift)) {
457                 continue;
458             }
459             if (ksps->enc[jk].page_shift == 16) {
460                 has_64k_pages = true;
461             }
462             qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
463             qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
464             if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
465                 break;
466             }
467         }
468         if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
469             break;
470         }
471     }
472     env->slb_nr = smmu_info.slb_size;
473     if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
474         env->mmu_model &= ~POWERPC_MMU_1TSEG;
475     }
476     if (!has_64k_pages) {
477         env->mmu_model &= ~POWERPC_MMU_64K;
478     }
479 }
480 
481 bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path)
482 {
483     Object *mem_obj = object_resolve_path(obj_path, NULL);
484     char *mempath = object_property_get_str(mem_obj, "mem-path", NULL);
485     long pagesize;
486 
487     if (mempath) {
488         pagesize = qemu_mempath_getpagesize(mempath);
489         g_free(mempath);
490     } else {
491         pagesize = getpagesize();
492     }
493 
494     return pagesize >= max_cpu_page_size;
495 }
496 
497 #else /* defined (TARGET_PPC64) */
498 
499 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
500 {
501 }
502 
/* Variant used when TARGET_PPC64 is not defined: every backend is accepted
 * and @obj_path is not looked at. */
bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path)
{
    (void)obj_path;

    return true;
}
507 
508 #endif /* !defined (TARGET_PPC64) */
509 
510 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
511 {
512     return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
513 }
514 
/* e500 supports 2 h/w breakpoint and 2 watchpoint.
 * book3s supports only 1 watchpoint, so array size
 * of 4 is sufficient for now.
 */
#define MAX_HW_BKPTS 4

/* Table of hardware debug points: one address and one type per slot. */
static struct HWBreakpoint {
    target_ulong addr;
    int type;
} hw_debug_points[MAX_HW_BKPTS];

static CPUWatchpoint hw_watchpoint;

/* Default there is no breakpoint and watchpoint supported */
static int max_hw_breakpoint;
static int max_hw_watchpoint;
/* NOTE(review): presumably the number of slots currently in use — confirm
 * against the insert/remove code, which is outside this view. */
static int nb_hw_breakpoint;
static int nb_hw_watchpoint;
533 
534 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
535 {
536     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
537         max_hw_breakpoint = 2;
538         max_hw_watchpoint = 2;
539     }
540 
541     if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
542         fprintf(stderr, "Error initializing h/w breakpoints\n");
543         return;
544     }
545 }
546 
547 int kvm_arch_init_vcpu(CPUState *cs)
548 {
549     PowerPCCPU *cpu = POWERPC_CPU(cs);
550     CPUPPCState *cenv = &cpu->env;
551     int ret;
552 
553     /* Gather server mmu info from KVM and update the CPU state */
554     kvm_fixup_page_sizes(cpu);
555 
556     /* Synchronize sregs with kvm */
557     ret = kvm_arch_sync_sregs(cpu);
558     if (ret) {
559         if (ret == -EINVAL) {
560             error_report("Register sync failed... If you're using kvm-hv.ko,"
561                          " only \"-cpu host\" is possible");
562         }
563         return ret;
564     }
565 
566     idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
567 
568     switch (cenv->mmu_model) {
569     case POWERPC_MMU_BOOKE206:
570         /* This target supports access to KVM's guest TLB */
571         ret = kvm_booke206_tlb_init(cpu);
572         break;
573     case POWERPC_MMU_2_07:
574         if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
575             /* KVM-HV has transactional memory on POWER8 also without the
576              * KVM_CAP_PPC_HTM extension, so enable it here instead as
577              * long as it's availble to userspace on the host. */
578             if (qemu_getauxval(AT_HWCAP2) & PPC_FEATURE2_HAS_HTM) {
579                 cap_htm = true;
580             }
581         }
582         break;
583     default:
584         break;
585     }
586 
587     kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
588     kvmppc_hw_debug_points_init(cenv);
589 
590     return ret;
591 }
592 
593 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
594 {
595     CPUPPCState *env = &cpu->env;
596     CPUState *cs = CPU(cpu);
597     struct kvm_dirty_tlb dirty_tlb;
598     unsigned char *bitmap;
599     int ret;
600 
601     if (!env->kvm_sw_tlb) {
602         return;
603     }
604 
605     bitmap = g_malloc((env->nb_tlb + 7) / 8);
606     memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
607 
608     dirty_tlb.bitmap = (uintptr_t)bitmap;
609     dirty_tlb.num_dirty = env->nb_tlb;
610 
611     ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
612     if (ret) {
613         fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
614                 __func__, strerror(-ret));
615     }
616 
617     g_free(bitmap);
618 }
619 
620 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
621 {
622     PowerPCCPU *cpu = POWERPC_CPU(cs);
623     CPUPPCState *env = &cpu->env;
624     union {
625         uint32_t u32;
626         uint64_t u64;
627     } val;
628     struct kvm_one_reg reg = {
629         .id = id,
630         .addr = (uintptr_t) &val,
631     };
632     int ret;
633 
634     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
635     if (ret != 0) {
636         trace_kvm_failed_spr_get(spr, strerror(errno));
637     } else {
638         switch (id & KVM_REG_SIZE_MASK) {
639         case KVM_REG_SIZE_U32:
640             env->spr[spr] = val.u32;
641             break;
642 
643         case KVM_REG_SIZE_U64:
644             env->spr[spr] = val.u64;
645             break;
646 
647         default:
648             /* Don't handle this size yet */
649             abort();
650         }
651     }
652 }
653 
654 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
655 {
656     PowerPCCPU *cpu = POWERPC_CPU(cs);
657     CPUPPCState *env = &cpu->env;
658     union {
659         uint32_t u32;
660         uint64_t u64;
661     } val;
662     struct kvm_one_reg reg = {
663         .id = id,
664         .addr = (uintptr_t) &val,
665     };
666     int ret;
667 
668     switch (id & KVM_REG_SIZE_MASK) {
669     case KVM_REG_SIZE_U32:
670         val.u32 = env->spr[spr];
671         break;
672 
673     case KVM_REG_SIZE_U64:
674         val.u64 = env->spr[spr];
675         break;
676 
677     default:
678         /* Don't handle this size yet */
679         abort();
680     }
681 
682     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
683     if (ret != 0) {
684         trace_kvm_failed_spr_set(spr, strerror(errno));
685     }
686 }
687 
688 static int kvm_put_fp(CPUState *cs)
689 {
690     PowerPCCPU *cpu = POWERPC_CPU(cs);
691     CPUPPCState *env = &cpu->env;
692     struct kvm_one_reg reg;
693     int i;
694     int ret;
695 
696     if (env->insns_flags & PPC_FLOAT) {
697         uint64_t fpscr = env->fpscr;
698         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
699 
700         reg.id = KVM_REG_PPC_FPSCR;
701         reg.addr = (uintptr_t)&fpscr;
702         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
703         if (ret < 0) {
704             DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
705             return ret;
706         }
707 
708         for (i = 0; i < 32; i++) {
709             uint64_t vsr[2];
710 
711 #ifdef HOST_WORDS_BIGENDIAN
712             vsr[0] = float64_val(env->fpr[i]);
713             vsr[1] = env->vsr[i];
714 #else
715             vsr[0] = env->vsr[i];
716             vsr[1] = float64_val(env->fpr[i]);
717 #endif
718             reg.addr = (uintptr_t) &vsr;
719             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
720 
721             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
722             if (ret < 0) {
723                 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
724                         i, strerror(errno));
725                 return ret;
726             }
727         }
728     }
729 
730     if (env->insns_flags & PPC_ALTIVEC) {
731         reg.id = KVM_REG_PPC_VSCR;
732         reg.addr = (uintptr_t)&env->vscr;
733         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
734         if (ret < 0) {
735             DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
736             return ret;
737         }
738 
739         for (i = 0; i < 32; i++) {
740             reg.id = KVM_REG_PPC_VR(i);
741             reg.addr = (uintptr_t)&env->avr[i];
742             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
743             if (ret < 0) {
744                 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
745                 return ret;
746             }
747         }
748     }
749 
750     return 0;
751 }
752 
753 static int kvm_get_fp(CPUState *cs)
754 {
755     PowerPCCPU *cpu = POWERPC_CPU(cs);
756     CPUPPCState *env = &cpu->env;
757     struct kvm_one_reg reg;
758     int i;
759     int ret;
760 
761     if (env->insns_flags & PPC_FLOAT) {
762         uint64_t fpscr;
763         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
764 
765         reg.id = KVM_REG_PPC_FPSCR;
766         reg.addr = (uintptr_t)&fpscr;
767         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
768         if (ret < 0) {
769             DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
770             return ret;
771         } else {
772             env->fpscr = fpscr;
773         }
774 
775         for (i = 0; i < 32; i++) {
776             uint64_t vsr[2];
777 
778             reg.addr = (uintptr_t) &vsr;
779             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
780 
781             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
782             if (ret < 0) {
783                 DPRINTF("Unable to get %s%d from KVM: %s\n",
784                         vsx ? "VSR" : "FPR", i, strerror(errno));
785                 return ret;
786             } else {
787 #ifdef HOST_WORDS_BIGENDIAN
788                 env->fpr[i] = vsr[0];
789                 if (vsx) {
790                     env->vsr[i] = vsr[1];
791                 }
792 #else
793                 env->fpr[i] = vsr[1];
794                 if (vsx) {
795                     env->vsr[i] = vsr[0];
796                 }
797 #endif
798             }
799         }
800     }
801 
802     if (env->insns_flags & PPC_ALTIVEC) {
803         reg.id = KVM_REG_PPC_VSCR;
804         reg.addr = (uintptr_t)&env->vscr;
805         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
806         if (ret < 0) {
807             DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
808             return ret;
809         }
810 
811         for (i = 0; i < 32; i++) {
812             reg.id = KVM_REG_PPC_VR(i);
813             reg.addr = (uintptr_t)&env->avr[i];
814             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
815             if (ret < 0) {
816                 DPRINTF("Unable to get VR%d from KVM: %s\n",
817                         i, strerror(errno));
818                 return ret;
819             }
820         }
821     }
822 
823     return 0;
824 }
825 
826 #if defined(TARGET_PPC64)
827 static int kvm_get_vpa(CPUState *cs)
828 {
829     PowerPCCPU *cpu = POWERPC_CPU(cs);
830     CPUPPCState *env = &cpu->env;
831     struct kvm_one_reg reg;
832     int ret;
833 
834     reg.id = KVM_REG_PPC_VPA_ADDR;
835     reg.addr = (uintptr_t)&env->vpa_addr;
836     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
837     if (ret < 0) {
838         DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
839         return ret;
840     }
841 
842     assert((uintptr_t)&env->slb_shadow_size
843            == ((uintptr_t)&env->slb_shadow_addr + 8));
844     reg.id = KVM_REG_PPC_VPA_SLB;
845     reg.addr = (uintptr_t)&env->slb_shadow_addr;
846     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
847     if (ret < 0) {
848         DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
849                 strerror(errno));
850         return ret;
851     }
852 
853     assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
854     reg.id = KVM_REG_PPC_VPA_DTL;
855     reg.addr = (uintptr_t)&env->dtl_addr;
856     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
857     if (ret < 0) {
858         DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
859                 strerror(errno));
860         return ret;
861     }
862 
863     return 0;
864 }
865 
866 static int kvm_put_vpa(CPUState *cs)
867 {
868     PowerPCCPU *cpu = POWERPC_CPU(cs);
869     CPUPPCState *env = &cpu->env;
870     struct kvm_one_reg reg;
871     int ret;
872 
873     /* SLB shadow or DTL can't be registered unless a master VPA is
874      * registered.  That means when restoring state, if a VPA *is*
875      * registered, we need to set that up first.  If not, we need to
876      * deregister the others before deregistering the master VPA */
877     assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
878 
879     if (env->vpa_addr) {
880         reg.id = KVM_REG_PPC_VPA_ADDR;
881         reg.addr = (uintptr_t)&env->vpa_addr;
882         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
883         if (ret < 0) {
884             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
885             return ret;
886         }
887     }
888 
889     assert((uintptr_t)&env->slb_shadow_size
890            == ((uintptr_t)&env->slb_shadow_addr + 8));
891     reg.id = KVM_REG_PPC_VPA_SLB;
892     reg.addr = (uintptr_t)&env->slb_shadow_addr;
893     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
894     if (ret < 0) {
895         DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
896         return ret;
897     }
898 
899     assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
900     reg.id = KVM_REG_PPC_VPA_DTL;
901     reg.addr = (uintptr_t)&env->dtl_addr;
902     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
903     if (ret < 0) {
904         DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
905                 strerror(errno));
906         return ret;
907     }
908 
909     if (!env->vpa_addr) {
910         reg.id = KVM_REG_PPC_VPA_ADDR;
911         reg.addr = (uintptr_t)&env->vpa_addr;
912         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
913         if (ret < 0) {
914             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
915             return ret;
916         }
917     }
918 
919     return 0;
920 }
921 #endif /* TARGET_PPC64 */
922 
923 int kvmppc_put_books_sregs(PowerPCCPU *cpu)
924 {
925     CPUPPCState *env = &cpu->env;
926     struct kvm_sregs sregs;
927     int i;
928 
929     sregs.pvr = env->spr[SPR_PVR];
930 
931     sregs.u.s.sdr1 = env->spr[SPR_SDR1];
932 
933     /* Sync SLB */
934 #ifdef TARGET_PPC64
935     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
936         sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
937         if (env->slb[i].esid & SLB_ESID_V) {
938             sregs.u.s.ppc64.slb[i].slbe |= i;
939         }
940         sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
941     }
942 #endif
943 
944     /* Sync SRs */
945     for (i = 0; i < 16; i++) {
946         sregs.u.s.ppc32.sr[i] = env->sr[i];
947     }
948 
949     /* Sync BATs */
950     for (i = 0; i < 8; i++) {
951         /* Beware. We have to swap upper and lower bits here */
952         sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
953             | env->DBAT[1][i];
954         sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
955             | env->IBAT[1][i];
956     }
957 
958     return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
959 }
960 
961 int kvm_arch_put_registers(CPUState *cs, int level)
962 {
963     PowerPCCPU *cpu = POWERPC_CPU(cs);
964     CPUPPCState *env = &cpu->env;
965     struct kvm_regs regs;
966     int ret;
967     int i;
968 
969     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
970     if (ret < 0) {
971         return ret;
972     }
973 
974     regs.ctr = env->ctr;
975     regs.lr  = env->lr;
976     regs.xer = cpu_read_xer(env);
977     regs.msr = env->msr;
978     regs.pc = env->nip;
979 
980     regs.srr0 = env->spr[SPR_SRR0];
981     regs.srr1 = env->spr[SPR_SRR1];
982 
983     regs.sprg0 = env->spr[SPR_SPRG0];
984     regs.sprg1 = env->spr[SPR_SPRG1];
985     regs.sprg2 = env->spr[SPR_SPRG2];
986     regs.sprg3 = env->spr[SPR_SPRG3];
987     regs.sprg4 = env->spr[SPR_SPRG4];
988     regs.sprg5 = env->spr[SPR_SPRG5];
989     regs.sprg6 = env->spr[SPR_SPRG6];
990     regs.sprg7 = env->spr[SPR_SPRG7];
991 
992     regs.pid = env->spr[SPR_BOOKE_PID];
993 
994     for (i = 0;i < 32; i++)
995         regs.gpr[i] = env->gpr[i];
996 
997     regs.cr = 0;
998     for (i = 0; i < 8; i++) {
999         regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
1000     }
1001 
1002     ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
1003     if (ret < 0)
1004         return ret;
1005 
1006     kvm_put_fp(cs);
1007 
1008     if (env->tlb_dirty) {
1009         kvm_sw_tlb_put(cpu);
1010         env->tlb_dirty = false;
1011     }
1012 
1013     if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
1014         ret = kvmppc_put_books_sregs(cpu);
1015         if (ret < 0) {
1016             return ret;
1017         }
1018     }
1019 
1020     if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
1021         kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1022     }
1023 
1024     if (cap_one_reg) {
1025         int i;
1026 
1027         /* We deliberately ignore errors here, for kernels which have
1028          * the ONE_REG calls, but don't support the specific
1029          * registers, there's a reasonable chance things will still
1030          * work, at least until we try to migrate. */
1031         for (i = 0; i < 1024; i++) {
1032             uint64_t id = env->spr_cb[i].one_reg_id;
1033 
1034             if (id != 0) {
1035                 kvm_put_one_spr(cs, id, i);
1036             }
1037         }
1038 
1039 #ifdef TARGET_PPC64
1040         if (msr_ts) {
1041             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1042                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1043             }
1044             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1045                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1046             }
1047             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1048             kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1049             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1050             kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1051             kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1052             kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1053             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1054             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1055             kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1056             kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1057         }
1058 
1059         if (cap_papr) {
1060             if (kvm_put_vpa(cs) < 0) {
1061                 DPRINTF("Warning: Unable to set VPA information to KVM\n");
1062             }
1063         }
1064 
1065         kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1066 #endif /* TARGET_PPC64 */
1067     }
1068 
1069     return ret;
1070 }
1071 
1072 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1073 {
1074      env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1075 }
1076 
1077 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1078 {
1079     CPUPPCState *env = &cpu->env;
1080     struct kvm_sregs sregs;
1081     int ret;
1082 
1083     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1084     if (ret < 0) {
1085         return ret;
1086     }
1087 
1088     if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1089         env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1090         env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1091         env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1092         env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1093         env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1094         env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1095         env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1096         env->spr[SPR_DECR] = sregs.u.e.dec;
1097         env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1098         env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1099         env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1100     }
1101 
1102     if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1103         env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1104         env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1105         env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1106         env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1107         env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1108     }
1109 
1110     if (sregs.u.e.features & KVM_SREGS_E_64) {
1111         env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1112     }
1113 
1114     if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1115         env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1116     }
1117 
1118     if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1119         env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1120         kvm_sync_excp(env, POWERPC_EXCP_CRITICAL,  SPR_BOOKE_IVOR0);
1121         env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1122         kvm_sync_excp(env, POWERPC_EXCP_MCHECK,  SPR_BOOKE_IVOR1);
1123         env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1124         kvm_sync_excp(env, POWERPC_EXCP_DSI,  SPR_BOOKE_IVOR2);
1125         env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1126         kvm_sync_excp(env, POWERPC_EXCP_ISI,  SPR_BOOKE_IVOR3);
1127         env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1128         kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL,  SPR_BOOKE_IVOR4);
1129         env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1130         kvm_sync_excp(env, POWERPC_EXCP_ALIGN,  SPR_BOOKE_IVOR5);
1131         env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1132         kvm_sync_excp(env, POWERPC_EXCP_PROGRAM,  SPR_BOOKE_IVOR6);
1133         env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1134         kvm_sync_excp(env, POWERPC_EXCP_FPU,  SPR_BOOKE_IVOR7);
1135         env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1136         kvm_sync_excp(env, POWERPC_EXCP_SYSCALL,  SPR_BOOKE_IVOR8);
1137         env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1138         kvm_sync_excp(env, POWERPC_EXCP_APU,  SPR_BOOKE_IVOR9);
1139         env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1140         kvm_sync_excp(env, POWERPC_EXCP_DECR,  SPR_BOOKE_IVOR10);
1141         env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1142         kvm_sync_excp(env, POWERPC_EXCP_FIT,  SPR_BOOKE_IVOR11);
1143         env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1144         kvm_sync_excp(env, POWERPC_EXCP_WDT,  SPR_BOOKE_IVOR12);
1145         env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1146         kvm_sync_excp(env, POWERPC_EXCP_DTLB,  SPR_BOOKE_IVOR13);
1147         env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1148         kvm_sync_excp(env, POWERPC_EXCP_ITLB,  SPR_BOOKE_IVOR14);
1149         env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1150         kvm_sync_excp(env, POWERPC_EXCP_DEBUG,  SPR_BOOKE_IVOR15);
1151 
1152         if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1153             env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1154             kvm_sync_excp(env, POWERPC_EXCP_SPEU,  SPR_BOOKE_IVOR32);
1155             env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1156             kvm_sync_excp(env, POWERPC_EXCP_EFPDI,  SPR_BOOKE_IVOR33);
1157             env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1158             kvm_sync_excp(env, POWERPC_EXCP_EFPRI,  SPR_BOOKE_IVOR34);
1159         }
1160 
1161         if (sregs.u.e.features & KVM_SREGS_E_PM) {
1162             env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1163             kvm_sync_excp(env, POWERPC_EXCP_EPERFM,  SPR_BOOKE_IVOR35);
1164         }
1165 
1166         if (sregs.u.e.features & KVM_SREGS_E_PC) {
1167             env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1168             kvm_sync_excp(env, POWERPC_EXCP_DOORI,  SPR_BOOKE_IVOR36);
1169             env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1170             kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1171         }
1172     }
1173 
1174     if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1175         env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1176         env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1177         env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1178         env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1179         env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1180         env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1181         env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1182         env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1183         env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1184         env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1185     }
1186 
1187     if (sregs.u.e.features & KVM_SREGS_EXP) {
1188         env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1189     }
1190 
1191     if (sregs.u.e.features & KVM_SREGS_E_PD) {
1192         env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1193         env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1194     }
1195 
1196     if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1197         env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1198         env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1199         env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1200 
1201         if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1202             env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1203             env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1204         }
1205     }
1206 
1207     return 0;
1208 }
1209 
1210 static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1211 {
1212     CPUPPCState *env = &cpu->env;
1213     struct kvm_sregs sregs;
1214     int ret;
1215     int i;
1216 
1217     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1218     if (ret < 0) {
1219         return ret;
1220     }
1221 
1222     if (!cpu->vhyp) {
1223         ppc_store_sdr1(env, sregs.u.s.sdr1);
1224     }
1225 
1226     /* Sync SLB */
1227 #ifdef TARGET_PPC64
1228     /*
1229      * The packed SLB array we get from KVM_GET_SREGS only contains
1230      * information about valid entries. So we flush our internal copy
1231      * to get rid of stale ones, then put all valid SLB entries back
1232      * in.
1233      */
1234     memset(env->slb, 0, sizeof(env->slb));
1235     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1236         target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1237         target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1238         /*
1239          * Only restore valid entries
1240          */
1241         if (rb & SLB_ESID_V) {
1242             ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1243         }
1244     }
1245 #endif
1246 
1247     /* Sync SRs */
1248     for (i = 0; i < 16; i++) {
1249         env->sr[i] = sregs.u.s.ppc32.sr[i];
1250     }
1251 
1252     /* Sync BATs */
1253     for (i = 0; i < 8; i++) {
1254         env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1255         env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1256         env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1257         env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1258     }
1259 
1260     return 0;
1261 }
1262 
1263 int kvm_arch_get_registers(CPUState *cs)
1264 {
1265     PowerPCCPU *cpu = POWERPC_CPU(cs);
1266     CPUPPCState *env = &cpu->env;
1267     struct kvm_regs regs;
1268     uint32_t cr;
1269     int i, ret;
1270 
1271     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1272     if (ret < 0)
1273         return ret;
1274 
1275     cr = regs.cr;
1276     for (i = 7; i >= 0; i--) {
1277         env->crf[i] = cr & 15;
1278         cr >>= 4;
1279     }
1280 
1281     env->ctr = regs.ctr;
1282     env->lr = regs.lr;
1283     cpu_write_xer(env, regs.xer);
1284     env->msr = regs.msr;
1285     env->nip = regs.pc;
1286 
1287     env->spr[SPR_SRR0] = regs.srr0;
1288     env->spr[SPR_SRR1] = regs.srr1;
1289 
1290     env->spr[SPR_SPRG0] = regs.sprg0;
1291     env->spr[SPR_SPRG1] = regs.sprg1;
1292     env->spr[SPR_SPRG2] = regs.sprg2;
1293     env->spr[SPR_SPRG3] = regs.sprg3;
1294     env->spr[SPR_SPRG4] = regs.sprg4;
1295     env->spr[SPR_SPRG5] = regs.sprg5;
1296     env->spr[SPR_SPRG6] = regs.sprg6;
1297     env->spr[SPR_SPRG7] = regs.sprg7;
1298 
1299     env->spr[SPR_BOOKE_PID] = regs.pid;
1300 
1301     for (i = 0;i < 32; i++)
1302         env->gpr[i] = regs.gpr[i];
1303 
1304     kvm_get_fp(cs);
1305 
1306     if (cap_booke_sregs) {
1307         ret = kvmppc_get_booke_sregs(cpu);
1308         if (ret < 0) {
1309             return ret;
1310         }
1311     }
1312 
1313     if (cap_segstate) {
1314         ret = kvmppc_get_books_sregs(cpu);
1315         if (ret < 0) {
1316             return ret;
1317         }
1318     }
1319 
1320     if (cap_hior) {
1321         kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1322     }
1323 
1324     if (cap_one_reg) {
1325         int i;
1326 
1327         /* We deliberately ignore errors here, for kernels which have
1328          * the ONE_REG calls, but don't support the specific
1329          * registers, there's a reasonable chance things will still
1330          * work, at least until we try to migrate. */
1331         for (i = 0; i < 1024; i++) {
1332             uint64_t id = env->spr_cb[i].one_reg_id;
1333 
1334             if (id != 0) {
1335                 kvm_get_one_spr(cs, id, i);
1336             }
1337         }
1338 
1339 #ifdef TARGET_PPC64
1340         if (msr_ts) {
1341             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1342                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1343             }
1344             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1345                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1346             }
1347             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1348             kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1349             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1350             kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1351             kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1352             kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1353             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1354             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1355             kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1356             kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1357         }
1358 
1359         if (cap_papr) {
1360             if (kvm_get_vpa(cs) < 0) {
1361                 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1362             }
1363         }
1364 
1365         kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1366 #endif
1367     }
1368 
1369     return 0;
1370 }
1371 
1372 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1373 {
1374     unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1375 
1376     if (irq != PPC_INTERRUPT_EXT) {
1377         return 0;
1378     }
1379 
1380     if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1381         return 0;
1382     }
1383 
1384     kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1385 
1386     return 0;
1387 }
1388 
1389 #if defined(TARGET_PPCEMB)
1390 #define PPC_INPUT_INT PPC40x_INPUT_INT
1391 #elif defined(TARGET_PPC64)
1392 #define PPC_INPUT_INT PPC970_INPUT_INT
1393 #else
1394 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1395 #endif
1396 
1397 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1398 {
1399     PowerPCCPU *cpu = POWERPC_CPU(cs);
1400     CPUPPCState *env = &cpu->env;
1401     int r;
1402     unsigned irq;
1403 
1404     qemu_mutex_lock_iothread();
1405 
1406     /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1407      * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1408     if (!cap_interrupt_level &&
1409         run->ready_for_interrupt_injection &&
1410         (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1411         (env->irq_input_state & (1<<PPC_INPUT_INT)))
1412     {
1413         /* For now KVM disregards the 'irq' argument. However, in the
1414          * future KVM could cache it in-kernel to avoid a heavyweight exit
1415          * when reading the UIC.
1416          */
1417         irq = KVM_INTERRUPT_SET;
1418 
1419         DPRINTF("injected interrupt %d\n", irq);
1420         r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1421         if (r < 0) {
1422             printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1423         }
1424 
1425         /* Always wake up soon in case the interrupt was level based */
1426         timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1427                        (NANOSECONDS_PER_SECOND / 50));
1428     }
1429 
1430     /* We don't know if there are more interrupts pending after this. However,
1431      * the guest will return to userspace in the course of handling this one
1432      * anyways, so we will get a chance to deliver the rest. */
1433 
1434     qemu_mutex_unlock_iothread();
1435 }
1436 
1437 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1438 {
1439     return MEMTXATTRS_UNSPECIFIED;
1440 }
1441 
1442 int kvm_arch_process_async_events(CPUState *cs)
1443 {
1444     return cs->halted;
1445 }
1446 
1447 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1448 {
1449     CPUState *cs = CPU(cpu);
1450     CPUPPCState *env = &cpu->env;
1451 
1452     if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1453         cs->halted = 1;
1454         cs->exception_index = EXCP_HLT;
1455     }
1456 
1457     return 0;
1458 }
1459 
1460 /* map dcr access to existing qemu dcr emulation */
1461 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1462 {
1463     if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1464         fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1465 
1466     return 0;
1467 }
1468 
1469 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1470 {
1471     if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1472         fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1473 
1474     return 0;
1475 }
1476 
1477 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1478 {
1479     /* Mixed endian case is not handled */
1480     uint32_t sc = debug_inst_opcode;
1481 
1482     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1483                             sizeof(sc), 0) ||
1484         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1485         return -EINVAL;
1486     }
1487 
1488     return 0;
1489 }
1490 
1491 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1492 {
1493     uint32_t sc;
1494 
1495     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1496         sc != debug_inst_opcode ||
1497         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1498                             sizeof(sc), 1)) {
1499         return -EINVAL;
1500     }
1501 
1502     return 0;
1503 }
1504 
1505 static int find_hw_breakpoint(target_ulong addr, int type)
1506 {
1507     int n;
1508 
1509     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1510            <= ARRAY_SIZE(hw_debug_points));
1511 
1512     for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1513         if (hw_debug_points[n].addr == addr &&
1514              hw_debug_points[n].type == type) {
1515             return n;
1516         }
1517     }
1518 
1519     return -1;
1520 }
1521 
1522 static int find_hw_watchpoint(target_ulong addr, int *flag)
1523 {
1524     int n;
1525 
1526     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1527     if (n >= 0) {
1528         *flag = BP_MEM_ACCESS;
1529         return n;
1530     }
1531 
1532     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1533     if (n >= 0) {
1534         *flag = BP_MEM_WRITE;
1535         return n;
1536     }
1537 
1538     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1539     if (n >= 0) {
1540         *flag = BP_MEM_READ;
1541         return n;
1542     }
1543 
1544     return -1;
1545 }
1546 
1547 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1548                                   target_ulong len, int type)
1549 {
1550     if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1551         return -ENOBUFS;
1552     }
1553 
1554     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1555     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1556 
1557     switch (type) {
1558     case GDB_BREAKPOINT_HW:
1559         if (nb_hw_breakpoint >= max_hw_breakpoint) {
1560             return -ENOBUFS;
1561         }
1562 
1563         if (find_hw_breakpoint(addr, type) >= 0) {
1564             return -EEXIST;
1565         }
1566 
1567         nb_hw_breakpoint++;
1568         break;
1569 
1570     case GDB_WATCHPOINT_WRITE:
1571     case GDB_WATCHPOINT_READ:
1572     case GDB_WATCHPOINT_ACCESS:
1573         if (nb_hw_watchpoint >= max_hw_watchpoint) {
1574             return -ENOBUFS;
1575         }
1576 
1577         if (find_hw_breakpoint(addr, type) >= 0) {
1578             return -EEXIST;
1579         }
1580 
1581         nb_hw_watchpoint++;
1582         break;
1583 
1584     default:
1585         return -ENOSYS;
1586     }
1587 
1588     return 0;
1589 }
1590 
1591 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1592                                   target_ulong len, int type)
1593 {
1594     int n;
1595 
1596     n = find_hw_breakpoint(addr, type);
1597     if (n < 0) {
1598         return -ENOENT;
1599     }
1600 
1601     switch (type) {
1602     case GDB_BREAKPOINT_HW:
1603         nb_hw_breakpoint--;
1604         break;
1605 
1606     case GDB_WATCHPOINT_WRITE:
1607     case GDB_WATCHPOINT_READ:
1608     case GDB_WATCHPOINT_ACCESS:
1609         nb_hw_watchpoint--;
1610         break;
1611 
1612     default:
1613         return -ENOSYS;
1614     }
1615     hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1616 
1617     return 0;
1618 }
1619 
1620 void kvm_arch_remove_all_hw_breakpoints(void)
1621 {
1622     nb_hw_breakpoint = nb_hw_watchpoint = 0;
1623 }
1624 
1625 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1626 {
1627     int n;
1628 
1629     /* Software Breakpoint updates */
1630     if (kvm_sw_breakpoints_active(cs)) {
1631         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1632     }
1633 
1634     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1635            <= ARRAY_SIZE(hw_debug_points));
1636     assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1637 
1638     if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1639         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1640         memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1641         for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1642             switch (hw_debug_points[n].type) {
1643             case GDB_BREAKPOINT_HW:
1644                 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1645                 break;
1646             case GDB_WATCHPOINT_WRITE:
1647                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1648                 break;
1649             case GDB_WATCHPOINT_READ:
1650                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1651                 break;
1652             case GDB_WATCHPOINT_ACCESS:
1653                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1654                                         KVMPPC_DEBUG_WATCH_READ;
1655                 break;
1656             default:
1657                 cpu_abort(cs, "Unsupported breakpoint type\n");
1658             }
1659             dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1660         }
1661     }
1662 }
1663 
1664 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1665 {
1666     CPUState *cs = CPU(cpu);
1667     CPUPPCState *env = &cpu->env;
1668     struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1669     int handle = 0;
1670     int n;
1671     int flag = 0;
1672 
1673     if (cs->singlestep_enabled) {
1674         handle = 1;
1675     } else if (arch_info->status) {
1676         if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1677             if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1678                 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1679                 if (n >= 0) {
1680                     handle = 1;
1681                 }
1682             } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1683                                             KVMPPC_DEBUG_WATCH_WRITE)) {
1684                 n = find_hw_watchpoint(arch_info->address,  &flag);
1685                 if (n >= 0) {
1686                     handle = 1;
1687                     cs->watchpoint_hit = &hw_watchpoint;
1688                     hw_watchpoint.vaddr = hw_debug_points[n].addr;
1689                     hw_watchpoint.flags = flag;
1690                 }
1691             }
1692         }
1693     } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1694         handle = 1;
1695     } else {
1696         /* QEMU is not able to handle debug exception, so inject
1697          * program exception to guest;
1698          * Yes program exception NOT debug exception !!
1699          * When QEMU is using debug resources then debug exception must
1700          * be always set. To achieve this we set MSR_DE and also set
1701          * MSRP_DEP so guest cannot change MSR_DE.
1702          * When emulating debug resource for guest we want guest
1703          * to control MSR_DE (enable/disable debug interrupt on need).
1704          * Supporting both configurations are NOT possible.
1705          * So the result is that we cannot share debug resources
1706          * between QEMU and Guest on BOOKE architecture.
1707          * In the current design QEMU gets the priority over guest,
1708          * this means that if QEMU is using debug resources then guest
1709          * cannot use them;
1710          * For software breakpoint QEMU uses a privileged instruction;
1711          * So there cannot be any reason that we are here for guest
1712          * set debug exception, only possibility is guest executed a
1713          * privileged / illegal instruction and that's why we are
1714          * injecting a program interrupt.
1715          */
1716 
1717         cpu_synchronize_state(cs);
1718         /* env->nip is PC, so increment this by 4 to use
1719          * ppc_cpu_do_interrupt(), which set srr0 = env->nip - 4.
1720          */
1721         env->nip += 4;
1722         cs->exception_index = POWERPC_EXCP_PROGRAM;
1723         env->error_code = POWERPC_EXCP_INVAL;
1724         ppc_cpu_do_interrupt(cs);
1725     }
1726 
1727     return handle;
1728 }
1729 
1730 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1731 {
1732     PowerPCCPU *cpu = POWERPC_CPU(cs);
1733     CPUPPCState *env = &cpu->env;
1734     int ret;
1735 
1736     qemu_mutex_lock_iothread();
1737 
1738     switch (run->exit_reason) {
1739     case KVM_EXIT_DCR:
1740         if (run->dcr.is_write) {
1741             DPRINTF("handle dcr write\n");
1742             ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1743         } else {
1744             DPRINTF("handle dcr read\n");
1745             ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1746         }
1747         break;
1748     case KVM_EXIT_HLT:
1749         DPRINTF("handle halt\n");
1750         ret = kvmppc_handle_halt(cpu);
1751         break;
1752 #if defined(TARGET_PPC64)
1753     case KVM_EXIT_PAPR_HCALL:
1754         DPRINTF("handle PAPR hypercall\n");
1755         run->papr_hcall.ret = spapr_hypercall(cpu,
1756                                               run->papr_hcall.nr,
1757                                               run->papr_hcall.args);
1758         ret = 0;
1759         break;
1760 #endif
1761     case KVM_EXIT_EPR:
1762         DPRINTF("handle epr\n");
1763         run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1764         ret = 0;
1765         break;
1766     case KVM_EXIT_WATCHDOG:
1767         DPRINTF("handle watchdog expiry\n");
1768         watchdog_perform_action();
1769         ret = 0;
1770         break;
1771 
1772     case KVM_EXIT_DEBUG:
1773         DPRINTF("handle debug exception\n");
1774         if (kvm_handle_debug(cpu, run)) {
1775             ret = EXCP_DEBUG;
1776             break;
1777         }
1778         /* re-enter, this exception was guest-internal */
1779         ret = 0;
1780         break;
1781 
1782     default:
1783         fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1784         ret = -1;
1785         break;
1786     }
1787 
1788     qemu_mutex_unlock_iothread();
1789     return ret;
1790 }
1791 
1792 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1793 {
1794     CPUState *cs = CPU(cpu);
1795     uint32_t bits = tsr_bits;
1796     struct kvm_one_reg reg = {
1797         .id = KVM_REG_PPC_OR_TSR,
1798         .addr = (uintptr_t) &bits,
1799     };
1800 
1801     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1802 }
1803 
1804 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1805 {
1806 
1807     CPUState *cs = CPU(cpu);
1808     uint32_t bits = tsr_bits;
1809     struct kvm_one_reg reg = {
1810         .id = KVM_REG_PPC_CLEAR_TSR,
1811         .addr = (uintptr_t) &bits,
1812     };
1813 
1814     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1815 }
1816 
1817 int kvmppc_set_tcr(PowerPCCPU *cpu)
1818 {
1819     CPUState *cs = CPU(cpu);
1820     CPUPPCState *env = &cpu->env;
1821     uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1822 
1823     struct kvm_one_reg reg = {
1824         .id = KVM_REG_PPC_TCR,
1825         .addr = (uintptr_t) &tcr,
1826     };
1827 
1828     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1829 }
1830 
1831 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1832 {
1833     CPUState *cs = CPU(cpu);
1834     int ret;
1835 
1836     if (!kvm_enabled()) {
1837         return -1;
1838     }
1839 
1840     if (!cap_ppc_watchdog) {
1841         printf("warning: KVM does not support watchdog");
1842         return -1;
1843     }
1844 
1845     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1846     if (ret < 0) {
1847         fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1848                 __func__, strerror(-ret));
1849         return ret;
1850     }
1851 
1852     return ret;
1853 }
1854 
/*
 * Find the first line of /proc/cpuinfo that starts with @field and copy
 * it (the whole line, as read by fgets) into @value, a buffer of @len
 * bytes, using pstrcpy().
 *
 * Returns 0 when a line was found, -1 when the file cannot be opened or
 * no line matches.
 */
static int read_cpuinfo(const char *field, char *value, int len)
{
    char line[512];
    int field_len = strlen(field);
    int status = -1;
    FILE *cpuinfo = fopen("/proc/cpuinfo", "r");

    if (cpuinfo == NULL) {
        return -1;
    }

    while (fgets(line, sizeof(line), cpuinfo) != NULL) {
        if (strncmp(line, field, field_len) == 0) {
            pstrcpy(value, len, line);
            status = 0;
            break;
        }
        if (*line == '\0') {
            /* An empty read also ends the scan */
            break;
        }
    }

    fclose(cpuinfo);

    return status;
}
1882 
1883 uint32_t kvmppc_get_tbfreq(void)
1884 {
1885     char line[512];
1886     char *ns;
1887     uint32_t retval = NANOSECONDS_PER_SECOND;
1888 
1889     if (read_cpuinfo("timebase", line, sizeof(line))) {
1890         return retval;
1891     }
1892 
1893     if (!(ns = strchr(line, ':'))) {
1894         return retval;
1895     }
1896 
1897     ns++;
1898 
1899     return atoi(ns);
1900 }
1901 
/*
 * Load the contents of /proc/device-tree/system-id into a buffer stored
 * in *@value, using g_file_get_contents().
 *
 * Returns the success indication of g_file_get_contents().
 */
bool kvmppc_get_host_serial(char **value)
{
    static const char system_id_path[] = "/proc/device-tree/system-id";

    return g_file_get_contents(system_id_path, value, NULL, NULL);
}
1907 
/*
 * Load the contents of /proc/device-tree/model into a buffer stored in
 * *@value, using g_file_get_contents().
 *
 * Returns the success indication of g_file_get_contents().
 */
bool kvmppc_get_host_model(char **value)
{
    static const char model_path[] = "/proc/device-tree/model";

    return g_file_get_contents(model_path, value, NULL, NULL);
}
1912 
1913 /* Try to find a device tree node for a CPU with clock-frequency property */
1914 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1915 {
1916     struct dirent *dirp;
1917     DIR *dp;
1918 
1919     if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1920         printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1921         return -1;
1922     }
1923 
1924     buf[0] = '\0';
1925     while ((dirp = readdir(dp)) != NULL) {
1926         FILE *f;
1927         snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1928                  dirp->d_name);
1929         f = fopen(buf, "r");
1930         if (f) {
1931             snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1932             fclose(f);
1933             break;
1934         }
1935         buf[0] = '\0';
1936     }
1937     closedir(dp);
1938     if (buf[0] == '\0') {
1939         printf("Unknown host!\n");
1940         return -1;
1941     }
1942 
1943     return 0;
1944 }
1945 
/*
 * Read a big-endian integer property from the file @filename.
 *
 * Returns -1 (converted to uint64_t) when the file cannot be opened,
 * the decoded value when exactly 4 or 8 bytes could be read, and 0 for
 * any other length.
 */
static uint64_t kvmppc_read_int_dt(const char *filename)
{
    union {
        uint32_t v32;
        uint64_t v64;
    } raw;
    int nread;
    FILE *prop = fopen(filename, "rb");

    if (prop == NULL) {
        return -1;
    }

    nread = fread(&raw, 1, sizeof(raw), prop);
    fclose(prop);

    if (nread == 4) {
        /* property is a 32-bit quantity */
        return be32_to_cpu(raw.v32);
    }
    if (nread == 8) {
        /* property is a 64-bit quantity */
        return be64_to_cpu(raw.v64);
    }

    return 0;
}
1972 
1973 /* Read a CPU node property from the host device tree that's a single
1974  * integer (32-bit or 64-bit).  Returns 0 if anything goes wrong
1975  * (can't find or open the property, or doesn't understand the
1976  * format) */
1977 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1978 {
1979     char buf[PATH_MAX], *tmp;
1980     uint64_t val;
1981 
1982     if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1983         return -1;
1984     }
1985 
1986     tmp = g_strdup_printf("%s/%s", buf, propname);
1987     val = kvmppc_read_int_dt(tmp);
1988     g_free(tmp);
1989 
1990     return val;
1991 }
1992 
/*
 * Return the "clock-frequency" property of the host CPU device tree
 * node, as read by kvmppc_read_int_cpu_dt().
 */
uint64_t kvmppc_get_clockfreq(void)
{
    uint64_t clock_frequency = kvmppc_read_int_cpu_dt("clock-frequency");

    return clock_frequency;
}
1997 
/*
 * Return the "ibm,vmx" property of the host CPU device tree node, as
 * read by kvmppc_read_int_cpu_dt() and narrowed to 32 bits.
 */
uint32_t kvmppc_get_vmx(void)
{
    uint64_t vmx_prop = kvmppc_read_int_cpu_dt("ibm,vmx");

    return vmx_prop;
}
2002 
/*
 * Return the "ibm,dfp" property of the host CPU device tree node, as
 * read by kvmppc_read_int_cpu_dt() and narrowed to 32 bits.
 */
uint32_t kvmppc_get_dfp(void)
{
    uint64_t dfp_prop = kvmppc_read_int_cpu_dt("ibm,dfp");

    return dfp_prop;
}
2007 
2008 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
2009  {
2010      PowerPCCPU *cpu = ppc_env_get_cpu(env);
2011      CPUState *cs = CPU(cpu);
2012 
2013     if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
2014         !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
2015         return 0;
2016     }
2017 
2018     return 1;
2019 }
2020 
2021 int kvmppc_get_hasidle(CPUPPCState *env)
2022 {
2023     struct kvm_ppc_pvinfo pvinfo;
2024 
2025     if (!kvmppc_get_pvinfo(env, &pvinfo) &&
2026         (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
2027         return 1;
2028     }
2029 
2030     return 0;
2031 }
2032 
2033 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
2034 {
2035     uint32_t *hc = (uint32_t*)buf;
2036     struct kvm_ppc_pvinfo pvinfo;
2037 
2038     if (!kvmppc_get_pvinfo(env, &pvinfo)) {
2039         memcpy(buf, pvinfo.hcall, buf_len);
2040         return 0;
2041     }
2042 
2043     /*
2044      * Fallback to always fail hypercalls regardless of endianness:
2045      *
2046      *     tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
2047      *     li r3, -1
2048      *     b .+8       (becomes nop in wrong endian)
2049      *     bswap32(li r3, -1)
2050      */
2051 
2052     hc[0] = cpu_to_be32(0x08000048);
2053     hc[1] = cpu_to_be32(0x3860ffff);
2054     hc[2] = cpu_to_be32(0x48000008);
2055     hc[3] = cpu_to_be32(bswap32(0x3860ffff));
2056 
2057     return 1;
2058 }
2059 
2060 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
2061 {
2062     return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
2063 }
2064 
2065 void kvmppc_enable_logical_ci_hcalls(void)
2066 {
2067     /*
2068      * FIXME: it would be nice if we could detect the cases where
2069      * we're using a device which requires the in kernel
2070      * implementation of these hcalls, but the kernel lacks them and
2071      * produce a warning.
2072      */
2073     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
2074     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
2075 }
2076 
2077 void kvmppc_enable_set_mode_hcall(void)
2078 {
2079     kvmppc_enable_hcall(kvm_state, H_SET_MODE);
2080 }
2081 
2082 void kvmppc_enable_clear_ref_mod_hcalls(void)
2083 {
2084     kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
2085     kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
2086 }
2087 
2088 void kvmppc_set_papr(PowerPCCPU *cpu)
2089 {
2090     CPUState *cs = CPU(cpu);
2091     int ret;
2092 
2093     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2094     if (ret) {
2095         error_report("This vCPU type or KVM version does not support PAPR");
2096         exit(1);
2097     }
2098 
2099     /* Update the capability flag so we sync the right information
2100      * with kvm */
2101     cap_papr = 1;
2102 }
2103 
2104 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr)
2105 {
2106     return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr);
2107 }
2108 
2109 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2110 {
2111     CPUState *cs = CPU(cpu);
2112     int ret;
2113 
2114     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2115     if (ret && mpic_proxy) {
2116         error_report("This KVM version does not support EPR");
2117         exit(1);
2118     }
2119 }
2120 
2121 int kvmppc_smt_threads(void)
2122 {
2123     return cap_ppc_smt ? cap_ppc_smt : 1;
2124 }
2125 
2126 #ifdef TARGET_PPC64
2127 off_t kvmppc_alloc_rma(void **rma)
2128 {
2129     off_t size;
2130     int fd;
2131     struct kvm_allocate_rma ret;
2132 
2133     /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
2134      * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
2135      *                      not necessary on this hardware
2136      * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
2137      *
2138      * FIXME: We should allow the user to force contiguous RMA
2139      * allocation in the cap_ppc_rma==1 case.
2140      */
2141     if (cap_ppc_rma < 2) {
2142         return 0;
2143     }
2144 
2145     fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
2146     if (fd < 0) {
2147         fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
2148                 strerror(errno));
2149         return -1;
2150     }
2151 
2152     size = MIN(ret.rma_size, 256ul << 20);
2153 
2154     *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2155     if (*rma == MAP_FAILED) {
2156         fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
2157         return -1;
2158     };
2159 
2160     return size;
2161 }
2162 
2163 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2164 {
2165     struct kvm_ppc_smmu_info info;
2166     long rampagesize, best_page_shift;
2167     int i;
2168 
2169     if (cap_ppc_rma >= 2) {
2170         return current_size;
2171     }
2172 
2173     /* Find the largest hardware supported page size that's less than
2174      * or equal to the (logical) backing page size of guest RAM */
2175     kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
2176     rampagesize = qemu_getrampagesize();
2177     best_page_shift = 0;
2178 
2179     for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2180         struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2181 
2182         if (!sps->page_shift) {
2183             continue;
2184         }
2185 
2186         if ((sps->page_shift > best_page_shift)
2187             && ((1UL << sps->page_shift) <= rampagesize)) {
2188             best_page_shift = sps->page_shift;
2189         }
2190     }
2191 
2192     return MIN(current_size,
2193                1ULL << (best_page_shift + hash_shift - 7));
2194 }
2195 #endif
2196 
2197 bool kvmppc_spapr_use_multitce(void)
2198 {
2199     return cap_spapr_multitce;
2200 }
2201 
2202 int kvmppc_spapr_enable_inkernel_multitce(void)
2203 {
2204     int ret;
2205 
2206     ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2207                             H_PUT_TCE_INDIRECT, 1);
2208     if (!ret) {
2209         ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2210                                 H_STUFF_TCE, 1);
2211     }
2212 
2213     return ret;
2214 }
2215 
2216 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
2217                               uint64_t bus_offset, uint32_t nb_table,
2218                               int *pfd, bool need_vfio)
2219 {
2220     long len;
2221     int fd;
2222     void *table;
2223 
2224     /* Must set fd to -1 so we don't try to munmap when called for
2225      * destroying the table, which the upper layers -will- do
2226      */
2227     *pfd = -1;
2228     if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2229         return NULL;
2230     }
2231 
2232     if (cap_spapr_tce_64) {
2233         struct kvm_create_spapr_tce_64 args = {
2234             .liobn = liobn,
2235             .page_shift = page_shift,
2236             .offset = bus_offset >> page_shift,
2237             .size = nb_table,
2238             .flags = 0
2239         };
2240         fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE_64, &args);
2241         if (fd < 0) {
2242             fprintf(stderr,
2243                     "KVM: Failed to create TCE64 table for liobn 0x%x\n",
2244                     liobn);
2245             return NULL;
2246         }
2247     } else if (cap_spapr_tce) {
2248         uint64_t window_size = (uint64_t) nb_table << page_shift;
2249         struct kvm_create_spapr_tce args = {
2250             .liobn = liobn,
2251             .window_size = window_size,
2252         };
2253         if ((window_size != args.window_size) || bus_offset) {
2254             return NULL;
2255         }
2256         fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2257         if (fd < 0) {
2258             fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2259                     liobn);
2260             return NULL;
2261         }
2262     } else {
2263         return NULL;
2264     }
2265 
2266     len = nb_table * sizeof(uint64_t);
2267     /* FIXME: round this up to page size */
2268 
2269     table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2270     if (table == MAP_FAILED) {
2271         fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2272                 liobn);
2273         close(fd);
2274         return NULL;
2275     }
2276 
2277     *pfd = fd;
2278     return table;
2279 }
2280 
/*
 * Unmap a TCE table of @nb_table entries mapped at @table and close
 * its @fd.
 *
 * Returns -1 when @fd is negative (nothing is done), 0 otherwise; a
 * failing munmap()/close() is only reported, not returned.
 */
int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
{
    long len;

    if (fd < 0) {
        return -1;
    }

    len = nb_table * sizeof(uint64_t);
    /* close() is deliberately skipped when munmap() fails */
    if ((munmap(table, len) < 0) ||
        (close(fd) < 0)) {
        /* Terminate the line so later output isn't glued to it */
        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s\n",
                strerror(errno));
        /* Leak the table */
    }

    return 0;
}
2299 
2300 int kvmppc_reset_htab(int shift_hint)
2301 {
2302     uint32_t shift = shift_hint;
2303 
2304     if (!kvm_enabled()) {
2305         /* Full emulation, tell caller to allocate htab itself */
2306         return 0;
2307     }
2308     if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2309         int ret;
2310         ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2311         if (ret == -ENOTTY) {
2312             /* At least some versions of PR KVM advertise the
2313              * capability, but don't implement the ioctl().  Oops.
2314              * Return 0 so that we allocate the htab in qemu, as is
2315              * correct for PR. */
2316             return 0;
2317         } else if (ret < 0) {
2318             return ret;
2319         }
2320         return shift;
2321     }
2322 
2323     /* We have a kernel that predates the htab reset calls.  For PR
2324      * KVM, we need to allocate the htab ourselves, for an HV KVM of
2325      * this era, it has allocated a 16MB fixed size hash table already. */
2326     if (kvmppc_is_pr(kvm_state)) {
2327         /* PR - tell caller to allocate htab */
2328         return 0;
2329     } else {
2330         /* HV - assume 16MB kernel allocated htab */
2331         return 24;
2332     }
2333 }
2334 
2335 static inline uint32_t mfpvr(void)
2336 {
2337     uint32_t pvr;
2338 
2339     asm ("mfpvr %0"
2340          : "=r"(pvr));
2341     return pvr;
2342 }
2343 
/*
 * Set (@on == true) or clear (@on == false) the bits of @flags in
 * *@word; all other bits of *@word are left alone.
 */
static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    *word = on ? (*word | flags) : (*word & ~flags);
}
2352 
2353 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2354 {
2355     PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2356     uint32_t vmx = kvmppc_get_vmx();
2357     uint32_t dfp = kvmppc_get_dfp();
2358     uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2359     uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2360 
2361     /* Now fix up the class with information we can query from the host */
2362     pcc->pvr = mfpvr();
2363 
2364     if (vmx != -1) {
2365         /* Only override when we know what the host supports */
2366         alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
2367         alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
2368     }
2369     if (dfp != -1) {
2370         /* Only override when we know what the host supports */
2371         alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
2372     }
2373 
2374     if (dcache_size != -1) {
2375         pcc->l1_dcache_size = dcache_size;
2376     }
2377 
2378     if (icache_size != -1) {
2379         pcc->l1_icache_size = icache_size;
2380     }
2381 
2382 #if defined(TARGET_PPC64)
2383     pcc->radix_page_info = kvm_get_radix_page_info();
2384 
2385     if ((pcc->pvr & 0xffffff00) == CPU_POWERPC_POWER9_DD1) {
2386         /*
2387          * POWER9 DD1 has some bugs which make it not really ISA 3.00
2388          * compliant.  More importantly, advertising ISA 3.00
2389          * architected mode may prevent guests from activating
2390          * necessary DD1 workarounds.
2391          */
2392         pcc->pcr_supported &= ~(PCR_COMPAT_3_00 | PCR_COMPAT_2_07
2393                                 | PCR_COMPAT_2_06 | PCR_COMPAT_2_05);
2394     }
2395 #endif /* defined(TARGET_PPC64) */
2396 }
2397 
2398 bool kvmppc_has_cap_epr(void)
2399 {
2400     return cap_epr;
2401 }
2402 
2403 bool kvmppc_has_cap_htab_fd(void)
2404 {
2405     return cap_htab_fd;
2406 }
2407 
2408 bool kvmppc_has_cap_fixup_hcalls(void)
2409 {
2410     return cap_fixup_hcalls;
2411 }
2412 
2413 bool kvmppc_has_cap_htm(void)
2414 {
2415     return cap_htm;
2416 }
2417 
2418 bool kvmppc_has_cap_mmu_radix(void)
2419 {
2420     return cap_mmu_radix;
2421 }
2422 
2423 bool kvmppc_has_cap_mmu_hash_v3(void)
2424 {
2425     return cap_mmu_hash_v3;
2426 }
2427 
2428 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
2429 {
2430     uint32_t host_pvr = mfpvr();
2431     PowerPCCPUClass *pvr_pcc;
2432 
2433     pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2434     if (pvr_pcc == NULL) {
2435         pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2436     }
2437 
2438     return pvr_pcc;
2439 }
2440 
2441 static int kvm_ppc_register_host_cpu_type(void)
2442 {
2443     TypeInfo type_info = {
2444         .name = TYPE_HOST_POWERPC_CPU,
2445         .class_init = kvmppc_host_cpu_class_init,
2446     };
2447     PowerPCCPUClass *pvr_pcc;
2448     ObjectClass *oc;
2449     DeviceClass *dc;
2450     int i;
2451 
2452     pvr_pcc = kvm_ppc_get_host_cpu_class();
2453     if (pvr_pcc == NULL) {
2454         return -1;
2455     }
2456     type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2457     type_register(&type_info);
2458 
2459     oc = object_class_by_name(type_info.name);
2460     g_assert(oc);
2461 
2462 #if defined(TARGET_PPC64)
2463     type_info.name = g_strdup_printf("%s-"TYPE_SPAPR_CPU_CORE, "host");
2464     type_info.parent = TYPE_SPAPR_CPU_CORE,
2465     type_info.instance_size = sizeof(sPAPRCPUCore);
2466     type_info.instance_init = NULL;
2467     type_info.class_init = spapr_cpu_core_class_init;
2468     type_info.class_data = (void *) "host";
2469     type_register(&type_info);
2470     g_free((void *)type_info.name);
2471 #endif
2472 
2473     /*
2474      * Update generic CPU family class alias (e.g. on a POWER8NVL host,
2475      * we want "POWER8" to be a "family" alias that points to the current
2476      * host CPU type, too)
2477      */
2478     dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc));
2479     for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) {
2480         if (strcmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) {
2481             char *suffix;
2482 
2483             ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc));
2484             suffix = strstr(ppc_cpu_aliases[i].model, "-"TYPE_POWERPC_CPU);
2485             if (suffix) {
2486                 *suffix = 0;
2487             }
2488             ppc_cpu_aliases[i].oc = oc;
2489             break;
2490         }
2491     }
2492 
2493     return 0;
2494 }
2495 
2496 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2497 {
2498     struct kvm_rtas_token_args args = {
2499         .token = token,
2500     };
2501 
2502     if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2503         return -ENOENT;
2504     }
2505 
2506     strncpy(args.name, function, sizeof(args.name));
2507 
2508     return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2509 }
2510 
2511 int kvmppc_get_htab_fd(bool write)
2512 {
2513     struct kvm_get_htab_fd s = {
2514         .flags = write ? KVM_GET_HTAB_WRITE : 0,
2515         .start_index = 0,
2516     };
2517 
2518     if (!cap_htab_fd) {
2519         fprintf(stderr, "KVM version doesn't support saving the hash table\n");
2520         return -1;
2521     }
2522 
2523     return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2524 }
2525 
2526 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2527 {
2528     int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2529     uint8_t buf[bufsize];
2530     ssize_t rc;
2531 
2532     do {
2533         rc = read(fd, buf, bufsize);
2534         if (rc < 0) {
2535             fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2536                     strerror(errno));
2537             return rc;
2538         } else if (rc) {
2539             uint8_t *buffer = buf;
2540             ssize_t n = rc;
2541             while (n) {
2542                 struct kvm_get_htab_header *head =
2543                     (struct kvm_get_htab_header *) buffer;
2544                 size_t chunksize = sizeof(*head) +
2545                      HASH_PTE_SIZE_64 * head->n_valid;
2546 
2547                 qemu_put_be32(f, head->index);
2548                 qemu_put_be16(f, head->n_valid);
2549                 qemu_put_be16(f, head->n_invalid);
2550                 qemu_put_buffer(f, (void *)(head + 1),
2551                                 HASH_PTE_SIZE_64 * head->n_valid);
2552 
2553                 buffer += chunksize;
2554                 n -= chunksize;
2555             }
2556         }
2557     } while ((rc != 0)
2558              && ((max_ns < 0)
2559                  || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2560 
2561     return (rc == 0) ? 1 : 0;
2562 }
2563 
2564 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2565                            uint16_t n_valid, uint16_t n_invalid)
2566 {
2567     struct kvm_get_htab_header *buf;
2568     size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
2569     ssize_t rc;
2570 
2571     buf = alloca(chunksize);
2572     buf->index = index;
2573     buf->n_valid = n_valid;
2574     buf->n_invalid = n_invalid;
2575 
2576     qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
2577 
2578     rc = write(fd, buf, chunksize);
2579     if (rc < 0) {
2580         fprintf(stderr, "Error writing KVM hash table: %s\n",
2581                 strerror(errno));
2582         return rc;
2583     }
2584     if (rc != chunksize) {
2585         /* We should never get a short write on a single chunk */
2586         fprintf(stderr, "Short write, restoring KVM hash table\n");
2587         return -1;
2588     }
2589     return 0;
2590 }
2591 
2592 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2593 {
2594     return true;
2595 }
2596 
2597 void kvm_arch_init_irq_routing(KVMState *s)
2598 {
2599 }
2600 
2601 void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n)
2602 {
2603     struct kvm_get_htab_fd ghf = {
2604         .flags = 0,
2605         .start_index = ptex,
2606     };
2607     int fd, rc;
2608     int i;
2609 
2610     fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2611     if (fd < 0) {
2612         hw_error("kvmppc_read_hptes: Unable to open HPT fd");
2613     }
2614 
2615     i = 0;
2616     while (i < n) {
2617         struct kvm_get_htab_header *hdr;
2618         int m = n < HPTES_PER_GROUP ? n : HPTES_PER_GROUP;
2619         char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64];
2620 
2621         rc = read(fd, buf, sizeof(buf));
2622         if (rc < 0) {
2623             hw_error("kvmppc_read_hptes: Unable to read HPTEs");
2624         }
2625 
2626         hdr = (struct kvm_get_htab_header *)buf;
2627         while ((i < n) && ((char *)hdr < (buf + rc))) {
2628             int invalid = hdr->n_invalid;
2629 
2630             if (hdr->index != (ptex + i)) {
2631                 hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32
2632                          " != (%"HWADDR_PRIu" + %d", hdr->index, ptex, i);
2633             }
2634 
2635             memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * hdr->n_valid);
2636             i += hdr->n_valid;
2637 
2638             if ((n - i) < invalid) {
2639                 invalid = n - i;
2640             }
2641             memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64);
2642             i += hdr->n_invalid;
2643 
2644             hdr = (struct kvm_get_htab_header *)
2645                 ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid);
2646         }
2647     }
2648 
2649     close(fd);
2650 }
2651 
2652 void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1)
2653 {
2654     int fd, rc;
2655     struct kvm_get_htab_fd ghf;
2656     struct {
2657         struct kvm_get_htab_header hdr;
2658         uint64_t pte0;
2659         uint64_t pte1;
2660     } buf;
2661 
2662     ghf.flags = 0;
2663     ghf.start_index = 0;     /* Ignored */
2664     fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2665     if (fd < 0) {
2666         hw_error("kvmppc_write_hpte: Unable to open HPT fd");
2667     }
2668 
2669     buf.hdr.n_valid = 1;
2670     buf.hdr.n_invalid = 0;
2671     buf.hdr.index = ptex;
2672     buf.pte0 = cpu_to_be64(pte0);
2673     buf.pte1 = cpu_to_be64(pte1);
2674 
2675     rc = write(fd, &buf, sizeof(buf));
2676     if (rc != sizeof(buf)) {
2677         hw_error("kvmppc_write_hpte: Unable to update KVM HPT");
2678     }
2679     close(fd);
2680 }
2681 
2682 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2683                              uint64_t address, uint32_t data, PCIDevice *dev)
2684 {
2685     return 0;
2686 }
2687 
2688 int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
2689                                 int vector, PCIDevice *dev)
2690 {
2691     return 0;
2692 }
2693 
/* Does nothing with @virq and always returns 0. */
int kvm_arch_release_virq_post(int virq)
{
    (void) virq;

    return 0;
}
2698 
/* Return the low 16 bits of the MSI @data word. */
int kvm_arch_msi_data_to_gsi(uint32_t data)
{
    const uint32_t low16_mask = 0xffff;

    return data & low16_mask;
}
2703 
2704 int kvmppc_enable_hwrng(void)
2705 {
2706     if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2707         return -1;
2708     }
2709 
2710     return kvmppc_enable_hcall(kvm_state, H_RANDOM);
2711 }
2712