xref: /openbmc/qemu/target/ppc/kvm.c (revision 795c40b8)
1 /*
2  * PowerPC implementation of KVM hooks
3  *
4  * Copyright IBM Corp. 2007
5  * Copyright (C) 2011 Freescale Semiconductor, Inc.
6  *
7  * Authors:
8  *  Jerone Young <jyoung5@us.ibm.com>
9  *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10  *  Hollis Blanchard <hollisb@us.ibm.com>
11  *
12  * This work is licensed under the terms of the GNU GPL, version 2 or later.
13  * See the COPYING file in the top-level directory.
14  *
15  */
16 
17 #include "qemu/osdep.h"
18 #include <dirent.h>
19 #include <sys/ioctl.h>
20 #include <sys/vfs.h>
21 
22 #include <linux/kvm.h>
23 
24 #include "qemu-common.h"
25 #include "qemu/error-report.h"
26 #include "cpu.h"
27 #include "cpu-models.h"
28 #include "qemu/timer.h"
29 #include "sysemu/sysemu.h"
30 #include "sysemu/hw_accel.h"
31 #include "kvm_ppc.h"
32 #include "sysemu/cpus.h"
33 #include "sysemu/device_tree.h"
34 #include "mmu-hash64.h"
35 
36 #include "hw/sysbus.h"
37 #include "hw/ppc/spapr.h"
38 #include "hw/ppc/spapr_vio.h"
39 #include "hw/ppc/spapr_cpu_core.h"
40 #include "hw/ppc/ppc.h"
41 #include "sysemu/watchdog.h"
42 #include "trace.h"
43 #include "exec/gdbstub.h"
44 #include "exec/memattrs.h"
45 #include "exec/ram_addr.h"
46 #include "sysemu/hostmem.h"
47 #include "qemu/cutils.h"
48 #include "qemu/mmap-alloc.h"
49 #if defined(TARGET_PPC64)
50 #include "hw/ppc/spapr_cpu_core.h"
51 #endif
52 #include "elf.h"
53 #include "sysemu/kvm_int.h"
54 
55 //#define DEBUG_KVM
56 
57 #ifdef DEBUG_KVM
58 #define DPRINTF(fmt, ...) \
59     do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
60 #else
61 #define DPRINTF(fmt, ...) \
62     do { } while (0)
63 #endif
64 
65 #define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"
66 
67 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
68     KVM_CAP_LAST_INFO
69 };
70 
71 static int cap_interrupt_unset = false;
72 static int cap_interrupt_level = false;
73 static int cap_segstate;
74 static int cap_booke_sregs;
75 static int cap_ppc_smt;
76 static int cap_ppc_rma;
77 static int cap_spapr_tce;
78 static int cap_spapr_tce_64;
79 static int cap_spapr_multitce;
80 static int cap_spapr_vfio;
81 static int cap_hior;
82 static int cap_one_reg;
83 static int cap_epr;
84 static int cap_ppc_watchdog;
85 static int cap_papr;
86 static int cap_htab_fd;
87 static int cap_fixup_hcalls;
88 static int cap_htm;             /* Hardware transactional memory support */
89 static int cap_mmu_radix;
90 static int cap_mmu_hash_v3;
91 
92 static uint32_t debug_inst_opcode;
93 
94 /* XXX We have a race condition where we actually have a level triggered
95  *     interrupt, but the infrastructure can't expose that yet, so the guest
96  *     takes the interrupt but ignores it, goes to sleep and never gets
97  *     notified that there's still an interrupt pending.
98  *
99  *     As a quick workaround, let's just wake up again 20 ms after we injected
100  *     an interrupt. That way we can ensure that we're always reinjecting
101  *     interrupts in case the guest swallowed them.
102  */
103 static QEMUTimer *idle_timer;
104 
105 static void kvm_kick_cpu(void *opaque)
106 {
107     PowerPCCPU *cpu = opaque;
108 
109     qemu_cpu_kick(CPU(cpu));
110 }
111 
112 /* Check whether we are running with KVM-PR (instead of KVM-HV).  This
113  * should only be used for fallback tests - generally we should use
114  * explicit capabilities for the features we want, rather than
115  * assuming what is/isn't available depending on the KVM variant. */
116 static bool kvmppc_is_pr(KVMState *ks)
117 {
118     /* Assume KVM-PR if the GET_PVINFO capability is available */
119     return kvm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
120 }
121 
122 static int kvm_ppc_register_host_cpu_type(void);
123 
124 int kvm_arch_init(MachineState *ms, KVMState *s)
125 {
126     cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
127     cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
128     cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
129     cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
130     cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
131     cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
132     cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
133     cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64);
134     cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
135     cap_spapr_vfio = false;
136     cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
137     cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
138     cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
139     cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
140     /* Note: we don't set cap_papr here, because this capability is
141      * only activated later, by kvmppc_set_papr() */
142     cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
143     cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
144     cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
145     cap_mmu_radix = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_RADIX);
146     cap_mmu_hash_v3 = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_HASH_V3);
147 
148     if (!cap_interrupt_level) {
149         fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
150                         "VM to stall at times!\n");
151     }
152 
153     kvm_ppc_register_host_cpu_type();
154 
155     return 0;
156 }
157 
158 int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
159 {
160     return 0;
161 }
162 
163 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
164 {
165     CPUPPCState *cenv = &cpu->env;
166     CPUState *cs = CPU(cpu);
167     struct kvm_sregs sregs;
168     int ret;
169 
170     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
171         /* What we're really trying to say is "if we're on BookE, we use
172            the native PVR for now". This is the only sane way to check
173            it though, so we potentially mislead users into thinking they
174            can run BookE guests on BookS. Let's hope nobody dares enough :) */
175         return 0;
176     } else {
177         if (!cap_segstate) {
178             fprintf(stderr, "kvm error: missing PVR setting capability\n");
179             return -ENOSYS;
180         }
181     }
182 
183     ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
184     if (ret) {
185         return ret;
186     }
187 
188     sregs.pvr = cenv->spr[SPR_PVR];
189     return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
190 }
191 
192 /* Set up a shared TLB array with KVM */
193 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
194 {
195     CPUPPCState *env = &cpu->env;
196     CPUState *cs = CPU(cpu);
197     struct kvm_book3e_206_tlb_params params = {};
198     struct kvm_config_tlb cfg = {};
199     unsigned int entries = 0;
200     int ret, i;
201 
202     if (!kvm_enabled() ||
203         !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
204         return 0;
205     }
206 
207     assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
208 
209     for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
210         params.tlb_sizes[i] = booke206_tlb_size(env, i);
211         params.tlb_ways[i] = booke206_tlb_ways(env, i);
212         entries += params.tlb_sizes[i];
213     }
214 
215     assert(entries == env->nb_tlb);
216     assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
217 
218     env->tlb_dirty = true;
219 
220     cfg.array = (uintptr_t)env->tlb.tlbm;
221     cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
222     cfg.params = (uintptr_t)&params;
223     cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
224 
225     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
226     if (ret < 0) {
227         fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
228                 __func__, strerror(-ret));
229         return ret;
230     }
231 
232     env->kvm_sw_tlb = true;
233     return 0;
234 }
235 
236 
237 #if defined(TARGET_PPC64)
238 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
239                                        struct kvm_ppc_smmu_info *info)
240 {
241     CPUPPCState *env = &cpu->env;
242     CPUState *cs = CPU(cpu);
243 
244     memset(info, 0, sizeof(*info));
245 
246     /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
247      * we need to "guess" what the supported page sizes are.
248      *
249      * For that to work we make a few assumptions:
250      *
251      * - Check whether we are running "PR" KVM which only supports 4K
252      *   and 16M pages, but supports them regardless of the backing
253  *   store characteristics. We also don't support 1T segments.
254      *
255      *   This is safe as if HV KVM ever supports that capability or PR
256  *   KVM grows support for more page/segment sizes, those versions
257      *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
258      *   will not hit this fallback
259      *
260      * - Else we are running HV KVM. This means we only support page
261      *   sizes that fit in the backing store. Additionally we only
262  *   advertise 64K pages if the processor is ARCH 2.06 and we assume
263      *   P7 encodings for the SLB and hash table. Here too, we assume
264      *   support for any newer processor will mean a kernel that
265      *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
266      *   this fallback.
267      */
268     if (kvmppc_is_pr(cs->kvm_state)) {
269         /* No flags */
270         info->flags = 0;
271         info->slb_size = 64;
272 
273         /* Standard 4k base page size segment */
274         info->sps[0].page_shift = 12;
275         info->sps[0].slb_enc = 0;
276         info->sps[0].enc[0].page_shift = 12;
277         info->sps[0].enc[0].pte_enc = 0;
278 
279         /* Standard 16M large page size segment */
280         info->sps[1].page_shift = 24;
281         info->sps[1].slb_enc = SLB_VSID_L;
282         info->sps[1].enc[0].page_shift = 24;
283         info->sps[1].enc[0].pte_enc = 0;
284     } else {
285         int i = 0;
286 
287         /* HV KVM has backing store size restrictions */
288         info->flags = KVM_PPC_PAGE_SIZES_REAL;
289 
290         if (env->mmu_model & POWERPC_MMU_1TSEG) {
291             info->flags |= KVM_PPC_1T_SEGMENTS;
292         }
293 
294         if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 ||
295            POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) {
296             info->slb_size = 32;
297         } else {
298             info->slb_size = 64;
299         }
300 
301         /* Standard 4k base page size segment */
302         info->sps[i].page_shift = 12;
303         info->sps[i].slb_enc = 0;
304         info->sps[i].enc[0].page_shift = 12;
305         info->sps[i].enc[0].pte_enc = 0;
306         i++;
307 
308         /* 64K on MMU 2.06 and later */
309         if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 ||
310             POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) {
311             info->sps[i].page_shift = 16;
312             info->sps[i].slb_enc = 0x110;
313             info->sps[i].enc[0].page_shift = 16;
314             info->sps[i].enc[0].pte_enc = 1;
315             i++;
316         }
317 
318         /* Standard 16M large page size segment */
319         info->sps[i].page_shift = 24;
320         info->sps[i].slb_enc = SLB_VSID_L;
321         info->sps[i].enc[0].page_shift = 24;
322         info->sps[i].enc[0].pte_enc = 0;
323     }
324 }
325 
326 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
327 {
328     CPUState *cs = CPU(cpu);
329     int ret;
330 
331     if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
332         ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
333         if (ret == 0) {
334             return;
335         }
336     }
337 
338     kvm_get_fallback_smmu_info(cpu, info);
339 }
340 
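/* Query KVM for the radix MMU page size (AP) encodings supported by the
 * host. Returns NULL if the radix MMU capability is absent or the query
 * fails; the caller owns the returned structure.
 */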
341 struct ppc_radix_page_info *kvm_get_radix_page_info(void)
342 {
343     KVMState *s = KVM_STATE(current_machine->accelerator);
344     struct ppc_radix_page_info *radix_page_info;
345     struct kvm_ppc_rmmu_info rmmu_info;
346     int i;
347 
348     if (!kvm_check_extension(s, KVM_CAP_PPC_MMU_RADIX)) {
349         return NULL;
350     }
351     if (kvm_vm_ioctl(s, KVM_PPC_GET_RMMU_INFO, &rmmu_info)) {
352         return NULL;
353     }
354     radix_page_info = g_malloc0(sizeof(*radix_page_info));
355     radix_page_info->count = 0;
356     for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) {
357         if (rmmu_info.ap_encodings[i]) {
358             radix_page_info->entries[i] = rmmu_info.ap_encodings[i];
359             radix_page_info->count++;
360         }
361     }
362     return radix_page_info;
363 }
364 
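/* Configure the ISA v3.00 MMU for the guest: select radix vs. hash
 * translation, optionally enable guest translation shootdown (GTSE),
 * and register the process table. The ioctl result is translated into
 * a PAPR hcall return code.
 */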
365 target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu,
366                                      bool radix, bool gtse,
367                                      uint64_t proc_tbl)
368 {
369     CPUState *cs = CPU(cpu);
370     int ret;
371     uint64_t flags = 0;
372     struct kvm_ppc_mmuv3_cfg cfg = {
373         .process_table = proc_tbl,
374     };
375 
376     if (radix) {
377         flags |= KVM_PPC_MMUV3_RADIX;
378     }
379     if (gtse) {
380         flags |= KVM_PPC_MMUV3_GTSE;
381     }
382     cfg.flags = flags;
383     ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_CONFIGURE_V3_MMU, &cfg);
384     switch (ret) {
385     case 0:
386         return H_SUCCESS;
387     case -EINVAL:
388         return H_PARAMETER;
389     case -ENODEV:
390         return H_NOT_AVAILABLE;
391     default:
392         return H_HARDWARE;
393     }
394 }
395 
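/* When KVM reports KVM_PPC_PAGE_SIZES_REAL (HV KVM), a page size is only
 * usable if it fits within the page size backing guest RAM; otherwise any
 * size is accepted.
 */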
396 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
397 {
398     if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
399         return true;
400     }
401 
402     return (1ul << shift) <= rampgsize;
403 }
404 
405 static long max_cpu_page_size;
406 
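/* Filter the CPU's advertised segment/page size combinations down to what
 * KVM and the backing memory can actually provide, and drop 1T segment and
 * 64k page support from the MMU model when the host cannot supply them.
 */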
407 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
408 {
409     static struct kvm_ppc_smmu_info smmu_info;
410     static bool has_smmu_info;
411     CPUPPCState *env = &cpu->env;
412     int iq, ik, jq, jk;
413     bool has_64k_pages = false;
414 
415     /* We only handle page sizes for 64-bit server guests for now */
416     if (!(env->mmu_model & POWERPC_MMU_64)) {
417         return;
418     }
419 
420     /* Collect MMU info from kernel if not already */
421     if (!has_smmu_info) {
422         kvm_get_smmu_info(cpu, &smmu_info);
423         has_smmu_info = true;
424     }
425 
426     if (!max_cpu_page_size) {
427         max_cpu_page_size = qemu_getrampagesize();
428     }
429 
430     /* Convert to QEMU form */
431     memset(&env->sps, 0, sizeof(env->sps));
432 
433     /* If we have HV KVM, we need to forbid CI large pages if our
434      * host page size is smaller than 64K.
435      */
436     if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) {
437         env->ci_large_pages = getpagesize() >= 0x10000;
438     }
439 
440     /*
441      * XXX This loop should be an entry wide AND of the capabilities that
442      *     the selected CPU has with the capabilities that KVM supports.
443      */
444     for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
445         struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
446         struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
447 
448         if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
449                                  ksps->page_shift)) {
450             continue;
451         }
452         qsps->page_shift = ksps->page_shift;
453         qsps->slb_enc = ksps->slb_enc;
454         for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
455             if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
456                                      ksps->enc[jk].page_shift)) {
457                 continue;
458             }
459             if (ksps->enc[jk].page_shift == 16) {
460                 has_64k_pages = true;
461             }
462             qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
463             qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
464             if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
465                 break;
466             }
467         }
468         if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
469             break;
470         }
471     }
472     env->slb_nr = smmu_info.slb_size;
473     if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
474         env->mmu_model &= ~POWERPC_MMU_1TSEG;
475     }
476     if (!has_64k_pages) {
477         env->mmu_model &= ~POWERPC_MMU_64K;
478     }
479 }
480 
481 bool kvmppc_is_mem_backend_page_size_ok(char *obj_path)
482 {
483     Object *mem_obj = object_resolve_path(obj_path, NULL);
484     char *mempath = object_property_get_str(mem_obj, "mem-path", NULL);
485     long pagesize;
486 
487     if (mempath) {
488         pagesize = qemu_mempath_getpagesize(mempath);
489     } else {
490         pagesize = getpagesize();
491     }
492 
493     return pagesize >= max_cpu_page_size;
494 }
495 
496 #else /* defined (TARGET_PPC64) */
497 
498 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
499 {
500 }
501 
502 bool kvmppc_is_mem_backend_page_size_ok(char *obj_path)
503 {
504     return true;
505 }
506 
507 #endif /* !defined (TARGET_PPC64) */
508 
509 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
510 {
511     return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
512 }
513 
514 /* e500 supports 2 h/w breakpoints and 2 watchpoints.
515  * book3s supports only 1 watchpoint, so an array size
516  * of 4 is sufficient for now.
517  */
518 #define MAX_HW_BKPTS 4
519 
520 static struct HWBreakpoint {
521     target_ulong addr;
522     int type;
523 } hw_debug_points[MAX_HW_BKPTS];
524 
525 static CPUWatchpoint hw_watchpoint;
526 
527 /* By default no hardware breakpoints or watchpoints are supported */
528 static int max_hw_breakpoint;
529 static int max_hw_watchpoint;
530 static int nb_hw_breakpoint;
531 static int nb_hw_watchpoint;
532 
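/* BookE CPUs provide 2 hardware breakpoints and 2 watchpoints; for other
 * models the limits stay at zero, so no hardware debug points are
 * advertised.
 */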
533 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
534 {
535     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
536         max_hw_breakpoint = 2;
537         max_hw_watchpoint = 2;
538     }
539 
540     if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
541         fprintf(stderr, "Error initializing h/w breakpoints\n");
542         return;
543     }
544 }
545 
546 int kvm_arch_init_vcpu(CPUState *cs)
547 {
548     PowerPCCPU *cpu = POWERPC_CPU(cs);
549     CPUPPCState *cenv = &cpu->env;
550     int ret;
551 
552     /* Gather server mmu info from KVM and update the CPU state */
553     kvm_fixup_page_sizes(cpu);
554 
555     /* Synchronize sregs with kvm */
556     ret = kvm_arch_sync_sregs(cpu);
557     if (ret) {
558         if (ret == -EINVAL) {
559             error_report("Register sync failed... If you're using kvm-hv.ko,"
560                          " only \"-cpu host\" is possible");
561         }
562         return ret;
563     }
564 
565     idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
566 
567     switch (cenv->mmu_model) {
568     case POWERPC_MMU_BOOKE206:
569         /* This target supports access to KVM's guest TLB */
570         ret = kvm_booke206_tlb_init(cpu);
571         break;
572     case POWERPC_MMU_2_07:
573         if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
574             /* KVM-HV has transactional memory on POWER8 even without the
575              * KVM_CAP_PPC_HTM extension, so enable it here instead as
576              * long as it's available to userspace on the host. */
577             if (qemu_getauxval(AT_HWCAP2) & PPC_FEATURE2_HAS_HTM) {
578                 cap_htm = true;
579             }
580         }
581         break;
582     default:
583         break;
584     }
585 
586     kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
587     kvmppc_hw_debug_points_init(cenv);
588 
589     return ret;
590 }
591 
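/* Push the entire shadow TLB back to KVM by marking every entry dirty in
 * the bitmap passed to KVM_DIRTY_TLB. Only used when the shared software
 * TLB (KVM_CAP_SW_TLB) has been set up.
 */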
592 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
593 {
594     CPUPPCState *env = &cpu->env;
595     CPUState *cs = CPU(cpu);
596     struct kvm_dirty_tlb dirty_tlb;
597     unsigned char *bitmap;
598     int ret;
599 
600     if (!env->kvm_sw_tlb) {
601         return;
602     }
603 
604     bitmap = g_malloc((env->nb_tlb + 7) / 8);
605     memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
606 
607     dirty_tlb.bitmap = (uintptr_t)bitmap;
608     dirty_tlb.num_dirty = env->nb_tlb;
609 
610     ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
611     if (ret) {
612         fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
613                 __func__, strerror(-ret));
614     }
615 
616     g_free(bitmap);
617 }
618 
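/* Read one SPR from KVM through the ONE_REG interface into env->spr[];
 * the transfer width is taken from the KVM_REG_SIZE field of the id.
 */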
619 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
620 {
621     PowerPCCPU *cpu = POWERPC_CPU(cs);
622     CPUPPCState *env = &cpu->env;
623     union {
624         uint32_t u32;
625         uint64_t u64;
626     } val;
627     struct kvm_one_reg reg = {
628         .id = id,
629         .addr = (uintptr_t) &val,
630     };
631     int ret;
632 
633     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
634     if (ret != 0) {
635         trace_kvm_failed_spr_get(spr, strerror(errno));
636     } else {
637         switch (id & KVM_REG_SIZE_MASK) {
638         case KVM_REG_SIZE_U32:
639             env->spr[spr] = val.u32;
640             break;
641 
642         case KVM_REG_SIZE_U64:
643             env->spr[spr] = val.u64;
644             break;
645 
646         default:
647             /* Don't handle this size yet */
648             abort();
649         }
650     }
651 }
652 
653 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
654 {
655     PowerPCCPU *cpu = POWERPC_CPU(cs);
656     CPUPPCState *env = &cpu->env;
657     union {
658         uint32_t u32;
659         uint64_t u64;
660     } val;
661     struct kvm_one_reg reg = {
662         .id = id,
663         .addr = (uintptr_t) &val,
664     };
665     int ret;
666 
667     switch (id & KVM_REG_SIZE_MASK) {
668     case KVM_REG_SIZE_U32:
669         val.u32 = env->spr[spr];
670         break;
671 
672     case KVM_REG_SIZE_U64:
673         val.u64 = env->spr[spr];
674         break;
675 
676     default:
677         /* Don't handle this size yet */
678         abort();
679     }
680 
681     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
682     if (ret != 0) {
683         trace_kvm_failed_spr_set(spr, strerror(errno));
684     }
685 }
686 
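/* Transfer the floating point, VSX and Altivec state to KVM. Each FP/VSX
 * register is copied as a two-doubleword buffer whose halves are ordered
 * according to host endianness; the Altivec registers and VSCR are copied
 * directly.
 */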
687 static int kvm_put_fp(CPUState *cs)
688 {
689     PowerPCCPU *cpu = POWERPC_CPU(cs);
690     CPUPPCState *env = &cpu->env;
691     struct kvm_one_reg reg;
692     int i;
693     int ret;
694 
695     if (env->insns_flags & PPC_FLOAT) {
696         uint64_t fpscr = env->fpscr;
697         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
698 
699         reg.id = KVM_REG_PPC_FPSCR;
700         reg.addr = (uintptr_t)&fpscr;
701         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
702         if (ret < 0) {
703             DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
704             return ret;
705         }
706 
707         for (i = 0; i < 32; i++) {
708             uint64_t vsr[2];
709 
710 #ifdef HOST_WORDS_BIGENDIAN
711             vsr[0] = float64_val(env->fpr[i]);
712             vsr[1] = env->vsr[i];
713 #else
714             vsr[0] = env->vsr[i];
715             vsr[1] = float64_val(env->fpr[i]);
716 #endif
717             reg.addr = (uintptr_t) &vsr;
718             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
719 
720             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
721             if (ret < 0) {
722                 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
723                         i, strerror(errno));
724                 return ret;
725             }
726         }
727     }
728 
729     if (env->insns_flags & PPC_ALTIVEC) {
730         reg.id = KVM_REG_PPC_VSCR;
731         reg.addr = (uintptr_t)&env->vscr;
732         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
733         if (ret < 0) {
734             DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
735             return ret;
736         }
737 
738         for (i = 0; i < 32; i++) {
739             reg.id = KVM_REG_PPC_VR(i);
740             reg.addr = (uintptr_t)&env->avr[i];
741             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
742             if (ret < 0) {
743                 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
744                 return ret;
745             }
746         }
747     }
748 
749     return 0;
750 }
751 
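/* Fetch the floating point, VSX and Altivec state from KVM, undoing the
 * host-endian dependent VSR packing used by kvm_put_fp().
 */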
752 static int kvm_get_fp(CPUState *cs)
753 {
754     PowerPCCPU *cpu = POWERPC_CPU(cs);
755     CPUPPCState *env = &cpu->env;
756     struct kvm_one_reg reg;
757     int i;
758     int ret;
759 
760     if (env->insns_flags & PPC_FLOAT) {
761         uint64_t fpscr;
762         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
763 
764         reg.id = KVM_REG_PPC_FPSCR;
765         reg.addr = (uintptr_t)&fpscr;
766         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
767         if (ret < 0) {
768             DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
769             return ret;
770         } else {
771             env->fpscr = fpscr;
772         }
773 
774         for (i = 0; i < 32; i++) {
775             uint64_t vsr[2];
776 
777             reg.addr = (uintptr_t) &vsr;
778             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
779 
780             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
781             if (ret < 0) {
782                 DPRINTF("Unable to get %s%d from KVM: %s\n",
783                         vsx ? "VSR" : "FPR", i, strerror(errno));
784                 return ret;
785             } else {
786 #ifdef HOST_WORDS_BIGENDIAN
787                 env->fpr[i] = vsr[0];
788                 if (vsx) {
789                     env->vsr[i] = vsr[1];
790                 }
791 #else
792                 env->fpr[i] = vsr[1];
793                 if (vsx) {
794                     env->vsr[i] = vsr[0];
795                 }
796 #endif
797             }
798         }
799     }
800 
801     if (env->insns_flags & PPC_ALTIVEC) {
802         reg.id = KVM_REG_PPC_VSCR;
803         reg.addr = (uintptr_t)&env->vscr;
804         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
805         if (ret < 0) {
806             DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
807             return ret;
808         }
809 
810         for (i = 0; i < 32; i++) {
811             reg.id = KVM_REG_PPC_VR(i);
812             reg.addr = (uintptr_t)&env->avr[i];
813             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
814             if (ret < 0) {
815                 DPRINTF("Unable to get VR%d from KVM: %s\n",
816                         i, strerror(errno));
817                 return ret;
818             }
819         }
820     }
821 
822     return 0;
823 }
824 
825 #if defined(TARGET_PPC64)
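/* Read the PAPR VPA, SLB shadow and dispatch trace log registration state
 * from KVM into the CPU state.
 */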
826 static int kvm_get_vpa(CPUState *cs)
827 {
828     PowerPCCPU *cpu = POWERPC_CPU(cs);
829     CPUPPCState *env = &cpu->env;
830     struct kvm_one_reg reg;
831     int ret;
832 
833     reg.id = KVM_REG_PPC_VPA_ADDR;
834     reg.addr = (uintptr_t)&env->vpa_addr;
835     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
836     if (ret < 0) {
837         DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
838         return ret;
839     }
840 
841     assert((uintptr_t)&env->slb_shadow_size
842            == ((uintptr_t)&env->slb_shadow_addr + 8));
843     reg.id = KVM_REG_PPC_VPA_SLB;
844     reg.addr = (uintptr_t)&env->slb_shadow_addr;
845     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
846     if (ret < 0) {
847         DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
848                 strerror(errno));
849         return ret;
850     }
851 
852     assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
853     reg.id = KVM_REG_PPC_VPA_DTL;
854     reg.addr = (uintptr_t)&env->dtl_addr;
855     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
856     if (ret < 0) {
857         DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
858                 strerror(errno));
859         return ret;
860     }
861 
862     return 0;
863 }
864 
865 static int kvm_put_vpa(CPUState *cs)
866 {
867     PowerPCCPU *cpu = POWERPC_CPU(cs);
868     CPUPPCState *env = &cpu->env;
869     struct kvm_one_reg reg;
870     int ret;
871 
872     /* SLB shadow or DTL can't be registered unless a master VPA is
873      * registered.  That means when restoring state, if a VPA *is*
874      * registered, we need to set that up first.  If not, we need to
875      * deregister the others before deregistering the master VPA */
876     assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
877 
878     if (env->vpa_addr) {
879         reg.id = KVM_REG_PPC_VPA_ADDR;
880         reg.addr = (uintptr_t)&env->vpa_addr;
881         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
882         if (ret < 0) {
883             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
884             return ret;
885         }
886     }
887 
888     assert((uintptr_t)&env->slb_shadow_size
889            == ((uintptr_t)&env->slb_shadow_addr + 8));
890     reg.id = KVM_REG_PPC_VPA_SLB;
891     reg.addr = (uintptr_t)&env->slb_shadow_addr;
892     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
893     if (ret < 0) {
894         DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
895         return ret;
896     }
897 
898     assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
899     reg.id = KVM_REG_PPC_VPA_DTL;
900     reg.addr = (uintptr_t)&env->dtl_addr;
901     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
902     if (ret < 0) {
903         DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
904                 strerror(errno));
905         return ret;
906     }
907 
908     if (!env->vpa_addr) {
909         reg.id = KVM_REG_PPC_VPA_ADDR;
910         reg.addr = (uintptr_t)&env->vpa_addr;
911         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
912         if (ret < 0) {
913             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
914             return ret;
915         }
916     }
917 
918     return 0;
919 }
920 #endif /* TARGET_PPC64 */
921 
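/* Write the Book3S segment state (PVR, SDR1, SLB, segment registers and
 * BATs) back to KVM with KVM_SET_SREGS.
 */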
922 int kvmppc_put_books_sregs(PowerPCCPU *cpu)
923 {
924     CPUPPCState *env = &cpu->env;
925     struct kvm_sregs sregs;
926     int i;
927 
928     sregs.pvr = env->spr[SPR_PVR];
929 
930     sregs.u.s.sdr1 = env->spr[SPR_SDR1];
931 
932     /* Sync SLB */
933 #ifdef TARGET_PPC64
934     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
935         sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
936         if (env->slb[i].esid & SLB_ESID_V) {
937             sregs.u.s.ppc64.slb[i].slbe |= i;
938         }
939         sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
940     }
941 #endif
942 
943     /* Sync SRs */
944     for (i = 0; i < 16; i++) {
945         sregs.u.s.ppc32.sr[i] = env->sr[i];
946     }
947 
948     /* Sync BATs */
949     for (i = 0; i < 8; i++) {
950         /* Beware. We have to swap upper and lower bits here */
951         sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
952             | env->DBAT[1][i];
953         sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
954             | env->IBAT[1][i];
955     }
956 
957     return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
958 }
959 
960 int kvm_arch_put_registers(CPUState *cs, int level)
961 {
962     PowerPCCPU *cpu = POWERPC_CPU(cs);
963     CPUPPCState *env = &cpu->env;
964     struct kvm_regs regs;
965     int ret;
966     int i;
967 
968     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
969     if (ret < 0) {
970         return ret;
971     }
972 
973     regs.ctr = env->ctr;
974     regs.lr  = env->lr;
975     regs.xer = cpu_read_xer(env);
976     regs.msr = env->msr;
977     regs.pc = env->nip;
978 
979     regs.srr0 = env->spr[SPR_SRR0];
980     regs.srr1 = env->spr[SPR_SRR1];
981 
982     regs.sprg0 = env->spr[SPR_SPRG0];
983     regs.sprg1 = env->spr[SPR_SPRG1];
984     regs.sprg2 = env->spr[SPR_SPRG2];
985     regs.sprg3 = env->spr[SPR_SPRG3];
986     regs.sprg4 = env->spr[SPR_SPRG4];
987     regs.sprg5 = env->spr[SPR_SPRG5];
988     regs.sprg6 = env->spr[SPR_SPRG6];
989     regs.sprg7 = env->spr[SPR_SPRG7];
990 
991     regs.pid = env->spr[SPR_BOOKE_PID];
992 
993     for (i = 0; i < 32; i++)
994         regs.gpr[i] = env->gpr[i];
995 
996     regs.cr = 0;
997     for (i = 0; i < 8; i++) {
998         regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
999     }
1000 
1001     ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
1002     if (ret < 0)
1003         return ret;
1004 
1005     kvm_put_fp(cs);
1006 
1007     if (env->tlb_dirty) {
1008         kvm_sw_tlb_put(cpu);
1009         env->tlb_dirty = false;
1010     }
1011 
1012     if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
1013         ret = kvmppc_put_books_sregs(cpu);
1014         if (ret < 0) {
1015             return ret;
1016         }
1017     }
1018 
1019     if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
1020         kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1021     }
1022 
1023     if (cap_one_reg) {
1024         int i;
1025 
1026         /* We deliberately ignore errors here: for kernels which have
1027          * the ONE_REG calls but don't support the specific
1028          * registers, there's a reasonable chance things will still
1029          * work, at least until we try to migrate. */
1030         for (i = 0; i < 1024; i++) {
1031             uint64_t id = env->spr_cb[i].one_reg_id;
1032 
1033             if (id != 0) {
1034                 kvm_put_one_spr(cs, id, i);
1035             }
1036         }
1037 
1038 #ifdef TARGET_PPC64
1039         if (msr_ts) {
1040             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1041                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1042             }
1043             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1044                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1045             }
1046             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1047             kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1048             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1049             kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1050             kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1051             kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1052             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1053             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1054             kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1055             kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1056         }
1057 
1058         if (cap_papr) {
1059             if (kvm_put_vpa(cs) < 0) {
1060                 DPRINTF("Warning: Unable to set VPA information to KVM\n");
1061             }
1062         }
1063 
1064         kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1065 #endif /* TARGET_PPC64 */
1066     }
1067 
1068     return ret;
1069 }
1070 
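/* Recompute one exception vector from its IVOR offset and the IVPR base. */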
1071 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1072 {
1073      env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1074 }
1075 
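/* Read the BookE special registers from KVM, copying only the blocks whose
 * feature bits are reported in sregs.u.e.features, and refresh the derived
 * exception vectors from the IVORs.
 */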
1076 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1077 {
1078     CPUPPCState *env = &cpu->env;
1079     struct kvm_sregs sregs;
1080     int ret;
1081 
1082     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1083     if (ret < 0) {
1084         return ret;
1085     }
1086 
1087     if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1088         env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1089         env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1090         env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1091         env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1092         env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1093         env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1094         env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1095         env->spr[SPR_DECR] = sregs.u.e.dec;
1096         env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1097         env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1098         env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1099     }
1100 
1101     if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1102         env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1103         env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1104         env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1105         env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1106         env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1107     }
1108 
1109     if (sregs.u.e.features & KVM_SREGS_E_64) {
1110         env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1111     }
1112 
1113     if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1114         env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1115     }
1116 
1117     if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1118         env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1119         kvm_sync_excp(env, POWERPC_EXCP_CRITICAL,  SPR_BOOKE_IVOR0);
1120         env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1121         kvm_sync_excp(env, POWERPC_EXCP_MCHECK,  SPR_BOOKE_IVOR1);
1122         env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1123         kvm_sync_excp(env, POWERPC_EXCP_DSI,  SPR_BOOKE_IVOR2);
1124         env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1125         kvm_sync_excp(env, POWERPC_EXCP_ISI,  SPR_BOOKE_IVOR3);
1126         env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1127         kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL,  SPR_BOOKE_IVOR4);
1128         env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1129         kvm_sync_excp(env, POWERPC_EXCP_ALIGN,  SPR_BOOKE_IVOR5);
1130         env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1131         kvm_sync_excp(env, POWERPC_EXCP_PROGRAM,  SPR_BOOKE_IVOR6);
1132         env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1133         kvm_sync_excp(env, POWERPC_EXCP_FPU,  SPR_BOOKE_IVOR7);
1134         env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1135         kvm_sync_excp(env, POWERPC_EXCP_SYSCALL,  SPR_BOOKE_IVOR8);
1136         env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1137         kvm_sync_excp(env, POWERPC_EXCP_APU,  SPR_BOOKE_IVOR9);
1138         env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1139         kvm_sync_excp(env, POWERPC_EXCP_DECR,  SPR_BOOKE_IVOR10);
1140         env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1141         kvm_sync_excp(env, POWERPC_EXCP_FIT,  SPR_BOOKE_IVOR11);
1142         env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1143         kvm_sync_excp(env, POWERPC_EXCP_WDT,  SPR_BOOKE_IVOR12);
1144         env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1145         kvm_sync_excp(env, POWERPC_EXCP_DTLB,  SPR_BOOKE_IVOR13);
1146         env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1147         kvm_sync_excp(env, POWERPC_EXCP_ITLB,  SPR_BOOKE_IVOR14);
1148         env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1149         kvm_sync_excp(env, POWERPC_EXCP_DEBUG,  SPR_BOOKE_IVOR15);
1150 
1151         if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1152             env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1153             kvm_sync_excp(env, POWERPC_EXCP_SPEU,  SPR_BOOKE_IVOR32);
1154             env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1155             kvm_sync_excp(env, POWERPC_EXCP_EFPDI,  SPR_BOOKE_IVOR33);
1156             env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1157             kvm_sync_excp(env, POWERPC_EXCP_EFPRI,  SPR_BOOKE_IVOR34);
1158         }
1159 
1160         if (sregs.u.e.features & KVM_SREGS_E_PM) {
1161             env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1162             kvm_sync_excp(env, POWERPC_EXCP_EPERFM,  SPR_BOOKE_IVOR35);
1163         }
1164 
1165         if (sregs.u.e.features & KVM_SREGS_E_PC) {
1166             env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1167             kvm_sync_excp(env, POWERPC_EXCP_DOORI,  SPR_BOOKE_IVOR36);
1168             env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1169             kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1170         }
1171     }
1172 
1173     if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1174         env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1175         env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1176         env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1177         env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1178         env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1179         env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1180         env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1181         env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1182         env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1183         env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1184     }
1185 
1186     if (sregs.u.e.features & KVM_SREGS_EXP) {
1187         env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1188     }
1189 
1190     if (sregs.u.e.features & KVM_SREGS_E_PD) {
1191         env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1192         env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1193     }
1194 
1195     if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1196         env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1197         env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1198         env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1199 
1200         if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1201             env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1202             env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1203         }
1204     }
1205 
1206     return 0;
1207 }
1208 
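/* Read the Book3S segment state (SDR1, SLB, segment registers and BATs)
 * from KVM and store it into the CPU state.
 */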
1209 static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1210 {
1211     CPUPPCState *env = &cpu->env;
1212     struct kvm_sregs sregs;
1213     int ret;
1214     int i;
1215 
1216     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1217     if (ret < 0) {
1218         return ret;
1219     }
1220 
1221     if (!cpu->vhyp) {
1222         ppc_store_sdr1(env, sregs.u.s.sdr1);
1223     }
1224 
1225     /* Sync SLB */
1226 #ifdef TARGET_PPC64
1227     /*
1228      * The packed SLB array we get from KVM_GET_SREGS only contains
1229      * information about valid entries. So we flush our internal copy
1230      * to get rid of stale ones, then put all valid SLB entries back
1231      * in.
1232      */
1233     memset(env->slb, 0, sizeof(env->slb));
1234     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1235         target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1236         target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1237         /*
1238          * Only restore valid entries
1239          */
1240         if (rb & SLB_ESID_V) {
1241             ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1242         }
1243     }
1244 #endif
1245 
1246     /* Sync SRs */
1247     for (i = 0; i < 16; i++) {
1248         env->sr[i] = sregs.u.s.ppc32.sr[i];
1249     }
1250 
1251     /* Sync BATs */
1252     for (i = 0; i < 8; i++) {
1253         env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1254         env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1255         env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1256         env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1257     }
1258 
1259     return 0;
1260 }
1261 
1262 int kvm_arch_get_registers(CPUState *cs)
1263 {
1264     PowerPCCPU *cpu = POWERPC_CPU(cs);
1265     CPUPPCState *env = &cpu->env;
1266     struct kvm_regs regs;
1267     uint32_t cr;
1268     int i, ret;
1269 
1270     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1271     if (ret < 0)
1272         return ret;
1273 
1274     cr = regs.cr;
1275     for (i = 7; i >= 0; i--) {
1276         env->crf[i] = cr & 15;
1277         cr >>= 4;
1278     }
1279 
1280     env->ctr = regs.ctr;
1281     env->lr = regs.lr;
1282     cpu_write_xer(env, regs.xer);
1283     env->msr = regs.msr;
1284     env->nip = regs.pc;
1285 
1286     env->spr[SPR_SRR0] = regs.srr0;
1287     env->spr[SPR_SRR1] = regs.srr1;
1288 
1289     env->spr[SPR_SPRG0] = regs.sprg0;
1290     env->spr[SPR_SPRG1] = regs.sprg1;
1291     env->spr[SPR_SPRG2] = regs.sprg2;
1292     env->spr[SPR_SPRG3] = regs.sprg3;
1293     env->spr[SPR_SPRG4] = regs.sprg4;
1294     env->spr[SPR_SPRG5] = regs.sprg5;
1295     env->spr[SPR_SPRG6] = regs.sprg6;
1296     env->spr[SPR_SPRG7] = regs.sprg7;
1297 
1298     env->spr[SPR_BOOKE_PID] = regs.pid;
1299 
1300     for (i = 0; i < 32; i++)
1301         env->gpr[i] = regs.gpr[i];
1302 
1303     kvm_get_fp(cs);
1304 
1305     if (cap_booke_sregs) {
1306         ret = kvmppc_get_booke_sregs(cpu);
1307         if (ret < 0) {
1308             return ret;
1309         }
1310     }
1311 
1312     if (cap_segstate) {
1313         ret = kvmppc_get_books_sregs(cpu);
1314         if (ret < 0) {
1315             return ret;
1316         }
1317     }
1318 
1319     if (cap_hior) {
1320         kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1321     }
1322 
1323     if (cap_one_reg) {
1324         int i;
1325 
1326         /* We deliberately ignore errors here: for kernels which have
1327          * the ONE_REG calls but don't support the specific
1328          * registers, there's a reasonable chance things will still
1329          * work, at least until we try to migrate. */
1330         for (i = 0; i < 1024; i++) {
1331             uint64_t id = env->spr_cb[i].one_reg_id;
1332 
1333             if (id != 0) {
1334                 kvm_get_one_spr(cs, id, i);
1335             }
1336         }
1337 
1338 #ifdef TARGET_PPC64
1339         if (msr_ts) {
1340             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1341                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1342             }
1343             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1344                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1345             }
1346             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1347             kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1348             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1349             kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1350             kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1351             kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1352             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1353             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1354             kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1355             kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1356         }
1357 
1358         if (cap_papr) {
1359             if (kvm_get_vpa(cs) < 0) {
1360                 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1361             }
1362         }
1363 
1364         kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1365 #endif
1366     }
1367 
1368     return 0;
1369 }
1370 
1371 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1372 {
1373     unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1374 
1375     if (irq != PPC_INTERRUPT_EXT) {
1376         return 0;
1377     }
1378 
1379     if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1380         return 0;
1381     }
1382 
1383     kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1384 
1385     return 0;
1386 }
1387 
1388 #if defined(TARGET_PPCEMB)
1389 #define PPC_INPUT_INT PPC40x_INPUT_INT
1390 #elif defined(TARGET_PPC64)
1391 #define PPC_INPUT_INT PPC970_INPUT_INT
1392 #else
1393 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1394 #endif
1395 
1396 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1397 {
1398     PowerPCCPU *cpu = POWERPC_CPU(cs);
1399     CPUPPCState *env = &cpu->env;
1400     int r;
1401     unsigned irq;
1402 
1403     qemu_mutex_lock_iothread();
1404 
1405     /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1406      * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1407     if (!cap_interrupt_level &&
1408         run->ready_for_interrupt_injection &&
1409         (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1410         (env->irq_input_state & (1<<PPC_INPUT_INT)))
1411     {
1412         /* For now KVM disregards the 'irq' argument. However, in the
1413          * future KVM could cache it in-kernel to avoid a heavyweight exit
1414          * when reading the UIC.
1415          */
1416         irq = KVM_INTERRUPT_SET;
1417 
1418         DPRINTF("injected interrupt %d\n", irq);
1419         r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1420         if (r < 0) {
1421             printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1422         }
1423 
1424         /* Always wake up soon in case the interrupt was level based */
1425         timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1426                        (NANOSECONDS_PER_SECOND / 50));
1427     }
1428 
1429     /* We don't know if there are more interrupts pending after this. However,
1430      * the guest will return to userspace in the course of handling this one
1431      * anyways, so we will get a chance to deliver the rest. */
1432 
1433     qemu_mutex_unlock_iothread();
1434 }
1435 
1436 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1437 {
1438     return MEMTXATTRS_UNSPECIFIED;
1439 }
1440 
1441 int kvm_arch_process_async_events(CPUState *cs)
1442 {
1443     return cs->halted;
1444 }
1445 
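/* Handle a KVM_EXIT_HLT: if external interrupts are enabled and no
 * interrupt is pending, mark the vCPU halted and raise EXCP_HLT so it
 * sleeps until woken.
 */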
1446 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1447 {
1448     CPUState *cs = CPU(cpu);
1449     CPUPPCState *env = &cpu->env;
1450 
1451     if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1452         cs->halted = 1;
1453         cs->exception_index = EXCP_HLT;
1454     }
1455 
1456     return 0;
1457 }
1458 
1459 /* map dcr access to existing qemu dcr emulation */
1460 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1461 {
1462     if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1463         fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1464 
1465     return 0;
1466 }
1467 
1468 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1469 {
1470     if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1471         fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1472 
1473     return 0;
1474 }
1475 
1476 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1477 {
1478     /* Mixed endian case is not handled */
1479     uint32_t sc = debug_inst_opcode;
1480 
1481     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1482                             sizeof(sc), 0) ||
1483         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1484         return -EINVAL;
1485     }
1486 
1487     return 0;
1488 }
1489 
1490 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1491 {
1492     uint32_t sc;
1493 
1494     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1495         sc != debug_inst_opcode ||
1496         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1497                             sizeof(sc), 1)) {
1498         return -EINVAL;
1499     }
1500 
1501     return 0;
1502 }
1503 
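/* Return the index of the registered hardware debug point matching addr
 * and type, or -1 if there is none.
 */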
1504 static int find_hw_breakpoint(target_ulong addr, int type)
1505 {
1506     int n;
1507 
1508     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1509            <= ARRAY_SIZE(hw_debug_points));
1510 
1511     for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1512         if (hw_debug_points[n].addr == addr &&
1513              hw_debug_points[n].type == type) {
1514             return n;
1515         }
1516     }
1517 
1518     return -1;
1519 }
1520 
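/* Find a hardware watchpoint at addr, trying access, write and read types
 * in turn; on success set *flag to the corresponding BP_MEM_* kind and
 * return the index, otherwise return -1.
 */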
1521 static int find_hw_watchpoint(target_ulong addr, int *flag)
1522 {
1523     int n;
1524 
1525     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1526     if (n >= 0) {
1527         *flag = BP_MEM_ACCESS;
1528         return n;
1529     }
1530 
1531     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1532     if (n >= 0) {
1533         *flag = BP_MEM_WRITE;
1534         return n;
1535     }
1536 
1537     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1538     if (n >= 0) {
1539         *flag = BP_MEM_READ;
1540         return n;
1541     }
1542 
1543     return -1;
1544 }
1545 
1546 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1547                                   target_ulong len, int type)
1548 {
1549     if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1550         return -ENOBUFS;
1551     }
1552 
1553     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1554     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1555 
1556     switch (type) {
1557     case GDB_BREAKPOINT_HW:
1558         if (nb_hw_breakpoint >= max_hw_breakpoint) {
1559             return -ENOBUFS;
1560         }
1561 
1562         if (find_hw_breakpoint(addr, type) >= 0) {
1563             return -EEXIST;
1564         }
1565 
1566         nb_hw_breakpoint++;
1567         break;
1568 
1569     case GDB_WATCHPOINT_WRITE:
1570     case GDB_WATCHPOINT_READ:
1571     case GDB_WATCHPOINT_ACCESS:
1572         if (nb_hw_watchpoint >= max_hw_watchpoint) {
1573             return -ENOBUFS;
1574         }
1575 
1576         if (find_hw_breakpoint(addr, type) >= 0) {
1577             return -EEXIST;
1578         }
1579 
1580         nb_hw_watchpoint++;
1581         break;
1582 
1583     default:
1584         return -ENOSYS;
1585     }
1586 
1587     return 0;
1588 }
1589 
1590 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1591                                   target_ulong len, int type)
1592 {
1593     int n;
1594 
1595     n = find_hw_breakpoint(addr, type);
1596     if (n < 0) {
1597         return -ENOENT;
1598     }
1599 
1600     switch (type) {
1601     case GDB_BREAKPOINT_HW:
1602         nb_hw_breakpoint--;
1603         break;
1604 
1605     case GDB_WATCHPOINT_WRITE:
1606     case GDB_WATCHPOINT_READ:
1607     case GDB_WATCHPOINT_ACCESS:
1608         nb_hw_watchpoint--;
1609         break;
1610 
1611     default:
1612         return -ENOSYS;
1613     }
1614     hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1615 
1616     return 0;
1617 }
1618 
1619 void kvm_arch_remove_all_hw_breakpoints(void)
1620 {
1621     nb_hw_breakpoint = nb_hw_watchpoint = 0;
1622 }
1623 
1624 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1625 {
1626     int n;
1627 
1628     /* Software Breakpoint updates */
1629     if (kvm_sw_breakpoints_active(cs)) {
1630         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1631     }
1632 
1633     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1634            <= ARRAY_SIZE(hw_debug_points));
1635     assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1636 
1637     if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1638         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1639         memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1640         for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1641             switch (hw_debug_points[n].type) {
1642             case GDB_BREAKPOINT_HW:
1643                 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1644                 break;
1645             case GDB_WATCHPOINT_WRITE:
1646                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1647                 break;
1648             case GDB_WATCHPOINT_READ:
1649                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1650                 break;
1651             case GDB_WATCHPOINT_ACCESS:
1652                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1653                                         KVMPPC_DEBUG_WATCH_READ;
1654                 break;
1655             default:
1656                 cpu_abort(cs, "Unsupported breakpoint type\n");
1657             }
1658             dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1659         }
1660     }
1661 }
1662 
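/* Classify a KVM debug exit: return 1 when it was caused by QEMU's own
 * single-stepping, breakpoints or watchpoints; otherwise inject a program
 * check into the guest (see the comment in the else branch below).
 */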
1663 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1664 {
1665     CPUState *cs = CPU(cpu);
1666     CPUPPCState *env = &cpu->env;
1667     struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1668     int handle = 0;
1669     int n;
1670     int flag = 0;
1671 
1672     if (cs->singlestep_enabled) {
1673         handle = 1;
1674     } else if (arch_info->status) {
1675         if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1676             if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1677                 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1678                 if (n >= 0) {
1679                     handle = 1;
1680                 }
1681             } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1682                                             KVMPPC_DEBUG_WATCH_WRITE)) {
1683                 n = find_hw_watchpoint(arch_info->address, &flag);
1684                 if (n >= 0) {
1685                     handle = 1;
1686                     cs->watchpoint_hit = &hw_watchpoint;
1687                     hw_watchpoint.vaddr = hw_debug_points[n].addr;
1688                     hw_watchpoint.flags = flag;
1689                 }
1690             }
1691         }
1692     } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1693         handle = 1;
1694     } else {
1695         /* QEMU cannot handle this debug exception, so inject a
1696          * program exception into the guest instead.
1697          * Yes, a program exception, NOT a debug exception!
1698          * While QEMU owns the debug resources, debug exceptions must
1699          * always be enabled; we achieve this by setting MSR_DE and
1700          * also MSRP_DEP so the guest cannot change MSR_DE.
1701          * When emulating debug resources for the guest, the guest
1702          * needs to control MSR_DE itself (to enable/disable debug
1703          * interrupts on demand).  Supporting both configurations at
1704          * once is not possible, so debug resources cannot be shared
1705          * between QEMU and the guest on the BookE architecture.
1706          * In the current design QEMU takes priority over the guest:
1707          * if QEMU is using the debug resources then the guest cannot
1708          * use them.
1709          * For software breakpoints QEMU uses a privileged instruction,
1710          * so we cannot be here because the guest raised a debug
1711          * exception; the only possibility is that the guest executed
1712          * a privileged or illegal instruction, and that is why we
1713          * inject a program interrupt.
1714          */
1715 
1716         cpu_synchronize_state(cs);
1717         /* env->nip is PC, so increment this by 4 to use
1718          * ppc_cpu_do_interrupt(), which sets srr0 = env->nip - 4.
1719          */
1720         env->nip += 4;
1721         cs->exception_index = POWERPC_EXCP_PROGRAM;
1722         env->error_code = POWERPC_EXCP_INVAL;
1723         ppc_cpu_do_interrupt(cs);
1724     }
1725 
1726     return handle;
1727 }
1728 
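     /* Top-level dispatch for KVM exits handed to the target code: DCR
      * accesses, halt, sPAPR hypercalls, EPR reads, watchdog expiry and
      * debug exits.  Returns EXCP_DEBUG to stop in the debugger, 0 to
      * re-enter the guest, or a negative value on error. */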
1729 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1730 {
1731     PowerPCCPU *cpu = POWERPC_CPU(cs);
1732     CPUPPCState *env = &cpu->env;
1733     int ret;
1734 
1735     qemu_mutex_lock_iothread();
1736 
1737     switch (run->exit_reason) {
1738     case KVM_EXIT_DCR:
1739         if (run->dcr.is_write) {
1740             DPRINTF("handle dcr write\n");
1741             ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1742         } else {
1743             DPRINTF("handle dcr read\n");
1744             ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1745         }
1746         break;
1747     case KVM_EXIT_HLT:
1748         DPRINTF("handle halt\n");
1749         ret = kvmppc_handle_halt(cpu);
1750         break;
1751 #if defined(TARGET_PPC64)
1752     case KVM_EXIT_PAPR_HCALL:
1753         DPRINTF("handle PAPR hypercall\n");
1754         run->papr_hcall.ret = spapr_hypercall(cpu,
1755                                               run->papr_hcall.nr,
1756                                               run->papr_hcall.args);
1757         ret = 0;
1758         break;
1759 #endif
1760     case KVM_EXIT_EPR:
1761         DPRINTF("handle epr\n");
1762         run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1763         ret = 0;
1764         break;
1765     case KVM_EXIT_WATCHDOG:
1766         DPRINTF("handle watchdog expiry\n");
1767         watchdog_perform_action();
1768         ret = 0;
1769         break;
1770 
1771     case KVM_EXIT_DEBUG:
1772         DPRINTF("handle debug exception\n");
1773         if (kvm_handle_debug(cpu, run)) {
1774             ret = EXCP_DEBUG;
1775             break;
1776         }
1777         /* re-enter, this exception was guest-internal */
1778         ret = 0;
1779         break;
1780 
1781     default:
1782         fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1783         ret = -1;
1784         break;
1785     }
1786 
1787     qemu_mutex_unlock_iothread();
1788     return ret;
1789 }
1790 
1791 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1792 {
1793     CPUState *cs = CPU(cpu);
1794     uint32_t bits = tsr_bits;
1795     struct kvm_one_reg reg = {
1796         .id = KVM_REG_PPC_OR_TSR,
1797         .addr = (uintptr_t) &bits,
1798     };
1799 
1800     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1801 }
1802 
1803 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1804 {
1805 
1806     CPUState *cs = CPU(cpu);
1807     uint32_t bits = tsr_bits;
1808     struct kvm_one_reg reg = {
1809         .id = KVM_REG_PPC_CLEAR_TSR,
1810         .addr = (uintptr_t) &bits,
1811     };
1812 
1813     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1814 }
1815 
1816 int kvmppc_set_tcr(PowerPCCPU *cpu)
1817 {
1818     CPUState *cs = CPU(cpu);
1819     CPUPPCState *env = &cpu->env;
1820     uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1821 
1822     struct kvm_one_reg reg = {
1823         .id = KVM_REG_PPC_TCR,
1824         .addr = (uintptr_t) &tcr,
1825     };
1826 
1827     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1828 }
1829 
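     /* Enable the in-kernel BookE watchdog for this vcpu; expiry is then
      * reported back as the KVM_EXIT_WATCHDOG exit handled above. */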
1830 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1831 {
1832     CPUState *cs = CPU(cpu);
1833     int ret;
1834 
1835     if (!kvm_enabled()) {
1836         return -1;
1837     }
1838 
1839     if (!cap_ppc_watchdog) {
1840         printf("warning: KVM does not support watchdog");
1841         return -1;
1842     }
1843 
1844     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1845     if (ret < 0) {
1846         fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1847                 __func__, strerror(-ret));
1848         return ret;
1849     }
1850 
1851     return ret;
1852 }
1853 
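     /* Scan /proc/cpuinfo for a line starting with 'field' and copy the
      * whole matching line into 'value'.  Returns 0 on success, -1 if the
      * field is not found or the file cannot be opened. */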
1854 static int read_cpuinfo(const char *field, char *value, int len)
1855 {
1856     FILE *f;
1857     int ret = -1;
1858     int field_len = strlen(field);
1859     char line[512];
1860 
1861     f = fopen("/proc/cpuinfo", "r");
1862     if (!f) {
1863         return -1;
1864     }
1865 
1866     do {
1867         if (!fgets(line, sizeof(line), f)) {
1868             break;
1869         }
1870         if (!strncmp(line, field, field_len)) {
1871             pstrcpy(value, len, line);
1872             ret = 0;
1873             break;
1874         }
1875     } while (*line);
1876 
1877     fclose(f);
1878 
1879     return ret;
1880 }
1881 
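     /* Host timebase frequency, parsed from the "timebase" line of
      * /proc/cpuinfo.  Falls back to NANOSECONDS_PER_SECOND (1 GHz) if
      * the line cannot be found or parsed. */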
1882 uint32_t kvmppc_get_tbfreq(void)
1883 {
1884     char line[512];
1885     char *ns;
1886     uint32_t retval = NANOSECONDS_PER_SECOND;
1887 
1888     if (read_cpuinfo("timebase", line, sizeof(line))) {
1889         return retval;
1890     }
1891 
1892     if (!(ns = strchr(line, ':'))) {
1893         return retval;
1894     }
1895 
1896     ns++;
1897 
1898     return atoi(ns);
1899 }
1900 
1901 bool kvmppc_get_host_serial(char **value)
1902 {
1903     return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
1904                                NULL);
1905 }
1906 
1907 bool kvmppc_get_host_model(char **value)
1908 {
1909     return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
1910 }
1911 
1912 /* Try to find a device tree node for a CPU with clock-frequency property */
1913 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1914 {
1915     struct dirent *dirp;
1916     DIR *dp;
1917 
1918     if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1919         printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1920         return -1;
1921     }
1922 
1923     buf[0] = '\0';
1924     while ((dirp = readdir(dp)) != NULL) {
1925         FILE *f;
1926         snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1927                  dirp->d_name);
1928         f = fopen(buf, "r");
1929         if (f) {
1930             snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1931             fclose(f);
1932             break;
1933         }
1934         buf[0] = '\0';
1935     }
1936     closedir(dp);
1937     if (buf[0] == '\0') {
1938         printf("Unknown host!\n");
1939         return -1;
1940     }
1941 
1942     return 0;
1943 }
1944 
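     /* Read a single big-endian integer property (4 or 8 bytes) from a
      * host device-tree file.  Returns (uint64_t)-1 if the file cannot be
      * opened and 0 if the length is not recognised. */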
1945 static uint64_t kvmppc_read_int_dt(const char *filename)
1946 {
1947     union {
1948         uint32_t v32;
1949         uint64_t v64;
1950     } u;
1951     FILE *f;
1952     int len;
1953 
1954     f = fopen(filename, "rb");
1955     if (!f) {
1956         return -1;
1957     }
1958 
1959     len = fread(&u, 1, sizeof(u), f);
1960     fclose(f);
1961     switch (len) {
1962     case 4:
1963         /* property is a 32-bit quantity */
1964         return be32_to_cpu(u.v32);
1965     case 8:
1966         return be64_to_cpu(u.v64);
1967     }
1968 
1969     return 0;
1970 }
1971 
1972 /* Read a CPU node property from the host device tree that's a single
1973  * integer (32-bit or 64-bit).  Returns (uint64_t)-1 if the property
1974  * can't be found or opened, and 0 if its length isn't one we
1975  * understand. */
1976 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1977 {
1978     char buf[PATH_MAX], *tmp;
1979     uint64_t val;
1980 
1981     if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1982         return -1;
1983     }
1984 
1985     tmp = g_strdup_printf("%s/%s", buf, propname);
1986     val = kvmppc_read_int_dt(tmp);
1987     g_free(tmp);
1988 
1989     return val;
1990 }
1991 
1992 uint64_t kvmppc_get_clockfreq(void)
1993 {
1994     return kvmppc_read_int_cpu_dt("clock-frequency");
1995 }
1996 
1997 uint32_t kvmppc_get_vmx(void)
1998 {
1999     return kvmppc_read_int_cpu_dt("ibm,vmx");
2000 }
2001 
2002 uint32_t kvmppc_get_dfp(void)
2003 {
2004     return kvmppc_read_int_cpu_dt("ibm,dfp");
2005 }
2006 
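     /* Fetch the paravirt info (hypercall instructions and flags) from
      * KVM.  Note the unusual convention: returns 0 on success and 1 on
      * failure. */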
2007 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
2008 {
2009     PowerPCCPU *cpu = ppc_env_get_cpu(env);
2010     CPUState *cs = CPU(cpu);
2011 
2012     if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
2013         !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
2014         return 0;
2015     }
2016 
2017     return 1;
2018 }
2019 
2020 int kvmppc_get_hasidle(CPUPPCState *env)
2021 {
2022     struct kvm_ppc_pvinfo pvinfo;
2023 
2024     if (!kvmppc_get_pvinfo(env, &pvinfo) &&
2025         (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
2026         return 1;
2027     }
2028 
2029     return 0;
2030 }
2031 
2032 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
2033 {
2034     uint32_t *hc = (uint32_t*)buf;
2035     struct kvm_ppc_pvinfo pvinfo;
2036 
2037     if (!kvmppc_get_pvinfo(env, &pvinfo)) {
2038         memcpy(buf, pvinfo.hcall, buf_len);
2039         return 0;
2040     }
2041 
2042     /*
2043      * Fall back to hypercalls that always fail, regardless of endianness:
2044      *
2045      *     tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
2046      *     li r3, -1
2047      *     b .+8       (becomes nop in wrong endian)
2048      *     bswap32(li r3, -1)
2049      */
2050 
2051     hc[0] = cpu_to_be32(0x08000048);
2052     hc[1] = cpu_to_be32(0x3860ffff);
2053     hc[2] = cpu_to_be32(0x48000008);
2054     hc[3] = cpu_to_be32(bswap32(0x3860ffff));
2055 
2056     return 1;
2057 }
2058 
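     /* Ask KVM to handle the given sPAPR hypercall in the kernel instead
      * of exiting to QEMU (KVM_CAP_PPC_ENABLE_HCALL). */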
2059 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
2060 {
2061     return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
2062 }
2063 
2064 void kvmppc_enable_logical_ci_hcalls(void)
2065 {
2066     /*
2067      * FIXME: it would be nice if we could detect the case where a
2068      * device in use requires the in-kernel implementation of these
2069      * hcalls but the kernel lacks it, and print a warning in that
2070      * case.
2071      */
2072     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
2073     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
2074 }
2075 
2076 void kvmppc_enable_set_mode_hcall(void)
2077 {
2078     kvmppc_enable_hcall(kvm_state, H_SET_MODE);
2079 }
2080 
2081 void kvmppc_enable_clear_ref_mod_hcalls(void)
2082 {
2083     kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
2084     kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
2085 }
2086 
2087 void kvmppc_set_papr(PowerPCCPU *cpu)
2088 {
2089     CPUState *cs = CPU(cpu);
2090     int ret;
2091 
2092     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2093     if (ret) {
2094         error_report("This vCPU type or KVM version does not support PAPR");
2095         exit(1);
2096     }
2097 
2098     /* Update the capability flag so we sync the right information
2099      * with kvm */
2100     cap_papr = 1;
2101 }
2102 
2103 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr)
2104 {
2105     return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr);
2106 }
2107 
2108 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2109 {
2110     CPUState *cs = CPU(cpu);
2111     int ret;
2112 
2113     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2114     if (ret && mpic_proxy) {
2115         error_report("This KVM version does not support EPR");
2116         exit(1);
2117     }
2118 }
2119 
2120 int kvmppc_smt_threads(void)
2121 {
2122     return cap_ppc_smt ? cap_ppc_smt : 1;
2123 }
2124 
2125 #ifdef TARGET_PPC64
2126 off_t kvmppc_alloc_rma(void **rma)
2127 {
2128     off_t size;
2129     int fd;
2130     struct kvm_allocate_rma ret;
2131 
2132     /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported;
2133      * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
2134      *                      not necessary on this hardware;
2135      * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware.
2136      *
2137      * FIXME: We should allow the user to force contiguous RMA
2138      * allocation in the cap_ppc_rma==1 case.
2139      */
2140     if (cap_ppc_rma < 2) {
2141         return 0;
2142     }
2143 
2144     fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
2145     if (fd < 0) {
2146         fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
2147                 strerror(errno));
2148         return -1;
2149     }
2150 
2151     size = MIN(ret.rma_size, 256ul << 20);
2152 
2153     *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2154     if (*rma == MAP_FAILED) {
2155         fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
2156         return -1;
2157     }
2158 
2159     return size;
2160 }
2161 
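     /* Work out how large an RMA we can actually use, limited by the
      * largest host page size usable for backing guest RAM and by the
      * size of the hash page table. */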
2162 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2163 {
2164     struct kvm_ppc_smmu_info info;
2165     long rampagesize, best_page_shift;
2166     int i;
2167 
2168     if (cap_ppc_rma >= 2) {
2169         return current_size;
2170     }
2171 
2172     /* Find the largest hardware supported page size that's less than
2173      * or equal to the (logical) backing page size of guest RAM */
2174     kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
2175     rampagesize = qemu_getrampagesize();
2176     best_page_shift = 0;
2177 
2178     for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2179         struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2180 
2181         if (!sps->page_shift) {
2182             continue;
2183         }
2184 
2185         if ((sps->page_shift > best_page_shift)
2186             && ((1UL << sps->page_shift) <= rampagesize)) {
2187             best_page_shift = sps->page_shift;
2188         }
2189     }
2190 
2191     return MIN(current_size,
2192                1ULL << (best_page_shift + hash_shift - 7));
2193 }
2194 #endif
2195 
2196 bool kvmppc_spapr_use_multitce(void)
2197 {
2198     return cap_spapr_multitce;
2199 }
2200 
2201 int kvmppc_spapr_enable_inkernel_multitce(void)
2202 {
2203     int ret;
2204 
2205     ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2206                             H_PUT_TCE_INDIRECT, 1);
2207     if (!ret) {
2208         ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2209                                 H_STUFF_TCE, 1);
2210     }
2211 
2212     return ret;
2213 }
2214 
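     /* Create an in-kernel TCE table for the given LIOBN and mmap() it so
      * QEMU can access it directly, preferring the 64-bit ioctl when
      * available.  Returns NULL (leaving *pfd at -1) when the caller
      * should fall back to a userspace-only table. */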
2215 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
2216                               uint64_t bus_offset, uint32_t nb_table,
2217                               int *pfd, bool need_vfio)
2218 {
2219     long len;
2220     int fd;
2221     void *table;
2222 
2223     /* Must set fd to -1 so we don't try to munmap when called for
2224      * destroying the table, which the upper layers -will- do
2225      */
2226     *pfd = -1;
2227     if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2228         return NULL;
2229     }
2230 
2231     if (cap_spapr_tce_64) {
2232         struct kvm_create_spapr_tce_64 args = {
2233             .liobn = liobn,
2234             .page_shift = page_shift,
2235             .offset = bus_offset >> page_shift,
2236             .size = nb_table,
2237             .flags = 0
2238         };
2239         fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE_64, &args);
2240         if (fd < 0) {
2241             fprintf(stderr,
2242                     "KVM: Failed to create TCE64 table for liobn 0x%x\n",
2243                     liobn);
2244             return NULL;
2245         }
2246     } else if (cap_spapr_tce) {
2247         uint64_t window_size = (uint64_t) nb_table << page_shift;
2248         struct kvm_create_spapr_tce args = {
2249             .liobn = liobn,
2250             .window_size = window_size,
2251         };
2252         if ((window_size != args.window_size) || bus_offset) {
2253             return NULL;
2254         }
2255         fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2256         if (fd < 0) {
2257             fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2258                     liobn);
2259             return NULL;
2260         }
2261     } else {
2262         return NULL;
2263     }
2264 
2265     len = nb_table * sizeof(uint64_t);
2266     /* FIXME: round this up to page size */
2267 
2268     table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2269     if (table == MAP_FAILED) {
2270         fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2271                 liobn);
2272         close(fd);
2273         return NULL;
2274     }
2275 
2276     *pfd = fd;
2277     return table;
2278 }
2279 
2280 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
2281 {
2282     long len;
2283 
2284     if (fd < 0) {
2285         return -1;
2286     }
2287 
2288     len = nb_table * sizeof(uint64_t);
2289     if ((munmap(table, len) < 0) ||
2290         (close(fd) < 0)) {
2291         fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
2292                 strerror(errno));
2293         /* Leak the table */
2294     }
2295 
2296     return 0;
2297 }
2298 
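     /* (Re)allocate the guest hash page table.  Returns 0 when QEMU
      * should allocate the HPT itself (full emulation or PR KVM), the
      * size shift actually allocated when the kernel did it, or a
      * negative errno on failure. */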
2299 int kvmppc_reset_htab(int shift_hint)
2300 {
2301     uint32_t shift = shift_hint;
2302 
2303     if (!kvm_enabled()) {
2304         /* Full emulation, tell caller to allocate htab itself */
2305         return 0;
2306     }
2307     if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2308         int ret;
2309         ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2310         if (ret == -ENOTTY) {
2311             /* At least some versions of PR KVM advertise the
2312              * capability, but don't implement the ioctl().  Oops.
2313              * Return 0 so that we allocate the htab in qemu, as is
2314              * correct for PR. */
2315             return 0;
2316         } else if (ret < 0) {
2317             return ret;
2318         }
2319         return shift;
2320     }
2321 
2322     /* We have a kernel that predates the htab reset calls.  For PR
2323      * KVM, we need to allocate the htab ourselves; an HV KVM of this
2324      * era will have allocated a 16MB fixed-size hash table already. */
2325     if (kvmppc_is_pr(kvm_state)) {
2326         /* PR - tell caller to allocate htab */
2327         return 0;
2328     } else {
2329         /* HV - assume 16MB kernel allocated htab */
2330         return 24;
2331     }
2332 }
2333 
2334 static inline uint32_t mfpvr(void)
2335 {
2336     uint32_t pvr;
2337 
2338     asm ("mfpvr %0"
2339          : "=r"(pvr));
2340     return pvr;
2341 }
2342 
2343 static void alter_insns(uint64_t *word, uint64_t flags, bool on)
2344 {
2345     if (on) {
2346         *word |= flags;
2347     } else {
2348         *word &= ~flags;
2349     }
2350 }
2351 
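     /* Class init for the "host" CPU type: probe the real host for its
      * PVR, AltiVec/VSX and DFP support, L1 cache sizes and (on ppc64)
      * radix page info, and patch the class accordingly. */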
2352 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2353 {
2354     PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2355     uint32_t vmx = kvmppc_get_vmx();
2356     uint32_t dfp = kvmppc_get_dfp();
2357     uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2358     uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2359 
2360     /* Now fix up the class with information we can query from the host */
2361     pcc->pvr = mfpvr();
2362 
2363     if (vmx != -1) {
2364         /* Only override when we know what the host supports */
2365         alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
2366         alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
2367     }
2368     if (dfp != -1) {
2369         /* Only override when we know what the host supports */
2370         alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
2371     }
2372 
2373     if (dcache_size != -1) {
2374         pcc->l1_dcache_size = dcache_size;
2375     }
2376 
2377     if (icache_size != -1) {
2378         pcc->l1_icache_size = icache_size;
2379     }
2380 
2381 #if defined(TARGET_PPC64)
2382     pcc->radix_page_info = kvm_get_radix_page_info();
2383 
2384     if ((pcc->pvr & 0xffffff00) == CPU_POWERPC_POWER9_DD1) {
2385         /*
2386          * POWER9 DD1 has some bugs which make it not really ISA 3.00
2387          * compliant.  More importantly, advertising ISA 3.00
2388          * architected mode may prevent guests from activating
2389          * necessary DD1 workarounds.
2390          */
2391         pcc->pcr_supported &= ~(PCR_COMPAT_3_00 | PCR_COMPAT_2_07
2392                                 | PCR_COMPAT_2_06 | PCR_COMPAT_2_05);
2393     }
2394 #endif /* defined(TARGET_PPC64) */
2395 }
2396 
2397 bool kvmppc_has_cap_epr(void)
2398 {
2399     return cap_epr;
2400 }
2401 
2402 bool kvmppc_has_cap_htab_fd(void)
2403 {
2404     return cap_htab_fd;
2405 }
2406 
2407 bool kvmppc_has_cap_fixup_hcalls(void)
2408 {
2409     return cap_fixup_hcalls;
2410 }
2411 
2412 bool kvmppc_has_cap_htm(void)
2413 {
2414     return cap_htm;
2415 }
2416 
2417 bool kvmppc_has_cap_mmu_radix(void)
2418 {
2419     return cap_mmu_radix;
2420 }
2421 
2422 bool kvmppc_has_cap_mmu_hash_v3(void)
2423 {
2424     return cap_mmu_hash_v3;
2425 }
2426 
2427 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
2428 {
2429     uint32_t host_pvr = mfpvr();
2430     PowerPCCPUClass *pvr_pcc;
2431 
2432     pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2433     if (pvr_pcc == NULL) {
2434         pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2435     }
2436 
2437     return pvr_pcc;
2438 }
2439 
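     /* Register the "host" CPU type, derived from the class matching the
      * host PVR, plus (on ppc64) the corresponding sPAPR CPU core type,
      * and point the generic family alias (e.g. "POWER8") at it. */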
2440 static int kvm_ppc_register_host_cpu_type(void)
2441 {
2442     TypeInfo type_info = {
2443         .name = TYPE_HOST_POWERPC_CPU,
2444         .class_init = kvmppc_host_cpu_class_init,
2445     };
2446     PowerPCCPUClass *pvr_pcc;
2447     DeviceClass *dc;
2448     int i;
2449 
2450     pvr_pcc = kvm_ppc_get_host_cpu_class();
2451     if (pvr_pcc == NULL) {
2452         return -1;
2453     }
2454     type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2455     type_register(&type_info);
2456 
2457 #if defined(TARGET_PPC64)
2458     type_info.name = g_strdup_printf("%s-"TYPE_SPAPR_CPU_CORE, "host");
2459     type_info.parent = TYPE_SPAPR_CPU_CORE;
2460     type_info.instance_size = sizeof(sPAPRCPUCore);
2461     type_info.instance_init = NULL;
2462     type_info.class_init = spapr_cpu_core_class_init;
2463     type_info.class_data = (void *) "host";
2464     type_register(&type_info);
2465     g_free((void *)type_info.name);
2466 #endif
2467 
2468     /*
2469      * Update generic CPU family class alias (e.g. on a POWER8NVL host,
2470      * we want "POWER8" to be a "family" alias that points to the current
2471      * host CPU type, too)
2472      */
2473     dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc));
2474     for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) {
2475         if (strcmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) {
2476             ObjectClass *oc = OBJECT_CLASS(pvr_pcc);
2477             char *suffix;
2478 
2479             ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc));
2480             suffix = strstr(ppc_cpu_aliases[i].model, "-"TYPE_POWERPC_CPU);
2481             if (suffix) {
2482                 *suffix = 0;
2483             }
2484             ppc_cpu_aliases[i].oc = oc;
2485             break;
2486         }
2487     }
2488 
2489     return 0;
2490 }
2491 
2492 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2493 {
2494     struct kvm_rtas_token_args args = {
2495         .token = token,
2496     };
2497 
2498     if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2499         return -ENOENT;
2500     }
2501 
2502     strncpy(args.name, function, sizeof(args.name));
2503 
2504     return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2505 }
2506 
2507 int kvmppc_get_htab_fd(bool write)
2508 {
2509     struct kvm_get_htab_fd s = {
2510         .flags = write ? KVM_GET_HTAB_WRITE : 0,
2511         .start_index = 0,
2512     };
2513 
2514     if (!cap_htab_fd) {
2515         fprintf(stderr, "KVM version doesn't support saving the hash table\n");
2516         return -1;
2517     }
2518 
2519     return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2520 }
2521 
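     /* Stream HPT entries from the KVM HTAB fd into the migration stream.
      * Each record read from the fd is a kvm_get_htab_header followed by
      * n_valid HPTEs of HASH_PTE_SIZE_64 bytes.  We stop when the fd
      * reports end of data or, if max_ns is non-negative, after roughly
      * max_ns nanoseconds.  Returns 1 once the whole table has been sent,
      * 0 if there is more to do. */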
2522 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2523 {
2524     int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2525     uint8_t buf[bufsize];
2526     ssize_t rc;
2527 
2528     do {
2529         rc = read(fd, buf, bufsize);
2530         if (rc < 0) {
2531             fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2532                     strerror(errno));
2533             return rc;
2534         } else if (rc) {
2535             uint8_t *buffer = buf;
2536             ssize_t n = rc;
2537             while (n) {
2538                 struct kvm_get_htab_header *head =
2539                     (struct kvm_get_htab_header *) buffer;
2540                 size_t chunksize = sizeof(*head) +
2541                      HASH_PTE_SIZE_64 * head->n_valid;
2542 
2543                 qemu_put_be32(f, head->index);
2544                 qemu_put_be16(f, head->n_valid);
2545                 qemu_put_be16(f, head->n_invalid);
2546                 qemu_put_buffer(f, (void *)(head + 1),
2547                                 HASH_PTE_SIZE_64 * head->n_valid);
2548 
2549                 buffer += chunksize;
2550                 n -= chunksize;
2551             }
2552         }
2553     } while ((rc != 0)
2554              && ((max_ns < 0)
2555                  || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2556 
2557     return (rc == 0) ? 1 : 0;
2558 }
2559 
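     /* Write one chunk of HPT entries, as produced by kvmppc_save_htab()
      * on the source, back through the HTAB fd on the destination. */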
2560 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2561                            uint16_t n_valid, uint16_t n_invalid)
2562 {
2563     struct kvm_get_htab_header *buf;
2564     size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
2565     ssize_t rc;
2566 
2567     buf = alloca(chunksize);
2568     buf->index = index;
2569     buf->n_valid = n_valid;
2570     buf->n_invalid = n_invalid;
2571 
2572     qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
2573 
2574     rc = write(fd, buf, chunksize);
2575     if (rc < 0) {
2576         fprintf(stderr, "Error writing KVM hash table: %s\n",
2577                 strerror(errno));
2578         return rc;
2579     }
2580     if (rc != chunksize) {
2581         /* We should never get a short write on a single chunk */
2582         fprintf(stderr, "Short write, restoring KVM hash table\n");
2583         return -1;
2584     }
2585     return 0;
2586 }
2587 
2588 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2589 {
2590     return true;
2591 }
2592 
2593 void kvm_arch_init_irq_routing(KVMState *s)
2594 {
2595 }
2596 
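     /* Read 'n' HPT entries starting at index ptex into 'hptes' via a
      * temporary HTAB fd; ranges the kernel reports as invalid are
      * zero-filled. */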
2597 void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n)
2598 {
2599     struct kvm_get_htab_fd ghf = {
2600         .flags = 0,
2601         .start_index = ptex,
2602     };
2603     int fd, rc;
2604     int i;
2605 
2606     fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2607     if (fd < 0) {
2608         hw_error("kvmppc_read_hptes: Unable to open HPT fd");
2609     }
2610 
2611     i = 0;
2612     while (i < n) {
2613         struct kvm_get_htab_header *hdr;
2614         int m = n < HPTES_PER_GROUP ? n : HPTES_PER_GROUP;
2615         char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64];
2616 
2617         rc = read(fd, buf, sizeof(buf));
2618         if (rc < 0) {
2619             hw_error("kvmppc_read_hptes: Unable to read HPTEs");
2620         }
2621 
2622         hdr = (struct kvm_get_htab_header *)buf;
2623         while ((i < n) && ((char *)hdr < (buf + rc))) {
2624             int invalid = hdr->n_invalid;
2625 
2626             if (hdr->index != (ptex + i)) {
2627                 hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32
2628                          " != (%"HWADDR_PRIu" + %d", hdr->index, ptex, i);
2629             }
2630 
2631             memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * hdr->n_valid);
2632             i += hdr->n_valid;
2633 
2634             if ((n - i) < invalid) {
2635                 invalid = n - i;
2636             }
2637             memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64);
2638             i += hdr->n_invalid;
2639 
2640             hdr = (struct kvm_get_htab_header *)
2641                 ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid);
2642         }
2643     }
2644 
2645     close(fd);
2646 }
2647 
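     /* Update a single HPT entry through the HTAB fd by writing a
      * one-entry chunk: a header followed by the two 64-bit halves of the
      * PTE. */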
2648 void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1)
2649 {
2650     int fd, rc;
2651     struct kvm_get_htab_fd ghf;
2652     struct {
2653         struct kvm_get_htab_header hdr;
2654         uint64_t pte0;
2655         uint64_t pte1;
2656     } buf;
2657 
2658     ghf.flags = 0;
2659     ghf.start_index = 0;     /* Ignored */
2660     fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2661     if (fd < 0) {
2662         hw_error("kvmppc_write_hpte: Unable to open HPT fd");
2663     }
2664 
2665     buf.hdr.n_valid = 1;
2666     buf.hdr.n_invalid = 0;
2667     buf.hdr.index = ptex;
2668     buf.pte0 = cpu_to_be64(pte0);
2669     buf.pte1 = cpu_to_be64(pte1);
2670 
2671     rc = write(fd, &buf, sizeof(buf));
2672     if (rc != sizeof(buf)) {
2673         hw_error("kvmppc_write_hpte: Unable to update KVM HPT");
2674     }
2675     close(fd);
2676 }
2677 
2678 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2679                              uint64_t address, uint32_t data, PCIDevice *dev)
2680 {
2681     return 0;
2682 }
2683 
2684 int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
2685                                 int vector, PCIDevice *dev)
2686 {
2687     return 0;
2688 }
2689 
2690 int kvm_arch_release_virq_post(int virq)
2691 {
2692     return 0;
2693 }
2694 
2695 int kvm_arch_msi_data_to_gsi(uint32_t data)
2696 {
2697     return data & 0xffff;
2698 }
2699 
2700 int kvmppc_enable_hwrng(void)
2701 {
2702     if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2703         return -1;
2704     }
2705 
2706     return kvmppc_enable_hcall(kvm_state, H_RANDOM);
2707 }
2708