xref: /openbmc/qemu/target/ppc/kvm.c (revision 88c725c7)
1 /*
2  * PowerPC implementation of KVM hooks
3  *
4  * Copyright IBM Corp. 2007
5  * Copyright (C) 2011 Freescale Semiconductor, Inc.
6  *
7  * Authors:
8  *  Jerone Young <jyoung5@us.ibm.com>
9  *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10  *  Hollis Blanchard <hollisb@us.ibm.com>
11  *
12  * This work is licensed under the terms of the GNU GPL, version 2 or later.
13  * See the COPYING file in the top-level directory.
14  *
15  */
16 
17 #include "qemu/osdep.h"
18 #include <dirent.h>
19 #include <sys/ioctl.h>
20 #include <sys/vfs.h>
21 
22 #include <linux/kvm.h>
23 
24 #include "qemu-common.h"
25 #include "qapi/error.h"
26 #include "qemu/error-report.h"
27 #include "cpu.h"
28 #include "cpu-models.h"
29 #include "qemu/timer.h"
30 #include "sysemu/sysemu.h"
31 #include "sysemu/hw_accel.h"
32 #include "kvm_ppc.h"
33 #include "sysemu/cpus.h"
34 #include "sysemu/device_tree.h"
35 #include "mmu-hash64.h"
36 
37 #include "hw/sysbus.h"
38 #include "hw/ppc/spapr.h"
39 #include "hw/ppc/spapr_vio.h"
40 #include "hw/ppc/spapr_cpu_core.h"
41 #include "hw/ppc/ppc.h"
42 #include "sysemu/watchdog.h"
43 #include "trace.h"
44 #include "exec/gdbstub.h"
45 #include "exec/memattrs.h"
46 #include "exec/ram_addr.h"
47 #include "sysemu/hostmem.h"
48 #include "qemu/cutils.h"
49 #include "qemu/mmap-alloc.h"
50 #if defined(TARGET_PPC64)
51 #include "hw/ppc/spapr_cpu_core.h"
52 #endif
53 #include "elf.h"
54 #include "sysemu/kvm_int.h"
55 
56 //#define DEBUG_KVM
57 
58 #ifdef DEBUG_KVM
59 #define DPRINTF(fmt, ...) \
60     do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
61 #else
62 #define DPRINTF(fmt, ...) \
63     do { } while (0)
64 #endif
65 
66 #define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"
67 
68 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
69     KVM_CAP_LAST_INFO
70 };
71 
72 static int cap_interrupt_unset = false;
73 static int cap_interrupt_level = false;
74 static int cap_segstate;
75 static int cap_booke_sregs;
76 static int cap_ppc_smt;
77 static int cap_ppc_rma;
78 static int cap_spapr_tce;
79 static int cap_spapr_tce_64;
80 static int cap_spapr_multitce;
81 static int cap_spapr_vfio;
82 static int cap_hior;
83 static int cap_one_reg;
84 static int cap_epr;
85 static int cap_ppc_watchdog;
86 static int cap_papr;
87 static int cap_htab_fd;
88 static int cap_fixup_hcalls;
89 static int cap_htm;             /* Hardware transactional memory support */
90 static int cap_mmu_radix;
91 static int cap_mmu_hash_v3;
92 static int cap_resize_hpt;
93 static int cap_ppc_pvr_compat;
94 
95 static uint32_t debug_inst_opcode;
96 
97 /* XXX We have a race condition where we actually have a level triggered
98  *     interrupt, but the infrastructure can't expose that yet, so the guest
99  *     takes it but ignores it, goes to sleep and never gets notified that there's
100  *     still an interrupt pending.
101  *
102  *     As a quick workaround, let's just wake up again 20 ms after we injected
103  *     an interrupt. That way we can ensure that we're always reinjecting
104  *     interrupts in case the guest swallowed them.
105  */
106 static QEMUTimer *idle_timer;
107 
108 static void kvm_kick_cpu(void *opaque)
109 {
110     PowerPCCPU *cpu = opaque;
111 
112     qemu_cpu_kick(CPU(cpu));
113 }
114 
115 /* Check whether we are running with KVM-PR (instead of KVM-HV).  This
116  * should only be used for fallback tests - generally we should use
117  * explicit capabilities for the features we want, rather than
118  * assuming what is/isn't available depending on the KVM variant. */
119 static bool kvmppc_is_pr(KVMState *ks)
120 {
121     /* Assume KVM-PR if the GET_PVINFO capability is available */
122     return kvm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
123 }
124 
125 static int kvm_ppc_register_host_cpu_type(void);
126 
127 int kvm_arch_init(MachineState *ms, KVMState *s)
128 {
129     cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
130     cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
131     cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
132     cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
133     cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
134     cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
135     cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
136     cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64);
137     cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
138     cap_spapr_vfio = false;
139     cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
140     cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
141     cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
142     cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
143     /* Note: we don't set cap_papr here, because this capability is
144      * only activated after this by kvmppc_set_papr() */
145     cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
146     cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
147     cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
148     cap_mmu_radix = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_RADIX);
149     cap_mmu_hash_v3 = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_HASH_V3);
150     cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT);
151     /*
152      * Note: setting it to false because there is no such capability
153      * in KVM at this moment.
154      *
155      * TODO: call kvm_vm_check_extension() with the right capability
156      * after the kernel starts implementing it. */
157     cap_ppc_pvr_compat = false;
158 
159     if (!cap_interrupt_level) {
160         fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
161                         "VM to stall at times!\n");
162     }
163 
164     kvm_ppc_register_host_cpu_type();
165 
166     return 0;
167 }
168 
169 int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
170 {
171     return 0;
172 }
173 
174 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
175 {
176     CPUPPCState *cenv = &cpu->env;
177     CPUState *cs = CPU(cpu);
178     struct kvm_sregs sregs;
179     int ret;
180 
181     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
182         /* What we're really trying to say is "if we're on BookE, we use
183            the native PVR for now". This is the only sane way to check
184            it though, so we may mislead users into thinking they can run
185            BookE guests on BookS. Let's hope nobody dares enough :) */
186         return 0;
187     } else {
188         if (!cap_segstate) {
189             fprintf(stderr, "kvm error: missing PVR setting capability\n");
190             return -ENOSYS;
191         }
192     }
193 
194     ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
195     if (ret) {
196         return ret;
197     }
198 
199     sregs.pvr = cenv->spr[SPR_PVR];
200     return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
201 }
202 
203 /* Set up a shared TLB array with KVM */
204 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
205 {
206     CPUPPCState *env = &cpu->env;
207     CPUState *cs = CPU(cpu);
208     struct kvm_book3e_206_tlb_params params = {};
209     struct kvm_config_tlb cfg = {};
210     unsigned int entries = 0;
211     int ret, i;
212 
213     if (!kvm_enabled() ||
214         !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
215         return 0;
216     }
217 
218     assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
219 
220     for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
221         params.tlb_sizes[i] = booke206_tlb_size(env, i);
222         params.tlb_ways[i] = booke206_tlb_ways(env, i);
223         entries += params.tlb_sizes[i];
224     }
225 
226     assert(entries == env->nb_tlb);
227     assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
228 
229     env->tlb_dirty = true;
230 
231     cfg.array = (uintptr_t)env->tlb.tlbm;
232     cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
233     cfg.params = (uintptr_t)&params;
234     cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
235 
236     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
237     if (ret < 0) {
238         fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
239                 __func__, strerror(-ret));
240         return ret;
241     }
242 
243     env->kvm_sw_tlb = true;
244     return 0;
245 }
246 
247 
248 #if defined(TARGET_PPC64)
249 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
250                                        struct kvm_ppc_smmu_info *info)
251 {
252     CPUPPCState *env = &cpu->env;
253     CPUState *cs = CPU(cpu);
254 
255     memset(info, 0, sizeof(*info));
256 
257     /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
258      * need to "guess" what the supported page sizes are.
259      *
260      * For that to work we make a few assumptions:
261      *
262      * - Check whether we are running "PR" KVM which only supports 4K
263      *   and 16M pages, but supports them regardless of the backing
264      *   store characteristics. We also don't support 1T segments.
265      *
266      *   This is safe because if HV KVM ever supports that capability or PR
267      *   KVM grows support for more page/segment sizes, those versions
268      *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
269      *   will not hit this fallback.
270      *
271      * - Else we are running HV KVM. This means we only support page
272      *   sizes that fit in the backing store. Additionally we only
273      *   advertise 64K pages if the processor is ARCH 2.06 and we assume
274      *   P7 encodings for the SLB and hash table. Here too, we assume
275      *   support for any newer processor will mean a kernel that
276      *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
277      *   this fallback.
278      */
279     if (kvmppc_is_pr(cs->kvm_state)) {
280         /* No flags */
281         info->flags = 0;
282         info->slb_size = 64;
283 
284         /* Standard 4k base page size segment */
285         info->sps[0].page_shift = 12;
286         info->sps[0].slb_enc = 0;
287         info->sps[0].enc[0].page_shift = 12;
288         info->sps[0].enc[0].pte_enc = 0;
289 
290         /* Standard 16M large page size segment */
291         info->sps[1].page_shift = 24;
292         info->sps[1].slb_enc = SLB_VSID_L;
293         info->sps[1].enc[0].page_shift = 24;
294         info->sps[1].enc[0].pte_enc = 0;
295     } else {
296         int i = 0;
297 
298         /* HV KVM has backing store size restrictions */
299         info->flags = KVM_PPC_PAGE_SIZES_REAL;
300 
301         if (env->mmu_model & POWERPC_MMU_1TSEG) {
302             info->flags |= KVM_PPC_1T_SEGMENTS;
303         }
304 
305         if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 ||
306            POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) {
307             info->slb_size = 32;
308         } else {
309             info->slb_size = 64;
310         }
311 
312         /* Standard 4k base page size segment */
313         info->sps[i].page_shift = 12;
314         info->sps[i].slb_enc = 0;
315         info->sps[i].enc[0].page_shift = 12;
316         info->sps[i].enc[0].pte_enc = 0;
317         i++;
318 
319         /* 64K on MMU 2.06 and later */
320         if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 ||
321             POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) {
322             info->sps[i].page_shift = 16;
323             info->sps[i].slb_enc = 0x110;
324             info->sps[i].enc[0].page_shift = 16;
325             info->sps[i].enc[0].pte_enc = 1;
326             i++;
327         }
328 
329         /* Standard 16M large page size segment */
330         info->sps[i].page_shift = 24;
331         info->sps[i].slb_enc = SLB_VSID_L;
332         info->sps[i].enc[0].page_shift = 24;
333         info->sps[i].enc[0].pte_enc = 0;
334     }
335 }
336 
337 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
338 {
339     CPUState *cs = CPU(cpu);
340     int ret;
341 
342     if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
343         ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
344         if (ret == 0) {
345             return;
346         }
347     }
348 
349     kvm_get_fallback_smmu_info(cpu, info);
350 }
351 
352 struct ppc_radix_page_info *kvm_get_radix_page_info(void)
353 {
354     KVMState *s = KVM_STATE(current_machine->accelerator);
355     struct ppc_radix_page_info *radix_page_info;
356     struct kvm_ppc_rmmu_info rmmu_info;
357     int i;
358 
359     if (!kvm_check_extension(s, KVM_CAP_PPC_MMU_RADIX)) {
360         return NULL;
361     }
362     if (kvm_vm_ioctl(s, KVM_PPC_GET_RMMU_INFO, &rmmu_info)) {
363         return NULL;
364     }
365     radix_page_info = g_malloc0(sizeof(*radix_page_info));
366     radix_page_info->count = 0;
367     for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) {
368         if (rmmu_info.ap_encodings[i]) {
369             radix_page_info->entries[i] = rmmu_info.ap_encodings[i];
370             radix_page_info->count++;
371         }
372     }
373     return radix_page_info;
374 }
375 
376 target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu,
377                                      bool radix, bool gtse,
378                                      uint64_t proc_tbl)
379 {
380     CPUState *cs = CPU(cpu);
381     int ret;
382     uint64_t flags = 0;
383     struct kvm_ppc_mmuv3_cfg cfg = {
384         .process_table = proc_tbl,
385     };
386 
387     if (radix) {
388         flags |= KVM_PPC_MMUV3_RADIX;
389     }
390     if (gtse) {
391         flags |= KVM_PPC_MMUV3_GTSE;
392     }
393     cfg.flags = flags;
394     ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_CONFIGURE_V3_MMU, &cfg);
395     switch (ret) {
396     case 0:
397         return H_SUCCESS;
398     case -EINVAL:
399         return H_PARAMETER;
400     case -ENODEV:
401         return H_NOT_AVAILABLE;
402     default:
403         return H_HARDWARE;
404     }
405 }
406 
407 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
408 {
409     if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
410         return true;
411     }
412 
413     return (1ul << shift) <= rampgsize;
414 }
415 
416 static long max_cpu_page_size;
417 
418 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
419 {
420     static struct kvm_ppc_smmu_info smmu_info;
421     static bool has_smmu_info;
422     CPUPPCState *env = &cpu->env;
423     int iq, ik, jq, jk;
424     bool has_64k_pages = false;
425 
426     /* We only handle page sizes for 64-bit server guests for now */
427     if (!(env->mmu_model & POWERPC_MMU_64)) {
428         return;
429     }
430 
431     /* Collect MMU info from kernel if not already */
432     if (!has_smmu_info) {
433         kvm_get_smmu_info(cpu, &smmu_info);
434         has_smmu_info = true;
435     }
436 
437     if (!max_cpu_page_size) {
438         max_cpu_page_size = qemu_getrampagesize();
439     }
440 
441     /* Convert to QEMU form */
442     memset(&env->sps, 0, sizeof(env->sps));
443 
444     /* If we have HV KVM, we need to forbid CI large pages if our
445      * host page size is smaller than 64K.
446      */
447     if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) {
448         env->ci_large_pages = getpagesize() >= 0x10000;
449     }
450 
451     /*
452      * XXX This loop should be an entry wide AND of the capabilities that
453      *     the selected CPU has with the capabilities that KVM supports.
454      */
455     for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
456         struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
457         struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
458 
459         if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
460                                  ksps->page_shift)) {
461             continue;
462         }
463         qsps->page_shift = ksps->page_shift;
464         qsps->slb_enc = ksps->slb_enc;
465         for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
466             if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
467                                      ksps->enc[jk].page_shift)) {
468                 continue;
469             }
470             if (ksps->enc[jk].page_shift == 16) {
471                 has_64k_pages = true;
472             }
473             qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
474             qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
475             if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
476                 break;
477             }
478         }
479         if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
480             break;
481         }
482     }
483     env->slb_nr = smmu_info.slb_size;
484     if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
485         env->mmu_model &= ~POWERPC_MMU_1TSEG;
486     }
487     if (!has_64k_pages) {
488         env->mmu_model &= ~POWERPC_MMU_64K;
489     }
490 }
491 
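/* Accept a memory backend only if its page size is at least as large as
 * max_cpu_page_size, the page size backing main RAM as recorded by
 * kvm_fixup_page_sizes(); a smaller-paged backend could otherwise restrict
 * the page sizes already advertised to the guest. */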
492 bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path)
493 {
494     Object *mem_obj = object_resolve_path(obj_path, NULL);
495     char *mempath = object_property_get_str(mem_obj, "mem-path", NULL);
496     long pagesize;
497 
498     if (mempath) {
499         pagesize = qemu_mempath_getpagesize(mempath);
500         g_free(mempath);
501     } else {
502         pagesize = getpagesize();
503     }
504 
505     return pagesize >= max_cpu_page_size;
506 }
507 
508 #else /* defined (TARGET_PPC64) */
509 
510 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
511 {
512 }
513 
514 bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path)
515 {
516     return true;
517 }
518 
519 #endif /* !defined (TARGET_PPC64) */
520 
521 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
522 {
523     return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
524 }
525 
526 /* e500 supports 2 h/w breakpoints and 2 watchpoints.
527  * book3s supports only 1 watchpoint, so an array size
528  * of 4 is sufficient for now.
529  */
530 #define MAX_HW_BKPTS 4
531 
532 static struct HWBreakpoint {
533     target_ulong addr;
534     int type;
535 } hw_debug_points[MAX_HW_BKPTS];
536 
537 static CPUWatchpoint hw_watchpoint;
538 
539 /* By default no breakpoints or watchpoints are supported */
540 static int max_hw_breakpoint;
541 static int max_hw_watchpoint;
542 static int nb_hw_breakpoint;
543 static int nb_hw_watchpoint;
544 
545 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
546 {
547     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
548         max_hw_breakpoint = 2;
549         max_hw_watchpoint = 2;
550     }
551 
552     if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
553         fprintf(stderr, "Error initializing h/w breakpoints\n");
554         return;
555     }
556 }
557 
558 int kvm_arch_init_vcpu(CPUState *cs)
559 {
560     PowerPCCPU *cpu = POWERPC_CPU(cs);
561     CPUPPCState *cenv = &cpu->env;
562     int ret;
563 
564     /* Gather server mmu info from KVM and update the CPU state */
565     kvm_fixup_page_sizes(cpu);
566 
567     /* Synchronize sregs with kvm */
568     ret = kvm_arch_sync_sregs(cpu);
569     if (ret) {
570         if (ret == -EINVAL) {
571             error_report("Register sync failed... If you're using kvm-hv.ko,"
572                          " only \"-cpu host\" is possible");
573         }
574         return ret;
575     }
576 
577     idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
578 
579     switch (cenv->mmu_model) {
580     case POWERPC_MMU_BOOKE206:
581         /* This target supports access to KVM's guest TLB */
582         ret = kvm_booke206_tlb_init(cpu);
583         break;
584     case POWERPC_MMU_2_07:
585         if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
586             /* KVM-HV has transactional memory on POWER8 also without the
587              * KVM_CAP_PPC_HTM extension, so enable it here instead as
588              * long as it's available to userspace on the host. */
589             if (qemu_getauxval(AT_HWCAP2) & PPC_FEATURE2_HAS_HTM) {
590                 cap_htm = true;
591             }
592         }
593         break;
594     default:
595         break;
596     }
597 
598     kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
599     kvmppc_hw_debug_points_init(cenv);
600 
601     return ret;
602 }
603 
604 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
605 {
606     CPUPPCState *env = &cpu->env;
607     CPUState *cs = CPU(cpu);
608     struct kvm_dirty_tlb dirty_tlb;
609     unsigned char *bitmap;
610     int ret;
611 
612     if (!env->kvm_sw_tlb) {
613         return;
614     }
615 
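    /* Build an all-ones dirty bitmap covering every TLB entry so that the
     * KVM_DIRTY_TLB ioctl below makes the kernel re-read the entire shared
     * TLB array instead of individual entries. */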
616     bitmap = g_malloc((env->nb_tlb + 7) / 8);
617     memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
618 
619     dirty_tlb.bitmap = (uintptr_t)bitmap;
620     dirty_tlb.num_dirty = env->nb_tlb;
621 
622     ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
623     if (ret) {
624         fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
625                 __func__, strerror(-ret));
626     }
627 
628     g_free(bitmap);
629 }
630 
631 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
632 {
633     PowerPCCPU *cpu = POWERPC_CPU(cs);
634     CPUPPCState *env = &cpu->env;
635     union {
636         uint32_t u32;
637         uint64_t u64;
638     } val;
639     struct kvm_one_reg reg = {
640         .id = id,
641         .addr = (uintptr_t) &val,
642     };
643     int ret;
644 
645     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
646     if (ret != 0) {
647         trace_kvm_failed_spr_get(spr, strerror(errno));
648     } else {
649         switch (id & KVM_REG_SIZE_MASK) {
650         case KVM_REG_SIZE_U32:
651             env->spr[spr] = val.u32;
652             break;
653 
654         case KVM_REG_SIZE_U64:
655             env->spr[spr] = val.u64;
656             break;
657 
658         default:
659             /* Don't handle this size yet */
660             abort();
661         }
662     }
663 }
664 
665 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
666 {
667     PowerPCCPU *cpu = POWERPC_CPU(cs);
668     CPUPPCState *env = &cpu->env;
669     union {
670         uint32_t u32;
671         uint64_t u64;
672     } val;
673     struct kvm_one_reg reg = {
674         .id = id,
675         .addr = (uintptr_t) &val,
676     };
677     int ret;
678 
679     switch (id & KVM_REG_SIZE_MASK) {
680     case KVM_REG_SIZE_U32:
681         val.u32 = env->spr[spr];
682         break;
683 
684     case KVM_REG_SIZE_U64:
685         val.u64 = env->spr[spr];
686         break;
687 
688     default:
689         /* Don't handle this size yet */
690         abort();
691     }
692 
693     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
694     if (ret != 0) {
695         trace_kvm_failed_spr_set(spr, strerror(errno));
696     }
697 }
698 
699 static int kvm_put_fp(CPUState *cs)
700 {
701     PowerPCCPU *cpu = POWERPC_CPU(cs);
702     CPUPPCState *env = &cpu->env;
703     struct kvm_one_reg reg;
704     int i;
705     int ret;
706 
707     if (env->insns_flags & PPC_FLOAT) {
708         uint64_t fpscr = env->fpscr;
709         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
710 
711         reg.id = KVM_REG_PPC_FPSCR;
712         reg.addr = (uintptr_t)&fpscr;
713         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
714         if (ret < 0) {
715             DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
716             return ret;
717         }
718 
719         for (i = 0; i < 32; i++) {
720             uint64_t vsr[2];
721 
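            /* Each VSR is transferred as two 64-bit doublewords: the FPR
             * half and the VSX extension half; their order in the buffer
             * depends on host endianness, hence the #ifdef swap below. */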
722 #ifdef HOST_WORDS_BIGENDIAN
723             vsr[0] = float64_val(env->fpr[i]);
724             vsr[1] = env->vsr[i];
725 #else
726             vsr[0] = env->vsr[i];
727             vsr[1] = float64_val(env->fpr[i]);
728 #endif
729             reg.addr = (uintptr_t) &vsr;
730             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
731 
732             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
733             if (ret < 0) {
734                 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
735                         i, strerror(errno));
736                 return ret;
737             }
738         }
739     }
740 
741     if (env->insns_flags & PPC_ALTIVEC) {
742         reg.id = KVM_REG_PPC_VSCR;
743         reg.addr = (uintptr_t)&env->vscr;
744         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
745         if (ret < 0) {
746             DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
747             return ret;
748         }
749 
750         for (i = 0; i < 32; i++) {
751             reg.id = KVM_REG_PPC_VR(i);
752             reg.addr = (uintptr_t)&env->avr[i];
753             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
754             if (ret < 0) {
755                 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
756                 return ret;
757             }
758         }
759     }
760 
761     return 0;
762 }
763 
764 static int kvm_get_fp(CPUState *cs)
765 {
766     PowerPCCPU *cpu = POWERPC_CPU(cs);
767     CPUPPCState *env = &cpu->env;
768     struct kvm_one_reg reg;
769     int i;
770     int ret;
771 
772     if (env->insns_flags & PPC_FLOAT) {
773         uint64_t fpscr;
774         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
775 
776         reg.id = KVM_REG_PPC_FPSCR;
777         reg.addr = (uintptr_t)&fpscr;
778         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
779         if (ret < 0) {
780             DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
781             return ret;
782         } else {
783             env->fpscr = fpscr;
784         }
785 
786         for (i = 0; i < 32; i++) {
787             uint64_t vsr[2];
788 
789             reg.addr = (uintptr_t) &vsr;
790             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
791 
792             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
793             if (ret < 0) {
794                 DPRINTF("Unable to get %s%d from KVM: %s\n",
795                         vsx ? "VSR" : "FPR", i, strerror(errno));
796                 return ret;
797             } else {
798 #ifdef HOST_WORDS_BIGENDIAN
799                 env->fpr[i] = vsr[0];
800                 if (vsx) {
801                     env->vsr[i] = vsr[1];
802                 }
803 #else
804                 env->fpr[i] = vsr[1];
805                 if (vsx) {
806                     env->vsr[i] = vsr[0];
807                 }
808 #endif
809             }
810         }
811     }
812 
813     if (env->insns_flags & PPC_ALTIVEC) {
814         reg.id = KVM_REG_PPC_VSCR;
815         reg.addr = (uintptr_t)&env->vscr;
816         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
817         if (ret < 0) {
818             DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
819             return ret;
820         }
821 
822         for (i = 0; i < 32; i++) {
823             reg.id = KVM_REG_PPC_VR(i);
824             reg.addr = (uintptr_t)&env->avr[i];
825             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
826             if (ret < 0) {
827                 DPRINTF("Unable to get VR%d from KVM: %s\n",
828                         i, strerror(errno));
829                 return ret;
830             }
831         }
832     }
833 
834     return 0;
835 }
836 
837 #if defined(TARGET_PPC64)
838 static int kvm_get_vpa(CPUState *cs)
839 {
840     PowerPCCPU *cpu = POWERPC_CPU(cs);
841     CPUPPCState *env = &cpu->env;
842     struct kvm_one_reg reg;
843     int ret;
844 
845     reg.id = KVM_REG_PPC_VPA_ADDR;
846     reg.addr = (uintptr_t)&env->vpa_addr;
847     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
848     if (ret < 0) {
849         DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
850         return ret;
851     }
852 
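    /* The SLB shadow address and size are transferred together through the
     * single KVM_REG_PPC_VPA_SLB register, so the two fields must be
     * adjacent in CPUPPCState; the assert below checks that layout. */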
853     assert((uintptr_t)&env->slb_shadow_size
854            == ((uintptr_t)&env->slb_shadow_addr + 8));
855     reg.id = KVM_REG_PPC_VPA_SLB;
856     reg.addr = (uintptr_t)&env->slb_shadow_addr;
857     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
858     if (ret < 0) {
859         DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
860                 strerror(errno));
861         return ret;
862     }
863 
864     assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
865     reg.id = KVM_REG_PPC_VPA_DTL;
866     reg.addr = (uintptr_t)&env->dtl_addr;
867     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
868     if (ret < 0) {
869         DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
870                 strerror(errno));
871         return ret;
872     }
873 
874     return 0;
875 }
876 
877 static int kvm_put_vpa(CPUState *cs)
878 {
879     PowerPCCPU *cpu = POWERPC_CPU(cs);
880     CPUPPCState *env = &cpu->env;
881     struct kvm_one_reg reg;
882     int ret;
883 
884     /* SLB shadow or DTL can't be registered unless a master VPA is
885      * registered.  That means when restoring state, if a VPA *is*
886      * registered, we need to set that up first.  If not, we need to
887      * deregister the others before deregistering the master VPA */
888     assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
889 
890     if (env->vpa_addr) {
891         reg.id = KVM_REG_PPC_VPA_ADDR;
892         reg.addr = (uintptr_t)&env->vpa_addr;
893         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
894         if (ret < 0) {
895             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
896             return ret;
897         }
898     }
899 
900     assert((uintptr_t)&env->slb_shadow_size
901            == ((uintptr_t)&env->slb_shadow_addr + 8));
902     reg.id = KVM_REG_PPC_VPA_SLB;
903     reg.addr = (uintptr_t)&env->slb_shadow_addr;
904     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
905     if (ret < 0) {
906         DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
907         return ret;
908     }
909 
910     assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
911     reg.id = KVM_REG_PPC_VPA_DTL;
912     reg.addr = (uintptr_t)&env->dtl_addr;
913     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
914     if (ret < 0) {
915         DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
916                 strerror(errno));
917         return ret;
918     }
919 
920     if (!env->vpa_addr) {
921         reg.id = KVM_REG_PPC_VPA_ADDR;
922         reg.addr = (uintptr_t)&env->vpa_addr;
923         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
924         if (ret < 0) {
925             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
926             return ret;
927         }
928     }
929 
930     return 0;
931 }
932 #endif /* TARGET_PPC64 */
933 
934 int kvmppc_put_books_sregs(PowerPCCPU *cpu)
935 {
936     CPUPPCState *env = &cpu->env;
937     struct kvm_sregs sregs;
938     int i;
939 
940     sregs.pvr = env->spr[SPR_PVR];
941 
942     sregs.u.s.sdr1 = env->spr[SPR_SDR1];
943 
944     /* Sync SLB */
945 #ifdef TARGET_PPC64
946     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
947         sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
948         if (env->slb[i].esid & SLB_ESID_V) {
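            /* Valid entries carry their SLB slot number in the low bits of
             * the ESID word; kvmppc_get_books_sregs() masks it back out
             * with "rb & 0xfff". */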
949             sregs.u.s.ppc64.slb[i].slbe |= i;
950         }
951         sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
952     }
953 #endif
954 
955     /* Sync SRs */
956     for (i = 0; i < 16; i++) {
957         sregs.u.s.ppc32.sr[i] = env->sr[i];
958     }
959 
960     /* Sync BATs */
961     for (i = 0; i < 8; i++) {
962         /* Beware. We have to swap the upper and lower 32-bit halves here */
963         sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
964             | env->DBAT[1][i];
965         sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
966             | env->IBAT[1][i];
967     }
968 
969     return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
970 }
971 
972 int kvm_arch_put_registers(CPUState *cs, int level)
973 {
974     PowerPCCPU *cpu = POWERPC_CPU(cs);
975     CPUPPCState *env = &cpu->env;
976     struct kvm_regs regs;
977     int ret;
978     int i;
979 
980     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
981     if (ret < 0) {
982         return ret;
983     }
984 
985     regs.ctr = env->ctr;
986     regs.lr  = env->lr;
987     regs.xer = cpu_read_xer(env);
988     regs.msr = env->msr;
989     regs.pc = env->nip;
990 
991     regs.srr0 = env->spr[SPR_SRR0];
992     regs.srr1 = env->spr[SPR_SRR1];
993 
994     regs.sprg0 = env->spr[SPR_SPRG0];
995     regs.sprg1 = env->spr[SPR_SPRG1];
996     regs.sprg2 = env->spr[SPR_SPRG2];
997     regs.sprg3 = env->spr[SPR_SPRG3];
998     regs.sprg4 = env->spr[SPR_SPRG4];
999     regs.sprg5 = env->spr[SPR_SPRG5];
1000     regs.sprg6 = env->spr[SPR_SPRG6];
1001     regs.sprg7 = env->spr[SPR_SPRG7];
1002 
1003     regs.pid = env->spr[SPR_BOOKE_PID];
1004 
1005     for (i = 0; i < 32; i++)
1006         regs.gpr[i] = env->gpr[i];
1007 
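    /* Pack the eight 4-bit CR fields into the single 32-bit regs.cr value,
     * with CR0 in the most significant nibble. */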
1008     regs.cr = 0;
1009     for (i = 0; i < 8; i++) {
1010         regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
1011     }
1012 
1013     ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
1014     if (ret < 0)
1015         return ret;
1016 
1017     kvm_put_fp(cs);
1018 
1019     if (env->tlb_dirty) {
1020         kvm_sw_tlb_put(cpu);
1021         env->tlb_dirty = false;
1022     }
1023 
1024     if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
1025         ret = kvmppc_put_books_sregs(cpu);
1026         if (ret < 0) {
1027             return ret;
1028         }
1029     }
1030 
1031     if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
1032         kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1033     }
1034 
1035     if (cap_one_reg) {
1036         int i;
1037 
1038         /* We deliberately ignore errors here, for kernels which have
1039          * the ONE_REG calls, but don't support the specific
1040          * registers, there's a reasonable chance things will still
1041          * work, at least until we try to migrate. */
1042         for (i = 0; i < 1024; i++) {
1043             uint64_t id = env->spr_cb[i].one_reg_id;
1044 
1045             if (id != 0) {
1046                 kvm_put_one_spr(cs, id, i);
1047             }
1048         }
1049 
1050 #ifdef TARGET_PPC64
1051         if (msr_ts) {
1052             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1053                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1054             }
1055             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1056                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1057             }
1058             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1059             kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1060             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1061             kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1062             kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1063             kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1064             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1065             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1066             kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1067             kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1068         }
1069 
1070         if (cap_papr) {
1071             if (kvm_put_vpa(cs) < 0) {
1072                 DPRINTF("Warning: Unable to set VPA information to KVM\n");
1073             }
1074         }
1075 
1076         kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1077 #endif /* TARGET_PPC64 */
1078     }
1079 
1080     return ret;
1081 }
1082 
1083 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1084 {
1085     env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1086 }
1087 
1088 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1089 {
1090     CPUPPCState *env = &cpu->env;
1091     struct kvm_sregs sregs;
1092     int ret;
1093 
1094     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1095     if (ret < 0) {
1096         return ret;
1097     }
1098 
1099     if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1100         env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1101         env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1102         env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1103         env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1104         env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1105         env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1106         env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1107         env->spr[SPR_DECR] = sregs.u.e.dec;
1108         env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1109         env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1110         env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1111     }
1112 
1113     if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1114         env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1115         env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1116         env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1117         env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1118         env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1119     }
1120 
1121     if (sregs.u.e.features & KVM_SREGS_E_64) {
1122         env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1123     }
1124 
1125     if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1126         env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1127     }
1128 
1129     if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1130         env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1131         kvm_sync_excp(env, POWERPC_EXCP_CRITICAL,  SPR_BOOKE_IVOR0);
1132         env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1133         kvm_sync_excp(env, POWERPC_EXCP_MCHECK,  SPR_BOOKE_IVOR1);
1134         env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1135         kvm_sync_excp(env, POWERPC_EXCP_DSI,  SPR_BOOKE_IVOR2);
1136         env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1137         kvm_sync_excp(env, POWERPC_EXCP_ISI,  SPR_BOOKE_IVOR3);
1138         env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1139         kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL,  SPR_BOOKE_IVOR4);
1140         env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1141         kvm_sync_excp(env, POWERPC_EXCP_ALIGN,  SPR_BOOKE_IVOR5);
1142         env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1143         kvm_sync_excp(env, POWERPC_EXCP_PROGRAM,  SPR_BOOKE_IVOR6);
1144         env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1145         kvm_sync_excp(env, POWERPC_EXCP_FPU,  SPR_BOOKE_IVOR7);
1146         env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1147         kvm_sync_excp(env, POWERPC_EXCP_SYSCALL,  SPR_BOOKE_IVOR8);
1148         env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1149         kvm_sync_excp(env, POWERPC_EXCP_APU,  SPR_BOOKE_IVOR9);
1150         env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1151         kvm_sync_excp(env, POWERPC_EXCP_DECR,  SPR_BOOKE_IVOR10);
1152         env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1153         kvm_sync_excp(env, POWERPC_EXCP_FIT,  SPR_BOOKE_IVOR11);
1154         env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1155         kvm_sync_excp(env, POWERPC_EXCP_WDT,  SPR_BOOKE_IVOR12);
1156         env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1157         kvm_sync_excp(env, POWERPC_EXCP_DTLB,  SPR_BOOKE_IVOR13);
1158         env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1159         kvm_sync_excp(env, POWERPC_EXCP_ITLB,  SPR_BOOKE_IVOR14);
1160         env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1161         kvm_sync_excp(env, POWERPC_EXCP_DEBUG,  SPR_BOOKE_IVOR15);
1162 
1163         if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1164             env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1165             kvm_sync_excp(env, POWERPC_EXCP_SPEU,  SPR_BOOKE_IVOR32);
1166             env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1167             kvm_sync_excp(env, POWERPC_EXCP_EFPDI,  SPR_BOOKE_IVOR33);
1168             env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1169             kvm_sync_excp(env, POWERPC_EXCP_EFPRI,  SPR_BOOKE_IVOR34);
1170         }
1171 
1172         if (sregs.u.e.features & KVM_SREGS_E_PM) {
1173             env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1174             kvm_sync_excp(env, POWERPC_EXCP_EPERFM,  SPR_BOOKE_IVOR35);
1175         }
1176 
1177         if (sregs.u.e.features & KVM_SREGS_E_PC) {
1178             env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1179             kvm_sync_excp(env, POWERPC_EXCP_DOORI,  SPR_BOOKE_IVOR36);
1180             env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1181             kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1182         }
1183     }
1184 
1185     if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1186         env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1187         env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1188         env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1189         env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1190         env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1191         env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1192         env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1193         env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1194         env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1195         env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1196     }
1197 
1198     if (sregs.u.e.features & KVM_SREGS_EXP) {
1199         env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1200     }
1201 
1202     if (sregs.u.e.features & KVM_SREGS_E_PD) {
1203         env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1204         env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1205     }
1206 
1207     if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1208         env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1209         env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1210         env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1211 
1212         if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1213             env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1214             env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1215         }
1216     }
1217 
1218     return 0;
1219 }
1220 
1221 static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1222 {
1223     CPUPPCState *env = &cpu->env;
1224     struct kvm_sregs sregs;
1225     int ret;
1226     int i;
1227 
1228     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1229     if (ret < 0) {
1230         return ret;
1231     }
1232 
1233     if (!cpu->vhyp) {
1234         ppc_store_sdr1(env, sregs.u.s.sdr1);
1235     }
1236 
1237     /* Sync SLB */
1238 #ifdef TARGET_PPC64
1239     /*
1240      * The packed SLB array we get from KVM_GET_SREGS only contains
1241      * information about valid entries. So we flush our internal copy
1242      * to get rid of stale ones, then put all valid SLB entries back
1243      * in.
1244      */
1245     memset(env->slb, 0, sizeof(env->slb));
1246     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1247         target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1248         target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1249         /*
1250          * Only restore valid entries
1251          */
1252         if (rb & SLB_ESID_V) {
1253             ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1254         }
1255     }
1256 #endif
1257 
1258     /* Sync SRs */
1259     for (i = 0; i < 16; i++) {
1260         env->sr[i] = sregs.u.s.ppc32.sr[i];
1261     }
1262 
1263     /* Sync BATs */
1264     for (i = 0; i < 8; i++) {
1265         env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1266         env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1267         env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1268         env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1269     }
1270 
1271     return 0;
1272 }
1273 
1274 int kvm_arch_get_registers(CPUState *cs)
1275 {
1276     PowerPCCPU *cpu = POWERPC_CPU(cs);
1277     CPUPPCState *env = &cpu->env;
1278     struct kvm_regs regs;
1279     uint32_t cr;
1280     int i, ret;
1281 
1282     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1283     if (ret < 0)
1284         return ret;
1285 
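    /* Unpack the 32-bit CR image into the eight 4-bit CR fields, starting
     * with CR7 in the least significant nibble. */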
1286     cr = regs.cr;
1287     for (i = 7; i >= 0; i--) {
1288         env->crf[i] = cr & 15;
1289         cr >>= 4;
1290     }
1291 
1292     env->ctr = regs.ctr;
1293     env->lr = regs.lr;
1294     cpu_write_xer(env, regs.xer);
1295     env->msr = regs.msr;
1296     env->nip = regs.pc;
1297 
1298     env->spr[SPR_SRR0] = regs.srr0;
1299     env->spr[SPR_SRR1] = regs.srr1;
1300 
1301     env->spr[SPR_SPRG0] = regs.sprg0;
1302     env->spr[SPR_SPRG1] = regs.sprg1;
1303     env->spr[SPR_SPRG2] = regs.sprg2;
1304     env->spr[SPR_SPRG3] = regs.sprg3;
1305     env->spr[SPR_SPRG4] = regs.sprg4;
1306     env->spr[SPR_SPRG5] = regs.sprg5;
1307     env->spr[SPR_SPRG6] = regs.sprg6;
1308     env->spr[SPR_SPRG7] = regs.sprg7;
1309 
1310     env->spr[SPR_BOOKE_PID] = regs.pid;
1311 
1312     for (i = 0; i < 32; i++)
1313         env->gpr[i] = regs.gpr[i];
1314 
1315     kvm_get_fp(cs);
1316 
1317     if (cap_booke_sregs) {
1318         ret = kvmppc_get_booke_sregs(cpu);
1319         if (ret < 0) {
1320             return ret;
1321         }
1322     }
1323 
1324     if (cap_segstate) {
1325         ret = kvmppc_get_books_sregs(cpu);
1326         if (ret < 0) {
1327             return ret;
1328         }
1329     }
1330 
1331     if (cap_hior) {
1332         kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1333     }
1334 
1335     if (cap_one_reg) {
1336         int i;
1337 
1338         /* We deliberately ignore errors here, for kernels which have
1339          * the ONE_REG calls, but don't support the specific
1340          * registers, there's a reasonable chance things will still
1341          * work, at least until we try to migrate. */
1342         for (i = 0; i < 1024; i++) {
1343             uint64_t id = env->spr_cb[i].one_reg_id;
1344 
1345             if (id != 0) {
1346                 kvm_get_one_spr(cs, id, i);
1347             }
1348         }
1349 
1350 #ifdef TARGET_PPC64
1351         if (msr_ts) {
1352             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1353                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1354             }
1355             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1356                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1357             }
1358             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1359             kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1360             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1361             kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1362             kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1363             kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1364             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1365             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1366             kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1367             kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1368         }
1369 
1370         if (cap_papr) {
1371             if (kvm_get_vpa(cs) < 0) {
1372                 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1373             }
1374         }
1375 
1376         kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1377 #endif
1378     }
1379 
1380     return 0;
1381 }
1382 
1383 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1384 {
1385     unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1386 
1387     if (irq != PPC_INTERRUPT_EXT) {
1388         return 0;
1389     }
1390 
1391     if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1392         return 0;
1393     }
1394 
1395     kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1396 
1397     return 0;
1398 }
1399 
1400 #if defined(TARGET_PPCEMB)
1401 #define PPC_INPUT_INT PPC40x_INPUT_INT
1402 #elif defined(TARGET_PPC64)
1403 #define PPC_INPUT_INT PPC970_INPUT_INT
1404 #else
1405 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1406 #endif
1407 
1408 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1409 {
1410     PowerPCCPU *cpu = POWERPC_CPU(cs);
1411     CPUPPCState *env = &cpu->env;
1412     int r;
1413     unsigned irq;
1414 
1415     qemu_mutex_lock_iothread();
1416 
1417     /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1418      * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1419     if (!cap_interrupt_level &&
1420         run->ready_for_interrupt_injection &&
1421         (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1422         (env->irq_input_state & (1<<PPC_INPUT_INT)))
1423     {
1424         /* For now KVM disregards the 'irq' argument. However, in the
1425          * future KVM could cache it in-kernel to avoid a heavyweight exit
1426          * when reading the UIC.
1427          */
1428         irq = KVM_INTERRUPT_SET;
1429 
1430         DPRINTF("injected interrupt %d\n", irq);
1431         r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1432         if (r < 0) {
1433             printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1434         }
1435 
1436         /* Always wake up soon in case the interrupt was level based */
1437         timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1438                        (NANOSECONDS_PER_SECOND / 50));
1439     }
1440 
1441     /* We don't know if there are more interrupts pending after this. However,
1442      * the guest will return to userspace in the course of handling this one
1443      * anyway, so we will get a chance to deliver the rest. */
1444 
1445     qemu_mutex_unlock_iothread();
1446 }
1447 
1448 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1449 {
1450     return MEMTXATTRS_UNSPECIFIED;
1451 }
1452 
1453 int kvm_arch_process_async_events(CPUState *cs)
1454 {
1455     return cs->halted;
1456 }
1457 
1458 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1459 {
1460     CPUState *cs = CPU(cpu);
1461     CPUPPCState *env = &cpu->env;
1462 
1463     if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1464         cs->halted = 1;
1465         cs->exception_index = EXCP_HLT;
1466     }
1467 
1468     return 0;
1469 }
1470 
1471 /* Map DCR accesses to the existing QEMU DCR emulation */
1472 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1473 {
1474     if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1475         fprintf(stderr, "Read from unhandled DCR (0x%x)\n", dcrn);
1476 
1477     return 0;
1478 }
1479 
1480 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1481 {
1482     if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1483         fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1484 
1485     return 0;
1486 }
1487 
1488 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1489 {
1490     /* Mixed endian case is not handled */
1491     uint32_t sc = debug_inst_opcode;
1492 
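    /* Save the guest's original instruction in bp->saved_insn, then patch in
     * the breakpoint opcode that KVM reported via KVM_REG_PPC_DEBUG_INST. */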
1493     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1494                             sizeof(sc), 0) ||
1495         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1496         return -EINVAL;
1497     }
1498 
1499     return 0;
1500 }
1501 
1502 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1503 {
1504     uint32_t sc;
1505 
1506     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1507         sc != debug_inst_opcode ||
1508         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1509                             sizeof(sc), 1)) {
1510         return -EINVAL;
1511     }
1512 
1513     return 0;
1514 }
1515 
1516 static int find_hw_breakpoint(target_ulong addr, int type)
1517 {
1518     int n;
1519 
1520     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1521            <= ARRAY_SIZE(hw_debug_points));
1522 
1523     for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1524         if (hw_debug_points[n].addr == addr &&
1525              hw_debug_points[n].type == type) {
1526             return n;
1527         }
1528     }
1529 
1530     return -1;
1531 }
1532 
1533 static int find_hw_watchpoint(target_ulong addr, int *flag)
1534 {
1535     int n;
1536 
1537     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1538     if (n >= 0) {
1539         *flag = BP_MEM_ACCESS;
1540         return n;
1541     }
1542 
1543     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1544     if (n >= 0) {
1545         *flag = BP_MEM_WRITE;
1546         return n;
1547     }
1548 
1549     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1550     if (n >= 0) {
1551         *flag = BP_MEM_READ;
1552         return n;
1553     }
1554 
1555     return -1;
1556 }
1557 
1558 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1559                                   target_ulong len, int type)
1560 {
1561     if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1562         return -ENOBUFS;
1563     }
1564 
1565     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1566     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1567 
1568     switch (type) {
1569     case GDB_BREAKPOINT_HW:
1570         if (nb_hw_breakpoint >= max_hw_breakpoint) {
1571             return -ENOBUFS;
1572         }
1573 
1574         if (find_hw_breakpoint(addr, type) >= 0) {
1575             return -EEXIST;
1576         }
1577 
1578         nb_hw_breakpoint++;
1579         break;
1580 
1581     case GDB_WATCHPOINT_WRITE:
1582     case GDB_WATCHPOINT_READ:
1583     case GDB_WATCHPOINT_ACCESS:
1584         if (nb_hw_watchpoint >= max_hw_watchpoint) {
1585             return -ENOBUFS;
1586         }
1587 
1588         if (find_hw_breakpoint(addr, type) >= 0) {
1589             return -EEXIST;
1590         }
1591 
1592         nb_hw_watchpoint++;
1593         break;
1594 
1595     default:
1596         return -ENOSYS;
1597     }
1598 
1599     return 0;
1600 }
1601 
1602 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1603                                   target_ulong len, int type)
1604 {
1605     int n;
1606 
1607     n = find_hw_breakpoint(addr, type);
1608     if (n < 0) {
1609         return -ENOENT;
1610     }
1611 
1612     switch (type) {
1613     case GDB_BREAKPOINT_HW:
1614         nb_hw_breakpoint--;
1615         break;
1616 
1617     case GDB_WATCHPOINT_WRITE:
1618     case GDB_WATCHPOINT_READ:
1619     case GDB_WATCHPOINT_ACCESS:
1620         nb_hw_watchpoint--;
1621         break;
1622 
1623     default:
1624         return -ENOSYS;
1625     }
1626     hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1627 
1628     return 0;
1629 }
1630 
1631 void kvm_arch_remove_all_hw_breakpoints(void)
1632 {
1633     nb_hw_breakpoint = nb_hw_watchpoint = 0;
1634 }
1635 
1636 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1637 {
1638     int n;
1639 
1640     /* Software Breakpoint updates */
1641     if (kvm_sw_breakpoints_active(cs)) {
1642         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1643     }
1644 
1645     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1646            <= ARRAY_SIZE(hw_debug_points));
1647     assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1648 
1649     if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1650         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1651         memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1652         for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1653             switch (hw_debug_points[n].type) {
1654             case GDB_BREAKPOINT_HW:
1655                 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1656                 break;
1657             case GDB_WATCHPOINT_WRITE:
1658                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1659                 break;
1660             case GDB_WATCHPOINT_READ:
1661                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1662                 break;
1663             case GDB_WATCHPOINT_ACCESS:
1664                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1665                                         KVMPPC_DEBUG_WATCH_READ;
1666                 break;
1667             default:
1668                 cpu_abort(cs, "Unsupported breakpoint type\n");
1669             }
1670             dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1671         }
1672     }
1673 }
1674 
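/* Handle a KVM_EXIT_DEBUG exit.  Returns non-zero when the exit belongs
 * to QEMU (single-stepping, or a hardware/software breakpoint or
 * watchpoint we registered), so that kvm_arch_handle_exit() can return
 * EXCP_DEBUG.  Otherwise returns 0 and injects a program exception into
 * the guest (see the comment below for why). */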
1675 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1676 {
1677     CPUState *cs = CPU(cpu);
1678     CPUPPCState *env = &cpu->env;
1679     struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1680     int handle = 0;
1681     int n;
1682     int flag = 0;
1683 
1684     if (cs->singlestep_enabled) {
1685         handle = 1;
1686     } else if (arch_info->status) {
1687         if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1688             if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1689                 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1690                 if (n >= 0) {
1691                     handle = 1;
1692                 }
1693             } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1694                                             KVMPPC_DEBUG_WATCH_WRITE)) {
1695                 n = find_hw_watchpoint(arch_info->address, &flag);
1696                 if (n >= 0) {
1697                     handle = 1;
1698                     cs->watchpoint_hit = &hw_watchpoint;
1699                     hw_watchpoint.vaddr = hw_debug_points[n].addr;
1700                     hw_watchpoint.flags = flag;
1701                 }
1702             }
1703         }
1704     } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1705         handle = 1;
1706     } else {
1707         /* QEMU cannot handle this debug exception, so inject a
1708          * program exception into the guest - yes, a program
1709          * exception, NOT a debug exception!
1710          * While QEMU owns the debug resources, debug exceptions
1711          * must always be enabled; to achieve this we set MSR_DE and
1712          * also MSRP_DEP so the guest cannot change MSR_DE.
1713          * When instead emulating debug resources for the guest, the
1714          * guest must control MSR_DE itself (enabling/disabling the
1715          * debug interrupt as needed).
1716          * Supporting both configurations at once is not possible,
1717          * so debug resources cannot be shared between QEMU and the
1718          * guest on the BookE architecture.  In the current design
1719          * QEMU takes priority: while QEMU is using the debug
1720          * resources the guest cannot use them.
1721          * For software breakpoints QEMU uses a privileged
1722          * instruction, so there is no way this exit reflects a
1723          * guest-owned debug exception; the only possibility is that
1724          * the guest executed a privileged or illegal instruction,
1725          * which is why we inject a program interrupt.
1726          */
1727 
1728         cpu_synchronize_state(cs);
1729         /* env->nip is the PC, so increment it by 4 to use
1730          * ppc_cpu_do_interrupt(), which sets srr0 = env->nip - 4.
1731          */
1732         env->nip += 4;
1733         cs->exception_index = POWERPC_EXCP_PROGRAM;
1734         env->error_code = POWERPC_EXCP_INVAL;
1735         ppc_cpu_do_interrupt(cs);
1736     }
1737 
1738     return handle;
1739 }
1740 
1741 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1742 {
1743     PowerPCCPU *cpu = POWERPC_CPU(cs);
1744     CPUPPCState *env = &cpu->env;
1745     int ret;
1746 
1747     qemu_mutex_lock_iothread();
1748 
1749     switch (run->exit_reason) {
1750     case KVM_EXIT_DCR:
1751         if (run->dcr.is_write) {
1752             DPRINTF("handle dcr write\n");
1753             ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1754         } else {
1755             DPRINTF("handle dcr read\n");
1756             ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1757         }
1758         break;
1759     case KVM_EXIT_HLT:
1760         DPRINTF("handle halt\n");
1761         ret = kvmppc_handle_halt(cpu);
1762         break;
1763 #if defined(TARGET_PPC64)
1764     case KVM_EXIT_PAPR_HCALL:
1765         DPRINTF("handle PAPR hypercall\n");
1766         run->papr_hcall.ret = spapr_hypercall(cpu,
1767                                               run->papr_hcall.nr,
1768                                               run->papr_hcall.args);
1769         ret = 0;
1770         break;
1771 #endif
1772     case KVM_EXIT_EPR:
1773         DPRINTF("handle epr\n");
1774         run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1775         ret = 0;
1776         break;
1777     case KVM_EXIT_WATCHDOG:
1778         DPRINTF("handle watchdog expiry\n");
1779         watchdog_perform_action();
1780         ret = 0;
1781         break;
1782 
1783     case KVM_EXIT_DEBUG:
1784         DPRINTF("handle debug exception\n");
1785         if (kvm_handle_debug(cpu, run)) {
1786             ret = EXCP_DEBUG;
1787             break;
1788         }
1789         /* re-enter, this exception was guest-internal */
1790         ret = 0;
1791         break;
1792 
1793     default:
1794         fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1795         ret = -1;
1796         break;
1797     }
1798 
1799     qemu_mutex_unlock_iothread();
1800     return ret;
1801 }
1802 
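/* The BookE timer registers are managed through the ONE_REG interface:
 * KVM_REG_PPC_OR_TSR ORs bits into the guest's Timer Status Register,
 * KVM_REG_PPC_CLEAR_TSR clears them, and KVM_REG_PPC_TCR replaces the
 * Timer Control Register wholesale (see the helpers below). */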
1803 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1804 {
1805     CPUState *cs = CPU(cpu);
1806     uint32_t bits = tsr_bits;
1807     struct kvm_one_reg reg = {
1808         .id = KVM_REG_PPC_OR_TSR,
1809         .addr = (uintptr_t) &bits,
1810     };
1811 
1812     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1813 }
1814 
1815 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1816 {
1818     CPUState *cs = CPU(cpu);
1819     uint32_t bits = tsr_bits;
1820     struct kvm_one_reg reg = {
1821         .id = KVM_REG_PPC_CLEAR_TSR,
1822         .addr = (uintptr_t) &bits,
1823     };
1824 
1825     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1826 }
1827 
1828 int kvmppc_set_tcr(PowerPCCPU *cpu)
1829 {
1830     CPUState *cs = CPU(cpu);
1831     CPUPPCState *env = &cpu->env;
1832     uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1833 
1834     struct kvm_one_reg reg = {
1835         .id = KVM_REG_PPC_TCR,
1836         .addr = (uintptr_t) &tcr,
1837     };
1838 
1839     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1840 }
1841 
1842 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1843 {
1844     CPUState *cs = CPU(cpu);
1845     int ret;
1846 
1847     if (!kvm_enabled()) {
1848         return -1;
1849     }
1850 
1851     if (!cap_ppc_watchdog) {
1852         printf("warning: KVM does not support watchdog\n");
1853         return -1;
1854     }
1855 
1856     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1857     if (ret < 0) {
1858         fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1859                 __func__, strerror(-ret));
1860         return ret;
1861     }
1862 
1863     return ret;
1864 }
1865 
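/* Scan /proc/cpuinfo for the first line starting with @field and copy
 * that whole line (including the field name) into @value.  Returns 0 on
 * success, -1 if the file can't be opened or the field is absent. */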
1866 static int read_cpuinfo(const char *field, char *value, int len)
1867 {
1868     FILE *f;
1869     int ret = -1;
1870     int field_len = strlen(field);
1871     char line[512];
1872 
1873     f = fopen("/proc/cpuinfo", "r");
1874     if (!f) {
1875         return -1;
1876     }
1877 
1878     do {
1879         if (!fgets(line, sizeof(line), f)) {
1880             break;
1881         }
1882         if (!strncmp(line, field, field_len)) {
1883             pstrcpy(value, len, line);
1884             ret = 0;
1885             break;
1886         }
1887     } while (*line);
1888 
1889     fclose(f);
1890 
1891     return ret;
1892 }
1893 
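/* Parse the timebase frequency out of the host's /proc/cpuinfo.  The
 * line typically looks like "timebase : 512000000"; we return whatever
 * integer follows the colon, or fall back to NANOSECONDS_PER_SECOND if
 * the line can't be found or parsed. */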
1894 uint32_t kvmppc_get_tbfreq(void)
1895 {
1896     char line[512];
1897     char *ns;
1898     uint32_t retval = NANOSECONDS_PER_SECOND;
1899 
1900     if (read_cpuinfo("timebase", line, sizeof(line))) {
1901         return retval;
1902     }
1903 
1904     if (!(ns = strchr(line, ':'))) {
1905         return retval;
1906     }
1907 
1908     ns++;
1909 
1910     return atoi(ns);
1911 }
1912 
1913 bool kvmppc_get_host_serial(char **value)
1914 {
1915     return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
1916                                NULL);
1917 }
1918 
1919 bool kvmppc_get_host_model(char **value)
1920 {
1921     return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
1922 }
1923 
1924 /* Try to find a device tree node for a CPU with clock-frequency property */
1925 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1926 {
1927     struct dirent *dirp;
1928     DIR *dp;
1929 
1930     if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1931         printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1932         return -1;
1933     }
1934 
1935     buf[0] = '\0';
1936     while ((dirp = readdir(dp)) != NULL) {
1937         FILE *f;
1938         snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1939                  dirp->d_name);
1940         f = fopen(buf, "r");
1941         if (f) {
1942             snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1943             fclose(f);
1944             break;
1945         }
1946         buf[0] = '\0';
1947     }
1948     closedir(dp);
1949     if (buf[0] == '\0') {
1950         printf("Unknown host!\n");
1951         return -1;
1952     }
1953 
1954     return 0;
1955 }
1956 
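/* Read a device tree property file that contains a single big-endian
 * integer and return it in host byte order.  Returns (uint64_t)-1 if
 * the file can't be opened, and 0 if its length is neither 4 nor 8
 * bytes. */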
1957 static uint64_t kvmppc_read_int_dt(const char *filename)
1958 {
1959     union {
1960         uint32_t v32;
1961         uint64_t v64;
1962     } u;
1963     FILE *f;
1964     int len;
1965 
1966     f = fopen(filename, "rb");
1967     if (!f) {
1968         return -1;
1969     }
1970 
1971     len = fread(&u, 1, sizeof(u), f);
1972     fclose(f);
1973     switch (len) {
1974     case 4:
1975         /* property is a 32-bit quantity */
1976         return be32_to_cpu(u.v32);
1977     case 8:
1978         return be64_to_cpu(u.v64);
1979     }
1980 
1981     return 0;
1982 }
1983 
1984 /* Read a CPU node property from the host device tree that's a single
1985  * integer (32-bit or 64-bit).  Returns (uint64_t)-1 if the node or
1986  * property can't be found or opened, and 0 if the property length
1987  * isn't understood. */
1988 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1989 {
1990     char buf[PATH_MAX], *tmp;
1991     uint64_t val;
1992 
1993     if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1994         return -1;
1995     }
1996 
1997     tmp = g_strdup_printf("%s/%s", buf, propname);
1998     val = kvmppc_read_int_dt(tmp);
1999     g_free(tmp);
2000 
2001     return val;
2002 }
2003 
2004 uint64_t kvmppc_get_clockfreq(void)
2005 {
2006     return kvmppc_read_int_cpu_dt("clock-frequency");
2007 }
2008 
2009 uint32_t kvmppc_get_vmx(void)
2010 {
2011     return kvmppc_read_int_cpu_dt("ibm,vmx");
2012 }
2013 
2014 uint32_t kvmppc_get_dfp(void)
2015 {
2016     return kvmppc_read_int_cpu_dt("ibm,dfp");
2017 }
2018 
2019 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
2020 {
2021     PowerPCCPU *cpu = ppc_env_get_cpu(env);
2022     CPUState *cs = CPU(cpu);
2023 
2024     if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
2025         !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
2026         return 0;
2027     }
2028 
2029     return 1;
2030 }
2031 
2032 int kvmppc_get_hasidle(CPUPPCState *env)
2033 {
2034     struct kvm_ppc_pvinfo pvinfo;
2035 
2036     if (!kvmppc_get_pvinfo(env, &pvinfo) &&
2037         (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
2038         return 1;
2039     }
2040 
2041     return 0;
2042 }
2043 
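/* Copy the KVM-provided hypercall instruction sequence into @buf and
 * return 0.  If the kernel doesn't supply one, fill in the fallback
 * sequence described below, which makes every hypercall return -1
 * regardless of the guest's endianness, and return 1 instead. */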
2044 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
2045 {
2046     uint32_t *hc = (uint32_t *)buf;
2047     struct kvm_ppc_pvinfo pvinfo;
2048 
2049     if (!kvmppc_get_pvinfo(env, &pvinfo)) {
2050         memcpy(buf, pvinfo.hcall, buf_len);
2051         return 0;
2052     }
2053 
2054     /*
2055      * Fall back to hypercalls that always fail, regardless of endianness:
2056      *
2057      *     tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
2058      *     li r3, -1
2059      *     b .+8       (becomes nop in wrong endian)
2060      *     bswap32(li r3, -1)
2061      */
2062 
2063     hc[0] = cpu_to_be32(0x08000048);
2064     hc[1] = cpu_to_be32(0x3860ffff);
2065     hc[2] = cpu_to_be32(0x48000008);
2066     hc[3] = cpu_to_be32(bswap32(0x3860ffff));
2067 
2068     return 1;
2069 }
2070 
2071 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
2072 {
2073     return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
2074 }
2075 
2076 void kvmppc_enable_logical_ci_hcalls(void)
2077 {
2078     /*
2079      * FIXME: it would be nice if we could detect the cases where
2080      * we're using a device which requires the in-kernel
2081      * implementation of these hcalls but the kernel lacks them,
2082      * and produce a warning.
2083      */
2084     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
2085     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
2086 }
2087 
2088 void kvmppc_enable_set_mode_hcall(void)
2089 {
2090     kvmppc_enable_hcall(kvm_state, H_SET_MODE);
2091 }
2092 
2093 void kvmppc_enable_clear_ref_mod_hcalls(void)
2094 {
2095     kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
2096     kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
2097 }
2098 
2099 void kvmppc_set_papr(PowerPCCPU *cpu)
2100 {
2101     CPUState *cs = CPU(cpu);
2102     int ret;
2103 
2104     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2105     if (ret) {
2106         error_report("This vCPU type or KVM version does not support PAPR");
2107         exit(1);
2108     }
2109 
2110     /* Update the capability flag so we sync the right information
2111      * with kvm */
2112     cap_papr = 1;
2113 }
2114 
2115 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr)
2116 {
2117     return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr);
2118 }
2119 
2120 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2121 {
2122     CPUState *cs = CPU(cpu);
2123     int ret;
2124 
2125     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2126     if (ret && mpic_proxy) {
2127         error_report("This KVM version does not support EPR");
2128         exit(1);
2129     }
2130 }
2131 
2132 int kvmppc_smt_threads(void)
2133 {
2134     return cap_ppc_smt ? cap_ppc_smt : 1;
2135 }
2136 
2137 #ifdef TARGET_PPC64
2138 off_t kvmppc_alloc_rma(void **rma)
2139 {
2140     off_t size;
2141     int fd;
2142     struct kvm_allocate_rma ret;
2143 
2144     /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
2145      * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
2146      *                      not necessary on this hardware
2147      * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
2148      *
2149      * FIXME: We should allow the user to force contiguous RMA
2150      * allocation in the cap_ppc_rma==1 case.
2151      */
2152     if (cap_ppc_rma < 2) {
2153         return 0;
2154     }
2155 
2156     fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
2157     if (fd < 0) {
2158         fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
2159                 strerror(errno));
2160         return -1;
2161     }
2162 
2163     size = MIN(ret.rma_size, 256ul << 20);
2164 
2165     *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2166     if (*rma == MAP_FAILED) {
2167         fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
2168         return -1;
2169     }
2170 
2171     return size;
2172 }
2173 
2174 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2175 {
2176     struct kvm_ppc_smmu_info info;
2177     long rampagesize, best_page_shift;
2178     int i;
2179 
2180     if (cap_ppc_rma >= 2) {
2181         return current_size;
2182     }
2183 
2184     /* Find the largest hardware supported page size that's less than
2185      * or equal to the (logical) backing page size of guest RAM */
2186     kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
2187     rampagesize = qemu_getrampagesize();
2188     best_page_shift = 0;
2189 
2190     for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2191         struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2192 
2193         if (!sps->page_shift) {
2194             continue;
2195         }
2196 
2197         if ((sps->page_shift > best_page_shift)
2198             && ((1UL << sps->page_shift) <= rampagesize)) {
2199             best_page_shift = sps->page_shift;
2200         }
2201     }
2202 
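    /* Limit the RMA to roughly one page per 128-byte HPTE group in the
     * hash table: (2^hash_shift / 2^7) groups of 2^best_page_shift
     * bytes each, hence the (best_page_shift + hash_shift - 7) below. */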
2203     return MIN(current_size,
2204                1ULL << (best_page_shift + hash_shift - 7));
2205 }
2206 #endif
2207 
2208 bool kvmppc_spapr_use_multitce(void)
2209 {
2210     return cap_spapr_multitce;
2211 }
2212 
2213 int kvmppc_spapr_enable_inkernel_multitce(void)
2214 {
2215     int ret;
2216 
2217     ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2218                             H_PUT_TCE_INDIRECT, 1);
2219     if (!ret) {
2220         ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2221                                 H_STUFF_TCE, 1);
2222     }
2223 
2224     return ret;
2225 }
2226 
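/* Try to create an in-kernel TCE table for the given LIOBN and mmap it
 * into QEMU, preferring the 64-bit ioctl (which supports a bus offset)
 * when available.  On any failure this returns NULL and leaves *pfd at
 * -1, so callers can fall back to a userspace-managed TCE table. */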
2227 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
2228                               uint64_t bus_offset, uint32_t nb_table,
2229                               int *pfd, bool need_vfio)
2230 {
2231     long len;
2232     int fd;
2233     void *table;
2234 
2235     /* Must set fd to -1 so we don't try to munmap when called for
2236      * destroying the table, which the upper layers -will- do
2237      */
2238     *pfd = -1;
2239     if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2240         return NULL;
2241     }
2242 
2243     if (cap_spapr_tce_64) {
2244         struct kvm_create_spapr_tce_64 args = {
2245             .liobn = liobn,
2246             .page_shift = page_shift,
2247             .offset = bus_offset >> page_shift,
2248             .size = nb_table,
2249             .flags = 0
2250         };
2251         fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE_64, &args);
2252         if (fd < 0) {
2253             fprintf(stderr,
2254                     "KVM: Failed to create TCE64 table for liobn 0x%x\n",
2255                     liobn);
2256             return NULL;
2257         }
2258     } else if (cap_spapr_tce) {
2259         uint64_t window_size = (uint64_t) nb_table << page_shift;
2260         struct kvm_create_spapr_tce args = {
2261             .liobn = liobn,
2262             .window_size = window_size,
2263         };
2264         if ((window_size != args.window_size) || bus_offset) {
2265             return NULL;
2266         }
2267         fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2268         if (fd < 0) {
2269             fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2270                     liobn);
2271             return NULL;
2272         }
2273     } else {
2274         return NULL;
2275     }
2276 
2277     len = nb_table * sizeof(uint64_t);
2278     /* FIXME: round this up to page size */
2279 
2280     table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2281     if (table == MAP_FAILED) {
2282         fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2283                 liobn);
2284         close(fd);
2285         return NULL;
2286     }
2287 
2288     *pfd = fd;
2289     return table;
2290 }
2291 
2292 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
2293 {
2294     long len;
2295 
2296     if (fd < 0) {
2297         return -1;
2298     }
2299 
2300     len = nb_table * sizeof(uint64_t);
2301     if ((munmap(table, len) < 0) ||
2302         (close(fd) < 0)) {
2303         fprintf(stderr, "KVM: Unexpected error removing TCE table: %s\n",
2304                 strerror(errno));
2305         /* Leak the table */
2306     }
2307 
2308     return 0;
2309 }
2310 
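/* Ask the kernel to (re)allocate the guest hash page table.  Returns a
 * positive value (the log2 of the HPT size the kernel chose or already
 * has) when the kernel manages the HPT, 0 when QEMU must allocate the
 * HPT itself (full emulation, PR KVM, or a PR kernel with a broken
 * ioctl), or a negative errno on failure. */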
2311 int kvmppc_reset_htab(int shift_hint)
2312 {
2313     uint32_t shift = shift_hint;
2314 
2315     if (!kvm_enabled()) {
2316         /* Full emulation, tell caller to allocate htab itself */
2317         return 0;
2318     }
2319     if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2320         int ret;
2321         ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2322         if (ret == -ENOTTY) {
2323             /* At least some versions of PR KVM advertise the
2324              * capability, but don't implement the ioctl().  Oops.
2325              * Return 0 so that we allocate the htab in qemu, as is
2326              * correct for PR. */
2327             return 0;
2328         } else if (ret < 0) {
2329             return ret;
2330         }
2331         return shift;
2332     }
2333 
2334     /* We have a kernel that predates the htab reset calls.  For PR
2335      * KVM, we need to allocate the htab ourselves; an HV KVM of
2336      * this era has already allocated a fixed 16MB hash table. */
2337     if (kvmppc_is_pr(kvm_state)) {
2338         /* PR - tell caller to allocate htab */
2339         return 0;
2340     } else {
2341         /* HV - assume 16MB kernel allocated htab */
2342         return 24;
2343     }
2344 }
2345 
2346 static inline uint32_t mfpvr(void)
2347 {
2348     uint32_t pvr;
2349 
2350     asm ("mfpvr %0"
2351          : "=r"(pvr));
2352     return pvr;
2353 }
2354 
2355 static void alter_insns(uint64_t *word, uint64_t flags, bool on)
2356 {
2357     if (on) {
2358         *word |= flags;
2359     } else {
2360         *word &= ~flags;
2361     }
2362 }
2363 
2364 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2365 {
2366     PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2367     uint32_t vmx = kvmppc_get_vmx();
2368     uint32_t dfp = kvmppc_get_dfp();
2369     uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2370     uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2371 
2372     /* Now fix up the class with information we can query from the host */
2373     pcc->pvr = mfpvr();
2374 
2375     if (vmx != -1) {
2376         /* Only override when we know what the host supports */
2377         alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
2378         alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
2379     }
2380     if (dfp != -1) {
2381         /* Only override when we know what the host supports */
2382         alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
2383     }
2384 
2385     if (dcache_size != -1) {
2386         pcc->l1_dcache_size = dcache_size;
2387     }
2388 
2389     if (icache_size != -1) {
2390         pcc->l1_icache_size = icache_size;
2391     }
2392 
2393 #if defined(TARGET_PPC64)
2394     pcc->radix_page_info = kvm_get_radix_page_info();
2395 
2396     if ((pcc->pvr & 0xffffff00) == CPU_POWERPC_POWER9_DD1) {
2397         /*
2398          * POWER9 DD1 has some bugs which make it not really ISA 3.00
2399          * compliant.  More importantly, advertising ISA 3.00
2400          * architected mode may prevent guests from activating
2401          * necessary DD1 workarounds.
2402          */
2403         pcc->pcr_supported &= ~(PCR_COMPAT_3_00 | PCR_COMPAT_2_07
2404                                 | PCR_COMPAT_2_06 | PCR_COMPAT_2_05);
2405     }
2406 #endif /* defined(TARGET_PPC64) */
2407 }
2408 
2409 bool kvmppc_has_cap_epr(void)
2410 {
2411     return cap_epr;
2412 }
2413 
2414 bool kvmppc_has_cap_htab_fd(void)
2415 {
2416     return cap_htab_fd;
2417 }
2418 
2419 bool kvmppc_has_cap_fixup_hcalls(void)
2420 {
2421     return cap_fixup_hcalls;
2422 }
2423 
2424 bool kvmppc_has_cap_htm(void)
2425 {
2426     return cap_htm;
2427 }
2428 
2429 bool kvmppc_has_cap_mmu_radix(void)
2430 {
2431     return cap_mmu_radix;
2432 }
2433 
2434 bool kvmppc_has_cap_mmu_hash_v3(void)
2435 {
2436     return cap_mmu_hash_v3;
2437 }
2438 
2439 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
2440 {
2441     uint32_t host_pvr = mfpvr();
2442     PowerPCCPUClass *pvr_pcc;
2443 
2444     pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2445     if (pvr_pcc == NULL) {
2446         pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2447     }
2448 
2449     return pvr_pcc;
2450 }
2451 
2452 static int kvm_ppc_register_host_cpu_type(void)
2453 {
2454     TypeInfo type_info = {
2455         .name = TYPE_HOST_POWERPC_CPU,
2456         .class_init = kvmppc_host_cpu_class_init,
2457     };
2458     PowerPCCPUClass *pvr_pcc;
2459     ObjectClass *oc;
2460     DeviceClass *dc;
2461     int i;
2462 
2463     pvr_pcc = kvm_ppc_get_host_cpu_class();
2464     if (pvr_pcc == NULL) {
2465         return -1;
2466     }
2467     type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2468     type_register(&type_info);
2469 
2470     oc = object_class_by_name(type_info.name);
2471     g_assert(oc);
2472 
2473 #if defined(TARGET_PPC64)
2474     type_info.name = g_strdup_printf("%s-"TYPE_SPAPR_CPU_CORE, "host");
2475     type_info.parent = TYPE_SPAPR_CPU_CORE;
2476     type_info.instance_size = sizeof(sPAPRCPUCore);
2477     type_info.instance_init = NULL;
2478     type_info.class_init = spapr_cpu_core_class_init;
2479     type_info.class_data = (void *) "host";
2480     type_register(&type_info);
2481     g_free((void *)type_info.name);
2482 #endif
2483 
2484     /*
2485      * Update generic CPU family class alias (e.g. on a POWER8NVL host,
2486      * we want "POWER8" to be a "family" alias that points to the current
2487      * host CPU type, too)
2488      */
2489     dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc));
2490     for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) {
2491         if (strcmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) {
2492             char *suffix;
2493 
2494             ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc));
2495             suffix = strstr(ppc_cpu_aliases[i].model, "-"TYPE_POWERPC_CPU);
2496             if (suffix) {
2497                 *suffix = 0;
2498             }
2499             ppc_cpu_aliases[i].oc = oc;
2500             break;
2501         }
2502     }
2503 
2504     return 0;
2505 }
2506 
2507 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2508 {
2509     struct kvm_rtas_token_args args = {
2510         .token = token,
2511     };
2512 
2513     if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2514         return -ENOENT;
2515     }
2516 
2517     strncpy(args.name, function, sizeof(args.name));
2518 
2519     return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2520 }
2521 
2522 int kvmppc_get_htab_fd(bool write)
2523 {
2524     struct kvm_get_htab_fd s = {
2525         .flags = write ? KVM_GET_HTAB_WRITE : 0,
2526         .start_index = 0,
2527     };
2528 
2529     if (!cap_htab_fd) {
2530         fprintf(stderr, "KVM version doesn't support saving the hash table\n");
2531         return -1;
2532     }
2533 
2534     return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2535 }
2536 
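/* Stream the guest hash page table out through the KVM HTAB fd.  The
 * kernel hands back a sequence of kvm_get_htab_header records, each
 * giving a starting index, a run of n_valid HPTEs (HASH_PTE_SIZE_64
 * bytes apiece, appended right after the header) and a run of n_invalid
 * empty slots; we forward those into the migration stream.  Returns 1
 * once the kernel reports the table fully read (a zero-length read),
 * 0 if we ran out of time (max_ns), or a negative value on read error. */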
2537 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2538 {
2539     int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2540     uint8_t buf[bufsize];
2541     ssize_t rc;
2542 
2543     do {
2544         rc = read(fd, buf, bufsize);
2545         if (rc < 0) {
2546             fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2547                     strerror(errno));
2548             return rc;
2549         } else if (rc) {
2550             uint8_t *buffer = buf;
2551             ssize_t n = rc;
2552             while (n) {
2553                 struct kvm_get_htab_header *head =
2554                     (struct kvm_get_htab_header *) buffer;
2555                 size_t chunksize = sizeof(*head) +
2556                      HASH_PTE_SIZE_64 * head->n_valid;
2557 
2558                 qemu_put_be32(f, head->index);
2559                 qemu_put_be16(f, head->n_valid);
2560                 qemu_put_be16(f, head->n_invalid);
2561                 qemu_put_buffer(f, (void *)(head + 1),
2562                                 HASH_PTE_SIZE_64 * head->n_valid);
2563 
2564                 buffer += chunksize;
2565                 n -= chunksize;
2566             }
2567         }
2568     } while ((rc != 0)
2569              && ((max_ns < 0)
2570                  || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2571 
2572     return (rc == 0) ? 1 : 0;
2573 }
2574 
2575 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2576                            uint16_t n_valid, uint16_t n_invalid)
2577 {
2578     struct kvm_get_htab_header *buf;
2579     size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
2580     ssize_t rc;
2581 
2582     buf = alloca(chunksize);
2583     buf->index = index;
2584     buf->n_valid = n_valid;
2585     buf->n_invalid = n_invalid;
2586 
2587     qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
2588 
2589     rc = write(fd, buf, chunksize);
2590     if (rc < 0) {
2591         fprintf(stderr, "Error writing KVM hash table: %s\n",
2592                 strerror(errno));
2593         return rc;
2594     }
2595     if (rc != chunksize) {
2596         /* We should never get a short write on a single chunk */
2597         fprintf(stderr, "Short write, restoring KVM hash table\n");
2598         return -1;
2599     }
2600     return 0;
2601 }
2602 
2603 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2604 {
2605     return true;
2606 }
2607 
2608 void kvm_arch_init_irq_routing(KVMState *s)
2609 {
2610 }
2611 
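/* Read @n HPTEs starting at index @ptex from the in-kernel hash table
 * into @hptes, using a freshly opened HTAB fd positioned at @ptex.
 * Runs the kernel reports as invalid are zero-filled in the output. */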
2612 void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n)
2613 {
2614     struct kvm_get_htab_fd ghf = {
2615         .flags = 0,
2616         .start_index = ptex,
2617     };
2618     int fd, rc;
2619     int i;
2620 
2621     fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2622     if (fd < 0) {
2623         hw_error("kvmppc_read_hptes: Unable to open HPT fd");
2624     }
2625 
2626     i = 0;
2627     while (i < n) {
2628         struct kvm_get_htab_header *hdr;
2629         int m = n < HPTES_PER_GROUP ? n : HPTES_PER_GROUP;
2630         char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64];
2631 
2632         rc = read(fd, buf, sizeof(buf));
2633         if (rc < 0) {
2634             hw_error("kvmppc_read_hptes: Unable to read HPTEs");
2635         }
2636 
2637         hdr = (struct kvm_get_htab_header *)buf;
2638         while ((i < n) && ((char *)hdr < (buf + rc))) {
2639             int invalid = hdr->n_invalid;
2640 
2641             if (hdr->index != (ptex + i)) {
2642                 hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32
2643                          " != (%"HWADDR_PRIu" + %d)", hdr->index, ptex, i);
2644             }
2645 
2646             memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * hdr->n_valid);
2647             i += hdr->n_valid;
2648 
2649             if ((n - i) < invalid) {
2650                 invalid = n - i;
2651             }
2652             memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64);
2653             i += hdr->n_invalid;
2654 
2655             hdr = (struct kvm_get_htab_header *)
2656                 ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid);
2657         }
2658     }
2659 
2660     close(fd);
2661 }
2662 
2663 void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1)
2664 {
2665     int fd, rc;
2666     struct kvm_get_htab_fd ghf;
2667     struct {
2668         struct kvm_get_htab_header hdr;
2669         uint64_t pte0;
2670         uint64_t pte1;
2671     } buf;
2672 
2673     ghf.flags = 0;
2674     ghf.start_index = 0;     /* Ignored */
2675     fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2676     if (fd < 0) {
2677         hw_error("kvmppc_write_hpte: Unable to open HPT fd");
2678     }
2679 
2680     buf.hdr.n_valid = 1;
2681     buf.hdr.n_invalid = 0;
2682     buf.hdr.index = ptex;
2683     buf.pte0 = cpu_to_be64(pte0);
2684     buf.pte1 = cpu_to_be64(pte1);
2685 
2686     rc = write(fd, &buf, sizeof(buf));
2687     if (rc != sizeof(buf)) {
2688         hw_error("kvmppc_write_hpte: Unable to update KVM HPT");
2689     }
2690     close(fd);
2691 }
2692 
2693 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2694                              uint64_t address, uint32_t data, PCIDevice *dev)
2695 {
2696     return 0;
2697 }
2698 
2699 int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
2700                                 int vector, PCIDevice *dev)
2701 {
2702     return 0;
2703 }
2704 
2705 int kvm_arch_release_virq_post(int virq)
2706 {
2707     return 0;
2708 }
2709 
2710 int kvm_arch_msi_data_to_gsi(uint32_t data)
2711 {
2712     return data & 0xffff;
2713 }
2714 
2715 int kvmppc_enable_hwrng(void)
2716 {
2717     if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2718         return -1;
2719     }
2720 
2721     return kvmppc_enable_hcall(kvm_state, H_RANDOM);
2722 }
2723 
2724 void kvmppc_check_papr_resize_hpt(Error **errp)
2725 {
2726     if (!kvm_enabled()) {
2727         return; /* No KVM, we're good */
2728     }
2729 
2730     if (cap_resize_hpt) {
2731         return; /* Kernel has explicit support, we're good */
2732     }
2733 
2734     /* Otherwise fallback on looking for PR KVM */
2735     if (kvmppc_is_pr(kvm_state)) {
2736         return;
2737     }
2738 
2739     error_setg(errp,
2740                "Hash page table resizing not available with this KVM version");
2741 }
2742 
2743 int kvmppc_resize_hpt_prepare(PowerPCCPU *cpu, target_ulong flags, int shift)
2744 {
2745     CPUState *cs = CPU(cpu);
2746     struct kvm_ppc_resize_hpt rhpt = {
2747         .flags = flags,
2748         .shift = shift,
2749     };
2750 
2751     if (!cap_resize_hpt) {
2752         return -ENOSYS;
2753     }
2754 
2755     return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_PREPARE, &rhpt);
2756 }
2757 
2758 int kvmppc_resize_hpt_commit(PowerPCCPU *cpu, target_ulong flags, int shift)
2759 {
2760     CPUState *cs = CPU(cpu);
2761     struct kvm_ppc_resize_hpt rhpt = {
2762         .flags = flags,
2763         .shift = shift,
2764     };
2765 
2766     if (!cap_resize_hpt) {
2767         return -ENOSYS;
2768     }
2769 
2770     return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_COMMIT, &rhpt);
2771 }
2772 
2773 static void kvmppc_pivot_hpt_cpu(CPUState *cs, run_on_cpu_data arg)
2774 {
2775     target_ulong sdr1 = arg.target_ptr;
2776     PowerPCCPU *cpu = POWERPC_CPU(cs);
2777     CPUPPCState *env = &cpu->env;
2778 
2779     /* This is just for the benefit of PR KVM */
2780     cpu_synchronize_state(cs);
2781     env->spr[SPR_SDR1] = sdr1;
2782     if (kvmppc_put_books_sregs(cpu) < 0) {
2783         error_report("Unable to update SDR1 in KVM");
2784         exit(1);
2785     }
2786 }
2787 
2788 void kvmppc_update_sdr1(target_ulong sdr1)
2789 {
2790     CPUState *cs;
2791 
2792     CPU_FOREACH(cs) {
2793         run_on_cpu(cs, kvmppc_pivot_hpt_cpu, RUN_ON_CPU_TARGET_PTR(sdr1));
2794     }
2795 }
2796 
2797 /*
2798  * This is a helper function to detect a post migration scenario
2799  * in which a guest, running as KVM-HV, freezes in cpu_post_load because
2800  * the guest kernel can't handle a PVR value other than the actual host
2801  * PVR in KVM_SET_SREGS, even if pvr_match() returns true.
2802  *
2803  * If we don't have cap_ppc_pvr_compat and we're not running in PR
2804  * (so, we're HV), return true. The workaround itself is done in
2805  * cpu_post_load.
2806  *
2807  * The order here is important: we'll only check for KVM PR as a
2808  * fallback if the guest kernel can't handle the situation itself.
2809  * We need to avoid as much as possible querying the running KVM type
2810  * We want to avoid, as much as possible, querying the running KVM
2811  * type at the QEMU level.
2812 bool kvmppc_pvr_workaround_required(PowerPCCPU *cpu)
2813 {
2814     CPUState *cs = CPU(cpu);
2815 
2816     if (!kvm_enabled()) {
2817         return false;
2818     }
2819 
2820     if (cap_ppc_pvr_compat) {
2821         return false;
2822     }
2823 
2824     return !kvmppc_is_pr(cs->kvm_state);
2825 }
2826