xref: /openbmc/qemu/target/ppc/kvm.c (revision 9d81b2d2)
1 /*
2  * PowerPC implementation of KVM hooks
3  *
4  * Copyright IBM Corp. 2007
5  * Copyright (C) 2011 Freescale Semiconductor, Inc.
6  *
7  * Authors:
8  *  Jerone Young <jyoung5@us.ibm.com>
9  *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10  *  Hollis Blanchard <hollisb@us.ibm.com>
11  *
12  * This work is licensed under the terms of the GNU GPL, version 2 or later.
13  * See the COPYING file in the top-level directory.
14  *
15  */
16 
17 #include "qemu/osdep.h"
18 #include <dirent.h>
19 #include <sys/ioctl.h>
20 #include <sys/vfs.h>
21 
22 #include <linux/kvm.h>
23 
24 #include "qemu-common.h"
25 #include "qapi/error.h"
26 #include "qemu/error-report.h"
27 #include "cpu.h"
28 #include "cpu-models.h"
29 #include "qemu/timer.h"
30 #include "sysemu/sysemu.h"
31 #include "sysemu/hw_accel.h"
32 #include "kvm_ppc.h"
33 #include "sysemu/cpus.h"
34 #include "sysemu/device_tree.h"
35 #include "mmu-hash64.h"
36 
37 #include "hw/sysbus.h"
38 #include "hw/ppc/spapr.h"
39 #include "hw/ppc/spapr_vio.h"
40 #include "hw/ppc/spapr_cpu_core.h"
41 #include "hw/ppc/ppc.h"
42 #include "sysemu/watchdog.h"
43 #include "trace.h"
44 #include "exec/gdbstub.h"
45 #include "exec/memattrs.h"
46 #include "exec/ram_addr.h"
47 #include "sysemu/hostmem.h"
48 #include "qemu/cutils.h"
49 #include "qemu/mmap-alloc.h"
50 #if defined(TARGET_PPC64)
51 #include "hw/ppc/spapr_cpu_core.h"
52 #endif
53 #include "elf.h"
54 #include "sysemu/kvm_int.h"
55 
56 //#define DEBUG_KVM
57 
58 #ifdef DEBUG_KVM
59 #define DPRINTF(fmt, ...) \
60     do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
61 #else
62 #define DPRINTF(fmt, ...) \
63     do { } while (0)
64 #endif
65 
66 #define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"
67 
68 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
69     KVM_CAP_LAST_INFO
70 };
71 
72 static int cap_interrupt_unset = false;
73 static int cap_interrupt_level = false;
74 static int cap_segstate;
75 static int cap_booke_sregs;
76 static int cap_ppc_smt;
77 static int cap_ppc_smt_possible;
78 static int cap_ppc_rma;
79 static int cap_spapr_tce;
80 static int cap_spapr_tce_64;
81 static int cap_spapr_multitce;
82 static int cap_spapr_vfio;
83 static int cap_hior;
84 static int cap_one_reg;
85 static int cap_epr;
86 static int cap_ppc_watchdog;
87 static int cap_papr;
88 static int cap_htab_fd;
89 static int cap_fixup_hcalls;
90 static int cap_htm;             /* Hardware transactional memory support */
91 static int cap_mmu_radix;
92 static int cap_mmu_hash_v3;
93 static int cap_resize_hpt;
94 static int cap_ppc_pvr_compat;
95 
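/* Opcode of the software-breakpoint instruction that KVM reports via
 * KVM_REG_PPC_DEBUG_INST (fetched in kvm_arch_init_vcpu()). */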
96 static uint32_t debug_inst_opcode;
97 
98 /* XXX We have a race condition where we actually have a level-triggered
99  *     interrupt, but the infrastructure can't expose that yet, so the guest
100  *     takes the interrupt but ignores it, goes to sleep and never gets
101  *     notified that there's still an interrupt pending.
102  *
103  *     As a quick workaround, let's just wake up again 20 ms after we injected
104  *     an interrupt. That way we can ensure that we're always reinjecting
105  *     interrupts in case the guest swallowed them.
106  */
107 static QEMUTimer *idle_timer;
108 
109 static void kvm_kick_cpu(void *opaque)
110 {
111     PowerPCCPU *cpu = opaque;
112 
113     qemu_cpu_kick(CPU(cpu));
114 }
115 
116 /* Check whether we are running with KVM-PR (instead of KVM-HV).  This
117  * should only be used for fallback tests - generally we should use
118  * explicit capabilities for the features we want, rather than
119  * assuming what is/isn't available depending on the KVM variant. */
120 static bool kvmppc_is_pr(KVMState *ks)
121 {
122     /* Assume KVM-PR if the GET_PVINFO capability is available */
123     return kvm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
124 }
125 
126 static int kvm_ppc_register_host_cpu_type(void);
127 
128 int kvm_arch_init(MachineState *ms, KVMState *s)
129 {
130     cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
131     cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
132     cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
133     cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
134     cap_ppc_smt_possible = kvm_check_extension(s, KVM_CAP_PPC_SMT_POSSIBLE);
135     cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
136     cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
137     cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64);
138     cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
139     cap_spapr_vfio = false;
140     cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
141     cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
142     cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
143     cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
144     /* Note: we don't set cap_papr here, because this capability is
145      * only activated later, by kvmppc_set_papr(). */
146     cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
147     cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
148     cap_ppc_smt = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT);
149     cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
150     cap_mmu_radix = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_RADIX);
151     cap_mmu_hash_v3 = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_HASH_V3);
152     cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT);
153     /*
154      * Note: setting it to false because there is no such capability
155      * in KVM at the moment.
156      *
157      * TODO: call kvm_vm_check_extension() with the right capability
158      * after the kernel starts implementing it. */
159     cap_ppc_pvr_compat = false;
160 
161     if (!cap_interrupt_level) {
162         fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
163                         "VM to stall at times!\n");
164     }
165 
166     kvm_ppc_register_host_cpu_type();
167 
168     return 0;
169 }
170 
171 int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
172 {
173     return 0;
174 }
175 
176 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
177 {
178     CPUPPCState *cenv = &cpu->env;
179     CPUState *cs = CPU(cpu);
180     struct kvm_sregs sregs;
181     int ret;
182 
183     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
184         /* What we're really trying to say is "if we're on BookE, we use
185            the native PVR for now". This is the only sane way to check it,
186            though it may mislead users into thinking they can run
187            BookE guests on BookS. Let's hope nobody dares try :) */
188         return 0;
189     } else {
190         if (!cap_segstate) {
191             fprintf(stderr, "kvm error: missing PVR setting capability\n");
192             return -ENOSYS;
193         }
194     }
195 
196     ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
197     if (ret) {
198         return ret;
199     }
200 
201     sregs.pvr = cenv->spr[SPR_PVR];
202     return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
203 }
204 
205 /* Set up a shared TLB array with KVM */
206 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
207 {
208     CPUPPCState *env = &cpu->env;
209     CPUState *cs = CPU(cpu);
210     struct kvm_book3e_206_tlb_params params = {};
211     struct kvm_config_tlb cfg = {};
212     unsigned int entries = 0;
213     int ret, i;
214 
215     if (!kvm_enabled() ||
216         !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
217         return 0;
218     }
219 
220     assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
221 
222     for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
223         params.tlb_sizes[i] = booke206_tlb_size(env, i);
224         params.tlb_ways[i] = booke206_tlb_ways(env, i);
225         entries += params.tlb_sizes[i];
226     }
227 
228     assert(entries == env->nb_tlb);
229     assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
230 
231     env->tlb_dirty = true;
232 
233     cfg.array = (uintptr_t)env->tlb.tlbm;
234     cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
235     cfg.params = (uintptr_t)&params;
236     cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
237 
238     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
239     if (ret < 0) {
240         fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
241                 __func__, strerror(-ret));
242         return ret;
243     }
244 
245     env->kvm_sw_tlb = true;
246     return 0;
247 }
248 
249 
250 #if defined(TARGET_PPC64)
251 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
252                                        struct kvm_ppc_smmu_info *info)
253 {
254     CPUPPCState *env = &cpu->env;
255     CPUState *cs = CPU(cpu);
256 
257     memset(info, 0, sizeof(*info));
258 
259     /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
260      * need to "guess" what the supported page sizes are.
261      *
262      * For that to work we make a few assumptions:
263      *
264      * - Check whether we are running "PR" KVM which only supports 4K
265      *   and 16M pages, but supports them regardless of the backing
266      *   store characteristics. We also don't support 1T segments.
267      *
268      *   This is safe because if HV KVM ever supports that capability, or PR
269      *   KVM grows support for more page/segment sizes, those versions
270      *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
271      *   will not hit this fallback.
272      *
273      * - Else we are running HV KVM. This means we only support page
274      *   sizes that fit in the backing store. Additionally we only
275      *   advertise 64K pages if the processor is ARCH 2.06 and we assume
276      *   P7 encodings for the SLB and hash table. Here too, we assume
277      *   support for any newer processor will mean a kernel that
278      *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
279      *   this fallback.
280      */
281     if (kvmppc_is_pr(cs->kvm_state)) {
282         /* No flags */
283         info->flags = 0;
284         info->slb_size = 64;
285 
286         /* Standard 4k base page size segment */
287         info->sps[0].page_shift = 12;
288         info->sps[0].slb_enc = 0;
289         info->sps[0].enc[0].page_shift = 12;
290         info->sps[0].enc[0].pte_enc = 0;
291 
292         /* Standard 16M large page size segment */
293         info->sps[1].page_shift = 24;
294         info->sps[1].slb_enc = SLB_VSID_L;
295         info->sps[1].enc[0].page_shift = 24;
296         info->sps[1].enc[0].pte_enc = 0;
297     } else {
298         int i = 0;
299 
300         /* HV KVM has backing store size restrictions */
301         info->flags = KVM_PPC_PAGE_SIZES_REAL;
302 
303         if (env->mmu_model & POWERPC_MMU_1TSEG) {
304             info->flags |= KVM_PPC_1T_SEGMENTS;
305         }
306 
307         if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 ||
308            POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) {
309             info->slb_size = 32;
310         } else {
311             info->slb_size = 64;
312         }
313 
314         /* Standard 4k base page size segment */
315         info->sps[i].page_shift = 12;
316         info->sps[i].slb_enc = 0;
317         info->sps[i].enc[0].page_shift = 12;
318         info->sps[i].enc[0].pte_enc = 0;
319         i++;
320 
321         /* 64K on MMU 2.06 and later */
322         if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 ||
323             POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) {
324             info->sps[i].page_shift = 16;
325             info->sps[i].slb_enc = 0x110;
326             info->sps[i].enc[0].page_shift = 16;
327             info->sps[i].enc[0].pte_enc = 1;
328             i++;
329         }
330 
331         /* Standard 16M large page size segment */
332         info->sps[i].page_shift = 24;
333         info->sps[i].slb_enc = SLB_VSID_L;
334         info->sps[i].enc[0].page_shift = 24;
335         info->sps[i].enc[0].pte_enc = 0;
336     }
337 }
338 
339 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
340 {
341     CPUState *cs = CPU(cpu);
342     int ret;
343 
344     if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
345         ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
346         if (ret == 0) {
347             return;
348         }
349     }
350 
351     kvm_get_fallback_smmu_info(cpu, info);
352 }
353 
354 struct ppc_radix_page_info *kvm_get_radix_page_info(void)
355 {
356     KVMState *s = KVM_STATE(current_machine->accelerator);
357     struct ppc_radix_page_info *radix_page_info;
358     struct kvm_ppc_rmmu_info rmmu_info;
359     int i;
360 
361     if (!kvm_check_extension(s, KVM_CAP_PPC_MMU_RADIX)) {
362         return NULL;
363     }
364     if (kvm_vm_ioctl(s, KVM_PPC_GET_RMMU_INFO, &rmmu_info)) {
365         return NULL;
366     }
367     radix_page_info = g_malloc0(sizeof(*radix_page_info));
368     radix_page_info->count = 0;
369     for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) {
370         if (rmmu_info.ap_encodings[i]) {
371             radix_page_info->entries[i] = rmmu_info.ap_encodings[i];
372             radix_page_info->count++;
373         }
374     }
375     return radix_page_info;
376 }
377 
378 target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu,
379                                      bool radix, bool gtse,
380                                      uint64_t proc_tbl)
381 {
382     CPUState *cs = CPU(cpu);
383     int ret;
384     uint64_t flags = 0;
385     struct kvm_ppc_mmuv3_cfg cfg = {
386         .process_table = proc_tbl,
387     };
388 
389     if (radix) {
390         flags |= KVM_PPC_MMUV3_RADIX;
391     }
392     if (gtse) {
393         flags |= KVM_PPC_MMUV3_GTSE;
394     }
395     cfg.flags = flags;
396     ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_CONFIGURE_V3_MMU, &cfg);
397     switch (ret) {
398     case 0:
399         return H_SUCCESS;
400     case -EINVAL:
401         return H_PARAMETER;
402     case -ENODEV:
403         return H_NOT_AVAILABLE;
404     default:
405         return H_HARDWARE;
406     }
407 }
408 
409 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
410 {
411     if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
412         return true;
413     }
414 
415     return (1ul << shift) <= rampgsize;
416 }
417 
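/* Largest host page size backing guest RAM, cached from qemu_getrampagesize(). */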
418 static long max_cpu_page_size;
419 
420 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
421 {
422     static struct kvm_ppc_smmu_info smmu_info;
423     static bool has_smmu_info;
424     CPUPPCState *env = &cpu->env;
425     int iq, ik, jq, jk;
426     bool has_64k_pages = false;
427 
428     /* We only handle page sizes for 64-bit server guests for now */
429     if (!(env->mmu_model & POWERPC_MMU_64)) {
430         return;
431     }
432 
433     /* Collect MMU info from the kernel if we haven't already */
434     if (!has_smmu_info) {
435         kvm_get_smmu_info(cpu, &smmu_info);
436         has_smmu_info = true;
437     }
438 
439     if (!max_cpu_page_size) {
440         max_cpu_page_size = qemu_getrampagesize();
441     }
442 
443     /* Convert to QEMU form */
444     memset(&env->sps, 0, sizeof(env->sps));
445 
446     /* If we have HV KVM, we need to forbid CI large pages if our
447      * host page size is smaller than 64K.
448      */
449     if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) {
450         env->ci_large_pages = getpagesize() >= 0x10000;
451     }
452 
453     /*
454      * XXX This loop should be an entry-by-entry AND of the capabilities that
455      *     the selected CPU has with the capabilities that KVM supports.
456      */
457     for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
458         struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
459         struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
460 
461         if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
462                                  ksps->page_shift)) {
463             continue;
464         }
465         qsps->page_shift = ksps->page_shift;
466         qsps->slb_enc = ksps->slb_enc;
467         for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
468             if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
469                                      ksps->enc[jk].page_shift)) {
470                 continue;
471             }
472             if (ksps->enc[jk].page_shift == 16) {
473                 has_64k_pages = true;
474             }
475             qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
476             qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
477             if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
478                 break;
479             }
480         }
481         if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
482             break;
483         }
484     }
485     env->slb_nr = smmu_info.slb_size;
486     if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
487         env->mmu_model &= ~POWERPC_MMU_1TSEG;
488     }
489     if (!has_64k_pages) {
490         env->mmu_model &= ~POWERPC_MMU_64K;
491     }
492 }
493 
494 bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path)
495 {
496     Object *mem_obj = object_resolve_path(obj_path, NULL);
497     char *mempath = object_property_get_str(mem_obj, "mem-path", NULL);
498     long pagesize;
499 
500     if (mempath) {
501         pagesize = qemu_mempath_getpagesize(mempath);
502         g_free(mempath);
503     } else {
504         pagesize = getpagesize();
505     }
506 
507     return pagesize >= max_cpu_page_size;
508 }
509 
510 #else /* defined (TARGET_PPC64) */
511 
512 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
513 {
514 }
515 
516 bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path)
517 {
518     return true;
519 }
520 
521 #endif /* !defined (TARGET_PPC64) */
522 
523 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
524 {
525     return POWERPC_CPU(cpu)->vcpu_id;
526 }
527 
528 /* e500 supports 2 h/w breakpoints and 2 watchpoints.
529  * book3s supports only 1 watchpoint, so array size
530  * of 4 is sufficient for now.
531  */
532 #define MAX_HW_BKPTS 4
533 
534 static struct HWBreakpoint {
535     target_ulong addr;
536     int type;
537 } hw_debug_points[MAX_HW_BKPTS];
538 
539 static CPUWatchpoint hw_watchpoint;
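/* Scratch watchpoint record handed back to the core gdbstub code when a
 * hardware watchpoint fires (see kvm_handle_debug()). */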
540 
541 /* By default no hardware breakpoints or watchpoints are supported */
542 static int max_hw_breakpoint;
543 static int max_hw_watchpoint;
544 static int nb_hw_breakpoint;
545 static int nb_hw_watchpoint;
546 
547 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
548 {
549     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
550         max_hw_breakpoint = 2;
551         max_hw_watchpoint = 2;
552     }
553 
554     if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
555         fprintf(stderr, "Error initializing h/w breakpoints\n");
556         return;
557     }
558 }
559 
560 int kvm_arch_init_vcpu(CPUState *cs)
561 {
562     PowerPCCPU *cpu = POWERPC_CPU(cs);
563     CPUPPCState *cenv = &cpu->env;
564     int ret;
565 
566     /* Gather server mmu info from KVM and update the CPU state */
567     kvm_fixup_page_sizes(cpu);
568 
569     /* Synchronize sregs with kvm */
570     ret = kvm_arch_sync_sregs(cpu);
571     if (ret) {
572         if (ret == -EINVAL) {
573             error_report("Register sync failed... If you're using kvm-hv.ko,"
574                          " only \"-cpu host\" is possible");
575         }
576         return ret;
577     }
578 
579     idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
580 
581     switch (cenv->mmu_model) {
582     case POWERPC_MMU_BOOKE206:
583         /* This target supports access to KVM's guest TLB */
584         ret = kvm_booke206_tlb_init(cpu);
585         break;
586     case POWERPC_MMU_2_07:
587         if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
588             /* KVM-HV provides transactional memory on POWER8 even without
589              * the KVM_CAP_PPC_HTM extension, so enable it here instead as
590              * long as it's available to userspace on the host. */
591             if (qemu_getauxval(AT_HWCAP2) & PPC_FEATURE2_HAS_HTM) {
592                 cap_htm = true;
593             }
594         }
595         break;
596     default:
597         break;
598     }
599 
600     kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
601     kvmppc_hw_debug_points_init(cenv);
602 
603     return ret;
604 }
605 
606 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
607 {
608     CPUPPCState *env = &cpu->env;
609     CPUState *cs = CPU(cpu);
610     struct kvm_dirty_tlb dirty_tlb;
611     unsigned char *bitmap;
612     int ret;
613 
614     if (!env->kvm_sw_tlb) {
615         return;
616     }
617 
618     bitmap = g_malloc((env->nb_tlb + 7) / 8);
619     memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
620 
621     dirty_tlb.bitmap = (uintptr_t)bitmap;
622     dirty_tlb.num_dirty = env->nb_tlb;
623 
624     ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
625     if (ret) {
626         fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
627                 __func__, strerror(-ret));
628     }
629 
630     g_free(bitmap);
631 }
632 
633 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
634 {
635     PowerPCCPU *cpu = POWERPC_CPU(cs);
636     CPUPPCState *env = &cpu->env;
637     union {
638         uint32_t u32;
639         uint64_t u64;
640     } val;
641     struct kvm_one_reg reg = {
642         .id = id,
643         .addr = (uintptr_t) &val,
644     };
645     int ret;
646 
647     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
648     if (ret != 0) {
649         trace_kvm_failed_spr_get(spr, strerror(errno));
650     } else {
651         switch (id & KVM_REG_SIZE_MASK) {
652         case KVM_REG_SIZE_U32:
653             env->spr[spr] = val.u32;
654             break;
655 
656         case KVM_REG_SIZE_U64:
657             env->spr[spr] = val.u64;
658             break;
659 
660         default:
661             /* Don't handle this size yet */
662             abort();
663         }
664     }
665 }
666 
667 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
668 {
669     PowerPCCPU *cpu = POWERPC_CPU(cs);
670     CPUPPCState *env = &cpu->env;
671     union {
672         uint32_t u32;
673         uint64_t u64;
674     } val;
675     struct kvm_one_reg reg = {
676         .id = id,
677         .addr = (uintptr_t) &val,
678     };
679     int ret;
680 
681     switch (id & KVM_REG_SIZE_MASK) {
682     case KVM_REG_SIZE_U32:
683         val.u32 = env->spr[spr];
684         break;
685 
686     case KVM_REG_SIZE_U64:
687         val.u64 = env->spr[spr];
688         break;
689 
690     default:
691         /* Don't handle this size yet */
692         abort();
693     }
694 
695     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
696     if (ret != 0) {
697         trace_kvm_failed_spr_set(spr, strerror(errno));
698     }
699 }
700 
701 static int kvm_put_fp(CPUState *cs)
702 {
703     PowerPCCPU *cpu = POWERPC_CPU(cs);
704     CPUPPCState *env = &cpu->env;
705     struct kvm_one_reg reg;
706     int i;
707     int ret;
708 
709     if (env->insns_flags & PPC_FLOAT) {
710         uint64_t fpscr = env->fpscr;
711         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
712 
713         reg.id = KVM_REG_PPC_FPSCR;
714         reg.addr = (uintptr_t)&fpscr;
715         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
716         if (ret < 0) {
717             DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
718             return ret;
719         }
720 
721         for (i = 0; i < 32; i++) {
722             uint64_t vsr[2];
723 
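            /* Each VSR is 128 bits and its architecturally first doubleword
             * aliases the corresponding FPR; the two doublewords are laid
             * out in host byte order before being handed to KVM, hence the
             * endianness-dependent assignment below. */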
724 #ifdef HOST_WORDS_BIGENDIAN
725             vsr[0] = float64_val(env->fpr[i]);
726             vsr[1] = env->vsr[i];
727 #else
728             vsr[0] = env->vsr[i];
729             vsr[1] = float64_val(env->fpr[i]);
730 #endif
731             reg.addr = (uintptr_t) &vsr;
732             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
733 
734             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
735             if (ret < 0) {
736                 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
737                         i, strerror(errno));
738                 return ret;
739             }
740         }
741     }
742 
743     if (env->insns_flags & PPC_ALTIVEC) {
744         reg.id = KVM_REG_PPC_VSCR;
745         reg.addr = (uintptr_t)&env->vscr;
746         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
747         if (ret < 0) {
748             DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
749             return ret;
750         }
751 
752         for (i = 0; i < 32; i++) {
753             reg.id = KVM_REG_PPC_VR(i);
754             reg.addr = (uintptr_t)&env->avr[i];
755             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
756             if (ret < 0) {
757                 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
758                 return ret;
759             }
760         }
761     }
762 
763     return 0;
764 }
765 
766 static int kvm_get_fp(CPUState *cs)
767 {
768     PowerPCCPU *cpu = POWERPC_CPU(cs);
769     CPUPPCState *env = &cpu->env;
770     struct kvm_one_reg reg;
771     int i;
772     int ret;
773 
774     if (env->insns_flags & PPC_FLOAT) {
775         uint64_t fpscr;
776         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
777 
778         reg.id = KVM_REG_PPC_FPSCR;
779         reg.addr = (uintptr_t)&fpscr;
780         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
781         if (ret < 0) {
782             DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
783             return ret;
784         } else {
785             env->fpscr = fpscr;
786         }
787 
788         for (i = 0; i < 32; i++) {
789             uint64_t vsr[2];
790 
791             reg.addr = (uintptr_t) &vsr;
792             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
793 
794             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
795             if (ret < 0) {
796                 DPRINTF("Unable to get %s%d from KVM: %s\n",
797                         vsx ? "VSR" : "FPR", i, strerror(errno));
798                 return ret;
799             } else {
800 #ifdef HOST_WORDS_BIGENDIAN
801                 env->fpr[i] = vsr[0];
802                 if (vsx) {
803                     env->vsr[i] = vsr[1];
804                 }
805 #else
806                 env->fpr[i] = vsr[1];
807                 if (vsx) {
808                     env->vsr[i] = vsr[0];
809                 }
810 #endif
811             }
812         }
813     }
814 
815     if (env->insns_flags & PPC_ALTIVEC) {
816         reg.id = KVM_REG_PPC_VSCR;
817         reg.addr = (uintptr_t)&env->vscr;
818         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
819         if (ret < 0) {
820             DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
821             return ret;
822         }
823 
824         for (i = 0; i < 32; i++) {
825             reg.id = KVM_REG_PPC_VR(i);
826             reg.addr = (uintptr_t)&env->avr[i];
827             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
828             if (ret < 0) {
829                 DPRINTF("Unable to get VR%d from KVM: %s\n",
830                         i, strerror(errno));
831                 return ret;
832             }
833         }
834     }
835 
836     return 0;
837 }
838 
839 #if defined(TARGET_PPC64)
840 static int kvm_get_vpa(CPUState *cs)
841 {
842     PowerPCCPU *cpu = POWERPC_CPU(cs);
843     CPUPPCState *env = &cpu->env;
844     struct kvm_one_reg reg;
845     int ret;
846 
847     reg.id = KVM_REG_PPC_VPA_ADDR;
848     reg.addr = (uintptr_t)&env->vpa_addr;
849     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
850     if (ret < 0) {
851         DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
852         return ret;
853     }
854 
855     assert((uintptr_t)&env->slb_shadow_size
856            == ((uintptr_t)&env->slb_shadow_addr + 8));
857     reg.id = KVM_REG_PPC_VPA_SLB;
858     reg.addr = (uintptr_t)&env->slb_shadow_addr;
859     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
860     if (ret < 0) {
861         DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
862                 strerror(errno));
863         return ret;
864     }
865 
866     assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
867     reg.id = KVM_REG_PPC_VPA_DTL;
868     reg.addr = (uintptr_t)&env->dtl_addr;
869     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
870     if (ret < 0) {
871         DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
872                 strerror(errno));
873         return ret;
874     }
875 
876     return 0;
877 }
878 
879 static int kvm_put_vpa(CPUState *cs)
880 {
881     PowerPCCPU *cpu = POWERPC_CPU(cs);
882     CPUPPCState *env = &cpu->env;
883     struct kvm_one_reg reg;
884     int ret;
885 
886     /* SLB shadow or DTL can't be registered unless a master VPA is
887      * registered.  That means when restoring state, if a VPA *is*
888      * registered, we need to set that up first.  If not, we need to
889      * deregister the others before deregistering the master VPA. */
890     assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
891 
892     if (env->vpa_addr) {
893         reg.id = KVM_REG_PPC_VPA_ADDR;
894         reg.addr = (uintptr_t)&env->vpa_addr;
895         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
896         if (ret < 0) {
897             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
898             return ret;
899         }
900     }
901 
902     assert((uintptr_t)&env->slb_shadow_size
903            == ((uintptr_t)&env->slb_shadow_addr + 8));
904     reg.id = KVM_REG_PPC_VPA_SLB;
905     reg.addr = (uintptr_t)&env->slb_shadow_addr;
906     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
907     if (ret < 0) {
908         DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
909         return ret;
910     }
911 
912     assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
913     reg.id = KVM_REG_PPC_VPA_DTL;
914     reg.addr = (uintptr_t)&env->dtl_addr;
915     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
916     if (ret < 0) {
917         DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
918                 strerror(errno));
919         return ret;
920     }
921 
922     if (!env->vpa_addr) {
923         reg.id = KVM_REG_PPC_VPA_ADDR;
924         reg.addr = (uintptr_t)&env->vpa_addr;
925         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
926         if (ret < 0) {
927             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
928             return ret;
929         }
930     }
931 
932     return 0;
933 }
934 #endif /* TARGET_PPC64 */
935 
936 int kvmppc_put_books_sregs(PowerPCCPU *cpu)
937 {
938     CPUPPCState *env = &cpu->env;
939     struct kvm_sregs sregs;
940     int i;
941 
942     sregs.pvr = env->spr[SPR_PVR];
943 
944     sregs.u.s.sdr1 = env->spr[SPR_SDR1];
945 
946     /* Sync SLB */
947 #ifdef TARGET_PPC64
948     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
949         sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
950         if (env->slb[i].esid & SLB_ESID_V) {
951             sregs.u.s.ppc64.slb[i].slbe |= i;
952         }
953         sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
954     }
955 #endif
956 
957     /* Sync SRs */
958     for (i = 0; i < 16; i++) {
959         sregs.u.s.ppc32.sr[i] = env->sr[i];
960     }
961 
962     /* Sync BATs */
963     for (i = 0; i < 8; i++) {
964         /* Beware. We have to swap the upper and lower 32-bit halves here */
965         sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
966             | env->DBAT[1][i];
967         sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
968             | env->IBAT[1][i];
969     }
970 
971     return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
972 }
973 
974 int kvm_arch_put_registers(CPUState *cs, int level)
975 {
976     PowerPCCPU *cpu = POWERPC_CPU(cs);
977     CPUPPCState *env = &cpu->env;
978     struct kvm_regs regs;
979     int ret;
980     int i;
981 
982     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
983     if (ret < 0) {
984         return ret;
985     }
986 
987     regs.ctr = env->ctr;
988     regs.lr  = env->lr;
989     regs.xer = cpu_read_xer(env);
990     regs.msr = env->msr;
991     regs.pc = env->nip;
992 
993     regs.srr0 = env->spr[SPR_SRR0];
994     regs.srr1 = env->spr[SPR_SRR1];
995 
996     regs.sprg0 = env->spr[SPR_SPRG0];
997     regs.sprg1 = env->spr[SPR_SPRG1];
998     regs.sprg2 = env->spr[SPR_SPRG2];
999     regs.sprg3 = env->spr[SPR_SPRG3];
1000     regs.sprg4 = env->spr[SPR_SPRG4];
1001     regs.sprg5 = env->spr[SPR_SPRG5];
1002     regs.sprg6 = env->spr[SPR_SPRG6];
1003     regs.sprg7 = env->spr[SPR_SPRG7];
1004 
1005     regs.pid = env->spr[SPR_BOOKE_PID];
1006 
1007     for (i = 0; i < 32; i++)
1008         regs.gpr[i] = env->gpr[i];
1009 
1010     regs.cr = 0;
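    /* Pack the eight 4-bit CR fields into the single 32-bit regs.cr word,
     * with CR0 in the most significant nibble. */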
1011     for (i = 0; i < 8; i++) {
1012         regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
1013     }
1014 
1015     ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
1016     if (ret < 0)
1017         return ret;
1018 
1019     kvm_put_fp(cs);
1020 
1021     if (env->tlb_dirty) {
1022         kvm_sw_tlb_put(cpu);
1023         env->tlb_dirty = false;
1024     }
1025 
1026     if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
1027         ret = kvmppc_put_books_sregs(cpu);
1028         if (ret < 0) {
1029             return ret;
1030         }
1031     }
1032 
1033     if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
1034         kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1035     }
1036 
1037     if (cap_one_reg) {
1038         int i;
1039 
1040         /* We deliberately ignore errors here: for kernels which have
1041          * the ONE_REG calls but don't support the specific
1042          * registers, there's a reasonable chance things will still
1043          * work, at least until we try to migrate. */
1044         for (i = 0; i < 1024; i++) {
1045             uint64_t id = env->spr_cb[i].one_reg_id;
1046 
1047             if (id != 0) {
1048                 kvm_put_one_spr(cs, id, i);
1049             }
1050         }
1051 
1052 #ifdef TARGET_PPC64
1053         if (msr_ts) {
1054             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1055                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1056             }
1057             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1058                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1059             }
1060             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1061             kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1062             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1063             kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1064             kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1065             kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1066             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1067             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1068             kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1069             kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1070         }
1071 
1072         if (cap_papr) {
1073             if (kvm_put_vpa(cs) < 0) {
1074                 DPRINTF("Warning: Unable to set VPA information to KVM\n");
1075             }
1076         }
1077 
1078         kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1079 #endif /* TARGET_PPC64 */
1080     }
1081 
1082     return ret;
1083 }
1084 
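/* Recompute a BookE exception vector from its IVOR offset plus the IVPR base. */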
1085 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1086 {
1087      env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1088 }
1089 
1090 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1091 {
1092     CPUPPCState *env = &cpu->env;
1093     struct kvm_sregs sregs;
1094     int ret;
1095 
1096     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1097     if (ret < 0) {
1098         return ret;
1099     }
1100 
1101     if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1102         env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1103         env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1104         env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1105         env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1106         env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1107         env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1108         env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1109         env->spr[SPR_DECR] = sregs.u.e.dec;
1110         env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1111         env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1112         env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1113     }
1114 
1115     if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1116         env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1117         env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1118         env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1119         env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1120         env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1121     }
1122 
1123     if (sregs.u.e.features & KVM_SREGS_E_64) {
1124         env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1125     }
1126 
1127     if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1128         env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1129     }
1130 
1131     if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1132         env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1133         kvm_sync_excp(env, POWERPC_EXCP_CRITICAL,  SPR_BOOKE_IVOR0);
1134         env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1135         kvm_sync_excp(env, POWERPC_EXCP_MCHECK,  SPR_BOOKE_IVOR1);
1136         env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1137         kvm_sync_excp(env, POWERPC_EXCP_DSI,  SPR_BOOKE_IVOR2);
1138         env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1139         kvm_sync_excp(env, POWERPC_EXCP_ISI,  SPR_BOOKE_IVOR3);
1140         env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1141         kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL,  SPR_BOOKE_IVOR4);
1142         env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1143         kvm_sync_excp(env, POWERPC_EXCP_ALIGN,  SPR_BOOKE_IVOR5);
1144         env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1145         kvm_sync_excp(env, POWERPC_EXCP_PROGRAM,  SPR_BOOKE_IVOR6);
1146         env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1147         kvm_sync_excp(env, POWERPC_EXCP_FPU,  SPR_BOOKE_IVOR7);
1148         env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1149         kvm_sync_excp(env, POWERPC_EXCP_SYSCALL,  SPR_BOOKE_IVOR8);
1150         env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1151         kvm_sync_excp(env, POWERPC_EXCP_APU,  SPR_BOOKE_IVOR9);
1152         env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1153         kvm_sync_excp(env, POWERPC_EXCP_DECR,  SPR_BOOKE_IVOR10);
1154         env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1155         kvm_sync_excp(env, POWERPC_EXCP_FIT,  SPR_BOOKE_IVOR11);
1156         env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1157         kvm_sync_excp(env, POWERPC_EXCP_WDT,  SPR_BOOKE_IVOR12);
1158         env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1159         kvm_sync_excp(env, POWERPC_EXCP_DTLB,  SPR_BOOKE_IVOR13);
1160         env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1161         kvm_sync_excp(env, POWERPC_EXCP_ITLB,  SPR_BOOKE_IVOR14);
1162         env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1163         kvm_sync_excp(env, POWERPC_EXCP_DEBUG,  SPR_BOOKE_IVOR15);
1164 
1165         if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1166             env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1167             kvm_sync_excp(env, POWERPC_EXCP_SPEU,  SPR_BOOKE_IVOR32);
1168             env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1169             kvm_sync_excp(env, POWERPC_EXCP_EFPDI,  SPR_BOOKE_IVOR33);
1170             env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1171             kvm_sync_excp(env, POWERPC_EXCP_EFPRI,  SPR_BOOKE_IVOR34);
1172         }
1173 
1174         if (sregs.u.e.features & KVM_SREGS_E_PM) {
1175             env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1176             kvm_sync_excp(env, POWERPC_EXCP_EPERFM,  SPR_BOOKE_IVOR35);
1177         }
1178 
1179         if (sregs.u.e.features & KVM_SREGS_E_PC) {
1180             env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1181             kvm_sync_excp(env, POWERPC_EXCP_DOORI,  SPR_BOOKE_IVOR36);
1182             env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1183             kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1184         }
1185     }
1186 
1187     if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1188         env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1189         env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1190         env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1191         env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1192         env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1193         env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1194         env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1195         env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1196         env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1197         env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1198     }
1199 
1200     if (sregs.u.e.features & KVM_SREGS_EXP) {
1201         env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1202     }
1203 
1204     if (sregs.u.e.features & KVM_SREGS_E_PD) {
1205         env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1206         env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1207     }
1208 
1209     if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1210         env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1211         env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1212         env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1213 
1214         if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1215             env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1216             env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1217         }
1218     }
1219 
1220     return 0;
1221 }
1222 
1223 static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1224 {
1225     CPUPPCState *env = &cpu->env;
1226     struct kvm_sregs sregs;
1227     int ret;
1228     int i;
1229 
1230     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1231     if (ret < 0) {
1232         return ret;
1233     }
1234 
1235     if (!cpu->vhyp) {
1236         ppc_store_sdr1(env, sregs.u.s.sdr1);
1237     }
1238 
1239     /* Sync SLB */
1240 #ifdef TARGET_PPC64
1241     /*
1242      * The packed SLB array we get from KVM_GET_SREGS only contains
1243      * information about valid entries. So we flush our internal copy
1244      * to get rid of stale ones, then put all valid SLB entries back
1245      * in.
1246      */
1247     memset(env->slb, 0, sizeof(env->slb));
1248     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1249         target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1250         target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1251         /*
1252          * Only restore valid entries
1253          */
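        /* The low 12 bits of rb carry the SLB slot index (see the put side
         * above); strip them from the ESID before calling ppc_store_slb(). */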
1254         if (rb & SLB_ESID_V) {
1255             ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1256         }
1257     }
1258 #endif
1259 
1260     /* Sync SRs */
1261     for (i = 0; i < 16; i++) {
1262         env->sr[i] = sregs.u.s.ppc32.sr[i];
1263     }
1264 
1265     /* Sync BATs */
1266     for (i = 0; i < 8; i++) {
1267         env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1268         env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1269         env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1270         env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1271     }
1272 
1273     return 0;
1274 }
1275 
1276 int kvm_arch_get_registers(CPUState *cs)
1277 {
1278     PowerPCCPU *cpu = POWERPC_CPU(cs);
1279     CPUPPCState *env = &cpu->env;
1280     struct kvm_regs regs;
1281     uint32_t cr;
1282     int i, ret;
1283 
1284     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1285     if (ret < 0)
1286         return ret;
1287 
1288     cr = regs.cr;
1289     for (i = 7; i >= 0; i--) {
1290         env->crf[i] = cr & 15;
1291         cr >>= 4;
1292     }
1293 
1294     env->ctr = regs.ctr;
1295     env->lr = regs.lr;
1296     cpu_write_xer(env, regs.xer);
1297     env->msr = regs.msr;
1298     env->nip = regs.pc;
1299 
1300     env->spr[SPR_SRR0] = regs.srr0;
1301     env->spr[SPR_SRR1] = regs.srr1;
1302 
1303     env->spr[SPR_SPRG0] = regs.sprg0;
1304     env->spr[SPR_SPRG1] = regs.sprg1;
1305     env->spr[SPR_SPRG2] = regs.sprg2;
1306     env->spr[SPR_SPRG3] = regs.sprg3;
1307     env->spr[SPR_SPRG4] = regs.sprg4;
1308     env->spr[SPR_SPRG5] = regs.sprg5;
1309     env->spr[SPR_SPRG6] = regs.sprg6;
1310     env->spr[SPR_SPRG7] = regs.sprg7;
1311 
1312     env->spr[SPR_BOOKE_PID] = regs.pid;
1313 
1314     for (i = 0; i < 32; i++)
1315         env->gpr[i] = regs.gpr[i];
1316 
1317     kvm_get_fp(cs);
1318 
1319     if (cap_booke_sregs) {
1320         ret = kvmppc_get_booke_sregs(cpu);
1321         if (ret < 0) {
1322             return ret;
1323         }
1324     }
1325 
1326     if (cap_segstate) {
1327         ret = kvmppc_get_books_sregs(cpu);
1328         if (ret < 0) {
1329             return ret;
1330         }
1331     }
1332 
1333     if (cap_hior) {
1334         kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1335     }
1336 
1337     if (cap_one_reg) {
1338         int i;
1339 
1340         /* We deliberately ignore errors here: for kernels which have
1341          * the ONE_REG calls but don't support the specific
1342          * registers, there's a reasonable chance things will still
1343          * work, at least until we try to migrate. */
1344         for (i = 0; i < 1024; i++) {
1345             uint64_t id = env->spr_cb[i].one_reg_id;
1346 
1347             if (id != 0) {
1348                 kvm_get_one_spr(cs, id, i);
1349             }
1350         }
1351 
1352 #ifdef TARGET_PPC64
1353         if (msr_ts) {
1354             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1355                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1356             }
1357             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1358                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1359             }
1360             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1361             kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1362             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1363             kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1364             kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1365             kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1366             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1367             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1368             kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1369             kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1370         }
1371 
1372         if (cap_papr) {
1373             if (kvm_get_vpa(cs) < 0) {
1374                 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1375             }
1376         }
1377 
1378         kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1379 #endif
1380     }
1381 
1382     return 0;
1383 }
1384 
1385 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1386 {
1387     unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1388 
1389     if (irq != PPC_INTERRUPT_EXT) {
1390         return 0;
1391     }
1392 
1393     if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1394         return 0;
1395     }
1396 
1397     kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1398 
1399     return 0;
1400 }
1401 
1402 #if defined(TARGET_PPCEMB)
1403 #define PPC_INPUT_INT PPC40x_INPUT_INT
1404 #elif defined(TARGET_PPC64)
1405 #define PPC_INPUT_INT PPC970_INPUT_INT
1406 #else
1407 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1408 #endif
1409 
1410 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1411 {
1412     PowerPCCPU *cpu = POWERPC_CPU(cs);
1413     CPUPPCState *env = &cpu->env;
1414     int r;
1415     unsigned irq;
1416 
1417     qemu_mutex_lock_iothread();
1418 
1419     /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1420      * interrupt, reset, etc.) in PPC-specific env->irq_input_state. */
1421     if (!cap_interrupt_level &&
1422         run->ready_for_interrupt_injection &&
1423         (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1424         (env->irq_input_state & (1<<PPC_INPUT_INT)))
1425     {
1426         /* For now KVM disregards the 'irq' argument. However, in the
1427          * future KVM could cache it in-kernel to avoid a heavyweight exit
1428          * when reading the UIC.
1429          */
1430         irq = KVM_INTERRUPT_SET;
1431 
1432         DPRINTF("injected interrupt %d\n", irq);
1433         r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1434         if (r < 0) {
1435             printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1436         }
1437 
1438         /* Always wake up soon in case the interrupt was level based */
1439         timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1440                        (NANOSECONDS_PER_SECOND / 50));
1441     }
1442 
1443     /* We don't know if there are more interrupts pending after this. However,
1444      * the guest will return to userspace in the course of handling this one
1445      * anyway, so we will get a chance to deliver the rest. */
1446 
1447     qemu_mutex_unlock_iothread();
1448 }
1449 
1450 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1451 {
1452     return MEMTXATTRS_UNSPECIFIED;
1453 }
1454 
1455 int kvm_arch_process_async_events(CPUState *cs)
1456 {
1457     return cs->halted;
1458 }
1459 
1460 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1461 {
1462     CPUState *cs = CPU(cpu);
1463     CPUPPCState *env = &cpu->env;
1464 
1465     if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1466         cs->halted = 1;
1467         cs->exception_index = EXCP_HLT;
1468     }
1469 
1470     return 0;
1471 }
1472 
1473 /* map dcr access to existing qemu dcr emulation */
1474 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1475 {
1476     if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1477         fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1478 
1479     return 0;
1480 }
1481 
1482 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1483 {
1484     if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1485         fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1486 
1487     return 0;
1488 }
1489 
1490 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1491 {
1492     /* Mixed endian case is not handled */
1493     uint32_t sc = debug_inst_opcode;
1494 
1495     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1496                             sizeof(sc), 0) ||
1497         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1498         return -EINVAL;
1499     }
1500 
1501     return 0;
1502 }
1503 
1504 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1505 {
1506     uint32_t sc;
1507 
1508     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1509         sc != debug_inst_opcode ||
1510         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1511                             sizeof(sc), 1)) {
1512         return -EINVAL;
1513     }
1514 
1515     return 0;
1516 }
1517 
1518 static int find_hw_breakpoint(target_ulong addr, int type)
1519 {
1520     int n;
1521 
1522     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1523            <= ARRAY_SIZE(hw_debug_points));
1524 
1525     for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1526         if (hw_debug_points[n].addr == addr &&
1527              hw_debug_points[n].type == type) {
1528             return n;
1529         }
1530     }
1531 
1532     return -1;
1533 }
1534 
1535 static int find_hw_watchpoint(target_ulong addr, int *flag)
1536 {
1537     int n;
1538 
1539     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1540     if (n >= 0) {
1541         *flag = BP_MEM_ACCESS;
1542         return n;
1543     }
1544 
1545     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1546     if (n >= 0) {
1547         *flag = BP_MEM_WRITE;
1548         return n;
1549     }
1550 
1551     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1552     if (n >= 0) {
1553         *flag = BP_MEM_READ;
1554         return n;
1555     }
1556 
1557     return -1;
1558 }
1559 
1560 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1561                                   target_ulong len, int type)
1562 {
1563     if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1564         return -ENOBUFS;
1565     }
1566 
1567     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1568     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1569 
1570     switch (type) {
1571     case GDB_BREAKPOINT_HW:
1572         if (nb_hw_breakpoint >= max_hw_breakpoint) {
1573             return -ENOBUFS;
1574         }
1575 
1576         if (find_hw_breakpoint(addr, type) >= 0) {
1577             return -EEXIST;
1578         }
1579 
1580         nb_hw_breakpoint++;
1581         break;
1582 
1583     case GDB_WATCHPOINT_WRITE:
1584     case GDB_WATCHPOINT_READ:
1585     case GDB_WATCHPOINT_ACCESS:
1586         if (nb_hw_watchpoint >= max_hw_watchpoint) {
1587             return -ENOBUFS;
1588         }
1589 
1590         if (find_hw_breakpoint(addr, type) >= 0) {
1591             return -EEXIST;
1592         }
1593 
1594         nb_hw_watchpoint++;
1595         break;
1596 
1597     default:
1598         return -ENOSYS;
1599     }
1600 
1601     return 0;
1602 }
1603 
1604 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1605                                   target_ulong len, int type)
1606 {
1607     int n;
1608 
1609     n = find_hw_breakpoint(addr, type);
1610     if (n < 0) {
1611         return -ENOENT;
1612     }
1613 
1614     switch (type) {
1615     case GDB_BREAKPOINT_HW:
1616         nb_hw_breakpoint--;
1617         break;
1618 
1619     case GDB_WATCHPOINT_WRITE:
1620     case GDB_WATCHPOINT_READ:
1621     case GDB_WATCHPOINT_ACCESS:
1622         nb_hw_watchpoint--;
1623         break;
1624 
1625     default:
1626         return -ENOSYS;
1627     }
1628     hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1629 
1630     return 0;
1631 }
1632 
1633 void kvm_arch_remove_all_hw_breakpoints(void)
1634 {
1635     nb_hw_breakpoint = nb_hw_watchpoint = 0;
1636 }
1637 
1638 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1639 {
1640     int n;
1641 
1642     /* Software Breakpoint updates */
1643     if (kvm_sw_breakpoints_active(cs)) {
1644         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1645     }
1646 
1647     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1648            <= ARRAY_SIZE(hw_debug_points));
1649     assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1650 
1651     if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1652         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1653         memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1654         for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1655             switch (hw_debug_points[n].type) {
1656             case GDB_BREAKPOINT_HW:
1657                 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1658                 break;
1659             case GDB_WATCHPOINT_WRITE:
1660                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1661                 break;
1662             case GDB_WATCHPOINT_READ:
1663                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1664                 break;
1665             case GDB_WATCHPOINT_ACCESS:
1666                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1667                                         KVMPPC_DEBUG_WATCH_READ;
1668                 break;
1669             default:
1670                 cpu_abort(cs, "Unsupported breakpoint type\n");
1671             }
1672             dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1673         }
1674     }
1675 }
1676 
1677 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1678 {
1679     CPUState *cs = CPU(cpu);
1680     CPUPPCState *env = &cpu->env;
1681     struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1682     int handle = 0;
1683     int n;
1684     int flag = 0;
1685 
1686     if (cs->singlestep_enabled) {
1687         handle = 1;
1688     } else if (arch_info->status) {
1689         if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1690             if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1691                 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1692                 if (n >= 0) {
1693                     handle = 1;
1694                 }
1695             } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1696                                             KVMPPC_DEBUG_WATCH_WRITE)) {
1697                 n = find_hw_watchpoint(arch_info->address,  &flag);
1698                 if (n >= 0) {
1699                     handle = 1;
1700                     cs->watchpoint_hit = &hw_watchpoint;
1701                     hw_watchpoint.vaddr = hw_debug_points[n].addr;
1702                     hw_watchpoint.flags = flag;
1703                 }
1704             }
1705         }
1706     } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1707         handle = 1;
1708     } else {
1709         /* QEMU cannot handle this debug exception, so inject a
1710          * program exception into the guest instead;
1711          * yes, a program exception, NOT a debug exception.
1712          * While QEMU owns the debug resources, debug exceptions must
1713          * always be enabled: we set MSR_DE and also MSRP_DEP so the
1714          * guest cannot change MSR_DE.
1715          * When emulating debug resources for the guest, the guest
1716          * must control MSR_DE (enable/disable the debug interrupt as
1717          * needed).  Supporting both configurations at once is not
1718          * possible, so debug resources cannot be shared between QEMU
1719          * and the guest on the BookE architecture.
1720          * In the current design QEMU has priority over the guest:
1721          * if QEMU is using the debug resources, the guest cannot use
1722          * them.
1723          * For software breakpoints QEMU uses a privileged instruction,
1724          * so the guest cannot have caused this debug exception; the
1725          * only possibility is that the guest executed a privileged or
1726          * illegal instruction, which is why we inject a program
1727          * interrupt.
1728          */
1729 
1730         cpu_synchronize_state(cs);
1731         /* env->nip is PC, so increment this by 4 to use
1732          * ppc_cpu_do_interrupt(), which sets srr0 = env->nip - 4.
1733          */
1734         env->nip += 4;
1735         cs->exception_index = POWERPC_EXCP_PROGRAM;
1736         env->error_code = POWERPC_EXCP_INVAL;
1737         ppc_cpu_do_interrupt(cs);
1738     }
1739 
1740     return handle;
1741 }
1742 
1743 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1744 {
1745     PowerPCCPU *cpu = POWERPC_CPU(cs);
1746     CPUPPCState *env = &cpu->env;
1747     int ret;
1748 
1749     qemu_mutex_lock_iothread();
1750 
1751     switch (run->exit_reason) {
1752     case KVM_EXIT_DCR:
1753         if (run->dcr.is_write) {
1754             DPRINTF("handle dcr write\n");
1755             ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1756         } else {
1757             DPRINTF("handle dcr read\n");
1758             ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1759         }
1760         break;
1761     case KVM_EXIT_HLT:
1762         DPRINTF("handle halt\n");
1763         ret = kvmppc_handle_halt(cpu);
1764         break;
1765 #if defined(TARGET_PPC64)
1766     case KVM_EXIT_PAPR_HCALL:
1767         DPRINTF("handle PAPR hypercall\n");
1768         run->papr_hcall.ret = spapr_hypercall(cpu,
1769                                               run->papr_hcall.nr,
1770                                               run->papr_hcall.args);
1771         ret = 0;
1772         break;
1773 #endif
1774     case KVM_EXIT_EPR:
1775         DPRINTF("handle epr\n");
1776         run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1777         ret = 0;
1778         break;
1779     case KVM_EXIT_WATCHDOG:
1780         DPRINTF("handle watchdog expiry\n");
1781         watchdog_perform_action();
1782         ret = 0;
1783         break;
1784 
1785     case KVM_EXIT_DEBUG:
1786         DPRINTF("handle debug exception\n");
1787         if (kvm_handle_debug(cpu, run)) {
1788             ret = EXCP_DEBUG;
1789             break;
1790         }
1791         /* re-enter, this exception was guest-internal */
1792         ret = 0;
1793         break;
1794 
1795     default:
1796         fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1797         ret = -1;
1798         break;
1799     }
1800 
1801     qemu_mutex_unlock_iothread();
1802     return ret;
1803 }
1804 
1805 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
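     /*
      * KVM_REG_PPC_OR_TSR and KVM_REG_PPC_CLEAR_TSR are write-only pseudo
      * registers: the value written is ORed into, or cleared from, the
      * guest's Timer Status Register rather than replacing it.
      */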
1806 {
1807     CPUState *cs = CPU(cpu);
1808     uint32_t bits = tsr_bits;
1809     struct kvm_one_reg reg = {
1810         .id = KVM_REG_PPC_OR_TSR,
1811         .addr = (uintptr_t) &bits,
1812     };
1813 
1814     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1815 }
1816 
1817 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1818 {
1819 
1820     CPUState *cs = CPU(cpu);
1821     uint32_t bits = tsr_bits;
1822     struct kvm_one_reg reg = {
1823         .id = KVM_REG_PPC_CLEAR_TSR,
1824         .addr = (uintptr_t) &bits,
1825     };
1826 
1827     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1828 }
1829 
1830 int kvmppc_set_tcr(PowerPCCPU *cpu)
1831 {
1832     CPUState *cs = CPU(cpu);
1833     CPUPPCState *env = &cpu->env;
1834     uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1835 
1836     struct kvm_one_reg reg = {
1837         .id = KVM_REG_PPC_TCR,
1838         .addr = (uintptr_t) &tcr,
1839     };
1840 
1841     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1842 }
1843 
1844 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1845 {
1846     CPUState *cs = CPU(cpu);
1847     int ret;
1848 
1849     if (!kvm_enabled()) {
1850         return -1;
1851     }
1852 
1853     if (!cap_ppc_watchdog) {
1854         fprintf(stderr, "warning: KVM does not support watchdog\n");
1855         return -1;
1856     }
1857 
1858     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1859     if (ret < 0) {
1860         fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1861                 __func__, strerror(-ret));
1862         return ret;
1863     }
1864 
1865     return ret;
1866 }
1867 
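     /* Copy the first /proc/cpuinfo line starting with 'field' into 'value' */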
1868 static int read_cpuinfo(const char *field, char *value, int len)
1869 {
1870     FILE *f;
1871     int ret = -1;
1872     int field_len = strlen(field);
1873     char line[512];
1874 
1875     f = fopen("/proc/cpuinfo", "r");
1876     if (!f) {
1877         return -1;
1878     }
1879 
1880     do {
1881         if (!fgets(line, sizeof(line), f)) {
1882             break;
1883         }
1884         if (!strncmp(line, field, field_len)) {
1885             pstrcpy(value, len, line);
1886             ret = 0;
1887             break;
1888         }
1889     } while (*line);
1890 
1891     fclose(f);
1892 
1893     return ret;
1894 }
1895 
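     /*
      * Parse the "timebase" line of /proc/cpuinfo; fall back to 1 GHz
      * (NANOSECONDS_PER_SECOND) if it cannot be found or parsed.
      */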
1896 uint32_t kvmppc_get_tbfreq(void)
1897 {
1898     char line[512];
1899     char *ns;
1900     uint32_t retval = NANOSECONDS_PER_SECOND;
1901 
1902     if (read_cpuinfo("timebase", line, sizeof(line))) {
1903         return retval;
1904     }
1905 
1906     if (!(ns = strchr(line, ':'))) {
1907         return retval;
1908     }
1909 
1910     ns++;
1911 
1912     return atoi(ns);
1913 }
1914 
1915 bool kvmppc_get_host_serial(char **value)
1916 {
1917     return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
1918                                NULL);
1919 }
1920 
1921 bool kvmppc_get_host_model(char **value)
1922 {
1923     return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
1924 }
1925 
1926 /* Try to find a device tree node for a CPU with clock-frequency property */
1927 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1928 {
1929     struct dirent *dirp;
1930     DIR *dp;
1931 
1932     if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1933         printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1934         return -1;
1935     }
1936 
1937     buf[0] = '\0';
1938     while ((dirp = readdir(dp)) != NULL) {
1939         FILE *f;
1940         snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1941                  dirp->d_name);
1942         f = fopen(buf, "r");
1943         if (f) {
1944             snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1945             fclose(f);
1946             break;
1947         }
1948         buf[0] = '\0';
1949     }
1950     closedir(dp);
1951     if (buf[0] == '\0') {
1952         printf("Unknown host!\n");
1953         return -1;
1954     }
1955 
1956     return 0;
1957 }
1958 
1959 static uint64_t kvmppc_read_int_dt(const char *filename)
1960 {
1961     union {
1962         uint32_t v32;
1963         uint64_t v64;
1964     } u;
1965     FILE *f;
1966     int len;
1967 
1968     f = fopen(filename, "rb");
1969     if (!f) {
1970         return -1;
1971     }
1972 
1973     len = fread(&u, 1, sizeof(u), f);
1974     fclose(f);
1975     switch (len) {
1976     case 4:
1977         /* property is a 32-bit quantity */
1978         return be32_to_cpu(u.v32);
1979     case 8:
1980         return be64_to_cpu(u.v64);
1981     }
1982 
1983     return 0;
1984 }
1985 
1986 /* Read a CPU node property from the host device tree that's a single
1987  * integer (32-bit or 64-bit).  Returns 0 if anything goes wrong
1988  * (can't find or open the property, or doesn't understand the
1989  * format) */
1990 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1991 {
1992     char buf[PATH_MAX], *tmp;
1993     uint64_t val;
1994 
1995     if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1996         return -1;
1997     }
1998 
1999     tmp = g_strdup_printf("%s/%s", buf, propname);
2000     val = kvmppc_read_int_dt(tmp);
2001     g_free(tmp);
2002 
2003     return val;
2004 }
2005 
2006 uint64_t kvmppc_get_clockfreq(void)
2007 {
2008     return kvmppc_read_int_cpu_dt("clock-frequency");
2009 }
2010 
2011 uint32_t kvmppc_get_vmx(void)
2012 {
2013     return kvmppc_read_int_cpu_dt("ibm,vmx");
2014 }
2015 
2016 uint32_t kvmppc_get_dfp(void)
2017 {
2018     return kvmppc_read_int_cpu_dt("ibm,dfp");
2019 }
2020 
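     /* Returns 0 and fills *pvinfo on success, non-zero otherwise */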
2021 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
2022 {
2023     PowerPCCPU *cpu = ppc_env_get_cpu(env);
2024     CPUState *cs = CPU(cpu);
2025 
2026     if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
2027         !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
2028         return 0;
2029     }
2030 
2031     return 1;
2032 }
2033 
2034 int kvmppc_get_hasidle(CPUPPCState *env)
2035 {
2036     struct kvm_ppc_pvinfo pvinfo;
2037 
2038     if (!kvmppc_get_pvinfo(env, &pvinfo) &&
2039         (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
2040         return 1;
2041     }
2042 
2043     return 0;
2044 }
2045 
2046 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
2047 {
2048     uint32_t *hc = (uint32_t *)buf;
2049     struct kvm_ppc_pvinfo pvinfo;
2050 
2051     if (!kvmppc_get_pvinfo(env, &pvinfo)) {
2052         memcpy(buf, pvinfo.hcall, buf_len);
2053         return 0;
2054     }
2055 
2056     /*
2057      * Fallback to always fail hypercalls regardless of endianness:
2058      *
2059      *     tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
2060      *     li r3, -1
2061      *     b .+8       (becomes nop in wrong endian)
2062      *     bswap32(li r3, -1)
2063      */
2064 
2065     hc[0] = cpu_to_be32(0x08000048);
2066     hc[1] = cpu_to_be32(0x3860ffff);
2067     hc[2] = cpu_to_be32(0x48000008);
2068     hc[3] = cpu_to_be32(bswap32(0x3860ffff));
2069 
2070     return 1;
2071 }
2072 
2073 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
2074 {
2075     return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
2076 }
2077 
2078 void kvmppc_enable_logical_ci_hcalls(void)
2079 {
2080     /*
2081      * FIXME: it would be nice to detect the case where we're using
2082      * a device that requires the in-kernel implementation of these
2083      * hcalls but the kernel lacks it, and produce a warning in that
2084      * case.
2085      */
2086     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
2087     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
2088 }
2089 
2090 void kvmppc_enable_set_mode_hcall(void)
2091 {
2092     kvmppc_enable_hcall(kvm_state, H_SET_MODE);
2093 }
2094 
2095 void kvmppc_enable_clear_ref_mod_hcalls(void)
2096 {
2097     kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
2098     kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
2099 }
2100 
2101 void kvmppc_set_papr(PowerPCCPU *cpu)
2102 {
2103     CPUState *cs = CPU(cpu);
2104     int ret;
2105 
2106     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2107     if (ret) {
2108         error_report("This vCPU type or KVM version does not support PAPR");
2109         exit(1);
2110     }
2111 
2112     /* Update the capability flag so we sync the right information
2113      * with kvm */
2114     cap_papr = 1;
2115 }
2116 
2117 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr)
2118 {
2119     return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr);
2120 }
2121 
2122 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2123 {
2124     CPUState *cs = CPU(cpu);
2125     int ret;
2126 
2127     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2128     if (ret && mpic_proxy) {
2129         error_report("This KVM version does not support EPR");
2130         exit(1);
2131     }
2132 }
2133 
2134 int kvmppc_smt_threads(void)
2135 {
2136     return cap_ppc_smt ? cap_ppc_smt : 1;
2137 }
2138 
2139 int kvmppc_set_smt_threads(int smt)
2140 {
2141     int ret;
2142 
2143     ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_SMT, 0, smt, 0);
2144     if (!ret) {
2145         cap_ppc_smt = smt;
2146     }
2147     return ret;
2148 }
2149 
2150 void kvmppc_hint_smt_possible(Error **errp)
2151 {
2152     int i;
2153     GString *g;
2154     char *s;
2155 
2156     assert(kvm_enabled());
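         /* cap_ppc_smt_possible is a bitmask of the VSMT modes the host supports */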
2157     if (cap_ppc_smt_possible) {
2158         g = g_string_new("Available VSMT modes:");
2159         for (i = 63; i >= 0; i--) {
2160             if ((1UL << i) & cap_ppc_smt_possible) {
2161                 g_string_append_printf(g, " %lu", (1UL << i));
2162             }
2163         }
2164         s = g_string_free(g, false);
2165         error_append_hint(errp, "%s.\n", s);
2166         g_free(s);
2167     } else {
2168         error_append_hint(errp,
2169                           "This KVM seems to be too old to support VSMT.\n");
2170     }
2171 }
2172 
2173 
2174 #ifdef TARGET_PPC64
2175 off_t kvmppc_alloc_rma(void **rma)
2176 {
2177     off_t size;
2178     int fd;
2179     struct kvm_allocate_rma ret;
2180 
2181     /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported;
2182      * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
2183      *                      not necessary on this hardware;
2184      * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware.
2185      *
2186      * FIXME: We should allow the user to force contiguous RMA
2187      * allocation in the cap_ppc_rma == 1 case.
2188      */
2189     if (cap_ppc_rma < 2) {
2190         return 0;
2191     }
2192 
2193     fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
2194     if (fd < 0) {
2195         fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
2196                 strerror(errno));
2197         return -1;
2198     }
2199 
2200     size = MIN(ret.rma_size, 256ul << 20);
2201 
2202     *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2203     if (*rma == MAP_FAILED) {
2204         fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
2205         return -1;
2206     }
2207 
2208     return size;
2209 }
2210 
2211 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2212 {
2213     struct kvm_ppc_smmu_info info;
2214     long rampagesize, best_page_shift;
2215     int i;
2216 
2217     if (cap_ppc_rma >= 2) {
2218         return current_size;
2219     }
2220 
2221     /* Find the largest hardware supported page size that's less than
2222      * or equal to the (logical) backing page size of guest RAM */
2223     kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
2224     rampagesize = qemu_getrampagesize();
2225     best_page_shift = 0;
2226 
2227     for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2228         struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2229 
2230         if (!sps->page_shift) {
2231             continue;
2232         }
2233 
2234         if ((sps->page_shift > best_page_shift)
2235             && ((1UL << sps->page_shift) <= rampagesize)) {
2236             best_page_shift = sps->page_shift;
2237         }
2238     }
2239 
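         /*
          * The HPT has 2^(hash_shift - 7) PTEGs (a PTEG is 128 == 2^7
          * bytes), so this caps the RMA at one guest page per PTEG,
          * presumably because the VRMA mapping cannot use more than one
          * HPTE per group.
          */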
2240     return MIN(current_size,
2241                1ULL << (best_page_shift + hash_shift - 7));
2242 }
2243 #endif
2244 
2245 bool kvmppc_spapr_use_multitce(void)
2246 {
2247     return cap_spapr_multitce;
2248 }
2249 
2250 int kvmppc_spapr_enable_inkernel_multitce(void)
2251 {
2252     int ret;
2253 
2254     ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2255                             H_PUT_TCE_INDIRECT, 1);
2256     if (!ret) {
2257         ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2258                                 H_STUFF_TCE, 1);
2259     }
2260 
2261     return ret;
2262 }
2263 
2264 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
2265                               uint64_t bus_offset, uint32_t nb_table,
2266                               int *pfd, bool need_vfio)
2267 {
2268     long len;
2269     int fd;
2270     void *table;
2271 
2272     /* Must set fd to -1 so we don't try to munmap when called for
2273      * destroying the table, which the upper layers -will- do
2274      */
2275     *pfd = -1;
2276     if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2277         return NULL;
2278     }
2279 
2280     if (cap_spapr_tce_64) {
2281         struct kvm_create_spapr_tce_64 args = {
2282             .liobn = liobn,
2283             .page_shift = page_shift,
2284             .offset = bus_offset >> page_shift,
2285             .size = nb_table,
2286             .flags = 0
2287         };
2288         fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE_64, &args);
2289         if (fd < 0) {
2290             fprintf(stderr,
2291                     "KVM: Failed to create TCE64 table for liobn 0x%x\n",
2292                     liobn);
2293             return NULL;
2294         }
2295     } else if (cap_spapr_tce) {
2296         uint64_t window_size = (uint64_t) nb_table << page_shift;
2297         struct kvm_create_spapr_tce args = {
2298             .liobn = liobn,
2299             .window_size = window_size,
2300         };
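             /*
              * The legacy ioctl only supports a 32-bit window size and a
              * zero bus offset; bail out if either cannot be represented.
              */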
2301         if ((window_size != args.window_size) || bus_offset) {
2302             return NULL;
2303         }
2304         fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2305         if (fd < 0) {
2306             fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2307                     liobn);
2308             return NULL;
2309         }
2310     } else {
2311         return NULL;
2312     }
2313 
2314     len = nb_table * sizeof(uint64_t);
2315     /* FIXME: round this up to page size */
2316 
2317     table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2318     if (table == MAP_FAILED) {
2319         fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2320                 liobn);
2321         close(fd);
2322         return NULL;
2323     }
2324 
2325     *pfd = fd;
2326     return table;
2327 }
2328 
2329 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
2330 {
2331     long len;
2332 
2333     if (fd < 0) {
2334         return -1;
2335     }
2336 
2337     len = nb_table * sizeof(uint64_t);
2338     if ((munmap(table, len) < 0) ||
2339         (close(fd) < 0)) {
2340         fprintf(stderr, "KVM: Unexpected error removing TCE table: %s\n",
2341                 strerror(errno));
2342         /* Leak the table */
2343     }
2344 
2345     return 0;
2346 }
2347 
2348 int kvmppc_reset_htab(int shift_hint)
2349 {
2350     uint32_t shift = shift_hint;
2351 
2352     if (!kvm_enabled()) {
2353         /* Full emulation, tell caller to allocate htab itself */
2354         return 0;
2355     }
2356     if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2357         int ret;
2358         ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2359         if (ret == -ENOTTY) {
2360             /* At least some versions of PR KVM advertise the
2361              * capability, but don't implement the ioctl().  Oops.
2362              * Return 0 so that we allocate the htab in qemu, as is
2363              * correct for PR. */
2364             return 0;
2365         } else if (ret < 0) {
2366             return ret;
2367         }
2368         return shift;
2369     }
2370 
2371     /* We have a kernel that predates the htab reset calls.  For PR
2372      * KVM, we need to allocate the htab ourselves, for an HV KVM of
2373      * this era, it has allocated a 16MB fixed size hash table already. */
2374     if (kvmppc_is_pr(kvm_state)) {
2375         /* PR - tell caller to allocate htab */
2376         return 0;
2377     } else {
2378         /* HV - assume 16MB kernel allocated htab */
2379         return 24;
2380     }
2381 }
2382 
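     /* Read the host's Processor Version Register (PVR) */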
2383 static inline uint32_t mfpvr(void)
2384 {
2385     uint32_t pvr;
2386 
2387     asm ("mfpvr %0"
2388          : "=r"(pvr));
2389     return pvr;
2390 }
2391 
2392 static void alter_insns(uint64_t *word, uint64_t flags, bool on)
2393 {
2394     if (on) {
2395         *word |= flags;
2396     } else {
2397         *word &= ~flags;
2398     }
2399 }
2400 
2401 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2402 {
2403     PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2404     uint32_t vmx = kvmppc_get_vmx();
2405     uint32_t dfp = kvmppc_get_dfp();
2406     uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2407     uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2408 
2409     /* Now fix up the class with information we can query from the host */
2410     pcc->pvr = mfpvr();
2411 
2412     if (vmx != -1) {
2413         /* Only override when we know what the host supports */
2414         alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
2415         alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
2416     }
2417     if (dfp != -1) {
2418         /* Only override when we know what the host supports */
2419         alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
2420     }
2421 
2422     if (dcache_size != -1) {
2423         pcc->l1_dcache_size = dcache_size;
2424     }
2425 
2426     if (icache_size != -1) {
2427         pcc->l1_icache_size = icache_size;
2428     }
2429 
2430 #if defined(TARGET_PPC64)
2431     pcc->radix_page_info = kvm_get_radix_page_info();
2432 
2433     if ((pcc->pvr & 0xffffff00) == CPU_POWERPC_POWER9_DD1) {
2434         /*
2435          * POWER9 DD1 has some bugs which make it not really ISA 3.00
2436          * compliant.  More importantly, advertising ISA 3.00
2437          * architected mode may prevent guests from activating
2438          * necessary DD1 workarounds.
2439          */
2440         pcc->pcr_supported &= ~(PCR_COMPAT_3_00 | PCR_COMPAT_2_07
2441                                 | PCR_COMPAT_2_06 | PCR_COMPAT_2_05);
2442     }
2443 #endif /* defined(TARGET_PPC64) */
2444 }
2445 
2446 bool kvmppc_has_cap_epr(void)
2447 {
2448     return cap_epr;
2449 }
2450 
2451 bool kvmppc_has_cap_htab_fd(void)
2452 {
2453     return cap_htab_fd;
2454 }
2455 
2456 bool kvmppc_has_cap_fixup_hcalls(void)
2457 {
2458     return cap_fixup_hcalls;
2459 }
2460 
2461 bool kvmppc_has_cap_htm(void)
2462 {
2463     return cap_htm;
2464 }
2465 
2466 bool kvmppc_has_cap_mmu_radix(void)
2467 {
2468     return cap_mmu_radix;
2469 }
2470 
2471 bool kvmppc_has_cap_mmu_hash_v3(void)
2472 {
2473     return cap_mmu_hash_v3;
2474 }
2475 
2476 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
2477 {
2478     uint32_t host_pvr = mfpvr();
2479     PowerPCCPUClass *pvr_pcc;
2480 
2481     pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2482     if (pvr_pcc == NULL) {
2483         pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2484     }
2485 
2486     return pvr_pcc;
2487 }
2488 
2489 static int kvm_ppc_register_host_cpu_type(void)
2490 {
2491     TypeInfo type_info = {
2492         .name = TYPE_HOST_POWERPC_CPU,
2493         .class_init = kvmppc_host_cpu_class_init,
2494     };
2495     PowerPCCPUClass *pvr_pcc;
2496     ObjectClass *oc;
2497     DeviceClass *dc;
2498     int i;
2499 
2500     pvr_pcc = kvm_ppc_get_host_cpu_class();
2501     if (pvr_pcc == NULL) {
2502         return -1;
2503     }
2504     type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2505     type_register(&type_info);
2506 
2507     oc = object_class_by_name(type_info.name);
2508     g_assert(oc);
2509 
2510 #if defined(TARGET_PPC64)
2511     type_info.name = g_strdup_printf("%s-"TYPE_SPAPR_CPU_CORE, "host");
2512     type_info.parent = TYPE_SPAPR_CPU_CORE;
2513     type_info.instance_size = sizeof(sPAPRCPUCore);
2514     type_info.instance_init = NULL;
2515     type_info.class_init = spapr_cpu_core_class_init;
2516     type_info.class_data = (void *) "host";
2517     type_register(&type_info);
2518     g_free((void *)type_info.name);
2519 #endif
2520 
2521     /*
2522      * Update generic CPU family class alias (e.g. on a POWER8NVL host,
2523      * we want "POWER8" to be a "family" alias that points to the current
2524      * host CPU type, too)
2525      */
2526     dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc));
2527     for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) {
2528         if (strcasecmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) {
2529             char *suffix;
2530 
2531             ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc));
2532             suffix = strstr(ppc_cpu_aliases[i].model, POWERPC_CPU_TYPE_SUFFIX);
2533             if (suffix) {
2534                 *suffix = 0;
2535             }
2536             break;
2537         }
2538     }
2539 
2540     return 0;
2541 }
2542 
2543 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2544 {
2545     struct kvm_rtas_token_args args = {
2546         .token = token,
2547     };
2548 
2549     if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2550         return -ENOENT;
2551     }
2552 
2553     strncpy(args.name, function, sizeof(args.name));
2554 
2555     return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2556 }
2557 
2558 int kvmppc_get_htab_fd(bool write)
2559 {
2560     struct kvm_get_htab_fd s = {
2561         .flags = write ? KVM_GET_HTAB_WRITE : 0,
2562         .start_index = 0,
2563     };
2564 
2565     if (!cap_htab_fd) {
2566         fprintf(stderr, "KVM version doesn't support saving the hash table\n");
2567         return -1;
2568     }
2569 
2570     return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2571 }
2572 
2573 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2574 {
2575     int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2576     uint8_t buf[bufsize];
2577     ssize_t rc;
2578 
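         /*
          * The HTAB fd returns a stream of kvm_get_htab_header chunks,
          * each followed by n_valid HPTEs; re-emit them big-endian into
          * the migration stream until the fd is drained or we run out
          * of time.
          */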
2579     do {
2580         rc = read(fd, buf, bufsize);
2581         if (rc < 0) {
2582             fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2583                     strerror(errno));
2584             return rc;
2585         } else if (rc) {
2586             uint8_t *buffer = buf;
2587             ssize_t n = rc;
2588             while (n) {
2589                 struct kvm_get_htab_header *head =
2590                     (struct kvm_get_htab_header *) buffer;
2591                 size_t chunksize = sizeof(*head) +
2592                      HASH_PTE_SIZE_64 * head->n_valid;
2593 
2594                 qemu_put_be32(f, head->index);
2595                 qemu_put_be16(f, head->n_valid);
2596                 qemu_put_be16(f, head->n_invalid);
2597                 qemu_put_buffer(f, (void *)(head + 1),
2598                                 HASH_PTE_SIZE_64 * head->n_valid);
2599 
2600                 buffer += chunksize;
2601                 n -= chunksize;
2602             }
2603         }
2604     } while ((rc != 0)
2605              && ((max_ns < 0)
2606                  || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2607 
2608     return (rc == 0) ? 1 : 0;
2609 }
2610 
2611 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2612                            uint16_t n_valid, uint16_t n_invalid)
2613 {
2614     struct kvm_get_htab_header *buf;
2615     size_t chunksize = sizeof(*buf) + n_valid * HASH_PTE_SIZE_64;
2616     ssize_t rc;
2617 
2618     buf = alloca(chunksize);
2619     buf->index = index;
2620     buf->n_valid = n_valid;
2621     buf->n_invalid = n_invalid;
2622 
2623     qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64 * n_valid);
2624 
2625     rc = write(fd, buf, chunksize);
2626     if (rc < 0) {
2627         fprintf(stderr, "Error writing KVM hash table: %s\n",
2628                 strerror(errno));
2629         return rc;
2630     }
2631     if (rc != chunksize) {
2632         /* We should never get a short write on a single chunk */
2633         fprintf(stderr, "Short write, restoring KVM hash table\n");
2634         return -1;
2635     }
2636     return 0;
2637 }
2638 
2639 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2640 {
2641     return true;
2642 }
2643 
2644 void kvm_arch_init_irq_routing(KVMState *s)
2645 {
2646 }
2647 
2648 void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n)
2649 {
2650     struct kvm_get_htab_fd ghf = {
2651         .flags = 0,
2652         .start_index = ptex,
2653     };
2654     int fd, rc;
2655     int i;
2656 
2657     fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2658     if (fd < 0) {
2659         hw_error("kvmppc_read_hptes: Unable to open HPT fd");
2660     }
2661 
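         /*
          * Walk the kvm_get_htab_header records returned by the fd: copy
          * out the valid HPTEs and zero-fill the slots reported invalid.
          */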
2662     i = 0;
2663     while (i < n) {
2664         struct kvm_get_htab_header *hdr;
2665         int m = n < HPTES_PER_GROUP ? n : HPTES_PER_GROUP;
2666         char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64];
2667 
2668         rc = read(fd, buf, sizeof(buf));
2669         if (rc < 0) {
2670             hw_error("kvmppc_read_hptes: Unable to read HPTEs");
2671         }
2672 
2673         hdr = (struct kvm_get_htab_header *)buf;
2674         while ((i < n) && ((char *)hdr < (buf + rc))) {
2675             int invalid = hdr->n_invalid;
2676 
2677             if (hdr->index != (ptex + i)) {
2678                 hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32
2679                          " != (%"HWADDR_PRIu" + %d)", hdr->index, ptex, i);
2680             }
2681 
2682             memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * hdr->n_valid);
2683             i += hdr->n_valid;
2684 
2685             if ((n - i) < invalid) {
2686                 invalid = n - i;
2687             }
2688             memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64);
2689             i += hdr->n_invalid;
2690 
2691             hdr = (struct kvm_get_htab_header *)
2692                 ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid);
2693         }
2694     }
2695 
2696     close(fd);
2697 }
2698 
2699 void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1)
2700 {
2701     int fd, rc;
2702     struct kvm_get_htab_fd ghf;
2703     struct {
2704         struct kvm_get_htab_header hdr;
2705         uint64_t pte0;
2706         uint64_t pte1;
2707     } buf;
2708 
2709     ghf.flags = 0;
2710     ghf.start_index = 0;     /* Ignored */
2711     fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2712     if (fd < 0) {
2713         hw_error("kvmppc_write_hpte: Unable to open HPT fd");
2714     }
2715 
2716     buf.hdr.n_valid = 1;
2717     buf.hdr.n_invalid = 0;
2718     buf.hdr.index = ptex;
2719     buf.pte0 = cpu_to_be64(pte0);
2720     buf.pte1 = cpu_to_be64(pte1);
2721 
2722     rc = write(fd, &buf, sizeof(buf));
2723     if (rc != sizeof(buf)) {
2724         hw_error("kvmppc_write_hpte: Unable to update KVM HPT");
2725     }
2726     close(fd);
2727 }
2728 
2729 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2730                              uint64_t address, uint32_t data, PCIDevice *dev)
2731 {
2732     return 0;
2733 }
2734 
2735 int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
2736                                 int vector, PCIDevice *dev)
2737 {
2738     return 0;
2739 }
2740 
2741 int kvm_arch_release_virq_post(int virq)
2742 {
2743     return 0;
2744 }
2745 
2746 int kvm_arch_msi_data_to_gsi(uint32_t data)
2747 {
2748     return data & 0xffff;
2749 }
2750 
2751 int kvmppc_enable_hwrng(void)
2752 {
2753     if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2754         return -1;
2755     }
2756 
2757     return kvmppc_enable_hcall(kvm_state, H_RANDOM);
2758 }
2759 
2760 void kvmppc_check_papr_resize_hpt(Error **errp)
2761 {
2762     if (!kvm_enabled()) {
2763         return; /* No KVM, we're good */
2764     }
2765 
2766     if (cap_resize_hpt) {
2767         return; /* Kernel has explicit support, we're good */
2768     }
2769 
2770     /* Otherwise fallback on looking for PR KVM */
2771     if (kvmppc_is_pr(kvm_state)) {
2772         return;
2773     }
2774 
2775     error_setg(errp,
2776                "Hash page table resizing not available with this KVM version");
2777 }
2778 
2779 int kvmppc_resize_hpt_prepare(PowerPCCPU *cpu, target_ulong flags, int shift)
2780 {
2781     CPUState *cs = CPU(cpu);
2782     struct kvm_ppc_resize_hpt rhpt = {
2783         .flags = flags,
2784         .shift = shift,
2785     };
2786 
2787     if (!cap_resize_hpt) {
2788         return -ENOSYS;
2789     }
2790 
2791     return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_PREPARE, &rhpt);
2792 }
2793 
2794 int kvmppc_resize_hpt_commit(PowerPCCPU *cpu, target_ulong flags, int shift)
2795 {
2796     CPUState *cs = CPU(cpu);
2797     struct kvm_ppc_resize_hpt rhpt = {
2798         .flags = flags,
2799         .shift = shift,
2800     };
2801 
2802     if (!cap_resize_hpt) {
2803         return -ENOSYS;
2804     }
2805 
2806     return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_COMMIT, &rhpt);
2807 }
2808 
2809 static void kvmppc_pivot_hpt_cpu(CPUState *cs, run_on_cpu_data arg)
2810 {
2811     target_ulong sdr1 = arg.target_ptr;
2812     PowerPCCPU *cpu = POWERPC_CPU(cs);
2813     CPUPPCState *env = &cpu->env;
2814 
2815     /* This is just for the benefit of PR KVM */
2816     cpu_synchronize_state(cs);
2817     env->spr[SPR_SDR1] = sdr1;
2818     if (kvmppc_put_books_sregs(cpu) < 0) {
2819         error_report("Unable to update SDR1 in KVM");
2820         exit(1);
2821     }
2822 }
2823 
2824 void kvmppc_update_sdr1(target_ulong sdr1)
2825 {
2826     CPUState *cs;
2827 
2828     CPU_FOREACH(cs) {
2829         run_on_cpu(cs, kvmppc_pivot_hpt_cpu, RUN_ON_CPU_TARGET_PTR(sdr1));
2830     }
2831 }
2832 
2833 /*
2834  * This is a helper function to detect a post-migration scenario in
2835  * which a guest, running under KVM-HV, freezes in cpu_post_load
2836  * because the guest kernel can't handle a PVR value other than the
2837  * actual host PVR in KVM_SET_SREGS, even if pvr_match() returns true.
2838  *
2839  * If we don't have cap_ppc_pvr_compat and we're not running in PR
2840  * (so, we're HV), return true. The workaround itself is done in
2841  * cpu_post_load.
2842  *
2843  * The order here is important: we'll only fall back to checking for
2844  * KVM PR if the guest kernel can't handle the situation itself.
2845  * We want to avoid querying the running KVM type at the QEMU level
2846  * as much as possible.
2847  */
2848 bool kvmppc_pvr_workaround_required(PowerPCCPU *cpu)
2849 {
2850     CPUState *cs = CPU(cpu);
2851 
2852     if (!kvm_enabled()) {
2853         return false;
2854     }
2855 
2856     if (cap_ppc_pvr_compat) {
2857         return false;
2858     }
2859 
2860     return !kvmppc_is_pr(cs->kvm_state);
2861 }
2862