xref: /openbmc/qemu/target/ppc/kvm.c (revision fe29141b)
1 /*
2  * PowerPC implementation of KVM hooks
3  *
4  * Copyright IBM Corp. 2007
5  * Copyright (C) 2011 Freescale Semiconductor, Inc.
6  *
7  * Authors:
8  *  Jerone Young <jyoung5@us.ibm.com>
9  *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10  *  Hollis Blanchard <hollisb@us.ibm.com>
11  *
12  * This work is licensed under the terms of the GNU GPL, version 2 or later.
13  * See the COPYING file in the top-level directory.
14  *
15  */
16 
17 #include "qemu/osdep.h"
18 #include <dirent.h>
19 #include <sys/ioctl.h>
20 #include <sys/vfs.h>
21 
22 #include <linux/kvm.h>
23 
24 #include "qemu-common.h"
25 #include "qapi/error.h"
26 #include "qemu/error-report.h"
27 #include "cpu.h"
28 #include "cpu-models.h"
29 #include "qemu/timer.h"
30 #include "sysemu/sysemu.h"
31 #include "sysemu/hw_accel.h"
32 #include "kvm_ppc.h"
33 #include "sysemu/cpus.h"
34 #include "sysemu/device_tree.h"
35 #include "mmu-hash64.h"
36 
37 #include "hw/sysbus.h"
38 #include "hw/ppc/spapr.h"
39 #include "hw/ppc/spapr_vio.h"
40 #include "hw/ppc/spapr_cpu_core.h"
41 #include "hw/ppc/ppc.h"
42 #include "sysemu/watchdog.h"
43 #include "trace.h"
44 #include "exec/gdbstub.h"
45 #include "exec/memattrs.h"
46 #include "exec/ram_addr.h"
47 #include "sysemu/hostmem.h"
48 #include "qemu/cutils.h"
49 #include "qemu/mmap-alloc.h"
50 #include "elf.h"
51 #include "sysemu/kvm_int.h"
52 
53 //#define DEBUG_KVM
54 
55 #ifdef DEBUG_KVM
56 #define DPRINTF(fmt, ...) \
57     do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
58 #else
59 #define DPRINTF(fmt, ...) \
60     do { } while (0)
61 #endif
62 
63 #define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"
64 
65 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
66     KVM_CAP_LAST_INFO
67 };
68 
69 static int cap_interrupt_unset = false;
70 static int cap_interrupt_level = false;
71 static int cap_segstate;
72 static int cap_booke_sregs;
73 static int cap_ppc_smt;
74 static int cap_ppc_smt_possible;
75 static int cap_ppc_rma;
76 static int cap_spapr_tce;
77 static int cap_spapr_tce_64;
78 static int cap_spapr_multitce;
79 static int cap_spapr_vfio;
80 static int cap_hior;
81 static int cap_one_reg;
82 static int cap_epr;
83 static int cap_ppc_watchdog;
84 static int cap_papr;
85 static int cap_htab_fd;
86 static int cap_fixup_hcalls;
87 static int cap_htm;             /* Hardware transactional memory support */
88 static int cap_mmu_radix;
89 static int cap_mmu_hash_v3;
90 static int cap_resize_hpt;
91 static int cap_ppc_pvr_compat;
92 static int cap_ppc_safe_cache;
93 static int cap_ppc_safe_bounds_check;
94 static int cap_ppc_safe_indirect_branch;
95 
96 static uint32_t debug_inst_opcode;
97 
98 /* XXX We have a race condition where we actually have a level triggered
99  *     interrupt, but the infrastructure can't expose that yet, so the guest
100  *     takes it but ignores it, goes to sleep and never gets notified that there's
101  *     still an interrupt pending.
102  *
103  *     As a quick workaround, let's just wake up again 20 ms after we injected
104  *     an interrupt. That way we can ensure that we're always reinjecting
105  *     interrupts in case the guest swallowed them.
106  */
107 static QEMUTimer *idle_timer;
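/*
 * Illustrative sketch (summary of how idle_timer is used later in this file,
 * not additional code): the workaround boils down to re-arming a ~20 ms
 * one-shot timer every time an interrupt is injected, with kvm_kick_cpu()
 * as the callback so the vCPU is woken up again:
 *
 *     timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
 *                           (NANOSECONDS_PER_SECOND / 50));
 *
 * The actual call sites are timer_new_ns() in kvm_arch_init_vcpu() and
 * timer_mod() in kvm_arch_pre_run() further down.
 */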
108 
109 static void kvm_kick_cpu(void *opaque)
110 {
111     PowerPCCPU *cpu = opaque;
112 
113     qemu_cpu_kick(CPU(cpu));
114 }
115 
116 /* Check whether we are running with KVM-PR (instead of KVM-HV).  This
117  * should only be used for fallback tests - generally we should use
118  * explicit capabilities for the features we want, rather than
119  * assuming what is/isn't available depending on the KVM variant. */
120 static bool kvmppc_is_pr(KVMState *ks)
121 {
122     /* Assume KVM-PR if the GET_PVINFO capability is available */
123     return kvm_vm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
124 }
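/*
 * Usage sketch (illustrative, hedged): kvmppc_is_pr() is meant for fallback
 * paths where no explicit capability exists, e.g. to pick conservative
 * PR-KVM defaults:
 *
 *     if (kvmppc_is_pr(cs->kvm_state)) {
 *         ... assume 4K and 16M pages only, no 1T segments ...
 *     }
 *
 * See kvm_get_fallback_smmu_info() and kvm_arch_init_vcpu() below for the
 * in-tree users.
 */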
125 
126 static int kvm_ppc_register_host_cpu_type(MachineState *ms);
127 static void kvmppc_get_cpu_characteristics(KVMState *s);
128 
129 int kvm_arch_init(MachineState *ms, KVMState *s)
130 {
131     cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
132     cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
133     cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
134     cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
135     cap_ppc_smt_possible = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT_POSSIBLE);
136     cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
137     cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
138     cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64);
139     cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
140     cap_spapr_vfio = false;
141     cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
142     cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
143     cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
144     cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
145     /* Note: we don't set cap_papr here, because this capability is
146      * only activated later by kvmppc_set_papr() */
147     cap_htab_fd = kvm_vm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
148     cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
149     cap_ppc_smt = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT);
150     cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
151     cap_mmu_radix = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_RADIX);
152     cap_mmu_hash_v3 = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_HASH_V3);
153     cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT);
154     kvmppc_get_cpu_characteristics(s);
155     /*
156      * Note: setting it to false because there is no such capability
157      * in KVM at this moment.
158      *
159      * TODO: call kvm_vm_check_extension() with the right capability
160      * after the kernel starts implementing it. */
161     cap_ppc_pvr_compat = false;
162 
163     if (!cap_interrupt_level) {
164         fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
165                         "VM to stall at times!\n");
166     }
167 
168     kvm_ppc_register_host_cpu_type(ms);
169 
170     return 0;
171 }
172 
173 int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
174 {
175     return 0;
176 }
177 
178 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
179 {
180     CPUPPCState *cenv = &cpu->env;
181     CPUState *cs = CPU(cpu);
182     struct kvm_sregs sregs;
183     int ret;
184 
185     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
186         /* What we're really trying to say is "if we're on BookE, we use
187            the native PVR for now". This is the only sane way to check
188            it though, so we may mislead users into thinking they can run
189            BookE guests on BookS. Let's hope nobody dares enough :) */
190         return 0;
191     } else {
192         if (!cap_segstate) {
193             fprintf(stderr, "kvm error: missing PVR setting capability\n");
194             return -ENOSYS;
195         }
196     }
197 
198     ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
199     if (ret) {
200         return ret;
201     }
202 
203     sregs.pvr = cenv->spr[SPR_PVR];
204     return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
205 }
206 
207 /* Set up a shared TLB array with KVM */
208 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
209 {
210     CPUPPCState *env = &cpu->env;
211     CPUState *cs = CPU(cpu);
212     struct kvm_book3e_206_tlb_params params = {};
213     struct kvm_config_tlb cfg = {};
214     unsigned int entries = 0;
215     int ret, i;
216 
217     if (!kvm_enabled() ||
218         !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
219         return 0;
220     }
221 
222     assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
223 
224     for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
225         params.tlb_sizes[i] = booke206_tlb_size(env, i);
226         params.tlb_ways[i] = booke206_tlb_ways(env, i);
227         entries += params.tlb_sizes[i];
228     }
229 
230     assert(entries == env->nb_tlb);
231     assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
232 
233     env->tlb_dirty = true;
234 
235     cfg.array = (uintptr_t)env->tlb.tlbm;
236     cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
237     cfg.params = (uintptr_t)&params;
238     cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
239 
240     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
241     if (ret < 0) {
242         fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
243                 __func__, strerror(-ret));
244         return ret;
245     }
246 
247     env->kvm_sw_tlb = true;
248     return 0;
249 }
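/*
 * Illustrative note (summarising the contract, not new code): after this
 * call the kernel and QEMU share env->tlb.tlbm directly.  QEMU-side TLB
 * changes are pushed back lazily: callers set env->tlb_dirty = true and
 * kvm_sw_tlb_put() (below) flushes the entries with the KVM_DIRTY_TLB
 * ioctl before the vCPU runs again, e.g. from kvm_arch_put_registers():
 *
 *     if (env->tlb_dirty) {
 *         kvm_sw_tlb_put(cpu);
 *         env->tlb_dirty = false;
 *     }
 */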
250 
251 
252 #if defined(TARGET_PPC64)
253 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
254                                        struct kvm_ppc_smmu_info *info)
255 {
256     CPUPPCState *env = &cpu->env;
257     CPUState *cs = CPU(cpu);
258 
259     memset(info, 0, sizeof(*info));
260 
261     /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
262      * we need to "guess" what the supported page sizes are.
263      *
264      * For that to work we make a few assumptions:
265      *
266      * - Check whether we are running "PR" KVM which only supports 4K
267      *   and 16M pages, but supports them regardless of the backing
268      *   store characteristics. We also don't support 1T segments.
269      *
270      *   This is safe because if HV KVM ever supports that capability or
271      *   PR KVM grows support for more page/segment sizes, those versions
272      *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
273      *   will not hit this fallback.
274      *
275      * - Else we are running HV KVM. This means we only support page
276      *   sizes that fit in the backing store. Additionally we only
277      *   advertise 64K pages if the processor is ARCH 2.06 and we assume
278      *   P7 encodings for the SLB and hash table. Here too, we assume
279      *   support for any newer processor will mean a kernel that
280      *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
281      *   this fallback.
282      */
283     if (kvmppc_is_pr(cs->kvm_state)) {
284         /* No flags */
285         info->flags = 0;
286         info->slb_size = 64;
287 
288         /* Standard 4k base page size segment */
289         info->sps[0].page_shift = 12;
290         info->sps[0].slb_enc = 0;
291         info->sps[0].enc[0].page_shift = 12;
292         info->sps[0].enc[0].pte_enc = 0;
293 
294         /* Standard 16M large page size segment */
295         info->sps[1].page_shift = 24;
296         info->sps[1].slb_enc = SLB_VSID_L;
297         info->sps[1].enc[0].page_shift = 24;
298         info->sps[1].enc[0].pte_enc = 0;
299     } else {
300         int i = 0;
301 
302         /* HV KVM has backing store size restrictions */
303         info->flags = KVM_PPC_PAGE_SIZES_REAL;
304 
305         if (env->mmu_model & POWERPC_MMU_1TSEG) {
306             info->flags |= KVM_PPC_1T_SEGMENTS;
307         }
308 
309         if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 ||
310            POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) {
311             info->slb_size = 32;
312         } else {
313             info->slb_size = 64;
314         }
315 
316         /* Standard 4k base page size segment */
317         info->sps[i].page_shift = 12;
318         info->sps[i].slb_enc = 0;
319         info->sps[i].enc[0].page_shift = 12;
320         info->sps[i].enc[0].pte_enc = 0;
321         i++;
322 
323         /* 64K on MMU 2.06 and later */
324         if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 ||
325             POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) {
326             info->sps[i].page_shift = 16;
327             info->sps[i].slb_enc = 0x110;
328             info->sps[i].enc[0].page_shift = 16;
329             info->sps[i].enc[0].pte_enc = 1;
330             i++;
331         }
332 
333         /* Standard 16M large page size segment */
334         info->sps[i].page_shift = 24;
335         info->sps[i].slb_enc = SLB_VSID_L;
336         info->sps[i].enc[0].page_shift = 24;
337         info->sps[i].enc[0].pte_enc = 0;
338     }
339 }
340 
341 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
342 {
343     CPUState *cs = CPU(cpu);
344     int ret;
345 
346     if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
347         ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
348         if (ret == 0) {
349             return;
350         }
351     }
352 
353     kvm_get_fallback_smmu_info(cpu, info);
354 }
355 
356 struct ppc_radix_page_info *kvm_get_radix_page_info(void)
357 {
358     KVMState *s = KVM_STATE(current_machine->accelerator);
359     struct ppc_radix_page_info *radix_page_info;
360     struct kvm_ppc_rmmu_info rmmu_info;
361     int i;
362 
363     if (!kvm_check_extension(s, KVM_CAP_PPC_MMU_RADIX)) {
364         return NULL;
365     }
366     if (kvm_vm_ioctl(s, KVM_PPC_GET_RMMU_INFO, &rmmu_info)) {
367         return NULL;
368     }
369     radix_page_info = g_malloc0(sizeof(*radix_page_info));
370     radix_page_info->count = 0;
371     for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) {
372         if (rmmu_info.ap_encodings[i]) {
373             radix_page_info->entries[i] = rmmu_info.ap_encodings[i];
374             radix_page_info->count++;
375         }
376     }
377     return radix_page_info;
378 }
379 
380 target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu,
381                                      bool radix, bool gtse,
382                                      uint64_t proc_tbl)
383 {
384     CPUState *cs = CPU(cpu);
385     int ret;
386     uint64_t flags = 0;
387     struct kvm_ppc_mmuv3_cfg cfg = {
388         .process_table = proc_tbl,
389     };
390 
391     if (radix) {
392         flags |= KVM_PPC_MMUV3_RADIX;
393     }
394     if (gtse) {
395         flags |= KVM_PPC_MMUV3_GTSE;
396     }
397     cfg.flags = flags;
398     ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_CONFIGURE_V3_MMU, &cfg);
399     switch (ret) {
400     case 0:
401         return H_SUCCESS;
402     case -EINVAL:
403         return H_PARAMETER;
404     case -ENODEV:
405         return H_NOT_AVAILABLE;
406     default:
407         return H_HARDWARE;
408     }
409 }
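/*
 * Usage sketch (an assumption about the caller, which lives in the sPAPR
 * hcall code rather than in this file): since the return value is already
 * an hcall status, an H_REGISTER_PROC_TBL-style handler can hand it back
 * to the guest directly:
 *
 *     return kvmppc_configure_v3_mmu(cpu, radix, gtse, proc_tbl);
 *     ... yields H_SUCCESS, H_PARAMETER, H_NOT_AVAILABLE or H_HARDWARE ...
 */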
410 
411 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
412 {
413     if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
414         return true;
415     }
416 
417     return (1ul << shift) <= rampgsize;
418 }
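/*
 * Worked example (illustrative): with KVM_PPC_PAGE_SIZES_REAL set and RAM
 * backed by 64 KiB pages (rampgsize == 0x10000), a 64 KiB guest page is
 * accepted while a 16 MiB one is not:
 *
 *     kvm_valid_page_size(flags, 0x10000, 16);   ... true:  (1ul << 16) <= 0x10000
 *     kvm_valid_page_size(flags, 0x10000, 24);   ... false: (1ul << 24) >  0x10000
 */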
419 
420 static long max_cpu_page_size;
421 
422 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
423 {
424     static struct kvm_ppc_smmu_info smmu_info;
425     static bool has_smmu_info;
426     CPUPPCState *env = &cpu->env;
427     int iq, ik, jq, jk;
428     bool has_64k_pages = false;
429 
430     /* We only handle page sizes for 64-bit server guests for now */
431     if (!(env->mmu_model & POWERPC_MMU_64)) {
432         return;
433     }
434 
435     /* Collect MMU info from the kernel if not already done */
436     if (!has_smmu_info) {
437         kvm_get_smmu_info(cpu, &smmu_info);
438         has_smmu_info = true;
439     }
440 
441     if (!max_cpu_page_size) {
442         max_cpu_page_size = qemu_getrampagesize();
443     }
444 
445     /* Convert to QEMU form */
446     memset(&env->sps, 0, sizeof(env->sps));
447 
448     /* If we have HV KVM, we need to forbid CI large pages if our
449      * host page size is smaller than 64K.
450      */
451     if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) {
452         env->ci_large_pages = getpagesize() >= 0x10000;
453     }
454 
455     /*
456      * XXX This loop should be an entry-wide AND of the capabilities that
457      *     the selected CPU has with the capabilities that KVM supports.
458      */
459     for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
460         struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
461         struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
462 
463         if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
464                                  ksps->page_shift)) {
465             continue;
466         }
467         qsps->page_shift = ksps->page_shift;
468         qsps->slb_enc = ksps->slb_enc;
469         for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
470             if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
471                                      ksps->enc[jk].page_shift)) {
472                 continue;
473             }
474             if (ksps->enc[jk].page_shift == 16) {
475                 has_64k_pages = true;
476             }
477             qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
478             qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
479             if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
480                 break;
481             }
482         }
483         if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
484             break;
485         }
486     }
487     env->slb_nr = smmu_info.slb_size;
488     if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
489         env->mmu_model &= ~POWERPC_MMU_1TSEG;
490     }
491     if (!has_64k_pages) {
492         env->mmu_model &= ~POWERPC_MMU_64K;
493     }
494 }
495 
496 bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path)
497 {
498     Object *mem_obj = object_resolve_path(obj_path, NULL);
499     char *mempath = object_property_get_str(mem_obj, "mem-path", NULL);
500     long pagesize;
501 
502     if (mempath) {
503         pagesize = qemu_mempath_getpagesize(mempath);
504         g_free(mempath);
505     } else {
506         pagesize = getpagesize();
507     }
508 
509     return pagesize >= max_cpu_page_size;
510 }
511 
512 #else /* defined (TARGET_PPC64) */
513 
514 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
515 {
516 }
517 
518 bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path)
519 {
520     return true;
521 }
522 
523 #endif /* !defined (TARGET_PPC64) */
524 
525 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
526 {
527     return POWERPC_CPU(cpu)->vcpu_id;
528 }
529 
530 /* e500 supports 2 h/w breakpoints and 2 watchpoints.
531  * book3s supports only 1 watchpoint, so array size
532  * of 4 is sufficient for now.
533  */
534 #define MAX_HW_BKPTS 4
535 
536 static struct HWBreakpoint {
537     target_ulong addr;
538     int type;
539 } hw_debug_points[MAX_HW_BKPTS];
540 
541 static CPUWatchpoint hw_watchpoint;
542 
543 /* By default, no breakpoints or watchpoints are supported */
544 static int max_hw_breakpoint;
545 static int max_hw_watchpoint;
546 static int nb_hw_breakpoint;
547 static int nb_hw_watchpoint;
548 
549 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
550 {
551     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
552         max_hw_breakpoint = 2;
553         max_hw_watchpoint = 2;
554     }
555 
556     if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
557         fprintf(stderr, "Error initializing h/w breakpoints\n");
558         return;
559     }
560 }
561 
562 int kvm_arch_init_vcpu(CPUState *cs)
563 {
564     PowerPCCPU *cpu = POWERPC_CPU(cs);
565     CPUPPCState *cenv = &cpu->env;
566     int ret;
567 
568     /* Gather server mmu info from KVM and update the CPU state */
569     kvm_fixup_page_sizes(cpu);
570 
571     /* Synchronize sregs with kvm */
572     ret = kvm_arch_sync_sregs(cpu);
573     if (ret) {
574         if (ret == -EINVAL) {
575             error_report("Register sync failed... If you're using kvm-hv.ko,"
576                          " only \"-cpu host\" is possible");
577         }
578         return ret;
579     }
580 
581     idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
582 
583     switch (cenv->mmu_model) {
584     case POWERPC_MMU_BOOKE206:
585         /* This target supports access to KVM's guest TLB */
586         ret = kvm_booke206_tlb_init(cpu);
587         break;
588     case POWERPC_MMU_2_07:
589         if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
590             /* KVM-HV has transactional memory on POWER8 even without the
591              * KVM_CAP_PPC_HTM extension, so enable it here instead as
592              * long as it's available to userspace on the host. */
593             if (qemu_getauxval(AT_HWCAP2) & PPC_FEATURE2_HAS_HTM) {
594                 cap_htm = true;
595             }
596         }
597         break;
598     default:
599         break;
600     }
601 
602     kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
603     kvmppc_hw_debug_points_init(cenv);
604 
605     return ret;
606 }
607 
608 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
609 {
610     CPUPPCState *env = &cpu->env;
611     CPUState *cs = CPU(cpu);
612     struct kvm_dirty_tlb dirty_tlb;
613     unsigned char *bitmap;
614     int ret;
615 
616     if (!env->kvm_sw_tlb) {
617         return;
618     }
619 
620     bitmap = g_malloc((env->nb_tlb + 7) / 8);
621     memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
622 
623     dirty_tlb.bitmap = (uintptr_t)bitmap;
624     dirty_tlb.num_dirty = env->nb_tlb;
625 
626     ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
627     if (ret) {
628         fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
629                 __func__, strerror(-ret));
630     }
631 
632     g_free(bitmap);
633 }
634 
635 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
636 {
637     PowerPCCPU *cpu = POWERPC_CPU(cs);
638     CPUPPCState *env = &cpu->env;
639     union {
640         uint32_t u32;
641         uint64_t u64;
642     } val;
643     struct kvm_one_reg reg = {
644         .id = id,
645         .addr = (uintptr_t) &val,
646     };
647     int ret;
648 
649     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
650     if (ret != 0) {
651         trace_kvm_failed_spr_get(spr, strerror(errno));
652     } else {
653         switch (id & KVM_REG_SIZE_MASK) {
654         case KVM_REG_SIZE_U32:
655             env->spr[spr] = val.u32;
656             break;
657 
658         case KVM_REG_SIZE_U64:
659             env->spr[spr] = val.u64;
660             break;
661 
662         default:
663             /* Don't handle this size yet */
664             abort();
665         }
666     }
667 }
668 
669 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
670 {
671     PowerPCCPU *cpu = POWERPC_CPU(cs);
672     CPUPPCState *env = &cpu->env;
673     union {
674         uint32_t u32;
675         uint64_t u64;
676     } val;
677     struct kvm_one_reg reg = {
678         .id = id,
679         .addr = (uintptr_t) &val,
680     };
681     int ret;
682 
683     switch (id & KVM_REG_SIZE_MASK) {
684     case KVM_REG_SIZE_U32:
685         val.u32 = env->spr[spr];
686         break;
687 
688     case KVM_REG_SIZE_U64:
689         val.u64 = env->spr[spr];
690         break;
691 
692     default:
693         /* Don't handle this size yet */
694         abort();
695     }
696 
697     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
698     if (ret != 0) {
699         trace_kvm_failed_spr_set(spr, strerror(errno));
700     }
701 }
702 
703 static int kvm_put_fp(CPUState *cs)
704 {
705     PowerPCCPU *cpu = POWERPC_CPU(cs);
706     CPUPPCState *env = &cpu->env;
707     struct kvm_one_reg reg;
708     int i;
709     int ret;
710 
711     if (env->insns_flags & PPC_FLOAT) {
712         uint64_t fpscr = env->fpscr;
713         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
714 
715         reg.id = KVM_REG_PPC_FPSCR;
716         reg.addr = (uintptr_t)&fpscr;
717         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
718         if (ret < 0) {
719             DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
720             return ret;
721         }
722 
723         for (i = 0; i < 32; i++) {
724             uint64_t vsr[2];
725 
726 #ifdef HOST_WORDS_BIGENDIAN
727             vsr[0] = float64_val(env->fpr[i]);
728             vsr[1] = env->vsr[i];
729 #else
730             vsr[0] = env->vsr[i];
731             vsr[1] = float64_val(env->fpr[i]);
732 #endif
733             reg.addr = (uintptr_t) &vsr;
734             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
735 
736             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
737             if (ret < 0) {
738                 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
739                         i, strerror(errno));
740                 return ret;
741             }
742         }
743     }
744 
745     if (env->insns_flags & PPC_ALTIVEC) {
746         reg.id = KVM_REG_PPC_VSCR;
747         reg.addr = (uintptr_t)&env->vscr;
748         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
749         if (ret < 0) {
750             DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
751             return ret;
752         }
753 
754         for (i = 0; i < 32; i++) {
755             reg.id = KVM_REG_PPC_VR(i);
756             reg.addr = (uintptr_t)&env->avr[i];
757             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
758             if (ret < 0) {
759                 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
760                 return ret;
761             }
762         }
763     }
764 
765     return 0;
766 }
767 
768 static int kvm_get_fp(CPUState *cs)
769 {
770     PowerPCCPU *cpu = POWERPC_CPU(cs);
771     CPUPPCState *env = &cpu->env;
772     struct kvm_one_reg reg;
773     int i;
774     int ret;
775 
776     if (env->insns_flags & PPC_FLOAT) {
777         uint64_t fpscr;
778         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
779 
780         reg.id = KVM_REG_PPC_FPSCR;
781         reg.addr = (uintptr_t)&fpscr;
782         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
783         if (ret < 0) {
784             DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
785             return ret;
786         } else {
787             env->fpscr = fpscr;
788         }
789 
790         for (i = 0; i < 32; i++) {
791             uint64_t vsr[2];
792 
793             reg.addr = (uintptr_t) &vsr;
794             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
795 
796             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
797             if (ret < 0) {
798                 DPRINTF("Unable to get %s%d from KVM: %s\n",
799                         vsx ? "VSR" : "FPR", i, strerror(errno));
800                 return ret;
801             } else {
802 #ifdef HOST_WORDS_BIGENDIAN
803                 env->fpr[i] = vsr[0];
804                 if (vsx) {
805                     env->vsr[i] = vsr[1];
806                 }
807 #else
808                 env->fpr[i] = vsr[1];
809                 if (vsx) {
810                     env->vsr[i] = vsr[0];
811                 }
812 #endif
813             }
814         }
815     }
816 
817     if (env->insns_flags & PPC_ALTIVEC) {
818         reg.id = KVM_REG_PPC_VSCR;
819         reg.addr = (uintptr_t)&env->vscr;
820         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
821         if (ret < 0) {
822             DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
823             return ret;
824         }
825 
826         for (i = 0; i < 32; i++) {
827             reg.id = KVM_REG_PPC_VR(i);
828             reg.addr = (uintptr_t)&env->avr[i];
829             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
830             if (ret < 0) {
831                 DPRINTF("Unable to get VR%d from KVM: %s\n",
832                         i, strerror(errno));
833                 return ret;
834             }
835         }
836     }
837 
838     return 0;
839 }
840 
841 #if defined(TARGET_PPC64)
842 static int kvm_get_vpa(CPUState *cs)
843 {
844     PowerPCCPU *cpu = POWERPC_CPU(cs);
845     CPUPPCState *env = &cpu->env;
846     struct kvm_one_reg reg;
847     int ret;
848 
849     reg.id = KVM_REG_PPC_VPA_ADDR;
850     reg.addr = (uintptr_t)&env->vpa_addr;
851     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
852     if (ret < 0) {
853         DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
854         return ret;
855     }
856 
857     assert((uintptr_t)&env->slb_shadow_size
858            == ((uintptr_t)&env->slb_shadow_addr + 8));
859     reg.id = KVM_REG_PPC_VPA_SLB;
860     reg.addr = (uintptr_t)&env->slb_shadow_addr;
861     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
862     if (ret < 0) {
863         DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
864                 strerror(errno));
865         return ret;
866     }
867 
868     assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
869     reg.id = KVM_REG_PPC_VPA_DTL;
870     reg.addr = (uintptr_t)&env->dtl_addr;
871     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
872     if (ret < 0) {
873         DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
874                 strerror(errno));
875         return ret;
876     }
877 
878     return 0;
879 }
880 
881 static int kvm_put_vpa(CPUState *cs)
882 {
883     PowerPCCPU *cpu = POWERPC_CPU(cs);
884     CPUPPCState *env = &cpu->env;
885     struct kvm_one_reg reg;
886     int ret;
887 
888     /* SLB shadow or DTL can't be registered unless a master VPA is
889      * registered.  That means when restoring state, if a VPA *is*
890      * registered, we need to set that up first.  If not, we need to
891      * deregister the others before deregistering the master VPA */
892     assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
893 
894     if (env->vpa_addr) {
895         reg.id = KVM_REG_PPC_VPA_ADDR;
896         reg.addr = (uintptr_t)&env->vpa_addr;
897         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
898         if (ret < 0) {
899             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
900             return ret;
901         }
902     }
903 
904     assert((uintptr_t)&env->slb_shadow_size
905            == ((uintptr_t)&env->slb_shadow_addr + 8));
906     reg.id = KVM_REG_PPC_VPA_SLB;
907     reg.addr = (uintptr_t)&env->slb_shadow_addr;
908     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
909     if (ret < 0) {
910         DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
911         return ret;
912     }
913 
914     assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
915     reg.id = KVM_REG_PPC_VPA_DTL;
916     reg.addr = (uintptr_t)&env->dtl_addr;
917     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
918     if (ret < 0) {
919         DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
920                 strerror(errno));
921         return ret;
922     }
923 
924     if (!env->vpa_addr) {
925         reg.id = KVM_REG_PPC_VPA_ADDR;
926         reg.addr = (uintptr_t)&env->vpa_addr;
927         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
928         if (ret < 0) {
929             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
930             return ret;
931         }
932     }
933 
934     return 0;
935 }
936 #endif /* TARGET_PPC64 */
937 
938 int kvmppc_put_books_sregs(PowerPCCPU *cpu)
939 {
940     CPUPPCState *env = &cpu->env;
941     struct kvm_sregs sregs;
942     int i;
943 
944     sregs.pvr = env->spr[SPR_PVR];
945 
946     if (cpu->vhyp) {
947         PPCVirtualHypervisorClass *vhc =
948             PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
949         sregs.u.s.sdr1 = vhc->encode_hpt_for_kvm_pr(cpu->vhyp);
950     } else {
951         sregs.u.s.sdr1 = env->spr[SPR_SDR1];
952     }
953 
954     /* Sync SLB */
955 #ifdef TARGET_PPC64
956     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
957         sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
958         if (env->slb[i].esid & SLB_ESID_V) {
959             sregs.u.s.ppc64.slb[i].slbe |= i;
960         }
961         sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
962     }
963 #endif
964 
965     /* Sync SRs */
966     for (i = 0; i < 16; i++) {
967         sregs.u.s.ppc32.sr[i] = env->sr[i];
968     }
969 
970     /* Sync BATs */
971     for (i = 0; i < 8; i++) {
972         /* Beware: we have to swap the upper and lower 32-bit halves here */
973         sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
974             | env->DBAT[1][i];
975         sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
976             | env->IBAT[1][i];
977     }
978 
979     return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
980 }
981 
982 int kvm_arch_put_registers(CPUState *cs, int level)
983 {
984     PowerPCCPU *cpu = POWERPC_CPU(cs);
985     CPUPPCState *env = &cpu->env;
986     struct kvm_regs regs;
987     int ret;
988     int i;
989 
990     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
991     if (ret < 0) {
992         return ret;
993     }
994 
995     regs.ctr = env->ctr;
996     regs.lr  = env->lr;
997     regs.xer = cpu_read_xer(env);
998     regs.msr = env->msr;
999     regs.pc = env->nip;
1000 
1001     regs.srr0 = env->spr[SPR_SRR0];
1002     regs.srr1 = env->spr[SPR_SRR1];
1003 
1004     regs.sprg0 = env->spr[SPR_SPRG0];
1005     regs.sprg1 = env->spr[SPR_SPRG1];
1006     regs.sprg2 = env->spr[SPR_SPRG2];
1007     regs.sprg3 = env->spr[SPR_SPRG3];
1008     regs.sprg4 = env->spr[SPR_SPRG4];
1009     regs.sprg5 = env->spr[SPR_SPRG5];
1010     regs.sprg6 = env->spr[SPR_SPRG6];
1011     regs.sprg7 = env->spr[SPR_SPRG7];
1012 
1013     regs.pid = env->spr[SPR_BOOKE_PID];
1014 
1015     for (i = 0; i < 32; i++)
1016         regs.gpr[i] = env->gpr[i];
1017 
1018     regs.cr = 0;
1019     for (i = 0; i < 8; i++) {
1020         regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
1021     }
1022 
1023     ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
1024     if (ret < 0)
1025         return ret;
1026 
1027     kvm_put_fp(cs);
1028 
1029     if (env->tlb_dirty) {
1030         kvm_sw_tlb_put(cpu);
1031         env->tlb_dirty = false;
1032     }
1033 
1034     if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
1035         ret = kvmppc_put_books_sregs(cpu);
1036         if (ret < 0) {
1037             return ret;
1038         }
1039     }
1040 
1041     if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
1042         kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1043     }
1044 
1045     if (cap_one_reg) {
1046         int i;
1047 
1048         /* We deliberately ignore errors here: for kernels which have
1049          * the ONE_REG calls but don't support the specific
1050          * registers, there's a reasonable chance things will still
1051          * work, at least until we try to migrate. */
1052         for (i = 0; i < 1024; i++) {
1053             uint64_t id = env->spr_cb[i].one_reg_id;
1054 
1055             if (id != 0) {
1056                 kvm_put_one_spr(cs, id, i);
1057             }
1058         }
1059 
1060 #ifdef TARGET_PPC64
1061         if (msr_ts) {
1062             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1063                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1064             }
1065             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1066                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1067             }
1068             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1069             kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1070             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1071             kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1072             kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1073             kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1074             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1075             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1076             kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1077             kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1078         }
1079 
1080         if (cap_papr) {
1081             if (kvm_put_vpa(cs) < 0) {
1082                 DPRINTF("Warning: Unable to set VPA information to KVM\n");
1083             }
1084         }
1085 
1086         kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1087 #endif /* TARGET_PPC64 */
1088     }
1089 
1090     return ret;
1091 }
1092 
1093 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1094 {
1095      env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1096 }
1097 
1098 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1099 {
1100     CPUPPCState *env = &cpu->env;
1101     struct kvm_sregs sregs;
1102     int ret;
1103 
1104     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1105     if (ret < 0) {
1106         return ret;
1107     }
1108 
1109     if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1110         env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1111         env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1112         env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1113         env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1114         env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1115         env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1116         env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1117         env->spr[SPR_DECR] = sregs.u.e.dec;
1118         env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1119         env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1120         env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1121     }
1122 
1123     if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1124         env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1125         env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1126         env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1127         env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1128         env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1129     }
1130 
1131     if (sregs.u.e.features & KVM_SREGS_E_64) {
1132         env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1133     }
1134 
1135     if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1136         env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1137     }
1138 
1139     if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1140         env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1141         kvm_sync_excp(env, POWERPC_EXCP_CRITICAL,  SPR_BOOKE_IVOR0);
1142         env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1143         kvm_sync_excp(env, POWERPC_EXCP_MCHECK,  SPR_BOOKE_IVOR1);
1144         env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1145         kvm_sync_excp(env, POWERPC_EXCP_DSI,  SPR_BOOKE_IVOR2);
1146         env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1147         kvm_sync_excp(env, POWERPC_EXCP_ISI,  SPR_BOOKE_IVOR3);
1148         env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1149         kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL,  SPR_BOOKE_IVOR4);
1150         env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1151         kvm_sync_excp(env, POWERPC_EXCP_ALIGN,  SPR_BOOKE_IVOR5);
1152         env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1153         kvm_sync_excp(env, POWERPC_EXCP_PROGRAM,  SPR_BOOKE_IVOR6);
1154         env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1155         kvm_sync_excp(env, POWERPC_EXCP_FPU,  SPR_BOOKE_IVOR7);
1156         env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1157         kvm_sync_excp(env, POWERPC_EXCP_SYSCALL,  SPR_BOOKE_IVOR8);
1158         env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1159         kvm_sync_excp(env, POWERPC_EXCP_APU,  SPR_BOOKE_IVOR9);
1160         env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1161         kvm_sync_excp(env, POWERPC_EXCP_DECR,  SPR_BOOKE_IVOR10);
1162         env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1163         kvm_sync_excp(env, POWERPC_EXCP_FIT,  SPR_BOOKE_IVOR11);
1164         env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1165         kvm_sync_excp(env, POWERPC_EXCP_WDT,  SPR_BOOKE_IVOR12);
1166         env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1167         kvm_sync_excp(env, POWERPC_EXCP_DTLB,  SPR_BOOKE_IVOR13);
1168         env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1169         kvm_sync_excp(env, POWERPC_EXCP_ITLB,  SPR_BOOKE_IVOR14);
1170         env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1171         kvm_sync_excp(env, POWERPC_EXCP_DEBUG,  SPR_BOOKE_IVOR15);
1172 
1173         if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1174             env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1175             kvm_sync_excp(env, POWERPC_EXCP_SPEU,  SPR_BOOKE_IVOR32);
1176             env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1177             kvm_sync_excp(env, POWERPC_EXCP_EFPDI,  SPR_BOOKE_IVOR33);
1178             env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1179             kvm_sync_excp(env, POWERPC_EXCP_EFPRI,  SPR_BOOKE_IVOR34);
1180         }
1181 
1182         if (sregs.u.e.features & KVM_SREGS_E_PM) {
1183             env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1184             kvm_sync_excp(env, POWERPC_EXCP_EPERFM,  SPR_BOOKE_IVOR35);
1185         }
1186 
1187         if (sregs.u.e.features & KVM_SREGS_E_PC) {
1188             env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1189             kvm_sync_excp(env, POWERPC_EXCP_DOORI,  SPR_BOOKE_IVOR36);
1190             env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1191             kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1192         }
1193     }
1194 
1195     if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1196         env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1197         env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1198         env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1199         env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1200         env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1201         env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1202         env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1203         env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1204         env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1205         env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1206     }
1207 
1208     if (sregs.u.e.features & KVM_SREGS_EXP) {
1209         env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1210     }
1211 
1212     if (sregs.u.e.features & KVM_SREGS_E_PD) {
1213         env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1214         env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1215     }
1216 
1217     if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1218         env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1219         env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1220         env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1221 
1222         if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1223             env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1224             env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1225         }
1226     }
1227 
1228     return 0;
1229 }
1230 
1231 static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1232 {
1233     CPUPPCState *env = &cpu->env;
1234     struct kvm_sregs sregs;
1235     int ret;
1236     int i;
1237 
1238     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1239     if (ret < 0) {
1240         return ret;
1241     }
1242 
1243     if (!cpu->vhyp) {
1244         ppc_store_sdr1(env, sregs.u.s.sdr1);
1245     }
1246 
1247     /* Sync SLB */
1248 #ifdef TARGET_PPC64
1249     /*
1250      * The packed SLB array we get from KVM_GET_SREGS only contains
1251      * information about valid entries. So we flush our internal copy
1252      * to get rid of stale ones, then put all valid SLB entries back
1253      * in.
1254      */
1255     memset(env->slb, 0, sizeof(env->slb));
1256     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1257         target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1258         target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1259         /*
1260          * Only restore valid entries
1261          */
1262         if (rb & SLB_ESID_V) {
1263             ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1264         }
1265     }
1266 #endif
1267 
1268     /* Sync SRs */
1269     for (i = 0; i < 16; i++) {
1270         env->sr[i] = sregs.u.s.ppc32.sr[i];
1271     }
1272 
1273     /* Sync BATs */
1274     for (i = 0; i < 8; i++) {
1275         env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1276         env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1277         env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1278         env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1279     }
1280 
1281     return 0;
1282 }
1283 
1284 int kvm_arch_get_registers(CPUState *cs)
1285 {
1286     PowerPCCPU *cpu = POWERPC_CPU(cs);
1287     CPUPPCState *env = &cpu->env;
1288     struct kvm_regs regs;
1289     uint32_t cr;
1290     int i, ret;
1291 
1292     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1293     if (ret < 0)
1294         return ret;
1295 
1296     cr = regs.cr;
1297     for (i = 7; i >= 0; i--) {
1298         env->crf[i] = cr & 15;
1299         cr >>= 4;
1300     }
1301 
1302     env->ctr = regs.ctr;
1303     env->lr = regs.lr;
1304     cpu_write_xer(env, regs.xer);
1305     env->msr = regs.msr;
1306     env->nip = regs.pc;
1307 
1308     env->spr[SPR_SRR0] = regs.srr0;
1309     env->spr[SPR_SRR1] = regs.srr1;
1310 
1311     env->spr[SPR_SPRG0] = regs.sprg0;
1312     env->spr[SPR_SPRG1] = regs.sprg1;
1313     env->spr[SPR_SPRG2] = regs.sprg2;
1314     env->spr[SPR_SPRG3] = regs.sprg3;
1315     env->spr[SPR_SPRG4] = regs.sprg4;
1316     env->spr[SPR_SPRG5] = regs.sprg5;
1317     env->spr[SPR_SPRG6] = regs.sprg6;
1318     env->spr[SPR_SPRG7] = regs.sprg7;
1319 
1320     env->spr[SPR_BOOKE_PID] = regs.pid;
1321 
1322     for (i = 0; i < 32; i++)
1323         env->gpr[i] = regs.gpr[i];
1324 
1325     kvm_get_fp(cs);
1326 
1327     if (cap_booke_sregs) {
1328         ret = kvmppc_get_booke_sregs(cpu);
1329         if (ret < 0) {
1330             return ret;
1331         }
1332     }
1333 
1334     if (cap_segstate) {
1335         ret = kvmppc_get_books_sregs(cpu);
1336         if (ret < 0) {
1337             return ret;
1338         }
1339     }
1340 
1341     if (cap_hior) {
1342         kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1343     }
1344 
1345     if (cap_one_reg) {
1346         int i;
1347 
1348         /* We deliberately ignore errors here: for kernels which have
1349          * the ONE_REG calls but don't support the specific
1350          * registers, there's a reasonable chance things will still
1351          * work, at least until we try to migrate. */
1352         for (i = 0; i < 1024; i++) {
1353             uint64_t id = env->spr_cb[i].one_reg_id;
1354 
1355             if (id != 0) {
1356                 kvm_get_one_spr(cs, id, i);
1357             }
1358         }
1359 
1360 #ifdef TARGET_PPC64
1361         if (msr_ts) {
1362             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1363                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1364             }
1365             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1366                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1367             }
1368             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1369             kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1370             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1371             kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1372             kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1373             kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1374             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1375             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1376             kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1377             kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1378         }
1379 
1380         if (cap_papr) {
1381             if (kvm_get_vpa(cs) < 0) {
1382                 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1383             }
1384         }
1385 
1386         kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1387 #endif
1388     }
1389 
1390     return 0;
1391 }
1392 
1393 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1394 {
1395     unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1396 
1397     if (irq != PPC_INTERRUPT_EXT) {
1398         return 0;
1399     }
1400 
1401     if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1402         return 0;
1403     }
1404 
1405     kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1406 
1407     return 0;
1408 }
1409 
1410 #if defined(TARGET_PPCEMB)
1411 #define PPC_INPUT_INT PPC40x_INPUT_INT
1412 #elif defined(TARGET_PPC64)
1413 #define PPC_INPUT_INT PPC970_INPUT_INT
1414 #else
1415 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1416 #endif
1417 
1418 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1419 {
1420     PowerPCCPU *cpu = POWERPC_CPU(cs);
1421     CPUPPCState *env = &cpu->env;
1422     int r;
1423     unsigned irq;
1424 
1425     qemu_mutex_lock_iothread();
1426 
1427     /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1428      * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1429     if (!cap_interrupt_level &&
1430         run->ready_for_interrupt_injection &&
1431         (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1432         (env->irq_input_state & (1<<PPC_INPUT_INT)))
1433     {
1434         /* For now KVM disregards the 'irq' argument. However, in the
1435          * future KVM could cache it in-kernel to avoid a heavyweight exit
1436          * when reading the UIC.
1437          */
1438         irq = KVM_INTERRUPT_SET;
1439 
1440         DPRINTF("injected interrupt %d\n", irq);
1441         r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1442         if (r < 0) {
1443             printf("cpu %d failed to inject %x\n", cs->cpu_index, irq);
1444         }
1445 
1446         /* Always wake up soon in case the interrupt was level based */
1447         timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1448                        (NANOSECONDS_PER_SECOND / 50));
1449     }
1450 
1451     /* We don't know if there are more interrupts pending after this. However,
1452      * the guest will return to userspace in the course of handling this one
1453      * anyway, so we will get a chance to deliver the rest. */
1454 
1455     qemu_mutex_unlock_iothread();
1456 }
1457 
1458 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1459 {
1460     return MEMTXATTRS_UNSPECIFIED;
1461 }
1462 
1463 int kvm_arch_process_async_events(CPUState *cs)
1464 {
1465     return cs->halted;
1466 }
1467 
1468 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1469 {
1470     CPUState *cs = CPU(cpu);
1471     CPUPPCState *env = &cpu->env;
1472 
1473     if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1474         cs->halted = 1;
1475         cs->exception_index = EXCP_HLT;
1476     }
1477 
1478     return 0;
1479 }
1480 
1481 /* map dcr access to existing qemu dcr emulation */
1482 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1483 {
1484     if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1485         fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1486 
1487     return 0;
1488 }
1489 
1490 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1491 {
1492     if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1493         fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1494 
1495     return 0;
1496 }
1497 
1498 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1499 {
1500     /* Mixed endian case is not handled */
1501     uint32_t sc = debug_inst_opcode;
1502 
1503     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1504                             sizeof(sc), 0) ||
1505         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1506         return -EINVAL;
1507     }
1508 
1509     return 0;
1510 }
1511 
1512 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1513 {
1514     uint32_t sc;
1515 
1516     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1517         sc != debug_inst_opcode ||
1518         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1519                             sizeof(sc), 1)) {
1520         return -EINVAL;
1521     }
1522 
1523     return 0;
1524 }
1525 
1526 static int find_hw_breakpoint(target_ulong addr, int type)
1527 {
1528     int n;
1529 
1530     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1531            <= ARRAY_SIZE(hw_debug_points));
1532 
1533     for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1534         if (hw_debug_points[n].addr == addr &&
1535              hw_debug_points[n].type == type) {
1536             return n;
1537         }
1538     }
1539 
1540     return -1;
1541 }
1542 
1543 static int find_hw_watchpoint(target_ulong addr, int *flag)
1544 {
1545     int n;
1546 
1547     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1548     if (n >= 0) {
1549         *flag = BP_MEM_ACCESS;
1550         return n;
1551     }
1552 
1553     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1554     if (n >= 0) {
1555         *flag = BP_MEM_WRITE;
1556         return n;
1557     }
1558 
1559     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1560     if (n >= 0) {
1561         *flag = BP_MEM_READ;
1562         return n;
1563     }
1564 
1565     return -1;
1566 }
1567 
1568 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1569                                   target_ulong len, int type)
1570 {
1571     if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1572         return -ENOBUFS;
1573     }
1574 
1575     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1576     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1577 
1578     switch (type) {
1579     case GDB_BREAKPOINT_HW:
1580         if (nb_hw_breakpoint >= max_hw_breakpoint) {
1581             return -ENOBUFS;
1582         }
1583 
1584         if (find_hw_breakpoint(addr, type) >= 0) {
1585             return -EEXIST;
1586         }
1587 
1588         nb_hw_breakpoint++;
1589         break;
1590 
1591     case GDB_WATCHPOINT_WRITE:
1592     case GDB_WATCHPOINT_READ:
1593     case GDB_WATCHPOINT_ACCESS:
1594         if (nb_hw_watchpoint >= max_hw_watchpoint) {
1595             return -ENOBUFS;
1596         }
1597 
1598         if (find_hw_breakpoint(addr, type) >= 0) {
1599             return -EEXIST;
1600         }
1601 
1602         nb_hw_watchpoint++;
1603         break;
1604 
1605     default:
1606         return -ENOSYS;
1607     }
1608 
1609     return 0;
1610 }
1611 
1612 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1613                                   target_ulong len, int type)
1614 {
1615     int n;
1616 
1617     n = find_hw_breakpoint(addr, type);
1618     if (n < 0) {
1619         return -ENOENT;
1620     }
1621 
1622     switch (type) {
1623     case GDB_BREAKPOINT_HW:
1624         nb_hw_breakpoint--;
1625         break;
1626 
1627     case GDB_WATCHPOINT_WRITE:
1628     case GDB_WATCHPOINT_READ:
1629     case GDB_WATCHPOINT_ACCESS:
1630         nb_hw_watchpoint--;
1631         break;
1632 
1633     default:
1634         return -ENOSYS;
1635     }
1636     hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1637 
1638     return 0;
1639 }
1640 
1641 void kvm_arch_remove_all_hw_breakpoints(void)
1642 {
1643     nb_hw_breakpoint = nb_hw_watchpoint = 0;
1644 }
1645 
1646 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1647 {
1648     int n;
1649 
1650     /* Software Breakpoint updates */
1651     if (kvm_sw_breakpoints_active(cs)) {
1652         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1653     }
1654 
1655     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1656            <= ARRAY_SIZE(hw_debug_points));
1657     assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1658 
1659     if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1660         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1661         memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1662         for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1663             switch (hw_debug_points[n].type) {
1664             case GDB_BREAKPOINT_HW:
1665                 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1666                 break;
1667             case GDB_WATCHPOINT_WRITE:
1668                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1669                 break;
1670             case GDB_WATCHPOINT_READ:
1671                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1672                 break;
1673             case GDB_WATCHPOINT_ACCESS:
1674                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1675                                         KVMPPC_DEBUG_WATCH_READ;
1676                 break;
1677             default:
1678                 cpu_abort(cs, "Unsupported breakpoint type\n");
1679             }
1680             dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1681         }
1682     }
1683 }
1684 
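/*
 * Handle a KVM_EXIT_DEBUG exit.  Returns non-zero when the exception
 * belongs to QEMU (single-stepping, or a hardware/software breakpoint
 * or watchpoint we planted), so the exit handler returns EXCP_DEBUG to
 * the debugger.  Returns 0 when it was guest-internal, in which case a
 * program interrupt is injected and the vCPU simply re-enters.
 */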
1685 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1686 {
1687     CPUState *cs = CPU(cpu);
1688     CPUPPCState *env = &cpu->env;
1689     struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1690     int handle = 0;
1691     int n;
1692     int flag = 0;
1693 
1694     if (cs->singlestep_enabled) {
1695         handle = 1;
1696     } else if (arch_info->status) {
1697         if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1698             if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1699                 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1700                 if (n >= 0) {
1701                     handle = 1;
1702                 }
1703             } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1704                                             KVMPPC_DEBUG_WATCH_WRITE)) {
1705                 n = find_hw_watchpoint(arch_info->address,  &flag);
1706                 if (n >= 0) {
1707                     handle = 1;
1708                     cs->watchpoint_hit = &hw_watchpoint;
1709                     hw_watchpoint.vaddr = hw_debug_points[n].addr;
1710                     hw_watchpoint.flags = flag;
1711                 }
1712             }
1713         }
1714     } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1715         handle = 1;
1716     } else {
1717         /* QEMU cannot handle this debug exception, so inject a
1718          * program exception into the guest instead;
1719          * yes, a program exception, NOT a debug exception !!
1720          * When QEMU uses the debug resources itself, debug exceptions
1721          * must always be delivered to QEMU.  To achieve this we set
1722          * MSR_DE and also MSRP_DEP so the guest cannot change MSR_DE.
1723          * When emulating debug resources for the guest, the guest
1724          * must control MSR_DE (enabling/disabling debug interrupts
1725          * as needed).  Supporting both configurations at once is NOT
1726          * possible, so debug resources cannot be shared between QEMU
1727          * and the guest on the BookE architecture.
1728          * In the current design QEMU gets priority over the guest:
1729          * if QEMU is using the debug resources then the guest cannot
1730          * use them.
1731          * For software breakpoints QEMU uses a privileged instruction,
1732          * so a guest-set debug exception can never bring us here; the
1733          * only remaining possibility is that the guest executed a
1734          * privileged / illegal instruction, which is why we inject a
1735          * program interrupt.
1736          */
1737 
1738         cpu_synchronize_state(cs);
1739         /* env->nip is PC, so increment this by 4 to use
1740          * ppc_cpu_do_interrupt(), which sets srr0 = env->nip - 4.
1741          */
1742         env->nip += 4;
1743         cs->exception_index = POWERPC_EXCP_PROGRAM;
1744         env->error_code = POWERPC_EXCP_INVAL;
1745         ppc_cpu_do_interrupt(cs);
1746     }
1747 
1748     return handle;
1749 }
1750 
1751 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1752 {
1753     PowerPCCPU *cpu = POWERPC_CPU(cs);
1754     CPUPPCState *env = &cpu->env;
1755     int ret;
1756 
1757     qemu_mutex_lock_iothread();
1758 
1759     switch (run->exit_reason) {
1760     case KVM_EXIT_DCR:
1761         if (run->dcr.is_write) {
1762             DPRINTF("handle dcr write\n");
1763             ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1764         } else {
1765             DPRINTF("handle dcr read\n");
1766             ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1767         }
1768         break;
1769     case KVM_EXIT_HLT:
1770         DPRINTF("handle halt\n");
1771         ret = kvmppc_handle_halt(cpu);
1772         break;
1773 #if defined(TARGET_PPC64)
1774     case KVM_EXIT_PAPR_HCALL:
1775         DPRINTF("handle PAPR hypercall\n");
1776         run->papr_hcall.ret = spapr_hypercall(cpu,
1777                                               run->papr_hcall.nr,
1778                                               run->papr_hcall.args);
1779         ret = 0;
1780         break;
1781 #endif
1782     case KVM_EXIT_EPR:
1783         DPRINTF("handle epr\n");
1784         run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1785         ret = 0;
1786         break;
1787     case KVM_EXIT_WATCHDOG:
1788         DPRINTF("handle watchdog expiry\n");
1789         watchdog_perform_action();
1790         ret = 0;
1791         break;
1792 
1793     case KVM_EXIT_DEBUG:
1794         DPRINTF("handle debug exception\n");
1795         if (kvm_handle_debug(cpu, run)) {
1796             ret = EXCP_DEBUG;
1797             break;
1798         }
1799         /* re-enter, this exception was guest-internal */
1800         ret = 0;
1801         break;
1802 
1803     default:
1804         fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1805         ret = -1;
1806         break;
1807     }
1808 
1809     qemu_mutex_unlock_iothread();
1810     return ret;
1811 }
1812 
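/*
 * BookE timer plumbing: the KVM_REG_PPC_OR_TSR / KVM_REG_PPC_CLEAR_TSR
 * one-regs below OR bits into, or clear bits from, the in-kernel Timer
 * Status Register, while KVM_REG_PPC_TCR replaces the Timer Control
 * Register wholesale.
 */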
1813 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1814 {
1815     CPUState *cs = CPU(cpu);
1816     uint32_t bits = tsr_bits;
1817     struct kvm_one_reg reg = {
1818         .id = KVM_REG_PPC_OR_TSR,
1819         .addr = (uintptr_t) &bits,
1820     };
1821 
1822     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1823 }
1824 
1825 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1826 {
1827 
1828     CPUState *cs = CPU(cpu);
1829     uint32_t bits = tsr_bits;
1830     struct kvm_one_reg reg = {
1831         .id = KVM_REG_PPC_CLEAR_TSR,
1832         .addr = (uintptr_t) &bits,
1833     };
1834 
1835     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1836 }
1837 
1838 int kvmppc_set_tcr(PowerPCCPU *cpu)
1839 {
1840     CPUState *cs = CPU(cpu);
1841     CPUPPCState *env = &cpu->env;
1842     uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1843 
1844     struct kvm_one_reg reg = {
1845         .id = KVM_REG_PPC_TCR,
1846         .addr = (uintptr_t) &tcr,
1847     };
1848 
1849     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1850 }
1851 
1852 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1853 {
1854     CPUState *cs = CPU(cpu);
1855     int ret;
1856 
1857     if (!kvm_enabled()) {
1858         return -1;
1859     }
1860 
1861     if (!cap_ppc_watchdog) {
1862         printf("warning: KVM does not support watchdog\n");
1863         return -1;
1864     }
1865 
1866     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1867     if (ret < 0) {
1868         fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1869                 __func__, strerror(-ret));
1870         return ret;
1871     }
1872 
1873     return ret;
1874 }
1875 
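/*
 * Scan /proc/cpuinfo for the first line starting with 'field' and copy
 * that whole line (still in "field : value" form) into 'value'.
 * Returns 0 on success, -1 if the file can't be opened or the field
 * isn't present.
 */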
1876 static int read_cpuinfo(const char *field, char *value, int len)
1877 {
1878     FILE *f;
1879     int ret = -1;
1880     int field_len = strlen(field);
1881     char line[512];
1882 
1883     f = fopen("/proc/cpuinfo", "r");
1884     if (!f) {
1885         return -1;
1886     }
1887 
1888     do {
1889         if (!fgets(line, sizeof(line), f)) {
1890             break;
1891         }
1892         if (!strncmp(line, field, field_len)) {
1893             pstrcpy(value, len, line);
1894             ret = 0;
1895             break;
1896         }
1897     } while (*line);
1898 
1899     fclose(f);
1900 
1901     return ret;
1902 }
1903 
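/*
 * Parse the timebase frequency out of /proc/cpuinfo, which on ppc hosts
 * carries a line such as "timebase : 512000000" (value in Hz).  Falls
 * back to NANOSECONDS_PER_SECOND if the line can't be found or parsed.
 */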
1904 uint32_t kvmppc_get_tbfreq(void)
1905 {
1906     char line[512];
1907     char *ns;
1908     uint32_t retval = NANOSECONDS_PER_SECOND;
1909 
1910     if (read_cpuinfo("timebase", line, sizeof(line))) {
1911         return retval;
1912     }
1913 
1914     if (!(ns = strchr(line, ':'))) {
1915         return retval;
1916     }
1917 
1918     ns++;
1919 
1920     return atoi(ns);
1921 }
1922 
1923 bool kvmppc_get_host_serial(char **value)
1924 {
1925     return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
1926                                NULL);
1927 }
1928 
1929 bool kvmppc_get_host_model(char **value)
1930 {
1931     return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
1932 }
1933 
1934 /* Try to find a device tree node for a CPU with clock-frequency property */
1935 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1936 {
1937     struct dirent *dirp;
1938     DIR *dp;
1939 
1940     if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1941         printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1942         return -1;
1943     }
1944 
1945     buf[0] = '\0';
1946     while ((dirp = readdir(dp)) != NULL) {
1947         FILE *f;
1948         snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1949                  dirp->d_name);
1950         f = fopen(buf, "r");
1951         if (f) {
1952             snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1953             fclose(f);
1954             break;
1955         }
1956         buf[0] = '\0';
1957     }
1958     closedir(dp);
1959     if (buf[0] == '\0') {
1960         printf("Unknown host!\n");
1961         return -1;
1962     }
1963 
1964     return 0;
1965 }
1966 
1967 static uint64_t kvmppc_read_int_dt(const char *filename)
1968 {
1969     union {
1970         uint32_t v32;
1971         uint64_t v64;
1972     } u;
1973     FILE *f;
1974     int len;
1975 
1976     f = fopen(filename, "rb");
1977     if (!f) {
1978         return -1;
1979     }
1980 
1981     len = fread(&u, 1, sizeof(u), f);
1982     fclose(f);
1983     switch (len) {
1984     case 4:
1985         /* property is a 32-bit quantity */
1986         return be32_to_cpu(u.v32);
1987     case 8:
1988         return be64_to_cpu(u.v64);
1989     }
1990 
1991     return 0;
1992 }
1993 
1994 /* Read a CPU node property from the host device tree that's a single
1995  * integer (32-bit or 64-bit).  Returns 0 if anything goes wrong
1996  * (can't find or open the property, or doesn't understand the
1997  * format) */
1998 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1999 {
2000     char buf[PATH_MAX], *tmp;
2001     uint64_t val;
2002 
2003     if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
2004         return -1;
2005     }
2006 
2007     tmp = g_strdup_printf("%s/%s", buf, propname);
2008     val = kvmppc_read_int_dt(tmp);
2009     g_free(tmp);
2010 
2011     return val;
2012 }
2013 
2014 uint64_t kvmppc_get_clockfreq(void)
2015 {
2016     return kvmppc_read_int_cpu_dt("clock-frequency");
2017 }
2018 
2019 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
2020 {
2021     PowerPCCPU *cpu = ppc_env_get_cpu(env);
2022     CPUState *cs = CPU(cpu);
2023 
2024     if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
2025         !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
2026         return 0;
2027     }
2028 
2029     return 1;
2030 }
2031 
2032 int kvmppc_get_hasidle(CPUPPCState *env)
2033 {
2034     struct kvm_ppc_pvinfo pvinfo;
2035 
2036     if (!kvmppc_get_pvinfo(env, &pvinfo) &&
2037         (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
2038         return 1;
2039     }
2040 
2041     return 0;
2042 }
2043 
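/*
 * Fill 'buf' with the hypercall instruction sequence the guest should
 * use.  Returns 0 when KVM supplied its own sequence via
 * KVM_PPC_GET_PVINFO, or 1 when the endian-agnostic "always fail,
 * return -1 in r3" fallback below was used instead.
 */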
2044 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
2045 {
2046     uint32_t *hc = (uint32_t *)buf;
2047     struct kvm_ppc_pvinfo pvinfo;
2048 
2049     if (!kvmppc_get_pvinfo(env, &pvinfo)) {
2050         memcpy(buf, pvinfo.hcall, buf_len);
2051         return 0;
2052     }
2053 
2054     /*
2055      * Fall back to hypercalls that always fail, regardless of endianness:
2056      *
2057      *     tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
2058      *     li r3, -1
2059      *     b .+8       (becomes nop in wrong endian)
2060      *     bswap32(li r3, -1)
2061      */
2062 
2063     hc[0] = cpu_to_be32(0x08000048);
2064     hc[1] = cpu_to_be32(0x3860ffff);
2065     hc[2] = cpu_to_be32(0x48000008);
2066     hc[3] = cpu_to_be32(bswap32(0x3860ffff));
2067 
2068     return 1;
2069 }
2070 
2071 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
2072 {
2073     return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
2074 }
2075 
2076 void kvmppc_enable_logical_ci_hcalls(void)
2077 {
2078     /*
2079      * FIXME: it would be nice if we could detect the cases where
2080      * we're using a device which requires the in-kernel
2081      * implementation of these hcalls but the kernel lacks it, and
2082      * produce a warning.
2083      */
2084     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
2085     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
2086 }
2087 
2088 void kvmppc_enable_set_mode_hcall(void)
2089 {
2090     kvmppc_enable_hcall(kvm_state, H_SET_MODE);
2091 }
2092 
2093 void kvmppc_enable_clear_ref_mod_hcalls(void)
2094 {
2095     kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
2096     kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
2097 }
2098 
2099 void kvmppc_set_papr(PowerPCCPU *cpu)
2100 {
2101     CPUState *cs = CPU(cpu);
2102     int ret;
2103 
2104     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2105     if (ret) {
2106         error_report("This vCPU type or KVM version does not support PAPR");
2107         exit(1);
2108     }
2109 
2110     /* Update the capability flag so we sync the right information
2111      * with kvm */
2112     cap_papr = 1;
2113 }
2114 
2115 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr)
2116 {
2117     return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr);
2118 }
2119 
2120 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2121 {
2122     CPUState *cs = CPU(cpu);
2123     int ret;
2124 
2125     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2126     if (ret && mpic_proxy) {
2127         error_report("This KVM version does not support EPR");
2128         exit(1);
2129     }
2130 }
2131 
2132 int kvmppc_smt_threads(void)
2133 {
2134     return cap_ppc_smt ? cap_ppc_smt : 1;
2135 }
2136 
2137 int kvmppc_set_smt_threads(int smt)
2138 {
2139     int ret;
2140 
2141     ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_SMT, 0, smt, 0);
2142     if (!ret) {
2143         cap_ppc_smt = smt;
2144     }
2145     return ret;
2146 }
2147 
2148 void kvmppc_hint_smt_possible(Error **errp)
2149 {
2150     int i;
2151     GString *g;
2152     char *s;
2153 
2154     assert(kvm_enabled());
2155     if (cap_ppc_smt_possible) {
2156         g = g_string_new("Available VSMT modes:");
2157         for (i = 63; i >= 0; i--) {
2158             if ((1UL << i) & cap_ppc_smt_possible) {
2159                 g_string_append_printf(g, " %lu", (1UL << i));
2160             }
2161         }
2162         s = g_string_free(g, false);
2163         error_append_hint(errp, "%s.\n", s);
2164         g_free(s);
2165     } else {
2166         error_append_hint(errp,
2167                           "This KVM seems to be too old to support VSMT.\n");
2168     }
2169 }
2170 
2171 
2172 #ifdef TARGET_PPC64
2173 off_t kvmppc_alloc_rma(void **rma)
2174 {
2175     off_t size;
2176     int fd;
2177     struct kvm_allocate_rma ret;
2178 
2179     /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
2180      * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
2181      *                      not necessary on this hardware
2182      * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
2183      *
2184      * FIXME: We should allow the user to force contiguous RMA
2185      * allocation in the cap_ppc_rma==1 case.
2186      */
2187     if (cap_ppc_rma < 2) {
2188         return 0;
2189     }
2190 
2191     fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
2192     if (fd < 0) {
2193         fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
2194                 strerror(errno));
2195         return -1;
2196     }
2197 
2198     size = MIN(ret.rma_size, 256ul << 20);
2199 
2200     *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2201     if (*rma == MAP_FAILED) {
2202         fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
2203         return -1;
2204     }
2205 
2206     return size;
2207 }
2208 
2209 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2210 {
2211     struct kvm_ppc_smmu_info info;
2212     long rampagesize, best_page_shift;
2213     int i;
2214 
2215     if (cap_ppc_rma >= 2) {
2216         return current_size;
2217     }
2218 
2219     /* Find the largest hardware supported page size that's less than
2220      * or equal to the (logical) backing page size of guest RAM */
2221     kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
2222     rampagesize = qemu_getrampagesize();
2223     best_page_shift = 0;
2224 
2225     for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2226         struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2227 
2228         if (!sps->page_shift) {
2229             continue;
2230         }
2231 
2232         if ((sps->page_shift > best_page_shift)
2233             && ((1UL << sps->page_shift) <= rampagesize)) {
2234             best_page_shift = sps->page_shift;
2235         }
2236     }
2237 
2238     return MIN(current_size,
2239                1ULL << (best_page_shift + hash_shift - 7));
2240 }
2241 #endif
2242 
2243 bool kvmppc_spapr_use_multitce(void)
2244 {
2245     return cap_spapr_multitce;
2246 }
2247 
2248 int kvmppc_spapr_enable_inkernel_multitce(void)
2249 {
2250     int ret;
2251 
2252     ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2253                             H_PUT_TCE_INDIRECT, 1);
2254     if (!ret) {
2255         ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2256                                 H_STUFF_TCE, 1);
2257     }
2258 
2259     return ret;
2260 }
2261 
2262 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
2263                               uint64_t bus_offset, uint32_t nb_table,
2264                               int *pfd, bool need_vfio)
2265 {
2266     long len;
2267     int fd;
2268     void *table;
2269 
2270     /* Must set fd to -1 so we don't try to munmap when called for
2271      * destroying the table, which the upper layers -will- do
2272      */
2273     *pfd = -1;
2274     if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2275         return NULL;
2276     }
2277 
2278     if (cap_spapr_tce_64) {
2279         struct kvm_create_spapr_tce_64 args = {
2280             .liobn = liobn,
2281             .page_shift = page_shift,
2282             .offset = bus_offset >> page_shift,
2283             .size = nb_table,
2284             .flags = 0
2285         };
2286         fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE_64, &args);
2287         if (fd < 0) {
2288             fprintf(stderr,
2289                     "KVM: Failed to create TCE64 table for liobn 0x%x\n",
2290                     liobn);
2291             return NULL;
2292         }
2293     } else if (cap_spapr_tce) {
2294         uint64_t window_size = (uint64_t) nb_table << page_shift;
2295         struct kvm_create_spapr_tce args = {
2296             .liobn = liobn,
2297             .window_size = window_size,
2298         };
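        /*
         * The legacy ioctl only takes a 32-bit window size and no bus
         * offset, so bail out if either can't be represented.
         */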
2299         if ((window_size != args.window_size) || bus_offset) {
2300             return NULL;
2301         }
2302         fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2303         if (fd < 0) {
2304             fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2305                     liobn);
2306             return NULL;
2307         }
2308     } else {
2309         return NULL;
2310     }
2311 
2312     len = nb_table * sizeof(uint64_t);
2313     /* FIXME: round this up to page size */
2314 
2315     table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2316     if (table == MAP_FAILED) {
2317         fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2318                 liobn);
2319         close(fd);
2320         return NULL;
2321     }
2322 
2323     *pfd = fd;
2324     return table;
2325 }
2326 
2327 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
2328 {
2329     long len;
2330 
2331     if (fd < 0) {
2332         return -1;
2333     }
2334 
2335     len = nb_table * sizeof(uint64_t);
2336     if ((munmap(table, len) < 0) ||
2337         (close(fd) < 0)) {
2338         fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
2339                 strerror(errno));
2340         /* Leak the table */
2341     }
2342 
2343     return 0;
2344 }
2345 
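/*
 * (Re)allocate the guest hash page table.  Returns 0 when QEMU must
 * allocate the HPT itself (TCG or PR KVM), a positive shift (log2 of
 * the HPT size in bytes) when the kernel owns the allocation, or a
 * negative errno on failure.
 */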
2346 int kvmppc_reset_htab(int shift_hint)
2347 {
2348     uint32_t shift = shift_hint;
2349 
2350     if (!kvm_enabled()) {
2351         /* Full emulation, tell caller to allocate htab itself */
2352         return 0;
2353     }
2354     if (kvm_vm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2355         int ret;
2356         ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2357         if (ret == -ENOTTY) {
2358             /* At least some versions of PR KVM advertise the
2359              * capability, but don't implement the ioctl().  Oops.
2360              * Return 0 so that we allocate the htab in qemu, as is
2361              * correct for PR. */
2362             return 0;
2363         } else if (ret < 0) {
2364             return ret;
2365         }
2366         return shift;
2367     }
2368 
2369     /* We have a kernel that predates the htab reset calls.  For PR
2370      * KVM, we need to allocate the htab ourselves, for an HV KVM of
2371      * this era, it has allocated a 16MB fixed size hash table already. */
2372     if (kvmppc_is_pr(kvm_state)) {
2373         /* PR - tell caller to allocate htab */
2374         return 0;
2375     } else {
2376         /* HV - assume 16MB kernel allocated htab */
2377         return 24;
2378     }
2379 }
2380 
2381 static inline uint32_t mfpvr(void)
2382 {
2383     uint32_t pvr;
2384 
2385     asm ("mfpvr %0"
2386          : "=r"(pvr));
2387     return pvr;
2388 }
2389 
2390 static void alter_insns(uint64_t *word, uint64_t flags, bool on)
2391 {
2392     if (on) {
2393         *word |= flags;
2394     } else {
2395         *word &= ~flags;
2396     }
2397 }
2398 
2399 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2400 {
2401     PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2402     uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2403     uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2404 
2405     /* Now fix up the class with information we can query from the host */
2406     pcc->pvr = mfpvr();
2407 
2408     alter_insns(&pcc->insns_flags, PPC_ALTIVEC,
2409                 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_ALTIVEC);
2410     alter_insns(&pcc->insns_flags2, PPC2_VSX,
2411                 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_VSX);
2412     alter_insns(&pcc->insns_flags2, PPC2_DFP,
2413                 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_DFP);
2414 
2415     if (dcache_size != -1) {
2416         pcc->l1_dcache_size = dcache_size;
2417     }
2418 
2419     if (icache_size != -1) {
2420         pcc->l1_icache_size = icache_size;
2421     }
2422 
2423 #if defined(TARGET_PPC64)
2424     pcc->radix_page_info = kvm_get_radix_page_info();
2425 
2426     if ((pcc->pvr & 0xffffff00) == CPU_POWERPC_POWER9_DD1) {
2427         /*
2428          * POWER9 DD1 has some bugs which make it not really ISA 3.00
2429          * compliant.  More importantly, advertising ISA 3.00
2430          * architected mode may prevent guests from activating
2431          * necessary DD1 workarounds.
2432          */
2433         pcc->pcr_supported &= ~(PCR_COMPAT_3_00 | PCR_COMPAT_2_07
2434                                 | PCR_COMPAT_2_06 | PCR_COMPAT_2_05);
2435     }
2436 #endif /* defined(TARGET_PPC64) */
2437 }
2438 
2439 bool kvmppc_has_cap_epr(void)
2440 {
2441     return cap_epr;
2442 }
2443 
2444 bool kvmppc_has_cap_fixup_hcalls(void)
2445 {
2446     return cap_fixup_hcalls;
2447 }
2448 
2449 bool kvmppc_has_cap_htm(void)
2450 {
2451     return cap_htm;
2452 }
2453 
2454 bool kvmppc_has_cap_mmu_radix(void)
2455 {
2456     return cap_mmu_radix;
2457 }
2458 
2459 bool kvmppc_has_cap_mmu_hash_v3(void)
2460 {
2461     return cap_mmu_hash_v3;
2462 }
2463 
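/*
 * Query KVM_PPC_GET_CPU_CHAR and distil it into the three
 * cap_ppc_safe_* globals.  The values follow the convention expected
 * by the callers, roughly: 0 = vulnerable or unknown, 1 = workaround
 * available, 2 = fixed in hardware (not vulnerable).
 */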
2464 static void kvmppc_get_cpu_characteristics(KVMState *s)
2465 {
2466     struct kvm_ppc_cpu_char c;
2467     int ret;
2468 
2469     /* Assume broken */
2470     cap_ppc_safe_cache = 0;
2471     cap_ppc_safe_bounds_check = 0;
2472     cap_ppc_safe_indirect_branch = 0;
2473 
2474     ret = kvm_vm_check_extension(s, KVM_CAP_PPC_GET_CPU_CHAR);
2475     if (!ret) {
2476         return;
2477     }
2478     ret = kvm_vm_ioctl(s, KVM_PPC_GET_CPU_CHAR, &c);
2479     if (ret < 0) {
2480         return;
2481     }
2482     /* Parse and set cap_ppc_safe_cache */
2483     if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_L1D_FLUSH_PR) {
2484         cap_ppc_safe_cache = 2;
2485     } else if ((c.character & c.character_mask & H_CPU_CHAR_L1D_THREAD_PRIV) &&
2486                (c.character & c.character_mask
2487                 & (H_CPU_CHAR_L1D_FLUSH_ORI30 | H_CPU_CHAR_L1D_FLUSH_TRIG2))) {
2488         cap_ppc_safe_cache = 1;
2489     }
2490     /* Parse and set cap_ppc_safe_bounds_check */
2491     if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_BNDS_CHK_SPEC_BAR) {
2492         cap_ppc_safe_bounds_check = 2;
2493     } else if (c.character & c.character_mask & H_CPU_CHAR_SPEC_BAR_ORI31) {
2494         cap_ppc_safe_bounds_check = 1;
2495     }
2496     /* Parse and set cap_ppc_safe_indirect_branch */
2497     if (c.character & H_CPU_CHAR_BCCTRL_SERIALISED) {
2498         cap_ppc_safe_indirect_branch = 2;
2499     }
2500 }
2501 
2502 int kvmppc_get_cap_safe_cache(void)
2503 {
2504     return cap_ppc_safe_cache;
2505 }
2506 
2507 int kvmppc_get_cap_safe_bounds_check(void)
2508 {
2509     return cap_ppc_safe_bounds_check;
2510 }
2511 
2512 int kvmppc_get_cap_safe_indirect_branch(void)
2513 {
2514     return cap_ppc_safe_indirect_branch;
2515 }
2516 
2517 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
2518 {
2519     uint32_t host_pvr = mfpvr();
2520     PowerPCCPUClass *pvr_pcc;
2521 
2522     pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2523     if (pvr_pcc == NULL) {
2524         pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2525     }
2526 
2527     return pvr_pcc;
2528 }
2529 
2530 static int kvm_ppc_register_host_cpu_type(MachineState *ms)
2531 {
2532     TypeInfo type_info = {
2533         .name = TYPE_HOST_POWERPC_CPU,
2534         .class_init = kvmppc_host_cpu_class_init,
2535     };
2536     MachineClass *mc = MACHINE_GET_CLASS(ms);
2537     PowerPCCPUClass *pvr_pcc;
2538     ObjectClass *oc;
2539     DeviceClass *dc;
2540     int i;
2541 
2542     pvr_pcc = kvm_ppc_get_host_cpu_class();
2543     if (pvr_pcc == NULL) {
2544         return -1;
2545     }
2546     type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2547     type_register(&type_info);
2548     if (object_dynamic_cast(OBJECT(ms), TYPE_SPAPR_MACHINE)) {
2549         /* override TCG default cpu type with 'host' cpu model */
2550         mc->default_cpu_type = TYPE_HOST_POWERPC_CPU;
2551     }
2552 
2553     oc = object_class_by_name(type_info.name);
2554     g_assert(oc);
2555 
2556     /*
2557      * Update generic CPU family class alias (e.g. on a POWER8NVL host,
2558      * we want "POWER8" to be a "family" alias that points to the current
2559      * host CPU type, too)
2560      */
2561     dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc));
2562     for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) {
2563         if (strcasecmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) {
2564             char *suffix;
2565 
2566             ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc));
2567             suffix = strstr(ppc_cpu_aliases[i].model, POWERPC_CPU_TYPE_SUFFIX);
2568             if (suffix) {
2569                 *suffix = 0;
2570             }
2571             break;
2572         }
2573     }
2574 
2575     return 0;
2576 }
2577 
2578 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2579 {
2580     struct kvm_rtas_token_args args = {
2581         .token = token,
2582     };
2583 
2584     if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2585         return -ENOENT;
2586     }
2587 
2588     strncpy(args.name, function, sizeof(args.name));
2589 
2590     return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2591 }
2592 
2593 int kvmppc_get_htab_fd(bool write, uint64_t index, Error **errp)
2594 {
2595     struct kvm_get_htab_fd s = {
2596         .flags = write ? KVM_GET_HTAB_WRITE : 0,
2597         .start_index = index,
2598     };
2599     int ret;
2600 
2601     if (!cap_htab_fd) {
2602         error_setg(errp, "KVM version doesn't support %s the HPT",
2603                    write ? "writing" : "reading");
2604         return -ENOTSUP;
2605     }
2606 
2607     ret = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2608     if (ret < 0) {
2609         error_setg(errp, "Unable to open fd for %s HPT %s KVM: %s",
2610                    write ? "writing" : "reading", write ? "to" : "from",
2611                    strerror(errno));
2612         return -errno;
2613     }
2614 
2615     return ret;
2616 }
2617 
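/*
 * Stream HPT chunks from the kernel's HTAB fd into the migration
 * stream: each chunk is written as a be32 index, be16 n_valid,
 * be16 n_invalid header followed by n_valid 16-byte HPTEs.  Returns 1
 * once the fd reports end-of-stream (HPT fully transferred), 0 if the
 * max_ns time budget ran out first, or a negative value on read error.
 */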
2618 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2619 {
2620     int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2621     uint8_t buf[bufsize];
2622     ssize_t rc;
2623 
2624     do {
2625         rc = read(fd, buf, bufsize);
2626         if (rc < 0) {
2627             fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2628                     strerror(errno));
2629             return rc;
2630         } else if (rc) {
2631             uint8_t *buffer = buf;
2632             ssize_t n = rc;
2633             while (n) {
2634                 struct kvm_get_htab_header *head =
2635                     (struct kvm_get_htab_header *) buffer;
2636                 size_t chunksize = sizeof(*head) +
2637                      HASH_PTE_SIZE_64 * head->n_valid;
2638 
2639                 qemu_put_be32(f, head->index);
2640                 qemu_put_be16(f, head->n_valid);
2641                 qemu_put_be16(f, head->n_invalid);
2642                 qemu_put_buffer(f, (void *)(head + 1),
2643                                 HASH_PTE_SIZE_64 * head->n_valid);
2644 
2645                 buffer += chunksize;
2646                 n -= chunksize;
2647             }
2648         }
2649     } while ((rc != 0)
2650              && ((max_ns < 0)
2651                  || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2652 
2653     return (rc == 0) ? 1 : 0;
2654 }
2655 
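/*
 * Receive-side counterpart of kvmppc_save_htab(): rebuild one chunk
 * header plus its HPTE payload from the migration stream and push it
 * to the kernel's HTAB fd in a single write().
 */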
2656 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2657                            uint16_t n_valid, uint16_t n_invalid)
2658 {
2659     struct kvm_get_htab_header *buf;
2660     size_t chunksize = sizeof(*buf) + n_valid * HASH_PTE_SIZE_64;
2661     ssize_t rc;
2662 
2663     buf = alloca(chunksize);
2664     buf->index = index;
2665     buf->n_valid = n_valid;
2666     buf->n_invalid = n_invalid;
2667 
2668     qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64 * n_valid);
2669 
2670     rc = write(fd, buf, chunksize);
2671     if (rc < 0) {
2672         fprintf(stderr, "Error writing KVM hash table: %s\n",
2673                 strerror(errno));
2674         return rc;
2675     }
2676     if (rc != chunksize) {
2677         /* We should never get a short write on a single chunk */
2678         fprintf(stderr, "Short write, restoring KVM hash table\n");
2679         return -1;
2680     }
2681     return 0;
2682 }
2683 
2684 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2685 {
2686     return true;
2687 }
2688 
2689 void kvm_arch_init_irq_routing(KVMState *s)
2690 {
2691 }
2692 
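/*
 * Read 'n' HPTEs starting at index 'ptex' through the HTAB fd,
 * copying valid entries into 'hptes' and zero-filling the ranges the
 * kernel reports as invalid.
 */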
2693 void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n)
2694 {
2695     int fd, rc;
2696     int i;
2697 
2698     fd = kvmppc_get_htab_fd(false, ptex, &error_abort);
2699 
2700     i = 0;
2701     while (i < n) {
2702         struct kvm_get_htab_header *hdr;
2703         int m = n < HPTES_PER_GROUP ? n : HPTES_PER_GROUP;
2704         char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64];
2705 
2706         rc = read(fd, buf, sizeof(buf));
2707         if (rc < 0) {
2708             hw_error("kvmppc_read_hptes: Unable to read HPTEs");
2709         }
2710 
2711         hdr = (struct kvm_get_htab_header *)buf;
2712         while ((i < n) && ((char *)hdr < (buf + rc))) {
2713             int invalid = hdr->n_invalid, valid = hdr->n_valid;
2714 
2715             if (hdr->index != (ptex + i)) {
2716                 hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32
2717                          " != (%"HWADDR_PRIu" + %d)", hdr->index, ptex, i);
2718             }
2719 
2720             if (n - i < valid) {
2721                 valid = n - i;
2722             }
2723             memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * valid);
2724             i += valid;
2725 
2726             if ((n - i) < invalid) {
2727                 invalid = n - i;
2728             }
2729             memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64);
2730             i += invalid;
2731 
2732             hdr = (struct kvm_get_htab_header *)
2733                 ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid);
2734         }
2735     }
2736 
2737     close(fd);
2738 }
2739 
2740 void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1)
2741 {
2742     int fd, rc;
2743     struct {
2744         struct kvm_get_htab_header hdr;
2745         uint64_t pte0;
2746         uint64_t pte1;
2747     } buf;
2748 
2749     fd = kvmppc_get_htab_fd(true, 0 /* Ignored */, &error_abort);
2750 
2751     buf.hdr.n_valid = 1;
2752     buf.hdr.n_invalid = 0;
2753     buf.hdr.index = ptex;
2754     buf.pte0 = cpu_to_be64(pte0);
2755     buf.pte1 = cpu_to_be64(pte1);
2756 
2757     rc = write(fd, &buf, sizeof(buf));
2758     if (rc != sizeof(buf)) {
2759         hw_error("kvmppc_write_hpte: Unable to update KVM HPT");
2760     }
2761     close(fd);
2762 }
2763 
2764 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2765                              uint64_t address, uint32_t data, PCIDevice *dev)
2766 {
2767     return 0;
2768 }
2769 
2770 int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
2771                                 int vector, PCIDevice *dev)
2772 {
2773     return 0;
2774 }
2775 
2776 int kvm_arch_release_virq_post(int virq)
2777 {
2778     return 0;
2779 }
2780 
2781 int kvm_arch_msi_data_to_gsi(uint32_t data)
2782 {
2783     return data & 0xffff;
2784 }
2785 
2786 int kvmppc_enable_hwrng(void)
2787 {
2788     if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2789         return -1;
2790     }
2791 
2792     return kvmppc_enable_hcall(kvm_state, H_RANDOM);
2793 }
2794 
2795 void kvmppc_check_papr_resize_hpt(Error **errp)
2796 {
2797     if (!kvm_enabled()) {
2798         return; /* No KVM, we're good */
2799     }
2800 
2801     if (cap_resize_hpt) {
2802         return; /* Kernel has explicit support, we're good */
2803     }
2804 
2805     /* Otherwise fallback on looking for PR KVM */
2806     if (kvmppc_is_pr(kvm_state)) {
2807         return;
2808     }
2809 
2810     error_setg(errp,
2811                "Hash page table resizing not available with this KVM version");
2812 }
2813 
2814 int kvmppc_resize_hpt_prepare(PowerPCCPU *cpu, target_ulong flags, int shift)
2815 {
2816     CPUState *cs = CPU(cpu);
2817     struct kvm_ppc_resize_hpt rhpt = {
2818         .flags = flags,
2819         .shift = shift,
2820     };
2821 
2822     if (!cap_resize_hpt) {
2823         return -ENOSYS;
2824     }
2825 
2826     return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_PREPARE, &rhpt);
2827 }
2828 
2829 int kvmppc_resize_hpt_commit(PowerPCCPU *cpu, target_ulong flags, int shift)
2830 {
2831     CPUState *cs = CPU(cpu);
2832     struct kvm_ppc_resize_hpt rhpt = {
2833         .flags = flags,
2834         .shift = shift,
2835     };
2836 
2837     if (!cap_resize_hpt) {
2838         return -ENOSYS;
2839     }
2840 
2841     return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_COMMIT, &rhpt);
2842 }
2843 
2844 /*
2845  * This is a helper function to detect a post migration scenario
2846  * in which a guest, running as KVM-HV, freezes in cpu_post_load because
2847  * the guest kernel can't handle a PVR value other than the actual host
2848  * PVR in KVM_SET_SREGS, even if pvr_match() returns true.
2849  *
2850  * If we don't have cap_ppc_pvr_compat and we're not running in PR
2851  * (so, we're HV), return true. The workaround itself is done in
2852  * cpu_post_load.
2853  *
2854  * The order here is important: we'll only check for KVM PR as a
2855  * fallback if the guest kernel can't handle the situation itself.
2856  * We need to avoid as much as possible querying the running KVM type
2857  * in QEMU level.
2858  */
2859 bool kvmppc_pvr_workaround_required(PowerPCCPU *cpu)
2860 {
2861     CPUState *cs = CPU(cpu);
2862 
2863     if (!kvm_enabled()) {
2864         return false;
2865     }
2866 
2867     if (cap_ppc_pvr_compat) {
2868         return false;
2869     }
2870 
2871     return !kvmppc_is_pr(cs->kvm_state);
2872 }
2873