xref: /openbmc/qemu/target/ppc/kvm.c (revision 2b108085)
1 /*
2  * PowerPC implementation of KVM hooks
3  *
4  * Copyright IBM Corp. 2007
5  * Copyright (C) 2011 Freescale Semiconductor, Inc.
6  *
7  * Authors:
8  *  Jerone Young <jyoung5@us.ibm.com>
9  *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10  *  Hollis Blanchard <hollisb@us.ibm.com>
11  *
12  * This work is licensed under the terms of the GNU GPL, version 2 or later.
13  * See the COPYING file in the top-level directory.
14  *
15  */
16 
17 #include "qemu/osdep.h"
18 #include <dirent.h>
19 #include <sys/ioctl.h>
20 #include <sys/vfs.h>
21 
22 #include <linux/kvm.h>
23 
24 #include "qemu-common.h"
25 #include "qapi/error.h"
26 #include "qemu/error-report.h"
27 #include "cpu.h"
28 #include "cpu-models.h"
29 #include "qemu/timer.h"
30 #include "sysemu/sysemu.h"
31 #include "sysemu/hw_accel.h"
32 #include "kvm_ppc.h"
33 #include "sysemu/cpus.h"
34 #include "sysemu/device_tree.h"
35 #include "mmu-hash64.h"
36 
37 #include "hw/sysbus.h"
38 #include "hw/ppc/spapr.h"
39 #include "hw/ppc/spapr_vio.h"
40 #include "hw/ppc/spapr_cpu_core.h"
41 #include "hw/ppc/ppc.h"
42 #include "sysemu/watchdog.h"
43 #include "trace.h"
44 #include "exec/gdbstub.h"
45 #include "exec/memattrs.h"
46 #include "exec/ram_addr.h"
47 #include "sysemu/hostmem.h"
48 #include "qemu/cutils.h"
49 #include "qemu/mmap-alloc.h"
50 #include "elf.h"
51 #include "sysemu/kvm_int.h"
52 
53 //#define DEBUG_KVM
54 
55 #ifdef DEBUG_KVM
56 #define DPRINTF(fmt, ...) \
57     do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
58 #else
59 #define DPRINTF(fmt, ...) \
60     do { } while (0)
61 #endif
62 
63 #define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"
64 
65 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
66     KVM_CAP_LAST_INFO
67 };
68 
69 static int cap_interrupt_unset = false;
70 static int cap_interrupt_level = false;
71 static int cap_segstate;
72 static int cap_booke_sregs;
73 static int cap_ppc_smt;
74 static int cap_ppc_smt_possible;
75 static int cap_ppc_rma;
76 static int cap_spapr_tce;
77 static int cap_spapr_tce_64;
78 static int cap_spapr_multitce;
79 static int cap_spapr_vfio;
80 static int cap_hior;
81 static int cap_one_reg;
82 static int cap_epr;
83 static int cap_ppc_watchdog;
84 static int cap_papr;
85 static int cap_htab_fd;
86 static int cap_fixup_hcalls;
87 static int cap_htm;             /* Hardware transactional memory support */
88 static int cap_mmu_radix;
89 static int cap_mmu_hash_v3;
90 static int cap_resize_hpt;
91 static int cap_ppc_pvr_compat;
92 static int cap_ppc_safe_cache;
93 static int cap_ppc_safe_bounds_check;
94 static int cap_ppc_safe_indirect_branch;
95 
96 static uint32_t debug_inst_opcode;
97 
98 /* XXX We have a race condition where we actually have a level-triggered
99  *     interrupt, but the infrastructure can't expose that yet, so the guest
100  *     takes the interrupt but ignores it, goes to sleep and never gets
101  *     notified that there's still an interrupt pending.
102  *
103  *     As a quick workaround, let's just wake up again 20 ms after we injected
104  *     an interrupt. That way we can ensure that we're always reinjecting
105  *     interrupts in case the guest swallowed them.
106  */
107 static QEMUTimer *idle_timer;
108 
109 static void kvm_kick_cpu(void *opaque)
110 {
111     PowerPCCPU *cpu = opaque;
112 
113     qemu_cpu_kick(CPU(cpu));
114 }
115 
116 /* Check whether we are running with KVM-PR (instead of KVM-HV).  This
117  * should only be used for fallback tests - generally we should use
118  * explicit capabilities for the features we want, rather than
119  * assuming what is/isn't available depending on the KVM variant. */
120 static bool kvmppc_is_pr(KVMState *ks)
121 {
122     /* Assume KVM-PR if the GET_PVINFO capability is available */
123     return kvm_vm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
124 }
125 
126 static int kvm_ppc_register_host_cpu_type(MachineState *ms);
127 static void kvmppc_get_cpu_characteristics(KVMState *s);
128 
129 int kvm_arch_init(MachineState *ms, KVMState *s)
130 {
131     cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
132     cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
133     cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
134     cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
135     cap_ppc_smt_possible = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT_POSSIBLE);
136     cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
137     cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
138     cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64);
139     cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
140     cap_spapr_vfio = kvm_vm_check_extension(s, KVM_CAP_SPAPR_TCE_VFIO);
141     cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
142     cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
143     cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
144     cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
145     /* Note: we don't set cap_papr here, because this capability is
146      * only activated later, by kvmppc_set_papr() */
147     cap_htab_fd = kvm_vm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
148     cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
149     cap_ppc_smt = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT);
150     cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
151     cap_mmu_radix = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_RADIX);
152     cap_mmu_hash_v3 = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_HASH_V3);
153     cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT);
154     kvmppc_get_cpu_characteristics(s);
155     /*
156      * Note: setting it to false because there is no such capability
157      * in KVM at this moment.
158      *
159      * TODO: call kvm_vm_check_extension() with the right capability
160      * after the kernel starts implementing it. */
161     cap_ppc_pvr_compat = false;
162 
163     if (!cap_interrupt_level) {
164         fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
165                         "VM to stall at times!\n");
166     }
167 
168     kvm_ppc_register_host_cpu_type(ms);
169 
170     return 0;
171 }
172 
173 int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
174 {
175     return 0;
176 }
177 
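/* Tell KVM which PVR the guest uses, so it can pick the matching emulation.
 * On BookE the native PVR is kept, so nothing needs to be done there. */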
178 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
179 {
180     CPUPPCState *cenv = &cpu->env;
181     CPUState *cs = CPU(cpu);
182     struct kvm_sregs sregs;
183     int ret;
184 
185     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
186         /* What we're really trying to say is "if we're on BookE, we use
187            the native PVR for now". This is the only sane way to check
188            it though, so we potentially mislead users into thinking they
189            can run BookE guests on BookS. Let's hope nobody is daring enough :) */
190         return 0;
191     } else {
192         if (!cap_segstate) {
193             fprintf(stderr, "kvm error: missing PVR setting capability\n");
194             return -ENOSYS;
195         }
196     }
197 
198     ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
199     if (ret) {
200         return ret;
201     }
202 
203     sregs.pvr = cenv->spr[SPR_PVR];
204     return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
205 }
206 
207 /* Set up a shared TLB array with KVM */
208 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
209 {
210     CPUPPCState *env = &cpu->env;
211     CPUState *cs = CPU(cpu);
212     struct kvm_book3e_206_tlb_params params = {};
213     struct kvm_config_tlb cfg = {};
214     unsigned int entries = 0;
215     int ret, i;
216 
217     if (!kvm_enabled() ||
218         !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
219         return 0;
220     }
221 
222     assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
223 
224     for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
225         params.tlb_sizes[i] = booke206_tlb_size(env, i);
226         params.tlb_ways[i] = booke206_tlb_ways(env, i);
227         entries += params.tlb_sizes[i];
228     }
229 
230     assert(entries == env->nb_tlb);
231     assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
232 
233     env->tlb_dirty = true;
234 
235     cfg.array = (uintptr_t)env->tlb.tlbm;
236     cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
237     cfg.params = (uintptr_t)&params;
238     cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
239 
240     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
241     if (ret < 0) {
242         fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
243                 __func__, strerror(-ret));
244         return ret;
245     }
246 
247     env->kvm_sw_tlb = true;
248     return 0;
249 }
250 
251 
252 #if defined(TARGET_PPC64)
253 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
254                                        struct kvm_ppc_smmu_info *info)
255 {
256     CPUPPCState *env = &cpu->env;
257     CPUState *cs = CPU(cpu);
258 
259     memset(info, 0, sizeof(*info));
260 
261     /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so we
262      * need to "guess" what the supported page sizes are.
263      *
264      * For that to work we make a few assumptions:
265      *
266      * - Check whether we are running "PR" KVM which only supports 4K
267      *   and 16M pages, but supports them regardless of the backing
268      *   store characteristics. We also don't support 1T segments.
269      *
270      *   This is safe because if HV KVM ever supports that capability or
271      *   PR KVM grows support for more page/segment sizes, those versions
272      *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
273      *   will not hit this fallback.
274      *
275      * - Else we are running HV KVM. This means we only support page
276      *   sizes that fit in the backing store. Additionally we only
277      *   advertise 64K pages if the processor is ARCH 2.06 and we assume
278      *   P7 encodings for the SLB and hash table. Here too, we assume
279      *   support for any newer processor will mean a kernel that
280      *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
281      *   this fallback.
282      */
283     if (kvmppc_is_pr(cs->kvm_state)) {
284         /* No flags */
285         info->flags = 0;
286         info->slb_size = 64;
287 
288         /* Standard 4k base page size segment */
289         info->sps[0].page_shift = 12;
290         info->sps[0].slb_enc = 0;
291         info->sps[0].enc[0].page_shift = 12;
292         info->sps[0].enc[0].pte_enc = 0;
293 
294         /* Standard 16M large page size segment */
295         info->sps[1].page_shift = 24;
296         info->sps[1].slb_enc = SLB_VSID_L;
297         info->sps[1].enc[0].page_shift = 24;
298         info->sps[1].enc[0].pte_enc = 0;
299     } else {
300         int i = 0;
301 
302         /* HV KVM has backing store size restrictions */
303         info->flags = KVM_PPC_PAGE_SIZES_REAL;
304 
305         if (env->mmu_model & POWERPC_MMU_1TSEG) {
306             info->flags |= KVM_PPC_1T_SEGMENTS;
307         }
308 
309         if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 ||
310            POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) {
311             info->slb_size = 32;
312         } else {
313             info->slb_size = 64;
314         }
315 
316         /* Standard 4k base page size segment */
317         info->sps[i].page_shift = 12;
318         info->sps[i].slb_enc = 0;
319         info->sps[i].enc[0].page_shift = 12;
320         info->sps[i].enc[0].pte_enc = 0;
321         i++;
322 
323         /* 64K on MMU 2.06 and later */
324         if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 ||
325             POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) {
326             info->sps[i].page_shift = 16;
327             info->sps[i].slb_enc = 0x110;
328             info->sps[i].enc[0].page_shift = 16;
329             info->sps[i].enc[0].pte_enc = 1;
330             i++;
331         }
332 
333         /* Standard 16M large page size segment */
334         info->sps[i].page_shift = 24;
335         info->sps[i].slb_enc = SLB_VSID_L;
336         info->sps[i].enc[0].page_shift = 24;
337         info->sps[i].enc[0].pte_enc = 0;
338     }
339 }
340 
341 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
342 {
343     CPUState *cs = CPU(cpu);
344     int ret;
345 
346     if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
347         ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
348         if (ret == 0) {
349             return;
350         }
351     }
352 
353     kvm_get_fallback_smmu_info(cpu, info);
354 }
355 
356 struct ppc_radix_page_info *kvm_get_radix_page_info(void)
357 {
358     KVMState *s = KVM_STATE(current_machine->accelerator);
359     struct ppc_radix_page_info *radix_page_info;
360     struct kvm_ppc_rmmu_info rmmu_info;
361     int i;
362 
363     if (!kvm_check_extension(s, KVM_CAP_PPC_MMU_RADIX)) {
364         return NULL;
365     }
366     if (kvm_vm_ioctl(s, KVM_PPC_GET_RMMU_INFO, &rmmu_info)) {
367         return NULL;
368     }
369     radix_page_info = g_malloc0(sizeof(*radix_page_info));
370     radix_page_info->count = 0;
371     for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) {
372         if (rmmu_info.ap_encodings[i]) {
373             radix_page_info->entries[i] = rmmu_info.ap_encodings[i];
374             radix_page_info->count++;
375         }
376     }
377     return radix_page_info;
378 }
379 
380 target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu,
381                                      bool radix, bool gtse,
382                                      uint64_t proc_tbl)
383 {
384     CPUState *cs = CPU(cpu);
385     int ret;
386     uint64_t flags = 0;
387     struct kvm_ppc_mmuv3_cfg cfg = {
388         .process_table = proc_tbl,
389     };
390 
391     if (radix) {
392         flags |= KVM_PPC_MMUV3_RADIX;
393     }
394     if (gtse) {
395         flags |= KVM_PPC_MMUV3_GTSE;
396     }
397     cfg.flags = flags;
398     ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_CONFIGURE_V3_MMU, &cfg);
399     switch (ret) {
400     case 0:
401         return H_SUCCESS;
402     case -EINVAL:
403         return H_PARAMETER;
404     case -ENODEV:
405         return H_NOT_AVAILABLE;
406     default:
407         return H_HARDWARE;
408     }
409 }
410 
411 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
412 {
413     if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
414         return true;
415     }
416 
417     return (1ul << shift) <= rampgsize;
418 }
419 
420 static long max_cpu_page_size;
421 
422 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
423 {
424     static struct kvm_ppc_smmu_info smmu_info;
425     static bool has_smmu_info;
426     CPUPPCState *env = &cpu->env;
427     int iq, ik, jq, jk;
428     bool has_64k_pages = false;
429 
430     /* We only handle page sizes for 64-bit server guests for now */
431     if (!(env->mmu_model & POWERPC_MMU_64)) {
432         return;
433     }
434 
435     /* Collect MMU info from the kernel if we haven't done so already */
436     if (!has_smmu_info) {
437         kvm_get_smmu_info(cpu, &smmu_info);
438         has_smmu_info = true;
439     }
440 
441     if (!max_cpu_page_size) {
442         max_cpu_page_size = qemu_getrampagesize();
443     }
444 
445     /* Convert to QEMU form */
446     memset(&env->sps, 0, sizeof(env->sps));
447 
448     /* If we have HV KVM, we need to forbid CI large pages if our
449      * host page size is smaller than 64K.
450      */
451     if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) {
452         env->ci_large_pages = getpagesize() >= 0x10000;
453     }
454 
455     /*
456      * XXX This loop should be an entry-wide AND of the capabilities that
457      *     the selected CPU has with the capabilities that KVM supports.
458      */
459     for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
460         struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
461         struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
462 
463         if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
464                                  ksps->page_shift)) {
465             continue;
466         }
467         qsps->page_shift = ksps->page_shift;
468         qsps->slb_enc = ksps->slb_enc;
469         for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
470             if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
471                                      ksps->enc[jk].page_shift)) {
472                 continue;
473             }
474             if (ksps->enc[jk].page_shift == 16) {
475                 has_64k_pages = true;
476             }
477             qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
478             qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
479             if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
480                 break;
481             }
482         }
483         if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
484             break;
485         }
486     }
487     env->slb_nr = smmu_info.slb_size;
488     if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
489         env->mmu_model &= ~POWERPC_MMU_1TSEG;
490     }
491     if (!has_64k_pages) {
492         env->mmu_model &= ~POWERPC_MMU_64K;
493     }
494 }
495 
496 bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path)
497 {
498     Object *mem_obj = object_resolve_path(obj_path, NULL);
499     long pagesize = host_memory_backend_pagesize(MEMORY_BACKEND(mem_obj));
500 
501     return pagesize >= max_cpu_page_size;
502 }
503 
504 #else /* defined (TARGET_PPC64) */
505 
506 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
507 {
508 }
509 
510 bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path)
511 {
512     return true;
513 }
514 
515 #endif /* !defined (TARGET_PPC64) */
516 
517 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
518 {
519     return POWERPC_CPU(cpu)->vcpu_id;
520 }
521 
522 /* e500 supports 2 h/w breakpoints and 2 watchpoints.
523  * book3s supports only 1 watchpoint, so an array size
524  * of 4 is sufficient for now.
525  */
526 #define MAX_HW_BKPTS 4
527 
528 static struct HWBreakpoint {
529     target_ulong addr;
530     int type;
531 } hw_debug_points[MAX_HW_BKPTS];
532 
533 static CPUWatchpoint hw_watchpoint;
534 
535 /* By default, no hardware breakpoints or watchpoints are supported */
536 static int max_hw_breakpoint;
537 static int max_hw_watchpoint;
538 static int nb_hw_breakpoint;
539 static int nb_hw_watchpoint;
540 
541 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
542 {
543     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
544         max_hw_breakpoint = 2;
545         max_hw_watchpoint = 2;
546     }
547 
548     if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
549         fprintf(stderr, "Error initializing h/w breakpoints\n");
550         return;
551     }
552 }
553 
554 int kvm_arch_init_vcpu(CPUState *cs)
555 {
556     PowerPCCPU *cpu = POWERPC_CPU(cs);
557     CPUPPCState *cenv = &cpu->env;
558     int ret;
559 
560     /* Gather server MMU info from KVM and update the CPU state */
561     kvm_fixup_page_sizes(cpu);
562 
563     /* Synchronize sregs with kvm */
564     ret = kvm_arch_sync_sregs(cpu);
565     if (ret) {
566         if (ret == -EINVAL) {
567             error_report("Register sync failed... If you're using kvm-hv.ko,"
568                          " only \"-cpu host\" is possible");
569         }
570         return ret;
571     }
572 
573     idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
574 
575     switch (cenv->mmu_model) {
576     case POWERPC_MMU_BOOKE206:
577         /* This target supports access to KVM's guest TLB */
578         ret = kvm_booke206_tlb_init(cpu);
579         break;
580     case POWERPC_MMU_2_07:
581         if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
582             /* KVM-HV has transactional memory on POWER8 even without the
583              * KVM_CAP_PPC_HTM extension, so enable it here instead as
584              * long as it's available to userspace on the host. */
585             if (qemu_getauxval(AT_HWCAP2) & PPC_FEATURE2_HAS_HTM) {
586                 cap_htm = true;
587             }
588         }
589         break;
590     default:
591         break;
592     }
593 
594     kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
595     kvmppc_hw_debug_points_init(cenv);
596 
597     return ret;
598 }
599 
600 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
601 {
602     CPUPPCState *env = &cpu->env;
603     CPUState *cs = CPU(cpu);
604     struct kvm_dirty_tlb dirty_tlb;
605     unsigned char *bitmap;
606     int ret;
607 
608     if (!env->kvm_sw_tlb) {
609         return;
610     }
611 
612     bitmap = g_malloc((env->nb_tlb + 7) / 8);
613     memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
614 
615     dirty_tlb.bitmap = (uintptr_t)bitmap;
616     dirty_tlb.num_dirty = env->nb_tlb;
617 
618     ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
619     if (ret) {
620         fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
621                 __func__, strerror(-ret));
622     }
623 
624     g_free(bitmap);
625 }
626 
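/* Read a single SPR from KVM via the ONE_REG interface into env->spr[] */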
627 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
628 {
629     PowerPCCPU *cpu = POWERPC_CPU(cs);
630     CPUPPCState *env = &cpu->env;
631     union {
632         uint32_t u32;
633         uint64_t u64;
634     } val;
635     struct kvm_one_reg reg = {
636         .id = id,
637         .addr = (uintptr_t) &val,
638     };
639     int ret;
640 
641     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
642     if (ret != 0) {
643         trace_kvm_failed_spr_get(spr, strerror(errno));
644     } else {
645         switch (id & KVM_REG_SIZE_MASK) {
646         case KVM_REG_SIZE_U32:
647             env->spr[spr] = val.u32;
648             break;
649 
650         case KVM_REG_SIZE_U64:
651             env->spr[spr] = val.u64;
652             break;
653 
654         default:
655             /* Don't handle this size yet */
656             abort();
657         }
658     }
659 }
660 
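/* Write a single SPR from env->spr[] to KVM via the ONE_REG interface */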
661 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
662 {
663     PowerPCCPU *cpu = POWERPC_CPU(cs);
664     CPUPPCState *env = &cpu->env;
665     union {
666         uint32_t u32;
667         uint64_t u64;
668     } val;
669     struct kvm_one_reg reg = {
670         .id = id,
671         .addr = (uintptr_t) &val,
672     };
673     int ret;
674 
675     switch (id & KVM_REG_SIZE_MASK) {
676     case KVM_REG_SIZE_U32:
677         val.u32 = env->spr[spr];
678         break;
679 
680     case KVM_REG_SIZE_U64:
681         val.u64 = env->spr[spr];
682         break;
683 
684     default:
685         /* Don't handle this size yet */
686         abort();
687     }
688 
689     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
690     if (ret != 0) {
691         trace_kvm_failed_spr_set(spr, strerror(errno));
692     }
693 }
694 
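/* Push the FPSCR, FP/VSX and Altivec register state to KVM.  Each FPR is
 * doubleword 0 of the corresponding VSR, so the two halves are packed in
 * host byte order before the ONE_REG call. */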
695 static int kvm_put_fp(CPUState *cs)
696 {
697     PowerPCCPU *cpu = POWERPC_CPU(cs);
698     CPUPPCState *env = &cpu->env;
699     struct kvm_one_reg reg;
700     int i;
701     int ret;
702 
703     if (env->insns_flags & PPC_FLOAT) {
704         uint64_t fpscr = env->fpscr;
705         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
706 
707         reg.id = KVM_REG_PPC_FPSCR;
708         reg.addr = (uintptr_t)&fpscr;
709         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
710         if (ret < 0) {
711             DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
712             return ret;
713         }
714 
715         for (i = 0; i < 32; i++) {
716             uint64_t vsr[2];
717 
718 #ifdef HOST_WORDS_BIGENDIAN
719             vsr[0] = float64_val(env->fpr[i]);
720             vsr[1] = env->vsr[i];
721 #else
722             vsr[0] = env->vsr[i];
723             vsr[1] = float64_val(env->fpr[i]);
724 #endif
725             reg.addr = (uintptr_t) &vsr;
726             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
727 
728             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
729             if (ret < 0) {
730                 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
731                         i, strerror(errno));
732                 return ret;
733             }
734         }
735     }
736 
737     if (env->insns_flags & PPC_ALTIVEC) {
738         reg.id = KVM_REG_PPC_VSCR;
739         reg.addr = (uintptr_t)&env->vscr;
740         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
741         if (ret < 0) {
742             DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
743             return ret;
744         }
745 
746         for (i = 0; i < 32; i++) {
747             reg.id = KVM_REG_PPC_VR(i);
748             reg.addr = (uintptr_t)&env->avr[i];
749             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
750             if (ret < 0) {
751                 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
752                 return ret;
753             }
754         }
755     }
756 
757     return 0;
758 }
759 
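/* Fetch the FPSCR, FP/VSX and Altivec register state back from KVM;
 * the inverse of kvm_put_fp(). */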
760 static int kvm_get_fp(CPUState *cs)
761 {
762     PowerPCCPU *cpu = POWERPC_CPU(cs);
763     CPUPPCState *env = &cpu->env;
764     struct kvm_one_reg reg;
765     int i;
766     int ret;
767 
768     if (env->insns_flags & PPC_FLOAT) {
769         uint64_t fpscr;
770         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
771 
772         reg.id = KVM_REG_PPC_FPSCR;
773         reg.addr = (uintptr_t)&fpscr;
774         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
775         if (ret < 0) {
776             DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
777             return ret;
778         } else {
779             env->fpscr = fpscr;
780         }
781 
782         for (i = 0; i < 32; i++) {
783             uint64_t vsr[2];
784 
785             reg.addr = (uintptr_t) &vsr;
786             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
787 
788             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
789             if (ret < 0) {
790                 DPRINTF("Unable to get %s%d from KVM: %s\n",
791                         vsx ? "VSR" : "FPR", i, strerror(errno));
792                 return ret;
793             } else {
794 #ifdef HOST_WORDS_BIGENDIAN
795                 env->fpr[i] = vsr[0];
796                 if (vsx) {
797                     env->vsr[i] = vsr[1];
798                 }
799 #else
800                 env->fpr[i] = vsr[1];
801                 if (vsx) {
802                     env->vsr[i] = vsr[0];
803                 }
804 #endif
805             }
806         }
807     }
808 
809     if (env->insns_flags & PPC_ALTIVEC) {
810         reg.id = KVM_REG_PPC_VSCR;
811         reg.addr = (uintptr_t)&env->vscr;
812         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
813         if (ret < 0) {
814             DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
815             return ret;
816         }
817 
818         for (i = 0; i < 32; i++) {
819             reg.id = KVM_REG_PPC_VR(i);
820             reg.addr = (uintptr_t)&env->avr[i];
821             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
822             if (ret < 0) {
823                 DPRINTF("Unable to get VR%d from KVM: %s\n",
824                         i, strerror(errno));
825                 return ret;
826             }
827         }
828     }
829 
830     return 0;
831 }
832 
833 #if defined(TARGET_PPC64)
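/* Read back the guest addresses of the VPA, SLB shadow buffer and dispatch
 * trace log, the per-vCPU areas a PAPR guest registers via H_REGISTER_VPA. */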
834 static int kvm_get_vpa(CPUState *cs)
835 {
836     PowerPCCPU *cpu = POWERPC_CPU(cs);
837     CPUPPCState *env = &cpu->env;
838     struct kvm_one_reg reg;
839     int ret;
840 
841     reg.id = KVM_REG_PPC_VPA_ADDR;
842     reg.addr = (uintptr_t)&env->vpa_addr;
843     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
844     if (ret < 0) {
845         DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
846         return ret;
847     }
848 
849     assert((uintptr_t)&env->slb_shadow_size
850            == ((uintptr_t)&env->slb_shadow_addr + 8));
851     reg.id = KVM_REG_PPC_VPA_SLB;
852     reg.addr = (uintptr_t)&env->slb_shadow_addr;
853     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
854     if (ret < 0) {
855         DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
856                 strerror(errno));
857         return ret;
858     }
859 
860     assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
861     reg.id = KVM_REG_PPC_VPA_DTL;
862     reg.addr = (uintptr_t)&env->dtl_addr;
863     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
864     if (ret < 0) {
865         DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
866                 strerror(errno));
867         return ret;
868     }
869 
870     return 0;
871 }
872 
873 static int kvm_put_vpa(CPUState *cs)
874 {
875     PowerPCCPU *cpu = POWERPC_CPU(cs);
876     CPUPPCState *env = &cpu->env;
877     struct kvm_one_reg reg;
878     int ret;
879 
880     /* SLB shadow or DTL can't be registered unless a master VPA is
881      * registered.  That means that when restoring state, if a VPA *is*
882      * registered, we need to set that up first.  If not, we need to
883      * deregister the others before deregistering the master VPA. */
884     assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
885 
886     if (env->vpa_addr) {
887         reg.id = KVM_REG_PPC_VPA_ADDR;
888         reg.addr = (uintptr_t)&env->vpa_addr;
889         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
890         if (ret < 0) {
891             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
892             return ret;
893         }
894     }
895 
896     assert((uintptr_t)&env->slb_shadow_size
897            == ((uintptr_t)&env->slb_shadow_addr + 8));
898     reg.id = KVM_REG_PPC_VPA_SLB;
899     reg.addr = (uintptr_t)&env->slb_shadow_addr;
900     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
901     if (ret < 0) {
902         DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
903         return ret;
904     }
905 
906     assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
907     reg.id = KVM_REG_PPC_VPA_DTL;
908     reg.addr = (uintptr_t)&env->dtl_addr;
909     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
910     if (ret < 0) {
911         DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
912                 strerror(errno));
913         return ret;
914     }
915 
916     if (!env->vpa_addr) {
917         reg.id = KVM_REG_PPC_VPA_ADDR;
918         reg.addr = (uintptr_t)&env->vpa_addr;
919         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
920         if (ret < 0) {
921             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
922             return ret;
923         }
924     }
925 
926     return 0;
927 }
928 #endif /* TARGET_PPC64 */
929 
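/* Push the Book3S MMU state (SDR1, SLB, segment registers and BATs)
 * to KVM via KVM_SET_SREGS */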
930 int kvmppc_put_books_sregs(PowerPCCPU *cpu)
931 {
932     CPUPPCState *env = &cpu->env;
933     struct kvm_sregs sregs;
934     int i;
935 
936     sregs.pvr = env->spr[SPR_PVR];
937 
938     if (cpu->vhyp) {
939         PPCVirtualHypervisorClass *vhc =
940             PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
941         sregs.u.s.sdr1 = vhc->encode_hpt_for_kvm_pr(cpu->vhyp);
942     } else {
943         sregs.u.s.sdr1 = env->spr[SPR_SDR1];
944     }
945 
946     /* Sync SLB */
947 #ifdef TARGET_PPC64
948     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
949         sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
950         if (env->slb[i].esid & SLB_ESID_V) {
951             sregs.u.s.ppc64.slb[i].slbe |= i;
952         }
953         sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
954     }
955 #endif
956 
957     /* Sync SRs */
958     for (i = 0; i < 16; i++) {
959         sregs.u.s.ppc32.sr[i] = env->sr[i];
960     }
961 
962     /* Sync BATs */
963     for (i = 0; i < 8; i++) {
964         /* Beware: the upper (BATU) and lower (BATL) halves have to be packed together here */
965         sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
966             | env->DBAT[1][i];
967         sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
968             | env->IBAT[1][i];
969     }
970 
971     return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
972 }
973 
974 int kvm_arch_put_registers(CPUState *cs, int level)
975 {
976     PowerPCCPU *cpu = POWERPC_CPU(cs);
977     CPUPPCState *env = &cpu->env;
978     struct kvm_regs regs;
979     int ret;
980     int i;
981 
982     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
983     if (ret < 0) {
984         return ret;
985     }
986 
987     regs.ctr = env->ctr;
988     regs.lr  = env->lr;
989     regs.xer = cpu_read_xer(env);
990     regs.msr = env->msr;
991     regs.pc = env->nip;
992 
993     regs.srr0 = env->spr[SPR_SRR0];
994     regs.srr1 = env->spr[SPR_SRR1];
995 
996     regs.sprg0 = env->spr[SPR_SPRG0];
997     regs.sprg1 = env->spr[SPR_SPRG1];
998     regs.sprg2 = env->spr[SPR_SPRG2];
999     regs.sprg3 = env->spr[SPR_SPRG3];
1000     regs.sprg4 = env->spr[SPR_SPRG4];
1001     regs.sprg5 = env->spr[SPR_SPRG5];
1002     regs.sprg6 = env->spr[SPR_SPRG6];
1003     regs.sprg7 = env->spr[SPR_SPRG7];
1004 
1005     regs.pid = env->spr[SPR_BOOKE_PID];
1006 
1007     for (i = 0; i < 32; i++)
1008         regs.gpr[i] = env->gpr[i];
1009 
1010     regs.cr = 0;
1011     for (i = 0; i < 8; i++) {
1012         regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
1013     }
1014 
1015     ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
1016     if (ret < 0)
1017         return ret;
1018 
1019     kvm_put_fp(cs);
1020 
1021     if (env->tlb_dirty) {
1022         kvm_sw_tlb_put(cpu);
1023         env->tlb_dirty = false;
1024     }
1025 
1026     if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
1027         ret = kvmppc_put_books_sregs(cpu);
1028         if (ret < 0) {
1029             return ret;
1030         }
1031     }
1032 
1033     if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
1034         kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1035     }
1036 
1037     if (cap_one_reg) {
1038         int i;
1039 
1040         /* We deliberately ignore errors here: for kernels which have
1041          * the ONE_REG calls but don't support the specific
1042          * registers, there's a reasonable chance things will still
1043          * work, at least until we try to migrate. */
1044         for (i = 0; i < 1024; i++) {
1045             uint64_t id = env->spr_cb[i].one_reg_id;
1046 
1047             if (id != 0) {
1048                 kvm_put_one_spr(cs, id, i);
1049             }
1050         }
1051 
1052 #ifdef TARGET_PPC64
1053         if (msr_ts) {
1054             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1055                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1056             }
1057             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1058                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1059             }
1060             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1061             kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1062             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1063             kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1064             kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1065             kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1066             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1067             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1068             kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1069             kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1070         }
1071 
1072         if (cap_papr) {
1073             if (kvm_put_vpa(cs) < 0) {
1074                 DPRINTF("Warning: Unable to set VPA information to KVM\n");
1075             }
1076         }
1077 
1078         kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1079 #endif /* TARGET_PPC64 */
1080     }
1081 
1082     return ret;
1083 }
1084 
1085 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1086 {
1087     env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1088 }
1089 
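/* Read the BookE special registers out of KVM_GET_SREGS and copy the ones
 * each advertised feature block provides into env->spr[] */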
1090 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1091 {
1092     CPUPPCState *env = &cpu->env;
1093     struct kvm_sregs sregs;
1094     int ret;
1095 
1096     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1097     if (ret < 0) {
1098         return ret;
1099     }
1100 
1101     if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1102         env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1103         env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1104         env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1105         env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1106         env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1107         env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1108         env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1109         env->spr[SPR_DECR] = sregs.u.e.dec;
1110         env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1111         env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1112         env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1113     }
1114 
1115     if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1116         env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1117         env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1118         env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1119         env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1120         env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1121     }
1122 
1123     if (sregs.u.e.features & KVM_SREGS_E_64) {
1124         env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1125     }
1126 
1127     if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1128         env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1129     }
1130 
1131     if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1132         env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1133         kvm_sync_excp(env, POWERPC_EXCP_CRITICAL,  SPR_BOOKE_IVOR0);
1134         env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1135         kvm_sync_excp(env, POWERPC_EXCP_MCHECK,  SPR_BOOKE_IVOR1);
1136         env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1137         kvm_sync_excp(env, POWERPC_EXCP_DSI,  SPR_BOOKE_IVOR2);
1138         env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1139         kvm_sync_excp(env, POWERPC_EXCP_ISI,  SPR_BOOKE_IVOR3);
1140         env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1141         kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL,  SPR_BOOKE_IVOR4);
1142         env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1143         kvm_sync_excp(env, POWERPC_EXCP_ALIGN,  SPR_BOOKE_IVOR5);
1144         env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1145         kvm_sync_excp(env, POWERPC_EXCP_PROGRAM,  SPR_BOOKE_IVOR6);
1146         env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1147         kvm_sync_excp(env, POWERPC_EXCP_FPU,  SPR_BOOKE_IVOR7);
1148         env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1149         kvm_sync_excp(env, POWERPC_EXCP_SYSCALL,  SPR_BOOKE_IVOR8);
1150         env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1151         kvm_sync_excp(env, POWERPC_EXCP_APU,  SPR_BOOKE_IVOR9);
1152         env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1153         kvm_sync_excp(env, POWERPC_EXCP_DECR,  SPR_BOOKE_IVOR10);
1154         env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1155         kvm_sync_excp(env, POWERPC_EXCP_FIT,  SPR_BOOKE_IVOR11);
1156         env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1157         kvm_sync_excp(env, POWERPC_EXCP_WDT,  SPR_BOOKE_IVOR12);
1158         env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1159         kvm_sync_excp(env, POWERPC_EXCP_DTLB,  SPR_BOOKE_IVOR13);
1160         env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1161         kvm_sync_excp(env, POWERPC_EXCP_ITLB,  SPR_BOOKE_IVOR14);
1162         env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1163         kvm_sync_excp(env, POWERPC_EXCP_DEBUG,  SPR_BOOKE_IVOR15);
1164 
1165         if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1166             env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1167             kvm_sync_excp(env, POWERPC_EXCP_SPEU,  SPR_BOOKE_IVOR32);
1168             env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1169             kvm_sync_excp(env, POWERPC_EXCP_EFPDI,  SPR_BOOKE_IVOR33);
1170             env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1171             kvm_sync_excp(env, POWERPC_EXCP_EFPRI,  SPR_BOOKE_IVOR34);
1172         }
1173 
1174         if (sregs.u.e.features & KVM_SREGS_E_PM) {
1175             env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1176             kvm_sync_excp(env, POWERPC_EXCP_EPERFM,  SPR_BOOKE_IVOR35);
1177         }
1178 
1179         if (sregs.u.e.features & KVM_SREGS_E_PC) {
1180             env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1181             kvm_sync_excp(env, POWERPC_EXCP_DOORI,  SPR_BOOKE_IVOR36);
1182             env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1183             kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1184         }
1185     }
1186 
1187     if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1188         env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1189         env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1190         env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1191         env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1192         env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1193         env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1194         env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1195         env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1196         env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1197         env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1198     }
1199 
1200     if (sregs.u.e.features & KVM_SREGS_EXP) {
1201         env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1202     }
1203 
1204     if (sregs.u.e.features & KVM_SREGS_E_PD) {
1205         env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1206         env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1207     }
1208 
1209     if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1210         env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1211         env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1212         env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1213 
1214         if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1215             env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1216             env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1217         }
1218     }
1219 
1220     return 0;
1221 }
1222 
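/* Read the Book3S MMU state (SDR1, SLB, segment registers and BATs)
 * back from KVM via KVM_GET_SREGS */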
1223 static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1224 {
1225     CPUPPCState *env = &cpu->env;
1226     struct kvm_sregs sregs;
1227     int ret;
1228     int i;
1229 
1230     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1231     if (ret < 0) {
1232         return ret;
1233     }
1234 
1235     if (!cpu->vhyp) {
1236         ppc_store_sdr1(env, sregs.u.s.sdr1);
1237     }
1238 
1239     /* Sync SLB */
1240 #ifdef TARGET_PPC64
1241     /*
1242      * The packed SLB array we get from KVM_GET_SREGS only contains
1243      * information about valid entries. So we flush our internal copy
1244      * to get rid of stale ones, then put all valid SLB entries back
1245      * in.
1246      */
1247     memset(env->slb, 0, sizeof(env->slb));
1248     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1249         target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1250         target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1251         /*
1252          * Only restore valid entries
1253          */
1254         if (rb & SLB_ESID_V) {
1255             ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1256         }
1257     }
1258 #endif
1259 
1260     /* Sync SRs */
1261     for (i = 0; i < 16; i++) {
1262         env->sr[i] = sregs.u.s.ppc32.sr[i];
1263     }
1264 
1265     /* Sync BATs */
1266     for (i = 0; i < 8; i++) {
1267         env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1268         env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1269         env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1270         env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1271     }
1272 
1273     return 0;
1274 }
1275 
1276 int kvm_arch_get_registers(CPUState *cs)
1277 {
1278     PowerPCCPU *cpu = POWERPC_CPU(cs);
1279     CPUPPCState *env = &cpu->env;
1280     struct kvm_regs regs;
1281     uint32_t cr;
1282     int i, ret;
1283 
1284     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1285     if (ret < 0)
1286         return ret;
1287 
1288     cr = regs.cr;
1289     for (i = 7; i >= 0; i--) {
1290         env->crf[i] = cr & 15;
1291         cr >>= 4;
1292     }
1293 
1294     env->ctr = regs.ctr;
1295     env->lr = regs.lr;
1296     cpu_write_xer(env, regs.xer);
1297     env->msr = regs.msr;
1298     env->nip = regs.pc;
1299 
1300     env->spr[SPR_SRR0] = regs.srr0;
1301     env->spr[SPR_SRR1] = regs.srr1;
1302 
1303     env->spr[SPR_SPRG0] = regs.sprg0;
1304     env->spr[SPR_SPRG1] = regs.sprg1;
1305     env->spr[SPR_SPRG2] = regs.sprg2;
1306     env->spr[SPR_SPRG3] = regs.sprg3;
1307     env->spr[SPR_SPRG4] = regs.sprg4;
1308     env->spr[SPR_SPRG5] = regs.sprg5;
1309     env->spr[SPR_SPRG6] = regs.sprg6;
1310     env->spr[SPR_SPRG7] = regs.sprg7;
1311 
1312     env->spr[SPR_BOOKE_PID] = regs.pid;
1313 
1314     for (i = 0; i < 32; i++)
1315         env->gpr[i] = regs.gpr[i];
1316 
1317     kvm_get_fp(cs);
1318 
1319     if (cap_booke_sregs) {
1320         ret = kvmppc_get_booke_sregs(cpu);
1321         if (ret < 0) {
1322             return ret;
1323         }
1324     }
1325 
1326     if (cap_segstate) {
1327         ret = kvmppc_get_books_sregs(cpu);
1328         if (ret < 0) {
1329             return ret;
1330         }
1331     }
1332 
1333     if (cap_hior) {
1334         kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1335     }
1336 
1337     if (cap_one_reg) {
1338         int i;
1339 
1340         /* We deliberately ignore errors here: for kernels which have
1341          * the ONE_REG calls but don't support the specific
1342          * registers, there's a reasonable chance things will still
1343          * work, at least until we try to migrate. */
1344         for (i = 0; i < 1024; i++) {
1345             uint64_t id = env->spr_cb[i].one_reg_id;
1346 
1347             if (id != 0) {
1348                 kvm_get_one_spr(cs, id, i);
1349             }
1350         }
1351 
1352 #ifdef TARGET_PPC64
1353         if (msr_ts) {
1354             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1355                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1356             }
1357             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1358                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1359             }
1360             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1361             kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1362             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1363             kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1364             kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1365             kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1366             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1367             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1368             kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1369             kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1370         }
1371 
1372         if (cap_papr) {
1373             if (kvm_get_vpa(cs) < 0) {
1374                 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1375             }
1376         }
1377 
1378         kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1379 #endif
1380     }
1381 
1382     return 0;
1383 }
1384 
1385 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1386 {
1387     unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1388 
1389     if (irq != PPC_INTERRUPT_EXT) {
1390         return 0;
1391     }
1392 
1393     if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1394         return 0;
1395     }
1396 
1397     kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1398 
1399     return 0;
1400 }
1401 
1402 #if defined(TARGET_PPCEMB)
1403 #define PPC_INPUT_INT PPC40x_INPUT_INT
1404 #elif defined(TARGET_PPC64)
1405 #define PPC_INPUT_INT PPC970_INPUT_INT
1406 #else
1407 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1408 #endif
1409 
1410 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1411 {
1412     PowerPCCPU *cpu = POWERPC_CPU(cs);
1413     CPUPPCState *env = &cpu->env;
1414     int r;
1415     unsigned irq;
1416 
1417     qemu_mutex_lock_iothread();
1418 
1419     /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1420      * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1421     if (!cap_interrupt_level &&
1422         run->ready_for_interrupt_injection &&
1423         (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1424         (env->irq_input_state & (1<<PPC_INPUT_INT)))
1425     {
1426         /* For now KVM disregards the 'irq' argument. However, in the
1427          * future KVM could cache it in-kernel to avoid a heavyweight exit
1428          * when reading the UIC.
1429          */
1430         irq = KVM_INTERRUPT_SET;
1431 
1432         DPRINTF("injected interrupt %d\n", irq);
1433         r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1434         if (r < 0) {
1435             printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1436         }
1437 
1438         /* Always wake up soon in case the interrupt was level based */
1439         timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1440                        (NANOSECONDS_PER_SECOND / 50));
1441     }
1442 
1443     /* We don't know if there are more interrupts pending after this. However,
1444      * the guest will return to userspace in the course of handling this one
1445      * anyway, so we will get a chance to deliver the rest. */
1446 
1447     qemu_mutex_unlock_iothread();
1448 }
1449 
1450 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1451 {
1452     return MEMTXATTRS_UNSPECIFIED;
1453 }
1454 
1455 int kvm_arch_process_async_events(CPUState *cs)
1456 {
1457     return cs->halted;
1458 }
1459 
1460 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1461 {
1462     CPUState *cs = CPU(cpu);
1463     CPUPPCState *env = &cpu->env;
1464 
1465     if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1466         cs->halted = 1;
1467         cs->exception_index = EXCP_HLT;
1468     }
1469 
1470     return 0;
1471 }
1472 
1473 /* Map DCR accesses to the existing QEMU DCR emulation */
1474 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1475 {
1476     if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1477         fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1478 
1479     return 0;
1480 }
1481 
1482 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1483 {
1484     if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1485         fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1486 
1487     return 0;
1488 }
1489 
1490 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1491 {
1492     /* Mixed endian case is not handled */
1493     uint32_t sc = debug_inst_opcode;
1494 
1495     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1496                             sizeof(sc), 0) ||
1497         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1498         return -EINVAL;
1499     }
1500 
1501     return 0;
1502 }
1503 
1504 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1505 {
1506     uint32_t sc;
1507 
1508     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1509         sc != debug_inst_opcode ||
1510         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1511                             sizeof(sc), 1)) {
1512         return -EINVAL;
1513     }
1514 
1515     return 0;
1516 }
1517 
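/* Return the index of the hw_debug_points[] entry matching addr and type,
 * or -1 if there is none */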
1518 static int find_hw_breakpoint(target_ulong addr, int type)
1519 {
1520     int n;
1521 
1522     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1523            <= ARRAY_SIZE(hw_debug_points));
1524 
1525     for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1526         if (hw_debug_points[n].addr == addr &&
1527              hw_debug_points[n].type == type) {
1528             return n;
1529         }
1530     }
1531 
1532     return -1;
1533 }
1534 
1535 static int find_hw_watchpoint(target_ulong addr, int *flag)
1536 {
1537     int n;
1538 
1539     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1540     if (n >= 0) {
1541         *flag = BP_MEM_ACCESS;
1542         return n;
1543     }
1544 
1545     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1546     if (n >= 0) {
1547         *flag = BP_MEM_WRITE;
1548         return n;
1549     }
1550 
1551     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1552     if (n >= 0) {
1553         *flag = BP_MEM_READ;
1554         return n;
1555     }
1556 
1557     return -1;
1558 }
1559 
1560 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1561                                   target_ulong len, int type)
1562 {
1563     if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1564         return -ENOBUFS;
1565     }
1566 
1567     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1568     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1569 
1570     switch (type) {
1571     case GDB_BREAKPOINT_HW:
1572         if (nb_hw_breakpoint >= max_hw_breakpoint) {
1573             return -ENOBUFS;
1574         }
1575 
1576         if (find_hw_breakpoint(addr, type) >= 0) {
1577             return -EEXIST;
1578         }
1579 
1580         nb_hw_breakpoint++;
1581         break;
1582 
1583     case GDB_WATCHPOINT_WRITE:
1584     case GDB_WATCHPOINT_READ:
1585     case GDB_WATCHPOINT_ACCESS:
1586         if (nb_hw_watchpoint >= max_hw_watchpoint) {
1587             return -ENOBUFS;
1588         }
1589 
1590         if (find_hw_breakpoint(addr, type) >= 0) {
1591             return -EEXIST;
1592         }
1593 
1594         nb_hw_watchpoint++;
1595         break;
1596 
1597     default:
1598         return -ENOSYS;
1599     }
1600 
1601     return 0;
1602 }
1603 
1604 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1605                                   target_ulong len, int type)
1606 {
1607     int n;
1608 
1609     n = find_hw_breakpoint(addr, type);
1610     if (n < 0) {
1611         return -ENOENT;
1612     }
1613 
1614     switch (type) {
1615     case GDB_BREAKPOINT_HW:
1616         nb_hw_breakpoint--;
1617         break;
1618 
1619     case GDB_WATCHPOINT_WRITE:
1620     case GDB_WATCHPOINT_READ:
1621     case GDB_WATCHPOINT_ACCESS:
1622         nb_hw_watchpoint--;
1623         break;
1624 
1625     default:
1626         return -ENOSYS;
1627     }
1628     hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1629 
1630     return 0;
1631 }
1632 
1633 void kvm_arch_remove_all_hw_breakpoints(void)
1634 {
1635     nb_hw_breakpoint = nb_hw_watchpoint = 0;
1636 }
1637 
1638 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1639 {
1640     int n;
1641 
1642     /* Software Breakpoint updates */
1643     if (kvm_sw_breakpoints_active(cs)) {
1644         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1645     }
1646 
1647     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1648            <= ARRAY_SIZE(hw_debug_points));
1649     assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1650 
1651     if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1652         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1653         memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1654         for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1655             switch (hw_debug_points[n].type) {
1656             case GDB_BREAKPOINT_HW:
1657                 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1658                 break;
1659             case GDB_WATCHPOINT_WRITE:
1660                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1661                 break;
1662             case GDB_WATCHPOINT_READ:
1663                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1664                 break;
1665             case GDB_WATCHPOINT_ACCESS:
1666                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1667                                         KVMPPC_DEBUG_WATCH_READ;
1668                 break;
1669             default:
1670                 cpu_abort(cs, "Unsupported breakpoint type\n");
1671             }
1672             dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1673         }
1674     }
1675 }
1676 
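/* Handle a KVM_EXIT_DEBUG exit: decide whether the debug exception belongs
 * to QEMU (single-stepping or one of our breakpoints/watchpoints) or
 * whether a program interrupt must be injected back into the guest.
 */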
1677 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1678 {
1679     CPUState *cs = CPU(cpu);
1680     CPUPPCState *env = &cpu->env;
1681     struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1682     int handle = 0;
1683     int n;
1684     int flag = 0;
1685 
1686     if (cs->singlestep_enabled) {
1687         handle = 1;
1688     } else if (arch_info->status) {
1689         if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1690             if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1691                 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1692                 if (n >= 0) {
1693                     handle = 1;
1694                 }
1695             } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1696                                             KVMPPC_DEBUG_WATCH_WRITE)) {
1697                 n = find_hw_watchpoint(arch_info->address,  &flag);
1698                 if (n >= 0) {
1699                     handle = 1;
1700                     cs->watchpoint_hit = &hw_watchpoint;
1701                     hw_watchpoint.vaddr = hw_debug_points[n].addr;
1702                     hw_watchpoint.flags = flag;
1703                 }
1704             }
1705         }
1706     } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1707         handle = 1;
1708     } else {
1709         /* QEMU cannot handle this debug exception, so inject a
1710          * program exception into the guest;
1711          * yes, a program exception, NOT a debug exception!
1712          * While QEMU is using the debug resources, debug exceptions
1713          * must always be delivered to QEMU.  To achieve this we set
1714          * MSR_DE and also MSRP_DEP so the guest cannot change MSR_DE.
1715          * When emulating debug resources for the guest we instead
1716          * want the guest to control MSR_DE (enable/disable the debug
1717          * interrupt as needed).
1718          * Supporting both configurations at once is not possible, so
1719          * debug resources cannot be shared between QEMU and the guest
1720          * on the BookE architecture.
1721          * In the current design QEMU gets priority over the guest:
1722          * if QEMU is using the debug resources, the guest cannot.
1723          * For software breakpoints QEMU uses a privileged instruction,
1724          * so we cannot be here because the guest raised a debug
1725          * exception; the only possibility is that the guest executed
1726          * a privileged or illegal instruction, hence the program
1727          * interrupt injected below.
1728          */
1729 
1730         cpu_synchronize_state(cs);
1731         /* env->nip is PC, so increment this by 4 to use
1732          * ppc_cpu_do_interrupt(), which sets srr0 = env->nip - 4.
1733          */
1734         env->nip += 4;
1735         cs->exception_index = POWERPC_EXCP_PROGRAM;
1736         env->error_code = POWERPC_EXCP_INVAL;
1737         ppc_cpu_do_interrupt(cs);
1738     }
1739 
1740     return handle;
1741 }
1742 
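/* Dispatch the KVM exit reasons that need PPC-specific handling:
 * DCR accesses, halt, PAPR hypercalls, EPR, watchdog expiry and debug exits.
 */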
1743 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1744 {
1745     PowerPCCPU *cpu = POWERPC_CPU(cs);
1746     CPUPPCState *env = &cpu->env;
1747     int ret;
1748 
1749     qemu_mutex_lock_iothread();
1750 
1751     switch (run->exit_reason) {
1752     case KVM_EXIT_DCR:
1753         if (run->dcr.is_write) {
1754             DPRINTF("handle dcr write\n");
1755             ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1756         } else {
1757             DPRINTF("handle dcr read\n");
1758             ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1759         }
1760         break;
1761     case KVM_EXIT_HLT:
1762         DPRINTF("handle halt\n");
1763         ret = kvmppc_handle_halt(cpu);
1764         break;
1765 #if defined(TARGET_PPC64)
1766     case KVM_EXIT_PAPR_HCALL:
1767         DPRINTF("handle PAPR hypercall\n");
1768         run->papr_hcall.ret = spapr_hypercall(cpu,
1769                                               run->papr_hcall.nr,
1770                                               run->papr_hcall.args);
1771         ret = 0;
1772         break;
1773 #endif
1774     case KVM_EXIT_EPR:
1775         DPRINTF("handle epr\n");
1776         run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1777         ret = 0;
1778         break;
1779     case KVM_EXIT_WATCHDOG:
1780         DPRINTF("handle watchdog expiry\n");
1781         watchdog_perform_action();
1782         ret = 0;
1783         break;
1784 
1785     case KVM_EXIT_DEBUG:
1786         DPRINTF("handle debug exception\n");
1787         if (kvm_handle_debug(cpu, run)) {
1788             ret = EXCP_DEBUG;
1789             break;
1790         }
1791         /* re-enter, this exception was guest-internal */
1792         ret = 0;
1793         break;
1794 
1795     default:
1796         fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1797         ret = -1;
1798         break;
1799     }
1800 
1801     qemu_mutex_unlock_iothread();
1802     return ret;
1803 }
1804 
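/* OR the given bits into the guest's Timer Status Register using the
 * KVM_REG_PPC_OR_TSR one-reg interface (BookE timers).
 */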
1805 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1806 {
1807     CPUState *cs = CPU(cpu);
1808     uint32_t bits = tsr_bits;
1809     struct kvm_one_reg reg = {
1810         .id = KVM_REG_PPC_OR_TSR,
1811         .addr = (uintptr_t) &bits,
1812     };
1813 
1814     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1815 }
1816 
1817 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1818 {
1819 
1820     CPUState *cs = CPU(cpu);
1821     uint32_t bits = tsr_bits;
1822     struct kvm_one_reg reg = {
1823         .id = KVM_REG_PPC_CLEAR_TSR,
1824         .addr = (uintptr_t) &bits,
1825     };
1826 
1827     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1828 }
1829 
1830 int kvmppc_set_tcr(PowerPCCPU *cpu)
1831 {
1832     CPUState *cs = CPU(cpu);
1833     CPUPPCState *env = &cpu->env;
1834     uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1835 
1836     struct kvm_one_reg reg = {
1837         .id = KVM_REG_PPC_TCR,
1838         .addr = (uintptr_t) &tcr,
1839     };
1840 
1841     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1842 }
1843 
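/* Enable the in-kernel BookE watchdog for this vcpu, provided the host
 * advertises KVM_CAP_PPC_BOOKE_WATCHDOG.
 */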
1844 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1845 {
1846     CPUState *cs = CPU(cpu);
1847     int ret;
1848 
1849     if (!kvm_enabled()) {
1850         return -1;
1851     }
1852 
1853     if (!cap_ppc_watchdog) {
1854         printf("warning: KVM does not support watchdog\n");
1855         return -1;
1856     }
1857 
1858     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1859     if (ret < 0) {
1860         fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1861                 __func__, strerror(-ret));
1862         return ret;
1863     }
1864 
1865     return ret;
1866 }
1867 
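/* Copy the first /proc/cpuinfo line starting with 'field' into 'value'.
 * Returns 0 on success, -1 if the field is not found.
 */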
1868 static int read_cpuinfo(const char *field, char *value, int len)
1869 {
1870     FILE *f;
1871     int ret = -1;
1872     int field_len = strlen(field);
1873     char line[512];
1874 
1875     f = fopen("/proc/cpuinfo", "r");
1876     if (!f) {
1877         return -1;
1878     }
1879 
1880     do {
1881         if (!fgets(line, sizeof(line), f)) {
1882             break;
1883         }
1884         if (!strncmp(line, field, field_len)) {
1885             pstrcpy(value, len, line);
1886             ret = 0;
1887             break;
1888         }
1889     } while (*line);
1890 
1891     fclose(f);
1892 
1893     return ret;
1894 }
1895 
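/* Parse the host timebase frequency from /proc/cpuinfo, falling back to
 * NANOSECONDS_PER_SECOND if the "timebase" field can't be found or parsed.
 */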
1896 uint32_t kvmppc_get_tbfreq(void)
1897 {
1898     char line[512];
1899     char *ns;
1900     uint32_t retval = NANOSECONDS_PER_SECOND;
1901 
1902     if (read_cpuinfo("timebase", line, sizeof(line))) {
1903         return retval;
1904     }
1905 
1906     if (!(ns = strchr(line, ':'))) {
1907         return retval;
1908     }
1909 
1910     ns++;
1911 
1912     return atoi(ns);
1913 }
1914 
1915 bool kvmppc_get_host_serial(char **value)
1916 {
1917     return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
1918                                NULL);
1919 }
1920 
1921 bool kvmppc_get_host_model(char **value)
1922 {
1923     return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
1924 }
1925 
1926 /* Try to find a device tree node for a CPU with clock-frequency property */
1927 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1928 {
1929     struct dirent *dirp;
1930     DIR *dp;
1931 
1932     if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1933         printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1934         return -1;
1935     }
1936 
1937     buf[0] = '\0';
1938     while ((dirp = readdir(dp)) != NULL) {
1939         FILE *f;
1940         snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1941                  dirp->d_name);
1942         f = fopen(buf, "r");
1943         if (f) {
1944             snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1945             fclose(f);
1946             break;
1947         }
1948         buf[0] = '\0';
1949     }
1950     closedir(dp);
1951     if (buf[0] == '\0') {
1952         printf("Unknown host!\n");
1953         return -1;
1954     }
1955 
1956     return 0;
1957 }
1958 
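/* Read a host device tree property file containing a single big-endian
 * 32-bit or 64-bit integer and return it in host byte order; returns -1
 * if the file can't be opened.
 */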
1959 static uint64_t kvmppc_read_int_dt(const char *filename)
1960 {
1961     union {
1962         uint32_t v32;
1963         uint64_t v64;
1964     } u;
1965     FILE *f;
1966     int len;
1967 
1968     f = fopen(filename, "rb");
1969     if (!f) {
1970         return -1;
1971     }
1972 
1973     len = fread(&u, 1, sizeof(u), f);
1974     fclose(f);
1975     switch (len) {
1976     case 4:
1977         /* property is a 32-bit quantity */
1978         return be32_to_cpu(u.v32);
1979     case 8:
1980         return be64_to_cpu(u.v64);
1981     }
1982 
1983     return 0;
1984 }
1985 
1986 /* Read a CPU node property from the host device tree that's a single
1987  * integer (32-bit or 64-bit).  Returns -1 if the CPU node or the
1988  * property can't be found or opened, and 0 if the property's format
1989  * isn't understood. */
1990 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1991 {
1992     char buf[PATH_MAX], *tmp;
1993     uint64_t val;
1994 
1995     if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1996         return -1;
1997     }
1998 
1999     tmp = g_strdup_printf("%s/%s", buf, propname);
2000     val = kvmppc_read_int_dt(tmp);
2001     g_free(tmp);
2002 
2003     return val;
2004 }
2005 
2006 uint64_t kvmppc_get_clockfreq(void)
2007 {
2008     return kvmppc_read_int_cpu_dt("clock-frequency");
2009 }
2010 
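/* Query KVM's paravirt info (hypercall sequence and feature flags).
 * Returns 0 on success, 1 if KVM_PPC_GET_PVINFO is unavailable.
 */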
2011 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
2012 {
2013     PowerPCCPU *cpu = ppc_env_get_cpu(env);
2014     CPUState *cs = CPU(cpu);
2015 
2016     if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
2017         !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
2018         return 0;
2019     }
2020 
2021     return 1;
2022 }
2023 
2024 int kvmppc_get_hasidle(CPUPPCState *env)
2025 {
2026     struct kvm_ppc_pvinfo pvinfo;
2027 
2028     if (!kvmppc_get_pvinfo(env, &pvinfo) &&
2029         (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
2030         return 1;
2031     }
2032 
2033     return 0;
2034 }
2035 
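/* Copy the KVM paravirt hypercall instruction sequence into 'buf'.  If the
 * pvinfo interface is unavailable, fill in a fallback sequence that makes
 * every hypercall fail with -1 and return 1 to let the caller know.
 */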
2036 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
2037 {
2038     uint32_t *hc = (uint32_t *)buf;
2039     struct kvm_ppc_pvinfo pvinfo;
2040 
2041     if (!kvmppc_get_pvinfo(env, &pvinfo)) {
2042         memcpy(buf, pvinfo.hcall, buf_len);
2043         return 0;
2044     }
2045 
2046     /*
2047      * Fall back to hypercalls that always fail, regardless of endianness:
2048      *
2049      *     tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
2050      *     li r3, -1
2051      *     b .+8       (becomes nop in wrong endian)
2052      *     bswap32(li r3, -1)
2053      */
2054 
2055     hc[0] = cpu_to_be32(0x08000048);
2056     hc[1] = cpu_to_be32(0x3860ffff);
2057     hc[2] = cpu_to_be32(0x48000008);
2058     hc[3] = cpu_to_be32(bswap32(0x3860ffff));
2059 
2060     return 1;
2061 }
2062 
2063 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
2064 {
2065     return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
2066 }
2067 
2068 void kvmppc_enable_logical_ci_hcalls(void)
2069 {
2070     /*
2071      * FIXME: it would be nice if we could detect the case where
2072      * we're using a device that requires the in-kernel
2073      * implementation of these hcalls but the kernel lacks it, and
2074      * produce a warning in that case.
2075      */
2076     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
2077     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
2078 }
2079 
2080 void kvmppc_enable_set_mode_hcall(void)
2081 {
2082     kvmppc_enable_hcall(kvm_state, H_SET_MODE);
2083 }
2084 
2085 void kvmppc_enable_clear_ref_mod_hcalls(void)
2086 {
2087     kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
2088     kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
2089 }
2090 
2091 void kvmppc_set_papr(PowerPCCPU *cpu)
2092 {
2093     CPUState *cs = CPU(cpu);
2094     int ret;
2095 
2096     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2097     if (ret) {
2098         error_report("This vCPU type or KVM version does not support PAPR");
2099         exit(1);
2100     }
2101 
2102     /* Update the capability flag so we sync the right information
2103      * with kvm */
2104     cap_papr = 1;
2105 }
2106 
2107 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr)
2108 {
2109     return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr);
2110 }
2111 
2112 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2113 {
2114     CPUState *cs = CPU(cpu);
2115     int ret;
2116 
2117     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2118     if (ret && mpic_proxy) {
2119         error_report("This KVM version does not support EPR");
2120         exit(1);
2121     }
2122 }
2123 
2124 int kvmppc_smt_threads(void)
2125 {
2126     return cap_ppc_smt ? cap_ppc_smt : 1;
2127 }
2128 
2129 int kvmppc_set_smt_threads(int smt)
2130 {
2131     int ret;
2132 
2133     ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_SMT, 0, smt, 0);
2134     if (!ret) {
2135         cap_ppc_smt = smt;
2136     }
2137     return ret;
2138 }
2139 
2140 void kvmppc_hint_smt_possible(Error **errp)
2141 {
2142     int i;
2143     GString *g;
2144     char *s;
2145 
2146     assert(kvm_enabled());
2147     if (cap_ppc_smt_possible) {
2148         g = g_string_new("Available VSMT modes:");
2149         for (i = 63; i >= 0; i--) {
2150             if ((1UL << i) & cap_ppc_smt_possible) {
2151                 g_string_append_printf(g, " %lu", (1UL << i));
2152             }
2153         }
2154         s = g_string_free(g, false);
2155         error_append_hint(errp, "%s.\n", s);
2156         g_free(s);
2157     } else {
2158         error_append_hint(errp,
2159                           "This KVM seems to be too old to support VSMT.\n");
2160     }
2161 }
2162 
2163 
2164 #ifdef TARGET_PPC64
2165 off_t kvmppc_alloc_rma(void **rma)
2166 {
2167     off_t size;
2168     int fd;
2169     struct kvm_allocate_rma ret;
2170 
2171     /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
2172      * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
2173      *                      not necessary on this hardware
2174      * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
2175      *
2176      * FIXME: We should allow the user to force contiguous RMA
2177      * allocation in the cap_ppc_rma==1 case.
2178      */
2179     if (cap_ppc_rma < 2) {
2180         return 0;
2181     }
2182 
2183     fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
2184     if (fd < 0) {
2185         fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
2186                 strerror(errno));
2187         return -1;
2188     }
2189 
2190     size = MIN(ret.rma_size, 256ul << 20);
2191 
2192     *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2193     if (*rma == MAP_FAILED) {
2194         fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
2195         return -1;
2196     }
2197 
2198     return size;
2199 }
2200 
2201 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2202 {
2203     struct kvm_ppc_smmu_info info;
2204     long rampagesize, best_page_shift;
2205     int i;
2206 
2207     if (cap_ppc_rma >= 2) {
2208         return current_size;
2209     }
2210 
2211     /* Find the largest hardware supported page size that's less than
2212      * or equal to the (logical) backing page size of guest RAM */
2213     kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
2214     rampagesize = qemu_getrampagesize();
2215     best_page_shift = 0;
2216 
2217     for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2218         struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2219 
2220         if (!sps->page_shift) {
2221             continue;
2222         }
2223 
2224         if ((sps->page_shift > best_page_shift)
2225             && ((1UL << sps->page_shift) <= rampagesize)) {
2226             best_page_shift = sps->page_shift;
2227         }
2228     }
2229 
2230     return MIN(current_size,
2231                1ULL << (best_page_shift + hash_shift - 7));
2232 }
2233 #endif
2234 
2235 bool kvmppc_spapr_use_multitce(void)
2236 {
2237     return cap_spapr_multitce;
2238 }
2239 
2240 int kvmppc_spapr_enable_inkernel_multitce(void)
2241 {
2242     int ret;
2243 
2244     ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2245                             H_PUT_TCE_INDIRECT, 1);
2246     if (!ret) {
2247         ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2248                                 H_STUFF_TCE, 1);
2249     }
2250 
2251     return ret;
2252 }
2253 
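/* Create an in-kernel TCE table for the given LIOBN and mmap() it into
 * QEMU, preferring the 64-bit KVM_CREATE_SPAPR_TCE_64 interface when
 * available.  Returns the mapping (and its fd via *pfd), or NULL when
 * in-kernel TCE acceleration can't be used.
 */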
2254 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
2255                               uint64_t bus_offset, uint32_t nb_table,
2256                               int *pfd, bool need_vfio)
2257 {
2258     long len;
2259     int fd;
2260     void *table;
2261 
2262     /* Must set fd to -1 so we don't try to munmap when called for
2263      * destroying the table, which the upper layers -will- do
2264      */
2265     *pfd = -1;
2266     if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2267         return NULL;
2268     }
2269 
2270     if (cap_spapr_tce_64) {
2271         struct kvm_create_spapr_tce_64 args = {
2272             .liobn = liobn,
2273             .page_shift = page_shift,
2274             .offset = bus_offset >> page_shift,
2275             .size = nb_table,
2276             .flags = 0
2277         };
2278         fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE_64, &args);
2279         if (fd < 0) {
2280             fprintf(stderr,
2281                     "KVM: Failed to create TCE64 table for liobn 0x%x\n",
2282                     liobn);
2283             return NULL;
2284         }
2285     } else if (cap_spapr_tce) {
2286         uint64_t window_size = (uint64_t) nb_table << page_shift;
2287         struct kvm_create_spapr_tce args = {
2288             .liobn = liobn,
2289             .window_size = window_size,
2290         };
2291         if ((window_size != args.window_size) || bus_offset) {
2292             return NULL;
2293         }
2294         fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2295         if (fd < 0) {
2296             fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2297                     liobn);
2298             return NULL;
2299         }
2300     } else {
2301         return NULL;
2302     }
2303 
2304     len = nb_table * sizeof(uint64_t);
2305     /* FIXME: round this up to page size */
2306 
2307     table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2308     if (table == MAP_FAILED) {
2309         fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2310                 liobn);
2311         close(fd);
2312         return NULL;
2313     }
2314 
2315     *pfd = fd;
2316     return table;
2317 }
2318 
2319 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
2320 {
2321     long len;
2322 
2323     if (fd < 0) {
2324         return -1;
2325     }
2326 
2327     len = nb_table * sizeof(uint64_t);
2328     if ((munmap(table, len) < 0) ||
2329         (close(fd) < 0)) {
2330         fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
2331                 strerror(errno));
2332         /* Leak the table */
2333     }
2334 
2335     return 0;
2336 }
2337 
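/* Ask KVM to (re)allocate the guest hash page table.  Returns the shift of
 * the allocated HPT, 0 if QEMU should allocate the HPT itself, or a
 * negative errno on failure.
 */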
2338 int kvmppc_reset_htab(int shift_hint)
2339 {
2340     uint32_t shift = shift_hint;
2341 
2342     if (!kvm_enabled()) {
2343         /* Full emulation, tell caller to allocate htab itself */
2344         return 0;
2345     }
2346     if (kvm_vm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2347         int ret;
2348         ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2349         if (ret == -ENOTTY) {
2350             /* At least some versions of PR KVM advertise the
2351              * capability, but don't implement the ioctl().  Oops.
2352              * Return 0 so that we allocate the htab in qemu, as is
2353              * correct for PR. */
2354             return 0;
2355         } else if (ret < 0) {
2356             return ret;
2357         }
2358         return shift;
2359     }
2360 
2361     /* We have a kernel that predates the htab reset calls.  For PR
2362      * KVM, we need to allocate the htab ourselves; an HV KVM of this
2363      * era will already have allocated a fixed 16MB hash table. */
2364     if (kvmppc_is_pr(kvm_state)) {
2365         /* PR - tell caller to allocate htab */
2366         return 0;
2367     } else {
2368         /* HV - assume 16MB kernel allocated htab */
2369         return 24;
2370     }
2371 }
2372 
2373 static inline uint32_t mfpvr(void)
2374 {
2375     uint32_t pvr;
2376 
2377     asm ("mfpvr %0"
2378          : "=r"(pvr));
2379     return pvr;
2380 }
2381 
2382 static void alter_insns(uint64_t *word, uint64_t flags, bool on)
2383 {
2384     if (on) {
2385         *word |= flags;
2386     } else {
2387         *word &= ~flags;
2388     }
2389 }
2390 
2391 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2392 {
2393     PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2394     uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2395     uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2396 
2397     /* Now fix up the class with information we can query from the host */
2398     pcc->pvr = mfpvr();
2399 
2400     alter_insns(&pcc->insns_flags, PPC_ALTIVEC,
2401                 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_ALTIVEC);
2402     alter_insns(&pcc->insns_flags2, PPC2_VSX,
2403                 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_VSX);
2404     alter_insns(&pcc->insns_flags2, PPC2_DFP,
2405                 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_DFP);
2406 
2407     if (dcache_size != -1) {
2408         pcc->l1_dcache_size = dcache_size;
2409     }
2410 
2411     if (icache_size != -1) {
2412         pcc->l1_icache_size = icache_size;
2413     }
2414 
2415 #if defined(TARGET_PPC64)
2416     pcc->radix_page_info = kvm_get_radix_page_info();
2417 
2418     if ((pcc->pvr & 0xffffff00) == CPU_POWERPC_POWER9_DD1) {
2419         /*
2420          * POWER9 DD1 has some bugs which make it not really ISA 3.00
2421          * compliant.  More importantly, advertising ISA 3.00
2422          * architected mode may prevent guests from activating
2423          * necessary DD1 workarounds.
2424          */
2425         pcc->pcr_supported &= ~(PCR_COMPAT_3_00 | PCR_COMPAT_2_07
2426                                 | PCR_COMPAT_2_06 | PCR_COMPAT_2_05);
2427     }
2428 #endif /* defined(TARGET_PPC64) */
2429 }
2430 
2431 bool kvmppc_has_cap_epr(void)
2432 {
2433     return cap_epr;
2434 }
2435 
2436 bool kvmppc_has_cap_fixup_hcalls(void)
2437 {
2438     return cap_fixup_hcalls;
2439 }
2440 
2441 bool kvmppc_has_cap_htm(void)
2442 {
2443     return cap_htm;
2444 }
2445 
2446 bool kvmppc_has_cap_mmu_radix(void)
2447 {
2448     return cap_mmu_radix;
2449 }
2450 
2451 bool kvmppc_has_cap_mmu_hash_v3(void)
2452 {
2453     return cap_mmu_hash_v3;
2454 }
2455 
2456 static void kvmppc_get_cpu_characteristics(KVMState *s)
2457 {
2458     struct kvm_ppc_cpu_char c;
2459     int ret;
2460 
2461     /* Assume broken */
2462     cap_ppc_safe_cache = 0;
2463     cap_ppc_safe_bounds_check = 0;
2464     cap_ppc_safe_indirect_branch = 0;
2465 
2466     ret = kvm_vm_check_extension(s, KVM_CAP_PPC_GET_CPU_CHAR);
2467     if (!ret) {
2468         return;
2469     }
2470     ret = kvm_vm_ioctl(s, KVM_PPC_GET_CPU_CHAR, &c);
2471     if (ret < 0) {
2472         return;
2473     }
2474     /* Parse and set cap_ppc_safe_cache */
2475     if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_L1D_FLUSH_PR) {
2476         cap_ppc_safe_cache = 2;
2477     } else if ((c.character & c.character_mask & H_CPU_CHAR_L1D_THREAD_PRIV) &&
2478                (c.character & c.character_mask
2479                 & (H_CPU_CHAR_L1D_FLUSH_ORI30 | H_CPU_CHAR_L1D_FLUSH_TRIG2))) {
2480         cap_ppc_safe_cache = 1;
2481     }
2482     /* Parse and set cap_ppc_safe_bounds_check */
2483     if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_BNDS_CHK_SPEC_BAR) {
2484         cap_ppc_safe_bounds_check = 2;
2485     } else if (c.character & c.character_mask & H_CPU_CHAR_SPEC_BAR_ORI31) {
2486         cap_ppc_safe_bounds_check = 1;
2487     }
2488     /* Parse and set cap_ppc_safe_indirect_branch */
2489     if (c.character & c.character_mask & H_CPU_CHAR_CACHE_COUNT_DIS) {
2490         cap_ppc_safe_indirect_branch = SPAPR_CAP_FIXED_CCD;
2491     } else if (c.character & c.character_mask & H_CPU_CHAR_BCCTRL_SERIALISED) {
2492         cap_ppc_safe_indirect_branch = SPAPR_CAP_FIXED_IBS;
2493     }
2494 }
2495 
2496 int kvmppc_get_cap_safe_cache(void)
2497 {
2498     return cap_ppc_safe_cache;
2499 }
2500 
2501 int kvmppc_get_cap_safe_bounds_check(void)
2502 {
2503     return cap_ppc_safe_bounds_check;
2504 }
2505 
2506 int kvmppc_get_cap_safe_indirect_branch(void)
2507 {
2508     return cap_ppc_safe_indirect_branch;
2509 }
2510 
2511 bool kvmppc_has_cap_spapr_vfio(void)
2512 {
2513     return cap_spapr_vfio;
2514 }
2515 
2516 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
2517 {
2518     uint32_t host_pvr = mfpvr();
2519     PowerPCCPUClass *pvr_pcc;
2520 
2521     pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2522     if (pvr_pcc == NULL) {
2523         pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2524     }
2525 
2526     return pvr_pcc;
2527 }
2528 
2529 static int kvm_ppc_register_host_cpu_type(MachineState *ms)
2530 {
2531     TypeInfo type_info = {
2532         .name = TYPE_HOST_POWERPC_CPU,
2533         .class_init = kvmppc_host_cpu_class_init,
2534     };
2535     MachineClass *mc = MACHINE_GET_CLASS(ms);
2536     PowerPCCPUClass *pvr_pcc;
2537     ObjectClass *oc;
2538     DeviceClass *dc;
2539     int i;
2540 
2541     pvr_pcc = kvm_ppc_get_host_cpu_class();
2542     if (pvr_pcc == NULL) {
2543         return -1;
2544     }
2545     type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2546     type_register(&type_info);
2547     if (object_dynamic_cast(OBJECT(ms), TYPE_SPAPR_MACHINE)) {
2548         /* override TCG default cpu type with 'host' cpu model */
2549         mc->default_cpu_type = TYPE_HOST_POWERPC_CPU;
2550     }
2551 
2552     oc = object_class_by_name(type_info.name);
2553     g_assert(oc);
2554 
2555     /*
2556      * Update generic CPU family class alias (e.g. on a POWER8NVL host,
2557      * we want "POWER8" to be a "family" alias that points to the current
2558      * host CPU type, too)
2559      */
2560     dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc));
2561     for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) {
2562         if (strcasecmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) {
2563             char *suffix;
2564 
2565             ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc));
2566             suffix = strstr(ppc_cpu_aliases[i].model, POWERPC_CPU_TYPE_SUFFIX);
2567             if (suffix) {
2568                 *suffix = 0;
2569             }
2570             break;
2571         }
2572     }
2573 
2574     return 0;
2575 }
2576 
2577 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2578 {
2579     struct kvm_rtas_token_args args = {
2580         .token = token,
2581     };
2582 
2583     if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2584         return -ENOENT;
2585     }
2586 
2587     strncpy(args.name, function, sizeof(args.name));
2588 
2589     return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2590 }
2591 
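/* Open a KVM file descriptor for reading or writing the guest hash page
 * table, starting at HPTE 'index'.  Returns the fd, or a negative errno
 * with 'errp' set on failure.
 */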
2592 int kvmppc_get_htab_fd(bool write, uint64_t index, Error **errp)
2593 {
2594     struct kvm_get_htab_fd s = {
2595         .flags = write ? KVM_GET_HTAB_WRITE : 0,
2596         .start_index = index,
2597     };
2598     int ret;
2599 
2600     if (!cap_htab_fd) {
2601         error_setg(errp, "KVM version doesn't support %s the HPT",
2602                    write ? "writing" : "reading");
2603         return -ENOTSUP;
2604     }
2605 
2606     ret = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2607     if (ret < 0) {
2608         error_setg(errp, "Unable to open fd for %s HPT %s KVM: %s",
2609                    write ? "writing" : "reading", write ? "to" : "from",
2610                    strerror(errno));
2611         return -errno;
2612     }
2613 
2614     return ret;
2615 }
2616 
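/* Stream HPT entries from the KVM HTAB fd into the migration stream,
 * stopping when the fd reports no more entries or the max_ns time budget
 * is exceeded.  Returns 1 once the whole table has been sent, 0 otherwise.
 */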
2617 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2618 {
2619     int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2620     uint8_t buf[bufsize];
2621     ssize_t rc;
2622 
2623     do {
2624         rc = read(fd, buf, bufsize);
2625         if (rc < 0) {
2626             fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2627                     strerror(errno));
2628             return rc;
2629         } else if (rc) {
2630             uint8_t *buffer = buf;
2631             ssize_t n = rc;
2632             while (n) {
2633                 struct kvm_get_htab_header *head =
2634                     (struct kvm_get_htab_header *) buffer;
2635                 size_t chunksize = sizeof(*head) +
2636                      HASH_PTE_SIZE_64 * head->n_valid;
2637 
2638                 qemu_put_be32(f, head->index);
2639                 qemu_put_be16(f, head->n_valid);
2640                 qemu_put_be16(f, head->n_invalid);
2641                 qemu_put_buffer(f, (void *)(head + 1),
2642                                 HASH_PTE_SIZE_64 * head->n_valid);
2643 
2644                 buffer += chunksize;
2645                 n -= chunksize;
2646             }
2647         }
2648     } while ((rc != 0)
2649              && ((max_ns < 0)
2650                  || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2651 
2652     return (rc == 0) ? 1 : 0;
2653 }
2654 
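/* Write one chunk of HPT entries received from the migration stream back
 * into the kernel through the KVM HTAB fd.
 */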
2655 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2656                            uint16_t n_valid, uint16_t n_invalid)
2657 {
2658     struct kvm_get_htab_header *buf;
2659     size_t chunksize = sizeof(*buf) + n_valid * HASH_PTE_SIZE_64;
2660     ssize_t rc;
2661 
2662     buf = alloca(chunksize);
2663     buf->index = index;
2664     buf->n_valid = n_valid;
2665     buf->n_invalid = n_invalid;
2666 
2667     qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64 * n_valid);
2668 
2669     rc = write(fd, buf, chunksize);
2670     if (rc < 0) {
2671         fprintf(stderr, "Error writing KVM hash table: %s\n",
2672                 strerror(errno));
2673         return rc;
2674     }
2675     if (rc != chunksize) {
2676         /* We should never get a short write on a single chunk */
2677         fprintf(stderr, "Short write, restoring KVM hash table\n");
2678         return -1;
2679     }
2680     return 0;
2681 }
2682 
2683 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2684 {
2685     return true;
2686 }
2687 
2688 void kvm_arch_init_irq_routing(KVMState *s)
2689 {
2690 }
2691 
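/* Read 'n' HPTEs starting at index 'ptex' from the kernel HPT into 'hptes',
 * zero-filling any entries the kernel reports as invalid.
 */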
2692 void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n)
2693 {
2694     int fd, rc;
2695     int i;
2696 
2697     fd = kvmppc_get_htab_fd(false, ptex, &error_abort);
2698 
2699     i = 0;
2700     while (i < n) {
2701         struct kvm_get_htab_header *hdr;
2702         int m = n < HPTES_PER_GROUP ? n : HPTES_PER_GROUP;
2703         char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64];
2704 
2705         rc = read(fd, buf, sizeof(buf));
2706         if (rc < 0) {
2707             hw_error("kvmppc_read_hptes: Unable to read HPTEs");
2708         }
2709 
2710         hdr = (struct kvm_get_htab_header *)buf;
2711         while ((i < n) && ((char *)hdr < (buf + rc))) {
2712             int invalid = hdr->n_invalid, valid = hdr->n_valid;
2713 
2714             if (hdr->index != (ptex + i)) {
2715                 hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32
2716                          " != (%"HWADDR_PRIu" + %d)", hdr->index, ptex, i);
2717             }
2718 
2719             if (n - i < valid) {
2720                 valid = n - i;
2721             }
2722             memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * valid);
2723             i += valid;
2724 
2725             if ((n - i) < invalid) {
2726                 invalid = n - i;
2727             }
2728             memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64);
2729             i += invalid;
2730 
2731             hdr = (struct kvm_get_htab_header *)
2732                 ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid);
2733         }
2734     }
2735 
2736     close(fd);
2737 }
2738 
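/* Write a single HPTE at index 'ptex' into the kernel HPT via the HTAB fd. */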
2739 void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1)
2740 {
2741     int fd, rc;
2742     struct {
2743         struct kvm_get_htab_header hdr;
2744         uint64_t pte0;
2745         uint64_t pte1;
2746     } buf;
2747 
2748     fd = kvmppc_get_htab_fd(true, 0 /* Ignored */, &error_abort);
2749 
2750     buf.hdr.n_valid = 1;
2751     buf.hdr.n_invalid = 0;
2752     buf.hdr.index = ptex;
2753     buf.pte0 = cpu_to_be64(pte0);
2754     buf.pte1 = cpu_to_be64(pte1);
2755 
2756     rc = write(fd, &buf, sizeof(buf));
2757     if (rc != sizeof(buf)) {
2758         hw_error("kvmppc_write_hpte: Unable to update KVM HPT");
2759     }
2760     close(fd);
2761 }
2762 
2763 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2764                              uint64_t address, uint32_t data, PCIDevice *dev)
2765 {
2766     return 0;
2767 }
2768 
2769 int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
2770                                 int vector, PCIDevice *dev)
2771 {
2772     return 0;
2773 }
2774 
2775 int kvm_arch_release_virq_post(int virq)
2776 {
2777     return 0;
2778 }
2779 
2780 int kvm_arch_msi_data_to_gsi(uint32_t data)
2781 {
2782     return data & 0xffff;
2783 }
2784 
2785 int kvmppc_enable_hwrng(void)
2786 {
2787     if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2788         return -1;
2789     }
2790 
2791     return kvmppc_enable_hcall(kvm_state, H_RANDOM);
2792 }
2793 
2794 void kvmppc_check_papr_resize_hpt(Error **errp)
2795 {
2796     if (!kvm_enabled()) {
2797         return; /* No KVM, we're good */
2798     }
2799 
2800     if (cap_resize_hpt) {
2801         return; /* Kernel has explicit support, we're good */
2802     }
2803 
2804     /* Otherwise fallback on looking for PR KVM */
2805     if (kvmppc_is_pr(kvm_state)) {
2806         return;
2807     }
2808 
2809     error_setg(errp,
2810                "Hash page table resizing not available with this KVM version");
2811 }
2812 
2813 int kvmppc_resize_hpt_prepare(PowerPCCPU *cpu, target_ulong flags, int shift)
2814 {
2815     CPUState *cs = CPU(cpu);
2816     struct kvm_ppc_resize_hpt rhpt = {
2817         .flags = flags,
2818         .shift = shift,
2819     };
2820 
2821     if (!cap_resize_hpt) {
2822         return -ENOSYS;
2823     }
2824 
2825     return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_PREPARE, &rhpt);
2826 }
2827 
2828 int kvmppc_resize_hpt_commit(PowerPCCPU *cpu, target_ulong flags, int shift)
2829 {
2830     CPUState *cs = CPU(cpu);
2831     struct kvm_ppc_resize_hpt rhpt = {
2832         .flags = flags,
2833         .shift = shift,
2834     };
2835 
2836     if (!cap_resize_hpt) {
2837         return -ENOSYS;
2838     }
2839 
2840     return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_COMMIT, &rhpt);
2841 }
2842 
2843 /*
2844  * This is a helper function to detect a post migration scenario
2845  * in which a guest, running as KVM-HV, freezes in cpu_post_load because
2846  * the guest kernel can't handle a PVR value other than the actual host
2847  * PVR in KVM_SET_SREGS, even if pvr_match() returns true.
2848  *
2849  * If we don't have cap_ppc_pvr_compat and we're not running in PR
2850  * (so, we're HV), return true. The workaround itself is done in
2851  * cpu_post_load.
2852  *
2853  * The order here is important: we'll only check for KVM PR as a
2854  * fallback if the guest kernel can't handle the situation itself.
2855  * We want to avoid querying the running KVM type at the QEMU level
2856  * as much as possible.
2857  */
2858 bool kvmppc_pvr_workaround_required(PowerPCCPU *cpu)
2859 {
2860     CPUState *cs = CPU(cpu);
2861 
2862     if (!kvm_enabled()) {
2863         return false;
2864     }
2865 
2866     if (cap_ppc_pvr_compat) {
2867         return false;
2868     }
2869 
2870     return !kvmppc_is_pr(cs->kvm_state);
2871 }
2872