1 /*
2  * PowerPC implementation of KVM hooks
3  *
4  * Copyright IBM Corp. 2007
5  * Copyright (C) 2011 Freescale Semiconductor, Inc.
6  *
7  * Authors:
8  *  Jerone Young <jyoung5@us.ibm.com>
9  *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10  *  Hollis Blanchard <hollisb@us.ibm.com>
11  *
12  * This work is licensed under the terms of the GNU GPL, version 2 or later.
13  * See the COPYING file in the top-level directory.
14  *
15  */
16 
17 #include "qemu/osdep.h"
18 #include <dirent.h>
19 #include <sys/ioctl.h>
20 #include <sys/vfs.h>
21 
22 #include <linux/kvm.h>
23 
24 #include "qemu-common.h"
25 #include "qapi/error.h"
26 #include "qemu/error-report.h"
27 #include "cpu.h"
28 #include "cpu-models.h"
29 #include "qemu/timer.h"
30 #include "sysemu/sysemu.h"
31 #include "sysemu/hw_accel.h"
32 #include "kvm_ppc.h"
33 #include "sysemu/cpus.h"
34 #include "sysemu/device_tree.h"
35 #include "mmu-hash64.h"
36 
37 #include "hw/sysbus.h"
38 #include "hw/ppc/spapr.h"
39 #include "hw/ppc/spapr_vio.h"
40 #include "hw/ppc/spapr_cpu_core.h"
41 #include "hw/ppc/ppc.h"
42 #include "sysemu/watchdog.h"
43 #include "trace.h"
44 #include "exec/gdbstub.h"
45 #include "exec/memattrs.h"
46 #include "exec/ram_addr.h"
47 #include "sysemu/hostmem.h"
48 #include "qemu/cutils.h"
49 #include "qemu/mmap-alloc.h"
50 #include "elf.h"
51 #include "sysemu/kvm_int.h"
52 
53 //#define DEBUG_KVM
54 
55 #ifdef DEBUG_KVM
56 #define DPRINTF(fmt, ...) \
57     do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
58 #else
59 #define DPRINTF(fmt, ...) \
60     do { } while (0)
61 #endif
62 
63 #define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"
64 
65 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
66     KVM_CAP_LAST_INFO
67 };
68 
69 static int cap_interrupt_unset = false;
70 static int cap_interrupt_level = false;
71 static int cap_segstate;
72 static int cap_booke_sregs;
73 static int cap_ppc_smt;
74 static int cap_ppc_smt_possible;
75 static int cap_spapr_tce;
76 static int cap_spapr_tce_64;
77 static int cap_spapr_multitce;
78 static int cap_spapr_vfio;
79 static int cap_hior;
80 static int cap_one_reg;
81 static int cap_epr;
82 static int cap_ppc_watchdog;
83 static int cap_papr;
84 static int cap_htab_fd;
85 static int cap_fixup_hcalls;
86 static int cap_htm;             /* Hardware transactional memory support */
87 static int cap_mmu_radix;
88 static int cap_mmu_hash_v3;
89 static int cap_resize_hpt;
90 static int cap_ppc_pvr_compat;
91 static int cap_ppc_safe_cache;
92 static int cap_ppc_safe_bounds_check;
93 static int cap_ppc_safe_indirect_branch;
94 
95 static uint32_t debug_inst_opcode;
96 
97 /* XXX We have a race condition where we actually have a level triggered
98  *     interrupt, but the infrastructure can't expose that yet, so the guest
99  *     takes the interrupt but ignores it, goes to sleep and never gets
100  *     notified that there's still an interrupt pending.
101  *
102  *     As a quick workaround, let's just wake up again 20 ms after we injected
103  *     an interrupt. That way we can ensure that we're always reinjecting
104  *     interrupts in case the guest swallowed them.
105  */
106 static QEMUTimer *idle_timer;
107 
108 static void kvm_kick_cpu(void *opaque)
109 {
110     PowerPCCPU *cpu = opaque;
111 
112     qemu_cpu_kick(CPU(cpu));
113 }
114 
115 /* Check whether we are running with KVM-PR (instead of KVM-HV).  This
116  * should only be used for fallback tests - generally we should use
117  * explicit capabilities for the features we want, rather than
118  * assuming what is/isn't available depending on the KVM variant. */
119 static bool kvmppc_is_pr(KVMState *ks)
120 {
121     /* Assume KVM-PR if the GET_PVINFO capability is available */
122     return kvm_vm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
123 }
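
/*
 * Illustrative sketch (not part of the original file): the intended
 * pattern is to try an explicit capability first and fall back to the
 * PR/HV heuristic only when no capability exists for the feature.
 * KVM_CAP_PPC_FOO and use_foo() are made-up placeholders.
 *
 *     if (kvm_vm_check_extension(ks, KVM_CAP_PPC_FOO)) {
 *         use_foo();               // explicit capability wins
 *     } else if (kvmppc_is_pr(ks)) {
 *         use_foo();               // PR-specific fallback path
 *     }
 */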
124 
125 static int kvm_ppc_register_host_cpu_type(MachineState *ms);
126 static void kvmppc_get_cpu_characteristics(KVMState *s);
127 
128 int kvm_arch_init(MachineState *ms, KVMState *s)
129 {
130     cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
131     cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
132     cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
133     cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
134     cap_ppc_smt_possible = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT_POSSIBLE);
135     cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
136     cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64);
137     cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
138     cap_spapr_vfio = kvm_vm_check_extension(s, KVM_CAP_SPAPR_TCE_VFIO);
139     cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
140     cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
141     cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
142     cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
143     /* Note: we don't set cap_papr here, because this capability is
144      * only activated later, by kvmppc_set_papr() */
145     cap_htab_fd = kvm_vm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
146     cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
147     cap_ppc_smt = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT);
148     cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
149     cap_mmu_radix = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_RADIX);
150     cap_mmu_hash_v3 = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_HASH_V3);
151     cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT);
152     kvmppc_get_cpu_characteristics(s);
153     /*
154      * Note: setting it to false because there is no such capability
155      * in KVM at this moment.
156      *
157      * TODO: call kvm_vm_check_extension() with the right capability
158      * once the kernel starts implementing it. */
159     cap_ppc_pvr_compat = false;
160 
161     if (!cap_interrupt_level) {
162         fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
163                         "VM to stall at times!\n");
164     }
165 
166     kvm_ppc_register_host_cpu_type(ms);
167 
168     return 0;
169 }
170 
171 int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
172 {
173     return 0;
174 }
175 
176 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
177 {
178     CPUPPCState *cenv = &cpu->env;
179     CPUState *cs = CPU(cpu);
180     struct kvm_sregs sregs;
181     int ret;
182 
183     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
184         /* What we're really trying to say is "if we're on BookE, we use
185            the native PVR for now". Checking the exception model is the only
186            sane way to do that, though it may mislead users into thinking
187            they can run BookE guests on BookS. Let's hope nobody dares :) */
188         return 0;
189     } else {
190         if (!cap_segstate) {
191             fprintf(stderr, "kvm error: missing PVR setting capability\n");
192             return -ENOSYS;
193         }
194     }
195 
196     ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
197     if (ret) {
198         return ret;
199     }
200 
201     sregs.pvr = cenv->spr[SPR_PVR];
202     return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
203 }
204 
205 /* Set up a shared TLB array with KVM */
206 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
207 {
208     CPUPPCState *env = &cpu->env;
209     CPUState *cs = CPU(cpu);
210     struct kvm_book3e_206_tlb_params params = {};
211     struct kvm_config_tlb cfg = {};
212     unsigned int entries = 0;
213     int ret, i;
214 
215     if (!kvm_enabled() ||
216         !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
217         return 0;
218     }
219 
220     assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
221 
222     for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
223         params.tlb_sizes[i] = booke206_tlb_size(env, i);
224         params.tlb_ways[i] = booke206_tlb_ways(env, i);
225         entries += params.tlb_sizes[i];
226     }
227 
228     assert(entries == env->nb_tlb);
229     assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
230 
231     env->tlb_dirty = true;
232 
233     cfg.array = (uintptr_t)env->tlb.tlbm;
234     cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
235     cfg.params = (uintptr_t)&params;
236     cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
237 
238     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
239     if (ret < 0) {
240         fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
241                 __func__, strerror(-ret));
242         return ret;
243     }
244 
245     env->kvm_sw_tlb = true;
246     return 0;
247 }
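
/*
 * Worked example (illustrative; assumes an e500mc-style geometry with a
 * 512-entry TLB0 and a 64-entry TLB1): entries = 512 + 64 = 576, so
 * cfg.array_len = 576 * sizeof(ppcmas_tlb_t).  Once KVM_CAP_SW_TLB is
 * enabled, QEMU and KVM share that single copy of the guest TLB, and
 * kvm_sw_tlb_put() below only has to tell KVM which entries are dirty.
 */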
248 
249 
250 #if defined(TARGET_PPC64)
251 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
252                                        struct kvm_ppc_smmu_info *info)
253 {
254     CPUPPCState *env = &cpu->env;
255     CPUState *cs = CPU(cpu);
256 
257     memset(info, 0, sizeof(*info));
258 
259     /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so we
260      * need to "guess" what the supported page sizes are.
261      *
262      * For that to work we make a few assumptions:
263      *
264      * - Check whether we are running "PR" KVM which only supports 4K
265      *   and 16M pages, but supports them regardless of the backing
266      *   store characteristics. We also don't support 1T segments.
267      *
268      *   This is safe because if HV KVM ever supports that capability, or
269      *   PR KVM grows support for more page/segment sizes, those versions
270      *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
271      *   will not hit this fallback.
272      *
273      * - Else we are running HV KVM. This means we only support page
274      *   sizes that fit in the backing store. Additionally we only
275      *   advertise 64K pages if the processor is ARCH 2.06, and we assume
276      *   P7 encodings for the SLB and hash table. Here too, we assume
277      *   support for any newer processor will mean a kernel that
278      *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
279      *   this fallback.
280      */
281     if (kvmppc_is_pr(cs->kvm_state)) {
282         /* No flags */
283         info->flags = 0;
284         info->slb_size = 64;
285 
286         /* Standard 4k base page size segment */
287         info->sps[0].page_shift = 12;
288         info->sps[0].slb_enc = 0;
289         info->sps[0].enc[0].page_shift = 12;
290         info->sps[0].enc[0].pte_enc = 0;
291 
292         /* Standard 16M large page size segment */
293         info->sps[1].page_shift = 24;
294         info->sps[1].slb_enc = SLB_VSID_L;
295         info->sps[1].enc[0].page_shift = 24;
296         info->sps[1].enc[0].pte_enc = 0;
297     } else {
298         int i = 0;
299 
300         /* HV KVM has backing store size restrictions */
301         info->flags = KVM_PPC_PAGE_SIZES_REAL;
302 
303         if (ppc_hash64_has(cpu, PPC_HASH64_1TSEG)) {
304             info->flags |= KVM_PPC_1T_SEGMENTS;
305         }
306 
307         if (env->mmu_model == POWERPC_MMU_2_06 ||
308             env->mmu_model == POWERPC_MMU_2_07) {
309             info->slb_size = 32;
310         } else {
311             info->slb_size = 64;
312         }
313 
314         /* Standard 4k base page size segment */
315         info->sps[i].page_shift = 12;
316         info->sps[i].slb_enc = 0;
317         info->sps[i].enc[0].page_shift = 12;
318         info->sps[i].enc[0].pte_enc = 0;
319         i++;
320 
321         /* 64K on MMU 2.06 and later */
322         if (env->mmu_model == POWERPC_MMU_2_06 ||
323             env->mmu_model == POWERPC_MMU_2_07) {
324             info->sps[i].page_shift = 16;
325             info->sps[i].slb_enc = 0x110;
326             info->sps[i].enc[0].page_shift = 16;
327             info->sps[i].enc[0].pte_enc = 1;
328             i++;
329         }
330 
331         /* Standard 16M large page size segment */
332         info->sps[i].page_shift = 24;
333         info->sps[i].slb_enc = SLB_VSID_L;
334         info->sps[i].enc[0].page_shift = 24;
335         info->sps[i].enc[0].pte_enc = 0;
336     }
337 }
338 
339 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
340 {
341     CPUState *cs = CPU(cpu);
342     int ret;
343 
344     if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
345         ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
346         if (ret == 0) {
347             return;
348         }
349     }
350 
351     kvm_get_fallback_smmu_info(cpu, info);
352 }
353 
354 struct ppc_radix_page_info *kvm_get_radix_page_info(void)
355 {
356     KVMState *s = KVM_STATE(current_machine->accelerator);
357     struct ppc_radix_page_info *radix_page_info;
358     struct kvm_ppc_rmmu_info rmmu_info;
359     int i;
360 
361     if (!kvm_check_extension(s, KVM_CAP_PPC_MMU_RADIX)) {
362         return NULL;
363     }
364     if (kvm_vm_ioctl(s, KVM_PPC_GET_RMMU_INFO, &rmmu_info)) {
365         return NULL;
366     }
367     radix_page_info = g_malloc0(sizeof(*radix_page_info));
368     radix_page_info->count = 0;
369     for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) {
370         if (rmmu_info.ap_encodings[i]) {
371             radix_page_info->entries[i] = rmmu_info.ap_encodings[i];
372             radix_page_info->count++;
373         }
374     }
375     return radix_page_info;
376 }
377 
378 target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu,
379                                      bool radix, bool gtse,
380                                      uint64_t proc_tbl)
381 {
382     CPUState *cs = CPU(cpu);
383     int ret;
384     uint64_t flags = 0;
385     struct kvm_ppc_mmuv3_cfg cfg = {
386         .process_table = proc_tbl,
387     };
388 
389     if (radix) {
390         flags |= KVM_PPC_MMUV3_RADIX;
391     }
392     if (gtse) {
393         flags |= KVM_PPC_MMUV3_GTSE;
394     }
395     cfg.flags = flags;
396     ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_CONFIGURE_V3_MMU, &cfg);
397     switch (ret) {
398     case 0:
399         return H_SUCCESS;
400     case -EINVAL:
401         return H_PARAMETER;
402     case -ENODEV:
403         return H_NOT_AVAILABLE;
404     default:
405         return H_HARDWARE;
406     }
407 }
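
/*
 * Illustrative usage sketch (not part of this file): a caller switching
 * the guest to a radix MMU with guest translation shootdown enabled
 * (GTSE), roughly as an sPAPR H_REGISTER_PROC_TBL handler might.  The
 * proc_tbl value here is a hypothetical guest-real address of the
 * process table.
 *
 *     target_ulong rc = kvmppc_configure_v3_mmu(cpu, true, true, proc_tbl);
 *     if (rc != H_SUCCESS) {
 *         return rc;              // H_PARAMETER, H_NOT_AVAILABLE or H_HARDWARE
 *     }
 */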
408 
409 bool kvmppc_hpt_needs_host_contiguous_pages(void)
410 {
411     PowerPCCPU *cpu = POWERPC_CPU(first_cpu);
412     static struct kvm_ppc_smmu_info smmu_info;
413 
414     if (!kvm_enabled()) {
415         return false;
416     }
417 
418     kvm_get_smmu_info(cpu, &smmu_info);
419     return !!(smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL);
420 }
421 
422 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
423 {
424     if (!kvmppc_hpt_needs_host_contiguous_pages()) {
425         return true;
426     }
427 
428     return (1ul << shift) <= rampgsize;
429 }
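
/*
 * Worked example (illustrative): with HV KVM and RAM backed by 64 KiB
 * pages (rampgsize = 0x10000), a 64 KiB guest page (shift = 16) is
 * accepted because (1ul << 16) <= 0x10000, while a 16 MiB guest page
 * (shift = 24) is rejected because (1ul << 24) > 0x10000.  When the HPT
 * does not need host-contiguous pages (e.g. under PR KVM, which does
 * not set KVM_PPC_PAGE_SIZES_REAL), every size is accepted regardless
 * of the backing store.
 */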
430 
431 static long max_cpu_page_size;
432 
433 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
434 {
435     static struct kvm_ppc_smmu_info smmu_info;
436     static bool has_smmu_info;
437     CPUPPCState *env = &cpu->env;
438     int iq, ik, jq, jk;
439 
440     /* We only handle page sizes for 64-bit server guests for now */
441     if (!(env->mmu_model & POWERPC_MMU_64)) {
442         return;
443     }
444 
445     /* Collect MMU info from kernel if not already */
446     if (!has_smmu_info) {
447         kvm_get_smmu_info(cpu, &smmu_info);
448         has_smmu_info = true;
449     }
450 
451     if (!max_cpu_page_size) {
452         max_cpu_page_size = qemu_getrampagesize();
453     }
454 
455     /* Convert to QEMU form */
456     memset(cpu->hash64_opts->sps, 0, sizeof(*cpu->hash64_opts->sps));
457 
458     /* If we have HV KVM, we need to forbid CI large pages if our
459      * host page size is smaller than 64K.
460      */
461     if (kvmppc_hpt_needs_host_contiguous_pages()) {
462         if (getpagesize() >= 0x10000) {
463             cpu->hash64_opts->flags |= PPC_HASH64_CI_LARGEPAGE;
464         } else {
465             cpu->hash64_opts->flags &= ~PPC_HASH64_CI_LARGEPAGE;
466         }
467     }
468 
469     /*
470      * XXX This loop should be an entry wide AND of the capabilities that
471      *     the selected CPU has with the capabilities that KVM supports.
472      */
473     for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
474         PPCHash64SegmentPageSizes *qsps = &cpu->hash64_opts->sps[iq];
475         struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
476 
477         if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
478                                  ksps->page_shift)) {
479             continue;
480         }
481         qsps->page_shift = ksps->page_shift;
482         qsps->slb_enc = ksps->slb_enc;
483         for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
484             if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
485                                      ksps->enc[jk].page_shift)) {
486                 continue;
487             }
488             qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
489             qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
490             if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
491                 break;
492             }
493         }
494         if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
495             break;
496         }
497     }
498     cpu->hash64_opts->slb_size = smmu_info.slb_size;
499     if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
500         cpu->hash64_opts->flags &= ~PPC_HASH64_1TSEG;
501     }
502 }
503 
504 bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path)
505 {
506     Object *mem_obj = object_resolve_path(obj_path, NULL);
507     long pagesize = host_memory_backend_pagesize(MEMORY_BACKEND(mem_obj));
508 
509     return pagesize >= max_cpu_page_size;
510 }
511 
512 #else /* defined (TARGET_PPC64) */
513 
514 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
515 {
516 }
517 
518 bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path)
519 {
520     return true;
521 }
522 
523 #endif /* !defined (TARGET_PPC64) */
524 
525 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
526 {
527     return POWERPC_CPU(cpu)->vcpu_id;
528 }
529 
530 /* e500 supports 2 h/w breakpoints and 2 watchpoints.
531  * book3s supports only 1 watchpoint, so an array size
532  * of 4 is sufficient for now.
533  */
534 #define MAX_HW_BKPTS 4
535 
536 static struct HWBreakpoint {
537     target_ulong addr;
538     int type;
539 } hw_debug_points[MAX_HW_BKPTS];
540 
541 static CPUWatchpoint hw_watchpoint;
542 
543 /* By default no h/w breakpoints or watchpoints are supported */
544 static int max_hw_breakpoint;
545 static int max_hw_watchpoint;
546 static int nb_hw_breakpoint;
547 static int nb_hw_watchpoint;
548 
549 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
550 {
551     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
552         max_hw_breakpoint = 2;
553         max_hw_watchpoint = 2;
554     }
555 
556     if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
557         fprintf(stderr, "Error initializing h/w breakpoints\n");
558         return;
559     }
560 }
561 
562 int kvm_arch_init_vcpu(CPUState *cs)
563 {
564     PowerPCCPU *cpu = POWERPC_CPU(cs);
565     CPUPPCState *cenv = &cpu->env;
566     int ret;
567 
568     /* Gather server mmu info from KVM and update the CPU state */
569     kvm_fixup_page_sizes(cpu);
570 
571     /* Synchronize sregs with kvm */
572     ret = kvm_arch_sync_sregs(cpu);
573     if (ret) {
574         if (ret == -EINVAL) {
575             error_report("Register sync failed... If you're using kvm-hv.ko,"
576                          " only \"-cpu host\" is possible");
577         }
578         return ret;
579     }
580 
581     idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
582 
583     switch (cenv->mmu_model) {
584     case POWERPC_MMU_BOOKE206:
585         /* This target supports access to KVM's guest TLB */
586         ret = kvm_booke206_tlb_init(cpu);
587         break;
588     case POWERPC_MMU_2_07:
589         if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
590             /* KVM-HV has transactional memory on POWER8 even without the
591              * KVM_CAP_PPC_HTM extension, so enable it here instead, as
592              * long as it's available to userspace on the host. */
593             if (qemu_getauxval(AT_HWCAP2) & PPC_FEATURE2_HAS_HTM) {
594                 cap_htm = true;
595             }
596         }
597         break;
598     default:
599         break;
600     }
601 
602     kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
603     kvmppc_hw_debug_points_init(cenv);
604 
605     return ret;
606 }
607 
608 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
609 {
610     CPUPPCState *env = &cpu->env;
611     CPUState *cs = CPU(cpu);
612     struct kvm_dirty_tlb dirty_tlb;
613     unsigned char *bitmap;
614     int ret;
615 
616     if (!env->kvm_sw_tlb) {
617         return;
618     }
619 
620     bitmap = g_malloc((env->nb_tlb + 7) / 8);
621     memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
622 
623     dirty_tlb.bitmap = (uintptr_t)bitmap;
624     dirty_tlb.num_dirty = env->nb_tlb;
625 
626     ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
627     if (ret) {
628         fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
629                 __func__, strerror(-ret));
630     }
631 
632     g_free(bitmap);
633 }
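
/*
 * Worked example (illustrative): with nb_tlb = 576 (the e500mc-style
 * geometry sketched earlier) the dirty bitmap needs (576 + 7) / 8 = 72
 * bytes; setting every bit marks all entries dirty, which tells KVM to
 * reload the complete guest TLB from the shared array.
 */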
634 
635 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
636 {
637     PowerPCCPU *cpu = POWERPC_CPU(cs);
638     CPUPPCState *env = &cpu->env;
639     union {
640         uint32_t u32;
641         uint64_t u64;
642     } val;
643     struct kvm_one_reg reg = {
644         .id = id,
645         .addr = (uintptr_t) &val,
646     };
647     int ret;
648 
649     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
650     if (ret != 0) {
651         trace_kvm_failed_spr_get(spr, strerror(errno));
652     } else {
653         switch (id & KVM_REG_SIZE_MASK) {
654         case KVM_REG_SIZE_U32:
655             env->spr[spr] = val.u32;
656             break;
657 
658         case KVM_REG_SIZE_U64:
659             env->spr[spr] = val.u64;
660             break;
661 
662         default:
663             /* Don't handle this size yet */
664             abort();
665         }
666     }
667 }
668 
669 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
670 {
671     PowerPCCPU *cpu = POWERPC_CPU(cs);
672     CPUPPCState *env = &cpu->env;
673     union {
674         uint32_t u32;
675         uint64_t u64;
676     } val;
677     struct kvm_one_reg reg = {
678         .id = id,
679         .addr = (uintptr_t) &val,
680     };
681     int ret;
682 
683     switch (id & KVM_REG_SIZE_MASK) {
684     case KVM_REG_SIZE_U32:
685         val.u32 = env->spr[spr];
686         break;
687 
688     case KVM_REG_SIZE_U64:
689         val.u64 = env->spr[spr];
690         break;
691 
692     default:
693         /* Don't handle this size yet */
694         abort();
695     }
696 
697     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
698     if (ret != 0) {
699         trace_kvm_failed_spr_set(spr, strerror(errno));
700     }
701 }
702 
703 static int kvm_put_fp(CPUState *cs)
704 {
705     PowerPCCPU *cpu = POWERPC_CPU(cs);
706     CPUPPCState *env = &cpu->env;
707     struct kvm_one_reg reg;
708     int i;
709     int ret;
710 
711     if (env->insns_flags & PPC_FLOAT) {
712         uint64_t fpscr = env->fpscr;
713         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
714 
715         reg.id = KVM_REG_PPC_FPSCR;
716         reg.addr = (uintptr_t)&fpscr;
717         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
718         if (ret < 0) {
719             DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
720             return ret;
721         }
722 
723         for (i = 0; i < 32; i++) {
724             uint64_t vsr[2];
725 
726 #ifdef HOST_WORDS_BIGENDIAN
727             vsr[0] = float64_val(env->fpr[i]);
728             vsr[1] = env->vsr[i];
729 #else
730             vsr[0] = env->vsr[i];
731             vsr[1] = float64_val(env->fpr[i]);
732 #endif
733             reg.addr = (uintptr_t) &vsr;
734             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
735 
736             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
737             if (ret < 0) {
738                 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
739                         i, strerror(errno));
740                 return ret;
741             }
742         }
743     }
744 
745     if (env->insns_flags & PPC_ALTIVEC) {
746         reg.id = KVM_REG_PPC_VSCR;
747         reg.addr = (uintptr_t)&env->vscr;
748         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
749         if (ret < 0) {
750             DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
751             return ret;
752         }
753 
754         for (i = 0; i < 32; i++) {
755             reg.id = KVM_REG_PPC_VR(i);
756             reg.addr = (uintptr_t)&env->avr[i];
757             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
758             if (ret < 0) {
759                 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
760                 return ret;
761             }
762         }
763     }
764 
765     return 0;
766 }
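
/*
 * Layout note (an illustrative reading of the code above, not a
 * statement of the kernel ABI): architecturally the FPR is doubleword 0
 * of the corresponding VSR.  kvm_put_fp() builds vsr[] as a host-endian
 * pair of 64-bit doublewords, so on a big-endian host the FPR goes in
 * vsr[0] and the VSX extension doubleword in vsr[1], while on a
 * little-endian host the two are swapped; kvm_get_fp() below undoes the
 * same swap when reading the registers back.
 */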
767 
768 static int kvm_get_fp(CPUState *cs)
769 {
770     PowerPCCPU *cpu = POWERPC_CPU(cs);
771     CPUPPCState *env = &cpu->env;
772     struct kvm_one_reg reg;
773     int i;
774     int ret;
775 
776     if (env->insns_flags & PPC_FLOAT) {
777         uint64_t fpscr;
778         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
779 
780         reg.id = KVM_REG_PPC_FPSCR;
781         reg.addr = (uintptr_t)&fpscr;
782         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
783         if (ret < 0) {
784             DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
785             return ret;
786         } else {
787             env->fpscr = fpscr;
788         }
789 
790         for (i = 0; i < 32; i++) {
791             uint64_t vsr[2];
792 
793             reg.addr = (uintptr_t) &vsr;
794             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
795 
796             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
797             if (ret < 0) {
798                 DPRINTF("Unable to get %s%d from KVM: %s\n",
799                         vsx ? "VSR" : "FPR", i, strerror(errno));
800                 return ret;
801             } else {
802 #ifdef HOST_WORDS_BIGENDIAN
803                 env->fpr[i] = vsr[0];
804                 if (vsx) {
805                     env->vsr[i] = vsr[1];
806                 }
807 #else
808                 env->fpr[i] = vsr[1];
809                 if (vsx) {
810                     env->vsr[i] = vsr[0];
811                 }
812 #endif
813             }
814         }
815     }
816 
817     if (env->insns_flags & PPC_ALTIVEC) {
818         reg.id = KVM_REG_PPC_VSCR;
819         reg.addr = (uintptr_t)&env->vscr;
820         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
821         if (ret < 0) {
822             DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
823             return ret;
824         }
825 
826         for (i = 0; i < 32; i++) {
827             reg.id = KVM_REG_PPC_VR(i);
828             reg.addr = (uintptr_t)&env->avr[i];
829             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
830             if (ret < 0) {
831                 DPRINTF("Unable to get VR%d from KVM: %s\n",
832                         i, strerror(errno));
833                 return ret;
834             }
835         }
836     }
837 
838     return 0;
839 }
840 
841 #if defined(TARGET_PPC64)
842 static int kvm_get_vpa(CPUState *cs)
843 {
844     PowerPCCPU *cpu = POWERPC_CPU(cs);
845     sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu);
846     struct kvm_one_reg reg;
847     int ret;
848 
849     reg.id = KVM_REG_PPC_VPA_ADDR;
850     reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
851     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
852     if (ret < 0) {
853         DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
854         return ret;
855     }
856 
857     assert((uintptr_t)&spapr_cpu->slb_shadow_size
858            == ((uintptr_t)&spapr_cpu->slb_shadow_addr + 8));
859     reg.id = KVM_REG_PPC_VPA_SLB;
860     reg.addr = (uintptr_t)&spapr_cpu->slb_shadow_addr;
861     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
862     if (ret < 0) {
863         DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
864                 strerror(errno));
865         return ret;
866     }
867 
868     assert((uintptr_t)&spapr_cpu->dtl_size
869            == ((uintptr_t)&spapr_cpu->dtl_addr + 8));
870     reg.id = KVM_REG_PPC_VPA_DTL;
871     reg.addr = (uintptr_t)&spapr_cpu->dtl_addr;
872     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
873     if (ret < 0) {
874         DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
875                 strerror(errno));
876         return ret;
877     }
878 
879     return 0;
880 }
881 
882 static int kvm_put_vpa(CPUState *cs)
883 {
884     PowerPCCPU *cpu = POWERPC_CPU(cs);
885     sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu);
886     struct kvm_one_reg reg;
887     int ret;
888 
889     /* SLB shadow or DTL can't be registered unless a master VPA is
890      * registered.  That means when restoring state, if a VPA *is*
891      * registered, we need to set that up first.  If not, we need to
892      * deregister the others before deregistering the master VPA */
893     assert(spapr_cpu->vpa_addr
894            || !(spapr_cpu->slb_shadow_addr || spapr_cpu->dtl_addr));
895 
896     if (spapr_cpu->vpa_addr) {
897         reg.id = KVM_REG_PPC_VPA_ADDR;
898         reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
899         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
900         if (ret < 0) {
901             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
902             return ret;
903         }
904     }
905 
906     assert((uintptr_t)&spapr_cpu->slb_shadow_size
907            == ((uintptr_t)&spapr_cpu->slb_shadow_addr + 8));
908     reg.id = KVM_REG_PPC_VPA_SLB;
909     reg.addr = (uintptr_t)&spapr_cpu->slb_shadow_addr;
910     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
911     if (ret < 0) {
912         DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
913         return ret;
914     }
915 
916     assert((uintptr_t)&spapr_cpu->dtl_size
917            == ((uintptr_t)&spapr_cpu->dtl_addr + 8));
918     reg.id = KVM_REG_PPC_VPA_DTL;
919     reg.addr = (uintptr_t)&spapr_cpu->dtl_addr;
920     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
921     if (ret < 0) {
922         DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
923                 strerror(errno));
924         return ret;
925     }
926 
927     if (!spapr_cpu->vpa_addr) {
928         reg.id = KVM_REG_PPC_VPA_ADDR;
929         reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
930         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
931         if (ret < 0) {
932             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
933             return ret;
934         }
935     }
936 
937     return 0;
938 }
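
/*
 * Ordering summary (illustrative restatement of the constraint above):
 *
 *     restoring with a VPA registered:   VPA -> SLB shadow -> DTL
 *     restoring with no VPA registered:  SLB shadow -> DTL -> VPA (all zero)
 *
 * i.e. the master VPA is written first when it exists and last when it
 * does not, which is why kvm_put_vpa() has two KVM_REG_PPC_VPA_ADDR
 * writes, one taken when vpa_addr is set and one when it is not.
 */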
939 #endif /* TARGET_PPC64 */
940 
941 int kvmppc_put_books_sregs(PowerPCCPU *cpu)
942 {
943     CPUPPCState *env = &cpu->env;
944     struct kvm_sregs sregs;
945     int i;
946 
947     sregs.pvr = env->spr[SPR_PVR];
948 
949     if (cpu->vhyp) {
950         PPCVirtualHypervisorClass *vhc =
951             PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
952         sregs.u.s.sdr1 = vhc->encode_hpt_for_kvm_pr(cpu->vhyp);
953     } else {
954         sregs.u.s.sdr1 = env->spr[SPR_SDR1];
955     }
956 
957     /* Sync SLB */
958 #ifdef TARGET_PPC64
959     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
960         sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
961         if (env->slb[i].esid & SLB_ESID_V) {
962             sregs.u.s.ppc64.slb[i].slbe |= i;
963         }
964         sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
965     }
966 #endif
967 
968     /* Sync SRs */
969     for (i = 0; i < 16; i++) {
970         sregs.u.s.ppc32.sr[i] = env->sr[i];
971     }
972 
973     /* Sync BATs */
974     for (i = 0; i < 8; i++) {
975         /* Beware. We have to swap the upper and lower 32-bit halves here */
976         sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
977             | env->DBAT[1][i];
978         sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
979             | env->IBAT[1][i];
980     }
981 
982     return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
983 }
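
/*
 * Worked example (illustrative; assuming, as elsewhere in target/ppc,
 * that env->DBAT[0][i] holds the upper BAT word and env->DBAT[1][i] the
 * lower one): the sregs interface wants one 64-bit value per BAT with
 * the upper word in bits 63:32 and the lower word in bits 31:0.  So for
 * hypothetical values 0x00001ffe (upper) and 0x00000012 (lower) the
 * packed register is 0x00001ffe00000012; kvmppc_get_books_sregs() below
 * performs the inverse split.
 */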
984 
985 int kvm_arch_put_registers(CPUState *cs, int level)
986 {
987     PowerPCCPU *cpu = POWERPC_CPU(cs);
988     CPUPPCState *env = &cpu->env;
989     struct kvm_regs regs;
990     int ret;
991     int i;
992 
993     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
994     if (ret < 0) {
995         return ret;
996     }
997 
998     regs.ctr = env->ctr;
999     regs.lr  = env->lr;
1000     regs.xer = cpu_read_xer(env);
1001     regs.msr = env->msr;
1002     regs.pc = env->nip;
1003 
1004     regs.srr0 = env->spr[SPR_SRR0];
1005     regs.srr1 = env->spr[SPR_SRR1];
1006 
1007     regs.sprg0 = env->spr[SPR_SPRG0];
1008     regs.sprg1 = env->spr[SPR_SPRG1];
1009     regs.sprg2 = env->spr[SPR_SPRG2];
1010     regs.sprg3 = env->spr[SPR_SPRG3];
1011     regs.sprg4 = env->spr[SPR_SPRG4];
1012     regs.sprg5 = env->spr[SPR_SPRG5];
1013     regs.sprg6 = env->spr[SPR_SPRG6];
1014     regs.sprg7 = env->spr[SPR_SPRG7];
1015 
1016     regs.pid = env->spr[SPR_BOOKE_PID];
1017 
1018     for (i = 0; i < 32; i++)
1019         regs.gpr[i] = env->gpr[i];
1020 
1021     regs.cr = 0;
1022     for (i = 0; i < 8; i++) {
1023         regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
1024     }
1025 
1026     ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
1027     if (ret < 0)
1028         return ret;
1029 
1030     kvm_put_fp(cs);
1031 
1032     if (env->tlb_dirty) {
1033         kvm_sw_tlb_put(cpu);
1034         env->tlb_dirty = false;
1035     }
1036 
1037     if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
1038         ret = kvmppc_put_books_sregs(cpu);
1039         if (ret < 0) {
1040             return ret;
1041         }
1042     }
1043 
1044     if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
1045         kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1046     }
1047 
1048     if (cap_one_reg) {
1049         int i;
1050 
1051         /* We deliberately ignore errors here: for kernels which have
1052          * the ONE_REG calls but don't support the specific
1053          * registers, there's a reasonable chance things will still
1054          * work, at least until we try to migrate. */
1055         for (i = 0; i < 1024; i++) {
1056             uint64_t id = env->spr_cb[i].one_reg_id;
1057 
1058             if (id != 0) {
1059                 kvm_put_one_spr(cs, id, i);
1060             }
1061         }
1062 
1063 #ifdef TARGET_PPC64
1064         if (msr_ts) {
1065             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1066                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1067             }
1068             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1069                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1070             }
1071             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1072             kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1073             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1074             kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1075             kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1076             kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1077             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1078             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1079             kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1080             kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1081         }
1082 
1083         if (cap_papr) {
1084             if (kvm_put_vpa(cs) < 0) {
1085                 DPRINTF("Warning: Unable to set VPA information to KVM\n");
1086             }
1087         }
1088 
1089         kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1090 #endif /* TARGET_PPC64 */
1091     }
1092 
1093     return ret;
1094 }
1095 
1096 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1097 {
1098     env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1099 }
1100 
1101 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1102 {
1103     CPUPPCState *env = &cpu->env;
1104     struct kvm_sregs sregs;
1105     int ret;
1106 
1107     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1108     if (ret < 0) {
1109         return ret;
1110     }
1111 
1112     if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1113         env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1114         env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1115         env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1116         env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1117         env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1118         env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1119         env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1120         env->spr[SPR_DECR] = sregs.u.e.dec;
1121         env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1122         env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1123         env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1124     }
1125 
1126     if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1127         env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1128         env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1129         env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1130         env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1131         env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1132     }
1133 
1134     if (sregs.u.e.features & KVM_SREGS_E_64) {
1135         env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1136     }
1137 
1138     if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1139         env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1140     }
1141 
1142     if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1143         env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1144         kvm_sync_excp(env, POWERPC_EXCP_CRITICAL,  SPR_BOOKE_IVOR0);
1145         env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1146         kvm_sync_excp(env, POWERPC_EXCP_MCHECK,  SPR_BOOKE_IVOR1);
1147         env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1148         kvm_sync_excp(env, POWERPC_EXCP_DSI,  SPR_BOOKE_IVOR2);
1149         env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1150         kvm_sync_excp(env, POWERPC_EXCP_ISI,  SPR_BOOKE_IVOR3);
1151         env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1152         kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL,  SPR_BOOKE_IVOR4);
1153         env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1154         kvm_sync_excp(env, POWERPC_EXCP_ALIGN,  SPR_BOOKE_IVOR5);
1155         env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1156         kvm_sync_excp(env, POWERPC_EXCP_PROGRAM,  SPR_BOOKE_IVOR6);
1157         env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1158         kvm_sync_excp(env, POWERPC_EXCP_FPU,  SPR_BOOKE_IVOR7);
1159         env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1160         kvm_sync_excp(env, POWERPC_EXCP_SYSCALL,  SPR_BOOKE_IVOR8);
1161         env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1162         kvm_sync_excp(env, POWERPC_EXCP_APU,  SPR_BOOKE_IVOR9);
1163         env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1164         kvm_sync_excp(env, POWERPC_EXCP_DECR,  SPR_BOOKE_IVOR10);
1165         env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1166         kvm_sync_excp(env, POWERPC_EXCP_FIT,  SPR_BOOKE_IVOR11);
1167         env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1168         kvm_sync_excp(env, POWERPC_EXCP_WDT,  SPR_BOOKE_IVOR12);
1169         env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1170         kvm_sync_excp(env, POWERPC_EXCP_DTLB,  SPR_BOOKE_IVOR13);
1171         env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1172         kvm_sync_excp(env, POWERPC_EXCP_ITLB,  SPR_BOOKE_IVOR14);
1173         env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1174         kvm_sync_excp(env, POWERPC_EXCP_DEBUG,  SPR_BOOKE_IVOR15);
1175 
1176         if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1177             env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1178             kvm_sync_excp(env, POWERPC_EXCP_SPEU,  SPR_BOOKE_IVOR32);
1179             env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1180             kvm_sync_excp(env, POWERPC_EXCP_EFPDI,  SPR_BOOKE_IVOR33);
1181             env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1182             kvm_sync_excp(env, POWERPC_EXCP_EFPRI,  SPR_BOOKE_IVOR34);
1183         }
1184 
1185         if (sregs.u.e.features & KVM_SREGS_E_PM) {
1186             env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1187             kvm_sync_excp(env, POWERPC_EXCP_EPERFM,  SPR_BOOKE_IVOR35);
1188         }
1189 
1190         if (sregs.u.e.features & KVM_SREGS_E_PC) {
1191             env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1192             kvm_sync_excp(env, POWERPC_EXCP_DOORI,  SPR_BOOKE_IVOR36);
1193             env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1194             kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1195         }
1196     }
1197 
1198     if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1199         env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1200         env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1201         env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1202         env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1203         env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1204         env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1205         env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1206         env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1207         env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1208         env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1209     }
1210 
1211     if (sregs.u.e.features & KVM_SREGS_EXP) {
1212         env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1213     }
1214 
1215     if (sregs.u.e.features & KVM_SREGS_E_PD) {
1216         env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1217         env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1218     }
1219 
1220     if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1221         env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1222         env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1223         env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1224 
1225         if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1226             env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1227             env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1228         }
1229     }
1230 
1231     return 0;
1232 }
1233 
1234 static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1235 {
1236     CPUPPCState *env = &cpu->env;
1237     struct kvm_sregs sregs;
1238     int ret;
1239     int i;
1240 
1241     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1242     if (ret < 0) {
1243         return ret;
1244     }
1245 
1246     if (!cpu->vhyp) {
1247         ppc_store_sdr1(env, sregs.u.s.sdr1);
1248     }
1249 
1250     /* Sync SLB */
1251 #ifdef TARGET_PPC64
1252     /*
1253      * The packed SLB array we get from KVM_GET_SREGS only contains
1254      * information about valid entries. So we flush our internal copy
1255      * to get rid of stale ones, then put all valid SLB entries back
1256      * in.
1257      */
1258     memset(env->slb, 0, sizeof(env->slb));
1259     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1260         target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1261         target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1262         /*
1263          * Only restore valid entries
1264          */
1265         if (rb & SLB_ESID_V) {
1266             ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1267         }
1268     }
1269 #endif
1270 
1271     /* Sync SRs */
1272     for (i = 0; i < 16; i++) {
1273         env->sr[i] = sregs.u.s.ppc32.sr[i];
1274     }
1275 
1276     /* Sync BATs */
1277     for (i = 0; i < 8; i++) {
1278         env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1279         env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1280         env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1281         env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1282     }
1283 
1284     return 0;
1285 }
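
/*
 * Worked example (illustrative): each packed slbe value carries the ESID
 * in its upper bits and the SLB slot index in its low 12 bits, which is
 * why the restore loop above passes (rb & 0xfff) as the slot number and
 * (rb & ~0xfffULL) as the ESID portion to ppc_store_slb(), skipping any
 * entry without SLB_ESID_V set.
 */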
1286 
1287 int kvm_arch_get_registers(CPUState *cs)
1288 {
1289     PowerPCCPU *cpu = POWERPC_CPU(cs);
1290     CPUPPCState *env = &cpu->env;
1291     struct kvm_regs regs;
1292     uint32_t cr;
1293     int i, ret;
1294 
1295     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1296     if (ret < 0)
1297         return ret;
1298 
1299     cr = regs.cr;
1300     for (i = 7; i >= 0; i--) {
1301         env->crf[i] = cr & 15;
1302         cr >>= 4;
1303     }
1304 
1305     env->ctr = regs.ctr;
1306     env->lr = regs.lr;
1307     cpu_write_xer(env, regs.xer);
1308     env->msr = regs.msr;
1309     env->nip = regs.pc;
1310 
1311     env->spr[SPR_SRR0] = regs.srr0;
1312     env->spr[SPR_SRR1] = regs.srr1;
1313 
1314     env->spr[SPR_SPRG0] = regs.sprg0;
1315     env->spr[SPR_SPRG1] = regs.sprg1;
1316     env->spr[SPR_SPRG2] = regs.sprg2;
1317     env->spr[SPR_SPRG3] = regs.sprg3;
1318     env->spr[SPR_SPRG4] = regs.sprg4;
1319     env->spr[SPR_SPRG5] = regs.sprg5;
1320     env->spr[SPR_SPRG6] = regs.sprg6;
1321     env->spr[SPR_SPRG7] = regs.sprg7;
1322 
1323     env->spr[SPR_BOOKE_PID] = regs.pid;
1324 
1325     for (i = 0; i < 32; i++)
1326         env->gpr[i] = regs.gpr[i];
1327 
1328     kvm_get_fp(cs);
1329 
1330     if (cap_booke_sregs) {
1331         ret = kvmppc_get_booke_sregs(cpu);
1332         if (ret < 0) {
1333             return ret;
1334         }
1335     }
1336 
1337     if (cap_segstate) {
1338         ret = kvmppc_get_books_sregs(cpu);
1339         if (ret < 0) {
1340             return ret;
1341         }
1342     }
1343 
1344     if (cap_hior) {
1345         kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1346     }
1347 
1348     if (cap_one_reg) {
1349         int i;
1350 
1351         /* We deliberately ignore errors here: for kernels which have
1352          * the ONE_REG calls but don't support the specific
1353          * registers, there's a reasonable chance things will still
1354          * work, at least until we try to migrate. */
1355         for (i = 0; i < 1024; i++) {
1356             uint64_t id = env->spr_cb[i].one_reg_id;
1357 
1358             if (id != 0) {
1359                 kvm_get_one_spr(cs, id, i);
1360             }
1361         }
1362 
1363 #ifdef TARGET_PPC64
1364         if (msr_ts) {
1365             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1366                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1367             }
1368             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1369                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1370             }
1371             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1372             kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1373             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1374             kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1375             kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1376             kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1377             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1378             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1379             kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1380             kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1381         }
1382 
1383         if (cap_papr) {
1384             if (kvm_get_vpa(cs) < 0) {
1385                 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1386             }
1387         }
1388 
1389         kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1390 #endif
1391     }
1392 
1393     return 0;
1394 }
1395 
1396 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1397 {
1398     unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1399 
1400     if (irq != PPC_INTERRUPT_EXT) {
1401         return 0;
1402     }
1403 
1404     if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1405         return 0;
1406     }
1407 
1408     kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1409 
1410     return 0;
1411 }
1412 
1413 #if defined(TARGET_PPCEMB)
1414 #define PPC_INPUT_INT PPC40x_INPUT_INT
1415 #elif defined(TARGET_PPC64)
1416 #define PPC_INPUT_INT PPC970_INPUT_INT
1417 #else
1418 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1419 #endif
1420 
1421 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1422 {
1423     PowerPCCPU *cpu = POWERPC_CPU(cs);
1424     CPUPPCState *env = &cpu->env;
1425     int r;
1426     unsigned irq;
1427 
1428     qemu_mutex_lock_iothread();
1429 
1430     /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1431      * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1432     if (!cap_interrupt_level &&
1433         run->ready_for_interrupt_injection &&
1434         (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1435         (env->irq_input_state & (1<<PPC_INPUT_INT)))
1436     {
1437         /* For now KVM disregards the 'irq' argument. However, in the
1438          * future KVM could cache it in-kernel to avoid a heavyweight exit
1439          * when reading the UIC.
1440          */
1441         irq = KVM_INTERRUPT_SET;
1442 
1443         DPRINTF("injected interrupt %d\n", irq);
1444         r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1445         if (r < 0) {
1446             printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1447         }
1448 
1449         /* Always wake up soon in case the interrupt was level based */
1450         timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1451                        (NANOSECONDS_PER_SECOND / 50));
1452     }
1453 
1454     /* We don't know if there are more interrupts pending after this. However,
1455      * the guest will return to userspace in the course of handling this one
1456      * anyway, so we will get a chance to deliver the rest. */
1457 
1458     qemu_mutex_unlock_iothread();
1459 }
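
/*
 * Timing note (illustrative): NANOSECONDS_PER_SECOND / 50 is the 20 ms
 * re-kick interval described in the idle_timer comment near the top of
 * this file.  When the timer fires, kvm_kick_cpu() wakes the vCPU, so a
 * level-triggered interrupt the guest swallowed gets another chance to
 * be injected on the next call to kvm_arch_pre_run().
 */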
1460 
1461 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1462 {
1463     return MEMTXATTRS_UNSPECIFIED;
1464 }
1465 
1466 int kvm_arch_process_async_events(CPUState *cs)
1467 {
1468     return cs->halted;
1469 }
1470 
1471 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1472 {
1473     CPUState *cs = CPU(cpu);
1474     CPUPPCState *env = &cpu->env;
1475 
1476     if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1477         cs->halted = 1;
1478         cs->exception_index = EXCP_HLT;
1479     }
1480 
1481     return 0;
1482 }
1483 
1484 /* map dcr access to existing qemu dcr emulation */
1485 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1486 {
1487     if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1488         fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1489 
1490     return 0;
1491 }
1492 
1493 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1494 {
1495     if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1496         fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1497 
1498     return 0;
1499 }
1500 
1501 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1502 {
1503     /* Mixed endian case is not handled */
1504     uint32_t sc = debug_inst_opcode;
1505 
1506     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1507                             sizeof(sc), 0) ||
1508         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1509         return -EINVAL;
1510     }
1511 
1512     return 0;
1513 }
1514 
1515 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1516 {
1517     uint32_t sc;
1518 
1519     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1520         sc != debug_inst_opcode ||
1521         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1522                             sizeof(sc), 1)) {
1523         return -EINVAL;
1524     }
1525 
1526     return 0;
1527 }
1528 
1529 static int find_hw_breakpoint(target_ulong addr, int type)
1530 {
1531     int n;
1532 
1533     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1534            <= ARRAY_SIZE(hw_debug_points));
1535 
1536     for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1537         if (hw_debug_points[n].addr == addr &&
1538              hw_debug_points[n].type == type) {
1539             return n;
1540         }
1541     }
1542 
1543     return -1;
1544 }
1545 
1546 static int find_hw_watchpoint(target_ulong addr, int *flag)
1547 {
1548     int n;
1549 
1550     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1551     if (n >= 0) {
1552         *flag = BP_MEM_ACCESS;
1553         return n;
1554     }
1555 
1556     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1557     if (n >= 0) {
1558         *flag = BP_MEM_WRITE;
1559         return n;
1560     }
1561 
1562     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1563     if (n >= 0) {
1564         *flag = BP_MEM_READ;
1565         return n;
1566     }
1567 
1568     return -1;
1569 }
1570 
1571 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1572                                   target_ulong len, int type)
1573 {
1574     if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1575         return -ENOBUFS;
1576     }
1577 
1578     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1579     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1580 
1581     switch (type) {
1582     case GDB_BREAKPOINT_HW:
1583         if (nb_hw_breakpoint >= max_hw_breakpoint) {
1584             return -ENOBUFS;
1585         }
1586 
1587         if (find_hw_breakpoint(addr, type) >= 0) {
1588             return -EEXIST;
1589         }
1590 
1591         nb_hw_breakpoint++;
1592         break;
1593 
1594     case GDB_WATCHPOINT_WRITE:
1595     case GDB_WATCHPOINT_READ:
1596     case GDB_WATCHPOINT_ACCESS:
1597         if (nb_hw_watchpoint >= max_hw_watchpoint) {
1598             return -ENOBUFS;
1599         }
1600 
1601         if (find_hw_breakpoint(addr, type) >= 0) {
1602             return -EEXIST;
1603         }
1604 
1605         nb_hw_watchpoint++;
1606         break;
1607 
1608     default:
1609         return -ENOSYS;
1610     }
1611 
1612     return 0;
1613 }
1614 
1615 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1616                                   target_ulong len, int type)
1617 {
1618     int n;
1619 
1620     n = find_hw_breakpoint(addr, type);
1621     if (n < 0) {
1622         return -ENOENT;
1623     }
1624 
1625     switch (type) {
1626     case GDB_BREAKPOINT_HW:
1627         nb_hw_breakpoint--;
1628         break;
1629 
1630     case GDB_WATCHPOINT_WRITE:
1631     case GDB_WATCHPOINT_READ:
1632     case GDB_WATCHPOINT_ACCESS:
1633         nb_hw_watchpoint--;
1634         break;
1635 
1636     default:
1637         return -ENOSYS;
1638     }
1639     hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1640 
1641     return 0;
1642 }
1643 
1644 void kvm_arch_remove_all_hw_breakpoints(void)
1645 {
1646     nb_hw_breakpoint = nb_hw_watchpoint = 0;
1647 }
1648 
1649 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1650 {
1651     int n;
1652 
1653     /* Software Breakpoint updates */
1654     if (kvm_sw_breakpoints_active(cs)) {
1655         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1656     }
1657 
1658     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1659            <= ARRAY_SIZE(hw_debug_points));
1660     assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1661 
1662     if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1663         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1664         memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1665         for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1666             switch (hw_debug_points[n].type) {
1667             case GDB_BREAKPOINT_HW:
1668                 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1669                 break;
1670             case GDB_WATCHPOINT_WRITE:
1671                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1672                 break;
1673             case GDB_WATCHPOINT_READ:
1674                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1675                 break;
1676             case GDB_WATCHPOINT_ACCESS:
1677                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1678                                         KVMPPC_DEBUG_WATCH_READ;
1679                 break;
1680             default:
1681                 cpu_abort(cs, "Unsupported breakpoint type\n");
1682             }
1683             dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1684         }
1685     }
1686 }
1687 
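/* Handle a KVM_EXIT_DEBUG exit.  Returns non-zero when the exit belongs
 * to QEMU's debugger (single-stepping, or one of our hardware/software
 * breakpoints) so the caller reports EXCP_DEBUG; returns 0 when the
 * guest should simply be re-entered. */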
1688 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1689 {
1690     CPUState *cs = CPU(cpu);
1691     CPUPPCState *env = &cpu->env;
1692     struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1693     int handle = 0;
1694     int n;
1695     int flag = 0;
1696 
1697     if (cs->singlestep_enabled) {
1698         handle = 1;
1699     } else if (arch_info->status) {
1700         if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1701             if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1702                 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1703                 if (n >= 0) {
1704                     handle = 1;
1705                 }
1706             } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1707                                             KVMPPC_DEBUG_WATCH_WRITE)) {
1708                 n = find_hw_watchpoint(arch_info->address,  &flag);
1709                 if (n >= 0) {
1710                     handle = 1;
1711                     cs->watchpoint_hit = &hw_watchpoint;
1712                     hw_watchpoint.vaddr = hw_debug_points[n].addr;
1713                     hw_watchpoint.flags = flag;
1714                 }
1715             }
1716         }
1717     } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1718         handle = 1;
1719     } else {
1720         /* QEMU cannot handle this debug exception, so inject a
1721          * program exception into the guest instead;
1722          * yes, a program exception, NOT a debug exception.
1723          * When QEMU uses the debug resources, debug exceptions must
1724          * always be enabled.  To achieve this we set MSR_DE and also
1725          * set MSRP_DEP so the guest cannot change MSR_DE.
1726          * When emulating the debug resources for the guest we want
1727          * the guest to control MSR_DE (enable/disable debug
1728          * interrupts on demand).
1729          * Supporting both configurations at once is NOT possible, so
1730          * debug resources cannot be shared between QEMU and the
1731          * guest on the BookE architecture.
1732          * In the current design QEMU gets priority over the guest:
1733          * if QEMU is using the debug resources then the guest cannot.
1734          * For software breakpoints QEMU uses a privileged instruction,
1735          * so there is no way we got here because the guest set up a
1736          * debug exception; the only possibility is that the guest
1737          * executed a privileged / illegal instruction, which is why
1738          * we inject a program interrupt.
1739          */
1740 
1741         cpu_synchronize_state(cs);
1742         /* env->nip is PC, so increment this by 4 to use
1743          * ppc_cpu_do_interrupt(), which sets srr0 = env->nip - 4.
1744          */
1745         env->nip += 4;
1746         cs->exception_index = POWERPC_EXCP_PROGRAM;
1747         env->error_code = POWERPC_EXCP_INVAL;
1748         ppc_cpu_do_interrupt(cs);
1749     }
1750 
1751     return handle;
1752 }
1753 
1754 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1755 {
1756     PowerPCCPU *cpu = POWERPC_CPU(cs);
1757     CPUPPCState *env = &cpu->env;
1758     int ret;
1759 
1760     qemu_mutex_lock_iothread();
1761 
1762     switch (run->exit_reason) {
1763     case KVM_EXIT_DCR:
1764         if (run->dcr.is_write) {
1765             DPRINTF("handle dcr write\n");
1766             ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1767         } else {
1768             DPRINTF("handle dcr read\n");
1769             ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1770         }
1771         break;
1772     case KVM_EXIT_HLT:
1773         DPRINTF("handle halt\n");
1774         ret = kvmppc_handle_halt(cpu);
1775         break;
1776 #if defined(TARGET_PPC64)
1777     case KVM_EXIT_PAPR_HCALL:
1778         DPRINTF("handle PAPR hypercall\n");
1779         run->papr_hcall.ret = spapr_hypercall(cpu,
1780                                               run->papr_hcall.nr,
1781                                               run->papr_hcall.args);
1782         ret = 0;
1783         break;
1784 #endif
1785     case KVM_EXIT_EPR:
1786         DPRINTF("handle epr\n");
1787         run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1788         ret = 0;
1789         break;
1790     case KVM_EXIT_WATCHDOG:
1791         DPRINTF("handle watchdog expiry\n");
1792         watchdog_perform_action();
1793         ret = 0;
1794         break;
1795 
1796     case KVM_EXIT_DEBUG:
1797         DPRINTF("handle debug exception\n");
1798         if (kvm_handle_debug(cpu, run)) {
1799             ret = EXCP_DEBUG;
1800             break;
1801         }
1802         /* re-enter, this exception was guest-internal */
1803         ret = 0;
1804         break;
1805 
1806     default:
1807         fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1808         ret = -1;
1809         break;
1810     }
1811 
1812     qemu_mutex_unlock_iothread();
1813     return ret;
1814 }
1815 
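/* BookE watchdog plumbing: TSR bits are set/cleared and the TCR is
 * updated through the ONE_REG interface so the in-kernel timer state
 * stays in sync with what the guest and QEMU expect. */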
1816 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1817 {
1818     CPUState *cs = CPU(cpu);
1819     uint32_t bits = tsr_bits;
1820     struct kvm_one_reg reg = {
1821         .id = KVM_REG_PPC_OR_TSR,
1822         .addr = (uintptr_t) &bits,
1823     };
1824 
1825     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1826 }
1827 
1828 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1829 {
1830 
1831     CPUState *cs = CPU(cpu);
1832     uint32_t bits = tsr_bits;
1833     struct kvm_one_reg reg = {
1834         .id = KVM_REG_PPC_CLEAR_TSR,
1835         .addr = (uintptr_t) &bits,
1836     };
1837 
1838     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1839 }
1840 
1841 int kvmppc_set_tcr(PowerPCCPU *cpu)
1842 {
1843     CPUState *cs = CPU(cpu);
1844     CPUPPCState *env = &cpu->env;
1845     uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1846 
1847     struct kvm_one_reg reg = {
1848         .id = KVM_REG_PPC_TCR,
1849         .addr = (uintptr_t) &tcr,
1850     };
1851 
1852     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1853 }
1854 
1855 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1856 {
1857     CPUState *cs = CPU(cpu);
1858     int ret;
1859 
1860     if (!kvm_enabled()) {
1861         return -1;
1862     }
1863 
1864     if (!cap_ppc_watchdog) {
1865         printf("warning: KVM does not support watchdog\n");
1866         return -1;
1867     }
1868 
1869     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1870     if (ret < 0) {
1871         fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1872                 __func__, strerror(-ret));
1873         return ret;
1874     }
1875 
1876     return ret;
1877 }
1878 
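/* Scan /proc/cpuinfo for a line starting with 'field' and copy the whole
 * matching line (field name included) into value.  Returns 0 on success,
 * -1 if the file can't be opened or no line matches. */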
1879 static int read_cpuinfo(const char *field, char *value, int len)
1880 {
1881     FILE *f;
1882     int ret = -1;
1883     int field_len = strlen(field);
1884     char line[512];
1885 
1886     f = fopen("/proc/cpuinfo", "r");
1887     if (!f) {
1888         return -1;
1889     }
1890 
1891     do {
1892         if (!fgets(line, sizeof(line), f)) {
1893             break;
1894         }
1895         if (!strncmp(line, field, field_len)) {
1896             pstrcpy(value, len, line);
1897             ret = 0;
1898             break;
1899         }
1900     } while (*line);
1901 
1902     fclose(f);
1903 
1904     return ret;
1905 }
1906 
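/* Host timebase frequency in Hz, parsed from the "timebase" line of
 * /proc/cpuinfo.  Falls back to NANOSECONDS_PER_SECOND (i.e. 1 GHz)
 * if the line is missing or unparseable. */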
1907 uint32_t kvmppc_get_tbfreq(void)
1908 {
1909     char line[512];
1910     char *ns;
1911     uint32_t retval = NANOSECONDS_PER_SECOND;
1912 
1913     if (read_cpuinfo("timebase", line, sizeof(line))) {
1914         return retval;
1915     }
1916 
1917     if (!(ns = strchr(line, ':'))) {
1918         return retval;
1919     }
1920 
1921     ns++;
1922 
1923     return atoi(ns);
1924 }
1925 
1926 bool kvmppc_get_host_serial(char **value)
1927 {
1928     return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
1929                                NULL);
1930 }
1931 
1932 bool kvmppc_get_host_model(char **value)
1933 {
1934     return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
1935 }
1936 
1937 /* Try to find a device tree node for a CPU with clock-frequency property */
1938 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1939 {
1940     struct dirent *dirp;
1941     DIR *dp;
1942 
1943     if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1944         printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1945         return -1;
1946     }
1947 
1948     buf[0] = '\0';
1949     while ((dirp = readdir(dp)) != NULL) {
1950         FILE *f;
1951         snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1952                  dirp->d_name);
1953         f = fopen(buf, "r");
1954         if (f) {
1955             snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1956             fclose(f);
1957             break;
1958         }
1959         buf[0] = '\0';
1960     }
1961     closedir(dp);
1962     if (buf[0] == '\0') {
1963         printf("Unknown host!\n");
1964         return -1;
1965     }
1966 
1967     return 0;
1968 }
1969 
1970 static uint64_t kvmppc_read_int_dt(const char *filename)
1971 {
1972     union {
1973         uint32_t v32;
1974         uint64_t v64;
1975     } u;
1976     FILE *f;
1977     int len;
1978 
1979     f = fopen(filename, "rb");
1980     if (!f) {
1981         return -1;
1982     }
1983 
1984     len = fread(&u, 1, sizeof(u), f);
1985     fclose(f);
1986     switch (len) {
1987     case 4:
1988         /* property is a 32-bit quantity */
1989         return be32_to_cpu(u.v32);
1990     case 8:
1991         return be64_to_cpu(u.v64);
1992     }
1993 
1994     return 0;
1995 }
1996 
1997 /* Read a CPU node property from the host device tree that's a single
1998  * integer (32-bit or 64-bit).  Returns -1 if the node or property
1999  * can't be found or opened, and 0 if the property's format isn't
2000  * understood. */
2001 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
2002 {
2003     char buf[PATH_MAX], *tmp;
2004     uint64_t val;
2005 
2006     if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
2007         return -1;
2008     }
2009 
2010     tmp = g_strdup_printf("%s/%s", buf, propname);
2011     val = kvmppc_read_int_dt(tmp);
2012     g_free(tmp);
2013 
2014     return val;
2015 }
2016 
2017 uint64_t kvmppc_get_clockfreq(void)
2018 {
2019     return kvmppc_read_int_cpu_dt("clock-frequency");
2020 }
2021 
2022 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
2023 {
2024     PowerPCCPU *cpu = ppc_env_get_cpu(env);
2025     CPUState *cs = CPU(cpu);
2026 
2027     if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
2028         !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
2029         return 0;
2030     }
2031 
2032     return 1;
2033 }
2034 
2035 int kvmppc_get_hasidle(CPUPPCState *env)
2036 {
2037     struct kvm_ppc_pvinfo pvinfo;
2038 
2039     if (!kvmppc_get_pvinfo(env, &pvinfo) &&
2040         (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
2041         return 1;
2042     }
2043 
2044     return 0;
2045 }
2046 
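/* Copy the KVM paravirt hypercall sequence into buf.  When the kernel
 * can't provide one, emit a fallback sequence that always fails and
 * return 1 so the caller knows the fallback is in use; return 0 when
 * the real sequence from KVM_PPC_GET_PVINFO was used. */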
2047 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
2048 {
2049     uint32_t *hc = (uint32_t *)buf;
2050     struct kvm_ppc_pvinfo pvinfo;
2051 
2052     if (!kvmppc_get_pvinfo(env, &pvinfo)) {
2053         memcpy(buf, pvinfo.hcall, buf_len);
2054         return 0;
2055     }
2056 
2057     /*
2058      * Fall back to hypercalls that always fail, regardless of endianness:
2059      *
2060      *     tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
2061      *     li r3, -1
2062      *     b .+8       (becomes nop in wrong endian)
2063      *     bswap32(li r3, -1)
2064      */
2065 
2066     hc[0] = cpu_to_be32(0x08000048);
2067     hc[1] = cpu_to_be32(0x3860ffff);
2068     hc[2] = cpu_to_be32(0x48000008);
2069     hc[3] = cpu_to_be32(bswap32(0x3860ffff));
2070 
2071     return 1;
2072 }
2073 
2074 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
2075 {
2076     return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
2077 }
2078 
2079 void kvmppc_enable_logical_ci_hcalls(void)
2080 {
2081     /*
2082      * FIXME: it would be nice if we could detect the cases where
2083      * we're using a device which requires the in-kernel
2084      * implementation of these hcalls but the kernel lacks them, and
2085      * produce a warning.
2086      */
2087     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
2088     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
2089 }
2090 
2091 void kvmppc_enable_set_mode_hcall(void)
2092 {
2093     kvmppc_enable_hcall(kvm_state, H_SET_MODE);
2094 }
2095 
2096 void kvmppc_enable_clear_ref_mod_hcalls(void)
2097 {
2098     kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
2099     kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
2100 }
2101 
2102 void kvmppc_set_papr(PowerPCCPU *cpu)
2103 {
2104     CPUState *cs = CPU(cpu);
2105     int ret;
2106 
2107     if (!kvm_enabled()) {
2108         return;
2109     }
2110 
2111     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2112     if (ret) {
2113         error_report("This vCPU type or KVM version does not support PAPR");
2114         exit(1);
2115     }
2116 
2117     /* Update the capability flag so we sync the right information
2118      * with kvm */
2119     cap_papr = 1;
2120 }
2121 
2122 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr)
2123 {
2124     return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr);
2125 }
2126 
2127 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2128 {
2129     CPUState *cs = CPU(cpu);
2130     int ret;
2131 
2132     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2133     if (ret && mpic_proxy) {
2134         error_report("This KVM version does not support EPR");
2135         exit(1);
2136     }
2137 }
2138 
2139 int kvmppc_smt_threads(void)
2140 {
2141     return cap_ppc_smt ? cap_ppc_smt : 1;
2142 }
2143 
2144 int kvmppc_set_smt_threads(int smt)
2145 {
2146     int ret;
2147 
2148     ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_SMT, 0, smt, 0);
2149     if (!ret) {
2150         cap_ppc_smt = smt;
2151     }
2152     return ret;
2153 }
2154 
2155 void kvmppc_hint_smt_possible(Error **errp)
2156 {
2157     int i;
2158     GString *g;
2159     char *s;
2160 
2161     assert(kvm_enabled());
2162     if (cap_ppc_smt_possible) {
2163         g = g_string_new("Available VSMT modes:");
2164         for (i = 63; i >= 0; i--) {
2165             if ((1UL << i) & cap_ppc_smt_possible) {
2166                 g_string_append_printf(g, " %lu", (1UL << i));
2167             }
2168         }
2169         s = g_string_free(g, false);
2170         error_append_hint(errp, "%s.\n", s);
2171         g_free(s);
2172     } else {
2173         error_append_hint(errp,
2174                           "This KVM seems to be too old to support VSMT.\n");
2175     }
2176 }
2177 
2178 
2179 #ifdef TARGET_PPC64
2180 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2181 {
2182     struct kvm_ppc_smmu_info info;
2183     long rampagesize, best_page_shift;
2184     int i;
2185 
2186     /* Find the largest hardware supported page size that's less than
2187      * or equal to the (logical) backing page size of guest RAM */
2188     kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
2189     rampagesize = qemu_getrampagesize();
2190     best_page_shift = 0;
2191 
2192     for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2193         struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2194 
2195         if (!sps->page_shift) {
2196             continue;
2197         }
2198 
2199         if ((sps->page_shift > best_page_shift)
2200             && ((1UL << sps->page_shift) <= rampagesize)) {
2201             best_page_shift = sps->page_shift;
2202         }
2203     }
2204 
2205     return MIN(current_size,
2206                1ULL << (best_page_shift + hash_shift - 7));
2207 }
2208 #endif
2209 
2210 bool kvmppc_spapr_use_multitce(void)
2211 {
2212     return cap_spapr_multitce;
2213 }
2214 
2215 int kvmppc_spapr_enable_inkernel_multitce(void)
2216 {
2217     int ret;
2218 
2219     ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2220                             H_PUT_TCE_INDIRECT, 1);
2221     if (!ret) {
2222         ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2223                                 H_STUFF_TCE, 1);
2224     }
2225 
2226     return ret;
2227 }
2228 
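/* Create an in-kernel TCE (guest DMA translation) table for the given
 * LIOBN and mmap() it into QEMU.  Returns the mapping (with the backing
 * fd in *pfd) or NULL when the kernel can't accelerate this table, so
 * the caller can fall back to a userspace-managed table. */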
2229 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
2230                               uint64_t bus_offset, uint32_t nb_table,
2231                               int *pfd, bool need_vfio)
2232 {
2233     long len;
2234     int fd;
2235     void *table;
2236 
2237     /* Must set fd to -1 so we don't try to munmap when called for
2238      * destroying the table, which the upper layers -will- do
2239      */
2240     *pfd = -1;
2241     if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2242         return NULL;
2243     }
2244 
2245     if (cap_spapr_tce_64) {
2246         struct kvm_create_spapr_tce_64 args = {
2247             .liobn = liobn,
2248             .page_shift = page_shift,
2249             .offset = bus_offset >> page_shift,
2250             .size = nb_table,
2251             .flags = 0
2252         };
2253         fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE_64, &args);
2254         if (fd < 0) {
2255             fprintf(stderr,
2256                     "KVM: Failed to create TCE64 table for liobn 0x%x\n",
2257                     liobn);
2258             return NULL;
2259         }
2260     } else if (cap_spapr_tce) {
2261         uint64_t window_size = (uint64_t) nb_table << page_shift;
2262         struct kvm_create_spapr_tce args = {
2263             .liobn = liobn,
2264             .window_size = window_size,
2265         };
2266         if ((window_size != args.window_size) || bus_offset) {
2267             return NULL;
2268         }
2269         fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2270         if (fd < 0) {
2271             fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2272                     liobn);
2273             return NULL;
2274         }
2275     } else {
2276         return NULL;
2277     }
2278 
2279     len = nb_table * sizeof(uint64_t);
2280     /* FIXME: round this up to page size */
2281 
2282     table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2283     if (table == MAP_FAILED) {
2284         fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2285                 liobn);
2286         close(fd);
2287         return NULL;
2288     }
2289 
2290     *pfd = fd;
2291     return table;
2292 }
2293 
2294 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
2295 {
2296     long len;
2297 
2298     if (fd < 0) {
2299         return -1;
2300     }
2301 
2302     len = nb_table * sizeof(uint64_t);
2303     if ((munmap(table, len) < 0) ||
2304         (close(fd) < 0)) {
2305         fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
2306                 strerror(errno));
2307         /* Leak the table */
2308     }
2309 
2310     return 0;
2311 }
2312 
2313 int kvmppc_reset_htab(int shift_hint)
2314 {
2315     uint32_t shift = shift_hint;
2316 
2317     if (!kvm_enabled()) {
2318         /* Full emulation, tell caller to allocate htab itself */
2319         return 0;
2320     }
2321     if (kvm_vm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2322         int ret;
2323         ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2324         if (ret == -ENOTTY) {
2325             /* At least some versions of PR KVM advertise the
2326              * capability, but don't implement the ioctl().  Oops.
2327              * Return 0 so that we allocate the htab in qemu, as is
2328              * correct for PR. */
2329             return 0;
2330         } else if (ret < 0) {
2331             return ret;
2332         }
2333         return shift;
2334     }
2335 
2336     /* We have a kernel that predates the htab reset calls.  For PR
2337      * KVM we need to allocate the htab ourselves; an HV KVM of this
2338      * era will have already allocated a fixed 16MB hash table. */
2339     if (kvmppc_is_pr(kvm_state)) {
2340         /* PR - tell caller to allocate htab */
2341         return 0;
2342     } else {
2343         /* HV - assume 16MB kernel allocated htab */
2344         return 24;
2345     }
2346 }
2347 
2348 static inline uint32_t mfpvr(void)
2349 {
2350     uint32_t pvr;
2351 
2352     asm ("mfpvr %0"
2353          : "=r"(pvr));
2354     return pvr;
2355 }
2356 
2357 static void alter_insns(uint64_t *word, uint64_t flags, bool on)
2358 {
2359     if (on) {
2360         *word |= flags;
2361     } else {
2362         *word &= ~flags;
2363     }
2364 }
2365 
2366 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2367 {
2368     PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2369     uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2370     uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2371 
2372     /* Now fix up the class with information we can query from the host */
2373     pcc->pvr = mfpvr();
2374 
2375     alter_insns(&pcc->insns_flags, PPC_ALTIVEC,
2376                 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_ALTIVEC);
2377     alter_insns(&pcc->insns_flags2, PPC2_VSX,
2378                 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_VSX);
2379     alter_insns(&pcc->insns_flags2, PPC2_DFP,
2380                 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_DFP);
2381 
2382     if (dcache_size != -1) {
2383         pcc->l1_dcache_size = dcache_size;
2384     }
2385 
2386     if (icache_size != -1) {
2387         pcc->l1_icache_size = icache_size;
2388     }
2389 
2390 #if defined(TARGET_PPC64)
2391     pcc->radix_page_info = kvm_get_radix_page_info();
2392 
2393     if ((pcc->pvr & 0xffffff00) == CPU_POWERPC_POWER9_DD1) {
2394         /*
2395          * POWER9 DD1 has some bugs which make it not really ISA 3.00
2396          * compliant.  More importantly, advertising ISA 3.00
2397          * architected mode may prevent guests from activating
2398          * necessary DD1 workarounds.
2399          */
2400         pcc->pcr_supported &= ~(PCR_COMPAT_3_00 | PCR_COMPAT_2_07
2401                                 | PCR_COMPAT_2_06 | PCR_COMPAT_2_05);
2402     }
2403 #endif /* defined(TARGET_PPC64) */
2404 }
2405 
2406 bool kvmppc_has_cap_epr(void)
2407 {
2408     return cap_epr;
2409 }
2410 
2411 bool kvmppc_has_cap_fixup_hcalls(void)
2412 {
2413     return cap_fixup_hcalls;
2414 }
2415 
2416 bool kvmppc_has_cap_htm(void)
2417 {
2418     return cap_htm;
2419 }
2420 
2421 bool kvmppc_has_cap_mmu_radix(void)
2422 {
2423     return cap_mmu_radix;
2424 }
2425 
2426 bool kvmppc_has_cap_mmu_hash_v3(void)
2427 {
2428     return cap_mmu_hash_v3;
2429 }
2430 
2431 static bool kvmppc_power8_host(void)
2432 {
2433     bool ret = false;
2434 #ifdef TARGET_PPC64
2435     {
2436         uint32_t base_pvr = CPU_POWERPC_POWER_SERVER_MASK & mfpvr();
2437         ret = (base_pvr == CPU_POWERPC_POWER8E_BASE) ||
2438               (base_pvr == CPU_POWERPC_POWER8NVL_BASE) ||
2439               (base_pvr == CPU_POWERPC_POWER8_BASE);
2440     }
2441 #endif /* TARGET_PPC64 */
2442     return ret;
2443 }
2444 
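/* Translate the KVM_PPC_GET_CPU_CHAR results into the tri-state values
 * used by the spapr security capabilities (presumably matching the
 * SPAPR_CAP_* encoding): 0 means vulnerable/broken, 1 means a software
 * workaround is available, and 2 (or the SPAPR_CAP_FIXED_* constants in
 * the indirect branch case) means the issue is fixed in hardware. */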
2445 static int parse_cap_ppc_safe_cache(struct kvm_ppc_cpu_char c)
2446 {
2447     bool l1d_thread_priv_req = !kvmppc_power8_host();
2448 
2449     if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_L1D_FLUSH_PR) {
2450         return 2;
2451     } else if ((!l1d_thread_priv_req ||
2452                 c.character & c.character_mask & H_CPU_CHAR_L1D_THREAD_PRIV) &&
2453                (c.character & c.character_mask
2454                 & (H_CPU_CHAR_L1D_FLUSH_ORI30 | H_CPU_CHAR_L1D_FLUSH_TRIG2))) {
2455         return 1;
2456     }
2457 
2458     return 0;
2459 }
2460 
2461 static int parse_cap_ppc_safe_bounds_check(struct kvm_ppc_cpu_char c)
2462 {
2463     if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_BNDS_CHK_SPEC_BAR) {
2464         return 2;
2465     } else if (c.character & c.character_mask & H_CPU_CHAR_SPEC_BAR_ORI31) {
2466         return 1;
2467     }
2468 
2469     return 0;
2470 }
2471 
2472 static int parse_cap_ppc_safe_indirect_branch(struct kvm_ppc_cpu_char c)
2473 {
2474     if (c.character & c.character_mask & H_CPU_CHAR_CACHE_COUNT_DIS) {
2475         return  SPAPR_CAP_FIXED_CCD;
2476     } else if (c.character & c.character_mask & H_CPU_CHAR_BCCTRL_SERIALISED) {
2477         return SPAPR_CAP_FIXED_IBS;
2478     }
2479 
2480     return 0;
2481 }
2482 
2483 static void kvmppc_get_cpu_characteristics(KVMState *s)
2484 {
2485     struct kvm_ppc_cpu_char c;
2486     int ret;
2487 
2488     /* Assume broken */
2489     cap_ppc_safe_cache = 0;
2490     cap_ppc_safe_bounds_check = 0;
2491     cap_ppc_safe_indirect_branch = 0;
2492 
2493     ret = kvm_vm_check_extension(s, KVM_CAP_PPC_GET_CPU_CHAR);
2494     if (!ret) {
2495         return;
2496     }
2497     ret = kvm_vm_ioctl(s, KVM_PPC_GET_CPU_CHAR, &c);
2498     if (ret < 0) {
2499         return;
2500     }
2501 
2502     cap_ppc_safe_cache = parse_cap_ppc_safe_cache(c);
2503     cap_ppc_safe_bounds_check = parse_cap_ppc_safe_bounds_check(c);
2504     cap_ppc_safe_indirect_branch = parse_cap_ppc_safe_indirect_branch(c);
2505 }
2506 
2507 int kvmppc_get_cap_safe_cache(void)
2508 {
2509     return cap_ppc_safe_cache;
2510 }
2511 
2512 int kvmppc_get_cap_safe_bounds_check(void)
2513 {
2514     return cap_ppc_safe_bounds_check;
2515 }
2516 
2517 int kvmppc_get_cap_safe_indirect_branch(void)
2518 {
2519     return cap_ppc_safe_indirect_branch;
2520 }
2521 
2522 bool kvmppc_has_cap_spapr_vfio(void)
2523 {
2524     return cap_spapr_vfio;
2525 }
2526 
2527 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
2528 {
2529     uint32_t host_pvr = mfpvr();
2530     PowerPCCPUClass *pvr_pcc;
2531 
2532     pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2533     if (pvr_pcc == NULL) {
2534         pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2535     }
2536 
2537     return pvr_pcc;
2538 }
2539 
2540 static int kvm_ppc_register_host_cpu_type(MachineState *ms)
2541 {
2542     TypeInfo type_info = {
2543         .name = TYPE_HOST_POWERPC_CPU,
2544         .class_init = kvmppc_host_cpu_class_init,
2545     };
2546     MachineClass *mc = MACHINE_GET_CLASS(ms);
2547     PowerPCCPUClass *pvr_pcc;
2548     ObjectClass *oc;
2549     DeviceClass *dc;
2550     int i;
2551 
2552     pvr_pcc = kvm_ppc_get_host_cpu_class();
2553     if (pvr_pcc == NULL) {
2554         return -1;
2555     }
2556     type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2557     type_register(&type_info);
2558     if (object_dynamic_cast(OBJECT(ms), TYPE_SPAPR_MACHINE)) {
2559         /* override TCG default cpu type with 'host' cpu model */
2560         mc->default_cpu_type = TYPE_HOST_POWERPC_CPU;
2561     }
2562 
2563     oc = object_class_by_name(type_info.name);
2564     g_assert(oc);
2565 
2566     /*
2567      * Update generic CPU family class alias (e.g. on a POWER8NVL host,
2568      * we want "POWER8" to be a "family" alias that points to the current
2569      * host CPU type, too)
2570      */
2571     dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc));
2572     for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) {
2573         if (strcasecmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) {
2574             char *suffix;
2575 
2576             ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc));
2577             suffix = strstr(ppc_cpu_aliases[i].model, POWERPC_CPU_TYPE_SUFFIX);
2578             if (suffix) {
2579                 *suffix = 0;
2580             }
2581             break;
2582         }
2583     }
2584 
2585     return 0;
2586 }
2587 
2588 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2589 {
2590     struct kvm_rtas_token_args args = {
2591         .token = token,
2592     };
2593 
2594     if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2595         return -ENOENT;
2596     }
2597 
2598     strncpy(args.name, function, sizeof(args.name));
2599 
2600     return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2601 }
2602 
2603 int kvmppc_get_htab_fd(bool write, uint64_t index, Error **errp)
2604 {
2605     struct kvm_get_htab_fd s = {
2606         .flags = write ? KVM_GET_HTAB_WRITE : 0,
2607         .start_index = index,
2608     };
2609     int ret;
2610 
2611     if (!cap_htab_fd) {
2612         error_setg(errp, "KVM version doesn't support %s the HPT",
2613                    write ? "writing" : "reading");
2614         return -ENOTSUP;
2615     }
2616 
2617     ret = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2618     if (ret < 0) {
2619         error_setg(errp, "Unable to open fd for %s HPT %s KVM: %s",
2620                    write ? "writing" : "reading", write ? "to" : "from",
2621                    strerror(errno));
2622         return -errno;
2623     }
2624 
2625     return ret;
2626 }
2627 
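/* Stream HPT chunks from the KVM htab fd into the migration stream,
 * stopping when the fd reports end-of-table or after roughly max_ns
 * nanoseconds (max_ns < 0 means no time limit).  Returns 1 once the
 * whole table has been read, 0 if there is more to send. */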
2628 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2629 {
2630     int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2631     uint8_t buf[bufsize];
2632     ssize_t rc;
2633 
2634     do {
2635         rc = read(fd, buf, bufsize);
2636         if (rc < 0) {
2637             fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2638                     strerror(errno));
2639             return rc;
2640         } else if (rc) {
2641             uint8_t *buffer = buf;
2642             ssize_t n = rc;
2643             while (n) {
2644                 struct kvm_get_htab_header *head =
2645                     (struct kvm_get_htab_header *) buffer;
2646                 size_t chunksize = sizeof(*head) +
2647                      HASH_PTE_SIZE_64 * head->n_valid;
2648 
2649                 qemu_put_be32(f, head->index);
2650                 qemu_put_be16(f, head->n_valid);
2651                 qemu_put_be16(f, head->n_invalid);
2652                 qemu_put_buffer(f, (void *)(head + 1),
2653                                 HASH_PTE_SIZE_64 * head->n_valid);
2654 
2655                 buffer += chunksize;
2656                 n -= chunksize;
2657             }
2658         }
2659     } while ((rc != 0)
2660              && ((max_ns < 0)
2661                  || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2662 
2663     return (rc == 0) ? 1 : 0;
2664 }
2665 
2666 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2667                            uint16_t n_valid, uint16_t n_invalid)
2668 {
2669     struct kvm_get_htab_header *buf;
2670     size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
2671     ssize_t rc;
2672 
2673     buf = alloca(chunksize);
2674     buf->index = index;
2675     buf->n_valid = n_valid;
2676     buf->n_invalid = n_invalid;
2677 
2678     qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
2679 
2680     rc = write(fd, buf, chunksize);
2681     if (rc < 0) {
2682         fprintf(stderr, "Error writing KVM hash table: %s\n",
2683                 strerror(errno));
2684         return rc;
2685     }
2686     if (rc != chunksize) {
2687         /* We should never get a short write on a single chunk */
2688         fprintf(stderr, "Short write, restoring KVM hash table\n");
2689         return -1;
2690     }
2691     return 0;
2692 }
2693 
2694 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2695 {
2696     return true;
2697 }
2698 
2699 void kvm_arch_init_irq_routing(KVMState *s)
2700 {
2701 }
2702 
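/* Read n HPTEs starting at ptex from the KVM htab fd into hptes[].
 * The fd returns runs of valid and invalid entries; invalid runs are
 * zero-filled in the destination so callers see a dense array. */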
2703 void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n)
2704 {
2705     int fd, rc;
2706     int i;
2707 
2708     fd = kvmppc_get_htab_fd(false, ptex, &error_abort);
2709 
2710     i = 0;
2711     while (i < n) {
2712         struct kvm_get_htab_header *hdr;
2713         int m = n < HPTES_PER_GROUP ? n : HPTES_PER_GROUP;
2714         char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64];
2715 
2716         rc = read(fd, buf, sizeof(buf));
2717         if (rc < 0) {
2718             hw_error("kvmppc_read_hptes: Unable to read HPTEs");
2719         }
2720 
2721         hdr = (struct kvm_get_htab_header *)buf;
2722         while ((i < n) && ((char *)hdr < (buf + rc))) {
2723             int invalid = hdr->n_invalid, valid = hdr->n_valid;
2724 
2725             if (hdr->index != (ptex + i)) {
2726                 hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32
2727                          " != (%"HWADDR_PRIu" + %d)", hdr->index, ptex, i);
2728             }
2729 
2730             if (n - i < valid) {
2731                 valid = n - i;
2732             }
2733             memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * valid);
2734             i += valid;
2735 
2736             if ((n - i) < invalid) {
2737                 invalid = n - i;
2738             }
2739             memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64);
2740             i += invalid;
2741 
2742             hdr = (struct kvm_get_htab_header *)
2743                 ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid);
2744         }
2745     }
2746 
2747     close(fd);
2748 }
2749 
2750 void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1)
2751 {
2752     int fd, rc;
2753     struct {
2754         struct kvm_get_htab_header hdr;
2755         uint64_t pte0;
2756         uint64_t pte1;
2757     } buf;
2758 
2759     fd = kvmppc_get_htab_fd(true, 0 /* Ignored */, &error_abort);
2760 
2761     buf.hdr.n_valid = 1;
2762     buf.hdr.n_invalid = 0;
2763     buf.hdr.index = ptex;
2764     buf.pte0 = cpu_to_be64(pte0);
2765     buf.pte1 = cpu_to_be64(pte1);
2766 
2767     rc = write(fd, &buf, sizeof(buf));
2768     if (rc != sizeof(buf)) {
2769         hw_error("kvmppc_write_hpte: Unable to update KVM HPT");
2770     }
2771     close(fd);
2772 }
2773 
2774 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2775                              uint64_t address, uint32_t data, PCIDevice *dev)
2776 {
2777     return 0;
2778 }
2779 
2780 int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
2781                                 int vector, PCIDevice *dev)
2782 {
2783     return 0;
2784 }
2785 
2786 int kvm_arch_release_virq_post(int virq)
2787 {
2788     return 0;
2789 }
2790 
2791 int kvm_arch_msi_data_to_gsi(uint32_t data)
2792 {
2793     return data & 0xffff;
2794 }
2795 
2796 int kvmppc_enable_hwrng(void)
2797 {
2798     if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2799         return -1;
2800     }
2801 
2802     return kvmppc_enable_hcall(kvm_state, H_RANDOM);
2803 }
2804 
2805 void kvmppc_check_papr_resize_hpt(Error **errp)
2806 {
2807     if (!kvm_enabled()) {
2808         return; /* No KVM, we're good */
2809     }
2810 
2811     if (cap_resize_hpt) {
2812         return; /* Kernel has explicit support, we're good */
2813     }
2814 
2815     /* Otherwise fallback on looking for PR KVM */
2816     if (kvmppc_is_pr(kvm_state)) {
2817         return;
2818     }
2819 
2820     error_setg(errp,
2821                "Hash page table resizing not available with this KVM version");
2822 }
2823 
2824 int kvmppc_resize_hpt_prepare(PowerPCCPU *cpu, target_ulong flags, int shift)
2825 {
2826     CPUState *cs = CPU(cpu);
2827     struct kvm_ppc_resize_hpt rhpt = {
2828         .flags = flags,
2829         .shift = shift,
2830     };
2831 
2832     if (!cap_resize_hpt) {
2833         return -ENOSYS;
2834     }
2835 
2836     return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_PREPARE, &rhpt);
2837 }
2838 
2839 int kvmppc_resize_hpt_commit(PowerPCCPU *cpu, target_ulong flags, int shift)
2840 {
2841     CPUState *cs = CPU(cpu);
2842     struct kvm_ppc_resize_hpt rhpt = {
2843         .flags = flags,
2844         .shift = shift,
2845     };
2846 
2847     if (!cap_resize_hpt) {
2848         return -ENOSYS;
2849     }
2850 
2851     return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_COMMIT, &rhpt);
2852 }
2853 
2854 /*
2855  * This is a helper function to detect a post migration scenario
2856  * in which a guest, running as KVM-HV, freezes in cpu_post_load because
2857  * the guest kernel can't handle a PVR value other than the actual host
2858  * PVR in KVM_SET_SREGS, even if pvr_match() returns true.
2859  *
2860  * If we don't have cap_ppc_pvr_compat and we're not running in PR
2861  * (so, we're HV), return true. The workaround itself is done in
2862  * cpu_post_load.
2863  *
2864  * The order here is important: we'll only check for KVM PR as a
2865  * fallback if the guest kernel can't handle the situation itself.
2866  * We want to avoid querying the running KVM type from QEMU as much
2867  * as possible.
2868  */
2869 bool kvmppc_pvr_workaround_required(PowerPCCPU *cpu)
2870 {
2871     CPUState *cs = CPU(cpu);
2872 
2873     if (!kvm_enabled()) {
2874         return false;
2875     }
2876 
2877     if (cap_ppc_pvr_compat) {
2878         return false;
2879     }
2880 
2881     return !kvmppc_is_pr(cs->kvm_state);
2882 }
2883