xref: /openbmc/qemu/target/ppc/kvm.c (revision 82c4f87e)
1 /*
2  * PowerPC implementation of KVM hooks
3  *
4  * Copyright IBM Corp. 2007
5  * Copyright (C) 2011 Freescale Semiconductor, Inc.
6  *
7  * Authors:
8  *  Jerone Young <jyoung5@us.ibm.com>
9  *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10  *  Hollis Blanchard <hollisb@us.ibm.com>
11  *
12  * This work is licensed under the terms of the GNU GPL, version 2 or later.
13  * See the COPYING file in the top-level directory.
14  *
15  */
16 
17 #include "qemu/osdep.h"
18 #include <dirent.h>
19 #include <sys/ioctl.h>
20 #include <sys/vfs.h>
21 
22 #include <linux/kvm.h>
23 
24 #include "qemu-common.h"
25 #include "qapi/error.h"
26 #include "qemu/error-report.h"
27 #include "cpu.h"
28 #include "cpu-models.h"
29 #include "qemu/timer.h"
30 #include "sysemu/sysemu.h"
31 #include "sysemu/hw_accel.h"
32 #include "kvm_ppc.h"
33 #include "sysemu/cpus.h"
34 #include "sysemu/device_tree.h"
35 #include "mmu-hash64.h"
36 
37 #include "hw/sysbus.h"
38 #include "hw/ppc/spapr.h"
39 #include "hw/ppc/spapr_vio.h"
40 #include "hw/ppc/spapr_cpu_core.h"
41 #include "hw/ppc/ppc.h"
42 #include "sysemu/watchdog.h"
43 #include "trace.h"
44 #include "exec/gdbstub.h"
45 #include "exec/memattrs.h"
46 #include "exec/ram_addr.h"
47 #include "sysemu/hostmem.h"
48 #include "qemu/cutils.h"
49 #include "qemu/mmap-alloc.h"
50 #include "elf.h"
51 #include "sysemu/kvm_int.h"
52 
53 //#define DEBUG_KVM
54 
55 #ifdef DEBUG_KVM
56 #define DPRINTF(fmt, ...) \
57     do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
58 #else
59 #define DPRINTF(fmt, ...) \
60     do { } while (0)
61 #endif
62 
63 #define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"
64 
65 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
66     KVM_CAP_LAST_INFO
67 };
68 
69 static int cap_interrupt_unset = false;
70 static int cap_interrupt_level = false;
71 static int cap_segstate;
72 static int cap_booke_sregs;
73 static int cap_ppc_smt;
74 static int cap_ppc_smt_possible;
75 static int cap_spapr_tce;
76 static int cap_spapr_tce_64;
77 static int cap_spapr_multitce;
78 static int cap_spapr_vfio;
79 static int cap_hior;
80 static int cap_one_reg;
81 static int cap_epr;
82 static int cap_ppc_watchdog;
83 static int cap_papr;
84 static int cap_htab_fd;
85 static int cap_fixup_hcalls;
86 static int cap_htm;             /* Hardware transactional memory support */
87 static int cap_mmu_radix;
88 static int cap_mmu_hash_v3;
89 static int cap_resize_hpt;
90 static int cap_ppc_pvr_compat;
91 static int cap_ppc_safe_cache;
92 static int cap_ppc_safe_bounds_check;
93 static int cap_ppc_safe_indirect_branch;
94 
95 static uint32_t debug_inst_opcode;
96 
97 /* XXX We have a race condition where we actually have a level triggered
98  *     interrupt, but the infrastructure can't expose that yet, so the guest
99  *     takes but ignores it, goes to sleep and never gets notified that there's
100  *     still an interrupt pending.
101  *
102  *     As a quick workaround, let's just wake up again 20 ms after we injected
 103  *     an interrupt. That way we can ensure that we're always reinjecting
104  *     interrupts in case the guest swallowed them.
105  */
106 static QEMUTimer *idle_timer;
107 
108 static void kvm_kick_cpu(void *opaque)
109 {
110     PowerPCCPU *cpu = opaque;
111 
112     qemu_cpu_kick(CPU(cpu));
113 }
114 
115 /* Check whether we are running with KVM-PR (instead of KVM-HV).  This
116  * should only be used for fallback tests - generally we should use
117  * explicit capabilities for the features we want, rather than
118  * assuming what is/isn't available depending on the KVM variant. */
119 static bool kvmppc_is_pr(KVMState *ks)
120 {
121     /* Assume KVM-PR if the GET_PVINFO capability is available */
122     return kvm_vm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
123 }
124 
125 static int kvm_ppc_register_host_cpu_type(MachineState *ms);
126 static void kvmppc_get_cpu_characteristics(KVMState *s);
127 
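/*
 * Probe the host kernel for the KVM capabilities we care about and cache
 * the results in the cap_* globals above.
 */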
128 int kvm_arch_init(MachineState *ms, KVMState *s)
129 {
130     cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
131     cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
132     cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
133     cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
134     cap_ppc_smt_possible = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT_POSSIBLE);
135     cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
136     cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64);
137     cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
138     cap_spapr_vfio = kvm_vm_check_extension(s, KVM_CAP_SPAPR_TCE_VFIO);
139     cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
140     cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
141     cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
142     cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
 143     /* Note: we don't set cap_papr here, because this capability is
 144      * only activated later, by kvmppc_set_papr() */
145     cap_htab_fd = kvm_vm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
146     cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
147     cap_ppc_smt = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT);
148     cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
149     cap_mmu_radix = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_RADIX);
150     cap_mmu_hash_v3 = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_HASH_V3);
151     cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT);
152     kvmppc_get_cpu_characteristics(s);
 153     /*
 154      * Note: set to false because there is no such capability
 155      * in KVM at the moment.
 156      *
 157      * TODO: call kvm_vm_check_extension() with the right capability
 158      * once the kernel starts implementing it. */
159     cap_ppc_pvr_compat = false;
160 
161     if (!cap_interrupt_level) {
162         fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
163                         "VM to stall at times!\n");
164     }
165 
166     kvm_ppc_register_host_cpu_type(ms);
167 
168     return 0;
169 }
170 
171 int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
172 {
173     return 0;
174 }
175 
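/* Tell KVM the guest's PVR via KVM_GET/SET_SREGS. On BookE we keep the
 * native PVR, so there is nothing to do. */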
176 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
177 {
178     CPUPPCState *cenv = &cpu->env;
179     CPUState *cs = CPU(cpu);
180     struct kvm_sregs sregs;
181     int ret;
182 
183     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
 184         /* What we're really trying to say is "if we're on BookE, we use
 185            the native PVR for now". This is the only sane way to check
 186            it though, so we may mislead users into thinking they can run
 187            BookE guests on Book3S. Let's hope nobody dares to try :) */
188         return 0;
189     } else {
190         if (!cap_segstate) {
191             fprintf(stderr, "kvm error: missing PVR setting capability\n");
192             return -ENOSYS;
193         }
194     }
195 
196     ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
197     if (ret) {
198         return ret;
199     }
200 
201     sregs.pvr = cenv->spr[SPR_PVR];
202     return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
203 }
204 
205 /* Set up a shared TLB array with KVM */
206 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
207 {
208     CPUPPCState *env = &cpu->env;
209     CPUState *cs = CPU(cpu);
210     struct kvm_book3e_206_tlb_params params = {};
211     struct kvm_config_tlb cfg = {};
212     unsigned int entries = 0;
213     int ret, i;
214 
215     if (!kvm_enabled() ||
216         !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
217         return 0;
218     }
219 
220     assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
221 
222     for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
223         params.tlb_sizes[i] = booke206_tlb_size(env, i);
224         params.tlb_ways[i] = booke206_tlb_ways(env, i);
225         entries += params.tlb_sizes[i];
226     }
227 
228     assert(entries == env->nb_tlb);
229     assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
230 
231     env->tlb_dirty = true;
232 
233     cfg.array = (uintptr_t)env->tlb.tlbm;
234     cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
235     cfg.params = (uintptr_t)&params;
236     cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
237 
238     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
239     if (ret < 0) {
240         fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
241                 __func__, strerror(-ret));
242         return ret;
243     }
244 
245     env->kvm_sw_tlb = true;
246     return 0;
247 }
248 
249 
250 #if defined(TARGET_PPC64)
251 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
252                                        struct kvm_ppc_smmu_info *info)
253 {
254     CPUPPCState *env = &cpu->env;
255     CPUState *cs = CPU(cpu);
256 
257     memset(info, 0, sizeof(*info));
258 
 259     /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so we
 260      * need to "guess" what the supported page sizes are.
 261      *
 262      * For that to work we make a few assumptions:
 263      *
 264      * - Check whether we are running "PR" KVM, which only supports 4K
 265      *   and 16M pages, but supports them regardless of the backing
 266      *   store characteristics. We also don't support 1T segments.
 267      *
 268      *   This is safe: if HV KVM ever supports that capability or PR
 269      *   KVM grows support for more page/segment sizes, those versions
 270      *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
 271      *   will not hit this fallback.
 272      *
 273      * - Else we are running HV KVM. This means we only support page
 274      *   sizes that fit in the backing store. Additionally we only
 275      *   advertise 64K pages if the processor is ARCH 2.06 and we assume
 276      *   P7 encodings for the SLB and hash table. Here too, we assume
 277      *   support for any newer processor will mean a kernel that
 278      *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
 279      *   this fallback.
 280      */
281     if (kvmppc_is_pr(cs->kvm_state)) {
282         /* No flags */
283         info->flags = 0;
284         info->slb_size = 64;
285 
286         /* Standard 4k base page size segment */
287         info->sps[0].page_shift = 12;
288         info->sps[0].slb_enc = 0;
289         info->sps[0].enc[0].page_shift = 12;
290         info->sps[0].enc[0].pte_enc = 0;
291 
292         /* Standard 16M large page size segment */
293         info->sps[1].page_shift = 24;
294         info->sps[1].slb_enc = SLB_VSID_L;
295         info->sps[1].enc[0].page_shift = 24;
296         info->sps[1].enc[0].pte_enc = 0;
297     } else {
298         int i = 0;
299 
300         /* HV KVM has backing store size restrictions */
301         info->flags = KVM_PPC_PAGE_SIZES_REAL;
302 
303         if (ppc_hash64_has(cpu, PPC_HASH64_1TSEG)) {
304             info->flags |= KVM_PPC_1T_SEGMENTS;
305         }
306 
307         if (env->mmu_model == POWERPC_MMU_2_06 ||
308             env->mmu_model == POWERPC_MMU_2_07) {
309             info->slb_size = 32;
310         } else {
311             info->slb_size = 64;
312         }
313 
314         /* Standard 4k base page size segment */
315         info->sps[i].page_shift = 12;
316         info->sps[i].slb_enc = 0;
317         info->sps[i].enc[0].page_shift = 12;
318         info->sps[i].enc[0].pte_enc = 0;
319         i++;
320 
321         /* 64K on MMU 2.06 and later */
322         if (env->mmu_model == POWERPC_MMU_2_06 ||
323             env->mmu_model == POWERPC_MMU_2_07) {
324             info->sps[i].page_shift = 16;
325             info->sps[i].slb_enc = 0x110;
326             info->sps[i].enc[0].page_shift = 16;
327             info->sps[i].enc[0].pte_enc = 1;
328             i++;
329         }
330 
331         /* Standard 16M large page size segment */
332         info->sps[i].page_shift = 24;
333         info->sps[i].slb_enc = SLB_VSID_L;
334         info->sps[i].enc[0].page_shift = 24;
335         info->sps[i].enc[0].pte_enc = 0;
336     }
337 }
338 
339 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
340 {
341     CPUState *cs = CPU(cpu);
342     int ret;
343 
344     if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
345         ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
346         if (ret == 0) {
347             return;
348         }
349     }
350 
351     kvm_get_fallback_smmu_info(cpu, info);
352 }
353 
354 struct ppc_radix_page_info *kvm_get_radix_page_info(void)
355 {
356     KVMState *s = KVM_STATE(current_machine->accelerator);
357     struct ppc_radix_page_info *radix_page_info;
358     struct kvm_ppc_rmmu_info rmmu_info;
359     int i;
360 
361     if (!kvm_check_extension(s, KVM_CAP_PPC_MMU_RADIX)) {
362         return NULL;
363     }
364     if (kvm_vm_ioctl(s, KVM_PPC_GET_RMMU_INFO, &rmmu_info)) {
365         return NULL;
366     }
367     radix_page_info = g_malloc0(sizeof(*radix_page_info));
368     radix_page_info->count = 0;
369     for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) {
370         if (rmmu_info.ap_encodings[i]) {
371             radix_page_info->entries[i] = rmmu_info.ap_encodings[i];
372             radix_page_info->count++;
373         }
374     }
375     return radix_page_info;
376 }
377 
378 target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu,
379                                      bool radix, bool gtse,
380                                      uint64_t proc_tbl)
381 {
382     CPUState *cs = CPU(cpu);
383     int ret;
384     uint64_t flags = 0;
385     struct kvm_ppc_mmuv3_cfg cfg = {
386         .process_table = proc_tbl,
387     };
388 
389     if (radix) {
390         flags |= KVM_PPC_MMUV3_RADIX;
391     }
392     if (gtse) {
393         flags |= KVM_PPC_MMUV3_GTSE;
394     }
395     cfg.flags = flags;
396     ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_CONFIGURE_V3_MMU, &cfg);
397     switch (ret) {
398     case 0:
399         return H_SUCCESS;
400     case -EINVAL:
401         return H_PARAMETER;
402     case -ENODEV:
403         return H_NOT_AVAILABLE;
404     default:
405         return H_HARDWARE;
406     }
407 }
408 
409 bool kvmppc_hpt_needs_host_contiguous_pages(void)
410 {
411     PowerPCCPU *cpu = POWERPC_CPU(first_cpu);
412     static struct kvm_ppc_smmu_info smmu_info;
413 
414     if (!kvm_enabled()) {
415         return false;
416     }
417 
418     kvm_get_smmu_info(cpu, &smmu_info);
419     return !!(smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL);
420 }
421 
422 void kvm_check_mmu(PowerPCCPU *cpu, Error **errp)
423 {
424     struct kvm_ppc_smmu_info smmu_info;
425     int iq, ik, jq, jk;
426 
427     /* For now, we only have anything to check on hash64 MMUs */
428     if (!cpu->hash64_opts || !kvm_enabled()) {
429         return;
430     }
431 
432     kvm_get_smmu_info(cpu, &smmu_info);
433 
434     if (ppc_hash64_has(cpu, PPC_HASH64_1TSEG)
435         && !(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
436         error_setg(errp,
437                    "KVM does not support 1TiB segments which guest expects");
438         return;
439     }
440 
441     if (smmu_info.slb_size < cpu->hash64_opts->slb_size) {
442         error_setg(errp, "KVM only supports %u SLB entries, but guest needs %u",
443                    smmu_info.slb_size, cpu->hash64_opts->slb_size);
444         return;
445     }
446 
447     /*
448      * Verify that every pagesize supported by the cpu model is
449      * supported by KVM with the same encodings
450      */
451     for (iq = 0; iq < ARRAY_SIZE(cpu->hash64_opts->sps); iq++) {
452         PPCHash64SegmentPageSizes *qsps = &cpu->hash64_opts->sps[iq];
453         struct kvm_ppc_one_seg_page_size *ksps;
454 
455         for (ik = 0; ik < ARRAY_SIZE(smmu_info.sps); ik++) {
456             if (qsps->page_shift == smmu_info.sps[ik].page_shift) {
457                 break;
458             }
459         }
460         if (ik >= ARRAY_SIZE(smmu_info.sps)) {
 461             error_setg(errp, "KVM doesn't support base page shift %u",
462                        qsps->page_shift);
463             return;
464         }
465 
466         ksps = &smmu_info.sps[ik];
467         if (ksps->slb_enc != qsps->slb_enc) {
468             error_setg(errp,
469 "KVM uses SLB encoding 0x%x for page shift %u, but guest expects 0x%x",
470                        ksps->slb_enc, ksps->page_shift, qsps->slb_enc);
471             return;
472         }
473 
474         for (jq = 0; jq < ARRAY_SIZE(qsps->enc); jq++) {
475             for (jk = 0; jk < ARRAY_SIZE(ksps->enc); jk++) {
476                 if (qsps->enc[jq].page_shift == ksps->enc[jk].page_shift) {
477                     break;
478                 }
479             }
480 
481             if (jk >= ARRAY_SIZE(ksps->enc)) {
482                 error_setg(errp, "KVM doesn't support page shift %u/%u",
483                            qsps->enc[jq].page_shift, qsps->page_shift);
484                 return;
485             }
486             if (qsps->enc[jq].pte_enc != ksps->enc[jk].pte_enc) {
487                 error_setg(errp,
488 "KVM uses PTE encoding 0x%x for page shift %u/%u, but guest expects 0x%x",
489                            ksps->enc[jk].pte_enc, qsps->enc[jq].page_shift,
490                            qsps->page_shift, qsps->enc[jq].pte_enc);
491                 return;
492             }
493         }
494     }
495 
496     if (ppc_hash64_has(cpu, PPC_HASH64_CI_LARGEPAGE)) {
 497         /* Most of the guest page sizes we can use are tied to the
 498          * host pages used to map guest RAM, which is handled in the
 499          * platform code. Cache-Inhibited large pages (64k), however,
 500          * are used for I/O, so if they're mapped to the host at all
 501          * it will be a normal mapping, not a special hugepage one
 502          * used for RAM. */
503         if (getpagesize() < 0x10000) {
504             error_setg(errp,
505                        "KVM can't supply 64kiB CI pages, which guest expects");
506         }
507     }
508 }
 509 #endif /* defined(TARGET_PPC64) */
510 
511 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
512 {
513     return POWERPC_CPU(cpu)->vcpu_id;
514 }
515 
 516 /* e500 supports 2 h/w breakpoints and 2 watchpoints.
 517  * book3s supports only 1 watchpoint, so an array size
 518  * of 4 is sufficient for now.
 519  */
520 #define MAX_HW_BKPTS 4
521 
522 static struct HWBreakpoint {
523     target_ulong addr;
524     int type;
525 } hw_debug_points[MAX_HW_BKPTS];
526 
527 static CPUWatchpoint hw_watchpoint;
528 
 529 /* By default no breakpoints or watchpoints are supported */
530 static int max_hw_breakpoint;
531 static int max_hw_watchpoint;
532 static int nb_hw_breakpoint;
533 static int nb_hw_watchpoint;
534 
535 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
536 {
537     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
538         max_hw_breakpoint = 2;
539         max_hw_watchpoint = 2;
540     }
541 
542     if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
543         fprintf(stderr, "Error initializing h/w breakpoints\n");
544         return;
545     }
546 }
547 
548 int kvm_arch_init_vcpu(CPUState *cs)
549 {
550     PowerPCCPU *cpu = POWERPC_CPU(cs);
551     CPUPPCState *cenv = &cpu->env;
552     int ret;
553 
554     /* Synchronize sregs with kvm */
555     ret = kvm_arch_sync_sregs(cpu);
556     if (ret) {
557         if (ret == -EINVAL) {
558             error_report("Register sync failed... If you're using kvm-hv.ko,"
559                          " only \"-cpu host\" is possible");
560         }
561         return ret;
562     }
563 
564     idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
565 
566     switch (cenv->mmu_model) {
567     case POWERPC_MMU_BOOKE206:
568         /* This target supports access to KVM's guest TLB */
569         ret = kvm_booke206_tlb_init(cpu);
570         break;
571     case POWERPC_MMU_2_07:
572         if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
 573             /* KVM-HV has transactional memory on POWER8 even without the
 574              * KVM_CAP_PPC_HTM extension, so enable it here instead as
 575              * long as it's available to userspace on the host. */
576             if (qemu_getauxval(AT_HWCAP2) & PPC_FEATURE2_HAS_HTM) {
577                 cap_htm = true;
578             }
579         }
580         break;
581     default:
582         break;
583     }
584 
585     kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
586     kvmppc_hw_debug_points_init(cenv);
587 
588     return ret;
589 }
590 
591 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
592 {
593     CPUPPCState *env = &cpu->env;
594     CPUState *cs = CPU(cpu);
595     struct kvm_dirty_tlb dirty_tlb;
596     unsigned char *bitmap;
597     int ret;
598 
599     if (!env->kvm_sw_tlb) {
600         return;
601     }
602 
603     bitmap = g_malloc((env->nb_tlb + 7) / 8);
604     memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
605 
606     dirty_tlb.bitmap = (uintptr_t)bitmap;
607     dirty_tlb.num_dirty = env->nb_tlb;
608 
609     ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
610     if (ret) {
611         fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
612                 __func__, strerror(-ret));
613     }
614 
615     g_free(bitmap);
616 }
617 
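/* Read a single SPR from KVM with KVM_GET_ONE_REG, honouring the register
 * width encoded in the ONE_REG id. */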
618 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
619 {
620     PowerPCCPU *cpu = POWERPC_CPU(cs);
621     CPUPPCState *env = &cpu->env;
622     union {
623         uint32_t u32;
624         uint64_t u64;
625     } val;
626     struct kvm_one_reg reg = {
627         .id = id,
628         .addr = (uintptr_t) &val,
629     };
630     int ret;
631 
632     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
633     if (ret != 0) {
634         trace_kvm_failed_spr_get(spr, strerror(errno));
635     } else {
636         switch (id & KVM_REG_SIZE_MASK) {
637         case KVM_REG_SIZE_U32:
638             env->spr[spr] = val.u32;
639             break;
640 
641         case KVM_REG_SIZE_U64:
642             env->spr[spr] = val.u64;
643             break;
644 
645         default:
646             /* Don't handle this size yet */
647             abort();
648         }
649     }
650 }
651 
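/* Write a single SPR to KVM with KVM_SET_ONE_REG. */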
652 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
653 {
654     PowerPCCPU *cpu = POWERPC_CPU(cs);
655     CPUPPCState *env = &cpu->env;
656     union {
657         uint32_t u32;
658         uint64_t u64;
659     } val;
660     struct kvm_one_reg reg = {
661         .id = id,
662         .addr = (uintptr_t) &val,
663     };
664     int ret;
665 
666     switch (id & KVM_REG_SIZE_MASK) {
667     case KVM_REG_SIZE_U32:
668         val.u32 = env->spr[spr];
669         break;
670 
671     case KVM_REG_SIZE_U64:
672         val.u64 = env->spr[spr];
673         break;
674 
675     default:
676         /* Don't handle this size yet */
677         abort();
678     }
679 
680     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
681     if (ret != 0) {
682         trace_kvm_failed_spr_set(spr, strerror(errno));
683     }
684 }
685 
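/* Transfer the FPSCR, FP/VSX and Altivec registers from QEMU to KVM,
 * using VSR register ids when the CPU has VSX. */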
686 static int kvm_put_fp(CPUState *cs)
687 {
688     PowerPCCPU *cpu = POWERPC_CPU(cs);
689     CPUPPCState *env = &cpu->env;
690     struct kvm_one_reg reg;
691     int i;
692     int ret;
693 
694     if (env->insns_flags & PPC_FLOAT) {
695         uint64_t fpscr = env->fpscr;
696         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
697 
698         reg.id = KVM_REG_PPC_FPSCR;
699         reg.addr = (uintptr_t)&fpscr;
700         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
701         if (ret < 0) {
702             DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
703             return ret;
704         }
705 
706         for (i = 0; i < 32; i++) {
707             uint64_t vsr[2];
708 
709 #ifdef HOST_WORDS_BIGENDIAN
710             vsr[0] = float64_val(env->fpr[i]);
711             vsr[1] = env->vsr[i];
712 #else
713             vsr[0] = env->vsr[i];
714             vsr[1] = float64_val(env->fpr[i]);
715 #endif
716             reg.addr = (uintptr_t) &vsr;
717             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
718 
719             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
720             if (ret < 0) {
721                 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
722                         i, strerror(errno));
723                 return ret;
724             }
725         }
726     }
727 
728     if (env->insns_flags & PPC_ALTIVEC) {
729         reg.id = KVM_REG_PPC_VSCR;
730         reg.addr = (uintptr_t)&env->vscr;
731         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
732         if (ret < 0) {
733             DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
734             return ret;
735         }
736 
737         for (i = 0; i < 32; i++) {
738             reg.id = KVM_REG_PPC_VR(i);
739             reg.addr = (uintptr_t)&env->avr[i];
740             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
741             if (ret < 0) {
742                 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
743                 return ret;
744             }
745         }
746     }
747 
748     return 0;
749 }
750 
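/* Fetch the FPSCR, FP/VSX and Altivec registers from KVM back into env. */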
751 static int kvm_get_fp(CPUState *cs)
752 {
753     PowerPCCPU *cpu = POWERPC_CPU(cs);
754     CPUPPCState *env = &cpu->env;
755     struct kvm_one_reg reg;
756     int i;
757     int ret;
758 
759     if (env->insns_flags & PPC_FLOAT) {
760         uint64_t fpscr;
761         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
762 
763         reg.id = KVM_REG_PPC_FPSCR;
764         reg.addr = (uintptr_t)&fpscr;
765         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
766         if (ret < 0) {
767             DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
768             return ret;
769         } else {
770             env->fpscr = fpscr;
771         }
772 
773         for (i = 0; i < 32; i++) {
774             uint64_t vsr[2];
775 
776             reg.addr = (uintptr_t) &vsr;
777             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
778 
779             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
780             if (ret < 0) {
781                 DPRINTF("Unable to get %s%d from KVM: %s\n",
782                         vsx ? "VSR" : "FPR", i, strerror(errno));
783                 return ret;
784             } else {
785 #ifdef HOST_WORDS_BIGENDIAN
786                 env->fpr[i] = vsr[0];
787                 if (vsx) {
788                     env->vsr[i] = vsr[1];
789                 }
790 #else
791                 env->fpr[i] = vsr[1];
792                 if (vsx) {
793                     env->vsr[i] = vsr[0];
794                 }
795 #endif
796             }
797         }
798     }
799 
800     if (env->insns_flags & PPC_ALTIVEC) {
801         reg.id = KVM_REG_PPC_VSCR;
802         reg.addr = (uintptr_t)&env->vscr;
803         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
804         if (ret < 0) {
805             DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
806             return ret;
807         }
808 
809         for (i = 0; i < 32; i++) {
810             reg.id = KVM_REG_PPC_VR(i);
811             reg.addr = (uintptr_t)&env->avr[i];
812             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
813             if (ret < 0) {
814                 DPRINTF("Unable to get VR%d from KVM: %s\n",
815                         i, strerror(errno));
816                 return ret;
817             }
818         }
819     }
820 
821     return 0;
822 }
823 
824 #if defined(TARGET_PPC64)
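/* Read the VPA, SLB shadow and dispatch trace log registrations back
 * from KVM. */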
825 static int kvm_get_vpa(CPUState *cs)
826 {
827     PowerPCCPU *cpu = POWERPC_CPU(cs);
828     sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu);
829     struct kvm_one_reg reg;
830     int ret;
831 
832     reg.id = KVM_REG_PPC_VPA_ADDR;
833     reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
834     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
835     if (ret < 0) {
836         DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
837         return ret;
838     }
839 
840     assert((uintptr_t)&spapr_cpu->slb_shadow_size
841            == ((uintptr_t)&spapr_cpu->slb_shadow_addr + 8));
842     reg.id = KVM_REG_PPC_VPA_SLB;
843     reg.addr = (uintptr_t)&spapr_cpu->slb_shadow_addr;
844     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
845     if (ret < 0) {
846         DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
847                 strerror(errno));
848         return ret;
849     }
850 
851     assert((uintptr_t)&spapr_cpu->dtl_size
852            == ((uintptr_t)&spapr_cpu->dtl_addr + 8));
853     reg.id = KVM_REG_PPC_VPA_DTL;
854     reg.addr = (uintptr_t)&spapr_cpu->dtl_addr;
855     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
856     if (ret < 0) {
857         DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
858                 strerror(errno));
859         return ret;
860     }
861 
862     return 0;
863 }
864 
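/* Restore the VPA, SLB shadow and dispatch trace log registrations in KVM,
 * observing the ordering constraint described below. */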
865 static int kvm_put_vpa(CPUState *cs)
866 {
867     PowerPCCPU *cpu = POWERPC_CPU(cs);
868     sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu);
869     struct kvm_one_reg reg;
870     int ret;
871 
872     /* SLB shadow or DTL can't be registered unless a master VPA is
873      * registered.  That means when restoring state, if a VPA *is*
874      * registered, we need to set that up first.  If not, we need to
875      * deregister the others before deregistering the master VPA */
876     assert(spapr_cpu->vpa_addr
877            || !(spapr_cpu->slb_shadow_addr || spapr_cpu->dtl_addr));
878 
879     if (spapr_cpu->vpa_addr) {
880         reg.id = KVM_REG_PPC_VPA_ADDR;
881         reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
882         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
883         if (ret < 0) {
884             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
885             return ret;
886         }
887     }
888 
889     assert((uintptr_t)&spapr_cpu->slb_shadow_size
890            == ((uintptr_t)&spapr_cpu->slb_shadow_addr + 8));
891     reg.id = KVM_REG_PPC_VPA_SLB;
892     reg.addr = (uintptr_t)&spapr_cpu->slb_shadow_addr;
893     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
894     if (ret < 0) {
895         DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
896         return ret;
897     }
898 
899     assert((uintptr_t)&spapr_cpu->dtl_size
900            == ((uintptr_t)&spapr_cpu->dtl_addr + 8));
901     reg.id = KVM_REG_PPC_VPA_DTL;
902     reg.addr = (uintptr_t)&spapr_cpu->dtl_addr;
903     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
904     if (ret < 0) {
905         DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
906                 strerror(errno));
907         return ret;
908     }
909 
910     if (!spapr_cpu->vpa_addr) {
911         reg.id = KVM_REG_PPC_VPA_ADDR;
912         reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
913         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
914         if (ret < 0) {
915             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
916             return ret;
917         }
918     }
919 
920     return 0;
921 }
922 #endif /* TARGET_PPC64 */
923 
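/* Push the Book3S segment state (SDR1, SLB, SRs and BATs) to KVM via
 * KVM_SET_SREGS. */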
924 int kvmppc_put_books_sregs(PowerPCCPU *cpu)
925 {
926     CPUPPCState *env = &cpu->env;
927     struct kvm_sregs sregs;
928     int i;
929 
930     sregs.pvr = env->spr[SPR_PVR];
931 
932     if (cpu->vhyp) {
933         PPCVirtualHypervisorClass *vhc =
934             PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
935         sregs.u.s.sdr1 = vhc->encode_hpt_for_kvm_pr(cpu->vhyp);
936     } else {
937         sregs.u.s.sdr1 = env->spr[SPR_SDR1];
938     }
939 
940     /* Sync SLB */
941 #ifdef TARGET_PPC64
942     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
943         sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
944         if (env->slb[i].esid & SLB_ESID_V) {
945             sregs.u.s.ppc64.slb[i].slbe |= i;
946         }
947         sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
948     }
949 #endif
950 
951     /* Sync SRs */
952     for (i = 0; i < 16; i++) {
953         sregs.u.s.ppc32.sr[i] = env->sr[i];
954     }
955 
956     /* Sync BATs */
957     for (i = 0; i < 8; i++) {
 958         /* Beware: we have to swap the upper and lower 32-bit halves here */
959         sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
960             | env->DBAT[1][i];
961         sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
962             | env->IBAT[1][i];
963     }
964 
965     return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
966 }
967 
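/* Copy the vCPU state from QEMU to KVM: GPRs, CR, common SPRs, FP/VMX
 * state and, where supported, sregs, ONE_REG SPRs, TM and VPA state. */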
968 int kvm_arch_put_registers(CPUState *cs, int level)
969 {
970     PowerPCCPU *cpu = POWERPC_CPU(cs);
971     CPUPPCState *env = &cpu->env;
972     struct kvm_regs regs;
973     int ret;
974     int i;
975 
976     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
977     if (ret < 0) {
978         return ret;
979     }
980 
981     regs.ctr = env->ctr;
982     regs.lr  = env->lr;
983     regs.xer = cpu_read_xer(env);
984     regs.msr = env->msr;
985     regs.pc = env->nip;
986 
987     regs.srr0 = env->spr[SPR_SRR0];
988     regs.srr1 = env->spr[SPR_SRR1];
989 
990     regs.sprg0 = env->spr[SPR_SPRG0];
991     regs.sprg1 = env->spr[SPR_SPRG1];
992     regs.sprg2 = env->spr[SPR_SPRG2];
993     regs.sprg3 = env->spr[SPR_SPRG3];
994     regs.sprg4 = env->spr[SPR_SPRG4];
995     regs.sprg5 = env->spr[SPR_SPRG5];
996     regs.sprg6 = env->spr[SPR_SPRG6];
997     regs.sprg7 = env->spr[SPR_SPRG7];
998 
999     regs.pid = env->spr[SPR_BOOKE_PID];
1000 
 1001     for (i = 0; i < 32; i++)
1002         regs.gpr[i] = env->gpr[i];
1003 
1004     regs.cr = 0;
1005     for (i = 0; i < 8; i++) {
1006         regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
1007     }
1008 
1009     ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
1010     if (ret < 0)
1011         return ret;
1012 
1013     kvm_put_fp(cs);
1014 
1015     if (env->tlb_dirty) {
1016         kvm_sw_tlb_put(cpu);
1017         env->tlb_dirty = false;
1018     }
1019 
1020     if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
1021         ret = kvmppc_put_books_sregs(cpu);
1022         if (ret < 0) {
1023             return ret;
1024         }
1025     }
1026 
1027     if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
1028         kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1029     }
1030 
1031     if (cap_one_reg) {
1032         int i;
1033 
 1034         /* We deliberately ignore errors here: for kernels which have
 1035          * the ONE_REG calls but don't support the specific
 1036          * registers, there's a reasonable chance things will still
 1037          * work, at least until we try to migrate. */
1038         for (i = 0; i < 1024; i++) {
1039             uint64_t id = env->spr_cb[i].one_reg_id;
1040 
1041             if (id != 0) {
1042                 kvm_put_one_spr(cs, id, i);
1043             }
1044         }
1045 
1046 #ifdef TARGET_PPC64
1047         if (msr_ts) {
1048             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1049                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1050             }
1051             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1052                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1053             }
1054             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1055             kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1056             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1057             kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1058             kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1059             kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1060             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1061             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1062             kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1063             kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1064         }
1065 
1066         if (cap_papr) {
1067             if (kvm_put_vpa(cs) < 0) {
1068                 DPRINTF("Warning: Unable to set VPA information to KVM\n");
1069             }
1070         }
1071 
1072         kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1073 #endif /* TARGET_PPC64 */
1074     }
1075 
1076     return ret;
1077 }
1078 
1079 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1080 {
1081      env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1082 }
1083 
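/* Read the BookE sregs from KVM and scatter them into env->spr[],
 * updating the exception vectors from the IVORs as we go. */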
1084 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1085 {
1086     CPUPPCState *env = &cpu->env;
1087     struct kvm_sregs sregs;
1088     int ret;
1089 
1090     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1091     if (ret < 0) {
1092         return ret;
1093     }
1094 
1095     if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1096         env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1097         env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1098         env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1099         env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1100         env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1101         env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1102         env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1103         env->spr[SPR_DECR] = sregs.u.e.dec;
1104         env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1105         env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1106         env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1107     }
1108 
1109     if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1110         env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1111         env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1112         env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1113         env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1114         env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1115     }
1116 
1117     if (sregs.u.e.features & KVM_SREGS_E_64) {
1118         env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1119     }
1120 
1121     if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1122         env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1123     }
1124 
1125     if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1126         env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1127         kvm_sync_excp(env, POWERPC_EXCP_CRITICAL,  SPR_BOOKE_IVOR0);
1128         env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1129         kvm_sync_excp(env, POWERPC_EXCP_MCHECK,  SPR_BOOKE_IVOR1);
1130         env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1131         kvm_sync_excp(env, POWERPC_EXCP_DSI,  SPR_BOOKE_IVOR2);
1132         env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1133         kvm_sync_excp(env, POWERPC_EXCP_ISI,  SPR_BOOKE_IVOR3);
1134         env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1135         kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL,  SPR_BOOKE_IVOR4);
1136         env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1137         kvm_sync_excp(env, POWERPC_EXCP_ALIGN,  SPR_BOOKE_IVOR5);
1138         env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1139         kvm_sync_excp(env, POWERPC_EXCP_PROGRAM,  SPR_BOOKE_IVOR6);
1140         env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1141         kvm_sync_excp(env, POWERPC_EXCP_FPU,  SPR_BOOKE_IVOR7);
1142         env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1143         kvm_sync_excp(env, POWERPC_EXCP_SYSCALL,  SPR_BOOKE_IVOR8);
1144         env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1145         kvm_sync_excp(env, POWERPC_EXCP_APU,  SPR_BOOKE_IVOR9);
1146         env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1147         kvm_sync_excp(env, POWERPC_EXCP_DECR,  SPR_BOOKE_IVOR10);
1148         env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1149         kvm_sync_excp(env, POWERPC_EXCP_FIT,  SPR_BOOKE_IVOR11);
1150         env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1151         kvm_sync_excp(env, POWERPC_EXCP_WDT,  SPR_BOOKE_IVOR12);
1152         env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1153         kvm_sync_excp(env, POWERPC_EXCP_DTLB,  SPR_BOOKE_IVOR13);
1154         env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1155         kvm_sync_excp(env, POWERPC_EXCP_ITLB,  SPR_BOOKE_IVOR14);
1156         env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1157         kvm_sync_excp(env, POWERPC_EXCP_DEBUG,  SPR_BOOKE_IVOR15);
1158 
1159         if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1160             env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1161             kvm_sync_excp(env, POWERPC_EXCP_SPEU,  SPR_BOOKE_IVOR32);
1162             env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1163             kvm_sync_excp(env, POWERPC_EXCP_EFPDI,  SPR_BOOKE_IVOR33);
1164             env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1165             kvm_sync_excp(env, POWERPC_EXCP_EFPRI,  SPR_BOOKE_IVOR34);
1166         }
1167 
1168         if (sregs.u.e.features & KVM_SREGS_E_PM) {
1169             env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1170             kvm_sync_excp(env, POWERPC_EXCP_EPERFM,  SPR_BOOKE_IVOR35);
1171         }
1172 
1173         if (sregs.u.e.features & KVM_SREGS_E_PC) {
1174             env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1175             kvm_sync_excp(env, POWERPC_EXCP_DOORI,  SPR_BOOKE_IVOR36);
1176             env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1177             kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1178         }
1179     }
1180 
1181     if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1182         env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1183         env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1184         env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1185         env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1186         env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1187         env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1188         env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1189         env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1190         env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1191         env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1192     }
1193 
1194     if (sregs.u.e.features & KVM_SREGS_EXP) {
1195         env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1196     }
1197 
1198     if (sregs.u.e.features & KVM_SREGS_E_PD) {
1199         env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1200         env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1201     }
1202 
1203     if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1204         env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1205         env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1206         env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1207 
1208         if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1209             env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1210             env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1211         }
1212     }
1213 
1214     return 0;
1215 }
1216 
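/* Read the Book3S segment state (SDR1, SLB, SRs and BATs) back from KVM. */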
1217 static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1218 {
1219     CPUPPCState *env = &cpu->env;
1220     struct kvm_sregs sregs;
1221     int ret;
1222     int i;
1223 
1224     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1225     if (ret < 0) {
1226         return ret;
1227     }
1228 
1229     if (!cpu->vhyp) {
1230         ppc_store_sdr1(env, sregs.u.s.sdr1);
1231     }
1232 
1233     /* Sync SLB */
1234 #ifdef TARGET_PPC64
1235     /*
1236      * The packed SLB array we get from KVM_GET_SREGS only contains
1237      * information about valid entries. So we flush our internal copy
1238      * to get rid of stale ones, then put all valid SLB entries back
1239      * in.
1240      */
1241     memset(env->slb, 0, sizeof(env->slb));
1242     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1243         target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1244         target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1245         /*
1246          * Only restore valid entries
1247          */
1248         if (rb & SLB_ESID_V) {
1249             ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1250         }
1251     }
1252 #endif
1253 
1254     /* Sync SRs */
1255     for (i = 0; i < 16; i++) {
1256         env->sr[i] = sregs.u.s.ppc32.sr[i];
1257     }
1258 
1259     /* Sync BATs */
1260     for (i = 0; i < 8; i++) {
1261         env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1262         env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1263         env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1264         env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1265     }
1266 
1267     return 0;
1268 }
1269 
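/* Counterpart of kvm_arch_put_registers(): pull the vCPU state out of KVM
 * into QEMU's env. */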
1270 int kvm_arch_get_registers(CPUState *cs)
1271 {
1272     PowerPCCPU *cpu = POWERPC_CPU(cs);
1273     CPUPPCState *env = &cpu->env;
1274     struct kvm_regs regs;
1275     uint32_t cr;
1276     int i, ret;
1277 
1278     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1279     if (ret < 0)
1280         return ret;
1281 
1282     cr = regs.cr;
1283     for (i = 7; i >= 0; i--) {
1284         env->crf[i] = cr & 15;
1285         cr >>= 4;
1286     }
1287 
1288     env->ctr = regs.ctr;
1289     env->lr = regs.lr;
1290     cpu_write_xer(env, regs.xer);
1291     env->msr = regs.msr;
1292     env->nip = regs.pc;
1293 
1294     env->spr[SPR_SRR0] = regs.srr0;
1295     env->spr[SPR_SRR1] = regs.srr1;
1296 
1297     env->spr[SPR_SPRG0] = regs.sprg0;
1298     env->spr[SPR_SPRG1] = regs.sprg1;
1299     env->spr[SPR_SPRG2] = regs.sprg2;
1300     env->spr[SPR_SPRG3] = regs.sprg3;
1301     env->spr[SPR_SPRG4] = regs.sprg4;
1302     env->spr[SPR_SPRG5] = regs.sprg5;
1303     env->spr[SPR_SPRG6] = regs.sprg6;
1304     env->spr[SPR_SPRG7] = regs.sprg7;
1305 
1306     env->spr[SPR_BOOKE_PID] = regs.pid;
1307 
 1308     for (i = 0; i < 32; i++)
1309         env->gpr[i] = regs.gpr[i];
1310 
1311     kvm_get_fp(cs);
1312 
1313     if (cap_booke_sregs) {
1314         ret = kvmppc_get_booke_sregs(cpu);
1315         if (ret < 0) {
1316             return ret;
1317         }
1318     }
1319 
1320     if (cap_segstate) {
1321         ret = kvmppc_get_books_sregs(cpu);
1322         if (ret < 0) {
1323             return ret;
1324         }
1325     }
1326 
1327     if (cap_hior) {
1328         kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1329     }
1330 
1331     if (cap_one_reg) {
1332         int i;
1333 
 1334         /* We deliberately ignore errors here: for kernels which have
 1335          * the ONE_REG calls but don't support the specific
 1336          * registers, there's a reasonable chance things will still
 1337          * work, at least until we try to migrate. */
1338         for (i = 0; i < 1024; i++) {
1339             uint64_t id = env->spr_cb[i].one_reg_id;
1340 
1341             if (id != 0) {
1342                 kvm_get_one_spr(cs, id, i);
1343             }
1344         }
1345 
1346 #ifdef TARGET_PPC64
1347         if (msr_ts) {
1348             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1349                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1350             }
1351             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1352                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1353             }
1354             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1355             kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1356             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1357             kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1358             kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1359             kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1360             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1361             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1362             kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1363             kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1364         }
1365 
1366         if (cap_papr) {
1367             if (kvm_get_vpa(cs) < 0) {
1368                 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1369             }
1370         }
1371 
1372         kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1373 #endif
1374     }
1375 
1376     return 0;
1377 }
1378 
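/* Raise or lower the guest's external interrupt line with KVM_INTERRUPT.
 * Other interrupt sources, and kernels without level-irq support, are
 * ignored here. */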
1379 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1380 {
1381     unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1382 
1383     if (irq != PPC_INTERRUPT_EXT) {
1384         return 0;
1385     }
1386 
1387     if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1388         return 0;
1389     }
1390 
1391     kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1392 
1393     return 0;
1394 }
1395 
1396 #if defined(TARGET_PPCEMB)
1397 #define PPC_INPUT_INT PPC40x_INPUT_INT
1398 #elif defined(TARGET_PPC64)
1399 #define PPC_INPUT_INT PPC970_INPUT_INT
1400 #else
1401 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1402 #endif
1403 
1404 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1405 {
1406     PowerPCCPU *cpu = POWERPC_CPU(cs);
1407     CPUPPCState *env = &cpu->env;
1408     int r;
1409     unsigned irq;
1410 
1411     qemu_mutex_lock_iothread();
1412 
1413     /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1414      * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1415     if (!cap_interrupt_level &&
1416         run->ready_for_interrupt_injection &&
1417         (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1418         (env->irq_input_state & (1<<PPC_INPUT_INT)))
1419     {
1420         /* For now KVM disregards the 'irq' argument. However, in the
1421          * future KVM could cache it in-kernel to avoid a heavyweight exit
1422          * when reading the UIC.
1423          */
1424         irq = KVM_INTERRUPT_SET;
1425 
1426         DPRINTF("injected interrupt %d\n", irq);
1427         r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1428         if (r < 0) {
1429             printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1430         }
1431 
1432         /* Always wake up soon in case the interrupt was level based */
1433         timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1434                        (NANOSECONDS_PER_SECOND / 50));
1435     }
1436 
1437     /* We don't know if there are more interrupts pending after this. However,
1438      * the guest will return to userspace in the course of handling this one
 1439      * anyway, so we will get a chance to deliver the rest. */
1440 
1441     qemu_mutex_unlock_iothread();
1442 }
1443 
1444 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1445 {
1446     return MEMTXATTRS_UNSPECIFIED;
1447 }
1448 
1449 int kvm_arch_process_async_events(CPUState *cs)
1450 {
1451     return cs->halted;
1452 }
1453 
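/* Guest halt: stop the vCPU only if external interrupts are enabled
 * (MSR_EE) and none is currently pending. */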
1454 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1455 {
1456     CPUState *cs = CPU(cpu);
1457     CPUPPCState *env = &cpu->env;
1458 
1459     if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1460         cs->halted = 1;
1461         cs->exception_index = EXCP_HLT;
1462     }
1463 
1464     return 0;
1465 }
1466 
 1467 /* Map DCR accesses to the existing QEMU DCR emulation */
1468 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1469 {
1470     if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1471         fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1472 
1473     return 0;
1474 }
1475 
1476 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1477 {
1478     if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1479         fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1480 
1481     return 0;
1482 }
1483 
1484 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1485 {
1486     /* Mixed endian case is not handled */
1487     uint32_t sc = debug_inst_opcode;
1488 
1489     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1490                             sizeof(sc), 0) ||
1491         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1492         return -EINVAL;
1493     }
1494 
1495     return 0;
1496 }
1497 
1498 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1499 {
1500     uint32_t sc;
1501 
1502     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1503         sc != debug_inst_opcode ||
1504         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1505                             sizeof(sc), 1)) {
1506         return -EINVAL;
1507     }
1508 
1509     return 0;
1510 }
1511 
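/* Look up a hardware breakpoint/watchpoint by address and GDB type;
 * returns its index in hw_debug_points[] or -1 if not found. */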
1512 static int find_hw_breakpoint(target_ulong addr, int type)
1513 {
1514     int n;
1515 
1516     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1517            <= ARRAY_SIZE(hw_debug_points));
1518 
1519     for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1520         if (hw_debug_points[n].addr == addr &&
1521              hw_debug_points[n].type == type) {
1522             return n;
1523         }
1524     }
1525 
1526     return -1;
1527 }
1528 
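/* Find a watchpoint matching addr regardless of its access type and
 * report the corresponding BP_MEM_* flag. */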
1529 static int find_hw_watchpoint(target_ulong addr, int *flag)
1530 {
1531     int n;
1532 
1533     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1534     if (n >= 0) {
1535         *flag = BP_MEM_ACCESS;
1536         return n;
1537     }
1538 
1539     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1540     if (n >= 0) {
1541         *flag = BP_MEM_WRITE;
1542         return n;
1543     }
1544 
1545     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1546     if (n >= 0) {
1547         *flag = BP_MEM_READ;
1548         return n;
1549     }
1550 
1551     return -1;
1552 }
1553 
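/* Record a hardware breakpoint/watchpoint request; the debug state is
 * only pushed to KVM later, in kvm_arch_update_guest_debug(). */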
1554 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1555                                   target_ulong len, int type)
1556 {
1557     if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1558         return -ENOBUFS;
1559     }
1560 
1561     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1562     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1563 
1564     switch (type) {
1565     case GDB_BREAKPOINT_HW:
1566         if (nb_hw_breakpoint >= max_hw_breakpoint) {
1567             return -ENOBUFS;
1568         }
1569 
1570         if (find_hw_breakpoint(addr, type) >= 0) {
1571             return -EEXIST;
1572         }
1573 
1574         nb_hw_breakpoint++;
1575         break;
1576 
1577     case GDB_WATCHPOINT_WRITE:
1578     case GDB_WATCHPOINT_READ:
1579     case GDB_WATCHPOINT_ACCESS:
1580         if (nb_hw_watchpoint >= max_hw_watchpoint) {
1581             return -ENOBUFS;
1582         }
1583 
1584         if (find_hw_breakpoint(addr, type) >= 0) {
1585             return -EEXIST;
1586         }
1587 
1588         nb_hw_watchpoint++;
1589         break;
1590 
1591     default:
1592         return -ENOSYS;
1593     }
1594 
1595     return 0;
1596 }
1597 
1598 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1599                                   target_ulong len, int type)
1600 {
1601     int n;
1602 
1603     n = find_hw_breakpoint(addr, type);
1604     if (n < 0) {
1605         return -ENOENT;
1606     }
1607 
1608     switch (type) {
1609     case GDB_BREAKPOINT_HW:
1610         nb_hw_breakpoint--;
1611         break;
1612 
1613     case GDB_WATCHPOINT_WRITE:
1614     case GDB_WATCHPOINT_READ:
1615     case GDB_WATCHPOINT_ACCESS:
1616         nb_hw_watchpoint--;
1617         break;
1618 
1619     default:
1620         return -ENOSYS;
1621     }
1622     hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1623 
1624     return 0;
1625 }
1626 
1627 void kvm_arch_remove_all_hw_breakpoints(void)
1628 {
1629     nb_hw_breakpoint = nb_hw_watchpoint = 0;
1630 }
1631 
1632 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1633 {
1634     int n;
1635 
1636     /* Software Breakpoint updates */
1637     if (kvm_sw_breakpoints_active(cs)) {
1638         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1639     }
1640 
1641     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1642            <= ARRAY_SIZE(hw_debug_points));
1643     assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1644 
1645     if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1646         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1647         memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1648         for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1649             switch (hw_debug_points[n].type) {
1650             case GDB_BREAKPOINT_HW:
1651                 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1652                 break;
1653             case GDB_WATCHPOINT_WRITE:
1654                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1655                 break;
1656             case GDB_WATCHPOINT_READ:
1657                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1658                 break;
1659             case GDB_WATCHPOINT_ACCESS:
1660                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1661                                         KVMPPC_DEBUG_WATCH_READ;
1662                 break;
1663             default:
1664                 cpu_abort(cs, "Unsupported breakpoint type\n");
1665             }
1666             dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1667         }
1668     }
1669 }
1670 
1671 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1672 {
1673     CPUState *cs = CPU(cpu);
1674     CPUPPCState *env = &cpu->env;
1675     struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1676     int handle = 0;
1677     int n;
1678     int flag = 0;
1679 
1680     if (cs->singlestep_enabled) {
1681         handle = 1;
1682     } else if (arch_info->status) {
1683         if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1684             if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1685                 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1686                 if (n >= 0) {
1687                     handle = 1;
1688                 }
1689             } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1690                                             KVMPPC_DEBUG_WATCH_WRITE)) {
1691                 n = find_hw_watchpoint(arch_info->address, &flag);
1692                 if (n >= 0) {
1693                     handle = 1;
1694                     cs->watchpoint_hit = &hw_watchpoint;
1695                     hw_watchpoint.vaddr = hw_debug_points[n].addr;
1696                     hw_watchpoint.flags = flag;
1697                 }
1698             }
1699         }
1700     } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1701         handle = 1;
1702     } else {
1703         /* QEMU cannot handle this debug exception, so inject a
1704          * program exception into the guest instead.
1705          * Yes, a program exception, NOT a debug exception!
1706          * While QEMU owns the debug resources, debug exceptions
1707          * must always reach QEMU: we set MSR_DE and also MSRP_DEP
1708          * so the guest cannot change MSR_DE.
1709          * When emulating debug resources for the guest, the guest
1710          * must control MSR_DE (enabling/disabling debug interrupts
1711          * on demand).
1712          * Supporting both configurations at once is NOT possible,
1713          * so debug resources cannot be shared between QEMU and the
1714          * guest on BookE.
1715          * In the current design QEMU takes priority: while QEMU
1716          * uses the debug resources the guest cannot use them.
1717          * For software breakpoints QEMU uses a privileged
1718          * instruction, so we cannot be here because the guest set
1719          * up a debug exception; the only possibility is that the
1720          * guest executed a privileged / illegal instruction, which
1721          * is why we inject a program interrupt.
1722          */
1723 
1724         cpu_synchronize_state(cs);
1725         /* env->nip is the PC, so advance it by 4 before calling
1726          * ppc_cpu_do_interrupt(), which sets srr0 = env->nip - 4.
1727          */
1728         env->nip += 4;
1729         cs->exception_index = POWERPC_EXCP_PROGRAM;
1730         env->error_code = POWERPC_EXCP_INVAL;
1731         ppc_cpu_do_interrupt(cs);
1732     }
1733 
1734     return handle;
1735 }
1736 
1737 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1738 {
1739     PowerPCCPU *cpu = POWERPC_CPU(cs);
1740     CPUPPCState *env = &cpu->env;
1741     int ret;
1742 
1743     qemu_mutex_lock_iothread();
1744 
1745     switch (run->exit_reason) {
1746     case KVM_EXIT_DCR:
1747         if (run->dcr.is_write) {
1748             DPRINTF("handle dcr write\n");
1749             ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1750         } else {
1751             DPRINTF("handle dcr read\n");
1752             ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1753         }
1754         break;
1755     case KVM_EXIT_HLT:
1756         DPRINTF("handle halt\n");
1757         ret = kvmppc_handle_halt(cpu);
1758         break;
1759 #if defined(TARGET_PPC64)
1760     case KVM_EXIT_PAPR_HCALL:
1761         DPRINTF("handle PAPR hypercall\n");
1762         run->papr_hcall.ret = spapr_hypercall(cpu,
1763                                               run->papr_hcall.nr,
1764                                               run->papr_hcall.args);
1765         ret = 0;
1766         break;
1767 #endif
1768     case KVM_EXIT_EPR:
1769         DPRINTF("handle epr\n");
1770         run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1771         ret = 0;
1772         break;
1773     case KVM_EXIT_WATCHDOG:
1774         DPRINTF("handle watchdog expiry\n");
1775         watchdog_perform_action();
1776         ret = 0;
1777         break;
1778 
1779     case KVM_EXIT_DEBUG:
1780         DPRINTF("handle debug exception\n");
1781         if (kvm_handle_debug(cpu, run)) {
1782             ret = EXCP_DEBUG;
1783             break;
1784         }
1785         /* re-enter, this exception was guest-internal */
1786         ret = 0;
1787         break;
1788 
1789     default:
1790         fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1791         ret = -1;
1792         break;
1793     }
1794 
1795     qemu_mutex_unlock_iothread();
1796     return ret;
1797 }
1798 
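/* The next few helpers poke the BookE timer state (TSR/TCR) through
 * the KVM ONE_REG interface without a full register synchronisation;
 * they are primarily used by the BookE watchdog emulation.  A caller
 * might, for instance, clear the watchdog status bits with something
 * like the following (the bit names here are illustrative only):
 *
 *     kvmppc_clear_tsr_bits(cpu, TSR_ENW | TSR_WIS);
 */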
1799 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1800 {
1801     CPUState *cs = CPU(cpu);
1802     uint32_t bits = tsr_bits;
1803     struct kvm_one_reg reg = {
1804         .id = KVM_REG_PPC_OR_TSR,
1805         .addr = (uintptr_t) &bits,
1806     };
1807 
1808     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1809 }
1810 
1811 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1812 {
1813 
1814     CPUState *cs = CPU(cpu);
1815     uint32_t bits = tsr_bits;
1816     struct kvm_one_reg reg = {
1817         .id = KVM_REG_PPC_CLEAR_TSR,
1818         .addr = (uintptr_t) &bits,
1819     };
1820 
1821     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1822 }
1823 
1824 int kvmppc_set_tcr(PowerPCCPU *cpu)
1825 {
1826     CPUState *cs = CPU(cpu);
1827     CPUPPCState *env = &cpu->env;
1828     uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1829 
1830     struct kvm_one_reg reg = {
1831         .id = KVM_REG_PPC_TCR,
1832         .addr = (uintptr_t) &tcr,
1833     };
1834 
1835     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1836 }
1837 
1838 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1839 {
1840     CPUState *cs = CPU(cpu);
1841     int ret;
1842 
1843     if (!kvm_enabled()) {
1844         return -1;
1845     }
1846 
1847     if (!cap_ppc_watchdog) {
1848         printf("warning: KVM does not support watchdog\n");
1849         return -1;
1850     }
1851 
1852     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1853     if (ret < 0) {
1854         fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1855                 __func__, strerror(-ret));
1856         return ret;
1857     }
1858 
1859     return ret;
1860 }
1861 
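/* Copy the first /proc/cpuinfo line starting with @field into @value
 * (the whole "field : data" line, truncated to @len bytes).  Returns
 * 0 on success, or -1 if the file cannot be opened or the field is
 * not present.
 */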
1862 static int read_cpuinfo(const char *field, char *value, int len)
1863 {
1864     FILE *f;
1865     int ret = -1;
1866     int field_len = strlen(field);
1867     char line[512];
1868 
1869     f = fopen("/proc/cpuinfo", "r");
1870     if (!f) {
1871         return -1;
1872     }
1873 
1874     do {
1875         if (!fgets(line, sizeof(line), f)) {
1876             break;
1877         }
1878         if (!strncmp(line, field, field_len)) {
1879             pstrcpy(value, len, line);
1880             ret = 0;
1881             break;
1882         }
1883     } while (*line);
1884 
1885     fclose(f);
1886 
1887     return ret;
1888 }
1889 
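/* Return the host timebase frequency in Hz, parsed from the
 * "timebase" line of /proc/cpuinfo, which typically looks like:
 *
 *     timebase        : 512000000
 *
 * Falls back to NANOSECONDS_PER_SECOND if the line cannot be found.
 */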
1890 uint32_t kvmppc_get_tbfreq(void)
1891 {
1892     char line[512];
1893     char *ns;
1894     uint32_t retval = NANOSECONDS_PER_SECOND;
1895 
1896     if (read_cpuinfo("timebase", line, sizeof(line))) {
1897         return retval;
1898     }
1899 
1900     if (!(ns = strchr(line, ':'))) {
1901         return retval;
1902     }
1903 
1904     ns++;
1905 
1906     return atoi(ns);
1907 }
1908 
1909 bool kvmppc_get_host_serial(char **value)
1910 {
1911     return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
1912                                NULL);
1913 }
1914 
1915 bool kvmppc_get_host_model(char **value)
1916 {
1917     return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
1918 }
1919 
1920 /* Try to find a device tree node for a CPU with clock-frequency property */
1921 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1922 {
1923     struct dirent *dirp;
1924     DIR *dp;
1925 
1926     if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1927         printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1928         return -1;
1929     }
1930 
1931     buf[0] = '\0';
1932     while ((dirp = readdir(dp)) != NULL) {
1933         FILE *f;
1934         snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1935                  dirp->d_name);
1936         f = fopen(buf, "r");
1937         if (f) {
1938             snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1939             fclose(f);
1940             break;
1941         }
1942         buf[0] = '\0';
1943     }
1944     closedir(dp);
1945     if (buf[0] == '\0') {
1946         printf("Unknown host!\n");
1947         return -1;
1948     }
1949 
1950     return 0;
1951 }
1952 
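/* Read a device tree property file containing a single big-endian
 * integer.  A 4-byte file yields a 32-bit value and an 8-byte file a
 * 64-bit value; any other length yields 0, and -1 (as an unsigned
 * value) is returned if the file cannot be opened.
 */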
1953 static uint64_t kvmppc_read_int_dt(const char *filename)
1954 {
1955     union {
1956         uint32_t v32;
1957         uint64_t v64;
1958     } u;
1959     FILE *f;
1960     int len;
1961 
1962     f = fopen(filename, "rb");
1963     if (!f) {
1964         return -1;
1965     }
1966 
1967     len = fread(&u, 1, sizeof(u), f);
1968     fclose(f);
1969     switch (len) {
1970     case 4:
1971         /* property is a 32-bit quantity */
1972         return be32_to_cpu(u.v32);
1973     case 8:
1974         return be64_to_cpu(u.v64);
1975     }
1976 
1977     return 0;
1978 }
1979 
1980 /* Read a CPU node property from the host device tree that's a single
1981  * integer (32-bit or 64-bit).  Returns 0 if the property has an
1982  * unexpected size, or (uint64_t)-1 if the CPU node or the property
1983  * cannot be found or opened. */
1984 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1985 {
1986     char buf[PATH_MAX], *tmp;
1987     uint64_t val;
1988 
1989     if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1990         return -1;
1991     }
1992 
1993     tmp = g_strdup_printf("%s/%s", buf, propname);
1994     val = kvmppc_read_int_dt(tmp);
1995     g_free(tmp);
1996 
1997     return val;
1998 }
1999 
2000 uint64_t kvmppc_get_clockfreq(void)
2001 {
2002     return kvmppc_read_int_cpu_dt("clock-frequency");
2003 }
2004 
2005 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
2006 {
2007     PowerPCCPU *cpu = ppc_env_get_cpu(env);
2008     CPUState *cs = CPU(cpu);
2009 
2010     if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
2011         !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
2012         return 0;
2013     }
2014 
2015     return 1;
2016 }
2017 
2018 int kvmppc_get_hasidle(CPUPPCState *env)
2019 {
2020     struct kvm_ppc_pvinfo pvinfo;
2021 
2022     if (!kvmppc_get_pvinfo(env, &pvinfo) &&
2023         (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
2024         return 1;
2025     }
2026 
2027     return 0;
2028 }
2029 
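/* Fill @buf with the instruction sequence a guest should use to make
 * a hypercall (4 instructions, 16 bytes).  Returns 0 when the
 * sequence was provided by KVM via KVM_PPC_GET_PVINFO, or 1 when the
 * always-failing fallback below was used instead.
 */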
2030 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
2031 {
2032     uint32_t *hc = (uint32_t *)buf;
2033     struct kvm_ppc_pvinfo pvinfo;
2034 
2035     if (!kvmppc_get_pvinfo(env, &pvinfo)) {
2036         memcpy(buf, pvinfo.hcall, buf_len);
2037         return 0;
2038     }
2039 
2040     /*
2041      * Fall back to always-failing hypercalls, regardless of endianness:
2042      *
2043      *     tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
2044      *     li r3, -1
2045      *     b .+8       (becomes nop in wrong endian)
2046      *     bswap32(li r3, -1)
2047      */
2048 
2049     hc[0] = cpu_to_be32(0x08000048);
2050     hc[1] = cpu_to_be32(0x3860ffff);
2051     hc[2] = cpu_to_be32(0x48000008);
2052     hc[3] = cpu_to_be32(bswap32(0x3860ffff));
2053 
2054     return 1;
2055 }
2056 
2057 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
2058 {
2059     return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
2060 }
2061 
2062 void kvmppc_enable_logical_ci_hcalls(void)
2063 {
2064     /*
2065      * FIXME: it would be nice to produce a warning when we detect
2066      * that a device in use requires the in-kernel implementation
2067      * of these hcalls but the running kernel does not provide
2068      * them.
2069      */
2070     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
2071     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
2072 }
2073 
2074 void kvmppc_enable_set_mode_hcall(void)
2075 {
2076     kvmppc_enable_hcall(kvm_state, H_SET_MODE);
2077 }
2078 
2079 void kvmppc_enable_clear_ref_mod_hcalls(void)
2080 {
2081     kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
2082     kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
2083 }
2084 
2085 void kvmppc_set_papr(PowerPCCPU *cpu)
2086 {
2087     CPUState *cs = CPU(cpu);
2088     int ret;
2089 
2090     if (!kvm_enabled()) {
2091         return;
2092     }
2093 
2094     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2095     if (ret) {
2096         error_report("This vCPU type or KVM version does not support PAPR");
2097         exit(1);
2098     }
2099 
2100     /* Update the capability flag so we sync the right information
2101      * with kvm */
2102     cap_papr = 1;
2103 }
2104 
2105 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr)
2106 {
2107     return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr);
2108 }
2109 
2110 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2111 {
2112     CPUState *cs = CPU(cpu);
2113     int ret;
2114 
2115     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2116     if (ret && mpic_proxy) {
2117         error_report("This KVM version does not support EPR");
2118         exit(1);
2119     }
2120 }
2121 
2122 int kvmppc_smt_threads(void)
2123 {
2124     return cap_ppc_smt ? cap_ppc_smt : 1;
2125 }
2126 
2127 int kvmppc_set_smt_threads(int smt)
2128 {
2129     int ret;
2130 
2131     ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_SMT, 0, smt, 0);
2132     if (!ret) {
2133         cap_ppc_smt = smt;
2134     }
2135     return ret;
2136 }
2137 
2138 void kvmppc_hint_smt_possible(Error **errp)
2139 {
2140     int i;
2141     GString *g;
2142     char *s;
2143 
2144     assert(kvm_enabled());
2145     if (cap_ppc_smt_possible) {
2146         g = g_string_new("Available VSMT modes:");
2147         for (i = 63; i >= 0; i--) {
2148             if ((1UL << i) & cap_ppc_smt_possible) {
2149                 g_string_append_printf(g, " %lu", (1UL << i));
2150             }
2151         }
2152         s = g_string_free(g, false);
2153         error_append_hint(errp, "%s.\n", s);
2154         g_free(s);
2155     } else {
2156         error_append_hint(errp,
2157                           "This KVM seems to be too old to support VSMT.\n");
2158     }
2159 }
2160 
2161 
2162 #ifdef TARGET_PPC64
2163 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2164 {
2165     struct kvm_ppc_smmu_info info;
2166     long rampagesize, best_page_shift;
2167     int i;
2168 
2169     /* Find the largest hardware supported page size that's less than
2170      * or equal to the (logical) backing page size of guest RAM */
2171     kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
2172     rampagesize = qemu_getrampagesize();
2173     best_page_shift = 0;
2174 
2175     for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2176         struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2177 
2178         if (!sps->page_shift) {
2179             continue;
2180         }
2181 
2182         if ((sps->page_shift > best_page_shift)
2183             && ((1UL << sps->page_shift) <= rampagesize)) {
2184             best_page_shift = sps->page_shift;
2185         }
2186     }
2187 
2188     return MIN(current_size,
2189                1ULL << (best_page_shift + hash_shift - 7));
2190 }
2191 #endif
2192 
2193 bool kvmppc_spapr_use_multitce(void)
2194 {
2195     return cap_spapr_multitce;
2196 }
2197 
2198 int kvmppc_spapr_enable_inkernel_multitce(void)
2199 {
2200     int ret;
2201 
2202     ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2203                             H_PUT_TCE_INDIRECT, 1);
2204     if (!ret) {
2205         ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2206                                 H_STUFF_TCE, 1);
2207     }
2208 
2209     return ret;
2210 }
2211 
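/* Create an in-kernel TCE table for @liobn and mmap() it into QEMU.
 * On success the table pointer is returned and *pfd is set to the
 * descriptor backing the mapping; a NULL return (with *pfd left at
 * -1) tells the caller to fall back to a userspace-managed table.
 */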
2212 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
2213                               uint64_t bus_offset, uint32_t nb_table,
2214                               int *pfd, bool need_vfio)
2215 {
2216     long len;
2217     int fd;
2218     void *table;
2219 
2220     /* Must set fd to -1 so we don't try to munmap when called for
2221      * destroying the table, which the upper layers -will- do
2222      */
2223     *pfd = -1;
2224     if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2225         return NULL;
2226     }
2227 
2228     if (cap_spapr_tce_64) {
2229         struct kvm_create_spapr_tce_64 args = {
2230             .liobn = liobn,
2231             .page_shift = page_shift,
2232             .offset = bus_offset >> page_shift,
2233             .size = nb_table,
2234             .flags = 0
2235         };
2236         fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE_64, &args);
2237         if (fd < 0) {
2238             fprintf(stderr,
2239                     "KVM: Failed to create TCE64 table for liobn 0x%x\n",
2240                     liobn);
2241             return NULL;
2242         }
2243     } else if (cap_spapr_tce) {
2244         uint64_t window_size = (uint64_t) nb_table << page_shift;
2245         struct kvm_create_spapr_tce args = {
2246             .liobn = liobn,
2247             .window_size = window_size,
2248         };
2249         if ((window_size != args.window_size) || bus_offset) {
2250             return NULL;
2251         }
2252         fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2253         if (fd < 0) {
2254             fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2255                     liobn);
2256             return NULL;
2257         }
2258     } else {
2259         return NULL;
2260     }
2261 
2262     len = nb_table * sizeof(uint64_t);
2263     /* FIXME: round this up to page size */
2264 
2265     table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2266     if (table == MAP_FAILED) {
2267         fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2268                 liobn);
2269         close(fd);
2270         return NULL;
2271     }
2272 
2273     *pfd = fd;
2274     return table;
2275 }
2276 
2277 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
2278 {
2279     long len;
2280 
2281     if (fd < 0) {
2282         return -1;
2283     }
2284 
2285     len = nb_table * sizeof(uint64_t);
2286     if ((munmap(table, len) < 0) ||
2287         (close(fd) < 0)) {
2288         fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
2289                 strerror(errno));
2290         /* Leak the table */
2291     }
2292 
2293     return 0;
2294 }
2295 
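/* Ask KVM to (re)allocate the guest hash page table, using shift_hint
 * as the preferred size.  Returns the shift of the allocated HPT,
 * 0 when QEMU should allocate the table itself (full emulation or PR
 * KVM), or a negative errno on failure.  Kernels predating the
 * allocation ioctl are assumed to have a fixed 16MB (shift 24) table
 * when running HV.
 */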
2296 int kvmppc_reset_htab(int shift_hint)
2297 {
2298     uint32_t shift = shift_hint;
2299 
2300     if (!kvm_enabled()) {
2301         /* Full emulation, tell caller to allocate htab itself */
2302         return 0;
2303     }
2304     if (kvm_vm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2305         int ret;
2306         ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2307         if (ret == -ENOTTY) {
2308             /* At least some versions of PR KVM advertise the
2309              * capability, but don't implement the ioctl().  Oops.
2310              * Return 0 so that we allocate the htab in qemu, as is
2311              * correct for PR. */
2312             return 0;
2313         } else if (ret < 0) {
2314             return ret;
2315         }
2316         return shift;
2317     }
2318 
2319     /* We have a kernel that predates the htab reset calls.  For PR
2320      * KVM, we need to allocate the htab ourselves; an HV KVM of
2321      * this era has already allocated a fixed 16MB hash table. */
2322     if (kvmppc_is_pr(kvm_state)) {
2323         /* PR - tell caller to allocate htab */
2324         return 0;
2325     } else {
2326         /* HV - assume 16MB kernel allocated htab */
2327         return 24;
2328     }
2329 }
2330 
2331 static inline uint32_t mfpvr(void)
2332 {
2333     uint32_t pvr;
2334 
2335     asm ("mfpvr %0"
2336          : "=r"(pvr));
2337     return pvr;
2338 }
2339 
2340 static void alter_insns(uint64_t *word, uint64_t flags, bool on)
2341 {
2342     if (on) {
2343         *word |= flags;
2344     } else {
2345         *word &= ~flags;
2346     }
2347 }
2348 
2349 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2350 {
2351     PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2352     uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2353     uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2354 
2355     /* Now fix up the class with information we can query from the host */
2356     pcc->pvr = mfpvr();
2357 
2358     alter_insns(&pcc->insns_flags, PPC_ALTIVEC,
2359                 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_ALTIVEC);
2360     alter_insns(&pcc->insns_flags2, PPC2_VSX,
2361                 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_VSX);
2362     alter_insns(&pcc->insns_flags2, PPC2_DFP,
2363                 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_DFP);
2364 
2365     if (dcache_size != -1) {
2366         pcc->l1_dcache_size = dcache_size;
2367     }
2368 
2369     if (icache_size != -1) {
2370         pcc->l1_icache_size = icache_size;
2371     }
2372 
2373 #if defined(TARGET_PPC64)
2374     pcc->radix_page_info = kvm_get_radix_page_info();
2375 
2376     if ((pcc->pvr & 0xffffff00) == CPU_POWERPC_POWER9_DD1) {
2377         /*
2378          * POWER9 DD1 has some bugs which make it not really ISA 3.00
2379          * compliant.  More importantly, advertising ISA 3.00
2380          * architected mode may prevent guests from activating
2381          * necessary DD1 workarounds.
2382          */
2383         pcc->pcr_supported &= ~(PCR_COMPAT_3_00 | PCR_COMPAT_2_07
2384                                 | PCR_COMPAT_2_06 | PCR_COMPAT_2_05);
2385     }
2386 #endif /* defined(TARGET_PPC64) */
2387 }
2388 
2389 bool kvmppc_has_cap_epr(void)
2390 {
2391     return cap_epr;
2392 }
2393 
2394 bool kvmppc_has_cap_fixup_hcalls(void)
2395 {
2396     return cap_fixup_hcalls;
2397 }
2398 
2399 bool kvmppc_has_cap_htm(void)
2400 {
2401     return cap_htm;
2402 }
2403 
2404 bool kvmppc_has_cap_mmu_radix(void)
2405 {
2406     return cap_mmu_radix;
2407 }
2408 
2409 bool kvmppc_has_cap_mmu_hash_v3(void)
2410 {
2411     return cap_mmu_hash_v3;
2412 }
2413 
2414 static bool kvmppc_power8_host(void)
2415 {
2416     bool ret = false;
2417 #ifdef TARGET_PPC64
2418     {
2419         uint32_t base_pvr = CPU_POWERPC_POWER_SERVER_MASK & mfpvr();
2420         ret = (base_pvr == CPU_POWERPC_POWER8E_BASE) ||
2421               (base_pvr == CPU_POWERPC_POWER8NVL_BASE) ||
2422               (base_pvr == CPU_POWERPC_POWER8_BASE);
2423     }
2424 #endif /* TARGET_PPC64 */
2425     return ret;
2426 }
2427 
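/* The parse_cap_ppc_safe_* helpers translate the KVM_PPC_GET_CPU_CHAR
 * bits into the tri-state values used for the sPAPR security caps:
 * 0 when no mitigation is available (treated as vulnerable), 1 when a
 * workaround exists and 2 when the CPU behaviour makes the workaround
 * unnecessary.  The indirect branch variant instead returns one of
 * the SPAPR_CAP_FIXED_* constants.
 */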
2428 static int parse_cap_ppc_safe_cache(struct kvm_ppc_cpu_char c)
2429 {
2430     bool l1d_thread_priv_req = !kvmppc_power8_host();
2431 
2432     if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_L1D_FLUSH_PR) {
2433         return 2;
2434     } else if ((!l1d_thread_priv_req ||
2435                 c.character & c.character_mask & H_CPU_CHAR_L1D_THREAD_PRIV) &&
2436                (c.character & c.character_mask
2437                 & (H_CPU_CHAR_L1D_FLUSH_ORI30 | H_CPU_CHAR_L1D_FLUSH_TRIG2))) {
2438         return 1;
2439     }
2440 
2441     return 0;
2442 }
2443 
2444 static int parse_cap_ppc_safe_bounds_check(struct kvm_ppc_cpu_char c)
2445 {
2446     if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_BNDS_CHK_SPEC_BAR) {
2447         return 2;
2448     } else if (c.character & c.character_mask & H_CPU_CHAR_SPEC_BAR_ORI31) {
2449         return 1;
2450     }
2451 
2452     return 0;
2453 }
2454 
2455 static int parse_cap_ppc_safe_indirect_branch(struct kvm_ppc_cpu_char c)
2456 {
2457     if (c.character & c.character_mask & H_CPU_CHAR_CACHE_COUNT_DIS) {
2458         return SPAPR_CAP_FIXED_CCD;
2459     } else if (c.character & c.character_mask & H_CPU_CHAR_BCCTRL_SERIALISED) {
2460         return SPAPR_CAP_FIXED_IBS;
2461     }
2462 
2463     return 0;
2464 }
2465 
2466 static void kvmppc_get_cpu_characteristics(KVMState *s)
2467 {
2468     struct kvm_ppc_cpu_char c;
2469     int ret;
2470 
2471     /* Assume broken */
2472     cap_ppc_safe_cache = 0;
2473     cap_ppc_safe_bounds_check = 0;
2474     cap_ppc_safe_indirect_branch = 0;
2475 
2476     ret = kvm_vm_check_extension(s, KVM_CAP_PPC_GET_CPU_CHAR);
2477     if (!ret) {
2478         return;
2479     }
2480     ret = kvm_vm_ioctl(s, KVM_PPC_GET_CPU_CHAR, &c);
2481     if (ret < 0) {
2482         return;
2483     }
2484 
2485     cap_ppc_safe_cache = parse_cap_ppc_safe_cache(c);
2486     cap_ppc_safe_bounds_check = parse_cap_ppc_safe_bounds_check(c);
2487     cap_ppc_safe_indirect_branch = parse_cap_ppc_safe_indirect_branch(c);
2488 }
2489 
2490 int kvmppc_get_cap_safe_cache(void)
2491 {
2492     return cap_ppc_safe_cache;
2493 }
2494 
2495 int kvmppc_get_cap_safe_bounds_check(void)
2496 {
2497     return cap_ppc_safe_bounds_check;
2498 }
2499 
2500 int kvmppc_get_cap_safe_indirect_branch(void)
2501 {
2502     return cap_ppc_safe_indirect_branch;
2503 }
2504 
2505 bool kvmppc_has_cap_spapr_vfio(void)
2506 {
2507     return cap_spapr_vfio;
2508 }
2509 
2510 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
2511 {
2512     uint32_t host_pvr = mfpvr();
2513     PowerPCCPUClass *pvr_pcc;
2514 
2515     pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2516     if (pvr_pcc == NULL) {
2517         pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2518     }
2519 
2520     return pvr_pcc;
2521 }
2522 
2523 static int kvm_ppc_register_host_cpu_type(MachineState *ms)
2524 {
2525     TypeInfo type_info = {
2526         .name = TYPE_HOST_POWERPC_CPU,
2527         .class_init = kvmppc_host_cpu_class_init,
2528     };
2529     MachineClass *mc = MACHINE_GET_CLASS(ms);
2530     PowerPCCPUClass *pvr_pcc;
2531     ObjectClass *oc;
2532     DeviceClass *dc;
2533     int i;
2534 
2535     pvr_pcc = kvm_ppc_get_host_cpu_class();
2536     if (pvr_pcc == NULL) {
2537         return -1;
2538     }
2539     type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2540     type_register(&type_info);
2541     if (object_dynamic_cast(OBJECT(ms), TYPE_SPAPR_MACHINE)) {
2542         /* override TCG default cpu type with 'host' cpu model */
2543         mc->default_cpu_type = TYPE_HOST_POWERPC_CPU;
2544     }
2545 
2546     oc = object_class_by_name(type_info.name);
2547     g_assert(oc);
2548 
2549     /*
2550      * Update generic CPU family class alias (e.g. on a POWER8NVL host,
2551      * we want "POWER8" to be a "family" alias that points to the current
2552      * host CPU type, too)
2553      */
2554     dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc));
2555     for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) {
2556         if (strcasecmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) {
2557             char *suffix;
2558 
2559             ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc));
2560             suffix = strstr(ppc_cpu_aliases[i].model, POWERPC_CPU_TYPE_SUFFIX);
2561             if (suffix) {
2562                 *suffix = 0;
2563             }
2564             break;
2565         }
2566     }
2567 
2568     return 0;
2569 }
2570 
2571 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2572 {
2573     struct kvm_rtas_token_args args = {
2574         .token = token,
2575     };
2576 
2577     if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2578         return -ENOENT;
2579     }
2580 
2581     strncpy(args.name, function, sizeof(args.name));
2582 
2583     return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2584 }
2585 
2586 int kvmppc_get_htab_fd(bool write, uint64_t index, Error **errp)
2587 {
2588     struct kvm_get_htab_fd s = {
2589         .flags = write ? KVM_GET_HTAB_WRITE : 0,
2590         .start_index = index,
2591     };
2592     int ret;
2593 
2594     if (!cap_htab_fd) {
2595         error_setg(errp, "KVM version doesn't support %s the HPT",
2596                    write ? "writing" : "reading");
2597         return -ENOTSUP;
2598     }
2599 
2600     ret = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2601     if (ret < 0) {
2602         error_setg(errp, "Unable to open fd for %s HPT %s KVM: %s",
2603                    write ? "writing" : "reading", write ? "to" : "from",
2604                    strerror(errno));
2605         return -errno;
2606     }
2607 
2608     return ret;
2609 }
2610 
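/* Stream the hash page table out for migration.  The kernel HTAB fd
 * returns a sequence of chunks, each laid out as
 *
 *     struct kvm_get_htab_header { index, n_valid, n_invalid }
 *     followed by n_valid * HASH_PTE_SIZE_64 bytes of HPTEs
 *
 * which we forward as big-endian index/n_valid/n_invalid fields plus
 * the raw HPTE data.  Returns 1 once the whole table has been read
 * (EOF on the fd), 0 if we stopped early because max_ns expired, or a
 * negative errno on read failure.
 */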
2611 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2612 {
2613     int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2614     uint8_t buf[bufsize];
2615     ssize_t rc;
2616 
2617     do {
2618         rc = read(fd, buf, bufsize);
2619         if (rc < 0) {
2620             fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2621                     strerror(errno));
2622             return rc;
2623         } else if (rc) {
2624             uint8_t *buffer = buf;
2625             ssize_t n = rc;
2626             while (n) {
2627                 struct kvm_get_htab_header *head =
2628                     (struct kvm_get_htab_header *) buffer;
2629                 size_t chunksize = sizeof(*head) +
2630                      HASH_PTE_SIZE_64 * head->n_valid;
2631 
2632                 qemu_put_be32(f, head->index);
2633                 qemu_put_be16(f, head->n_valid);
2634                 qemu_put_be16(f, head->n_invalid);
2635                 qemu_put_buffer(f, (void *)(head + 1),
2636                                 HASH_PTE_SIZE_64 * head->n_valid);
2637 
2638                 buffer += chunksize;
2639                 n -= chunksize;
2640             }
2641         }
2642     } while ((rc != 0)
2643              && ((max_ns < 0)
2644                  || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2645 
2646     return (rc == 0) ? 1 : 0;
2647 }
2648 
2649 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2650                            uint16_t n_valid, uint16_t n_invalid)
2651 {
2652     struct kvm_get_htab_header *buf;
2653     size_t chunksize = sizeof(*buf) + n_valid * HASH_PTE_SIZE_64;
2654     ssize_t rc;
2655 
2656     buf = alloca(chunksize);
2657     buf->index = index;
2658     buf->n_valid = n_valid;
2659     buf->n_invalid = n_invalid;
2660 
2661     qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64 * n_valid);
2662 
2663     rc = write(fd, buf, chunksize);
2664     if (rc < 0) {
2665         fprintf(stderr, "Error writing KVM hash table: %s\n",
2666                 strerror(errno));
2667         return rc;
2668     }
2669     if (rc != chunksize) {
2670         /* We should never get a short write on a single chunk */
2671         fprintf(stderr, "Short write, restoring KVM hash table\n");
2672         return -1;
2673     }
2674     return 0;
2675 }
2676 
2677 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2678 {
2679     return true;
2680 }
2681 
2682 void kvm_arch_init_irq_routing(KVMState *s)
2683 {
2684 }
2685 
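/* Read @n HPTEs starting at @ptex from the kernel HTAB fd into
 * @hptes.  Ranges the kernel reports as invalid (n_invalid) are
 * zero-filled so the caller always receives @n contiguous entries.
 */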
2686 void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n)
2687 {
2688     int fd, rc;
2689     int i;
2690 
2691     fd = kvmppc_get_htab_fd(false, ptex, &error_abort);
2692 
2693     i = 0;
2694     while (i < n) {
2695         struct kvm_get_htab_header *hdr;
2696         int m = n < HPTES_PER_GROUP ? n : HPTES_PER_GROUP;
2697         char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64];
2698 
2699         rc = read(fd, buf, sizeof(buf));
2700         if (rc < 0) {
2701             hw_error("kvmppc_read_hptes: Unable to read HPTEs");
2702         }
2703 
2704         hdr = (struct kvm_get_htab_header *)buf;
2705         while ((i < n) && ((char *)hdr < (buf + rc))) {
2706             int invalid = hdr->n_invalid, valid = hdr->n_valid;
2707 
2708             if (hdr->index != (ptex + i)) {
2709                 hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32
2710                          " != (%"HWADDR_PRIu" + %d)", hdr->index, ptex, i);
2711             }
2712 
2713             if (n - i < valid) {
2714                 valid = n - i;
2715             }
2716             memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * valid);
2717             i += valid;
2718 
2719             if ((n - i) < invalid) {
2720                 invalid = n - i;
2721             }
2722             memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64);
2723             i += invalid;
2724 
2725             hdr = (struct kvm_get_htab_header *)
2726                 ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid);
2727         }
2728     }
2729 
2730     close(fd);
2731 }
2732 
2733 void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1)
2734 {
2735     int fd, rc;
2736     struct {
2737         struct kvm_get_htab_header hdr;
2738         uint64_t pte0;
2739         uint64_t pte1;
2740     } buf;
2741 
2742     fd = kvmppc_get_htab_fd(true, 0 /* Ignored */, &error_abort);
2743 
2744     buf.hdr.n_valid = 1;
2745     buf.hdr.n_invalid = 0;
2746     buf.hdr.index = ptex;
2747     buf.pte0 = cpu_to_be64(pte0);
2748     buf.pte1 = cpu_to_be64(pte1);
2749 
2750     rc = write(fd, &buf, sizeof(buf));
2751     if (rc != sizeof(buf)) {
2752         hw_error("kvmppc_write_hpte: Unable to update KVM HPT");
2753     }
2754     close(fd);
2755 }
2756 
2757 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2758                              uint64_t address, uint32_t data, PCIDevice *dev)
2759 {
2760     return 0;
2761 }
2762 
2763 int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
2764                                 int vector, PCIDevice *dev)
2765 {
2766     return 0;
2767 }
2768 
2769 int kvm_arch_release_virq_post(int virq)
2770 {
2771     return 0;
2772 }
2773 
2774 int kvm_arch_msi_data_to_gsi(uint32_t data)
2775 {
2776     return data & 0xffff;
2777 }
2778 
2779 int kvmppc_enable_hwrng(void)
2780 {
2781     if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2782         return -1;
2783     }
2784 
2785     return kvmppc_enable_hcall(kvm_state, H_RANDOM);
2786 }
2787 
2788 void kvmppc_check_papr_resize_hpt(Error **errp)
2789 {
2790     if (!kvm_enabled()) {
2791         return; /* No KVM, we're good */
2792     }
2793 
2794     if (cap_resize_hpt) {
2795         return; /* Kernel has explicit support, we're good */
2796     }
2797 
2798     /* Otherwise fallback on looking for PR KVM */
2799     if (kvmppc_is_pr(kvm_state)) {
2800         return;
2801     }
2802 
2803     error_setg(errp,
2804                "Hash page table resizing not available with this KVM version");
2805 }
2806 
2807 int kvmppc_resize_hpt_prepare(PowerPCCPU *cpu, target_ulong flags, int shift)
2808 {
2809     CPUState *cs = CPU(cpu);
2810     struct kvm_ppc_resize_hpt rhpt = {
2811         .flags = flags,
2812         .shift = shift,
2813     };
2814 
2815     if (!cap_resize_hpt) {
2816         return -ENOSYS;
2817     }
2818 
2819     return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_PREPARE, &rhpt);
2820 }
2821 
2822 int kvmppc_resize_hpt_commit(PowerPCCPU *cpu, target_ulong flags, int shift)
2823 {
2824     CPUState *cs = CPU(cpu);
2825     struct kvm_ppc_resize_hpt rhpt = {
2826         .flags = flags,
2827         .shift = shift,
2828     };
2829 
2830     if (!cap_resize_hpt) {
2831         return -ENOSYS;
2832     }
2833 
2834     return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_COMMIT, &rhpt);
2835 }
2836 
2837 /*
2838  * This is a helper function to detect a post migration scenario
2839  * in which a guest, running as KVM-HV, freezes in cpu_post_load because
2840  * the guest kernel can't handle a PVR value other than the actual host
2841  * PVR in KVM_SET_SREGS, even if pvr_match() returns true.
2842  *
2843  * If we don't have cap_ppc_pvr_compat and we're not running in PR
2844  * (so, we're HV), return true. The workaround itself is done in
2845  * cpu_post_load.
2846  *
2847  * The order here is important: we'll only check for KVM PR as a
2848  * fallback if the guest kernel can't handle the situation itself.
2849  * We want to avoid querying the running KVM type from QEMU as much
2850  * as possible.
2851  */
2852 bool kvmppc_pvr_workaround_required(PowerPCCPU *cpu)
2853 {
2854     CPUState *cs = CPU(cpu);
2855 
2856     if (!kvm_enabled()) {
2857         return false;
2858     }
2859 
2860     if (cap_ppc_pvr_compat) {
2861         return false;
2862     }
2863 
2864     return !kvmppc_is_pr(cs->kvm_state);
2865 }
2866