xref: /openbmc/qemu/target/ppc/kvm.c (revision 9cbb6362)
1 /*
2  * PowerPC implementation of KVM hooks
3  *
4  * Copyright IBM Corp. 2007
5  * Copyright (C) 2011 Freescale Semiconductor, Inc.
6  *
7  * Authors:
8  *  Jerone Young <jyoung5@us.ibm.com>
9  *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10  *  Hollis Blanchard <hollisb@us.ibm.com>
11  *
12  * This work is licensed under the terms of the GNU GPL, version 2 or later.
13  * See the COPYING file in the top-level directory.
14  *
15  */
16 
17 #include "qemu/osdep.h"
18 #include <dirent.h>
19 #include <sys/ioctl.h>
20 #include <sys/vfs.h>
21 
22 #include <linux/kvm.h>
23 
24 #include "qemu-common.h"
25 #include "qapi/error.h"
26 #include "qemu/error-report.h"
27 #include "cpu.h"
28 #include "cpu-models.h"
29 #include "qemu/timer.h"
30 #include "sysemu/sysemu.h"
31 #include "sysemu/hw_accel.h"
32 #include "kvm_ppc.h"
33 #include "sysemu/cpus.h"
34 #include "sysemu/device_tree.h"
35 #include "mmu-hash64.h"
36 
37 #include "hw/sysbus.h"
38 #include "hw/ppc/spapr.h"
39 #include "hw/ppc/spapr_vio.h"
40 #include "hw/ppc/spapr_cpu_core.h"
41 #include "hw/ppc/ppc.h"
42 #include "sysemu/watchdog.h"
43 #include "trace.h"
44 #include "exec/gdbstub.h"
45 #include "exec/memattrs.h"
46 #include "exec/ram_addr.h"
47 #include "sysemu/hostmem.h"
48 #include "qemu/cutils.h"
49 #include "qemu/mmap-alloc.h"
50 #include "elf.h"
51 #include "sysemu/kvm_int.h"
52 
53 //#define DEBUG_KVM
54 
55 #ifdef DEBUG_KVM
56 #define DPRINTF(fmt, ...) \
57     do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
58 #else
59 #define DPRINTF(fmt, ...) \
60     do { } while (0)
61 #endif
62 
63 #define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"
64 
65 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
66     KVM_CAP_LAST_INFO
67 };
68 
69 static int cap_interrupt_unset = false;
70 static int cap_interrupt_level = false;
71 static int cap_segstate;
72 static int cap_booke_sregs;
73 static int cap_ppc_smt;
74 static int cap_ppc_smt_possible;
75 static int cap_ppc_rma;
76 static int cap_spapr_tce;
77 static int cap_spapr_tce_64;
78 static int cap_spapr_multitce;
79 static int cap_spapr_vfio;
80 static int cap_hior;
81 static int cap_one_reg;
82 static int cap_epr;
83 static int cap_ppc_watchdog;
84 static int cap_papr;
85 static int cap_htab_fd;
86 static int cap_fixup_hcalls;
87 static int cap_htm;             /* Hardware transactional memory support */
88 static int cap_mmu_radix;
89 static int cap_mmu_hash_v3;
90 static int cap_resize_hpt;
91 static int cap_ppc_pvr_compat;
92 
93 static uint32_t debug_inst_opcode;
94 
95 /* XXX We have a race condition where we actually have a level triggered
96  *     interrupt, but the infrastructure can't expose that yet, so the guest
97  *     takes but ignores it, goes to sleep and never gets notified that there's
98  *     still an interrupt pending.
99  *
100  *     As a quick workaround, let's just wake up again 20 ms after we injected
 101  *     an interrupt. That way we can ensure that we're always reinjecting
102  *     interrupts in case the guest swallowed them.
103  */
104 static QEMUTimer *idle_timer;
105 
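/* Timer callback for idle_timer: kick the vcpu so it re-evaluates pending
 * interrupts. The timer is re-armed in kvm_arch_pre_run() below each time an
 * interrupt is injected. */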
106 static void kvm_kick_cpu(void *opaque)
107 {
108     PowerPCCPU *cpu = opaque;
109 
110     qemu_cpu_kick(CPU(cpu));
111 }
112 
113 /* Check whether we are running with KVM-PR (instead of KVM-HV).  This
114  * should only be used for fallback tests - generally we should use
115  * explicit capabilities for the features we want, rather than
116  * assuming what is/isn't available depending on the KVM variant. */
117 static bool kvmppc_is_pr(KVMState *ks)
118 {
119     /* Assume KVM-PR if the GET_PVINFO capability is available */
120     return kvm_vm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
121 }
122 
123 static int kvm_ppc_register_host_cpu_type(MachineState *ms);
124 
125 int kvm_arch_init(MachineState *ms, KVMState *s)
126 {
127     cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
128     cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
129     cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
130     cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
131     cap_ppc_smt_possible = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT_POSSIBLE);
132     cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
133     cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
134     cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64);
135     cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
136     cap_spapr_vfio = false;
137     cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
138     cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
139     cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
140     cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
141     /* Note: we don't set cap_papr here, because this capability is
142      * only activated after this by kvmppc_set_papr() */
143     cap_htab_fd = kvm_vm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
144     cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
145     cap_ppc_smt = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT);
146     cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
147     cap_mmu_radix = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_RADIX);
148     cap_mmu_hash_v3 = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_HASH_V3);
149     cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT);
150     /*
 151      * Note: setting it to false because there is no such capability
 152      * in KVM at this moment.
 153      *
 154      * TODO: call kvm_vm_check_extension() with the right capability
 155      * after the kernel starts implementing it. */
156     cap_ppc_pvr_compat = false;
157 
158     if (!cap_interrupt_level) {
159         fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
160                         "VM to stall at times!\n");
161     }
162 
163     kvm_ppc_register_host_cpu_type(ms);
164 
165     return 0;
166 }
167 
168 int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
169 {
170     return 0;
171 }
172 
173 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
174 {
175     CPUPPCState *cenv = &cpu->env;
176     CPUState *cs = CPU(cpu);
177     struct kvm_sregs sregs;
178     int ret;
179 
180     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
181         /* What we're really trying to say is "if we're on BookE, we use
182            the native PVR for now". This is the only sane way to check
 183            it though, so we potentially confuse users into thinking they
 184            can run BookE guests on BookS. Let's hope nobody dares try :) */
185         return 0;
186     } else {
187         if (!cap_segstate) {
188             fprintf(stderr, "kvm error: missing PVR setting capability\n");
189             return -ENOSYS;
190         }
191     }
192 
193     ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
194     if (ret) {
195         return ret;
196     }
197 
198     sregs.pvr = cenv->spr[SPR_PVR];
199     return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
200 }
201 
202 /* Set up a shared TLB array with KVM */
203 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
204 {
205     CPUPPCState *env = &cpu->env;
206     CPUState *cs = CPU(cpu);
207     struct kvm_book3e_206_tlb_params params = {};
208     struct kvm_config_tlb cfg = {};
209     unsigned int entries = 0;
210     int ret, i;
211 
212     if (!kvm_enabled() ||
213         !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
214         return 0;
215     }
216 
217     assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
218 
219     for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
220         params.tlb_sizes[i] = booke206_tlb_size(env, i);
221         params.tlb_ways[i] = booke206_tlb_ways(env, i);
222         entries += params.tlb_sizes[i];
223     }
224 
225     assert(entries == env->nb_tlb);
226     assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
227 
228     env->tlb_dirty = true;
229 
230     cfg.array = (uintptr_t)env->tlb.tlbm;
231     cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
232     cfg.params = (uintptr_t)&params;
233     cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
234 
235     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
236     if (ret < 0) {
237         fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
238                 __func__, strerror(-ret));
239         return ret;
240     }
241 
242     env->kvm_sw_tlb = true;
243     return 0;
244 }
245 
246 
247 #if defined(TARGET_PPC64)
248 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
249                                        struct kvm_ppc_smmu_info *info)
250 {
251     CPUPPCState *env = &cpu->env;
252     CPUState *cs = CPU(cpu);
253 
254     memset(info, 0, sizeof(*info));
255 
 256     /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so we
 257      * need to "guess" what the supported page sizes are.
258      *
259      * For that to work we make a few assumptions:
260      *
261      * - Check whether we are running "PR" KVM which only supports 4K
262      *   and 16M pages, but supports them regardless of the backing
 263      *   store characteristics. We also don't support 1T segments.
264      *
 265      *   This is safe because if HV KVM ever supports that capability or
 266      *   PR KVM grows support for more page/segment sizes, those versions
 267      *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
 268      *   will not hit this fallback.
269      *
270      * - Else we are running HV KVM. This means we only support page
271      *   sizes that fit in the backing store. Additionally we only
 272      *   advertise 64K pages if the processor is ARCH 2.06 and we assume
273      *   P7 encodings for the SLB and hash table. Here too, we assume
274      *   support for any newer processor will mean a kernel that
275      *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
276      *   this fallback.
277      */
278     if (kvmppc_is_pr(cs->kvm_state)) {
279         /* No flags */
280         info->flags = 0;
281         info->slb_size = 64;
282 
283         /* Standard 4k base page size segment */
284         info->sps[0].page_shift = 12;
285         info->sps[0].slb_enc = 0;
286         info->sps[0].enc[0].page_shift = 12;
287         info->sps[0].enc[0].pte_enc = 0;
288 
289         /* Standard 16M large page size segment */
290         info->sps[1].page_shift = 24;
291         info->sps[1].slb_enc = SLB_VSID_L;
292         info->sps[1].enc[0].page_shift = 24;
293         info->sps[1].enc[0].pte_enc = 0;
294     } else {
295         int i = 0;
296 
297         /* HV KVM has backing store size restrictions */
298         info->flags = KVM_PPC_PAGE_SIZES_REAL;
299 
300         if (env->mmu_model & POWERPC_MMU_1TSEG) {
301             info->flags |= KVM_PPC_1T_SEGMENTS;
302         }
303 
304         if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 ||
305            POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) {
306             info->slb_size = 32;
307         } else {
308             info->slb_size = 64;
309         }
310 
311         /* Standard 4k base page size segment */
312         info->sps[i].page_shift = 12;
313         info->sps[i].slb_enc = 0;
314         info->sps[i].enc[0].page_shift = 12;
315         info->sps[i].enc[0].pte_enc = 0;
316         i++;
317 
318         /* 64K on MMU 2.06 and later */
319         if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 ||
320             POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) {
321             info->sps[i].page_shift = 16;
322             info->sps[i].slb_enc = 0x110;
323             info->sps[i].enc[0].page_shift = 16;
324             info->sps[i].enc[0].pte_enc = 1;
325             i++;
326         }
327 
328         /* Standard 16M large page size segment */
329         info->sps[i].page_shift = 24;
330         info->sps[i].slb_enc = SLB_VSID_L;
331         info->sps[i].enc[0].page_shift = 24;
332         info->sps[i].enc[0].pte_enc = 0;
333     }
334 }
335 
336 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
337 {
338     CPUState *cs = CPU(cpu);
339     int ret;
340 
341     if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
342         ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
343         if (ret == 0) {
344             return;
345         }
346     }
347 
348     kvm_get_fallback_smmu_info(cpu, info);
349 }
350 
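/* Query the host's radix page-size encodings via KVM_PPC_GET_RMMU_INFO.
 * Returns NULL if radix MMU support is absent; otherwise the caller owns the
 * returned ppc_radix_page_info buffer. */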
351 struct ppc_radix_page_info *kvm_get_radix_page_info(void)
352 {
353     KVMState *s = KVM_STATE(current_machine->accelerator);
354     struct ppc_radix_page_info *radix_page_info;
355     struct kvm_ppc_rmmu_info rmmu_info;
356     int i;
357 
358     if (!kvm_check_extension(s, KVM_CAP_PPC_MMU_RADIX)) {
359         return NULL;
360     }
361     if (kvm_vm_ioctl(s, KVM_PPC_GET_RMMU_INFO, &rmmu_info)) {
362         return NULL;
363     }
364     radix_page_info = g_malloc0(sizeof(*radix_page_info));
365     radix_page_info->count = 0;
366     for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) {
367         if (rmmu_info.ap_encodings[i]) {
368             radix_page_info->entries[i] = rmmu_info.ap_encodings[i];
369             radix_page_info->count++;
370         }
371     }
372     return radix_page_info;
373 }
374 
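/* Configure the ISA v3.00 (POWER9) MMU mode through KVM_PPC_CONFIGURE_V3_MMU
 * and map the ioctl result onto a PAPR hcall return code. This is typically
 * reached from the sPAPR process-table registration hcall path (an assumption
 * about the caller; this file only provides the helper). */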
375 target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu,
376                                      bool radix, bool gtse,
377                                      uint64_t proc_tbl)
378 {
379     CPUState *cs = CPU(cpu);
380     int ret;
381     uint64_t flags = 0;
382     struct kvm_ppc_mmuv3_cfg cfg = {
383         .process_table = proc_tbl,
384     };
385 
386     if (radix) {
387         flags |= KVM_PPC_MMUV3_RADIX;
388     }
389     if (gtse) {
390         flags |= KVM_PPC_MMUV3_GTSE;
391     }
392     cfg.flags = flags;
393     ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_CONFIGURE_V3_MMU, &cfg);
394     switch (ret) {
395     case 0:
396         return H_SUCCESS;
397     case -EINVAL:
398         return H_PARAMETER;
399     case -ENODEV:
400         return H_NOT_AVAILABLE;
401     default:
402         return H_HARDWARE;
403     }
404 }
405 
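/* A page size is always usable for PR KVM; for HV KVM (PAGE_SIZES_REAL) it is
 * only usable if it fits within the backing RAM page size. */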
406 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
407 {
408     if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
409         return true;
410     }
411 
412     return (1ul << shift) <= rampgsize;
413 }
414 
415 static long max_cpu_page_size;
416 
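/* Populate env->sps from the KVM-reported segment page sizes, dropping any
 * encoding the host RAM page size cannot back, and adjust the MMU model
 * (1T segments, 64K pages) accordingly. */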
417 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
418 {
419     static struct kvm_ppc_smmu_info smmu_info;
420     static bool has_smmu_info;
421     CPUPPCState *env = &cpu->env;
422     int iq, ik, jq, jk;
423     bool has_64k_pages = false;
424 
425     /* We only handle page sizes for 64-bit server guests for now */
426     if (!(env->mmu_model & POWERPC_MMU_64)) {
427         return;
428     }
429 
430     /* Collect MMU info from kernel if not already */
431     if (!has_smmu_info) {
432         kvm_get_smmu_info(cpu, &smmu_info);
433         has_smmu_info = true;
434     }
435 
436     if (!max_cpu_page_size) {
437         max_cpu_page_size = qemu_getrampagesize();
438     }
439 
440     /* Convert to QEMU form */
441     memset(&env->sps, 0, sizeof(env->sps));
442 
443     /* If we have HV KVM, we need to forbid CI large pages if our
444      * host page size is smaller than 64K.
445      */
446     if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) {
447         env->ci_large_pages = getpagesize() >= 0x10000;
448     }
449 
450     /*
451      * XXX This loop should be an entry wide AND of the capabilities that
452      *     the selected CPU has with the capabilities that KVM supports.
453      */
454     for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
455         struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
456         struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
457 
458         if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
459                                  ksps->page_shift)) {
460             continue;
461         }
462         qsps->page_shift = ksps->page_shift;
463         qsps->slb_enc = ksps->slb_enc;
464         for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
465             if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
466                                      ksps->enc[jk].page_shift)) {
467                 continue;
468             }
469             if (ksps->enc[jk].page_shift == 16) {
470                 has_64k_pages = true;
471             }
472             qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
473             qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
474             if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
475                 break;
476             }
477         }
478         if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
479             break;
480         }
481     }
482     env->slb_nr = smmu_info.slb_size;
483     if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
484         env->mmu_model &= ~POWERPC_MMU_1TSEG;
485     }
486     if (!has_64k_pages) {
487         env->mmu_model &= ~POWERPC_MMU_64K;
488     }
489 }
490 
491 bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path)
492 {
493     Object *mem_obj = object_resolve_path(obj_path, NULL);
494     char *mempath = object_property_get_str(mem_obj, "mem-path", NULL);
495     long pagesize;
496 
497     if (mempath) {
498         pagesize = qemu_mempath_getpagesize(mempath);
499         g_free(mempath);
500     } else {
501         pagesize = getpagesize();
502     }
503 
504     return pagesize >= max_cpu_page_size;
505 }
506 
507 #else /* defined (TARGET_PPC64) */
508 
509 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
510 {
511 }
512 
513 bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path)
514 {
515     return true;
516 }
517 
518 #endif /* !defined (TARGET_PPC64) */
519 
520 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
521 {
522     return POWERPC_CPU(cpu)->vcpu_id;
523 }
524 
 525 /* e500 supports 2 h/w breakpoints and 2 watchpoints.
526  * book3s supports only 1 watchpoint, so array size
527  * of 4 is sufficient for now.
528  */
529 #define MAX_HW_BKPTS 4
530 
531 static struct HWBreakpoint {
532     target_ulong addr;
533     int type;
534 } hw_debug_points[MAX_HW_BKPTS];
535 
536 static CPUWatchpoint hw_watchpoint;
537 
 538 /* By default, no breakpoints or watchpoints are supported */
539 static int max_hw_breakpoint;
540 static int max_hw_watchpoint;
541 static int nb_hw_breakpoint;
542 static int nb_hw_watchpoint;
543 
544 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
545 {
546     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
547         max_hw_breakpoint = 2;
548         max_hw_watchpoint = 2;
549     }
550 
551     if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
552         fprintf(stderr, "Error initializing h/w breakpoints\n");
553         return;
554     }
555 }
556 
557 int kvm_arch_init_vcpu(CPUState *cs)
558 {
559     PowerPCCPU *cpu = POWERPC_CPU(cs);
560     CPUPPCState *cenv = &cpu->env;
561     int ret;
562 
563     /* Gather server mmu info from KVM and update the CPU state */
564     kvm_fixup_page_sizes(cpu);
565 
566     /* Synchronize sregs with kvm */
567     ret = kvm_arch_sync_sregs(cpu);
568     if (ret) {
569         if (ret == -EINVAL) {
570             error_report("Register sync failed... If you're using kvm-hv.ko,"
571                          " only \"-cpu host\" is possible");
572         }
573         return ret;
574     }
575 
576     idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
577 
578     switch (cenv->mmu_model) {
579     case POWERPC_MMU_BOOKE206:
580         /* This target supports access to KVM's guest TLB */
581         ret = kvm_booke206_tlb_init(cpu);
582         break;
583     case POWERPC_MMU_2_07:
584         if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
 585             /* KVM-HV has transactional memory on POWER8 even without the
 586              * KVM_CAP_PPC_HTM extension, so enable it here instead as
 587              * long as it's available to userspace on the host. */
588             if (qemu_getauxval(AT_HWCAP2) & PPC_FEATURE2_HAS_HTM) {
589                 cap_htm = true;
590             }
591         }
592         break;
593     default:
594         break;
595     }
596 
597     kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
598     kvmppc_hw_debug_points_init(cenv);
599 
600     return ret;
601 }
602 
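/* Push QEMU's software TLB copy back to KVM by marking every entry dirty in
 * the KVM_DIRTY_TLB bitmap. */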
603 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
604 {
605     CPUPPCState *env = &cpu->env;
606     CPUState *cs = CPU(cpu);
607     struct kvm_dirty_tlb dirty_tlb;
608     unsigned char *bitmap;
609     int ret;
610 
611     if (!env->kvm_sw_tlb) {
612         return;
613     }
614 
615     bitmap = g_malloc((env->nb_tlb + 7) / 8);
616     memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
617 
618     dirty_tlb.bitmap = (uintptr_t)bitmap;
619     dirty_tlb.num_dirty = env->nb_tlb;
620 
621     ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
622     if (ret) {
623         fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
624                 __func__, strerror(-ret));
625     }
626 
627     g_free(bitmap);
628 }
629 
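/* Read a single SPR from KVM with KVM_GET_ONE_REG and store it in env->spr[];
 * the register width (32 or 64 bit) is encoded in the ONE_REG id. The
 * kvm_put_one_spr() counterpart below does the reverse. */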
630 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
631 {
632     PowerPCCPU *cpu = POWERPC_CPU(cs);
633     CPUPPCState *env = &cpu->env;
634     union {
635         uint32_t u32;
636         uint64_t u64;
637     } val;
638     struct kvm_one_reg reg = {
639         .id = id,
640         .addr = (uintptr_t) &val,
641     };
642     int ret;
643 
644     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
645     if (ret != 0) {
646         trace_kvm_failed_spr_get(spr, strerror(errno));
647     } else {
648         switch (id & KVM_REG_SIZE_MASK) {
649         case KVM_REG_SIZE_U32:
650             env->spr[spr] = val.u32;
651             break;
652 
653         case KVM_REG_SIZE_U64:
654             env->spr[spr] = val.u64;
655             break;
656 
657         default:
658             /* Don't handle this size yet */
659             abort();
660         }
661     }
662 }
663 
664 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
665 {
666     PowerPCCPU *cpu = POWERPC_CPU(cs);
667     CPUPPCState *env = &cpu->env;
668     union {
669         uint32_t u32;
670         uint64_t u64;
671     } val;
672     struct kvm_one_reg reg = {
673         .id = id,
674         .addr = (uintptr_t) &val,
675     };
676     int ret;
677 
678     switch (id & KVM_REG_SIZE_MASK) {
679     case KVM_REG_SIZE_U32:
680         val.u32 = env->spr[spr];
681         break;
682 
683     case KVM_REG_SIZE_U64:
684         val.u64 = env->spr[spr];
685         break;
686 
687     default:
688         /* Don't handle this size yet */
689         abort();
690     }
691 
692     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
693     if (ret != 0) {
694         trace_kvm_failed_spr_set(spr, strerror(errno));
695     }
696 }
697 
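/* Write the FP/VSX and Altivec state to KVM. FPRs and VSRs share storage:
 * each register is transferred as a doubleword pair whose ordering depends on
 * host endianness (see the HOST_WORDS_BIGENDIAN blocks below). */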
698 static int kvm_put_fp(CPUState *cs)
699 {
700     PowerPCCPU *cpu = POWERPC_CPU(cs);
701     CPUPPCState *env = &cpu->env;
702     struct kvm_one_reg reg;
703     int i;
704     int ret;
705 
706     if (env->insns_flags & PPC_FLOAT) {
707         uint64_t fpscr = env->fpscr;
708         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
709 
710         reg.id = KVM_REG_PPC_FPSCR;
711         reg.addr = (uintptr_t)&fpscr;
712         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
713         if (ret < 0) {
714             DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
715             return ret;
716         }
717 
718         for (i = 0; i < 32; i++) {
719             uint64_t vsr[2];
720 
721 #ifdef HOST_WORDS_BIGENDIAN
722             vsr[0] = float64_val(env->fpr[i]);
723             vsr[1] = env->vsr[i];
724 #else
725             vsr[0] = env->vsr[i];
726             vsr[1] = float64_val(env->fpr[i]);
727 #endif
728             reg.addr = (uintptr_t) &vsr;
729             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
730 
731             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
732             if (ret < 0) {
733                 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
734                         i, strerror(errno));
735                 return ret;
736             }
737         }
738     }
739 
740     if (env->insns_flags & PPC_ALTIVEC) {
741         reg.id = KVM_REG_PPC_VSCR;
742         reg.addr = (uintptr_t)&env->vscr;
743         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
744         if (ret < 0) {
745             DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
746             return ret;
747         }
748 
749         for (i = 0; i < 32; i++) {
750             reg.id = KVM_REG_PPC_VR(i);
751             reg.addr = (uintptr_t)&env->avr[i];
752             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
753             if (ret < 0) {
754                 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
755                 return ret;
756             }
757         }
758     }
759 
760     return 0;
761 }
762 
763 static int kvm_get_fp(CPUState *cs)
764 {
765     PowerPCCPU *cpu = POWERPC_CPU(cs);
766     CPUPPCState *env = &cpu->env;
767     struct kvm_one_reg reg;
768     int i;
769     int ret;
770 
771     if (env->insns_flags & PPC_FLOAT) {
772         uint64_t fpscr;
773         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
774 
775         reg.id = KVM_REG_PPC_FPSCR;
776         reg.addr = (uintptr_t)&fpscr;
777         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
778         if (ret < 0) {
779             DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
780             return ret;
781         } else {
782             env->fpscr = fpscr;
783         }
784 
785         for (i = 0; i < 32; i++) {
786             uint64_t vsr[2];
787 
788             reg.addr = (uintptr_t) &vsr;
789             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
790 
791             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
792             if (ret < 0) {
793                 DPRINTF("Unable to get %s%d from KVM: %s\n",
794                         vsx ? "VSR" : "FPR", i, strerror(errno));
795                 return ret;
796             } else {
797 #ifdef HOST_WORDS_BIGENDIAN
798                 env->fpr[i] = vsr[0];
799                 if (vsx) {
800                     env->vsr[i] = vsr[1];
801                 }
802 #else
803                 env->fpr[i] = vsr[1];
804                 if (vsx) {
805                     env->vsr[i] = vsr[0];
806                 }
807 #endif
808             }
809         }
810     }
811 
812     if (env->insns_flags & PPC_ALTIVEC) {
813         reg.id = KVM_REG_PPC_VSCR;
814         reg.addr = (uintptr_t)&env->vscr;
815         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
816         if (ret < 0) {
817             DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
818             return ret;
819         }
820 
821         for (i = 0; i < 32; i++) {
822             reg.id = KVM_REG_PPC_VR(i);
823             reg.addr = (uintptr_t)&env->avr[i];
824             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
825             if (ret < 0) {
826                 DPRINTF("Unable to get VR%d from KVM: %s\n",
827                         i, strerror(errno));
828                 return ret;
829             }
830         }
831     }
832 
833     return 0;
834 }
835 
836 #if defined(TARGET_PPC64)
837 static int kvm_get_vpa(CPUState *cs)
838 {
839     PowerPCCPU *cpu = POWERPC_CPU(cs);
840     CPUPPCState *env = &cpu->env;
841     struct kvm_one_reg reg;
842     int ret;
843 
844     reg.id = KVM_REG_PPC_VPA_ADDR;
845     reg.addr = (uintptr_t)&env->vpa_addr;
846     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
847     if (ret < 0) {
848         DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
849         return ret;
850     }
851 
852     assert((uintptr_t)&env->slb_shadow_size
853            == ((uintptr_t)&env->slb_shadow_addr + 8));
854     reg.id = KVM_REG_PPC_VPA_SLB;
855     reg.addr = (uintptr_t)&env->slb_shadow_addr;
856     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
857     if (ret < 0) {
858         DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
859                 strerror(errno));
860         return ret;
861     }
862 
863     assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
864     reg.id = KVM_REG_PPC_VPA_DTL;
865     reg.addr = (uintptr_t)&env->dtl_addr;
866     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
867     if (ret < 0) {
868         DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
869                 strerror(errno));
870         return ret;
871     }
872 
873     return 0;
874 }
875 
876 static int kvm_put_vpa(CPUState *cs)
877 {
878     PowerPCCPU *cpu = POWERPC_CPU(cs);
879     CPUPPCState *env = &cpu->env;
880     struct kvm_one_reg reg;
881     int ret;
882 
883     /* SLB shadow or DTL can't be registered unless a master VPA is
884      * registered.  That means when restoring state, if a VPA *is*
885      * registered, we need to set that up first.  If not, we need to
 886      * deregister the others before deregistering the master VPA. */
887     assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
888 
889     if (env->vpa_addr) {
890         reg.id = KVM_REG_PPC_VPA_ADDR;
891         reg.addr = (uintptr_t)&env->vpa_addr;
892         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
893         if (ret < 0) {
894             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
895             return ret;
896         }
897     }
898 
899     assert((uintptr_t)&env->slb_shadow_size
900            == ((uintptr_t)&env->slb_shadow_addr + 8));
901     reg.id = KVM_REG_PPC_VPA_SLB;
902     reg.addr = (uintptr_t)&env->slb_shadow_addr;
903     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
904     if (ret < 0) {
905         DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
906         return ret;
907     }
908 
909     assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
910     reg.id = KVM_REG_PPC_VPA_DTL;
911     reg.addr = (uintptr_t)&env->dtl_addr;
912     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
913     if (ret < 0) {
914         DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
915                 strerror(errno));
916         return ret;
917     }
918 
919     if (!env->vpa_addr) {
920         reg.id = KVM_REG_PPC_VPA_ADDR;
921         reg.addr = (uintptr_t)&env->vpa_addr;
922         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
923         if (ret < 0) {
924             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
925             return ret;
926         }
927     }
928 
929     return 0;
930 }
931 #endif /* TARGET_PPC64 */
932 
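/* Push the Book3S segment state (SDR1, SLB, segment registers and BATs) to
 * KVM via KVM_SET_SREGS. */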
933 int kvmppc_put_books_sregs(PowerPCCPU *cpu)
934 {
935     CPUPPCState *env = &cpu->env;
936     struct kvm_sregs sregs;
937     int i;
938 
939     sregs.pvr = env->spr[SPR_PVR];
940 
941     if (cpu->vhyp) {
942         PPCVirtualHypervisorClass *vhc =
943             PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
944         sregs.u.s.sdr1 = vhc->encode_hpt_for_kvm_pr(cpu->vhyp);
945     } else {
946         sregs.u.s.sdr1 = env->spr[SPR_SDR1];
947     }
948 
949     /* Sync SLB */
950 #ifdef TARGET_PPC64
951     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
952         sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
953         if (env->slb[i].esid & SLB_ESID_V) {
954             sregs.u.s.ppc64.slb[i].slbe |= i;
955         }
956         sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
957     }
958 #endif
959 
960     /* Sync SRs */
961     for (i = 0; i < 16; i++) {
962         sregs.u.s.ppc32.sr[i] = env->sr[i];
963     }
964 
965     /* Sync BATs */
966     for (i = 0; i < 8; i++) {
967         /* Beware. We have to swap upper and lower bits here */
968         sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
969             | env->DBAT[1][i];
970         sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
971             | env->IBAT[1][i];
972     }
973 
974     return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
975 }
976 
977 int kvm_arch_put_registers(CPUState *cs, int level)
978 {
979     PowerPCCPU *cpu = POWERPC_CPU(cs);
980     CPUPPCState *env = &cpu->env;
981     struct kvm_regs regs;
982     int ret;
983     int i;
984 
985     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
986     if (ret < 0) {
987         return ret;
988     }
989 
990     regs.ctr = env->ctr;
991     regs.lr  = env->lr;
992     regs.xer = cpu_read_xer(env);
993     regs.msr = env->msr;
994     regs.pc = env->nip;
995 
996     regs.srr0 = env->spr[SPR_SRR0];
997     regs.srr1 = env->spr[SPR_SRR1];
998 
999     regs.sprg0 = env->spr[SPR_SPRG0];
1000     regs.sprg1 = env->spr[SPR_SPRG1];
1001     regs.sprg2 = env->spr[SPR_SPRG2];
1002     regs.sprg3 = env->spr[SPR_SPRG3];
1003     regs.sprg4 = env->spr[SPR_SPRG4];
1004     regs.sprg5 = env->spr[SPR_SPRG5];
1005     regs.sprg6 = env->spr[SPR_SPRG6];
1006     regs.sprg7 = env->spr[SPR_SPRG7];
1007 
1008     regs.pid = env->spr[SPR_BOOKE_PID];
1009 
 1010     for (i = 0; i < 32; i++)
1011         regs.gpr[i] = env->gpr[i];
1012 
1013     regs.cr = 0;
1014     for (i = 0; i < 8; i++) {
1015         regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
1016     }
1017 
1018     ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
1019     if (ret < 0)
1020         return ret;
1021 
1022     kvm_put_fp(cs);
1023 
1024     if (env->tlb_dirty) {
1025         kvm_sw_tlb_put(cpu);
1026         env->tlb_dirty = false;
1027     }
1028 
1029     if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
1030         ret = kvmppc_put_books_sregs(cpu);
1031         if (ret < 0) {
1032             return ret;
1033         }
1034     }
1035 
1036     if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
1037         kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1038     }
1039 
1040     if (cap_one_reg) {
1041         int i;
1042 
 1043         /* We deliberately ignore errors here: for kernels which have
 1044          * the ONE_REG calls but don't support the specific
1045          * registers, there's a reasonable chance things will still
1046          * work, at least until we try to migrate. */
1047         for (i = 0; i < 1024; i++) {
1048             uint64_t id = env->spr_cb[i].one_reg_id;
1049 
1050             if (id != 0) {
1051                 kvm_put_one_spr(cs, id, i);
1052             }
1053         }
1054 
1055 #ifdef TARGET_PPC64
1056         if (msr_ts) {
1057             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1058                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1059             }
1060             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1061                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1062             }
1063             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1064             kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1065             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1066             kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1067             kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1068             kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1069             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1070             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1071             kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1072             kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1073         }
1074 
1075         if (cap_papr) {
1076             if (kvm_put_vpa(cs) < 0) {
1077                 DPRINTF("Warning: Unable to set VPA information to KVM\n");
1078             }
1079         }
1080 
1081         kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1082 #endif /* TARGET_PPC64 */
1083     }
1084 
1085     return ret;
1086 }
1087 
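/* Recompute a BookE exception vector from its IVOR SPR and the IVPR base. */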
1088 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1089 {
 1090     env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1091 }
1092 
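/* Mirror the BookE sregs reported by KVM_GET_SREGS into env->spr[], feature
 * block by feature block, updating the exception vectors as we go. */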
1093 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1094 {
1095     CPUPPCState *env = &cpu->env;
1096     struct kvm_sregs sregs;
1097     int ret;
1098 
1099     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1100     if (ret < 0) {
1101         return ret;
1102     }
1103 
1104     if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1105         env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1106         env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1107         env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1108         env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1109         env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1110         env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1111         env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1112         env->spr[SPR_DECR] = sregs.u.e.dec;
1113         env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1114         env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1115         env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1116     }
1117 
1118     if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1119         env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1120         env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1121         env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1122         env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1123         env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1124     }
1125 
1126     if (sregs.u.e.features & KVM_SREGS_E_64) {
1127         env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1128     }
1129 
1130     if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1131         env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1132     }
1133 
1134     if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1135         env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1136         kvm_sync_excp(env, POWERPC_EXCP_CRITICAL,  SPR_BOOKE_IVOR0);
1137         env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1138         kvm_sync_excp(env, POWERPC_EXCP_MCHECK,  SPR_BOOKE_IVOR1);
1139         env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1140         kvm_sync_excp(env, POWERPC_EXCP_DSI,  SPR_BOOKE_IVOR2);
1141         env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1142         kvm_sync_excp(env, POWERPC_EXCP_ISI,  SPR_BOOKE_IVOR3);
1143         env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1144         kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL,  SPR_BOOKE_IVOR4);
1145         env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1146         kvm_sync_excp(env, POWERPC_EXCP_ALIGN,  SPR_BOOKE_IVOR5);
1147         env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1148         kvm_sync_excp(env, POWERPC_EXCP_PROGRAM,  SPR_BOOKE_IVOR6);
1149         env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1150         kvm_sync_excp(env, POWERPC_EXCP_FPU,  SPR_BOOKE_IVOR7);
1151         env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1152         kvm_sync_excp(env, POWERPC_EXCP_SYSCALL,  SPR_BOOKE_IVOR8);
1153         env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1154         kvm_sync_excp(env, POWERPC_EXCP_APU,  SPR_BOOKE_IVOR9);
1155         env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1156         kvm_sync_excp(env, POWERPC_EXCP_DECR,  SPR_BOOKE_IVOR10);
1157         env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1158         kvm_sync_excp(env, POWERPC_EXCP_FIT,  SPR_BOOKE_IVOR11);
1159         env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1160         kvm_sync_excp(env, POWERPC_EXCP_WDT,  SPR_BOOKE_IVOR12);
1161         env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1162         kvm_sync_excp(env, POWERPC_EXCP_DTLB,  SPR_BOOKE_IVOR13);
1163         env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1164         kvm_sync_excp(env, POWERPC_EXCP_ITLB,  SPR_BOOKE_IVOR14);
1165         env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1166         kvm_sync_excp(env, POWERPC_EXCP_DEBUG,  SPR_BOOKE_IVOR15);
1167 
1168         if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1169             env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1170             kvm_sync_excp(env, POWERPC_EXCP_SPEU,  SPR_BOOKE_IVOR32);
1171             env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1172             kvm_sync_excp(env, POWERPC_EXCP_EFPDI,  SPR_BOOKE_IVOR33);
1173             env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1174             kvm_sync_excp(env, POWERPC_EXCP_EFPRI,  SPR_BOOKE_IVOR34);
1175         }
1176 
1177         if (sregs.u.e.features & KVM_SREGS_E_PM) {
1178             env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1179             kvm_sync_excp(env, POWERPC_EXCP_EPERFM,  SPR_BOOKE_IVOR35);
1180         }
1181 
1182         if (sregs.u.e.features & KVM_SREGS_E_PC) {
1183             env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1184             kvm_sync_excp(env, POWERPC_EXCP_DOORI,  SPR_BOOKE_IVOR36);
1185             env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1186             kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1187         }
1188     }
1189 
1190     if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1191         env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1192         env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1193         env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1194         env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1195         env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1196         env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1197         env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1198         env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1199         env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1200         env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1201     }
1202 
1203     if (sregs.u.e.features & KVM_SREGS_EXP) {
1204         env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1205     }
1206 
1207     if (sregs.u.e.features & KVM_SREGS_E_PD) {
1208         env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1209         env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1210     }
1211 
1212     if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1213         env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1214         env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1215         env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1216 
1217         if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1218             env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1219             env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1220         }
1221     }
1222 
1223     return 0;
1224 }
1225 
1226 static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1227 {
1228     CPUPPCState *env = &cpu->env;
1229     struct kvm_sregs sregs;
1230     int ret;
1231     int i;
1232 
1233     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1234     if (ret < 0) {
1235         return ret;
1236     }
1237 
1238     if (!cpu->vhyp) {
1239         ppc_store_sdr1(env, sregs.u.s.sdr1);
1240     }
1241 
1242     /* Sync SLB */
1243 #ifdef TARGET_PPC64
1244     /*
1245      * The packed SLB array we get from KVM_GET_SREGS only contains
1246      * information about valid entries. So we flush our internal copy
1247      * to get rid of stale ones, then put all valid SLB entries back
1248      * in.
1249      */
1250     memset(env->slb, 0, sizeof(env->slb));
1251     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1252         target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1253         target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1254         /*
1255          * Only restore valid entries
1256          */
1257         if (rb & SLB_ESID_V) {
1258             ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1259         }
1260     }
1261 #endif
1262 
1263     /* Sync SRs */
1264     for (i = 0; i < 16; i++) {
1265         env->sr[i] = sregs.u.s.ppc32.sr[i];
1266     }
1267 
1268     /* Sync BATs */
1269     for (i = 0; i < 8; i++) {
1270         env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1271         env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1272         env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1273         env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1274     }
1275 
1276     return 0;
1277 }
1278 
1279 int kvm_arch_get_registers(CPUState *cs)
1280 {
1281     PowerPCCPU *cpu = POWERPC_CPU(cs);
1282     CPUPPCState *env = &cpu->env;
1283     struct kvm_regs regs;
1284     uint32_t cr;
1285     int i, ret;
1286 
1287     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1288     if (ret < 0)
1289         return ret;
1290 
1291     cr = regs.cr;
1292     for (i = 7; i >= 0; i--) {
1293         env->crf[i] = cr & 15;
1294         cr >>= 4;
1295     }
1296 
1297     env->ctr = regs.ctr;
1298     env->lr = regs.lr;
1299     cpu_write_xer(env, regs.xer);
1300     env->msr = regs.msr;
1301     env->nip = regs.pc;
1302 
1303     env->spr[SPR_SRR0] = regs.srr0;
1304     env->spr[SPR_SRR1] = regs.srr1;
1305 
1306     env->spr[SPR_SPRG0] = regs.sprg0;
1307     env->spr[SPR_SPRG1] = regs.sprg1;
1308     env->spr[SPR_SPRG2] = regs.sprg2;
1309     env->spr[SPR_SPRG3] = regs.sprg3;
1310     env->spr[SPR_SPRG4] = regs.sprg4;
1311     env->spr[SPR_SPRG5] = regs.sprg5;
1312     env->spr[SPR_SPRG6] = regs.sprg6;
1313     env->spr[SPR_SPRG7] = regs.sprg7;
1314 
1315     env->spr[SPR_BOOKE_PID] = regs.pid;
1316 
 1317     for (i = 0; i < 32; i++)
1318         env->gpr[i] = regs.gpr[i];
1319 
1320     kvm_get_fp(cs);
1321 
1322     if (cap_booke_sregs) {
1323         ret = kvmppc_get_booke_sregs(cpu);
1324         if (ret < 0) {
1325             return ret;
1326         }
1327     }
1328 
1329     if (cap_segstate) {
1330         ret = kvmppc_get_books_sregs(cpu);
1331         if (ret < 0) {
1332             return ret;
1333         }
1334     }
1335 
1336     if (cap_hior) {
1337         kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1338     }
1339 
1340     if (cap_one_reg) {
1341         int i;
1342 
 1343         /* We deliberately ignore errors here: for kernels which have
 1344          * the ONE_REG calls but don't support the specific
1345          * registers, there's a reasonable chance things will still
1346          * work, at least until we try to migrate. */
1347         for (i = 0; i < 1024; i++) {
1348             uint64_t id = env->spr_cb[i].one_reg_id;
1349 
1350             if (id != 0) {
1351                 kvm_get_one_spr(cs, id, i);
1352             }
1353         }
1354 
1355 #ifdef TARGET_PPC64
1356         if (msr_ts) {
1357             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1358                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1359             }
1360             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1361                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1362             }
1363             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1364             kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1365             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1366             kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1367             kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1368             kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1369             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1370             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1371             kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1372             kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1373         }
1374 
1375         if (cap_papr) {
1376             if (kvm_get_vpa(cs) < 0) {
1377                 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1378             }
1379         }
1380 
1381         kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1382 #endif
1383     }
1384 
1385     return 0;
1386 }
1387 
1388 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1389 {
1390     unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1391 
1392     if (irq != PPC_INTERRUPT_EXT) {
1393         return 0;
1394     }
1395 
1396     if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1397         return 0;
1398     }
1399 
1400     kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1401 
1402     return 0;
1403 }
1404 
1405 #if defined(TARGET_PPCEMB)
1406 #define PPC_INPUT_INT PPC40x_INPUT_INT
1407 #elif defined(TARGET_PPC64)
1408 #define PPC_INPUT_INT PPC970_INPUT_INT
1409 #else
1410 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1411 #endif
1412 
1413 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1414 {
1415     PowerPCCPU *cpu = POWERPC_CPU(cs);
1416     CPUPPCState *env = &cpu->env;
1417     int r;
1418     unsigned irq;
1419 
1420     qemu_mutex_lock_iothread();
1421 
1422     /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1423      * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1424     if (!cap_interrupt_level &&
1425         run->ready_for_interrupt_injection &&
1426         (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1427         (env->irq_input_state & (1<<PPC_INPUT_INT)))
1428     {
1429         /* For now KVM disregards the 'irq' argument. However, in the
1430          * future KVM could cache it in-kernel to avoid a heavyweight exit
1431          * when reading the UIC.
1432          */
1433         irq = KVM_INTERRUPT_SET;
1434 
1435         DPRINTF("injected interrupt %d\n", irq);
1436         r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1437         if (r < 0) {
1438             printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1439         }
1440 
1441         /* Always wake up soon in case the interrupt was level based */
1442         timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1443                        (NANOSECONDS_PER_SECOND / 50));
1444     }
1445 
1446     /* We don't know if there are more interrupts pending after this. However,
1447      * the guest will return to userspace in the course of handling this one
 1448      * anyway, so we will get a chance to deliver the rest. */
1449 
1450     qemu_mutex_unlock_iothread();
1451 }
1452 
1453 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1454 {
1455     return MEMTXATTRS_UNSPECIFIED;
1456 }
1457 
1458 int kvm_arch_process_async_events(CPUState *cs)
1459 {
1460     return cs->halted;
1461 }
1462 
1463 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1464 {
1465     CPUState *cs = CPU(cpu);
1466     CPUPPCState *env = &cpu->env;
1467 
1468     if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1469         cs->halted = 1;
1470         cs->exception_index = EXCP_HLT;
1471     }
1472 
1473     return 0;
1474 }
1475 
1476 /* map dcr access to existing qemu dcr emulation */
1477 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1478 {
1479     if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1480         fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1481 
1482     return 0;
1483 }
1484 
1485 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1486 {
1487     if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1488         fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1489 
1490     return 0;
1491 }
1492 
1493 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1494 {
1495     /* Mixed endian case is not handled */
1496     uint32_t sc = debug_inst_opcode;
1497 
1498     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1499                             sizeof(sc), 0) ||
1500         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1501         return -EINVAL;
1502     }
1503 
1504     return 0;
1505 }
1506 
1507 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1508 {
1509     uint32_t sc;
1510 
1511     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1512         sc != debug_inst_opcode ||
1513         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1514                             sizeof(sc), 1)) {
1515         return -EINVAL;
1516     }
1517 
1518     return 0;
1519 }
1520 
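/* Return the index of the registered hardware breakpoint/watchpoint matching
 * addr and type, or -1 if there is none. */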
1521 static int find_hw_breakpoint(target_ulong addr, int type)
1522 {
1523     int n;
1524 
1525     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1526            <= ARRAY_SIZE(hw_debug_points));
1527 
1528     for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1529         if (hw_debug_points[n].addr == addr &&
1530              hw_debug_points[n].type == type) {
1531             return n;
1532         }
1533     }
1534 
1535     return -1;
1536 }
1537 
1538 static int find_hw_watchpoint(target_ulong addr, int *flag)
1539 {
1540     int n;
1541 
1542     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1543     if (n >= 0) {
1544         *flag = BP_MEM_ACCESS;
1545         return n;
1546     }
1547 
1548     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1549     if (n >= 0) {
1550         *flag = BP_MEM_WRITE;
1551         return n;
1552     }
1553 
1554     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1555     if (n >= 0) {
1556         *flag = BP_MEM_READ;
1557         return n;
1558     }
1559 
1560     return -1;
1561 }
1562 
1563 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1564                                   target_ulong len, int type)
1565 {
1566     if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1567         return -ENOBUFS;
1568     }
1569 
1570     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1571     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1572 
1573     switch (type) {
1574     case GDB_BREAKPOINT_HW:
1575         if (nb_hw_breakpoint >= max_hw_breakpoint) {
1576             return -ENOBUFS;
1577         }
1578 
1579         if (find_hw_breakpoint(addr, type) >= 0) {
1580             return -EEXIST;
1581         }
1582 
1583         nb_hw_breakpoint++;
1584         break;
1585 
1586     case GDB_WATCHPOINT_WRITE:
1587     case GDB_WATCHPOINT_READ:
1588     case GDB_WATCHPOINT_ACCESS:
1589         if (nb_hw_watchpoint >= max_hw_watchpoint) {
1590             return -ENOBUFS;
1591         }
1592 
1593         if (find_hw_breakpoint(addr, type) >= 0) {
1594             return -EEXIST;
1595         }
1596 
1597         nb_hw_watchpoint++;
1598         break;
1599 
1600     default:
1601         return -ENOSYS;
1602     }
1603 
1604     return 0;
1605 }
1606 
1607 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1608                                   target_ulong len, int type)
1609 {
1610     int n;
1611 
1612     n = find_hw_breakpoint(addr, type);
1613     if (n < 0) {
1614         return -ENOENT;
1615     }
1616 
1617     switch (type) {
1618     case GDB_BREAKPOINT_HW:
1619         nb_hw_breakpoint--;
1620         break;
1621 
1622     case GDB_WATCHPOINT_WRITE:
1623     case GDB_WATCHPOINT_READ:
1624     case GDB_WATCHPOINT_ACCESS:
1625         nb_hw_watchpoint--;
1626         break;
1627 
1628     default:
1629         return -ENOSYS;
1630     }
1631     hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1632 
1633     return 0;
1634 }
1635 
1636 void kvm_arch_remove_all_hw_breakpoints(void)
1637 {
1638     nb_hw_breakpoint = nb_hw_watchpoint = 0;
1639 }
1640 
1641 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1642 {
1643     int n;
1644 
1645     /* Software Breakpoint updates */
1646     if (kvm_sw_breakpoints_active(cs)) {
1647         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1648     }
1649 
1650     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1651            <= ARRAY_SIZE(hw_debug_points));
1652     assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1653 
1654     if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1655         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1656         memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1657         for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1658             switch (hw_debug_points[n].type) {
1659             case GDB_BREAKPOINT_HW:
1660                 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1661                 break;
1662             case GDB_WATCHPOINT_WRITE:
1663                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1664                 break;
1665             case GDB_WATCHPOINT_READ:
1666                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1667                 break;
1668             case GDB_WATCHPOINT_ACCESS:
1669                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1670                                         KVMPPC_DEBUG_WATCH_READ;
1671                 break;
1672             default:
1673                 cpu_abort(cs, "Unsupported breakpoint type\n");
1674             }
1675             dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1676         }
1677     }
1678 }
1679 
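/*
 * Decide whether a KVM_EXIT_DEBUG exit is ours to handle: single-step,
 * a hardware breakpoint/watchpoint we registered, or one of our own
 * software breakpoints.  Anything else must have been raised by the
 * guest itself and is converted into a program interrupt below.
 */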
1680 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1681 {
1682     CPUState *cs = CPU(cpu);
1683     CPUPPCState *env = &cpu->env;
1684     struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1685     int handle = 0;
1686     int n;
1687     int flag = 0;
1688 
1689     if (cs->singlestep_enabled) {
1690         handle = 1;
1691     } else if (arch_info->status) {
1692         if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1693             if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1694                 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1695                 if (n >= 0) {
1696                     handle = 1;
1697                 }
1698             } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1699                                             KVMPPC_DEBUG_WATCH_WRITE)) {
1700                 n = find_hw_watchpoint(arch_info->address,  &flag);
1701                 if (n >= 0) {
1702                     handle = 1;
1703                     cs->watchpoint_hit = &hw_watchpoint;
1704                     hw_watchpoint.vaddr = hw_debug_points[n].addr;
1705                     hw_watchpoint.flags = flag;
1706                 }
1707             }
1708         }
1709     } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1710         handle = 1;
1711     } else {
1712         /* QEMU cannot handle this debug exception, so inject a
1713          * program exception into the guest instead;
1714          * yes, a program exception, NOT a debug exception!
1715          * While QEMU owns the debug resources, debug exceptions must
1716          * always be enabled. To achieve this we set MSR_DE and also
1717          * MSRP_DEP so the guest cannot change MSR_DE.
1718          * When emulating debug resources for the guest we want the
1719          * guest to control MSR_DE (enabling/disabling debug interrupts
1720          * as needed). Supporting both configurations at once is NOT
1721          * possible, so debug resources cannot be shared between QEMU
1722          * and the guest on the BookE architecture.
1723          * In the current design QEMU gets priority over the guest:
1724          * if QEMU is using the debug resources then the guest cannot
1725          * use them.
1726          * For software breakpoints QEMU uses a privileged instruction,
1727          * so there is no way we can be here because the guest set a
1728          * debug exception; the only possibility is that the guest
1729          * executed a privileged / illegal instruction, which is why we
1730          * inject a program interrupt.
1731          */
1732 
1733         cpu_synchronize_state(cs);
1734         /* env->nip is the PC, so increment it by 4 before using
1735          * ppc_cpu_do_interrupt(), which sets srr0 = env->nip - 4.
1736          */
1737         env->nip += 4;
1738         cs->exception_index = POWERPC_EXCP_PROGRAM;
1739         env->error_code = POWERPC_EXCP_INVAL;
1740         ppc_cpu_do_interrupt(cs);
1741     }
1742 
1743     return handle;
1744 }
1745 
1746 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1747 {
1748     PowerPCCPU *cpu = POWERPC_CPU(cs);
1749     CPUPPCState *env = &cpu->env;
1750     int ret;
1751 
1752     qemu_mutex_lock_iothread();
1753 
1754     switch (run->exit_reason) {
1755     case KVM_EXIT_DCR:
1756         if (run->dcr.is_write) {
1757             DPRINTF("handle dcr write\n");
1758             ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1759         } else {
1760             DPRINTF("handle dcr read\n");
1761             ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1762         }
1763         break;
1764     case KVM_EXIT_HLT:
1765         DPRINTF("handle halt\n");
1766         ret = kvmppc_handle_halt(cpu);
1767         break;
1768 #if defined(TARGET_PPC64)
1769     case KVM_EXIT_PAPR_HCALL:
1770         DPRINTF("handle PAPR hypercall\n");
1771         run->papr_hcall.ret = spapr_hypercall(cpu,
1772                                               run->papr_hcall.nr,
1773                                               run->papr_hcall.args);
1774         ret = 0;
1775         break;
1776 #endif
1777     case KVM_EXIT_EPR:
1778         DPRINTF("handle epr\n");
1779         run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1780         ret = 0;
1781         break;
1782     case KVM_EXIT_WATCHDOG:
1783         DPRINTF("handle watchdog expiry\n");
1784         watchdog_perform_action();
1785         ret = 0;
1786         break;
1787 
1788     case KVM_EXIT_DEBUG:
1789         DPRINTF("handle debug exception\n");
1790         if (kvm_handle_debug(cpu, run)) {
1791             ret = EXCP_DEBUG;
1792             break;
1793         }
1794         /* re-enter, this exception was guest-internal */
1795         ret = 0;
1796         break;
1797 
1798     default:
1799         fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1800         ret = -1;
1801         break;
1802     }
1803 
1804     qemu_mutex_unlock_iothread();
1805     return ret;
1806 }
1807 
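/*
 * The BookE timer helpers below use KVM's one-reg interface: the
 * pseudo-registers KVM_REG_PPC_OR_TSR and KVM_REG_PPC_CLEAR_TSR let
 * the kernel OR bits into / clear bits from the guest TSR on our
 * behalf, so QEMU does not have to read-modify-write a register the
 * kernel may be updating concurrently.  A caller would use them along
 * the lines of (bit value purely illustrative):
 *
 *     kvmppc_clear_tsr_bits(cpu, 1 << 26);
 */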
1808 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1809 {
1810     CPUState *cs = CPU(cpu);
1811     uint32_t bits = tsr_bits;
1812     struct kvm_one_reg reg = {
1813         .id = KVM_REG_PPC_OR_TSR,
1814         .addr = (uintptr_t) &bits,
1815     };
1816 
1817     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1818 }
1819 
1820 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1821 {
1823     CPUState *cs = CPU(cpu);
1824     uint32_t bits = tsr_bits;
1825     struct kvm_one_reg reg = {
1826         .id = KVM_REG_PPC_CLEAR_TSR,
1827         .addr = (uintptr_t) &bits,
1828     };
1829 
1830     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1831 }
1832 
1833 int kvmppc_set_tcr(PowerPCCPU *cpu)
1834 {
1835     CPUState *cs = CPU(cpu);
1836     CPUPPCState *env = &cpu->env;
1837     uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1838 
1839     struct kvm_one_reg reg = {
1840         .id = KVM_REG_PPC_TCR,
1841         .addr = (uintptr_t) &tcr,
1842     };
1843 
1844     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1845 }
1846 
1847 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1848 {
1849     CPUState *cs = CPU(cpu);
1850     int ret;
1851 
1852     if (!kvm_enabled()) {
1853         return -1;
1854     }
1855 
1856     if (!cap_ppc_watchdog) {
1857         printf("warning: KVM does not support watchdog");
1858         return -1;
1859     }
1860 
1861     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1862     if (ret < 0) {
1863         fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1864                 __func__, strerror(-ret));
1865         return ret;
1866     }
1867 
1868     return ret;
1869 }
1870 
1871 static int read_cpuinfo(const char *field, char *value, int len)
1872 {
1873     FILE *f;
1874     int ret = -1;
1875     int field_len = strlen(field);
1876     char line[512];
1877 
1878     f = fopen("/proc/cpuinfo", "r");
1879     if (!f) {
1880         return -1;
1881     }
1882 
1883     do {
1884         if (!fgets(line, sizeof(line), f)) {
1885             break;
1886         }
1887         if (!strncmp(line, field, field_len)) {
1888             pstrcpy(value, len, line);
1889             ret = 0;
1890             break;
1891         }
1892     } while (*line);
1893 
1894     fclose(f);
1895 
1896     return ret;
1897 }
1898 
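/*
 * On a ppc host, /proc/cpuinfo typically contains a line such as
 * "timebase        : 512000000"; parse the value after the colon.
 * If the line is missing or malformed, fall back to assuming a
 * 1 GHz timebase (NANOSECONDS_PER_SECOND).
 */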
1899 uint32_t kvmppc_get_tbfreq(void)
1900 {
1901     char line[512];
1902     char *ns;
1903     uint32_t retval = NANOSECONDS_PER_SECOND;
1904 
1905     if (read_cpuinfo("timebase", line, sizeof(line))) {
1906         return retval;
1907     }
1908 
1909     if (!(ns = strchr(line, ':'))) {
1910         return retval;
1911     }
1912 
1913     ns++;
1914 
1915     return atoi(ns);
1916 }
1917 
1918 bool kvmppc_get_host_serial(char **value)
1919 {
1920     return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
1921                                NULL);
1922 }
1923 
1924 bool kvmppc_get_host_model(char **value)
1925 {
1926     return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
1927 }
1928 
1929 /* Try to find a device tree node for a CPU with clock-frequency property */
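/* (The node found typically looks something like
 * /proc/device-tree/cpus/PowerPC,POWER8@0; the exact name depends on
 * the host CPU.) */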
1930 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1931 {
1932     struct dirent *dirp;
1933     DIR *dp;
1934 
1935     if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1936         printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1937         return -1;
1938     }
1939 
1940     buf[0] = '\0';
1941     while ((dirp = readdir(dp)) != NULL) {
1942         FILE *f;
1943         snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1944                  dirp->d_name);
1945         f = fopen(buf, "r");
1946         if (f) {
1947             snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1948             fclose(f);
1949             break;
1950         }
1951         buf[0] = '\0';
1952     }
1953     closedir(dp);
1954     if (buf[0] == '\0') {
1955         printf("Unknown host!\n");
1956         return -1;
1957     }
1958 
1959     return 0;
1960 }
1961 
1962 static uint64_t kvmppc_read_int_dt(const char *filename)
1963 {
1964     union {
1965         uint32_t v32;
1966         uint64_t v64;
1967     } u;
1968     FILE *f;
1969     int len;
1970 
1971     f = fopen(filename, "rb");
1972     if (!f) {
1973         return -1;
1974     }
1975 
1976     len = fread(&u, 1, sizeof(u), f);
1977     fclose(f);
1978     switch (len) {
1979     case 4:
1980         /* property is a 32-bit quantity */
1981         return be32_to_cpu(u.v32);
1982     case 8:
1983         return be64_to_cpu(u.v64);
1984     }
1985 
1986     return 0;
1987 }
1988 
1989 /* Read a CPU node property from the host device tree that's a single
1990  * integer (32-bit or 64-bit).  Returns -1 if the node or property
1991  * can't be found or opened, and 0 if the property format isn't
1992  * understood. */
1993 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1994 {
1995     char buf[PATH_MAX], *tmp;
1996     uint64_t val;
1997 
1998     if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1999         return -1;
2000     }
2001 
2002     tmp = g_strdup_printf("%s/%s", buf, propname);
2003     val = kvmppc_read_int_dt(tmp);
2004     g_free(tmp);
2005 
2006     return val;
2007 }
2008 
2009 uint64_t kvmppc_get_clockfreq(void)
2010 {
2011     return kvmppc_read_int_cpu_dt("clock-frequency");
2012 }
2013 
2014 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
2015 {
2016     PowerPCCPU *cpu = ppc_env_get_cpu(env);
2017     CPUState *cs = CPU(cpu);
2018 
2019     if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
2020         !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
2021         return 0;
2022     }
2023 
2024     return 1;
2025 }
2026 
2027 int kvmppc_get_hasidle(CPUPPCState *env)
2028 {
2029     struct kvm_ppc_pvinfo pvinfo;
2030 
2031     if (!kvmppc_get_pvinfo(env, &pvinfo) &&
2032         (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
2033         return 1;
2034     }
2035 
2036     return 0;
2037 }
2038 
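/*
 * Fill buf with the instruction sequence a guest should use to make
 * hypercalls, as reported by KVM_PPC_GET_PVINFO.  Returns 0 on
 * success; returns 1 and installs the "always fail" sequence below
 * when the kernel cannot tell us.
 */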
2039 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
2040 {
2041     uint32_t *hc = (uint32_t *)buf;
2042     struct kvm_ppc_pvinfo pvinfo;
2043 
2044     if (!kvmppc_get_pvinfo(env, &pvinfo)) {
2045         memcpy(buf, pvinfo.hcall, buf_len);
2046         return 0;
2047     }
2048 
2049     /*
2050      * Fall back to hypercalls that always fail, regardless of endianness:
2051      *
2052      *     tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
2053      *     li r3, -1
2054      *     b .+8       (becomes nop in wrong endian)
2055      *     bswap32(li r3, -1)
2056      */
2057 
2058     hc[0] = cpu_to_be32(0x08000048);
2059     hc[1] = cpu_to_be32(0x3860ffff);
2060     hc[2] = cpu_to_be32(0x48000008);
2061     hc[3] = cpu_to_be32(bswap32(0x3860ffff));
2062 
2063     return 1;
2064 }
2065 
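/*
 * Ask the kernel to enable in-kernel handling of a single sPAPR
 * hypercall (rather than exiting to QEMU for it), using the
 * KVM_CAP_PPC_ENABLE_HCALL capability.
 */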
2066 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
2067 {
2068     return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
2069 }
2070 
2071 void kvmppc_enable_logical_ci_hcalls(void)
2072 {
2073     /*
2074      * FIXME: it would be nice if we could detect the cases where
2075      * we're using a device which requires the in-kernel
2076      * implementation of these hcalls but the kernel lacks them, and
2077      * produce a warning in that case.
2078      */
2079     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
2080     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
2081 }
2082 
2083 void kvmppc_enable_set_mode_hcall(void)
2084 {
2085     kvmppc_enable_hcall(kvm_state, H_SET_MODE);
2086 }
2087 
2088 void kvmppc_enable_clear_ref_mod_hcalls(void)
2089 {
2090     kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
2091     kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
2092 }
2093 
2094 void kvmppc_set_papr(PowerPCCPU *cpu)
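/*
 * Switch the vCPU into PAPR (pseries) mode via KVM_CAP_PPC_PAPR.  The
 * sPAPR machine cannot run without it, hence the fatal error on
 * failure.
 */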
2095 {
2096     CPUState *cs = CPU(cpu);
2097     int ret;
2098 
2099     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2100     if (ret) {
2101         error_report("This vCPU type or KVM version does not support PAPR");
2102         exit(1);
2103     }
2104 
2105     /* Update the capability flag so we sync the right information
2106      * with kvm */
2107     cap_papr = 1;
2108 }
2109 
2110 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr)
2111 {
2112     return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr);
2113 }
2114 
2115 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2116 {
2117     CPUState *cs = CPU(cpu);
2118     int ret;
2119 
2120     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2121     if (ret && mpic_proxy) {
2122         error_report("This KVM version does not support EPR");
2123         exit(1);
2124     }
2125 }
2126 
2127 int kvmppc_smt_threads(void)
2128 {
2129     return cap_ppc_smt ? cap_ppc_smt : 1;
2130 }
2131 
2132 int kvmppc_set_smt_threads(int smt)
2133 {
2134     int ret;
2135 
2136     ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_SMT, 0, smt, 0);
2137     if (!ret) {
2138         cap_ppc_smt = smt;
2139     }
2140     return ret;
2141 }
2142 
2143 void kvmppc_hint_smt_possible(Error **errp)
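/*
 * cap_ppc_smt_possible is a bitmask of the VSMT modes the host kernel
 * supports; append them (largest first) to the error hint, producing
 * something like "Available VSMT modes: 8 4 2 1." on a typical host.
 */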
2144 {
2145     int i;
2146     GString *g;
2147     char *s;
2148 
2149     assert(kvm_enabled());
2150     if (cap_ppc_smt_possible) {
2151         g = g_string_new("Available VSMT modes:");
2152         for (i = 63; i >= 0; i--) {
2153             if ((1UL << i) & cap_ppc_smt_possible) {
2154                 g_string_append_printf(g, " %lu", (1UL << i));
2155             }
2156         }
2157         s = g_string_free(g, false);
2158         error_append_hint(errp, "%s.\n", s);
2159         g_free(s);
2160     } else {
2161         error_append_hint(errp,
2162                           "This KVM seems to be too old to support VSMT.\n");
2163     }
2164 }
2165 
2166 
2167 #ifdef TARGET_PPC64
2168 off_t kvmppc_alloc_rma(void **rma)
2169 {
2170     off_t size;
2171     int fd;
2172     struct kvm_allocate_rma ret;
2173 
2174     /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
2175      * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
2176      *                      not necessary on this hardware
2177      * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
2178      *
2179      * FIXME: We should allow the user to force contiguous RMA
2180      * allocation in the cap_ppc_rma==1 case.
2181      */
2182     if (cap_ppc_rma < 2) {
2183         return 0;
2184     }
2185 
2186     fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
2187     if (fd < 0) {
2188         fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
2189                 strerror(errno));
2190         return -1;
2191     }
2192 
2193     size = MIN(ret.rma_size, 256ul << 20);
2194 
2195     *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2196     if (*rma == MAP_FAILED) {
2197         fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
2198         return -1;
2199     }
2200 
2201     return size;
2202 }
2203 
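/*
 * Clamp the RMA to what the hash page table can map.  A HPT of
 * 2^hash_shift bytes holds 2^(hash_shift - 7) 128-byte HPTE groups,
 * and the VRMA uses (roughly) one HPTE group per page of the chosen
 * size, hence the "best_page_shift + hash_shift - 7" limit below.
 * This is our reading of the formula rather than anything stated by
 * the kernel API.
 */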
2204 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2205 {
2206     struct kvm_ppc_smmu_info info;
2207     long rampagesize, best_page_shift;
2208     int i;
2209 
2210     if (cap_ppc_rma >= 2) {
2211         return current_size;
2212     }
2213 
2214     /* Find the largest hardware supported page size that's less than
2215      * or equal to the (logical) backing page size of guest RAM */
2216     kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
2217     rampagesize = qemu_getrampagesize();
2218     best_page_shift = 0;
2219 
2220     for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2221         struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2222 
2223         if (!sps->page_shift) {
2224             continue;
2225         }
2226 
2227         if ((sps->page_shift > best_page_shift)
2228             && ((1UL << sps->page_shift) <= rampagesize)) {
2229             best_page_shift = sps->page_shift;
2230         }
2231     }
2232 
2233     return MIN(current_size,
2234                1ULL << (best_page_shift + hash_shift - 7));
2235 }
2236 #endif
2237 
2238 bool kvmppc_spapr_use_multitce(void)
2239 {
2240     return cap_spapr_multitce;
2241 }
2242 
2243 int kvmppc_spapr_enable_inkernel_multitce(void)
2244 {
2245     int ret;
2246 
2247     ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2248                             H_PUT_TCE_INDIRECT, 1);
2249     if (!ret) {
2250         ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2251                                 H_STUFF_TCE, 1);
2252     }
2253 
2254     return ret;
2255 }
2256 
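/*
 * Create an in-kernel TCE (IOMMU) table for the given LIOBN, using the
 * 64-bit ioctl when available, and mmap it so QEMU and the kernel
 * share the same table.  Returning NULL tells the caller to fall back
 * to a userspace-managed table instead.
 */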
2257 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
2258                               uint64_t bus_offset, uint32_t nb_table,
2259                               int *pfd, bool need_vfio)
2260 {
2261     long len;
2262     int fd;
2263     void *table;
2264 
2265     /* Must set fd to -1 so we don't try to munmap when called for
2266      * destroying the table, which the upper layers -will- do
2267      */
2268     *pfd = -1;
2269     if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2270         return NULL;
2271     }
2272 
2273     if (cap_spapr_tce_64) {
2274         struct kvm_create_spapr_tce_64 args = {
2275             .liobn = liobn,
2276             .page_shift = page_shift,
2277             .offset = bus_offset >> page_shift,
2278             .size = nb_table,
2279             .flags = 0
2280         };
2281         fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE_64, &args);
2282         if (fd < 0) {
2283             fprintf(stderr,
2284                     "KVM: Failed to create TCE64 table for liobn 0x%x\n",
2285                     liobn);
2286             return NULL;
2287         }
2288     } else if (cap_spapr_tce) {
2289         uint64_t window_size = (uint64_t) nb_table << page_shift;
2290         struct kvm_create_spapr_tce args = {
2291             .liobn = liobn,
2292             .window_size = window_size,
2293         };
2294         if ((window_size != args.window_size) || bus_offset) {
2295             return NULL;
2296         }
2297         fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2298         if (fd < 0) {
2299             fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2300                     liobn);
2301             return NULL;
2302         }
2303     } else {
2304         return NULL;
2305     }
2306 
2307     len = nb_table * sizeof(uint64_t);
2308     /* FIXME: round this up to page size */
2309 
2310     table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2311     if (table == MAP_FAILED) {
2312         fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2313                 liobn);
2314         close(fd);
2315         return NULL;
2316     }
2317 
2318     *pfd = fd;
2319     return table;
2320 }
2321 
2322 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
2323 {
2324     long len;
2325 
2326     if (fd < 0) {
2327         return -1;
2328     }
2329 
2330     len = nb_table * sizeof(uint64_t);
2331     if ((munmap(table, len) < 0) ||
2332         (close(fd) < 0)) {
2333         fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
2334                 strerror(errno));
2335         /* Leak the table */
2336     }
2337 
2338     return 0;
2339 }
2340 
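/*
 * Returns 0 when QEMU should allocate the hash page table itself
 * (full emulation or PR KVM), a positive value giving the log2 size
 * of the table the kernel allocated, or a negative errno on failure.
 */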
2341 int kvmppc_reset_htab(int shift_hint)
2342 {
2343     uint32_t shift = shift_hint;
2344 
2345     if (!kvm_enabled()) {
2346         /* Full emulation, tell caller to allocate htab itself */
2347         return 0;
2348     }
2349     if (kvm_vm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2350         int ret;
2351         ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2352         if (ret == -ENOTTY) {
2353             /* At least some versions of PR KVM advertise the
2354              * capability, but don't implement the ioctl().  Oops.
2355              * Return 0 so that we allocate the htab in qemu, as is
2356              * correct for PR. */
2357             return 0;
2358         } else if (ret < 0) {
2359             return ret;
2360         }
2361         return shift;
2362     }
2363 
2364     /* We have a kernel that predates the htab reset calls.  For PR
2365      * KVM, we need to allocate the htab ourselves; an HV KVM of this
2366      * era has already allocated a 16MB fixed-size hash table. */
2367     if (kvmppc_is_pr(kvm_state)) {
2368         /* PR - tell caller to allocate htab */
2369         return 0;
2370     } else {
2371         /* HV - assume 16MB kernel allocated htab */
2372         return 24;
2373     }
2374 }
2375 
2376 static inline uint32_t mfpvr(void)
2377 {
2378     uint32_t pvr;
2379 
2380     asm ("mfpvr %0"
2381          : "=r"(pvr));
2382     return pvr;
2383 }
2384 
2385 static void alter_insns(uint64_t *word, uint64_t flags, bool on)
2386 {
2387     if (on) {
2388         *word |= flags;
2389     } else {
2390         *word &= ~flags;
2391     }
2392 }
2393 
2394 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2395 {
2396     PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2397     uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2398     uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2399 
2400     /* Now fix up the class with information we can query from the host */
2401     pcc->pvr = mfpvr();
2402 
2403     alter_insns(&pcc->insns_flags, PPC_ALTIVEC,
2404                 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_ALTIVEC);
2405     alter_insns(&pcc->insns_flags2, PPC2_VSX,
2406                 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_VSX);
2407     alter_insns(&pcc->insns_flags2, PPC2_DFP,
2408                 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_DFP);
2409 
2410     if (dcache_size != -1) {
2411         pcc->l1_dcache_size = dcache_size;
2412     }
2413 
2414     if (icache_size != -1) {
2415         pcc->l1_icache_size = icache_size;
2416     }
2417 
2418 #if defined(TARGET_PPC64)
2419     pcc->radix_page_info = kvm_get_radix_page_info();
2420 
2421     if ((pcc->pvr & 0xffffff00) == CPU_POWERPC_POWER9_DD1) {
2422         /*
2423          * POWER9 DD1 has some bugs which make it not really ISA 3.00
2424          * compliant.  More importantly, advertising ISA 3.00
2425          * architected mode may prevent guests from activating
2426          * necessary DD1 workarounds.
2427          */
2428         pcc->pcr_supported &= ~(PCR_COMPAT_3_00 | PCR_COMPAT_2_07
2429                                 | PCR_COMPAT_2_06 | PCR_COMPAT_2_05);
2430     }
2431 #endif /* defined(TARGET_PPC64) */
2432 }
2433 
2434 bool kvmppc_has_cap_epr(void)
2435 {
2436     return cap_epr;
2437 }
2438 
2439 bool kvmppc_has_cap_fixup_hcalls(void)
2440 {
2441     return cap_fixup_hcalls;
2442 }
2443 
2444 bool kvmppc_has_cap_htm(void)
2445 {
2446     return cap_htm;
2447 }
2448 
2449 bool kvmppc_has_cap_mmu_radix(void)
2450 {
2451     return cap_mmu_radix;
2452 }
2453 
2454 bool kvmppc_has_cap_mmu_hash_v3(void)
2455 {
2456     return cap_mmu_hash_v3;
2457 }
2458 
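/*
 * Look up the CPU class matching the host PVR, falling back to a
 * mask-based match (which typically ignores the revision part of the
 * PVR) if there is no exact model for it.
 */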
2459 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
2460 {
2461     uint32_t host_pvr = mfpvr();
2462     PowerPCCPUClass *pvr_pcc;
2463 
2464     pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2465     if (pvr_pcc == NULL) {
2466         pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2467     }
2468 
2469     return pvr_pcc;
2470 }
2471 
2472 static int kvm_ppc_register_host_cpu_type(MachineState *ms)
2473 {
2474     TypeInfo type_info = {
2475         .name = TYPE_HOST_POWERPC_CPU,
2476         .class_init = kvmppc_host_cpu_class_init,
2477     };
2478     MachineClass *mc = MACHINE_GET_CLASS(ms);
2479     PowerPCCPUClass *pvr_pcc;
2480     ObjectClass *oc;
2481     DeviceClass *dc;
2482     int i;
2483 
2484     pvr_pcc = kvm_ppc_get_host_cpu_class();
2485     if (pvr_pcc == NULL) {
2486         return -1;
2487     }
2488     type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2489     type_register(&type_info);
2490     if (object_dynamic_cast(OBJECT(ms), TYPE_SPAPR_MACHINE)) {
2491         /* override TCG default cpu type with 'host' cpu model */
2492         mc->default_cpu_type = TYPE_HOST_POWERPC_CPU;
2493     }
2494 
2495     oc = object_class_by_name(type_info.name);
2496     g_assert(oc);
2497 
2498     /*
2499      * Update generic CPU family class alias (e.g. on a POWER8NVL host,
2500      * we want "POWER8" to be a "family" alias that points to the current
2501      * host CPU type, too)
2502      */
2503     dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc));
2504     for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) {
2505         if (strcasecmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) {
2506             char *suffix;
2507 
2508             ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc));
2509             suffix = strstr(ppc_cpu_aliases[i].model, POWERPC_CPU_TYPE_SUFFIX);
2510             if (suffix) {
2511                 *suffix = 0;
2512             }
2513             break;
2514         }
2515     }
2516 
2517     return 0;
2518 }
2519 
2520 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2521 {
2522     struct kvm_rtas_token_args args = {
2523         .token = token,
2524     };
2525 
2526     if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2527         return -ENOENT;
2528     }
2529 
2530     strncpy(args.name, function, sizeof(args.name));
2531 
2532     return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2533 }
2534 
2535 int kvmppc_get_htab_fd(bool write, uint64_t index, Error **errp)
2536 {
2537     struct kvm_get_htab_fd s = {
2538         .flags = write ? KVM_GET_HTAB_WRITE : 0,
2539         .start_index = index,
2540     };
2541     int ret;
2542 
2543     if (!cap_htab_fd) {
2544         error_setg(errp, "KVM version doesn't support %s the HPT",
2545                    write ? "writing" : "reading");
2546         return -ENOTSUP;
2547     }
2548 
2549     ret = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2550     if (ret < 0) {
2551         error_setg(errp, "Unable to open fd for %s HPT %s KVM: %s",
2552                    write ? "writing" : "reading", write ? "to" : "from",
2553                    strerror(errno));
2554         return -errno;
2555     }
2556 
2557     return ret;
2558 }
2559 
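/*
 * Stream HPT contents from the kernel's HTAB fd into the migration
 * stream.  Each record read from the fd is a kvm_get_htab_header
 * (index, n_valid, n_invalid) followed by n_valid entries of
 * HASH_PTE_SIZE_64 bytes; we forward the same layout with the header
 * fields written big-endian.  Returns 1 once the fd reports EOF, or 0
 * if we stopped early because max_ns expired.
 */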
2560 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2561 {
2562     int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2563     uint8_t buf[bufsize];
2564     ssize_t rc;
2565 
2566     do {
2567         rc = read(fd, buf, bufsize);
2568         if (rc < 0) {
2569             fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2570                     strerror(errno));
2571             return rc;
2572         } else if (rc) {
2573             uint8_t *buffer = buf;
2574             ssize_t n = rc;
2575             while (n) {
2576                 struct kvm_get_htab_header *head =
2577                     (struct kvm_get_htab_header *) buffer;
2578                 size_t chunksize = sizeof(*head) +
2579                      HASH_PTE_SIZE_64 * head->n_valid;
2580 
2581                 qemu_put_be32(f, head->index);
2582                 qemu_put_be16(f, head->n_valid);
2583                 qemu_put_be16(f, head->n_invalid);
2584                 qemu_put_buffer(f, (void *)(head + 1),
2585                                 HASH_PTE_SIZE_64 * head->n_valid);
2586 
2587                 buffer += chunksize;
2588                 n -= chunksize;
2589             }
2590         }
2591     } while ((rc != 0)
2592              && ((max_ns < 0)
2593                  || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2594 
2595     return (rc == 0) ? 1 : 0;
2596 }
2597 
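/*
 * Counterpart of kvmppc_save_htab(): rebuild one header plus n_valid
 * HPTEs from the migration stream and write the chunk to the HTAB fd.
 */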
2598 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2599                            uint16_t n_valid, uint16_t n_invalid)
2600 {
2601     struct kvm_get_htab_header *buf;
2602     size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
2603     ssize_t rc;
2604 
2605     buf = alloca(chunksize);
2606     buf->index = index;
2607     buf->n_valid = n_valid;
2608     buf->n_invalid = n_invalid;
2609 
2610     qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
2611 
2612     rc = write(fd, buf, chunksize);
2613     if (rc < 0) {
2614         fprintf(stderr, "Error writing KVM hash table: %s\n",
2615                 strerror(errno));
2616         return rc;
2617     }
2618     if (rc != chunksize) {
2619         /* We should never get a short write on a single chunk */
2620         fprintf(stderr, "Short write, restoring KVM hash table\n");
2621         return -1;
2622     }
2623     return 0;
2624 }
2625 
2626 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2627 {
2628     return true;
2629 }
2630 
2631 void kvm_arch_init_irq_routing(KVMState *s)
2632 {
2633 }
2634 
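/*
 * Read n HPTEs starting at index ptex through the HTAB fd.  Ranges the
 * kernel reports as invalid carry no data in the stream, so the
 * corresponding entries in hptes[] are zeroed instead.
 */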
2635 void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n)
2636 {
2637     int fd, rc;
2638     int i;
2639 
2640     fd = kvmppc_get_htab_fd(false, ptex, &error_abort);
2641 
2642     i = 0;
2643     while (i < n) {
2644         struct kvm_get_htab_header *hdr;
2645         int m = n < HPTES_PER_GROUP ? n : HPTES_PER_GROUP;
2646         char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64];
2647 
2648         rc = read(fd, buf, sizeof(buf));
2649         if (rc < 0) {
2650             hw_error("kvmppc_read_hptes: Unable to read HPTEs");
2651         }
2652 
2653         hdr = (struct kvm_get_htab_header *)buf;
2654         while ((i < n) && ((char *)hdr < (buf + rc))) {
2655             int invalid = hdr->n_invalid, valid = hdr->n_valid;
2656 
2657             if (hdr->index != (ptex + i)) {
2658                 hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32
2659                          " != (%"HWADDR_PRIu" + %d)", hdr->index, ptex, i);
2660             }
2661 
2662             if (n - i < valid) {
2663                 valid = n - i;
2664             }
2665             memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * valid);
2666             i += valid;
2667 
2668             if ((n - i) < invalid) {
2669                 invalid = n - i;
2670             }
2671             memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64);
2672             i += invalid;
2673 
2674             hdr = (struct kvm_get_htab_header *)
2675                 ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid);
2676         }
2677     }
2678 
2679     close(fd);
2680 }
2681 
2682 void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1)
2683 {
2684     int fd, rc;
2685     struct {
2686         struct kvm_get_htab_header hdr;
2687         uint64_t pte0;
2688         uint64_t pte1;
2689     } buf;
2690 
2691     fd = kvmppc_get_htab_fd(true, 0 /* Ignored */, &error_abort);
2692 
2693     buf.hdr.n_valid = 1;
2694     buf.hdr.n_invalid = 0;
2695     buf.hdr.index = ptex;
2696     buf.pte0 = cpu_to_be64(pte0);
2697     buf.pte1 = cpu_to_be64(pte1);
2698 
2699     rc = write(fd, &buf, sizeof(buf));
2700     if (rc != sizeof(buf)) {
2701         hw_error("kvmppc_write_hpte: Unable to update KVM HPT");
2702     }
2703     close(fd);
2704 }
2705 
2706 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2707                              uint64_t address, uint32_t data, PCIDevice *dev)
2708 {
2709     return 0;
2710 }
2711 
2712 int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
2713                                 int vector, PCIDevice *dev)
2714 {
2715     return 0;
2716 }
2717 
2718 int kvm_arch_release_virq_post(int virq)
2719 {
2720     return 0;
2721 }
2722 
2723 int kvm_arch_msi_data_to_gsi(uint32_t data)
2724 {
2725     return data & 0xffff;
2726 }
2727 
2728 int kvmppc_enable_hwrng(void)
2729 {
2730     if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2731         return -1;
2732     }
2733 
2734     return kvmppc_enable_hcall(kvm_state, H_RANDOM);
2735 }
2736 
2737 void kvmppc_check_papr_resize_hpt(Error **errp)
2738 {
2739     if (!kvm_enabled()) {
2740         return; /* No KVM, we're good */
2741     }
2742 
2743     if (cap_resize_hpt) {
2744         return; /* Kernel has explicit support, we're good */
2745     }
2746 
2747     /* Otherwise fallback on looking for PR KVM */
2748     if (kvmppc_is_pr(kvm_state)) {
2749         return;
2750     }
2751 
2752     error_setg(errp,
2753                "Hash page table resizing not available with this KVM version");
2754 }
2755 
2756 int kvmppc_resize_hpt_prepare(PowerPCCPU *cpu, target_ulong flags, int shift)
2757 {
2758     CPUState *cs = CPU(cpu);
2759     struct kvm_ppc_resize_hpt rhpt = {
2760         .flags = flags,
2761         .shift = shift,
2762     };
2763 
2764     if (!cap_resize_hpt) {
2765         return -ENOSYS;
2766     }
2767 
2768     return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_PREPARE, &rhpt);
2769 }
2770 
2771 int kvmppc_resize_hpt_commit(PowerPCCPU *cpu, target_ulong flags, int shift)
2772 {
2773     CPUState *cs = CPU(cpu);
2774     struct kvm_ppc_resize_hpt rhpt = {
2775         .flags = flags,
2776         .shift = shift,
2777     };
2778 
2779     if (!cap_resize_hpt) {
2780         return -ENOSYS;
2781     }
2782 
2783     return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_COMMIT, &rhpt);
2784 }
2785 
2786 /*
2787  * This is a helper function to detect a post migration scenario
2788  * in which a guest, running as KVM-HV, freezes in cpu_post_load because
2789  * the guest kernel can't handle a PVR value other than the actual host
2790  * PVR in KVM_SET_SREGS, even if pvr_match() returns true.
2791  *
2792  * If we don't have cap_ppc_pvr_compat and we're not running in PR
2793  * (so, we're HV), return true. The workaround itself is done in
2794  * cpu_post_load.
2795  *
2796  * The order here is important: we'll only check for KVM PR as a
2797  * fallback if the guest kernel can't handle the situation itself.
2798  * We want to avoid, as much as possible, querying the running KVM
2799  * type at the QEMU level.
2800  */
2801 bool kvmppc_pvr_workaround_required(PowerPCCPU *cpu)
2802 {
2803     CPUState *cs = CPU(cpu);
2804 
2805     if (!kvm_enabled()) {
2806         return false;
2807     }
2808 
2809     if (cap_ppc_pvr_compat) {
2810         return false;
2811     }
2812 
2813     return !kvmppc_is_pr(cs->kvm_state);
2814 }
2815