1 /* 2 * PowerPC implementation of KVM hooks 3 * 4 * Copyright IBM Corp. 2007 5 * Copyright (C) 2011 Freescale Semiconductor, Inc. 6 * 7 * Authors: 8 * Jerone Young <jyoung5@us.ibm.com> 9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com> 10 * Hollis Blanchard <hollisb@us.ibm.com> 11 * 12 * This work is licensed under the terms of the GNU GPL, version 2 or later. 13 * See the COPYING file in the top-level directory. 14 * 15 */ 16 17 #include "qemu/osdep.h" 18 #include <dirent.h> 19 #include <sys/ioctl.h> 20 #include <sys/vfs.h> 21 22 #include <linux/kvm.h> 23 24 #include "qemu-common.h" 25 #include "qapi/error.h" 26 #include "qemu/error-report.h" 27 #include "cpu.h" 28 #include "cpu-models.h" 29 #include "qemu/timer.h" 30 #include "sysemu/sysemu.h" 31 #include "sysemu/hw_accel.h" 32 #include "kvm_ppc.h" 33 #include "sysemu/cpus.h" 34 #include "sysemu/device_tree.h" 35 #include "mmu-hash64.h" 36 37 #include "hw/sysbus.h" 38 #include "hw/ppc/spapr.h" 39 #include "hw/ppc/spapr_vio.h" 40 #include "hw/ppc/spapr_cpu_core.h" 41 #include "hw/ppc/ppc.h" 42 #include "sysemu/watchdog.h" 43 #include "trace.h" 44 #include "exec/gdbstub.h" 45 #include "exec/memattrs.h" 46 #include "exec/ram_addr.h" 47 #include "sysemu/hostmem.h" 48 #include "qemu/cutils.h" 49 #include "qemu/mmap-alloc.h" 50 #if defined(TARGET_PPC64) 51 #include "hw/ppc/spapr_cpu_core.h" 52 #endif 53 #include "elf.h" 54 #include "sysemu/kvm_int.h" 55 56 //#define DEBUG_KVM 57 58 #ifdef DEBUG_KVM 59 #define DPRINTF(fmt, ...) \ 60 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0) 61 #else 62 #define DPRINTF(fmt, ...) \ 63 do { } while (0) 64 #endif 65 66 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/" 67 68 const KVMCapabilityInfo kvm_arch_required_capabilities[] = { 69 KVM_CAP_LAST_INFO 70 }; 71 72 static int cap_interrupt_unset = false; 73 static int cap_interrupt_level = false; 74 static int cap_segstate; 75 static int cap_booke_sregs; 76 static int cap_ppc_smt; 77 static int cap_ppc_rma; 78 static int cap_spapr_tce; 79 static int cap_spapr_tce_64; 80 static int cap_spapr_multitce; 81 static int cap_spapr_vfio; 82 static int cap_hior; 83 static int cap_one_reg; 84 static int cap_epr; 85 static int cap_ppc_watchdog; 86 static int cap_papr; 87 static int cap_htab_fd; 88 static int cap_fixup_hcalls; 89 static int cap_htm; /* Hardware transactional memory support */ 90 static int cap_mmu_radix; 91 static int cap_mmu_hash_v3; 92 static int cap_resize_hpt; 93 94 static uint32_t debug_inst_opcode; 95 96 /* XXX We have a race condition where we actually have a level triggered 97 * interrupt, but the infrastructure can't expose that yet, so the guest 98 * takes but ignores it, goes to sleep and never gets notified that there's 99 * still an interrupt pending. 100 * 101 * As a quick workaround, let's just wake up again 20 ms after we injected 102 * an interrupt. That way we can assure that we're always reinjecting 103 * interrupts in case the guest swallowed them. 104 */ 105 static QEMUTimer *idle_timer; 106 107 static void kvm_kick_cpu(void *opaque) 108 { 109 PowerPCCPU *cpu = opaque; 110 111 qemu_cpu_kick(CPU(cpu)); 112 } 113 114 /* Check whether we are running with KVM-PR (instead of KVM-HV). This 115 * should only be used for fallback tests - generally we should use 116 * explicit capabilities for the features we want, rather than 117 * assuming what is/isn't available depending on the KVM variant. 
*/ 118 static bool kvmppc_is_pr(KVMState *ks) 119 { 120 /* Assume KVM-PR if the GET_PVINFO capability is available */ 121 return kvm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0; 122 } 123 124 static int kvm_ppc_register_host_cpu_type(void); 125 126 int kvm_arch_init(MachineState *ms, KVMState *s) 127 { 128 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ); 129 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL); 130 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE); 131 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS); 132 cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT); 133 cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA); 134 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE); 135 cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64); 136 cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE); 137 cap_spapr_vfio = false; 138 cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG); 139 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR); 140 cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR); 141 cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG); 142 /* Note: we don't set cap_papr here, because this capability is 143 * only activated after this by kvmppc_set_papr() */ 144 cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD); 145 cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL); 146 cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM); 147 cap_mmu_radix = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_RADIX); 148 cap_mmu_hash_v3 = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_HASH_V3); 149 cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT); 150 151 if (!cap_interrupt_level) { 152 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the " 153 "VM to stall at times!\n"); 154 } 155 156 kvm_ppc_register_host_cpu_type(); 157 158 return 0; 159 } 160 161 int kvm_arch_irqchip_create(MachineState *ms, KVMState *s) 162 { 163 return 0; 164 } 165 166 static int kvm_arch_sync_sregs(PowerPCCPU *cpu) 167 { 168 CPUPPCState *cenv = &cpu->env; 169 CPUState *cs = CPU(cpu); 170 struct kvm_sregs sregs; 171 int ret; 172 173 if (cenv->excp_model == POWERPC_EXCP_BOOKE) { 174 /* What we're really trying to say is "if we're on BookE, we use 175 the native PVR for now". This is the only sane way to check 176 it though, so we potentially confuse users that they can run 177 BookE guests on BookS. 
           Let's hope nobody dares enough :) */
        return 0;
    } else {
        if (!cap_segstate) {
            fprintf(stderr, "kvm error: missing PVR setting capability\n");
            return -ENOSYS;
        }
    }

    ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
    if (ret) {
        return ret;
    }

    sregs.pvr = cenv->spr[SPR_PVR];
    return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
}

/* Set up a shared TLB array with KVM */
static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_book3e_206_tlb_params params = {};
    struct kvm_config_tlb cfg = {};
    unsigned int entries = 0;
    int ret, i;

    if (!kvm_enabled() ||
        !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
        return 0;
    }

    assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);

    for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
        params.tlb_sizes[i] = booke206_tlb_size(env, i);
        params.tlb_ways[i] = booke206_tlb_ways(env, i);
        entries += params.tlb_sizes[i];
    }

    assert(entries == env->nb_tlb);
    assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));

    env->tlb_dirty = true;

    cfg.array = (uintptr_t)env->tlb.tlbm;
    cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
    cfg.params = (uintptr_t)&params;
    cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;

    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
    if (ret < 0) {
        fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
                __func__, strerror(-ret));
        return ret;
    }

    env->kvm_sw_tlb = true;
    return 0;
}


#if defined(TARGET_PPC64)
static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
                                       struct kvm_ppc_smmu_info *info)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);

    memset(info, 0, sizeof(*info));

    /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
     * need to "guess" what the supported page sizes are.
     *
     * For that to work we make a few assumptions:
     *
     * - Check whether we are running "PR" KVM which only supports 4K
     *   and 16M pages, but supports them regardless of the backing
     *   store characteristics. We also don't support 1T segments.
     *
     *   This is safe as if HV KVM ever supports that capability or PR
     *   KVM grows support for more page/segment sizes, those versions
     *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
     *   will not hit this fallback.
     *
     * - Else we are running HV KVM. This means we only support page
     *   sizes that fit in the backing store. Additionally we only
     *   advertise 64K pages if the processor is ARCH 2.06 and we assume
     *   P7 encodings for the SLB and hash table. Here too, we assume
     *   support for any newer processor will mean a kernel that
     *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
     *   this fallback.
270 */ 271 if (kvmppc_is_pr(cs->kvm_state)) { 272 /* No flags */ 273 info->flags = 0; 274 info->slb_size = 64; 275 276 /* Standard 4k base page size segment */ 277 info->sps[0].page_shift = 12; 278 info->sps[0].slb_enc = 0; 279 info->sps[0].enc[0].page_shift = 12; 280 info->sps[0].enc[0].pte_enc = 0; 281 282 /* Standard 16M large page size segment */ 283 info->sps[1].page_shift = 24; 284 info->sps[1].slb_enc = SLB_VSID_L; 285 info->sps[1].enc[0].page_shift = 24; 286 info->sps[1].enc[0].pte_enc = 0; 287 } else { 288 int i = 0; 289 290 /* HV KVM has backing store size restrictions */ 291 info->flags = KVM_PPC_PAGE_SIZES_REAL; 292 293 if (env->mmu_model & POWERPC_MMU_1TSEG) { 294 info->flags |= KVM_PPC_1T_SEGMENTS; 295 } 296 297 if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 || 298 POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) { 299 info->slb_size = 32; 300 } else { 301 info->slb_size = 64; 302 } 303 304 /* Standard 4k base page size segment */ 305 info->sps[i].page_shift = 12; 306 info->sps[i].slb_enc = 0; 307 info->sps[i].enc[0].page_shift = 12; 308 info->sps[i].enc[0].pte_enc = 0; 309 i++; 310 311 /* 64K on MMU 2.06 and later */ 312 if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 || 313 POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) { 314 info->sps[i].page_shift = 16; 315 info->sps[i].slb_enc = 0x110; 316 info->sps[i].enc[0].page_shift = 16; 317 info->sps[i].enc[0].pte_enc = 1; 318 i++; 319 } 320 321 /* Standard 16M large page size segment */ 322 info->sps[i].page_shift = 24; 323 info->sps[i].slb_enc = SLB_VSID_L; 324 info->sps[i].enc[0].page_shift = 24; 325 info->sps[i].enc[0].pte_enc = 0; 326 } 327 } 328 329 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info) 330 { 331 CPUState *cs = CPU(cpu); 332 int ret; 333 334 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) { 335 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info); 336 if (ret == 0) { 337 return; 338 } 339 } 340 341 kvm_get_fallback_smmu_info(cpu, info); 342 } 343 344 struct ppc_radix_page_info *kvm_get_radix_page_info(void) 345 { 346 KVMState *s = KVM_STATE(current_machine->accelerator); 347 struct ppc_radix_page_info *radix_page_info; 348 struct kvm_ppc_rmmu_info rmmu_info; 349 int i; 350 351 if (!kvm_check_extension(s, KVM_CAP_PPC_MMU_RADIX)) { 352 return NULL; 353 } 354 if (kvm_vm_ioctl(s, KVM_PPC_GET_RMMU_INFO, &rmmu_info)) { 355 return NULL; 356 } 357 radix_page_info = g_malloc0(sizeof(*radix_page_info)); 358 radix_page_info->count = 0; 359 for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) { 360 if (rmmu_info.ap_encodings[i]) { 361 radix_page_info->entries[i] = rmmu_info.ap_encodings[i]; 362 radix_page_info->count++; 363 } 364 } 365 return radix_page_info; 366 } 367 368 target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu, 369 bool radix, bool gtse, 370 uint64_t proc_tbl) 371 { 372 CPUState *cs = CPU(cpu); 373 int ret; 374 uint64_t flags = 0; 375 struct kvm_ppc_mmuv3_cfg cfg = { 376 .process_table = proc_tbl, 377 }; 378 379 if (radix) { 380 flags |= KVM_PPC_MMUV3_RADIX; 381 } 382 if (gtse) { 383 flags |= KVM_PPC_MMUV3_GTSE; 384 } 385 cfg.flags = flags; 386 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_CONFIGURE_V3_MMU, &cfg); 387 switch (ret) { 388 case 0: 389 return H_SUCCESS; 390 case -EINVAL: 391 return H_PARAMETER; 392 case -ENODEV: 393 return H_NOT_AVAILABLE; 394 default: 395 return H_HARDWARE; 396 } 397 } 398 399 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift) 400 { 401 if (!(flags & 
KVM_PPC_PAGE_SIZES_REAL)) { 402 return true; 403 } 404 405 return (1ul << shift) <= rampgsize; 406 } 407 408 static long max_cpu_page_size; 409 410 static void kvm_fixup_page_sizes(PowerPCCPU *cpu) 411 { 412 static struct kvm_ppc_smmu_info smmu_info; 413 static bool has_smmu_info; 414 CPUPPCState *env = &cpu->env; 415 int iq, ik, jq, jk; 416 bool has_64k_pages = false; 417 418 /* We only handle page sizes for 64-bit server guests for now */ 419 if (!(env->mmu_model & POWERPC_MMU_64)) { 420 return; 421 } 422 423 /* Collect MMU info from kernel if not already */ 424 if (!has_smmu_info) { 425 kvm_get_smmu_info(cpu, &smmu_info); 426 has_smmu_info = true; 427 } 428 429 if (!max_cpu_page_size) { 430 max_cpu_page_size = qemu_getrampagesize(); 431 } 432 433 /* Convert to QEMU form */ 434 memset(&env->sps, 0, sizeof(env->sps)); 435 436 /* If we have HV KVM, we need to forbid CI large pages if our 437 * host page size is smaller than 64K. 438 */ 439 if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) { 440 env->ci_large_pages = getpagesize() >= 0x10000; 441 } 442 443 /* 444 * XXX This loop should be an entry wide AND of the capabilities that 445 * the selected CPU has with the capabilities that KVM supports. 446 */ 447 for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) { 448 struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq]; 449 struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik]; 450 451 if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size, 452 ksps->page_shift)) { 453 continue; 454 } 455 qsps->page_shift = ksps->page_shift; 456 qsps->slb_enc = ksps->slb_enc; 457 for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) { 458 if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size, 459 ksps->enc[jk].page_shift)) { 460 continue; 461 } 462 if (ksps->enc[jk].page_shift == 16) { 463 has_64k_pages = true; 464 } 465 qsps->enc[jq].page_shift = ksps->enc[jk].page_shift; 466 qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc; 467 if (++jq >= PPC_PAGE_SIZES_MAX_SZ) { 468 break; 469 } 470 } 471 if (++iq >= PPC_PAGE_SIZES_MAX_SZ) { 472 break; 473 } 474 } 475 env->slb_nr = smmu_info.slb_size; 476 if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) { 477 env->mmu_model &= ~POWERPC_MMU_1TSEG; 478 } 479 if (!has_64k_pages) { 480 env->mmu_model &= ~POWERPC_MMU_64K; 481 } 482 } 483 484 bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path) 485 { 486 Object *mem_obj = object_resolve_path(obj_path, NULL); 487 char *mempath = object_property_get_str(mem_obj, "mem-path", NULL); 488 long pagesize; 489 490 if (mempath) { 491 pagesize = qemu_mempath_getpagesize(mempath); 492 g_free(mempath); 493 } else { 494 pagesize = getpagesize(); 495 } 496 497 return pagesize >= max_cpu_page_size; 498 } 499 500 #else /* defined (TARGET_PPC64) */ 501 502 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu) 503 { 504 } 505 506 bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path) 507 { 508 return true; 509 } 510 511 #endif /* !defined (TARGET_PPC64) */ 512 513 unsigned long kvm_arch_vcpu_id(CPUState *cpu) 514 { 515 return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu)); 516 } 517 518 /* e500 supports 2 h/w breakpoint and 2 watchpoint. 519 * book3s supports only 1 watchpoint, so array size 520 * of 4 is sufficient for now. 
 */
#define MAX_HW_BKPTS 4

static struct HWBreakpoint {
    target_ulong addr;
    int type;
} hw_debug_points[MAX_HW_BKPTS];

static CPUWatchpoint hw_watchpoint;

/* By default there is no breakpoint or watchpoint supported */
static int max_hw_breakpoint;
static int max_hw_watchpoint;
static int nb_hw_breakpoint;
static int nb_hw_watchpoint;

static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
{
    if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
        max_hw_breakpoint = 2;
        max_hw_watchpoint = 2;
    }

    if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
        fprintf(stderr, "Error initializing h/w breakpoints\n");
        return;
    }
}

int kvm_arch_init_vcpu(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *cenv = &cpu->env;
    int ret;

    /* Gather server mmu info from KVM and update the CPU state */
    kvm_fixup_page_sizes(cpu);

    /* Synchronize sregs with kvm */
    ret = kvm_arch_sync_sregs(cpu);
    if (ret) {
        if (ret == -EINVAL) {
            error_report("Register sync failed... If you're using kvm-hv.ko,"
                         " only \"-cpu host\" is possible");
        }
        return ret;
    }

    idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);

    switch (cenv->mmu_model) {
    case POWERPC_MMU_BOOKE206:
        /* This target supports access to KVM's guest TLB */
        ret = kvm_booke206_tlb_init(cpu);
        break;
    case POWERPC_MMU_2_07:
        if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
            /* KVM-HV has transactional memory on POWER8 also without the
             * KVM_CAP_PPC_HTM extension, so enable it here instead as
             * long as it's available to userspace on the host. */
            if (qemu_getauxval(AT_HWCAP2) & PPC_FEATURE2_HAS_HTM) {
                cap_htm = true;
            }
        }
        break;
    default:
        break;
    }

    kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
    kvmppc_hw_debug_points_init(cenv);

    return ret;
}

static void kvm_sw_tlb_put(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_dirty_tlb dirty_tlb;
    unsigned char *bitmap;
    int ret;

    if (!env->kvm_sw_tlb) {
        return;
    }

    bitmap = g_malloc((env->nb_tlb + 7) / 8);
    memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);

    dirty_tlb.bitmap = (uintptr_t)bitmap;
    dirty_tlb.num_dirty = env->nb_tlb;

    ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
    if (ret) {
        fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
                __func__, strerror(-ret));
    }

    g_free(bitmap);
}

static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    union {
        uint32_t u32;
        uint64_t u64;
    } val;
    struct kvm_one_reg reg = {
        .id = id,
        .addr = (uintptr_t) &val,
    };
    int ret;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret != 0) {
        trace_kvm_failed_spr_get(spr, strerror(errno));
    } else {
        switch (id & KVM_REG_SIZE_MASK) {
        case KVM_REG_SIZE_U32:
            env->spr[spr] = val.u32;
            break;

        case KVM_REG_SIZE_U64:
            env->spr[spr] = val.u64;
            break;

        default:
            /* Don't handle this size yet */
            abort();
        }
    }
}

static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    union {
        uint32_t u32;
        uint64_t u64;
    } val;
    struct kvm_one_reg reg = {
        .id = id,
        .addr = (uintptr_t) &val,
    };
    int ret;

    switch (id & KVM_REG_SIZE_MASK) {
    case KVM_REG_SIZE_U32:
        val.u32 = env->spr[spr];
        break;

    case KVM_REG_SIZE_U64:
        val.u64 = env->spr[spr];
        break;

    default:
        /* Don't handle this size yet */
        abort();
    }

    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret != 0) {
        trace_kvm_failed_spr_set(spr, strerror(errno));
    }
}

static int kvm_put_fp(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int i;
    int ret;

    if (env->insns_flags & PPC_FLOAT) {
        uint64_t fpscr = env->fpscr;
        bool vsx = !!(env->insns_flags2 & PPC2_VSX);

        reg.id = KVM_REG_PPC_FPSCR;
        reg.addr = (uintptr_t)&fpscr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            uint64_t vsr[2];

#ifdef HOST_WORDS_BIGENDIAN
            vsr[0] = float64_val(env->fpr[i]);
            vsr[1] = env->vsr[i];
#else
            vsr[0] = env->vsr[i];
            vsr[1] = float64_val(env->fpr[i]);
#endif
            reg.addr = (uintptr_t) &vsr;
            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);

            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
                        i, strerror(errno));
                return ret;
            }
        }
    }

    if (env->insns_flags & PPC_ALTIVEC) {
        reg.id = KVM_REG_PPC_VSCR;
        reg.addr = (uintptr_t)&env->vscr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            reg.id = KVM_REG_PPC_VR(i);
            reg.addr = (uintptr_t)&env->avr[i];
            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
                return ret;
            }
        }
    }

    return 0;
}

static int kvm_get_fp(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int i;
    int ret;

    if (env->insns_flags & PPC_FLOAT) {
        uint64_t fpscr;
        bool vsx = !!(env->insns_flags2 & PPC2_VSX);

        reg.id = KVM_REG_PPC_FPSCR;
        reg.addr = (uintptr_t)&fpscr;
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
            return ret;
        } else {
            env->fpscr = fpscr;
        }

        for (i = 0; i < 32; i++) {
            uint64_t vsr[2];

            reg.addr = (uintptr_t) &vsr;
            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);

            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to get %s%d from KVM: %s\n",
                        vsx ? "VSR" : "FPR", i, strerror(errno));
                return ret;
            } else {
#ifdef HOST_WORDS_BIGENDIAN
                env->fpr[i] = vsr[0];
                if (vsx) {
                    env->vsr[i] = vsr[1];
                }
#else
                env->fpr[i] = vsr[1];
                if (vsx) {
                    env->vsr[i] = vsr[0];
                }
#endif
            }
        }
    }

    if (env->insns_flags & PPC_ALTIVEC) {
        reg.id = KVM_REG_PPC_VSCR;
        reg.addr = (uintptr_t)&env->vscr;
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            reg.id = KVM_REG_PPC_VR(i);
            reg.addr = (uintptr_t)&env->avr[i];
            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to get VR%d from KVM: %s\n",
                        i, strerror(errno));
                return ret;
            }
        }
    }

    return 0;
}

#if defined(TARGET_PPC64)
static int kvm_get_vpa(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int ret;

    reg.id = KVM_REG_PPC_VPA_ADDR;
    reg.addr = (uintptr_t)&env->vpa_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
        return ret;
    }

    assert((uintptr_t)&env->slb_shadow_size
           == ((uintptr_t)&env->slb_shadow_addr + 8));
    reg.id = KVM_REG_PPC_VPA_SLB;
    reg.addr = (uintptr_t)&env->slb_shadow_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
                strerror(errno));
        return ret;
    }

    assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
    reg.id = KVM_REG_PPC_VPA_DTL;
    reg.addr = (uintptr_t)&env->dtl_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
                strerror(errno));
        return ret;
    }

    return 0;
}

static int kvm_put_vpa(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int ret;

    /* SLB shadow or DTL can't be registered unless a master VPA is
     * registered. That means when restoring state, if a VPA *is*
     * registered, we need to set that up first. If not, we need to
     * deregister the others before deregistering the master VPA. */
    assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));

    if (env->vpa_addr) {
        reg.id = KVM_REG_PPC_VPA_ADDR;
        reg.addr = (uintptr_t)&env->vpa_addr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
            return ret;
        }
    }

    assert((uintptr_t)&env->slb_shadow_size
           == ((uintptr_t)&env->slb_shadow_addr + 8));
    reg.id = KVM_REG_PPC_VPA_SLB;
    reg.addr = (uintptr_t)&env->slb_shadow_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
        return ret;
    }

    assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
    reg.id = KVM_REG_PPC_VPA_DTL;
    reg.addr = (uintptr_t)&env->dtl_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
                strerror(errno));
        return ret;
    }

    if (!env->vpa_addr) {
        reg.id = KVM_REG_PPC_VPA_ADDR;
        reg.addr = (uintptr_t)&env->vpa_addr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
            return ret;
        }
    }

    return 0;
}
#endif /* TARGET_PPC64 */

int kvmppc_put_books_sregs(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    struct kvm_sregs sregs;
    int i;

    sregs.pvr = env->spr[SPR_PVR];

    sregs.u.s.sdr1 = env->spr[SPR_SDR1];

    /* Sync SLB */
#ifdef TARGET_PPC64
    for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
        sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
        if (env->slb[i].esid & SLB_ESID_V) {
            sregs.u.s.ppc64.slb[i].slbe |= i;
        }
        sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
    }
#endif

    /* Sync SRs */
    for (i = 0; i < 16; i++) {
        sregs.u.s.ppc32.sr[i] = env->sr[i];
    }

    /* Sync BATs */
    for (i = 0; i < 8; i++) {
        /* Beware. We have to swap upper and lower bits here */
        sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
            | env->DBAT[1][i];
        sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
            | env->IBAT[1][i];
    }

    return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
}

int kvm_arch_put_registers(CPUState *cs, int level)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_regs regs;
    int ret;
    int i;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    regs.ctr = env->ctr;
    regs.lr = env->lr;
    regs.xer = cpu_read_xer(env);
    regs.msr = env->msr;
    regs.pc = env->nip;

    regs.srr0 = env->spr[SPR_SRR0];
    regs.srr1 = env->spr[SPR_SRR1];

    regs.sprg0 = env->spr[SPR_SPRG0];
    regs.sprg1 = env->spr[SPR_SPRG1];
    regs.sprg2 = env->spr[SPR_SPRG2];
    regs.sprg3 = env->spr[SPR_SPRG3];
    regs.sprg4 = env->spr[SPR_SPRG4];
    regs.sprg5 = env->spr[SPR_SPRG5];
    regs.sprg6 = env->spr[SPR_SPRG6];
    regs.sprg7 = env->spr[SPR_SPRG7];

    regs.pid = env->spr[SPR_BOOKE_PID];

    for (i = 0; i < 32; i++) {
        regs.gpr[i] = env->gpr[i];
    }

    regs.cr = 0;
    for (i = 0; i < 8; i++) {
        regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
    }

    ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    kvm_put_fp(cs);

    if (env->tlb_dirty) {
        kvm_sw_tlb_put(cpu);
        env->tlb_dirty = false;
    }

    if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
        ret = kvmppc_put_books_sregs(cpu);
        if (ret < 0) {
            return ret;
        }
    }

    if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
        kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
    }

    if (cap_one_reg) {
        int i;

        /* We deliberately ignore errors here, for kernels which have
         * the ONE_REG calls, but don't support the specific
         * registers, there's a reasonable chance things will still
         * work, at least until we try to migrate.
*/ 1034 for (i = 0; i < 1024; i++) { 1035 uint64_t id = env->spr_cb[i].one_reg_id; 1036 1037 if (id != 0) { 1038 kvm_put_one_spr(cs, id, i); 1039 } 1040 } 1041 1042 #ifdef TARGET_PPC64 1043 if (msr_ts) { 1044 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) { 1045 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]); 1046 } 1047 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) { 1048 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]); 1049 } 1050 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr); 1051 kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr); 1052 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr); 1053 kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr); 1054 kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr); 1055 kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr); 1056 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave); 1057 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr); 1058 kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr); 1059 kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar); 1060 } 1061 1062 if (cap_papr) { 1063 if (kvm_put_vpa(cs) < 0) { 1064 DPRINTF("Warning: Unable to set VPA information to KVM\n"); 1065 } 1066 } 1067 1068 kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset); 1069 #endif /* TARGET_PPC64 */ 1070 } 1071 1072 return ret; 1073 } 1074 1075 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor) 1076 { 1077 env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR]; 1078 } 1079 1080 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu) 1081 { 1082 CPUPPCState *env = &cpu->env; 1083 struct kvm_sregs sregs; 1084 int ret; 1085 1086 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs); 1087 if (ret < 0) { 1088 return ret; 1089 } 1090 1091 if (sregs.u.e.features & KVM_SREGS_E_BASE) { 1092 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0; 1093 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1; 1094 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr; 1095 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear; 1096 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr; 1097 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr; 1098 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr; 1099 env->spr[SPR_DECR] = sregs.u.e.dec; 1100 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff; 1101 env->spr[SPR_TBU] = sregs.u.e.tb >> 32; 1102 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave; 1103 } 1104 1105 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) { 1106 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir; 1107 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0; 1108 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1; 1109 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar; 1110 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr; 1111 } 1112 1113 if (sregs.u.e.features & KVM_SREGS_E_64) { 1114 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr; 1115 } 1116 1117 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) { 1118 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8; 1119 } 1120 1121 if (sregs.u.e.features & KVM_SREGS_E_IVOR) { 1122 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0]; 1123 kvm_sync_excp(env, POWERPC_EXCP_CRITICAL, SPR_BOOKE_IVOR0); 1124 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1]; 1125 kvm_sync_excp(env, POWERPC_EXCP_MCHECK, SPR_BOOKE_IVOR1); 1126 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2]; 1127 kvm_sync_excp(env, POWERPC_EXCP_DSI, SPR_BOOKE_IVOR2); 1128 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3]; 1129 kvm_sync_excp(env, POWERPC_EXCP_ISI, SPR_BOOKE_IVOR3); 1130 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4]; 1131 kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL, SPR_BOOKE_IVOR4); 1132 
env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5]; 1133 kvm_sync_excp(env, POWERPC_EXCP_ALIGN, SPR_BOOKE_IVOR5); 1134 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6]; 1135 kvm_sync_excp(env, POWERPC_EXCP_PROGRAM, SPR_BOOKE_IVOR6); 1136 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7]; 1137 kvm_sync_excp(env, POWERPC_EXCP_FPU, SPR_BOOKE_IVOR7); 1138 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8]; 1139 kvm_sync_excp(env, POWERPC_EXCP_SYSCALL, SPR_BOOKE_IVOR8); 1140 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9]; 1141 kvm_sync_excp(env, POWERPC_EXCP_APU, SPR_BOOKE_IVOR9); 1142 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10]; 1143 kvm_sync_excp(env, POWERPC_EXCP_DECR, SPR_BOOKE_IVOR10); 1144 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11]; 1145 kvm_sync_excp(env, POWERPC_EXCP_FIT, SPR_BOOKE_IVOR11); 1146 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12]; 1147 kvm_sync_excp(env, POWERPC_EXCP_WDT, SPR_BOOKE_IVOR12); 1148 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13]; 1149 kvm_sync_excp(env, POWERPC_EXCP_DTLB, SPR_BOOKE_IVOR13); 1150 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14]; 1151 kvm_sync_excp(env, POWERPC_EXCP_ITLB, SPR_BOOKE_IVOR14); 1152 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15]; 1153 kvm_sync_excp(env, POWERPC_EXCP_DEBUG, SPR_BOOKE_IVOR15); 1154 1155 if (sregs.u.e.features & KVM_SREGS_E_SPE) { 1156 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0]; 1157 kvm_sync_excp(env, POWERPC_EXCP_SPEU, SPR_BOOKE_IVOR32); 1158 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1]; 1159 kvm_sync_excp(env, POWERPC_EXCP_EFPDI, SPR_BOOKE_IVOR33); 1160 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2]; 1161 kvm_sync_excp(env, POWERPC_EXCP_EFPRI, SPR_BOOKE_IVOR34); 1162 } 1163 1164 if (sregs.u.e.features & KVM_SREGS_E_PM) { 1165 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3]; 1166 kvm_sync_excp(env, POWERPC_EXCP_EPERFM, SPR_BOOKE_IVOR35); 1167 } 1168 1169 if (sregs.u.e.features & KVM_SREGS_E_PC) { 1170 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4]; 1171 kvm_sync_excp(env, POWERPC_EXCP_DOORI, SPR_BOOKE_IVOR36); 1172 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5]; 1173 kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37); 1174 } 1175 } 1176 1177 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) { 1178 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0; 1179 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1; 1180 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2; 1181 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff; 1182 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4; 1183 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6; 1184 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32; 1185 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg; 1186 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0]; 1187 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1]; 1188 } 1189 1190 if (sregs.u.e.features & KVM_SREGS_EXP) { 1191 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr; 1192 } 1193 1194 if (sregs.u.e.features & KVM_SREGS_E_PD) { 1195 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc; 1196 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc; 1197 } 1198 1199 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) { 1200 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr; 1201 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar; 1202 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0; 1203 1204 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) { 1205 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1; 1206 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2; 1207 } 1208 } 1209 1210 return 0; 1211 } 1212 1213 static int 
kvmppc_get_books_sregs(PowerPCCPU *cpu) 1214 { 1215 CPUPPCState *env = &cpu->env; 1216 struct kvm_sregs sregs; 1217 int ret; 1218 int i; 1219 1220 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs); 1221 if (ret < 0) { 1222 return ret; 1223 } 1224 1225 if (!cpu->vhyp) { 1226 ppc_store_sdr1(env, sregs.u.s.sdr1); 1227 } 1228 1229 /* Sync SLB */ 1230 #ifdef TARGET_PPC64 1231 /* 1232 * The packed SLB array we get from KVM_GET_SREGS only contains 1233 * information about valid entries. So we flush our internal copy 1234 * to get rid of stale ones, then put all valid SLB entries back 1235 * in. 1236 */ 1237 memset(env->slb, 0, sizeof(env->slb)); 1238 for (i = 0; i < ARRAY_SIZE(env->slb); i++) { 1239 target_ulong rb = sregs.u.s.ppc64.slb[i].slbe; 1240 target_ulong rs = sregs.u.s.ppc64.slb[i].slbv; 1241 /* 1242 * Only restore valid entries 1243 */ 1244 if (rb & SLB_ESID_V) { 1245 ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs); 1246 } 1247 } 1248 #endif 1249 1250 /* Sync SRs */ 1251 for (i = 0; i < 16; i++) { 1252 env->sr[i] = sregs.u.s.ppc32.sr[i]; 1253 } 1254 1255 /* Sync BATs */ 1256 for (i = 0; i < 8; i++) { 1257 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff; 1258 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32; 1259 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff; 1260 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32; 1261 } 1262 1263 return 0; 1264 } 1265 1266 int kvm_arch_get_registers(CPUState *cs) 1267 { 1268 PowerPCCPU *cpu = POWERPC_CPU(cs); 1269 CPUPPCState *env = &cpu->env; 1270 struct kvm_regs regs; 1271 uint32_t cr; 1272 int i, ret; 1273 1274 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, ®s); 1275 if (ret < 0) 1276 return ret; 1277 1278 cr = regs.cr; 1279 for (i = 7; i >= 0; i--) { 1280 env->crf[i] = cr & 15; 1281 cr >>= 4; 1282 } 1283 1284 env->ctr = regs.ctr; 1285 env->lr = regs.lr; 1286 cpu_write_xer(env, regs.xer); 1287 env->msr = regs.msr; 1288 env->nip = regs.pc; 1289 1290 env->spr[SPR_SRR0] = regs.srr0; 1291 env->spr[SPR_SRR1] = regs.srr1; 1292 1293 env->spr[SPR_SPRG0] = regs.sprg0; 1294 env->spr[SPR_SPRG1] = regs.sprg1; 1295 env->spr[SPR_SPRG2] = regs.sprg2; 1296 env->spr[SPR_SPRG3] = regs.sprg3; 1297 env->spr[SPR_SPRG4] = regs.sprg4; 1298 env->spr[SPR_SPRG5] = regs.sprg5; 1299 env->spr[SPR_SPRG6] = regs.sprg6; 1300 env->spr[SPR_SPRG7] = regs.sprg7; 1301 1302 env->spr[SPR_BOOKE_PID] = regs.pid; 1303 1304 for (i = 0;i < 32; i++) 1305 env->gpr[i] = regs.gpr[i]; 1306 1307 kvm_get_fp(cs); 1308 1309 if (cap_booke_sregs) { 1310 ret = kvmppc_get_booke_sregs(cpu); 1311 if (ret < 0) { 1312 return ret; 1313 } 1314 } 1315 1316 if (cap_segstate) { 1317 ret = kvmppc_get_books_sregs(cpu); 1318 if (ret < 0) { 1319 return ret; 1320 } 1321 } 1322 1323 if (cap_hior) { 1324 kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR); 1325 } 1326 1327 if (cap_one_reg) { 1328 int i; 1329 1330 /* We deliberately ignore errors here, for kernels which have 1331 * the ONE_REG calls, but don't support the specific 1332 * registers, there's a reasonable chance things will still 1333 * work, at least until we try to migrate. 
*/ 1334 for (i = 0; i < 1024; i++) { 1335 uint64_t id = env->spr_cb[i].one_reg_id; 1336 1337 if (id != 0) { 1338 kvm_get_one_spr(cs, id, i); 1339 } 1340 } 1341 1342 #ifdef TARGET_PPC64 1343 if (msr_ts) { 1344 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) { 1345 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]); 1346 } 1347 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) { 1348 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]); 1349 } 1350 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr); 1351 kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr); 1352 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr); 1353 kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr); 1354 kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr); 1355 kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr); 1356 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave); 1357 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr); 1358 kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr); 1359 kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar); 1360 } 1361 1362 if (cap_papr) { 1363 if (kvm_get_vpa(cs) < 0) { 1364 DPRINTF("Warning: Unable to get VPA information from KVM\n"); 1365 } 1366 } 1367 1368 kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset); 1369 #endif 1370 } 1371 1372 return 0; 1373 } 1374 1375 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level) 1376 { 1377 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET; 1378 1379 if (irq != PPC_INTERRUPT_EXT) { 1380 return 0; 1381 } 1382 1383 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) { 1384 return 0; 1385 } 1386 1387 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq); 1388 1389 return 0; 1390 } 1391 1392 #if defined(TARGET_PPCEMB) 1393 #define PPC_INPUT_INT PPC40x_INPUT_INT 1394 #elif defined(TARGET_PPC64) 1395 #define PPC_INPUT_INT PPC970_INPUT_INT 1396 #else 1397 #define PPC_INPUT_INT PPC6xx_INPUT_INT 1398 #endif 1399 1400 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run) 1401 { 1402 PowerPCCPU *cpu = POWERPC_CPU(cs); 1403 CPUPPCState *env = &cpu->env; 1404 int r; 1405 unsigned irq; 1406 1407 qemu_mutex_lock_iothread(); 1408 1409 /* PowerPC QEMU tracks the various core input pins (interrupt, critical 1410 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */ 1411 if (!cap_interrupt_level && 1412 run->ready_for_interrupt_injection && 1413 (cs->interrupt_request & CPU_INTERRUPT_HARD) && 1414 (env->irq_input_state & (1<<PPC_INPUT_INT))) 1415 { 1416 /* For now KVM disregards the 'irq' argument. However, in the 1417 * future KVM could cache it in-kernel to avoid a heavyweight exit 1418 * when reading the UIC. 1419 */ 1420 irq = KVM_INTERRUPT_SET; 1421 1422 DPRINTF("injected interrupt %d\n", irq); 1423 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq); 1424 if (r < 0) { 1425 printf("cpu %d fail inject %x\n", cs->cpu_index, irq); 1426 } 1427 1428 /* Always wake up soon in case the interrupt was level based */ 1429 timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 1430 (NANOSECONDS_PER_SECOND / 50)); 1431 } 1432 1433 /* We don't know if there are more interrupts pending after this. However, 1434 * the guest will return to userspace in the course of handling this one 1435 * anyways, so we will get a chance to deliver the rest. 
*/ 1436 1437 qemu_mutex_unlock_iothread(); 1438 } 1439 1440 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run) 1441 { 1442 return MEMTXATTRS_UNSPECIFIED; 1443 } 1444 1445 int kvm_arch_process_async_events(CPUState *cs) 1446 { 1447 return cs->halted; 1448 } 1449 1450 static int kvmppc_handle_halt(PowerPCCPU *cpu) 1451 { 1452 CPUState *cs = CPU(cpu); 1453 CPUPPCState *env = &cpu->env; 1454 1455 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) { 1456 cs->halted = 1; 1457 cs->exception_index = EXCP_HLT; 1458 } 1459 1460 return 0; 1461 } 1462 1463 /* map dcr access to existing qemu dcr emulation */ 1464 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data) 1465 { 1466 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0) 1467 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn); 1468 1469 return 0; 1470 } 1471 1472 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data) 1473 { 1474 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0) 1475 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn); 1476 1477 return 0; 1478 } 1479 1480 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp) 1481 { 1482 /* Mixed endian case is not handled */ 1483 uint32_t sc = debug_inst_opcode; 1484 1485 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 1486 sizeof(sc), 0) || 1487 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) { 1488 return -EINVAL; 1489 } 1490 1491 return 0; 1492 } 1493 1494 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp) 1495 { 1496 uint32_t sc; 1497 1498 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) || 1499 sc != debug_inst_opcode || 1500 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 1501 sizeof(sc), 1)) { 1502 return -EINVAL; 1503 } 1504 1505 return 0; 1506 } 1507 1508 static int find_hw_breakpoint(target_ulong addr, int type) 1509 { 1510 int n; 1511 1512 assert((nb_hw_breakpoint + nb_hw_watchpoint) 1513 <= ARRAY_SIZE(hw_debug_points)); 1514 1515 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) { 1516 if (hw_debug_points[n].addr == addr && 1517 hw_debug_points[n].type == type) { 1518 return n; 1519 } 1520 } 1521 1522 return -1; 1523 } 1524 1525 static int find_hw_watchpoint(target_ulong addr, int *flag) 1526 { 1527 int n; 1528 1529 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS); 1530 if (n >= 0) { 1531 *flag = BP_MEM_ACCESS; 1532 return n; 1533 } 1534 1535 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE); 1536 if (n >= 0) { 1537 *flag = BP_MEM_WRITE; 1538 return n; 1539 } 1540 1541 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ); 1542 if (n >= 0) { 1543 *flag = BP_MEM_READ; 1544 return n; 1545 } 1546 1547 return -1; 1548 } 1549 1550 int kvm_arch_insert_hw_breakpoint(target_ulong addr, 1551 target_ulong len, int type) 1552 { 1553 if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) { 1554 return -ENOBUFS; 1555 } 1556 1557 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr; 1558 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type; 1559 1560 switch (type) { 1561 case GDB_BREAKPOINT_HW: 1562 if (nb_hw_breakpoint >= max_hw_breakpoint) { 1563 return -ENOBUFS; 1564 } 1565 1566 if (find_hw_breakpoint(addr, type) >= 0) { 1567 return -EEXIST; 1568 } 1569 1570 nb_hw_breakpoint++; 1571 break; 1572 1573 case GDB_WATCHPOINT_WRITE: 1574 case GDB_WATCHPOINT_READ: 1575 case GDB_WATCHPOINT_ACCESS: 1576 if (nb_hw_watchpoint >= max_hw_watchpoint) { 1577 
return -ENOBUFS; 1578 } 1579 1580 if (find_hw_breakpoint(addr, type) >= 0) { 1581 return -EEXIST; 1582 } 1583 1584 nb_hw_watchpoint++; 1585 break; 1586 1587 default: 1588 return -ENOSYS; 1589 } 1590 1591 return 0; 1592 } 1593 1594 int kvm_arch_remove_hw_breakpoint(target_ulong addr, 1595 target_ulong len, int type) 1596 { 1597 int n; 1598 1599 n = find_hw_breakpoint(addr, type); 1600 if (n < 0) { 1601 return -ENOENT; 1602 } 1603 1604 switch (type) { 1605 case GDB_BREAKPOINT_HW: 1606 nb_hw_breakpoint--; 1607 break; 1608 1609 case GDB_WATCHPOINT_WRITE: 1610 case GDB_WATCHPOINT_READ: 1611 case GDB_WATCHPOINT_ACCESS: 1612 nb_hw_watchpoint--; 1613 break; 1614 1615 default: 1616 return -ENOSYS; 1617 } 1618 hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint]; 1619 1620 return 0; 1621 } 1622 1623 void kvm_arch_remove_all_hw_breakpoints(void) 1624 { 1625 nb_hw_breakpoint = nb_hw_watchpoint = 0; 1626 } 1627 1628 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg) 1629 { 1630 int n; 1631 1632 /* Software Breakpoint updates */ 1633 if (kvm_sw_breakpoints_active(cs)) { 1634 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP; 1635 } 1636 1637 assert((nb_hw_breakpoint + nb_hw_watchpoint) 1638 <= ARRAY_SIZE(hw_debug_points)); 1639 assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp)); 1640 1641 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) { 1642 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP; 1643 memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp)); 1644 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) { 1645 switch (hw_debug_points[n].type) { 1646 case GDB_BREAKPOINT_HW: 1647 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT; 1648 break; 1649 case GDB_WATCHPOINT_WRITE: 1650 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE; 1651 break; 1652 case GDB_WATCHPOINT_READ: 1653 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ; 1654 break; 1655 case GDB_WATCHPOINT_ACCESS: 1656 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE | 1657 KVMPPC_DEBUG_WATCH_READ; 1658 break; 1659 default: 1660 cpu_abort(cs, "Unsupported breakpoint type\n"); 1661 } 1662 dbg->arch.bp[n].addr = hw_debug_points[n].addr; 1663 } 1664 } 1665 } 1666 1667 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run) 1668 { 1669 CPUState *cs = CPU(cpu); 1670 CPUPPCState *env = &cpu->env; 1671 struct kvm_debug_exit_arch *arch_info = &run->debug.arch; 1672 int handle = 0; 1673 int n; 1674 int flag = 0; 1675 1676 if (cs->singlestep_enabled) { 1677 handle = 1; 1678 } else if (arch_info->status) { 1679 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) { 1680 if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) { 1681 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW); 1682 if (n >= 0) { 1683 handle = 1; 1684 } 1685 } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ | 1686 KVMPPC_DEBUG_WATCH_WRITE)) { 1687 n = find_hw_watchpoint(arch_info->address, &flag); 1688 if (n >= 0) { 1689 handle = 1; 1690 cs->watchpoint_hit = &hw_watchpoint; 1691 hw_watchpoint.vaddr = hw_debug_points[n].addr; 1692 hw_watchpoint.flags = flag; 1693 } 1694 } 1695 } 1696 } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) { 1697 handle = 1; 1698 } else { 1699 /* QEMU is not able to handle debug exception, so inject 1700 * program exception to guest; 1701 * Yes program exception NOT debug exception !! 1702 * When QEMU is using debug resources then debug exception must 1703 * be always set. To achieve this we set MSR_DE and also set 1704 * MSRP_DEP so guest cannot change MSR_DE. 
         * When emulating debug resources for the guest we want the guest
         * to control MSR_DE (enable/disable debug interrupt on need).
         * Supporting both configurations is NOT possible.
         * So the result is that we cannot share debug resources
         * between QEMU and the guest on BOOKE architecture.
         * In the current design QEMU gets priority over the guest;
         * this means that if QEMU is using the debug resources then
         * the guest cannot use them.
         * For software breakpoints QEMU uses a privileged instruction,
         * so there cannot be any reason that we are here for a guest
         * set debug exception; the only possibility is that the guest
         * executed a privileged / illegal instruction and that's why
         * we are injecting a program interrupt.
         */

        cpu_synchronize_state(cs);
        /* env->nip is PC, so increment this by 4 to use
         * ppc_cpu_do_interrupt(), which sets srr0 = env->nip - 4.
         */
        env->nip += 4;
        cs->exception_index = POWERPC_EXCP_PROGRAM;
        env->error_code = POWERPC_EXCP_INVAL;
        ppc_cpu_do_interrupt(cs);
    }

    return handle;
}

int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    int ret;

    qemu_mutex_lock_iothread();

    switch (run->exit_reason) {
    case KVM_EXIT_DCR:
        if (run->dcr.is_write) {
            DPRINTF("handle dcr write\n");
            ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
        } else {
            DPRINTF("handle dcr read\n");
            ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
        }
        break;
    case KVM_EXIT_HLT:
        DPRINTF("handle halt\n");
        ret = kvmppc_handle_halt(cpu);
        break;
#if defined(TARGET_PPC64)
    case KVM_EXIT_PAPR_HCALL:
        DPRINTF("handle PAPR hypercall\n");
        run->papr_hcall.ret = spapr_hypercall(cpu,
                                              run->papr_hcall.nr,
                                              run->papr_hcall.args);
        ret = 0;
        break;
#endif
    case KVM_EXIT_EPR:
        DPRINTF("handle epr\n");
        run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
        ret = 0;
        break;
    case KVM_EXIT_WATCHDOG:
        DPRINTF("handle watchdog expiry\n");
        watchdog_perform_action();
        ret = 0;
        break;

    case KVM_EXIT_DEBUG:
        DPRINTF("handle debug exception\n");
        if (kvm_handle_debug(cpu, run)) {
            ret = EXCP_DEBUG;
            break;
        }
        /* re-enter, this exception was guest-internal */
        ret = 0;
        break;

    default:
        fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
        ret = -1;
        break;
    }

    qemu_mutex_unlock_iothread();
    return ret;
}

int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
{
    CPUState *cs = CPU(cpu);
    uint32_t bits = tsr_bits;
    struct kvm_one_reg reg = {
        .id = KVM_REG_PPC_OR_TSR,
        .addr = (uintptr_t) &bits,
    };

    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
}

int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
{
    CPUState *cs = CPU(cpu);
    uint32_t bits = tsr_bits;
    struct kvm_one_reg reg = {
        .id = KVM_REG_PPC_CLEAR_TSR,
        .addr = (uintptr_t) &bits,
    };

    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
}

int kvmppc_set_tcr(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    CPUPPCState *env = &cpu->env;
    uint32_t tcr = env->spr[SPR_BOOKE_TCR];

    struct kvm_one_reg reg = {
        .id = KVM_REG_PPC_TCR,
        .addr = (uintptr_t) &tcr,
1829 }; 1830 1831 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 1832 } 1833 1834 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu) 1835 { 1836 CPUState *cs = CPU(cpu); 1837 int ret; 1838 1839 if (!kvm_enabled()) { 1840 return -1; 1841 } 1842 1843 if (!cap_ppc_watchdog) { 1844 printf("warning: KVM does not support watchdog"); 1845 return -1; 1846 } 1847 1848 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0); 1849 if (ret < 0) { 1850 fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n", 1851 __func__, strerror(-ret)); 1852 return ret; 1853 } 1854 1855 return ret; 1856 } 1857 1858 static int read_cpuinfo(const char *field, char *value, int len) 1859 { 1860 FILE *f; 1861 int ret = -1; 1862 int field_len = strlen(field); 1863 char line[512]; 1864 1865 f = fopen("/proc/cpuinfo", "r"); 1866 if (!f) { 1867 return -1; 1868 } 1869 1870 do { 1871 if (!fgets(line, sizeof(line), f)) { 1872 break; 1873 } 1874 if (!strncmp(line, field, field_len)) { 1875 pstrcpy(value, len, line); 1876 ret = 0; 1877 break; 1878 } 1879 } while(*line); 1880 1881 fclose(f); 1882 1883 return ret; 1884 } 1885 1886 uint32_t kvmppc_get_tbfreq(void) 1887 { 1888 char line[512]; 1889 char *ns; 1890 uint32_t retval = NANOSECONDS_PER_SECOND; 1891 1892 if (read_cpuinfo("timebase", line, sizeof(line))) { 1893 return retval; 1894 } 1895 1896 if (!(ns = strchr(line, ':'))) { 1897 return retval; 1898 } 1899 1900 ns++; 1901 1902 return atoi(ns); 1903 } 1904 1905 bool kvmppc_get_host_serial(char **value) 1906 { 1907 return g_file_get_contents("/proc/device-tree/system-id", value, NULL, 1908 NULL); 1909 } 1910 1911 bool kvmppc_get_host_model(char **value) 1912 { 1913 return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL); 1914 } 1915 1916 /* Try to find a device tree node for a CPU with clock-frequency property */ 1917 static int kvmppc_find_cpu_dt(char *buf, int buf_len) 1918 { 1919 struct dirent *dirp; 1920 DIR *dp; 1921 1922 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) { 1923 printf("Can't open directory " PROC_DEVTREE_CPU "\n"); 1924 return -1; 1925 } 1926 1927 buf[0] = '\0'; 1928 while ((dirp = readdir(dp)) != NULL) { 1929 FILE *f; 1930 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU, 1931 dirp->d_name); 1932 f = fopen(buf, "r"); 1933 if (f) { 1934 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name); 1935 fclose(f); 1936 break; 1937 } 1938 buf[0] = '\0'; 1939 } 1940 closedir(dp); 1941 if (buf[0] == '\0') { 1942 printf("Unknown host!\n"); 1943 return -1; 1944 } 1945 1946 return 0; 1947 } 1948 1949 static uint64_t kvmppc_read_int_dt(const char *filename) 1950 { 1951 union { 1952 uint32_t v32; 1953 uint64_t v64; 1954 } u; 1955 FILE *f; 1956 int len; 1957 1958 f = fopen(filename, "rb"); 1959 if (!f) { 1960 return -1; 1961 } 1962 1963 len = fread(&u, 1, sizeof(u), f); 1964 fclose(f); 1965 switch (len) { 1966 case 4: 1967 /* property is a 32-bit quantity */ 1968 return be32_to_cpu(u.v32); 1969 case 8: 1970 return be64_to_cpu(u.v64); 1971 } 1972 1973 return 0; 1974 } 1975 1976 /* Read a CPU node property from the host device tree that's a single 1977 * integer (32-bit or 64-bit). 
Returns 0 if anything goes wrong 1978 * (can't find or open the property, or doesn't understand the 1979 * format) */ 1980 static uint64_t kvmppc_read_int_cpu_dt(const char *propname) 1981 { 1982 char buf[PATH_MAX], *tmp; 1983 uint64_t val; 1984 1985 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) { 1986 return -1; 1987 } 1988 1989 tmp = g_strdup_printf("%s/%s", buf, propname); 1990 val = kvmppc_read_int_dt(tmp); 1991 g_free(tmp); 1992 1993 return val; 1994 } 1995 1996 uint64_t kvmppc_get_clockfreq(void) 1997 { 1998 return kvmppc_read_int_cpu_dt("clock-frequency"); 1999 } 2000 2001 uint32_t kvmppc_get_vmx(void) 2002 { 2003 return kvmppc_read_int_cpu_dt("ibm,vmx"); 2004 } 2005 2006 uint32_t kvmppc_get_dfp(void) 2007 { 2008 return kvmppc_read_int_cpu_dt("ibm,dfp"); 2009 } 2010 2011 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo) 2012 { 2013 PowerPCCPU *cpu = ppc_env_get_cpu(env); 2014 CPUState *cs = CPU(cpu); 2015 2016 if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) && 2017 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) { 2018 return 0; 2019 } 2020 2021 return 1; 2022 } 2023 2024 int kvmppc_get_hasidle(CPUPPCState *env) 2025 { 2026 struct kvm_ppc_pvinfo pvinfo; 2027 2028 if (!kvmppc_get_pvinfo(env, &pvinfo) && 2029 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) { 2030 return 1; 2031 } 2032 2033 return 0; 2034 } 2035 2036 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len) 2037 { 2038 uint32_t *hc = (uint32_t*)buf; 2039 struct kvm_ppc_pvinfo pvinfo; 2040 2041 if (!kvmppc_get_pvinfo(env, &pvinfo)) { 2042 memcpy(buf, pvinfo.hcall, buf_len); 2043 return 0; 2044 } 2045 2046 /* 2047 * Fallback to always fail hypercalls regardless of endianness: 2048 * 2049 * tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian) 2050 * li r3, -1 2051 * b .+8 (becomes nop in wrong endian) 2052 * bswap32(li r3, -1) 2053 */ 2054 2055 hc[0] = cpu_to_be32(0x08000048); 2056 hc[1] = cpu_to_be32(0x3860ffff); 2057 hc[2] = cpu_to_be32(0x48000008); 2058 hc[3] = cpu_to_be32(bswap32(0x3860ffff)); 2059 2060 return 1; 2061 } 2062 2063 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall) 2064 { 2065 return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1); 2066 } 2067 2068 void kvmppc_enable_logical_ci_hcalls(void) 2069 { 2070 /* 2071 * FIXME: it would be nice if we could detect the cases where 2072 * we're using a device which requires the in kernel 2073 * implementation of these hcalls, but the kernel lacks them and 2074 * produce a warning. 
2075 */ 2076 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD); 2077 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE); 2078 } 2079 2080 void kvmppc_enable_set_mode_hcall(void) 2081 { 2082 kvmppc_enable_hcall(kvm_state, H_SET_MODE); 2083 } 2084 2085 void kvmppc_enable_clear_ref_mod_hcalls(void) 2086 { 2087 kvmppc_enable_hcall(kvm_state, H_CLEAR_REF); 2088 kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD); 2089 } 2090 2091 void kvmppc_set_papr(PowerPCCPU *cpu) 2092 { 2093 CPUState *cs = CPU(cpu); 2094 int ret; 2095 2096 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0); 2097 if (ret) { 2098 error_report("This vCPU type or KVM version does not support PAPR"); 2099 exit(1); 2100 } 2101 2102 /* Update the capability flag so we sync the right information 2103 * with kvm */ 2104 cap_papr = 1; 2105 } 2106 2107 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr) 2108 { 2109 return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr); 2110 } 2111 2112 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy) 2113 { 2114 CPUState *cs = CPU(cpu); 2115 int ret; 2116 2117 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy); 2118 if (ret && mpic_proxy) { 2119 error_report("This KVM version does not support EPR"); 2120 exit(1); 2121 } 2122 } 2123 2124 int kvmppc_smt_threads(void) 2125 { 2126 return cap_ppc_smt ? cap_ppc_smt : 1; 2127 } 2128 2129 #ifdef TARGET_PPC64 2130 off_t kvmppc_alloc_rma(void **rma) 2131 { 2132 off_t size; 2133 int fd; 2134 struct kvm_allocate_rma ret; 2135 2136 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported 2137 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but 2138 * not necessary on this hardware 2139 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware 2140 * 2141 * FIXME: We should allow the user to force contiguous RMA 2142 * allocation in the cap_ppc_rma==1 case. 
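For now, only the cap_ppc_rma == 2 case goes through KVM_ALLOCATE_RMA below; for 0 and 1 we return 0 and the RMA is expected to be backed by ordinary guest memory instead. The mapping below is also capped at 256 MiB regardless of what the kernel offers.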
2143 */ 2144 if (cap_ppc_rma < 2) { 2145 return 0; 2146 } 2147 2148 fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret); 2149 if (fd < 0) { 2150 fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n", 2151 strerror(errno)); 2152 return -1; 2153 } 2154 2155 size = MIN(ret.rma_size, 256ul << 20); 2156 2157 *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); 2158 if (*rma == MAP_FAILED) { 2159 fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno)); 2160 return -1; 2161 }; 2162 2163 return size; 2164 } 2165 2166 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift) 2167 { 2168 struct kvm_ppc_smmu_info info; 2169 long rampagesize, best_page_shift; 2170 int i; 2171 2172 if (cap_ppc_rma >= 2) { 2173 return current_size; 2174 } 2175 2176 /* Find the largest hardware supported page size that's less than 2177 * or equal to the (logical) backing page size of guest RAM */ 2178 kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info); 2179 rampagesize = qemu_getrampagesize(); 2180 best_page_shift = 0; 2181 2182 for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) { 2183 struct kvm_ppc_one_seg_page_size *sps = &info.sps[i]; 2184 2185 if (!sps->page_shift) { 2186 continue; 2187 } 2188 2189 if ((sps->page_shift > best_page_shift) 2190 && ((1UL << sps->page_shift) <= rampagesize)) { 2191 best_page_shift = sps->page_shift; 2192 } 2193 } 2194 2195 return MIN(current_size, 2196 1ULL << (best_page_shift + hash_shift - 7)); 2197 } 2198 #endif 2199 2200 bool kvmppc_spapr_use_multitce(void) 2201 { 2202 return cap_spapr_multitce; 2203 } 2204 2205 int kvmppc_spapr_enable_inkernel_multitce(void) 2206 { 2207 int ret; 2208 2209 ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0, 2210 H_PUT_TCE_INDIRECT, 1); 2211 if (!ret) { 2212 ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0, 2213 H_STUFF_TCE, 1); 2214 } 2215 2216 return ret; 2217 } 2218 2219 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift, 2220 uint64_t bus_offset, uint32_t nb_table, 2221 int *pfd, bool need_vfio) 2222 { 2223 long len; 2224 int fd; 2225 void *table; 2226 2227 /* Must set fd to -1 so we don't try to munmap when called for 2228 * destroying the table, which the upper layers -will- do 2229 */ 2230 *pfd = -1; 2231 if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) { 2232 return NULL; 2233 } 2234 2235 if (cap_spapr_tce_64) { 2236 struct kvm_create_spapr_tce_64 args = { 2237 .liobn = liobn, 2238 .page_shift = page_shift, 2239 .offset = bus_offset >> page_shift, 2240 .size = nb_table, 2241 .flags = 0 2242 }; 2243 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE_64, &args); 2244 if (fd < 0) { 2245 fprintf(stderr, 2246 "KVM: Failed to create TCE64 table for liobn 0x%x\n", 2247 liobn); 2248 return NULL; 2249 } 2250 } else if (cap_spapr_tce) { 2251 uint64_t window_size = (uint64_t) nb_table << page_shift; 2252 struct kvm_create_spapr_tce args = { 2253 .liobn = liobn, 2254 .window_size = window_size, 2255 }; 2256 if ((window_size != args.window_size) || bus_offset) { 2257 return NULL; 2258 } 2259 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args); 2260 if (fd < 0) { 2261 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n", 2262 liobn); 2263 return NULL; 2264 } 2265 } else { 2266 return NULL; 2267 } 2268 2269 len = nb_table * sizeof(uint64_t); 2270 /* FIXME: round this up to page size */ 2271 2272 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); 2273 if (table == MAP_FAILED) { 2274 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n", 
2275 liobn); 2276 close(fd); 2277 return NULL; 2278 } 2279 2280 *pfd = fd; 2281 return table; 2282 } 2283 2284 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table) 2285 { 2286 long len; 2287 2288 if (fd < 0) { 2289 return -1; 2290 } 2291 2292 len = nb_table * sizeof(uint64_t); 2293 if ((munmap(table, len) < 0) || 2294 (close(fd) < 0)) { 2295 fprintf(stderr, "KVM: Unexpected error removing TCE table: %s", 2296 strerror(errno)); 2297 /* Leak the table */ 2298 } 2299 2300 return 0; 2301 } 2302 2303 int kvmppc_reset_htab(int shift_hint) 2304 { 2305 uint32_t shift = shift_hint; 2306 2307 if (!kvm_enabled()) { 2308 /* Full emulation, tell caller to allocate htab itself */ 2309 return 0; 2310 } 2311 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) { 2312 int ret; 2313 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift); 2314 if (ret == -ENOTTY) { 2315 /* At least some versions of PR KVM advertise the 2316 * capability, but don't implement the ioctl(). Oops. 2317 * Return 0 so that we allocate the htab in qemu, as is 2318 * correct for PR. */ 2319 return 0; 2320 } else if (ret < 0) { 2321 return ret; 2322 } 2323 return shift; 2324 } 2325 2326 /* We have a kernel that predates the htab reset calls. For PR 2327 * KVM, we need to allocate the htab ourselves, for an HV KVM of 2328 * this era, it has allocated a 16MB fixed size hash table already. */ 2329 if (kvmppc_is_pr(kvm_state)) { 2330 /* PR - tell caller to allocate htab */ 2331 return 0; 2332 } else { 2333 /* HV - assume 16MB kernel allocated htab */ 2334 return 24; 2335 } 2336 } 2337 2338 static inline uint32_t mfpvr(void) 2339 { 2340 uint32_t pvr; 2341 2342 asm ("mfpvr %0" 2343 : "=r"(pvr)); 2344 return pvr; 2345 } 2346 2347 static void alter_insns(uint64_t *word, uint64_t flags, bool on) 2348 { 2349 if (on) { 2350 *word |= flags; 2351 } else { 2352 *word &= ~flags; 2353 } 2354 } 2355 2356 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data) 2357 { 2358 PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc); 2359 uint32_t vmx = kvmppc_get_vmx(); 2360 uint32_t dfp = kvmppc_get_dfp(); 2361 uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size"); 2362 uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size"); 2363 2364 /* Now fix up the class with information we can query from the host */ 2365 pcc->pvr = mfpvr(); 2366 2367 if (vmx != -1) { 2368 /* Only override when we know what the host supports */ 2369 alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0); 2370 alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1); 2371 } 2372 if (dfp != -1) { 2373 /* Only override when we know what the host supports */ 2374 alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp); 2375 } 2376 2377 if (dcache_size != -1) { 2378 pcc->l1_dcache_size = dcache_size; 2379 } 2380 2381 if (icache_size != -1) { 2382 pcc->l1_icache_size = icache_size; 2383 } 2384 2385 #if defined(TARGET_PPC64) 2386 pcc->radix_page_info = kvm_get_radix_page_info(); 2387 2388 if ((pcc->pvr & 0xffffff00) == CPU_POWERPC_POWER9_DD1) { 2389 /* 2390 * POWER9 DD1 has some bugs which make it not really ISA 3.00 2391 * compliant. More importantly, advertising ISA 3.00 2392 * architected mode may prevent guests from activating 2393 * necessary DD1 workarounds. 
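Hence mask all the PCR compat bits out of pcr_supported below, so no architected compatibility mode is advertised for this chip.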
2394 */ 2395 pcc->pcr_supported &= ~(PCR_COMPAT_3_00 | PCR_COMPAT_2_07 2396 | PCR_COMPAT_2_06 | PCR_COMPAT_2_05); 2397 } 2398 #endif /* defined(TARGET_PPC64) */ 2399 } 2400 2401 bool kvmppc_has_cap_epr(void) 2402 { 2403 return cap_epr; 2404 } 2405 2406 bool kvmppc_has_cap_htab_fd(void) 2407 { 2408 return cap_htab_fd; 2409 } 2410 2411 bool kvmppc_has_cap_fixup_hcalls(void) 2412 { 2413 return cap_fixup_hcalls; 2414 } 2415 2416 bool kvmppc_has_cap_htm(void) 2417 { 2418 return cap_htm; 2419 } 2420 2421 bool kvmppc_has_cap_mmu_radix(void) 2422 { 2423 return cap_mmu_radix; 2424 } 2425 2426 bool kvmppc_has_cap_mmu_hash_v3(void) 2427 { 2428 return cap_mmu_hash_v3; 2429 } 2430 2431 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void) 2432 { 2433 uint32_t host_pvr = mfpvr(); 2434 PowerPCCPUClass *pvr_pcc; 2435 2436 pvr_pcc = ppc_cpu_class_by_pvr(host_pvr); 2437 if (pvr_pcc == NULL) { 2438 pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr); 2439 } 2440 2441 return pvr_pcc; 2442 } 2443 2444 static int kvm_ppc_register_host_cpu_type(void) 2445 { 2446 TypeInfo type_info = { 2447 .name = TYPE_HOST_POWERPC_CPU, 2448 .class_init = kvmppc_host_cpu_class_init, 2449 }; 2450 PowerPCCPUClass *pvr_pcc; 2451 ObjectClass *oc; 2452 DeviceClass *dc; 2453 int i; 2454 2455 pvr_pcc = kvm_ppc_get_host_cpu_class(); 2456 if (pvr_pcc == NULL) { 2457 return -1; 2458 } 2459 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc)); 2460 type_register(&type_info); 2461 2462 oc = object_class_by_name(type_info.name); 2463 g_assert(oc); 2464 2465 #if defined(TARGET_PPC64) 2466 type_info.name = g_strdup_printf("%s-"TYPE_SPAPR_CPU_CORE, "host"); 2467 type_info.parent = TYPE_SPAPR_CPU_CORE, 2468 type_info.instance_size = sizeof(sPAPRCPUCore); 2469 type_info.instance_init = NULL; 2470 type_info.class_init = spapr_cpu_core_class_init; 2471 type_info.class_data = (void *) "host"; 2472 type_register(&type_info); 2473 g_free((void *)type_info.name); 2474 #endif 2475 2476 /* 2477 * Update generic CPU family class alias (e.g. on a POWER8NVL host, 2478 * we want "POWER8" to be a "family" alias that points to the current 2479 * host CPU type, too) 2480 */ 2481 dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc)); 2482 for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) { 2483 if (strcmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) { 2484 char *suffix; 2485 2486 ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc)); 2487 suffix = strstr(ppc_cpu_aliases[i].model, "-"TYPE_POWERPC_CPU); 2488 if (suffix) { 2489 *suffix = 0; 2490 } 2491 ppc_cpu_aliases[i].oc = oc; 2492 break; 2493 } 2494 } 2495 2496 return 0; 2497 } 2498 2499 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function) 2500 { 2501 struct kvm_rtas_token_args args = { 2502 .token = token, 2503 }; 2504 2505 if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) { 2506 return -ENOENT; 2507 } 2508 2509 strncpy(args.name, function, sizeof(args.name)); 2510 2511 return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args); 2512 } 2513 2514 int kvmppc_get_htab_fd(bool write) 2515 { 2516 struct kvm_get_htab_fd s = { 2517 .flags = write ? 
KVM_GET_HTAB_WRITE : 0, 2518 .start_index = 0, 2519 }; 2520 2521 if (!cap_htab_fd) { 2522 fprintf(stderr, "KVM version doesn't support saving the hash table\n"); 2523 return -1; 2524 } 2525 2526 return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s); 2527 } 2528 2529 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns) 2530 { 2531 int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); 2532 uint8_t buf[bufsize]; 2533 ssize_t rc; 2534 2535 do { 2536 rc = read(fd, buf, bufsize); 2537 if (rc < 0) { 2538 fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n", 2539 strerror(errno)); 2540 return rc; 2541 } else if (rc) { 2542 uint8_t *buffer = buf; 2543 ssize_t n = rc; 2544 while (n) { 2545 struct kvm_get_htab_header *head = 2546 (struct kvm_get_htab_header *) buffer; 2547 size_t chunksize = sizeof(*head) + 2548 HASH_PTE_SIZE_64 * head->n_valid; 2549 2550 qemu_put_be32(f, head->index); 2551 qemu_put_be16(f, head->n_valid); 2552 qemu_put_be16(f, head->n_invalid); 2553 qemu_put_buffer(f, (void *)(head + 1), 2554 HASH_PTE_SIZE_64 * head->n_valid); 2555 2556 buffer += chunksize; 2557 n -= chunksize; 2558 } 2559 } 2560 } while ((rc != 0) 2561 && ((max_ns < 0) 2562 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns))); 2563 2564 return (rc == 0) ? 1 : 0; 2565 } 2566 2567 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index, 2568 uint16_t n_valid, uint16_t n_invalid) 2569 { 2570 struct kvm_get_htab_header *buf; 2571 size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64; 2572 ssize_t rc; 2573 2574 buf = alloca(chunksize); 2575 buf->index = index; 2576 buf->n_valid = n_valid; 2577 buf->n_invalid = n_invalid; 2578 2579 qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid); 2580 2581 rc = write(fd, buf, chunksize); 2582 if (rc < 0) { 2583 fprintf(stderr, "Error writing KVM hash table: %s\n", 2584 strerror(errno)); 2585 return rc; 2586 } 2587 if (rc != chunksize) { 2588 /* We should never get a short write on a single chunk */ 2589 fprintf(stderr, "Short write, restoring KVM hash table\n"); 2590 return -1; 2591 } 2592 return 0; 2593 } 2594 2595 bool kvm_arch_stop_on_emulation_error(CPUState *cpu) 2596 { 2597 return true; 2598 } 2599 2600 void kvm_arch_init_irq_routing(KVMState *s) 2601 { 2602 } 2603 2604 void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n) 2605 { 2606 struct kvm_get_htab_fd ghf = { 2607 .flags = 0, 2608 .start_index = ptex, 2609 }; 2610 int fd, rc; 2611 int i; 2612 2613 fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf); 2614 if (fd < 0) { 2615 hw_error("kvmppc_read_hptes: Unable to open HPT fd"); 2616 } 2617 2618 i = 0; 2619 while (i < n) { 2620 struct kvm_get_htab_header *hdr; 2621 int m = n < HPTES_PER_GROUP ? 
n : HPTES_PER_GROUP; 2622 char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64]; 2623 2624 rc = read(fd, buf, sizeof(buf)); 2625 if (rc < 0) { 2626 hw_error("kvmppc_read_hptes: Unable to read HPTEs"); 2627 } 2628 2629 hdr = (struct kvm_get_htab_header *)buf; 2630 while ((i < n) && ((char *)hdr < (buf + rc))) { 2631 int invalid = hdr->n_invalid; 2632 2633 if (hdr->index != (ptex + i)) { 2634 hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32 2635 " != (%"HWADDR_PRIu" + %d", hdr->index, ptex, i); 2636 } 2637 2638 memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * hdr->n_valid); 2639 i += hdr->n_valid; 2640 2641 if ((n - i) < invalid) { 2642 invalid = n - i; 2643 } 2644 memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64); 2645 i += hdr->n_invalid; 2646 2647 hdr = (struct kvm_get_htab_header *) 2648 ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid); 2649 } 2650 } 2651 2652 close(fd); 2653 } 2654 2655 void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1) 2656 { 2657 int fd, rc; 2658 struct kvm_get_htab_fd ghf; 2659 struct { 2660 struct kvm_get_htab_header hdr; 2661 uint64_t pte0; 2662 uint64_t pte1; 2663 } buf; 2664 2665 ghf.flags = 0; 2666 ghf.start_index = 0; /* Ignored */ 2667 fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf); 2668 if (fd < 0) { 2669 hw_error("kvmppc_write_hpte: Unable to open HPT fd"); 2670 } 2671 2672 buf.hdr.n_valid = 1; 2673 buf.hdr.n_invalid = 0; 2674 buf.hdr.index = ptex; 2675 buf.pte0 = cpu_to_be64(pte0); 2676 buf.pte1 = cpu_to_be64(pte1); 2677 2678 rc = write(fd, &buf, sizeof(buf)); 2679 if (rc != sizeof(buf)) { 2680 hw_error("kvmppc_write_hpte: Unable to update KVM HPT"); 2681 } 2682 close(fd); 2683 } 2684 2685 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route, 2686 uint64_t address, uint32_t data, PCIDevice *dev) 2687 { 2688 return 0; 2689 } 2690 2691 int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route, 2692 int vector, PCIDevice *dev) 2693 { 2694 return 0; 2695 } 2696 2697 int kvm_arch_release_virq_post(int virq) 2698 { 2699 return 0; 2700 } 2701 2702 int kvm_arch_msi_data_to_gsi(uint32_t data) 2703 { 2704 return data & 0xffff; 2705 } 2706 2707 int kvmppc_enable_hwrng(void) 2708 { 2709 if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) { 2710 return -1; 2711 } 2712 2713 return kvmppc_enable_hcall(kvm_state, H_RANDOM); 2714 } 2715 2716 void kvmppc_check_papr_resize_hpt(Error **errp) 2717 { 2718 if (!kvm_enabled()) { 2719 return; /* No KVM, we're good */ 2720 } 2721 2722 if (cap_resize_hpt) { 2723 return; /* Kernel has explicit support, we're good */ 2724 } 2725 2726 /* Otherwise fallback on looking for PR KVM */ 2727 if (kvmppc_is_pr(kvm_state)) { 2728 return; 2729 } 2730 2731 error_setg(errp, 2732 "Hash page table resizing not available with this KVM version"); 2733 } 2734 2735 int kvmppc_resize_hpt_prepare(PowerPCCPU *cpu, target_ulong flags, int shift) 2736 { 2737 CPUState *cs = CPU(cpu); 2738 struct kvm_ppc_resize_hpt rhpt = { 2739 .flags = flags, 2740 .shift = shift, 2741 }; 2742 2743 if (!cap_resize_hpt) { 2744 return -ENOSYS; 2745 } 2746 2747 return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_PREPARE, &rhpt); 2748 } 2749 2750 int kvmppc_resize_hpt_commit(PowerPCCPU *cpu, target_ulong flags, int shift) 2751 { 2752 CPUState *cs = CPU(cpu); 2753 struct kvm_ppc_resize_hpt rhpt = { 2754 .flags = flags, 2755 .shift = shift, 2756 }; 2757 2758 if (!cap_resize_hpt) { 2759 return -ENOSYS; 2760 } 2761 2762 return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_COMMIT, &rhpt); 2763 } 2764 2765 static 
void kvmppc_pivot_hpt_cpu(CPUState *cs, run_on_cpu_data arg) 2766 { 2767 target_ulong sdr1 = arg.target_ptr; 2768 PowerPCCPU *cpu = POWERPC_CPU(cs); 2769 CPUPPCState *env = &cpu->env; 2770 2771 /* This is just for the benefit of PR KVM */ 2772 cpu_synchronize_state(cs); 2773 env->spr[SPR_SDR1] = sdr1; 2774 if (kvmppc_put_books_sregs(cpu) < 0) { 2775 error_report("Unable to update SDR1 in KVM"); 2776 exit(1); 2777 } 2778 } 2779 2780 void kvmppc_update_sdr1(target_ulong sdr1) 2781 { 2782 CPUState *cs; 2783 2784 CPU_FOREACH(cs) { 2785 run_on_cpu(cs, kvmppc_pivot_hpt_cpu, RUN_ON_CPU_TARGET_PTR(sdr1)); 2786 } 2787 } 2788
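/*
 * Illustrative sketch, not part of the upstream file: roughly how a
 * destination side could replay the stream that kvmppc_save_htab() above
 * emits.  Each record is a be32 HPTE index, a be16 n_valid and a be16
 * n_invalid, followed by n_valid * HASH_PTE_SIZE_64 bytes of PTE data.
 * The real consumer lives in hw/ppc/spapr.c; the all-zero terminating
 * record checked for here is an assumption of this sketch, and the
 * function name example_htab_replay is hypothetical.  The block is
 * compiled out so it does not change the file's behaviour.
 */
#if 0
static int example_htab_replay(QEMUFile *f, int htab_fd)
{
    for (;;) {
        uint32_t index = qemu_get_be32(f);
        uint16_t n_valid = qemu_get_be16(f);
        uint16_t n_invalid = qemu_get_be16(f);

        if (!index && !n_valid && !n_invalid) {
            break;                      /* assumed end-of-stream marker */
        }

        /* Feed one chunk back into KVM through the HTAB fd */
        if (kvmppc_load_htab_chunk(f, htab_fd, index,
                                   n_valid, n_invalid) < 0) {
            return -1;
        }
    }

    return 0;
}
#endif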