/*
 * PowerPC implementation of KVM hooks
 *
 * Copyright IBM Corp. 2007
 * Copyright (C) 2011 Freescale Semiconductor, Inc.
 *
 * Authors:
 *  Jerone Young <jyoung5@us.ibm.com>
 *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
 *  Hollis Blanchard <hollisb@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include <dirent.h>
#include <sys/ioctl.h>
#include <sys/vfs.h>

#include <linux/kvm.h>

#include "qemu-common.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "cpu.h"
#include "cpu-models.h"
#include "qemu/timer.h"
#include "sysemu/sysemu.h"
#include "sysemu/hw_accel.h"
#include "kvm_ppc.h"
#include "sysemu/cpus.h"
#include "sysemu/device_tree.h"
#include "mmu-hash64.h"

#include "hw/sysbus.h"
#include "hw/ppc/spapr.h"
#include "hw/ppc/spapr_cpu_core.h"
#include "hw/ppc/ppc.h"
#include "sysemu/watchdog.h"
#include "trace.h"
#include "exec/gdbstub.h"
#include "exec/memattrs.h"
#include "exec/ram_addr.h"
#include "sysemu/hostmem.h"
#include "qemu/cutils.h"
#include "qemu/mmap-alloc.h"
#include "elf.h"
#include "sysemu/kvm_int.h"

//#define DEBUG_KVM

#ifdef DEBUG_KVM
#define DPRINTF(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define DPRINTF(fmt, ...) \
    do { } while (0)
#endif

#define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"

const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
    KVM_CAP_LAST_INFO
};

static int cap_interrupt_unset = false;
static int cap_interrupt_level = false;
static int cap_segstate;
static int cap_booke_sregs;
static int cap_ppc_smt;
static int cap_ppc_smt_possible;
static int cap_spapr_tce;
static int cap_spapr_tce_64;
static int cap_spapr_multitce;
static int cap_spapr_vfio;
static int cap_hior;
static int cap_one_reg;
static int cap_epr;
static int cap_ppc_watchdog;
static int cap_papr;
static int cap_htab_fd;
static int cap_fixup_hcalls;
static int cap_htm;             /* Hardware transactional memory support */
static int cap_mmu_radix;
static int cap_mmu_hash_v3;
static int cap_resize_hpt;
static int cap_ppc_pvr_compat;
static int cap_ppc_safe_cache;
static int cap_ppc_safe_bounds_check;
static int cap_ppc_safe_indirect_branch;
static int cap_ppc_nested_kvm_hv;

static uint32_t debug_inst_opcode;

/* XXX We have a race condition where we actually have a level triggered
 *     interrupt, but the infrastructure can't expose that yet, so the guest
 *     takes but ignores it, goes to sleep and never gets notified that there's
 *     still an interrupt pending.
 *
 *     As a quick workaround, let's just wake up again 20 ms after we injected
 *     an interrupt. That way we can assure that we're always reinjecting
 *     interrupts in case the guest swallowed them.
 */
static QEMUTimer *idle_timer;

static void kvm_kick_cpu(void *opaque)
{
    PowerPCCPU *cpu = opaque;

    qemu_cpu_kick(CPU(cpu));
}

/* Check whether we are running with KVM-PR (instead of KVM-HV). This
 * should only be used for fallback tests - generally we should use
 * explicit capabilities for the features we want, rather than
 * assuming what is/isn't available depending on the KVM variant. */
static bool kvmppc_is_pr(KVMState *ks)
{
    /* Assume KVM-PR if the GET_PVINFO capability is available */
    return kvm_vm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
}

static int kvm_ppc_register_host_cpu_type(MachineState *ms);
static void kvmppc_get_cpu_characteristics(KVMState *s);

int kvm_arch_init(MachineState *ms, KVMState *s)
{
    cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
    cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
    cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
    cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
    cap_ppc_smt_possible = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT_POSSIBLE);
    cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
    cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64);
    cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
    cap_spapr_vfio = kvm_vm_check_extension(s, KVM_CAP_SPAPR_TCE_VFIO);
    cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
    cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
    cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
    cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
    /* Note: we don't set cap_papr here, because this capability is
     * only activated after this by kvmppc_set_papr() */
    cap_htab_fd = kvm_vm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
    cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
    cap_ppc_smt = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT);
    cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
    cap_mmu_radix = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_RADIX);
    cap_mmu_hash_v3 = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_HASH_V3);
    cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT);
    kvmppc_get_cpu_characteristics(s);
    cap_ppc_nested_kvm_hv = kvm_vm_check_extension(s, KVM_CAP_PPC_NESTED_HV);
    /*
     * Note: setting it to false because there is no such capability
     * in KVM at this moment.
     *
     * TODO: call kvm_vm_check_extension() with the right capability
     * after the kernel starts implementing it.
     */
    cap_ppc_pvr_compat = false;

    if (!cap_interrupt_level) {
        fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
                        "VM to stall at times!\n");
    }

    kvm_ppc_register_host_cpu_type(ms);

    return 0;
}

int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
{
    return 0;
}

static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
{
    CPUPPCState *cenv = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_sregs sregs;
    int ret;

    if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
        /* What we're really trying to say is "if we're on BookE, we use
           the native PVR for now". This is the only sane way to check
           it though, so we potentially confuse users that they can run
           BookE guests on BookS.
           Let's hope nobody dares enough :) */
        return 0;
    } else {
        if (!cap_segstate) {
            fprintf(stderr, "kvm error: missing PVR setting capability\n");
            return -ENOSYS;
        }
    }

    ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
    if (ret) {
        return ret;
    }

    sregs.pvr = cenv->spr[SPR_PVR];
    return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
}

/* Set up a shared TLB array with KVM */
static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_book3e_206_tlb_params params = {};
    struct kvm_config_tlb cfg = {};
    unsigned int entries = 0;
    int ret, i;

    if (!kvm_enabled() ||
        !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
        return 0;
    }

    assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);

    for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
        params.tlb_sizes[i] = booke206_tlb_size(env, i);
        params.tlb_ways[i] = booke206_tlb_ways(env, i);
        entries += params.tlb_sizes[i];
    }

    assert(entries == env->nb_tlb);
    assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));

    env->tlb_dirty = true;

    cfg.array = (uintptr_t)env->tlb.tlbm;
    cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
    cfg.params = (uintptr_t)&params;
    cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;

    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
    if (ret < 0) {
        fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
                __func__, strerror(-ret));
        return ret;
    }

    env->kvm_sw_tlb = true;
    return 0;
}


#if defined(TARGET_PPC64)
static void kvm_get_smmu_info(struct kvm_ppc_smmu_info *info, Error **errp)
{
    int ret;

    assert(kvm_state != NULL);

    if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
        error_setg(errp, "KVM doesn't expose the MMU features it supports");
        error_append_hint(errp, "Consider switching to a newer KVM\n");
        return;
    }

    ret = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_SMMU_INFO, info);
    if (ret == 0) {
        return;
    }

    error_setg_errno(errp, -ret,
                     "KVM failed to provide the MMU features it supports");
}

struct ppc_radix_page_info *kvm_get_radix_page_info(void)
{
    KVMState *s = KVM_STATE(current_machine->accelerator);
    struct ppc_radix_page_info *radix_page_info;
    struct kvm_ppc_rmmu_info rmmu_info;
    int i;

    if (!kvm_check_extension(s, KVM_CAP_PPC_MMU_RADIX)) {
        return NULL;
    }
    if (kvm_vm_ioctl(s, KVM_PPC_GET_RMMU_INFO, &rmmu_info)) {
        return NULL;
    }
    radix_page_info = g_malloc0(sizeof(*radix_page_info));
    radix_page_info->count = 0;
    for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) {
        if (rmmu_info.ap_encodings[i]) {
            radix_page_info->entries[i] = rmmu_info.ap_encodings[i];
            radix_page_info->count++;
        }
    }
    return radix_page_info;
}

target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu,
                                     bool radix, bool gtse,
                                     uint64_t proc_tbl)
{
    CPUState *cs = CPU(cpu);
    int ret;
    uint64_t flags = 0;
    struct kvm_ppc_mmuv3_cfg cfg = {
        .process_table = proc_tbl,
    };

    if (radix) {
        flags |= KVM_PPC_MMUV3_RADIX;
    }
    if (gtse) {
        flags |= KVM_PPC_MMUV3_GTSE;
    }
    cfg.flags = flags;
    ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_CONFIGURE_V3_MMU, &cfg);
    switch (ret) {
    case 0:
        return H_SUCCESS;
    case -EINVAL:
        return H_PARAMETER;
    case -ENODEV:
        return H_NOT_AVAILABLE;
    default:
        return H_HARDWARE;
    }
}

bool kvmppc_hpt_needs_host_contiguous_pages(void)
{
    static struct kvm_ppc_smmu_info smmu_info;

    if (!kvm_enabled()) {
        return false;
    }

    kvm_get_smmu_info(&smmu_info, &error_fatal);
    return !!(smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL);
}

void kvm_check_mmu(PowerPCCPU *cpu, Error **errp)
{
    struct kvm_ppc_smmu_info smmu_info;
    int iq, ik, jq, jk;
    Error *local_err = NULL;

    /* For now, we only have anything to check on hash64 MMUs */
    if (!cpu->hash64_opts || !kvm_enabled()) {
        return;
    }

    kvm_get_smmu_info(&smmu_info, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        return;
    }

    if (ppc_hash64_has(cpu, PPC_HASH64_1TSEG)
        && !(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
        error_setg(errp,
                   "KVM does not support 1TiB segments which guest expects");
        return;
    }

    if (smmu_info.slb_size < cpu->hash64_opts->slb_size) {
        error_setg(errp, "KVM only supports %u SLB entries, but guest needs %u",
                   smmu_info.slb_size, cpu->hash64_opts->slb_size);
        return;
    }

    /*
     * Verify that every pagesize supported by the cpu model is
     * supported by KVM with the same encodings
     */
    for (iq = 0; iq < ARRAY_SIZE(cpu->hash64_opts->sps); iq++) {
        PPCHash64SegmentPageSizes *qsps = &cpu->hash64_opts->sps[iq];
        struct kvm_ppc_one_seg_page_size *ksps;

        for (ik = 0; ik < ARRAY_SIZE(smmu_info.sps); ik++) {
            if (qsps->page_shift == smmu_info.sps[ik].page_shift) {
                break;
            }
        }
        if (ik >= ARRAY_SIZE(smmu_info.sps)) {
            error_setg(errp, "KVM doesn't support base page shift %u",
                       qsps->page_shift);
            return;
        }

        ksps = &smmu_info.sps[ik];
        if (ksps->slb_enc != qsps->slb_enc) {
            error_setg(errp,
"KVM uses SLB encoding 0x%x for page shift %u, but guest expects 0x%x",
                       ksps->slb_enc, ksps->page_shift, qsps->slb_enc);
            return;
        }

        for (jq = 0; jq < ARRAY_SIZE(qsps->enc); jq++) {
            for (jk = 0; jk < ARRAY_SIZE(ksps->enc); jk++) {
                if (qsps->enc[jq].page_shift == ksps->enc[jk].page_shift) {
                    break;
                }
            }

            if (jk >= ARRAY_SIZE(ksps->enc)) {
                error_setg(errp, "KVM doesn't support page shift %u/%u",
                           qsps->enc[jq].page_shift, qsps->page_shift);
                return;
            }
            if (qsps->enc[jq].pte_enc != ksps->enc[jk].pte_enc) {
                error_setg(errp,
"KVM uses PTE encoding 0x%x for page shift %u/%u, but guest expects 0x%x",
                           ksps->enc[jk].pte_enc, qsps->enc[jq].page_shift,
                           qsps->page_shift, qsps->enc[jq].pte_enc);
                return;
            }
        }
    }

    if (ppc_hash64_has(cpu, PPC_HASH64_CI_LARGEPAGE)) {
        /* Mostly what guest pagesizes we can use are related to the
         * host pages used to map guest RAM, which is handled in the
         * platform code. Cache-Inhibited largepages (64k) however are
         * used for I/O, so if they're mapped to the host at all it
         * will be a normal mapping, not a special hugepage one used
         * for RAM. */
        if (getpagesize() < 0x10000) {
            error_setg(errp,
                       "KVM can't supply 64kiB CI pages, which guest expects");
        }
    }
}
#endif /* defined(TARGET_PPC64) */

unsigned long kvm_arch_vcpu_id(CPUState *cpu)
{
    return POWERPC_CPU(cpu)->vcpu_id;
}

/* e500 supports 2 h/w breakpoints and 2 watchpoints.
 * book3s supports only 1 watchpoint, so array size
 * of 4 is sufficient for now.
 */
#define MAX_HW_BKPTS 4

static struct HWBreakpoint {
    target_ulong addr;
    int type;
} hw_debug_points[MAX_HW_BKPTS];

static CPUWatchpoint hw_watchpoint;

/* Default there is no breakpoint and watchpoint supported */
static int max_hw_breakpoint;
static int max_hw_watchpoint;
static int nb_hw_breakpoint;
static int nb_hw_watchpoint;

static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
{
    if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
        max_hw_breakpoint = 2;
        max_hw_watchpoint = 2;
    }

    if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
        fprintf(stderr, "Error initializing h/w breakpoints\n");
        return;
    }
}

int kvm_arch_init_vcpu(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *cenv = &cpu->env;
    int ret;

    /* Synchronize sregs with kvm */
    ret = kvm_arch_sync_sregs(cpu);
    if (ret) {
        if (ret == -EINVAL) {
            error_report("Register sync failed... If you're using kvm-hv.ko,"
                         " only \"-cpu host\" is possible");
        }
        return ret;
    }

    idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);

    switch (cenv->mmu_model) {
    case POWERPC_MMU_BOOKE206:
        /* This target supports access to KVM's guest TLB */
        ret = kvm_booke206_tlb_init(cpu);
        break;
    case POWERPC_MMU_2_07:
        if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
            /* KVM-HV has transactional memory on POWER8 also without the
             * KVM_CAP_PPC_HTM extension, so enable it here instead as
             * long as it's available to userspace on the host. */
            if (qemu_getauxval(AT_HWCAP2) & PPC_FEATURE2_HAS_HTM) {
                cap_htm = true;
            }
        }
        break;
    default:
        break;
    }

    kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
    kvmppc_hw_debug_points_init(cenv);

    return ret;
}

static void kvm_sw_tlb_put(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_dirty_tlb dirty_tlb;
    unsigned char *bitmap;
    int ret;

    if (!env->kvm_sw_tlb) {
        return;
    }

    bitmap = g_malloc((env->nb_tlb + 7) / 8);
    memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);

    dirty_tlb.bitmap = (uintptr_t)bitmap;
    dirty_tlb.num_dirty = env->nb_tlb;

    ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
    if (ret) {
        fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
                __func__, strerror(-ret));
    }

    g_free(bitmap);
}

static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    union {
        uint32_t u32;
        uint64_t u64;
    } val;
    struct kvm_one_reg reg = {
        .id = id,
        .addr = (uintptr_t) &val,
    };
    int ret;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret != 0) {
        trace_kvm_failed_spr_get(spr, strerror(errno));
    } else {
        switch (id & KVM_REG_SIZE_MASK) {
        case KVM_REG_SIZE_U32:
            env->spr[spr] = val.u32;
            break;

        case KVM_REG_SIZE_U64:
            env->spr[spr] = val.u64;
            break;

        default:
            /* Don't handle this size yet */
            abort();
        }
    }
}

static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    union {
        uint32_t u32;
        uint64_t u64;
    } val;
    struct kvm_one_reg reg = {
        .id = id,
        .addr = (uintptr_t) &val,
    };
    int ret;
    switch (id & KVM_REG_SIZE_MASK) {
    case KVM_REG_SIZE_U32:
        val.u32 = env->spr[spr];
        break;

    case KVM_REG_SIZE_U64:
        val.u64 = env->spr[spr];
        break;

    default:
        /* Don't handle this size yet */
        abort();
    }

    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret != 0) {
        trace_kvm_failed_spr_set(spr, strerror(errno));
    }
}

static int kvm_put_fp(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int i;
    int ret;

    if (env->insns_flags & PPC_FLOAT) {
        uint64_t fpscr = env->fpscr;
        bool vsx = !!(env->insns_flags2 & PPC2_VSX);

        reg.id = KVM_REG_PPC_FPSCR;
        reg.addr = (uintptr_t)&fpscr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            uint64_t vsr[2];
            uint64_t *fpr = cpu_fpr_ptr(&cpu->env, i);
            uint64_t *vsrl = cpu_vsrl_ptr(&cpu->env, i);

#ifdef HOST_WORDS_BIGENDIAN
            vsr[0] = float64_val(*fpr);
            vsr[1] = *vsrl;
#else
            vsr[0] = *vsrl;
            vsr[1] = float64_val(*fpr);
#endif
            reg.addr = (uintptr_t) &vsr;
            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);

            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
                        i, strerror(errno));
                return ret;
            }
        }
    }

    if (env->insns_flags & PPC_ALTIVEC) {
        reg.id = KVM_REG_PPC_VSCR;
        reg.addr = (uintptr_t)&env->vscr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            reg.id = KVM_REG_PPC_VR(i);
            reg.addr = (uintptr_t)cpu_avr_ptr(env, i);
            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
                return ret;
            }
        }
    }

    return 0;
}

static int kvm_get_fp(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int i;
    int ret;

    if (env->insns_flags & PPC_FLOAT) {
        uint64_t fpscr;
        bool vsx = !!(env->insns_flags2 & PPC2_VSX);

        reg.id = KVM_REG_PPC_FPSCR;
        reg.addr = (uintptr_t)&fpscr;
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
            return ret;
        } else {
            env->fpscr = fpscr;
        }

        for (i = 0; i < 32; i++) {
            uint64_t vsr[2];
            uint64_t *fpr = cpu_fpr_ptr(&cpu->env, i);
            uint64_t *vsrl = cpu_vsrl_ptr(&cpu->env, i);

            reg.addr = (uintptr_t) &vsr;
            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);

            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
            if (ret < 0) {
"VSR" : "FPR", i, strerror(errno)); 710 return ret; 711 } else { 712 #ifdef HOST_WORDS_BIGENDIAN 713 *fpr = vsr[0]; 714 if (vsx) { 715 *vsrl = vsr[1]; 716 } 717 #else 718 *fpr = vsr[1]; 719 if (vsx) { 720 *vsrl = vsr[0]; 721 } 722 #endif 723 } 724 } 725 } 726 727 if (env->insns_flags & PPC_ALTIVEC) { 728 reg.id = KVM_REG_PPC_VSCR; 729 reg.addr = (uintptr_t)&env->vscr; 730 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 731 if (ret < 0) { 732 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno)); 733 return ret; 734 } 735 736 for (i = 0; i < 32; i++) { 737 reg.id = KVM_REG_PPC_VR(i); 738 reg.addr = (uintptr_t)cpu_avr_ptr(env, i); 739 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 740 if (ret < 0) { 741 DPRINTF("Unable to get VR%d from KVM: %s\n", 742 i, strerror(errno)); 743 return ret; 744 } 745 } 746 } 747 748 return 0; 749 } 750 751 #if defined(TARGET_PPC64) 752 static int kvm_get_vpa(CPUState *cs) 753 { 754 PowerPCCPU *cpu = POWERPC_CPU(cs); 755 sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu); 756 struct kvm_one_reg reg; 757 int ret; 758 759 reg.id = KVM_REG_PPC_VPA_ADDR; 760 reg.addr = (uintptr_t)&spapr_cpu->vpa_addr; 761 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 762 if (ret < 0) { 763 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno)); 764 return ret; 765 } 766 767 assert((uintptr_t)&spapr_cpu->slb_shadow_size 768 == ((uintptr_t)&spapr_cpu->slb_shadow_addr + 8)); 769 reg.id = KVM_REG_PPC_VPA_SLB; 770 reg.addr = (uintptr_t)&spapr_cpu->slb_shadow_addr; 771 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 772 if (ret < 0) { 773 DPRINTF("Unable to get SLB shadow state from KVM: %s\n", 774 strerror(errno)); 775 return ret; 776 } 777 778 assert((uintptr_t)&spapr_cpu->dtl_size 779 == ((uintptr_t)&spapr_cpu->dtl_addr + 8)); 780 reg.id = KVM_REG_PPC_VPA_DTL; 781 reg.addr = (uintptr_t)&spapr_cpu->dtl_addr; 782 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 783 if (ret < 0) { 784 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n", 785 strerror(errno)); 786 return ret; 787 } 788 789 return 0; 790 } 791 792 static int kvm_put_vpa(CPUState *cs) 793 { 794 PowerPCCPU *cpu = POWERPC_CPU(cs); 795 sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu); 796 struct kvm_one_reg reg; 797 int ret; 798 799 /* SLB shadow or DTL can't be registered unless a master VPA is 800 * registered. That means when restoring state, if a VPA *is* 801 * registered, we need to set that up first. 
     * deregister the others before deregistering the master VPA */
    assert(spapr_cpu->vpa_addr
           || !(spapr_cpu->slb_shadow_addr || spapr_cpu->dtl_addr));

    if (spapr_cpu->vpa_addr) {
        reg.id = KVM_REG_PPC_VPA_ADDR;
        reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
            return ret;
        }
    }

    assert((uintptr_t)&spapr_cpu->slb_shadow_size
           == ((uintptr_t)&spapr_cpu->slb_shadow_addr + 8));
    reg.id = KVM_REG_PPC_VPA_SLB;
    reg.addr = (uintptr_t)&spapr_cpu->slb_shadow_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
        return ret;
    }

    assert((uintptr_t)&spapr_cpu->dtl_size
           == ((uintptr_t)&spapr_cpu->dtl_addr + 8));
    reg.id = KVM_REG_PPC_VPA_DTL;
    reg.addr = (uintptr_t)&spapr_cpu->dtl_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
                strerror(errno));
        return ret;
    }

    if (!spapr_cpu->vpa_addr) {
        reg.id = KVM_REG_PPC_VPA_ADDR;
        reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
            return ret;
        }
    }

    return 0;
}
#endif /* TARGET_PPC64 */

int kvmppc_put_books_sregs(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    struct kvm_sregs sregs;
    int i;

    sregs.pvr = env->spr[SPR_PVR];

    if (cpu->vhyp) {
        PPCVirtualHypervisorClass *vhc =
            PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
        sregs.u.s.sdr1 = vhc->encode_hpt_for_kvm_pr(cpu->vhyp);
    } else {
        sregs.u.s.sdr1 = env->spr[SPR_SDR1];
    }

    /* Sync SLB */
#ifdef TARGET_PPC64
    for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
        sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
        if (env->slb[i].esid & SLB_ESID_V) {
            sregs.u.s.ppc64.slb[i].slbe |= i;
        }
        sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
    }
#endif

    /* Sync SRs */
    for (i = 0; i < 16; i++) {
        sregs.u.s.ppc32.sr[i] = env->sr[i];
    }

    /* Sync BATs */
    for (i = 0; i < 8; i++) {
        /* Beware. We have to swap upper and lower bits here */
        sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
            | env->DBAT[1][i];
        sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
            | env->IBAT[1][i];
    }

    return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
}

int kvm_arch_put_registers(CPUState *cs, int level)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_regs regs;
    int ret;
    int i;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    regs.ctr = env->ctr;
    regs.lr = env->lr;
    regs.xer = cpu_read_xer(env);
    regs.msr = env->msr;
    regs.pc = env->nip;

    regs.srr0 = env->spr[SPR_SRR0];
    regs.srr1 = env->spr[SPR_SRR1];

    regs.sprg0 = env->spr[SPR_SPRG0];
    regs.sprg1 = env->spr[SPR_SPRG1];
    regs.sprg2 = env->spr[SPR_SPRG2];
    regs.sprg3 = env->spr[SPR_SPRG3];
    regs.sprg4 = env->spr[SPR_SPRG4];
    regs.sprg5 = env->spr[SPR_SPRG5];
    regs.sprg6 = env->spr[SPR_SPRG6];
    regs.sprg7 = env->spr[SPR_SPRG7];

    regs.pid = env->spr[SPR_BOOKE_PID];

    for (i = 0; i < 32; i++) {
        regs.gpr[i] = env->gpr[i];
    }

    regs.cr = 0;
    for (i = 0; i < 8; i++) {
        regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
    }

    ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    kvm_put_fp(cs);

    if (env->tlb_dirty) {
        kvm_sw_tlb_put(cpu);
        env->tlb_dirty = false;
    }

    if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
        ret = kvmppc_put_books_sregs(cpu);
        if (ret < 0) {
            return ret;
        }
    }

    if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
        kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
    }

    if (cap_one_reg) {
        int i;

        /* We deliberately ignore errors here, for kernels which have
         * the ONE_REG calls, but don't support the specific
         * registers, there's a reasonable chance things will still
         * work, at least until we try to migrate. */
        for (i = 0; i < 1024; i++) {
            uint64_t id = env->spr_cb[i].one_reg_id;

            if (id != 0) {
                kvm_put_one_spr(cs, id, i);
            }
        }

#ifdef TARGET_PPC64
        if (msr_ts) {
            for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
                kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
            }
            for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
                kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
            }
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
        }

        if (cap_papr) {
            if (kvm_put_vpa(cs) < 0) {
                DPRINTF("Warning: Unable to set VPA information to KVM\n");
            }
        }

        kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
#endif /* TARGET_PPC64 */
    }

    return ret;
}

static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
{
    env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
}

static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    struct kvm_sregs sregs;
    int ret;

    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
    if (ret < 0) {
        return ret;
    }

    if (sregs.u.e.features & KVM_SREGS_E_BASE) {
        env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
        env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
        env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
        env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
        env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
        env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
        env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
        env->spr[SPR_DECR] = sregs.u.e.dec;
        env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
        env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
        env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
    }

    if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
        env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
        env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
        env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
        env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
        env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
    }

    if (sregs.u.e.features & KVM_SREGS_E_64) {
        env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
    }

    if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
        env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
    }

    if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
        env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
        kvm_sync_excp(env, POWERPC_EXCP_CRITICAL, SPR_BOOKE_IVOR0);
        env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
        kvm_sync_excp(env, POWERPC_EXCP_MCHECK, SPR_BOOKE_IVOR1);
        env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
        kvm_sync_excp(env, POWERPC_EXCP_DSI, SPR_BOOKE_IVOR2);
        env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
        kvm_sync_excp(env, POWERPC_EXCP_ISI, SPR_BOOKE_IVOR3);
        env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
        kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL, SPR_BOOKE_IVOR4);
        env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
        kvm_sync_excp(env, POWERPC_EXCP_ALIGN, SPR_BOOKE_IVOR5);
        env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
        kvm_sync_excp(env, POWERPC_EXCP_PROGRAM, SPR_BOOKE_IVOR6);
        env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
        kvm_sync_excp(env, POWERPC_EXCP_FPU, SPR_BOOKE_IVOR7);
        env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
        kvm_sync_excp(env, POWERPC_EXCP_SYSCALL, SPR_BOOKE_IVOR8);
        env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
        kvm_sync_excp(env, POWERPC_EXCP_APU, SPR_BOOKE_IVOR9);
        env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
        kvm_sync_excp(env, POWERPC_EXCP_DECR, SPR_BOOKE_IVOR10);
        env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
        kvm_sync_excp(env, POWERPC_EXCP_FIT, SPR_BOOKE_IVOR11);
        env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
        kvm_sync_excp(env, POWERPC_EXCP_WDT, SPR_BOOKE_IVOR12);
        env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
        kvm_sync_excp(env, POWERPC_EXCP_DTLB, SPR_BOOKE_IVOR13);
        env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
        kvm_sync_excp(env, POWERPC_EXCP_ITLB, SPR_BOOKE_IVOR14);
        env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
        kvm_sync_excp(env, POWERPC_EXCP_DEBUG, SPR_BOOKE_IVOR15);

        if (sregs.u.e.features & KVM_SREGS_E_SPE) {
            env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
            kvm_sync_excp(env, POWERPC_EXCP_SPEU, SPR_BOOKE_IVOR32);
            env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
            kvm_sync_excp(env, POWERPC_EXCP_EFPDI, SPR_BOOKE_IVOR33);
            env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
            kvm_sync_excp(env, POWERPC_EXCP_EFPRI, SPR_BOOKE_IVOR34);
        }

        if (sregs.u.e.features & KVM_SREGS_E_PM) {
            env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
            kvm_sync_excp(env, POWERPC_EXCP_EPERFM, SPR_BOOKE_IVOR35);
        }

        if (sregs.u.e.features & KVM_SREGS_E_PC) {
            env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
            kvm_sync_excp(env, POWERPC_EXCP_DOORI, SPR_BOOKE_IVOR36);
            env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
            kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
        }
    }

    if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
        env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
        env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
        env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
        env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
        env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
        env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
        env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
        env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
        env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
        env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
    }

    if (sregs.u.e.features & KVM_SREGS_EXP) {
        env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
    }

    if (sregs.u.e.features & KVM_SREGS_E_PD) {
        env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
        env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
    }

    if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
        env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
        env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
        env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;

        if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
            env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
            env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
        }
    }

    return 0;
}
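
/* Counterpart of kvmppc_put_books_sregs(): read the Book3S sregs back from
 * KVM and update QEMU's copy of SDR1 (unless a virtual hypervisor owns the
 * hash table), the SLB, the segment registers and the BATs. */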
static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    struct kvm_sregs sregs;
    int ret;
    int i;

    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
    if (ret < 0) {
        return ret;
    }

    if (!cpu->vhyp) {
        ppc_store_sdr1(env, sregs.u.s.sdr1);
    }

    /* Sync SLB */
#ifdef TARGET_PPC64
    /*
     * The packed SLB array we get from KVM_GET_SREGS only contains
     * information about valid entries. So we flush our internal copy
     * to get rid of stale ones, then put all valid SLB entries back
     * in.
     */
    memset(env->slb, 0, sizeof(env->slb));
    for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
        target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
        target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
        /*
         * Only restore valid entries
         */
        if (rb & SLB_ESID_V) {
            ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
        }
    }
#endif

    /* Sync SRs */
    for (i = 0; i < 16; i++) {
        env->sr[i] = sregs.u.s.ppc32.sr[i];
    }

    /* Sync BATs */
    for (i = 0; i < 8; i++) {
        env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
        env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
        env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
        env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
    }

    return 0;
}

int kvm_arch_get_registers(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_regs regs;
    uint32_t cr;
    int i, ret;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    cr = regs.cr;
    for (i = 7; i >= 0; i--) {
        env->crf[i] = cr & 15;
        cr >>= 4;
    }

    env->ctr = regs.ctr;
    env->lr = regs.lr;
    cpu_write_xer(env, regs.xer);
    env->msr = regs.msr;
    env->nip = regs.pc;

    env->spr[SPR_SRR0] = regs.srr0;
    env->spr[SPR_SRR1] = regs.srr1;

    env->spr[SPR_SPRG0] = regs.sprg0;
    env->spr[SPR_SPRG1] = regs.sprg1;
    env->spr[SPR_SPRG2] = regs.sprg2;
    env->spr[SPR_SPRG3] = regs.sprg3;
    env->spr[SPR_SPRG4] = regs.sprg4;
    env->spr[SPR_SPRG5] = regs.sprg5;
    env->spr[SPR_SPRG6] = regs.sprg6;
    env->spr[SPR_SPRG7] = regs.sprg7;

    env->spr[SPR_BOOKE_PID] = regs.pid;

    for (i = 0; i < 32; i++) {
        env->gpr[i] = regs.gpr[i];
    }

    kvm_get_fp(cs);

    if (cap_booke_sregs) {
        ret = kvmppc_get_booke_sregs(cpu);
        if (ret < 0) {
            return ret;
        }
    }

    if (cap_segstate) {
        ret = kvmppc_get_books_sregs(cpu);
        if (ret < 0) {
            return ret;
        }
    }

    if (cap_hior) {
        kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
    }

    if (cap_one_reg) {
        int i;

        /* We deliberately ignore errors here, for kernels which have
         * the ONE_REG calls, but don't support the specific
         * registers, there's a reasonable chance things will still
         * work, at least until we try to migrate. */
        for (i = 0; i < 1024; i++) {
            uint64_t id = env->spr_cb[i].one_reg_id;

            if (id != 0) {
                kvm_get_one_spr(cs, id, i);
            }
        }

#ifdef TARGET_PPC64
        if (msr_ts) {
            for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
                kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
            }
            for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
                kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
            }
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
        }

        if (cap_papr) {
            if (kvm_get_vpa(cs) < 0) {
                DPRINTF("Warning: Unable to get VPA information from KVM\n");
            }
        }

        kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
#endif
    }

    return 0;
}

int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
{
    unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;

    if (irq != PPC_INTERRUPT_EXT) {
        return 0;
    }

    if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
        return 0;
    }

    kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);

    return 0;
}

#if defined(TARGET_PPC64)
#define PPC_INPUT_INT PPC970_INPUT_INT
#else
#define PPC_INPUT_INT PPC6xx_INPUT_INT
#endif

void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    int r;
    unsigned irq;

    qemu_mutex_lock_iothread();

    /* PowerPC QEMU tracks the various core input pins (interrupt, critical
     * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
    if (!cap_interrupt_level &&
        run->ready_for_interrupt_injection &&
        (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
        (env->irq_input_state & (1 << PPC_INPUT_INT)))
    {
        /* For now KVM disregards the 'irq' argument. However, in the
         * future KVM could cache it in-kernel to avoid a heavyweight exit
         * when reading the UIC.
         */
        irq = KVM_INTERRUPT_SET;

        DPRINTF("injected interrupt %d\n", irq);
        r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
        if (r < 0) {
            printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
        }

        /* Always wake up soon in case the interrupt was level based */
        timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
                  (NANOSECONDS_PER_SECOND / 50));
    }

    /* We don't know if there are more interrupts pending after this. However,
     * the guest will return to userspace in the course of handling this one
     * anyways, so we will get a chance to deliver the rest. */

    qemu_mutex_unlock_iothread();
}

MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
{
    return MEMTXATTRS_UNSPECIFIED;
}

int kvm_arch_process_async_events(CPUState *cs)
{
    return cs->halted;
}

static int kvmppc_handle_halt(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    CPUPPCState *env = &cpu->env;

    if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
        cs->halted = 1;
        cs->exception_index = EXCP_HLT;
    }

    return 0;
}

/* map dcr access to existing qemu dcr emulation */
static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn,
                                  uint32_t *data)
{
    if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0) {
        fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
    }

    return 0;
}

static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn,
                                   uint32_t data)
{
    if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0) {
        fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
    }

    return 0;
}

int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
{
    /* Mixed endian case is not handled */
    uint32_t sc = debug_inst_opcode;

    if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
                            sizeof(sc), 0) ||
        cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
        return -EINVAL;
    }

    return 0;
}

int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
{
    uint32_t sc;

    if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
        sc != debug_inst_opcode ||
        cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
                            sizeof(sc), 1)) {
        return -EINVAL;
    }

    return 0;
}

static int find_hw_breakpoint(target_ulong addr, int type)
{
    int n;

    assert((nb_hw_breakpoint + nb_hw_watchpoint)
           <= ARRAY_SIZE(hw_debug_points));

    for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
        if (hw_debug_points[n].addr == addr &&
            hw_debug_points[n].type == type) {
            return n;
        }
    }

    return -1;
}

static int find_hw_watchpoint(target_ulong addr, int *flag)
{
    int n;

    n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
    if (n >= 0) {
        *flag = BP_MEM_ACCESS;
        return n;
    }

    n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
    if (n >= 0) {
        *flag = BP_MEM_WRITE;
        return n;
    }

    n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
    if (n >= 0) {
        *flag = BP_MEM_READ;
        return n;
    }

    return -1;
}

int kvm_arch_insert_hw_breakpoint(target_ulong addr,
                                  target_ulong len, int type)
{
    if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
        return -ENOBUFS;
    }

    hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
    hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;

    switch (type) {
    case GDB_BREAKPOINT_HW:
        if (nb_hw_breakpoint >= max_hw_breakpoint) {
            return -ENOBUFS;
        }

        if (find_hw_breakpoint(addr, type) >= 0) {
            return -EEXIST;
        }

        nb_hw_breakpoint++;
        break;

    case GDB_WATCHPOINT_WRITE:
    case GDB_WATCHPOINT_READ:
    case GDB_WATCHPOINT_ACCESS:
        if (nb_hw_watchpoint >= max_hw_watchpoint) {
            return -ENOBUFS;
        }

        if (find_hw_breakpoint(addr, type) >= 0) {
            return -EEXIST;
        }

        nb_hw_watchpoint++;
        break;

    default:
        return -ENOSYS;
    }

    return 0;
}

int kvm_arch_remove_hw_breakpoint(target_ulong addr,
                                  target_ulong len, int type)
{
    int n;

    n = find_hw_breakpoint(addr, type);
    if (n < 0) {
        return -ENOENT;
    }

    switch (type) {
    case GDB_BREAKPOINT_HW:
        nb_hw_breakpoint--;
        break;

    case GDB_WATCHPOINT_WRITE:
    case GDB_WATCHPOINT_READ:
    case GDB_WATCHPOINT_ACCESS:
        nb_hw_watchpoint--;
        break;

    default:
        return -ENOSYS;
    }
    hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];

    return 0;
}

void kvm_arch_remove_all_hw_breakpoints(void)
{
    nb_hw_breakpoint = nb_hw_watchpoint = 0;
}

void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
{
    int n;

    /* Software Breakpoint updates */
    if (kvm_sw_breakpoints_active(cs)) {
        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
    }

    assert((nb_hw_breakpoint + nb_hw_watchpoint)
           <= ARRAY_SIZE(hw_debug_points));
    assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));

    if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
        memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
        for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
            switch (hw_debug_points[n].type) {
            case GDB_BREAKPOINT_HW:
                dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
                break;
            case GDB_WATCHPOINT_WRITE:
                dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
                break;
            case GDB_WATCHPOINT_READ:
                dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
                break;
            case GDB_WATCHPOINT_ACCESS:
                dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
                                       KVMPPC_DEBUG_WATCH_READ;
                break;
            default:
                cpu_abort(cs, "Unsupported breakpoint type\n");
            }
            dbg->arch.bp[n].addr = hw_debug_points[n].addr;
        }
    }
}

static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
{
    CPUState *cs = CPU(cpu);
    CPUPPCState *env = &cpu->env;
    struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
    int handle = 0;
    int n;
    int flag = 0;

    if (cs->singlestep_enabled) {
        handle = 1;
    } else if (arch_info->status) {
        if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
            if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
                n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
                if (n >= 0) {
                    handle = 1;
                }
            } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
                                            KVMPPC_DEBUG_WATCH_WRITE)) {
                n = find_hw_watchpoint(arch_info->address, &flag);
                if (n >= 0) {
                    handle = 1;
                    cs->watchpoint_hit = &hw_watchpoint;
                    hw_watchpoint.vaddr = hw_debug_points[n].addr;
                    hw_watchpoint.flags = flag;
                }
            }
        }
    } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
        handle = 1;
    } else {
        /* QEMU is not able to handle debug exception, so inject
         * program exception to guest;
         * Yes program exception NOT debug exception !!
         * When QEMU is using debug resources then debug exception must
         * be always set. To achieve this we set MSR_DE and also set
         * MSRP_DEP so guest cannot change MSR_DE.
         * When emulating debug resource for guest we want guest
         * to control MSR_DE (enable/disable debug interrupt on need).
         * Supporting both configurations is NOT possible.
         * So the result is that we cannot share debug resources
         * between QEMU and Guest on BOOKE architecture.
         * In the current design QEMU gets the priority over guest,
         * this means that if QEMU is using debug resources then guest
         * cannot use them;
         * For software breakpoint QEMU uses a privileged instruction;
         * So there cannot be any reason that we are here for guest
         * set debug exception, only possibility is guest executed a
         * privileged / illegal instruction and that's why we are
         * injecting a program interrupt.
         */

        cpu_synchronize_state(cs);
        /* env->nip is PC, so increment this by 4 to use
         * ppc_cpu_do_interrupt(), which sets srr0 = env->nip - 4.
         */
        env->nip += 4;
        cs->exception_index = POWERPC_EXCP_PROGRAM;
        env->error_code = POWERPC_EXCP_INVAL;
        ppc_cpu_do_interrupt(cs);
    }

    return handle;
}

int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    int ret;

    qemu_mutex_lock_iothread();

    switch (run->exit_reason) {
    case KVM_EXIT_DCR:
        if (run->dcr.is_write) {
            DPRINTF("handle dcr write\n");
            ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
        } else {
            DPRINTF("handle dcr read\n");
            ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
        }
        break;
    case KVM_EXIT_HLT:
        DPRINTF("handle halt\n");
        ret = kvmppc_handle_halt(cpu);
        break;
#if defined(TARGET_PPC64)
    case KVM_EXIT_PAPR_HCALL:
        DPRINTF("handle PAPR hypercall\n");
        run->papr_hcall.ret = spapr_hypercall(cpu,
                                              run->papr_hcall.nr,
                                              run->papr_hcall.args);
        ret = 0;
        break;
#endif
    case KVM_EXIT_EPR:
        DPRINTF("handle epr\n");
        run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
        ret = 0;
        break;
    case KVM_EXIT_WATCHDOG:
        DPRINTF("handle watchdog expiry\n");
        watchdog_perform_action();
        ret = 0;
        break;

    case KVM_EXIT_DEBUG:
        DPRINTF("handle debug exception\n");
        if (kvm_handle_debug(cpu, run)) {
            ret = EXCP_DEBUG;
            break;
        }
        /* re-enter, this exception was guest-internal */
        ret = 0;
        break;

    default:
        fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
        ret = -1;
        break;
    }

    qemu_mutex_unlock_iothread();
    return ret;
}

int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
{
    CPUState *cs = CPU(cpu);
    uint32_t bits = tsr_bits;
    struct kvm_one_reg reg = {
        .id = KVM_REG_PPC_OR_TSR,
        .addr = (uintptr_t) &bits,
    };

    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
}

int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
{
    CPUState *cs = CPU(cpu);
    uint32_t bits = tsr_bits;
    struct kvm_one_reg reg = {
        .id = KVM_REG_PPC_CLEAR_TSR,
        .addr = (uintptr_t) &bits,
    };

    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
}

int kvmppc_set_tcr(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    CPUPPCState *env = &cpu->env;
    uint32_t tcr = env->spr[SPR_BOOKE_TCR];

    struct kvm_one_reg reg = {
        .id = KVM_REG_PPC_TCR,
        .addr = (uintptr_t) &tcr,
    };

    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
}

int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    int ret;

    if (!kvm_enabled()) {
        return -1;
    }

    if (!cap_ppc_watchdog) {
        printf("warning: KVM does not support watchdog");
        return -1;
    }

    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
    if (ret < 0) {
        fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
                __func__, strerror(-ret));
        return ret;
    }

    return ret;
}

static int read_cpuinfo(const char *field, char *value, int len)
{
    FILE *f;
    int ret = -1;
    int field_len = strlen(field);
    char line[512];

    f = fopen("/proc/cpuinfo", "r");
    if (!f) {
        return -1;
    }

    do {
        if (!fgets(line, sizeof(line), f)) {
            break;
        }
        if (!strncmp(line, field, field_len)) {
            pstrcpy(value, len, line);
            ret = 0;
            break;
        }
    } while (*line);

    fclose(f);

    return ret;
}

uint32_t kvmppc_get_tbfreq(void)
{
    char line[512];
    char *ns;
    uint32_t retval = NANOSECONDS_PER_SECOND;

    if (read_cpuinfo("timebase", line, sizeof(line))) {
        return retval;
    }

    if (!(ns = strchr(line, ':'))) {
        return retval;
    }

    ns++;

    return atoi(ns);
}

bool kvmppc_get_host_serial(char **value)
{
    return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
                               NULL);
}

bool kvmppc_get_host_model(char **value)
{
    return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
}

/* Try to find a device tree node for a CPU with clock-frequency property */
static int kvmppc_find_cpu_dt(char *buf, int buf_len)
{
    struct dirent *dirp;
    DIR *dp;

    if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
        printf("Can't open directory " PROC_DEVTREE_CPU "\n");
        return -1;
    }

    buf[0] = '\0';
    while ((dirp = readdir(dp)) != NULL) {
        FILE *f;
        snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
                 dirp->d_name);
        f = fopen(buf, "r");
        if (f) {
            snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
            fclose(f);
            break;
        }
        buf[0] = '\0';
    }
    closedir(dp);
    if (buf[0] == '\0') {
        printf("Unknown host!\n");
        return -1;
    }

    return 0;
}

static uint64_t kvmppc_read_int_dt(const char *filename)
{
    union {
        uint32_t v32;
        uint64_t v64;
    } u;
    FILE *f;
    int len;

    f = fopen(filename, "rb");
    if (!f) {
        return -1;
    }

    len = fread(&u, 1, sizeof(u), f);
    fclose(f);
    switch (len) {
    case 4:
        /* property is a 32-bit quantity */
        return be32_to_cpu(u.v32);
    case 8:
        return be64_to_cpu(u.v64);
    }

    return 0;
}

/* Read a CPU node property from the host device tree that's a single
 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
 * (can't find or open the property, or doesn't understand the
 * format) */
static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
{
    char buf[PATH_MAX], *tmp;
    uint64_t val;

    if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
        return -1;
    }

    tmp = g_strdup_printf("%s/%s", buf, propname);
    val = kvmppc_read_int_dt(tmp);
    g_free(tmp);

    return val;
}

uint64_t kvmppc_get_clockfreq(void)
{
    return kvmppc_read_int_cpu_dt("clock-frequency");
}

static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
{
    PowerPCCPU *cpu = ppc_env_get_cpu(env);
    CPUState *cs = CPU(cpu);

    if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
        !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
        return 0;
    }

    return 1;
}

int kvmppc_get_hasidle(CPUPPCState *env)
{
    struct kvm_ppc_pvinfo pvinfo;

    if (!kvmppc_get_pvinfo(env, &pvinfo) &&
        (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
        return 1;
    }

    return 0;
}

int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
{
    uint32_t *hc = (uint32_t *)buf;
    struct kvm_ppc_pvinfo pvinfo;

    if (!kvmppc_get_pvinfo(env, &pvinfo)) {
        memcpy(buf, pvinfo.hcall, buf_len);
        return 0;
    }

    /*
     * Fallback to always fail hypercalls regardless of endianness:
     *
     *     tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
     *     li r3, -1
     *     b .+8       (becomes nop in wrong endian)
     *     bswap32(li r3, -1)
     */

    hc[0] = cpu_to_be32(0x08000048);
    hc[1] = cpu_to_be32(0x3860ffff);
    hc[2] = cpu_to_be32(0x48000008);
    hc[3] = cpu_to_be32(bswap32(0x3860ffff));

    return 1;
}

static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
{
    return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
}

void kvmppc_enable_logical_ci_hcalls(void)
{
    /*
     * FIXME: it would be nice if we could detect the cases where
     * we're using a device which requires the in kernel
     * implementation of these hcalls, but the kernel lacks them and
     * produce a warning.
     */
    kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
    kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
}

void kvmppc_enable_set_mode_hcall(void)
{
    kvmppc_enable_hcall(kvm_state, H_SET_MODE);
}

void kvmppc_enable_clear_ref_mod_hcalls(void)
{
    kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
    kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
}

void kvmppc_set_papr(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    int ret;

    if (!kvm_enabled()) {
        return;
    }

    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
    if (ret) {
        error_report("This vCPU type or KVM version does not support PAPR");
        exit(1);
    }

    /* Update the capability flag so we sync the right information
     * with kvm */
    cap_papr = 1;
}

int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr)
{
    return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr);
}

void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
{
    CPUState *cs = CPU(cpu);
    int ret;

    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
    if (ret && mpic_proxy) {
        error_report("This KVM version does not support EPR");
        exit(1);
    }
}

int kvmppc_smt_threads(void)
{
    return cap_ppc_smt ? cap_ppc_smt : 1;
}

int kvmppc_set_smt_threads(int smt)
{
    int ret;

    ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_SMT, 0, smt, 0);
    if (!ret) {
        cap_ppc_smt = smt;
    }
    return ret;
}

void kvmppc_hint_smt_possible(Error **errp)
{
    int i;
    GString *g;
    char *s;

    assert(kvm_enabled());
    if (cap_ppc_smt_possible) {
        g = g_string_new("Available VSMT modes:");
        for (i = 63; i >= 0; i--) {
            if ((1UL << i) & cap_ppc_smt_possible) {
                g_string_append_printf(g, " %lu", (1UL << i));
            }
        }
        s = g_string_free(g, false);
        error_append_hint(errp, "%s.\n", s);
        g_free(s);
    } else {
        error_append_hint(errp,
                          "This KVM seems to be too old to support VSMT.\n");
    }
}


#ifdef TARGET_PPC64
uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
{
    struct kvm_ppc_smmu_info info;
    long rampagesize, best_page_shift;
    int i;

    /* Find the largest hardware supported page size that's less than
     * or equal to the (logical) backing page size of guest RAM */
    kvm_get_smmu_info(&info, &error_fatal);
    rampagesize = qemu_getrampagesize();
    best_page_shift = 0;

    for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
        struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];

        if (!sps->page_shift) {
            continue;
        }

        if ((sps->page_shift > best_page_shift)
            && ((1UL << sps->page_shift) <= rampagesize)) {
            best_page_shift = sps->page_shift;
        }
    }

    return MIN(current_size,
               1ULL << (best_page_shift + hash_shift - 7));
}
#endif

bool kvmppc_spapr_use_multitce(void)
{
    return cap_spapr_multitce;
}

int kvmppc_spapr_enable_inkernel_multitce(void)
{
    int ret;

    ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
                            H_PUT_TCE_INDIRECT, 1);
    if (!ret) {
        ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
                                H_STUFF_TCE, 1);
    }

    return ret;
}
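
/* Create an in-kernel TCE table for the given LIOBN and mmap() it into QEMU.
 * Returns the mapped table and stores the backing file descriptor in *pfd;
 * *pfd is left at -1 and NULL is returned when the kernel acceleration is
 * unavailable, in which case the caller must allocate a userspace table. */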
void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
                              uint64_t bus_offset, uint32_t nb_table,
                              int *pfd, bool need_vfio)
{
    long len;
    int fd;
    void *table;

    /* Must set fd to -1 so we don't try to munmap when called for
     * destroying the table, which the upper layers -will- do
     */
    *pfd = -1;
    if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
        return NULL;
    }

    if (cap_spapr_tce_64) {
        struct kvm_create_spapr_tce_64 args = {
            .liobn = liobn,
            .page_shift = page_shift,
            .offset = bus_offset >> page_shift,
            .size = nb_table,
            .flags = 0
        };
        fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE_64, &args);
        if (fd < 0) {
            fprintf(stderr,
                    "KVM: Failed to create TCE64 table for liobn 0x%x\n",
                    liobn);
            return NULL;
        }
    } else if (cap_spapr_tce) {
        uint64_t window_size = (uint64_t) nb_table << page_shift;
        struct kvm_create_spapr_tce args = {
            .liobn = liobn,
            .window_size = window_size,
        };
        if ((window_size != args.window_size) || bus_offset) {
            return NULL;
        }
        fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
        if (fd < 0) {
            fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
                    liobn);
            return NULL;
        }
    } else {
        return NULL;
    }

    len = nb_table * sizeof(uint64_t);
    /* FIXME: round this up to page size */

    table = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    if (table == MAP_FAILED) {
        fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
                liobn);
        close(fd);
        return NULL;
    }

    *pfd = fd;
    return table;
}

int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
{
    long len;

    if (fd < 0) {
        return -1;
    }

    len = nb_table * sizeof(uint64_t);
    if ((munmap(table, len) < 0) ||
        (close(fd) < 0)) {
        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
                strerror(errno));
        /* Leak the table */
    }

    return 0;
}
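/*
 * Ask KVM to allocate or reset the guest hash page table.  Returns the
 * order (shift) of the table the kernel actually allocated, 0 when QEMU
 * itself must allocate the HPT (TCG, PR KVM, or kernels without the
 * ioctl), or a negative errno on failure.  A caller would typically do
 * something along these lines (illustrative sketch only, the helper names
 * are hypothetical):
 *
 *     shift = kvmppc_reset_htab(shift_hint);
 *     if (shift > 0) {
 *         remember_kernel_htab_order(shift);
 *     } else if (shift == 0) {
 *         allocate_htab_in_qemu(shift_hint);
 *     } else {
 *         error_report("HPT allocation failed: %s", strerror(-shift));
 *     }
 */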
int kvmppc_reset_htab(int shift_hint)
{
    uint32_t shift = shift_hint;

    if (!kvm_enabled()) {
        /* Full emulation, tell caller to allocate htab itself */
        return 0;
    }
    if (kvm_vm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
        int ret;
        ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
        if (ret == -ENOTTY) {
            /* At least some versions of PR KVM advertise the
             * capability, but don't implement the ioctl().  Oops.
             * Return 0 so that we allocate the htab in qemu, as is
             * correct for PR. */
            return 0;
        } else if (ret < 0) {
            return ret;
        }
        return shift;
    }

    /* We have a kernel that predates the htab reset calls.  For PR
     * KVM, we need to allocate the htab ourselves, for an HV KVM of
     * this era, it has allocated a 16MB fixed size hash table already. */
    if (kvmppc_is_pr(kvm_state)) {
        /* PR - tell caller to allocate htab */
        return 0;
    } else {
        /* HV - assume 16MB kernel allocated htab */
        return 24;
    }
}

static inline uint32_t mfpvr(void)
{
    uint32_t pvr;

    asm ("mfpvr %0"
         : "=r"(pvr));
    return pvr;
}

static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    if (on) {
        *word |= flags;
    } else {
        *word &= ~flags;
    }
}

static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
{
    PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
    uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
    uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");

    /* Now fix up the class with information we can query from the host */
    pcc->pvr = mfpvr();

    alter_insns(&pcc->insns_flags, PPC_ALTIVEC,
                qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_ALTIVEC);
    alter_insns(&pcc->insns_flags2, PPC2_VSX,
                qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_VSX);
    alter_insns(&pcc->insns_flags2, PPC2_DFP,
                qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_DFP);

    if (dcache_size != -1) {
        pcc->l1_dcache_size = dcache_size;
    }

    if (icache_size != -1) {
        pcc->l1_icache_size = icache_size;
    }

#if defined(TARGET_PPC64)
    pcc->radix_page_info = kvm_get_radix_page_info();

    if ((pcc->pvr & 0xffffff00) == CPU_POWERPC_POWER9_DD1) {
        /*
         * POWER9 DD1 has some bugs which make it not really ISA 3.00
         * compliant. More importantly, advertising ISA 3.00
         * architected mode may prevent guests from activating
         * necessary DD1 workarounds.
         */
        pcc->pcr_supported &= ~(PCR_COMPAT_3_00 | PCR_COMPAT_2_07
                                | PCR_COMPAT_2_06 | PCR_COMPAT_2_05);
    }
#endif /* defined(TARGET_PPC64) */
}

bool kvmppc_has_cap_epr(void)
{
    return cap_epr;
}

bool kvmppc_has_cap_fixup_hcalls(void)
{
    return cap_fixup_hcalls;
}

bool kvmppc_has_cap_htm(void)
{
    return cap_htm;
}

bool kvmppc_has_cap_mmu_radix(void)
{
    return cap_mmu_radix;
}

bool kvmppc_has_cap_mmu_hash_v3(void)
{
    return cap_mmu_hash_v3;
}

static bool kvmppc_power8_host(void)
{
    bool ret = false;
#ifdef TARGET_PPC64
    {
        uint32_t base_pvr = CPU_POWERPC_POWER_SERVER_MASK & mfpvr();
        ret = (base_pvr == CPU_POWERPC_POWER8E_BASE) ||
              (base_pvr == CPU_POWERPC_POWER8NVL_BASE) ||
              (base_pvr == CPU_POWERPC_POWER8_BASE);
    }
#endif /* TARGET_PPC64 */
    return ret;
}

static int parse_cap_ppc_safe_cache(struct kvm_ppc_cpu_char c)
{
    bool l1d_thread_priv_req = !kvmppc_power8_host();

    if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_L1D_FLUSH_PR) {
        return 2;
    } else if ((!l1d_thread_priv_req ||
                c.character & c.character_mask & H_CPU_CHAR_L1D_THREAD_PRIV) &&
               (c.character & c.character_mask
                & (H_CPU_CHAR_L1D_FLUSH_ORI30 | H_CPU_CHAR_L1D_FLUSH_TRIG2))) {
        return 1;
    }

    return 0;
}

static int parse_cap_ppc_safe_bounds_check(struct kvm_ppc_cpu_char c)
{
    if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_BNDS_CHK_SPEC_BAR) {
        return 2;
    } else if (c.character & c.character_mask & H_CPU_CHAR_SPEC_BAR_ORI31) {
        return 1;
    }

    return 0;
}
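/*
 * Like the helpers above, this maps KVM's CPU character bits onto the
 * sPAPR capability levels QEMU uses elsewhere (0 = broken, 1 = workaround
 * available, 2 = fixed in hardware).  For indirect branches the "fixed"
 * case is split into SPAPR_CAP_FIXED_CCD (count cache disabled) and
 * SPAPR_CAP_FIXED_IBS (bcctrl serialised).
 */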
static int parse_cap_ppc_safe_indirect_branch(struct kvm_ppc_cpu_char c)
{
    if (c.character & c.character_mask & H_CPU_CHAR_CACHE_COUNT_DIS) {
        return SPAPR_CAP_FIXED_CCD;
    } else if (c.character & c.character_mask & H_CPU_CHAR_BCCTRL_SERIALISED) {
        return SPAPR_CAP_FIXED_IBS;
    }

    return 0;
}

static void kvmppc_get_cpu_characteristics(KVMState *s)
{
    struct kvm_ppc_cpu_char c;
    int ret;

    /* Assume broken */
    cap_ppc_safe_cache = 0;
    cap_ppc_safe_bounds_check = 0;
    cap_ppc_safe_indirect_branch = 0;

    ret = kvm_vm_check_extension(s, KVM_CAP_PPC_GET_CPU_CHAR);
    if (!ret) {
        return;
    }
    ret = kvm_vm_ioctl(s, KVM_PPC_GET_CPU_CHAR, &c);
    if (ret < 0) {
        return;
    }

    cap_ppc_safe_cache = parse_cap_ppc_safe_cache(c);
    cap_ppc_safe_bounds_check = parse_cap_ppc_safe_bounds_check(c);
    cap_ppc_safe_indirect_branch = parse_cap_ppc_safe_indirect_branch(c);
}

int kvmppc_get_cap_safe_cache(void)
{
    return cap_ppc_safe_cache;
}

int kvmppc_get_cap_safe_bounds_check(void)
{
    return cap_ppc_safe_bounds_check;
}

int kvmppc_get_cap_safe_indirect_branch(void)
{
    return cap_ppc_safe_indirect_branch;
}

bool kvmppc_has_cap_nested_kvm_hv(void)
{
    return !!cap_ppc_nested_kvm_hv;
}

int kvmppc_set_cap_nested_kvm_hv(int enable)
{
    return kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_NESTED_HV, 0, enable);
}

bool kvmppc_has_cap_spapr_vfio(void)
{
    return cap_spapr_vfio;
}

PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
{
    uint32_t host_pvr = mfpvr();
    PowerPCCPUClass *pvr_pcc;

    pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
    if (pvr_pcc == NULL) {
        pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
    }

    return pvr_pcc;
}
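/*
 * Register the "host" CPU type: it inherits from whichever existing CPU
 * class matches the host PVR, is fixed up by kvmppc_host_cpu_class_init()
 * above, and becomes the default CPU model on sPAPR machines.  The family
 * alias (e.g. "POWER8") is also repointed at this type so that the alias
 * resolves to the host class when running on matching hardware.
 */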
static int kvm_ppc_register_host_cpu_type(MachineState *ms)
{
    TypeInfo type_info = {
        .name = TYPE_HOST_POWERPC_CPU,
        .class_init = kvmppc_host_cpu_class_init,
    };
    MachineClass *mc = MACHINE_GET_CLASS(ms);
    PowerPCCPUClass *pvr_pcc;
    ObjectClass *oc;
    DeviceClass *dc;
    int i;

    pvr_pcc = kvm_ppc_get_host_cpu_class();
    if (pvr_pcc == NULL) {
        return -1;
    }
    type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
    type_register(&type_info);
    if (object_dynamic_cast(OBJECT(ms), TYPE_SPAPR_MACHINE)) {
        /* override TCG default cpu type with 'host' cpu model */
        mc->default_cpu_type = TYPE_HOST_POWERPC_CPU;
    }

    oc = object_class_by_name(type_info.name);
    g_assert(oc);

    /*
     * Update generic CPU family class alias (e.g. on a POWER8NVL host,
     * we want "POWER8" to be a "family" alias that points to the current
     * host CPU type, too)
     */
    dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc));
    for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) {
        if (strcasecmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) {
            char *suffix;

            ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc));
            suffix = strstr(ppc_cpu_aliases[i].model, POWERPC_CPU_TYPE_SUFFIX);
            if (suffix) {
                *suffix = 0;
            }
            break;
        }
    }

    return 0;
}

int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
{
    struct kvm_rtas_token_args args = {
        .token = token,
    };

    if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
        return -ENOENT;
    }

    strncpy(args.name, function, sizeof(args.name));

    return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
}

int kvmppc_get_htab_fd(bool write, uint64_t index, Error **errp)
{
    struct kvm_get_htab_fd s = {
        .flags = write ? KVM_GET_HTAB_WRITE : 0,
        .start_index = index,
    };
    int ret;

    if (!cap_htab_fd) {
        error_setg(errp, "KVM version doesn't support %s the HPT",
                   write ? "writing" : "reading");
        return -ENOTSUP;
    }

    ret = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
    if (ret < 0) {
        error_setg(errp, "Unable to open fd for %s HPT %s KVM: %s",
                   write ? "writing" : "reading", write ? "to" : "from",
                   strerror(errno));
        return -errno;
    }

    return ret;
}
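/*
 * Stream the guest hash page table out of the KVM HTAB fd in chunks of
 * (header, valid HPTEs), the same data that kvmppc_load_htab_chunk()
 * below writes back on the destination.  Returns 1 once the whole table
 * has been read (EOF on the fd), 0 if it stopped early because the max_ns
 * time budget ran out, or a negative value if the read failed.
 */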
int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
{
    int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    uint8_t buf[bufsize];
    ssize_t rc;

    do {
        rc = read(fd, buf, bufsize);
        if (rc < 0) {
            fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
                    strerror(errno));
            return rc;
        } else if (rc) {
            uint8_t *buffer = buf;
            ssize_t n = rc;
            while (n) {
                struct kvm_get_htab_header *head =
                    (struct kvm_get_htab_header *) buffer;
                size_t chunksize = sizeof(*head) +
                    HASH_PTE_SIZE_64 * head->n_valid;

                qemu_put_be32(f, head->index);
                qemu_put_be16(f, head->n_valid);
                qemu_put_be16(f, head->n_invalid);
                qemu_put_buffer(f, (void *)(head + 1),
                                HASH_PTE_SIZE_64 * head->n_valid);

                buffer += chunksize;
                n -= chunksize;
            }
        }
    } while ((rc != 0)
             && ((max_ns < 0)
                 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));

    return (rc == 0) ? 1 : 0;
}

int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
                           uint16_t n_valid, uint16_t n_invalid)
{
    struct kvm_get_htab_header *buf;
    size_t chunksize = sizeof(*buf) + n_valid * HASH_PTE_SIZE_64;
    ssize_t rc;

    buf = alloca(chunksize);
    buf->index = index;
    buf->n_valid = n_valid;
    buf->n_invalid = n_invalid;

    qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64 * n_valid);

    rc = write(fd, buf, chunksize);
    if (rc < 0) {
        fprintf(stderr, "Error writing KVM hash table: %s\n",
                strerror(errno));
        return rc;
    }
    if (rc != chunksize) {
        /* We should never get a short write on a single chunk */
        fprintf(stderr, "Short write, restoring KVM hash table\n");
        return -1;
    }
    return 0;
}

bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
{
    return true;
}

void kvm_arch_init_irq_routing(KVMState *s)
{
}

void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n)
{
    int fd, rc;
    int i;

    fd = kvmppc_get_htab_fd(false, ptex, &error_abort);

    i = 0;
    while (i < n) {
        struct kvm_get_htab_header *hdr;
        int m = n < HPTES_PER_GROUP ? n : HPTES_PER_GROUP;
        char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64];

        rc = read(fd, buf, sizeof(buf));
        if (rc < 0) {
            hw_error("kvmppc_read_hptes: Unable to read HPTEs");
        }

        hdr = (struct kvm_get_htab_header *)buf;
        while ((i < n) && ((char *)hdr < (buf + rc))) {
            int invalid = hdr->n_invalid, valid = hdr->n_valid;

            if (hdr->index != (ptex + i)) {
                hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32
                         " != (%"HWADDR_PRIu" + %d)", hdr->index, ptex, i);
            }

            if (n - i < valid) {
                valid = n - i;
            }
            memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * valid);
            i += valid;

            if ((n - i) < invalid) {
                invalid = n - i;
            }
            memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64);
            i += invalid;

            hdr = (struct kvm_get_htab_header *)
                ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid);
        }
    }

    close(fd);
}

void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1)
{
    int fd, rc;
    struct {
        struct kvm_get_htab_header hdr;
        uint64_t pte0;
        uint64_t pte1;
    } buf;

    fd = kvmppc_get_htab_fd(true, 0 /* Ignored */, &error_abort);

    buf.hdr.n_valid = 1;
    buf.hdr.n_invalid = 0;
    buf.hdr.index = ptex;
    buf.pte0 = cpu_to_be64(pte0);
    buf.pte1 = cpu_to_be64(pte1);

    rc = write(fd, &buf, sizeof(buf));
    if (rc != sizeof(buf)) {
        hw_error("kvmppc_write_hpte: Unable to update KVM HPT");
    }
    close(fd);
}

int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
                             uint64_t address, uint32_t data, PCIDevice *dev)
{
    return 0;
}

int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
                                int vector, PCIDevice *dev)
{
    return 0;
}

int kvm_arch_release_virq_post(int virq)
{
    return 0;
}

int kvm_arch_msi_data_to_gsi(uint32_t data)
{
    return data & 0xffff;
}

int kvmppc_enable_hwrng(void)
{
    if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
        return -1;
    }

    return kvmppc_enable_hcall(kvm_state, H_RANDOM);
}
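/*
 * Hash page table resizing.  kvmppc_check_papr_resize_hpt() reports an
 * error when resizing cannot work: with an HV kernel that lacks
 * KVM_CAP_SPAPR_RESIZE_HPT the HPT belongs to the kernel and QEMU cannot
 * resize it, while under PR KVM (or TCG) QEMU owns the HPT and needs no
 * kernel support.  The prepare/commit wrappers below back the
 * corresponding sPAPR resize hypercalls (an assumption based on their
 * names; the callers live outside this file).
 */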
void kvmppc_check_papr_resize_hpt(Error **errp)
{
    if (!kvm_enabled()) {
        return; /* No KVM, we're good */
    }

    if (cap_resize_hpt) {
        return; /* Kernel has explicit support, we're good */
    }

    /* Otherwise fall back on looking for PR KVM */
    if (kvmppc_is_pr(kvm_state)) {
        return;
    }

    error_setg(errp,
               "Hash page table resizing not available with this KVM version");
}

int kvmppc_resize_hpt_prepare(PowerPCCPU *cpu, target_ulong flags, int shift)
{
    CPUState *cs = CPU(cpu);
    struct kvm_ppc_resize_hpt rhpt = {
        .flags = flags,
        .shift = shift,
    };

    if (!cap_resize_hpt) {
        return -ENOSYS;
    }

    return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_PREPARE, &rhpt);
}

int kvmppc_resize_hpt_commit(PowerPCCPU *cpu, target_ulong flags, int shift)
{
    CPUState *cs = CPU(cpu);
    struct kvm_ppc_resize_hpt rhpt = {
        .flags = flags,
        .shift = shift,
    };

    if (!cap_resize_hpt) {
        return -ENOSYS;
    }

    return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_COMMIT, &rhpt);
}

/*
 * This is a helper function to detect a post-migration scenario
 * in which a guest, running as KVM-HV, freezes in cpu_post_load because
 * the guest kernel can't handle a PVR value other than the actual host
 * PVR in KVM_SET_SREGS, even if pvr_match() returns true.
 *
 * If we don't have cap_ppc_pvr_compat and we're not running in PR
 * (so, we're HV), return true. The workaround itself is done in
 * cpu_post_load.
 *
 * The order here is important: we'll only check for KVM PR as a
 * fallback if the guest kernel can't handle the situation itself.
 * We want to avoid querying the running KVM type from QEMU-level code
 * as much as possible.
 */
bool kvmppc_pvr_workaround_required(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);

    if (!kvm_enabled()) {
        return false;
    }

    if (cap_ppc_pvr_compat) {
        return false;
    }

    return !kvmppc_is_pr(cs->kvm_state);
}

void kvmppc_set_reg_ppc_online(PowerPCCPU *cpu, unsigned int online)
{
    CPUState *cs = CPU(cpu);

    if (kvm_enabled()) {
        kvm_set_one_reg(cs, KVM_REG_PPC_ONLINE, &online);
    }
}