1 /* 2 * PowerPC implementation of KVM hooks 3 * 4 * Copyright IBM Corp. 2007 5 * Copyright (C) 2011 Freescale Semiconductor, Inc. 6 * 7 * Authors: 8 * Jerone Young <jyoung5@us.ibm.com> 9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com> 10 * Hollis Blanchard <hollisb@us.ibm.com> 11 * 12 * This work is licensed under the terms of the GNU GPL, version 2 or later. 13 * See the COPYING file in the top-level directory. 14 * 15 */ 16 17 #include "qemu/osdep.h" 18 #include <dirent.h> 19 #include <sys/ioctl.h> 20 #include <sys/vfs.h> 21 22 #include <linux/kvm.h> 23 24 #include "qemu-common.h" 25 #include "qapi/error.h" 26 #include "qemu/error-report.h" 27 #include "cpu.h" 28 #include "cpu-models.h" 29 #include "qemu/timer.h" 30 #include "sysemu/sysemu.h" 31 #include "sysemu/hw_accel.h" 32 #include "kvm_ppc.h" 33 #include "sysemu/cpus.h" 34 #include "sysemu/device_tree.h" 35 #include "mmu-hash64.h" 36 37 #include "hw/sysbus.h" 38 #include "hw/ppc/spapr.h" 39 #include "hw/ppc/spapr_vio.h" 40 #include "hw/ppc/spapr_cpu_core.h" 41 #include "hw/ppc/ppc.h" 42 #include "sysemu/watchdog.h" 43 #include "trace.h" 44 #include "exec/gdbstub.h" 45 #include "exec/memattrs.h" 46 #include "exec/ram_addr.h" 47 #include "sysemu/hostmem.h" 48 #include "qemu/cutils.h" 49 #include "qemu/mmap-alloc.h" 50 #include "elf.h" 51 #include "sysemu/kvm_int.h" 52 53 //#define DEBUG_KVM 54 55 #ifdef DEBUG_KVM 56 #define DPRINTF(fmt, ...) \ 57 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0) 58 #else 59 #define DPRINTF(fmt, ...) \ 60 do { } while (0) 61 #endif 62 63 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/" 64 65 const KVMCapabilityInfo kvm_arch_required_capabilities[] = { 66 KVM_CAP_LAST_INFO 67 }; 68 69 static int cap_interrupt_unset = false; 70 static int cap_interrupt_level = false; 71 static int cap_segstate; 72 static int cap_booke_sregs; 73 static int cap_ppc_smt; 74 static int cap_ppc_smt_possible; 75 static int cap_spapr_tce; 76 static int cap_spapr_tce_64; 77 static int cap_spapr_multitce; 78 static int cap_spapr_vfio; 79 static int cap_hior; 80 static int cap_one_reg; 81 static int cap_epr; 82 static int cap_ppc_watchdog; 83 static int cap_papr; 84 static int cap_htab_fd; 85 static int cap_fixup_hcalls; 86 static int cap_htm; /* Hardware transactional memory support */ 87 static int cap_mmu_radix; 88 static int cap_mmu_hash_v3; 89 static int cap_resize_hpt; 90 static int cap_ppc_pvr_compat; 91 static int cap_ppc_safe_cache; 92 static int cap_ppc_safe_bounds_check; 93 static int cap_ppc_safe_indirect_branch; 94 static int cap_ppc_nested_kvm_hv; 95 96 static uint32_t debug_inst_opcode; 97 98 /* XXX We have a race condition where we actually have a level triggered 99 * interrupt, but the infrastructure can't expose that yet, so the guest 100 * takes but ignores it, goes to sleep and never gets notified that there's 101 * still an interrupt pending. 102 * 103 * As a quick workaround, let's just wake up again 20 ms after we injected 104 * an interrupt. That way we can assure that we're always reinjecting 105 * interrupts in case the guest swallowed them. 106 */ 107 static QEMUTimer *idle_timer; 108 109 static void kvm_kick_cpu(void *opaque) 110 { 111 PowerPCCPU *cpu = opaque; 112 113 qemu_cpu_kick(CPU(cpu)); 114 } 115 116 /* Check whether we are running with KVM-PR (instead of KVM-HV). This 117 * should only be used for fallback tests - generally we should use 118 * explicit capabilities for the features we want, rather than 119 * assuming what is/isn't available depending on the KVM variant. 
*/ 120 static bool kvmppc_is_pr(KVMState *ks) 121 { 122 /* Assume KVM-PR if the GET_PVINFO capability is available */ 123 return kvm_vm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0; 124 } 125 126 static int kvm_ppc_register_host_cpu_type(MachineState *ms); 127 static void kvmppc_get_cpu_characteristics(KVMState *s); 128 129 int kvm_arch_init(MachineState *ms, KVMState *s) 130 { 131 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ); 132 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL); 133 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE); 134 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS); 135 cap_ppc_smt_possible = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT_POSSIBLE); 136 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE); 137 cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64); 138 cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE); 139 cap_spapr_vfio = kvm_vm_check_extension(s, KVM_CAP_SPAPR_TCE_VFIO); 140 cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG); 141 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR); 142 cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR); 143 cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG); 144 /* Note: we don't set cap_papr here, because this capability is 145 * only activated after this by kvmppc_set_papr() */ 146 cap_htab_fd = kvm_vm_check_extension(s, KVM_CAP_PPC_HTAB_FD); 147 cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL); 148 cap_ppc_smt = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT); 149 cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM); 150 cap_mmu_radix = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_RADIX); 151 cap_mmu_hash_v3 = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_HASH_V3); 152 cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT); 153 kvmppc_get_cpu_characteristics(s); 154 cap_ppc_nested_kvm_hv = kvm_vm_check_extension(s, KVM_CAP_PPC_NESTED_HV); 155 /* 156 * Note: setting it to false because there is not such capability 157 * in KVM at this moment. 158 * 159 * TODO: call kvm_vm_check_extension() with the right capability 160 * after the kernel starts implementing it.*/ 161 cap_ppc_pvr_compat = false; 162 163 if (!cap_interrupt_level) { 164 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the " 165 "VM to stall at times!\n"); 166 } 167 168 kvm_ppc_register_host_cpu_type(ms); 169 170 return 0; 171 } 172 173 int kvm_arch_irqchip_create(MachineState *ms, KVMState *s) 174 { 175 return 0; 176 } 177 178 static int kvm_arch_sync_sregs(PowerPCCPU *cpu) 179 { 180 CPUPPCState *cenv = &cpu->env; 181 CPUState *cs = CPU(cpu); 182 struct kvm_sregs sregs; 183 int ret; 184 185 if (cenv->excp_model == POWERPC_EXCP_BOOKE) { 186 /* What we're really trying to say is "if we're on BookE, we use 187 the native PVR for now". This is the only sane way to check 188 it though, so we potentially confuse users that they can run 189 BookE guests on BookS. 
Let's hope nobody dares enough :) */ 190 return 0; 191 } else { 192 if (!cap_segstate) { 193 fprintf(stderr, "kvm error: missing PVR setting capability\n"); 194 return -ENOSYS; 195 } 196 } 197 198 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs); 199 if (ret) { 200 return ret; 201 } 202 203 sregs.pvr = cenv->spr[SPR_PVR]; 204 return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs); 205 } 206 207 /* Set up a shared TLB array with KVM */ 208 static int kvm_booke206_tlb_init(PowerPCCPU *cpu) 209 { 210 CPUPPCState *env = &cpu->env; 211 CPUState *cs = CPU(cpu); 212 struct kvm_book3e_206_tlb_params params = {}; 213 struct kvm_config_tlb cfg = {}; 214 unsigned int entries = 0; 215 int ret, i; 216 217 if (!kvm_enabled() || 218 !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) { 219 return 0; 220 } 221 222 assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN); 223 224 for (i = 0; i < BOOKE206_MAX_TLBN; i++) { 225 params.tlb_sizes[i] = booke206_tlb_size(env, i); 226 params.tlb_ways[i] = booke206_tlb_ways(env, i); 227 entries += params.tlb_sizes[i]; 228 } 229 230 assert(entries == env->nb_tlb); 231 assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t)); 232 233 env->tlb_dirty = true; 234 235 cfg.array = (uintptr_t)env->tlb.tlbm; 236 cfg.array_len = sizeof(ppcmas_tlb_t) * entries; 237 cfg.params = (uintptr_t)¶ms; 238 cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV; 239 240 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg); 241 if (ret < 0) { 242 fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n", 243 __func__, strerror(-ret)); 244 return ret; 245 } 246 247 env->kvm_sw_tlb = true; 248 return 0; 249 } 250 251 252 #if defined(TARGET_PPC64) 253 static void kvm_get_smmu_info(struct kvm_ppc_smmu_info *info, Error **errp) 254 { 255 int ret; 256 257 assert(kvm_state != NULL); 258 259 if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) { 260 error_setg(errp, "KVM doesn't expose the MMU features it supports"); 261 error_append_hint(errp, "Consider switching to a newer KVM\n"); 262 return; 263 } 264 265 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_SMMU_INFO, info); 266 if (ret == 0) { 267 return; 268 } 269 270 error_setg_errno(errp, -ret, 271 "KVM failed to provide the MMU features it supports"); 272 } 273 274 struct ppc_radix_page_info *kvm_get_radix_page_info(void) 275 { 276 KVMState *s = KVM_STATE(current_machine->accelerator); 277 struct ppc_radix_page_info *radix_page_info; 278 struct kvm_ppc_rmmu_info rmmu_info; 279 int i; 280 281 if (!kvm_check_extension(s, KVM_CAP_PPC_MMU_RADIX)) { 282 return NULL; 283 } 284 if (kvm_vm_ioctl(s, KVM_PPC_GET_RMMU_INFO, &rmmu_info)) { 285 return NULL; 286 } 287 radix_page_info = g_malloc0(sizeof(*radix_page_info)); 288 radix_page_info->count = 0; 289 for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) { 290 if (rmmu_info.ap_encodings[i]) { 291 radix_page_info->entries[i] = rmmu_info.ap_encodings[i]; 292 radix_page_info->count++; 293 } 294 } 295 return radix_page_info; 296 } 297 298 target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu, 299 bool radix, bool gtse, 300 uint64_t proc_tbl) 301 { 302 CPUState *cs = CPU(cpu); 303 int ret; 304 uint64_t flags = 0; 305 struct kvm_ppc_mmuv3_cfg cfg = { 306 .process_table = proc_tbl, 307 }; 308 309 if (radix) { 310 flags |= KVM_PPC_MMUV3_RADIX; 311 } 312 if (gtse) { 313 flags |= KVM_PPC_MMUV3_GTSE; 314 } 315 cfg.flags = flags; 316 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_CONFIGURE_V3_MMU, &cfg); 317 switch (ret) { 318 case 0: 319 return H_SUCCESS; 320 case -EINVAL: 321 return H_PARAMETER; 322 case 
-ENODEV: 323 return H_NOT_AVAILABLE; 324 default: 325 return H_HARDWARE; 326 } 327 } 328 329 bool kvmppc_hpt_needs_host_contiguous_pages(void) 330 { 331 static struct kvm_ppc_smmu_info smmu_info; 332 333 if (!kvm_enabled()) { 334 return false; 335 } 336 337 kvm_get_smmu_info(&smmu_info, &error_fatal); 338 return !!(smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL); 339 } 340 341 void kvm_check_mmu(PowerPCCPU *cpu, Error **errp) 342 { 343 struct kvm_ppc_smmu_info smmu_info; 344 int iq, ik, jq, jk; 345 Error *local_err = NULL; 346 347 /* For now, we only have anything to check on hash64 MMUs */ 348 if (!cpu->hash64_opts || !kvm_enabled()) { 349 return; 350 } 351 352 kvm_get_smmu_info(&smmu_info, &local_err); 353 if (local_err) { 354 error_propagate(errp, local_err); 355 return; 356 } 357 358 if (ppc_hash64_has(cpu, PPC_HASH64_1TSEG) 359 && !(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) { 360 error_setg(errp, 361 "KVM does not support 1TiB segments which guest expects"); 362 return; 363 } 364 365 if (smmu_info.slb_size < cpu->hash64_opts->slb_size) { 366 error_setg(errp, "KVM only supports %u SLB entries, but guest needs %u", 367 smmu_info.slb_size, cpu->hash64_opts->slb_size); 368 return; 369 } 370 371 /* 372 * Verify that every pagesize supported by the cpu model is 373 * supported by KVM with the same encodings 374 */ 375 for (iq = 0; iq < ARRAY_SIZE(cpu->hash64_opts->sps); iq++) { 376 PPCHash64SegmentPageSizes *qsps = &cpu->hash64_opts->sps[iq]; 377 struct kvm_ppc_one_seg_page_size *ksps; 378 379 for (ik = 0; ik < ARRAY_SIZE(smmu_info.sps); ik++) { 380 if (qsps->page_shift == smmu_info.sps[ik].page_shift) { 381 break; 382 } 383 } 384 if (ik >= ARRAY_SIZE(smmu_info.sps)) { 385 error_setg(errp, "KVM doesn't support for base page shift %u", 386 qsps->page_shift); 387 return; 388 } 389 390 ksps = &smmu_info.sps[ik]; 391 if (ksps->slb_enc != qsps->slb_enc) { 392 error_setg(errp, 393 "KVM uses SLB encoding 0x%x for page shift %u, but guest expects 0x%x", 394 ksps->slb_enc, ksps->page_shift, qsps->slb_enc); 395 return; 396 } 397 398 for (jq = 0; jq < ARRAY_SIZE(qsps->enc); jq++) { 399 for (jk = 0; jk < ARRAY_SIZE(ksps->enc); jk++) { 400 if (qsps->enc[jq].page_shift == ksps->enc[jk].page_shift) { 401 break; 402 } 403 } 404 405 if (jk >= ARRAY_SIZE(ksps->enc)) { 406 error_setg(errp, "KVM doesn't support page shift %u/%u", 407 qsps->enc[jq].page_shift, qsps->page_shift); 408 return; 409 } 410 if (qsps->enc[jq].pte_enc != ksps->enc[jk].pte_enc) { 411 error_setg(errp, 412 "KVM uses PTE encoding 0x%x for page shift %u/%u, but guest expects 0x%x", 413 ksps->enc[jk].pte_enc, qsps->enc[jq].page_shift, 414 qsps->page_shift, qsps->enc[jq].pte_enc); 415 return; 416 } 417 } 418 } 419 420 if (ppc_hash64_has(cpu, PPC_HASH64_CI_LARGEPAGE)) { 421 /* Mostly what guest pagesizes we can use are related to the 422 * host pages used to map guest RAM, which is handled in the 423 * platform code. Cache-Inhibited largepages (64k) however are 424 * used for I/O, so if they're mapped to the host at all it 425 * will be a normal mapping, not a special hugepage one used 426 * for RAM. */ 427 if (getpagesize() < 0x10000) { 428 error_setg(errp, 429 "KVM can't supply 64kiB CI pages, which guest expects"); 430 } 431 } 432 } 433 #endif /* !defined (TARGET_PPC64) */ 434 435 unsigned long kvm_arch_vcpu_id(CPUState *cpu) 436 { 437 return POWERPC_CPU(cpu)->vcpu_id; 438 } 439 440 /* e500 supports 2 h/w breakpoint and 2 watchpoint. 441 * book3s supports only 1 watchpoint, so array size 442 * of 4 is sufficient for now. 
443 */ 444 #define MAX_HW_BKPTS 4 445 446 static struct HWBreakpoint { 447 target_ulong addr; 448 int type; 449 } hw_debug_points[MAX_HW_BKPTS]; 450 451 static CPUWatchpoint hw_watchpoint; 452 453 /* Default there is no breakpoint and watchpoint supported */ 454 static int max_hw_breakpoint; 455 static int max_hw_watchpoint; 456 static int nb_hw_breakpoint; 457 static int nb_hw_watchpoint; 458 459 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv) 460 { 461 if (cenv->excp_model == POWERPC_EXCP_BOOKE) { 462 max_hw_breakpoint = 2; 463 max_hw_watchpoint = 2; 464 } 465 466 if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) { 467 fprintf(stderr, "Error initializing h/w breakpoints\n"); 468 return; 469 } 470 } 471 472 int kvm_arch_init_vcpu(CPUState *cs) 473 { 474 PowerPCCPU *cpu = POWERPC_CPU(cs); 475 CPUPPCState *cenv = &cpu->env; 476 int ret; 477 478 /* Synchronize sregs with kvm */ 479 ret = kvm_arch_sync_sregs(cpu); 480 if (ret) { 481 if (ret == -EINVAL) { 482 error_report("Register sync failed... If you're using kvm-hv.ko," 483 " only \"-cpu host\" is possible"); 484 } 485 return ret; 486 } 487 488 idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu); 489 490 switch (cenv->mmu_model) { 491 case POWERPC_MMU_BOOKE206: 492 /* This target supports access to KVM's guest TLB */ 493 ret = kvm_booke206_tlb_init(cpu); 494 break; 495 case POWERPC_MMU_2_07: 496 if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) { 497 /* KVM-HV has transactional memory on POWER8 also without the 498 * KVM_CAP_PPC_HTM extension, so enable it here instead as 499 * long as it's availble to userspace on the host. */ 500 if (qemu_getauxval(AT_HWCAP2) & PPC_FEATURE2_HAS_HTM) { 501 cap_htm = true; 502 } 503 } 504 break; 505 default: 506 break; 507 } 508 509 kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode); 510 kvmppc_hw_debug_points_init(cenv); 511 512 return ret; 513 } 514 515 static void kvm_sw_tlb_put(PowerPCCPU *cpu) 516 { 517 CPUPPCState *env = &cpu->env; 518 CPUState *cs = CPU(cpu); 519 struct kvm_dirty_tlb dirty_tlb; 520 unsigned char *bitmap; 521 int ret; 522 523 if (!env->kvm_sw_tlb) { 524 return; 525 } 526 527 bitmap = g_malloc((env->nb_tlb + 7) / 8); 528 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8); 529 530 dirty_tlb.bitmap = (uintptr_t)bitmap; 531 dirty_tlb.num_dirty = env->nb_tlb; 532 533 ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb); 534 if (ret) { 535 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n", 536 __func__, strerror(-ret)); 537 } 538 539 g_free(bitmap); 540 } 541 542 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr) 543 { 544 PowerPCCPU *cpu = POWERPC_CPU(cs); 545 CPUPPCState *env = &cpu->env; 546 union { 547 uint32_t u32; 548 uint64_t u64; 549 } val; 550 struct kvm_one_reg reg = { 551 .id = id, 552 .addr = (uintptr_t) &val, 553 }; 554 int ret; 555 556 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 557 if (ret != 0) { 558 trace_kvm_failed_spr_get(spr, strerror(errno)); 559 } else { 560 switch (id & KVM_REG_SIZE_MASK) { 561 case KVM_REG_SIZE_U32: 562 env->spr[spr] = val.u32; 563 break; 564 565 case KVM_REG_SIZE_U64: 566 env->spr[spr] = val.u64; 567 break; 568 569 default: 570 /* Don't handle this size yet */ 571 abort(); 572 } 573 } 574 } 575 576 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr) 577 { 578 PowerPCCPU *cpu = POWERPC_CPU(cs); 579 CPUPPCState *env = &cpu->env; 580 union { 581 uint32_t u32; 582 uint64_t u64; 583 } val; 584 struct kvm_one_reg reg = { 585 .id = id, 586 .addr = (uintptr_t) &val, 587 }; 588 int ret; 589 590 switch 
(id & KVM_REG_SIZE_MASK) { 591 case KVM_REG_SIZE_U32: 592 val.u32 = env->spr[spr]; 593 break; 594 595 case KVM_REG_SIZE_U64: 596 val.u64 = env->spr[spr]; 597 break; 598 599 default: 600 /* Don't handle this size yet */ 601 abort(); 602 } 603 604 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 605 if (ret != 0) { 606 trace_kvm_failed_spr_set(spr, strerror(errno)); 607 } 608 } 609 610 static int kvm_put_fp(CPUState *cs) 611 { 612 PowerPCCPU *cpu = POWERPC_CPU(cs); 613 CPUPPCState *env = &cpu->env; 614 struct kvm_one_reg reg; 615 int i; 616 int ret; 617 618 if (env->insns_flags & PPC_FLOAT) { 619 uint64_t fpscr = env->fpscr; 620 bool vsx = !!(env->insns_flags2 & PPC2_VSX); 621 622 reg.id = KVM_REG_PPC_FPSCR; 623 reg.addr = (uintptr_t)&fpscr; 624 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 625 if (ret < 0) { 626 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno)); 627 return ret; 628 } 629 630 for (i = 0; i < 32; i++) { 631 uint64_t vsr[2]; 632 uint64_t *fpr = cpu_fpr_ptr(&cpu->env, i); 633 uint64_t *vsrl = cpu_vsrl_ptr(&cpu->env, i); 634 635 #ifdef HOST_WORDS_BIGENDIAN 636 vsr[0] = float64_val(*fpr); 637 vsr[1] = *vsrl; 638 #else 639 vsr[0] = *vsrl; 640 vsr[1] = float64_val(*fpr); 641 #endif 642 reg.addr = (uintptr_t) &vsr; 643 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i); 644 645 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 646 if (ret < 0) { 647 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR", 648 i, strerror(errno)); 649 return ret; 650 } 651 } 652 } 653 654 if (env->insns_flags & PPC_ALTIVEC) { 655 reg.id = KVM_REG_PPC_VSCR; 656 reg.addr = (uintptr_t)&env->vscr; 657 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 658 if (ret < 0) { 659 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno)); 660 return ret; 661 } 662 663 for (i = 0; i < 32; i++) { 664 reg.id = KVM_REG_PPC_VR(i); 665 reg.addr = (uintptr_t)cpu_avr_ptr(env, i); 666 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 667 if (ret < 0) { 668 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno)); 669 return ret; 670 } 671 } 672 } 673 674 return 0; 675 } 676 677 static int kvm_get_fp(CPUState *cs) 678 { 679 PowerPCCPU *cpu = POWERPC_CPU(cs); 680 CPUPPCState *env = &cpu->env; 681 struct kvm_one_reg reg; 682 int i; 683 int ret; 684 685 if (env->insns_flags & PPC_FLOAT) { 686 uint64_t fpscr; 687 bool vsx = !!(env->insns_flags2 & PPC2_VSX); 688 689 reg.id = KVM_REG_PPC_FPSCR; 690 reg.addr = (uintptr_t)&fpscr; 691 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 692 if (ret < 0) { 693 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno)); 694 return ret; 695 } else { 696 env->fpscr = fpscr; 697 } 698 699 for (i = 0; i < 32; i++) { 700 uint64_t vsr[2]; 701 uint64_t *fpr = cpu_fpr_ptr(&cpu->env, i); 702 uint64_t *vsrl = cpu_vsrl_ptr(&cpu->env, i); 703 704 reg.addr = (uintptr_t) &vsr; 705 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i); 706 707 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 708 if (ret < 0) { 709 DPRINTF("Unable to get %s%d from KVM: %s\n", 710 vsx ? 
"VSR" : "FPR", i, strerror(errno)); 711 return ret; 712 } else { 713 #ifdef HOST_WORDS_BIGENDIAN 714 *fpr = vsr[0]; 715 if (vsx) { 716 *vsrl = vsr[1]; 717 } 718 #else 719 *fpr = vsr[1]; 720 if (vsx) { 721 *vsrl = vsr[0]; 722 } 723 #endif 724 } 725 } 726 } 727 728 if (env->insns_flags & PPC_ALTIVEC) { 729 reg.id = KVM_REG_PPC_VSCR; 730 reg.addr = (uintptr_t)&env->vscr; 731 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 732 if (ret < 0) { 733 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno)); 734 return ret; 735 } 736 737 for (i = 0; i < 32; i++) { 738 reg.id = KVM_REG_PPC_VR(i); 739 reg.addr = (uintptr_t)cpu_avr_ptr(env, i); 740 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 741 if (ret < 0) { 742 DPRINTF("Unable to get VR%d from KVM: %s\n", 743 i, strerror(errno)); 744 return ret; 745 } 746 } 747 } 748 749 return 0; 750 } 751 752 #if defined(TARGET_PPC64) 753 static int kvm_get_vpa(CPUState *cs) 754 { 755 PowerPCCPU *cpu = POWERPC_CPU(cs); 756 sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu); 757 struct kvm_one_reg reg; 758 int ret; 759 760 reg.id = KVM_REG_PPC_VPA_ADDR; 761 reg.addr = (uintptr_t)&spapr_cpu->vpa_addr; 762 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 763 if (ret < 0) { 764 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno)); 765 return ret; 766 } 767 768 assert((uintptr_t)&spapr_cpu->slb_shadow_size 769 == ((uintptr_t)&spapr_cpu->slb_shadow_addr + 8)); 770 reg.id = KVM_REG_PPC_VPA_SLB; 771 reg.addr = (uintptr_t)&spapr_cpu->slb_shadow_addr; 772 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 773 if (ret < 0) { 774 DPRINTF("Unable to get SLB shadow state from KVM: %s\n", 775 strerror(errno)); 776 return ret; 777 } 778 779 assert((uintptr_t)&spapr_cpu->dtl_size 780 == ((uintptr_t)&spapr_cpu->dtl_addr + 8)); 781 reg.id = KVM_REG_PPC_VPA_DTL; 782 reg.addr = (uintptr_t)&spapr_cpu->dtl_addr; 783 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 784 if (ret < 0) { 785 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n", 786 strerror(errno)); 787 return ret; 788 } 789 790 return 0; 791 } 792 793 static int kvm_put_vpa(CPUState *cs) 794 { 795 PowerPCCPU *cpu = POWERPC_CPU(cs); 796 sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu); 797 struct kvm_one_reg reg; 798 int ret; 799 800 /* SLB shadow or DTL can't be registered unless a master VPA is 801 * registered. That means when restoring state, if a VPA *is* 802 * registered, we need to set that up first. 
If not, we need to 803 * deregister the others before deregistering the master VPA */ 804 assert(spapr_cpu->vpa_addr 805 || !(spapr_cpu->slb_shadow_addr || spapr_cpu->dtl_addr)); 806 807 if (spapr_cpu->vpa_addr) { 808 reg.id = KVM_REG_PPC_VPA_ADDR; 809 reg.addr = (uintptr_t)&spapr_cpu->vpa_addr; 810 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 811 if (ret < 0) { 812 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno)); 813 return ret; 814 } 815 } 816 817 assert((uintptr_t)&spapr_cpu->slb_shadow_size 818 == ((uintptr_t)&spapr_cpu->slb_shadow_addr + 8)); 819 reg.id = KVM_REG_PPC_VPA_SLB; 820 reg.addr = (uintptr_t)&spapr_cpu->slb_shadow_addr; 821 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 822 if (ret < 0) { 823 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno)); 824 return ret; 825 } 826 827 assert((uintptr_t)&spapr_cpu->dtl_size 828 == ((uintptr_t)&spapr_cpu->dtl_addr + 8)); 829 reg.id = KVM_REG_PPC_VPA_DTL; 830 reg.addr = (uintptr_t)&spapr_cpu->dtl_addr; 831 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 832 if (ret < 0) { 833 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n", 834 strerror(errno)); 835 return ret; 836 } 837 838 if (!spapr_cpu->vpa_addr) { 839 reg.id = KVM_REG_PPC_VPA_ADDR; 840 reg.addr = (uintptr_t)&spapr_cpu->vpa_addr; 841 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 842 if (ret < 0) { 843 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno)); 844 return ret; 845 } 846 } 847 848 return 0; 849 } 850 #endif /* TARGET_PPC64 */ 851 852 int kvmppc_put_books_sregs(PowerPCCPU *cpu) 853 { 854 CPUPPCState *env = &cpu->env; 855 struct kvm_sregs sregs; 856 int i; 857 858 sregs.pvr = env->spr[SPR_PVR]; 859 860 if (cpu->vhyp) { 861 PPCVirtualHypervisorClass *vhc = 862 PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); 863 sregs.u.s.sdr1 = vhc->encode_hpt_for_kvm_pr(cpu->vhyp); 864 } else { 865 sregs.u.s.sdr1 = env->spr[SPR_SDR1]; 866 } 867 868 /* Sync SLB */ 869 #ifdef TARGET_PPC64 870 for (i = 0; i < ARRAY_SIZE(env->slb); i++) { 871 sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid; 872 if (env->slb[i].esid & SLB_ESID_V) { 873 sregs.u.s.ppc64.slb[i].slbe |= i; 874 } 875 sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid; 876 } 877 #endif 878 879 /* Sync SRs */ 880 for (i = 0; i < 16; i++) { 881 sregs.u.s.ppc32.sr[i] = env->sr[i]; 882 } 883 884 /* Sync BATs */ 885 for (i = 0; i < 8; i++) { 886 /* Beware. 
We have to swap upper and lower bits here */ 887 sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32) 888 | env->DBAT[1][i]; 889 sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32) 890 | env->IBAT[1][i]; 891 } 892 893 return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs); 894 } 895 896 int kvm_arch_put_registers(CPUState *cs, int level) 897 { 898 PowerPCCPU *cpu = POWERPC_CPU(cs); 899 CPUPPCState *env = &cpu->env; 900 struct kvm_regs regs; 901 int ret; 902 int i; 903 904 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, ®s); 905 if (ret < 0) { 906 return ret; 907 } 908 909 regs.ctr = env->ctr; 910 regs.lr = env->lr; 911 regs.xer = cpu_read_xer(env); 912 regs.msr = env->msr; 913 regs.pc = env->nip; 914 915 regs.srr0 = env->spr[SPR_SRR0]; 916 regs.srr1 = env->spr[SPR_SRR1]; 917 918 regs.sprg0 = env->spr[SPR_SPRG0]; 919 regs.sprg1 = env->spr[SPR_SPRG1]; 920 regs.sprg2 = env->spr[SPR_SPRG2]; 921 regs.sprg3 = env->spr[SPR_SPRG3]; 922 regs.sprg4 = env->spr[SPR_SPRG4]; 923 regs.sprg5 = env->spr[SPR_SPRG5]; 924 regs.sprg6 = env->spr[SPR_SPRG6]; 925 regs.sprg7 = env->spr[SPR_SPRG7]; 926 927 regs.pid = env->spr[SPR_BOOKE_PID]; 928 929 for (i = 0;i < 32; i++) 930 regs.gpr[i] = env->gpr[i]; 931 932 regs.cr = 0; 933 for (i = 0; i < 8; i++) { 934 regs.cr |= (env->crf[i] & 15) << (4 * (7 - i)); 935 } 936 937 ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, ®s); 938 if (ret < 0) 939 return ret; 940 941 kvm_put_fp(cs); 942 943 if (env->tlb_dirty) { 944 kvm_sw_tlb_put(cpu); 945 env->tlb_dirty = false; 946 } 947 948 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) { 949 ret = kvmppc_put_books_sregs(cpu); 950 if (ret < 0) { 951 return ret; 952 } 953 } 954 955 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) { 956 kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR); 957 } 958 959 if (cap_one_reg) { 960 int i; 961 962 /* We deliberately ignore errors here, for kernels which have 963 * the ONE_REG calls, but don't support the specific 964 * registers, there's a reasonable chance things will still 965 * work, at least until we try to migrate. 
*/ 966 for (i = 0; i < 1024; i++) { 967 uint64_t id = env->spr_cb[i].one_reg_id; 968 969 if (id != 0) { 970 kvm_put_one_spr(cs, id, i); 971 } 972 } 973 974 #ifdef TARGET_PPC64 975 if (msr_ts) { 976 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) { 977 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]); 978 } 979 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) { 980 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]); 981 } 982 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr); 983 kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr); 984 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr); 985 kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr); 986 kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr); 987 kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr); 988 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave); 989 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr); 990 kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr); 991 kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar); 992 } 993 994 if (cap_papr) { 995 if (kvm_put_vpa(cs) < 0) { 996 DPRINTF("Warning: Unable to set VPA information to KVM\n"); 997 } 998 } 999 1000 kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset); 1001 #endif /* TARGET_PPC64 */ 1002 } 1003 1004 return ret; 1005 } 1006 1007 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor) 1008 { 1009 env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR]; 1010 } 1011 1012 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu) 1013 { 1014 CPUPPCState *env = &cpu->env; 1015 struct kvm_sregs sregs; 1016 int ret; 1017 1018 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs); 1019 if (ret < 0) { 1020 return ret; 1021 } 1022 1023 if (sregs.u.e.features & KVM_SREGS_E_BASE) { 1024 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0; 1025 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1; 1026 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr; 1027 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear; 1028 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr; 1029 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr; 1030 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr; 1031 env->spr[SPR_DECR] = sregs.u.e.dec; 1032 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff; 1033 env->spr[SPR_TBU] = sregs.u.e.tb >> 32; 1034 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave; 1035 } 1036 1037 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) { 1038 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir; 1039 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0; 1040 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1; 1041 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar; 1042 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr; 1043 } 1044 1045 if (sregs.u.e.features & KVM_SREGS_E_64) { 1046 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr; 1047 } 1048 1049 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) { 1050 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8; 1051 } 1052 1053 if (sregs.u.e.features & KVM_SREGS_E_IVOR) { 1054 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0]; 1055 kvm_sync_excp(env, POWERPC_EXCP_CRITICAL, SPR_BOOKE_IVOR0); 1056 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1]; 1057 kvm_sync_excp(env, POWERPC_EXCP_MCHECK, SPR_BOOKE_IVOR1); 1058 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2]; 1059 kvm_sync_excp(env, POWERPC_EXCP_DSI, SPR_BOOKE_IVOR2); 1060 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3]; 1061 kvm_sync_excp(env, POWERPC_EXCP_ISI, SPR_BOOKE_IVOR3); 1062 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4]; 1063 kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL, SPR_BOOKE_IVOR4); 1064 env->spr[SPR_BOOKE_IVOR5] = 
sregs.u.e.ivor_low[5]; 1065 kvm_sync_excp(env, POWERPC_EXCP_ALIGN, SPR_BOOKE_IVOR5); 1066 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6]; 1067 kvm_sync_excp(env, POWERPC_EXCP_PROGRAM, SPR_BOOKE_IVOR6); 1068 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7]; 1069 kvm_sync_excp(env, POWERPC_EXCP_FPU, SPR_BOOKE_IVOR7); 1070 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8]; 1071 kvm_sync_excp(env, POWERPC_EXCP_SYSCALL, SPR_BOOKE_IVOR8); 1072 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9]; 1073 kvm_sync_excp(env, POWERPC_EXCP_APU, SPR_BOOKE_IVOR9); 1074 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10]; 1075 kvm_sync_excp(env, POWERPC_EXCP_DECR, SPR_BOOKE_IVOR10); 1076 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11]; 1077 kvm_sync_excp(env, POWERPC_EXCP_FIT, SPR_BOOKE_IVOR11); 1078 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12]; 1079 kvm_sync_excp(env, POWERPC_EXCP_WDT, SPR_BOOKE_IVOR12); 1080 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13]; 1081 kvm_sync_excp(env, POWERPC_EXCP_DTLB, SPR_BOOKE_IVOR13); 1082 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14]; 1083 kvm_sync_excp(env, POWERPC_EXCP_ITLB, SPR_BOOKE_IVOR14); 1084 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15]; 1085 kvm_sync_excp(env, POWERPC_EXCP_DEBUG, SPR_BOOKE_IVOR15); 1086 1087 if (sregs.u.e.features & KVM_SREGS_E_SPE) { 1088 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0]; 1089 kvm_sync_excp(env, POWERPC_EXCP_SPEU, SPR_BOOKE_IVOR32); 1090 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1]; 1091 kvm_sync_excp(env, POWERPC_EXCP_EFPDI, SPR_BOOKE_IVOR33); 1092 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2]; 1093 kvm_sync_excp(env, POWERPC_EXCP_EFPRI, SPR_BOOKE_IVOR34); 1094 } 1095 1096 if (sregs.u.e.features & KVM_SREGS_E_PM) { 1097 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3]; 1098 kvm_sync_excp(env, POWERPC_EXCP_EPERFM, SPR_BOOKE_IVOR35); 1099 } 1100 1101 if (sregs.u.e.features & KVM_SREGS_E_PC) { 1102 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4]; 1103 kvm_sync_excp(env, POWERPC_EXCP_DOORI, SPR_BOOKE_IVOR36); 1104 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5]; 1105 kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37); 1106 } 1107 } 1108 1109 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) { 1110 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0; 1111 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1; 1112 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2; 1113 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff; 1114 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4; 1115 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6; 1116 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32; 1117 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg; 1118 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0]; 1119 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1]; 1120 } 1121 1122 if (sregs.u.e.features & KVM_SREGS_EXP) { 1123 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr; 1124 } 1125 1126 if (sregs.u.e.features & KVM_SREGS_E_PD) { 1127 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc; 1128 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc; 1129 } 1130 1131 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) { 1132 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr; 1133 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar; 1134 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0; 1135 1136 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) { 1137 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1; 1138 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2; 1139 } 1140 } 1141 1142 return 0; 1143 } 1144 1145 static int kvmppc_get_books_sregs(PowerPCCPU *cpu) 1146 { 1147 
CPUPPCState *env = &cpu->env; 1148 struct kvm_sregs sregs; 1149 int ret; 1150 int i; 1151 1152 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs); 1153 if (ret < 0) { 1154 return ret; 1155 } 1156 1157 if (!cpu->vhyp) { 1158 ppc_store_sdr1(env, sregs.u.s.sdr1); 1159 } 1160 1161 /* Sync SLB */ 1162 #ifdef TARGET_PPC64 1163 /* 1164 * The packed SLB array we get from KVM_GET_SREGS only contains 1165 * information about valid entries. So we flush our internal copy 1166 * to get rid of stale ones, then put all valid SLB entries back 1167 * in. 1168 */ 1169 memset(env->slb, 0, sizeof(env->slb)); 1170 for (i = 0; i < ARRAY_SIZE(env->slb); i++) { 1171 target_ulong rb = sregs.u.s.ppc64.slb[i].slbe; 1172 target_ulong rs = sregs.u.s.ppc64.slb[i].slbv; 1173 /* 1174 * Only restore valid entries 1175 */ 1176 if (rb & SLB_ESID_V) { 1177 ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs); 1178 } 1179 } 1180 #endif 1181 1182 /* Sync SRs */ 1183 for (i = 0; i < 16; i++) { 1184 env->sr[i] = sregs.u.s.ppc32.sr[i]; 1185 } 1186 1187 /* Sync BATs */ 1188 for (i = 0; i < 8; i++) { 1189 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff; 1190 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32; 1191 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff; 1192 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32; 1193 } 1194 1195 return 0; 1196 } 1197 1198 int kvm_arch_get_registers(CPUState *cs) 1199 { 1200 PowerPCCPU *cpu = POWERPC_CPU(cs); 1201 CPUPPCState *env = &cpu->env; 1202 struct kvm_regs regs; 1203 uint32_t cr; 1204 int i, ret; 1205 1206 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, ®s); 1207 if (ret < 0) 1208 return ret; 1209 1210 cr = regs.cr; 1211 for (i = 7; i >= 0; i--) { 1212 env->crf[i] = cr & 15; 1213 cr >>= 4; 1214 } 1215 1216 env->ctr = regs.ctr; 1217 env->lr = regs.lr; 1218 cpu_write_xer(env, regs.xer); 1219 env->msr = regs.msr; 1220 env->nip = regs.pc; 1221 1222 env->spr[SPR_SRR0] = regs.srr0; 1223 env->spr[SPR_SRR1] = regs.srr1; 1224 1225 env->spr[SPR_SPRG0] = regs.sprg0; 1226 env->spr[SPR_SPRG1] = regs.sprg1; 1227 env->spr[SPR_SPRG2] = regs.sprg2; 1228 env->spr[SPR_SPRG3] = regs.sprg3; 1229 env->spr[SPR_SPRG4] = regs.sprg4; 1230 env->spr[SPR_SPRG5] = regs.sprg5; 1231 env->spr[SPR_SPRG6] = regs.sprg6; 1232 env->spr[SPR_SPRG7] = regs.sprg7; 1233 1234 env->spr[SPR_BOOKE_PID] = regs.pid; 1235 1236 for (i = 0;i < 32; i++) 1237 env->gpr[i] = regs.gpr[i]; 1238 1239 kvm_get_fp(cs); 1240 1241 if (cap_booke_sregs) { 1242 ret = kvmppc_get_booke_sregs(cpu); 1243 if (ret < 0) { 1244 return ret; 1245 } 1246 } 1247 1248 if (cap_segstate) { 1249 ret = kvmppc_get_books_sregs(cpu); 1250 if (ret < 0) { 1251 return ret; 1252 } 1253 } 1254 1255 if (cap_hior) { 1256 kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR); 1257 } 1258 1259 if (cap_one_reg) { 1260 int i; 1261 1262 /* We deliberately ignore errors here, for kernels which have 1263 * the ONE_REG calls, but don't support the specific 1264 * registers, there's a reasonable chance things will still 1265 * work, at least until we try to migrate. 
*/ 1266 for (i = 0; i < 1024; i++) { 1267 uint64_t id = env->spr_cb[i].one_reg_id; 1268 1269 if (id != 0) { 1270 kvm_get_one_spr(cs, id, i); 1271 } 1272 } 1273 1274 #ifdef TARGET_PPC64 1275 if (msr_ts) { 1276 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) { 1277 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]); 1278 } 1279 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) { 1280 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]); 1281 } 1282 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr); 1283 kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr); 1284 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr); 1285 kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr); 1286 kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr); 1287 kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr); 1288 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave); 1289 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr); 1290 kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr); 1291 kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar); 1292 } 1293 1294 if (cap_papr) { 1295 if (kvm_get_vpa(cs) < 0) { 1296 DPRINTF("Warning: Unable to get VPA information from KVM\n"); 1297 } 1298 } 1299 1300 kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset); 1301 #endif 1302 } 1303 1304 return 0; 1305 } 1306 1307 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level) 1308 { 1309 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET; 1310 1311 if (irq != PPC_INTERRUPT_EXT) { 1312 return 0; 1313 } 1314 1315 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) { 1316 return 0; 1317 } 1318 1319 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq); 1320 1321 return 0; 1322 } 1323 1324 #if defined(TARGET_PPC64) 1325 #define PPC_INPUT_INT PPC970_INPUT_INT 1326 #else 1327 #define PPC_INPUT_INT PPC6xx_INPUT_INT 1328 #endif 1329 1330 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run) 1331 { 1332 PowerPCCPU *cpu = POWERPC_CPU(cs); 1333 CPUPPCState *env = &cpu->env; 1334 int r; 1335 unsigned irq; 1336 1337 qemu_mutex_lock_iothread(); 1338 1339 /* PowerPC QEMU tracks the various core input pins (interrupt, critical 1340 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */ 1341 if (!cap_interrupt_level && 1342 run->ready_for_interrupt_injection && 1343 (cs->interrupt_request & CPU_INTERRUPT_HARD) && 1344 (env->irq_input_state & (1<<PPC_INPUT_INT))) 1345 { 1346 /* For now KVM disregards the 'irq' argument. However, in the 1347 * future KVM could cache it in-kernel to avoid a heavyweight exit 1348 * when reading the UIC. 1349 */ 1350 irq = KVM_INTERRUPT_SET; 1351 1352 DPRINTF("injected interrupt %d\n", irq); 1353 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq); 1354 if (r < 0) { 1355 printf("cpu %d fail inject %x\n", cs->cpu_index, irq); 1356 } 1357 1358 /* Always wake up soon in case the interrupt was level based */ 1359 timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 1360 (NANOSECONDS_PER_SECOND / 50)); 1361 } 1362 1363 /* We don't know if there are more interrupts pending after this. However, 1364 * the guest will return to userspace in the course of handling this one 1365 * anyways, so we will get a chance to deliver the rest. 
*/ 1366 1367 qemu_mutex_unlock_iothread(); 1368 } 1369 1370 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run) 1371 { 1372 return MEMTXATTRS_UNSPECIFIED; 1373 } 1374 1375 int kvm_arch_process_async_events(CPUState *cs) 1376 { 1377 return cs->halted; 1378 } 1379 1380 static int kvmppc_handle_halt(PowerPCCPU *cpu) 1381 { 1382 CPUState *cs = CPU(cpu); 1383 CPUPPCState *env = &cpu->env; 1384 1385 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) { 1386 cs->halted = 1; 1387 cs->exception_index = EXCP_HLT; 1388 } 1389 1390 return 0; 1391 } 1392 1393 /* map dcr access to existing qemu dcr emulation */ 1394 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data) 1395 { 1396 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0) 1397 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn); 1398 1399 return 0; 1400 } 1401 1402 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data) 1403 { 1404 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0) 1405 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn); 1406 1407 return 0; 1408 } 1409 1410 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp) 1411 { 1412 /* Mixed endian case is not handled */ 1413 uint32_t sc = debug_inst_opcode; 1414 1415 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 1416 sizeof(sc), 0) || 1417 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) { 1418 return -EINVAL; 1419 } 1420 1421 return 0; 1422 } 1423 1424 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp) 1425 { 1426 uint32_t sc; 1427 1428 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) || 1429 sc != debug_inst_opcode || 1430 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 1431 sizeof(sc), 1)) { 1432 return -EINVAL; 1433 } 1434 1435 return 0; 1436 } 1437 1438 static int find_hw_breakpoint(target_ulong addr, int type) 1439 { 1440 int n; 1441 1442 assert((nb_hw_breakpoint + nb_hw_watchpoint) 1443 <= ARRAY_SIZE(hw_debug_points)); 1444 1445 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) { 1446 if (hw_debug_points[n].addr == addr && 1447 hw_debug_points[n].type == type) { 1448 return n; 1449 } 1450 } 1451 1452 return -1; 1453 } 1454 1455 static int find_hw_watchpoint(target_ulong addr, int *flag) 1456 { 1457 int n; 1458 1459 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS); 1460 if (n >= 0) { 1461 *flag = BP_MEM_ACCESS; 1462 return n; 1463 } 1464 1465 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE); 1466 if (n >= 0) { 1467 *flag = BP_MEM_WRITE; 1468 return n; 1469 } 1470 1471 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ); 1472 if (n >= 0) { 1473 *flag = BP_MEM_READ; 1474 return n; 1475 } 1476 1477 return -1; 1478 } 1479 1480 int kvm_arch_insert_hw_breakpoint(target_ulong addr, 1481 target_ulong len, int type) 1482 { 1483 if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) { 1484 return -ENOBUFS; 1485 } 1486 1487 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr; 1488 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type; 1489 1490 switch (type) { 1491 case GDB_BREAKPOINT_HW: 1492 if (nb_hw_breakpoint >= max_hw_breakpoint) { 1493 return -ENOBUFS; 1494 } 1495 1496 if (find_hw_breakpoint(addr, type) >= 0) { 1497 return -EEXIST; 1498 } 1499 1500 nb_hw_breakpoint++; 1501 break; 1502 1503 case GDB_WATCHPOINT_WRITE: 1504 case GDB_WATCHPOINT_READ: 1505 case GDB_WATCHPOINT_ACCESS: 1506 if (nb_hw_watchpoint >= max_hw_watchpoint) { 1507 
return -ENOBUFS; 1508 } 1509 1510 if (find_hw_breakpoint(addr, type) >= 0) { 1511 return -EEXIST; 1512 } 1513 1514 nb_hw_watchpoint++; 1515 break; 1516 1517 default: 1518 return -ENOSYS; 1519 } 1520 1521 return 0; 1522 } 1523 1524 int kvm_arch_remove_hw_breakpoint(target_ulong addr, 1525 target_ulong len, int type) 1526 { 1527 int n; 1528 1529 n = find_hw_breakpoint(addr, type); 1530 if (n < 0) { 1531 return -ENOENT; 1532 } 1533 1534 switch (type) { 1535 case GDB_BREAKPOINT_HW: 1536 nb_hw_breakpoint--; 1537 break; 1538 1539 case GDB_WATCHPOINT_WRITE: 1540 case GDB_WATCHPOINT_READ: 1541 case GDB_WATCHPOINT_ACCESS: 1542 nb_hw_watchpoint--; 1543 break; 1544 1545 default: 1546 return -ENOSYS; 1547 } 1548 hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint]; 1549 1550 return 0; 1551 } 1552 1553 void kvm_arch_remove_all_hw_breakpoints(void) 1554 { 1555 nb_hw_breakpoint = nb_hw_watchpoint = 0; 1556 } 1557 1558 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg) 1559 { 1560 int n; 1561 1562 /* Software Breakpoint updates */ 1563 if (kvm_sw_breakpoints_active(cs)) { 1564 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP; 1565 } 1566 1567 assert((nb_hw_breakpoint + nb_hw_watchpoint) 1568 <= ARRAY_SIZE(hw_debug_points)); 1569 assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp)); 1570 1571 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) { 1572 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP; 1573 memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp)); 1574 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) { 1575 switch (hw_debug_points[n].type) { 1576 case GDB_BREAKPOINT_HW: 1577 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT; 1578 break; 1579 case GDB_WATCHPOINT_WRITE: 1580 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE; 1581 break; 1582 case GDB_WATCHPOINT_READ: 1583 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ; 1584 break; 1585 case GDB_WATCHPOINT_ACCESS: 1586 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE | 1587 KVMPPC_DEBUG_WATCH_READ; 1588 break; 1589 default: 1590 cpu_abort(cs, "Unsupported breakpoint type\n"); 1591 } 1592 dbg->arch.bp[n].addr = hw_debug_points[n].addr; 1593 } 1594 } 1595 } 1596 1597 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run) 1598 { 1599 CPUState *cs = CPU(cpu); 1600 CPUPPCState *env = &cpu->env; 1601 struct kvm_debug_exit_arch *arch_info = &run->debug.arch; 1602 int handle = 0; 1603 int n; 1604 int flag = 0; 1605 1606 if (cs->singlestep_enabled) { 1607 handle = 1; 1608 } else if (arch_info->status) { 1609 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) { 1610 if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) { 1611 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW); 1612 if (n >= 0) { 1613 handle = 1; 1614 } 1615 } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ | 1616 KVMPPC_DEBUG_WATCH_WRITE)) { 1617 n = find_hw_watchpoint(arch_info->address, &flag); 1618 if (n >= 0) { 1619 handle = 1; 1620 cs->watchpoint_hit = &hw_watchpoint; 1621 hw_watchpoint.vaddr = hw_debug_points[n].addr; 1622 hw_watchpoint.flags = flag; 1623 } 1624 } 1625 } 1626 } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) { 1627 handle = 1; 1628 } else { 1629 /* QEMU is not able to handle debug exception, so inject 1630 * program exception to guest; 1631 * Yes program exception NOT debug exception !! 1632 * When QEMU is using debug resources then debug exception must 1633 * be always set. To achieve this we set MSR_DE and also set 1634 * MSRP_DEP so guest cannot change MSR_DE. 
1635 * When emulating debug resource for guest we want guest 1636 * to control MSR_DE (enable/disable debug interrupt on need). 1637 * Supporting both configurations are NOT possible. 1638 * So the result is that we cannot share debug resources 1639 * between QEMU and Guest on BOOKE architecture. 1640 * In the current design QEMU gets the priority over guest, 1641 * this means that if QEMU is using debug resources then guest 1642 * cannot use them; 1643 * For software breakpoint QEMU uses a privileged instruction; 1644 * So there cannot be any reason that we are here for guest 1645 * set debug exception, only possibility is guest executed a 1646 * privileged / illegal instruction and that's why we are 1647 * injecting a program interrupt. 1648 */ 1649 1650 cpu_synchronize_state(cs); 1651 /* env->nip is PC, so increment this by 4 to use 1652 * ppc_cpu_do_interrupt(), which set srr0 = env->nip - 4. 1653 */ 1654 env->nip += 4; 1655 cs->exception_index = POWERPC_EXCP_PROGRAM; 1656 env->error_code = POWERPC_EXCP_INVAL; 1657 ppc_cpu_do_interrupt(cs); 1658 } 1659 1660 return handle; 1661 } 1662 1663 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) 1664 { 1665 PowerPCCPU *cpu = POWERPC_CPU(cs); 1666 CPUPPCState *env = &cpu->env; 1667 int ret; 1668 1669 qemu_mutex_lock_iothread(); 1670 1671 switch (run->exit_reason) { 1672 case KVM_EXIT_DCR: 1673 if (run->dcr.is_write) { 1674 DPRINTF("handle dcr write\n"); 1675 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data); 1676 } else { 1677 DPRINTF("handle dcr read\n"); 1678 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data); 1679 } 1680 break; 1681 case KVM_EXIT_HLT: 1682 DPRINTF("handle halt\n"); 1683 ret = kvmppc_handle_halt(cpu); 1684 break; 1685 #if defined(TARGET_PPC64) 1686 case KVM_EXIT_PAPR_HCALL: 1687 DPRINTF("handle PAPR hypercall\n"); 1688 run->papr_hcall.ret = spapr_hypercall(cpu, 1689 run->papr_hcall.nr, 1690 run->papr_hcall.args); 1691 ret = 0; 1692 break; 1693 #endif 1694 case KVM_EXIT_EPR: 1695 DPRINTF("handle epr\n"); 1696 run->epr.epr = ldl_phys(cs->as, env->mpic_iack); 1697 ret = 0; 1698 break; 1699 case KVM_EXIT_WATCHDOG: 1700 DPRINTF("handle watchdog expiry\n"); 1701 watchdog_perform_action(); 1702 ret = 0; 1703 break; 1704 1705 case KVM_EXIT_DEBUG: 1706 DPRINTF("handle debug exception\n"); 1707 if (kvm_handle_debug(cpu, run)) { 1708 ret = EXCP_DEBUG; 1709 break; 1710 } 1711 /* re-enter, this exception was guest-internal */ 1712 ret = 0; 1713 break; 1714 1715 default: 1716 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason); 1717 ret = -1; 1718 break; 1719 } 1720 1721 qemu_mutex_unlock_iothread(); 1722 return ret; 1723 } 1724 1725 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits) 1726 { 1727 CPUState *cs = CPU(cpu); 1728 uint32_t bits = tsr_bits; 1729 struct kvm_one_reg reg = { 1730 .id = KVM_REG_PPC_OR_TSR, 1731 .addr = (uintptr_t) &bits, 1732 }; 1733 1734 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 1735 } 1736 1737 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits) 1738 { 1739 1740 CPUState *cs = CPU(cpu); 1741 uint32_t bits = tsr_bits; 1742 struct kvm_one_reg reg = { 1743 .id = KVM_REG_PPC_CLEAR_TSR, 1744 .addr = (uintptr_t) &bits, 1745 }; 1746 1747 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 1748 } 1749 1750 int kvmppc_set_tcr(PowerPCCPU *cpu) 1751 { 1752 CPUState *cs = CPU(cpu); 1753 CPUPPCState *env = &cpu->env; 1754 uint32_t tcr = env->spr[SPR_BOOKE_TCR]; 1755 1756 struct kvm_one_reg reg = { 1757 .id = KVM_REG_PPC_TCR, 1758 .addr = (uintptr_t) &tcr, 
1759 }; 1760 1761 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 1762 } 1763 1764 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu) 1765 { 1766 CPUState *cs = CPU(cpu); 1767 int ret; 1768 1769 if (!kvm_enabled()) { 1770 return -1; 1771 } 1772 1773 if (!cap_ppc_watchdog) { 1774 printf("warning: KVM does not support watchdog"); 1775 return -1; 1776 } 1777 1778 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0); 1779 if (ret < 0) { 1780 fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n", 1781 __func__, strerror(-ret)); 1782 return ret; 1783 } 1784 1785 return ret; 1786 } 1787 1788 static int read_cpuinfo(const char *field, char *value, int len) 1789 { 1790 FILE *f; 1791 int ret = -1; 1792 int field_len = strlen(field); 1793 char line[512]; 1794 1795 f = fopen("/proc/cpuinfo", "r"); 1796 if (!f) { 1797 return -1; 1798 } 1799 1800 do { 1801 if (!fgets(line, sizeof(line), f)) { 1802 break; 1803 } 1804 if (!strncmp(line, field, field_len)) { 1805 pstrcpy(value, len, line); 1806 ret = 0; 1807 break; 1808 } 1809 } while(*line); 1810 1811 fclose(f); 1812 1813 return ret; 1814 } 1815 1816 uint32_t kvmppc_get_tbfreq(void) 1817 { 1818 char line[512]; 1819 char *ns; 1820 uint32_t retval = NANOSECONDS_PER_SECOND; 1821 1822 if (read_cpuinfo("timebase", line, sizeof(line))) { 1823 return retval; 1824 } 1825 1826 if (!(ns = strchr(line, ':'))) { 1827 return retval; 1828 } 1829 1830 ns++; 1831 1832 return atoi(ns); 1833 } 1834 1835 bool kvmppc_get_host_serial(char **value) 1836 { 1837 return g_file_get_contents("/proc/device-tree/system-id", value, NULL, 1838 NULL); 1839 } 1840 1841 bool kvmppc_get_host_model(char **value) 1842 { 1843 return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL); 1844 } 1845 1846 /* Try to find a device tree node for a CPU with clock-frequency property */ 1847 static int kvmppc_find_cpu_dt(char *buf, int buf_len) 1848 { 1849 struct dirent *dirp; 1850 DIR *dp; 1851 1852 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) { 1853 printf("Can't open directory " PROC_DEVTREE_CPU "\n"); 1854 return -1; 1855 } 1856 1857 buf[0] = '\0'; 1858 while ((dirp = readdir(dp)) != NULL) { 1859 FILE *f; 1860 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU, 1861 dirp->d_name); 1862 f = fopen(buf, "r"); 1863 if (f) { 1864 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name); 1865 fclose(f); 1866 break; 1867 } 1868 buf[0] = '\0'; 1869 } 1870 closedir(dp); 1871 if (buf[0] == '\0') { 1872 printf("Unknown host!\n"); 1873 return -1; 1874 } 1875 1876 return 0; 1877 } 1878 1879 static uint64_t kvmppc_read_int_dt(const char *filename) 1880 { 1881 union { 1882 uint32_t v32; 1883 uint64_t v64; 1884 } u; 1885 FILE *f; 1886 int len; 1887 1888 f = fopen(filename, "rb"); 1889 if (!f) { 1890 return -1; 1891 } 1892 1893 len = fread(&u, 1, sizeof(u), f); 1894 fclose(f); 1895 switch (len) { 1896 case 4: 1897 /* property is a 32-bit quantity */ 1898 return be32_to_cpu(u.v32); 1899 case 8: 1900 return be64_to_cpu(u.v64); 1901 } 1902 1903 return 0; 1904 } 1905 1906 /* Read a CPU node property from the host device tree that's a single 1907 * integer (32-bit or 64-bit). 
Returns 0 if anything goes wrong 1908 * (can't find or open the property, or doesn't understand the 1909 * format) */ 1910 static uint64_t kvmppc_read_int_cpu_dt(const char *propname) 1911 { 1912 char buf[PATH_MAX], *tmp; 1913 uint64_t val; 1914 1915 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) { 1916 return -1; 1917 } 1918 1919 tmp = g_strdup_printf("%s/%s", buf, propname); 1920 val = kvmppc_read_int_dt(tmp); 1921 g_free(tmp); 1922 1923 return val; 1924 } 1925 1926 uint64_t kvmppc_get_clockfreq(void) 1927 { 1928 return kvmppc_read_int_cpu_dt("clock-frequency"); 1929 } 1930 1931 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo) 1932 { 1933 PowerPCCPU *cpu = ppc_env_get_cpu(env); 1934 CPUState *cs = CPU(cpu); 1935 1936 if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) && 1937 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) { 1938 return 0; 1939 } 1940 1941 return 1; 1942 } 1943 1944 int kvmppc_get_hasidle(CPUPPCState *env) 1945 { 1946 struct kvm_ppc_pvinfo pvinfo; 1947 1948 if (!kvmppc_get_pvinfo(env, &pvinfo) && 1949 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) { 1950 return 1; 1951 } 1952 1953 return 0; 1954 } 1955 1956 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len) 1957 { 1958 uint32_t *hc = (uint32_t*)buf; 1959 struct kvm_ppc_pvinfo pvinfo; 1960 1961 if (!kvmppc_get_pvinfo(env, &pvinfo)) { 1962 memcpy(buf, pvinfo.hcall, buf_len); 1963 return 0; 1964 } 1965 1966 /* 1967 * Fallback to always fail hypercalls regardless of endianness: 1968 * 1969 * tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian) 1970 * li r3, -1 1971 * b .+8 (becomes nop in wrong endian) 1972 * bswap32(li r3, -1) 1973 */ 1974 1975 hc[0] = cpu_to_be32(0x08000048); 1976 hc[1] = cpu_to_be32(0x3860ffff); 1977 hc[2] = cpu_to_be32(0x48000008); 1978 hc[3] = cpu_to_be32(bswap32(0x3860ffff)); 1979 1980 return 1; 1981 } 1982 1983 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall) 1984 { 1985 return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1); 1986 } 1987 1988 void kvmppc_enable_logical_ci_hcalls(void) 1989 { 1990 /* 1991 * FIXME: it would be nice if we could detect the cases where 1992 * we're using a device which requires the in kernel 1993 * implementation of these hcalls, but the kernel lacks them and 1994 * produce a warning. 
1995 */ 1996 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD); 1997 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE); 1998 } 1999 2000 void kvmppc_enable_set_mode_hcall(void) 2001 { 2002 kvmppc_enable_hcall(kvm_state, H_SET_MODE); 2003 } 2004 2005 void kvmppc_enable_clear_ref_mod_hcalls(void) 2006 { 2007 kvmppc_enable_hcall(kvm_state, H_CLEAR_REF); 2008 kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD); 2009 } 2010 2011 void kvmppc_set_papr(PowerPCCPU *cpu) 2012 { 2013 CPUState *cs = CPU(cpu); 2014 int ret; 2015 2016 if (!kvm_enabled()) { 2017 return; 2018 } 2019 2020 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0); 2021 if (ret) { 2022 error_report("This vCPU type or KVM version does not support PAPR"); 2023 exit(1); 2024 } 2025 2026 /* Update the capability flag so we sync the right information 2027 * with kvm */ 2028 cap_papr = 1; 2029 } 2030 2031 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr) 2032 { 2033 return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr); 2034 } 2035 2036 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy) 2037 { 2038 CPUState *cs = CPU(cpu); 2039 int ret; 2040 2041 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy); 2042 if (ret && mpic_proxy) { 2043 error_report("This KVM version does not support EPR"); 2044 exit(1); 2045 } 2046 } 2047 2048 int kvmppc_smt_threads(void) 2049 { 2050 return cap_ppc_smt ? cap_ppc_smt : 1; 2051 } 2052 2053 int kvmppc_set_smt_threads(int smt) 2054 { 2055 int ret; 2056 2057 ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_SMT, 0, smt, 0); 2058 if (!ret) { 2059 cap_ppc_smt = smt; 2060 } 2061 return ret; 2062 } 2063 2064 void kvmppc_hint_smt_possible(Error **errp) 2065 { 2066 int i; 2067 GString *g; 2068 char *s; 2069 2070 assert(kvm_enabled()); 2071 if (cap_ppc_smt_possible) { 2072 g = g_string_new("Available VSMT modes:"); 2073 for (i = 63; i >= 0; i--) { 2074 if ((1UL << i) & cap_ppc_smt_possible) { 2075 g_string_append_printf(g, " %lu", (1UL << i)); 2076 } 2077 } 2078 s = g_string_free(g, false); 2079 error_append_hint(errp, "%s.\n", s); 2080 g_free(s); 2081 } else { 2082 error_append_hint(errp, 2083 "This KVM seems to be too old to support VSMT.\n"); 2084 } 2085 } 2086 2087 2088 #ifdef TARGET_PPC64 2089 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift) 2090 { 2091 struct kvm_ppc_smmu_info info; 2092 long rampagesize, best_page_shift; 2093 int i; 2094 2095 /* Find the largest hardware supported page size that's less than 2096 * or equal to the (logical) backing page size of guest RAM */ 2097 kvm_get_smmu_info(&info, &error_fatal); 2098 rampagesize = qemu_getrampagesize(); 2099 best_page_shift = 0; 2100 2101 for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) { 2102 struct kvm_ppc_one_seg_page_size *sps = &info.sps[i]; 2103 2104 if (!sps->page_shift) { 2105 continue; 2106 } 2107 2108 if ((sps->page_shift > best_page_shift) 2109 && ((1UL << sps->page_shift) <= rampagesize)) { 2110 best_page_shift = sps->page_shift; 2111 } 2112 } 2113 2114 return MIN(current_size, 2115 1ULL << (best_page_shift + hash_shift - 7)); 2116 } 2117 #endif 2118 2119 bool kvmppc_spapr_use_multitce(void) 2120 { 2121 return cap_spapr_multitce; 2122 } 2123 2124 int kvmppc_spapr_enable_inkernel_multitce(void) 2125 { 2126 int ret; 2127 2128 ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0, 2129 H_PUT_TCE_INDIRECT, 1); 2130 if (!ret) { 2131 ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0, 2132 H_STUFF_TCE, 1); 2133 } 2134 2135 return ret; 2136 } 2137 2138 void 
*kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift, 2139 uint64_t bus_offset, uint32_t nb_table, 2140 int *pfd, bool need_vfio) 2141 { 2142 long len; 2143 int fd; 2144 void *table; 2145 2146 /* Must set fd to -1 so we don't try to munmap when called for 2147 * destroying the table, which the upper layers -will- do 2148 */ 2149 *pfd = -1; 2150 if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) { 2151 return NULL; 2152 } 2153 2154 if (cap_spapr_tce_64) { 2155 struct kvm_create_spapr_tce_64 args = { 2156 .liobn = liobn, 2157 .page_shift = page_shift, 2158 .offset = bus_offset >> page_shift, 2159 .size = nb_table, 2160 .flags = 0 2161 }; 2162 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE_64, &args); 2163 if (fd < 0) { 2164 fprintf(stderr, 2165 "KVM: Failed to create TCE64 table for liobn 0x%x\n", 2166 liobn); 2167 return NULL; 2168 } 2169 } else if (cap_spapr_tce) { 2170 uint64_t window_size = (uint64_t) nb_table << page_shift; 2171 struct kvm_create_spapr_tce args = { 2172 .liobn = liobn, 2173 .window_size = window_size, 2174 }; 2175 if ((window_size != args.window_size) || bus_offset) { 2176 return NULL; 2177 } 2178 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args); 2179 if (fd < 0) { 2180 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n", 2181 liobn); 2182 return NULL; 2183 } 2184 } else { 2185 return NULL; 2186 } 2187 2188 len = nb_table * sizeof(uint64_t); 2189 /* FIXME: round this up to page size */ 2190 2191 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); 2192 if (table == MAP_FAILED) { 2193 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n", 2194 liobn); 2195 close(fd); 2196 return NULL; 2197 } 2198 2199 *pfd = fd; 2200 return table; 2201 } 2202 2203 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table) 2204 { 2205 long len; 2206 2207 if (fd < 0) { 2208 return -1; 2209 } 2210 2211 len = nb_table * sizeof(uint64_t); 2212 if ((munmap(table, len) < 0) || 2213 (close(fd) < 0)) { 2214 fprintf(stderr, "KVM: Unexpected error removing TCE table: %s", 2215 strerror(errno)); 2216 /* Leak the table */ 2217 } 2218 2219 return 0; 2220 } 2221 2222 int kvmppc_reset_htab(int shift_hint) 2223 { 2224 uint32_t shift = shift_hint; 2225 2226 if (!kvm_enabled()) { 2227 /* Full emulation, tell caller to allocate htab itself */ 2228 return 0; 2229 } 2230 if (kvm_vm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) { 2231 int ret; 2232 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift); 2233 if (ret == -ENOTTY) { 2234 /* At least some versions of PR KVM advertise the 2235 * capability, but don't implement the ioctl(). Oops. 2236 * Return 0 so that we allocate the htab in qemu, as is 2237 * correct for PR. */ 2238 return 0; 2239 } else if (ret < 0) { 2240 return ret; 2241 } 2242 return shift; 2243 } 2244 2245 /* We have a kernel that predates the htab reset calls. For PR 2246 * KVM, we need to allocate the htab ourselves, for an HV KVM of 2247 * this era, it has allocated a 16MB fixed size hash table already. 
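 * (That is why the HV branch below returns a shift of 24: 16 MiB == 1 << 24.)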
*/ 2248 if (kvmppc_is_pr(kvm_state)) { 2249 /* PR - tell caller to allocate htab */ 2250 return 0; 2251 } else { 2252 /* HV - assume 16MB kernel allocated htab */ 2253 return 24; 2254 } 2255 } 2256 2257 static inline uint32_t mfpvr(void) 2258 { 2259 uint32_t pvr; 2260 2261 asm ("mfpvr %0" 2262 : "=r"(pvr)); 2263 return pvr; 2264 } 2265 2266 static void alter_insns(uint64_t *word, uint64_t flags, bool on) 2267 { 2268 if (on) { 2269 *word |= flags; 2270 } else { 2271 *word &= ~flags; 2272 } 2273 } 2274 2275 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data) 2276 { 2277 PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc); 2278 uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size"); 2279 uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size"); 2280 2281 /* Now fix up the class with information we can query from the host */ 2282 pcc->pvr = mfpvr(); 2283 2284 alter_insns(&pcc->insns_flags, PPC_ALTIVEC, 2285 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_ALTIVEC); 2286 alter_insns(&pcc->insns_flags2, PPC2_VSX, 2287 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_VSX); 2288 alter_insns(&pcc->insns_flags2, PPC2_DFP, 2289 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_DFP); 2290 2291 if (dcache_size != -1) { 2292 pcc->l1_dcache_size = dcache_size; 2293 } 2294 2295 if (icache_size != -1) { 2296 pcc->l1_icache_size = icache_size; 2297 } 2298 2299 #if defined(TARGET_PPC64) 2300 pcc->radix_page_info = kvm_get_radix_page_info(); 2301 2302 if ((pcc->pvr & 0xffffff00) == CPU_POWERPC_POWER9_DD1) { 2303 /* 2304 * POWER9 DD1 has some bugs which make it not really ISA 3.00 2305 * compliant. More importantly, advertising ISA 3.00 2306 * architected mode may prevent guests from activating 2307 * necessary DD1 workarounds. 2308 */ 2309 pcc->pcr_supported &= ~(PCR_COMPAT_3_00 | PCR_COMPAT_2_07 2310 | PCR_COMPAT_2_06 | PCR_COMPAT_2_05); 2311 } 2312 #endif /* defined(TARGET_PPC64) */ 2313 } 2314 2315 bool kvmppc_has_cap_epr(void) 2316 { 2317 return cap_epr; 2318 } 2319 2320 bool kvmppc_has_cap_fixup_hcalls(void) 2321 { 2322 return cap_fixup_hcalls; 2323 } 2324 2325 bool kvmppc_has_cap_htm(void) 2326 { 2327 return cap_htm; 2328 } 2329 2330 bool kvmppc_has_cap_mmu_radix(void) 2331 { 2332 return cap_mmu_radix; 2333 } 2334 2335 bool kvmppc_has_cap_mmu_hash_v3(void) 2336 { 2337 return cap_mmu_hash_v3; 2338 } 2339 2340 static bool kvmppc_power8_host(void) 2341 { 2342 bool ret = false; 2343 #ifdef TARGET_PPC64 2344 { 2345 uint32_t base_pvr = CPU_POWERPC_POWER_SERVER_MASK & mfpvr(); 2346 ret = (base_pvr == CPU_POWERPC_POWER8E_BASE) || 2347 (base_pvr == CPU_POWERPC_POWER8NVL_BASE) || 2348 (base_pvr == CPU_POWERPC_POWER8_BASE); 2349 } 2350 #endif /* TARGET_PPC64 */ 2351 return ret; 2352 } 2353 2354 static int parse_cap_ppc_safe_cache(struct kvm_ppc_cpu_char c) 2355 { 2356 bool l1d_thread_priv_req = !kvmppc_power8_host(); 2357 2358 if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_L1D_FLUSH_PR) { 2359 return 2; 2360 } else if ((!l1d_thread_priv_req || 2361 c.character & c.character_mask & H_CPU_CHAR_L1D_THREAD_PRIV) && 2362 (c.character & c.character_mask 2363 & (H_CPU_CHAR_L1D_FLUSH_ORI30 | H_CPU_CHAR_L1D_FLUSH_TRIG2))) { 2364 return 1; 2365 } 2366 2367 return 0; 2368 } 2369 2370 static int parse_cap_ppc_safe_bounds_check(struct kvm_ppc_cpu_char c) 2371 { 2372 if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_BNDS_CHK_SPEC_BAR) { 2373 return 2; 2374 } else if (c.character & c.character_mask & H_CPU_CHAR_SPEC_BAR_ORI31) { 2375 return 1; 2376 } 2377 2378 return 0; 2379 } 2380 2381 static int 
parse_cap_ppc_safe_indirect_branch(struct kvm_ppc_cpu_char c) 2382 { 2383 if (c.character & c.character_mask & H_CPU_CHAR_CACHE_COUNT_DIS) { 2384 return SPAPR_CAP_FIXED_CCD; 2385 } else if (c.character & c.character_mask & H_CPU_CHAR_BCCTRL_SERIALISED) { 2386 return SPAPR_CAP_FIXED_IBS; 2387 } 2388 2389 return 0; 2390 } 2391 2392 static void kvmppc_get_cpu_characteristics(KVMState *s) 2393 { 2394 struct kvm_ppc_cpu_char c; 2395 int ret; 2396 2397 /* Assume broken */ 2398 cap_ppc_safe_cache = 0; 2399 cap_ppc_safe_bounds_check = 0; 2400 cap_ppc_safe_indirect_branch = 0; 2401 2402 ret = kvm_vm_check_extension(s, KVM_CAP_PPC_GET_CPU_CHAR); 2403 if (!ret) { 2404 return; 2405 } 2406 ret = kvm_vm_ioctl(s, KVM_PPC_GET_CPU_CHAR, &c); 2407 if (ret < 0) { 2408 return; 2409 } 2410 2411 cap_ppc_safe_cache = parse_cap_ppc_safe_cache(c); 2412 cap_ppc_safe_bounds_check = parse_cap_ppc_safe_bounds_check(c); 2413 cap_ppc_safe_indirect_branch = parse_cap_ppc_safe_indirect_branch(c); 2414 } 2415 2416 int kvmppc_get_cap_safe_cache(void) 2417 { 2418 return cap_ppc_safe_cache; 2419 } 2420 2421 int kvmppc_get_cap_safe_bounds_check(void) 2422 { 2423 return cap_ppc_safe_bounds_check; 2424 } 2425 2426 int kvmppc_get_cap_safe_indirect_branch(void) 2427 { 2428 return cap_ppc_safe_indirect_branch; 2429 } 2430 2431 bool kvmppc_has_cap_nested_kvm_hv(void) 2432 { 2433 return !!cap_ppc_nested_kvm_hv; 2434 } 2435 2436 int kvmppc_set_cap_nested_kvm_hv(int enable) 2437 { 2438 return kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_NESTED_HV, 0, enable); 2439 } 2440 2441 bool kvmppc_has_cap_spapr_vfio(void) 2442 { 2443 return cap_spapr_vfio; 2444 } 2445 2446 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void) 2447 { 2448 uint32_t host_pvr = mfpvr(); 2449 PowerPCCPUClass *pvr_pcc; 2450 2451 pvr_pcc = ppc_cpu_class_by_pvr(host_pvr); 2452 if (pvr_pcc == NULL) { 2453 pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr); 2454 } 2455 2456 return pvr_pcc; 2457 } 2458 2459 static int kvm_ppc_register_host_cpu_type(MachineState *ms) 2460 { 2461 TypeInfo type_info = { 2462 .name = TYPE_HOST_POWERPC_CPU, 2463 .class_init = kvmppc_host_cpu_class_init, 2464 }; 2465 MachineClass *mc = MACHINE_GET_CLASS(ms); 2466 PowerPCCPUClass *pvr_pcc; 2467 ObjectClass *oc; 2468 DeviceClass *dc; 2469 int i; 2470 2471 pvr_pcc = kvm_ppc_get_host_cpu_class(); 2472 if (pvr_pcc == NULL) { 2473 return -1; 2474 } 2475 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc)); 2476 type_register(&type_info); 2477 if (object_dynamic_cast(OBJECT(ms), TYPE_SPAPR_MACHINE)) { 2478 /* override TCG default cpu type with 'host' cpu model */ 2479 mc->default_cpu_type = TYPE_HOST_POWERPC_CPU; 2480 } 2481 2482 oc = object_class_by_name(type_info.name); 2483 g_assert(oc); 2484 2485 /* 2486 * Update generic CPU family class alias (e.g. 
on a POWER8NVL host, 2487 * we want "POWER8" to be a "family" alias that points to the current 2488 * host CPU type, too) 2489 */ 2490 dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc)); 2491 for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) { 2492 if (strcasecmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) { 2493 char *suffix; 2494 2495 ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc)); 2496 suffix = strstr(ppc_cpu_aliases[i].model, POWERPC_CPU_TYPE_SUFFIX); 2497 if (suffix) { 2498 *suffix = 0; 2499 } 2500 break; 2501 } 2502 } 2503 2504 return 0; 2505 } 2506 2507 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function) 2508 { 2509 struct kvm_rtas_token_args args = { 2510 .token = token, 2511 }; 2512 2513 if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) { 2514 return -ENOENT; 2515 } 2516 2517 strncpy(args.name, function, sizeof(args.name)); 2518 2519 return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args); 2520 } 2521 2522 int kvmppc_get_htab_fd(bool write, uint64_t index, Error **errp) 2523 { 2524 struct kvm_get_htab_fd s = { 2525 .flags = write ? KVM_GET_HTAB_WRITE : 0, 2526 .start_index = index, 2527 }; 2528 int ret; 2529 2530 if (!cap_htab_fd) { 2531 error_setg(errp, "KVM version doesn't support %s the HPT", 2532 write ? "writing" : "reading"); 2533 return -ENOTSUP; 2534 } 2535 2536 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s); 2537 if (ret < 0) { 2538 error_setg(errp, "Unable to open fd for %s HPT %s KVM: %s", 2539 write ? "writing" : "reading", write ? "to" : "from", 2540 strerror(errno)); 2541 return -errno; 2542 } 2543 2544 return ret; 2545 } 2546 2547 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns) 2548 { 2549 int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); 2550 uint8_t buf[bufsize]; 2551 ssize_t rc; 2552 2553 do { 2554 rc = read(fd, buf, bufsize); 2555 if (rc < 0) { 2556 fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n", 2557 strerror(errno)); 2558 return rc; 2559 } else if (rc) { 2560 uint8_t *buffer = buf; 2561 ssize_t n = rc; 2562 while (n) { 2563 struct kvm_get_htab_header *head = 2564 (struct kvm_get_htab_header *) buffer; 2565 size_t chunksize = sizeof(*head) + 2566 HASH_PTE_SIZE_64 * head->n_valid; 2567 2568 qemu_put_be32(f, head->index); 2569 qemu_put_be16(f, head->n_valid); 2570 qemu_put_be16(f, head->n_invalid); 2571 qemu_put_buffer(f, (void *)(head + 1), 2572 HASH_PTE_SIZE_64 * head->n_valid); 2573 2574 buffer += chunksize; 2575 n -= chunksize; 2576 } 2577 } 2578 } while ((rc != 0) 2579 && ((max_ns < 0) 2580 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns))); 2581 2582 return (rc == 0) ? 
1 : 0; 2583 } 2584 2585 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index, 2586 uint16_t n_valid, uint16_t n_invalid) 2587 { 2588 struct kvm_get_htab_header *buf; 2589 size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64; 2590 ssize_t rc; 2591 2592 buf = alloca(chunksize); 2593 buf->index = index; 2594 buf->n_valid = n_valid; 2595 buf->n_invalid = n_invalid; 2596 2597 qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid); 2598 2599 rc = write(fd, buf, chunksize); 2600 if (rc < 0) { 2601 fprintf(stderr, "Error writing KVM hash table: %s\n", 2602 strerror(errno)); 2603 return rc; 2604 } 2605 if (rc != chunksize) { 2606 /* We should never get a short write on a single chunk */ 2607 fprintf(stderr, "Short write, restoring KVM hash table\n"); 2608 return -1; 2609 } 2610 return 0; 2611 } 2612 2613 bool kvm_arch_stop_on_emulation_error(CPUState *cpu) 2614 { 2615 return true; 2616 } 2617 2618 void kvm_arch_init_irq_routing(KVMState *s) 2619 { 2620 } 2621 2622 void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n) 2623 { 2624 int fd, rc; 2625 int i; 2626 2627 fd = kvmppc_get_htab_fd(false, ptex, &error_abort); 2628 2629 i = 0; 2630 while (i < n) { 2631 struct kvm_get_htab_header *hdr; 2632 int m = n < HPTES_PER_GROUP ? n : HPTES_PER_GROUP; 2633 char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64]; 2634 2635 rc = read(fd, buf, sizeof(buf)); 2636 if (rc < 0) { 2637 hw_error("kvmppc_read_hptes: Unable to read HPTEs"); 2638 } 2639 2640 hdr = (struct kvm_get_htab_header *)buf; 2641 while ((i < n) && ((char *)hdr < (buf + rc))) { 2642 int invalid = hdr->n_invalid, valid = hdr->n_valid; 2643 2644 if (hdr->index != (ptex + i)) { 2645 hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32 2646 " != (%"HWADDR_PRIu" + %d", hdr->index, ptex, i); 2647 } 2648 2649 if (n - i < valid) { 2650 valid = n - i; 2651 } 2652 memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * valid); 2653 i += valid; 2654 2655 if ((n - i) < invalid) { 2656 invalid = n - i; 2657 } 2658 memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64); 2659 i += invalid; 2660 2661 hdr = (struct kvm_get_htab_header *) 2662 ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid); 2663 } 2664 } 2665 2666 close(fd); 2667 } 2668 2669 void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1) 2670 { 2671 int fd, rc; 2672 struct { 2673 struct kvm_get_htab_header hdr; 2674 uint64_t pte0; 2675 uint64_t pte1; 2676 } buf; 2677 2678 fd = kvmppc_get_htab_fd(true, 0 /* Ignored */, &error_abort); 2679 2680 buf.hdr.n_valid = 1; 2681 buf.hdr.n_invalid = 0; 2682 buf.hdr.index = ptex; 2683 buf.pte0 = cpu_to_be64(pte0); 2684 buf.pte1 = cpu_to_be64(pte1); 2685 2686 rc = write(fd, &buf, sizeof(buf)); 2687 if (rc != sizeof(buf)) { 2688 hw_error("kvmppc_write_hpte: Unable to update KVM HPT"); 2689 } 2690 close(fd); 2691 } 2692 2693 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route, 2694 uint64_t address, uint32_t data, PCIDevice *dev) 2695 { 2696 return 0; 2697 } 2698 2699 int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route, 2700 int vector, PCIDevice *dev) 2701 { 2702 return 0; 2703 } 2704 2705 int kvm_arch_release_virq_post(int virq) 2706 { 2707 return 0; 2708 } 2709 2710 int kvm_arch_msi_data_to_gsi(uint32_t data) 2711 { 2712 return data & 0xffff; 2713 } 2714 2715 int kvmppc_enable_hwrng(void) 2716 { 2717 if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) { 2718 return -1; 2719 } 2720 2721 return kvmppc_enable_hcall(kvm_state, H_RANDOM); 2722 } 2723 2724 void 
kvmppc_check_papr_resize_hpt(Error **errp) 2725 { 2726 if (!kvm_enabled()) { 2727 return; /* No KVM, we're good */ 2728 } 2729 2730 if (cap_resize_hpt) { 2731 return; /* Kernel has explicit support, we're good */ 2732 } 2733 2734 /* Otherwise fall back to checking for PR KVM */ 2735 if (kvmppc_is_pr(kvm_state)) { 2736 return; 2737 } 2738 2739 error_setg(errp, 2740 "Hash page table resizing not available with this KVM version"); 2741 } 2742 2743 int kvmppc_resize_hpt_prepare(PowerPCCPU *cpu, target_ulong flags, int shift) 2744 { 2745 CPUState *cs = CPU(cpu); 2746 struct kvm_ppc_resize_hpt rhpt = { 2747 .flags = flags, 2748 .shift = shift, 2749 }; 2750 2751 if (!cap_resize_hpt) { 2752 return -ENOSYS; 2753 } 2754 2755 return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_PREPARE, &rhpt); 2756 } 2757 2758 int kvmppc_resize_hpt_commit(PowerPCCPU *cpu, target_ulong flags, int shift) 2759 { 2760 CPUState *cs = CPU(cpu); 2761 struct kvm_ppc_resize_hpt rhpt = { 2762 .flags = flags, 2763 .shift = shift, 2764 }; 2765 2766 if (!cap_resize_hpt) { 2767 return -ENOSYS; 2768 } 2769 2770 return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_COMMIT, &rhpt); 2771 } 2772 2773 /* 2774 * This is a helper function to detect a post-migration scenario 2775 * in which a guest, running as KVM-HV, freezes in cpu_post_load because 2776 * the guest kernel can't handle a PVR value other than the actual host 2777 * PVR in KVM_SET_SREGS, even if pvr_match() returns true. 2778 * 2779 * If we don't have cap_ppc_pvr_compat and we're not running in PR 2780 * (so, we're HV), return true. The workaround itself is done in 2781 * cpu_post_load. 2782 * 2783 * The order here is important: we'll only check for KVM PR as a 2784 * fallback if the guest kernel can't handle the situation itself. 2785 * We want to avoid querying the running KVM type at the QEMU level 2786 * as much as possible. 2787 */ 2788 bool kvmppc_pvr_workaround_required(PowerPCCPU *cpu) 2789 { 2790 CPUState *cs = CPU(cpu); 2791 2792 if (!kvm_enabled()) { 2793 return false; 2794 } 2795 2796 if (cap_ppc_pvr_compat) { 2797 return false; 2798 } 2799 2800 return !kvmppc_is_pr(cs->kvm_state); 2801 } 2802 2803 void kvmppc_set_reg_ppc_online(PowerPCCPU *cpu, unsigned int online) 2804 { 2805 CPUState *cs = CPU(cpu); 2806 2807 if (kvm_enabled()) { 2808 kvm_set_one_reg(cs, KVM_REG_PPC_ONLINE, &online); 2809 } 2810 } 2811