/*
 * PowerPC implementation of KVM hooks
 *
 * Copyright IBM Corp. 2007
 * Copyright (C) 2011 Freescale Semiconductor, Inc.
 *
 * Authors:
 *  Jerone Young <jyoung5@us.ibm.com>
 *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
 *  Hollis Blanchard <hollisb@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include <dirent.h>
#include <sys/ioctl.h>
#include <sys/vfs.h>

#include <linux/kvm.h>

#include "qemu-common.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "cpu.h"
#include "cpu-models.h"
#include "qemu/timer.h"
#include "sysemu/sysemu.h"
#include "sysemu/hw_accel.h"
#include "kvm_ppc.h"
#include "sysemu/cpus.h"
#include "sysemu/device_tree.h"
#include "mmu-hash64.h"

#include "hw/sysbus.h"
#include "hw/ppc/spapr.h"
#include "hw/ppc/spapr_cpu_core.h"
#include "hw/ppc/ppc.h"
#include "migration/qemu-file-types.h"
#include "sysemu/watchdog.h"
#include "trace.h"
#include "exec/gdbstub.h"
#include "exec/memattrs.h"
#include "exec/ram_addr.h"
#include "sysemu/hostmem.h"
#include "qemu/cutils.h"
#include "qemu/mmap-alloc.h"
#include "elf.h"
#include "sysemu/kvm_int.h"

#define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"

const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
    KVM_CAP_LAST_INFO
};

static int cap_interrupt_unset;
static int cap_interrupt_level;
static int cap_segstate;
static int cap_booke_sregs;
static int cap_ppc_smt;
static int cap_ppc_smt_possible;
static int cap_spapr_tce;
static int cap_spapr_tce_64;
static int cap_spapr_multitce;
static int cap_spapr_vfio;
static int cap_hior;
static int cap_one_reg;
static int cap_epr;
static int cap_ppc_watchdog;
static int cap_papr;
static int cap_htab_fd;
static int cap_fixup_hcalls;
static int cap_htm;             /* Hardware transactional memory support */
static int cap_mmu_radix;
static int cap_mmu_hash_v3;
static int cap_xive;
static int cap_resize_hpt;
static int cap_ppc_pvr_compat;
static int cap_ppc_safe_cache;
static int cap_ppc_safe_bounds_check;
static int cap_ppc_safe_indirect_branch;
static int cap_ppc_count_cache_flush_assist;
static int cap_ppc_nested_kvm_hv;
static int cap_large_decr;

static uint32_t debug_inst_opcode;

/*
 * XXX We have a race condition where we actually have a level triggered
 *     interrupt, but the infrastructure can't expose that yet, so the guest
 *     takes but ignores it, goes to sleep and never gets notified that there's
 *     still an interrupt pending.
 *
 *     As a quick workaround, let's just wake up again 20 ms after we injected
 *     an interrupt. That way we can assure that we're always reinjecting
 *     interrupts in case the guest swallowed them.
 */
static QEMUTimer *idle_timer;

static void kvm_kick_cpu(void *opaque)
{
    PowerPCCPU *cpu = opaque;

    qemu_cpu_kick(CPU(cpu));
}

/*
 * Check whether we are running with KVM-PR (instead of KVM-HV). This
 * should only be used for fallback tests - generally we should use
 * explicit capabilities for the features we want, rather than
 * assuming what is/isn't available depending on the KVM variant.
 */
static bool kvmppc_is_pr(KVMState *ks)
{
    /* Assume KVM-PR if the GET_PVINFO capability is available */
    return kvm_vm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
}

static int kvm_ppc_register_host_cpu_type(MachineState *ms);
static void kvmppc_get_cpu_characteristics(KVMState *s);
static int kvmppc_get_dec_bits(void);

int kvm_arch_init(MachineState *ms, KVMState *s)
{
    cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
    cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
    cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
    cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
    cap_ppc_smt_possible = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT_POSSIBLE);
    cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
    cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64);
    cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
    cap_spapr_vfio = kvm_vm_check_extension(s, KVM_CAP_SPAPR_TCE_VFIO);
    cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
    cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
    cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
    cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
    /*
     * Note: we don't set cap_papr here, because this capability is
     * only activated after this by kvmppc_set_papr()
     */
    cap_htab_fd = kvm_vm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
    cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
    cap_ppc_smt = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT);
    cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
    cap_mmu_radix = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_RADIX);
    cap_mmu_hash_v3 = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_HASH_V3);
    cap_xive = kvm_vm_check_extension(s, KVM_CAP_PPC_IRQ_XIVE);
    cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT);
    kvmppc_get_cpu_characteristics(s);
    cap_ppc_nested_kvm_hv = kvm_vm_check_extension(s, KVM_CAP_PPC_NESTED_HV);
    cap_large_decr = kvmppc_get_dec_bits();
    /*
     * Note: setting it to false because there is no such capability
     * in KVM at this moment.
     *
     * TODO: call kvm_vm_check_extension() with the right capability
     * after the kernel starts implementing it.
     */
    cap_ppc_pvr_compat = false;

    if (!cap_interrupt_level) {
        fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
                        "VM to stall at times!\n");
    }

    kvm_ppc_register_host_cpu_type(ms);

    return 0;
}

int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
{
    return 0;
}

static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
{
    CPUPPCState *cenv = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_sregs sregs;
    int ret;

    if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
        /*
         * What we're really trying to say is "if we're on BookE, we
         * use the native PVR for now". This is the only sane way to
         * check it though, so we may mislead users into thinking they
         * can run BookE guests on BookS.
         * Let's hope nobody dares enough :)
         */
        return 0;
    } else {
        if (!cap_segstate) {
            fprintf(stderr, "kvm error: missing PVR setting capability\n");
            return -ENOSYS;
        }
    }

    ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
    if (ret) {
        return ret;
    }

    sregs.pvr = cenv->spr[SPR_PVR];
    return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
}

/* Set up a shared TLB array with KVM */
static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_book3e_206_tlb_params params = {};
    struct kvm_config_tlb cfg = {};
    unsigned int entries = 0;
    int ret, i;

    if (!kvm_enabled() ||
        !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
        return 0;
    }

    assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);

    for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
        params.tlb_sizes[i] = booke206_tlb_size(env, i);
        params.tlb_ways[i] = booke206_tlb_ways(env, i);
        entries += params.tlb_sizes[i];
    }

    assert(entries == env->nb_tlb);
    assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));

    env->tlb_dirty = true;

    cfg.array = (uintptr_t)env->tlb.tlbm;
    cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
    cfg.params = (uintptr_t)&params;
    cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;

    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
    if (ret < 0) {
        fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
                __func__, strerror(-ret));
        return ret;
    }

    env->kvm_sw_tlb = true;
    return 0;
}


#if defined(TARGET_PPC64)
static void kvm_get_smmu_info(struct kvm_ppc_smmu_info *info, Error **errp)
{
    int ret;

    assert(kvm_state != NULL);

    if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
        error_setg(errp, "KVM doesn't expose the MMU features it supports");
        error_append_hint(errp, "Consider switching to a newer KVM\n");
        return;
    }

    ret = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_SMMU_INFO, info);
    if (ret == 0) {
        return;
    }

    error_setg_errno(errp, -ret,
                     "KVM failed to provide the MMU features it supports");
}

struct ppc_radix_page_info *kvm_get_radix_page_info(void)
{
    KVMState *s = KVM_STATE(current_machine->accelerator);
    struct ppc_radix_page_info *radix_page_info;
    struct kvm_ppc_rmmu_info rmmu_info;
    int i;

    if (!kvm_check_extension(s, KVM_CAP_PPC_MMU_RADIX)) {
        return NULL;
    }
    if (kvm_vm_ioctl(s, KVM_PPC_GET_RMMU_INFO, &rmmu_info)) {
        return NULL;
    }
    radix_page_info = g_malloc0(sizeof(*radix_page_info));
    radix_page_info->count = 0;
    for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) {
        if (rmmu_info.ap_encodings[i]) {
            radix_page_info->entries[i] = rmmu_info.ap_encodings[i];
            radix_page_info->count++;
        }
    }
    return radix_page_info;
}

target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu,
                                     bool radix, bool gtse,
                                     uint64_t proc_tbl)
{
    CPUState *cs = CPU(cpu);
    int ret;
    uint64_t flags = 0;
    struct kvm_ppc_mmuv3_cfg cfg = {
        .process_table = proc_tbl,
    };

    if (radix) {
        flags |= KVM_PPC_MMUV3_RADIX;
    }
    if (gtse) {
        flags |= KVM_PPC_MMUV3_GTSE;
    }
    cfg.flags = flags;
    ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_CONFIGURE_V3_MMU, &cfg);
    switch (ret) {
    case 0:
        return H_SUCCESS;
    case -EINVAL:
        return H_PARAMETER;
    case -ENODEV:
        return H_NOT_AVAILABLE;
    default:
        return H_HARDWARE;
    }
}

bool kvmppc_hpt_needs_host_contiguous_pages(void)
{
    static struct kvm_ppc_smmu_info smmu_info;

    if (!kvm_enabled()) {
        return false;
    }

    kvm_get_smmu_info(&smmu_info, &error_fatal);
    return !!(smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL);
}

void kvm_check_mmu(PowerPCCPU *cpu, Error **errp)
{
    struct kvm_ppc_smmu_info smmu_info;
    int iq, ik, jq, jk;
    Error *local_err = NULL;

    /* For now, we only have anything to check on hash64 MMUs */
    if (!cpu->hash64_opts || !kvm_enabled()) {
        return;
    }

    kvm_get_smmu_info(&smmu_info, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        return;
    }

    if (ppc_hash64_has(cpu, PPC_HASH64_1TSEG)
        && !(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
        error_setg(errp,
                   "KVM does not support 1TiB segments which guest expects");
        return;
    }

    if (smmu_info.slb_size < cpu->hash64_opts->slb_size) {
        error_setg(errp, "KVM only supports %u SLB entries, but guest needs %u",
                   smmu_info.slb_size, cpu->hash64_opts->slb_size);
        return;
    }

    /*
     * Verify that every pagesize supported by the cpu model is
     * supported by KVM with the same encodings
     */
    for (iq = 0; iq < ARRAY_SIZE(cpu->hash64_opts->sps); iq++) {
        PPCHash64SegmentPageSizes *qsps = &cpu->hash64_opts->sps[iq];
        struct kvm_ppc_one_seg_page_size *ksps;

        for (ik = 0; ik < ARRAY_SIZE(smmu_info.sps); ik++) {
            if (qsps->page_shift == smmu_info.sps[ik].page_shift) {
                break;
            }
        }
        if (ik >= ARRAY_SIZE(smmu_info.sps)) {
            error_setg(errp, "KVM doesn't support base page shift %u",
                       qsps->page_shift);
            return;
        }

        ksps = &smmu_info.sps[ik];
        if (ksps->slb_enc != qsps->slb_enc) {
            error_setg(errp,
                       "KVM uses SLB encoding 0x%x for page shift %u, but guest expects 0x%x",
                       ksps->slb_enc, ksps->page_shift, qsps->slb_enc);
            return;
        }

        for (jq = 0; jq < ARRAY_SIZE(qsps->enc); jq++) {
            for (jk = 0; jk < ARRAY_SIZE(ksps->enc); jk++) {
                if (qsps->enc[jq].page_shift == ksps->enc[jk].page_shift) {
                    break;
                }
            }

            if (jk >= ARRAY_SIZE(ksps->enc)) {
                error_setg(errp, "KVM doesn't support page shift %u/%u",
                           qsps->enc[jq].page_shift, qsps->page_shift);
                return;
            }
            if (qsps->enc[jq].pte_enc != ksps->enc[jk].pte_enc) {
                error_setg(errp,
                           "KVM uses PTE encoding 0x%x for page shift %u/%u, but guest expects 0x%x",
                           ksps->enc[jk].pte_enc, qsps->enc[jq].page_shift,
                           qsps->page_shift, qsps->enc[jq].pte_enc);
                return;
            }
        }
    }

    if (ppc_hash64_has(cpu, PPC_HASH64_CI_LARGEPAGE)) {
        /*
         * Mostly what guest pagesizes we can use are related to the
         * host pages used to map guest RAM, which is handled in the
         * platform code. Cache-Inhibited largepages (64k) however are
         * used for I/O, so if they're mapped to the host at all it
         * will be a normal mapping, not a special hugepage one used
         * for RAM.
         */
        if (getpagesize() < 0x10000) {
            error_setg(errp,
                       "KVM can't supply 64kiB CI pages, which guest expects");
        }
    }
}
#endif /* defined(TARGET_PPC64) */

unsigned long kvm_arch_vcpu_id(CPUState *cpu)
{
    return POWERPC_CPU(cpu)->vcpu_id;
}

/*
 * e500 supports 2 h/w breakpoints and 2 watchpoints. book3s supports
 * only 1 watchpoint, so an array size of 4 is sufficient for now.
 */
#define MAX_HW_BKPTS 4

static struct HWBreakpoint {
    target_ulong addr;
    int type;
} hw_debug_points[MAX_HW_BKPTS];

static CPUWatchpoint hw_watchpoint;

/* By default, no hardware breakpoints or watchpoints are supported */
static int max_hw_breakpoint;
static int max_hw_watchpoint;
static int nb_hw_breakpoint;
static int nb_hw_watchpoint;

static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
{
    if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
        max_hw_breakpoint = 2;
        max_hw_watchpoint = 2;
    }

    if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
        fprintf(stderr, "Error initializing h/w breakpoints\n");
        return;
    }
}

int kvm_arch_init_vcpu(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *cenv = &cpu->env;
    int ret;

    /* Synchronize sregs with kvm */
    ret = kvm_arch_sync_sregs(cpu);
    if (ret) {
        if (ret == -EINVAL) {
            error_report("Register sync failed... If you're using kvm-hv.ko,"
                         " only \"-cpu host\" is possible");
        }
        return ret;
    }

    idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);

    switch (cenv->mmu_model) {
    case POWERPC_MMU_BOOKE206:
        /* This target supports access to KVM's guest TLB */
        ret = kvm_booke206_tlb_init(cpu);
        break;
    case POWERPC_MMU_2_07:
        if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
            /*
             * KVM-HV has transactional memory on POWER8 also without
             * the KVM_CAP_PPC_HTM extension, so enable it here
             * instead as long as it's available to userspace on the
             * host.
             */
            if (qemu_getauxval(AT_HWCAP2) & PPC_FEATURE2_HAS_HTM) {
                cap_htm = true;
            }
        }
        break;
    default:
        break;
    }

    kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
    kvmppc_hw_debug_points_init(cenv);

    return ret;
}

int kvm_arch_destroy_vcpu(CPUState *cs)
{
    return 0;
}

static void kvm_sw_tlb_put(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_dirty_tlb dirty_tlb;
    unsigned char *bitmap;
    int ret;

    if (!env->kvm_sw_tlb) {
        return;
    }

    bitmap = g_malloc((env->nb_tlb + 7) / 8);
    memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);

    dirty_tlb.bitmap = (uintptr_t)bitmap;
    dirty_tlb.num_dirty = env->nb_tlb;

    ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
    if (ret) {
        fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
                __func__, strerror(-ret));
    }

    g_free(bitmap);
}

static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    union {
        uint32_t u32;
        uint64_t u64;
    } val;
    struct kvm_one_reg reg = {
        .id = id,
        .addr = (uintptr_t) &val,
    };
    int ret;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret != 0) {
        trace_kvm_failed_spr_get(spr, strerror(errno));
    } else {
        switch (id & KVM_REG_SIZE_MASK) {
        case KVM_REG_SIZE_U32:
            env->spr[spr] = val.u32;
            break;

        case KVM_REG_SIZE_U64:
            env->spr[spr] = val.u64;
            break;

        default:
            /* Don't handle this size yet */
            abort();
        }
    }
}

static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    union {
        uint32_t u32;
        uint64_t u64;
    } val;
    struct kvm_one_reg reg = {
        .id = id,
        .addr = (uintptr_t) &val,
    };
    int ret;

    switch (id & KVM_REG_SIZE_MASK) {
    case KVM_REG_SIZE_U32:
        val.u32 = env->spr[spr];
        break;

    case KVM_REG_SIZE_U64:
        val.u64 = env->spr[spr];
        break;

    default:
        /* Don't handle this size yet */
        abort();
    }

    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret != 0) {
        trace_kvm_failed_spr_set(spr, strerror(errno));
    }
}

static int kvm_put_fp(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int i;
    int ret;

    if (env->insns_flags & PPC_FLOAT) {
        uint64_t fpscr = env->fpscr;
        bool vsx = !!(env->insns_flags2 & PPC2_VSX);

        reg.id = KVM_REG_PPC_FPSCR;
        reg.addr = (uintptr_t)&fpscr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            trace_kvm_failed_fpscr_set(strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            uint64_t vsr[2];
            uint64_t *fpr = cpu_fpr_ptr(&cpu->env, i);
            uint64_t *vsrl = cpu_vsrl_ptr(&cpu->env, i);

#ifdef HOST_WORDS_BIGENDIAN
            vsr[0] = float64_val(*fpr);
            vsr[1] = *vsrl;
#else
            vsr[0] = *vsrl;
            vsr[1] = float64_val(*fpr);
#endif
            reg.addr = (uintptr_t) &vsr;
            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);

            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
            if (ret < 0) {
                trace_kvm_failed_fp_set(vsx ? "VSR" : "FPR", i,
                                        strerror(errno));
                return ret;
            }
        }
    }

    if (env->insns_flags & PPC_ALTIVEC) {
        reg.id = KVM_REG_PPC_VSCR;
        reg.addr = (uintptr_t)&env->vscr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            trace_kvm_failed_vscr_set(strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            reg.id = KVM_REG_PPC_VR(i);
            reg.addr = (uintptr_t)cpu_avr_ptr(env, i);
            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
            if (ret < 0) {
                trace_kvm_failed_vr_set(i, strerror(errno));
                return ret;
            }
        }
    }

    return 0;
}

static int kvm_get_fp(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int i;
    int ret;

    if (env->insns_flags & PPC_FLOAT) {
        uint64_t fpscr;
        bool vsx = !!(env->insns_flags2 & PPC2_VSX);

        reg.id = KVM_REG_PPC_FPSCR;
        reg.addr = (uintptr_t)&fpscr;
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret < 0) {
            trace_kvm_failed_fpscr_get(strerror(errno));
            return ret;
        } else {
            env->fpscr = fpscr;
        }

        for (i = 0; i < 32; i++) {
            uint64_t vsr[2];
            uint64_t *fpr = cpu_fpr_ptr(&cpu->env, i);
            uint64_t *vsrl = cpu_vsrl_ptr(&cpu->env, i);

            reg.addr = (uintptr_t) &vsr;
            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);

            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
            if (ret < 0) {
                trace_kvm_failed_fp_get(vsx ? "VSR" : "FPR", i,
                                        strerror(errno));
                return ret;
            } else {
#ifdef HOST_WORDS_BIGENDIAN
                *fpr = vsr[0];
                if (vsx) {
                    *vsrl = vsr[1];
                }
#else
                *fpr = vsr[1];
                if (vsx) {
                    *vsrl = vsr[0];
                }
#endif
            }
        }
    }

    if (env->insns_flags & PPC_ALTIVEC) {
        reg.id = KVM_REG_PPC_VSCR;
        reg.addr = (uintptr_t)&env->vscr;
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret < 0) {
            trace_kvm_failed_vscr_get(strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            reg.id = KVM_REG_PPC_VR(i);
            reg.addr = (uintptr_t)cpu_avr_ptr(env, i);
            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
            if (ret < 0) {
                trace_kvm_failed_vr_get(i, strerror(errno));
                return ret;
            }
        }
    }

    return 0;
}

#if defined(TARGET_PPC64)
static int kvm_get_vpa(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu);
    struct kvm_one_reg reg;
    int ret;

    reg.id = KVM_REG_PPC_VPA_ADDR;
    reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret < 0) {
        trace_kvm_failed_vpa_addr_get(strerror(errno));
        return ret;
    }

    assert((uintptr_t)&spapr_cpu->slb_shadow_size
           == ((uintptr_t)&spapr_cpu->slb_shadow_addr + 8));
    reg.id = KVM_REG_PPC_VPA_SLB;
    reg.addr = (uintptr_t)&spapr_cpu->slb_shadow_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret < 0) {
        trace_kvm_failed_slb_get(strerror(errno));
        return ret;
    }

    assert((uintptr_t)&spapr_cpu->dtl_size
           == ((uintptr_t)&spapr_cpu->dtl_addr + 8));
    reg.id = KVM_REG_PPC_VPA_DTL;
    reg.addr = (uintptr_t)&spapr_cpu->dtl_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret < 0) {
        trace_kvm_failed_dtl_get(strerror(errno));
        return ret;
    }

    return 0;
}

static int kvm_put_vpa(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu);
    struct kvm_one_reg reg;
    int ret;

    /*
     * SLB shadow or DTL can't be registered unless a master VPA is
     * registered. That means when restoring state, if a VPA *is*
     * registered, we need to set that up first.
     * If not, we need to deregister the others before deregistering
     * the master VPA.
     */
    assert(spapr_cpu->vpa_addr
           || !(spapr_cpu->slb_shadow_addr || spapr_cpu->dtl_addr));

    if (spapr_cpu->vpa_addr) {
        reg.id = KVM_REG_PPC_VPA_ADDR;
        reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            trace_kvm_failed_vpa_addr_set(strerror(errno));
            return ret;
        }
    }

    assert((uintptr_t)&spapr_cpu->slb_shadow_size
           == ((uintptr_t)&spapr_cpu->slb_shadow_addr + 8));
    reg.id = KVM_REG_PPC_VPA_SLB;
    reg.addr = (uintptr_t)&spapr_cpu->slb_shadow_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret < 0) {
        trace_kvm_failed_slb_set(strerror(errno));
        return ret;
    }

    assert((uintptr_t)&spapr_cpu->dtl_size
           == ((uintptr_t)&spapr_cpu->dtl_addr + 8));
    reg.id = KVM_REG_PPC_VPA_DTL;
    reg.addr = (uintptr_t)&spapr_cpu->dtl_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret < 0) {
        trace_kvm_failed_dtl_set(strerror(errno));
        return ret;
    }

    if (!spapr_cpu->vpa_addr) {
        reg.id = KVM_REG_PPC_VPA_ADDR;
        reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            trace_kvm_failed_null_vpa_addr_set(strerror(errno));
            return ret;
        }
    }

    return 0;
}
#endif /* TARGET_PPC64 */

int kvmppc_put_books_sregs(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    struct kvm_sregs sregs;
    int i;

    sregs.pvr = env->spr[SPR_PVR];

    if (cpu->vhyp) {
        PPCVirtualHypervisorClass *vhc =
            PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
        sregs.u.s.sdr1 = vhc->encode_hpt_for_kvm_pr(cpu->vhyp);
    } else {
        sregs.u.s.sdr1 = env->spr[SPR_SDR1];
    }

    /* Sync SLB */
#ifdef TARGET_PPC64
    for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
        sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
        if (env->slb[i].esid & SLB_ESID_V) {
            sregs.u.s.ppc64.slb[i].slbe |= i;
        }
        sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
    }
#endif

    /* Sync SRs */
    for (i = 0; i < 16; i++) {
        sregs.u.s.ppc32.sr[i] = env->sr[i];
    }

    /* Sync BATs */
    for (i = 0; i < 8; i++) {
        /* Beware: we have to swap the upper and lower 32-bit halves here */
        sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
                                   | env->DBAT[1][i];
        sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
                                   | env->IBAT[1][i];
    }

    return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
}

int kvm_arch_put_registers(CPUState *cs, int level)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_regs regs;
    int ret;
    int i;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    regs.ctr = env->ctr;
    regs.lr = env->lr;
    regs.xer = cpu_read_xer(env);
    regs.msr = env->msr;
    regs.pc = env->nip;

    regs.srr0 = env->spr[SPR_SRR0];
    regs.srr1 = env->spr[SPR_SRR1];

    regs.sprg0 = env->spr[SPR_SPRG0];
    regs.sprg1 = env->spr[SPR_SPRG1];
    regs.sprg2 = env->spr[SPR_SPRG2];
    regs.sprg3 = env->spr[SPR_SPRG3];
    regs.sprg4 = env->spr[SPR_SPRG4];
    regs.sprg5 = env->spr[SPR_SPRG5];
    regs.sprg6 = env->spr[SPR_SPRG6];
    regs.sprg7 = env->spr[SPR_SPRG7];

    regs.pid = env->spr[SPR_BOOKE_PID];

    for (i = 0; i < 32; i++) {
        regs.gpr[i] = env->gpr[i];
    }

    regs.cr = 0;
    for (i = 0; i < 8; i++) {
        regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
    }

    ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    kvm_put_fp(cs);

    if (env->tlb_dirty) {
        kvm_sw_tlb_put(cpu);
        env->tlb_dirty = false;
    }

    if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
        ret = kvmppc_put_books_sregs(cpu);
        if (ret < 0) {
            return ret;
        }
    }

    if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
        kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
    }

    if (cap_one_reg) {
        int i;

        /*
         * We deliberately ignore errors here: for kernels which have
         * the ONE_REG calls but don't support the specific
         * registers, there's a reasonable chance things will still
         * work, at least until we try to migrate.
         */
        for (i = 0; i < 1024; i++) {
            uint64_t id = env->spr_cb[i].one_reg_id;

            if (id != 0) {
                kvm_put_one_spr(cs, id, i);
            }
        }

#ifdef TARGET_PPC64
        if (msr_ts) {
            for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
                kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
            }
            for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
                kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
            }
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
        }

        if (cap_papr) {
            if (kvm_put_vpa(cs) < 0) {
                trace_kvm_failed_put_vpa();
            }
        }

        kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
#endif /* TARGET_PPC64 */
    }

    return ret;
}

static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
{
    env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
}

static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    struct kvm_sregs sregs;
    int ret;

    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
    if (ret < 0) {
        return ret;
    }

    if (sregs.u.e.features & KVM_SREGS_E_BASE) {
        env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
        env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
        env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
        env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
        env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
        env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
        env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
        env->spr[SPR_DECR] = sregs.u.e.dec;
        env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
        env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
        env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
    }

    if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
        env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
        env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
        env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
        env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
        env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
    }

    if (sregs.u.e.features & KVM_SREGS_E_64) {
        env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
    }

    if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
        env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
    }

    if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
        env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
        kvm_sync_excp(env, POWERPC_EXCP_CRITICAL, SPR_BOOKE_IVOR0);
        env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
        kvm_sync_excp(env, POWERPC_EXCP_MCHECK, SPR_BOOKE_IVOR1);
        env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
        kvm_sync_excp(env, POWERPC_EXCP_DSI, SPR_BOOKE_IVOR2);
        env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
        kvm_sync_excp(env, POWERPC_EXCP_ISI, SPR_BOOKE_IVOR3);
        env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
        kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL, SPR_BOOKE_IVOR4);
        env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
        kvm_sync_excp(env, POWERPC_EXCP_ALIGN, SPR_BOOKE_IVOR5);
        env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
        kvm_sync_excp(env, POWERPC_EXCP_PROGRAM, SPR_BOOKE_IVOR6);
        env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
        kvm_sync_excp(env, POWERPC_EXCP_FPU, SPR_BOOKE_IVOR7);
        env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
        kvm_sync_excp(env, POWERPC_EXCP_SYSCALL, SPR_BOOKE_IVOR8);
        env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
        kvm_sync_excp(env, POWERPC_EXCP_APU, SPR_BOOKE_IVOR9);
        env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
        kvm_sync_excp(env, POWERPC_EXCP_DECR, SPR_BOOKE_IVOR10);
        env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
        kvm_sync_excp(env, POWERPC_EXCP_FIT, SPR_BOOKE_IVOR11);
        env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
        kvm_sync_excp(env, POWERPC_EXCP_WDT, SPR_BOOKE_IVOR12);
        env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
        kvm_sync_excp(env, POWERPC_EXCP_DTLB, SPR_BOOKE_IVOR13);
        env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
        kvm_sync_excp(env, POWERPC_EXCP_ITLB, SPR_BOOKE_IVOR14);
        env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
        kvm_sync_excp(env, POWERPC_EXCP_DEBUG, SPR_BOOKE_IVOR15);

        if (sregs.u.e.features & KVM_SREGS_E_SPE) {
            env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
            kvm_sync_excp(env, POWERPC_EXCP_SPEU, SPR_BOOKE_IVOR32);
            env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
            kvm_sync_excp(env, POWERPC_EXCP_EFPDI, SPR_BOOKE_IVOR33);
            env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
            kvm_sync_excp(env, POWERPC_EXCP_EFPRI, SPR_BOOKE_IVOR34);
        }

        if (sregs.u.e.features & KVM_SREGS_E_PM) {
            env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
            kvm_sync_excp(env, POWERPC_EXCP_EPERFM, SPR_BOOKE_IVOR35);
        }

        if (sregs.u.e.features & KVM_SREGS_E_PC) {
            env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
            kvm_sync_excp(env, POWERPC_EXCP_DOORI, SPR_BOOKE_IVOR36);
            env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
            kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
        }
    }

    if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
        env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
        env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
        env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
        env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
        env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
        env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
        env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
        env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
        env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
        env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
    }

    if (sregs.u.e.features & KVM_SREGS_EXP) {
        env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
    }

    if (sregs.u.e.features & KVM_SREGS_E_PD) {
        env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
        env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
    }

    if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
        env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
        env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
        env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;

        if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
            env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
            env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
        }
    }

    return 0;
}

static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    struct kvm_sregs sregs;
    int ret;
    int i;

    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
    if (ret < 0) {
        return ret;
    }

    if (!cpu->vhyp) {
        ppc_store_sdr1(env, sregs.u.s.sdr1);
    }

    /* Sync SLB */
#ifdef TARGET_PPC64
    /*
     * The packed SLB array we get from KVM_GET_SREGS only contains
     * information about valid entries. So we flush our internal copy
     * to get rid of stale ones, then put all valid SLB entries back
     * in.
     */
    memset(env->slb, 0, sizeof(env->slb));
    for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
        target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
        target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
        /*
         * Only restore valid entries
         */
        if (rb & SLB_ESID_V) {
            ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
        }
    }
#endif

    /* Sync SRs */
    for (i = 0; i < 16; i++) {
        env->sr[i] = sregs.u.s.ppc32.sr[i];
    }

    /* Sync BATs */
    for (i = 0; i < 8; i++) {
        env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
        env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
        env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
        env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
    }

    return 0;
}

int kvm_arch_get_registers(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_regs regs;
    uint32_t cr;
    int i, ret;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    cr = regs.cr;
    for (i = 7; i >= 0; i--) {
        env->crf[i] = cr & 15;
        cr >>= 4;
    }

    env->ctr = regs.ctr;
    env->lr = regs.lr;
    cpu_write_xer(env, regs.xer);
    env->msr = regs.msr;
    env->nip = regs.pc;

    env->spr[SPR_SRR0] = regs.srr0;
    env->spr[SPR_SRR1] = regs.srr1;

    env->spr[SPR_SPRG0] = regs.sprg0;
    env->spr[SPR_SPRG1] = regs.sprg1;
    env->spr[SPR_SPRG2] = regs.sprg2;
    env->spr[SPR_SPRG3] = regs.sprg3;
    env->spr[SPR_SPRG4] = regs.sprg4;
    env->spr[SPR_SPRG5] = regs.sprg5;
    env->spr[SPR_SPRG6] = regs.sprg6;
    env->spr[SPR_SPRG7] = regs.sprg7;

    env->spr[SPR_BOOKE_PID] = regs.pid;

    for (i = 0; i < 32; i++) {
        env->gpr[i] = regs.gpr[i];
    }

    kvm_get_fp(cs);

    if (cap_booke_sregs) {
        ret = kvmppc_get_booke_sregs(cpu);
        if (ret < 0) {
            return ret;
        }
    }

    if (cap_segstate) {
        ret = kvmppc_get_books_sregs(cpu);
        if (ret < 0) {
            return ret;
        }
    }

    if (cap_hior) {
        kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
    }

    if (cap_one_reg) {
        int i;

        /*
         * We deliberately ignore errors here: for kernels which have
         * the ONE_REG calls but don't support the specific
         * registers, there's a reasonable chance things will still
         * work, at least until we try to migrate.
         */
        for (i = 0; i < 1024; i++) {
            uint64_t id = env->spr_cb[i].one_reg_id;

            if (id != 0) {
                kvm_get_one_spr(cs, id, i);
            }
        }

#ifdef TARGET_PPC64
        if (msr_ts) {
            for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
                kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
            }
            for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
                kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
            }
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
        }

        if (cap_papr) {
            if (kvm_get_vpa(cs) < 0) {
                trace_kvm_failed_get_vpa();
            }
        }

        kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
#endif
    }

    return 0;
}

int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
{
    unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;

    if (irq != PPC_INTERRUPT_EXT) {
        return 0;
    }

    if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
        return 0;
    }

    kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);

    return 0;
}

#if defined(TARGET_PPC64)
#define PPC_INPUT_INT PPC970_INPUT_INT
#else
#define PPC_INPUT_INT PPC6xx_INPUT_INT
#endif

void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    int r;
    unsigned irq;

    qemu_mutex_lock_iothread();

    /*
     * PowerPC QEMU tracks the various core input pins (interrupt,
     * critical interrupt, reset, etc) in PPC-specific
     * env->irq_input_state.
     */
    if (!cap_interrupt_level &&
        run->ready_for_interrupt_injection &&
        (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
        (env->irq_input_state & (1 << PPC_INPUT_INT)))
    {
        /*
         * For now KVM disregards the 'irq' argument. However, in the
         * future KVM could cache it in-kernel to avoid a heavyweight
         * exit when reading the UIC.
         */
        irq = KVM_INTERRUPT_SET;

        trace_kvm_injected_interrupt(irq);
        r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
        if (r < 0) {
            printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
        }

        /* Always wake up soon in case the interrupt was level based */
        timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
                  (NANOSECONDS_PER_SECOND / 50));
    }

    /*
     * We don't know if there are more interrupts pending after
     * this. However, the guest will return to userspace in the course
     * of handling this one anyway, so we will get a chance to
     * deliver the rest.
     */

    qemu_mutex_unlock_iothread();
}

MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
{
    return MEMTXATTRS_UNSPECIFIED;
}

int kvm_arch_process_async_events(CPUState *cs)
{
    return cs->halted;
}

static int kvmppc_handle_halt(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    CPUPPCState *env = &cpu->env;

    if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
        cs->halted = 1;
        cs->exception_index = EXCP_HLT;
    }

    return 0;
}

/* map dcr access to existing qemu dcr emulation */
static int kvmppc_handle_dcr_read(CPUPPCState *env,
                                  uint32_t dcrn, uint32_t *data)
{
    if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0) {
        fprintf(stderr, "Read from unhandled DCR (0x%x)\n", dcrn);
    }

    return 0;
}

static int kvmppc_handle_dcr_write(CPUPPCState *env,
                                   uint32_t dcrn, uint32_t data)
{
    if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0) {
        fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
    }

    return 0;
}

int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
{
    /* Mixed endian case is not handled */
    uint32_t sc = debug_inst_opcode;

    if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
                            sizeof(sc), 0) ||
        cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
        return -EINVAL;
    }

    return 0;
}

int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
{
    uint32_t sc;

    if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
        sc != debug_inst_opcode ||
        cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
                            sizeof(sc), 1)) {
        return -EINVAL;
    }

    return 0;
}

static int find_hw_breakpoint(target_ulong addr, int type)
{
    int n;

    assert((nb_hw_breakpoint + nb_hw_watchpoint)
           <= ARRAY_SIZE(hw_debug_points));

    for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
        if (hw_debug_points[n].addr == addr &&
            hw_debug_points[n].type == type) {
            return n;
        }
    }

    return -1;
}

static int find_hw_watchpoint(target_ulong addr, int *flag)
{
    int n;

    n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
    if (n >= 0) {
        *flag = BP_MEM_ACCESS;
        return n;
    }

    n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
    if (n >= 0) {
        *flag = BP_MEM_WRITE;
        return n;
    }

    n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
    if (n >= 0) {
        *flag = BP_MEM_READ;
        return n;
    }

    return -1;
}

int kvm_arch_insert_hw_breakpoint(target_ulong addr,
                                  target_ulong len, int type)
{
    if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
        return -ENOBUFS;
    }

    hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
    hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;

    switch (type) {
    case GDB_BREAKPOINT_HW:
        if (nb_hw_breakpoint >= max_hw_breakpoint) {
            return -ENOBUFS;
        }

        if (find_hw_breakpoint(addr, type) >= 0) {
            return -EEXIST;
        }

        nb_hw_breakpoint++;
        break;

    case GDB_WATCHPOINT_WRITE:
    case GDB_WATCHPOINT_READ:
    case GDB_WATCHPOINT_ACCESS:
        if (nb_hw_watchpoint >= max_hw_watchpoint) {
            return -ENOBUFS;
        }

        if (find_hw_breakpoint(addr, type) >= 0) {
            return -EEXIST;
        }

        nb_hw_watchpoint++;
        break;

    default:
        return -ENOSYS;
    }

    return 0;
}

int kvm_arch_remove_hw_breakpoint(target_ulong addr,
                                  target_ulong len, int type)
{
    int n;

    n = find_hw_breakpoint(addr, type);
    if (n < 0) {
        return -ENOENT;
    }

    switch (type) {
    case GDB_BREAKPOINT_HW:
        nb_hw_breakpoint--;
        break;

    case GDB_WATCHPOINT_WRITE:
    case GDB_WATCHPOINT_READ:
    case GDB_WATCHPOINT_ACCESS:
        nb_hw_watchpoint--;
        break;

    default:
        return -ENOSYS;
    }
    hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];

    return 0;
}

void kvm_arch_remove_all_hw_breakpoints(void)
{
    nb_hw_breakpoint = nb_hw_watchpoint = 0;
}

void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
{
    int n;

    /* Software Breakpoint updates */
    if (kvm_sw_breakpoints_active(cs)) {
        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
    }

    assert((nb_hw_breakpoint + nb_hw_watchpoint)
           <= ARRAY_SIZE(hw_debug_points));
    assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));

    if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
        memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
        for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
            switch (hw_debug_points[n].type) {
            case GDB_BREAKPOINT_HW:
                dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
                break;
            case GDB_WATCHPOINT_WRITE:
                dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
                break;
            case GDB_WATCHPOINT_READ:
                dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
                break;
            case GDB_WATCHPOINT_ACCESS:
                dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
                                       KVMPPC_DEBUG_WATCH_READ;
                break;
            default:
                cpu_abort(cs, "Unsupported breakpoint type\n");
            }
            dbg->arch.bp[n].addr = hw_debug_points[n].addr;
        }
    }
}

static int kvm_handle_hw_breakpoint(CPUState *cs,
                                    struct kvm_debug_exit_arch *arch_info)
{
    int handle = 0;
    int n;
    int flag = 0;

    if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
        if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
            n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
            if (n >= 0) {
                handle = 1;
            }
        } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
                                        KVMPPC_DEBUG_WATCH_WRITE)) {
            n = find_hw_watchpoint(arch_info->address, &flag);
            if (n >= 0) {
                handle = 1;
                cs->watchpoint_hit = &hw_watchpoint;
                hw_watchpoint.vaddr = hw_debug_points[n].addr;
                hw_watchpoint.flags = flag;
            }
        }
    }
    return handle;
}

static int kvm_handle_singlestep(void)
{
    return 1;
}

static int kvm_handle_sw_breakpoint(void)
{
    return 1;
}

static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
{
    CPUState *cs = CPU(cpu);
    CPUPPCState *env = &cpu->env;
    struct kvm_debug_exit_arch *arch_info = &run->debug.arch;

    if (cs->singlestep_enabled) {
        return kvm_handle_singlestep();
    }

    if (arch_info->status) {
        return kvm_handle_hw_breakpoint(cs, arch_info);
    }

    if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
        return kvm_handle_sw_breakpoint();
    }

    /*
     * QEMU is not able to handle the debug exception, so inject a
     * program exception into the guest -- yes, a program exception,
     * NOT a debug exception!
     *
     * When QEMU is using the debug resources, the debug exception must
     * always be enabled. To achieve this we set MSR_DE and also set
     * MSRP_DEP so the guest cannot change MSR_DE.
     * When emulating debug resources for the guest, we want the guest
     * to control MSR_DE (enable/disable the debug interrupt on demand).
     * Supporting both configurations at the same time is not possible,
     * so we cannot share debug resources between QEMU and the guest on
     * BookE. In the current design QEMU gets priority over the guest:
     * if QEMU is using the debug resources, the guest cannot use them.
     * For software breakpoints QEMU uses a privileged instruction, so
     * there is no way we got here because the guest raised a debug
     * exception; the only possibility is that the guest executed a
     * privileged / illegal instruction, and that's why we are
     * injecting a program interrupt.
     */
    cpu_synchronize_state(cs);
    /*
     * env->nip is PC, so increment this by 4 to use
     * ppc_cpu_do_interrupt(), which sets srr0 = env->nip - 4.
     */
    env->nip += 4;
    cs->exception_index = POWERPC_EXCP_PROGRAM;
    env->error_code = POWERPC_EXCP_INVAL;
    ppc_cpu_do_interrupt(cs);

    return 0;
}

int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    int ret;

    qemu_mutex_lock_iothread();

    switch (run->exit_reason) {
    case KVM_EXIT_DCR:
        if (run->dcr.is_write) {
            trace_kvm_handle_dcr_write();
            ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
        } else {
            trace_kvm_handle_dcr_read();
            ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
        }
        break;
    case KVM_EXIT_HLT:
        trace_kvm_handle_halt();
        ret = kvmppc_handle_halt(cpu);
        break;
#if defined(TARGET_PPC64)
    case KVM_EXIT_PAPR_HCALL:
        trace_kvm_handle_papr_hcall();
        run->papr_hcall.ret = spapr_hypercall(cpu,
                                              run->papr_hcall.nr,
                                              run->papr_hcall.args);
        ret = 0;
        break;
#endif
    case KVM_EXIT_EPR:
        trace_kvm_handle_epr();
        run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
        ret = 0;
        break;
    case KVM_EXIT_WATCHDOG:
        trace_kvm_handle_watchdog_expiry();
        watchdog_perform_action();
        ret = 0;
        break;

    case KVM_EXIT_DEBUG:
        trace_kvm_handle_debug_exception();
        if (kvm_handle_debug(cpu, run)) {
            ret = EXCP_DEBUG;
            break;
        }
        /* re-enter, this exception was guest-internal */
        ret = 0;
        break;

    default:
        fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
        ret = -1;
        break;
    }

    qemu_mutex_unlock_iothread();
    return ret;
}

int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
{
    CPUState *cs = CPU(cpu);
    uint32_t bits = tsr_bits;
    struct kvm_one_reg reg = {
        .id = KVM_REG_PPC_OR_TSR,
        .addr = (uintptr_t) &bits,
    };

    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
}

int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
{
    CPUState *cs = CPU(cpu);
    uint32_t bits = tsr_bits;
    struct kvm_one_reg reg = {
        .id = KVM_REG_PPC_CLEAR_TSR,
        .addr = (uintptr_t) &bits,
    };

    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
}

int kvmppc_set_tcr(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    CPUPPCState *env = &cpu->env;
    uint32_t tcr = env->spr[SPR_BOOKE_TCR];

    struct kvm_one_reg reg = {
        .id = KVM_REG_PPC_TCR,
        .addr = (uintptr_t) &tcr,
    };

    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
}

int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    int ret;

    if (!kvm_enabled()) {
        return -1;
    }

    if (!cap_ppc_watchdog) {
        printf("warning: KVM does not support watchdog");
        return -1;
    }

    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
    if (ret < 0) {
        fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
                __func__, strerror(-ret));
        return ret;
    }

    return ret;
}

static int read_cpuinfo(const char *field, char *value, int len)
{
    FILE *f;
    int ret = -1;
    int field_len = strlen(field);
    char line[512];

    f = fopen("/proc/cpuinfo", "r");
    if (!f) {
        return -1;
    }

    do {
        if (!fgets(line, sizeof(line), f)) {
            break;
        }
        if (!strncmp(line, field, field_len)) {
            pstrcpy(value, len, line);
            ret = 0;
            break;
        }
    } while (*line);

    fclose(f);

    return ret;
}

uint32_t kvmppc_get_tbfreq(void)
{
    char line[512];
    char *ns;
    uint32_t retval = NANOSECONDS_PER_SECOND;

    if (read_cpuinfo("timebase", line, sizeof(line))) {
        return retval;
    }

    ns = strchr(line, ':');
    if (!ns) {
        return retval;
    }

    ns++;

    return atoi(ns);
}

bool kvmppc_get_host_serial(char **value)
{
    return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
                               NULL);
}

bool kvmppc_get_host_model(char **value)
{
    return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
}

/* Try to find a device tree node for a CPU with clock-frequency property */
static int kvmppc_find_cpu_dt(char *buf, int buf_len)
{
    struct dirent *dirp;
    DIR *dp;

    dp = opendir(PROC_DEVTREE_CPU);
    if (!dp) {
        printf("Can't open directory " PROC_DEVTREE_CPU "\n");
        return -1;
    }

    buf[0] = '\0';
    while ((dirp = readdir(dp)) != NULL) {
        FILE *f;
        snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
                 dirp->d_name);
        f = fopen(buf, "r");
        if (f) {
            snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
            fclose(f);
            break;
        }
        buf[0] = '\0';
    }
    closedir(dp);
    if (buf[0] == '\0') {
        printf("Unknown host!\n");
        return -1;
    }

    return 0;
}

static uint64_t kvmppc_read_int_dt(const char *filename)
{
    union {
        uint32_t v32;
        uint64_t v64;
    } u;
    FILE *f;
    int len;

    f = fopen(filename, "rb");
    if (!f) {
        return -1;
    }

    len = fread(&u, 1, sizeof(u), f);
    fclose(f);
    switch (len) {
    case 4:
        /* property is a 32-bit quantity */
        return be32_to_cpu(u.v32);
    case 8:
        return be64_to_cpu(u.v64);
    }

    return 0;
}

/*
 * Read a CPU node property from the host device tree that's a
 * single integer (32-bit or 64-bit). Returns 0 if anything goes wrong
 * (can't find or open the property, or doesn't understand the format)
 */
static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
{
    char buf[PATH_MAX], *tmp;
    uint64_t val;

    if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
        return -1;
    }

    tmp = g_strdup_printf("%s/%s", buf, propname);
    val = kvmppc_read_int_dt(tmp);
    g_free(tmp);

    return val;
}

uint64_t kvmppc_get_clockfreq(void)
{
    return kvmppc_read_int_cpu_dt("clock-frequency");
}

static int kvmppc_get_dec_bits(void)
{
    int nr_bits = kvmppc_read_int_cpu_dt("ibm,dec-bits");

    if (nr_bits > 0) {
        return nr_bits;
    }
    return 0;
}

static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
{
    CPUState *cs = env_cpu(env);

    if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
        !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
        return 0;
    }

    return 1;
}

int kvmppc_get_hasidle(CPUPPCState *env)
{
    struct kvm_ppc_pvinfo pvinfo;

    if (!kvmppc_get_pvinfo(env, &pvinfo) &&
        (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
        return 1;
    }

    return 0;
}

int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
{
    uint32_t *hc = (uint32_t *)buf;
    struct kvm_ppc_pvinfo pvinfo;

    if (!kvmppc_get_pvinfo(env, &pvinfo)) {
        memcpy(buf, pvinfo.hcall, buf_len);
        return 0;
    }

    /*
     * Fallback to always fail hypercalls regardless of endianness:
     *
     *     tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
     *     li r3, -1
     *     b .+8 (becomes nop in wrong endian)
     *     bswap32(li r3, -1)
     */

    hc[0] = cpu_to_be32(0x08000048);
    hc[1] = cpu_to_be32(0x3860ffff);
    hc[2] = cpu_to_be32(0x48000008);
    hc[3] = cpu_to_be32(bswap32(0x3860ffff));

    return 1;
}

static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
{
    return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
}

void kvmppc_enable_logical_ci_hcalls(void)
{
    /*
     * FIXME: it would be nice if we could detect the cases where
     * we're using a device which requires the in-kernel
     * implementation of these hcalls, but the kernel lacks them,
     * and produce a warning in that case.
2062 */ 2063 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD); 2064 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE); 2065 } 2066 2067 void kvmppc_enable_set_mode_hcall(void) 2068 { 2069 kvmppc_enable_hcall(kvm_state, H_SET_MODE); 2070 } 2071 2072 void kvmppc_enable_clear_ref_mod_hcalls(void) 2073 { 2074 kvmppc_enable_hcall(kvm_state, H_CLEAR_REF); 2075 kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD); 2076 } 2077 2078 void kvmppc_enable_h_page_init(void) 2079 { 2080 kvmppc_enable_hcall(kvm_state, H_PAGE_INIT); 2081 } 2082 2083 void kvmppc_set_papr(PowerPCCPU *cpu) 2084 { 2085 CPUState *cs = CPU(cpu); 2086 int ret; 2087 2088 if (!kvm_enabled()) { 2089 return; 2090 } 2091 2092 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0); 2093 if (ret) { 2094 error_report("This vCPU type or KVM version does not support PAPR"); 2095 exit(1); 2096 } 2097 2098 /* 2099 * Update the capability flag so we sync the right information 2100 * with kvm 2101 */ 2102 cap_papr = 1; 2103 } 2104 2105 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr) 2106 { 2107 return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr); 2108 } 2109 2110 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy) 2111 { 2112 CPUState *cs = CPU(cpu); 2113 int ret; 2114 2115 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy); 2116 if (ret && mpic_proxy) { 2117 error_report("This KVM version does not support EPR"); 2118 exit(1); 2119 } 2120 } 2121 2122 int kvmppc_smt_threads(void) 2123 { 2124 return cap_ppc_smt ? cap_ppc_smt : 1; 2125 } 2126 2127 int kvmppc_set_smt_threads(int smt) 2128 { 2129 int ret; 2130 2131 ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_SMT, 0, smt, 0); 2132 if (!ret) { 2133 cap_ppc_smt = smt; 2134 } 2135 return ret; 2136 } 2137 2138 void kvmppc_hint_smt_possible(Error **errp) 2139 { 2140 int i; 2141 GString *g; 2142 char *s; 2143 2144 assert(kvm_enabled()); 2145 if (cap_ppc_smt_possible) { 2146 g = g_string_new("Available VSMT modes:"); 2147 for (i = 63; i >= 0; i--) { 2148 if ((1UL << i) & cap_ppc_smt_possible) { 2149 g_string_append_printf(g, " %lu", (1UL << i)); 2150 } 2151 } 2152 s = g_string_free(g, false); 2153 error_append_hint(errp, "%s.\n", s); 2154 g_free(s); 2155 } else { 2156 error_append_hint(errp, 2157 "This KVM seems to be too old to support VSMT.\n"); 2158 } 2159 } 2160 2161 2162 #ifdef TARGET_PPC64 2163 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift) 2164 { 2165 struct kvm_ppc_smmu_info info; 2166 long rampagesize, best_page_shift; 2167 int i; 2168 2169 /* 2170 * Find the largest hardware supported page size that's less than 2171 * or equal to the (logical) backing page size of guest RAM 2172 */ 2173 kvm_get_smmu_info(&info, &error_fatal); 2174 rampagesize = qemu_minrampagesize(); 2175 best_page_shift = 0; 2176 2177 for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) { 2178 struct kvm_ppc_one_seg_page_size *sps = &info.sps[i]; 2179 2180 if (!sps->page_shift) { 2181 continue; 2182 } 2183 2184 if ((sps->page_shift > best_page_shift) 2185 && ((1UL << sps->page_shift) <= rampagesize)) { 2186 best_page_shift = sps->page_shift; 2187 } 2188 } 2189 2190 return MIN(current_size, 2191 1ULL << (best_page_shift + hash_shift - 7)); 2192 } 2193 #endif 2194 2195 bool kvmppc_spapr_use_multitce(void) 2196 { 2197 return cap_spapr_multitce; 2198 } 2199 2200 int kvmppc_spapr_enable_inkernel_multitce(void) 2201 { 2202 int ret; 2203 2204 ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0, 2205 H_PUT_TCE_INDIRECT, 1); 2206 if (!ret) { 2207 ret = 
kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0, 2208 H_STUFF_TCE, 1); 2209 } 2210 2211 return ret; 2212 } 2213 2214 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift, 2215 uint64_t bus_offset, uint32_t nb_table, 2216 int *pfd, bool need_vfio) 2217 { 2218 long len; 2219 int fd; 2220 void *table; 2221 2222 /* 2223 * Must set fd to -1 so we don't try to munmap when called for 2224 * destroying the table, which the upper layers -will- do 2225 */ 2226 *pfd = -1; 2227 if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) { 2228 return NULL; 2229 } 2230 2231 if (cap_spapr_tce_64) { 2232 struct kvm_create_spapr_tce_64 args = { 2233 .liobn = liobn, 2234 .page_shift = page_shift, 2235 .offset = bus_offset >> page_shift, 2236 .size = nb_table, 2237 .flags = 0 2238 }; 2239 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE_64, &args); 2240 if (fd < 0) { 2241 fprintf(stderr, 2242 "KVM: Failed to create TCE64 table for liobn 0x%x\n", 2243 liobn); 2244 return NULL; 2245 } 2246 } else if (cap_spapr_tce) { 2247 uint64_t window_size = (uint64_t) nb_table << page_shift; 2248 struct kvm_create_spapr_tce args = { 2249 .liobn = liobn, 2250 .window_size = window_size, 2251 }; 2252 if ((window_size != args.window_size) || bus_offset) { 2253 return NULL; 2254 } 2255 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args); 2256 if (fd < 0) { 2257 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n", 2258 liobn); 2259 return NULL; 2260 } 2261 } else { 2262 return NULL; 2263 } 2264 2265 len = nb_table * sizeof(uint64_t); 2266 /* FIXME: round this up to page size */ 2267 2268 table = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); 2269 if (table == MAP_FAILED) { 2270 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n", 2271 liobn); 2272 close(fd); 2273 return NULL; 2274 } 2275 2276 *pfd = fd; 2277 return table; 2278 } 2279 2280 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table) 2281 { 2282 long len; 2283 2284 if (fd < 0) { 2285 return -1; 2286 } 2287 2288 len = nb_table * sizeof(uint64_t); 2289 if ((munmap(table, len) < 0) || 2290 (close(fd) < 0)) { 2291 fprintf(stderr, "KVM: Unexpected error removing TCE table: %s", 2292 strerror(errno)); 2293 /* Leak the table */ 2294 } 2295 2296 return 0; 2297 } 2298 2299 int kvmppc_reset_htab(int shift_hint) 2300 { 2301 uint32_t shift = shift_hint; 2302 2303 if (!kvm_enabled()) { 2304 /* Full emulation, tell caller to allocate htab itself */ 2305 return 0; 2306 } 2307 if (kvm_vm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) { 2308 int ret; 2309 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift); 2310 if (ret == -ENOTTY) { 2311 /* 2312 * At least some versions of PR KVM advertise the 2313 * capability, but don't implement the ioctl(). Oops. 2314 * Return 0 so that we allocate the htab in qemu, as is 2315 * correct for PR. 2316 */ 2317 return 0; 2318 } else if (ret < 0) { 2319 return ret; 2320 } 2321 return shift; 2322 } 2323 2324 /* 2325 * We have a kernel that predates the htab reset calls. For PR 2326 * KVM, we need to allocate the htab ourselves, for an HV KVM of 2327 * this era, it has allocated a 16MB fixed size hash table 2328 * already. 
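* A 16 MiB HPT is 2^24 bytes, which is why the HV fallback below reports a shift of 24.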
2329 */ 2330 if (kvmppc_is_pr(kvm_state)) { 2331 /* PR - tell caller to allocate htab */ 2332 return 0; 2333 } else { 2334 /* HV - assume 16MB kernel allocated htab */ 2335 return 24; 2336 } 2337 } 2338 2339 static inline uint32_t mfpvr(void) 2340 { 2341 uint32_t pvr; 2342 2343 asm ("mfpvr %0" 2344 : "=r"(pvr)); 2345 return pvr; 2346 } 2347 2348 static void alter_insns(uint64_t *word, uint64_t flags, bool on) 2349 { 2350 if (on) { 2351 *word |= flags; 2352 } else { 2353 *word &= ~flags; 2354 } 2355 } 2356 2357 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data) 2358 { 2359 PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc); 2360 uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size"); 2361 uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size"); 2362 2363 /* Now fix up the class with information we can query from the host */ 2364 pcc->pvr = mfpvr(); 2365 2366 alter_insns(&pcc->insns_flags, PPC_ALTIVEC, 2367 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_ALTIVEC); 2368 alter_insns(&pcc->insns_flags2, PPC2_VSX, 2369 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_VSX); 2370 alter_insns(&pcc->insns_flags2, PPC2_DFP, 2371 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_DFP); 2372 2373 if (dcache_size != -1) { 2374 pcc->l1_dcache_size = dcache_size; 2375 } 2376 2377 if (icache_size != -1) { 2378 pcc->l1_icache_size = icache_size; 2379 } 2380 2381 #if defined(TARGET_PPC64) 2382 pcc->radix_page_info = kvm_get_radix_page_info(); 2383 2384 if ((pcc->pvr & 0xffffff00) == CPU_POWERPC_POWER9_DD1) { 2385 /* 2386 * POWER9 DD1 has some bugs which make it not really ISA 3.00 2387 * compliant. More importantly, advertising ISA 3.00 2388 * architected mode may prevent guests from activating 2389 * necessary DD1 workarounds. 2390 */ 2391 pcc->pcr_supported &= ~(PCR_COMPAT_3_00 | PCR_COMPAT_2_07 2392 | PCR_COMPAT_2_06 | PCR_COMPAT_2_05); 2393 } 2394 #endif /* defined(TARGET_PPC64) */ 2395 } 2396 2397 bool kvmppc_has_cap_epr(void) 2398 { 2399 return cap_epr; 2400 } 2401 2402 bool kvmppc_has_cap_fixup_hcalls(void) 2403 { 2404 return cap_fixup_hcalls; 2405 } 2406 2407 bool kvmppc_has_cap_htm(void) 2408 { 2409 return cap_htm; 2410 } 2411 2412 bool kvmppc_has_cap_mmu_radix(void) 2413 { 2414 return cap_mmu_radix; 2415 } 2416 2417 bool kvmppc_has_cap_mmu_hash_v3(void) 2418 { 2419 return cap_mmu_hash_v3; 2420 } 2421 2422 static bool kvmppc_power8_host(void) 2423 { 2424 bool ret = false; 2425 #ifdef TARGET_PPC64 2426 { 2427 uint32_t base_pvr = CPU_POWERPC_POWER_SERVER_MASK & mfpvr(); 2428 ret = (base_pvr == CPU_POWERPC_POWER8E_BASE) || 2429 (base_pvr == CPU_POWERPC_POWER8NVL_BASE) || 2430 (base_pvr == CPU_POWERPC_POWER8_BASE); 2431 } 2432 #endif /* TARGET_PPC64 */ 2433 return ret; 2434 } 2435 2436 static int parse_cap_ppc_safe_cache(struct kvm_ppc_cpu_char c) 2437 { 2438 bool l1d_thread_priv_req = !kvmppc_power8_host(); 2439 2440 if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_L1D_FLUSH_PR) { 2441 return 2; 2442 } else if ((!l1d_thread_priv_req || 2443 c.character & c.character_mask & H_CPU_CHAR_L1D_THREAD_PRIV) && 2444 (c.character & c.character_mask 2445 & (H_CPU_CHAR_L1D_FLUSH_ORI30 | H_CPU_CHAR_L1D_FLUSH_TRIG2))) { 2446 return 1; 2447 } 2448 2449 return 0; 2450 } 2451 2452 static int parse_cap_ppc_safe_bounds_check(struct kvm_ppc_cpu_char c) 2453 { 2454 if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_BNDS_CHK_SPEC_BAR) { 2455 return 2; 2456 } else if (c.character & c.character_mask & H_CPU_CHAR_SPEC_BAR_ORI31) { 2457 return 1; 2458 } 2459 2460 return 0; 2461 } 2462 2463 static int 
parse_cap_ppc_safe_indirect_branch(struct kvm_ppc_cpu_char c) 2464 { 2465 if ((~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_FLUSH_COUNT_CACHE) && 2466 (~c.character & c.character_mask & H_CPU_CHAR_CACHE_COUNT_DIS) && 2467 (~c.character & c.character_mask & H_CPU_CHAR_BCCTRL_SERIALISED)) { 2468 return SPAPR_CAP_FIXED_NA; 2469 } else if (c.behaviour & c.behaviour_mask & H_CPU_BEHAV_FLUSH_COUNT_CACHE) { 2470 return SPAPR_CAP_WORKAROUND; 2471 } else if (c.character & c.character_mask & H_CPU_CHAR_CACHE_COUNT_DIS) { 2472 return SPAPR_CAP_FIXED_CCD; 2473 } else if (c.character & c.character_mask & H_CPU_CHAR_BCCTRL_SERIALISED) { 2474 return SPAPR_CAP_FIXED_IBS; 2475 } 2476 2477 return 0; 2478 } 2479 2480 static int parse_cap_ppc_count_cache_flush_assist(struct kvm_ppc_cpu_char c) 2481 { 2482 if (c.character & c.character_mask & H_CPU_CHAR_BCCTR_FLUSH_ASSIST) { 2483 return 1; 2484 } 2485 return 0; 2486 } 2487 2488 bool kvmppc_has_cap_xive(void) 2489 { 2490 return cap_xive; 2491 } 2492 2493 static void kvmppc_get_cpu_characteristics(KVMState *s) 2494 { 2495 struct kvm_ppc_cpu_char c; 2496 int ret; 2497 2498 /* Assume broken */ 2499 cap_ppc_safe_cache = 0; 2500 cap_ppc_safe_bounds_check = 0; 2501 cap_ppc_safe_indirect_branch = 0; 2502 2503 ret = kvm_vm_check_extension(s, KVM_CAP_PPC_GET_CPU_CHAR); 2504 if (!ret) { 2505 return; 2506 } 2507 ret = kvm_vm_ioctl(s, KVM_PPC_GET_CPU_CHAR, &c); 2508 if (ret < 0) { 2509 return; 2510 } 2511 2512 cap_ppc_safe_cache = parse_cap_ppc_safe_cache(c); 2513 cap_ppc_safe_bounds_check = parse_cap_ppc_safe_bounds_check(c); 2514 cap_ppc_safe_indirect_branch = parse_cap_ppc_safe_indirect_branch(c); 2515 cap_ppc_count_cache_flush_assist = 2516 parse_cap_ppc_count_cache_flush_assist(c); 2517 } 2518 2519 int kvmppc_get_cap_safe_cache(void) 2520 { 2521 return cap_ppc_safe_cache; 2522 } 2523 2524 int kvmppc_get_cap_safe_bounds_check(void) 2525 { 2526 return cap_ppc_safe_bounds_check; 2527 } 2528 2529 int kvmppc_get_cap_safe_indirect_branch(void) 2530 { 2531 return cap_ppc_safe_indirect_branch; 2532 } 2533 2534 int kvmppc_get_cap_count_cache_flush_assist(void) 2535 { 2536 return cap_ppc_count_cache_flush_assist; 2537 } 2538 2539 bool kvmppc_has_cap_nested_kvm_hv(void) 2540 { 2541 return !!cap_ppc_nested_kvm_hv; 2542 } 2543 2544 int kvmppc_set_cap_nested_kvm_hv(int enable) 2545 { 2546 return kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_NESTED_HV, 0, enable); 2547 } 2548 2549 bool kvmppc_has_cap_spapr_vfio(void) 2550 { 2551 return cap_spapr_vfio; 2552 } 2553 2554 int kvmppc_get_cap_large_decr(void) 2555 { 2556 return cap_large_decr; 2557 } 2558 2559 int kvmppc_enable_cap_large_decr(PowerPCCPU *cpu, int enable) 2560 { 2561 CPUState *cs = CPU(cpu); 2562 uint64_t lpcr; 2563 2564 kvm_get_one_reg(cs, KVM_REG_PPC_LPCR_64, &lpcr); 2565 /* Do we need to modify the LPCR? 
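LPCR_LD is the large-decrementer enable bit, so only rewrite the register when the requested state differs from what is currently set, then read it back to check that the change actually took effect.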
*/ 2566 if (!!(lpcr & LPCR_LD) != !!enable) { 2567 if (enable) { 2568 lpcr |= LPCR_LD; 2569 } else { 2570 lpcr &= ~LPCR_LD; 2571 } 2572 kvm_set_one_reg(cs, KVM_REG_PPC_LPCR_64, &lpcr); 2573 kvm_get_one_reg(cs, KVM_REG_PPC_LPCR_64, &lpcr); 2574 2575 if (!!(lpcr & LPCR_LD) != !!enable) { 2576 return -1; 2577 } 2578 } 2579 2580 return 0; 2581 } 2582 2583 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void) 2584 { 2585 uint32_t host_pvr = mfpvr(); 2586 PowerPCCPUClass *pvr_pcc; 2587 2588 pvr_pcc = ppc_cpu_class_by_pvr(host_pvr); 2589 if (pvr_pcc == NULL) { 2590 pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr); 2591 } 2592 2593 return pvr_pcc; 2594 } 2595 2596 static int kvm_ppc_register_host_cpu_type(MachineState *ms) 2597 { 2598 TypeInfo type_info = { 2599 .name = TYPE_HOST_POWERPC_CPU, 2600 .class_init = kvmppc_host_cpu_class_init, 2601 }; 2602 MachineClass *mc = MACHINE_GET_CLASS(ms); 2603 PowerPCCPUClass *pvr_pcc; 2604 ObjectClass *oc; 2605 DeviceClass *dc; 2606 int i; 2607 2608 pvr_pcc = kvm_ppc_get_host_cpu_class(); 2609 if (pvr_pcc == NULL) { 2610 return -1; 2611 } 2612 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc)); 2613 type_register(&type_info); 2614 if (object_dynamic_cast(OBJECT(ms), TYPE_SPAPR_MACHINE)) { 2615 /* override TCG default cpu type with 'host' cpu model */ 2616 mc->default_cpu_type = TYPE_HOST_POWERPC_CPU; 2617 } 2618 2619 oc = object_class_by_name(type_info.name); 2620 g_assert(oc); 2621 2622 /* 2623 * Update generic CPU family class alias (e.g. on a POWER8NVL host, 2624 * we want "POWER8" to be a "family" alias that points to the current 2625 * host CPU type, too) 2626 */ 2627 dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc)); 2628 for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) { 2629 if (strcasecmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) { 2630 char *suffix; 2631 2632 ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc)); 2633 suffix = strstr(ppc_cpu_aliases[i].model, POWERPC_CPU_TYPE_SUFFIX); 2634 if (suffix) { 2635 *suffix = 0; 2636 } 2637 break; 2638 } 2639 } 2640 2641 return 0; 2642 } 2643 2644 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function) 2645 { 2646 struct kvm_rtas_token_args args = { 2647 .token = token, 2648 }; 2649 2650 if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) { 2651 return -ENOENT; 2652 } 2653 2654 strncpy(args.name, function, sizeof(args.name) - 1); 2655 2656 return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args); 2657 } 2658 2659 int kvmppc_get_htab_fd(bool write, uint64_t index, Error **errp) 2660 { 2661 struct kvm_get_htab_fd s = { 2662 .flags = write ? KVM_GET_HTAB_WRITE : 0, 2663 .start_index = index, 2664 }; 2665 int ret; 2666 2667 if (!cap_htab_fd) { 2668 error_setg(errp, "KVM version doesn't support %s the HPT", 2669 write ? "writing" : "reading"); 2670 return -ENOTSUP; 2671 } 2672 2673 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s); 2674 if (ret < 0) { 2675 error_setg(errp, "Unable to open fd for %s HPT %s KVM: %s", 2676 write ? "writing" : "reading", write ? 
"to" : "from", 2677 strerror(errno)); 2678 return -errno; 2679 } 2680 2681 return ret; 2682 } 2683 2684 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns) 2685 { 2686 int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); 2687 uint8_t buf[bufsize]; 2688 ssize_t rc; 2689 2690 do { 2691 rc = read(fd, buf, bufsize); 2692 if (rc < 0) { 2693 fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n", 2694 strerror(errno)); 2695 return rc; 2696 } else if (rc) { 2697 uint8_t *buffer = buf; 2698 ssize_t n = rc; 2699 while (n) { 2700 struct kvm_get_htab_header *head = 2701 (struct kvm_get_htab_header *) buffer; 2702 size_t chunksize = sizeof(*head) + 2703 HASH_PTE_SIZE_64 * head->n_valid; 2704 2705 qemu_put_be32(f, head->index); 2706 qemu_put_be16(f, head->n_valid); 2707 qemu_put_be16(f, head->n_invalid); 2708 qemu_put_buffer(f, (void *)(head + 1), 2709 HASH_PTE_SIZE_64 * head->n_valid); 2710 2711 buffer += chunksize; 2712 n -= chunksize; 2713 } 2714 } 2715 } while ((rc != 0) 2716 && ((max_ns < 0) || 2717 ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns))); 2718 2719 return (rc == 0) ? 1 : 0; 2720 } 2721 2722 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index, 2723 uint16_t n_valid, uint16_t n_invalid) 2724 { 2725 struct kvm_get_htab_header *buf; 2726 size_t chunksize = sizeof(*buf) + n_valid * HASH_PTE_SIZE_64; 2727 ssize_t rc; 2728 2729 buf = alloca(chunksize); 2730 buf->index = index; 2731 buf->n_valid = n_valid; 2732 buf->n_invalid = n_invalid; 2733 2734 qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64 * n_valid); 2735 2736 rc = write(fd, buf, chunksize); 2737 if (rc < 0) { 2738 fprintf(stderr, "Error writing KVM hash table: %s\n", 2739 strerror(errno)); 2740 return rc; 2741 } 2742 if (rc != chunksize) { 2743 /* We should never get a short write on a single chunk */ 2744 fprintf(stderr, "Short write, restoring KVM hash table\n"); 2745 return -1; 2746 } 2747 return 0; 2748 } 2749 2750 bool kvm_arch_stop_on_emulation_error(CPUState *cpu) 2751 { 2752 return true; 2753 } 2754 2755 void kvm_arch_init_irq_routing(KVMState *s) 2756 { 2757 } 2758 2759 void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n) 2760 { 2761 int fd, rc; 2762 int i; 2763 2764 fd = kvmppc_get_htab_fd(false, ptex, &error_abort); 2765 2766 i = 0; 2767 while (i < n) { 2768 struct kvm_get_htab_header *hdr; 2769 int m = n < HPTES_PER_GROUP ? 
n : HPTES_PER_GROUP; 2770 char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64]; 2771 2772 rc = read(fd, buf, sizeof(buf)); 2773 if (rc < 0) { 2774 hw_error("kvmppc_read_hptes: Unable to read HPTEs"); 2775 } 2776 2777 hdr = (struct kvm_get_htab_header *)buf; 2778 while ((i < n) && ((char *)hdr < (buf + rc))) { 2779 int invalid = hdr->n_invalid, valid = hdr->n_valid; 2780 2781 if (hdr->index != (ptex + i)) { 2782 hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32 2783 " != (%"HWADDR_PRIu" + %d", hdr->index, ptex, i); 2784 } 2785 2786 if (n - i < valid) { 2787 valid = n - i; 2788 } 2789 memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * valid); 2790 i += valid; 2791 2792 if ((n - i) < invalid) { 2793 invalid = n - i; 2794 } 2795 memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64); 2796 i += invalid; 2797 2798 hdr = (struct kvm_get_htab_header *) 2799 ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid); 2800 } 2801 } 2802 2803 close(fd); 2804 } 2805 2806 void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1) 2807 { 2808 int fd, rc; 2809 struct { 2810 struct kvm_get_htab_header hdr; 2811 uint64_t pte0; 2812 uint64_t pte1; 2813 } buf; 2814 2815 fd = kvmppc_get_htab_fd(true, 0 /* Ignored */, &error_abort); 2816 2817 buf.hdr.n_valid = 1; 2818 buf.hdr.n_invalid = 0; 2819 buf.hdr.index = ptex; 2820 buf.pte0 = cpu_to_be64(pte0); 2821 buf.pte1 = cpu_to_be64(pte1); 2822 2823 rc = write(fd, &buf, sizeof(buf)); 2824 if (rc != sizeof(buf)) { 2825 hw_error("kvmppc_write_hpte: Unable to update KVM HPT"); 2826 } 2827 close(fd); 2828 } 2829 2830 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route, 2831 uint64_t address, uint32_t data, PCIDevice *dev) 2832 { 2833 return 0; 2834 } 2835 2836 int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route, 2837 int vector, PCIDevice *dev) 2838 { 2839 return 0; 2840 } 2841 2842 int kvm_arch_release_virq_post(int virq) 2843 { 2844 return 0; 2845 } 2846 2847 int kvm_arch_msi_data_to_gsi(uint32_t data) 2848 { 2849 return data & 0xffff; 2850 } 2851 2852 int kvmppc_enable_hwrng(void) 2853 { 2854 if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) { 2855 return -1; 2856 } 2857 2858 return kvmppc_enable_hcall(kvm_state, H_RANDOM); 2859 } 2860 2861 void kvmppc_check_papr_resize_hpt(Error **errp) 2862 { 2863 if (!kvm_enabled()) { 2864 return; /* No KVM, we're good */ 2865 } 2866 2867 if (cap_resize_hpt) { 2868 return; /* Kernel has explicit support, we're good */ 2869 } 2870 2871 /* Otherwise fallback on looking for PR KVM */ 2872 if (kvmppc_is_pr(kvm_state)) { 2873 return; 2874 } 2875 2876 error_setg(errp, 2877 "Hash page table resizing not available with this KVM version"); 2878 } 2879 2880 int kvmppc_resize_hpt_prepare(PowerPCCPU *cpu, target_ulong flags, int shift) 2881 { 2882 CPUState *cs = CPU(cpu); 2883 struct kvm_ppc_resize_hpt rhpt = { 2884 .flags = flags, 2885 .shift = shift, 2886 }; 2887 2888 if (!cap_resize_hpt) { 2889 return -ENOSYS; 2890 } 2891 2892 return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_PREPARE, &rhpt); 2893 } 2894 2895 int kvmppc_resize_hpt_commit(PowerPCCPU *cpu, target_ulong flags, int shift) 2896 { 2897 CPUState *cs = CPU(cpu); 2898 struct kvm_ppc_resize_hpt rhpt = { 2899 .flags = flags, 2900 .shift = shift, 2901 }; 2902 2903 if (!cap_resize_hpt) { 2904 return -ENOSYS; 2905 } 2906 2907 return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_COMMIT, &rhpt); 2908 } 2909 2910 /* 2911 * This is a helper function to detect a post migration scenario 2912 * in which a guest, running as KVM-HV, freezes 
in cpu_post_load because 2913 * the guest kernel can't handle a PVR value other than the actual host 2914 * PVR in KVM_SET_SREGS, even if pvr_match() returns true. 2915 * 2916 * If we don't have cap_ppc_pvr_compat and we're not running in PR 2917 * (so, we're HV), return true. The workaround itself is done in 2918 * cpu_post_load. 2919 * 2920 * The order here is important: we'll only check for KVM PR as a 2921 * fallback if the guest kernel can't handle the situation itself. 2922 * We need to avoid as much as possible querying the running KVM type 2923 * in QEMU level. 2924 */ 2925 bool kvmppc_pvr_workaround_required(PowerPCCPU *cpu) 2926 { 2927 CPUState *cs = CPU(cpu); 2928 2929 if (!kvm_enabled()) { 2930 return false; 2931 } 2932 2933 if (cap_ppc_pvr_compat) { 2934 return false; 2935 } 2936 2937 return !kvmppc_is_pr(cs->kvm_state); 2938 } 2939 2940 void kvmppc_set_reg_ppc_online(PowerPCCPU *cpu, unsigned int online) 2941 { 2942 CPUState *cs = CPU(cpu); 2943 2944 if (kvm_enabled()) { 2945 kvm_set_one_reg(cs, KVM_REG_PPC_ONLINE, &online); 2946 } 2947 } 2948 2949 void kvmppc_set_reg_tb_offset(PowerPCCPU *cpu, int64_t tb_offset) 2950 { 2951 CPUState *cs = CPU(cpu); 2952 2953 if (kvm_enabled()) { 2954 kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &tb_offset); 2955 } 2956 } 2957