1 /* 2 * PowerPC implementation of KVM hooks 3 * 4 * Copyright IBM Corp. 2007 5 * Copyright (C) 2011 Freescale Semiconductor, Inc. 6 * 7 * Authors: 8 * Jerone Young <jyoung5@us.ibm.com> 9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com> 10 * Hollis Blanchard <hollisb@us.ibm.com> 11 * 12 * This work is licensed under the terms of the GNU GPL, version 2 or later. 13 * See the COPYING file in the top-level directory. 14 * 15 */ 16 17 #include "qemu/osdep.h" 18 #include <dirent.h> 19 #include <sys/ioctl.h> 20 #include <sys/vfs.h> 21 22 #include <linux/kvm.h> 23 24 #include "qemu-common.h" 25 #include "qapi/error.h" 26 #include "qemu/error-report.h" 27 #include "cpu.h" 28 #include "cpu-models.h" 29 #include "qemu/timer.h" 30 #include "sysemu/sysemu.h" 31 #include "sysemu/hw_accel.h" 32 #include "kvm_ppc.h" 33 #include "sysemu/cpus.h" 34 #include "sysemu/device_tree.h" 35 #include "mmu-hash64.h" 36 37 #include "hw/sysbus.h" 38 #include "hw/ppc/spapr.h" 39 #include "hw/ppc/spapr_vio.h" 40 #include "hw/ppc/spapr_cpu_core.h" 41 #include "hw/ppc/ppc.h" 42 #include "sysemu/watchdog.h" 43 #include "trace.h" 44 #include "exec/gdbstub.h" 45 #include "exec/memattrs.h" 46 #include "exec/ram_addr.h" 47 #include "sysemu/hostmem.h" 48 #include "qemu/cutils.h" 49 #include "qemu/mmap-alloc.h" 50 #include "elf.h" 51 #include "sysemu/kvm_int.h" 52 53 //#define DEBUG_KVM 54 55 #ifdef DEBUG_KVM 56 #define DPRINTF(fmt, ...) \ 57 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0) 58 #else 59 #define DPRINTF(fmt, ...) \ 60 do { } while (0) 61 #endif 62 63 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/" 64 65 const KVMCapabilityInfo kvm_arch_required_capabilities[] = { 66 KVM_CAP_LAST_INFO 67 }; 68 69 static int cap_interrupt_unset = false; 70 static int cap_interrupt_level = false; 71 static int cap_segstate; 72 static int cap_booke_sregs; 73 static int cap_ppc_smt; 74 static int cap_ppc_smt_possible; 75 static int cap_spapr_tce; 76 static int cap_spapr_tce_64; 77 static int cap_spapr_multitce; 78 static int cap_spapr_vfio; 79 static int cap_hior; 80 static int cap_one_reg; 81 static int cap_epr; 82 static int cap_ppc_watchdog; 83 static int cap_papr; 84 static int cap_htab_fd; 85 static int cap_fixup_hcalls; 86 static int cap_htm; /* Hardware transactional memory support */ 87 static int cap_mmu_radix; 88 static int cap_mmu_hash_v3; 89 static int cap_resize_hpt; 90 static int cap_ppc_pvr_compat; 91 static int cap_ppc_safe_cache; 92 static int cap_ppc_safe_bounds_check; 93 static int cap_ppc_safe_indirect_branch; 94 95 static uint32_t debug_inst_opcode; 96 97 /* XXX We have a race condition where we actually have a level triggered 98 * interrupt, but the infrastructure can't expose that yet, so the guest 99 * takes but ignores it, goes to sleep and never gets notified that there's 100 * still an interrupt pending. 101 * 102 * As a quick workaround, let's just wake up again 20 ms after we injected 103 * an interrupt. That way we can assure that we're always reinjecting 104 * interrupts in case the guest swallowed them. 105 */ 106 static QEMUTimer *idle_timer; 107 108 static void kvm_kick_cpu(void *opaque) 109 { 110 PowerPCCPU *cpu = opaque; 111 112 qemu_cpu_kick(CPU(cpu)); 113 } 114 115 /* Check whether we are running with KVM-PR (instead of KVM-HV). This 116 * should only be used for fallback tests - generally we should use 117 * explicit capabilities for the features we want, rather than 118 * assuming what is/isn't available depending on the KVM variant. 
 */
static bool kvmppc_is_pr(KVMState *ks)
{
    /* Assume KVM-PR if the GET_PVINFO capability is available */
    return kvm_vm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
}

static int kvm_ppc_register_host_cpu_type(MachineState *ms);
static void kvmppc_get_cpu_characteristics(KVMState *s);

int kvm_arch_init(MachineState *ms, KVMState *s)
{
    cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
    cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
    cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
    cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
    cap_ppc_smt_possible = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT_POSSIBLE);
    cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
    cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64);
    cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
    cap_spapr_vfio = kvm_vm_check_extension(s, KVM_CAP_SPAPR_TCE_VFIO);
    cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
    cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
    cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
    cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
    /* Note: we don't set cap_papr here, because this capability is
     * only activated after this by kvmppc_set_papr() */
    cap_htab_fd = kvm_vm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
    cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
    cap_ppc_smt = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT);
    cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
    cap_mmu_radix = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_RADIX);
    cap_mmu_hash_v3 = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_HASH_V3);
    cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT);
    kvmppc_get_cpu_characteristics(s);
    /*
     * Note: setting it to false because there is no such capability
     * in KVM at this moment.
     *
     * TODO: call kvm_vm_check_extension() with the right capability
     * once the kernel starts implementing it.
     */
    cap_ppc_pvr_compat = false;

    if (!cap_interrupt_level) {
        fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
                        "VM to stall at times!\n");
    }

    kvm_ppc_register_host_cpu_type(ms);

    return 0;
}

int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
{
    return 0;
}

static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
{
    CPUPPCState *cenv = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_sregs sregs;
    int ret;

    if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
        /* What we're really trying to say is "if we're on BookE, we use
           the native PVR for now". This is the only sane way to check
           it though, even if it may confuse users into thinking they
           can run BookE guests on BookS. Let's hope nobody dares
           enough :) */
        return 0;
    } else {
        if (!cap_segstate) {
            fprintf(stderr, "kvm error: missing PVR setting capability\n");
            return -ENOSYS;
        }
    }

    ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
    if (ret) {
        return ret;
    }

    sregs.pvr = cenv->spr[SPR_PVR];
    return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
}
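/*
 * Note on the KVM_CAP_SW_TLB handshake set up below: the TLB array
 * passed in kvm_config_tlb is QEMU's own env->tlb.tlbm storage, so
 * (as the configuration suggests) kernel and userspace share the same
 * entries in place, and QEMU only has to flag the entries it dirtied
 * via KVM_DIRTY_TLB (see kvm_sw_tlb_put() further down) instead of
 * copying the whole software TLB on every synchronization.
 */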
/* Set up a shared TLB array with KVM */
static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_book3e_206_tlb_params params = {};
    struct kvm_config_tlb cfg = {};
    unsigned int entries = 0;
    int ret, i;

    if (!kvm_enabled() ||
        !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
        return 0;
    }

    assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);

    for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
        params.tlb_sizes[i] = booke206_tlb_size(env, i);
        params.tlb_ways[i] = booke206_tlb_ways(env, i);
        entries += params.tlb_sizes[i];
    }

    assert(entries == env->nb_tlb);
    assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));

    env->tlb_dirty = true;

    cfg.array = (uintptr_t)env->tlb.tlbm;
    cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
    cfg.params = (uintptr_t)&params;
    cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;

    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
    if (ret < 0) {
        fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
                __func__, strerror(-ret));
        return ret;
    }

    env->kvm_sw_tlb = true;
    return 0;
}


#if defined(TARGET_PPC64)
static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
                                       struct kvm_ppc_smmu_info *info)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);

    memset(info, 0, sizeof(*info));

    /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so we
     * need to "guess" what the supported page sizes are.
     *
     * For that to work we make a few assumptions:
     *
     * - Check whether we are running "PR" KVM which only supports 4K
     *   and 16M pages, but supports them regardless of the backing
     *   store characteristics. We also don't support 1T segments.
     *
     *   This is safe as if HV KVM ever supports that capability or PR
     *   KVM grows support for more page/segment sizes, those versions
     *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
     *   will not hit this fallback
     *
     * - Else we are running HV KVM. This means we only support page
     *   sizes that fit in the backing store. Additionally we only
     *   advertise 64K pages if the processor is ARCH 2.06 and we assume
     *   P7 encodings for the SLB and hash table. Here too, we assume
     *   support for any newer processor will mean a kernel that
     *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
     *   this fallback.
280 */ 281 if (kvmppc_is_pr(cs->kvm_state)) { 282 /* No flags */ 283 info->flags = 0; 284 info->slb_size = 64; 285 286 /* Standard 4k base page size segment */ 287 info->sps[0].page_shift = 12; 288 info->sps[0].slb_enc = 0; 289 info->sps[0].enc[0].page_shift = 12; 290 info->sps[0].enc[0].pte_enc = 0; 291 292 /* Standard 16M large page size segment */ 293 info->sps[1].page_shift = 24; 294 info->sps[1].slb_enc = SLB_VSID_L; 295 info->sps[1].enc[0].page_shift = 24; 296 info->sps[1].enc[0].pte_enc = 0; 297 } else { 298 int i = 0; 299 300 /* HV KVM has backing store size restrictions */ 301 info->flags = KVM_PPC_PAGE_SIZES_REAL; 302 303 if (ppc_hash64_has(cpu, PPC_HASH64_1TSEG)) { 304 info->flags |= KVM_PPC_1T_SEGMENTS; 305 } 306 307 if (env->mmu_model == POWERPC_MMU_2_06 || 308 env->mmu_model == POWERPC_MMU_2_07) { 309 info->slb_size = 32; 310 } else { 311 info->slb_size = 64; 312 } 313 314 /* Standard 4k base page size segment */ 315 info->sps[i].page_shift = 12; 316 info->sps[i].slb_enc = 0; 317 info->sps[i].enc[0].page_shift = 12; 318 info->sps[i].enc[0].pte_enc = 0; 319 i++; 320 321 /* 64K on MMU 2.06 and later */ 322 if (env->mmu_model == POWERPC_MMU_2_06 || 323 env->mmu_model == POWERPC_MMU_2_07) { 324 info->sps[i].page_shift = 16; 325 info->sps[i].slb_enc = 0x110; 326 info->sps[i].enc[0].page_shift = 16; 327 info->sps[i].enc[0].pte_enc = 1; 328 i++; 329 } 330 331 /* Standard 16M large page size segment */ 332 info->sps[i].page_shift = 24; 333 info->sps[i].slb_enc = SLB_VSID_L; 334 info->sps[i].enc[0].page_shift = 24; 335 info->sps[i].enc[0].pte_enc = 0; 336 } 337 } 338 339 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info) 340 { 341 CPUState *cs = CPU(cpu); 342 int ret; 343 344 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) { 345 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info); 346 if (ret == 0) { 347 return; 348 } 349 } 350 351 kvm_get_fallback_smmu_info(cpu, info); 352 } 353 354 struct ppc_radix_page_info *kvm_get_radix_page_info(void) 355 { 356 KVMState *s = KVM_STATE(current_machine->accelerator); 357 struct ppc_radix_page_info *radix_page_info; 358 struct kvm_ppc_rmmu_info rmmu_info; 359 int i; 360 361 if (!kvm_check_extension(s, KVM_CAP_PPC_MMU_RADIX)) { 362 return NULL; 363 } 364 if (kvm_vm_ioctl(s, KVM_PPC_GET_RMMU_INFO, &rmmu_info)) { 365 return NULL; 366 } 367 radix_page_info = g_malloc0(sizeof(*radix_page_info)); 368 radix_page_info->count = 0; 369 for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) { 370 if (rmmu_info.ap_encodings[i]) { 371 radix_page_info->entries[i] = rmmu_info.ap_encodings[i]; 372 radix_page_info->count++; 373 } 374 } 375 return radix_page_info; 376 } 377 378 target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu, 379 bool radix, bool gtse, 380 uint64_t proc_tbl) 381 { 382 CPUState *cs = CPU(cpu); 383 int ret; 384 uint64_t flags = 0; 385 struct kvm_ppc_mmuv3_cfg cfg = { 386 .process_table = proc_tbl, 387 }; 388 389 if (radix) { 390 flags |= KVM_PPC_MMUV3_RADIX; 391 } 392 if (gtse) { 393 flags |= KVM_PPC_MMUV3_GTSE; 394 } 395 cfg.flags = flags; 396 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_CONFIGURE_V3_MMU, &cfg); 397 switch (ret) { 398 case 0: 399 return H_SUCCESS; 400 case -EINVAL: 401 return H_PARAMETER; 402 case -ENODEV: 403 return H_NOT_AVAILABLE; 404 default: 405 return H_HARDWARE; 406 } 407 } 408 409 bool kvmppc_hpt_needs_host_contiguous_pages(void) 410 { 411 PowerPCCPU *cpu = POWERPC_CPU(first_cpu); 412 static struct kvm_ppc_smmu_info smmu_info; 413 414 if (!kvm_enabled()) { 415 return false; 416 
} 417 418 kvm_get_smmu_info(cpu, &smmu_info); 419 return !!(smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL); 420 } 421 422 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift) 423 { 424 if (!kvmppc_hpt_needs_host_contiguous_pages()) { 425 return true; 426 } 427 428 return (1ul << shift) <= rampgsize; 429 } 430 431 static long max_cpu_page_size; 432 433 static void kvm_fixup_page_sizes(PowerPCCPU *cpu) 434 { 435 static struct kvm_ppc_smmu_info smmu_info; 436 static bool has_smmu_info; 437 CPUPPCState *env = &cpu->env; 438 int iq, ik, jq, jk; 439 440 /* We only handle page sizes for 64-bit server guests for now */ 441 if (!(env->mmu_model & POWERPC_MMU_64)) { 442 return; 443 } 444 445 /* Collect MMU info from kernel if not already */ 446 if (!has_smmu_info) { 447 kvm_get_smmu_info(cpu, &smmu_info); 448 has_smmu_info = true; 449 } 450 451 if (!max_cpu_page_size) { 452 max_cpu_page_size = qemu_getrampagesize(); 453 } 454 455 /* Convert to QEMU form */ 456 memset(cpu->hash64_opts->sps, 0, sizeof(*cpu->hash64_opts->sps)); 457 458 /* If we have HV KVM, we need to forbid CI large pages if our 459 * host page size is smaller than 64K. 460 */ 461 if (kvmppc_hpt_needs_host_contiguous_pages()) { 462 if (getpagesize() >= 0x10000) { 463 cpu->hash64_opts->flags |= PPC_HASH64_CI_LARGEPAGE; 464 } else { 465 cpu->hash64_opts->flags &= ~PPC_HASH64_CI_LARGEPAGE; 466 } 467 } 468 469 /* 470 * XXX This loop should be an entry wide AND of the capabilities that 471 * the selected CPU has with the capabilities that KVM supports. 472 */ 473 for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) { 474 PPCHash64SegmentPageSizes *qsps = &cpu->hash64_opts->sps[iq]; 475 struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik]; 476 477 if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size, 478 ksps->page_shift)) { 479 continue; 480 } 481 qsps->page_shift = ksps->page_shift; 482 qsps->slb_enc = ksps->slb_enc; 483 for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) { 484 if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size, 485 ksps->enc[jk].page_shift)) { 486 continue; 487 } 488 qsps->enc[jq].page_shift = ksps->enc[jk].page_shift; 489 qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc; 490 if (++jq >= PPC_PAGE_SIZES_MAX_SZ) { 491 break; 492 } 493 } 494 if (++iq >= PPC_PAGE_SIZES_MAX_SZ) { 495 break; 496 } 497 } 498 cpu->hash64_opts->slb_size = smmu_info.slb_size; 499 if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) { 500 cpu->hash64_opts->flags &= ~PPC_HASH64_1TSEG; 501 } 502 } 503 504 bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path) 505 { 506 Object *mem_obj = object_resolve_path(obj_path, NULL); 507 long pagesize = host_memory_backend_pagesize(MEMORY_BACKEND(mem_obj)); 508 509 return pagesize >= max_cpu_page_size; 510 } 511 512 #else /* defined (TARGET_PPC64) */ 513 514 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu) 515 { 516 } 517 518 bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path) 519 { 520 return true; 521 } 522 523 #endif /* !defined (TARGET_PPC64) */ 524 525 unsigned long kvm_arch_vcpu_id(CPUState *cpu) 526 { 527 return POWERPC_CPU(cpu)->vcpu_id; 528 } 529 530 /* e500 supports 2 h/w breakpoint and 2 watchpoint. 531 * book3s supports only 1 watchpoint, so array size 532 * of 4 is sufficient for now. 
533 */ 534 #define MAX_HW_BKPTS 4 535 536 static struct HWBreakpoint { 537 target_ulong addr; 538 int type; 539 } hw_debug_points[MAX_HW_BKPTS]; 540 541 static CPUWatchpoint hw_watchpoint; 542 543 /* Default there is no breakpoint and watchpoint supported */ 544 static int max_hw_breakpoint; 545 static int max_hw_watchpoint; 546 static int nb_hw_breakpoint; 547 static int nb_hw_watchpoint; 548 549 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv) 550 { 551 if (cenv->excp_model == POWERPC_EXCP_BOOKE) { 552 max_hw_breakpoint = 2; 553 max_hw_watchpoint = 2; 554 } 555 556 if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) { 557 fprintf(stderr, "Error initializing h/w breakpoints\n"); 558 return; 559 } 560 } 561 562 int kvm_arch_init_vcpu(CPUState *cs) 563 { 564 PowerPCCPU *cpu = POWERPC_CPU(cs); 565 CPUPPCState *cenv = &cpu->env; 566 int ret; 567 568 /* Gather server mmu info from KVM and update the CPU state */ 569 kvm_fixup_page_sizes(cpu); 570 571 /* Synchronize sregs with kvm */ 572 ret = kvm_arch_sync_sregs(cpu); 573 if (ret) { 574 if (ret == -EINVAL) { 575 error_report("Register sync failed... If you're using kvm-hv.ko," 576 " only \"-cpu host\" is possible"); 577 } 578 return ret; 579 } 580 581 idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu); 582 583 switch (cenv->mmu_model) { 584 case POWERPC_MMU_BOOKE206: 585 /* This target supports access to KVM's guest TLB */ 586 ret = kvm_booke206_tlb_init(cpu); 587 break; 588 case POWERPC_MMU_2_07: 589 if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) { 590 /* KVM-HV has transactional memory on POWER8 also without the 591 * KVM_CAP_PPC_HTM extension, so enable it here instead as 592 * long as it's availble to userspace on the host. */ 593 if (qemu_getauxval(AT_HWCAP2) & PPC_FEATURE2_HAS_HTM) { 594 cap_htm = true; 595 } 596 } 597 break; 598 default: 599 break; 600 } 601 602 kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode); 603 kvmppc_hw_debug_points_init(cenv); 604 605 return ret; 606 } 607 608 static void kvm_sw_tlb_put(PowerPCCPU *cpu) 609 { 610 CPUPPCState *env = &cpu->env; 611 CPUState *cs = CPU(cpu); 612 struct kvm_dirty_tlb dirty_tlb; 613 unsigned char *bitmap; 614 int ret; 615 616 if (!env->kvm_sw_tlb) { 617 return; 618 } 619 620 bitmap = g_malloc((env->nb_tlb + 7) / 8); 621 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8); 622 623 dirty_tlb.bitmap = (uintptr_t)bitmap; 624 dirty_tlb.num_dirty = env->nb_tlb; 625 626 ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb); 627 if (ret) { 628 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n", 629 __func__, strerror(-ret)); 630 } 631 632 g_free(bitmap); 633 } 634 635 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr) 636 { 637 PowerPCCPU *cpu = POWERPC_CPU(cs); 638 CPUPPCState *env = &cpu->env; 639 union { 640 uint32_t u32; 641 uint64_t u64; 642 } val; 643 struct kvm_one_reg reg = { 644 .id = id, 645 .addr = (uintptr_t) &val, 646 }; 647 int ret; 648 649 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 650 if (ret != 0) { 651 trace_kvm_failed_spr_get(spr, strerror(errno)); 652 } else { 653 switch (id & KVM_REG_SIZE_MASK) { 654 case KVM_REG_SIZE_U32: 655 env->spr[spr] = val.u32; 656 break; 657 658 case KVM_REG_SIZE_U64: 659 env->spr[spr] = val.u64; 660 break; 661 662 default: 663 /* Don't handle this size yet */ 664 abort(); 665 } 666 } 667 } 668 669 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr) 670 { 671 PowerPCCPU *cpu = POWERPC_CPU(cs); 672 CPUPPCState *env = &cpu->env; 673 union { 674 uint32_t u32; 675 uint64_t u64; 676 } val; 677 struct 
kvm_one_reg reg = {
        .id = id,
        .addr = (uintptr_t) &val,
    };
    int ret;

    switch (id & KVM_REG_SIZE_MASK) {
    case KVM_REG_SIZE_U32:
        val.u32 = env->spr[spr];
        break;

    case KVM_REG_SIZE_U64:
        val.u64 = env->spr[spr];
        break;

    default:
        /* Don't handle this size yet */
        abort();
    }

    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret != 0) {
        trace_kvm_failed_spr_set(spr, strerror(errno));
    }
}

static int kvm_put_fp(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int i;
    int ret;

    if (env->insns_flags & PPC_FLOAT) {
        uint64_t fpscr = env->fpscr;
        bool vsx = !!(env->insns_flags2 & PPC2_VSX);

        reg.id = KVM_REG_PPC_FPSCR;
        reg.addr = (uintptr_t)&fpscr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            uint64_t vsr[2];

            /* The FP value lives in the most-significant doubleword
             * of the 128-bit VSR */
#ifdef HOST_WORDS_BIGENDIAN
            vsr[0] = float64_val(env->fpr[i]);
            vsr[1] = env->vsr[i];
#else
            vsr[0] = env->vsr[i];
            vsr[1] = float64_val(env->fpr[i]);
#endif
            reg.addr = (uintptr_t) &vsr;
            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);

            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
                        i, strerror(errno));
                return ret;
            }
        }
    }

    if (env->insns_flags & PPC_ALTIVEC) {
        reg.id = KVM_REG_PPC_VSCR;
        reg.addr = (uintptr_t)&env->vscr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            reg.id = KVM_REG_PPC_VR(i);
            reg.addr = (uintptr_t)&env->avr[i];
            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
                return ret;
            }
        }
    }

    return 0;
}

static int kvm_get_fp(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int i;
    int ret;

    if (env->insns_flags & PPC_FLOAT) {
        uint64_t fpscr;
        bool vsx = !!(env->insns_flags2 & PPC2_VSX);

        reg.id = KVM_REG_PPC_FPSCR;
        reg.addr = (uintptr_t)&fpscr;
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
            return ret;
        } else {
            env->fpscr = fpscr;
        }

        for (i = 0; i < 32; i++) {
            uint64_t vsr[2];

            reg.addr = (uintptr_t) &vsr;
            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);

            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to get %s%d from KVM: %s\n",
                        vsx ? "VSR" : "FPR", i, strerror(errno));
                return ret;
            } else {
#ifdef HOST_WORDS_BIGENDIAN
                env->fpr[i] = vsr[0];
                if (vsx) {
                    env->vsr[i] = vsr[1];
                }
#else
                env->fpr[i] = vsr[1];
                if (vsx) {
                    env->vsr[i] = vsr[0];
                }
#endif
            }
        }
    }

    if (env->insns_flags & PPC_ALTIVEC) {
        reg.id = KVM_REG_PPC_VSCR;
        reg.addr = (uintptr_t)&env->vscr;
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            reg.id = KVM_REG_PPC_VR(i);
            reg.addr = (uintptr_t)&env->avr[i];
            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to get VR%d from KVM: %s\n",
                        i, strerror(errno));
                return ret;
            }
        }
    }

    return 0;
}

#if defined(TARGET_PPC64)
static int kvm_get_vpa(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu);
    struct kvm_one_reg reg;
    int ret;

    reg.id = KVM_REG_PPC_VPA_ADDR;
    reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
        return ret;
    }

    /* The SLB shadow and DTL state are transferred as an adjacent
     * address/size pair, hence the layout asserts. */
    assert((uintptr_t)&spapr_cpu->slb_shadow_size
           == ((uintptr_t)&spapr_cpu->slb_shadow_addr + 8));
    reg.id = KVM_REG_PPC_VPA_SLB;
    reg.addr = (uintptr_t)&spapr_cpu->slb_shadow_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
                strerror(errno));
        return ret;
    }

    assert((uintptr_t)&spapr_cpu->dtl_size
           == ((uintptr_t)&spapr_cpu->dtl_addr + 8));
    reg.id = KVM_REG_PPC_VPA_DTL;
    reg.addr = (uintptr_t)&spapr_cpu->dtl_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
                strerror(errno));
        return ret;
    }

    return 0;
}

static int kvm_put_vpa(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu);
    struct kvm_one_reg reg;
    int ret;

    /* SLB shadow or DTL can't be registered unless a master VPA is
     * registered. That means when restoring state, if a VPA *is*
     * registered, we need to set that up first. If not, we need to
     * deregister the others before deregistering the master VPA */
    assert(spapr_cpu->vpa_addr
           || !(spapr_cpu->slb_shadow_addr || spapr_cpu->dtl_addr));

    if (spapr_cpu->vpa_addr) {
        reg.id = KVM_REG_PPC_VPA_ADDR;
        reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
            return ret;
        }
    }

    assert((uintptr_t)&spapr_cpu->slb_shadow_size
           == ((uintptr_t)&spapr_cpu->slb_shadow_addr + 8));
    reg.id = KVM_REG_PPC_VPA_SLB;
    reg.addr = (uintptr_t)&spapr_cpu->slb_shadow_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to set SLB shadow state to KVM: %s\n",
                strerror(errno));
        return ret;
    }

    assert((uintptr_t)&spapr_cpu->dtl_size
           == ((uintptr_t)&spapr_cpu->dtl_addr + 8));
    reg.id = KVM_REG_PPC_VPA_DTL;
    reg.addr = (uintptr_t)&spapr_cpu->dtl_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
                strerror(errno));
        return ret;
    }

    if (!spapr_cpu->vpa_addr) {
        reg.id = KVM_REG_PPC_VPA_ADDR;
        reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
            return ret;
        }
    }

    return 0;
}
#endif /* TARGET_PPC64 */
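/*
 * Layout note for the Book3S sregs synchronization below: each SLB
 * entry's .slbe word uses the slbmte-style encoding where the low
 * bits carry the slot index, which is why the index is OR'd into
 * valid entries in kvmppc_put_books_sregs() and masked back out with
 * "rb & 0xfff" in kvmppc_get_books_sregs(); .slbv holds the VSID word.
 */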
int kvmppc_put_books_sregs(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    struct kvm_sregs sregs;
    int i;

    sregs.pvr = env->spr[SPR_PVR];

    if (cpu->vhyp) {
        PPCVirtualHypervisorClass *vhc =
            PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
        sregs.u.s.sdr1 = vhc->encode_hpt_for_kvm_pr(cpu->vhyp);
    } else {
        sregs.u.s.sdr1 = env->spr[SPR_SDR1];
    }

    /* Sync SLB */
#ifdef TARGET_PPC64
    for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
        sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
        if (env->slb[i].esid & SLB_ESID_V) {
            sregs.u.s.ppc64.slb[i].slbe |= i;
        }
        sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
    }
#endif

    /* Sync SRs */
    for (i = 0; i < 16; i++) {
        sregs.u.s.ppc32.sr[i] = env->sr[i];
    }

    /* Sync BATs */
    for (i = 0; i < 8; i++) {
        /* Beware. We have to swap upper and lower bits here */
        sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
            | env->DBAT[1][i];
        sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
            | env->IBAT[1][i];
    }

    return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
}

int kvm_arch_put_registers(CPUState *cs, int level)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_regs regs;
    int ret;
    int i;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    regs.ctr = env->ctr;
    regs.lr = env->lr;
    regs.xer = cpu_read_xer(env);
    regs.msr = env->msr;
    regs.pc = env->nip;

    regs.srr0 = env->spr[SPR_SRR0];
    regs.srr1 = env->spr[SPR_SRR1];

    regs.sprg0 = env->spr[SPR_SPRG0];
    regs.sprg1 = env->spr[SPR_SPRG1];
    regs.sprg2 = env->spr[SPR_SPRG2];
    regs.sprg3 = env->spr[SPR_SPRG3];
    regs.sprg4 = env->spr[SPR_SPRG4];
    regs.sprg5 = env->spr[SPR_SPRG5];
    regs.sprg6 = env->spr[SPR_SPRG6];
    regs.sprg7 = env->spr[SPR_SPRG7];

    regs.pid = env->spr[SPR_BOOKE_PID];

    for (i = 0; i < 32; i++) {
        regs.gpr[i] = env->gpr[i];
    }

    /* Pack the CR fields; CR0 ends up in the most-significant nibble */
    regs.cr = 0;
    for (i = 0; i < 8; i++) {
        regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
    }

    ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    kvm_put_fp(cs);

    if (env->tlb_dirty) {
        kvm_sw_tlb_put(cpu);
        env->tlb_dirty = false;
    }

    if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
        ret = kvmppc_put_books_sregs(cpu);
        if (ret < 0) {
            return ret;
        }
    }

    if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
        kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
    }

    if (cap_one_reg) {
        int i;

        /* We deliberately ignore errors here, for kernels which have
         * the ONE_REG calls, but don't support the specific
         * registers, there's a reasonable chance things will still
         * work, at least until we try to migrate. */
        for (i = 0; i < 1024; i++) {
            uint64_t id = env->spr_cb[i].one_reg_id;

            if (id != 0) {
                kvm_put_one_spr(cs, id, i);
            }
        }

#ifdef TARGET_PPC64
        if (msr_ts) {
            for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
                kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
            }
            for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
                kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
            }
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
        }

        if (cap_papr) {
            if (kvm_put_vpa(cs) < 0) {
                DPRINTF("Warning: Unable to set VPA information to KVM\n");
            }
        }

        kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
#endif /* TARGET_PPC64 */
    }

    return ret;
}
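/*
 * On BookE the effective vector of each exception is the IVPR base
 * plus the per-exception IVOR offset; kvm_sync_excp() below mirrors
 * exactly that sum into env->excp_vectors[] whenever the IVORs are
 * read back from KVM.
 */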
static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
{
    env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
}

static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    struct kvm_sregs sregs;
    int ret;

    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
    if (ret < 0) {
        return ret;
    }

    if (sregs.u.e.features & KVM_SREGS_E_BASE) {
        env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
        env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
        env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
        env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
        env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
        env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
        env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
        env->spr[SPR_DECR] = sregs.u.e.dec;
        env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
        env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
        env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
    }

    if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
        env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
        env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
        env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
        env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
        env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
    }

    if (sregs.u.e.features & KVM_SREGS_E_64) {
        env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
    }

    if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
        env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
    }

    if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
        env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
        kvm_sync_excp(env, POWERPC_EXCP_CRITICAL, SPR_BOOKE_IVOR0);
        env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
        kvm_sync_excp(env, POWERPC_EXCP_MCHECK, SPR_BOOKE_IVOR1);
        env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
        kvm_sync_excp(env, POWERPC_EXCP_DSI, SPR_BOOKE_IVOR2);
        env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
        kvm_sync_excp(env, POWERPC_EXCP_ISI, SPR_BOOKE_IVOR3);
        env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
        kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL, SPR_BOOKE_IVOR4);
env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5]; 1154 kvm_sync_excp(env, POWERPC_EXCP_ALIGN, SPR_BOOKE_IVOR5); 1155 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6]; 1156 kvm_sync_excp(env, POWERPC_EXCP_PROGRAM, SPR_BOOKE_IVOR6); 1157 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7]; 1158 kvm_sync_excp(env, POWERPC_EXCP_FPU, SPR_BOOKE_IVOR7); 1159 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8]; 1160 kvm_sync_excp(env, POWERPC_EXCP_SYSCALL, SPR_BOOKE_IVOR8); 1161 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9]; 1162 kvm_sync_excp(env, POWERPC_EXCP_APU, SPR_BOOKE_IVOR9); 1163 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10]; 1164 kvm_sync_excp(env, POWERPC_EXCP_DECR, SPR_BOOKE_IVOR10); 1165 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11]; 1166 kvm_sync_excp(env, POWERPC_EXCP_FIT, SPR_BOOKE_IVOR11); 1167 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12]; 1168 kvm_sync_excp(env, POWERPC_EXCP_WDT, SPR_BOOKE_IVOR12); 1169 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13]; 1170 kvm_sync_excp(env, POWERPC_EXCP_DTLB, SPR_BOOKE_IVOR13); 1171 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14]; 1172 kvm_sync_excp(env, POWERPC_EXCP_ITLB, SPR_BOOKE_IVOR14); 1173 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15]; 1174 kvm_sync_excp(env, POWERPC_EXCP_DEBUG, SPR_BOOKE_IVOR15); 1175 1176 if (sregs.u.e.features & KVM_SREGS_E_SPE) { 1177 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0]; 1178 kvm_sync_excp(env, POWERPC_EXCP_SPEU, SPR_BOOKE_IVOR32); 1179 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1]; 1180 kvm_sync_excp(env, POWERPC_EXCP_EFPDI, SPR_BOOKE_IVOR33); 1181 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2]; 1182 kvm_sync_excp(env, POWERPC_EXCP_EFPRI, SPR_BOOKE_IVOR34); 1183 } 1184 1185 if (sregs.u.e.features & KVM_SREGS_E_PM) { 1186 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3]; 1187 kvm_sync_excp(env, POWERPC_EXCP_EPERFM, SPR_BOOKE_IVOR35); 1188 } 1189 1190 if (sregs.u.e.features & KVM_SREGS_E_PC) { 1191 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4]; 1192 kvm_sync_excp(env, POWERPC_EXCP_DOORI, SPR_BOOKE_IVOR36); 1193 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5]; 1194 kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37); 1195 } 1196 } 1197 1198 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) { 1199 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0; 1200 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1; 1201 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2; 1202 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff; 1203 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4; 1204 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6; 1205 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32; 1206 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg; 1207 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0]; 1208 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1]; 1209 } 1210 1211 if (sregs.u.e.features & KVM_SREGS_EXP) { 1212 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr; 1213 } 1214 1215 if (sregs.u.e.features & KVM_SREGS_E_PD) { 1216 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc; 1217 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc; 1218 } 1219 1220 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) { 1221 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr; 1222 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar; 1223 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0; 1224 1225 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) { 1226 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1; 1227 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2; 1228 } 1229 } 1230 1231 return 0; 1232 } 1233 1234 static int 
kvmppc_get_books_sregs(PowerPCCPU *cpu) 1235 { 1236 CPUPPCState *env = &cpu->env; 1237 struct kvm_sregs sregs; 1238 int ret; 1239 int i; 1240 1241 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs); 1242 if (ret < 0) { 1243 return ret; 1244 } 1245 1246 if (!cpu->vhyp) { 1247 ppc_store_sdr1(env, sregs.u.s.sdr1); 1248 } 1249 1250 /* Sync SLB */ 1251 #ifdef TARGET_PPC64 1252 /* 1253 * The packed SLB array we get from KVM_GET_SREGS only contains 1254 * information about valid entries. So we flush our internal copy 1255 * to get rid of stale ones, then put all valid SLB entries back 1256 * in. 1257 */ 1258 memset(env->slb, 0, sizeof(env->slb)); 1259 for (i = 0; i < ARRAY_SIZE(env->slb); i++) { 1260 target_ulong rb = sregs.u.s.ppc64.slb[i].slbe; 1261 target_ulong rs = sregs.u.s.ppc64.slb[i].slbv; 1262 /* 1263 * Only restore valid entries 1264 */ 1265 if (rb & SLB_ESID_V) { 1266 ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs); 1267 } 1268 } 1269 #endif 1270 1271 /* Sync SRs */ 1272 for (i = 0; i < 16; i++) { 1273 env->sr[i] = sregs.u.s.ppc32.sr[i]; 1274 } 1275 1276 /* Sync BATs */ 1277 for (i = 0; i < 8; i++) { 1278 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff; 1279 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32; 1280 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff; 1281 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32; 1282 } 1283 1284 return 0; 1285 } 1286 1287 int kvm_arch_get_registers(CPUState *cs) 1288 { 1289 PowerPCCPU *cpu = POWERPC_CPU(cs); 1290 CPUPPCState *env = &cpu->env; 1291 struct kvm_regs regs; 1292 uint32_t cr; 1293 int i, ret; 1294 1295 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, ®s); 1296 if (ret < 0) 1297 return ret; 1298 1299 cr = regs.cr; 1300 for (i = 7; i >= 0; i--) { 1301 env->crf[i] = cr & 15; 1302 cr >>= 4; 1303 } 1304 1305 env->ctr = regs.ctr; 1306 env->lr = regs.lr; 1307 cpu_write_xer(env, regs.xer); 1308 env->msr = regs.msr; 1309 env->nip = regs.pc; 1310 1311 env->spr[SPR_SRR0] = regs.srr0; 1312 env->spr[SPR_SRR1] = regs.srr1; 1313 1314 env->spr[SPR_SPRG0] = regs.sprg0; 1315 env->spr[SPR_SPRG1] = regs.sprg1; 1316 env->spr[SPR_SPRG2] = regs.sprg2; 1317 env->spr[SPR_SPRG3] = regs.sprg3; 1318 env->spr[SPR_SPRG4] = regs.sprg4; 1319 env->spr[SPR_SPRG5] = regs.sprg5; 1320 env->spr[SPR_SPRG6] = regs.sprg6; 1321 env->spr[SPR_SPRG7] = regs.sprg7; 1322 1323 env->spr[SPR_BOOKE_PID] = regs.pid; 1324 1325 for (i = 0;i < 32; i++) 1326 env->gpr[i] = regs.gpr[i]; 1327 1328 kvm_get_fp(cs); 1329 1330 if (cap_booke_sregs) { 1331 ret = kvmppc_get_booke_sregs(cpu); 1332 if (ret < 0) { 1333 return ret; 1334 } 1335 } 1336 1337 if (cap_segstate) { 1338 ret = kvmppc_get_books_sregs(cpu); 1339 if (ret < 0) { 1340 return ret; 1341 } 1342 } 1343 1344 if (cap_hior) { 1345 kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR); 1346 } 1347 1348 if (cap_one_reg) { 1349 int i; 1350 1351 /* We deliberately ignore errors here, for kernels which have 1352 * the ONE_REG calls, but don't support the specific 1353 * registers, there's a reasonable chance things will still 1354 * work, at least until we try to migrate. 
*/ 1355 for (i = 0; i < 1024; i++) { 1356 uint64_t id = env->spr_cb[i].one_reg_id; 1357 1358 if (id != 0) { 1359 kvm_get_one_spr(cs, id, i); 1360 } 1361 } 1362 1363 #ifdef TARGET_PPC64 1364 if (msr_ts) { 1365 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) { 1366 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]); 1367 } 1368 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) { 1369 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]); 1370 } 1371 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr); 1372 kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr); 1373 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr); 1374 kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr); 1375 kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr); 1376 kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr); 1377 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave); 1378 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr); 1379 kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr); 1380 kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar); 1381 } 1382 1383 if (cap_papr) { 1384 if (kvm_get_vpa(cs) < 0) { 1385 DPRINTF("Warning: Unable to get VPA information from KVM\n"); 1386 } 1387 } 1388 1389 kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset); 1390 #endif 1391 } 1392 1393 return 0; 1394 } 1395 1396 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level) 1397 { 1398 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET; 1399 1400 if (irq != PPC_INTERRUPT_EXT) { 1401 return 0; 1402 } 1403 1404 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) { 1405 return 0; 1406 } 1407 1408 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq); 1409 1410 return 0; 1411 } 1412 1413 #if defined(TARGET_PPCEMB) 1414 #define PPC_INPUT_INT PPC40x_INPUT_INT 1415 #elif defined(TARGET_PPC64) 1416 #define PPC_INPUT_INT PPC970_INPUT_INT 1417 #else 1418 #define PPC_INPUT_INT PPC6xx_INPUT_INT 1419 #endif 1420 1421 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run) 1422 { 1423 PowerPCCPU *cpu = POWERPC_CPU(cs); 1424 CPUPPCState *env = &cpu->env; 1425 int r; 1426 unsigned irq; 1427 1428 qemu_mutex_lock_iothread(); 1429 1430 /* PowerPC QEMU tracks the various core input pins (interrupt, critical 1431 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */ 1432 if (!cap_interrupt_level && 1433 run->ready_for_interrupt_injection && 1434 (cs->interrupt_request & CPU_INTERRUPT_HARD) && 1435 (env->irq_input_state & (1<<PPC_INPUT_INT))) 1436 { 1437 /* For now KVM disregards the 'irq' argument. However, in the 1438 * future KVM could cache it in-kernel to avoid a heavyweight exit 1439 * when reading the UIC. 1440 */ 1441 irq = KVM_INTERRUPT_SET; 1442 1443 DPRINTF("injected interrupt %d\n", irq); 1444 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq); 1445 if (r < 0) { 1446 printf("cpu %d fail inject %x\n", cs->cpu_index, irq); 1447 } 1448 1449 /* Always wake up soon in case the interrupt was level based */ 1450 timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 1451 (NANOSECONDS_PER_SECOND / 50)); 1452 } 1453 1454 /* We don't know if there are more interrupts pending after this. However, 1455 * the guest will return to userspace in the course of handling this one 1456 * anyways, so we will get a chance to deliver the rest. 
*/ 1457 1458 qemu_mutex_unlock_iothread(); 1459 } 1460 1461 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run) 1462 { 1463 return MEMTXATTRS_UNSPECIFIED; 1464 } 1465 1466 int kvm_arch_process_async_events(CPUState *cs) 1467 { 1468 return cs->halted; 1469 } 1470 1471 static int kvmppc_handle_halt(PowerPCCPU *cpu) 1472 { 1473 CPUState *cs = CPU(cpu); 1474 CPUPPCState *env = &cpu->env; 1475 1476 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) { 1477 cs->halted = 1; 1478 cs->exception_index = EXCP_HLT; 1479 } 1480 1481 return 0; 1482 } 1483 1484 /* map dcr access to existing qemu dcr emulation */ 1485 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data) 1486 { 1487 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0) 1488 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn); 1489 1490 return 0; 1491 } 1492 1493 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data) 1494 { 1495 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0) 1496 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn); 1497 1498 return 0; 1499 } 1500 1501 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp) 1502 { 1503 /* Mixed endian case is not handled */ 1504 uint32_t sc = debug_inst_opcode; 1505 1506 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 1507 sizeof(sc), 0) || 1508 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) { 1509 return -EINVAL; 1510 } 1511 1512 return 0; 1513 } 1514 1515 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp) 1516 { 1517 uint32_t sc; 1518 1519 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) || 1520 sc != debug_inst_opcode || 1521 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 1522 sizeof(sc), 1)) { 1523 return -EINVAL; 1524 } 1525 1526 return 0; 1527 } 1528 1529 static int find_hw_breakpoint(target_ulong addr, int type) 1530 { 1531 int n; 1532 1533 assert((nb_hw_breakpoint + nb_hw_watchpoint) 1534 <= ARRAY_SIZE(hw_debug_points)); 1535 1536 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) { 1537 if (hw_debug_points[n].addr == addr && 1538 hw_debug_points[n].type == type) { 1539 return n; 1540 } 1541 } 1542 1543 return -1; 1544 } 1545 1546 static int find_hw_watchpoint(target_ulong addr, int *flag) 1547 { 1548 int n; 1549 1550 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS); 1551 if (n >= 0) { 1552 *flag = BP_MEM_ACCESS; 1553 return n; 1554 } 1555 1556 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE); 1557 if (n >= 0) { 1558 *flag = BP_MEM_WRITE; 1559 return n; 1560 } 1561 1562 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ); 1563 if (n >= 0) { 1564 *flag = BP_MEM_READ; 1565 return n; 1566 } 1567 1568 return -1; 1569 } 1570 1571 int kvm_arch_insert_hw_breakpoint(target_ulong addr, 1572 target_ulong len, int type) 1573 { 1574 if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) { 1575 return -ENOBUFS; 1576 } 1577 1578 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr; 1579 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type; 1580 1581 switch (type) { 1582 case GDB_BREAKPOINT_HW: 1583 if (nb_hw_breakpoint >= max_hw_breakpoint) { 1584 return -ENOBUFS; 1585 } 1586 1587 if (find_hw_breakpoint(addr, type) >= 0) { 1588 return -EEXIST; 1589 } 1590 1591 nb_hw_breakpoint++; 1592 break; 1593 1594 case GDB_WATCHPOINT_WRITE: 1595 case GDB_WATCHPOINT_READ: 1596 case GDB_WATCHPOINT_ACCESS: 1597 if (nb_hw_watchpoint >= max_hw_watchpoint) { 1598 
            return -ENOBUFS;
        }

        if (find_hw_breakpoint(addr, type) >= 0) {
            return -EEXIST;
        }

        nb_hw_watchpoint++;
        break;

    default:
        return -ENOSYS;
    }

    return 0;
}

int kvm_arch_remove_hw_breakpoint(target_ulong addr,
                                  target_ulong len, int type)
{
    int n;

    n = find_hw_breakpoint(addr, type);
    if (n < 0) {
        return -ENOENT;
    }

    switch (type) {
    case GDB_BREAKPOINT_HW:
        nb_hw_breakpoint--;
        break;

    case GDB_WATCHPOINT_WRITE:
    case GDB_WATCHPOINT_READ:
    case GDB_WATCHPOINT_ACCESS:
        nb_hw_watchpoint--;
        break;

    default:
        return -ENOSYS;
    }
    hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];

    return 0;
}

void kvm_arch_remove_all_hw_breakpoints(void)
{
    nb_hw_breakpoint = nb_hw_watchpoint = 0;
}

void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
{
    int n;

    /* Software Breakpoint updates */
    if (kvm_sw_breakpoints_active(cs)) {
        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
    }

    assert((nb_hw_breakpoint + nb_hw_watchpoint)
           <= ARRAY_SIZE(hw_debug_points));
    assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));

    if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
        memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
        for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
            switch (hw_debug_points[n].type) {
            case GDB_BREAKPOINT_HW:
                dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
                break;
            case GDB_WATCHPOINT_WRITE:
                dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
                break;
            case GDB_WATCHPOINT_READ:
                dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
                break;
            case GDB_WATCHPOINT_ACCESS:
                dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
                                       KVMPPC_DEBUG_WATCH_READ;
                break;
            default:
                cpu_abort(cs, "Unsupported breakpoint type\n");
            }
            dbg->arch.bp[n].addr = hw_debug_points[n].addr;
        }
    }
}

static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
{
    CPUState *cs = CPU(cpu);
    CPUPPCState *env = &cpu->env;
    struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
    int handle = 0;
    int n;
    int flag = 0;

    if (cs->singlestep_enabled) {
        handle = 1;
    } else if (arch_info->status) {
        if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
            if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
                n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
                if (n >= 0) {
                    handle = 1;
                }
            } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
                                            KVMPPC_DEBUG_WATCH_WRITE)) {
                n = find_hw_watchpoint(arch_info->address, &flag);
                if (n >= 0) {
                    handle = 1;
                    cs->watchpoint_hit = &hw_watchpoint;
                    hw_watchpoint.vaddr = hw_debug_points[n].addr;
                    hw_watchpoint.flags = flag;
                }
            }
        }
    } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
        handle = 1;
    } else {
        /* QEMU is not able to handle this debug exception, so inject a
         * program exception into the guest;
         * yes, a program exception and NOT a debug exception !!
         * When QEMU is using the debug resources then the debug exception
         * must always be set. To achieve this we set MSR_DE and also set
         * MSRP_DEP so the guest cannot change MSR_DE.
         * When emulating the debug resources for the guest we want the
         * guest to control MSR_DE (enable/disable the debug interrupt on
         * demand).
         * Supporting both configurations at the same time is not possible.
         * So the result is that we cannot share the debug resources
         * between QEMU and the guest on BookE architecture.
         * In the current design QEMU gets priority over the guest;
         * this means that if QEMU is using the debug resources then the
         * guest cannot use them.
         * For software breakpoints QEMU uses a privileged instruction, so
         * there is no way we can end up here because the guest raised a
         * debug exception of its own; the only possibility is that the
         * guest executed a privileged / illegal instruction, and that's
         * why we are injecting a program interrupt.
         */

        cpu_synchronize_state(cs);
        /* env->nip is PC, so increment this by 4 to use
         * ppc_cpu_do_interrupt(), which sets srr0 = env->nip - 4.
         */
        env->nip += 4;
        cs->exception_index = POWERPC_EXCP_PROGRAM;
        env->error_code = POWERPC_EXCP_INVAL;
        ppc_cpu_do_interrupt(cs);
    }

    return handle;
}
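/*
 * Return value convention of kvm_handle_debug() above: a non-zero
 * result makes kvm_arch_handle_exit() below return EXCP_DEBUG so the
 * exit is handed to the gdbstub, while zero means the debug exit was
 * meant for the guest and the vCPU is simply re-entered.
 */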
int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    int ret;

    qemu_mutex_lock_iothread();

    switch (run->exit_reason) {
    case KVM_EXIT_DCR:
        if (run->dcr.is_write) {
            DPRINTF("handle dcr write\n");
            ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
        } else {
            DPRINTF("handle dcr read\n");
            ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
        }
        break;
    case KVM_EXIT_HLT:
        DPRINTF("handle halt\n");
        ret = kvmppc_handle_halt(cpu);
        break;
#if defined(TARGET_PPC64)
    case KVM_EXIT_PAPR_HCALL:
        DPRINTF("handle PAPR hypercall\n");
        run->papr_hcall.ret = spapr_hypercall(cpu,
                                              run->papr_hcall.nr,
                                              run->papr_hcall.args);
        ret = 0;
        break;
#endif
    case KVM_EXIT_EPR:
        DPRINTF("handle epr\n");
        run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
        ret = 0;
        break;
    case KVM_EXIT_WATCHDOG:
        DPRINTF("handle watchdog expiry\n");
        watchdog_perform_action();
        ret = 0;
        break;

    case KVM_EXIT_DEBUG:
        DPRINTF("handle debug exception\n");
        if (kvm_handle_debug(cpu, run)) {
            ret = EXCP_DEBUG;
            break;
        }
        /* re-enter, this exception was guest-internal */
        ret = 0;
        break;

    default:
        fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
        ret = -1;
        break;
    }

    qemu_mutex_unlock_iothread();
    return ret;
}

int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
{
    CPUState *cs = CPU(cpu);
    uint32_t bits = tsr_bits;
    struct kvm_one_reg reg = {
        .id = KVM_REG_PPC_OR_TSR,
        .addr = (uintptr_t) &bits,
    };

    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
}

int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
{
    CPUState *cs = CPU(cpu);
    uint32_t bits = tsr_bits;
    struct kvm_one_reg reg = {
        .id = KVM_REG_PPC_CLEAR_TSR,
        .addr = (uintptr_t) &bits,
    };

    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
}

int kvmppc_set_tcr(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    CPUPPCState *env = &cpu->env;
    uint32_t tcr = env->spr[SPR_BOOKE_TCR];

    struct kvm_one_reg reg = {
        .id = KVM_REG_PPC_TCR,
        .addr = (uintptr_t) &tcr,
1850 }; 1851 1852 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 1853 } 1854 1855 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu) 1856 { 1857 CPUState *cs = CPU(cpu); 1858 int ret; 1859 1860 if (!kvm_enabled()) { 1861 return -1; 1862 } 1863 1864 if (!cap_ppc_watchdog) { 1865 printf("warning: KVM does not support watchdog"); 1866 return -1; 1867 } 1868 1869 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0); 1870 if (ret < 0) { 1871 fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n", 1872 __func__, strerror(-ret)); 1873 return ret; 1874 } 1875 1876 return ret; 1877 } 1878 1879 static int read_cpuinfo(const char *field, char *value, int len) 1880 { 1881 FILE *f; 1882 int ret = -1; 1883 int field_len = strlen(field); 1884 char line[512]; 1885 1886 f = fopen("/proc/cpuinfo", "r"); 1887 if (!f) { 1888 return -1; 1889 } 1890 1891 do { 1892 if (!fgets(line, sizeof(line), f)) { 1893 break; 1894 } 1895 if (!strncmp(line, field, field_len)) { 1896 pstrcpy(value, len, line); 1897 ret = 0; 1898 break; 1899 } 1900 } while(*line); 1901 1902 fclose(f); 1903 1904 return ret; 1905 } 1906 1907 uint32_t kvmppc_get_tbfreq(void) 1908 { 1909 char line[512]; 1910 char *ns; 1911 uint32_t retval = NANOSECONDS_PER_SECOND; 1912 1913 if (read_cpuinfo("timebase", line, sizeof(line))) { 1914 return retval; 1915 } 1916 1917 if (!(ns = strchr(line, ':'))) { 1918 return retval; 1919 } 1920 1921 ns++; 1922 1923 return atoi(ns); 1924 } 1925 1926 bool kvmppc_get_host_serial(char **value) 1927 { 1928 return g_file_get_contents("/proc/device-tree/system-id", value, NULL, 1929 NULL); 1930 } 1931 1932 bool kvmppc_get_host_model(char **value) 1933 { 1934 return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL); 1935 } 1936 1937 /* Try to find a device tree node for a CPU with clock-frequency property */ 1938 static int kvmppc_find_cpu_dt(char *buf, int buf_len) 1939 { 1940 struct dirent *dirp; 1941 DIR *dp; 1942 1943 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) { 1944 printf("Can't open directory " PROC_DEVTREE_CPU "\n"); 1945 return -1; 1946 } 1947 1948 buf[0] = '\0'; 1949 while ((dirp = readdir(dp)) != NULL) { 1950 FILE *f; 1951 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU, 1952 dirp->d_name); 1953 f = fopen(buf, "r"); 1954 if (f) { 1955 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name); 1956 fclose(f); 1957 break; 1958 } 1959 buf[0] = '\0'; 1960 } 1961 closedir(dp); 1962 if (buf[0] == '\0') { 1963 printf("Unknown host!\n"); 1964 return -1; 1965 } 1966 1967 return 0; 1968 } 1969 1970 static uint64_t kvmppc_read_int_dt(const char *filename) 1971 { 1972 union { 1973 uint32_t v32; 1974 uint64_t v64; 1975 } u; 1976 FILE *f; 1977 int len; 1978 1979 f = fopen(filename, "rb"); 1980 if (!f) { 1981 return -1; 1982 } 1983 1984 len = fread(&u, 1, sizeof(u), f); 1985 fclose(f); 1986 switch (len) { 1987 case 4: 1988 /* property is a 32-bit quantity */ 1989 return be32_to_cpu(u.v32); 1990 case 8: 1991 return be64_to_cpu(u.v64); 1992 } 1993 1994 return 0; 1995 } 1996 1997 /* Read a CPU node property from the host device tree that's a single 1998 * integer (32-bit or 64-bit). 
Returns 0 if anything goes wrong 1999 * (can't find or open the property, or doesn't understand the 2000 * format) */ 2001 static uint64_t kvmppc_read_int_cpu_dt(const char *propname) 2002 { 2003 char buf[PATH_MAX], *tmp; 2004 uint64_t val; 2005 2006 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) { 2007 return -1; 2008 } 2009 2010 tmp = g_strdup_printf("%s/%s", buf, propname); 2011 val = kvmppc_read_int_dt(tmp); 2012 g_free(tmp); 2013 2014 return val; 2015 } 2016 2017 uint64_t kvmppc_get_clockfreq(void) 2018 { 2019 return kvmppc_read_int_cpu_dt("clock-frequency"); 2020 } 2021 2022 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo) 2023 { 2024 PowerPCCPU *cpu = ppc_env_get_cpu(env); 2025 CPUState *cs = CPU(cpu); 2026 2027 if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) && 2028 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) { 2029 return 0; 2030 } 2031 2032 return 1; 2033 } 2034 2035 int kvmppc_get_hasidle(CPUPPCState *env) 2036 { 2037 struct kvm_ppc_pvinfo pvinfo; 2038 2039 if (!kvmppc_get_pvinfo(env, &pvinfo) && 2040 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) { 2041 return 1; 2042 } 2043 2044 return 0; 2045 } 2046 2047 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len) 2048 { 2049 uint32_t *hc = (uint32_t*)buf; 2050 struct kvm_ppc_pvinfo pvinfo; 2051 2052 if (!kvmppc_get_pvinfo(env, &pvinfo)) { 2053 memcpy(buf, pvinfo.hcall, buf_len); 2054 return 0; 2055 } 2056 2057 /* 2058 * Fallback to always fail hypercalls regardless of endianness: 2059 * 2060 * tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian) 2061 * li r3, -1 2062 * b .+8 (becomes nop in wrong endian) 2063 * bswap32(li r3, -1) 2064 */ 2065 2066 hc[0] = cpu_to_be32(0x08000048); 2067 hc[1] = cpu_to_be32(0x3860ffff); 2068 hc[2] = cpu_to_be32(0x48000008); 2069 hc[3] = cpu_to_be32(bswap32(0x3860ffff)); 2070 2071 return 1; 2072 } 2073 2074 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall) 2075 { 2076 return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1); 2077 } 2078 2079 void kvmppc_enable_logical_ci_hcalls(void) 2080 { 2081 /* 2082 * FIXME: it would be nice if we could detect the cases where 2083 * we're using a device which requires the in kernel 2084 * implementation of these hcalls, but the kernel lacks them and 2085 * produce a warning. 
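* For now the two hcalls below are simply enabled unconditionally and
* any failure reported by kvmppc_enable_hcall() is ignored.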
2086 */ 2087 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD); 2088 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE); 2089 } 2090 2091 void kvmppc_enable_set_mode_hcall(void) 2092 { 2093 kvmppc_enable_hcall(kvm_state, H_SET_MODE); 2094 } 2095 2096 void kvmppc_enable_clear_ref_mod_hcalls(void) 2097 { 2098 kvmppc_enable_hcall(kvm_state, H_CLEAR_REF); 2099 kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD); 2100 } 2101 2102 void kvmppc_set_papr(PowerPCCPU *cpu) 2103 { 2104 CPUState *cs = CPU(cpu); 2105 int ret; 2106 2107 if (!kvm_enabled()) { 2108 return; 2109 } 2110 2111 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0); 2112 if (ret) { 2113 error_report("This vCPU type or KVM version does not support PAPR"); 2114 exit(1); 2115 } 2116 2117 /* Update the capability flag so we sync the right information 2118 * with kvm */ 2119 cap_papr = 1; 2120 } 2121 2122 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr) 2123 { 2124 return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr); 2125 } 2126 2127 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy) 2128 { 2129 CPUState *cs = CPU(cpu); 2130 int ret; 2131 2132 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy); 2133 if (ret && mpic_proxy) { 2134 error_report("This KVM version does not support EPR"); 2135 exit(1); 2136 } 2137 } 2138 2139 int kvmppc_smt_threads(void) 2140 { 2141 return cap_ppc_smt ? cap_ppc_smt : 1; 2142 } 2143 2144 int kvmppc_set_smt_threads(int smt) 2145 { 2146 int ret; 2147 2148 ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_SMT, 0, smt, 0); 2149 if (!ret) { 2150 cap_ppc_smt = smt; 2151 } 2152 return ret; 2153 } 2154 2155 void kvmppc_hint_smt_possible(Error **errp) 2156 { 2157 int i; 2158 GString *g; 2159 char *s; 2160 2161 assert(kvm_enabled()); 2162 if (cap_ppc_smt_possible) { 2163 g = g_string_new("Available VSMT modes:"); 2164 for (i = 63; i >= 0; i--) { 2165 if ((1UL << i) & cap_ppc_smt_possible) { 2166 g_string_append_printf(g, " %lu", (1UL << i)); 2167 } 2168 } 2169 s = g_string_free(g, false); 2170 error_append_hint(errp, "%s.\n", s); 2171 g_free(s); 2172 } else { 2173 error_append_hint(errp, 2174 "This KVM seems to be too old to support VSMT.\n"); 2175 } 2176 } 2177 2178 2179 #ifdef TARGET_PPC64 2180 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift) 2181 { 2182 struct kvm_ppc_smmu_info info; 2183 long rampagesize, best_page_shift; 2184 int i; 2185 2186 /* Find the largest hardware supported page size that's less than 2187 * or equal to the (logical) backing page size of guest RAM */ 2188 kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info); 2189 rampagesize = qemu_getrampagesize(); 2190 best_page_shift = 0; 2191 2192 for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) { 2193 struct kvm_ppc_one_seg_page_size *sps = &info.sps[i]; 2194 2195 if (!sps->page_shift) { 2196 continue; 2197 } 2198 2199 if ((sps->page_shift > best_page_shift) 2200 && ((1UL << sps->page_shift) <= rampagesize)) { 2201 best_page_shift = sps->page_shift; 2202 } 2203 } 2204 2205 return MIN(current_size, 2206 1ULL << (best_page_shift + hash_shift - 7)); 2207 } 2208 #endif 2209 2210 bool kvmppc_spapr_use_multitce(void) 2211 { 2212 return cap_spapr_multitce; 2213 } 2214 2215 int kvmppc_spapr_enable_inkernel_multitce(void) 2216 { 2217 int ret; 2218 2219 ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0, 2220 H_PUT_TCE_INDIRECT, 1); 2221 if (!ret) { 2222 ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0, 2223 H_STUFF_TCE, 1); 2224 } 2225 2226 return ret; 2227 } 2228 2229 void 
*kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift, 2230 uint64_t bus_offset, uint32_t nb_table, 2231 int *pfd, bool need_vfio) 2232 { 2233 long len; 2234 int fd; 2235 void *table; 2236 2237 /* Must set fd to -1 so we don't try to munmap when called for 2238 * destroying the table, which the upper layers -will- do 2239 */ 2240 *pfd = -1; 2241 if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) { 2242 return NULL; 2243 } 2244 2245 if (cap_spapr_tce_64) { 2246 struct kvm_create_spapr_tce_64 args = { 2247 .liobn = liobn, 2248 .page_shift = page_shift, 2249 .offset = bus_offset >> page_shift, 2250 .size = nb_table, 2251 .flags = 0 2252 }; 2253 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE_64, &args); 2254 if (fd < 0) { 2255 fprintf(stderr, 2256 "KVM: Failed to create TCE64 table for liobn 0x%x\n", 2257 liobn); 2258 return NULL; 2259 } 2260 } else if (cap_spapr_tce) { 2261 uint64_t window_size = (uint64_t) nb_table << page_shift; 2262 struct kvm_create_spapr_tce args = { 2263 .liobn = liobn, 2264 .window_size = window_size, 2265 }; 2266 if ((window_size != args.window_size) || bus_offset) { 2267 return NULL; 2268 } 2269 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args); 2270 if (fd < 0) { 2271 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n", 2272 liobn); 2273 return NULL; 2274 } 2275 } else { 2276 return NULL; 2277 } 2278 2279 len = nb_table * sizeof(uint64_t); 2280 /* FIXME: round this up to page size */ 2281 2282 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); 2283 if (table == MAP_FAILED) { 2284 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n", 2285 liobn); 2286 close(fd); 2287 return NULL; 2288 } 2289 2290 *pfd = fd; 2291 return table; 2292 } 2293 2294 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table) 2295 { 2296 long len; 2297 2298 if (fd < 0) { 2299 return -1; 2300 } 2301 2302 len = nb_table * sizeof(uint64_t); 2303 if ((munmap(table, len) < 0) || 2304 (close(fd) < 0)) { 2305 fprintf(stderr, "KVM: Unexpected error removing TCE table: %s", 2306 strerror(errno)); 2307 /* Leak the table */ 2308 } 2309 2310 return 0; 2311 } 2312 2313 int kvmppc_reset_htab(int shift_hint) 2314 { 2315 uint32_t shift = shift_hint; 2316 2317 if (!kvm_enabled()) { 2318 /* Full emulation, tell caller to allocate htab itself */ 2319 return 0; 2320 } 2321 if (kvm_vm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) { 2322 int ret; 2323 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift); 2324 if (ret == -ENOTTY) { 2325 /* At least some versions of PR KVM advertise the 2326 * capability, but don't implement the ioctl(). Oops. 2327 * Return 0 so that we allocate the htab in qemu, as is 2328 * correct for PR. */ 2329 return 0; 2330 } else if (ret < 0) { 2331 return ret; 2332 } 2333 return shift; 2334 } 2335 2336 /* We have a kernel that predates the htab reset calls. For PR 2337 * KVM, we need to allocate the htab ourselves, for an HV KVM of 2338 * this era, it has allocated a 16MB fixed size hash table already. 
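* (A 16MB HPT corresponds to an order/shift of 24, which is what the
* HV branch below returns.)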
*/ 2339 if (kvmppc_is_pr(kvm_state)) { 2340 /* PR - tell caller to allocate htab */ 2341 return 0; 2342 } else { 2343 /* HV - assume 16MB kernel allocated htab */ 2344 return 24; 2345 } 2346 } 2347 2348 static inline uint32_t mfpvr(void) 2349 { 2350 uint32_t pvr; 2351 2352 asm ("mfpvr %0" 2353 : "=r"(pvr)); 2354 return pvr; 2355 } 2356 2357 static void alter_insns(uint64_t *word, uint64_t flags, bool on) 2358 { 2359 if (on) { 2360 *word |= flags; 2361 } else { 2362 *word &= ~flags; 2363 } 2364 } 2365 2366 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data) 2367 { 2368 PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc); 2369 uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size"); 2370 uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size"); 2371 2372 /* Now fix up the class with information we can query from the host */ 2373 pcc->pvr = mfpvr(); 2374 2375 alter_insns(&pcc->insns_flags, PPC_ALTIVEC, 2376 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_ALTIVEC); 2377 alter_insns(&pcc->insns_flags2, PPC2_VSX, 2378 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_VSX); 2379 alter_insns(&pcc->insns_flags2, PPC2_DFP, 2380 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_DFP); 2381 2382 if (dcache_size != -1) { 2383 pcc->l1_dcache_size = dcache_size; 2384 } 2385 2386 if (icache_size != -1) { 2387 pcc->l1_icache_size = icache_size; 2388 } 2389 2390 #if defined(TARGET_PPC64) 2391 pcc->radix_page_info = kvm_get_radix_page_info(); 2392 2393 if ((pcc->pvr & 0xffffff00) == CPU_POWERPC_POWER9_DD1) { 2394 /* 2395 * POWER9 DD1 has some bugs which make it not really ISA 3.00 2396 * compliant. More importantly, advertising ISA 3.00 2397 * architected mode may prevent guests from activating 2398 * necessary DD1 workarounds. 2399 */ 2400 pcc->pcr_supported &= ~(PCR_COMPAT_3_00 | PCR_COMPAT_2_07 2401 | PCR_COMPAT_2_06 | PCR_COMPAT_2_05); 2402 } 2403 #endif /* defined(TARGET_PPC64) */ 2404 } 2405 2406 bool kvmppc_has_cap_epr(void) 2407 { 2408 return cap_epr; 2409 } 2410 2411 bool kvmppc_has_cap_fixup_hcalls(void) 2412 { 2413 return cap_fixup_hcalls; 2414 } 2415 2416 bool kvmppc_has_cap_htm(void) 2417 { 2418 return cap_htm; 2419 } 2420 2421 bool kvmppc_has_cap_mmu_radix(void) 2422 { 2423 return cap_mmu_radix; 2424 } 2425 2426 bool kvmppc_has_cap_mmu_hash_v3(void) 2427 { 2428 return cap_mmu_hash_v3; 2429 } 2430 2431 static bool kvmppc_power8_host(void) 2432 { 2433 bool ret = false; 2434 #ifdef TARGET_PPC64 2435 { 2436 uint32_t base_pvr = CPU_POWERPC_POWER_SERVER_MASK & mfpvr(); 2437 ret = (base_pvr == CPU_POWERPC_POWER8E_BASE) || 2438 (base_pvr == CPU_POWERPC_POWER8NVL_BASE) || 2439 (base_pvr == CPU_POWERPC_POWER8_BASE); 2440 } 2441 #endif /* TARGET_PPC64 */ 2442 return ret; 2443 } 2444 2445 static int parse_cap_ppc_safe_cache(struct kvm_ppc_cpu_char c) 2446 { 2447 bool l1d_thread_priv_req = !kvmppc_power8_host(); 2448 2449 if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_L1D_FLUSH_PR) { 2450 return 2; 2451 } else if ((!l1d_thread_priv_req || 2452 c.character & c.character_mask & H_CPU_CHAR_L1D_THREAD_PRIV) && 2453 (c.character & c.character_mask 2454 & (H_CPU_CHAR_L1D_FLUSH_ORI30 | H_CPU_CHAR_L1D_FLUSH_TRIG2))) { 2455 return 1; 2456 } 2457 2458 return 0; 2459 } 2460 2461 static int parse_cap_ppc_safe_bounds_check(struct kvm_ppc_cpu_char c) 2462 { 2463 if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_BNDS_CHK_SPEC_BAR) { 2464 return 2; 2465 } else if (c.character & c.character_mask & H_CPU_CHAR_SPEC_BAR_ORI31) { 2466 return 1; 2467 } 2468 2469 return 0; 2470 } 2471 2472 static int 
parse_cap_ppc_safe_indirect_branch(struct kvm_ppc_cpu_char c) 2473 { 2474 if (c.character & c.character_mask & H_CPU_CHAR_CACHE_COUNT_DIS) { 2475 return SPAPR_CAP_FIXED_CCD; 2476 } else if (c.character & c.character_mask & H_CPU_CHAR_BCCTRL_SERIALISED) { 2477 return SPAPR_CAP_FIXED_IBS; 2478 } 2479 2480 return 0; 2481 } 2482 2483 static void kvmppc_get_cpu_characteristics(KVMState *s) 2484 { 2485 struct kvm_ppc_cpu_char c; 2486 int ret; 2487 2488 /* Assume broken */ 2489 cap_ppc_safe_cache = 0; 2490 cap_ppc_safe_bounds_check = 0; 2491 cap_ppc_safe_indirect_branch = 0; 2492 2493 ret = kvm_vm_check_extension(s, KVM_CAP_PPC_GET_CPU_CHAR); 2494 if (!ret) { 2495 return; 2496 } 2497 ret = kvm_vm_ioctl(s, KVM_PPC_GET_CPU_CHAR, &c); 2498 if (ret < 0) { 2499 return; 2500 } 2501 2502 cap_ppc_safe_cache = parse_cap_ppc_safe_cache(c); 2503 cap_ppc_safe_bounds_check = parse_cap_ppc_safe_bounds_check(c); 2504 cap_ppc_safe_indirect_branch = parse_cap_ppc_safe_indirect_branch(c); 2505 } 2506 2507 int kvmppc_get_cap_safe_cache(void) 2508 { 2509 return cap_ppc_safe_cache; 2510 } 2511 2512 int kvmppc_get_cap_safe_bounds_check(void) 2513 { 2514 return cap_ppc_safe_bounds_check; 2515 } 2516 2517 int kvmppc_get_cap_safe_indirect_branch(void) 2518 { 2519 return cap_ppc_safe_indirect_branch; 2520 } 2521 2522 bool kvmppc_has_cap_spapr_vfio(void) 2523 { 2524 return cap_spapr_vfio; 2525 } 2526 2527 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void) 2528 { 2529 uint32_t host_pvr = mfpvr(); 2530 PowerPCCPUClass *pvr_pcc; 2531 2532 pvr_pcc = ppc_cpu_class_by_pvr(host_pvr); 2533 if (pvr_pcc == NULL) { 2534 pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr); 2535 } 2536 2537 return pvr_pcc; 2538 } 2539 2540 static int kvm_ppc_register_host_cpu_type(MachineState *ms) 2541 { 2542 TypeInfo type_info = { 2543 .name = TYPE_HOST_POWERPC_CPU, 2544 .class_init = kvmppc_host_cpu_class_init, 2545 }; 2546 MachineClass *mc = MACHINE_GET_CLASS(ms); 2547 PowerPCCPUClass *pvr_pcc; 2548 ObjectClass *oc; 2549 DeviceClass *dc; 2550 int i; 2551 2552 pvr_pcc = kvm_ppc_get_host_cpu_class(); 2553 if (pvr_pcc == NULL) { 2554 return -1; 2555 } 2556 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc)); 2557 type_register(&type_info); 2558 if (object_dynamic_cast(OBJECT(ms), TYPE_SPAPR_MACHINE)) { 2559 /* override TCG default cpu type with 'host' cpu model */ 2560 mc->default_cpu_type = TYPE_HOST_POWERPC_CPU; 2561 } 2562 2563 oc = object_class_by_name(type_info.name); 2564 g_assert(oc); 2565 2566 /* 2567 * Update generic CPU family class alias (e.g. 
on a POWER8NVL host, 2568 * we want "POWER8" to be a "family" alias that points to the current 2569 * host CPU type, too) 2570 */ 2571 dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc)); 2572 for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) { 2573 if (strcasecmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) { 2574 char *suffix; 2575 2576 ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc)); 2577 suffix = strstr(ppc_cpu_aliases[i].model, POWERPC_CPU_TYPE_SUFFIX); 2578 if (suffix) { 2579 *suffix = 0; 2580 } 2581 break; 2582 } 2583 } 2584 2585 return 0; 2586 } 2587 2588 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function) 2589 { 2590 struct kvm_rtas_token_args args = { 2591 .token = token, 2592 }; 2593 2594 if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) { 2595 return -ENOENT; 2596 } 2597 2598 strncpy(args.name, function, sizeof(args.name)); 2599 2600 return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args); 2601 } 2602 2603 int kvmppc_get_htab_fd(bool write, uint64_t index, Error **errp) 2604 { 2605 struct kvm_get_htab_fd s = { 2606 .flags = write ? KVM_GET_HTAB_WRITE : 0, 2607 .start_index = index, 2608 }; 2609 int ret; 2610 2611 if (!cap_htab_fd) { 2612 error_setg(errp, "KVM version doesn't support %s the HPT", 2613 write ? "writing" : "reading"); 2614 return -ENOTSUP; 2615 } 2616 2617 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s); 2618 if (ret < 0) { 2619 error_setg(errp, "Unable to open fd for %s HPT %s KVM: %s", 2620 write ? "writing" : "reading", write ? "to" : "from", 2621 strerror(errno)); 2622 return -errno; 2623 } 2624 2625 return ret; 2626 } 2627 2628 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns) 2629 { 2630 int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); 2631 uint8_t buf[bufsize]; 2632 ssize_t rc; 2633 2634 do { 2635 rc = read(fd, buf, bufsize); 2636 if (rc < 0) { 2637 fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n", 2638 strerror(errno)); 2639 return rc; 2640 } else if (rc) { 2641 uint8_t *buffer = buf; 2642 ssize_t n = rc; 2643 while (n) { 2644 struct kvm_get_htab_header *head = 2645 (struct kvm_get_htab_header *) buffer; 2646 size_t chunksize = sizeof(*head) + 2647 HASH_PTE_SIZE_64 * head->n_valid; 2648 2649 qemu_put_be32(f, head->index); 2650 qemu_put_be16(f, head->n_valid); 2651 qemu_put_be16(f, head->n_invalid); 2652 qemu_put_buffer(f, (void *)(head + 1), 2653 HASH_PTE_SIZE_64 * head->n_valid); 2654 2655 buffer += chunksize; 2656 n -= chunksize; 2657 } 2658 } 2659 } while ((rc != 0) 2660 && ((max_ns < 0) 2661 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns))); 2662 2663 return (rc == 0) ? 
1 : 0; 2664 } 2665 2666 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index, 2667 uint16_t n_valid, uint16_t n_invalid) 2668 { 2669 struct kvm_get_htab_header *buf; 2670 size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64; 2671 ssize_t rc; 2672 2673 buf = alloca(chunksize); 2674 buf->index = index; 2675 buf->n_valid = n_valid; 2676 buf->n_invalid = n_invalid; 2677 2678 qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid); 2679 2680 rc = write(fd, buf, chunksize); 2681 if (rc < 0) { 2682 fprintf(stderr, "Error writing KVM hash table: %s\n", 2683 strerror(errno)); 2684 return rc; 2685 } 2686 if (rc != chunksize) { 2687 /* We should never get a short write on a single chunk */ 2688 fprintf(stderr, "Short write, restoring KVM hash table\n"); 2689 return -1; 2690 } 2691 return 0; 2692 } 2693 2694 bool kvm_arch_stop_on_emulation_error(CPUState *cpu) 2695 { 2696 return true; 2697 } 2698 2699 void kvm_arch_init_irq_routing(KVMState *s) 2700 { 2701 } 2702 2703 void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n) 2704 { 2705 int fd, rc; 2706 int i; 2707 2708 fd = kvmppc_get_htab_fd(false, ptex, &error_abort); 2709 2710 i = 0; 2711 while (i < n) { 2712 struct kvm_get_htab_header *hdr; 2713 int m = n < HPTES_PER_GROUP ? n : HPTES_PER_GROUP; 2714 char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64]; 2715 2716 rc = read(fd, buf, sizeof(buf)); 2717 if (rc < 0) { 2718 hw_error("kvmppc_read_hptes: Unable to read HPTEs"); 2719 } 2720 2721 hdr = (struct kvm_get_htab_header *)buf; 2722 while ((i < n) && ((char *)hdr < (buf + rc))) { 2723 int invalid = hdr->n_invalid, valid = hdr->n_valid; 2724 2725 if (hdr->index != (ptex + i)) { 2726 hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32 2727 " != (%"HWADDR_PRIu" + %d", hdr->index, ptex, i); 2728 } 2729 2730 if (n - i < valid) { 2731 valid = n - i; 2732 } 2733 memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * valid); 2734 i += valid; 2735 2736 if ((n - i) < invalid) { 2737 invalid = n - i; 2738 } 2739 memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64); 2740 i += invalid; 2741 2742 hdr = (struct kvm_get_htab_header *) 2743 ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid); 2744 } 2745 } 2746 2747 close(fd); 2748 } 2749 2750 void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1) 2751 { 2752 int fd, rc; 2753 struct { 2754 struct kvm_get_htab_header hdr; 2755 uint64_t pte0; 2756 uint64_t pte1; 2757 } buf; 2758 2759 fd = kvmppc_get_htab_fd(true, 0 /* Ignored */, &error_abort); 2760 2761 buf.hdr.n_valid = 1; 2762 buf.hdr.n_invalid = 0; 2763 buf.hdr.index = ptex; 2764 buf.pte0 = cpu_to_be64(pte0); 2765 buf.pte1 = cpu_to_be64(pte1); 2766 2767 rc = write(fd, &buf, sizeof(buf)); 2768 if (rc != sizeof(buf)) { 2769 hw_error("kvmppc_write_hpte: Unable to update KVM HPT"); 2770 } 2771 close(fd); 2772 } 2773 2774 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route, 2775 uint64_t address, uint32_t data, PCIDevice *dev) 2776 { 2777 return 0; 2778 } 2779 2780 int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route, 2781 int vector, PCIDevice *dev) 2782 { 2783 return 0; 2784 } 2785 2786 int kvm_arch_release_virq_post(int virq) 2787 { 2788 return 0; 2789 } 2790 2791 int kvm_arch_msi_data_to_gsi(uint32_t data) 2792 { 2793 return data & 0xffff; 2794 } 2795 2796 int kvmppc_enable_hwrng(void) 2797 { 2798 if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) { 2799 return -1; 2800 } 2801 2802 return kvmppc_enable_hcall(kvm_state, H_RANDOM); 2803 } 2804 2805 void 
kvmppc_check_papr_resize_hpt(Error **errp) 2806 { 2807 if (!kvm_enabled()) { 2808 return; /* No KVM, we're good */ 2809 } 2810 2811 if (cap_resize_hpt) { 2812 return; /* Kernel has explicit support, we're good */ 2813 } 2814 2815 /* Otherwise fallback on looking for PR KVM */ 2816 if (kvmppc_is_pr(kvm_state)) { 2817 return; 2818 } 2819 2820 error_setg(errp, 2821 "Hash page table resizing not available with this KVM version"); 2822 } 2823 2824 int kvmppc_resize_hpt_prepare(PowerPCCPU *cpu, target_ulong flags, int shift) 2825 { 2826 CPUState *cs = CPU(cpu); 2827 struct kvm_ppc_resize_hpt rhpt = { 2828 .flags = flags, 2829 .shift = shift, 2830 }; 2831 2832 if (!cap_resize_hpt) { 2833 return -ENOSYS; 2834 } 2835 2836 return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_PREPARE, &rhpt); 2837 } 2838 2839 int kvmppc_resize_hpt_commit(PowerPCCPU *cpu, target_ulong flags, int shift) 2840 { 2841 CPUState *cs = CPU(cpu); 2842 struct kvm_ppc_resize_hpt rhpt = { 2843 .flags = flags, 2844 .shift = shift, 2845 }; 2846 2847 if (!cap_resize_hpt) { 2848 return -ENOSYS; 2849 } 2850 2851 return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_COMMIT, &rhpt); 2852 } 2853 2854 /* 2855 * This is a helper function to detect a post migration scenario 2856 * in which a guest, running as KVM-HV, freezes in cpu_post_load because 2857 * the guest kernel can't handle a PVR value other than the actual host 2858 * PVR in KVM_SET_SREGS, even if pvr_match() returns true. 2859 * 2860 * If we don't have cap_ppc_pvr_compat and we're not running in PR 2861 * (so, we're HV), return true. The workaround itself is done in 2862 * cpu_post_load. 2863 * 2864 * The order here is important: we'll only check for KVM PR as a 2865 * fallback if the guest kernel can't handle the situation itself. 2866 * We need to avoid as much as possible querying the running KVM type 2867 * in QEMU level. 2868 */ 2869 bool kvmppc_pvr_workaround_required(PowerPCCPU *cpu) 2870 { 2871 CPUState *cs = CPU(cpu); 2872 2873 if (!kvm_enabled()) { 2874 return false; 2875 } 2876 2877 if (cap_ppc_pvr_compat) { 2878 return false; 2879 } 2880 2881 return !kvmppc_is_pr(cs->kvm_state); 2882 } 2883