1 /* 2 * PowerPC implementation of KVM hooks 3 * 4 * Copyright IBM Corp. 2007 5 * Copyright (C) 2011 Freescale Semiconductor, Inc. 6 * 7 * Authors: 8 * Jerone Young <jyoung5@us.ibm.com> 9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com> 10 * Hollis Blanchard <hollisb@us.ibm.com> 11 * 12 * This work is licensed under the terms of the GNU GPL, version 2 or later. 13 * See the COPYING file in the top-level directory. 14 * 15 */ 16 17 #include "qemu/osdep.h" 18 #include <dirent.h> 19 #include <sys/ioctl.h> 20 #include <sys/vfs.h> 21 22 #include <linux/kvm.h> 23 24 #include "qemu-common.h" 25 #include "qapi/error.h" 26 #include "qemu/error-report.h" 27 #include "cpu.h" 28 #include "cpu-models.h" 29 #include "qemu/timer.h" 30 #include "sysemu/sysemu.h" 31 #include "sysemu/hw_accel.h" 32 #include "kvm_ppc.h" 33 #include "sysemu/cpus.h" 34 #include "sysemu/device_tree.h" 35 #include "mmu-hash64.h" 36 37 #include "hw/sysbus.h" 38 #include "hw/ppc/spapr.h" 39 #include "hw/ppc/spapr_vio.h" 40 #include "hw/ppc/spapr_cpu_core.h" 41 #include "hw/ppc/ppc.h" 42 #include "sysemu/watchdog.h" 43 #include "trace.h" 44 #include "exec/gdbstub.h" 45 #include "exec/memattrs.h" 46 #include "exec/ram_addr.h" 47 #include "sysemu/hostmem.h" 48 #include "qemu/cutils.h" 49 #include "qemu/mmap-alloc.h" 50 #include "elf.h" 51 #include "sysemu/kvm_int.h" 52 53 //#define DEBUG_KVM 54 55 #ifdef DEBUG_KVM 56 #define DPRINTF(fmt, ...) \ 57 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0) 58 #else 59 #define DPRINTF(fmt, ...) \ 60 do { } while (0) 61 #endif 62 63 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/" 64 65 const KVMCapabilityInfo kvm_arch_required_capabilities[] = { 66 KVM_CAP_LAST_INFO 67 }; 68 69 static int cap_interrupt_unset = false; 70 static int cap_interrupt_level = false; 71 static int cap_segstate; 72 static int cap_booke_sregs; 73 static int cap_ppc_smt; 74 static int cap_ppc_smt_possible; 75 static int cap_ppc_rma; 76 static int cap_spapr_tce; 77 static int cap_spapr_tce_64; 78 static int cap_spapr_multitce; 79 static int cap_spapr_vfio; 80 static int cap_hior; 81 static int cap_one_reg; 82 static int cap_epr; 83 static int cap_ppc_watchdog; 84 static int cap_papr; 85 static int cap_htab_fd; 86 static int cap_fixup_hcalls; 87 static int cap_htm; /* Hardware transactional memory support */ 88 static int cap_mmu_radix; 89 static int cap_mmu_hash_v3; 90 static int cap_resize_hpt; 91 static int cap_ppc_pvr_compat; 92 93 static uint32_t debug_inst_opcode; 94 95 /* XXX We have a race condition where we actually have a level triggered 96 * interrupt, but the infrastructure can't expose that yet, so the guest 97 * takes but ignores it, goes to sleep and never gets notified that there's 98 * still an interrupt pending. 99 * 100 * As a quick workaround, let's just wake up again 20 ms after we injected 101 * an interrupt. That way we can assure that we're always reinjecting 102 * interrupts in case the guest swallowed them. 103 */ 104 static QEMUTimer *idle_timer; 105 106 static void kvm_kick_cpu(void *opaque) 107 { 108 PowerPCCPU *cpu = opaque; 109 110 qemu_cpu_kick(CPU(cpu)); 111 } 112 113 /* Check whether we are running with KVM-PR (instead of KVM-HV). This 114 * should only be used for fallback tests - generally we should use 115 * explicit capabilities for the features we want, rather than 116 * assuming what is/isn't available depending on the KVM variant. 
 */
static bool kvmppc_is_pr(KVMState *ks)
{
    /* Assume KVM-PR if the GET_PVINFO capability is available */
    return kvm_vm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
}

static int kvm_ppc_register_host_cpu_type(MachineState *ms);

int kvm_arch_init(MachineState *ms, KVMState *s)
{
    cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
    cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
    cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
    cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
    cap_ppc_smt_possible = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT_POSSIBLE);
    cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
    cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
    cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64);
    cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
    cap_spapr_vfio = false;
    cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
    cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
    cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
    cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
    /* Note: we don't set cap_papr here, because this capability is
     * only activated later by kvmppc_set_papr() */
    cap_htab_fd = kvm_vm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
    cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
    cap_ppc_smt = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT);
    cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
    cap_mmu_radix = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_RADIX);
    cap_mmu_hash_v3 = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_HASH_V3);
    cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT);
    /*
     * Note: set to false because there is no such capability in KVM
     * at this moment.
     *
     * TODO: call kvm_vm_check_extension() with the right capability
     * once the kernel starts implementing it.
     */
    cap_ppc_pvr_compat = false;

    if (!cap_interrupt_level) {
        fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
                        "VM to stall at times!\n");
    }

    kvm_ppc_register_host_cpu_type(ms);

    return 0;
}

int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
{
    return 0;
}
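/*
 * The cap_* flags cached in kvm_arch_init() follow a check-then-use
 * pattern: probe the kernel once at init time, then guard every optional
 * ioctl on the cached value. A minimal sketch of that pattern (an
 * illustrative, hypothetical caller - not code from this file):
 *
 *     if (cap_mmu_radix) {
 *         // safe to offer a radix MMU to the guest on this kernel
 *     } else {
 *         // fall back to a hash MMU only
 *     }
 *
 * kvmppc_is_pr() above is deliberately the exception: it infers the KVM
 * flavour (PR vs HV) from KVM_CAP_PPC_GET_PVINFO and should only be used
 * as a fallback where no dedicated capability exists.
 */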
static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
{
    CPUPPCState *cenv = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_sregs sregs;
    int ret;

    if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
        /* What we're really trying to say is "if we're on BookE, we use
           the native PVR for now". This is the only sane way to check it,
           though it may mislead users into thinking they can run BookE
           guests on BookS hosts. Let's hope nobody is daring enough to
           try :) */
        return 0;
    } else {
        if (!cap_segstate) {
            fprintf(stderr, "kvm error: missing PVR setting capability\n");
            return -ENOSYS;
        }
    }

    ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
    if (ret) {
        return ret;
    }

    sregs.pvr = cenv->spr[SPR_PVR];
    return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
}

/* Set up a shared TLB array with KVM */
static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_book3e_206_tlb_params params = {};
    struct kvm_config_tlb cfg = {};
    unsigned int entries = 0;
    int ret, i;

    if (!kvm_enabled() ||
        !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
        return 0;
    }

    assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);

    for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
        params.tlb_sizes[i] = booke206_tlb_size(env, i);
        params.tlb_ways[i] = booke206_tlb_ways(env, i);
        entries += params.tlb_sizes[i];
    }

    assert(entries == env->nb_tlb);
    assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));

    env->tlb_dirty = true;

    cfg.array = (uintptr_t)env->tlb.tlbm;
    cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
    cfg.params = (uintptr_t)&params;
    cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;

    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
    if (ret < 0) {
        fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
                __func__, strerror(-ret));
        return ret;
    }

    env->kvm_sw_tlb = true;
    return 0;
}


#if defined(TARGET_PPC64)
static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
                                       struct kvm_ppc_smmu_info *info)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);

    memset(info, 0, sizeof(*info));

    /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
     * need to "guess" what the supported page sizes are.
     *
     * For that to work we make a few assumptions:
     *
     * - Check whether we are running "PR" KVM which only supports 4K
     *   and 16M pages, but supports them regardless of the backing
     *   store characteristics. We also don't support 1T segments.
     *
     *   This is safe as if HV KVM ever supports that capability or PR
     *   KVM grows support for more page/segment sizes, those versions
     *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
     *   will not hit this fallback.
     *
     * - Else we are running HV KVM. This means we only support page
     *   sizes that fit in the backing store. Additionally we only
     *   advertise 64K pages if the processor is ARCH 2.06 and we assume
     *   P7 encodings for the SLB and hash table. Here too, we assume
     *   support for any newer processor will mean a kernel that
     *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
     *   this fallback.
     */
    if (kvmppc_is_pr(cs->kvm_state)) {
        /* No flags */
        info->flags = 0;
        info->slb_size = 64;

        /* Standard 4k base page size segment */
        info->sps[0].page_shift = 12;
        info->sps[0].slb_enc = 0;
        info->sps[0].enc[0].page_shift = 12;
        info->sps[0].enc[0].pte_enc = 0;

        /* Standard 16M large page size segment */
        info->sps[1].page_shift = 24;
        info->sps[1].slb_enc = SLB_VSID_L;
        info->sps[1].enc[0].page_shift = 24;
        info->sps[1].enc[0].pte_enc = 0;
    } else {
        int i = 0;

        /* HV KVM has backing store size restrictions */
        info->flags = KVM_PPC_PAGE_SIZES_REAL;

        if (env->mmu_model & POWERPC_MMU_1TSEG) {
            info->flags |= KVM_PPC_1T_SEGMENTS;
        }

        if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 ||
            POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) {
            info->slb_size = 32;
        } else {
            info->slb_size = 64;
        }

        /* Standard 4k base page size segment */
        info->sps[i].page_shift = 12;
        info->sps[i].slb_enc = 0;
        info->sps[i].enc[0].page_shift = 12;
        info->sps[i].enc[0].pte_enc = 0;
        i++;

        /* 64K on MMU 2.06 and later */
        if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 ||
            POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) {
            info->sps[i].page_shift = 16;
            info->sps[i].slb_enc = 0x110;
            info->sps[i].enc[0].page_shift = 16;
            info->sps[i].enc[0].pte_enc = 1;
            i++;
        }

        /* Standard 16M large page size segment */
        info->sps[i].page_shift = 24;
        info->sps[i].slb_enc = SLB_VSID_L;
        info->sps[i].enc[0].page_shift = 24;
        info->sps[i].enc[0].pte_enc = 0;
    }
}

static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
{
    CPUState *cs = CPU(cpu);
    int ret;

    if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
        ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
        if (ret == 0) {
            return;
        }
    }

    kvm_get_fallback_smmu_info(cpu, info);
}

struct ppc_radix_page_info *kvm_get_radix_page_info(void)
{
    KVMState *s = KVM_STATE(current_machine->accelerator);
    struct ppc_radix_page_info *radix_page_info;
    struct kvm_ppc_rmmu_info rmmu_info;
    int i;

    if (!kvm_check_extension(s, KVM_CAP_PPC_MMU_RADIX)) {
        return NULL;
    }
    if (kvm_vm_ioctl(s, KVM_PPC_GET_RMMU_INFO, &rmmu_info)) {
        return NULL;
    }
    radix_page_info = g_malloc0(sizeof(*radix_page_info));
    radix_page_info->count = 0;
    for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) {
        if (rmmu_info.ap_encodings[i]) {
            radix_page_info->entries[i] = rmmu_info.ap_encodings[i];
            radix_page_info->count++;
        }
    }
    return radix_page_info;
}

target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu,
                                     bool radix, bool gtse,
                                     uint64_t proc_tbl)
{
    CPUState *cs = CPU(cpu);
    int ret;
    uint64_t flags = 0;
    struct kvm_ppc_mmuv3_cfg cfg = {
        .process_table = proc_tbl,
    };

    if (radix) {
        flags |= KVM_PPC_MMUV3_RADIX;
    }
    if (gtse) {
        flags |= KVM_PPC_MMUV3_GTSE;
    }
    cfg.flags = flags;
    ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_CONFIGURE_V3_MMU, &cfg);
    switch (ret) {
    case 0:
        return H_SUCCESS;
    case -EINVAL:
        return H_PARAMETER;
    case -ENODEV:
        return H_NOT_AVAILABLE;
    default:
        return H_HARDWARE;
    }
}
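/*
 * Worked example for the page-size filtering implemented below (numbers
 * are hypothetical): with HV KVM, kvm_get_smmu_info() reports
 * KVM_PPC_PAGE_SIZES_REAL, so only page sizes that fit in the backing
 * store survive. If guest RAM is backed by 16 MiB hugepages,
 * qemu_getrampagesize() returns 0x1000000 and
 *
 *     kvm_valid_page_size(flags, 0x1000000, 12) -> true  (4 KiB fits)
 *     kvm_valid_page_size(flags, 0x1000000, 24) -> true  (16 MiB fits)
 *     kvm_valid_page_size(flags, 0x1000000, 34) -> false (16 GiB does not)
 *
 * kvm_fixup_page_sizes() uses exactly this predicate to drop segment and
 * page-size encodings the backing store cannot satisfy.
 */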
static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
{
    if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
        return true;
    }

    return (1ul << shift) <= rampgsize;
}

static long max_cpu_page_size;

static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
{
    static struct kvm_ppc_smmu_info smmu_info;
    static bool has_smmu_info;
    CPUPPCState *env = &cpu->env;
    int iq, ik, jq, jk;
    bool has_64k_pages = false;

    /* We only handle page sizes for 64-bit server guests for now */
    if (!(env->mmu_model & POWERPC_MMU_64)) {
        return;
    }

    /* Collect MMU info from kernel if not already */
    if (!has_smmu_info) {
        kvm_get_smmu_info(cpu, &smmu_info);
        has_smmu_info = true;
    }

    if (!max_cpu_page_size) {
        max_cpu_page_size = qemu_getrampagesize();
    }

    /* Convert to QEMU form */
    memset(&env->sps, 0, sizeof(env->sps));

    /* If we have HV KVM, we need to forbid CI large pages if our
     * host page size is smaller than 64K.
     */
    if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) {
        env->ci_large_pages = getpagesize() >= 0x10000;
    }

    /*
     * XXX This loop should be an entry wide AND of the capabilities that
     *     the selected CPU has with the capabilities that KVM supports.
     */
    for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
        struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
        struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];

        if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
                                 ksps->page_shift)) {
            continue;
        }
        qsps->page_shift = ksps->page_shift;
        qsps->slb_enc = ksps->slb_enc;
        for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
            if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
                                     ksps->enc[jk].page_shift)) {
                continue;
            }
            if (ksps->enc[jk].page_shift == 16) {
                has_64k_pages = true;
            }
            qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
            qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
            if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
                break;
            }
        }
        if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
            break;
        }
    }
    env->slb_nr = smmu_info.slb_size;
    if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
        env->mmu_model &= ~POWERPC_MMU_1TSEG;
    }
    if (!has_64k_pages) {
        env->mmu_model &= ~POWERPC_MMU_64K;
    }
}

bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path)
{
    Object *mem_obj = object_resolve_path(obj_path, NULL);
    char *mempath = object_property_get_str(mem_obj, "mem-path", NULL);
    long pagesize;

    if (mempath) {
        pagesize = qemu_mempath_getpagesize(mempath);
        g_free(mempath);
    } else {
        pagesize = getpagesize();
    }

    return pagesize >= max_cpu_page_size;
}

#else /* defined (TARGET_PPC64) */

static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
{
}

bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path)
{
    return true;
}

#endif /* !defined (TARGET_PPC64) */

unsigned long kvm_arch_vcpu_id(CPUState *cpu)
{
    return POWERPC_CPU(cpu)->vcpu_id;
}
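/*
 * Hardware debug register bookkeeping starts here. The flow, roughly
 * (a sketch only; the gdbstub entry points live elsewhere in QEMU):
 *
 *     gdb "hbreak"/"watch" request
 *         -> kvm_arch_insert_hw_breakpoint()  records it in hw_debug_points[]
 *         -> kvm_arch_update_guest_debug()    copies the table to dbg->arch.bp[]
 *         -> KVM_SET_GUEST_DEBUG              issued by the common KVM code
 *
 * kvm_handle_debug() later matches a KVM_EXIT_DEBUG address back against
 * the same hw_debug_points[] table.
 */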
/* e500 supports 2 h/w breakpoints and 2 watchpoints.
 * book3s supports only 1 watchpoint, so an array size
 * of 4 is sufficient for now.
 */
#define MAX_HW_BKPTS 4

static struct HWBreakpoint {
    target_ulong addr;
    int type;
} hw_debug_points[MAX_HW_BKPTS];

static CPUWatchpoint hw_watchpoint;

/* By default no h/w breakpoints or watchpoints are supported */
static int max_hw_breakpoint;
static int max_hw_watchpoint;
static int nb_hw_breakpoint;
static int nb_hw_watchpoint;

static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
{
    if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
        max_hw_breakpoint = 2;
        max_hw_watchpoint = 2;
    }

    if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
        fprintf(stderr, "Error initializing h/w breakpoints\n");
        return;
    }
}

int kvm_arch_init_vcpu(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *cenv = &cpu->env;
    int ret;

    /* Gather server mmu info from KVM and update the CPU state */
    kvm_fixup_page_sizes(cpu);

    /* Synchronize sregs with kvm */
    ret = kvm_arch_sync_sregs(cpu);
    if (ret) {
        if (ret == -EINVAL) {
            error_report("Register sync failed... If you're using kvm-hv.ko,"
                         " only \"-cpu host\" is possible");
        }
        return ret;
    }

    idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);

    switch (cenv->mmu_model) {
    case POWERPC_MMU_BOOKE206:
        /* This target supports access to KVM's guest TLB */
        ret = kvm_booke206_tlb_init(cpu);
        break;
    case POWERPC_MMU_2_07:
        if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
            /* KVM-HV has transactional memory on POWER8 even without the
             * KVM_CAP_PPC_HTM extension, so enable it here as long as
             * it's available to userspace on the host. */
            if (qemu_getauxval(AT_HWCAP2) & PPC_FEATURE2_HAS_HTM) {
                cap_htm = true;
            }
        }
        break;
    default:
        break;
    }

    kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
    kvmppc_hw_debug_points_init(cenv);

    return ret;
}

static void kvm_sw_tlb_put(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_dirty_tlb dirty_tlb;
    unsigned char *bitmap;
    int ret;

    if (!env->kvm_sw_tlb) {
        return;
    }

    bitmap = g_malloc((env->nb_tlb + 7) / 8);
    memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);

    dirty_tlb.bitmap = (uintptr_t)bitmap;
    dirty_tlb.num_dirty = env->nb_tlb;

    ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
    if (ret) {
        fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
                __func__, strerror(-ret));
    }

    g_free(bitmap);
}

static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    union {
        uint32_t u32;
        uint64_t u64;
    } val;
    struct kvm_one_reg reg = {
        .id = id,
        .addr = (uintptr_t) &val,
    };
    int ret;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret != 0) {
        trace_kvm_failed_spr_get(spr, strerror(errno));
    } else {
        switch (id & KVM_REG_SIZE_MASK) {
        case KVM_REG_SIZE_U32:
            env->spr[spr] = val.u32;
            break;

        case KVM_REG_SIZE_U64:
            env->spr[spr] = val.u64;
            break;

        default:
            /* Don't handle this size yet */
            abort();
        }
    }
}
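/*
 * Sketch of how kvm_get_one_spr()/kvm_put_one_spr() are driven
 * (illustrative only; the real register list is walked from
 * kvm_arch_get_registers()/kvm_arch_put_registers() via
 * env->spr_cb[i].one_reg_id): the transfer width is encoded in the
 * ONE_REG id itself, e.g. for a hypothetical 64-bit SPR
 *
 *     uint64_t id = KVM_REG_PPC | KVM_REG_SIZE_U64 | <register offset>;
 *     kvm_get_one_spr(cs, id, SPR_FOO);   // refresh env->spr[SPR_FOO]
 *     kvm_put_one_spr(cs, id, SPR_FOO);   // write it back to KVM
 *
 * Both helpers only trace failures rather than propagating them, so a
 * register the kernel doesn't know simply leaves the QEMU-side value
 * untouched.
 */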
static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    union {
        uint32_t u32;
        uint64_t u64;
    } val;
    struct kvm_one_reg reg = {
        .id = id,
        .addr = (uintptr_t) &val,
    };
    int ret;

    switch (id & KVM_REG_SIZE_MASK) {
    case KVM_REG_SIZE_U32:
        val.u32 = env->spr[spr];
        break;

    case KVM_REG_SIZE_U64:
        val.u64 = env->spr[spr];
        break;

    default:
        /* Don't handle this size yet */
        abort();
    }

    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret != 0) {
        trace_kvm_failed_spr_set(spr, strerror(errno));
    }
}

static int kvm_put_fp(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int i;
    int ret;

    if (env->insns_flags & PPC_FLOAT) {
        uint64_t fpscr = env->fpscr;
        bool vsx = !!(env->insns_flags2 & PPC2_VSX);

        reg.id = KVM_REG_PPC_FPSCR;
        reg.addr = (uintptr_t)&fpscr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            uint64_t vsr[2];

#ifdef HOST_WORDS_BIGENDIAN
            vsr[0] = float64_val(env->fpr[i]);
            vsr[1] = env->vsr[i];
#else
            vsr[0] = env->vsr[i];
            vsr[1] = float64_val(env->fpr[i]);
#endif
            reg.addr = (uintptr_t) &vsr;
            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);

            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
                        i, strerror(errno));
                return ret;
            }
        }
    }

    if (env->insns_flags & PPC_ALTIVEC) {
        reg.id = KVM_REG_PPC_VSCR;
        reg.addr = (uintptr_t)&env->vscr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            reg.id = KVM_REG_PPC_VR(i);
            reg.addr = (uintptr_t)&env->avr[i];
            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
                return ret;
            }
        }
    }

    return 0;
}

static int kvm_get_fp(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int i;
    int ret;

    if (env->insns_flags & PPC_FLOAT) {
        uint64_t fpscr;
        bool vsx = !!(env->insns_flags2 & PPC2_VSX);

        reg.id = KVM_REG_PPC_FPSCR;
        reg.addr = (uintptr_t)&fpscr;
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
            return ret;
        } else {
            env->fpscr = fpscr;
        }

        for (i = 0; i < 32; i++) {
            uint64_t vsr[2];

            reg.addr = (uintptr_t) &vsr;
            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);

            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to get %s%d from KVM: %s\n",
                        vsx ? "VSR" : "FPR", i, strerror(errno));
                return ret;
            } else {
#ifdef HOST_WORDS_BIGENDIAN
                env->fpr[i] = vsr[0];
                if (vsx) {
                    env->vsr[i] = vsr[1];
                }
#else
                env->fpr[i] = vsr[1];
                if (vsx) {
                    env->vsr[i] = vsr[0];
                }
#endif
            }
        }
    }

    if (env->insns_flags & PPC_ALTIVEC) {
        reg.id = KVM_REG_PPC_VSCR;
        reg.addr = (uintptr_t)&env->vscr;
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            reg.id = KVM_REG_PPC_VR(i);
            reg.addr = (uintptr_t)&env->avr[i];
            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to get VR%d from KVM: %s\n",
                        i, strerror(errno));
                return ret;
            }
        }
    }

    return 0;
}

#if defined(TARGET_PPC64)
static int kvm_get_vpa(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int ret;

    reg.id = KVM_REG_PPC_VPA_ADDR;
    reg.addr = (uintptr_t)&env->vpa_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
        return ret;
    }

    assert((uintptr_t)&env->slb_shadow_size
           == ((uintptr_t)&env->slb_shadow_addr + 8));
    reg.id = KVM_REG_PPC_VPA_SLB;
    reg.addr = (uintptr_t)&env->slb_shadow_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
                strerror(errno));
        return ret;
    }

    assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
    reg.id = KVM_REG_PPC_VPA_DTL;
    reg.addr = (uintptr_t)&env->dtl_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
                strerror(errno));
        return ret;
    }

    return 0;
}
static int kvm_put_vpa(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int ret;

    /* SLB shadow or DTL can't be registered unless a master VPA is
     * registered. That means when restoring state, if a VPA *is*
     * registered, we need to set that up first. If not, we need to
     * deregister the others before deregistering the master VPA */
    assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));

    if (env->vpa_addr) {
        reg.id = KVM_REG_PPC_VPA_ADDR;
        reg.addr = (uintptr_t)&env->vpa_addr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
            return ret;
        }
    }

    assert((uintptr_t)&env->slb_shadow_size
           == ((uintptr_t)&env->slb_shadow_addr + 8));
    reg.id = KVM_REG_PPC_VPA_SLB;
    reg.addr = (uintptr_t)&env->slb_shadow_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
        return ret;
    }

    assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
    reg.id = KVM_REG_PPC_VPA_DTL;
    reg.addr = (uintptr_t)&env->dtl_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
                strerror(errno));
        return ret;
    }

    if (!env->vpa_addr) {
        reg.id = KVM_REG_PPC_VPA_ADDR;
        reg.addr = (uintptr_t)&env->vpa_addr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
            return ret;
        }
    }

    return 0;
}
#endif /* TARGET_PPC64 */
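/*
 * Book3S special register synchronisation. kvmppc_put_books_sregs() below
 * is the write-back half; kvmppc_get_books_sregs() further down is the
 * read half. Both are gated on cap_segstate, along the lines of this
 * simplified sketch of the callers later in this file:
 *
 *     if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
 *         ret = kvmppc_put_books_sregs(cpu);   // kvm_arch_put_registers()
 *     }
 *     ...
 *     if (cap_segstate) {
 *         ret = kvmppc_get_books_sregs(cpu);   // kvm_arch_get_registers()
 *     }
 */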
int kvmppc_put_books_sregs(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    struct kvm_sregs sregs;
    int i;

    sregs.pvr = env->spr[SPR_PVR];

    if (cpu->vhyp) {
        PPCVirtualHypervisorClass *vhc =
            PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
        sregs.u.s.sdr1 = vhc->encode_hpt_for_kvm_pr(cpu->vhyp);
    } else {
        sregs.u.s.sdr1 = env->spr[SPR_SDR1];
    }

    /* Sync SLB */
#ifdef TARGET_PPC64
    for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
        sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
        if (env->slb[i].esid & SLB_ESID_V) {
            sregs.u.s.ppc64.slb[i].slbe |= i;
        }
        sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
    }
#endif

    /* Sync SRs */
    for (i = 0; i < 16; i++) {
        sregs.u.s.ppc32.sr[i] = env->sr[i];
    }

    /* Sync BATs */
    for (i = 0; i < 8; i++) {
        /* Beware. We have to swap upper and lower bits here */
        sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
            | env->DBAT[1][i];
        sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
            | env->IBAT[1][i];
    }

    return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
}

int kvm_arch_put_registers(CPUState *cs, int level)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_regs regs;
    int ret;
    int i;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    regs.ctr = env->ctr;
    regs.lr = env->lr;
    regs.xer = cpu_read_xer(env);
    regs.msr = env->msr;
    regs.pc = env->nip;

    regs.srr0 = env->spr[SPR_SRR0];
    regs.srr1 = env->spr[SPR_SRR1];

    regs.sprg0 = env->spr[SPR_SPRG0];
    regs.sprg1 = env->spr[SPR_SPRG1];
    regs.sprg2 = env->spr[SPR_SPRG2];
    regs.sprg3 = env->spr[SPR_SPRG3];
    regs.sprg4 = env->spr[SPR_SPRG4];
    regs.sprg5 = env->spr[SPR_SPRG5];
    regs.sprg6 = env->spr[SPR_SPRG6];
    regs.sprg7 = env->spr[SPR_SPRG7];

    regs.pid = env->spr[SPR_BOOKE_PID];

    for (i = 0; i < 32; i++) {
        regs.gpr[i] = env->gpr[i];
    }

    regs.cr = 0;
    for (i = 0; i < 8; i++) {
        regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
    }

    ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    kvm_put_fp(cs);

    if (env->tlb_dirty) {
        kvm_sw_tlb_put(cpu);
        env->tlb_dirty = false;
    }

    if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
        ret = kvmppc_put_books_sregs(cpu);
        if (ret < 0) {
            return ret;
        }
    }

    if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
        kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
    }

    if (cap_one_reg) {
        int i;

        /* We deliberately ignore errors here, for kernels which have
         * the ONE_REG calls, but don't support the specific
         * registers, there's a reasonable chance things will still
         * work, at least until we try to migrate.
*/ 1047 for (i = 0; i < 1024; i++) { 1048 uint64_t id = env->spr_cb[i].one_reg_id; 1049 1050 if (id != 0) { 1051 kvm_put_one_spr(cs, id, i); 1052 } 1053 } 1054 1055 #ifdef TARGET_PPC64 1056 if (msr_ts) { 1057 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) { 1058 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]); 1059 } 1060 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) { 1061 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]); 1062 } 1063 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr); 1064 kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr); 1065 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr); 1066 kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr); 1067 kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr); 1068 kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr); 1069 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave); 1070 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr); 1071 kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr); 1072 kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar); 1073 } 1074 1075 if (cap_papr) { 1076 if (kvm_put_vpa(cs) < 0) { 1077 DPRINTF("Warning: Unable to set VPA information to KVM\n"); 1078 } 1079 } 1080 1081 kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset); 1082 #endif /* TARGET_PPC64 */ 1083 } 1084 1085 return ret; 1086 } 1087 1088 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor) 1089 { 1090 env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR]; 1091 } 1092 1093 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu) 1094 { 1095 CPUPPCState *env = &cpu->env; 1096 struct kvm_sregs sregs; 1097 int ret; 1098 1099 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs); 1100 if (ret < 0) { 1101 return ret; 1102 } 1103 1104 if (sregs.u.e.features & KVM_SREGS_E_BASE) { 1105 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0; 1106 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1; 1107 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr; 1108 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear; 1109 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr; 1110 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr; 1111 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr; 1112 env->spr[SPR_DECR] = sregs.u.e.dec; 1113 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff; 1114 env->spr[SPR_TBU] = sregs.u.e.tb >> 32; 1115 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave; 1116 } 1117 1118 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) { 1119 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir; 1120 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0; 1121 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1; 1122 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar; 1123 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr; 1124 } 1125 1126 if (sregs.u.e.features & KVM_SREGS_E_64) { 1127 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr; 1128 } 1129 1130 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) { 1131 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8; 1132 } 1133 1134 if (sregs.u.e.features & KVM_SREGS_E_IVOR) { 1135 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0]; 1136 kvm_sync_excp(env, POWERPC_EXCP_CRITICAL, SPR_BOOKE_IVOR0); 1137 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1]; 1138 kvm_sync_excp(env, POWERPC_EXCP_MCHECK, SPR_BOOKE_IVOR1); 1139 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2]; 1140 kvm_sync_excp(env, POWERPC_EXCP_DSI, SPR_BOOKE_IVOR2); 1141 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3]; 1142 kvm_sync_excp(env, POWERPC_EXCP_ISI, SPR_BOOKE_IVOR3); 1143 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4]; 1144 kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL, SPR_BOOKE_IVOR4); 1145 
env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5]; 1146 kvm_sync_excp(env, POWERPC_EXCP_ALIGN, SPR_BOOKE_IVOR5); 1147 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6]; 1148 kvm_sync_excp(env, POWERPC_EXCP_PROGRAM, SPR_BOOKE_IVOR6); 1149 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7]; 1150 kvm_sync_excp(env, POWERPC_EXCP_FPU, SPR_BOOKE_IVOR7); 1151 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8]; 1152 kvm_sync_excp(env, POWERPC_EXCP_SYSCALL, SPR_BOOKE_IVOR8); 1153 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9]; 1154 kvm_sync_excp(env, POWERPC_EXCP_APU, SPR_BOOKE_IVOR9); 1155 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10]; 1156 kvm_sync_excp(env, POWERPC_EXCP_DECR, SPR_BOOKE_IVOR10); 1157 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11]; 1158 kvm_sync_excp(env, POWERPC_EXCP_FIT, SPR_BOOKE_IVOR11); 1159 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12]; 1160 kvm_sync_excp(env, POWERPC_EXCP_WDT, SPR_BOOKE_IVOR12); 1161 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13]; 1162 kvm_sync_excp(env, POWERPC_EXCP_DTLB, SPR_BOOKE_IVOR13); 1163 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14]; 1164 kvm_sync_excp(env, POWERPC_EXCP_ITLB, SPR_BOOKE_IVOR14); 1165 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15]; 1166 kvm_sync_excp(env, POWERPC_EXCP_DEBUG, SPR_BOOKE_IVOR15); 1167 1168 if (sregs.u.e.features & KVM_SREGS_E_SPE) { 1169 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0]; 1170 kvm_sync_excp(env, POWERPC_EXCP_SPEU, SPR_BOOKE_IVOR32); 1171 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1]; 1172 kvm_sync_excp(env, POWERPC_EXCP_EFPDI, SPR_BOOKE_IVOR33); 1173 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2]; 1174 kvm_sync_excp(env, POWERPC_EXCP_EFPRI, SPR_BOOKE_IVOR34); 1175 } 1176 1177 if (sregs.u.e.features & KVM_SREGS_E_PM) { 1178 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3]; 1179 kvm_sync_excp(env, POWERPC_EXCP_EPERFM, SPR_BOOKE_IVOR35); 1180 } 1181 1182 if (sregs.u.e.features & KVM_SREGS_E_PC) { 1183 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4]; 1184 kvm_sync_excp(env, POWERPC_EXCP_DOORI, SPR_BOOKE_IVOR36); 1185 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5]; 1186 kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37); 1187 } 1188 } 1189 1190 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) { 1191 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0; 1192 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1; 1193 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2; 1194 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff; 1195 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4; 1196 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6; 1197 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32; 1198 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg; 1199 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0]; 1200 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1]; 1201 } 1202 1203 if (sregs.u.e.features & KVM_SREGS_EXP) { 1204 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr; 1205 } 1206 1207 if (sregs.u.e.features & KVM_SREGS_E_PD) { 1208 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc; 1209 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc; 1210 } 1211 1212 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) { 1213 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr; 1214 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar; 1215 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0; 1216 1217 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) { 1218 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1; 1219 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2; 1220 } 1221 } 1222 1223 return 0; 1224 } 1225 1226 static int 
kvmppc_get_books_sregs(PowerPCCPU *cpu) 1227 { 1228 CPUPPCState *env = &cpu->env; 1229 struct kvm_sregs sregs; 1230 int ret; 1231 int i; 1232 1233 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs); 1234 if (ret < 0) { 1235 return ret; 1236 } 1237 1238 if (!cpu->vhyp) { 1239 ppc_store_sdr1(env, sregs.u.s.sdr1); 1240 } 1241 1242 /* Sync SLB */ 1243 #ifdef TARGET_PPC64 1244 /* 1245 * The packed SLB array we get from KVM_GET_SREGS only contains 1246 * information about valid entries. So we flush our internal copy 1247 * to get rid of stale ones, then put all valid SLB entries back 1248 * in. 1249 */ 1250 memset(env->slb, 0, sizeof(env->slb)); 1251 for (i = 0; i < ARRAY_SIZE(env->slb); i++) { 1252 target_ulong rb = sregs.u.s.ppc64.slb[i].slbe; 1253 target_ulong rs = sregs.u.s.ppc64.slb[i].slbv; 1254 /* 1255 * Only restore valid entries 1256 */ 1257 if (rb & SLB_ESID_V) { 1258 ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs); 1259 } 1260 } 1261 #endif 1262 1263 /* Sync SRs */ 1264 for (i = 0; i < 16; i++) { 1265 env->sr[i] = sregs.u.s.ppc32.sr[i]; 1266 } 1267 1268 /* Sync BATs */ 1269 for (i = 0; i < 8; i++) { 1270 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff; 1271 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32; 1272 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff; 1273 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32; 1274 } 1275 1276 return 0; 1277 } 1278 1279 int kvm_arch_get_registers(CPUState *cs) 1280 { 1281 PowerPCCPU *cpu = POWERPC_CPU(cs); 1282 CPUPPCState *env = &cpu->env; 1283 struct kvm_regs regs; 1284 uint32_t cr; 1285 int i, ret; 1286 1287 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, ®s); 1288 if (ret < 0) 1289 return ret; 1290 1291 cr = regs.cr; 1292 for (i = 7; i >= 0; i--) { 1293 env->crf[i] = cr & 15; 1294 cr >>= 4; 1295 } 1296 1297 env->ctr = regs.ctr; 1298 env->lr = regs.lr; 1299 cpu_write_xer(env, regs.xer); 1300 env->msr = regs.msr; 1301 env->nip = regs.pc; 1302 1303 env->spr[SPR_SRR0] = regs.srr0; 1304 env->spr[SPR_SRR1] = regs.srr1; 1305 1306 env->spr[SPR_SPRG0] = regs.sprg0; 1307 env->spr[SPR_SPRG1] = regs.sprg1; 1308 env->spr[SPR_SPRG2] = regs.sprg2; 1309 env->spr[SPR_SPRG3] = regs.sprg3; 1310 env->spr[SPR_SPRG4] = regs.sprg4; 1311 env->spr[SPR_SPRG5] = regs.sprg5; 1312 env->spr[SPR_SPRG6] = regs.sprg6; 1313 env->spr[SPR_SPRG7] = regs.sprg7; 1314 1315 env->spr[SPR_BOOKE_PID] = regs.pid; 1316 1317 for (i = 0;i < 32; i++) 1318 env->gpr[i] = regs.gpr[i]; 1319 1320 kvm_get_fp(cs); 1321 1322 if (cap_booke_sregs) { 1323 ret = kvmppc_get_booke_sregs(cpu); 1324 if (ret < 0) { 1325 return ret; 1326 } 1327 } 1328 1329 if (cap_segstate) { 1330 ret = kvmppc_get_books_sregs(cpu); 1331 if (ret < 0) { 1332 return ret; 1333 } 1334 } 1335 1336 if (cap_hior) { 1337 kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR); 1338 } 1339 1340 if (cap_one_reg) { 1341 int i; 1342 1343 /* We deliberately ignore errors here, for kernels which have 1344 * the ONE_REG calls, but don't support the specific 1345 * registers, there's a reasonable chance things will still 1346 * work, at least until we try to migrate. 
*/ 1347 for (i = 0; i < 1024; i++) { 1348 uint64_t id = env->spr_cb[i].one_reg_id; 1349 1350 if (id != 0) { 1351 kvm_get_one_spr(cs, id, i); 1352 } 1353 } 1354 1355 #ifdef TARGET_PPC64 1356 if (msr_ts) { 1357 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) { 1358 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]); 1359 } 1360 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) { 1361 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]); 1362 } 1363 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr); 1364 kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr); 1365 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr); 1366 kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr); 1367 kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr); 1368 kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr); 1369 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave); 1370 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr); 1371 kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr); 1372 kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar); 1373 } 1374 1375 if (cap_papr) { 1376 if (kvm_get_vpa(cs) < 0) { 1377 DPRINTF("Warning: Unable to get VPA information from KVM\n"); 1378 } 1379 } 1380 1381 kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset); 1382 #endif 1383 } 1384 1385 return 0; 1386 } 1387 1388 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level) 1389 { 1390 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET; 1391 1392 if (irq != PPC_INTERRUPT_EXT) { 1393 return 0; 1394 } 1395 1396 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) { 1397 return 0; 1398 } 1399 1400 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq); 1401 1402 return 0; 1403 } 1404 1405 #if defined(TARGET_PPCEMB) 1406 #define PPC_INPUT_INT PPC40x_INPUT_INT 1407 #elif defined(TARGET_PPC64) 1408 #define PPC_INPUT_INT PPC970_INPUT_INT 1409 #else 1410 #define PPC_INPUT_INT PPC6xx_INPUT_INT 1411 #endif 1412 1413 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run) 1414 { 1415 PowerPCCPU *cpu = POWERPC_CPU(cs); 1416 CPUPPCState *env = &cpu->env; 1417 int r; 1418 unsigned irq; 1419 1420 qemu_mutex_lock_iothread(); 1421 1422 /* PowerPC QEMU tracks the various core input pins (interrupt, critical 1423 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */ 1424 if (!cap_interrupt_level && 1425 run->ready_for_interrupt_injection && 1426 (cs->interrupt_request & CPU_INTERRUPT_HARD) && 1427 (env->irq_input_state & (1<<PPC_INPUT_INT))) 1428 { 1429 /* For now KVM disregards the 'irq' argument. However, in the 1430 * future KVM could cache it in-kernel to avoid a heavyweight exit 1431 * when reading the UIC. 1432 */ 1433 irq = KVM_INTERRUPT_SET; 1434 1435 DPRINTF("injected interrupt %d\n", irq); 1436 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq); 1437 if (r < 0) { 1438 printf("cpu %d fail inject %x\n", cs->cpu_index, irq); 1439 } 1440 1441 /* Always wake up soon in case the interrupt was level based */ 1442 timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 1443 (NANOSECONDS_PER_SECOND / 50)); 1444 } 1445 1446 /* We don't know if there are more interrupts pending after this. However, 1447 * the guest will return to userspace in the course of handling this one 1448 * anyways, so we will get a chance to deliver the rest. 
*/ 1449 1450 qemu_mutex_unlock_iothread(); 1451 } 1452 1453 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run) 1454 { 1455 return MEMTXATTRS_UNSPECIFIED; 1456 } 1457 1458 int kvm_arch_process_async_events(CPUState *cs) 1459 { 1460 return cs->halted; 1461 } 1462 1463 static int kvmppc_handle_halt(PowerPCCPU *cpu) 1464 { 1465 CPUState *cs = CPU(cpu); 1466 CPUPPCState *env = &cpu->env; 1467 1468 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) { 1469 cs->halted = 1; 1470 cs->exception_index = EXCP_HLT; 1471 } 1472 1473 return 0; 1474 } 1475 1476 /* map dcr access to existing qemu dcr emulation */ 1477 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data) 1478 { 1479 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0) 1480 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn); 1481 1482 return 0; 1483 } 1484 1485 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data) 1486 { 1487 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0) 1488 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn); 1489 1490 return 0; 1491 } 1492 1493 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp) 1494 { 1495 /* Mixed endian case is not handled */ 1496 uint32_t sc = debug_inst_opcode; 1497 1498 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 1499 sizeof(sc), 0) || 1500 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) { 1501 return -EINVAL; 1502 } 1503 1504 return 0; 1505 } 1506 1507 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp) 1508 { 1509 uint32_t sc; 1510 1511 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) || 1512 sc != debug_inst_opcode || 1513 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 1514 sizeof(sc), 1)) { 1515 return -EINVAL; 1516 } 1517 1518 return 0; 1519 } 1520 1521 static int find_hw_breakpoint(target_ulong addr, int type) 1522 { 1523 int n; 1524 1525 assert((nb_hw_breakpoint + nb_hw_watchpoint) 1526 <= ARRAY_SIZE(hw_debug_points)); 1527 1528 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) { 1529 if (hw_debug_points[n].addr == addr && 1530 hw_debug_points[n].type == type) { 1531 return n; 1532 } 1533 } 1534 1535 return -1; 1536 } 1537 1538 static int find_hw_watchpoint(target_ulong addr, int *flag) 1539 { 1540 int n; 1541 1542 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS); 1543 if (n >= 0) { 1544 *flag = BP_MEM_ACCESS; 1545 return n; 1546 } 1547 1548 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE); 1549 if (n >= 0) { 1550 *flag = BP_MEM_WRITE; 1551 return n; 1552 } 1553 1554 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ); 1555 if (n >= 0) { 1556 *flag = BP_MEM_READ; 1557 return n; 1558 } 1559 1560 return -1; 1561 } 1562 1563 int kvm_arch_insert_hw_breakpoint(target_ulong addr, 1564 target_ulong len, int type) 1565 { 1566 if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) { 1567 return -ENOBUFS; 1568 } 1569 1570 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr; 1571 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type; 1572 1573 switch (type) { 1574 case GDB_BREAKPOINT_HW: 1575 if (nb_hw_breakpoint >= max_hw_breakpoint) { 1576 return -ENOBUFS; 1577 } 1578 1579 if (find_hw_breakpoint(addr, type) >= 0) { 1580 return -EEXIST; 1581 } 1582 1583 nb_hw_breakpoint++; 1584 break; 1585 1586 case GDB_WATCHPOINT_WRITE: 1587 case GDB_WATCHPOINT_READ: 1588 case GDB_WATCHPOINT_ACCESS: 1589 if (nb_hw_watchpoint >= max_hw_watchpoint) { 1590 
return -ENOBUFS; 1591 } 1592 1593 if (find_hw_breakpoint(addr, type) >= 0) { 1594 return -EEXIST; 1595 } 1596 1597 nb_hw_watchpoint++; 1598 break; 1599 1600 default: 1601 return -ENOSYS; 1602 } 1603 1604 return 0; 1605 } 1606 1607 int kvm_arch_remove_hw_breakpoint(target_ulong addr, 1608 target_ulong len, int type) 1609 { 1610 int n; 1611 1612 n = find_hw_breakpoint(addr, type); 1613 if (n < 0) { 1614 return -ENOENT; 1615 } 1616 1617 switch (type) { 1618 case GDB_BREAKPOINT_HW: 1619 nb_hw_breakpoint--; 1620 break; 1621 1622 case GDB_WATCHPOINT_WRITE: 1623 case GDB_WATCHPOINT_READ: 1624 case GDB_WATCHPOINT_ACCESS: 1625 nb_hw_watchpoint--; 1626 break; 1627 1628 default: 1629 return -ENOSYS; 1630 } 1631 hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint]; 1632 1633 return 0; 1634 } 1635 1636 void kvm_arch_remove_all_hw_breakpoints(void) 1637 { 1638 nb_hw_breakpoint = nb_hw_watchpoint = 0; 1639 } 1640 1641 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg) 1642 { 1643 int n; 1644 1645 /* Software Breakpoint updates */ 1646 if (kvm_sw_breakpoints_active(cs)) { 1647 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP; 1648 } 1649 1650 assert((nb_hw_breakpoint + nb_hw_watchpoint) 1651 <= ARRAY_SIZE(hw_debug_points)); 1652 assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp)); 1653 1654 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) { 1655 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP; 1656 memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp)); 1657 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) { 1658 switch (hw_debug_points[n].type) { 1659 case GDB_BREAKPOINT_HW: 1660 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT; 1661 break; 1662 case GDB_WATCHPOINT_WRITE: 1663 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE; 1664 break; 1665 case GDB_WATCHPOINT_READ: 1666 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ; 1667 break; 1668 case GDB_WATCHPOINT_ACCESS: 1669 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE | 1670 KVMPPC_DEBUG_WATCH_READ; 1671 break; 1672 default: 1673 cpu_abort(cs, "Unsupported breakpoint type\n"); 1674 } 1675 dbg->arch.bp[n].addr = hw_debug_points[n].addr; 1676 } 1677 } 1678 } 1679 1680 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run) 1681 { 1682 CPUState *cs = CPU(cpu); 1683 CPUPPCState *env = &cpu->env; 1684 struct kvm_debug_exit_arch *arch_info = &run->debug.arch; 1685 int handle = 0; 1686 int n; 1687 int flag = 0; 1688 1689 if (cs->singlestep_enabled) { 1690 handle = 1; 1691 } else if (arch_info->status) { 1692 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) { 1693 if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) { 1694 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW); 1695 if (n >= 0) { 1696 handle = 1; 1697 } 1698 } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ | 1699 KVMPPC_DEBUG_WATCH_WRITE)) { 1700 n = find_hw_watchpoint(arch_info->address, &flag); 1701 if (n >= 0) { 1702 handle = 1; 1703 cs->watchpoint_hit = &hw_watchpoint; 1704 hw_watchpoint.vaddr = hw_debug_points[n].addr; 1705 hw_watchpoint.flags = flag; 1706 } 1707 } 1708 } 1709 } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) { 1710 handle = 1; 1711 } else { 1712 /* QEMU is not able to handle debug exception, so inject 1713 * program exception to guest; 1714 * Yes program exception NOT debug exception !! 1715 * When QEMU is using debug resources then debug exception must 1716 * be always set. To achieve this we set MSR_DE and also set 1717 * MSRP_DEP so guest cannot change MSR_DE. 
         * When emulating debug resources for the guest we want the guest
         * to control MSR_DE (enable/disable debug interrupt on need).
         * Supporting both configurations at once is NOT possible.
         * So the result is that we cannot share debug resources between
         * QEMU and the guest on BOOKE architecture.
         * In the current design QEMU gets priority over the guest: if
         * QEMU is using the debug resources then the guest cannot use them.
         * For software breakpoints QEMU uses a privileged instruction, so
         * there is no way we end up here because of a debug exception set
         * up by the guest; the only possibility is that the guest executed
         * a privileged / illegal instruction, which is why we inject a
         * program interrupt.
         */

        cpu_synchronize_state(cs);
        /* env->nip is PC, so increment this by 4 to use
         * ppc_cpu_do_interrupt(), which sets srr0 = env->nip - 4.
         */
        env->nip += 4;
        cs->exception_index = POWERPC_EXCP_PROGRAM;
        env->error_code = POWERPC_EXCP_INVAL;
        ppc_cpu_do_interrupt(cs);
    }

    return handle;
}

int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    int ret;

    qemu_mutex_lock_iothread();

    switch (run->exit_reason) {
    case KVM_EXIT_DCR:
        if (run->dcr.is_write) {
            DPRINTF("handle dcr write\n");
            ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
        } else {
            DPRINTF("handle dcr read\n");
            ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
        }
        break;
    case KVM_EXIT_HLT:
        DPRINTF("handle halt\n");
        ret = kvmppc_handle_halt(cpu);
        break;
#if defined(TARGET_PPC64)
    case KVM_EXIT_PAPR_HCALL:
        DPRINTF("handle PAPR hypercall\n");
        run->papr_hcall.ret = spapr_hypercall(cpu,
                                              run->papr_hcall.nr,
                                              run->papr_hcall.args);
        ret = 0;
        break;
#endif
    case KVM_EXIT_EPR:
        DPRINTF("handle epr\n");
        run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
        ret = 0;
        break;
    case KVM_EXIT_WATCHDOG:
        DPRINTF("handle watchdog expiry\n");
        watchdog_perform_action();
        ret = 0;
        break;

    case KVM_EXIT_DEBUG:
        DPRINTF("handle debug exception\n");
        if (kvm_handle_debug(cpu, run)) {
            ret = EXCP_DEBUG;
            break;
        }
        /* re-enter, this exception was guest-internal */
        ret = 0;
        break;

    default:
        fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
        ret = -1;
        break;
    }

    qemu_mutex_unlock_iothread();
    return ret;
}

int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
{
    CPUState *cs = CPU(cpu);
    uint32_t bits = tsr_bits;
    struct kvm_one_reg reg = {
        .id = KVM_REG_PPC_OR_TSR,
        .addr = (uintptr_t) &bits,
    };

    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
}

int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
{
    CPUState *cs = CPU(cpu);
    uint32_t bits = tsr_bits;
    struct kvm_one_reg reg = {
        .id = KVM_REG_PPC_CLEAR_TSR,
        .addr = (uintptr_t) &bits,
    };

    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
}

int kvmppc_set_tcr(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    CPUPPCState *env = &cpu->env;
    uint32_t tcr = env->spr[SPR_BOOKE_TCR];

    struct kvm_one_reg reg = {
        .id = KVM_REG_PPC_TCR,
        .addr = (uintptr_t) &tcr,
1842 }; 1843 1844 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 1845 } 1846 1847 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu) 1848 { 1849 CPUState *cs = CPU(cpu); 1850 int ret; 1851 1852 if (!kvm_enabled()) { 1853 return -1; 1854 } 1855 1856 if (!cap_ppc_watchdog) { 1857 printf("warning: KVM does not support watchdog"); 1858 return -1; 1859 } 1860 1861 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0); 1862 if (ret < 0) { 1863 fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n", 1864 __func__, strerror(-ret)); 1865 return ret; 1866 } 1867 1868 return ret; 1869 } 1870 1871 static int read_cpuinfo(const char *field, char *value, int len) 1872 { 1873 FILE *f; 1874 int ret = -1; 1875 int field_len = strlen(field); 1876 char line[512]; 1877 1878 f = fopen("/proc/cpuinfo", "r"); 1879 if (!f) { 1880 return -1; 1881 } 1882 1883 do { 1884 if (!fgets(line, sizeof(line), f)) { 1885 break; 1886 } 1887 if (!strncmp(line, field, field_len)) { 1888 pstrcpy(value, len, line); 1889 ret = 0; 1890 break; 1891 } 1892 } while(*line); 1893 1894 fclose(f); 1895 1896 return ret; 1897 } 1898 1899 uint32_t kvmppc_get_tbfreq(void) 1900 { 1901 char line[512]; 1902 char *ns; 1903 uint32_t retval = NANOSECONDS_PER_SECOND; 1904 1905 if (read_cpuinfo("timebase", line, sizeof(line))) { 1906 return retval; 1907 } 1908 1909 if (!(ns = strchr(line, ':'))) { 1910 return retval; 1911 } 1912 1913 ns++; 1914 1915 return atoi(ns); 1916 } 1917 1918 bool kvmppc_get_host_serial(char **value) 1919 { 1920 return g_file_get_contents("/proc/device-tree/system-id", value, NULL, 1921 NULL); 1922 } 1923 1924 bool kvmppc_get_host_model(char **value) 1925 { 1926 return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL); 1927 } 1928 1929 /* Try to find a device tree node for a CPU with clock-frequency property */ 1930 static int kvmppc_find_cpu_dt(char *buf, int buf_len) 1931 { 1932 struct dirent *dirp; 1933 DIR *dp; 1934 1935 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) { 1936 printf("Can't open directory " PROC_DEVTREE_CPU "\n"); 1937 return -1; 1938 } 1939 1940 buf[0] = '\0'; 1941 while ((dirp = readdir(dp)) != NULL) { 1942 FILE *f; 1943 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU, 1944 dirp->d_name); 1945 f = fopen(buf, "r"); 1946 if (f) { 1947 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name); 1948 fclose(f); 1949 break; 1950 } 1951 buf[0] = '\0'; 1952 } 1953 closedir(dp); 1954 if (buf[0] == '\0') { 1955 printf("Unknown host!\n"); 1956 return -1; 1957 } 1958 1959 return 0; 1960 } 1961 1962 static uint64_t kvmppc_read_int_dt(const char *filename) 1963 { 1964 union { 1965 uint32_t v32; 1966 uint64_t v64; 1967 } u; 1968 FILE *f; 1969 int len; 1970 1971 f = fopen(filename, "rb"); 1972 if (!f) { 1973 return -1; 1974 } 1975 1976 len = fread(&u, 1, sizeof(u), f); 1977 fclose(f); 1978 switch (len) { 1979 case 4: 1980 /* property is a 32-bit quantity */ 1981 return be32_to_cpu(u.v32); 1982 case 8: 1983 return be64_to_cpu(u.v64); 1984 } 1985 1986 return 0; 1987 } 1988 1989 /* Read a CPU node property from the host device tree that's a single 1990 * integer (32-bit or 64-bit). 
Returns 0 if anything goes wrong 1991 * (can't find or open the property, or doesn't understand the 1992 * format) */ 1993 static uint64_t kvmppc_read_int_cpu_dt(const char *propname) 1994 { 1995 char buf[PATH_MAX], *tmp; 1996 uint64_t val; 1997 1998 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) { 1999 return -1; 2000 } 2001 2002 tmp = g_strdup_printf("%s/%s", buf, propname); 2003 val = kvmppc_read_int_dt(tmp); 2004 g_free(tmp); 2005 2006 return val; 2007 } 2008 2009 uint64_t kvmppc_get_clockfreq(void) 2010 { 2011 return kvmppc_read_int_cpu_dt("clock-frequency"); 2012 } 2013 2014 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo) 2015 { 2016 PowerPCCPU *cpu = ppc_env_get_cpu(env); 2017 CPUState *cs = CPU(cpu); 2018 2019 if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) && 2020 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) { 2021 return 0; 2022 } 2023 2024 return 1; 2025 } 2026 2027 int kvmppc_get_hasidle(CPUPPCState *env) 2028 { 2029 struct kvm_ppc_pvinfo pvinfo; 2030 2031 if (!kvmppc_get_pvinfo(env, &pvinfo) && 2032 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) { 2033 return 1; 2034 } 2035 2036 return 0; 2037 } 2038 2039 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len) 2040 { 2041 uint32_t *hc = (uint32_t*)buf; 2042 struct kvm_ppc_pvinfo pvinfo; 2043 2044 if (!kvmppc_get_pvinfo(env, &pvinfo)) { 2045 memcpy(buf, pvinfo.hcall, buf_len); 2046 return 0; 2047 } 2048 2049 /* 2050 * Fallback to always fail hypercalls regardless of endianness: 2051 * 2052 * tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian) 2053 * li r3, -1 2054 * b .+8 (becomes nop in wrong endian) 2055 * bswap32(li r3, -1) 2056 */ 2057 2058 hc[0] = cpu_to_be32(0x08000048); 2059 hc[1] = cpu_to_be32(0x3860ffff); 2060 hc[2] = cpu_to_be32(0x48000008); 2061 hc[3] = cpu_to_be32(bswap32(0x3860ffff)); 2062 2063 return 1; 2064 } 2065 2066 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall) 2067 { 2068 return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1); 2069 } 2070 2071 void kvmppc_enable_logical_ci_hcalls(void) 2072 { 2073 /* 2074 * FIXME: it would be nice if we could detect the cases where 2075 * we're using a device which requires the in kernel 2076 * implementation of these hcalls, but the kernel lacks them and 2077 * produce a warning. 
2078 */ 2079 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD); 2080 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE); 2081 } 2082 2083 void kvmppc_enable_set_mode_hcall(void) 2084 { 2085 kvmppc_enable_hcall(kvm_state, H_SET_MODE); 2086 } 2087 2088 void kvmppc_enable_clear_ref_mod_hcalls(void) 2089 { 2090 kvmppc_enable_hcall(kvm_state, H_CLEAR_REF); 2091 kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD); 2092 } 2093 2094 void kvmppc_set_papr(PowerPCCPU *cpu) 2095 { 2096 CPUState *cs = CPU(cpu); 2097 int ret; 2098 2099 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0); 2100 if (ret) { 2101 error_report("This vCPU type or KVM version does not support PAPR"); 2102 exit(1); 2103 } 2104 2105 /* Update the capability flag so we sync the right information 2106 * with kvm */ 2107 cap_papr = 1; 2108 } 2109 2110 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr) 2111 { 2112 return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr); 2113 } 2114 2115 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy) 2116 { 2117 CPUState *cs = CPU(cpu); 2118 int ret; 2119 2120 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy); 2121 if (ret && mpic_proxy) { 2122 error_report("This KVM version does not support EPR"); 2123 exit(1); 2124 } 2125 } 2126 2127 int kvmppc_smt_threads(void) 2128 { 2129 return cap_ppc_smt ? cap_ppc_smt : 1; 2130 } 2131 2132 int kvmppc_set_smt_threads(int smt) 2133 { 2134 int ret; 2135 2136 ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_SMT, 0, smt, 0); 2137 if (!ret) { 2138 cap_ppc_smt = smt; 2139 } 2140 return ret; 2141 } 2142 2143 void kvmppc_hint_smt_possible(Error **errp) 2144 { 2145 int i; 2146 GString *g; 2147 char *s; 2148 2149 assert(kvm_enabled()); 2150 if (cap_ppc_smt_possible) { 2151 g = g_string_new("Available VSMT modes:"); 2152 for (i = 63; i >= 0; i--) { 2153 if ((1UL << i) & cap_ppc_smt_possible) { 2154 g_string_append_printf(g, " %lu", (1UL << i)); 2155 } 2156 } 2157 s = g_string_free(g, false); 2158 error_append_hint(errp, "%s.\n", s); 2159 g_free(s); 2160 } else { 2161 error_append_hint(errp, 2162 "This KVM seems to be too old to support VSMT.\n"); 2163 } 2164 } 2165 2166 2167 #ifdef TARGET_PPC64 2168 off_t kvmppc_alloc_rma(void **rma) 2169 { 2170 off_t size; 2171 int fd; 2172 struct kvm_allocate_rma ret; 2173 2174 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported 2175 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but 2176 * not necessary on this hardware 2177 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware 2178 * 2179 * FIXME: We should allow the user to force contiguous RMA 2180 * allocation in the cap_ppc_rma==1 case. 
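 * As implemented, the cap_ppc_rma < 2 cases below just return 0 so the caller allocates guest RAM itself; when KVM_ALLOCATE_RMA is used, the resulting mapping is capped at 256 MiB.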
2181 */ 2182 if (cap_ppc_rma < 2) { 2183 return 0; 2184 } 2185 2186 fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret); 2187 if (fd < 0) { 2188 fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n", 2189 strerror(errno)); 2190 return -1; 2191 } 2192 2193 size = MIN(ret.rma_size, 256ul << 20); 2194 2195 *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); 2196 if (*rma == MAP_FAILED) { 2197 fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno)); 2198 return -1; 2199 }; 2200 2201 return size; 2202 } 2203 2204 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift) 2205 { 2206 struct kvm_ppc_smmu_info info; 2207 long rampagesize, best_page_shift; 2208 int i; 2209 2210 if (cap_ppc_rma >= 2) { 2211 return current_size; 2212 } 2213 2214 /* Find the largest hardware supported page size that's less than 2215 * or equal to the (logical) backing page size of guest RAM */ 2216 kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info); 2217 rampagesize = qemu_getrampagesize(); 2218 best_page_shift = 0; 2219 2220 for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) { 2221 struct kvm_ppc_one_seg_page_size *sps = &info.sps[i]; 2222 2223 if (!sps->page_shift) { 2224 continue; 2225 } 2226 2227 if ((sps->page_shift > best_page_shift) 2228 && ((1UL << sps->page_shift) <= rampagesize)) { 2229 best_page_shift = sps->page_shift; 2230 } 2231 } 2232 2233 return MIN(current_size, 2234 1ULL << (best_page_shift + hash_shift - 7)); 2235 } 2236 #endif 2237 2238 bool kvmppc_spapr_use_multitce(void) 2239 { 2240 return cap_spapr_multitce; 2241 } 2242 2243 int kvmppc_spapr_enable_inkernel_multitce(void) 2244 { 2245 int ret; 2246 2247 ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0, 2248 H_PUT_TCE_INDIRECT, 1); 2249 if (!ret) { 2250 ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0, 2251 H_STUFF_TCE, 1); 2252 } 2253 2254 return ret; 2255 } 2256 2257 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift, 2258 uint64_t bus_offset, uint32_t nb_table, 2259 int *pfd, bool need_vfio) 2260 { 2261 long len; 2262 int fd; 2263 void *table; 2264 2265 /* Must set fd to -1 so we don't try to munmap when called for 2266 * destroying the table, which the upper layers -will- do 2267 */ 2268 *pfd = -1; 2269 if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) { 2270 return NULL; 2271 } 2272 2273 if (cap_spapr_tce_64) { 2274 struct kvm_create_spapr_tce_64 args = { 2275 .liobn = liobn, 2276 .page_shift = page_shift, 2277 .offset = bus_offset >> page_shift, 2278 .size = nb_table, 2279 .flags = 0 2280 }; 2281 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE_64, &args); 2282 if (fd < 0) { 2283 fprintf(stderr, 2284 "KVM: Failed to create TCE64 table for liobn 0x%x\n", 2285 liobn); 2286 return NULL; 2287 } 2288 } else if (cap_spapr_tce) { 2289 uint64_t window_size = (uint64_t) nb_table << page_shift; 2290 struct kvm_create_spapr_tce args = { 2291 .liobn = liobn, 2292 .window_size = window_size, 2293 }; 2294 if ((window_size != args.window_size) || bus_offset) { 2295 return NULL; 2296 } 2297 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args); 2298 if (fd < 0) { 2299 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n", 2300 liobn); 2301 return NULL; 2302 } 2303 } else { 2304 return NULL; 2305 } 2306 2307 len = nb_table * sizeof(uint64_t); 2308 /* FIXME: round this up to page size */ 2309 2310 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); 2311 if (table == MAP_FAILED) { 2312 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n", 
2313 liobn); 2314 close(fd); 2315 return NULL; 2316 } 2317 2318 *pfd = fd; 2319 return table; 2320 } 2321 2322 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table) 2323 { 2324 long len; 2325 2326 if (fd < 0) { 2327 return -1; 2328 } 2329 2330 len = nb_table * sizeof(uint64_t); 2331 if ((munmap(table, len) < 0) || 2332 (close(fd) < 0)) { 2333 fprintf(stderr, "KVM: Unexpected error removing TCE table: %s", 2334 strerror(errno)); 2335 /* Leak the table */ 2336 } 2337 2338 return 0; 2339 } 2340 2341 int kvmppc_reset_htab(int shift_hint) 2342 { 2343 uint32_t shift = shift_hint; 2344 2345 if (!kvm_enabled()) { 2346 /* Full emulation, tell caller to allocate htab itself */ 2347 return 0; 2348 } 2349 if (kvm_vm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) { 2350 int ret; 2351 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift); 2352 if (ret == -ENOTTY) { 2353 /* At least some versions of PR KVM advertise the 2354 * capability, but don't implement the ioctl(). Oops. 2355 * Return 0 so that we allocate the htab in qemu, as is 2356 * correct for PR. */ 2357 return 0; 2358 } else if (ret < 0) { 2359 return ret; 2360 } 2361 return shift; 2362 } 2363 2364 /* We have a kernel that predates the htab reset calls. For PR 2365 * KVM, we need to allocate the htab ourselves, for an HV KVM of 2366 * this era, it has allocated a 16MB fixed size hash table already. */ 2367 if (kvmppc_is_pr(kvm_state)) { 2368 /* PR - tell caller to allocate htab */ 2369 return 0; 2370 } else { 2371 /* HV - assume 16MB kernel allocated htab */ 2372 return 24; 2373 } 2374 } 2375 2376 static inline uint32_t mfpvr(void) 2377 { 2378 uint32_t pvr; 2379 2380 asm ("mfpvr %0" 2381 : "=r"(pvr)); 2382 return pvr; 2383 } 2384 2385 static void alter_insns(uint64_t *word, uint64_t flags, bool on) 2386 { 2387 if (on) { 2388 *word |= flags; 2389 } else { 2390 *word &= ~flags; 2391 } 2392 } 2393 2394 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data) 2395 { 2396 PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc); 2397 uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size"); 2398 uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size"); 2399 2400 /* Now fix up the class with information we can query from the host */ 2401 pcc->pvr = mfpvr(); 2402 2403 alter_insns(&pcc->insns_flags, PPC_ALTIVEC, 2404 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_ALTIVEC); 2405 alter_insns(&pcc->insns_flags2, PPC2_VSX, 2406 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_VSX); 2407 alter_insns(&pcc->insns_flags2, PPC2_DFP, 2408 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_DFP); 2409 2410 if (dcache_size != -1) { 2411 pcc->l1_dcache_size = dcache_size; 2412 } 2413 2414 if (icache_size != -1) { 2415 pcc->l1_icache_size = icache_size; 2416 } 2417 2418 #if defined(TARGET_PPC64) 2419 pcc->radix_page_info = kvm_get_radix_page_info(); 2420 2421 if ((pcc->pvr & 0xffffff00) == CPU_POWERPC_POWER9_DD1) { 2422 /* 2423 * POWER9 DD1 has some bugs which make it not really ISA 3.00 2424 * compliant. More importantly, advertising ISA 3.00 2425 * architected mode may prevent guests from activating 2426 * necessary DD1 workarounds. 
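 * Stripping these PCR_COMPAT bits below means the host CPU class does not advertise those architected compat modes on DD1.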
2427 */ 2428 pcc->pcr_supported &= ~(PCR_COMPAT_3_00 | PCR_COMPAT_2_07 2429 | PCR_COMPAT_2_06 | PCR_COMPAT_2_05); 2430 } 2431 #endif /* defined(TARGET_PPC64) */ 2432 } 2433 2434 bool kvmppc_has_cap_epr(void) 2435 { 2436 return cap_epr; 2437 } 2438 2439 bool kvmppc_has_cap_fixup_hcalls(void) 2440 { 2441 return cap_fixup_hcalls; 2442 } 2443 2444 bool kvmppc_has_cap_htm(void) 2445 { 2446 return cap_htm; 2447 } 2448 2449 bool kvmppc_has_cap_mmu_radix(void) 2450 { 2451 return cap_mmu_radix; 2452 } 2453 2454 bool kvmppc_has_cap_mmu_hash_v3(void) 2455 { 2456 return cap_mmu_hash_v3; 2457 } 2458 2459 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void) 2460 { 2461 uint32_t host_pvr = mfpvr(); 2462 PowerPCCPUClass *pvr_pcc; 2463 2464 pvr_pcc = ppc_cpu_class_by_pvr(host_pvr); 2465 if (pvr_pcc == NULL) { 2466 pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr); 2467 } 2468 2469 return pvr_pcc; 2470 } 2471 2472 static int kvm_ppc_register_host_cpu_type(MachineState *ms) 2473 { 2474 TypeInfo type_info = { 2475 .name = TYPE_HOST_POWERPC_CPU, 2476 .class_init = kvmppc_host_cpu_class_init, 2477 }; 2478 MachineClass *mc = MACHINE_GET_CLASS(ms); 2479 PowerPCCPUClass *pvr_pcc; 2480 ObjectClass *oc; 2481 DeviceClass *dc; 2482 int i; 2483 2484 pvr_pcc = kvm_ppc_get_host_cpu_class(); 2485 if (pvr_pcc == NULL) { 2486 return -1; 2487 } 2488 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc)); 2489 type_register(&type_info); 2490 if (object_dynamic_cast(OBJECT(ms), TYPE_SPAPR_MACHINE)) { 2491 /* override TCG default cpu type with 'host' cpu model */ 2492 mc->default_cpu_type = TYPE_HOST_POWERPC_CPU; 2493 } 2494 2495 oc = object_class_by_name(type_info.name); 2496 g_assert(oc); 2497 2498 /* 2499 * Update generic CPU family class alias (e.g. on a POWER8NVL host, 2500 * we want "POWER8" to be a "family" alias that points to the current 2501 * host CPU type, too) 2502 */ 2503 dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc)); 2504 for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) { 2505 if (strcasecmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) { 2506 char *suffix; 2507 2508 ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc)); 2509 suffix = strstr(ppc_cpu_aliases[i].model, POWERPC_CPU_TYPE_SUFFIX); 2510 if (suffix) { 2511 *suffix = 0; 2512 } 2513 break; 2514 } 2515 } 2516 2517 return 0; 2518 } 2519 2520 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function) 2521 { 2522 struct kvm_rtas_token_args args = { 2523 .token = token, 2524 }; 2525 2526 if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) { 2527 return -ENOENT; 2528 } 2529 2530 strncpy(args.name, function, sizeof(args.name)); 2531 2532 return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args); 2533 } 2534 2535 int kvmppc_get_htab_fd(bool write, uint64_t index, Error **errp) 2536 { 2537 struct kvm_get_htab_fd s = { 2538 .flags = write ? KVM_GET_HTAB_WRITE : 0, 2539 .start_index = index, 2540 }; 2541 int ret; 2542 2543 if (!cap_htab_fd) { 2544 error_setg(errp, "KVM version doesn't support %s the HPT", 2545 write ? "writing" : "reading"); 2546 return -ENOTSUP; 2547 } 2548 2549 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s); 2550 if (ret < 0) { 2551 error_setg(errp, "Unable to open fd for %s HPT %s KVM: %s", 2552 write ? "writing" : "reading", write ? 
"to" : "from", 2553 strerror(errno)); 2554 return -errno; 2555 } 2556 2557 return ret; 2558 } 2559 2560 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns) 2561 { 2562 int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); 2563 uint8_t buf[bufsize]; 2564 ssize_t rc; 2565 2566 do { 2567 rc = read(fd, buf, bufsize); 2568 if (rc < 0) { 2569 fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n", 2570 strerror(errno)); 2571 return rc; 2572 } else if (rc) { 2573 uint8_t *buffer = buf; 2574 ssize_t n = rc; 2575 while (n) { 2576 struct kvm_get_htab_header *head = 2577 (struct kvm_get_htab_header *) buffer; 2578 size_t chunksize = sizeof(*head) + 2579 HASH_PTE_SIZE_64 * head->n_valid; 2580 2581 qemu_put_be32(f, head->index); 2582 qemu_put_be16(f, head->n_valid); 2583 qemu_put_be16(f, head->n_invalid); 2584 qemu_put_buffer(f, (void *)(head + 1), 2585 HASH_PTE_SIZE_64 * head->n_valid); 2586 2587 buffer += chunksize; 2588 n -= chunksize; 2589 } 2590 } 2591 } while ((rc != 0) 2592 && ((max_ns < 0) 2593 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns))); 2594 2595 return (rc == 0) ? 1 : 0; 2596 } 2597 2598 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index, 2599 uint16_t n_valid, uint16_t n_invalid) 2600 { 2601 struct kvm_get_htab_header *buf; 2602 size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64; 2603 ssize_t rc; 2604 2605 buf = alloca(chunksize); 2606 buf->index = index; 2607 buf->n_valid = n_valid; 2608 buf->n_invalid = n_invalid; 2609 2610 qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid); 2611 2612 rc = write(fd, buf, chunksize); 2613 if (rc < 0) { 2614 fprintf(stderr, "Error writing KVM hash table: %s\n", 2615 strerror(errno)); 2616 return rc; 2617 } 2618 if (rc != chunksize) { 2619 /* We should never get a short write on a single chunk */ 2620 fprintf(stderr, "Short write, restoring KVM hash table\n"); 2621 return -1; 2622 } 2623 return 0; 2624 } 2625 2626 bool kvm_arch_stop_on_emulation_error(CPUState *cpu) 2627 { 2628 return true; 2629 } 2630 2631 void kvm_arch_init_irq_routing(KVMState *s) 2632 { 2633 } 2634 2635 void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n) 2636 { 2637 int fd, rc; 2638 int i; 2639 2640 fd = kvmppc_get_htab_fd(false, ptex, &error_abort); 2641 2642 i = 0; 2643 while (i < n) { 2644 struct kvm_get_htab_header *hdr; 2645 int m = n < HPTES_PER_GROUP ? 
n : HPTES_PER_GROUP; 2646 char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64]; 2647 2648 rc = read(fd, buf, sizeof(buf)); 2649 if (rc < 0) { 2650 hw_error("kvmppc_read_hptes: Unable to read HPTEs"); 2651 } 2652 2653 hdr = (struct kvm_get_htab_header *)buf; 2654 while ((i < n) && ((char *)hdr < (buf + rc))) { 2655 int invalid = hdr->n_invalid, valid = hdr->n_valid; 2656 2657 if (hdr->index != (ptex + i)) { 2658 hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32 2659 " != (%"HWADDR_PRIu" + %d", hdr->index, ptex, i); 2660 } 2661 2662 if (n - i < valid) { 2663 valid = n - i; 2664 } 2665 memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * valid); 2666 i += valid; 2667 2668 if ((n - i) < invalid) { 2669 invalid = n - i; 2670 } 2671 memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64); 2672 i += invalid; 2673 2674 hdr = (struct kvm_get_htab_header *) 2675 ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid); 2676 } 2677 } 2678 2679 close(fd); 2680 } 2681 2682 void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1) 2683 { 2684 int fd, rc; 2685 struct { 2686 struct kvm_get_htab_header hdr; 2687 uint64_t pte0; 2688 uint64_t pte1; 2689 } buf; 2690 2691 fd = kvmppc_get_htab_fd(true, 0 /* Ignored */, &error_abort); 2692 2693 buf.hdr.n_valid = 1; 2694 buf.hdr.n_invalid = 0; 2695 buf.hdr.index = ptex; 2696 buf.pte0 = cpu_to_be64(pte0); 2697 buf.pte1 = cpu_to_be64(pte1); 2698 2699 rc = write(fd, &buf, sizeof(buf)); 2700 if (rc != sizeof(buf)) { 2701 hw_error("kvmppc_write_hpte: Unable to update KVM HPT"); 2702 } 2703 close(fd); 2704 } 2705 2706 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route, 2707 uint64_t address, uint32_t data, PCIDevice *dev) 2708 { 2709 return 0; 2710 } 2711 2712 int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route, 2713 int vector, PCIDevice *dev) 2714 { 2715 return 0; 2716 } 2717 2718 int kvm_arch_release_virq_post(int virq) 2719 { 2720 return 0; 2721 } 2722 2723 int kvm_arch_msi_data_to_gsi(uint32_t data) 2724 { 2725 return data & 0xffff; 2726 } 2727 2728 int kvmppc_enable_hwrng(void) 2729 { 2730 if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) { 2731 return -1; 2732 } 2733 2734 return kvmppc_enable_hcall(kvm_state, H_RANDOM); 2735 } 2736 2737 void kvmppc_check_papr_resize_hpt(Error **errp) 2738 { 2739 if (!kvm_enabled()) { 2740 return; /* No KVM, we're good */ 2741 } 2742 2743 if (cap_resize_hpt) { 2744 return; /* Kernel has explicit support, we're good */ 2745 } 2746 2747 /* Otherwise fallback on looking for PR KVM */ 2748 if (kvmppc_is_pr(kvm_state)) { 2749 return; 2750 } 2751 2752 error_setg(errp, 2753 "Hash page table resizing not available with this KVM version"); 2754 } 2755 2756 int kvmppc_resize_hpt_prepare(PowerPCCPU *cpu, target_ulong flags, int shift) 2757 { 2758 CPUState *cs = CPU(cpu); 2759 struct kvm_ppc_resize_hpt rhpt = { 2760 .flags = flags, 2761 .shift = shift, 2762 }; 2763 2764 if (!cap_resize_hpt) { 2765 return -ENOSYS; 2766 } 2767 2768 return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_PREPARE, &rhpt); 2769 } 2770 2771 int kvmppc_resize_hpt_commit(PowerPCCPU *cpu, target_ulong flags, int shift) 2772 { 2773 CPUState *cs = CPU(cpu); 2774 struct kvm_ppc_resize_hpt rhpt = { 2775 .flags = flags, 2776 .shift = shift, 2777 }; 2778 2779 if (!cap_resize_hpt) { 2780 return -ENOSYS; 2781 } 2782 2783 return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_COMMIT, &rhpt); 2784 } 2785 2786 /* 2787 * This is a helper function to detect a post migration scenario 2788 * in which a guest, running as KVM-HV, freezes 
in cpu_post_load because 2789 * the guest kernel can't handle a PVR value other than the actual host 2790 * PVR in KVM_SET_SREGS, even if pvr_match() returns true. 2791 * 2792 * If we don't have cap_ppc_pvr_compat and we're not running in PR 2793 * (so, we're HV), return true. The workaround itself is done in 2794 * cpu_post_load. 2795 * 2796 * The order here is important: we'll only check for KVM PR as a 2797 * fallback if the guest kernel can't handle the situation itself. 2798 * We need to avoid as much as possible querying the running KVM type 2799 * in QEMU level. 2800 */ 2801 bool kvmppc_pvr_workaround_required(PowerPCCPU *cpu) 2802 { 2803 CPUState *cs = CPU(cpu); 2804 2805 if (!kvm_enabled()) { 2806 return false; 2807 } 2808 2809 if (cap_ppc_pvr_compat) { 2810 return false; 2811 } 2812 2813 return !kvmppc_is_pr(cs->kvm_state); 2814 } 2815