1 /* 2 * PowerPC implementation of KVM hooks 3 * 4 * Copyright IBM Corp. 2007 5 * Copyright (C) 2011 Freescale Semiconductor, Inc. 6 * 7 * Authors: 8 * Jerone Young <jyoung5@us.ibm.com> 9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com> 10 * Hollis Blanchard <hollisb@us.ibm.com> 11 * 12 * This work is licensed under the terms of the GNU GPL, version 2 or later. 13 * See the COPYING file in the top-level directory. 14 * 15 */ 16 17 #include "qemu/osdep.h" 18 #include <dirent.h> 19 #include <sys/ioctl.h> 20 #include <sys/vfs.h> 21 22 #include <linux/kvm.h> 23 24 #include "qemu-common.h" 25 #include "qapi/error.h" 26 #include "qemu/error-report.h" 27 #include "cpu.h" 28 #include "cpu-models.h" 29 #include "qemu/timer.h" 30 #include "sysemu/sysemu.h" 31 #include "sysemu/hw_accel.h" 32 #include "kvm_ppc.h" 33 #include "sysemu/cpus.h" 34 #include "sysemu/device_tree.h" 35 #include "mmu-hash64.h" 36 37 #include "hw/sysbus.h" 38 #include "hw/ppc/spapr.h" 39 #include "hw/ppc/spapr_vio.h" 40 #include "hw/ppc/spapr_cpu_core.h" 41 #include "hw/ppc/ppc.h" 42 #include "sysemu/watchdog.h" 43 #include "trace.h" 44 #include "exec/gdbstub.h" 45 #include "exec/memattrs.h" 46 #include "exec/ram_addr.h" 47 #include "sysemu/hostmem.h" 48 #include "qemu/cutils.h" 49 #include "qemu/mmap-alloc.h" 50 #include "elf.h" 51 #include "sysemu/kvm_int.h" 52 53 //#define DEBUG_KVM 54 55 #ifdef DEBUG_KVM 56 #define DPRINTF(fmt, ...) \ 57 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0) 58 #else 59 #define DPRINTF(fmt, ...) \ 60 do { } while (0) 61 #endif 62 63 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/" 64 65 const KVMCapabilityInfo kvm_arch_required_capabilities[] = { 66 KVM_CAP_LAST_INFO 67 }; 68 69 static int cap_interrupt_unset = false; 70 static int cap_interrupt_level = false; 71 static int cap_segstate; 72 static int cap_booke_sregs; 73 static int cap_ppc_smt; 74 static int cap_ppc_smt_possible; 75 static int cap_ppc_rma; 76 static int cap_spapr_tce; 77 static int cap_spapr_tce_64; 78 static int cap_spapr_multitce; 79 static int cap_spapr_vfio; 80 static int cap_hior; 81 static int cap_one_reg; 82 static int cap_epr; 83 static int cap_ppc_watchdog; 84 static int cap_papr; 85 static int cap_htab_fd; 86 static int cap_fixup_hcalls; 87 static int cap_htm; /* Hardware transactional memory support */ 88 static int cap_mmu_radix; 89 static int cap_mmu_hash_v3; 90 static int cap_resize_hpt; 91 static int cap_ppc_pvr_compat; 92 93 static uint32_t debug_inst_opcode; 94 95 /* XXX We have a race condition where we actually have a level triggered 96 * interrupt, but the infrastructure can't expose that yet, so the guest 97 * takes but ignores it, goes to sleep and never gets notified that there's 98 * still an interrupt pending. 99 * 100 * As a quick workaround, let's just wake up again 20 ms after we injected 101 * an interrupt. That way we can assure that we're always reinjecting 102 * interrupts in case the guest swallowed them. 103 */ 104 static QEMUTimer *idle_timer; 105 106 static void kvm_kick_cpu(void *opaque) 107 { 108 PowerPCCPU *cpu = opaque; 109 110 qemu_cpu_kick(CPU(cpu)); 111 } 112 113 /* Check whether we are running with KVM-PR (instead of KVM-HV). This 114 * should only be used for fallback tests - generally we should use 115 * explicit capabilities for the features we want, rather than 116 * assuming what is/isn't available depending on the KVM variant. */ 117 static bool kvmppc_is_pr(KVMState *ks) 118 { 119 /* Assume KVM-PR if the GET_PVINFO capability is available */ 120 return kvm_vm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0; 121 } 122 123 static int kvm_ppc_register_host_cpu_type(MachineState *ms); 124 125 int kvm_arch_init(MachineState *ms, KVMState *s) 126 { 127 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ); 128 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL); 129 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE); 130 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS); 131 cap_ppc_smt_possible = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT_POSSIBLE); 132 cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA); 133 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE); 134 cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64); 135 cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE); 136 cap_spapr_vfio = false; 137 cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG); 138 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR); 139 cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR); 140 cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG); 141 /* Note: we don't set cap_papr here, because this capability is 142 * only activated after this by kvmppc_set_papr() */ 143 cap_htab_fd = kvm_vm_check_extension(s, KVM_CAP_PPC_HTAB_FD); 144 cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL); 145 cap_ppc_smt = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT); 146 cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM); 147 cap_mmu_radix = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_RADIX); 148 cap_mmu_hash_v3 = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_HASH_V3); 149 cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT); 150 /* 151 * Note: setting it to false because there is not such capability 152 * in KVM at this moment. 153 * 154 * TODO: call kvm_vm_check_extension() with the right capability 155 * after the kernel starts implementing it.*/ 156 cap_ppc_pvr_compat = false; 157 158 if (!cap_interrupt_level) { 159 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the " 160 "VM to stall at times!\n"); 161 } 162 163 kvm_ppc_register_host_cpu_type(ms); 164 165 return 0; 166 } 167 168 int kvm_arch_irqchip_create(MachineState *ms, KVMState *s) 169 { 170 return 0; 171 } 172 173 static int kvm_arch_sync_sregs(PowerPCCPU *cpu) 174 { 175 CPUPPCState *cenv = &cpu->env; 176 CPUState *cs = CPU(cpu); 177 struct kvm_sregs sregs; 178 int ret; 179 180 if (cenv->excp_model == POWERPC_EXCP_BOOKE) { 181 /* What we're really trying to say is "if we're on BookE, we use 182 the native PVR for now". This is the only sane way to check 183 it though, so we potentially confuse users that they can run 184 BookE guests on BookS. Let's hope nobody dares enough :) */ 185 return 0; 186 } else { 187 if (!cap_segstate) { 188 fprintf(stderr, "kvm error: missing PVR setting capability\n"); 189 return -ENOSYS; 190 } 191 } 192 193 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs); 194 if (ret) { 195 return ret; 196 } 197 198 sregs.pvr = cenv->spr[SPR_PVR]; 199 return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs); 200 } 201 202 /* Set up a shared TLB array with KVM */ 203 static int kvm_booke206_tlb_init(PowerPCCPU *cpu) 204 { 205 CPUPPCState *env = &cpu->env; 206 CPUState *cs = CPU(cpu); 207 struct kvm_book3e_206_tlb_params params = {}; 208 struct kvm_config_tlb cfg = {}; 209 unsigned int entries = 0; 210 int ret, i; 211 212 if (!kvm_enabled() || 213 !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) { 214 return 0; 215 } 216 217 assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN); 218 219 for (i = 0; i < BOOKE206_MAX_TLBN; i++) { 220 params.tlb_sizes[i] = booke206_tlb_size(env, i); 221 params.tlb_ways[i] = booke206_tlb_ways(env, i); 222 entries += params.tlb_sizes[i]; 223 } 224 225 assert(entries == env->nb_tlb); 226 assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t)); 227 228 env->tlb_dirty = true; 229 230 cfg.array = (uintptr_t)env->tlb.tlbm; 231 cfg.array_len = sizeof(ppcmas_tlb_t) * entries; 232 cfg.params = (uintptr_t)¶ms; 233 cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV; 234 235 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg); 236 if (ret < 0) { 237 fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n", 238 __func__, strerror(-ret)); 239 return ret; 240 } 241 242 env->kvm_sw_tlb = true; 243 return 0; 244 } 245 246 247 #if defined(TARGET_PPC64) 248 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu, 249 struct kvm_ppc_smmu_info *info) 250 { 251 CPUPPCState *env = &cpu->env; 252 CPUState *cs = CPU(cpu); 253 254 memset(info, 0, sizeof(*info)); 255 256 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so 257 * need to "guess" what the supported page sizes are. 258 * 259 * For that to work we make a few assumptions: 260 * 261 * - Check whether we are running "PR" KVM which only supports 4K 262 * and 16M pages, but supports them regardless of the backing 263 * store characteritics. We also don't support 1T segments. 264 * 265 * This is safe as if HV KVM ever supports that capability or PR 266 * KVM grows supports for more page/segment sizes, those versions 267 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we 268 * will not hit this fallback 269 * 270 * - Else we are running HV KVM. This means we only support page 271 * sizes that fit in the backing store. Additionally we only 272 * advertize 64K pages if the processor is ARCH 2.06 and we assume 273 * P7 encodings for the SLB and hash table. Here too, we assume 274 * support for any newer processor will mean a kernel that 275 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit 276 * this fallback. 277 */ 278 if (kvmppc_is_pr(cs->kvm_state)) { 279 /* No flags */ 280 info->flags = 0; 281 info->slb_size = 64; 282 283 /* Standard 4k base page size segment */ 284 info->sps[0].page_shift = 12; 285 info->sps[0].slb_enc = 0; 286 info->sps[0].enc[0].page_shift = 12; 287 info->sps[0].enc[0].pte_enc = 0; 288 289 /* Standard 16M large page size segment */ 290 info->sps[1].page_shift = 24; 291 info->sps[1].slb_enc = SLB_VSID_L; 292 info->sps[1].enc[0].page_shift = 24; 293 info->sps[1].enc[0].pte_enc = 0; 294 } else { 295 int i = 0; 296 297 /* HV KVM has backing store size restrictions */ 298 info->flags = KVM_PPC_PAGE_SIZES_REAL; 299 300 if (env->mmu_model & POWERPC_MMU_1TSEG) { 301 info->flags |= KVM_PPC_1T_SEGMENTS; 302 } 303 304 if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 || 305 POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) { 306 info->slb_size = 32; 307 } else { 308 info->slb_size = 64; 309 } 310 311 /* Standard 4k base page size segment */ 312 info->sps[i].page_shift = 12; 313 info->sps[i].slb_enc = 0; 314 info->sps[i].enc[0].page_shift = 12; 315 info->sps[i].enc[0].pte_enc = 0; 316 i++; 317 318 /* 64K on MMU 2.06 and later */ 319 if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 || 320 POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) { 321 info->sps[i].page_shift = 16; 322 info->sps[i].slb_enc = 0x110; 323 info->sps[i].enc[0].page_shift = 16; 324 info->sps[i].enc[0].pte_enc = 1; 325 i++; 326 } 327 328 /* Standard 16M large page size segment */ 329 info->sps[i].page_shift = 24; 330 info->sps[i].slb_enc = SLB_VSID_L; 331 info->sps[i].enc[0].page_shift = 24; 332 info->sps[i].enc[0].pte_enc = 0; 333 } 334 } 335 336 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info) 337 { 338 CPUState *cs = CPU(cpu); 339 int ret; 340 341 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) { 342 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info); 343 if (ret == 0) { 344 return; 345 } 346 } 347 348 kvm_get_fallback_smmu_info(cpu, info); 349 } 350 351 struct ppc_radix_page_info *kvm_get_radix_page_info(void) 352 { 353 KVMState *s = KVM_STATE(current_machine->accelerator); 354 struct ppc_radix_page_info *radix_page_info; 355 struct kvm_ppc_rmmu_info rmmu_info; 356 int i; 357 358 if (!kvm_check_extension(s, KVM_CAP_PPC_MMU_RADIX)) { 359 return NULL; 360 } 361 if (kvm_vm_ioctl(s, KVM_PPC_GET_RMMU_INFO, &rmmu_info)) { 362 return NULL; 363 } 364 radix_page_info = g_malloc0(sizeof(*radix_page_info)); 365 radix_page_info->count = 0; 366 for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) { 367 if (rmmu_info.ap_encodings[i]) { 368 radix_page_info->entries[i] = rmmu_info.ap_encodings[i]; 369 radix_page_info->count++; 370 } 371 } 372 return radix_page_info; 373 } 374 375 target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu, 376 bool radix, bool gtse, 377 uint64_t proc_tbl) 378 { 379 CPUState *cs = CPU(cpu); 380 int ret; 381 uint64_t flags = 0; 382 struct kvm_ppc_mmuv3_cfg cfg = { 383 .process_table = proc_tbl, 384 }; 385 386 if (radix) { 387 flags |= KVM_PPC_MMUV3_RADIX; 388 } 389 if (gtse) { 390 flags |= KVM_PPC_MMUV3_GTSE; 391 } 392 cfg.flags = flags; 393 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_CONFIGURE_V3_MMU, &cfg); 394 switch (ret) { 395 case 0: 396 return H_SUCCESS; 397 case -EINVAL: 398 return H_PARAMETER; 399 case -ENODEV: 400 return H_NOT_AVAILABLE; 401 default: 402 return H_HARDWARE; 403 } 404 } 405 406 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift) 407 { 408 if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) { 409 return true; 410 } 411 412 return (1ul << shift) <= rampgsize; 413 } 414 415 static long max_cpu_page_size; 416 417 static void kvm_fixup_page_sizes(PowerPCCPU *cpu) 418 { 419 static struct kvm_ppc_smmu_info smmu_info; 420 static bool has_smmu_info; 421 CPUPPCState *env = &cpu->env; 422 int iq, ik, jq, jk; 423 bool has_64k_pages = false; 424 425 /* We only handle page sizes for 64-bit server guests for now */ 426 if (!(env->mmu_model & POWERPC_MMU_64)) { 427 return; 428 } 429 430 /* Collect MMU info from kernel if not already */ 431 if (!has_smmu_info) { 432 kvm_get_smmu_info(cpu, &smmu_info); 433 has_smmu_info = true; 434 } 435 436 if (!max_cpu_page_size) { 437 max_cpu_page_size = qemu_getrampagesize(); 438 } 439 440 /* Convert to QEMU form */ 441 memset(&env->sps, 0, sizeof(env->sps)); 442 443 /* If we have HV KVM, we need to forbid CI large pages if our 444 * host page size is smaller than 64K. 445 */ 446 if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) { 447 env->ci_large_pages = getpagesize() >= 0x10000; 448 } 449 450 /* 451 * XXX This loop should be an entry wide AND of the capabilities that 452 * the selected CPU has with the capabilities that KVM supports. 453 */ 454 for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) { 455 struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq]; 456 struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik]; 457 458 if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size, 459 ksps->page_shift)) { 460 continue; 461 } 462 qsps->page_shift = ksps->page_shift; 463 qsps->slb_enc = ksps->slb_enc; 464 for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) { 465 if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size, 466 ksps->enc[jk].page_shift)) { 467 continue; 468 } 469 if (ksps->enc[jk].page_shift == 16) { 470 has_64k_pages = true; 471 } 472 qsps->enc[jq].page_shift = ksps->enc[jk].page_shift; 473 qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc; 474 if (++jq >= PPC_PAGE_SIZES_MAX_SZ) { 475 break; 476 } 477 } 478 if (++iq >= PPC_PAGE_SIZES_MAX_SZ) { 479 break; 480 } 481 } 482 env->slb_nr = smmu_info.slb_size; 483 if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) { 484 env->mmu_model &= ~POWERPC_MMU_1TSEG; 485 } 486 if (!has_64k_pages) { 487 env->mmu_model &= ~POWERPC_MMU_64K; 488 } 489 } 490 491 bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path) 492 { 493 Object *mem_obj = object_resolve_path(obj_path, NULL); 494 char *mempath = object_property_get_str(mem_obj, "mem-path", NULL); 495 long pagesize; 496 497 if (mempath) { 498 pagesize = qemu_mempath_getpagesize(mempath); 499 g_free(mempath); 500 } else { 501 pagesize = getpagesize(); 502 } 503 504 return pagesize >= max_cpu_page_size; 505 } 506 507 #else /* defined (TARGET_PPC64) */ 508 509 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu) 510 { 511 } 512 513 bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path) 514 { 515 return true; 516 } 517 518 #endif /* !defined (TARGET_PPC64) */ 519 520 unsigned long kvm_arch_vcpu_id(CPUState *cpu) 521 { 522 return POWERPC_CPU(cpu)->vcpu_id; 523 } 524 525 /* e500 supports 2 h/w breakpoint and 2 watchpoint. 526 * book3s supports only 1 watchpoint, so array size 527 * of 4 is sufficient for now. 528 */ 529 #define MAX_HW_BKPTS 4 530 531 static struct HWBreakpoint { 532 target_ulong addr; 533 int type; 534 } hw_debug_points[MAX_HW_BKPTS]; 535 536 static CPUWatchpoint hw_watchpoint; 537 538 /* Default there is no breakpoint and watchpoint supported */ 539 static int max_hw_breakpoint; 540 static int max_hw_watchpoint; 541 static int nb_hw_breakpoint; 542 static int nb_hw_watchpoint; 543 544 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv) 545 { 546 if (cenv->excp_model == POWERPC_EXCP_BOOKE) { 547 max_hw_breakpoint = 2; 548 max_hw_watchpoint = 2; 549 } 550 551 if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) { 552 fprintf(stderr, "Error initializing h/w breakpoints\n"); 553 return; 554 } 555 } 556 557 int kvm_arch_init_vcpu(CPUState *cs) 558 { 559 PowerPCCPU *cpu = POWERPC_CPU(cs); 560 CPUPPCState *cenv = &cpu->env; 561 int ret; 562 563 /* Gather server mmu info from KVM and update the CPU state */ 564 kvm_fixup_page_sizes(cpu); 565 566 /* Synchronize sregs with kvm */ 567 ret = kvm_arch_sync_sregs(cpu); 568 if (ret) { 569 if (ret == -EINVAL) { 570 error_report("Register sync failed... If you're using kvm-hv.ko," 571 " only \"-cpu host\" is possible"); 572 } 573 return ret; 574 } 575 576 idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu); 577 578 switch (cenv->mmu_model) { 579 case POWERPC_MMU_BOOKE206: 580 /* This target supports access to KVM's guest TLB */ 581 ret = kvm_booke206_tlb_init(cpu); 582 break; 583 case POWERPC_MMU_2_07: 584 if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) { 585 /* KVM-HV has transactional memory on POWER8 also without the 586 * KVM_CAP_PPC_HTM extension, so enable it here instead as 587 * long as it's availble to userspace on the host. */ 588 if (qemu_getauxval(AT_HWCAP2) & PPC_FEATURE2_HAS_HTM) { 589 cap_htm = true; 590 } 591 } 592 break; 593 default: 594 break; 595 } 596 597 kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode); 598 kvmppc_hw_debug_points_init(cenv); 599 600 return ret; 601 } 602 603 static void kvm_sw_tlb_put(PowerPCCPU *cpu) 604 { 605 CPUPPCState *env = &cpu->env; 606 CPUState *cs = CPU(cpu); 607 struct kvm_dirty_tlb dirty_tlb; 608 unsigned char *bitmap; 609 int ret; 610 611 if (!env->kvm_sw_tlb) { 612 return; 613 } 614 615 bitmap = g_malloc((env->nb_tlb + 7) / 8); 616 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8); 617 618 dirty_tlb.bitmap = (uintptr_t)bitmap; 619 dirty_tlb.num_dirty = env->nb_tlb; 620 621 ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb); 622 if (ret) { 623 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n", 624 __func__, strerror(-ret)); 625 } 626 627 g_free(bitmap); 628 } 629 630 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr) 631 { 632 PowerPCCPU *cpu = POWERPC_CPU(cs); 633 CPUPPCState *env = &cpu->env; 634 union { 635 uint32_t u32; 636 uint64_t u64; 637 } val; 638 struct kvm_one_reg reg = { 639 .id = id, 640 .addr = (uintptr_t) &val, 641 }; 642 int ret; 643 644 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 645 if (ret != 0) { 646 trace_kvm_failed_spr_get(spr, strerror(errno)); 647 } else { 648 switch (id & KVM_REG_SIZE_MASK) { 649 case KVM_REG_SIZE_U32: 650 env->spr[spr] = val.u32; 651 break; 652 653 case KVM_REG_SIZE_U64: 654 env->spr[spr] = val.u64; 655 break; 656 657 default: 658 /* Don't handle this size yet */ 659 abort(); 660 } 661 } 662 } 663 664 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr) 665 { 666 PowerPCCPU *cpu = POWERPC_CPU(cs); 667 CPUPPCState *env = &cpu->env; 668 union { 669 uint32_t u32; 670 uint64_t u64; 671 } val; 672 struct kvm_one_reg reg = { 673 .id = id, 674 .addr = (uintptr_t) &val, 675 }; 676 int ret; 677 678 switch (id & KVM_REG_SIZE_MASK) { 679 case KVM_REG_SIZE_U32: 680 val.u32 = env->spr[spr]; 681 break; 682 683 case KVM_REG_SIZE_U64: 684 val.u64 = env->spr[spr]; 685 break; 686 687 default: 688 /* Don't handle this size yet */ 689 abort(); 690 } 691 692 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 693 if (ret != 0) { 694 trace_kvm_failed_spr_set(spr, strerror(errno)); 695 } 696 } 697 698 static int kvm_put_fp(CPUState *cs) 699 { 700 PowerPCCPU *cpu = POWERPC_CPU(cs); 701 CPUPPCState *env = &cpu->env; 702 struct kvm_one_reg reg; 703 int i; 704 int ret; 705 706 if (env->insns_flags & PPC_FLOAT) { 707 uint64_t fpscr = env->fpscr; 708 bool vsx = !!(env->insns_flags2 & PPC2_VSX); 709 710 reg.id = KVM_REG_PPC_FPSCR; 711 reg.addr = (uintptr_t)&fpscr; 712 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 713 if (ret < 0) { 714 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno)); 715 return ret; 716 } 717 718 for (i = 0; i < 32; i++) { 719 uint64_t vsr[2]; 720 721 #ifdef HOST_WORDS_BIGENDIAN 722 vsr[0] = float64_val(env->fpr[i]); 723 vsr[1] = env->vsr[i]; 724 #else 725 vsr[0] = env->vsr[i]; 726 vsr[1] = float64_val(env->fpr[i]); 727 #endif 728 reg.addr = (uintptr_t) &vsr; 729 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i); 730 731 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 732 if (ret < 0) { 733 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR", 734 i, strerror(errno)); 735 return ret; 736 } 737 } 738 } 739 740 if (env->insns_flags & PPC_ALTIVEC) { 741 reg.id = KVM_REG_PPC_VSCR; 742 reg.addr = (uintptr_t)&env->vscr; 743 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 744 if (ret < 0) { 745 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno)); 746 return ret; 747 } 748 749 for (i = 0; i < 32; i++) { 750 reg.id = KVM_REG_PPC_VR(i); 751 reg.addr = (uintptr_t)&env->avr[i]; 752 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 753 if (ret < 0) { 754 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno)); 755 return ret; 756 } 757 } 758 } 759 760 return 0; 761 } 762 763 static int kvm_get_fp(CPUState *cs) 764 { 765 PowerPCCPU *cpu = POWERPC_CPU(cs); 766 CPUPPCState *env = &cpu->env; 767 struct kvm_one_reg reg; 768 int i; 769 int ret; 770 771 if (env->insns_flags & PPC_FLOAT) { 772 uint64_t fpscr; 773 bool vsx = !!(env->insns_flags2 & PPC2_VSX); 774 775 reg.id = KVM_REG_PPC_FPSCR; 776 reg.addr = (uintptr_t)&fpscr; 777 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 778 if (ret < 0) { 779 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno)); 780 return ret; 781 } else { 782 env->fpscr = fpscr; 783 } 784 785 for (i = 0; i < 32; i++) { 786 uint64_t vsr[2]; 787 788 reg.addr = (uintptr_t) &vsr; 789 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i); 790 791 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 792 if (ret < 0) { 793 DPRINTF("Unable to get %s%d from KVM: %s\n", 794 vsx ? "VSR" : "FPR", i, strerror(errno)); 795 return ret; 796 } else { 797 #ifdef HOST_WORDS_BIGENDIAN 798 env->fpr[i] = vsr[0]; 799 if (vsx) { 800 env->vsr[i] = vsr[1]; 801 } 802 #else 803 env->fpr[i] = vsr[1]; 804 if (vsx) { 805 env->vsr[i] = vsr[0]; 806 } 807 #endif 808 } 809 } 810 } 811 812 if (env->insns_flags & PPC_ALTIVEC) { 813 reg.id = KVM_REG_PPC_VSCR; 814 reg.addr = (uintptr_t)&env->vscr; 815 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 816 if (ret < 0) { 817 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno)); 818 return ret; 819 } 820 821 for (i = 0; i < 32; i++) { 822 reg.id = KVM_REG_PPC_VR(i); 823 reg.addr = (uintptr_t)&env->avr[i]; 824 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 825 if (ret < 0) { 826 DPRINTF("Unable to get VR%d from KVM: %s\n", 827 i, strerror(errno)); 828 return ret; 829 } 830 } 831 } 832 833 return 0; 834 } 835 836 #if defined(TARGET_PPC64) 837 static int kvm_get_vpa(CPUState *cs) 838 { 839 PowerPCCPU *cpu = POWERPC_CPU(cs); 840 CPUPPCState *env = &cpu->env; 841 struct kvm_one_reg reg; 842 int ret; 843 844 reg.id = KVM_REG_PPC_VPA_ADDR; 845 reg.addr = (uintptr_t)&env->vpa_addr; 846 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 847 if (ret < 0) { 848 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno)); 849 return ret; 850 } 851 852 assert((uintptr_t)&env->slb_shadow_size 853 == ((uintptr_t)&env->slb_shadow_addr + 8)); 854 reg.id = KVM_REG_PPC_VPA_SLB; 855 reg.addr = (uintptr_t)&env->slb_shadow_addr; 856 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 857 if (ret < 0) { 858 DPRINTF("Unable to get SLB shadow state from KVM: %s\n", 859 strerror(errno)); 860 return ret; 861 } 862 863 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8)); 864 reg.id = KVM_REG_PPC_VPA_DTL; 865 reg.addr = (uintptr_t)&env->dtl_addr; 866 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 867 if (ret < 0) { 868 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n", 869 strerror(errno)); 870 return ret; 871 } 872 873 return 0; 874 } 875 876 static int kvm_put_vpa(CPUState *cs) 877 { 878 PowerPCCPU *cpu = POWERPC_CPU(cs); 879 CPUPPCState *env = &cpu->env; 880 struct kvm_one_reg reg; 881 int ret; 882 883 /* SLB shadow or DTL can't be registered unless a master VPA is 884 * registered. That means when restoring state, if a VPA *is* 885 * registered, we need to set that up first. If not, we need to 886 * deregister the others before deregistering the master VPA */ 887 assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr)); 888 889 if (env->vpa_addr) { 890 reg.id = KVM_REG_PPC_VPA_ADDR; 891 reg.addr = (uintptr_t)&env->vpa_addr; 892 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 893 if (ret < 0) { 894 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno)); 895 return ret; 896 } 897 } 898 899 assert((uintptr_t)&env->slb_shadow_size 900 == ((uintptr_t)&env->slb_shadow_addr + 8)); 901 reg.id = KVM_REG_PPC_VPA_SLB; 902 reg.addr = (uintptr_t)&env->slb_shadow_addr; 903 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 904 if (ret < 0) { 905 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno)); 906 return ret; 907 } 908 909 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8)); 910 reg.id = KVM_REG_PPC_VPA_DTL; 911 reg.addr = (uintptr_t)&env->dtl_addr; 912 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 913 if (ret < 0) { 914 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n", 915 strerror(errno)); 916 return ret; 917 } 918 919 if (!env->vpa_addr) { 920 reg.id = KVM_REG_PPC_VPA_ADDR; 921 reg.addr = (uintptr_t)&env->vpa_addr; 922 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 923 if (ret < 0) { 924 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno)); 925 return ret; 926 } 927 } 928 929 return 0; 930 } 931 #endif /* TARGET_PPC64 */ 932 933 int kvmppc_put_books_sregs(PowerPCCPU *cpu) 934 { 935 CPUPPCState *env = &cpu->env; 936 struct kvm_sregs sregs; 937 int i; 938 939 sregs.pvr = env->spr[SPR_PVR]; 940 941 if (cpu->vhyp) { 942 PPCVirtualHypervisorClass *vhc = 943 PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); 944 sregs.u.s.sdr1 = vhc->encode_hpt_for_kvm_pr(cpu->vhyp); 945 } else { 946 sregs.u.s.sdr1 = env->spr[SPR_SDR1]; 947 } 948 949 /* Sync SLB */ 950 #ifdef TARGET_PPC64 951 for (i = 0; i < ARRAY_SIZE(env->slb); i++) { 952 sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid; 953 if (env->slb[i].esid & SLB_ESID_V) { 954 sregs.u.s.ppc64.slb[i].slbe |= i; 955 } 956 sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid; 957 } 958 #endif 959 960 /* Sync SRs */ 961 for (i = 0; i < 16; i++) { 962 sregs.u.s.ppc32.sr[i] = env->sr[i]; 963 } 964 965 /* Sync BATs */ 966 for (i = 0; i < 8; i++) { 967 /* Beware. We have to swap upper and lower bits here */ 968 sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32) 969 | env->DBAT[1][i]; 970 sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32) 971 | env->IBAT[1][i]; 972 } 973 974 return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs); 975 } 976 977 int kvm_arch_put_registers(CPUState *cs, int level) 978 { 979 PowerPCCPU *cpu = POWERPC_CPU(cs); 980 CPUPPCState *env = &cpu->env; 981 struct kvm_regs regs; 982 int ret; 983 int i; 984 985 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, ®s); 986 if (ret < 0) { 987 return ret; 988 } 989 990 regs.ctr = env->ctr; 991 regs.lr = env->lr; 992 regs.xer = cpu_read_xer(env); 993 regs.msr = env->msr; 994 regs.pc = env->nip; 995 996 regs.srr0 = env->spr[SPR_SRR0]; 997 regs.srr1 = env->spr[SPR_SRR1]; 998 999 regs.sprg0 = env->spr[SPR_SPRG0]; 1000 regs.sprg1 = env->spr[SPR_SPRG1]; 1001 regs.sprg2 = env->spr[SPR_SPRG2]; 1002 regs.sprg3 = env->spr[SPR_SPRG3]; 1003 regs.sprg4 = env->spr[SPR_SPRG4]; 1004 regs.sprg5 = env->spr[SPR_SPRG5]; 1005 regs.sprg6 = env->spr[SPR_SPRG6]; 1006 regs.sprg7 = env->spr[SPR_SPRG7]; 1007 1008 regs.pid = env->spr[SPR_BOOKE_PID]; 1009 1010 for (i = 0;i < 32; i++) 1011 regs.gpr[i] = env->gpr[i]; 1012 1013 regs.cr = 0; 1014 for (i = 0; i < 8; i++) { 1015 regs.cr |= (env->crf[i] & 15) << (4 * (7 - i)); 1016 } 1017 1018 ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, ®s); 1019 if (ret < 0) 1020 return ret; 1021 1022 kvm_put_fp(cs); 1023 1024 if (env->tlb_dirty) { 1025 kvm_sw_tlb_put(cpu); 1026 env->tlb_dirty = false; 1027 } 1028 1029 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) { 1030 ret = kvmppc_put_books_sregs(cpu); 1031 if (ret < 0) { 1032 return ret; 1033 } 1034 } 1035 1036 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) { 1037 kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR); 1038 } 1039 1040 if (cap_one_reg) { 1041 int i; 1042 1043 /* We deliberately ignore errors here, for kernels which have 1044 * the ONE_REG calls, but don't support the specific 1045 * registers, there's a reasonable chance things will still 1046 * work, at least until we try to migrate. */ 1047 for (i = 0; i < 1024; i++) { 1048 uint64_t id = env->spr_cb[i].one_reg_id; 1049 1050 if (id != 0) { 1051 kvm_put_one_spr(cs, id, i); 1052 } 1053 } 1054 1055 #ifdef TARGET_PPC64 1056 if (msr_ts) { 1057 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) { 1058 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]); 1059 } 1060 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) { 1061 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]); 1062 } 1063 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr); 1064 kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr); 1065 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr); 1066 kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr); 1067 kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr); 1068 kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr); 1069 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave); 1070 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr); 1071 kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr); 1072 kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar); 1073 } 1074 1075 if (cap_papr) { 1076 if (kvm_put_vpa(cs) < 0) { 1077 DPRINTF("Warning: Unable to set VPA information to KVM\n"); 1078 } 1079 } 1080 1081 kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset); 1082 #endif /* TARGET_PPC64 */ 1083 } 1084 1085 return ret; 1086 } 1087 1088 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor) 1089 { 1090 env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR]; 1091 } 1092 1093 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu) 1094 { 1095 CPUPPCState *env = &cpu->env; 1096 struct kvm_sregs sregs; 1097 int ret; 1098 1099 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs); 1100 if (ret < 0) { 1101 return ret; 1102 } 1103 1104 if (sregs.u.e.features & KVM_SREGS_E_BASE) { 1105 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0; 1106 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1; 1107 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr; 1108 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear; 1109 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr; 1110 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr; 1111 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr; 1112 env->spr[SPR_DECR] = sregs.u.e.dec; 1113 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff; 1114 env->spr[SPR_TBU] = sregs.u.e.tb >> 32; 1115 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave; 1116 } 1117 1118 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) { 1119 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir; 1120 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0; 1121 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1; 1122 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar; 1123 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr; 1124 } 1125 1126 if (sregs.u.e.features & KVM_SREGS_E_64) { 1127 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr; 1128 } 1129 1130 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) { 1131 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8; 1132 } 1133 1134 if (sregs.u.e.features & KVM_SREGS_E_IVOR) { 1135 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0]; 1136 kvm_sync_excp(env, POWERPC_EXCP_CRITICAL, SPR_BOOKE_IVOR0); 1137 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1]; 1138 kvm_sync_excp(env, POWERPC_EXCP_MCHECK, SPR_BOOKE_IVOR1); 1139 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2]; 1140 kvm_sync_excp(env, POWERPC_EXCP_DSI, SPR_BOOKE_IVOR2); 1141 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3]; 1142 kvm_sync_excp(env, POWERPC_EXCP_ISI, SPR_BOOKE_IVOR3); 1143 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4]; 1144 kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL, SPR_BOOKE_IVOR4); 1145 env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5]; 1146 kvm_sync_excp(env, POWERPC_EXCP_ALIGN, SPR_BOOKE_IVOR5); 1147 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6]; 1148 kvm_sync_excp(env, POWERPC_EXCP_PROGRAM, SPR_BOOKE_IVOR6); 1149 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7]; 1150 kvm_sync_excp(env, POWERPC_EXCP_FPU, SPR_BOOKE_IVOR7); 1151 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8]; 1152 kvm_sync_excp(env, POWERPC_EXCP_SYSCALL, SPR_BOOKE_IVOR8); 1153 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9]; 1154 kvm_sync_excp(env, POWERPC_EXCP_APU, SPR_BOOKE_IVOR9); 1155 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10]; 1156 kvm_sync_excp(env, POWERPC_EXCP_DECR, SPR_BOOKE_IVOR10); 1157 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11]; 1158 kvm_sync_excp(env, POWERPC_EXCP_FIT, SPR_BOOKE_IVOR11); 1159 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12]; 1160 kvm_sync_excp(env, POWERPC_EXCP_WDT, SPR_BOOKE_IVOR12); 1161 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13]; 1162 kvm_sync_excp(env, POWERPC_EXCP_DTLB, SPR_BOOKE_IVOR13); 1163 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14]; 1164 kvm_sync_excp(env, POWERPC_EXCP_ITLB, SPR_BOOKE_IVOR14); 1165 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15]; 1166 kvm_sync_excp(env, POWERPC_EXCP_DEBUG, SPR_BOOKE_IVOR15); 1167 1168 if (sregs.u.e.features & KVM_SREGS_E_SPE) { 1169 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0]; 1170 kvm_sync_excp(env, POWERPC_EXCP_SPEU, SPR_BOOKE_IVOR32); 1171 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1]; 1172 kvm_sync_excp(env, POWERPC_EXCP_EFPDI, SPR_BOOKE_IVOR33); 1173 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2]; 1174 kvm_sync_excp(env, POWERPC_EXCP_EFPRI, SPR_BOOKE_IVOR34); 1175 } 1176 1177 if (sregs.u.e.features & KVM_SREGS_E_PM) { 1178 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3]; 1179 kvm_sync_excp(env, POWERPC_EXCP_EPERFM, SPR_BOOKE_IVOR35); 1180 } 1181 1182 if (sregs.u.e.features & KVM_SREGS_E_PC) { 1183 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4]; 1184 kvm_sync_excp(env, POWERPC_EXCP_DOORI, SPR_BOOKE_IVOR36); 1185 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5]; 1186 kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37); 1187 } 1188 } 1189 1190 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) { 1191 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0; 1192 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1; 1193 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2; 1194 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff; 1195 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4; 1196 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6; 1197 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32; 1198 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg; 1199 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0]; 1200 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1]; 1201 } 1202 1203 if (sregs.u.e.features & KVM_SREGS_EXP) { 1204 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr; 1205 } 1206 1207 if (sregs.u.e.features & KVM_SREGS_E_PD) { 1208 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc; 1209 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc; 1210 } 1211 1212 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) { 1213 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr; 1214 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar; 1215 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0; 1216 1217 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) { 1218 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1; 1219 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2; 1220 } 1221 } 1222 1223 return 0; 1224 } 1225 1226 static int kvmppc_get_books_sregs(PowerPCCPU *cpu) 1227 { 1228 CPUPPCState *env = &cpu->env; 1229 struct kvm_sregs sregs; 1230 int ret; 1231 int i; 1232 1233 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs); 1234 if (ret < 0) { 1235 return ret; 1236 } 1237 1238 if (!cpu->vhyp) { 1239 ppc_store_sdr1(env, sregs.u.s.sdr1); 1240 } 1241 1242 /* Sync SLB */ 1243 #ifdef TARGET_PPC64 1244 /* 1245 * The packed SLB array we get from KVM_GET_SREGS only contains 1246 * information about valid entries. So we flush our internal copy 1247 * to get rid of stale ones, then put all valid SLB entries back 1248 * in. 1249 */ 1250 memset(env->slb, 0, sizeof(env->slb)); 1251 for (i = 0; i < ARRAY_SIZE(env->slb); i++) { 1252 target_ulong rb = sregs.u.s.ppc64.slb[i].slbe; 1253 target_ulong rs = sregs.u.s.ppc64.slb[i].slbv; 1254 /* 1255 * Only restore valid entries 1256 */ 1257 if (rb & SLB_ESID_V) { 1258 ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs); 1259 } 1260 } 1261 #endif 1262 1263 /* Sync SRs */ 1264 for (i = 0; i < 16; i++) { 1265 env->sr[i] = sregs.u.s.ppc32.sr[i]; 1266 } 1267 1268 /* Sync BATs */ 1269 for (i = 0; i < 8; i++) { 1270 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff; 1271 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32; 1272 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff; 1273 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32; 1274 } 1275 1276 return 0; 1277 } 1278 1279 int kvm_arch_get_registers(CPUState *cs) 1280 { 1281 PowerPCCPU *cpu = POWERPC_CPU(cs); 1282 CPUPPCState *env = &cpu->env; 1283 struct kvm_regs regs; 1284 uint32_t cr; 1285 int i, ret; 1286 1287 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, ®s); 1288 if (ret < 0) 1289 return ret; 1290 1291 cr = regs.cr; 1292 for (i = 7; i >= 0; i--) { 1293 env->crf[i] = cr & 15; 1294 cr >>= 4; 1295 } 1296 1297 env->ctr = regs.ctr; 1298 env->lr = regs.lr; 1299 cpu_write_xer(env, regs.xer); 1300 env->msr = regs.msr; 1301 env->nip = regs.pc; 1302 1303 env->spr[SPR_SRR0] = regs.srr0; 1304 env->spr[SPR_SRR1] = regs.srr1; 1305 1306 env->spr[SPR_SPRG0] = regs.sprg0; 1307 env->spr[SPR_SPRG1] = regs.sprg1; 1308 env->spr[SPR_SPRG2] = regs.sprg2; 1309 env->spr[SPR_SPRG3] = regs.sprg3; 1310 env->spr[SPR_SPRG4] = regs.sprg4; 1311 env->spr[SPR_SPRG5] = regs.sprg5; 1312 env->spr[SPR_SPRG6] = regs.sprg6; 1313 env->spr[SPR_SPRG7] = regs.sprg7; 1314 1315 env->spr[SPR_BOOKE_PID] = regs.pid; 1316 1317 for (i = 0;i < 32; i++) 1318 env->gpr[i] = regs.gpr[i]; 1319 1320 kvm_get_fp(cs); 1321 1322 if (cap_booke_sregs) { 1323 ret = kvmppc_get_booke_sregs(cpu); 1324 if (ret < 0) { 1325 return ret; 1326 } 1327 } 1328 1329 if (cap_segstate) { 1330 ret = kvmppc_get_books_sregs(cpu); 1331 if (ret < 0) { 1332 return ret; 1333 } 1334 } 1335 1336 if (cap_hior) { 1337 kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR); 1338 } 1339 1340 if (cap_one_reg) { 1341 int i; 1342 1343 /* We deliberately ignore errors here, for kernels which have 1344 * the ONE_REG calls, but don't support the specific 1345 * registers, there's a reasonable chance things will still 1346 * work, at least until we try to migrate. */ 1347 for (i = 0; i < 1024; i++) { 1348 uint64_t id = env->spr_cb[i].one_reg_id; 1349 1350 if (id != 0) { 1351 kvm_get_one_spr(cs, id, i); 1352 } 1353 } 1354 1355 #ifdef TARGET_PPC64 1356 if (msr_ts) { 1357 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) { 1358 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]); 1359 } 1360 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) { 1361 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]); 1362 } 1363 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr); 1364 kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr); 1365 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr); 1366 kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr); 1367 kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr); 1368 kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr); 1369 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave); 1370 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr); 1371 kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr); 1372 kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar); 1373 } 1374 1375 if (cap_papr) { 1376 if (kvm_get_vpa(cs) < 0) { 1377 DPRINTF("Warning: Unable to get VPA information from KVM\n"); 1378 } 1379 } 1380 1381 kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset); 1382 #endif 1383 } 1384 1385 return 0; 1386 } 1387 1388 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level) 1389 { 1390 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET; 1391 1392 if (irq != PPC_INTERRUPT_EXT) { 1393 return 0; 1394 } 1395 1396 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) { 1397 return 0; 1398 } 1399 1400 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq); 1401 1402 return 0; 1403 } 1404 1405 #if defined(TARGET_PPCEMB) 1406 #define PPC_INPUT_INT PPC40x_INPUT_INT 1407 #elif defined(TARGET_PPC64) 1408 #define PPC_INPUT_INT PPC970_INPUT_INT 1409 #else 1410 #define PPC_INPUT_INT PPC6xx_INPUT_INT 1411 #endif 1412 1413 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run) 1414 { 1415 PowerPCCPU *cpu = POWERPC_CPU(cs); 1416 CPUPPCState *env = &cpu->env; 1417 int r; 1418 unsigned irq; 1419 1420 qemu_mutex_lock_iothread(); 1421 1422 /* PowerPC QEMU tracks the various core input pins (interrupt, critical 1423 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */ 1424 if (!cap_interrupt_level && 1425 run->ready_for_interrupt_injection && 1426 (cs->interrupt_request & CPU_INTERRUPT_HARD) && 1427 (env->irq_input_state & (1<<PPC_INPUT_INT))) 1428 { 1429 /* For now KVM disregards the 'irq' argument. However, in the 1430 * future KVM could cache it in-kernel to avoid a heavyweight exit 1431 * when reading the UIC. 1432 */ 1433 irq = KVM_INTERRUPT_SET; 1434 1435 DPRINTF("injected interrupt %d\n", irq); 1436 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq); 1437 if (r < 0) { 1438 printf("cpu %d fail inject %x\n", cs->cpu_index, irq); 1439 } 1440 1441 /* Always wake up soon in case the interrupt was level based */ 1442 timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 1443 (NANOSECONDS_PER_SECOND / 50)); 1444 } 1445 1446 /* We don't know if there are more interrupts pending after this. However, 1447 * the guest will return to userspace in the course of handling this one 1448 * anyways, so we will get a chance to deliver the rest. */ 1449 1450 qemu_mutex_unlock_iothread(); 1451 } 1452 1453 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run) 1454 { 1455 return MEMTXATTRS_UNSPECIFIED; 1456 } 1457 1458 int kvm_arch_process_async_events(CPUState *cs) 1459 { 1460 return cs->halted; 1461 } 1462 1463 static int kvmppc_handle_halt(PowerPCCPU *cpu) 1464 { 1465 CPUState *cs = CPU(cpu); 1466 CPUPPCState *env = &cpu->env; 1467 1468 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) { 1469 cs->halted = 1; 1470 cs->exception_index = EXCP_HLT; 1471 } 1472 1473 return 0; 1474 } 1475 1476 /* map dcr access to existing qemu dcr emulation */ 1477 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data) 1478 { 1479 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0) 1480 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn); 1481 1482 return 0; 1483 } 1484 1485 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data) 1486 { 1487 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0) 1488 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn); 1489 1490 return 0; 1491 } 1492 1493 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp) 1494 { 1495 /* Mixed endian case is not handled */ 1496 uint32_t sc = debug_inst_opcode; 1497 1498 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 1499 sizeof(sc), 0) || 1500 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) { 1501 return -EINVAL; 1502 } 1503 1504 return 0; 1505 } 1506 1507 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp) 1508 { 1509 uint32_t sc; 1510 1511 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) || 1512 sc != debug_inst_opcode || 1513 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 1514 sizeof(sc), 1)) { 1515 return -EINVAL; 1516 } 1517 1518 return 0; 1519 } 1520 1521 static int find_hw_breakpoint(target_ulong addr, int type) 1522 { 1523 int n; 1524 1525 assert((nb_hw_breakpoint + nb_hw_watchpoint) 1526 <= ARRAY_SIZE(hw_debug_points)); 1527 1528 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) { 1529 if (hw_debug_points[n].addr == addr && 1530 hw_debug_points[n].type == type) { 1531 return n; 1532 } 1533 } 1534 1535 return -1; 1536 } 1537 1538 static int find_hw_watchpoint(target_ulong addr, int *flag) 1539 { 1540 int n; 1541 1542 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS); 1543 if (n >= 0) { 1544 *flag = BP_MEM_ACCESS; 1545 return n; 1546 } 1547 1548 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE); 1549 if (n >= 0) { 1550 *flag = BP_MEM_WRITE; 1551 return n; 1552 } 1553 1554 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ); 1555 if (n >= 0) { 1556 *flag = BP_MEM_READ; 1557 return n; 1558 } 1559 1560 return -1; 1561 } 1562 1563 int kvm_arch_insert_hw_breakpoint(target_ulong addr, 1564 target_ulong len, int type) 1565 { 1566 if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) { 1567 return -ENOBUFS; 1568 } 1569 1570 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr; 1571 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type; 1572 1573 switch (type) { 1574 case GDB_BREAKPOINT_HW: 1575 if (nb_hw_breakpoint >= max_hw_breakpoint) { 1576 return -ENOBUFS; 1577 } 1578 1579 if (find_hw_breakpoint(addr, type) >= 0) { 1580 return -EEXIST; 1581 } 1582 1583 nb_hw_breakpoint++; 1584 break; 1585 1586 case GDB_WATCHPOINT_WRITE: 1587 case GDB_WATCHPOINT_READ: 1588 case GDB_WATCHPOINT_ACCESS: 1589 if (nb_hw_watchpoint >= max_hw_watchpoint) { 1590 return -ENOBUFS; 1591 } 1592 1593 if (find_hw_breakpoint(addr, type) >= 0) { 1594 return -EEXIST; 1595 } 1596 1597 nb_hw_watchpoint++; 1598 break; 1599 1600 default: 1601 return -ENOSYS; 1602 } 1603 1604 return 0; 1605 } 1606 1607 int kvm_arch_remove_hw_breakpoint(target_ulong addr, 1608 target_ulong len, int type) 1609 { 1610 int n; 1611 1612 n = find_hw_breakpoint(addr, type); 1613 if (n < 0) { 1614 return -ENOENT; 1615 } 1616 1617 switch (type) { 1618 case GDB_BREAKPOINT_HW: 1619 nb_hw_breakpoint--; 1620 break; 1621 1622 case GDB_WATCHPOINT_WRITE: 1623 case GDB_WATCHPOINT_READ: 1624 case GDB_WATCHPOINT_ACCESS: 1625 nb_hw_watchpoint--; 1626 break; 1627 1628 default: 1629 return -ENOSYS; 1630 } 1631 hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint]; 1632 1633 return 0; 1634 } 1635 1636 void kvm_arch_remove_all_hw_breakpoints(void) 1637 { 1638 nb_hw_breakpoint = nb_hw_watchpoint = 0; 1639 } 1640 1641 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg) 1642 { 1643 int n; 1644 1645 /* Software Breakpoint updates */ 1646 if (kvm_sw_breakpoints_active(cs)) { 1647 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP; 1648 } 1649 1650 assert((nb_hw_breakpoint + nb_hw_watchpoint) 1651 <= ARRAY_SIZE(hw_debug_points)); 1652 assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp)); 1653 1654 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) { 1655 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP; 1656 memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp)); 1657 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) { 1658 switch (hw_debug_points[n].type) { 1659 case GDB_BREAKPOINT_HW: 1660 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT; 1661 break; 1662 case GDB_WATCHPOINT_WRITE: 1663 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE; 1664 break; 1665 case GDB_WATCHPOINT_READ: 1666 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ; 1667 break; 1668 case GDB_WATCHPOINT_ACCESS: 1669 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE | 1670 KVMPPC_DEBUG_WATCH_READ; 1671 break; 1672 default: 1673 cpu_abort(cs, "Unsupported breakpoint type\n"); 1674 } 1675 dbg->arch.bp[n].addr = hw_debug_points[n].addr; 1676 } 1677 } 1678 } 1679 1680 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run) 1681 { 1682 CPUState *cs = CPU(cpu); 1683 CPUPPCState *env = &cpu->env; 1684 struct kvm_debug_exit_arch *arch_info = &run->debug.arch; 1685 int handle = 0; 1686 int n; 1687 int flag = 0; 1688 1689 if (cs->singlestep_enabled) { 1690 handle = 1; 1691 } else if (arch_info->status) { 1692 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) { 1693 if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) { 1694 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW); 1695 if (n >= 0) { 1696 handle = 1; 1697 } 1698 } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ | 1699 KVMPPC_DEBUG_WATCH_WRITE)) { 1700 n = find_hw_watchpoint(arch_info->address, &flag); 1701 if (n >= 0) { 1702 handle = 1; 1703 cs->watchpoint_hit = &hw_watchpoint; 1704 hw_watchpoint.vaddr = hw_debug_points[n].addr; 1705 hw_watchpoint.flags = flag; 1706 } 1707 } 1708 } 1709 } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) { 1710 handle = 1; 1711 } else { 1712 /* QEMU is not able to handle debug exception, so inject 1713 * program exception to guest; 1714 * Yes program exception NOT debug exception !! 1715 * When QEMU is using debug resources then debug exception must 1716 * be always set. To achieve this we set MSR_DE and also set 1717 * MSRP_DEP so guest cannot change MSR_DE. 1718 * When emulating debug resource for guest we want guest 1719 * to control MSR_DE (enable/disable debug interrupt on need). 1720 * Supporting both configurations are NOT possible. 1721 * So the result is that we cannot share debug resources 1722 * between QEMU and Guest on BOOKE architecture. 1723 * In the current design QEMU gets the priority over guest, 1724 * this means that if QEMU is using debug resources then guest 1725 * cannot use them; 1726 * For software breakpoint QEMU uses a privileged instruction; 1727 * So there cannot be any reason that we are here for guest 1728 * set debug exception, only possibility is guest executed a 1729 * privileged / illegal instruction and that's why we are 1730 * injecting a program interrupt. 1731 */ 1732 1733 cpu_synchronize_state(cs); 1734 /* env->nip is PC, so increment this by 4 to use 1735 * ppc_cpu_do_interrupt(), which set srr0 = env->nip - 4. 1736 */ 1737 env->nip += 4; 1738 cs->exception_index = POWERPC_EXCP_PROGRAM; 1739 env->error_code = POWERPC_EXCP_INVAL; 1740 ppc_cpu_do_interrupt(cs); 1741 } 1742 1743 return handle; 1744 } 1745 1746 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) 1747 { 1748 PowerPCCPU *cpu = POWERPC_CPU(cs); 1749 CPUPPCState *env = &cpu->env; 1750 int ret; 1751 1752 qemu_mutex_lock_iothread(); 1753 1754 switch (run->exit_reason) { 1755 case KVM_EXIT_DCR: 1756 if (run->dcr.is_write) { 1757 DPRINTF("handle dcr write\n"); 1758 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data); 1759 } else { 1760 DPRINTF("handle dcr read\n"); 1761 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data); 1762 } 1763 break; 1764 case KVM_EXIT_HLT: 1765 DPRINTF("handle halt\n"); 1766 ret = kvmppc_handle_halt(cpu); 1767 break; 1768 #if defined(TARGET_PPC64) 1769 case KVM_EXIT_PAPR_HCALL: 1770 DPRINTF("handle PAPR hypercall\n"); 1771 run->papr_hcall.ret = spapr_hypercall(cpu, 1772 run->papr_hcall.nr, 1773 run->papr_hcall.args); 1774 ret = 0; 1775 break; 1776 #endif 1777 case KVM_EXIT_EPR: 1778 DPRINTF("handle epr\n"); 1779 run->epr.epr = ldl_phys(cs->as, env->mpic_iack); 1780 ret = 0; 1781 break; 1782 case KVM_EXIT_WATCHDOG: 1783 DPRINTF("handle watchdog expiry\n"); 1784 watchdog_perform_action(); 1785 ret = 0; 1786 break; 1787 1788 case KVM_EXIT_DEBUG: 1789 DPRINTF("handle debug exception\n"); 1790 if (kvm_handle_debug(cpu, run)) { 1791 ret = EXCP_DEBUG; 1792 break; 1793 } 1794 /* re-enter, this exception was guest-internal */ 1795 ret = 0; 1796 break; 1797 1798 default: 1799 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason); 1800 ret = -1; 1801 break; 1802 } 1803 1804 qemu_mutex_unlock_iothread(); 1805 return ret; 1806 } 1807 1808 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits) 1809 { 1810 CPUState *cs = CPU(cpu); 1811 uint32_t bits = tsr_bits; 1812 struct kvm_one_reg reg = { 1813 .id = KVM_REG_PPC_OR_TSR, 1814 .addr = (uintptr_t) &bits, 1815 }; 1816 1817 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 1818 } 1819 1820 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits) 1821 { 1822 1823 CPUState *cs = CPU(cpu); 1824 uint32_t bits = tsr_bits; 1825 struct kvm_one_reg reg = { 1826 .id = KVM_REG_PPC_CLEAR_TSR, 1827 .addr = (uintptr_t) &bits, 1828 }; 1829 1830 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 1831 } 1832 1833 int kvmppc_set_tcr(PowerPCCPU *cpu) 1834 { 1835 CPUState *cs = CPU(cpu); 1836 CPUPPCState *env = &cpu->env; 1837 uint32_t tcr = env->spr[SPR_BOOKE_TCR]; 1838 1839 struct kvm_one_reg reg = { 1840 .id = KVM_REG_PPC_TCR, 1841 .addr = (uintptr_t) &tcr, 1842 }; 1843 1844 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 1845 } 1846 1847 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu) 1848 { 1849 CPUState *cs = CPU(cpu); 1850 int ret; 1851 1852 if (!kvm_enabled()) { 1853 return -1; 1854 } 1855 1856 if (!cap_ppc_watchdog) { 1857 printf("warning: KVM does not support watchdog"); 1858 return -1; 1859 } 1860 1861 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0); 1862 if (ret < 0) { 1863 fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n", 1864 __func__, strerror(-ret)); 1865 return ret; 1866 } 1867 1868 return ret; 1869 } 1870 1871 static int read_cpuinfo(const char *field, char *value, int len) 1872 { 1873 FILE *f; 1874 int ret = -1; 1875 int field_len = strlen(field); 1876 char line[512]; 1877 1878 f = fopen("/proc/cpuinfo", "r"); 1879 if (!f) { 1880 return -1; 1881 } 1882 1883 do { 1884 if (!fgets(line, sizeof(line), f)) { 1885 break; 1886 } 1887 if (!strncmp(line, field, field_len)) { 1888 pstrcpy(value, len, line); 1889 ret = 0; 1890 break; 1891 } 1892 } while(*line); 1893 1894 fclose(f); 1895 1896 return ret; 1897 } 1898 1899 uint32_t kvmppc_get_tbfreq(void) 1900 { 1901 char line[512]; 1902 char *ns; 1903 uint32_t retval = NANOSECONDS_PER_SECOND; 1904 1905 if (read_cpuinfo("timebase", line, sizeof(line))) { 1906 return retval; 1907 } 1908 1909 if (!(ns = strchr(line, ':'))) { 1910 return retval; 1911 } 1912 1913 ns++; 1914 1915 return atoi(ns); 1916 } 1917 1918 bool kvmppc_get_host_serial(char **value) 1919 { 1920 return g_file_get_contents("/proc/device-tree/system-id", value, NULL, 1921 NULL); 1922 } 1923 1924 bool kvmppc_get_host_model(char **value) 1925 { 1926 return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL); 1927 } 1928 1929 /* Try to find a device tree node for a CPU with clock-frequency property */ 1930 static int kvmppc_find_cpu_dt(char *buf, int buf_len) 1931 { 1932 struct dirent *dirp; 1933 DIR *dp; 1934 1935 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) { 1936 printf("Can't open directory " PROC_DEVTREE_CPU "\n"); 1937 return -1; 1938 } 1939 1940 buf[0] = '\0'; 1941 while ((dirp = readdir(dp)) != NULL) { 1942 FILE *f; 1943 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU, 1944 dirp->d_name); 1945 f = fopen(buf, "r"); 1946 if (f) { 1947 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name); 1948 fclose(f); 1949 break; 1950 } 1951 buf[0] = '\0'; 1952 } 1953 closedir(dp); 1954 if (buf[0] == '\0') { 1955 printf("Unknown host!\n"); 1956 return -1; 1957 } 1958 1959 return 0; 1960 } 1961 1962 static uint64_t kvmppc_read_int_dt(const char *filename) 1963 { 1964 union { 1965 uint32_t v32; 1966 uint64_t v64; 1967 } u; 1968 FILE *f; 1969 int len; 1970 1971 f = fopen(filename, "rb"); 1972 if (!f) { 1973 return -1; 1974 } 1975 1976 len = fread(&u, 1, sizeof(u), f); 1977 fclose(f); 1978 switch (len) { 1979 case 4: 1980 /* property is a 32-bit quantity */ 1981 return be32_to_cpu(u.v32); 1982 case 8: 1983 return be64_to_cpu(u.v64); 1984 } 1985 1986 return 0; 1987 } 1988 1989 /* Read a CPU node property from the host device tree that's a single 1990 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong 1991 * (can't find or open the property, or doesn't understand the 1992 * format) */ 1993 static uint64_t kvmppc_read_int_cpu_dt(const char *propname) 1994 { 1995 char buf[PATH_MAX], *tmp; 1996 uint64_t val; 1997 1998 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) { 1999 return -1; 2000 } 2001 2002 tmp = g_strdup_printf("%s/%s", buf, propname); 2003 val = kvmppc_read_int_dt(tmp); 2004 g_free(tmp); 2005 2006 return val; 2007 } 2008 2009 uint64_t kvmppc_get_clockfreq(void) 2010 { 2011 return kvmppc_read_int_cpu_dt("clock-frequency"); 2012 } 2013 2014 uint32_t kvmppc_get_vmx(void) 2015 { 2016 return kvmppc_read_int_cpu_dt("ibm,vmx"); 2017 } 2018 2019 uint32_t kvmppc_get_dfp(void) 2020 { 2021 return kvmppc_read_int_cpu_dt("ibm,dfp"); 2022 } 2023 2024 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo) 2025 { 2026 PowerPCCPU *cpu = ppc_env_get_cpu(env); 2027 CPUState *cs = CPU(cpu); 2028 2029 if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) && 2030 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) { 2031 return 0; 2032 } 2033 2034 return 1; 2035 } 2036 2037 int kvmppc_get_hasidle(CPUPPCState *env) 2038 { 2039 struct kvm_ppc_pvinfo pvinfo; 2040 2041 if (!kvmppc_get_pvinfo(env, &pvinfo) && 2042 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) { 2043 return 1; 2044 } 2045 2046 return 0; 2047 } 2048 2049 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len) 2050 { 2051 uint32_t *hc = (uint32_t*)buf; 2052 struct kvm_ppc_pvinfo pvinfo; 2053 2054 if (!kvmppc_get_pvinfo(env, &pvinfo)) { 2055 memcpy(buf, pvinfo.hcall, buf_len); 2056 return 0; 2057 } 2058 2059 /* 2060 * Fallback to always fail hypercalls regardless of endianness: 2061 * 2062 * tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian) 2063 * li r3, -1 2064 * b .+8 (becomes nop in wrong endian) 2065 * bswap32(li r3, -1) 2066 */ 2067 2068 hc[0] = cpu_to_be32(0x08000048); 2069 hc[1] = cpu_to_be32(0x3860ffff); 2070 hc[2] = cpu_to_be32(0x48000008); 2071 hc[3] = cpu_to_be32(bswap32(0x3860ffff)); 2072 2073 return 1; 2074 } 2075 2076 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall) 2077 { 2078 return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1); 2079 } 2080 2081 void kvmppc_enable_logical_ci_hcalls(void) 2082 { 2083 /* 2084 * FIXME: it would be nice if we could detect the cases where 2085 * we're using a device which requires the in kernel 2086 * implementation of these hcalls, but the kernel lacks them and 2087 * produce a warning. 2088 */ 2089 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD); 2090 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE); 2091 } 2092 2093 void kvmppc_enable_set_mode_hcall(void) 2094 { 2095 kvmppc_enable_hcall(kvm_state, H_SET_MODE); 2096 } 2097 2098 void kvmppc_enable_clear_ref_mod_hcalls(void) 2099 { 2100 kvmppc_enable_hcall(kvm_state, H_CLEAR_REF); 2101 kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD); 2102 } 2103 2104 void kvmppc_set_papr(PowerPCCPU *cpu) 2105 { 2106 CPUState *cs = CPU(cpu); 2107 int ret; 2108 2109 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0); 2110 if (ret) { 2111 error_report("This vCPU type or KVM version does not support PAPR"); 2112 exit(1); 2113 } 2114 2115 /* Update the capability flag so we sync the right information 2116 * with kvm */ 2117 cap_papr = 1; 2118 } 2119 2120 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr) 2121 { 2122 return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr); 2123 } 2124 2125 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy) 2126 { 2127 CPUState *cs = CPU(cpu); 2128 int ret; 2129 2130 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy); 2131 if (ret && mpic_proxy) { 2132 error_report("This KVM version does not support EPR"); 2133 exit(1); 2134 } 2135 } 2136 2137 int kvmppc_smt_threads(void) 2138 { 2139 return cap_ppc_smt ? cap_ppc_smt : 1; 2140 } 2141 2142 int kvmppc_set_smt_threads(int smt) 2143 { 2144 int ret; 2145 2146 ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_SMT, 0, smt, 0); 2147 if (!ret) { 2148 cap_ppc_smt = smt; 2149 } 2150 return ret; 2151 } 2152 2153 void kvmppc_hint_smt_possible(Error **errp) 2154 { 2155 int i; 2156 GString *g; 2157 char *s; 2158 2159 assert(kvm_enabled()); 2160 if (cap_ppc_smt_possible) { 2161 g = g_string_new("Available VSMT modes:"); 2162 for (i = 63; i >= 0; i--) { 2163 if ((1UL << i) & cap_ppc_smt_possible) { 2164 g_string_append_printf(g, " %lu", (1UL << i)); 2165 } 2166 } 2167 s = g_string_free(g, false); 2168 error_append_hint(errp, "%s.\n", s); 2169 g_free(s); 2170 } else { 2171 error_append_hint(errp, 2172 "This KVM seems to be too old to support VSMT.\n"); 2173 } 2174 } 2175 2176 2177 #ifdef TARGET_PPC64 2178 off_t kvmppc_alloc_rma(void **rma) 2179 { 2180 off_t size; 2181 int fd; 2182 struct kvm_allocate_rma ret; 2183 2184 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported 2185 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but 2186 * not necessary on this hardware 2187 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware 2188 * 2189 * FIXME: We should allow the user to force contiguous RMA 2190 * allocation in the cap_ppc_rma==1 case. 2191 */ 2192 if (cap_ppc_rma < 2) { 2193 return 0; 2194 } 2195 2196 fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret); 2197 if (fd < 0) { 2198 fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n", 2199 strerror(errno)); 2200 return -1; 2201 } 2202 2203 size = MIN(ret.rma_size, 256ul << 20); 2204 2205 *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); 2206 if (*rma == MAP_FAILED) { 2207 fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno)); 2208 return -1; 2209 }; 2210 2211 return size; 2212 } 2213 2214 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift) 2215 { 2216 struct kvm_ppc_smmu_info info; 2217 long rampagesize, best_page_shift; 2218 int i; 2219 2220 if (cap_ppc_rma >= 2) { 2221 return current_size; 2222 } 2223 2224 /* Find the largest hardware supported page size that's less than 2225 * or equal to the (logical) backing page size of guest RAM */ 2226 kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info); 2227 rampagesize = qemu_getrampagesize(); 2228 best_page_shift = 0; 2229 2230 for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) { 2231 struct kvm_ppc_one_seg_page_size *sps = &info.sps[i]; 2232 2233 if (!sps->page_shift) { 2234 continue; 2235 } 2236 2237 if ((sps->page_shift > best_page_shift) 2238 && ((1UL << sps->page_shift) <= rampagesize)) { 2239 best_page_shift = sps->page_shift; 2240 } 2241 } 2242 2243 return MIN(current_size, 2244 1ULL << (best_page_shift + hash_shift - 7)); 2245 } 2246 #endif 2247 2248 bool kvmppc_spapr_use_multitce(void) 2249 { 2250 return cap_spapr_multitce; 2251 } 2252 2253 int kvmppc_spapr_enable_inkernel_multitce(void) 2254 { 2255 int ret; 2256 2257 ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0, 2258 H_PUT_TCE_INDIRECT, 1); 2259 if (!ret) { 2260 ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0, 2261 H_STUFF_TCE, 1); 2262 } 2263 2264 return ret; 2265 } 2266 2267 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift, 2268 uint64_t bus_offset, uint32_t nb_table, 2269 int *pfd, bool need_vfio) 2270 { 2271 long len; 2272 int fd; 2273 void *table; 2274 2275 /* Must set fd to -1 so we don't try to munmap when called for 2276 * destroying the table, which the upper layers -will- do 2277 */ 2278 *pfd = -1; 2279 if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) { 2280 return NULL; 2281 } 2282 2283 if (cap_spapr_tce_64) { 2284 struct kvm_create_spapr_tce_64 args = { 2285 .liobn = liobn, 2286 .page_shift = page_shift, 2287 .offset = bus_offset >> page_shift, 2288 .size = nb_table, 2289 .flags = 0 2290 }; 2291 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE_64, &args); 2292 if (fd < 0) { 2293 fprintf(stderr, 2294 "KVM: Failed to create TCE64 table for liobn 0x%x\n", 2295 liobn); 2296 return NULL; 2297 } 2298 } else if (cap_spapr_tce) { 2299 uint64_t window_size = (uint64_t) nb_table << page_shift; 2300 struct kvm_create_spapr_tce args = { 2301 .liobn = liobn, 2302 .window_size = window_size, 2303 }; 2304 if ((window_size != args.window_size) || bus_offset) { 2305 return NULL; 2306 } 2307 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args); 2308 if (fd < 0) { 2309 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n", 2310 liobn); 2311 return NULL; 2312 } 2313 } else { 2314 return NULL; 2315 } 2316 2317 len = nb_table * sizeof(uint64_t); 2318 /* FIXME: round this up to page size */ 2319 2320 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); 2321 if (table == MAP_FAILED) { 2322 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n", 2323 liobn); 2324 close(fd); 2325 return NULL; 2326 } 2327 2328 *pfd = fd; 2329 return table; 2330 } 2331 2332 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table) 2333 { 2334 long len; 2335 2336 if (fd < 0) { 2337 return -1; 2338 } 2339 2340 len = nb_table * sizeof(uint64_t); 2341 if ((munmap(table, len) < 0) || 2342 (close(fd) < 0)) { 2343 fprintf(stderr, "KVM: Unexpected error removing TCE table: %s", 2344 strerror(errno)); 2345 /* Leak the table */ 2346 } 2347 2348 return 0; 2349 } 2350 2351 int kvmppc_reset_htab(int shift_hint) 2352 { 2353 uint32_t shift = shift_hint; 2354 2355 if (!kvm_enabled()) { 2356 /* Full emulation, tell caller to allocate htab itself */ 2357 return 0; 2358 } 2359 if (kvm_vm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) { 2360 int ret; 2361 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift); 2362 if (ret == -ENOTTY) { 2363 /* At least some versions of PR KVM advertise the 2364 * capability, but don't implement the ioctl(). Oops. 2365 * Return 0 so that we allocate the htab in qemu, as is 2366 * correct for PR. */ 2367 return 0; 2368 } else if (ret < 0) { 2369 return ret; 2370 } 2371 return shift; 2372 } 2373 2374 /* We have a kernel that predates the htab reset calls. For PR 2375 * KVM, we need to allocate the htab ourselves, for an HV KVM of 2376 * this era, it has allocated a 16MB fixed size hash table already. */ 2377 if (kvmppc_is_pr(kvm_state)) { 2378 /* PR - tell caller to allocate htab */ 2379 return 0; 2380 } else { 2381 /* HV - assume 16MB kernel allocated htab */ 2382 return 24; 2383 } 2384 } 2385 2386 static inline uint32_t mfpvr(void) 2387 { 2388 uint32_t pvr; 2389 2390 asm ("mfpvr %0" 2391 : "=r"(pvr)); 2392 return pvr; 2393 } 2394 2395 static void alter_insns(uint64_t *word, uint64_t flags, bool on) 2396 { 2397 if (on) { 2398 *word |= flags; 2399 } else { 2400 *word &= ~flags; 2401 } 2402 } 2403 2404 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data) 2405 { 2406 PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc); 2407 uint32_t vmx = kvmppc_get_vmx(); 2408 uint32_t dfp = kvmppc_get_dfp(); 2409 uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size"); 2410 uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size"); 2411 2412 /* Now fix up the class with information we can query from the host */ 2413 pcc->pvr = mfpvr(); 2414 2415 if (vmx != -1) { 2416 /* Only override when we know what the host supports */ 2417 alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0); 2418 alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1); 2419 } 2420 if (dfp != -1) { 2421 /* Only override when we know what the host supports */ 2422 alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp); 2423 } 2424 2425 if (dcache_size != -1) { 2426 pcc->l1_dcache_size = dcache_size; 2427 } 2428 2429 if (icache_size != -1) { 2430 pcc->l1_icache_size = icache_size; 2431 } 2432 2433 #if defined(TARGET_PPC64) 2434 pcc->radix_page_info = kvm_get_radix_page_info(); 2435 2436 if ((pcc->pvr & 0xffffff00) == CPU_POWERPC_POWER9_DD1) { 2437 /* 2438 * POWER9 DD1 has some bugs which make it not really ISA 3.00 2439 * compliant. More importantly, advertising ISA 3.00 2440 * architected mode may prevent guests from activating 2441 * necessary DD1 workarounds. 2442 */ 2443 pcc->pcr_supported &= ~(PCR_COMPAT_3_00 | PCR_COMPAT_2_07 2444 | PCR_COMPAT_2_06 | PCR_COMPAT_2_05); 2445 } 2446 #endif /* defined(TARGET_PPC64) */ 2447 } 2448 2449 bool kvmppc_has_cap_epr(void) 2450 { 2451 return cap_epr; 2452 } 2453 2454 bool kvmppc_has_cap_fixup_hcalls(void) 2455 { 2456 return cap_fixup_hcalls; 2457 } 2458 2459 bool kvmppc_has_cap_htm(void) 2460 { 2461 return cap_htm; 2462 } 2463 2464 bool kvmppc_has_cap_mmu_radix(void) 2465 { 2466 return cap_mmu_radix; 2467 } 2468 2469 bool kvmppc_has_cap_mmu_hash_v3(void) 2470 { 2471 return cap_mmu_hash_v3; 2472 } 2473 2474 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void) 2475 { 2476 uint32_t host_pvr = mfpvr(); 2477 PowerPCCPUClass *pvr_pcc; 2478 2479 pvr_pcc = ppc_cpu_class_by_pvr(host_pvr); 2480 if (pvr_pcc == NULL) { 2481 pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr); 2482 } 2483 2484 return pvr_pcc; 2485 } 2486 2487 static int kvm_ppc_register_host_cpu_type(MachineState *ms) 2488 { 2489 TypeInfo type_info = { 2490 .name = TYPE_HOST_POWERPC_CPU, 2491 .class_init = kvmppc_host_cpu_class_init, 2492 }; 2493 MachineClass *mc = MACHINE_GET_CLASS(ms); 2494 PowerPCCPUClass *pvr_pcc; 2495 ObjectClass *oc; 2496 DeviceClass *dc; 2497 int i; 2498 2499 pvr_pcc = kvm_ppc_get_host_cpu_class(); 2500 if (pvr_pcc == NULL) { 2501 return -1; 2502 } 2503 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc)); 2504 type_register(&type_info); 2505 if (object_dynamic_cast(OBJECT(ms), TYPE_SPAPR_MACHINE)) { 2506 /* override TCG default cpu type with 'host' cpu model */ 2507 mc->default_cpu_type = TYPE_HOST_POWERPC_CPU; 2508 } 2509 2510 oc = object_class_by_name(type_info.name); 2511 g_assert(oc); 2512 2513 /* 2514 * Update generic CPU family class alias (e.g. on a POWER8NVL host, 2515 * we want "POWER8" to be a "family" alias that points to the current 2516 * host CPU type, too) 2517 */ 2518 dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc)); 2519 for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) { 2520 if (strcasecmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) { 2521 char *suffix; 2522 2523 ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc)); 2524 suffix = strstr(ppc_cpu_aliases[i].model, POWERPC_CPU_TYPE_SUFFIX); 2525 if (suffix) { 2526 *suffix = 0; 2527 } 2528 break; 2529 } 2530 } 2531 2532 return 0; 2533 } 2534 2535 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function) 2536 { 2537 struct kvm_rtas_token_args args = { 2538 .token = token, 2539 }; 2540 2541 if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) { 2542 return -ENOENT; 2543 } 2544 2545 strncpy(args.name, function, sizeof(args.name)); 2546 2547 return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args); 2548 } 2549 2550 int kvmppc_get_htab_fd(bool write, uint64_t index, Error **errp) 2551 { 2552 struct kvm_get_htab_fd s = { 2553 .flags = write ? KVM_GET_HTAB_WRITE : 0, 2554 .start_index = index, 2555 }; 2556 int ret; 2557 2558 if (!cap_htab_fd) { 2559 error_setg(errp, "KVM version doesn't support %s the HPT", 2560 write ? "writing" : "reading"); 2561 return -ENOTSUP; 2562 } 2563 2564 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s); 2565 if (ret < 0) { 2566 error_setg(errp, "Unable to open fd for %s HPT %s KVM: %s", 2567 write ? "writing" : "reading", write ? "to" : "from", 2568 strerror(errno)); 2569 return -errno; 2570 } 2571 2572 return ret; 2573 } 2574 2575 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns) 2576 { 2577 int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); 2578 uint8_t buf[bufsize]; 2579 ssize_t rc; 2580 2581 do { 2582 rc = read(fd, buf, bufsize); 2583 if (rc < 0) { 2584 fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n", 2585 strerror(errno)); 2586 return rc; 2587 } else if (rc) { 2588 uint8_t *buffer = buf; 2589 ssize_t n = rc; 2590 while (n) { 2591 struct kvm_get_htab_header *head = 2592 (struct kvm_get_htab_header *) buffer; 2593 size_t chunksize = sizeof(*head) + 2594 HASH_PTE_SIZE_64 * head->n_valid; 2595 2596 qemu_put_be32(f, head->index); 2597 qemu_put_be16(f, head->n_valid); 2598 qemu_put_be16(f, head->n_invalid); 2599 qemu_put_buffer(f, (void *)(head + 1), 2600 HASH_PTE_SIZE_64 * head->n_valid); 2601 2602 buffer += chunksize; 2603 n -= chunksize; 2604 } 2605 } 2606 } while ((rc != 0) 2607 && ((max_ns < 0) 2608 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns))); 2609 2610 return (rc == 0) ? 1 : 0; 2611 } 2612 2613 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index, 2614 uint16_t n_valid, uint16_t n_invalid) 2615 { 2616 struct kvm_get_htab_header *buf; 2617 size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64; 2618 ssize_t rc; 2619 2620 buf = alloca(chunksize); 2621 buf->index = index; 2622 buf->n_valid = n_valid; 2623 buf->n_invalid = n_invalid; 2624 2625 qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid); 2626 2627 rc = write(fd, buf, chunksize); 2628 if (rc < 0) { 2629 fprintf(stderr, "Error writing KVM hash table: %s\n", 2630 strerror(errno)); 2631 return rc; 2632 } 2633 if (rc != chunksize) { 2634 /* We should never get a short write on a single chunk */ 2635 fprintf(stderr, "Short write, restoring KVM hash table\n"); 2636 return -1; 2637 } 2638 return 0; 2639 } 2640 2641 bool kvm_arch_stop_on_emulation_error(CPUState *cpu) 2642 { 2643 return true; 2644 } 2645 2646 void kvm_arch_init_irq_routing(KVMState *s) 2647 { 2648 } 2649 2650 void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n) 2651 { 2652 int fd, rc; 2653 int i; 2654 2655 fd = kvmppc_get_htab_fd(false, ptex, &error_abort); 2656 2657 i = 0; 2658 while (i < n) { 2659 struct kvm_get_htab_header *hdr; 2660 int m = n < HPTES_PER_GROUP ? n : HPTES_PER_GROUP; 2661 char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64]; 2662 2663 rc = read(fd, buf, sizeof(buf)); 2664 if (rc < 0) { 2665 hw_error("kvmppc_read_hptes: Unable to read HPTEs"); 2666 } 2667 2668 hdr = (struct kvm_get_htab_header *)buf; 2669 while ((i < n) && ((char *)hdr < (buf + rc))) { 2670 int invalid = hdr->n_invalid; 2671 2672 if (hdr->index != (ptex + i)) { 2673 hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32 2674 " != (%"HWADDR_PRIu" + %d", hdr->index, ptex, i); 2675 } 2676 2677 memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * hdr->n_valid); 2678 i += hdr->n_valid; 2679 2680 if ((n - i) < invalid) { 2681 invalid = n - i; 2682 } 2683 memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64); 2684 i += hdr->n_invalid; 2685 2686 hdr = (struct kvm_get_htab_header *) 2687 ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid); 2688 } 2689 } 2690 2691 close(fd); 2692 } 2693 2694 void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1) 2695 { 2696 int fd, rc; 2697 struct { 2698 struct kvm_get_htab_header hdr; 2699 uint64_t pte0; 2700 uint64_t pte1; 2701 } buf; 2702 2703 fd = kvmppc_get_htab_fd(true, 0 /* Ignored */, &error_abort); 2704 2705 buf.hdr.n_valid = 1; 2706 buf.hdr.n_invalid = 0; 2707 buf.hdr.index = ptex; 2708 buf.pte0 = cpu_to_be64(pte0); 2709 buf.pte1 = cpu_to_be64(pte1); 2710 2711 rc = write(fd, &buf, sizeof(buf)); 2712 if (rc != sizeof(buf)) { 2713 hw_error("kvmppc_write_hpte: Unable to update KVM HPT"); 2714 } 2715 close(fd); 2716 } 2717 2718 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route, 2719 uint64_t address, uint32_t data, PCIDevice *dev) 2720 { 2721 return 0; 2722 } 2723 2724 int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route, 2725 int vector, PCIDevice *dev) 2726 { 2727 return 0; 2728 } 2729 2730 int kvm_arch_release_virq_post(int virq) 2731 { 2732 return 0; 2733 } 2734 2735 int kvm_arch_msi_data_to_gsi(uint32_t data) 2736 { 2737 return data & 0xffff; 2738 } 2739 2740 int kvmppc_enable_hwrng(void) 2741 { 2742 if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) { 2743 return -1; 2744 } 2745 2746 return kvmppc_enable_hcall(kvm_state, H_RANDOM); 2747 } 2748 2749 void kvmppc_check_papr_resize_hpt(Error **errp) 2750 { 2751 if (!kvm_enabled()) { 2752 return; /* No KVM, we're good */ 2753 } 2754 2755 if (cap_resize_hpt) { 2756 return; /* Kernel has explicit support, we're good */ 2757 } 2758 2759 /* Otherwise fallback on looking for PR KVM */ 2760 if (kvmppc_is_pr(kvm_state)) { 2761 return; 2762 } 2763 2764 error_setg(errp, 2765 "Hash page table resizing not available with this KVM version"); 2766 } 2767 2768 int kvmppc_resize_hpt_prepare(PowerPCCPU *cpu, target_ulong flags, int shift) 2769 { 2770 CPUState *cs = CPU(cpu); 2771 struct kvm_ppc_resize_hpt rhpt = { 2772 .flags = flags, 2773 .shift = shift, 2774 }; 2775 2776 if (!cap_resize_hpt) { 2777 return -ENOSYS; 2778 } 2779 2780 return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_PREPARE, &rhpt); 2781 } 2782 2783 int kvmppc_resize_hpt_commit(PowerPCCPU *cpu, target_ulong flags, int shift) 2784 { 2785 CPUState *cs = CPU(cpu); 2786 struct kvm_ppc_resize_hpt rhpt = { 2787 .flags = flags, 2788 .shift = shift, 2789 }; 2790 2791 if (!cap_resize_hpt) { 2792 return -ENOSYS; 2793 } 2794 2795 return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_COMMIT, &rhpt); 2796 } 2797 2798 /* 2799 * This is a helper function to detect a post migration scenario 2800 * in which a guest, running as KVM-HV, freezes in cpu_post_load because 2801 * the guest kernel can't handle a PVR value other than the actual host 2802 * PVR in KVM_SET_SREGS, even if pvr_match() returns true. 2803 * 2804 * If we don't have cap_ppc_pvr_compat and we're not running in PR 2805 * (so, we're HV), return true. The workaround itself is done in 2806 * cpu_post_load. 2807 * 2808 * The order here is important: we'll only check for KVM PR as a 2809 * fallback if the guest kernel can't handle the situation itself. 2810 * We need to avoid as much as possible querying the running KVM type 2811 * in QEMU level. 2812 */ 2813 bool kvmppc_pvr_workaround_required(PowerPCCPU *cpu) 2814 { 2815 CPUState *cs = CPU(cpu); 2816 2817 if (!kvm_enabled()) { 2818 return false; 2819 } 2820 2821 if (cap_ppc_pvr_compat) { 2822 return false; 2823 } 2824 2825 return !kvmppc_is_pr(cs->kvm_state); 2826 } 2827