/*
 * PowerPC implementation of KVM hooks
 *
 * Copyright IBM Corp. 2007
 * Copyright (C) 2011 Freescale Semiconductor, Inc.
 *
 * Authors:
 *  Jerone Young <jyoung5@us.ibm.com>
 *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
 *  Hollis Blanchard <hollisb@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include <dirent.h>
#include <sys/ioctl.h>
#include <sys/vfs.h>

#include <linux/kvm.h>

#include "qemu-common.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "cpu.h"
#include "cpu-models.h"
#include "qemu/timer.h"
#include "sysemu/sysemu.h"
#include "sysemu/hw_accel.h"
#include "kvm_ppc.h"
#include "sysemu/cpus.h"
#include "sysemu/device_tree.h"
#include "mmu-hash64.h"

#include "hw/sysbus.h"
#include "hw/ppc/spapr.h"
#include "hw/ppc/spapr_vio.h"
#include "hw/ppc/spapr_cpu_core.h"
#include "hw/ppc/ppc.h"
#include "sysemu/watchdog.h"
#include "trace.h"
#include "exec/gdbstub.h"
#include "exec/memattrs.h"
#include "exec/ram_addr.h"
#include "sysemu/hostmem.h"
#include "qemu/cutils.h"
#include "qemu/mmap-alloc.h"
#if defined(TARGET_PPC64)
#include "hw/ppc/spapr_cpu_core.h"
#endif
#include "elf.h"
#include "sysemu/kvm_int.h"

//#define DEBUG_KVM

#ifdef DEBUG_KVM
#define DPRINTF(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define DPRINTF(fmt, ...) \
    do { } while (0)
#endif

#define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"

const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
    KVM_CAP_LAST_INFO
};

static int cap_interrupt_unset = false;
static int cap_interrupt_level = false;
static int cap_segstate;
static int cap_booke_sregs;
static int cap_ppc_smt;
static int cap_ppc_rma;
static int cap_spapr_tce;
static int cap_spapr_tce_64;
static int cap_spapr_multitce;
static int cap_spapr_vfio;
static int cap_hior;
static int cap_one_reg;
static int cap_epr;
static int cap_ppc_watchdog;
static int cap_papr;
static int cap_htab_fd;
static int cap_fixup_hcalls;
static int cap_htm;             /* Hardware transactional memory support */
static int cap_mmu_radix;
static int cap_mmu_hash_v3;
static int cap_resize_hpt;
static int cap_ppc_pvr_compat;

static uint32_t debug_inst_opcode;

/* XXX We have a race condition where we actually have a level triggered
 *     interrupt, but the infrastructure can't expose that yet, so the guest
 *     takes but ignores it, goes to sleep and never gets notified that there's
 *     still an interrupt pending.
 *
 *     As a quick workaround, let's just wake up again 20 ms after we injected
 *     an interrupt. That way we can assure that we're always reinjecting
 *     interrupts in case the guest swallowed them.
 */
static QEMUTimer *idle_timer;

static void kvm_kick_cpu(void *opaque)
{
    PowerPCCPU *cpu = opaque;

    qemu_cpu_kick(CPU(cpu));
}

/* Check whether we are running with KVM-PR (instead of KVM-HV). This
 * should only be used for fallback tests - generally we should use
 * explicit capabilities for the features we want, rather than
 * assuming what is/isn't available depending on the KVM variant.
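 * One such fallback test is in kvm_arch_init_vcpu() below: without
 * KVM_CAP_PPC_HTM, HTM is still enabled for KVM-HV when the host's
 * AT_HWCAP2 advertises it.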
 */
static bool kvmppc_is_pr(KVMState *ks)
{
    /* Assume KVM-PR if the GET_PVINFO capability is available */
    return kvm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
}

static int kvm_ppc_register_host_cpu_type(void);

int kvm_arch_init(MachineState *ms, KVMState *s)
{
    cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
    cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
    cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
    cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
    cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
    cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
    cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
    cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64);
    cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
    cap_spapr_vfio = false;
    cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
    cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
    cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
    cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
    /* Note: we don't set cap_papr here, because this capability is
     * only activated after this by kvmppc_set_papr() */
    cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
    cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
    cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
    cap_mmu_radix = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_RADIX);
    cap_mmu_hash_v3 = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_HASH_V3);
    cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT);
    /*
     * Note: setting it to false because there is no such capability
     * in KVM at this moment.
     *
     * TODO: call kvm_vm_check_extension() with the right capability
     * after the kernel starts implementing it.
     */
    cap_ppc_pvr_compat = false;

    if (!cap_interrupt_level) {
        fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
                        "VM to stall at times!\n");
    }

    kvm_ppc_register_host_cpu_type();

    return 0;
}

int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
{
    return 0;
}

static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
{
    CPUPPCState *cenv = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_sregs sregs;
    int ret;

    if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
        /* What we're really trying to say is "if we're on BookE, we use
           the native PVR for now". This is the only sane way to check
           it though, so we potentially confuse users that they can run
           BookE guests on BookS. Let's hope nobody dares enough :) */
        return 0;
    } else {
        if (!cap_segstate) {
            fprintf(stderr, "kvm error: missing PVR setting capability\n");
            return -ENOSYS;
        }
    }

    ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
    if (ret) {
        return ret;
    }

    sregs.pvr = cenv->spr[SPR_PVR];
    return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
}

/* Set up a shared TLB array with KVM */
static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_book3e_206_tlb_params params = {};
    struct kvm_config_tlb cfg = {};
    unsigned int entries = 0;
    int ret, i;

    if (!kvm_enabled() ||
        !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
        return 0;
    }

    assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);

    for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
        params.tlb_sizes[i] = booke206_tlb_size(env, i);
        params.tlb_ways[i] = booke206_tlb_ways(env, i);
        entries += params.tlb_sizes[i];
    }

    assert(entries == env->nb_tlb);
    assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));

    env->tlb_dirty = true;

    cfg.array = (uintptr_t)env->tlb.tlbm;
    cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
    cfg.params = (uintptr_t)&params;
    cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;

    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
    if (ret < 0) {
        fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
                __func__, strerror(-ret));
        return ret;
    }

    env->kvm_sw_tlb = true;
    return 0;
}


#if defined(TARGET_PPC64)
static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
                                       struct kvm_ppc_smmu_info *info)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);

    memset(info, 0, sizeof(*info));

    /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
     * need to "guess" what the supported page sizes are.
     *
     * For that to work we make a few assumptions:
     *
     * - Check whether we are running "PR" KVM which only supports 4K
     *   and 16M pages, but supports them regardless of the backing
     *   store characteristics. We also don't support 1T segments.
     *
     *   This is safe as if HV KVM ever supports that capability or PR
     *   KVM grows support for more page/segment sizes, those versions
     *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
     *   will not hit this fallback
     *
     * - Else we are running HV KVM. This means we only support page
     *   sizes that fit in the backing store. Additionally we only
     *   advertise 64K pages if the processor is ARCH 2.06 and we assume
     *   P7 encodings for the SLB and hash table. Here too, we assume
     *   support for any newer processor will mean a kernel that
     *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
     *   this fallback
278 */ 279 if (kvmppc_is_pr(cs->kvm_state)) { 280 /* No flags */ 281 info->flags = 0; 282 info->slb_size = 64; 283 284 /* Standard 4k base page size segment */ 285 info->sps[0].page_shift = 12; 286 info->sps[0].slb_enc = 0; 287 info->sps[0].enc[0].page_shift = 12; 288 info->sps[0].enc[0].pte_enc = 0; 289 290 /* Standard 16M large page size segment */ 291 info->sps[1].page_shift = 24; 292 info->sps[1].slb_enc = SLB_VSID_L; 293 info->sps[1].enc[0].page_shift = 24; 294 info->sps[1].enc[0].pte_enc = 0; 295 } else { 296 int i = 0; 297 298 /* HV KVM has backing store size restrictions */ 299 info->flags = KVM_PPC_PAGE_SIZES_REAL; 300 301 if (env->mmu_model & POWERPC_MMU_1TSEG) { 302 info->flags |= KVM_PPC_1T_SEGMENTS; 303 } 304 305 if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 || 306 POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) { 307 info->slb_size = 32; 308 } else { 309 info->slb_size = 64; 310 } 311 312 /* Standard 4k base page size segment */ 313 info->sps[i].page_shift = 12; 314 info->sps[i].slb_enc = 0; 315 info->sps[i].enc[0].page_shift = 12; 316 info->sps[i].enc[0].pte_enc = 0; 317 i++; 318 319 /* 64K on MMU 2.06 and later */ 320 if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 || 321 POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) { 322 info->sps[i].page_shift = 16; 323 info->sps[i].slb_enc = 0x110; 324 info->sps[i].enc[0].page_shift = 16; 325 info->sps[i].enc[0].pte_enc = 1; 326 i++; 327 } 328 329 /* Standard 16M large page size segment */ 330 info->sps[i].page_shift = 24; 331 info->sps[i].slb_enc = SLB_VSID_L; 332 info->sps[i].enc[0].page_shift = 24; 333 info->sps[i].enc[0].pte_enc = 0; 334 } 335 } 336 337 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info) 338 { 339 CPUState *cs = CPU(cpu); 340 int ret; 341 342 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) { 343 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info); 344 if (ret == 0) { 345 return; 346 } 347 } 348 349 kvm_get_fallback_smmu_info(cpu, info); 350 } 351 352 struct ppc_radix_page_info *kvm_get_radix_page_info(void) 353 { 354 KVMState *s = KVM_STATE(current_machine->accelerator); 355 struct ppc_radix_page_info *radix_page_info; 356 struct kvm_ppc_rmmu_info rmmu_info; 357 int i; 358 359 if (!kvm_check_extension(s, KVM_CAP_PPC_MMU_RADIX)) { 360 return NULL; 361 } 362 if (kvm_vm_ioctl(s, KVM_PPC_GET_RMMU_INFO, &rmmu_info)) { 363 return NULL; 364 } 365 radix_page_info = g_malloc0(sizeof(*radix_page_info)); 366 radix_page_info->count = 0; 367 for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) { 368 if (rmmu_info.ap_encodings[i]) { 369 radix_page_info->entries[i] = rmmu_info.ap_encodings[i]; 370 radix_page_info->count++; 371 } 372 } 373 return radix_page_info; 374 } 375 376 target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu, 377 bool radix, bool gtse, 378 uint64_t proc_tbl) 379 { 380 CPUState *cs = CPU(cpu); 381 int ret; 382 uint64_t flags = 0; 383 struct kvm_ppc_mmuv3_cfg cfg = { 384 .process_table = proc_tbl, 385 }; 386 387 if (radix) { 388 flags |= KVM_PPC_MMUV3_RADIX; 389 } 390 if (gtse) { 391 flags |= KVM_PPC_MMUV3_GTSE; 392 } 393 cfg.flags = flags; 394 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_CONFIGURE_V3_MMU, &cfg); 395 switch (ret) { 396 case 0: 397 return H_SUCCESS; 398 case -EINVAL: 399 return H_PARAMETER; 400 case -ENODEV: 401 return H_NOT_AVAILABLE; 402 default: 403 return H_HARDWARE; 404 } 405 } 406 407 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift) 408 { 409 if (!(flags & 
KVM_PPC_PAGE_SIZES_REAL)) { 410 return true; 411 } 412 413 return (1ul << shift) <= rampgsize; 414 } 415 416 static long max_cpu_page_size; 417 418 static void kvm_fixup_page_sizes(PowerPCCPU *cpu) 419 { 420 static struct kvm_ppc_smmu_info smmu_info; 421 static bool has_smmu_info; 422 CPUPPCState *env = &cpu->env; 423 int iq, ik, jq, jk; 424 bool has_64k_pages = false; 425 426 /* We only handle page sizes for 64-bit server guests for now */ 427 if (!(env->mmu_model & POWERPC_MMU_64)) { 428 return; 429 } 430 431 /* Collect MMU info from kernel if not already */ 432 if (!has_smmu_info) { 433 kvm_get_smmu_info(cpu, &smmu_info); 434 has_smmu_info = true; 435 } 436 437 if (!max_cpu_page_size) { 438 max_cpu_page_size = qemu_getrampagesize(); 439 } 440 441 /* Convert to QEMU form */ 442 memset(&env->sps, 0, sizeof(env->sps)); 443 444 /* If we have HV KVM, we need to forbid CI large pages if our 445 * host page size is smaller than 64K. 446 */ 447 if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) { 448 env->ci_large_pages = getpagesize() >= 0x10000; 449 } 450 451 /* 452 * XXX This loop should be an entry wide AND of the capabilities that 453 * the selected CPU has with the capabilities that KVM supports. 454 */ 455 for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) { 456 struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq]; 457 struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik]; 458 459 if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size, 460 ksps->page_shift)) { 461 continue; 462 } 463 qsps->page_shift = ksps->page_shift; 464 qsps->slb_enc = ksps->slb_enc; 465 for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) { 466 if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size, 467 ksps->enc[jk].page_shift)) { 468 continue; 469 } 470 if (ksps->enc[jk].page_shift == 16) { 471 has_64k_pages = true; 472 } 473 qsps->enc[jq].page_shift = ksps->enc[jk].page_shift; 474 qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc; 475 if (++jq >= PPC_PAGE_SIZES_MAX_SZ) { 476 break; 477 } 478 } 479 if (++iq >= PPC_PAGE_SIZES_MAX_SZ) { 480 break; 481 } 482 } 483 env->slb_nr = smmu_info.slb_size; 484 if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) { 485 env->mmu_model &= ~POWERPC_MMU_1TSEG; 486 } 487 if (!has_64k_pages) { 488 env->mmu_model &= ~POWERPC_MMU_64K; 489 } 490 } 491 492 bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path) 493 { 494 Object *mem_obj = object_resolve_path(obj_path, NULL); 495 char *mempath = object_property_get_str(mem_obj, "mem-path", NULL); 496 long pagesize; 497 498 if (mempath) { 499 pagesize = qemu_mempath_getpagesize(mempath); 500 g_free(mempath); 501 } else { 502 pagesize = getpagesize(); 503 } 504 505 return pagesize >= max_cpu_page_size; 506 } 507 508 #else /* defined (TARGET_PPC64) */ 509 510 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu) 511 { 512 } 513 514 bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path) 515 { 516 return true; 517 } 518 519 #endif /* !defined (TARGET_PPC64) */ 520 521 unsigned long kvm_arch_vcpu_id(CPUState *cpu) 522 { 523 return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu)); 524 } 525 526 /* e500 supports 2 h/w breakpoint and 2 watchpoint. 527 * book3s supports only 1 watchpoint, so array size 528 * of 4 is sufficient for now. 
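 * (The actual limits are filled in by kvmppc_hw_debug_points_init() below.)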
 */
#define MAX_HW_BKPTS 4

static struct HWBreakpoint {
    target_ulong addr;
    int type;
} hw_debug_points[MAX_HW_BKPTS];

static CPUWatchpoint hw_watchpoint;

/* Default there is no breakpoint and watchpoint supported */
static int max_hw_breakpoint;
static int max_hw_watchpoint;
static int nb_hw_breakpoint;
static int nb_hw_watchpoint;

static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
{
    if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
        max_hw_breakpoint = 2;
        max_hw_watchpoint = 2;
    }

    if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
        fprintf(stderr, "Error initializing h/w breakpoints\n");
        return;
    }
}

int kvm_arch_init_vcpu(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *cenv = &cpu->env;
    int ret;

    /* Gather server mmu info from KVM and update the CPU state */
    kvm_fixup_page_sizes(cpu);

    /* Synchronize sregs with kvm */
    ret = kvm_arch_sync_sregs(cpu);
    if (ret) {
        if (ret == -EINVAL) {
            error_report("Register sync failed... If you're using kvm-hv.ko,"
                         " only \"-cpu host\" is possible");
        }
        return ret;
    }

    idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);

    switch (cenv->mmu_model) {
    case POWERPC_MMU_BOOKE206:
        /* This target supports access to KVM's guest TLB */
        ret = kvm_booke206_tlb_init(cpu);
        break;
    case POWERPC_MMU_2_07:
        if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
            /* KVM-HV has transactional memory on POWER8 also without the
             * KVM_CAP_PPC_HTM extension, so enable it here instead as
             * long as it's available to userspace on the host. */
            if (qemu_getauxval(AT_HWCAP2) & PPC_FEATURE2_HAS_HTM) {
                cap_htm = true;
            }
        }
        break;
    default:
        break;
    }

    kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
    kvmppc_hw_debug_points_init(cenv);

    return ret;
}

static void kvm_sw_tlb_put(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_dirty_tlb dirty_tlb;
    unsigned char *bitmap;
    int ret;

    if (!env->kvm_sw_tlb) {
        return;
    }

    bitmap = g_malloc((env->nb_tlb + 7) / 8);
    memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);

    dirty_tlb.bitmap = (uintptr_t)bitmap;
    dirty_tlb.num_dirty = env->nb_tlb;

    ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
    if (ret) {
        fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
                __func__, strerror(-ret));
    }

    g_free(bitmap);
}

static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    union {
        uint32_t u32;
        uint64_t u64;
    } val;
    struct kvm_one_reg reg = {
        .id = id,
        .addr = (uintptr_t) &val,
    };
    int ret;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret != 0) {
        trace_kvm_failed_spr_get(spr, strerror(errno));
    } else {
        switch (id & KVM_REG_SIZE_MASK) {
        case KVM_REG_SIZE_U32:
            env->spr[spr] = val.u32;
            break;

        case KVM_REG_SIZE_U64:
            env->spr[spr] = val.u64;
            break;

        default:
            /* Don't handle this size yet */
            abort();
        }
    }
}

static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    union {
        uint32_t u32;
        uint64_t u64;
    } val;
    struct kvm_one_reg reg = {
        .id = id,
        .addr = (uintptr_t) &val,
    };
    int ret;

    switch (id & KVM_REG_SIZE_MASK) {
    case KVM_REG_SIZE_U32:
        val.u32 = env->spr[spr];
        break;

    case KVM_REG_SIZE_U64:
        val.u64 = env->spr[spr];
        break;

    default:
        /* Don't handle this size yet */
        abort();
    }

    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret != 0) {
        trace_kvm_failed_spr_set(spr, strerror(errno));
    }
}

static int kvm_put_fp(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int i;
    int ret;

    if (env->insns_flags & PPC_FLOAT) {
        uint64_t fpscr = env->fpscr;
        bool vsx = !!(env->insns_flags2 & PPC2_VSX);

        reg.id = KVM_REG_PPC_FPSCR;
        reg.addr = (uintptr_t)&fpscr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            uint64_t vsr[2];

#ifdef HOST_WORDS_BIGENDIAN
            vsr[0] = float64_val(env->fpr[i]);
            vsr[1] = env->vsr[i];
#else
            vsr[0] = env->vsr[i];
            vsr[1] = float64_val(env->fpr[i]);
#endif
            reg.addr = (uintptr_t) &vsr;
            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);

            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
                        i, strerror(errno));
                return ret;
            }
        }
    }

    if (env->insns_flags & PPC_ALTIVEC) {
        reg.id = KVM_REG_PPC_VSCR;
        reg.addr = (uintptr_t)&env->vscr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            reg.id = KVM_REG_PPC_VR(i);
            reg.addr = (uintptr_t)&env->avr[i];
            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
                return ret;
            }
        }
    }

    return 0;
}

static int kvm_get_fp(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int i;
    int ret;

    if (env->insns_flags & PPC_FLOAT) {
        uint64_t fpscr;
        bool vsx = !!(env->insns_flags2 & PPC2_VSX);

        reg.id = KVM_REG_PPC_FPSCR;
        reg.addr = (uintptr_t)&fpscr;
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
            return ret;
        } else {
            env->fpscr = fpscr;
        }

        for (i = 0; i < 32; i++) {
            uint64_t vsr[2];

            reg.addr = (uintptr_t) &vsr;
            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);

            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to get %s%d from KVM: %s\n",
                        vsx ?
"VSR" : "FPR", i, strerror(errno)); 796 return ret; 797 } else { 798 #ifdef HOST_WORDS_BIGENDIAN 799 env->fpr[i] = vsr[0]; 800 if (vsx) { 801 env->vsr[i] = vsr[1]; 802 } 803 #else 804 env->fpr[i] = vsr[1]; 805 if (vsx) { 806 env->vsr[i] = vsr[0]; 807 } 808 #endif 809 } 810 } 811 } 812 813 if (env->insns_flags & PPC_ALTIVEC) { 814 reg.id = KVM_REG_PPC_VSCR; 815 reg.addr = (uintptr_t)&env->vscr; 816 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 817 if (ret < 0) { 818 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno)); 819 return ret; 820 } 821 822 for (i = 0; i < 32; i++) { 823 reg.id = KVM_REG_PPC_VR(i); 824 reg.addr = (uintptr_t)&env->avr[i]; 825 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 826 if (ret < 0) { 827 DPRINTF("Unable to get VR%d from KVM: %s\n", 828 i, strerror(errno)); 829 return ret; 830 } 831 } 832 } 833 834 return 0; 835 } 836 837 #if defined(TARGET_PPC64) 838 static int kvm_get_vpa(CPUState *cs) 839 { 840 PowerPCCPU *cpu = POWERPC_CPU(cs); 841 CPUPPCState *env = &cpu->env; 842 struct kvm_one_reg reg; 843 int ret; 844 845 reg.id = KVM_REG_PPC_VPA_ADDR; 846 reg.addr = (uintptr_t)&env->vpa_addr; 847 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 848 if (ret < 0) { 849 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno)); 850 return ret; 851 } 852 853 assert((uintptr_t)&env->slb_shadow_size 854 == ((uintptr_t)&env->slb_shadow_addr + 8)); 855 reg.id = KVM_REG_PPC_VPA_SLB; 856 reg.addr = (uintptr_t)&env->slb_shadow_addr; 857 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 858 if (ret < 0) { 859 DPRINTF("Unable to get SLB shadow state from KVM: %s\n", 860 strerror(errno)); 861 return ret; 862 } 863 864 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8)); 865 reg.id = KVM_REG_PPC_VPA_DTL; 866 reg.addr = (uintptr_t)&env->dtl_addr; 867 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 868 if (ret < 0) { 869 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n", 870 strerror(errno)); 871 return ret; 872 } 873 874 return 0; 875 } 876 877 static int kvm_put_vpa(CPUState *cs) 878 { 879 PowerPCCPU *cpu = POWERPC_CPU(cs); 880 CPUPPCState *env = &cpu->env; 881 struct kvm_one_reg reg; 882 int ret; 883 884 /* SLB shadow or DTL can't be registered unless a master VPA is 885 * registered. That means when restoring state, if a VPA *is* 886 * registered, we need to set that up first. 
If not, we need to 887 * deregister the others before deregistering the master VPA */ 888 assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr)); 889 890 if (env->vpa_addr) { 891 reg.id = KVM_REG_PPC_VPA_ADDR; 892 reg.addr = (uintptr_t)&env->vpa_addr; 893 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 894 if (ret < 0) { 895 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno)); 896 return ret; 897 } 898 } 899 900 assert((uintptr_t)&env->slb_shadow_size 901 == ((uintptr_t)&env->slb_shadow_addr + 8)); 902 reg.id = KVM_REG_PPC_VPA_SLB; 903 reg.addr = (uintptr_t)&env->slb_shadow_addr; 904 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 905 if (ret < 0) { 906 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno)); 907 return ret; 908 } 909 910 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8)); 911 reg.id = KVM_REG_PPC_VPA_DTL; 912 reg.addr = (uintptr_t)&env->dtl_addr; 913 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 914 if (ret < 0) { 915 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n", 916 strerror(errno)); 917 return ret; 918 } 919 920 if (!env->vpa_addr) { 921 reg.id = KVM_REG_PPC_VPA_ADDR; 922 reg.addr = (uintptr_t)&env->vpa_addr; 923 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 924 if (ret < 0) { 925 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno)); 926 return ret; 927 } 928 } 929 930 return 0; 931 } 932 #endif /* TARGET_PPC64 */ 933 934 int kvmppc_put_books_sregs(PowerPCCPU *cpu) 935 { 936 CPUPPCState *env = &cpu->env; 937 struct kvm_sregs sregs; 938 int i; 939 940 sregs.pvr = env->spr[SPR_PVR]; 941 942 sregs.u.s.sdr1 = env->spr[SPR_SDR1]; 943 944 /* Sync SLB */ 945 #ifdef TARGET_PPC64 946 for (i = 0; i < ARRAY_SIZE(env->slb); i++) { 947 sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid; 948 if (env->slb[i].esid & SLB_ESID_V) { 949 sregs.u.s.ppc64.slb[i].slbe |= i; 950 } 951 sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid; 952 } 953 #endif 954 955 /* Sync SRs */ 956 for (i = 0; i < 16; i++) { 957 sregs.u.s.ppc32.sr[i] = env->sr[i]; 958 } 959 960 /* Sync BATs */ 961 for (i = 0; i < 8; i++) { 962 /* Beware. 
We have to swap upper and lower bits here */ 963 sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32) 964 | env->DBAT[1][i]; 965 sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32) 966 | env->IBAT[1][i]; 967 } 968 969 return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs); 970 } 971 972 int kvm_arch_put_registers(CPUState *cs, int level) 973 { 974 PowerPCCPU *cpu = POWERPC_CPU(cs); 975 CPUPPCState *env = &cpu->env; 976 struct kvm_regs regs; 977 int ret; 978 int i; 979 980 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, ®s); 981 if (ret < 0) { 982 return ret; 983 } 984 985 regs.ctr = env->ctr; 986 regs.lr = env->lr; 987 regs.xer = cpu_read_xer(env); 988 regs.msr = env->msr; 989 regs.pc = env->nip; 990 991 regs.srr0 = env->spr[SPR_SRR0]; 992 regs.srr1 = env->spr[SPR_SRR1]; 993 994 regs.sprg0 = env->spr[SPR_SPRG0]; 995 regs.sprg1 = env->spr[SPR_SPRG1]; 996 regs.sprg2 = env->spr[SPR_SPRG2]; 997 regs.sprg3 = env->spr[SPR_SPRG3]; 998 regs.sprg4 = env->spr[SPR_SPRG4]; 999 regs.sprg5 = env->spr[SPR_SPRG5]; 1000 regs.sprg6 = env->spr[SPR_SPRG6]; 1001 regs.sprg7 = env->spr[SPR_SPRG7]; 1002 1003 regs.pid = env->spr[SPR_BOOKE_PID]; 1004 1005 for (i = 0;i < 32; i++) 1006 regs.gpr[i] = env->gpr[i]; 1007 1008 regs.cr = 0; 1009 for (i = 0; i < 8; i++) { 1010 regs.cr |= (env->crf[i] & 15) << (4 * (7 - i)); 1011 } 1012 1013 ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, ®s); 1014 if (ret < 0) 1015 return ret; 1016 1017 kvm_put_fp(cs); 1018 1019 if (env->tlb_dirty) { 1020 kvm_sw_tlb_put(cpu); 1021 env->tlb_dirty = false; 1022 } 1023 1024 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) { 1025 ret = kvmppc_put_books_sregs(cpu); 1026 if (ret < 0) { 1027 return ret; 1028 } 1029 } 1030 1031 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) { 1032 kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR); 1033 } 1034 1035 if (cap_one_reg) { 1036 int i; 1037 1038 /* We deliberately ignore errors here, for kernels which have 1039 * the ONE_REG calls, but don't support the specific 1040 * registers, there's a reasonable chance things will still 1041 * work, at least until we try to migrate. 
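         * SPRs whose spr_cb entry has a one_reg_id of 0 have no ONE_REG
         * mapping at all and are simply skipped by the loop below.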
*/ 1042 for (i = 0; i < 1024; i++) { 1043 uint64_t id = env->spr_cb[i].one_reg_id; 1044 1045 if (id != 0) { 1046 kvm_put_one_spr(cs, id, i); 1047 } 1048 } 1049 1050 #ifdef TARGET_PPC64 1051 if (msr_ts) { 1052 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) { 1053 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]); 1054 } 1055 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) { 1056 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]); 1057 } 1058 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr); 1059 kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr); 1060 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr); 1061 kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr); 1062 kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr); 1063 kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr); 1064 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave); 1065 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr); 1066 kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr); 1067 kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar); 1068 } 1069 1070 if (cap_papr) { 1071 if (kvm_put_vpa(cs) < 0) { 1072 DPRINTF("Warning: Unable to set VPA information to KVM\n"); 1073 } 1074 } 1075 1076 kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset); 1077 #endif /* TARGET_PPC64 */ 1078 } 1079 1080 return ret; 1081 } 1082 1083 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor) 1084 { 1085 env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR]; 1086 } 1087 1088 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu) 1089 { 1090 CPUPPCState *env = &cpu->env; 1091 struct kvm_sregs sregs; 1092 int ret; 1093 1094 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs); 1095 if (ret < 0) { 1096 return ret; 1097 } 1098 1099 if (sregs.u.e.features & KVM_SREGS_E_BASE) { 1100 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0; 1101 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1; 1102 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr; 1103 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear; 1104 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr; 1105 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr; 1106 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr; 1107 env->spr[SPR_DECR] = sregs.u.e.dec; 1108 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff; 1109 env->spr[SPR_TBU] = sregs.u.e.tb >> 32; 1110 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave; 1111 } 1112 1113 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) { 1114 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir; 1115 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0; 1116 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1; 1117 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar; 1118 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr; 1119 } 1120 1121 if (sregs.u.e.features & KVM_SREGS_E_64) { 1122 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr; 1123 } 1124 1125 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) { 1126 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8; 1127 } 1128 1129 if (sregs.u.e.features & KVM_SREGS_E_IVOR) { 1130 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0]; 1131 kvm_sync_excp(env, POWERPC_EXCP_CRITICAL, SPR_BOOKE_IVOR0); 1132 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1]; 1133 kvm_sync_excp(env, POWERPC_EXCP_MCHECK, SPR_BOOKE_IVOR1); 1134 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2]; 1135 kvm_sync_excp(env, POWERPC_EXCP_DSI, SPR_BOOKE_IVOR2); 1136 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3]; 1137 kvm_sync_excp(env, POWERPC_EXCP_ISI, SPR_BOOKE_IVOR3); 1138 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4]; 1139 kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL, SPR_BOOKE_IVOR4); 1140 
env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5]; 1141 kvm_sync_excp(env, POWERPC_EXCP_ALIGN, SPR_BOOKE_IVOR5); 1142 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6]; 1143 kvm_sync_excp(env, POWERPC_EXCP_PROGRAM, SPR_BOOKE_IVOR6); 1144 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7]; 1145 kvm_sync_excp(env, POWERPC_EXCP_FPU, SPR_BOOKE_IVOR7); 1146 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8]; 1147 kvm_sync_excp(env, POWERPC_EXCP_SYSCALL, SPR_BOOKE_IVOR8); 1148 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9]; 1149 kvm_sync_excp(env, POWERPC_EXCP_APU, SPR_BOOKE_IVOR9); 1150 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10]; 1151 kvm_sync_excp(env, POWERPC_EXCP_DECR, SPR_BOOKE_IVOR10); 1152 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11]; 1153 kvm_sync_excp(env, POWERPC_EXCP_FIT, SPR_BOOKE_IVOR11); 1154 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12]; 1155 kvm_sync_excp(env, POWERPC_EXCP_WDT, SPR_BOOKE_IVOR12); 1156 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13]; 1157 kvm_sync_excp(env, POWERPC_EXCP_DTLB, SPR_BOOKE_IVOR13); 1158 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14]; 1159 kvm_sync_excp(env, POWERPC_EXCP_ITLB, SPR_BOOKE_IVOR14); 1160 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15]; 1161 kvm_sync_excp(env, POWERPC_EXCP_DEBUG, SPR_BOOKE_IVOR15); 1162 1163 if (sregs.u.e.features & KVM_SREGS_E_SPE) { 1164 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0]; 1165 kvm_sync_excp(env, POWERPC_EXCP_SPEU, SPR_BOOKE_IVOR32); 1166 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1]; 1167 kvm_sync_excp(env, POWERPC_EXCP_EFPDI, SPR_BOOKE_IVOR33); 1168 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2]; 1169 kvm_sync_excp(env, POWERPC_EXCP_EFPRI, SPR_BOOKE_IVOR34); 1170 } 1171 1172 if (sregs.u.e.features & KVM_SREGS_E_PM) { 1173 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3]; 1174 kvm_sync_excp(env, POWERPC_EXCP_EPERFM, SPR_BOOKE_IVOR35); 1175 } 1176 1177 if (sregs.u.e.features & KVM_SREGS_E_PC) { 1178 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4]; 1179 kvm_sync_excp(env, POWERPC_EXCP_DOORI, SPR_BOOKE_IVOR36); 1180 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5]; 1181 kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37); 1182 } 1183 } 1184 1185 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) { 1186 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0; 1187 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1; 1188 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2; 1189 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff; 1190 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4; 1191 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6; 1192 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32; 1193 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg; 1194 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0]; 1195 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1]; 1196 } 1197 1198 if (sregs.u.e.features & KVM_SREGS_EXP) { 1199 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr; 1200 } 1201 1202 if (sregs.u.e.features & KVM_SREGS_E_PD) { 1203 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc; 1204 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc; 1205 } 1206 1207 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) { 1208 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr; 1209 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar; 1210 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0; 1211 1212 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) { 1213 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1; 1214 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2; 1215 } 1216 } 1217 1218 return 0; 1219 } 1220 1221 static int 
kvmppc_get_books_sregs(PowerPCCPU *cpu) 1222 { 1223 CPUPPCState *env = &cpu->env; 1224 struct kvm_sregs sregs; 1225 int ret; 1226 int i; 1227 1228 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs); 1229 if (ret < 0) { 1230 return ret; 1231 } 1232 1233 if (!cpu->vhyp) { 1234 ppc_store_sdr1(env, sregs.u.s.sdr1); 1235 } 1236 1237 /* Sync SLB */ 1238 #ifdef TARGET_PPC64 1239 /* 1240 * The packed SLB array we get from KVM_GET_SREGS only contains 1241 * information about valid entries. So we flush our internal copy 1242 * to get rid of stale ones, then put all valid SLB entries back 1243 * in. 1244 */ 1245 memset(env->slb, 0, sizeof(env->slb)); 1246 for (i = 0; i < ARRAY_SIZE(env->slb); i++) { 1247 target_ulong rb = sregs.u.s.ppc64.slb[i].slbe; 1248 target_ulong rs = sregs.u.s.ppc64.slb[i].slbv; 1249 /* 1250 * Only restore valid entries 1251 */ 1252 if (rb & SLB_ESID_V) { 1253 ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs); 1254 } 1255 } 1256 #endif 1257 1258 /* Sync SRs */ 1259 for (i = 0; i < 16; i++) { 1260 env->sr[i] = sregs.u.s.ppc32.sr[i]; 1261 } 1262 1263 /* Sync BATs */ 1264 for (i = 0; i < 8; i++) { 1265 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff; 1266 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32; 1267 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff; 1268 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32; 1269 } 1270 1271 return 0; 1272 } 1273 1274 int kvm_arch_get_registers(CPUState *cs) 1275 { 1276 PowerPCCPU *cpu = POWERPC_CPU(cs); 1277 CPUPPCState *env = &cpu->env; 1278 struct kvm_regs regs; 1279 uint32_t cr; 1280 int i, ret; 1281 1282 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, ®s); 1283 if (ret < 0) 1284 return ret; 1285 1286 cr = regs.cr; 1287 for (i = 7; i >= 0; i--) { 1288 env->crf[i] = cr & 15; 1289 cr >>= 4; 1290 } 1291 1292 env->ctr = regs.ctr; 1293 env->lr = regs.lr; 1294 cpu_write_xer(env, regs.xer); 1295 env->msr = regs.msr; 1296 env->nip = regs.pc; 1297 1298 env->spr[SPR_SRR0] = regs.srr0; 1299 env->spr[SPR_SRR1] = regs.srr1; 1300 1301 env->spr[SPR_SPRG0] = regs.sprg0; 1302 env->spr[SPR_SPRG1] = regs.sprg1; 1303 env->spr[SPR_SPRG2] = regs.sprg2; 1304 env->spr[SPR_SPRG3] = regs.sprg3; 1305 env->spr[SPR_SPRG4] = regs.sprg4; 1306 env->spr[SPR_SPRG5] = regs.sprg5; 1307 env->spr[SPR_SPRG6] = regs.sprg6; 1308 env->spr[SPR_SPRG7] = regs.sprg7; 1309 1310 env->spr[SPR_BOOKE_PID] = regs.pid; 1311 1312 for (i = 0;i < 32; i++) 1313 env->gpr[i] = regs.gpr[i]; 1314 1315 kvm_get_fp(cs); 1316 1317 if (cap_booke_sregs) { 1318 ret = kvmppc_get_booke_sregs(cpu); 1319 if (ret < 0) { 1320 return ret; 1321 } 1322 } 1323 1324 if (cap_segstate) { 1325 ret = kvmppc_get_books_sregs(cpu); 1326 if (ret < 0) { 1327 return ret; 1328 } 1329 } 1330 1331 if (cap_hior) { 1332 kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR); 1333 } 1334 1335 if (cap_one_reg) { 1336 int i; 1337 1338 /* We deliberately ignore errors here, for kernels which have 1339 * the ONE_REG calls, but don't support the specific 1340 * registers, there's a reasonable chance things will still 1341 * work, at least until we try to migrate. 
*/ 1342 for (i = 0; i < 1024; i++) { 1343 uint64_t id = env->spr_cb[i].one_reg_id; 1344 1345 if (id != 0) { 1346 kvm_get_one_spr(cs, id, i); 1347 } 1348 } 1349 1350 #ifdef TARGET_PPC64 1351 if (msr_ts) { 1352 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) { 1353 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]); 1354 } 1355 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) { 1356 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]); 1357 } 1358 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr); 1359 kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr); 1360 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr); 1361 kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr); 1362 kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr); 1363 kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr); 1364 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave); 1365 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr); 1366 kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr); 1367 kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar); 1368 } 1369 1370 if (cap_papr) { 1371 if (kvm_get_vpa(cs) < 0) { 1372 DPRINTF("Warning: Unable to get VPA information from KVM\n"); 1373 } 1374 } 1375 1376 kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset); 1377 #endif 1378 } 1379 1380 return 0; 1381 } 1382 1383 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level) 1384 { 1385 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET; 1386 1387 if (irq != PPC_INTERRUPT_EXT) { 1388 return 0; 1389 } 1390 1391 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) { 1392 return 0; 1393 } 1394 1395 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq); 1396 1397 return 0; 1398 } 1399 1400 #if defined(TARGET_PPCEMB) 1401 #define PPC_INPUT_INT PPC40x_INPUT_INT 1402 #elif defined(TARGET_PPC64) 1403 #define PPC_INPUT_INT PPC970_INPUT_INT 1404 #else 1405 #define PPC_INPUT_INT PPC6xx_INPUT_INT 1406 #endif 1407 1408 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run) 1409 { 1410 PowerPCCPU *cpu = POWERPC_CPU(cs); 1411 CPUPPCState *env = &cpu->env; 1412 int r; 1413 unsigned irq; 1414 1415 qemu_mutex_lock_iothread(); 1416 1417 /* PowerPC QEMU tracks the various core input pins (interrupt, critical 1418 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */ 1419 if (!cap_interrupt_level && 1420 run->ready_for_interrupt_injection && 1421 (cs->interrupt_request & CPU_INTERRUPT_HARD) && 1422 (env->irq_input_state & (1<<PPC_INPUT_INT))) 1423 { 1424 /* For now KVM disregards the 'irq' argument. However, in the 1425 * future KVM could cache it in-kernel to avoid a heavyweight exit 1426 * when reading the UIC. 1427 */ 1428 irq = KVM_INTERRUPT_SET; 1429 1430 DPRINTF("injected interrupt %d\n", irq); 1431 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq); 1432 if (r < 0) { 1433 printf("cpu %d fail inject %x\n", cs->cpu_index, irq); 1434 } 1435 1436 /* Always wake up soon in case the interrupt was level based */ 1437 timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 1438 (NANOSECONDS_PER_SECOND / 50)); 1439 } 1440 1441 /* We don't know if there are more interrupts pending after this. However, 1442 * the guest will return to userspace in the course of handling this one 1443 * anyways, so we will get a chance to deliver the rest. 
*/ 1444 1445 qemu_mutex_unlock_iothread(); 1446 } 1447 1448 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run) 1449 { 1450 return MEMTXATTRS_UNSPECIFIED; 1451 } 1452 1453 int kvm_arch_process_async_events(CPUState *cs) 1454 { 1455 return cs->halted; 1456 } 1457 1458 static int kvmppc_handle_halt(PowerPCCPU *cpu) 1459 { 1460 CPUState *cs = CPU(cpu); 1461 CPUPPCState *env = &cpu->env; 1462 1463 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) { 1464 cs->halted = 1; 1465 cs->exception_index = EXCP_HLT; 1466 } 1467 1468 return 0; 1469 } 1470 1471 /* map dcr access to existing qemu dcr emulation */ 1472 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data) 1473 { 1474 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0) 1475 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn); 1476 1477 return 0; 1478 } 1479 1480 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data) 1481 { 1482 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0) 1483 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn); 1484 1485 return 0; 1486 } 1487 1488 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp) 1489 { 1490 /* Mixed endian case is not handled */ 1491 uint32_t sc = debug_inst_opcode; 1492 1493 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 1494 sizeof(sc), 0) || 1495 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) { 1496 return -EINVAL; 1497 } 1498 1499 return 0; 1500 } 1501 1502 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp) 1503 { 1504 uint32_t sc; 1505 1506 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) || 1507 sc != debug_inst_opcode || 1508 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 1509 sizeof(sc), 1)) { 1510 return -EINVAL; 1511 } 1512 1513 return 0; 1514 } 1515 1516 static int find_hw_breakpoint(target_ulong addr, int type) 1517 { 1518 int n; 1519 1520 assert((nb_hw_breakpoint + nb_hw_watchpoint) 1521 <= ARRAY_SIZE(hw_debug_points)); 1522 1523 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) { 1524 if (hw_debug_points[n].addr == addr && 1525 hw_debug_points[n].type == type) { 1526 return n; 1527 } 1528 } 1529 1530 return -1; 1531 } 1532 1533 static int find_hw_watchpoint(target_ulong addr, int *flag) 1534 { 1535 int n; 1536 1537 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS); 1538 if (n >= 0) { 1539 *flag = BP_MEM_ACCESS; 1540 return n; 1541 } 1542 1543 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE); 1544 if (n >= 0) { 1545 *flag = BP_MEM_WRITE; 1546 return n; 1547 } 1548 1549 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ); 1550 if (n >= 0) { 1551 *flag = BP_MEM_READ; 1552 return n; 1553 } 1554 1555 return -1; 1556 } 1557 1558 int kvm_arch_insert_hw_breakpoint(target_ulong addr, 1559 target_ulong len, int type) 1560 { 1561 if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) { 1562 return -ENOBUFS; 1563 } 1564 1565 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr; 1566 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type; 1567 1568 switch (type) { 1569 case GDB_BREAKPOINT_HW: 1570 if (nb_hw_breakpoint >= max_hw_breakpoint) { 1571 return -ENOBUFS; 1572 } 1573 1574 if (find_hw_breakpoint(addr, type) >= 0) { 1575 return -EEXIST; 1576 } 1577 1578 nb_hw_breakpoint++; 1579 break; 1580 1581 case GDB_WATCHPOINT_WRITE: 1582 case GDB_WATCHPOINT_READ: 1583 case GDB_WATCHPOINT_ACCESS: 1584 if (nb_hw_watchpoint >= max_hw_watchpoint) { 1585 
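            /* every available hardware watchpoint slot is already in use */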
return -ENOBUFS; 1586 } 1587 1588 if (find_hw_breakpoint(addr, type) >= 0) { 1589 return -EEXIST; 1590 } 1591 1592 nb_hw_watchpoint++; 1593 break; 1594 1595 default: 1596 return -ENOSYS; 1597 } 1598 1599 return 0; 1600 } 1601 1602 int kvm_arch_remove_hw_breakpoint(target_ulong addr, 1603 target_ulong len, int type) 1604 { 1605 int n; 1606 1607 n = find_hw_breakpoint(addr, type); 1608 if (n < 0) { 1609 return -ENOENT; 1610 } 1611 1612 switch (type) { 1613 case GDB_BREAKPOINT_HW: 1614 nb_hw_breakpoint--; 1615 break; 1616 1617 case GDB_WATCHPOINT_WRITE: 1618 case GDB_WATCHPOINT_READ: 1619 case GDB_WATCHPOINT_ACCESS: 1620 nb_hw_watchpoint--; 1621 break; 1622 1623 default: 1624 return -ENOSYS; 1625 } 1626 hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint]; 1627 1628 return 0; 1629 } 1630 1631 void kvm_arch_remove_all_hw_breakpoints(void) 1632 { 1633 nb_hw_breakpoint = nb_hw_watchpoint = 0; 1634 } 1635 1636 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg) 1637 { 1638 int n; 1639 1640 /* Software Breakpoint updates */ 1641 if (kvm_sw_breakpoints_active(cs)) { 1642 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP; 1643 } 1644 1645 assert((nb_hw_breakpoint + nb_hw_watchpoint) 1646 <= ARRAY_SIZE(hw_debug_points)); 1647 assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp)); 1648 1649 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) { 1650 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP; 1651 memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp)); 1652 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) { 1653 switch (hw_debug_points[n].type) { 1654 case GDB_BREAKPOINT_HW: 1655 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT; 1656 break; 1657 case GDB_WATCHPOINT_WRITE: 1658 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE; 1659 break; 1660 case GDB_WATCHPOINT_READ: 1661 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ; 1662 break; 1663 case GDB_WATCHPOINT_ACCESS: 1664 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE | 1665 KVMPPC_DEBUG_WATCH_READ; 1666 break; 1667 default: 1668 cpu_abort(cs, "Unsupported breakpoint type\n"); 1669 } 1670 dbg->arch.bp[n].addr = hw_debug_points[n].addr; 1671 } 1672 } 1673 } 1674 1675 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run) 1676 { 1677 CPUState *cs = CPU(cpu); 1678 CPUPPCState *env = &cpu->env; 1679 struct kvm_debug_exit_arch *arch_info = &run->debug.arch; 1680 int handle = 0; 1681 int n; 1682 int flag = 0; 1683 1684 if (cs->singlestep_enabled) { 1685 handle = 1; 1686 } else if (arch_info->status) { 1687 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) { 1688 if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) { 1689 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW); 1690 if (n >= 0) { 1691 handle = 1; 1692 } 1693 } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ | 1694 KVMPPC_DEBUG_WATCH_WRITE)) { 1695 n = find_hw_watchpoint(arch_info->address, &flag); 1696 if (n >= 0) { 1697 handle = 1; 1698 cs->watchpoint_hit = &hw_watchpoint; 1699 hw_watchpoint.vaddr = hw_debug_points[n].addr; 1700 hw_watchpoint.flags = flag; 1701 } 1702 } 1703 } 1704 } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) { 1705 handle = 1; 1706 } else { 1707 /* QEMU is not able to handle debug exception, so inject 1708 * program exception to guest; 1709 * Yes program exception NOT debug exception !! 1710 * When QEMU is using debug resources then debug exception must 1711 * be always set. To achieve this we set MSR_DE and also set 1712 * MSRP_DEP so guest cannot change MSR_DE. 
         * When emulating debug resource for guest we want guest
         * to control MSR_DE (enable/disable debug interrupt on need).
         * Supporting both configurations is NOT possible.
         * So the result is that we cannot share debug resources
         * between QEMU and Guest on BOOKE architecture.
         * In the current design QEMU gets the priority over guest,
         * this means that if QEMU is using debug resources then guest
         * cannot use them;
         * For software breakpoint QEMU uses a privileged instruction;
         * So there cannot be any reason that we are here for guest
         * set debug exception, the only possibility is that the guest
         * executed a privileged / illegal instruction and that's why we
         * are injecting a program interrupt.
         */

        cpu_synchronize_state(cs);
        /* env->nip is PC, so increment this by 4 to use
         * ppc_cpu_do_interrupt(), which sets srr0 = env->nip - 4.
         */
        env->nip += 4;
        cs->exception_index = POWERPC_EXCP_PROGRAM;
        env->error_code = POWERPC_EXCP_INVAL;
        ppc_cpu_do_interrupt(cs);
    }

    return handle;
}

int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    int ret;

    qemu_mutex_lock_iothread();

    switch (run->exit_reason) {
    case KVM_EXIT_DCR:
        if (run->dcr.is_write) {
            DPRINTF("handle dcr write\n");
            ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
        } else {
            DPRINTF("handle dcr read\n");
            ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
        }
        break;
    case KVM_EXIT_HLT:
        DPRINTF("handle halt\n");
        ret = kvmppc_handle_halt(cpu);
        break;
#if defined(TARGET_PPC64)
    case KVM_EXIT_PAPR_HCALL:
        DPRINTF("handle PAPR hypercall\n");
        run->papr_hcall.ret = spapr_hypercall(cpu,
                                              run->papr_hcall.nr,
                                              run->papr_hcall.args);
        ret = 0;
        break;
#endif
    case KVM_EXIT_EPR:
        DPRINTF("handle epr\n");
        run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
        ret = 0;
        break;
    case KVM_EXIT_WATCHDOG:
        DPRINTF("handle watchdog expiry\n");
        watchdog_perform_action();
        ret = 0;
        break;

    case KVM_EXIT_DEBUG:
        DPRINTF("handle debug exception\n");
        if (kvm_handle_debug(cpu, run)) {
            ret = EXCP_DEBUG;
            break;
        }
        /* re-enter, this exception was guest-internal */
        ret = 0;
        break;

    default:
        fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
        ret = -1;
        break;
    }

    qemu_mutex_unlock_iothread();
    return ret;
}

int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
{
    CPUState *cs = CPU(cpu);
    uint32_t bits = tsr_bits;
    struct kvm_one_reg reg = {
        .id = KVM_REG_PPC_OR_TSR,
        .addr = (uintptr_t) &bits,
    };

    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
}

int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
{

    CPUState *cs = CPU(cpu);
    uint32_t bits = tsr_bits;
    struct kvm_one_reg reg = {
        .id = KVM_REG_PPC_CLEAR_TSR,
        .addr = (uintptr_t) &bits,
    };

    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
}

int kvmppc_set_tcr(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    CPUPPCState *env = &cpu->env;
    uint32_t tcr = env->spr[SPR_BOOKE_TCR];

    struct kvm_one_reg reg = {
        .id = KVM_REG_PPC_TCR,
        .addr = (uintptr_t) &tcr,
1837 }; 1838 1839 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 1840 } 1841 1842 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu) 1843 { 1844 CPUState *cs = CPU(cpu); 1845 int ret; 1846 1847 if (!kvm_enabled()) { 1848 return -1; 1849 } 1850 1851 if (!cap_ppc_watchdog) { 1852 printf("warning: KVM does not support watchdog"); 1853 return -1; 1854 } 1855 1856 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0); 1857 if (ret < 0) { 1858 fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n", 1859 __func__, strerror(-ret)); 1860 return ret; 1861 } 1862 1863 return ret; 1864 } 1865 1866 static int read_cpuinfo(const char *field, char *value, int len) 1867 { 1868 FILE *f; 1869 int ret = -1; 1870 int field_len = strlen(field); 1871 char line[512]; 1872 1873 f = fopen("/proc/cpuinfo", "r"); 1874 if (!f) { 1875 return -1; 1876 } 1877 1878 do { 1879 if (!fgets(line, sizeof(line), f)) { 1880 break; 1881 } 1882 if (!strncmp(line, field, field_len)) { 1883 pstrcpy(value, len, line); 1884 ret = 0; 1885 break; 1886 } 1887 } while(*line); 1888 1889 fclose(f); 1890 1891 return ret; 1892 } 1893 1894 uint32_t kvmppc_get_tbfreq(void) 1895 { 1896 char line[512]; 1897 char *ns; 1898 uint32_t retval = NANOSECONDS_PER_SECOND; 1899 1900 if (read_cpuinfo("timebase", line, sizeof(line))) { 1901 return retval; 1902 } 1903 1904 if (!(ns = strchr(line, ':'))) { 1905 return retval; 1906 } 1907 1908 ns++; 1909 1910 return atoi(ns); 1911 } 1912 1913 bool kvmppc_get_host_serial(char **value) 1914 { 1915 return g_file_get_contents("/proc/device-tree/system-id", value, NULL, 1916 NULL); 1917 } 1918 1919 bool kvmppc_get_host_model(char **value) 1920 { 1921 return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL); 1922 } 1923 1924 /* Try to find a device tree node for a CPU with clock-frequency property */ 1925 static int kvmppc_find_cpu_dt(char *buf, int buf_len) 1926 { 1927 struct dirent *dirp; 1928 DIR *dp; 1929 1930 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) { 1931 printf("Can't open directory " PROC_DEVTREE_CPU "\n"); 1932 return -1; 1933 } 1934 1935 buf[0] = '\0'; 1936 while ((dirp = readdir(dp)) != NULL) { 1937 FILE *f; 1938 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU, 1939 dirp->d_name); 1940 f = fopen(buf, "r"); 1941 if (f) { 1942 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name); 1943 fclose(f); 1944 break; 1945 } 1946 buf[0] = '\0'; 1947 } 1948 closedir(dp); 1949 if (buf[0] == '\0') { 1950 printf("Unknown host!\n"); 1951 return -1; 1952 } 1953 1954 return 0; 1955 } 1956 1957 static uint64_t kvmppc_read_int_dt(const char *filename) 1958 { 1959 union { 1960 uint32_t v32; 1961 uint64_t v64; 1962 } u; 1963 FILE *f; 1964 int len; 1965 1966 f = fopen(filename, "rb"); 1967 if (!f) { 1968 return -1; 1969 } 1970 1971 len = fread(&u, 1, sizeof(u), f); 1972 fclose(f); 1973 switch (len) { 1974 case 4: 1975 /* property is a 32-bit quantity */ 1976 return be32_to_cpu(u.v32); 1977 case 8: 1978 return be64_to_cpu(u.v64); 1979 } 1980 1981 return 0; 1982 } 1983 1984 /* Read a CPU node property from the host device tree that's a single 1985 * integer (32-bit or 64-bit). 
Returns 0 if anything goes wrong 1986 * (can't find or open the property, or doesn't understand the 1987 * format) */ 1988 static uint64_t kvmppc_read_int_cpu_dt(const char *propname) 1989 { 1990 char buf[PATH_MAX], *tmp; 1991 uint64_t val; 1992 1993 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) { 1994 return -1; 1995 } 1996 1997 tmp = g_strdup_printf("%s/%s", buf, propname); 1998 val = kvmppc_read_int_dt(tmp); 1999 g_free(tmp); 2000 2001 return val; 2002 } 2003 2004 uint64_t kvmppc_get_clockfreq(void) 2005 { 2006 return kvmppc_read_int_cpu_dt("clock-frequency"); 2007 } 2008 2009 uint32_t kvmppc_get_vmx(void) 2010 { 2011 return kvmppc_read_int_cpu_dt("ibm,vmx"); 2012 } 2013 2014 uint32_t kvmppc_get_dfp(void) 2015 { 2016 return kvmppc_read_int_cpu_dt("ibm,dfp"); 2017 } 2018 2019 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo) 2020 { 2021 PowerPCCPU *cpu = ppc_env_get_cpu(env); 2022 CPUState *cs = CPU(cpu); 2023 2024 if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) && 2025 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) { 2026 return 0; 2027 } 2028 2029 return 1; 2030 } 2031 2032 int kvmppc_get_hasidle(CPUPPCState *env) 2033 { 2034 struct kvm_ppc_pvinfo pvinfo; 2035 2036 if (!kvmppc_get_pvinfo(env, &pvinfo) && 2037 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) { 2038 return 1; 2039 } 2040 2041 return 0; 2042 } 2043 2044 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len) 2045 { 2046 uint32_t *hc = (uint32_t*)buf; 2047 struct kvm_ppc_pvinfo pvinfo; 2048 2049 if (!kvmppc_get_pvinfo(env, &pvinfo)) { 2050 memcpy(buf, pvinfo.hcall, buf_len); 2051 return 0; 2052 } 2053 2054 /* 2055 * Fallback to always fail hypercalls regardless of endianness: 2056 * 2057 * tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian) 2058 * li r3, -1 2059 * b .+8 (becomes nop in wrong endian) 2060 * bswap32(li r3, -1) 2061 */ 2062 2063 hc[0] = cpu_to_be32(0x08000048); 2064 hc[1] = cpu_to_be32(0x3860ffff); 2065 hc[2] = cpu_to_be32(0x48000008); 2066 hc[3] = cpu_to_be32(bswap32(0x3860ffff)); 2067 2068 return 1; 2069 } 2070 2071 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall) 2072 { 2073 return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1); 2074 } 2075 2076 void kvmppc_enable_logical_ci_hcalls(void) 2077 { 2078 /* 2079 * FIXME: it would be nice if we could detect the cases where 2080 * we're using a device which requires the in kernel 2081 * implementation of these hcalls, but the kernel lacks them and 2082 * produce a warning. 
static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
{
    return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
}

void kvmppc_enable_logical_ci_hcalls(void)
{
    /*
     * FIXME: it would be nice to detect the case where a device needs
     * the in-kernel implementation of these hcalls but the kernel
     * lacks it, and produce a warning.
     */
    kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
    kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
}

void kvmppc_enable_set_mode_hcall(void)
{
    kvmppc_enable_hcall(kvm_state, H_SET_MODE);
}

void kvmppc_enable_clear_ref_mod_hcalls(void)
{
    kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
    kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
}
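
/*
 * Note on the wrappers above: as I understand the KVM API, with
 * KVM_CAP_PPC_ENABLE_HCALL the two trailing arguments passed to
 * kvm_vm_enable_cap() are the hcall token and an enable flag (non-zero
 * asks KVM to handle that hcall in the kernel).  See the KVM API
 * documentation for the authoritative description.
 */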
void kvmppc_set_papr(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    int ret;

    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
    if (ret) {
        error_report("This vCPU type or KVM version does not support PAPR");
        exit(1);
    }

    /* Update the capability flag so we sync the right information
     * with kvm */
    cap_papr = 1;
}

int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr)
{
    return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr);
}

void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
{
    CPUState *cs = CPU(cpu);
    int ret;

    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
    if (ret && mpic_proxy) {
        error_report("This KVM version does not support EPR");
        exit(1);
    }
}

int kvmppc_smt_threads(void)
{
    return cap_ppc_smt ? cap_ppc_smt : 1;
}

#ifdef TARGET_PPC64
off_t kvmppc_alloc_rma(void **rma)
{
    off_t size;
    int fd;
    struct kvm_allocate_rma ret;

    /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
     * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
     *                      not necessary on this hardware
     * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
     *
     * FIXME: We should allow the user to force contiguous RMA
     * allocation in the cap_ppc_rma==1 case.
     */
    if (cap_ppc_rma < 2) {
        return 0;
    }

    fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
    if (fd < 0) {
        fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
                strerror(errno));
        return -1;
    }

    size = MIN(ret.rma_size, 256ul << 20);

    *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
    if (*rma == MAP_FAILED) {
        fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
        return -1;
    }

    return size;
}

uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
{
    struct kvm_ppc_smmu_info info;
    long rampagesize, best_page_shift;
    int i;

    if (cap_ppc_rma >= 2) {
        return current_size;
    }

    /* Find the largest hardware supported page size that's less than
     * or equal to the (logical) backing page size of guest RAM */
    kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
    rampagesize = qemu_getrampagesize();
    best_page_shift = 0;

    for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
        struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];

        if (!sps->page_shift) {
            continue;
        }

        if ((sps->page_shift > best_page_shift)
            && ((1UL << sps->page_shift) <= rampagesize)) {
            best_page_shift = sps->page_shift;
        }
    }

    return MIN(current_size,
               1ULL << (best_page_shift + hash_shift - 7));
}
#endif
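
/*
 * Illustrative numbers only: with a 64 KiB backing page size
 * (best_page_shift = 16) and hash_shift = 25 (a 32 MiB hash table),
 * kvmppc_rma_size() above caps the RMA at
 * 1ULL << (16 + 25 - 7) = 2^34 bytes = 16 GiB, so in such a
 * configuration current_size is normally the limiting factor.
 */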
bool kvmppc_spapr_use_multitce(void)
{
    return cap_spapr_multitce;
}

int kvmppc_spapr_enable_inkernel_multitce(void)
{
    int ret;

    ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
                            H_PUT_TCE_INDIRECT, 1);
    if (!ret) {
        ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
                                H_STUFF_TCE, 1);
    }

    return ret;
}

void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
                              uint64_t bus_offset, uint32_t nb_table,
                              int *pfd, bool need_vfio)
{
    long len;
    int fd;
    void *table;

    /* Must set fd to -1 so we don't try to munmap when called for
     * destroying the table, which the upper layers -will- do
     */
    *pfd = -1;
    if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
        return NULL;
    }

    if (cap_spapr_tce_64) {
        struct kvm_create_spapr_tce_64 args = {
            .liobn = liobn,
            .page_shift = page_shift,
            .offset = bus_offset >> page_shift,
            .size = nb_table,
            .flags = 0
        };
        fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE_64, &args);
        if (fd < 0) {
            fprintf(stderr,
                    "KVM: Failed to create TCE64 table for liobn 0x%x\n",
                    liobn);
            return NULL;
        }
    } else if (cap_spapr_tce) {
        uint64_t window_size = (uint64_t) nb_table << page_shift;
        struct kvm_create_spapr_tce args = {
            .liobn = liobn,
            .window_size = window_size,
        };
        if ((window_size != args.window_size) || bus_offset) {
            return NULL;
        }
        fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
        if (fd < 0) {
            fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
                    liobn);
            return NULL;
        }
    } else {
        return NULL;
    }

    len = nb_table * sizeof(uint64_t);
    /* FIXME: round this up to page size */

    table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
    if (table == MAP_FAILED) {
        fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
                liobn);
        close(fd);
        return NULL;
    }

    *pfd = fd;
    return table;
}

int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
{
    long len;

    if (fd < 0) {
        return -1;
    }

    len = nb_table * sizeof(uint64_t);
    if ((munmap(table, len) < 0) ||
        (close(fd) < 0)) {
        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
                strerror(errno));
        /* Leak the table */
    }

    return 0;
}

int kvmppc_reset_htab(int shift_hint)
{
    uint32_t shift = shift_hint;

    if (!kvm_enabled()) {
        /* Full emulation, tell caller to allocate htab itself */
        return 0;
    }
    if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
        int ret;
        ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
        if (ret == -ENOTTY) {
            /* At least some versions of PR KVM advertise the
             * capability, but don't implement the ioctl().  Oops.
             * Return 0 so that we allocate the htab in qemu, as is
             * correct for PR. */
            return 0;
        } else if (ret < 0) {
            return ret;
        }
        return shift;
    }

    /* We have a kernel that predates the htab reset calls.  For PR
     * KVM, we need to allocate the htab ourselves, for an HV KVM of
     * this era, it has allocated a 16MB fixed size hash table already. */
    if (kvmppc_is_pr(kvm_state)) {
        /* PR - tell caller to allocate htab */
        return 0;
    } else {
        /* HV - assume 16MB kernel allocated htab */
        return 24;
    }
}
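
/*
 * Summary of kvmppc_reset_htab()'s return convention, derived from the
 * cases above: a positive value is the log2 size of a hash table that
 * KVM has already allocated (e.g. 24 for the legacy fixed 16 MiB HV
 * table), 0 means the caller must allocate the HPT itself (TCG or PR),
 * and a negative value is an error from the KVM_PPC_ALLOCATE_HTAB
 * ioctl.
 */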
static inline uint32_t mfpvr(void)
{
    uint32_t pvr;

    asm ("mfpvr %0"
         : "=r"(pvr));
    return pvr;
}

static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    if (on) {
        *word |= flags;
    } else {
        *word &= ~flags;
    }
}

static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
{
    PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
    uint32_t vmx = kvmppc_get_vmx();
    uint32_t dfp = kvmppc_get_dfp();
    uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
    uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");

    /* Now fix up the class with information we can query from the host */
    pcc->pvr = mfpvr();

    if (vmx != -1) {
        /* Only override when we know what the host supports */
        alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
        alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
    }
    if (dfp != -1) {
        /* Only override when we know what the host supports */
        alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
    }

    if (dcache_size != -1) {
        pcc->l1_dcache_size = dcache_size;
    }

    if (icache_size != -1) {
        pcc->l1_icache_size = icache_size;
    }

#if defined(TARGET_PPC64)
    pcc->radix_page_info = kvm_get_radix_page_info();

    if ((pcc->pvr & 0xffffff00) == CPU_POWERPC_POWER9_DD1) {
        /*
         * POWER9 DD1 has some bugs which make it not really ISA 3.00
         * compliant.  More importantly, advertising ISA 3.00
         * architected mode may prevent guests from activating
         * necessary DD1 workarounds.
         */
        pcc->pcr_supported &= ~(PCR_COMPAT_3_00 | PCR_COMPAT_2_07
                                | PCR_COMPAT_2_06 | PCR_COMPAT_2_05);
    }
#endif /* defined(TARGET_PPC64) */
}

bool kvmppc_has_cap_epr(void)
{
    return cap_epr;
}

bool kvmppc_has_cap_htab_fd(void)
{
    return cap_htab_fd;
}

bool kvmppc_has_cap_fixup_hcalls(void)
{
    return cap_fixup_hcalls;
}

bool kvmppc_has_cap_htm(void)
{
    return cap_htm;
}

bool kvmppc_has_cap_mmu_radix(void)
{
    return cap_mmu_radix;
}

bool kvmppc_has_cap_mmu_hash_v3(void)
{
    return cap_mmu_hash_v3;
}

PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
{
    uint32_t host_pvr = mfpvr();
    PowerPCCPUClass *pvr_pcc;

    pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
    if (pvr_pcc == NULL) {
        pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
    }

    return pvr_pcc;
}

static int kvm_ppc_register_host_cpu_type(void)
{
    TypeInfo type_info = {
        .name = TYPE_HOST_POWERPC_CPU,
        .class_init = kvmppc_host_cpu_class_init,
    };
    PowerPCCPUClass *pvr_pcc;
    ObjectClass *oc;
    DeviceClass *dc;
    int i;

    pvr_pcc = kvm_ppc_get_host_cpu_class();
    if (pvr_pcc == NULL) {
        return -1;
    }
    type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
    type_register(&type_info);

    oc = object_class_by_name(type_info.name);
    g_assert(oc);

#if defined(TARGET_PPC64)
    type_info.name = g_strdup_printf("%s-"TYPE_SPAPR_CPU_CORE, "host");
    type_info.parent = TYPE_SPAPR_CPU_CORE;
    type_info.instance_size = sizeof(sPAPRCPUCore);
    type_info.instance_init = NULL;
    type_info.class_init = spapr_cpu_core_class_init;
    type_info.class_data = (void *) "host";
    type_register(&type_info);
    g_free((void *)type_info.name);
#endif

    /*
     * Update generic CPU family class alias (e.g. on a POWER8NVL host,
     * we want "POWER8" to be a "family" alias that points to the current
     * host CPU type, too)
     */
    dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc));
    for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) {
        if (strcmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) {
            char *suffix;

            ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc));
            suffix = strstr(ppc_cpu_aliases[i].model, "-"TYPE_POWERPC_CPU);
            if (suffix) {
                *suffix = 0;
            }
            ppc_cpu_aliases[i].oc = oc;
            break;
        }
    }

    return 0;
}

int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
{
    struct kvm_rtas_token_args args = {
        .token = token,
    };

    if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
        return -ENOENT;
    }

    strncpy(args.name, function, sizeof(args.name));

    return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
}
int kvmppc_get_htab_fd(bool write)
{
    struct kvm_get_htab_fd s = {
        .flags = write ? KVM_GET_HTAB_WRITE : 0,
        .start_index = 0,
    };

    if (!cap_htab_fd) {
        fprintf(stderr, "KVM version doesn't support saving the hash table\n");
        return -1;
    }

    return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
}

int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
{
    int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    uint8_t buf[bufsize];
    ssize_t rc;

    do {
        rc = read(fd, buf, bufsize);
        if (rc < 0) {
            fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
                    strerror(errno));
            return rc;
        } else if (rc) {
            uint8_t *buffer = buf;
            ssize_t n = rc;
            while (n) {
                struct kvm_get_htab_header *head =
                    (struct kvm_get_htab_header *) buffer;
                size_t chunksize = sizeof(*head) +
                    HASH_PTE_SIZE_64 * head->n_valid;

                qemu_put_be32(f, head->index);
                qemu_put_be16(f, head->n_valid);
                qemu_put_be16(f, head->n_invalid);
                qemu_put_buffer(f, (void *)(head + 1),
                                HASH_PTE_SIZE_64 * head->n_valid);

                buffer += chunksize;
                n -= chunksize;
            }
        }
    } while ((rc != 0)
             && ((max_ns < 0)
                 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime)
                     < max_ns)));

    return (rc == 0) ? 1 : 0;
}

int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
                           uint16_t n_valid, uint16_t n_invalid)
{
    struct kvm_get_htab_header *buf;
    size_t chunksize = sizeof(*buf) + n_valid * HASH_PTE_SIZE_64;
    ssize_t rc;

    buf = alloca(chunksize);
    buf->index = index;
    buf->n_valid = n_valid;
    buf->n_invalid = n_invalid;

    qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64 * n_valid);

    rc = write(fd, buf, chunksize);
    if (rc < 0) {
        fprintf(stderr, "Error writing KVM hash table: %s\n",
                strerror(errno));
        return rc;
    }
    if (rc != chunksize) {
        /* We should never get a short write on a single chunk */
        fprintf(stderr, "Short write, restoring KVM hash table\n");
        return -1;
    }
    return 0;
}

bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
{
    return true;
}

void kvm_arch_init_irq_routing(KVMState *s)
{
}
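
/*
 * For reference when reading kvmppc_save_htab()/kvmppc_load_htab_chunk()
 * above and kvmppc_read_hptes()/kvmppc_write_hpte() below: the stream
 * exchanged over the KVM HTAB fd is a sequence of records, each a
 * struct kvm_get_htab_header (a 32-bit HPTE index, a 16-bit count of
 * valid entries and a 16-bit count of invalid entries) immediately
 * followed by n_valid HPTEs of HASH_PTE_SIZE_64 bytes each; the
 * n_invalid entries that follow them in the guest's hash table are
 * simply skipped in the stream.
 */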
void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n)
{
    struct kvm_get_htab_fd ghf = {
        .flags = 0,
        .start_index = ptex,
    };
    int fd, rc;
    int i;

    fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
    if (fd < 0) {
        hw_error("kvmppc_read_hptes: Unable to open HPT fd");
    }

    i = 0;
    while (i < n) {
        struct kvm_get_htab_header *hdr;
        int m = n < HPTES_PER_GROUP ? n : HPTES_PER_GROUP;
        char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64];

        rc = read(fd, buf, sizeof(buf));
        if (rc < 0) {
            hw_error("kvmppc_read_hptes: Unable to read HPTEs");
        }

        hdr = (struct kvm_get_htab_header *)buf;
        while ((i < n) && ((char *)hdr < (buf + rc))) {
            int invalid = hdr->n_invalid;

            if (hdr->index != (ptex + i)) {
                hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32
                         " != (%"HWADDR_PRIu" + %d)", hdr->index, ptex, i);
            }

            memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * hdr->n_valid);
            i += hdr->n_valid;

            if ((n - i) < invalid) {
                invalid = n - i;
            }
            memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64);
            i += hdr->n_invalid;

            hdr = (struct kvm_get_htab_header *)
                ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid);
        }
    }

    close(fd);
}

void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1)
{
    int fd, rc;
    struct kvm_get_htab_fd ghf;
    struct {
        struct kvm_get_htab_header hdr;
        uint64_t pte0;
        uint64_t pte1;
    } buf;

    ghf.flags = 0;
    ghf.start_index = 0;     /* Ignored */
    fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
    if (fd < 0) {
        hw_error("kvmppc_write_hpte: Unable to open HPT fd");
    }

    buf.hdr.n_valid = 1;
    buf.hdr.n_invalid = 0;
    buf.hdr.index = ptex;
    buf.pte0 = cpu_to_be64(pte0);
    buf.pte1 = cpu_to_be64(pte1);

    rc = write(fd, &buf, sizeof(buf));
    if (rc != sizeof(buf)) {
        hw_error("kvmppc_write_hpte: Unable to update KVM HPT");
    }
    close(fd);
}

int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
                             uint64_t address, uint32_t data, PCIDevice *dev)
{
    return 0;
}

int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
                                int vector, PCIDevice *dev)
{
    return 0;
}

int kvm_arch_release_virq_post(int virq)
{
    return 0;
}

int kvm_arch_msi_data_to_gsi(uint32_t data)
{
    return data & 0xffff;
}

int kvmppc_enable_hwrng(void)
{
    if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
        return -1;
    }

    return kvmppc_enable_hcall(kvm_state, H_RANDOM);
}

void kvmppc_check_papr_resize_hpt(Error **errp)
{
    if (!kvm_enabled()) {
        return; /* No KVM, we're good */
    }

    if (cap_resize_hpt) {
        return; /* Kernel has explicit support, we're good */
    }

    /* Otherwise fall back to looking for PR KVM */
    if (kvmppc_is_pr(kvm_state)) {
        return;
    }

    error_setg(errp,
               "Hash page table resizing not available with this KVM version");
}

int kvmppc_resize_hpt_prepare(PowerPCCPU *cpu, target_ulong flags, int shift)
{
    CPUState *cs = CPU(cpu);
    struct kvm_ppc_resize_hpt rhpt = {
        .flags = flags,
        .shift = shift,
    };

    if (!cap_resize_hpt) {
        return -ENOSYS;
    }

    return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_PREPARE, &rhpt);
}

int kvmppc_resize_hpt_commit(PowerPCCPU *cpu, target_ulong flags, int shift)
{
    CPUState *cs = CPU(cpu);
    struct kvm_ppc_resize_hpt rhpt = {
        .flags = flags,
        .shift = shift,
    };

    if (!cap_resize_hpt) {
        return -ENOSYS;
    }

    return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_COMMIT, &rhpt);
}
static void kvmppc_pivot_hpt_cpu(CPUState *cs, run_on_cpu_data arg)
{
    target_ulong sdr1 = arg.target_ptr;
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;

    /* This is just for the benefit of PR KVM */
    cpu_synchronize_state(cs);
    env->spr[SPR_SDR1] = sdr1;
    if (kvmppc_put_books_sregs(cpu) < 0) {
        error_report("Unable to update SDR1 in KVM");
        exit(1);
    }
}

void kvmppc_update_sdr1(target_ulong sdr1)
{
    CPUState *cs;

    CPU_FOREACH(cs) {
        run_on_cpu(cs, kvmppc_pivot_hpt_cpu, RUN_ON_CPU_TARGET_PTR(sdr1));
    }
}

/*
 * This is a helper function to detect a post-migration scenario in
 * which a guest, running as KVM-HV, freezes in cpu_post_load because
 * the guest kernel can't handle a PVR value other than the actual host
 * PVR in KVM_SET_SREGS, even if pvr_match() returns true.
 *
 * If we don't have cap_ppc_pvr_compat and we're not running in PR
 * (so, we're HV), return true.  The workaround itself is done in
 * cpu_post_load.
 *
 * The order here is important: we'll only check for KVM PR as a
 * fallback if the guest kernel can't handle the situation itself.
 * We want to avoid, as much as possible, querying the running KVM
 * type at the QEMU level.
 */
bool kvmppc_pvr_workaround_required(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);

    if (!kvm_enabled()) {
        return false;
    }

    if (cap_ppc_pvr_compat) {
        return false;
    }

    return !kvmppc_is_pr(cs->kvm_state);
}