1 /* 2 * PowerPC implementation of KVM hooks 3 * 4 * Copyright IBM Corp. 2007 5 * Copyright (C) 2011 Freescale Semiconductor, Inc. 6 * 7 * Authors: 8 * Jerone Young <jyoung5@us.ibm.com> 9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com> 10 * Hollis Blanchard <hollisb@us.ibm.com> 11 * 12 * This work is licensed under the terms of the GNU GPL, version 2 or later. 13 * See the COPYING file in the top-level directory. 14 * 15 */ 16 17 #include "qemu/osdep.h" 18 #include <dirent.h> 19 #include <sys/ioctl.h> 20 #include <sys/vfs.h> 21 22 #include <linux/kvm.h> 23 24 #include "qemu-common.h" 25 #include "qemu/error-report.h" 26 #include "cpu.h" 27 #include "cpu-models.h" 28 #include "qemu/timer.h" 29 #include "sysemu/sysemu.h" 30 #include "sysemu/hw_accel.h" 31 #include "kvm_ppc.h" 32 #include "sysemu/cpus.h" 33 #include "sysemu/device_tree.h" 34 #include "mmu-hash64.h" 35 36 #include "hw/sysbus.h" 37 #include "hw/ppc/spapr.h" 38 #include "hw/ppc/spapr_vio.h" 39 #include "hw/ppc/spapr_cpu_core.h" 40 #include "hw/ppc/ppc.h" 41 #include "sysemu/watchdog.h" 42 #include "trace.h" 43 #include "exec/gdbstub.h" 44 #include "exec/memattrs.h" 45 #include "exec/ram_addr.h" 46 #include "sysemu/hostmem.h" 47 #include "qemu/cutils.h" 48 #include "qemu/mmap-alloc.h" 49 #if defined(TARGET_PPC64) 50 #include "hw/ppc/spapr_cpu_core.h" 51 #endif 52 #include "elf.h" 53 #include "sysemu/kvm_int.h" 54 55 //#define DEBUG_KVM 56 57 #ifdef DEBUG_KVM 58 #define DPRINTF(fmt, ...) \ 59 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0) 60 #else 61 #define DPRINTF(fmt, ...) \ 62 do { } while (0) 63 #endif 64 65 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/" 66 67 const KVMCapabilityInfo kvm_arch_required_capabilities[] = { 68 KVM_CAP_LAST_INFO 69 }; 70 71 static int cap_interrupt_unset = false; 72 static int cap_interrupt_level = false; 73 static int cap_segstate; 74 static int cap_booke_sregs; 75 static int cap_ppc_smt; 76 static int cap_ppc_rma; 77 static int cap_spapr_tce; 78 static int cap_spapr_tce_64; 79 static int cap_spapr_multitce; 80 static int cap_spapr_vfio; 81 static int cap_hior; 82 static int cap_one_reg; 83 static int cap_epr; 84 static int cap_ppc_watchdog; 85 static int cap_papr; 86 static int cap_htab_fd; 87 static int cap_fixup_hcalls; 88 static int cap_htm; /* Hardware transactional memory support */ 89 static int cap_mmu_radix; 90 static int cap_mmu_hash_v3; 91 92 static uint32_t debug_inst_opcode; 93 94 /* XXX We have a race condition where we actually have a level triggered 95 * interrupt, but the infrastructure can't expose that yet, so the guest 96 * takes but ignores it, goes to sleep and never gets notified that there's 97 * still an interrupt pending. 98 * 99 * As a quick workaround, let's just wake up again 20 ms after we injected 100 * an interrupt. That way we can assure that we're always reinjecting 101 * interrupts in case the guest swallowed them. 102 */ 103 static QEMUTimer *idle_timer; 104 105 static void kvm_kick_cpu(void *opaque) 106 { 107 PowerPCCPU *cpu = opaque; 108 109 qemu_cpu_kick(CPU(cpu)); 110 } 111 112 /* Check whether we are running with KVM-PR (instead of KVM-HV). This 113 * should only be used for fallback tests - generally we should use 114 * explicit capabilities for the features we want, rather than 115 * assuming what is/isn't available depending on the KVM variant. */ 116 static bool kvmppc_is_pr(KVMState *ks) 117 { 118 /* Assume KVM-PR if the GET_PVINFO capability is available */ 119 return kvm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0; 120 } 121 122 static int kvm_ppc_register_host_cpu_type(void); 123 124 int kvm_arch_init(MachineState *ms, KVMState *s) 125 { 126 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ); 127 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL); 128 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE); 129 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS); 130 cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT); 131 cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA); 132 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE); 133 cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64); 134 cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE); 135 cap_spapr_vfio = false; 136 cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG); 137 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR); 138 cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR); 139 cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG); 140 /* Note: we don't set cap_papr here, because this capability is 141 * only activated after this by kvmppc_set_papr() */ 142 cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD); 143 cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL); 144 cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM); 145 cap_mmu_radix = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_RADIX); 146 cap_mmu_hash_v3 = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_HASH_V3); 147 148 if (!cap_interrupt_level) { 149 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the " 150 "VM to stall at times!\n"); 151 } 152 153 kvm_ppc_register_host_cpu_type(); 154 155 return 0; 156 } 157 158 int kvm_arch_irqchip_create(MachineState *ms, KVMState *s) 159 { 160 return 0; 161 } 162 163 static int kvm_arch_sync_sregs(PowerPCCPU *cpu) 164 { 165 CPUPPCState *cenv = &cpu->env; 166 CPUState *cs = CPU(cpu); 167 struct kvm_sregs sregs; 168 int ret; 169 170 if (cenv->excp_model == POWERPC_EXCP_BOOKE) { 171 /* What we're really trying to say is "if we're on BookE, we use 172 the native PVR for now". This is the only sane way to check 173 it though, so we potentially confuse users that they can run 174 BookE guests on BookS. Let's hope nobody dares enough :) */ 175 return 0; 176 } else { 177 if (!cap_segstate) { 178 fprintf(stderr, "kvm error: missing PVR setting capability\n"); 179 return -ENOSYS; 180 } 181 } 182 183 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs); 184 if (ret) { 185 return ret; 186 } 187 188 sregs.pvr = cenv->spr[SPR_PVR]; 189 return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs); 190 } 191 192 /* Set up a shared TLB array with KVM */ 193 static int kvm_booke206_tlb_init(PowerPCCPU *cpu) 194 { 195 CPUPPCState *env = &cpu->env; 196 CPUState *cs = CPU(cpu); 197 struct kvm_book3e_206_tlb_params params = {}; 198 struct kvm_config_tlb cfg = {}; 199 unsigned int entries = 0; 200 int ret, i; 201 202 if (!kvm_enabled() || 203 !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) { 204 return 0; 205 } 206 207 assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN); 208 209 for (i = 0; i < BOOKE206_MAX_TLBN; i++) { 210 params.tlb_sizes[i] = booke206_tlb_size(env, i); 211 params.tlb_ways[i] = booke206_tlb_ways(env, i); 212 entries += params.tlb_sizes[i]; 213 } 214 215 assert(entries == env->nb_tlb); 216 assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t)); 217 218 env->tlb_dirty = true; 219 220 cfg.array = (uintptr_t)env->tlb.tlbm; 221 cfg.array_len = sizeof(ppcmas_tlb_t) * entries; 222 cfg.params = (uintptr_t)¶ms; 223 cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV; 224 225 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg); 226 if (ret < 0) { 227 fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n", 228 __func__, strerror(-ret)); 229 return ret; 230 } 231 232 env->kvm_sw_tlb = true; 233 return 0; 234 } 235 236 237 #if defined(TARGET_PPC64) 238 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu, 239 struct kvm_ppc_smmu_info *info) 240 { 241 CPUPPCState *env = &cpu->env; 242 CPUState *cs = CPU(cpu); 243 244 memset(info, 0, sizeof(*info)); 245 246 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so 247 * need to "guess" what the supported page sizes are. 248 * 249 * For that to work we make a few assumptions: 250 * 251 * - Check whether we are running "PR" KVM which only supports 4K 252 * and 16M pages, but supports them regardless of the backing 253 * store characteritics. We also don't support 1T segments. 254 * 255 * This is safe as if HV KVM ever supports that capability or PR 256 * KVM grows supports for more page/segment sizes, those versions 257 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we 258 * will not hit this fallback 259 * 260 * - Else we are running HV KVM. This means we only support page 261 * sizes that fit in the backing store. Additionally we only 262 * advertize 64K pages if the processor is ARCH 2.06 and we assume 263 * P7 encodings for the SLB and hash table. Here too, we assume 264 * support for any newer processor will mean a kernel that 265 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit 266 * this fallback. 267 */ 268 if (kvmppc_is_pr(cs->kvm_state)) { 269 /* No flags */ 270 info->flags = 0; 271 info->slb_size = 64; 272 273 /* Standard 4k base page size segment */ 274 info->sps[0].page_shift = 12; 275 info->sps[0].slb_enc = 0; 276 info->sps[0].enc[0].page_shift = 12; 277 info->sps[0].enc[0].pte_enc = 0; 278 279 /* Standard 16M large page size segment */ 280 info->sps[1].page_shift = 24; 281 info->sps[1].slb_enc = SLB_VSID_L; 282 info->sps[1].enc[0].page_shift = 24; 283 info->sps[1].enc[0].pte_enc = 0; 284 } else { 285 int i = 0; 286 287 /* HV KVM has backing store size restrictions */ 288 info->flags = KVM_PPC_PAGE_SIZES_REAL; 289 290 if (env->mmu_model & POWERPC_MMU_1TSEG) { 291 info->flags |= KVM_PPC_1T_SEGMENTS; 292 } 293 294 if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 || 295 POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) { 296 info->slb_size = 32; 297 } else { 298 info->slb_size = 64; 299 } 300 301 /* Standard 4k base page size segment */ 302 info->sps[i].page_shift = 12; 303 info->sps[i].slb_enc = 0; 304 info->sps[i].enc[0].page_shift = 12; 305 info->sps[i].enc[0].pte_enc = 0; 306 i++; 307 308 /* 64K on MMU 2.06 and later */ 309 if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 || 310 POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) { 311 info->sps[i].page_shift = 16; 312 info->sps[i].slb_enc = 0x110; 313 info->sps[i].enc[0].page_shift = 16; 314 info->sps[i].enc[0].pte_enc = 1; 315 i++; 316 } 317 318 /* Standard 16M large page size segment */ 319 info->sps[i].page_shift = 24; 320 info->sps[i].slb_enc = SLB_VSID_L; 321 info->sps[i].enc[0].page_shift = 24; 322 info->sps[i].enc[0].pte_enc = 0; 323 } 324 } 325 326 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info) 327 { 328 CPUState *cs = CPU(cpu); 329 int ret; 330 331 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) { 332 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info); 333 if (ret == 0) { 334 return; 335 } 336 } 337 338 kvm_get_fallback_smmu_info(cpu, info); 339 } 340 341 struct ppc_radix_page_info *kvm_get_radix_page_info(void) 342 { 343 KVMState *s = KVM_STATE(current_machine->accelerator); 344 struct ppc_radix_page_info *radix_page_info; 345 struct kvm_ppc_rmmu_info rmmu_info; 346 int i; 347 348 if (!kvm_check_extension(s, KVM_CAP_PPC_MMU_RADIX)) { 349 return NULL; 350 } 351 if (kvm_vm_ioctl(s, KVM_PPC_GET_RMMU_INFO, &rmmu_info)) { 352 return NULL; 353 } 354 radix_page_info = g_malloc0(sizeof(*radix_page_info)); 355 radix_page_info->count = 0; 356 for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) { 357 if (rmmu_info.ap_encodings[i]) { 358 radix_page_info->entries[i] = rmmu_info.ap_encodings[i]; 359 radix_page_info->count++; 360 } 361 } 362 return radix_page_info; 363 } 364 365 target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu, 366 bool radix, bool gtse, 367 uint64_t proc_tbl) 368 { 369 CPUState *cs = CPU(cpu); 370 int ret; 371 uint64_t flags = 0; 372 struct kvm_ppc_mmuv3_cfg cfg = { 373 .process_table = proc_tbl, 374 }; 375 376 if (radix) { 377 flags |= KVM_PPC_MMUV3_RADIX; 378 } 379 if (gtse) { 380 flags |= KVM_PPC_MMUV3_GTSE; 381 } 382 cfg.flags = flags; 383 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_CONFIGURE_V3_MMU, &cfg); 384 switch (ret) { 385 case 0: 386 return H_SUCCESS; 387 case -EINVAL: 388 return H_PARAMETER; 389 case -ENODEV: 390 return H_NOT_AVAILABLE; 391 default: 392 return H_HARDWARE; 393 } 394 } 395 396 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift) 397 { 398 if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) { 399 return true; 400 } 401 402 return (1ul << shift) <= rampgsize; 403 } 404 405 static long max_cpu_page_size; 406 407 static void kvm_fixup_page_sizes(PowerPCCPU *cpu) 408 { 409 static struct kvm_ppc_smmu_info smmu_info; 410 static bool has_smmu_info; 411 CPUPPCState *env = &cpu->env; 412 int iq, ik, jq, jk; 413 bool has_64k_pages = false; 414 415 /* We only handle page sizes for 64-bit server guests for now */ 416 if (!(env->mmu_model & POWERPC_MMU_64)) { 417 return; 418 } 419 420 /* Collect MMU info from kernel if not already */ 421 if (!has_smmu_info) { 422 kvm_get_smmu_info(cpu, &smmu_info); 423 has_smmu_info = true; 424 } 425 426 if (!max_cpu_page_size) { 427 max_cpu_page_size = qemu_getrampagesize(); 428 } 429 430 /* Convert to QEMU form */ 431 memset(&env->sps, 0, sizeof(env->sps)); 432 433 /* If we have HV KVM, we need to forbid CI large pages if our 434 * host page size is smaller than 64K. 435 */ 436 if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) { 437 env->ci_large_pages = getpagesize() >= 0x10000; 438 } 439 440 /* 441 * XXX This loop should be an entry wide AND of the capabilities that 442 * the selected CPU has with the capabilities that KVM supports. 443 */ 444 for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) { 445 struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq]; 446 struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik]; 447 448 if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size, 449 ksps->page_shift)) { 450 continue; 451 } 452 qsps->page_shift = ksps->page_shift; 453 qsps->slb_enc = ksps->slb_enc; 454 for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) { 455 if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size, 456 ksps->enc[jk].page_shift)) { 457 continue; 458 } 459 if (ksps->enc[jk].page_shift == 16) { 460 has_64k_pages = true; 461 } 462 qsps->enc[jq].page_shift = ksps->enc[jk].page_shift; 463 qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc; 464 if (++jq >= PPC_PAGE_SIZES_MAX_SZ) { 465 break; 466 } 467 } 468 if (++iq >= PPC_PAGE_SIZES_MAX_SZ) { 469 break; 470 } 471 } 472 env->slb_nr = smmu_info.slb_size; 473 if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) { 474 env->mmu_model &= ~POWERPC_MMU_1TSEG; 475 } 476 if (!has_64k_pages) { 477 env->mmu_model &= ~POWERPC_MMU_64K; 478 } 479 } 480 481 bool kvmppc_is_mem_backend_page_size_ok(char *obj_path) 482 { 483 Object *mem_obj = object_resolve_path(obj_path, NULL); 484 char *mempath = object_property_get_str(mem_obj, "mem-path", NULL); 485 long pagesize; 486 487 if (mempath) { 488 pagesize = qemu_mempath_getpagesize(mempath); 489 } else { 490 pagesize = getpagesize(); 491 } 492 493 return pagesize >= max_cpu_page_size; 494 } 495 496 #else /* defined (TARGET_PPC64) */ 497 498 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu) 499 { 500 } 501 502 bool kvmppc_is_mem_backend_page_size_ok(char *obj_path) 503 { 504 return true; 505 } 506 507 #endif /* !defined (TARGET_PPC64) */ 508 509 unsigned long kvm_arch_vcpu_id(CPUState *cpu) 510 { 511 return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu)); 512 } 513 514 /* e500 supports 2 h/w breakpoint and 2 watchpoint. 515 * book3s supports only 1 watchpoint, so array size 516 * of 4 is sufficient for now. 517 */ 518 #define MAX_HW_BKPTS 4 519 520 static struct HWBreakpoint { 521 target_ulong addr; 522 int type; 523 } hw_debug_points[MAX_HW_BKPTS]; 524 525 static CPUWatchpoint hw_watchpoint; 526 527 /* Default there is no breakpoint and watchpoint supported */ 528 static int max_hw_breakpoint; 529 static int max_hw_watchpoint; 530 static int nb_hw_breakpoint; 531 static int nb_hw_watchpoint; 532 533 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv) 534 { 535 if (cenv->excp_model == POWERPC_EXCP_BOOKE) { 536 max_hw_breakpoint = 2; 537 max_hw_watchpoint = 2; 538 } 539 540 if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) { 541 fprintf(stderr, "Error initializing h/w breakpoints\n"); 542 return; 543 } 544 } 545 546 int kvm_arch_init_vcpu(CPUState *cs) 547 { 548 PowerPCCPU *cpu = POWERPC_CPU(cs); 549 CPUPPCState *cenv = &cpu->env; 550 int ret; 551 552 /* Gather server mmu info from KVM and update the CPU state */ 553 kvm_fixup_page_sizes(cpu); 554 555 /* Synchronize sregs with kvm */ 556 ret = kvm_arch_sync_sregs(cpu); 557 if (ret) { 558 if (ret == -EINVAL) { 559 error_report("Register sync failed... If you're using kvm-hv.ko," 560 " only \"-cpu host\" is possible"); 561 } 562 return ret; 563 } 564 565 idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu); 566 567 switch (cenv->mmu_model) { 568 case POWERPC_MMU_BOOKE206: 569 /* This target supports access to KVM's guest TLB */ 570 ret = kvm_booke206_tlb_init(cpu); 571 break; 572 case POWERPC_MMU_2_07: 573 if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) { 574 /* KVM-HV has transactional memory on POWER8 also without the 575 * KVM_CAP_PPC_HTM extension, so enable it here instead as 576 * long as it's availble to userspace on the host. */ 577 if (qemu_getauxval(AT_HWCAP2) & PPC_FEATURE2_HAS_HTM) { 578 cap_htm = true; 579 } 580 } 581 break; 582 default: 583 break; 584 } 585 586 kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode); 587 kvmppc_hw_debug_points_init(cenv); 588 589 return ret; 590 } 591 592 static void kvm_sw_tlb_put(PowerPCCPU *cpu) 593 { 594 CPUPPCState *env = &cpu->env; 595 CPUState *cs = CPU(cpu); 596 struct kvm_dirty_tlb dirty_tlb; 597 unsigned char *bitmap; 598 int ret; 599 600 if (!env->kvm_sw_tlb) { 601 return; 602 } 603 604 bitmap = g_malloc((env->nb_tlb + 7) / 8); 605 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8); 606 607 dirty_tlb.bitmap = (uintptr_t)bitmap; 608 dirty_tlb.num_dirty = env->nb_tlb; 609 610 ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb); 611 if (ret) { 612 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n", 613 __func__, strerror(-ret)); 614 } 615 616 g_free(bitmap); 617 } 618 619 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr) 620 { 621 PowerPCCPU *cpu = POWERPC_CPU(cs); 622 CPUPPCState *env = &cpu->env; 623 union { 624 uint32_t u32; 625 uint64_t u64; 626 } val; 627 struct kvm_one_reg reg = { 628 .id = id, 629 .addr = (uintptr_t) &val, 630 }; 631 int ret; 632 633 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 634 if (ret != 0) { 635 trace_kvm_failed_spr_get(spr, strerror(errno)); 636 } else { 637 switch (id & KVM_REG_SIZE_MASK) { 638 case KVM_REG_SIZE_U32: 639 env->spr[spr] = val.u32; 640 break; 641 642 case KVM_REG_SIZE_U64: 643 env->spr[spr] = val.u64; 644 break; 645 646 default: 647 /* Don't handle this size yet */ 648 abort(); 649 } 650 } 651 } 652 653 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr) 654 { 655 PowerPCCPU *cpu = POWERPC_CPU(cs); 656 CPUPPCState *env = &cpu->env; 657 union { 658 uint32_t u32; 659 uint64_t u64; 660 } val; 661 struct kvm_one_reg reg = { 662 .id = id, 663 .addr = (uintptr_t) &val, 664 }; 665 int ret; 666 667 switch (id & KVM_REG_SIZE_MASK) { 668 case KVM_REG_SIZE_U32: 669 val.u32 = env->spr[spr]; 670 break; 671 672 case KVM_REG_SIZE_U64: 673 val.u64 = env->spr[spr]; 674 break; 675 676 default: 677 /* Don't handle this size yet */ 678 abort(); 679 } 680 681 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 682 if (ret != 0) { 683 trace_kvm_failed_spr_set(spr, strerror(errno)); 684 } 685 } 686 687 static int kvm_put_fp(CPUState *cs) 688 { 689 PowerPCCPU *cpu = POWERPC_CPU(cs); 690 CPUPPCState *env = &cpu->env; 691 struct kvm_one_reg reg; 692 int i; 693 int ret; 694 695 if (env->insns_flags & PPC_FLOAT) { 696 uint64_t fpscr = env->fpscr; 697 bool vsx = !!(env->insns_flags2 & PPC2_VSX); 698 699 reg.id = KVM_REG_PPC_FPSCR; 700 reg.addr = (uintptr_t)&fpscr; 701 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 702 if (ret < 0) { 703 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno)); 704 return ret; 705 } 706 707 for (i = 0; i < 32; i++) { 708 uint64_t vsr[2]; 709 710 #ifdef HOST_WORDS_BIGENDIAN 711 vsr[0] = float64_val(env->fpr[i]); 712 vsr[1] = env->vsr[i]; 713 #else 714 vsr[0] = env->vsr[i]; 715 vsr[1] = float64_val(env->fpr[i]); 716 #endif 717 reg.addr = (uintptr_t) &vsr; 718 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i); 719 720 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 721 if (ret < 0) { 722 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR", 723 i, strerror(errno)); 724 return ret; 725 } 726 } 727 } 728 729 if (env->insns_flags & PPC_ALTIVEC) { 730 reg.id = KVM_REG_PPC_VSCR; 731 reg.addr = (uintptr_t)&env->vscr; 732 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 733 if (ret < 0) { 734 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno)); 735 return ret; 736 } 737 738 for (i = 0; i < 32; i++) { 739 reg.id = KVM_REG_PPC_VR(i); 740 reg.addr = (uintptr_t)&env->avr[i]; 741 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 742 if (ret < 0) { 743 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno)); 744 return ret; 745 } 746 } 747 } 748 749 return 0; 750 } 751 752 static int kvm_get_fp(CPUState *cs) 753 { 754 PowerPCCPU *cpu = POWERPC_CPU(cs); 755 CPUPPCState *env = &cpu->env; 756 struct kvm_one_reg reg; 757 int i; 758 int ret; 759 760 if (env->insns_flags & PPC_FLOAT) { 761 uint64_t fpscr; 762 bool vsx = !!(env->insns_flags2 & PPC2_VSX); 763 764 reg.id = KVM_REG_PPC_FPSCR; 765 reg.addr = (uintptr_t)&fpscr; 766 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 767 if (ret < 0) { 768 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno)); 769 return ret; 770 } else { 771 env->fpscr = fpscr; 772 } 773 774 for (i = 0; i < 32; i++) { 775 uint64_t vsr[2]; 776 777 reg.addr = (uintptr_t) &vsr; 778 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i); 779 780 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 781 if (ret < 0) { 782 DPRINTF("Unable to get %s%d from KVM: %s\n", 783 vsx ? "VSR" : "FPR", i, strerror(errno)); 784 return ret; 785 } else { 786 #ifdef HOST_WORDS_BIGENDIAN 787 env->fpr[i] = vsr[0]; 788 if (vsx) { 789 env->vsr[i] = vsr[1]; 790 } 791 #else 792 env->fpr[i] = vsr[1]; 793 if (vsx) { 794 env->vsr[i] = vsr[0]; 795 } 796 #endif 797 } 798 } 799 } 800 801 if (env->insns_flags & PPC_ALTIVEC) { 802 reg.id = KVM_REG_PPC_VSCR; 803 reg.addr = (uintptr_t)&env->vscr; 804 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 805 if (ret < 0) { 806 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno)); 807 return ret; 808 } 809 810 for (i = 0; i < 32; i++) { 811 reg.id = KVM_REG_PPC_VR(i); 812 reg.addr = (uintptr_t)&env->avr[i]; 813 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 814 if (ret < 0) { 815 DPRINTF("Unable to get VR%d from KVM: %s\n", 816 i, strerror(errno)); 817 return ret; 818 } 819 } 820 } 821 822 return 0; 823 } 824 825 #if defined(TARGET_PPC64) 826 static int kvm_get_vpa(CPUState *cs) 827 { 828 PowerPCCPU *cpu = POWERPC_CPU(cs); 829 CPUPPCState *env = &cpu->env; 830 struct kvm_one_reg reg; 831 int ret; 832 833 reg.id = KVM_REG_PPC_VPA_ADDR; 834 reg.addr = (uintptr_t)&env->vpa_addr; 835 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 836 if (ret < 0) { 837 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno)); 838 return ret; 839 } 840 841 assert((uintptr_t)&env->slb_shadow_size 842 == ((uintptr_t)&env->slb_shadow_addr + 8)); 843 reg.id = KVM_REG_PPC_VPA_SLB; 844 reg.addr = (uintptr_t)&env->slb_shadow_addr; 845 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 846 if (ret < 0) { 847 DPRINTF("Unable to get SLB shadow state from KVM: %s\n", 848 strerror(errno)); 849 return ret; 850 } 851 852 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8)); 853 reg.id = KVM_REG_PPC_VPA_DTL; 854 reg.addr = (uintptr_t)&env->dtl_addr; 855 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 856 if (ret < 0) { 857 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n", 858 strerror(errno)); 859 return ret; 860 } 861 862 return 0; 863 } 864 865 static int kvm_put_vpa(CPUState *cs) 866 { 867 PowerPCCPU *cpu = POWERPC_CPU(cs); 868 CPUPPCState *env = &cpu->env; 869 struct kvm_one_reg reg; 870 int ret; 871 872 /* SLB shadow or DTL can't be registered unless a master VPA is 873 * registered. That means when restoring state, if a VPA *is* 874 * registered, we need to set that up first. If not, we need to 875 * deregister the others before deregistering the master VPA */ 876 assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr)); 877 878 if (env->vpa_addr) { 879 reg.id = KVM_REG_PPC_VPA_ADDR; 880 reg.addr = (uintptr_t)&env->vpa_addr; 881 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 882 if (ret < 0) { 883 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno)); 884 return ret; 885 } 886 } 887 888 assert((uintptr_t)&env->slb_shadow_size 889 == ((uintptr_t)&env->slb_shadow_addr + 8)); 890 reg.id = KVM_REG_PPC_VPA_SLB; 891 reg.addr = (uintptr_t)&env->slb_shadow_addr; 892 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 893 if (ret < 0) { 894 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno)); 895 return ret; 896 } 897 898 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8)); 899 reg.id = KVM_REG_PPC_VPA_DTL; 900 reg.addr = (uintptr_t)&env->dtl_addr; 901 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 902 if (ret < 0) { 903 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n", 904 strerror(errno)); 905 return ret; 906 } 907 908 if (!env->vpa_addr) { 909 reg.id = KVM_REG_PPC_VPA_ADDR; 910 reg.addr = (uintptr_t)&env->vpa_addr; 911 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 912 if (ret < 0) { 913 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno)); 914 return ret; 915 } 916 } 917 918 return 0; 919 } 920 #endif /* TARGET_PPC64 */ 921 922 int kvmppc_put_books_sregs(PowerPCCPU *cpu) 923 { 924 CPUPPCState *env = &cpu->env; 925 struct kvm_sregs sregs; 926 int i; 927 928 sregs.pvr = env->spr[SPR_PVR]; 929 930 sregs.u.s.sdr1 = env->spr[SPR_SDR1]; 931 932 /* Sync SLB */ 933 #ifdef TARGET_PPC64 934 for (i = 0; i < ARRAY_SIZE(env->slb); i++) { 935 sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid; 936 if (env->slb[i].esid & SLB_ESID_V) { 937 sregs.u.s.ppc64.slb[i].slbe |= i; 938 } 939 sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid; 940 } 941 #endif 942 943 /* Sync SRs */ 944 for (i = 0; i < 16; i++) { 945 sregs.u.s.ppc32.sr[i] = env->sr[i]; 946 } 947 948 /* Sync BATs */ 949 for (i = 0; i < 8; i++) { 950 /* Beware. We have to swap upper and lower bits here */ 951 sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32) 952 | env->DBAT[1][i]; 953 sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32) 954 | env->IBAT[1][i]; 955 } 956 957 return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs); 958 } 959 960 int kvm_arch_put_registers(CPUState *cs, int level) 961 { 962 PowerPCCPU *cpu = POWERPC_CPU(cs); 963 CPUPPCState *env = &cpu->env; 964 struct kvm_regs regs; 965 int ret; 966 int i; 967 968 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, ®s); 969 if (ret < 0) { 970 return ret; 971 } 972 973 regs.ctr = env->ctr; 974 regs.lr = env->lr; 975 regs.xer = cpu_read_xer(env); 976 regs.msr = env->msr; 977 regs.pc = env->nip; 978 979 regs.srr0 = env->spr[SPR_SRR0]; 980 regs.srr1 = env->spr[SPR_SRR1]; 981 982 regs.sprg0 = env->spr[SPR_SPRG0]; 983 regs.sprg1 = env->spr[SPR_SPRG1]; 984 regs.sprg2 = env->spr[SPR_SPRG2]; 985 regs.sprg3 = env->spr[SPR_SPRG3]; 986 regs.sprg4 = env->spr[SPR_SPRG4]; 987 regs.sprg5 = env->spr[SPR_SPRG5]; 988 regs.sprg6 = env->spr[SPR_SPRG6]; 989 regs.sprg7 = env->spr[SPR_SPRG7]; 990 991 regs.pid = env->spr[SPR_BOOKE_PID]; 992 993 for (i = 0;i < 32; i++) 994 regs.gpr[i] = env->gpr[i]; 995 996 regs.cr = 0; 997 for (i = 0; i < 8; i++) { 998 regs.cr |= (env->crf[i] & 15) << (4 * (7 - i)); 999 } 1000 1001 ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, ®s); 1002 if (ret < 0) 1003 return ret; 1004 1005 kvm_put_fp(cs); 1006 1007 if (env->tlb_dirty) { 1008 kvm_sw_tlb_put(cpu); 1009 env->tlb_dirty = false; 1010 } 1011 1012 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) { 1013 ret = kvmppc_put_books_sregs(cpu); 1014 if (ret < 0) { 1015 return ret; 1016 } 1017 } 1018 1019 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) { 1020 kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR); 1021 } 1022 1023 if (cap_one_reg) { 1024 int i; 1025 1026 /* We deliberately ignore errors here, for kernels which have 1027 * the ONE_REG calls, but don't support the specific 1028 * registers, there's a reasonable chance things will still 1029 * work, at least until we try to migrate. */ 1030 for (i = 0; i < 1024; i++) { 1031 uint64_t id = env->spr_cb[i].one_reg_id; 1032 1033 if (id != 0) { 1034 kvm_put_one_spr(cs, id, i); 1035 } 1036 } 1037 1038 #ifdef TARGET_PPC64 1039 if (msr_ts) { 1040 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) { 1041 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]); 1042 } 1043 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) { 1044 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]); 1045 } 1046 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr); 1047 kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr); 1048 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr); 1049 kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr); 1050 kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr); 1051 kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr); 1052 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave); 1053 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr); 1054 kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr); 1055 kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar); 1056 } 1057 1058 if (cap_papr) { 1059 if (kvm_put_vpa(cs) < 0) { 1060 DPRINTF("Warning: Unable to set VPA information to KVM\n"); 1061 } 1062 } 1063 1064 kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset); 1065 #endif /* TARGET_PPC64 */ 1066 } 1067 1068 return ret; 1069 } 1070 1071 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor) 1072 { 1073 env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR]; 1074 } 1075 1076 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu) 1077 { 1078 CPUPPCState *env = &cpu->env; 1079 struct kvm_sregs sregs; 1080 int ret; 1081 1082 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs); 1083 if (ret < 0) { 1084 return ret; 1085 } 1086 1087 if (sregs.u.e.features & KVM_SREGS_E_BASE) { 1088 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0; 1089 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1; 1090 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr; 1091 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear; 1092 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr; 1093 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr; 1094 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr; 1095 env->spr[SPR_DECR] = sregs.u.e.dec; 1096 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff; 1097 env->spr[SPR_TBU] = sregs.u.e.tb >> 32; 1098 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave; 1099 } 1100 1101 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) { 1102 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir; 1103 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0; 1104 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1; 1105 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar; 1106 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr; 1107 } 1108 1109 if (sregs.u.e.features & KVM_SREGS_E_64) { 1110 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr; 1111 } 1112 1113 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) { 1114 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8; 1115 } 1116 1117 if (sregs.u.e.features & KVM_SREGS_E_IVOR) { 1118 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0]; 1119 kvm_sync_excp(env, POWERPC_EXCP_CRITICAL, SPR_BOOKE_IVOR0); 1120 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1]; 1121 kvm_sync_excp(env, POWERPC_EXCP_MCHECK, SPR_BOOKE_IVOR1); 1122 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2]; 1123 kvm_sync_excp(env, POWERPC_EXCP_DSI, SPR_BOOKE_IVOR2); 1124 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3]; 1125 kvm_sync_excp(env, POWERPC_EXCP_ISI, SPR_BOOKE_IVOR3); 1126 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4]; 1127 kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL, SPR_BOOKE_IVOR4); 1128 env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5]; 1129 kvm_sync_excp(env, POWERPC_EXCP_ALIGN, SPR_BOOKE_IVOR5); 1130 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6]; 1131 kvm_sync_excp(env, POWERPC_EXCP_PROGRAM, SPR_BOOKE_IVOR6); 1132 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7]; 1133 kvm_sync_excp(env, POWERPC_EXCP_FPU, SPR_BOOKE_IVOR7); 1134 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8]; 1135 kvm_sync_excp(env, POWERPC_EXCP_SYSCALL, SPR_BOOKE_IVOR8); 1136 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9]; 1137 kvm_sync_excp(env, POWERPC_EXCP_APU, SPR_BOOKE_IVOR9); 1138 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10]; 1139 kvm_sync_excp(env, POWERPC_EXCP_DECR, SPR_BOOKE_IVOR10); 1140 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11]; 1141 kvm_sync_excp(env, POWERPC_EXCP_FIT, SPR_BOOKE_IVOR11); 1142 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12]; 1143 kvm_sync_excp(env, POWERPC_EXCP_WDT, SPR_BOOKE_IVOR12); 1144 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13]; 1145 kvm_sync_excp(env, POWERPC_EXCP_DTLB, SPR_BOOKE_IVOR13); 1146 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14]; 1147 kvm_sync_excp(env, POWERPC_EXCP_ITLB, SPR_BOOKE_IVOR14); 1148 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15]; 1149 kvm_sync_excp(env, POWERPC_EXCP_DEBUG, SPR_BOOKE_IVOR15); 1150 1151 if (sregs.u.e.features & KVM_SREGS_E_SPE) { 1152 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0]; 1153 kvm_sync_excp(env, POWERPC_EXCP_SPEU, SPR_BOOKE_IVOR32); 1154 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1]; 1155 kvm_sync_excp(env, POWERPC_EXCP_EFPDI, SPR_BOOKE_IVOR33); 1156 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2]; 1157 kvm_sync_excp(env, POWERPC_EXCP_EFPRI, SPR_BOOKE_IVOR34); 1158 } 1159 1160 if (sregs.u.e.features & KVM_SREGS_E_PM) { 1161 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3]; 1162 kvm_sync_excp(env, POWERPC_EXCP_EPERFM, SPR_BOOKE_IVOR35); 1163 } 1164 1165 if (sregs.u.e.features & KVM_SREGS_E_PC) { 1166 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4]; 1167 kvm_sync_excp(env, POWERPC_EXCP_DOORI, SPR_BOOKE_IVOR36); 1168 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5]; 1169 kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37); 1170 } 1171 } 1172 1173 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) { 1174 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0; 1175 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1; 1176 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2; 1177 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff; 1178 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4; 1179 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6; 1180 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32; 1181 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg; 1182 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0]; 1183 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1]; 1184 } 1185 1186 if (sregs.u.e.features & KVM_SREGS_EXP) { 1187 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr; 1188 } 1189 1190 if (sregs.u.e.features & KVM_SREGS_E_PD) { 1191 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc; 1192 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc; 1193 } 1194 1195 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) { 1196 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr; 1197 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar; 1198 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0; 1199 1200 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) { 1201 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1; 1202 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2; 1203 } 1204 } 1205 1206 return 0; 1207 } 1208 1209 static int kvmppc_get_books_sregs(PowerPCCPU *cpu) 1210 { 1211 CPUPPCState *env = &cpu->env; 1212 struct kvm_sregs sregs; 1213 int ret; 1214 int i; 1215 1216 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs); 1217 if (ret < 0) { 1218 return ret; 1219 } 1220 1221 if (!cpu->vhyp) { 1222 ppc_store_sdr1(env, sregs.u.s.sdr1); 1223 } 1224 1225 /* Sync SLB */ 1226 #ifdef TARGET_PPC64 1227 /* 1228 * The packed SLB array we get from KVM_GET_SREGS only contains 1229 * information about valid entries. So we flush our internal copy 1230 * to get rid of stale ones, then put all valid SLB entries back 1231 * in. 1232 */ 1233 memset(env->slb, 0, sizeof(env->slb)); 1234 for (i = 0; i < ARRAY_SIZE(env->slb); i++) { 1235 target_ulong rb = sregs.u.s.ppc64.slb[i].slbe; 1236 target_ulong rs = sregs.u.s.ppc64.slb[i].slbv; 1237 /* 1238 * Only restore valid entries 1239 */ 1240 if (rb & SLB_ESID_V) { 1241 ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs); 1242 } 1243 } 1244 #endif 1245 1246 /* Sync SRs */ 1247 for (i = 0; i < 16; i++) { 1248 env->sr[i] = sregs.u.s.ppc32.sr[i]; 1249 } 1250 1251 /* Sync BATs */ 1252 for (i = 0; i < 8; i++) { 1253 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff; 1254 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32; 1255 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff; 1256 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32; 1257 } 1258 1259 return 0; 1260 } 1261 1262 int kvm_arch_get_registers(CPUState *cs) 1263 { 1264 PowerPCCPU *cpu = POWERPC_CPU(cs); 1265 CPUPPCState *env = &cpu->env; 1266 struct kvm_regs regs; 1267 uint32_t cr; 1268 int i, ret; 1269 1270 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, ®s); 1271 if (ret < 0) 1272 return ret; 1273 1274 cr = regs.cr; 1275 for (i = 7; i >= 0; i--) { 1276 env->crf[i] = cr & 15; 1277 cr >>= 4; 1278 } 1279 1280 env->ctr = regs.ctr; 1281 env->lr = regs.lr; 1282 cpu_write_xer(env, regs.xer); 1283 env->msr = regs.msr; 1284 env->nip = regs.pc; 1285 1286 env->spr[SPR_SRR0] = regs.srr0; 1287 env->spr[SPR_SRR1] = regs.srr1; 1288 1289 env->spr[SPR_SPRG0] = regs.sprg0; 1290 env->spr[SPR_SPRG1] = regs.sprg1; 1291 env->spr[SPR_SPRG2] = regs.sprg2; 1292 env->spr[SPR_SPRG3] = regs.sprg3; 1293 env->spr[SPR_SPRG4] = regs.sprg4; 1294 env->spr[SPR_SPRG5] = regs.sprg5; 1295 env->spr[SPR_SPRG6] = regs.sprg6; 1296 env->spr[SPR_SPRG7] = regs.sprg7; 1297 1298 env->spr[SPR_BOOKE_PID] = regs.pid; 1299 1300 for (i = 0;i < 32; i++) 1301 env->gpr[i] = regs.gpr[i]; 1302 1303 kvm_get_fp(cs); 1304 1305 if (cap_booke_sregs) { 1306 ret = kvmppc_get_booke_sregs(cpu); 1307 if (ret < 0) { 1308 return ret; 1309 } 1310 } 1311 1312 if (cap_segstate) { 1313 ret = kvmppc_get_books_sregs(cpu); 1314 if (ret < 0) { 1315 return ret; 1316 } 1317 } 1318 1319 if (cap_hior) { 1320 kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR); 1321 } 1322 1323 if (cap_one_reg) { 1324 int i; 1325 1326 /* We deliberately ignore errors here, for kernels which have 1327 * the ONE_REG calls, but don't support the specific 1328 * registers, there's a reasonable chance things will still 1329 * work, at least until we try to migrate. */ 1330 for (i = 0; i < 1024; i++) { 1331 uint64_t id = env->spr_cb[i].one_reg_id; 1332 1333 if (id != 0) { 1334 kvm_get_one_spr(cs, id, i); 1335 } 1336 } 1337 1338 #ifdef TARGET_PPC64 1339 if (msr_ts) { 1340 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) { 1341 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]); 1342 } 1343 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) { 1344 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]); 1345 } 1346 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr); 1347 kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr); 1348 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr); 1349 kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr); 1350 kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr); 1351 kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr); 1352 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave); 1353 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr); 1354 kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr); 1355 kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar); 1356 } 1357 1358 if (cap_papr) { 1359 if (kvm_get_vpa(cs) < 0) { 1360 DPRINTF("Warning: Unable to get VPA information from KVM\n"); 1361 } 1362 } 1363 1364 kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset); 1365 #endif 1366 } 1367 1368 return 0; 1369 } 1370 1371 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level) 1372 { 1373 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET; 1374 1375 if (irq != PPC_INTERRUPT_EXT) { 1376 return 0; 1377 } 1378 1379 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) { 1380 return 0; 1381 } 1382 1383 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq); 1384 1385 return 0; 1386 } 1387 1388 #if defined(TARGET_PPCEMB) 1389 #define PPC_INPUT_INT PPC40x_INPUT_INT 1390 #elif defined(TARGET_PPC64) 1391 #define PPC_INPUT_INT PPC970_INPUT_INT 1392 #else 1393 #define PPC_INPUT_INT PPC6xx_INPUT_INT 1394 #endif 1395 1396 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run) 1397 { 1398 PowerPCCPU *cpu = POWERPC_CPU(cs); 1399 CPUPPCState *env = &cpu->env; 1400 int r; 1401 unsigned irq; 1402 1403 qemu_mutex_lock_iothread(); 1404 1405 /* PowerPC QEMU tracks the various core input pins (interrupt, critical 1406 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */ 1407 if (!cap_interrupt_level && 1408 run->ready_for_interrupt_injection && 1409 (cs->interrupt_request & CPU_INTERRUPT_HARD) && 1410 (env->irq_input_state & (1<<PPC_INPUT_INT))) 1411 { 1412 /* For now KVM disregards the 'irq' argument. However, in the 1413 * future KVM could cache it in-kernel to avoid a heavyweight exit 1414 * when reading the UIC. 1415 */ 1416 irq = KVM_INTERRUPT_SET; 1417 1418 DPRINTF("injected interrupt %d\n", irq); 1419 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq); 1420 if (r < 0) { 1421 printf("cpu %d fail inject %x\n", cs->cpu_index, irq); 1422 } 1423 1424 /* Always wake up soon in case the interrupt was level based */ 1425 timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 1426 (NANOSECONDS_PER_SECOND / 50)); 1427 } 1428 1429 /* We don't know if there are more interrupts pending after this. However, 1430 * the guest will return to userspace in the course of handling this one 1431 * anyways, so we will get a chance to deliver the rest. */ 1432 1433 qemu_mutex_unlock_iothread(); 1434 } 1435 1436 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run) 1437 { 1438 return MEMTXATTRS_UNSPECIFIED; 1439 } 1440 1441 int kvm_arch_process_async_events(CPUState *cs) 1442 { 1443 return cs->halted; 1444 } 1445 1446 static int kvmppc_handle_halt(PowerPCCPU *cpu) 1447 { 1448 CPUState *cs = CPU(cpu); 1449 CPUPPCState *env = &cpu->env; 1450 1451 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) { 1452 cs->halted = 1; 1453 cs->exception_index = EXCP_HLT; 1454 } 1455 1456 return 0; 1457 } 1458 1459 /* map dcr access to existing qemu dcr emulation */ 1460 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data) 1461 { 1462 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0) 1463 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn); 1464 1465 return 0; 1466 } 1467 1468 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data) 1469 { 1470 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0) 1471 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn); 1472 1473 return 0; 1474 } 1475 1476 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp) 1477 { 1478 /* Mixed endian case is not handled */ 1479 uint32_t sc = debug_inst_opcode; 1480 1481 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 1482 sizeof(sc), 0) || 1483 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) { 1484 return -EINVAL; 1485 } 1486 1487 return 0; 1488 } 1489 1490 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp) 1491 { 1492 uint32_t sc; 1493 1494 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) || 1495 sc != debug_inst_opcode || 1496 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 1497 sizeof(sc), 1)) { 1498 return -EINVAL; 1499 } 1500 1501 return 0; 1502 } 1503 1504 static int find_hw_breakpoint(target_ulong addr, int type) 1505 { 1506 int n; 1507 1508 assert((nb_hw_breakpoint + nb_hw_watchpoint) 1509 <= ARRAY_SIZE(hw_debug_points)); 1510 1511 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) { 1512 if (hw_debug_points[n].addr == addr && 1513 hw_debug_points[n].type == type) { 1514 return n; 1515 } 1516 } 1517 1518 return -1; 1519 } 1520 1521 static int find_hw_watchpoint(target_ulong addr, int *flag) 1522 { 1523 int n; 1524 1525 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS); 1526 if (n >= 0) { 1527 *flag = BP_MEM_ACCESS; 1528 return n; 1529 } 1530 1531 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE); 1532 if (n >= 0) { 1533 *flag = BP_MEM_WRITE; 1534 return n; 1535 } 1536 1537 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ); 1538 if (n >= 0) { 1539 *flag = BP_MEM_READ; 1540 return n; 1541 } 1542 1543 return -1; 1544 } 1545 1546 int kvm_arch_insert_hw_breakpoint(target_ulong addr, 1547 target_ulong len, int type) 1548 { 1549 if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) { 1550 return -ENOBUFS; 1551 } 1552 1553 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr; 1554 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type; 1555 1556 switch (type) { 1557 case GDB_BREAKPOINT_HW: 1558 if (nb_hw_breakpoint >= max_hw_breakpoint) { 1559 return -ENOBUFS; 1560 } 1561 1562 if (find_hw_breakpoint(addr, type) >= 0) { 1563 return -EEXIST; 1564 } 1565 1566 nb_hw_breakpoint++; 1567 break; 1568 1569 case GDB_WATCHPOINT_WRITE: 1570 case GDB_WATCHPOINT_READ: 1571 case GDB_WATCHPOINT_ACCESS: 1572 if (nb_hw_watchpoint >= max_hw_watchpoint) { 1573 return -ENOBUFS; 1574 } 1575 1576 if (find_hw_breakpoint(addr, type) >= 0) { 1577 return -EEXIST; 1578 } 1579 1580 nb_hw_watchpoint++; 1581 break; 1582 1583 default: 1584 return -ENOSYS; 1585 } 1586 1587 return 0; 1588 } 1589 1590 int kvm_arch_remove_hw_breakpoint(target_ulong addr, 1591 target_ulong len, int type) 1592 { 1593 int n; 1594 1595 n = find_hw_breakpoint(addr, type); 1596 if (n < 0) { 1597 return -ENOENT; 1598 } 1599 1600 switch (type) { 1601 case GDB_BREAKPOINT_HW: 1602 nb_hw_breakpoint--; 1603 break; 1604 1605 case GDB_WATCHPOINT_WRITE: 1606 case GDB_WATCHPOINT_READ: 1607 case GDB_WATCHPOINT_ACCESS: 1608 nb_hw_watchpoint--; 1609 break; 1610 1611 default: 1612 return -ENOSYS; 1613 } 1614 hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint]; 1615 1616 return 0; 1617 } 1618 1619 void kvm_arch_remove_all_hw_breakpoints(void) 1620 { 1621 nb_hw_breakpoint = nb_hw_watchpoint = 0; 1622 } 1623 1624 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg) 1625 { 1626 int n; 1627 1628 /* Software Breakpoint updates */ 1629 if (kvm_sw_breakpoints_active(cs)) { 1630 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP; 1631 } 1632 1633 assert((nb_hw_breakpoint + nb_hw_watchpoint) 1634 <= ARRAY_SIZE(hw_debug_points)); 1635 assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp)); 1636 1637 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) { 1638 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP; 1639 memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp)); 1640 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) { 1641 switch (hw_debug_points[n].type) { 1642 case GDB_BREAKPOINT_HW: 1643 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT; 1644 break; 1645 case GDB_WATCHPOINT_WRITE: 1646 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE; 1647 break; 1648 case GDB_WATCHPOINT_READ: 1649 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ; 1650 break; 1651 case GDB_WATCHPOINT_ACCESS: 1652 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE | 1653 KVMPPC_DEBUG_WATCH_READ; 1654 break; 1655 default: 1656 cpu_abort(cs, "Unsupported breakpoint type\n"); 1657 } 1658 dbg->arch.bp[n].addr = hw_debug_points[n].addr; 1659 } 1660 } 1661 } 1662 1663 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run) 1664 { 1665 CPUState *cs = CPU(cpu); 1666 CPUPPCState *env = &cpu->env; 1667 struct kvm_debug_exit_arch *arch_info = &run->debug.arch; 1668 int handle = 0; 1669 int n; 1670 int flag = 0; 1671 1672 if (cs->singlestep_enabled) { 1673 handle = 1; 1674 } else if (arch_info->status) { 1675 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) { 1676 if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) { 1677 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW); 1678 if (n >= 0) { 1679 handle = 1; 1680 } 1681 } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ | 1682 KVMPPC_DEBUG_WATCH_WRITE)) { 1683 n = find_hw_watchpoint(arch_info->address, &flag); 1684 if (n >= 0) { 1685 handle = 1; 1686 cs->watchpoint_hit = &hw_watchpoint; 1687 hw_watchpoint.vaddr = hw_debug_points[n].addr; 1688 hw_watchpoint.flags = flag; 1689 } 1690 } 1691 } 1692 } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) { 1693 handle = 1; 1694 } else { 1695 /* QEMU is not able to handle debug exception, so inject 1696 * program exception to guest; 1697 * Yes program exception NOT debug exception !! 1698 * When QEMU is using debug resources then debug exception must 1699 * be always set. To achieve this we set MSR_DE and also set 1700 * MSRP_DEP so guest cannot change MSR_DE. 1701 * When emulating debug resource for guest we want guest 1702 * to control MSR_DE (enable/disable debug interrupt on need). 1703 * Supporting both configurations are NOT possible. 1704 * So the result is that we cannot share debug resources 1705 * between QEMU and Guest on BOOKE architecture. 1706 * In the current design QEMU gets the priority over guest, 1707 * this means that if QEMU is using debug resources then guest 1708 * cannot use them; 1709 * For software breakpoint QEMU uses a privileged instruction; 1710 * So there cannot be any reason that we are here for guest 1711 * set debug exception, only possibility is guest executed a 1712 * privileged / illegal instruction and that's why we are 1713 * injecting a program interrupt. 1714 */ 1715 1716 cpu_synchronize_state(cs); 1717 /* env->nip is PC, so increment this by 4 to use 1718 * ppc_cpu_do_interrupt(), which set srr0 = env->nip - 4. 1719 */ 1720 env->nip += 4; 1721 cs->exception_index = POWERPC_EXCP_PROGRAM; 1722 env->error_code = POWERPC_EXCP_INVAL; 1723 ppc_cpu_do_interrupt(cs); 1724 } 1725 1726 return handle; 1727 } 1728 1729 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) 1730 { 1731 PowerPCCPU *cpu = POWERPC_CPU(cs); 1732 CPUPPCState *env = &cpu->env; 1733 int ret; 1734 1735 qemu_mutex_lock_iothread(); 1736 1737 switch (run->exit_reason) { 1738 case KVM_EXIT_DCR: 1739 if (run->dcr.is_write) { 1740 DPRINTF("handle dcr write\n"); 1741 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data); 1742 } else { 1743 DPRINTF("handle dcr read\n"); 1744 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data); 1745 } 1746 break; 1747 case KVM_EXIT_HLT: 1748 DPRINTF("handle halt\n"); 1749 ret = kvmppc_handle_halt(cpu); 1750 break; 1751 #if defined(TARGET_PPC64) 1752 case KVM_EXIT_PAPR_HCALL: 1753 DPRINTF("handle PAPR hypercall\n"); 1754 run->papr_hcall.ret = spapr_hypercall(cpu, 1755 run->papr_hcall.nr, 1756 run->papr_hcall.args); 1757 ret = 0; 1758 break; 1759 #endif 1760 case KVM_EXIT_EPR: 1761 DPRINTF("handle epr\n"); 1762 run->epr.epr = ldl_phys(cs->as, env->mpic_iack); 1763 ret = 0; 1764 break; 1765 case KVM_EXIT_WATCHDOG: 1766 DPRINTF("handle watchdog expiry\n"); 1767 watchdog_perform_action(); 1768 ret = 0; 1769 break; 1770 1771 case KVM_EXIT_DEBUG: 1772 DPRINTF("handle debug exception\n"); 1773 if (kvm_handle_debug(cpu, run)) { 1774 ret = EXCP_DEBUG; 1775 break; 1776 } 1777 /* re-enter, this exception was guest-internal */ 1778 ret = 0; 1779 break; 1780 1781 default: 1782 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason); 1783 ret = -1; 1784 break; 1785 } 1786 1787 qemu_mutex_unlock_iothread(); 1788 return ret; 1789 } 1790 1791 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits) 1792 { 1793 CPUState *cs = CPU(cpu); 1794 uint32_t bits = tsr_bits; 1795 struct kvm_one_reg reg = { 1796 .id = KVM_REG_PPC_OR_TSR, 1797 .addr = (uintptr_t) &bits, 1798 }; 1799 1800 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 1801 } 1802 1803 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits) 1804 { 1805 1806 CPUState *cs = CPU(cpu); 1807 uint32_t bits = tsr_bits; 1808 struct kvm_one_reg reg = { 1809 .id = KVM_REG_PPC_CLEAR_TSR, 1810 .addr = (uintptr_t) &bits, 1811 }; 1812 1813 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 1814 } 1815 1816 int kvmppc_set_tcr(PowerPCCPU *cpu) 1817 { 1818 CPUState *cs = CPU(cpu); 1819 CPUPPCState *env = &cpu->env; 1820 uint32_t tcr = env->spr[SPR_BOOKE_TCR]; 1821 1822 struct kvm_one_reg reg = { 1823 .id = KVM_REG_PPC_TCR, 1824 .addr = (uintptr_t) &tcr, 1825 }; 1826 1827 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 1828 } 1829 1830 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu) 1831 { 1832 CPUState *cs = CPU(cpu); 1833 int ret; 1834 1835 if (!kvm_enabled()) { 1836 return -1; 1837 } 1838 1839 if (!cap_ppc_watchdog) { 1840 printf("warning: KVM does not support watchdog"); 1841 return -1; 1842 } 1843 1844 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0); 1845 if (ret < 0) { 1846 fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n", 1847 __func__, strerror(-ret)); 1848 return ret; 1849 } 1850 1851 return ret; 1852 } 1853 1854 static int read_cpuinfo(const char *field, char *value, int len) 1855 { 1856 FILE *f; 1857 int ret = -1; 1858 int field_len = strlen(field); 1859 char line[512]; 1860 1861 f = fopen("/proc/cpuinfo", "r"); 1862 if (!f) { 1863 return -1; 1864 } 1865 1866 do { 1867 if (!fgets(line, sizeof(line), f)) { 1868 break; 1869 } 1870 if (!strncmp(line, field, field_len)) { 1871 pstrcpy(value, len, line); 1872 ret = 0; 1873 break; 1874 } 1875 } while(*line); 1876 1877 fclose(f); 1878 1879 return ret; 1880 } 1881 1882 uint32_t kvmppc_get_tbfreq(void) 1883 { 1884 char line[512]; 1885 char *ns; 1886 uint32_t retval = NANOSECONDS_PER_SECOND; 1887 1888 if (read_cpuinfo("timebase", line, sizeof(line))) { 1889 return retval; 1890 } 1891 1892 if (!(ns = strchr(line, ':'))) { 1893 return retval; 1894 } 1895 1896 ns++; 1897 1898 return atoi(ns); 1899 } 1900 1901 bool kvmppc_get_host_serial(char **value) 1902 { 1903 return g_file_get_contents("/proc/device-tree/system-id", value, NULL, 1904 NULL); 1905 } 1906 1907 bool kvmppc_get_host_model(char **value) 1908 { 1909 return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL); 1910 } 1911 1912 /* Try to find a device tree node for a CPU with clock-frequency property */ 1913 static int kvmppc_find_cpu_dt(char *buf, int buf_len) 1914 { 1915 struct dirent *dirp; 1916 DIR *dp; 1917 1918 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) { 1919 printf("Can't open directory " PROC_DEVTREE_CPU "\n"); 1920 return -1; 1921 } 1922 1923 buf[0] = '\0'; 1924 while ((dirp = readdir(dp)) != NULL) { 1925 FILE *f; 1926 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU, 1927 dirp->d_name); 1928 f = fopen(buf, "r"); 1929 if (f) { 1930 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name); 1931 fclose(f); 1932 break; 1933 } 1934 buf[0] = '\0'; 1935 } 1936 closedir(dp); 1937 if (buf[0] == '\0') { 1938 printf("Unknown host!\n"); 1939 return -1; 1940 } 1941 1942 return 0; 1943 } 1944 1945 static uint64_t kvmppc_read_int_dt(const char *filename) 1946 { 1947 union { 1948 uint32_t v32; 1949 uint64_t v64; 1950 } u; 1951 FILE *f; 1952 int len; 1953 1954 f = fopen(filename, "rb"); 1955 if (!f) { 1956 return -1; 1957 } 1958 1959 len = fread(&u, 1, sizeof(u), f); 1960 fclose(f); 1961 switch (len) { 1962 case 4: 1963 /* property is a 32-bit quantity */ 1964 return be32_to_cpu(u.v32); 1965 case 8: 1966 return be64_to_cpu(u.v64); 1967 } 1968 1969 return 0; 1970 } 1971 1972 /* Read a CPU node property from the host device tree that's a single 1973 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong 1974 * (can't find or open the property, or doesn't understand the 1975 * format) */ 1976 static uint64_t kvmppc_read_int_cpu_dt(const char *propname) 1977 { 1978 char buf[PATH_MAX], *tmp; 1979 uint64_t val; 1980 1981 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) { 1982 return -1; 1983 } 1984 1985 tmp = g_strdup_printf("%s/%s", buf, propname); 1986 val = kvmppc_read_int_dt(tmp); 1987 g_free(tmp); 1988 1989 return val; 1990 } 1991 1992 uint64_t kvmppc_get_clockfreq(void) 1993 { 1994 return kvmppc_read_int_cpu_dt("clock-frequency"); 1995 } 1996 1997 uint32_t kvmppc_get_vmx(void) 1998 { 1999 return kvmppc_read_int_cpu_dt("ibm,vmx"); 2000 } 2001 2002 uint32_t kvmppc_get_dfp(void) 2003 { 2004 return kvmppc_read_int_cpu_dt("ibm,dfp"); 2005 } 2006 2007 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo) 2008 { 2009 PowerPCCPU *cpu = ppc_env_get_cpu(env); 2010 CPUState *cs = CPU(cpu); 2011 2012 if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) && 2013 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) { 2014 return 0; 2015 } 2016 2017 return 1; 2018 } 2019 2020 int kvmppc_get_hasidle(CPUPPCState *env) 2021 { 2022 struct kvm_ppc_pvinfo pvinfo; 2023 2024 if (!kvmppc_get_pvinfo(env, &pvinfo) && 2025 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) { 2026 return 1; 2027 } 2028 2029 return 0; 2030 } 2031 2032 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len) 2033 { 2034 uint32_t *hc = (uint32_t*)buf; 2035 struct kvm_ppc_pvinfo pvinfo; 2036 2037 if (!kvmppc_get_pvinfo(env, &pvinfo)) { 2038 memcpy(buf, pvinfo.hcall, buf_len); 2039 return 0; 2040 } 2041 2042 /* 2043 * Fallback to always fail hypercalls regardless of endianness: 2044 * 2045 * tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian) 2046 * li r3, -1 2047 * b .+8 (becomes nop in wrong endian) 2048 * bswap32(li r3, -1) 2049 */ 2050 2051 hc[0] = cpu_to_be32(0x08000048); 2052 hc[1] = cpu_to_be32(0x3860ffff); 2053 hc[2] = cpu_to_be32(0x48000008); 2054 hc[3] = cpu_to_be32(bswap32(0x3860ffff)); 2055 2056 return 1; 2057 } 2058 2059 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall) 2060 { 2061 return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1); 2062 } 2063 2064 void kvmppc_enable_logical_ci_hcalls(void) 2065 { 2066 /* 2067 * FIXME: it would be nice if we could detect the cases where 2068 * we're using a device which requires the in kernel 2069 * implementation of these hcalls, but the kernel lacks them and 2070 * produce a warning. 2071 */ 2072 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD); 2073 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE); 2074 } 2075 2076 void kvmppc_enable_set_mode_hcall(void) 2077 { 2078 kvmppc_enable_hcall(kvm_state, H_SET_MODE); 2079 } 2080 2081 void kvmppc_enable_clear_ref_mod_hcalls(void) 2082 { 2083 kvmppc_enable_hcall(kvm_state, H_CLEAR_REF); 2084 kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD); 2085 } 2086 2087 void kvmppc_set_papr(PowerPCCPU *cpu) 2088 { 2089 CPUState *cs = CPU(cpu); 2090 int ret; 2091 2092 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0); 2093 if (ret) { 2094 error_report("This vCPU type or KVM version does not support PAPR"); 2095 exit(1); 2096 } 2097 2098 /* Update the capability flag so we sync the right information 2099 * with kvm */ 2100 cap_papr = 1; 2101 } 2102 2103 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr) 2104 { 2105 return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr); 2106 } 2107 2108 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy) 2109 { 2110 CPUState *cs = CPU(cpu); 2111 int ret; 2112 2113 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy); 2114 if (ret && mpic_proxy) { 2115 error_report("This KVM version does not support EPR"); 2116 exit(1); 2117 } 2118 } 2119 2120 int kvmppc_smt_threads(void) 2121 { 2122 return cap_ppc_smt ? cap_ppc_smt : 1; 2123 } 2124 2125 #ifdef TARGET_PPC64 2126 off_t kvmppc_alloc_rma(void **rma) 2127 { 2128 off_t size; 2129 int fd; 2130 struct kvm_allocate_rma ret; 2131 2132 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported 2133 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but 2134 * not necessary on this hardware 2135 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware 2136 * 2137 * FIXME: We should allow the user to force contiguous RMA 2138 * allocation in the cap_ppc_rma==1 case. 2139 */ 2140 if (cap_ppc_rma < 2) { 2141 return 0; 2142 } 2143 2144 fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret); 2145 if (fd < 0) { 2146 fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n", 2147 strerror(errno)); 2148 return -1; 2149 } 2150 2151 size = MIN(ret.rma_size, 256ul << 20); 2152 2153 *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); 2154 if (*rma == MAP_FAILED) { 2155 fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno)); 2156 return -1; 2157 }; 2158 2159 return size; 2160 } 2161 2162 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift) 2163 { 2164 struct kvm_ppc_smmu_info info; 2165 long rampagesize, best_page_shift; 2166 int i; 2167 2168 if (cap_ppc_rma >= 2) { 2169 return current_size; 2170 } 2171 2172 /* Find the largest hardware supported page size that's less than 2173 * or equal to the (logical) backing page size of guest RAM */ 2174 kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info); 2175 rampagesize = qemu_getrampagesize(); 2176 best_page_shift = 0; 2177 2178 for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) { 2179 struct kvm_ppc_one_seg_page_size *sps = &info.sps[i]; 2180 2181 if (!sps->page_shift) { 2182 continue; 2183 } 2184 2185 if ((sps->page_shift > best_page_shift) 2186 && ((1UL << sps->page_shift) <= rampagesize)) { 2187 best_page_shift = sps->page_shift; 2188 } 2189 } 2190 2191 return MIN(current_size, 2192 1ULL << (best_page_shift + hash_shift - 7)); 2193 } 2194 #endif 2195 2196 bool kvmppc_spapr_use_multitce(void) 2197 { 2198 return cap_spapr_multitce; 2199 } 2200 2201 int kvmppc_spapr_enable_inkernel_multitce(void) 2202 { 2203 int ret; 2204 2205 ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0, 2206 H_PUT_TCE_INDIRECT, 1); 2207 if (!ret) { 2208 ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0, 2209 H_STUFF_TCE, 1); 2210 } 2211 2212 return ret; 2213 } 2214 2215 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift, 2216 uint64_t bus_offset, uint32_t nb_table, 2217 int *pfd, bool need_vfio) 2218 { 2219 long len; 2220 int fd; 2221 void *table; 2222 2223 /* Must set fd to -1 so we don't try to munmap when called for 2224 * destroying the table, which the upper layers -will- do 2225 */ 2226 *pfd = -1; 2227 if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) { 2228 return NULL; 2229 } 2230 2231 if (cap_spapr_tce_64) { 2232 struct kvm_create_spapr_tce_64 args = { 2233 .liobn = liobn, 2234 .page_shift = page_shift, 2235 .offset = bus_offset >> page_shift, 2236 .size = nb_table, 2237 .flags = 0 2238 }; 2239 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE_64, &args); 2240 if (fd < 0) { 2241 fprintf(stderr, 2242 "KVM: Failed to create TCE64 table for liobn 0x%x\n", 2243 liobn); 2244 return NULL; 2245 } 2246 } else if (cap_spapr_tce) { 2247 uint64_t window_size = (uint64_t) nb_table << page_shift; 2248 struct kvm_create_spapr_tce args = { 2249 .liobn = liobn, 2250 .window_size = window_size, 2251 }; 2252 if ((window_size != args.window_size) || bus_offset) { 2253 return NULL; 2254 } 2255 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args); 2256 if (fd < 0) { 2257 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n", 2258 liobn); 2259 return NULL; 2260 } 2261 } else { 2262 return NULL; 2263 } 2264 2265 len = nb_table * sizeof(uint64_t); 2266 /* FIXME: round this up to page size */ 2267 2268 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); 2269 if (table == MAP_FAILED) { 2270 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n", 2271 liobn); 2272 close(fd); 2273 return NULL; 2274 } 2275 2276 *pfd = fd; 2277 return table; 2278 } 2279 2280 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table) 2281 { 2282 long len; 2283 2284 if (fd < 0) { 2285 return -1; 2286 } 2287 2288 len = nb_table * sizeof(uint64_t); 2289 if ((munmap(table, len) < 0) || 2290 (close(fd) < 0)) { 2291 fprintf(stderr, "KVM: Unexpected error removing TCE table: %s", 2292 strerror(errno)); 2293 /* Leak the table */ 2294 } 2295 2296 return 0; 2297 } 2298 2299 int kvmppc_reset_htab(int shift_hint) 2300 { 2301 uint32_t shift = shift_hint; 2302 2303 if (!kvm_enabled()) { 2304 /* Full emulation, tell caller to allocate htab itself */ 2305 return 0; 2306 } 2307 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) { 2308 int ret; 2309 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift); 2310 if (ret == -ENOTTY) { 2311 /* At least some versions of PR KVM advertise the 2312 * capability, but don't implement the ioctl(). Oops. 2313 * Return 0 so that we allocate the htab in qemu, as is 2314 * correct for PR. */ 2315 return 0; 2316 } else if (ret < 0) { 2317 return ret; 2318 } 2319 return shift; 2320 } 2321 2322 /* We have a kernel that predates the htab reset calls. For PR 2323 * KVM, we need to allocate the htab ourselves, for an HV KVM of 2324 * this era, it has allocated a 16MB fixed size hash table already. */ 2325 if (kvmppc_is_pr(kvm_state)) { 2326 /* PR - tell caller to allocate htab */ 2327 return 0; 2328 } else { 2329 /* HV - assume 16MB kernel allocated htab */ 2330 return 24; 2331 } 2332 } 2333 2334 static inline uint32_t mfpvr(void) 2335 { 2336 uint32_t pvr; 2337 2338 asm ("mfpvr %0" 2339 : "=r"(pvr)); 2340 return pvr; 2341 } 2342 2343 static void alter_insns(uint64_t *word, uint64_t flags, bool on) 2344 { 2345 if (on) { 2346 *word |= flags; 2347 } else { 2348 *word &= ~flags; 2349 } 2350 } 2351 2352 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data) 2353 { 2354 PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc); 2355 uint32_t vmx = kvmppc_get_vmx(); 2356 uint32_t dfp = kvmppc_get_dfp(); 2357 uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size"); 2358 uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size"); 2359 2360 /* Now fix up the class with information we can query from the host */ 2361 pcc->pvr = mfpvr(); 2362 2363 if (vmx != -1) { 2364 /* Only override when we know what the host supports */ 2365 alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0); 2366 alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1); 2367 } 2368 if (dfp != -1) { 2369 /* Only override when we know what the host supports */ 2370 alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp); 2371 } 2372 2373 if (dcache_size != -1) { 2374 pcc->l1_dcache_size = dcache_size; 2375 } 2376 2377 if (icache_size != -1) { 2378 pcc->l1_icache_size = icache_size; 2379 } 2380 2381 #if defined(TARGET_PPC64) 2382 pcc->radix_page_info = kvm_get_radix_page_info(); 2383 2384 if ((pcc->pvr & 0xffffff00) == CPU_POWERPC_POWER9_DD1) { 2385 /* 2386 * POWER9 DD1 has some bugs which make it not really ISA 3.00 2387 * compliant. More importantly, advertising ISA 3.00 2388 * architected mode may prevent guests from activating 2389 * necessary DD1 workarounds. 2390 */ 2391 pcc->pcr_supported &= ~(PCR_COMPAT_3_00 | PCR_COMPAT_2_07 2392 | PCR_COMPAT_2_06 | PCR_COMPAT_2_05); 2393 } 2394 #endif /* defined(TARGET_PPC64) */ 2395 } 2396 2397 bool kvmppc_has_cap_epr(void) 2398 { 2399 return cap_epr; 2400 } 2401 2402 bool kvmppc_has_cap_htab_fd(void) 2403 { 2404 return cap_htab_fd; 2405 } 2406 2407 bool kvmppc_has_cap_fixup_hcalls(void) 2408 { 2409 return cap_fixup_hcalls; 2410 } 2411 2412 bool kvmppc_has_cap_htm(void) 2413 { 2414 return cap_htm; 2415 } 2416 2417 bool kvmppc_has_cap_mmu_radix(void) 2418 { 2419 return cap_mmu_radix; 2420 } 2421 2422 bool kvmppc_has_cap_mmu_hash_v3(void) 2423 { 2424 return cap_mmu_hash_v3; 2425 } 2426 2427 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void) 2428 { 2429 uint32_t host_pvr = mfpvr(); 2430 PowerPCCPUClass *pvr_pcc; 2431 2432 pvr_pcc = ppc_cpu_class_by_pvr(host_pvr); 2433 if (pvr_pcc == NULL) { 2434 pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr); 2435 } 2436 2437 return pvr_pcc; 2438 } 2439 2440 static int kvm_ppc_register_host_cpu_type(void) 2441 { 2442 TypeInfo type_info = { 2443 .name = TYPE_HOST_POWERPC_CPU, 2444 .class_init = kvmppc_host_cpu_class_init, 2445 }; 2446 PowerPCCPUClass *pvr_pcc; 2447 DeviceClass *dc; 2448 int i; 2449 2450 pvr_pcc = kvm_ppc_get_host_cpu_class(); 2451 if (pvr_pcc == NULL) { 2452 return -1; 2453 } 2454 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc)); 2455 type_register(&type_info); 2456 2457 #if defined(TARGET_PPC64) 2458 type_info.name = g_strdup_printf("%s-"TYPE_SPAPR_CPU_CORE, "host"); 2459 type_info.parent = TYPE_SPAPR_CPU_CORE, 2460 type_info.instance_size = sizeof(sPAPRCPUCore); 2461 type_info.instance_init = NULL; 2462 type_info.class_init = spapr_cpu_core_class_init; 2463 type_info.class_data = (void *) "host"; 2464 type_register(&type_info); 2465 g_free((void *)type_info.name); 2466 #endif 2467 2468 /* 2469 * Update generic CPU family class alias (e.g. on a POWER8NVL host, 2470 * we want "POWER8" to be a "family" alias that points to the current 2471 * host CPU type, too) 2472 */ 2473 dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc)); 2474 for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) { 2475 if (strcmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) { 2476 ObjectClass *oc = OBJECT_CLASS(pvr_pcc); 2477 char *suffix; 2478 2479 ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc)); 2480 suffix = strstr(ppc_cpu_aliases[i].model, "-"TYPE_POWERPC_CPU); 2481 if (suffix) { 2482 *suffix = 0; 2483 } 2484 ppc_cpu_aliases[i].oc = oc; 2485 break; 2486 } 2487 } 2488 2489 return 0; 2490 } 2491 2492 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function) 2493 { 2494 struct kvm_rtas_token_args args = { 2495 .token = token, 2496 }; 2497 2498 if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) { 2499 return -ENOENT; 2500 } 2501 2502 strncpy(args.name, function, sizeof(args.name)); 2503 2504 return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args); 2505 } 2506 2507 int kvmppc_get_htab_fd(bool write) 2508 { 2509 struct kvm_get_htab_fd s = { 2510 .flags = write ? KVM_GET_HTAB_WRITE : 0, 2511 .start_index = 0, 2512 }; 2513 2514 if (!cap_htab_fd) { 2515 fprintf(stderr, "KVM version doesn't support saving the hash table\n"); 2516 return -1; 2517 } 2518 2519 return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s); 2520 } 2521 2522 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns) 2523 { 2524 int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); 2525 uint8_t buf[bufsize]; 2526 ssize_t rc; 2527 2528 do { 2529 rc = read(fd, buf, bufsize); 2530 if (rc < 0) { 2531 fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n", 2532 strerror(errno)); 2533 return rc; 2534 } else if (rc) { 2535 uint8_t *buffer = buf; 2536 ssize_t n = rc; 2537 while (n) { 2538 struct kvm_get_htab_header *head = 2539 (struct kvm_get_htab_header *) buffer; 2540 size_t chunksize = sizeof(*head) + 2541 HASH_PTE_SIZE_64 * head->n_valid; 2542 2543 qemu_put_be32(f, head->index); 2544 qemu_put_be16(f, head->n_valid); 2545 qemu_put_be16(f, head->n_invalid); 2546 qemu_put_buffer(f, (void *)(head + 1), 2547 HASH_PTE_SIZE_64 * head->n_valid); 2548 2549 buffer += chunksize; 2550 n -= chunksize; 2551 } 2552 } 2553 } while ((rc != 0) 2554 && ((max_ns < 0) 2555 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns))); 2556 2557 return (rc == 0) ? 1 : 0; 2558 } 2559 2560 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index, 2561 uint16_t n_valid, uint16_t n_invalid) 2562 { 2563 struct kvm_get_htab_header *buf; 2564 size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64; 2565 ssize_t rc; 2566 2567 buf = alloca(chunksize); 2568 buf->index = index; 2569 buf->n_valid = n_valid; 2570 buf->n_invalid = n_invalid; 2571 2572 qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid); 2573 2574 rc = write(fd, buf, chunksize); 2575 if (rc < 0) { 2576 fprintf(stderr, "Error writing KVM hash table: %s\n", 2577 strerror(errno)); 2578 return rc; 2579 } 2580 if (rc != chunksize) { 2581 /* We should never get a short write on a single chunk */ 2582 fprintf(stderr, "Short write, restoring KVM hash table\n"); 2583 return -1; 2584 } 2585 return 0; 2586 } 2587 2588 bool kvm_arch_stop_on_emulation_error(CPUState *cpu) 2589 { 2590 return true; 2591 } 2592 2593 void kvm_arch_init_irq_routing(KVMState *s) 2594 { 2595 } 2596 2597 void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n) 2598 { 2599 struct kvm_get_htab_fd ghf = { 2600 .flags = 0, 2601 .start_index = ptex, 2602 }; 2603 int fd, rc; 2604 int i; 2605 2606 fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf); 2607 if (fd < 0) { 2608 hw_error("kvmppc_read_hptes: Unable to open HPT fd"); 2609 } 2610 2611 i = 0; 2612 while (i < n) { 2613 struct kvm_get_htab_header *hdr; 2614 int m = n < HPTES_PER_GROUP ? n : HPTES_PER_GROUP; 2615 char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64]; 2616 2617 rc = read(fd, buf, sizeof(buf)); 2618 if (rc < 0) { 2619 hw_error("kvmppc_read_hptes: Unable to read HPTEs"); 2620 } 2621 2622 hdr = (struct kvm_get_htab_header *)buf; 2623 while ((i < n) && ((char *)hdr < (buf + rc))) { 2624 int invalid = hdr->n_invalid; 2625 2626 if (hdr->index != (ptex + i)) { 2627 hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32 2628 " != (%"HWADDR_PRIu" + %d", hdr->index, ptex, i); 2629 } 2630 2631 memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * hdr->n_valid); 2632 i += hdr->n_valid; 2633 2634 if ((n - i) < invalid) { 2635 invalid = n - i; 2636 } 2637 memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64); 2638 i += hdr->n_invalid; 2639 2640 hdr = (struct kvm_get_htab_header *) 2641 ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid); 2642 } 2643 } 2644 2645 close(fd); 2646 } 2647 2648 void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1) 2649 { 2650 int fd, rc; 2651 struct kvm_get_htab_fd ghf; 2652 struct { 2653 struct kvm_get_htab_header hdr; 2654 uint64_t pte0; 2655 uint64_t pte1; 2656 } buf; 2657 2658 ghf.flags = 0; 2659 ghf.start_index = 0; /* Ignored */ 2660 fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf); 2661 if (fd < 0) { 2662 hw_error("kvmppc_write_hpte: Unable to open HPT fd"); 2663 } 2664 2665 buf.hdr.n_valid = 1; 2666 buf.hdr.n_invalid = 0; 2667 buf.hdr.index = ptex; 2668 buf.pte0 = cpu_to_be64(pte0); 2669 buf.pte1 = cpu_to_be64(pte1); 2670 2671 rc = write(fd, &buf, sizeof(buf)); 2672 if (rc != sizeof(buf)) { 2673 hw_error("kvmppc_write_hpte: Unable to update KVM HPT"); 2674 } 2675 close(fd); 2676 } 2677 2678 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route, 2679 uint64_t address, uint32_t data, PCIDevice *dev) 2680 { 2681 return 0; 2682 } 2683 2684 int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route, 2685 int vector, PCIDevice *dev) 2686 { 2687 return 0; 2688 } 2689 2690 int kvm_arch_release_virq_post(int virq) 2691 { 2692 return 0; 2693 } 2694 2695 int kvm_arch_msi_data_to_gsi(uint32_t data) 2696 { 2697 return data & 0xffff; 2698 } 2699 2700 int kvmppc_enable_hwrng(void) 2701 { 2702 if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) { 2703 return -1; 2704 } 2705 2706 return kvmppc_enable_hcall(kvm_state, H_RANDOM); 2707 } 2708