1 /* 2 * PowerPC implementation of KVM hooks 3 * 4 * Copyright IBM Corp. 2007 5 * Copyright (C) 2011 Freescale Semiconductor, Inc. 6 * 7 * Authors: 8 * Jerone Young <jyoung5@us.ibm.com> 9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com> 10 * Hollis Blanchard <hollisb@us.ibm.com> 11 * 12 * This work is licensed under the terms of the GNU GPL, version 2 or later. 13 * See the COPYING file in the top-level directory. 14 * 15 */ 16 17 #include "qemu/osdep.h" 18 #include <dirent.h> 19 #include <sys/ioctl.h> 20 #include <sys/vfs.h> 21 22 #include <linux/kvm.h> 23 24 #include "qemu-common.h" 25 #include "qapi/error.h" 26 #include "qemu/error-report.h" 27 #include "cpu.h" 28 #include "cpu-models.h" 29 #include "qemu/timer.h" 30 #include "sysemu/sysemu.h" 31 #include "sysemu/hw_accel.h" 32 #include "kvm_ppc.h" 33 #include "sysemu/cpus.h" 34 #include "sysemu/device_tree.h" 35 #include "mmu-hash64.h" 36 37 #include "hw/sysbus.h" 38 #include "hw/ppc/spapr.h" 39 #include "hw/ppc/spapr_vio.h" 40 #include "hw/ppc/spapr_cpu_core.h" 41 #include "hw/ppc/ppc.h" 42 #include "sysemu/watchdog.h" 43 #include "trace.h" 44 #include "exec/gdbstub.h" 45 #include "exec/memattrs.h" 46 #include "exec/ram_addr.h" 47 #include "sysemu/hostmem.h" 48 #include "qemu/cutils.h" 49 #include "qemu/mmap-alloc.h" 50 #if defined(TARGET_PPC64) 51 #include "hw/ppc/spapr_cpu_core.h" 52 #endif 53 #include "elf.h" 54 #include "sysemu/kvm_int.h" 55 56 //#define DEBUG_KVM 57 58 #ifdef DEBUG_KVM 59 #define DPRINTF(fmt, ...) \ 60 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0) 61 #else 62 #define DPRINTF(fmt, ...) \ 63 do { } while (0) 64 #endif 65 66 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/" 67 68 const KVMCapabilityInfo kvm_arch_required_capabilities[] = { 69 KVM_CAP_LAST_INFO 70 }; 71 72 static int cap_interrupt_unset = false; 73 static int cap_interrupt_level = false; 74 static int cap_segstate; 75 static int cap_booke_sregs; 76 static int cap_ppc_smt; 77 static int cap_ppc_smt_possible; 78 static int cap_ppc_rma; 79 static int cap_spapr_tce; 80 static int cap_spapr_tce_64; 81 static int cap_spapr_multitce; 82 static int cap_spapr_vfio; 83 static int cap_hior; 84 static int cap_one_reg; 85 static int cap_epr; 86 static int cap_ppc_watchdog; 87 static int cap_papr; 88 static int cap_htab_fd; 89 static int cap_fixup_hcalls; 90 static int cap_htm; /* Hardware transactional memory support */ 91 static int cap_mmu_radix; 92 static int cap_mmu_hash_v3; 93 static int cap_resize_hpt; 94 static int cap_ppc_pvr_compat; 95 96 static uint32_t debug_inst_opcode; 97 98 /* XXX We have a race condition where we actually have a level triggered 99 * interrupt, but the infrastructure can't expose that yet, so the guest 100 * takes but ignores it, goes to sleep and never gets notified that there's 101 * still an interrupt pending. 102 * 103 * As a quick workaround, let's just wake up again 20 ms after we injected 104 * an interrupt. That way we can assure that we're always reinjecting 105 * interrupts in case the guest swallowed them. 106 */ 107 static QEMUTimer *idle_timer; 108 109 static void kvm_kick_cpu(void *opaque) 110 { 111 PowerPCCPU *cpu = opaque; 112 113 qemu_cpu_kick(CPU(cpu)); 114 } 115 116 /* Check whether we are running with KVM-PR (instead of KVM-HV). This 117 * should only be used for fallback tests - generally we should use 118 * explicit capabilities for the features we want, rather than 119 * assuming what is/isn't available depending on the KVM variant. 
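 *
 * (For reference: KVM-PR runs the guest in user mode and emulates
 * privileged operations, while KVM-HV runs it using the CPU's
 * hypervisor facilities; the two variants expose different sets of
 * capabilities, which is why the PVINFO heuristic below is only a
 * fallback.)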
*/ 120 static bool kvmppc_is_pr(KVMState *ks) 121 { 122 /* Assume KVM-PR if the GET_PVINFO capability is available */ 123 return kvm_vm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0; 124 } 125 126 static int kvm_ppc_register_host_cpu_type(void); 127 128 int kvm_arch_init(MachineState *ms, KVMState *s) 129 { 130 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ); 131 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL); 132 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE); 133 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS); 134 cap_ppc_smt_possible = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT_POSSIBLE); 135 cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA); 136 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE); 137 cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64); 138 cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE); 139 cap_spapr_vfio = false; 140 cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG); 141 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR); 142 cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR); 143 cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG); 144 /* Note: we don't set cap_papr here, because this capability is 145 * only activated after this by kvmppc_set_papr() */ 146 cap_htab_fd = kvm_vm_check_extension(s, KVM_CAP_PPC_HTAB_FD); 147 cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL); 148 cap_ppc_smt = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT); 149 cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM); 150 cap_mmu_radix = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_RADIX); 151 cap_mmu_hash_v3 = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_HASH_V3); 152 cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT); 153 /* 154 * Note: setting it to false because there is not such capability 155 * in KVM at this moment. 156 * 157 * TODO: call kvm_vm_check_extension() with the right capability 158 * after the kernel starts implementing it.*/ 159 cap_ppc_pvr_compat = false; 160 161 if (!cap_interrupt_level) { 162 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the " 163 "VM to stall at times!\n"); 164 } 165 166 kvm_ppc_register_host_cpu_type(); 167 168 return 0; 169 } 170 171 int kvm_arch_irqchip_create(MachineState *ms, KVMState *s) 172 { 173 return 0; 174 } 175 176 static int kvm_arch_sync_sregs(PowerPCCPU *cpu) 177 { 178 CPUPPCState *cenv = &cpu->env; 179 CPUState *cs = CPU(cpu); 180 struct kvm_sregs sregs; 181 int ret; 182 183 if (cenv->excp_model == POWERPC_EXCP_BOOKE) { 184 /* What we're really trying to say is "if we're on BookE, we use 185 the native PVR for now". This is the only sane way to check 186 it though, so we potentially confuse users that they can run 187 BookE guests on BookS. 
           Let's hope nobody dares enough :) */
        return 0;
    } else {
        if (!cap_segstate) {
            fprintf(stderr, "kvm error: missing PVR setting capability\n");
            return -ENOSYS;
        }
    }

    ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
    if (ret) {
        return ret;
    }

    sregs.pvr = cenv->spr[SPR_PVR];
    return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
}

/* Set up a shared TLB array with KVM */
static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_book3e_206_tlb_params params = {};
    struct kvm_config_tlb cfg = {};
    unsigned int entries = 0;
    int ret, i;

    if (!kvm_enabled() ||
        !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
        return 0;
    }

    assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);

    for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
        params.tlb_sizes[i] = booke206_tlb_size(env, i);
        params.tlb_ways[i] = booke206_tlb_ways(env, i);
        entries += params.tlb_sizes[i];
    }

    assert(entries == env->nb_tlb);
    assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));

    env->tlb_dirty = true;

    cfg.array = (uintptr_t)env->tlb.tlbm;
    cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
    cfg.params = (uintptr_t)&params;
    cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;

    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
    if (ret < 0) {
        fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
                __func__, strerror(-ret));
        return ret;
    }

    env->kvm_sw_tlb = true;
    return 0;
}


#if defined(TARGET_PPC64)
static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
                                       struct kvm_ppc_smmu_info *info)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);

    memset(info, 0, sizeof(*info));

    /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so we
     * need to "guess" what the supported page sizes are.
     *
     * For that to work we make a few assumptions:
     *
     * - Check whether we are running "PR" KVM which only supports 4K
     *   and 16M pages, but supports them regardless of the backing
     *   store characteristics. We also don't support 1T segments.
     *
     *   This is safe as if HV KVM ever supports that capability or PR
     *   KVM grows support for more page/segment sizes, those versions
     *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
     *   will not hit this fallback.
     *
     * - Else we are running HV KVM. This means we only support page
     *   sizes that fit in the backing store. Additionally we only
     *   advertise 64K pages if the processor is ARCH 2.06 and we assume
     *   P7 encodings for the SLB and hash table. Here too, we assume
     *   support for any newer processor will mean a kernel that
     *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
     *   this fallback.
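     *
     * Each sps[] entry filled in below describes one supported segment
     * base page size (its page_shift plus SLB encoding), and the enc[]
     * sub-entries list the actual page sizes usable within such a
     * segment together with their hash PTE encodings.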
280 */ 281 if (kvmppc_is_pr(cs->kvm_state)) { 282 /* No flags */ 283 info->flags = 0; 284 info->slb_size = 64; 285 286 /* Standard 4k base page size segment */ 287 info->sps[0].page_shift = 12; 288 info->sps[0].slb_enc = 0; 289 info->sps[0].enc[0].page_shift = 12; 290 info->sps[0].enc[0].pte_enc = 0; 291 292 /* Standard 16M large page size segment */ 293 info->sps[1].page_shift = 24; 294 info->sps[1].slb_enc = SLB_VSID_L; 295 info->sps[1].enc[0].page_shift = 24; 296 info->sps[1].enc[0].pte_enc = 0; 297 } else { 298 int i = 0; 299 300 /* HV KVM has backing store size restrictions */ 301 info->flags = KVM_PPC_PAGE_SIZES_REAL; 302 303 if (env->mmu_model & POWERPC_MMU_1TSEG) { 304 info->flags |= KVM_PPC_1T_SEGMENTS; 305 } 306 307 if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 || 308 POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) { 309 info->slb_size = 32; 310 } else { 311 info->slb_size = 64; 312 } 313 314 /* Standard 4k base page size segment */ 315 info->sps[i].page_shift = 12; 316 info->sps[i].slb_enc = 0; 317 info->sps[i].enc[0].page_shift = 12; 318 info->sps[i].enc[0].pte_enc = 0; 319 i++; 320 321 /* 64K on MMU 2.06 and later */ 322 if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 || 323 POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) { 324 info->sps[i].page_shift = 16; 325 info->sps[i].slb_enc = 0x110; 326 info->sps[i].enc[0].page_shift = 16; 327 info->sps[i].enc[0].pte_enc = 1; 328 i++; 329 } 330 331 /* Standard 16M large page size segment */ 332 info->sps[i].page_shift = 24; 333 info->sps[i].slb_enc = SLB_VSID_L; 334 info->sps[i].enc[0].page_shift = 24; 335 info->sps[i].enc[0].pte_enc = 0; 336 } 337 } 338 339 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info) 340 { 341 CPUState *cs = CPU(cpu); 342 int ret; 343 344 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) { 345 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info); 346 if (ret == 0) { 347 return; 348 } 349 } 350 351 kvm_get_fallback_smmu_info(cpu, info); 352 } 353 354 struct ppc_radix_page_info *kvm_get_radix_page_info(void) 355 { 356 KVMState *s = KVM_STATE(current_machine->accelerator); 357 struct ppc_radix_page_info *radix_page_info; 358 struct kvm_ppc_rmmu_info rmmu_info; 359 int i; 360 361 if (!kvm_check_extension(s, KVM_CAP_PPC_MMU_RADIX)) { 362 return NULL; 363 } 364 if (kvm_vm_ioctl(s, KVM_PPC_GET_RMMU_INFO, &rmmu_info)) { 365 return NULL; 366 } 367 radix_page_info = g_malloc0(sizeof(*radix_page_info)); 368 radix_page_info->count = 0; 369 for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) { 370 if (rmmu_info.ap_encodings[i]) { 371 radix_page_info->entries[i] = rmmu_info.ap_encodings[i]; 372 radix_page_info->count++; 373 } 374 } 375 return radix_page_info; 376 } 377 378 target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu, 379 bool radix, bool gtse, 380 uint64_t proc_tbl) 381 { 382 CPUState *cs = CPU(cpu); 383 int ret; 384 uint64_t flags = 0; 385 struct kvm_ppc_mmuv3_cfg cfg = { 386 .process_table = proc_tbl, 387 }; 388 389 if (radix) { 390 flags |= KVM_PPC_MMUV3_RADIX; 391 } 392 if (gtse) { 393 flags |= KVM_PPC_MMUV3_GTSE; 394 } 395 cfg.flags = flags; 396 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_CONFIGURE_V3_MMU, &cfg); 397 switch (ret) { 398 case 0: 399 return H_SUCCESS; 400 case -EINVAL: 401 return H_PARAMETER; 402 case -ENODEV: 403 return H_NOT_AVAILABLE; 404 default: 405 return H_HARDWARE; 406 } 407 } 408 409 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift) 410 { 411 if (!(flags & 
KVM_PPC_PAGE_SIZES_REAL)) { 412 return true; 413 } 414 415 return (1ul << shift) <= rampgsize; 416 } 417 418 static long max_cpu_page_size; 419 420 static void kvm_fixup_page_sizes(PowerPCCPU *cpu) 421 { 422 static struct kvm_ppc_smmu_info smmu_info; 423 static bool has_smmu_info; 424 CPUPPCState *env = &cpu->env; 425 int iq, ik, jq, jk; 426 bool has_64k_pages = false; 427 428 /* We only handle page sizes for 64-bit server guests for now */ 429 if (!(env->mmu_model & POWERPC_MMU_64)) { 430 return; 431 } 432 433 /* Collect MMU info from kernel if not already */ 434 if (!has_smmu_info) { 435 kvm_get_smmu_info(cpu, &smmu_info); 436 has_smmu_info = true; 437 } 438 439 if (!max_cpu_page_size) { 440 max_cpu_page_size = qemu_getrampagesize(); 441 } 442 443 /* Convert to QEMU form */ 444 memset(&env->sps, 0, sizeof(env->sps)); 445 446 /* If we have HV KVM, we need to forbid CI large pages if our 447 * host page size is smaller than 64K. 448 */ 449 if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) { 450 env->ci_large_pages = getpagesize() >= 0x10000; 451 } 452 453 /* 454 * XXX This loop should be an entry wide AND of the capabilities that 455 * the selected CPU has with the capabilities that KVM supports. 456 */ 457 for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) { 458 struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq]; 459 struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik]; 460 461 if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size, 462 ksps->page_shift)) { 463 continue; 464 } 465 qsps->page_shift = ksps->page_shift; 466 qsps->slb_enc = ksps->slb_enc; 467 for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) { 468 if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size, 469 ksps->enc[jk].page_shift)) { 470 continue; 471 } 472 if (ksps->enc[jk].page_shift == 16) { 473 has_64k_pages = true; 474 } 475 qsps->enc[jq].page_shift = ksps->enc[jk].page_shift; 476 qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc; 477 if (++jq >= PPC_PAGE_SIZES_MAX_SZ) { 478 break; 479 } 480 } 481 if (++iq >= PPC_PAGE_SIZES_MAX_SZ) { 482 break; 483 } 484 } 485 env->slb_nr = smmu_info.slb_size; 486 if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) { 487 env->mmu_model &= ~POWERPC_MMU_1TSEG; 488 } 489 if (!has_64k_pages) { 490 env->mmu_model &= ~POWERPC_MMU_64K; 491 } 492 } 493 494 bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path) 495 { 496 Object *mem_obj = object_resolve_path(obj_path, NULL); 497 char *mempath = object_property_get_str(mem_obj, "mem-path", NULL); 498 long pagesize; 499 500 if (mempath) { 501 pagesize = qemu_mempath_getpagesize(mempath); 502 g_free(mempath); 503 } else { 504 pagesize = getpagesize(); 505 } 506 507 return pagesize >= max_cpu_page_size; 508 } 509 510 #else /* defined (TARGET_PPC64) */ 511 512 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu) 513 { 514 } 515 516 bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path) 517 { 518 return true; 519 } 520 521 #endif /* !defined (TARGET_PPC64) */ 522 523 unsigned long kvm_arch_vcpu_id(CPUState *cpu) 524 { 525 return POWERPC_CPU(cpu)->vcpu_id; 526 } 527 528 /* e500 supports 2 h/w breakpoint and 2 watchpoint. 529 * book3s supports only 1 watchpoint, so array size 530 * of 4 is sufficient for now. 
531 */ 532 #define MAX_HW_BKPTS 4 533 534 static struct HWBreakpoint { 535 target_ulong addr; 536 int type; 537 } hw_debug_points[MAX_HW_BKPTS]; 538 539 static CPUWatchpoint hw_watchpoint; 540 541 /* Default there is no breakpoint and watchpoint supported */ 542 static int max_hw_breakpoint; 543 static int max_hw_watchpoint; 544 static int nb_hw_breakpoint; 545 static int nb_hw_watchpoint; 546 547 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv) 548 { 549 if (cenv->excp_model == POWERPC_EXCP_BOOKE) { 550 max_hw_breakpoint = 2; 551 max_hw_watchpoint = 2; 552 } 553 554 if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) { 555 fprintf(stderr, "Error initializing h/w breakpoints\n"); 556 return; 557 } 558 } 559 560 int kvm_arch_init_vcpu(CPUState *cs) 561 { 562 PowerPCCPU *cpu = POWERPC_CPU(cs); 563 CPUPPCState *cenv = &cpu->env; 564 int ret; 565 566 /* Gather server mmu info from KVM and update the CPU state */ 567 kvm_fixup_page_sizes(cpu); 568 569 /* Synchronize sregs with kvm */ 570 ret = kvm_arch_sync_sregs(cpu); 571 if (ret) { 572 if (ret == -EINVAL) { 573 error_report("Register sync failed... If you're using kvm-hv.ko," 574 " only \"-cpu host\" is possible"); 575 } 576 return ret; 577 } 578 579 idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu); 580 581 switch (cenv->mmu_model) { 582 case POWERPC_MMU_BOOKE206: 583 /* This target supports access to KVM's guest TLB */ 584 ret = kvm_booke206_tlb_init(cpu); 585 break; 586 case POWERPC_MMU_2_07: 587 if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) { 588 /* KVM-HV has transactional memory on POWER8 also without the 589 * KVM_CAP_PPC_HTM extension, so enable it here instead as 590 * long as it's availble to userspace on the host. */ 591 if (qemu_getauxval(AT_HWCAP2) & PPC_FEATURE2_HAS_HTM) { 592 cap_htm = true; 593 } 594 } 595 break; 596 default: 597 break; 598 } 599 600 kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode); 601 kvmppc_hw_debug_points_init(cenv); 602 603 return ret; 604 } 605 606 static void kvm_sw_tlb_put(PowerPCCPU *cpu) 607 { 608 CPUPPCState *env = &cpu->env; 609 CPUState *cs = CPU(cpu); 610 struct kvm_dirty_tlb dirty_tlb; 611 unsigned char *bitmap; 612 int ret; 613 614 if (!env->kvm_sw_tlb) { 615 return; 616 } 617 618 bitmap = g_malloc((env->nb_tlb + 7) / 8); 619 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8); 620 621 dirty_tlb.bitmap = (uintptr_t)bitmap; 622 dirty_tlb.num_dirty = env->nb_tlb; 623 624 ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb); 625 if (ret) { 626 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n", 627 __func__, strerror(-ret)); 628 } 629 630 g_free(bitmap); 631 } 632 633 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr) 634 { 635 PowerPCCPU *cpu = POWERPC_CPU(cs); 636 CPUPPCState *env = &cpu->env; 637 union { 638 uint32_t u32; 639 uint64_t u64; 640 } val; 641 struct kvm_one_reg reg = { 642 .id = id, 643 .addr = (uintptr_t) &val, 644 }; 645 int ret; 646 647 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 648 if (ret != 0) { 649 trace_kvm_failed_spr_get(spr, strerror(errno)); 650 } else { 651 switch (id & KVM_REG_SIZE_MASK) { 652 case KVM_REG_SIZE_U32: 653 env->spr[spr] = val.u32; 654 break; 655 656 case KVM_REG_SIZE_U64: 657 env->spr[spr] = val.u64; 658 break; 659 660 default: 661 /* Don't handle this size yet */ 662 abort(); 663 } 664 } 665 } 666 667 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr) 668 { 669 PowerPCCPU *cpu = POWERPC_CPU(cs); 670 CPUPPCState *env = &cpu->env; 671 union { 672 uint32_t u32; 673 uint64_t u64; 674 } val; 675 struct 
kvm_one_reg reg = {
        .id = id,
        .addr = (uintptr_t) &val,
    };
    int ret;

    switch (id & KVM_REG_SIZE_MASK) {
    case KVM_REG_SIZE_U32:
        val.u32 = env->spr[spr];
        break;

    case KVM_REG_SIZE_U64:
        val.u64 = env->spr[spr];
        break;

    default:
        /* Don't handle this size yet */
        abort();
    }

    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret != 0) {
        trace_kvm_failed_spr_set(spr, strerror(errno));
    }
}

static int kvm_put_fp(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int i;
    int ret;

    if (env->insns_flags & PPC_FLOAT) {
        uint64_t fpscr = env->fpscr;
        bool vsx = !!(env->insns_flags2 & PPC2_VSX);

        reg.id = KVM_REG_PPC_FPSCR;
        reg.addr = (uintptr_t)&fpscr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            uint64_t vsr[2];

#ifdef HOST_WORDS_BIGENDIAN
            vsr[0] = float64_val(env->fpr[i]);
            vsr[1] = env->vsr[i];
#else
            vsr[0] = env->vsr[i];
            vsr[1] = float64_val(env->fpr[i]);
#endif
            reg.addr = (uintptr_t) &vsr;
            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);

            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
                        i, strerror(errno));
                return ret;
            }
        }
    }

    if (env->insns_flags & PPC_ALTIVEC) {
        reg.id = KVM_REG_PPC_VSCR;
        reg.addr = (uintptr_t)&env->vscr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            reg.id = KVM_REG_PPC_VR(i);
            reg.addr = (uintptr_t)&env->avr[i];
            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
                return ret;
            }
        }
    }

    return 0;
}

static int kvm_get_fp(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int i;
    int ret;

    if (env->insns_flags & PPC_FLOAT) {
        uint64_t fpscr;
        bool vsx = !!(env->insns_flags2 & PPC2_VSX);

        reg.id = KVM_REG_PPC_FPSCR;
        reg.addr = (uintptr_t)&fpscr;
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
            return ret;
        } else {
            env->fpscr = fpscr;
        }

        for (i = 0; i < 32; i++) {
            uint64_t vsr[2];

            reg.addr = (uintptr_t) &vsr;
            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);

            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to get %s%d from KVM: %s\n",
                        vsx ? "VSR" : "FPR", i, strerror(errno));
                return ret;
            } else {
#ifdef HOST_WORDS_BIGENDIAN
                env->fpr[i] = vsr[0];
                if (vsx) {
                    env->vsr[i] = vsr[1];
                }
#else
                env->fpr[i] = vsr[1];
                if (vsx) {
                    env->vsr[i] = vsr[0];
                }
#endif
            }
        }
    }

    if (env->insns_flags & PPC_ALTIVEC) {
        reg.id = KVM_REG_PPC_VSCR;
        reg.addr = (uintptr_t)&env->vscr;
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            reg.id = KVM_REG_PPC_VR(i);
            reg.addr = (uintptr_t)&env->avr[i];
            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to get VR%d from KVM: %s\n",
                        i, strerror(errno));
                return ret;
            }
        }
    }

    return 0;
}

#if defined(TARGET_PPC64)
static int kvm_get_vpa(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int ret;

    reg.id = KVM_REG_PPC_VPA_ADDR;
    reg.addr = (uintptr_t)&env->vpa_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
        return ret;
    }

    assert((uintptr_t)&env->slb_shadow_size
           == ((uintptr_t)&env->slb_shadow_addr + 8));
    reg.id = KVM_REG_PPC_VPA_SLB;
    reg.addr = (uintptr_t)&env->slb_shadow_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
                strerror(errno));
        return ret;
    }

    assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
    reg.id = KVM_REG_PPC_VPA_DTL;
    reg.addr = (uintptr_t)&env->dtl_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
                strerror(errno));
        return ret;
    }

    return 0;
}

static int kvm_put_vpa(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int ret;

    /* SLB shadow or DTL can't be registered unless a master VPA is
     * registered. That means when restoring state, if a VPA *is*
     * registered, we need to set that up first. If not, we need to
     * deregister the others before deregistering the master VPA */
    assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));

    if (env->vpa_addr) {
        reg.id = KVM_REG_PPC_VPA_ADDR;
        reg.addr = (uintptr_t)&env->vpa_addr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
            return ret;
        }
    }

    assert((uintptr_t)&env->slb_shadow_size
           == ((uintptr_t)&env->slb_shadow_addr + 8));
    reg.id = KVM_REG_PPC_VPA_SLB;
    reg.addr = (uintptr_t)&env->slb_shadow_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
        return ret;
    }

    assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
    reg.id = KVM_REG_PPC_VPA_DTL;
    reg.addr = (uintptr_t)&env->dtl_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
                strerror(errno));
        return ret;
    }

    if (!env->vpa_addr) {
        reg.id = KVM_REG_PPC_VPA_ADDR;
        reg.addr = (uintptr_t)&env->vpa_addr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
            return ret;
        }
    }

    return 0;
}
#endif /* TARGET_PPC64 */

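/* Push the Book3S ("book S", i.e. server-class MMU) special registers
 * to KVM: the PVR, SDR1 (or the hash table encoding provided by the
 * virtual hypervisor when running under KVM-PR), the SLB, the segment
 * registers and the BATs. */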
int kvmppc_put_books_sregs(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    struct kvm_sregs sregs;
    int i;

    sregs.pvr = env->spr[SPR_PVR];

    if (cpu->vhyp) {
        PPCVirtualHypervisorClass *vhc =
            PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
        sregs.u.s.sdr1 = vhc->encode_hpt_for_kvm_pr(cpu->vhyp);
    } else {
        sregs.u.s.sdr1 = env->spr[SPR_SDR1];
    }

    /* Sync SLB */
#ifdef TARGET_PPC64
    for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
        sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
        if (env->slb[i].esid & SLB_ESID_V) {
            sregs.u.s.ppc64.slb[i].slbe |= i;
        }
        sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
    }
#endif

    /* Sync SRs */
    for (i = 0; i < 16; i++) {
        sregs.u.s.ppc32.sr[i] = env->sr[i];
    }

    /* Sync BATs */
    for (i = 0; i < 8; i++) {
        /* Beware. We have to swap upper and lower bits here */
        sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
            | env->DBAT[1][i];
        sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
            | env->IBAT[1][i];
    }

    return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
}

int kvm_arch_put_registers(CPUState *cs, int level)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_regs regs;
    int ret;
    int i;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    regs.ctr = env->ctr;
    regs.lr = env->lr;
    regs.xer = cpu_read_xer(env);
    regs.msr = env->msr;
    regs.pc = env->nip;

    regs.srr0 = env->spr[SPR_SRR0];
    regs.srr1 = env->spr[SPR_SRR1];

    regs.sprg0 = env->spr[SPR_SPRG0];
    regs.sprg1 = env->spr[SPR_SPRG1];
    regs.sprg2 = env->spr[SPR_SPRG2];
    regs.sprg3 = env->spr[SPR_SPRG3];
    regs.sprg4 = env->spr[SPR_SPRG4];
    regs.sprg5 = env->spr[SPR_SPRG5];
    regs.sprg6 = env->spr[SPR_SPRG6];
    regs.sprg7 = env->spr[SPR_SPRG7];

    regs.pid = env->spr[SPR_BOOKE_PID];

    for (i = 0; i < 32; i++) {
        regs.gpr[i] = env->gpr[i];
    }

    regs.cr = 0;
    for (i = 0; i < 8; i++) {
        regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
    }

    ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    kvm_put_fp(cs);

    if (env->tlb_dirty) {
        kvm_sw_tlb_put(cpu);
        env->tlb_dirty = false;
    }

    if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
        ret = kvmppc_put_books_sregs(cpu);
        if (ret < 0) {
            return ret;
        }
    }

    if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
        kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
    }

    if (cap_one_reg) {
        int i;

        /* We deliberately ignore errors here, for kernels which have
         * the ONE_REG calls, but don't support the specific
         * registers, there's a reasonable chance things will still
         * work, at least until we try to migrate.
*/ 1050 for (i = 0; i < 1024; i++) { 1051 uint64_t id = env->spr_cb[i].one_reg_id; 1052 1053 if (id != 0) { 1054 kvm_put_one_spr(cs, id, i); 1055 } 1056 } 1057 1058 #ifdef TARGET_PPC64 1059 if (msr_ts) { 1060 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) { 1061 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]); 1062 } 1063 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) { 1064 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]); 1065 } 1066 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr); 1067 kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr); 1068 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr); 1069 kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr); 1070 kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr); 1071 kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr); 1072 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave); 1073 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr); 1074 kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr); 1075 kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar); 1076 } 1077 1078 if (cap_papr) { 1079 if (kvm_put_vpa(cs) < 0) { 1080 DPRINTF("Warning: Unable to set VPA information to KVM\n"); 1081 } 1082 } 1083 1084 kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset); 1085 #endif /* TARGET_PPC64 */ 1086 } 1087 1088 return ret; 1089 } 1090 1091 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor) 1092 { 1093 env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR]; 1094 } 1095 1096 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu) 1097 { 1098 CPUPPCState *env = &cpu->env; 1099 struct kvm_sregs sregs; 1100 int ret; 1101 1102 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs); 1103 if (ret < 0) { 1104 return ret; 1105 } 1106 1107 if (sregs.u.e.features & KVM_SREGS_E_BASE) { 1108 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0; 1109 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1; 1110 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr; 1111 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear; 1112 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr; 1113 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr; 1114 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr; 1115 env->spr[SPR_DECR] = sregs.u.e.dec; 1116 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff; 1117 env->spr[SPR_TBU] = sregs.u.e.tb >> 32; 1118 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave; 1119 } 1120 1121 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) { 1122 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir; 1123 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0; 1124 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1; 1125 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar; 1126 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr; 1127 } 1128 1129 if (sregs.u.e.features & KVM_SREGS_E_64) { 1130 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr; 1131 } 1132 1133 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) { 1134 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8; 1135 } 1136 1137 if (sregs.u.e.features & KVM_SREGS_E_IVOR) { 1138 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0]; 1139 kvm_sync_excp(env, POWERPC_EXCP_CRITICAL, SPR_BOOKE_IVOR0); 1140 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1]; 1141 kvm_sync_excp(env, POWERPC_EXCP_MCHECK, SPR_BOOKE_IVOR1); 1142 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2]; 1143 kvm_sync_excp(env, POWERPC_EXCP_DSI, SPR_BOOKE_IVOR2); 1144 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3]; 1145 kvm_sync_excp(env, POWERPC_EXCP_ISI, SPR_BOOKE_IVOR3); 1146 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4]; 1147 kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL, SPR_BOOKE_IVOR4); 1148 
env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5]; 1149 kvm_sync_excp(env, POWERPC_EXCP_ALIGN, SPR_BOOKE_IVOR5); 1150 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6]; 1151 kvm_sync_excp(env, POWERPC_EXCP_PROGRAM, SPR_BOOKE_IVOR6); 1152 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7]; 1153 kvm_sync_excp(env, POWERPC_EXCP_FPU, SPR_BOOKE_IVOR7); 1154 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8]; 1155 kvm_sync_excp(env, POWERPC_EXCP_SYSCALL, SPR_BOOKE_IVOR8); 1156 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9]; 1157 kvm_sync_excp(env, POWERPC_EXCP_APU, SPR_BOOKE_IVOR9); 1158 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10]; 1159 kvm_sync_excp(env, POWERPC_EXCP_DECR, SPR_BOOKE_IVOR10); 1160 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11]; 1161 kvm_sync_excp(env, POWERPC_EXCP_FIT, SPR_BOOKE_IVOR11); 1162 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12]; 1163 kvm_sync_excp(env, POWERPC_EXCP_WDT, SPR_BOOKE_IVOR12); 1164 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13]; 1165 kvm_sync_excp(env, POWERPC_EXCP_DTLB, SPR_BOOKE_IVOR13); 1166 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14]; 1167 kvm_sync_excp(env, POWERPC_EXCP_ITLB, SPR_BOOKE_IVOR14); 1168 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15]; 1169 kvm_sync_excp(env, POWERPC_EXCP_DEBUG, SPR_BOOKE_IVOR15); 1170 1171 if (sregs.u.e.features & KVM_SREGS_E_SPE) { 1172 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0]; 1173 kvm_sync_excp(env, POWERPC_EXCP_SPEU, SPR_BOOKE_IVOR32); 1174 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1]; 1175 kvm_sync_excp(env, POWERPC_EXCP_EFPDI, SPR_BOOKE_IVOR33); 1176 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2]; 1177 kvm_sync_excp(env, POWERPC_EXCP_EFPRI, SPR_BOOKE_IVOR34); 1178 } 1179 1180 if (sregs.u.e.features & KVM_SREGS_E_PM) { 1181 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3]; 1182 kvm_sync_excp(env, POWERPC_EXCP_EPERFM, SPR_BOOKE_IVOR35); 1183 } 1184 1185 if (sregs.u.e.features & KVM_SREGS_E_PC) { 1186 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4]; 1187 kvm_sync_excp(env, POWERPC_EXCP_DOORI, SPR_BOOKE_IVOR36); 1188 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5]; 1189 kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37); 1190 } 1191 } 1192 1193 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) { 1194 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0; 1195 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1; 1196 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2; 1197 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff; 1198 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4; 1199 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6; 1200 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32; 1201 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg; 1202 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0]; 1203 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1]; 1204 } 1205 1206 if (sregs.u.e.features & KVM_SREGS_EXP) { 1207 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr; 1208 } 1209 1210 if (sregs.u.e.features & KVM_SREGS_E_PD) { 1211 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc; 1212 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc; 1213 } 1214 1215 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) { 1216 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr; 1217 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar; 1218 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0; 1219 1220 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) { 1221 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1; 1222 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2; 1223 } 1224 } 1225 1226 return 0; 1227 } 1228 1229 static int 
kvmppc_get_books_sregs(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    struct kvm_sregs sregs;
    int ret;
    int i;

    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
    if (ret < 0) {
        return ret;
    }

    if (!cpu->vhyp) {
        ppc_store_sdr1(env, sregs.u.s.sdr1);
    }

    /* Sync SLB */
#ifdef TARGET_PPC64
    /*
     * The packed SLB array we get from KVM_GET_SREGS only contains
     * information about valid entries. So we flush our internal copy
     * to get rid of stale ones, then put all valid SLB entries back
     * in.
     */
    memset(env->slb, 0, sizeof(env->slb));
    for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
        target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
        target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
        /*
         * Only restore valid entries
         */
        if (rb & SLB_ESID_V) {
            ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
        }
    }
#endif

    /* Sync SRs */
    for (i = 0; i < 16; i++) {
        env->sr[i] = sregs.u.s.ppc32.sr[i];
    }

    /* Sync BATs */
    for (i = 0; i < 8; i++) {
        env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
        env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
        env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
        env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
    }

    return 0;
}

int kvm_arch_get_registers(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_regs regs;
    uint32_t cr;
    int i, ret;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    cr = regs.cr;
    for (i = 7; i >= 0; i--) {
        env->crf[i] = cr & 15;
        cr >>= 4;
    }

    env->ctr = regs.ctr;
    env->lr = regs.lr;
    cpu_write_xer(env, regs.xer);
    env->msr = regs.msr;
    env->nip = regs.pc;

    env->spr[SPR_SRR0] = regs.srr0;
    env->spr[SPR_SRR1] = regs.srr1;

    env->spr[SPR_SPRG0] = regs.sprg0;
    env->spr[SPR_SPRG1] = regs.sprg1;
    env->spr[SPR_SPRG2] = regs.sprg2;
    env->spr[SPR_SPRG3] = regs.sprg3;
    env->spr[SPR_SPRG4] = regs.sprg4;
    env->spr[SPR_SPRG5] = regs.sprg5;
    env->spr[SPR_SPRG6] = regs.sprg6;
    env->spr[SPR_SPRG7] = regs.sprg7;

    env->spr[SPR_BOOKE_PID] = regs.pid;

    for (i = 0; i < 32; i++) {
        env->gpr[i] = regs.gpr[i];
    }

    kvm_get_fp(cs);

    if (cap_booke_sregs) {
        ret = kvmppc_get_booke_sregs(cpu);
        if (ret < 0) {
            return ret;
        }
    }

    if (cap_segstate) {
        ret = kvmppc_get_books_sregs(cpu);
        if (ret < 0) {
            return ret;
        }
    }

    if (cap_hior) {
        kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
    }

    if (cap_one_reg) {
        int i;

        /* We deliberately ignore errors here, for kernels which have
         * the ONE_REG calls, but don't support the specific
         * registers, there's a reasonable chance things will still
         * work, at least until we try to migrate.
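         * The transactional-memory and VPA state below is read back
         * the same way, with any failure only reported via DPRINTF.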
*/ 1350 for (i = 0; i < 1024; i++) { 1351 uint64_t id = env->spr_cb[i].one_reg_id; 1352 1353 if (id != 0) { 1354 kvm_get_one_spr(cs, id, i); 1355 } 1356 } 1357 1358 #ifdef TARGET_PPC64 1359 if (msr_ts) { 1360 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) { 1361 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]); 1362 } 1363 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) { 1364 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]); 1365 } 1366 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr); 1367 kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr); 1368 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr); 1369 kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr); 1370 kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr); 1371 kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr); 1372 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave); 1373 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr); 1374 kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr); 1375 kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar); 1376 } 1377 1378 if (cap_papr) { 1379 if (kvm_get_vpa(cs) < 0) { 1380 DPRINTF("Warning: Unable to get VPA information from KVM\n"); 1381 } 1382 } 1383 1384 kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset); 1385 #endif 1386 } 1387 1388 return 0; 1389 } 1390 1391 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level) 1392 { 1393 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET; 1394 1395 if (irq != PPC_INTERRUPT_EXT) { 1396 return 0; 1397 } 1398 1399 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) { 1400 return 0; 1401 } 1402 1403 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq); 1404 1405 return 0; 1406 } 1407 1408 #if defined(TARGET_PPCEMB) 1409 #define PPC_INPUT_INT PPC40x_INPUT_INT 1410 #elif defined(TARGET_PPC64) 1411 #define PPC_INPUT_INT PPC970_INPUT_INT 1412 #else 1413 #define PPC_INPUT_INT PPC6xx_INPUT_INT 1414 #endif 1415 1416 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run) 1417 { 1418 PowerPCCPU *cpu = POWERPC_CPU(cs); 1419 CPUPPCState *env = &cpu->env; 1420 int r; 1421 unsigned irq; 1422 1423 qemu_mutex_lock_iothread(); 1424 1425 /* PowerPC QEMU tracks the various core input pins (interrupt, critical 1426 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */ 1427 if (!cap_interrupt_level && 1428 run->ready_for_interrupt_injection && 1429 (cs->interrupt_request & CPU_INTERRUPT_HARD) && 1430 (env->irq_input_state & (1<<PPC_INPUT_INT))) 1431 { 1432 /* For now KVM disregards the 'irq' argument. However, in the 1433 * future KVM could cache it in-kernel to avoid a heavyweight exit 1434 * when reading the UIC. 1435 */ 1436 irq = KVM_INTERRUPT_SET; 1437 1438 DPRINTF("injected interrupt %d\n", irq); 1439 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq); 1440 if (r < 0) { 1441 printf("cpu %d fail inject %x\n", cs->cpu_index, irq); 1442 } 1443 1444 /* Always wake up soon in case the interrupt was level based */ 1445 timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 1446 (NANOSECONDS_PER_SECOND / 50)); 1447 } 1448 1449 /* We don't know if there are more interrupts pending after this. However, 1450 * the guest will return to userspace in the course of handling this one 1451 * anyways, so we will get a chance to deliver the rest. 
*/ 1452 1453 qemu_mutex_unlock_iothread(); 1454 } 1455 1456 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run) 1457 { 1458 return MEMTXATTRS_UNSPECIFIED; 1459 } 1460 1461 int kvm_arch_process_async_events(CPUState *cs) 1462 { 1463 return cs->halted; 1464 } 1465 1466 static int kvmppc_handle_halt(PowerPCCPU *cpu) 1467 { 1468 CPUState *cs = CPU(cpu); 1469 CPUPPCState *env = &cpu->env; 1470 1471 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) { 1472 cs->halted = 1; 1473 cs->exception_index = EXCP_HLT; 1474 } 1475 1476 return 0; 1477 } 1478 1479 /* map dcr access to existing qemu dcr emulation */ 1480 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data) 1481 { 1482 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0) 1483 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn); 1484 1485 return 0; 1486 } 1487 1488 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data) 1489 { 1490 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0) 1491 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn); 1492 1493 return 0; 1494 } 1495 1496 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp) 1497 { 1498 /* Mixed endian case is not handled */ 1499 uint32_t sc = debug_inst_opcode; 1500 1501 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 1502 sizeof(sc), 0) || 1503 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) { 1504 return -EINVAL; 1505 } 1506 1507 return 0; 1508 } 1509 1510 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp) 1511 { 1512 uint32_t sc; 1513 1514 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) || 1515 sc != debug_inst_opcode || 1516 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 1517 sizeof(sc), 1)) { 1518 return -EINVAL; 1519 } 1520 1521 return 0; 1522 } 1523 1524 static int find_hw_breakpoint(target_ulong addr, int type) 1525 { 1526 int n; 1527 1528 assert((nb_hw_breakpoint + nb_hw_watchpoint) 1529 <= ARRAY_SIZE(hw_debug_points)); 1530 1531 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) { 1532 if (hw_debug_points[n].addr == addr && 1533 hw_debug_points[n].type == type) { 1534 return n; 1535 } 1536 } 1537 1538 return -1; 1539 } 1540 1541 static int find_hw_watchpoint(target_ulong addr, int *flag) 1542 { 1543 int n; 1544 1545 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS); 1546 if (n >= 0) { 1547 *flag = BP_MEM_ACCESS; 1548 return n; 1549 } 1550 1551 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE); 1552 if (n >= 0) { 1553 *flag = BP_MEM_WRITE; 1554 return n; 1555 } 1556 1557 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ); 1558 if (n >= 0) { 1559 *flag = BP_MEM_READ; 1560 return n; 1561 } 1562 1563 return -1; 1564 } 1565 1566 int kvm_arch_insert_hw_breakpoint(target_ulong addr, 1567 target_ulong len, int type) 1568 { 1569 if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) { 1570 return -ENOBUFS; 1571 } 1572 1573 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr; 1574 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type; 1575 1576 switch (type) { 1577 case GDB_BREAKPOINT_HW: 1578 if (nb_hw_breakpoint >= max_hw_breakpoint) { 1579 return -ENOBUFS; 1580 } 1581 1582 if (find_hw_breakpoint(addr, type) >= 0) { 1583 return -EEXIST; 1584 } 1585 1586 nb_hw_breakpoint++; 1587 break; 1588 1589 case GDB_WATCHPOINT_WRITE: 1590 case GDB_WATCHPOINT_READ: 1591 case GDB_WATCHPOINT_ACCESS: 1592 if (nb_hw_watchpoint >= max_hw_watchpoint) { 1593 
return -ENOBUFS; 1594 } 1595 1596 if (find_hw_breakpoint(addr, type) >= 0) { 1597 return -EEXIST; 1598 } 1599 1600 nb_hw_watchpoint++; 1601 break; 1602 1603 default: 1604 return -ENOSYS; 1605 } 1606 1607 return 0; 1608 } 1609 1610 int kvm_arch_remove_hw_breakpoint(target_ulong addr, 1611 target_ulong len, int type) 1612 { 1613 int n; 1614 1615 n = find_hw_breakpoint(addr, type); 1616 if (n < 0) { 1617 return -ENOENT; 1618 } 1619 1620 switch (type) { 1621 case GDB_BREAKPOINT_HW: 1622 nb_hw_breakpoint--; 1623 break; 1624 1625 case GDB_WATCHPOINT_WRITE: 1626 case GDB_WATCHPOINT_READ: 1627 case GDB_WATCHPOINT_ACCESS: 1628 nb_hw_watchpoint--; 1629 break; 1630 1631 default: 1632 return -ENOSYS; 1633 } 1634 hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint]; 1635 1636 return 0; 1637 } 1638 1639 void kvm_arch_remove_all_hw_breakpoints(void) 1640 { 1641 nb_hw_breakpoint = nb_hw_watchpoint = 0; 1642 } 1643 1644 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg) 1645 { 1646 int n; 1647 1648 /* Software Breakpoint updates */ 1649 if (kvm_sw_breakpoints_active(cs)) { 1650 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP; 1651 } 1652 1653 assert((nb_hw_breakpoint + nb_hw_watchpoint) 1654 <= ARRAY_SIZE(hw_debug_points)); 1655 assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp)); 1656 1657 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) { 1658 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP; 1659 memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp)); 1660 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) { 1661 switch (hw_debug_points[n].type) { 1662 case GDB_BREAKPOINT_HW: 1663 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT; 1664 break; 1665 case GDB_WATCHPOINT_WRITE: 1666 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE; 1667 break; 1668 case GDB_WATCHPOINT_READ: 1669 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ; 1670 break; 1671 case GDB_WATCHPOINT_ACCESS: 1672 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE | 1673 KVMPPC_DEBUG_WATCH_READ; 1674 break; 1675 default: 1676 cpu_abort(cs, "Unsupported breakpoint type\n"); 1677 } 1678 dbg->arch.bp[n].addr = hw_debug_points[n].addr; 1679 } 1680 } 1681 } 1682 1683 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run) 1684 { 1685 CPUState *cs = CPU(cpu); 1686 CPUPPCState *env = &cpu->env; 1687 struct kvm_debug_exit_arch *arch_info = &run->debug.arch; 1688 int handle = 0; 1689 int n; 1690 int flag = 0; 1691 1692 if (cs->singlestep_enabled) { 1693 handle = 1; 1694 } else if (arch_info->status) { 1695 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) { 1696 if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) { 1697 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW); 1698 if (n >= 0) { 1699 handle = 1; 1700 } 1701 } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ | 1702 KVMPPC_DEBUG_WATCH_WRITE)) { 1703 n = find_hw_watchpoint(arch_info->address, &flag); 1704 if (n >= 0) { 1705 handle = 1; 1706 cs->watchpoint_hit = &hw_watchpoint; 1707 hw_watchpoint.vaddr = hw_debug_points[n].addr; 1708 hw_watchpoint.flags = flag; 1709 } 1710 } 1711 } 1712 } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) { 1713 handle = 1; 1714 } else { 1715 /* QEMU is not able to handle debug exception, so inject 1716 * program exception to guest; 1717 * Yes program exception NOT debug exception !! 1718 * When QEMU is using debug resources then debug exception must 1719 * be always set. To achieve this we set MSR_DE and also set 1720 * MSRP_DEP so guest cannot change MSR_DE. 
         * When emulating debug resources for the guest we want the
         * guest to control MSR_DE (i.e. enable/disable the debug
         * interrupt as needed).
         * Supporting both configurations at once is not possible, so
         * we cannot share debug resources between QEMU and the guest
         * on BookE architectures.
         * In the current design QEMU gets priority over the guest:
         * if QEMU is using the debug resources then the guest cannot
         * use them.
         * For software breakpoints QEMU uses a privileged instruction,
         * so we cannot be here because the guest set up a debug
         * exception; the only possibility is that the guest executed
         * a privileged / illegal instruction, and that is why we are
         * injecting a program interrupt.
         */

        cpu_synchronize_state(cs);
        /* env->nip is PC, so increment this by 4 to use
         * ppc_cpu_do_interrupt(), which sets srr0 = env->nip - 4.
         */
        env->nip += 4;
        cs->exception_index = POWERPC_EXCP_PROGRAM;
        env->error_code = POWERPC_EXCP_INVAL;
        ppc_cpu_do_interrupt(cs);
    }

    return handle;
}

int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    int ret;

    qemu_mutex_lock_iothread();

    switch (run->exit_reason) {
    case KVM_EXIT_DCR:
        if (run->dcr.is_write) {
            DPRINTF("handle dcr write\n");
            ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
        } else {
            DPRINTF("handle dcr read\n");
            ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
        }
        break;
    case KVM_EXIT_HLT:
        DPRINTF("handle halt\n");
        ret = kvmppc_handle_halt(cpu);
        break;
#if defined(TARGET_PPC64)
    case KVM_EXIT_PAPR_HCALL:
        DPRINTF("handle PAPR hypercall\n");
        run->papr_hcall.ret = spapr_hypercall(cpu,
                                              run->papr_hcall.nr,
                                              run->papr_hcall.args);
        ret = 0;
        break;
#endif
    case KVM_EXIT_EPR:
        DPRINTF("handle epr\n");
        run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
        ret = 0;
        break;
    case KVM_EXIT_WATCHDOG:
        DPRINTF("handle watchdog expiry\n");
        watchdog_perform_action();
        ret = 0;
        break;

    case KVM_EXIT_DEBUG:
        DPRINTF("handle debug exception\n");
        if (kvm_handle_debug(cpu, run)) {
            ret = EXCP_DEBUG;
            break;
        }
        /* re-enter, this exception was guest-internal */
        ret = 0;
        break;

    default:
        fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
        ret = -1;
        break;
    }

    qemu_mutex_unlock_iothread();
    return ret;
}

int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
{
    CPUState *cs = CPU(cpu);
    uint32_t bits = tsr_bits;
    struct kvm_one_reg reg = {
        .id = KVM_REG_PPC_OR_TSR,
        .addr = (uintptr_t) &bits,
    };

    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
}

int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
{
    CPUState *cs = CPU(cpu);
    uint32_t bits = tsr_bits;
    struct kvm_one_reg reg = {
        .id = KVM_REG_PPC_CLEAR_TSR,
        .addr = (uintptr_t) &bits,
    };

    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
}

int kvmppc_set_tcr(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    CPUPPCState *env = &cpu->env;
    uint32_t tcr = env->spr[SPR_BOOKE_TCR];

    struct kvm_one_reg reg = {
        .id = KVM_REG_PPC_TCR,
        .addr = (uintptr_t) &tcr,
1845 }; 1846 1847 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 1848 } 1849 1850 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu) 1851 { 1852 CPUState *cs = CPU(cpu); 1853 int ret; 1854 1855 if (!kvm_enabled()) { 1856 return -1; 1857 } 1858 1859 if (!cap_ppc_watchdog) { 1860 printf("warning: KVM does not support watchdog"); 1861 return -1; 1862 } 1863 1864 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0); 1865 if (ret < 0) { 1866 fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n", 1867 __func__, strerror(-ret)); 1868 return ret; 1869 } 1870 1871 return ret; 1872 } 1873 1874 static int read_cpuinfo(const char *field, char *value, int len) 1875 { 1876 FILE *f; 1877 int ret = -1; 1878 int field_len = strlen(field); 1879 char line[512]; 1880 1881 f = fopen("/proc/cpuinfo", "r"); 1882 if (!f) { 1883 return -1; 1884 } 1885 1886 do { 1887 if (!fgets(line, sizeof(line), f)) { 1888 break; 1889 } 1890 if (!strncmp(line, field, field_len)) { 1891 pstrcpy(value, len, line); 1892 ret = 0; 1893 break; 1894 } 1895 } while(*line); 1896 1897 fclose(f); 1898 1899 return ret; 1900 } 1901 1902 uint32_t kvmppc_get_tbfreq(void) 1903 { 1904 char line[512]; 1905 char *ns; 1906 uint32_t retval = NANOSECONDS_PER_SECOND; 1907 1908 if (read_cpuinfo("timebase", line, sizeof(line))) { 1909 return retval; 1910 } 1911 1912 if (!(ns = strchr(line, ':'))) { 1913 return retval; 1914 } 1915 1916 ns++; 1917 1918 return atoi(ns); 1919 } 1920 1921 bool kvmppc_get_host_serial(char **value) 1922 { 1923 return g_file_get_contents("/proc/device-tree/system-id", value, NULL, 1924 NULL); 1925 } 1926 1927 bool kvmppc_get_host_model(char **value) 1928 { 1929 return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL); 1930 } 1931 1932 /* Try to find a device tree node for a CPU with clock-frequency property */ 1933 static int kvmppc_find_cpu_dt(char *buf, int buf_len) 1934 { 1935 struct dirent *dirp; 1936 DIR *dp; 1937 1938 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) { 1939 printf("Can't open directory " PROC_DEVTREE_CPU "\n"); 1940 return -1; 1941 } 1942 1943 buf[0] = '\0'; 1944 while ((dirp = readdir(dp)) != NULL) { 1945 FILE *f; 1946 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU, 1947 dirp->d_name); 1948 f = fopen(buf, "r"); 1949 if (f) { 1950 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name); 1951 fclose(f); 1952 break; 1953 } 1954 buf[0] = '\0'; 1955 } 1956 closedir(dp); 1957 if (buf[0] == '\0') { 1958 printf("Unknown host!\n"); 1959 return -1; 1960 } 1961 1962 return 0; 1963 } 1964 1965 static uint64_t kvmppc_read_int_dt(const char *filename) 1966 { 1967 union { 1968 uint32_t v32; 1969 uint64_t v64; 1970 } u; 1971 FILE *f; 1972 int len; 1973 1974 f = fopen(filename, "rb"); 1975 if (!f) { 1976 return -1; 1977 } 1978 1979 len = fread(&u, 1, sizeof(u), f); 1980 fclose(f); 1981 switch (len) { 1982 case 4: 1983 /* property is a 32-bit quantity */ 1984 return be32_to_cpu(u.v32); 1985 case 8: 1986 return be64_to_cpu(u.v64); 1987 } 1988 1989 return 0; 1990 } 1991 1992 /* Read a CPU node property from the host device tree that's a single 1993 * integer (32-bit or 64-bit). 
Returns 0 if anything goes wrong 1994 * (can't find or open the property, or doesn't understand the 1995 * format) */ 1996 static uint64_t kvmppc_read_int_cpu_dt(const char *propname) 1997 { 1998 char buf[PATH_MAX], *tmp; 1999 uint64_t val; 2000 2001 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) { 2002 return -1; 2003 } 2004 2005 tmp = g_strdup_printf("%s/%s", buf, propname); 2006 val = kvmppc_read_int_dt(tmp); 2007 g_free(tmp); 2008 2009 return val; 2010 } 2011 2012 uint64_t kvmppc_get_clockfreq(void) 2013 { 2014 return kvmppc_read_int_cpu_dt("clock-frequency"); 2015 } 2016 2017 uint32_t kvmppc_get_vmx(void) 2018 { 2019 return kvmppc_read_int_cpu_dt("ibm,vmx"); 2020 } 2021 2022 uint32_t kvmppc_get_dfp(void) 2023 { 2024 return kvmppc_read_int_cpu_dt("ibm,dfp"); 2025 } 2026 2027 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo) 2028 { 2029 PowerPCCPU *cpu = ppc_env_get_cpu(env); 2030 CPUState *cs = CPU(cpu); 2031 2032 if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) && 2033 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) { 2034 return 0; 2035 } 2036 2037 return 1; 2038 } 2039 2040 int kvmppc_get_hasidle(CPUPPCState *env) 2041 { 2042 struct kvm_ppc_pvinfo pvinfo; 2043 2044 if (!kvmppc_get_pvinfo(env, &pvinfo) && 2045 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) { 2046 return 1; 2047 } 2048 2049 return 0; 2050 } 2051 2052 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len) 2053 { 2054 uint32_t *hc = (uint32_t*)buf; 2055 struct kvm_ppc_pvinfo pvinfo; 2056 2057 if (!kvmppc_get_pvinfo(env, &pvinfo)) { 2058 memcpy(buf, pvinfo.hcall, buf_len); 2059 return 0; 2060 } 2061 2062 /* 2063 * Fallback to always fail hypercalls regardless of endianness: 2064 * 2065 * tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian) 2066 * li r3, -1 2067 * b .+8 (becomes nop in wrong endian) 2068 * bswap32(li r3, -1) 2069 */ 2070 2071 hc[0] = cpu_to_be32(0x08000048); 2072 hc[1] = cpu_to_be32(0x3860ffff); 2073 hc[2] = cpu_to_be32(0x48000008); 2074 hc[3] = cpu_to_be32(bswap32(0x3860ffff)); 2075 2076 return 1; 2077 } 2078 2079 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall) 2080 { 2081 return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1); 2082 } 2083 2084 void kvmppc_enable_logical_ci_hcalls(void) 2085 { 2086 /* 2087 * FIXME: it would be nice if we could detect the cases where 2088 * we're using a device which requires the in kernel 2089 * implementation of these hcalls, but the kernel lacks them and 2090 * produce a warning. 
2091 */ 2092 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD); 2093 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE); 2094 } 2095 2096 void kvmppc_enable_set_mode_hcall(void) 2097 { 2098 kvmppc_enable_hcall(kvm_state, H_SET_MODE); 2099 } 2100 2101 void kvmppc_enable_clear_ref_mod_hcalls(void) 2102 { 2103 kvmppc_enable_hcall(kvm_state, H_CLEAR_REF); 2104 kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD); 2105 } 2106 2107 void kvmppc_set_papr(PowerPCCPU *cpu) 2108 { 2109 CPUState *cs = CPU(cpu); 2110 int ret; 2111 2112 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0); 2113 if (ret) { 2114 error_report("This vCPU type or KVM version does not support PAPR"); 2115 exit(1); 2116 } 2117 2118 /* Update the capability flag so we sync the right information 2119 * with kvm */ 2120 cap_papr = 1; 2121 } 2122 2123 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr) 2124 { 2125 return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr); 2126 } 2127 2128 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy) 2129 { 2130 CPUState *cs = CPU(cpu); 2131 int ret; 2132 2133 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy); 2134 if (ret && mpic_proxy) { 2135 error_report("This KVM version does not support EPR"); 2136 exit(1); 2137 } 2138 } 2139 2140 int kvmppc_smt_threads(void) 2141 { 2142 return cap_ppc_smt ? cap_ppc_smt : 1; 2143 } 2144 2145 int kvmppc_set_smt_threads(int smt) 2146 { 2147 int ret; 2148 2149 ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_SMT, 0, smt, 0); 2150 if (!ret) { 2151 cap_ppc_smt = smt; 2152 } 2153 return ret; 2154 } 2155 2156 void kvmppc_hint_smt_possible(Error **errp) 2157 { 2158 int i; 2159 GString *g; 2160 char *s; 2161 2162 assert(kvm_enabled()); 2163 if (cap_ppc_smt_possible) { 2164 g = g_string_new("Available VSMT modes:"); 2165 for (i = 63; i >= 0; i--) { 2166 if ((1UL << i) & cap_ppc_smt_possible) { 2167 g_string_append_printf(g, " %lu", (1UL << i)); 2168 } 2169 } 2170 s = g_string_free(g, false); 2171 error_append_hint(errp, "%s.\n", s); 2172 g_free(s); 2173 } else { 2174 error_append_hint(errp, 2175 "This KVM seems to be too old to support VSMT.\n"); 2176 } 2177 } 2178 2179 2180 #ifdef TARGET_PPC64 2181 off_t kvmppc_alloc_rma(void **rma) 2182 { 2183 off_t size; 2184 int fd; 2185 struct kvm_allocate_rma ret; 2186 2187 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported 2188 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but 2189 * not necessary on this hardware 2190 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware 2191 * 2192 * FIXME: We should allow the user to force contiguous RMA 2193 * allocation in the cap_ppc_rma==1 case. 
2194 */ 2195 if (cap_ppc_rma < 2) { 2196 return 0; 2197 } 2198 2199 fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret); 2200 if (fd < 0) { 2201 fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n", 2202 strerror(errno)); 2203 return -1; 2204 } 2205 2206 size = MIN(ret.rma_size, 256ul << 20); 2207 2208 *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); 2209 if (*rma == MAP_FAILED) { 2210 fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno)); 2211 return -1; 2212 }; 2213 2214 return size; 2215 } 2216 2217 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift) 2218 { 2219 struct kvm_ppc_smmu_info info; 2220 long rampagesize, best_page_shift; 2221 int i; 2222 2223 if (cap_ppc_rma >= 2) { 2224 return current_size; 2225 } 2226 2227 /* Find the largest hardware supported page size that's less than 2228 * or equal to the (logical) backing page size of guest RAM */ 2229 kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info); 2230 rampagesize = qemu_getrampagesize(); 2231 best_page_shift = 0; 2232 2233 for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) { 2234 struct kvm_ppc_one_seg_page_size *sps = &info.sps[i]; 2235 2236 if (!sps->page_shift) { 2237 continue; 2238 } 2239 2240 if ((sps->page_shift > best_page_shift) 2241 && ((1UL << sps->page_shift) <= rampagesize)) { 2242 best_page_shift = sps->page_shift; 2243 } 2244 } 2245 2246 return MIN(current_size, 2247 1ULL << (best_page_shift + hash_shift - 7)); 2248 } 2249 #endif 2250 2251 bool kvmppc_spapr_use_multitce(void) 2252 { 2253 return cap_spapr_multitce; 2254 } 2255 2256 int kvmppc_spapr_enable_inkernel_multitce(void) 2257 { 2258 int ret; 2259 2260 ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0, 2261 H_PUT_TCE_INDIRECT, 1); 2262 if (!ret) { 2263 ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0, 2264 H_STUFF_TCE, 1); 2265 } 2266 2267 return ret; 2268 } 2269 2270 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift, 2271 uint64_t bus_offset, uint32_t nb_table, 2272 int *pfd, bool need_vfio) 2273 { 2274 long len; 2275 int fd; 2276 void *table; 2277 2278 /* Must set fd to -1 so we don't try to munmap when called for 2279 * destroying the table, which the upper layers -will- do 2280 */ 2281 *pfd = -1; 2282 if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) { 2283 return NULL; 2284 } 2285 2286 if (cap_spapr_tce_64) { 2287 struct kvm_create_spapr_tce_64 args = { 2288 .liobn = liobn, 2289 .page_shift = page_shift, 2290 .offset = bus_offset >> page_shift, 2291 .size = nb_table, 2292 .flags = 0 2293 }; 2294 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE_64, &args); 2295 if (fd < 0) { 2296 fprintf(stderr, 2297 "KVM: Failed to create TCE64 table for liobn 0x%x\n", 2298 liobn); 2299 return NULL; 2300 } 2301 } else if (cap_spapr_tce) { 2302 uint64_t window_size = (uint64_t) nb_table << page_shift; 2303 struct kvm_create_spapr_tce args = { 2304 .liobn = liobn, 2305 .window_size = window_size, 2306 }; 2307 if ((window_size != args.window_size) || bus_offset) { 2308 return NULL; 2309 } 2310 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args); 2311 if (fd < 0) { 2312 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n", 2313 liobn); 2314 return NULL; 2315 } 2316 } else { 2317 return NULL; 2318 } 2319 2320 len = nb_table * sizeof(uint64_t); 2321 /* FIXME: round this up to page size */ 2322 2323 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); 2324 if (table == MAP_FAILED) { 2325 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n", 
2326 liobn); 2327 close(fd); 2328 return NULL; 2329 } 2330 2331 *pfd = fd; 2332 return table; 2333 } 2334 2335 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table) 2336 { 2337 long len; 2338 2339 if (fd < 0) { 2340 return -1; 2341 } 2342 2343 len = nb_table * sizeof(uint64_t); 2344 if ((munmap(table, len) < 0) || 2345 (close(fd) < 0)) { 2346 fprintf(stderr, "KVM: Unexpected error removing TCE table: %s", 2347 strerror(errno)); 2348 /* Leak the table */ 2349 } 2350 2351 return 0; 2352 } 2353 2354 int kvmppc_reset_htab(int shift_hint) 2355 { 2356 uint32_t shift = shift_hint; 2357 2358 if (!kvm_enabled()) { 2359 /* Full emulation, tell caller to allocate htab itself */ 2360 return 0; 2361 } 2362 if (kvm_vm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) { 2363 int ret; 2364 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift); 2365 if (ret == -ENOTTY) { 2366 /* At least some versions of PR KVM advertise the 2367 * capability, but don't implement the ioctl(). Oops. 2368 * Return 0 so that we allocate the htab in qemu, as is 2369 * correct for PR. */ 2370 return 0; 2371 } else if (ret < 0) { 2372 return ret; 2373 } 2374 return shift; 2375 } 2376 2377 /* We have a kernel that predates the htab reset calls. For PR 2378 * KVM, we need to allocate the htab ourselves, for an HV KVM of 2379 * this era, it has allocated a 16MB fixed size hash table already. */ 2380 if (kvmppc_is_pr(kvm_state)) { 2381 /* PR - tell caller to allocate htab */ 2382 return 0; 2383 } else { 2384 /* HV - assume 16MB kernel allocated htab */ 2385 return 24; 2386 } 2387 } 2388 2389 static inline uint32_t mfpvr(void) 2390 { 2391 uint32_t pvr; 2392 2393 asm ("mfpvr %0" 2394 : "=r"(pvr)); 2395 return pvr; 2396 } 2397 2398 static void alter_insns(uint64_t *word, uint64_t flags, bool on) 2399 { 2400 if (on) { 2401 *word |= flags; 2402 } else { 2403 *word &= ~flags; 2404 } 2405 } 2406 2407 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data) 2408 { 2409 PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc); 2410 uint32_t vmx = kvmppc_get_vmx(); 2411 uint32_t dfp = kvmppc_get_dfp(); 2412 uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size"); 2413 uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size"); 2414 2415 /* Now fix up the class with information we can query from the host */ 2416 pcc->pvr = mfpvr(); 2417 2418 if (vmx != -1) { 2419 /* Only override when we know what the host supports */ 2420 alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0); 2421 alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1); 2422 } 2423 if (dfp != -1) { 2424 /* Only override when we know what the host supports */ 2425 alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp); 2426 } 2427 2428 if (dcache_size != -1) { 2429 pcc->l1_dcache_size = dcache_size; 2430 } 2431 2432 if (icache_size != -1) { 2433 pcc->l1_icache_size = icache_size; 2434 } 2435 2436 #if defined(TARGET_PPC64) 2437 pcc->radix_page_info = kvm_get_radix_page_info(); 2438 2439 if ((pcc->pvr & 0xffffff00) == CPU_POWERPC_POWER9_DD1) { 2440 /* 2441 * POWER9 DD1 has some bugs which make it not really ISA 3.00 2442 * compliant. More importantly, advertising ISA 3.00 2443 * architected mode may prevent guests from activating 2444 * necessary DD1 workarounds. 
2445 */ 2446 pcc->pcr_supported &= ~(PCR_COMPAT_3_00 | PCR_COMPAT_2_07 2447 | PCR_COMPAT_2_06 | PCR_COMPAT_2_05); 2448 } 2449 #endif /* defined(TARGET_PPC64) */ 2450 } 2451 2452 bool kvmppc_has_cap_epr(void) 2453 { 2454 return cap_epr; 2455 } 2456 2457 bool kvmppc_has_cap_fixup_hcalls(void) 2458 { 2459 return cap_fixup_hcalls; 2460 } 2461 2462 bool kvmppc_has_cap_htm(void) 2463 { 2464 return cap_htm; 2465 } 2466 2467 bool kvmppc_has_cap_mmu_radix(void) 2468 { 2469 return cap_mmu_radix; 2470 } 2471 2472 bool kvmppc_has_cap_mmu_hash_v3(void) 2473 { 2474 return cap_mmu_hash_v3; 2475 } 2476 2477 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void) 2478 { 2479 uint32_t host_pvr = mfpvr(); 2480 PowerPCCPUClass *pvr_pcc; 2481 2482 pvr_pcc = ppc_cpu_class_by_pvr(host_pvr); 2483 if (pvr_pcc == NULL) { 2484 pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr); 2485 } 2486 2487 return pvr_pcc; 2488 } 2489 2490 static int kvm_ppc_register_host_cpu_type(void) 2491 { 2492 TypeInfo type_info = { 2493 .name = TYPE_HOST_POWERPC_CPU, 2494 .class_init = kvmppc_host_cpu_class_init, 2495 }; 2496 PowerPCCPUClass *pvr_pcc; 2497 ObjectClass *oc; 2498 DeviceClass *dc; 2499 int i; 2500 2501 pvr_pcc = kvm_ppc_get_host_cpu_class(); 2502 if (pvr_pcc == NULL) { 2503 return -1; 2504 } 2505 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc)); 2506 type_register(&type_info); 2507 2508 oc = object_class_by_name(type_info.name); 2509 g_assert(oc); 2510 2511 #if defined(TARGET_PPC64) 2512 type_info.name = g_strdup_printf("%s-"TYPE_SPAPR_CPU_CORE, "host"); 2513 type_info.parent = TYPE_SPAPR_CPU_CORE, 2514 type_info.instance_size = sizeof(sPAPRCPUCore); 2515 type_info.instance_init = NULL; 2516 type_info.class_init = spapr_cpu_core_class_init; 2517 type_info.class_data = (void *) "host"; 2518 type_register(&type_info); 2519 g_free((void *)type_info.name); 2520 #endif 2521 2522 /* 2523 * Update generic CPU family class alias (e.g. on a POWER8NVL host, 2524 * we want "POWER8" to be a "family" alias that points to the current 2525 * host CPU type, too) 2526 */ 2527 dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc)); 2528 for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) { 2529 if (strcasecmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) { 2530 char *suffix; 2531 2532 ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc)); 2533 suffix = strstr(ppc_cpu_aliases[i].model, POWERPC_CPU_TYPE_SUFFIX); 2534 if (suffix) { 2535 *suffix = 0; 2536 } 2537 break; 2538 } 2539 } 2540 2541 return 0; 2542 } 2543 2544 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function) 2545 { 2546 struct kvm_rtas_token_args args = { 2547 .token = token, 2548 }; 2549 2550 if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) { 2551 return -ENOENT; 2552 } 2553 2554 strncpy(args.name, function, sizeof(args.name)); 2555 2556 return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args); 2557 } 2558 2559 int kvmppc_get_htab_fd(bool write, uint64_t index, Error **errp) 2560 { 2561 struct kvm_get_htab_fd s = { 2562 .flags = write ? KVM_GET_HTAB_WRITE : 0, 2563 .start_index = index, 2564 }; 2565 int ret; 2566 2567 if (!cap_htab_fd) { 2568 error_setg(errp, "KVM version doesn't support %s the HPT", 2569 write ? "writing" : "reading"); 2570 return -ENOTSUP; 2571 } 2572 2573 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s); 2574 if (ret < 0) { 2575 error_setg(errp, "Unable to open fd for %s HPT %s KVM: %s", 2576 write ? "writing" : "reading", write ? 
"to" : "from", 2577 strerror(errno)); 2578 return -errno; 2579 } 2580 2581 return ret; 2582 } 2583 2584 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns) 2585 { 2586 int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); 2587 uint8_t buf[bufsize]; 2588 ssize_t rc; 2589 2590 do { 2591 rc = read(fd, buf, bufsize); 2592 if (rc < 0) { 2593 fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n", 2594 strerror(errno)); 2595 return rc; 2596 } else if (rc) { 2597 uint8_t *buffer = buf; 2598 ssize_t n = rc; 2599 while (n) { 2600 struct kvm_get_htab_header *head = 2601 (struct kvm_get_htab_header *) buffer; 2602 size_t chunksize = sizeof(*head) + 2603 HASH_PTE_SIZE_64 * head->n_valid; 2604 2605 qemu_put_be32(f, head->index); 2606 qemu_put_be16(f, head->n_valid); 2607 qemu_put_be16(f, head->n_invalid); 2608 qemu_put_buffer(f, (void *)(head + 1), 2609 HASH_PTE_SIZE_64 * head->n_valid); 2610 2611 buffer += chunksize; 2612 n -= chunksize; 2613 } 2614 } 2615 } while ((rc != 0) 2616 && ((max_ns < 0) 2617 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns))); 2618 2619 return (rc == 0) ? 1 : 0; 2620 } 2621 2622 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index, 2623 uint16_t n_valid, uint16_t n_invalid) 2624 { 2625 struct kvm_get_htab_header *buf; 2626 size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64; 2627 ssize_t rc; 2628 2629 buf = alloca(chunksize); 2630 buf->index = index; 2631 buf->n_valid = n_valid; 2632 buf->n_invalid = n_invalid; 2633 2634 qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid); 2635 2636 rc = write(fd, buf, chunksize); 2637 if (rc < 0) { 2638 fprintf(stderr, "Error writing KVM hash table: %s\n", 2639 strerror(errno)); 2640 return rc; 2641 } 2642 if (rc != chunksize) { 2643 /* We should never get a short write on a single chunk */ 2644 fprintf(stderr, "Short write, restoring KVM hash table\n"); 2645 return -1; 2646 } 2647 return 0; 2648 } 2649 2650 bool kvm_arch_stop_on_emulation_error(CPUState *cpu) 2651 { 2652 return true; 2653 } 2654 2655 void kvm_arch_init_irq_routing(KVMState *s) 2656 { 2657 } 2658 2659 void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n) 2660 { 2661 int fd, rc; 2662 int i; 2663 2664 fd = kvmppc_get_htab_fd(false, ptex, &error_abort); 2665 2666 i = 0; 2667 while (i < n) { 2668 struct kvm_get_htab_header *hdr; 2669 int m = n < HPTES_PER_GROUP ? 
n : HPTES_PER_GROUP; 2670 char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64]; 2671 2672 rc = read(fd, buf, sizeof(buf)); 2673 if (rc < 0) { 2674 hw_error("kvmppc_read_hptes: Unable to read HPTEs"); 2675 } 2676 2677 hdr = (struct kvm_get_htab_header *)buf; 2678 while ((i < n) && ((char *)hdr < (buf + rc))) { 2679 int invalid = hdr->n_invalid; 2680 2681 if (hdr->index != (ptex + i)) { 2682 hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32 2683 " != (%"HWADDR_PRIu" + %d", hdr->index, ptex, i); 2684 } 2685 2686 memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * hdr->n_valid); 2687 i += hdr->n_valid; 2688 2689 if ((n - i) < invalid) { 2690 invalid = n - i; 2691 } 2692 memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64); 2693 i += hdr->n_invalid; 2694 2695 hdr = (struct kvm_get_htab_header *) 2696 ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid); 2697 } 2698 } 2699 2700 close(fd); 2701 } 2702 2703 void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1) 2704 { 2705 int fd, rc; 2706 struct { 2707 struct kvm_get_htab_header hdr; 2708 uint64_t pte0; 2709 uint64_t pte1; 2710 } buf; 2711 2712 fd = kvmppc_get_htab_fd(true, 0 /* Ignored */, &error_abort); 2713 2714 buf.hdr.n_valid = 1; 2715 buf.hdr.n_invalid = 0; 2716 buf.hdr.index = ptex; 2717 buf.pte0 = cpu_to_be64(pte0); 2718 buf.pte1 = cpu_to_be64(pte1); 2719 2720 rc = write(fd, &buf, sizeof(buf)); 2721 if (rc != sizeof(buf)) { 2722 hw_error("kvmppc_write_hpte: Unable to update KVM HPT"); 2723 } 2724 close(fd); 2725 } 2726 2727 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route, 2728 uint64_t address, uint32_t data, PCIDevice *dev) 2729 { 2730 return 0; 2731 } 2732 2733 int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route, 2734 int vector, PCIDevice *dev) 2735 { 2736 return 0; 2737 } 2738 2739 int kvm_arch_release_virq_post(int virq) 2740 { 2741 return 0; 2742 } 2743 2744 int kvm_arch_msi_data_to_gsi(uint32_t data) 2745 { 2746 return data & 0xffff; 2747 } 2748 2749 int kvmppc_enable_hwrng(void) 2750 { 2751 if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) { 2752 return -1; 2753 } 2754 2755 return kvmppc_enable_hcall(kvm_state, H_RANDOM); 2756 } 2757 2758 void kvmppc_check_papr_resize_hpt(Error **errp) 2759 { 2760 if (!kvm_enabled()) { 2761 return; /* No KVM, we're good */ 2762 } 2763 2764 if (cap_resize_hpt) { 2765 return; /* Kernel has explicit support, we're good */ 2766 } 2767 2768 /* Otherwise fallback on looking for PR KVM */ 2769 if (kvmppc_is_pr(kvm_state)) { 2770 return; 2771 } 2772 2773 error_setg(errp, 2774 "Hash page table resizing not available with this KVM version"); 2775 } 2776 2777 int kvmppc_resize_hpt_prepare(PowerPCCPU *cpu, target_ulong flags, int shift) 2778 { 2779 CPUState *cs = CPU(cpu); 2780 struct kvm_ppc_resize_hpt rhpt = { 2781 .flags = flags, 2782 .shift = shift, 2783 }; 2784 2785 if (!cap_resize_hpt) { 2786 return -ENOSYS; 2787 } 2788 2789 return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_PREPARE, &rhpt); 2790 } 2791 2792 int kvmppc_resize_hpt_commit(PowerPCCPU *cpu, target_ulong flags, int shift) 2793 { 2794 CPUState *cs = CPU(cpu); 2795 struct kvm_ppc_resize_hpt rhpt = { 2796 .flags = flags, 2797 .shift = shift, 2798 }; 2799 2800 if (!cap_resize_hpt) { 2801 return -ENOSYS; 2802 } 2803 2804 return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_COMMIT, &rhpt); 2805 } 2806 2807 /* 2808 * This is a helper function to detect a post migration scenario 2809 * in which a guest, running as KVM-HV, freezes in cpu_post_load because 2810 * the guest kernel can't 
handle a PVR value other than the actual host 2811 * PVR in KVM_SET_SREGS, even if pvr_match() returns true. 2812 * 2813 * If we don't have cap_ppc_pvr_compat and we're not running in PR 2814 * (so, we're HV), return true. The workaround itself is done in 2815 * cpu_post_load. 2816 * 2817 * The order here is important: we'll only check for KVM PR as a 2818 * fallback if the guest kernel can't handle the situation itself. 2819 * We should avoid querying the running KVM type at the QEMU 2820 * level as much as possible. 2821 */ 2822 bool kvmppc_pvr_workaround_required(PowerPCCPU *cpu) 2823 { 2824 CPUState *cs = CPU(cpu); 2825 2826 if (!kvm_enabled()) { 2827 return false; 2828 } 2829 2830 if (cap_ppc_pvr_compat) { 2831 return false; 2832 } 2833 2834 return !kvmppc_is_pr(cs->kvm_state); 2835 } 2836
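/*
 * Illustrative sketch (not part of the original file): the comment above
 * notes that the actual workaround is applied in cpu_post_load(). A minimal
 * caller could look roughly like the fragment below. The field names used
 * here (env->spr[SPR_PVR], env->spr_cb[SPR_PVR].default_value) and the
 * helper name are assumptions for illustration, not a verbatim copy of
 * QEMU's implementation.
 */
#if 0
static void example_fixup_pvr_after_migration(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;

    if (kvmppc_pvr_workaround_required(cpu)) {
        /* KVM-HV only accepts the host PVR in KVM_SET_SREGS, so fall back
         * to the CPU class default (normally the host PVR) instead of the
         * value that arrived with the migration stream. */
        env->spr[SPR_PVR] = env->spr_cb[SPR_PVR].default_value;
    }
}
#endif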