/*
 * PowerPC implementation of KVM hooks
 *
 * Copyright IBM Corp. 2007
 * Copyright (C) 2011 Freescale Semiconductor, Inc.
 *
 * Authors:
 *  Jerone Young <jyoung5@us.ibm.com>
 *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
 *  Hollis Blanchard <hollisb@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include <dirent.h>
#include <sys/ioctl.h>
#include <sys/vfs.h>

#include <linux/kvm.h>

#include "qemu-common.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "cpu.h"
#include "cpu-models.h"
#include "qemu/timer.h"
#include "sysemu/sysemu.h"
#include "sysemu/hw_accel.h"
#include "kvm_ppc.h"
#include "sysemu/cpus.h"
#include "sysemu/device_tree.h"
#include "mmu-hash64.h"

#include "hw/sysbus.h"
#include "hw/ppc/spapr.h"
#include "hw/ppc/spapr_vio.h"
#include "hw/ppc/spapr_cpu_core.h"
#include "hw/ppc/ppc.h"
#include "sysemu/watchdog.h"
#include "trace.h"
#include "exec/gdbstub.h"
#include "exec/memattrs.h"
#include "exec/ram_addr.h"
#include "sysemu/hostmem.h"
#include "qemu/cutils.h"
#include "qemu/mmap-alloc.h"
#include "elf.h"
#include "sysemu/kvm_int.h"

//#define DEBUG_KVM

#ifdef DEBUG_KVM
#define DPRINTF(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define DPRINTF(fmt, ...) \
    do { } while (0)
#endif

#define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"

const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
    KVM_CAP_LAST_INFO
};

static int cap_interrupt_unset = false;
static int cap_interrupt_level = false;
static int cap_segstate;
static int cap_booke_sregs;
static int cap_ppc_smt;
static int cap_ppc_smt_possible;
static int cap_ppc_rma;
static int cap_spapr_tce;
static int cap_spapr_tce_64;
static int cap_spapr_multitce;
static int cap_spapr_vfio;
static int cap_hior;
static int cap_one_reg;
static int cap_epr;
static int cap_ppc_watchdog;
static int cap_papr;
static int cap_htab_fd;
static int cap_fixup_hcalls;
static int cap_htm;             /* Hardware transactional memory support */
static int cap_mmu_radix;
static int cap_mmu_hash_v3;
static int cap_resize_hpt;
static int cap_ppc_pvr_compat;
static int cap_ppc_safe_cache;
static int cap_ppc_safe_bounds_check;
static int cap_ppc_safe_indirect_branch;

static uint32_t debug_inst_opcode;

/* XXX We have a race condition where we actually have a level triggered
 *     interrupt, but the infrastructure can't expose that yet, so the guest
 *     takes but ignores it, goes to sleep and never gets notified that there's
 *     still an interrupt pending.
 *
 *     As a quick workaround, let's just wake up again 20 ms after we injected
 *     an interrupt. That way we can assure that we're always reinjecting
 *     interrupts in case the guest swallowed them.
 */
static QEMUTimer *idle_timer;

static void kvm_kick_cpu(void *opaque)
{
    PowerPCCPU *cpu = opaque;

    qemu_cpu_kick(CPU(cpu));
}

/* Check whether we are running with KVM-PR (instead of KVM-HV). This
 * should only be used for fallback tests - generally we should use
 * explicit capabilities for the features we want, rather than
 * assuming what is/isn't available depending on the KVM variant.
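 *
 * (Background: KVM-PR is the "problem state" implementation that runs
 * guests without hypervisor privileges, while KVM-HV drives the
 * bare-metal hypervisor mode.  The heuristic below assumes that only
 * KVM-PR advertises KVM_CAP_PPC_GET_PVINFO.)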
*/ 120 static bool kvmppc_is_pr(KVMState *ks) 121 { 122 /* Assume KVM-PR if the GET_PVINFO capability is available */ 123 return kvm_vm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0; 124 } 125 126 static int kvm_ppc_register_host_cpu_type(MachineState *ms); 127 static void kvmppc_get_cpu_characteristics(KVMState *s); 128 129 int kvm_arch_init(MachineState *ms, KVMState *s) 130 { 131 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ); 132 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL); 133 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE); 134 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS); 135 cap_ppc_smt_possible = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT_POSSIBLE); 136 cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA); 137 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE); 138 cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64); 139 cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE); 140 cap_spapr_vfio = kvm_vm_check_extension(s, KVM_CAP_SPAPR_TCE_VFIO); 141 cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG); 142 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR); 143 cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR); 144 cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG); 145 /* Note: we don't set cap_papr here, because this capability is 146 * only activated after this by kvmppc_set_papr() */ 147 cap_htab_fd = kvm_vm_check_extension(s, KVM_CAP_PPC_HTAB_FD); 148 cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL); 149 cap_ppc_smt = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT); 150 cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM); 151 cap_mmu_radix = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_RADIX); 152 cap_mmu_hash_v3 = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_HASH_V3); 153 cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT); 154 kvmppc_get_cpu_characteristics(s); 155 /* 156 * Note: setting it to false because there is not such capability 157 * in KVM at this moment. 158 * 159 * TODO: call kvm_vm_check_extension() with the right capability 160 * after the kernel starts implementing it.*/ 161 cap_ppc_pvr_compat = false; 162 163 if (!cap_interrupt_level) { 164 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the " 165 "VM to stall at times!\n"); 166 } 167 168 kvm_ppc_register_host_cpu_type(ms); 169 170 return 0; 171 } 172 173 int kvm_arch_irqchip_create(MachineState *ms, KVMState *s) 174 { 175 return 0; 176 } 177 178 static int kvm_arch_sync_sregs(PowerPCCPU *cpu) 179 { 180 CPUPPCState *cenv = &cpu->env; 181 CPUState *cs = CPU(cpu); 182 struct kvm_sregs sregs; 183 int ret; 184 185 if (cenv->excp_model == POWERPC_EXCP_BOOKE) { 186 /* What we're really trying to say is "if we're on BookE, we use 187 the native PVR for now". This is the only sane way to check 188 it though, so we potentially confuse users that they can run 189 BookE guests on BookS. 
           Let's hope nobody dares enough :) */
        return 0;
    } else {
        if (!cap_segstate) {
            fprintf(stderr, "kvm error: missing PVR setting capability\n");
            return -ENOSYS;
        }
    }

    ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
    if (ret) {
        return ret;
    }

    sregs.pvr = cenv->spr[SPR_PVR];
    return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
}

/* Set up a shared TLB array with KVM */
static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_book3e_206_tlb_params params = {};
    struct kvm_config_tlb cfg = {};
    unsigned int entries = 0;
    int ret, i;

    if (!kvm_enabled() ||
        !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
        return 0;
    }

    assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);

    for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
        params.tlb_sizes[i] = booke206_tlb_size(env, i);
        params.tlb_ways[i] = booke206_tlb_ways(env, i);
        entries += params.tlb_sizes[i];
    }

    assert(entries == env->nb_tlb);
    assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));

    env->tlb_dirty = true;

    cfg.array = (uintptr_t)env->tlb.tlbm;
    cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
    cfg.params = (uintptr_t)&params;
    cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;

    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
    if (ret < 0) {
        fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
                __func__, strerror(-ret));
        return ret;
    }

    env->kvm_sw_tlb = true;
    return 0;
}


#if defined(TARGET_PPC64)
static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
                                       struct kvm_ppc_smmu_info *info)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);

    memset(info, 0, sizeof(*info));

    /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
     * need to "guess" what the supported page sizes are.
     *
     * For that to work we make a few assumptions:
     *
     * - Check whether we are running "PR" KVM which only supports 4K
     *   and 16M pages, but supports them regardless of the backing
     *   store characteristics. We also don't support 1T segments.
     *
     *   This is safe as if HV KVM ever supports that capability or PR
     *   KVM grows support for more page/segment sizes, those versions
     *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
     *   will not hit this fallback.
     *
     * - Else we are running HV KVM. This means we only support page
     *   sizes that fit in the backing store. Additionally we only
     *   advertise 64K pages if the processor is ARCH 2.06 and we assume
     *   P7 encodings for the SLB and hash table. Here too, we assume
     *   support for any newer processor will mean a kernel that
     *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
     *   this fallback.
282 */ 283 if (kvmppc_is_pr(cs->kvm_state)) { 284 /* No flags */ 285 info->flags = 0; 286 info->slb_size = 64; 287 288 /* Standard 4k base page size segment */ 289 info->sps[0].page_shift = 12; 290 info->sps[0].slb_enc = 0; 291 info->sps[0].enc[0].page_shift = 12; 292 info->sps[0].enc[0].pte_enc = 0; 293 294 /* Standard 16M large page size segment */ 295 info->sps[1].page_shift = 24; 296 info->sps[1].slb_enc = SLB_VSID_L; 297 info->sps[1].enc[0].page_shift = 24; 298 info->sps[1].enc[0].pte_enc = 0; 299 } else { 300 int i = 0; 301 302 /* HV KVM has backing store size restrictions */ 303 info->flags = KVM_PPC_PAGE_SIZES_REAL; 304 305 if (env->mmu_model & POWERPC_MMU_1TSEG) { 306 info->flags |= KVM_PPC_1T_SEGMENTS; 307 } 308 309 if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 || 310 POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) { 311 info->slb_size = 32; 312 } else { 313 info->slb_size = 64; 314 } 315 316 /* Standard 4k base page size segment */ 317 info->sps[i].page_shift = 12; 318 info->sps[i].slb_enc = 0; 319 info->sps[i].enc[0].page_shift = 12; 320 info->sps[i].enc[0].pte_enc = 0; 321 i++; 322 323 /* 64K on MMU 2.06 and later */ 324 if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 || 325 POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) { 326 info->sps[i].page_shift = 16; 327 info->sps[i].slb_enc = 0x110; 328 info->sps[i].enc[0].page_shift = 16; 329 info->sps[i].enc[0].pte_enc = 1; 330 i++; 331 } 332 333 /* Standard 16M large page size segment */ 334 info->sps[i].page_shift = 24; 335 info->sps[i].slb_enc = SLB_VSID_L; 336 info->sps[i].enc[0].page_shift = 24; 337 info->sps[i].enc[0].pte_enc = 0; 338 } 339 } 340 341 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info) 342 { 343 CPUState *cs = CPU(cpu); 344 int ret; 345 346 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) { 347 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info); 348 if (ret == 0) { 349 return; 350 } 351 } 352 353 kvm_get_fallback_smmu_info(cpu, info); 354 } 355 356 struct ppc_radix_page_info *kvm_get_radix_page_info(void) 357 { 358 KVMState *s = KVM_STATE(current_machine->accelerator); 359 struct ppc_radix_page_info *radix_page_info; 360 struct kvm_ppc_rmmu_info rmmu_info; 361 int i; 362 363 if (!kvm_check_extension(s, KVM_CAP_PPC_MMU_RADIX)) { 364 return NULL; 365 } 366 if (kvm_vm_ioctl(s, KVM_PPC_GET_RMMU_INFO, &rmmu_info)) { 367 return NULL; 368 } 369 radix_page_info = g_malloc0(sizeof(*radix_page_info)); 370 radix_page_info->count = 0; 371 for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) { 372 if (rmmu_info.ap_encodings[i]) { 373 radix_page_info->entries[i] = rmmu_info.ap_encodings[i]; 374 radix_page_info->count++; 375 } 376 } 377 return radix_page_info; 378 } 379 380 target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu, 381 bool radix, bool gtse, 382 uint64_t proc_tbl) 383 { 384 CPUState *cs = CPU(cpu); 385 int ret; 386 uint64_t flags = 0; 387 struct kvm_ppc_mmuv3_cfg cfg = { 388 .process_table = proc_tbl, 389 }; 390 391 if (radix) { 392 flags |= KVM_PPC_MMUV3_RADIX; 393 } 394 if (gtse) { 395 flags |= KVM_PPC_MMUV3_GTSE; 396 } 397 cfg.flags = flags; 398 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_CONFIGURE_V3_MMU, &cfg); 399 switch (ret) { 400 case 0: 401 return H_SUCCESS; 402 case -EINVAL: 403 return H_PARAMETER; 404 case -ENODEV: 405 return H_NOT_AVAILABLE; 406 default: 407 return H_HARDWARE; 408 } 409 } 410 411 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift) 412 { 413 if (!(flags & 
KVM_PPC_PAGE_SIZES_REAL)) { 414 return true; 415 } 416 417 return (1ul << shift) <= rampgsize; 418 } 419 420 static long max_cpu_page_size; 421 422 static void kvm_fixup_page_sizes(PowerPCCPU *cpu) 423 { 424 static struct kvm_ppc_smmu_info smmu_info; 425 static bool has_smmu_info; 426 CPUPPCState *env = &cpu->env; 427 int iq, ik, jq, jk; 428 bool has_64k_pages = false; 429 430 /* We only handle page sizes for 64-bit server guests for now */ 431 if (!(env->mmu_model & POWERPC_MMU_64)) { 432 return; 433 } 434 435 /* Collect MMU info from kernel if not already */ 436 if (!has_smmu_info) { 437 kvm_get_smmu_info(cpu, &smmu_info); 438 has_smmu_info = true; 439 } 440 441 if (!max_cpu_page_size) { 442 max_cpu_page_size = qemu_getrampagesize(); 443 } 444 445 /* Convert to QEMU form */ 446 memset(&env->sps, 0, sizeof(env->sps)); 447 448 /* If we have HV KVM, we need to forbid CI large pages if our 449 * host page size is smaller than 64K. 450 */ 451 if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) { 452 env->ci_large_pages = getpagesize() >= 0x10000; 453 } 454 455 /* 456 * XXX This loop should be an entry wide AND of the capabilities that 457 * the selected CPU has with the capabilities that KVM supports. 458 */ 459 for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) { 460 struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq]; 461 struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik]; 462 463 if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size, 464 ksps->page_shift)) { 465 continue; 466 } 467 qsps->page_shift = ksps->page_shift; 468 qsps->slb_enc = ksps->slb_enc; 469 for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) { 470 if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size, 471 ksps->enc[jk].page_shift)) { 472 continue; 473 } 474 if (ksps->enc[jk].page_shift == 16) { 475 has_64k_pages = true; 476 } 477 qsps->enc[jq].page_shift = ksps->enc[jk].page_shift; 478 qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc; 479 if (++jq >= PPC_PAGE_SIZES_MAX_SZ) { 480 break; 481 } 482 } 483 if (++iq >= PPC_PAGE_SIZES_MAX_SZ) { 484 break; 485 } 486 } 487 env->slb_nr = smmu_info.slb_size; 488 if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) { 489 env->mmu_model &= ~POWERPC_MMU_1TSEG; 490 } 491 if (!has_64k_pages) { 492 env->mmu_model &= ~POWERPC_MMU_64K; 493 } 494 } 495 496 bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path) 497 { 498 Object *mem_obj = object_resolve_path(obj_path, NULL); 499 long pagesize = host_memory_backend_pagesize(MEMORY_BACKEND(mem_obj)); 500 501 return pagesize >= max_cpu_page_size; 502 } 503 504 #else /* defined (TARGET_PPC64) */ 505 506 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu) 507 { 508 } 509 510 bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path) 511 { 512 return true; 513 } 514 515 #endif /* !defined (TARGET_PPC64) */ 516 517 unsigned long kvm_arch_vcpu_id(CPUState *cpu) 518 { 519 return POWERPC_CPU(cpu)->vcpu_id; 520 } 521 522 /* e500 supports 2 h/w breakpoint and 2 watchpoint. 523 * book3s supports only 1 watchpoint, so array size 524 * of 4 is sufficient for now. 
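 * (In other words, the worst case handled here is the e500/BookE layout of
 * 2 hardware breakpoints plus 2 watchpoints, which is what MAX_HW_BKPTS
 * below covers; kvmppc_hw_debug_points_init() fills in the BookE limits.)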
525 */ 526 #define MAX_HW_BKPTS 4 527 528 static struct HWBreakpoint { 529 target_ulong addr; 530 int type; 531 } hw_debug_points[MAX_HW_BKPTS]; 532 533 static CPUWatchpoint hw_watchpoint; 534 535 /* Default there is no breakpoint and watchpoint supported */ 536 static int max_hw_breakpoint; 537 static int max_hw_watchpoint; 538 static int nb_hw_breakpoint; 539 static int nb_hw_watchpoint; 540 541 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv) 542 { 543 if (cenv->excp_model == POWERPC_EXCP_BOOKE) { 544 max_hw_breakpoint = 2; 545 max_hw_watchpoint = 2; 546 } 547 548 if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) { 549 fprintf(stderr, "Error initializing h/w breakpoints\n"); 550 return; 551 } 552 } 553 554 int kvm_arch_init_vcpu(CPUState *cs) 555 { 556 PowerPCCPU *cpu = POWERPC_CPU(cs); 557 CPUPPCState *cenv = &cpu->env; 558 int ret; 559 560 /* Gather server mmu info from KVM and update the CPU state */ 561 kvm_fixup_page_sizes(cpu); 562 563 /* Synchronize sregs with kvm */ 564 ret = kvm_arch_sync_sregs(cpu); 565 if (ret) { 566 if (ret == -EINVAL) { 567 error_report("Register sync failed... If you're using kvm-hv.ko," 568 " only \"-cpu host\" is possible"); 569 } 570 return ret; 571 } 572 573 idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu); 574 575 switch (cenv->mmu_model) { 576 case POWERPC_MMU_BOOKE206: 577 /* This target supports access to KVM's guest TLB */ 578 ret = kvm_booke206_tlb_init(cpu); 579 break; 580 case POWERPC_MMU_2_07: 581 if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) { 582 /* KVM-HV has transactional memory on POWER8 also without the 583 * KVM_CAP_PPC_HTM extension, so enable it here instead as 584 * long as it's availble to userspace on the host. */ 585 if (qemu_getauxval(AT_HWCAP2) & PPC_FEATURE2_HAS_HTM) { 586 cap_htm = true; 587 } 588 } 589 break; 590 default: 591 break; 592 } 593 594 kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode); 595 kvmppc_hw_debug_points_init(cenv); 596 597 return ret; 598 } 599 600 static void kvm_sw_tlb_put(PowerPCCPU *cpu) 601 { 602 CPUPPCState *env = &cpu->env; 603 CPUState *cs = CPU(cpu); 604 struct kvm_dirty_tlb dirty_tlb; 605 unsigned char *bitmap; 606 int ret; 607 608 if (!env->kvm_sw_tlb) { 609 return; 610 } 611 612 bitmap = g_malloc((env->nb_tlb + 7) / 8); 613 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8); 614 615 dirty_tlb.bitmap = (uintptr_t)bitmap; 616 dirty_tlb.num_dirty = env->nb_tlb; 617 618 ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb); 619 if (ret) { 620 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n", 621 __func__, strerror(-ret)); 622 } 623 624 g_free(bitmap); 625 } 626 627 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr) 628 { 629 PowerPCCPU *cpu = POWERPC_CPU(cs); 630 CPUPPCState *env = &cpu->env; 631 union { 632 uint32_t u32; 633 uint64_t u64; 634 } val; 635 struct kvm_one_reg reg = { 636 .id = id, 637 .addr = (uintptr_t) &val, 638 }; 639 int ret; 640 641 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 642 if (ret != 0) { 643 trace_kvm_failed_spr_get(spr, strerror(errno)); 644 } else { 645 switch (id & KVM_REG_SIZE_MASK) { 646 case KVM_REG_SIZE_U32: 647 env->spr[spr] = val.u32; 648 break; 649 650 case KVM_REG_SIZE_U64: 651 env->spr[spr] = val.u64; 652 break; 653 654 default: 655 /* Don't handle this size yet */ 656 abort(); 657 } 658 } 659 } 660 661 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr) 662 { 663 PowerPCCPU *cpu = POWERPC_CPU(cs); 664 CPUPPCState *env = &cpu->env; 665 union { 666 uint32_t u32; 667 uint64_t u64; 668 } val; 669 struct 
kvm_one_reg reg = { 670 .id = id, 671 .addr = (uintptr_t) &val, 672 }; 673 int ret; 674 675 switch (id & KVM_REG_SIZE_MASK) { 676 case KVM_REG_SIZE_U32: 677 val.u32 = env->spr[spr]; 678 break; 679 680 case KVM_REG_SIZE_U64: 681 val.u64 = env->spr[spr]; 682 break; 683 684 default: 685 /* Don't handle this size yet */ 686 abort(); 687 } 688 689 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 690 if (ret != 0) { 691 trace_kvm_failed_spr_set(spr, strerror(errno)); 692 } 693 } 694 695 static int kvm_put_fp(CPUState *cs) 696 { 697 PowerPCCPU *cpu = POWERPC_CPU(cs); 698 CPUPPCState *env = &cpu->env; 699 struct kvm_one_reg reg; 700 int i; 701 int ret; 702 703 if (env->insns_flags & PPC_FLOAT) { 704 uint64_t fpscr = env->fpscr; 705 bool vsx = !!(env->insns_flags2 & PPC2_VSX); 706 707 reg.id = KVM_REG_PPC_FPSCR; 708 reg.addr = (uintptr_t)&fpscr; 709 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 710 if (ret < 0) { 711 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno)); 712 return ret; 713 } 714 715 for (i = 0; i < 32; i++) { 716 uint64_t vsr[2]; 717 718 #ifdef HOST_WORDS_BIGENDIAN 719 vsr[0] = float64_val(env->fpr[i]); 720 vsr[1] = env->vsr[i]; 721 #else 722 vsr[0] = env->vsr[i]; 723 vsr[1] = float64_val(env->fpr[i]); 724 #endif 725 reg.addr = (uintptr_t) &vsr; 726 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i); 727 728 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 729 if (ret < 0) { 730 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR", 731 i, strerror(errno)); 732 return ret; 733 } 734 } 735 } 736 737 if (env->insns_flags & PPC_ALTIVEC) { 738 reg.id = KVM_REG_PPC_VSCR; 739 reg.addr = (uintptr_t)&env->vscr; 740 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 741 if (ret < 0) { 742 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno)); 743 return ret; 744 } 745 746 for (i = 0; i < 32; i++) { 747 reg.id = KVM_REG_PPC_VR(i); 748 reg.addr = (uintptr_t)&env->avr[i]; 749 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 750 if (ret < 0) { 751 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno)); 752 return ret; 753 } 754 } 755 } 756 757 return 0; 758 } 759 760 static int kvm_get_fp(CPUState *cs) 761 { 762 PowerPCCPU *cpu = POWERPC_CPU(cs); 763 CPUPPCState *env = &cpu->env; 764 struct kvm_one_reg reg; 765 int i; 766 int ret; 767 768 if (env->insns_flags & PPC_FLOAT) { 769 uint64_t fpscr; 770 bool vsx = !!(env->insns_flags2 & PPC2_VSX); 771 772 reg.id = KVM_REG_PPC_FPSCR; 773 reg.addr = (uintptr_t)&fpscr; 774 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 775 if (ret < 0) { 776 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno)); 777 return ret; 778 } else { 779 env->fpscr = fpscr; 780 } 781 782 for (i = 0; i < 32; i++) { 783 uint64_t vsr[2]; 784 785 reg.addr = (uintptr_t) &vsr; 786 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i); 787 788 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 789 if (ret < 0) { 790 DPRINTF("Unable to get %s%d from KVM: %s\n", 791 vsx ? 
"VSR" : "FPR", i, strerror(errno)); 792 return ret; 793 } else { 794 #ifdef HOST_WORDS_BIGENDIAN 795 env->fpr[i] = vsr[0]; 796 if (vsx) { 797 env->vsr[i] = vsr[1]; 798 } 799 #else 800 env->fpr[i] = vsr[1]; 801 if (vsx) { 802 env->vsr[i] = vsr[0]; 803 } 804 #endif 805 } 806 } 807 } 808 809 if (env->insns_flags & PPC_ALTIVEC) { 810 reg.id = KVM_REG_PPC_VSCR; 811 reg.addr = (uintptr_t)&env->vscr; 812 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 813 if (ret < 0) { 814 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno)); 815 return ret; 816 } 817 818 for (i = 0; i < 32; i++) { 819 reg.id = KVM_REG_PPC_VR(i); 820 reg.addr = (uintptr_t)&env->avr[i]; 821 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 822 if (ret < 0) { 823 DPRINTF("Unable to get VR%d from KVM: %s\n", 824 i, strerror(errno)); 825 return ret; 826 } 827 } 828 } 829 830 return 0; 831 } 832 833 #if defined(TARGET_PPC64) 834 static int kvm_get_vpa(CPUState *cs) 835 { 836 PowerPCCPU *cpu = POWERPC_CPU(cs); 837 CPUPPCState *env = &cpu->env; 838 struct kvm_one_reg reg; 839 int ret; 840 841 reg.id = KVM_REG_PPC_VPA_ADDR; 842 reg.addr = (uintptr_t)&env->vpa_addr; 843 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 844 if (ret < 0) { 845 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno)); 846 return ret; 847 } 848 849 assert((uintptr_t)&env->slb_shadow_size 850 == ((uintptr_t)&env->slb_shadow_addr + 8)); 851 reg.id = KVM_REG_PPC_VPA_SLB; 852 reg.addr = (uintptr_t)&env->slb_shadow_addr; 853 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 854 if (ret < 0) { 855 DPRINTF("Unable to get SLB shadow state from KVM: %s\n", 856 strerror(errno)); 857 return ret; 858 } 859 860 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8)); 861 reg.id = KVM_REG_PPC_VPA_DTL; 862 reg.addr = (uintptr_t)&env->dtl_addr; 863 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 864 if (ret < 0) { 865 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n", 866 strerror(errno)); 867 return ret; 868 } 869 870 return 0; 871 } 872 873 static int kvm_put_vpa(CPUState *cs) 874 { 875 PowerPCCPU *cpu = POWERPC_CPU(cs); 876 CPUPPCState *env = &cpu->env; 877 struct kvm_one_reg reg; 878 int ret; 879 880 /* SLB shadow or DTL can't be registered unless a master VPA is 881 * registered. That means when restoring state, if a VPA *is* 882 * registered, we need to set that up first. 
If not, we need to 883 * deregister the others before deregistering the master VPA */ 884 assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr)); 885 886 if (env->vpa_addr) { 887 reg.id = KVM_REG_PPC_VPA_ADDR; 888 reg.addr = (uintptr_t)&env->vpa_addr; 889 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 890 if (ret < 0) { 891 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno)); 892 return ret; 893 } 894 } 895 896 assert((uintptr_t)&env->slb_shadow_size 897 == ((uintptr_t)&env->slb_shadow_addr + 8)); 898 reg.id = KVM_REG_PPC_VPA_SLB; 899 reg.addr = (uintptr_t)&env->slb_shadow_addr; 900 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 901 if (ret < 0) { 902 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno)); 903 return ret; 904 } 905 906 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8)); 907 reg.id = KVM_REG_PPC_VPA_DTL; 908 reg.addr = (uintptr_t)&env->dtl_addr; 909 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 910 if (ret < 0) { 911 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n", 912 strerror(errno)); 913 return ret; 914 } 915 916 if (!env->vpa_addr) { 917 reg.id = KVM_REG_PPC_VPA_ADDR; 918 reg.addr = (uintptr_t)&env->vpa_addr; 919 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 920 if (ret < 0) { 921 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno)); 922 return ret; 923 } 924 } 925 926 return 0; 927 } 928 #endif /* TARGET_PPC64 */ 929 930 int kvmppc_put_books_sregs(PowerPCCPU *cpu) 931 { 932 CPUPPCState *env = &cpu->env; 933 struct kvm_sregs sregs; 934 int i; 935 936 sregs.pvr = env->spr[SPR_PVR]; 937 938 if (cpu->vhyp) { 939 PPCVirtualHypervisorClass *vhc = 940 PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); 941 sregs.u.s.sdr1 = vhc->encode_hpt_for_kvm_pr(cpu->vhyp); 942 } else { 943 sregs.u.s.sdr1 = env->spr[SPR_SDR1]; 944 } 945 946 /* Sync SLB */ 947 #ifdef TARGET_PPC64 948 for (i = 0; i < ARRAY_SIZE(env->slb); i++) { 949 sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid; 950 if (env->slb[i].esid & SLB_ESID_V) { 951 sregs.u.s.ppc64.slb[i].slbe |= i; 952 } 953 sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid; 954 } 955 #endif 956 957 /* Sync SRs */ 958 for (i = 0; i < 16; i++) { 959 sregs.u.s.ppc32.sr[i] = env->sr[i]; 960 } 961 962 /* Sync BATs */ 963 for (i = 0; i < 8; i++) { 964 /* Beware. 
We have to swap upper and lower bits here */ 965 sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32) 966 | env->DBAT[1][i]; 967 sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32) 968 | env->IBAT[1][i]; 969 } 970 971 return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs); 972 } 973 974 int kvm_arch_put_registers(CPUState *cs, int level) 975 { 976 PowerPCCPU *cpu = POWERPC_CPU(cs); 977 CPUPPCState *env = &cpu->env; 978 struct kvm_regs regs; 979 int ret; 980 int i; 981 982 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, ®s); 983 if (ret < 0) { 984 return ret; 985 } 986 987 regs.ctr = env->ctr; 988 regs.lr = env->lr; 989 regs.xer = cpu_read_xer(env); 990 regs.msr = env->msr; 991 regs.pc = env->nip; 992 993 regs.srr0 = env->spr[SPR_SRR0]; 994 regs.srr1 = env->spr[SPR_SRR1]; 995 996 regs.sprg0 = env->spr[SPR_SPRG0]; 997 regs.sprg1 = env->spr[SPR_SPRG1]; 998 regs.sprg2 = env->spr[SPR_SPRG2]; 999 regs.sprg3 = env->spr[SPR_SPRG3]; 1000 regs.sprg4 = env->spr[SPR_SPRG4]; 1001 regs.sprg5 = env->spr[SPR_SPRG5]; 1002 regs.sprg6 = env->spr[SPR_SPRG6]; 1003 regs.sprg7 = env->spr[SPR_SPRG7]; 1004 1005 regs.pid = env->spr[SPR_BOOKE_PID]; 1006 1007 for (i = 0;i < 32; i++) 1008 regs.gpr[i] = env->gpr[i]; 1009 1010 regs.cr = 0; 1011 for (i = 0; i < 8; i++) { 1012 regs.cr |= (env->crf[i] & 15) << (4 * (7 - i)); 1013 } 1014 1015 ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, ®s); 1016 if (ret < 0) 1017 return ret; 1018 1019 kvm_put_fp(cs); 1020 1021 if (env->tlb_dirty) { 1022 kvm_sw_tlb_put(cpu); 1023 env->tlb_dirty = false; 1024 } 1025 1026 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) { 1027 ret = kvmppc_put_books_sregs(cpu); 1028 if (ret < 0) { 1029 return ret; 1030 } 1031 } 1032 1033 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) { 1034 kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR); 1035 } 1036 1037 if (cap_one_reg) { 1038 int i; 1039 1040 /* We deliberately ignore errors here, for kernels which have 1041 * the ONE_REG calls, but don't support the specific 1042 * registers, there's a reasonable chance things will still 1043 * work, at least until we try to migrate. 
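 * (Failures are only reported through trace points: kvm_put_one_spr()
 * fires trace_kvm_failed_spr_set() and the register is simply left
 * unsynchronized.)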
*/ 1044 for (i = 0; i < 1024; i++) { 1045 uint64_t id = env->spr_cb[i].one_reg_id; 1046 1047 if (id != 0) { 1048 kvm_put_one_spr(cs, id, i); 1049 } 1050 } 1051 1052 #ifdef TARGET_PPC64 1053 if (msr_ts) { 1054 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) { 1055 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]); 1056 } 1057 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) { 1058 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]); 1059 } 1060 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr); 1061 kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr); 1062 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr); 1063 kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr); 1064 kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr); 1065 kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr); 1066 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave); 1067 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr); 1068 kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr); 1069 kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar); 1070 } 1071 1072 if (cap_papr) { 1073 if (kvm_put_vpa(cs) < 0) { 1074 DPRINTF("Warning: Unable to set VPA information to KVM\n"); 1075 } 1076 } 1077 1078 kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset); 1079 #endif /* TARGET_PPC64 */ 1080 } 1081 1082 return ret; 1083 } 1084 1085 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor) 1086 { 1087 env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR]; 1088 } 1089 1090 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu) 1091 { 1092 CPUPPCState *env = &cpu->env; 1093 struct kvm_sregs sregs; 1094 int ret; 1095 1096 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs); 1097 if (ret < 0) { 1098 return ret; 1099 } 1100 1101 if (sregs.u.e.features & KVM_SREGS_E_BASE) { 1102 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0; 1103 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1; 1104 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr; 1105 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear; 1106 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr; 1107 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr; 1108 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr; 1109 env->spr[SPR_DECR] = sregs.u.e.dec; 1110 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff; 1111 env->spr[SPR_TBU] = sregs.u.e.tb >> 32; 1112 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave; 1113 } 1114 1115 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) { 1116 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir; 1117 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0; 1118 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1; 1119 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar; 1120 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr; 1121 } 1122 1123 if (sregs.u.e.features & KVM_SREGS_E_64) { 1124 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr; 1125 } 1126 1127 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) { 1128 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8; 1129 } 1130 1131 if (sregs.u.e.features & KVM_SREGS_E_IVOR) { 1132 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0]; 1133 kvm_sync_excp(env, POWERPC_EXCP_CRITICAL, SPR_BOOKE_IVOR0); 1134 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1]; 1135 kvm_sync_excp(env, POWERPC_EXCP_MCHECK, SPR_BOOKE_IVOR1); 1136 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2]; 1137 kvm_sync_excp(env, POWERPC_EXCP_DSI, SPR_BOOKE_IVOR2); 1138 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3]; 1139 kvm_sync_excp(env, POWERPC_EXCP_ISI, SPR_BOOKE_IVOR3); 1140 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4]; 1141 kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL, SPR_BOOKE_IVOR4); 1142 
env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5]; 1143 kvm_sync_excp(env, POWERPC_EXCP_ALIGN, SPR_BOOKE_IVOR5); 1144 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6]; 1145 kvm_sync_excp(env, POWERPC_EXCP_PROGRAM, SPR_BOOKE_IVOR6); 1146 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7]; 1147 kvm_sync_excp(env, POWERPC_EXCP_FPU, SPR_BOOKE_IVOR7); 1148 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8]; 1149 kvm_sync_excp(env, POWERPC_EXCP_SYSCALL, SPR_BOOKE_IVOR8); 1150 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9]; 1151 kvm_sync_excp(env, POWERPC_EXCP_APU, SPR_BOOKE_IVOR9); 1152 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10]; 1153 kvm_sync_excp(env, POWERPC_EXCP_DECR, SPR_BOOKE_IVOR10); 1154 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11]; 1155 kvm_sync_excp(env, POWERPC_EXCP_FIT, SPR_BOOKE_IVOR11); 1156 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12]; 1157 kvm_sync_excp(env, POWERPC_EXCP_WDT, SPR_BOOKE_IVOR12); 1158 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13]; 1159 kvm_sync_excp(env, POWERPC_EXCP_DTLB, SPR_BOOKE_IVOR13); 1160 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14]; 1161 kvm_sync_excp(env, POWERPC_EXCP_ITLB, SPR_BOOKE_IVOR14); 1162 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15]; 1163 kvm_sync_excp(env, POWERPC_EXCP_DEBUG, SPR_BOOKE_IVOR15); 1164 1165 if (sregs.u.e.features & KVM_SREGS_E_SPE) { 1166 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0]; 1167 kvm_sync_excp(env, POWERPC_EXCP_SPEU, SPR_BOOKE_IVOR32); 1168 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1]; 1169 kvm_sync_excp(env, POWERPC_EXCP_EFPDI, SPR_BOOKE_IVOR33); 1170 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2]; 1171 kvm_sync_excp(env, POWERPC_EXCP_EFPRI, SPR_BOOKE_IVOR34); 1172 } 1173 1174 if (sregs.u.e.features & KVM_SREGS_E_PM) { 1175 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3]; 1176 kvm_sync_excp(env, POWERPC_EXCP_EPERFM, SPR_BOOKE_IVOR35); 1177 } 1178 1179 if (sregs.u.e.features & KVM_SREGS_E_PC) { 1180 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4]; 1181 kvm_sync_excp(env, POWERPC_EXCP_DOORI, SPR_BOOKE_IVOR36); 1182 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5]; 1183 kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37); 1184 } 1185 } 1186 1187 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) { 1188 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0; 1189 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1; 1190 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2; 1191 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff; 1192 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4; 1193 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6; 1194 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32; 1195 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg; 1196 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0]; 1197 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1]; 1198 } 1199 1200 if (sregs.u.e.features & KVM_SREGS_EXP) { 1201 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr; 1202 } 1203 1204 if (sregs.u.e.features & KVM_SREGS_E_PD) { 1205 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc; 1206 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc; 1207 } 1208 1209 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) { 1210 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr; 1211 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar; 1212 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0; 1213 1214 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) { 1215 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1; 1216 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2; 1217 } 1218 } 1219 1220 return 0; 1221 } 1222 1223 static int 
kvmppc_get_books_sregs(PowerPCCPU *cpu) 1224 { 1225 CPUPPCState *env = &cpu->env; 1226 struct kvm_sregs sregs; 1227 int ret; 1228 int i; 1229 1230 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs); 1231 if (ret < 0) { 1232 return ret; 1233 } 1234 1235 if (!cpu->vhyp) { 1236 ppc_store_sdr1(env, sregs.u.s.sdr1); 1237 } 1238 1239 /* Sync SLB */ 1240 #ifdef TARGET_PPC64 1241 /* 1242 * The packed SLB array we get from KVM_GET_SREGS only contains 1243 * information about valid entries. So we flush our internal copy 1244 * to get rid of stale ones, then put all valid SLB entries back 1245 * in. 1246 */ 1247 memset(env->slb, 0, sizeof(env->slb)); 1248 for (i = 0; i < ARRAY_SIZE(env->slb); i++) { 1249 target_ulong rb = sregs.u.s.ppc64.slb[i].slbe; 1250 target_ulong rs = sregs.u.s.ppc64.slb[i].slbv; 1251 /* 1252 * Only restore valid entries 1253 */ 1254 if (rb & SLB_ESID_V) { 1255 ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs); 1256 } 1257 } 1258 #endif 1259 1260 /* Sync SRs */ 1261 for (i = 0; i < 16; i++) { 1262 env->sr[i] = sregs.u.s.ppc32.sr[i]; 1263 } 1264 1265 /* Sync BATs */ 1266 for (i = 0; i < 8; i++) { 1267 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff; 1268 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32; 1269 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff; 1270 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32; 1271 } 1272 1273 return 0; 1274 } 1275 1276 int kvm_arch_get_registers(CPUState *cs) 1277 { 1278 PowerPCCPU *cpu = POWERPC_CPU(cs); 1279 CPUPPCState *env = &cpu->env; 1280 struct kvm_regs regs; 1281 uint32_t cr; 1282 int i, ret; 1283 1284 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, ®s); 1285 if (ret < 0) 1286 return ret; 1287 1288 cr = regs.cr; 1289 for (i = 7; i >= 0; i--) { 1290 env->crf[i] = cr & 15; 1291 cr >>= 4; 1292 } 1293 1294 env->ctr = regs.ctr; 1295 env->lr = regs.lr; 1296 cpu_write_xer(env, regs.xer); 1297 env->msr = regs.msr; 1298 env->nip = regs.pc; 1299 1300 env->spr[SPR_SRR0] = regs.srr0; 1301 env->spr[SPR_SRR1] = regs.srr1; 1302 1303 env->spr[SPR_SPRG0] = regs.sprg0; 1304 env->spr[SPR_SPRG1] = regs.sprg1; 1305 env->spr[SPR_SPRG2] = regs.sprg2; 1306 env->spr[SPR_SPRG3] = regs.sprg3; 1307 env->spr[SPR_SPRG4] = regs.sprg4; 1308 env->spr[SPR_SPRG5] = regs.sprg5; 1309 env->spr[SPR_SPRG6] = regs.sprg6; 1310 env->spr[SPR_SPRG7] = regs.sprg7; 1311 1312 env->spr[SPR_BOOKE_PID] = regs.pid; 1313 1314 for (i = 0;i < 32; i++) 1315 env->gpr[i] = regs.gpr[i]; 1316 1317 kvm_get_fp(cs); 1318 1319 if (cap_booke_sregs) { 1320 ret = kvmppc_get_booke_sregs(cpu); 1321 if (ret < 0) { 1322 return ret; 1323 } 1324 } 1325 1326 if (cap_segstate) { 1327 ret = kvmppc_get_books_sregs(cpu); 1328 if (ret < 0) { 1329 return ret; 1330 } 1331 } 1332 1333 if (cap_hior) { 1334 kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR); 1335 } 1336 1337 if (cap_one_reg) { 1338 int i; 1339 1340 /* We deliberately ignore errors here, for kernels which have 1341 * the ONE_REG calls, but don't support the specific 1342 * registers, there's a reasonable chance things will still 1343 * work, at least until we try to migrate. 
*/ 1344 for (i = 0; i < 1024; i++) { 1345 uint64_t id = env->spr_cb[i].one_reg_id; 1346 1347 if (id != 0) { 1348 kvm_get_one_spr(cs, id, i); 1349 } 1350 } 1351 1352 #ifdef TARGET_PPC64 1353 if (msr_ts) { 1354 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) { 1355 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]); 1356 } 1357 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) { 1358 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]); 1359 } 1360 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr); 1361 kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr); 1362 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr); 1363 kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr); 1364 kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr); 1365 kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr); 1366 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave); 1367 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr); 1368 kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr); 1369 kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar); 1370 } 1371 1372 if (cap_papr) { 1373 if (kvm_get_vpa(cs) < 0) { 1374 DPRINTF("Warning: Unable to get VPA information from KVM\n"); 1375 } 1376 } 1377 1378 kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset); 1379 #endif 1380 } 1381 1382 return 0; 1383 } 1384 1385 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level) 1386 { 1387 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET; 1388 1389 if (irq != PPC_INTERRUPT_EXT) { 1390 return 0; 1391 } 1392 1393 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) { 1394 return 0; 1395 } 1396 1397 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq); 1398 1399 return 0; 1400 } 1401 1402 #if defined(TARGET_PPCEMB) 1403 #define PPC_INPUT_INT PPC40x_INPUT_INT 1404 #elif defined(TARGET_PPC64) 1405 #define PPC_INPUT_INT PPC970_INPUT_INT 1406 #else 1407 #define PPC_INPUT_INT PPC6xx_INPUT_INT 1408 #endif 1409 1410 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run) 1411 { 1412 PowerPCCPU *cpu = POWERPC_CPU(cs); 1413 CPUPPCState *env = &cpu->env; 1414 int r; 1415 unsigned irq; 1416 1417 qemu_mutex_lock_iothread(); 1418 1419 /* PowerPC QEMU tracks the various core input pins (interrupt, critical 1420 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */ 1421 if (!cap_interrupt_level && 1422 run->ready_for_interrupt_injection && 1423 (cs->interrupt_request & CPU_INTERRUPT_HARD) && 1424 (env->irq_input_state & (1<<PPC_INPUT_INT))) 1425 { 1426 /* For now KVM disregards the 'irq' argument. However, in the 1427 * future KVM could cache it in-kernel to avoid a heavyweight exit 1428 * when reading the UIC. 1429 */ 1430 irq = KVM_INTERRUPT_SET; 1431 1432 DPRINTF("injected interrupt %d\n", irq); 1433 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq); 1434 if (r < 0) { 1435 printf("cpu %d fail inject %x\n", cs->cpu_index, irq); 1436 } 1437 1438 /* Always wake up soon in case the interrupt was level based */ 1439 timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 1440 (NANOSECONDS_PER_SECOND / 50)); 1441 } 1442 1443 /* We don't know if there are more interrupts pending after this. However, 1444 * the guest will return to userspace in the course of handling this one 1445 * anyways, so we will get a chance to deliver the rest. 
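 * (The timer_mod() above additionally re-kicks the vCPU after roughly 20 ms
 * as a safety net for level-triggered interrupts the guest may have
 * swallowed; see the comment next to idle_timer at the top of the file.)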
*/ 1446 1447 qemu_mutex_unlock_iothread(); 1448 } 1449 1450 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run) 1451 { 1452 return MEMTXATTRS_UNSPECIFIED; 1453 } 1454 1455 int kvm_arch_process_async_events(CPUState *cs) 1456 { 1457 return cs->halted; 1458 } 1459 1460 static int kvmppc_handle_halt(PowerPCCPU *cpu) 1461 { 1462 CPUState *cs = CPU(cpu); 1463 CPUPPCState *env = &cpu->env; 1464 1465 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) { 1466 cs->halted = 1; 1467 cs->exception_index = EXCP_HLT; 1468 } 1469 1470 return 0; 1471 } 1472 1473 /* map dcr access to existing qemu dcr emulation */ 1474 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data) 1475 { 1476 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0) 1477 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn); 1478 1479 return 0; 1480 } 1481 1482 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data) 1483 { 1484 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0) 1485 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn); 1486 1487 return 0; 1488 } 1489 1490 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp) 1491 { 1492 /* Mixed endian case is not handled */ 1493 uint32_t sc = debug_inst_opcode; 1494 1495 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 1496 sizeof(sc), 0) || 1497 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) { 1498 return -EINVAL; 1499 } 1500 1501 return 0; 1502 } 1503 1504 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp) 1505 { 1506 uint32_t sc; 1507 1508 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) || 1509 sc != debug_inst_opcode || 1510 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 1511 sizeof(sc), 1)) { 1512 return -EINVAL; 1513 } 1514 1515 return 0; 1516 } 1517 1518 static int find_hw_breakpoint(target_ulong addr, int type) 1519 { 1520 int n; 1521 1522 assert((nb_hw_breakpoint + nb_hw_watchpoint) 1523 <= ARRAY_SIZE(hw_debug_points)); 1524 1525 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) { 1526 if (hw_debug_points[n].addr == addr && 1527 hw_debug_points[n].type == type) { 1528 return n; 1529 } 1530 } 1531 1532 return -1; 1533 } 1534 1535 static int find_hw_watchpoint(target_ulong addr, int *flag) 1536 { 1537 int n; 1538 1539 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS); 1540 if (n >= 0) { 1541 *flag = BP_MEM_ACCESS; 1542 return n; 1543 } 1544 1545 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE); 1546 if (n >= 0) { 1547 *flag = BP_MEM_WRITE; 1548 return n; 1549 } 1550 1551 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ); 1552 if (n >= 0) { 1553 *flag = BP_MEM_READ; 1554 return n; 1555 } 1556 1557 return -1; 1558 } 1559 1560 int kvm_arch_insert_hw_breakpoint(target_ulong addr, 1561 target_ulong len, int type) 1562 { 1563 if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) { 1564 return -ENOBUFS; 1565 } 1566 1567 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr; 1568 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type; 1569 1570 switch (type) { 1571 case GDB_BREAKPOINT_HW: 1572 if (nb_hw_breakpoint >= max_hw_breakpoint) { 1573 return -ENOBUFS; 1574 } 1575 1576 if (find_hw_breakpoint(addr, type) >= 0) { 1577 return -EEXIST; 1578 } 1579 1580 nb_hw_breakpoint++; 1581 break; 1582 1583 case GDB_WATCHPOINT_WRITE: 1584 case GDB_WATCHPOINT_READ: 1585 case GDB_WATCHPOINT_ACCESS: 1586 if (nb_hw_watchpoint >= max_hw_watchpoint) { 1587 
return -ENOBUFS; 1588 } 1589 1590 if (find_hw_breakpoint(addr, type) >= 0) { 1591 return -EEXIST; 1592 } 1593 1594 nb_hw_watchpoint++; 1595 break; 1596 1597 default: 1598 return -ENOSYS; 1599 } 1600 1601 return 0; 1602 } 1603 1604 int kvm_arch_remove_hw_breakpoint(target_ulong addr, 1605 target_ulong len, int type) 1606 { 1607 int n; 1608 1609 n = find_hw_breakpoint(addr, type); 1610 if (n < 0) { 1611 return -ENOENT; 1612 } 1613 1614 switch (type) { 1615 case GDB_BREAKPOINT_HW: 1616 nb_hw_breakpoint--; 1617 break; 1618 1619 case GDB_WATCHPOINT_WRITE: 1620 case GDB_WATCHPOINT_READ: 1621 case GDB_WATCHPOINT_ACCESS: 1622 nb_hw_watchpoint--; 1623 break; 1624 1625 default: 1626 return -ENOSYS; 1627 } 1628 hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint]; 1629 1630 return 0; 1631 } 1632 1633 void kvm_arch_remove_all_hw_breakpoints(void) 1634 { 1635 nb_hw_breakpoint = nb_hw_watchpoint = 0; 1636 } 1637 1638 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg) 1639 { 1640 int n; 1641 1642 /* Software Breakpoint updates */ 1643 if (kvm_sw_breakpoints_active(cs)) { 1644 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP; 1645 } 1646 1647 assert((nb_hw_breakpoint + nb_hw_watchpoint) 1648 <= ARRAY_SIZE(hw_debug_points)); 1649 assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp)); 1650 1651 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) { 1652 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP; 1653 memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp)); 1654 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) { 1655 switch (hw_debug_points[n].type) { 1656 case GDB_BREAKPOINT_HW: 1657 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT; 1658 break; 1659 case GDB_WATCHPOINT_WRITE: 1660 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE; 1661 break; 1662 case GDB_WATCHPOINT_READ: 1663 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ; 1664 break; 1665 case GDB_WATCHPOINT_ACCESS: 1666 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE | 1667 KVMPPC_DEBUG_WATCH_READ; 1668 break; 1669 default: 1670 cpu_abort(cs, "Unsupported breakpoint type\n"); 1671 } 1672 dbg->arch.bp[n].addr = hw_debug_points[n].addr; 1673 } 1674 } 1675 } 1676 1677 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run) 1678 { 1679 CPUState *cs = CPU(cpu); 1680 CPUPPCState *env = &cpu->env; 1681 struct kvm_debug_exit_arch *arch_info = &run->debug.arch; 1682 int handle = 0; 1683 int n; 1684 int flag = 0; 1685 1686 if (cs->singlestep_enabled) { 1687 handle = 1; 1688 } else if (arch_info->status) { 1689 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) { 1690 if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) { 1691 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW); 1692 if (n >= 0) { 1693 handle = 1; 1694 } 1695 } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ | 1696 KVMPPC_DEBUG_WATCH_WRITE)) { 1697 n = find_hw_watchpoint(arch_info->address, &flag); 1698 if (n >= 0) { 1699 handle = 1; 1700 cs->watchpoint_hit = &hw_watchpoint; 1701 hw_watchpoint.vaddr = hw_debug_points[n].addr; 1702 hw_watchpoint.flags = flag; 1703 } 1704 } 1705 } 1706 } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) { 1707 handle = 1; 1708 } else { 1709 /* QEMU is not able to handle debug exception, so inject 1710 * program exception to guest; 1711 * Yes program exception NOT debug exception !! 1712 * When QEMU is using debug resources then debug exception must 1713 * be always set. To achieve this we set MSR_DE and also set 1714 * MSRP_DEP so guest cannot change MSR_DE. 
         * When emulating debug resources for the guest we want the guest
         * to control MSR_DE (enable/disable the debug interrupt on demand).
         * Supporting both configurations at the same time is NOT possible.
         * The result is that we cannot share debug resources between QEMU
         * and the guest on BookE architectures.
         * In the current design QEMU gets priority over the guest: if QEMU
         * is using the debug resources then the guest cannot use them.
         * For software breakpoints QEMU uses a privileged instruction, so
         * we cannot be here because the guest set up a debug exception; the
         * only possibility is that the guest executed a privileged / illegal
         * instruction, which is why we inject a program interrupt.
         */

        cpu_synchronize_state(cs);
        /* env->nip is PC, so increment this by 4 to use
         * ppc_cpu_do_interrupt(), which sets srr0 = env->nip - 4.
         */
        env->nip += 4;
        cs->exception_index = POWERPC_EXCP_PROGRAM;
        env->error_code = POWERPC_EXCP_INVAL;
        ppc_cpu_do_interrupt(cs);
    }

    return handle;
}

int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    int ret;

    qemu_mutex_lock_iothread();

    switch (run->exit_reason) {
    case KVM_EXIT_DCR:
        if (run->dcr.is_write) {
            DPRINTF("handle dcr write\n");
            ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
        } else {
            DPRINTF("handle dcr read\n");
            ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
        }
        break;
    case KVM_EXIT_HLT:
        DPRINTF("handle halt\n");
        ret = kvmppc_handle_halt(cpu);
        break;
#if defined(TARGET_PPC64)
    case KVM_EXIT_PAPR_HCALL:
        DPRINTF("handle PAPR hypercall\n");
        run->papr_hcall.ret = spapr_hypercall(cpu,
                                              run->papr_hcall.nr,
                                              run->papr_hcall.args);
        ret = 0;
        break;
#endif
    case KVM_EXIT_EPR:
        DPRINTF("handle epr\n");
        run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
        ret = 0;
        break;
    case KVM_EXIT_WATCHDOG:
        DPRINTF("handle watchdog expiry\n");
        watchdog_perform_action();
        ret = 0;
        break;

    case KVM_EXIT_DEBUG:
        DPRINTF("handle debug exception\n");
        if (kvm_handle_debug(cpu, run)) {
            ret = EXCP_DEBUG;
            break;
        }
        /* re-enter, this exception was guest-internal */
        ret = 0;
        break;

    default:
        fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
        ret = -1;
        break;
    }

    qemu_mutex_unlock_iothread();
    return ret;
}

int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
{
    CPUState *cs = CPU(cpu);
    uint32_t bits = tsr_bits;
    struct kvm_one_reg reg = {
        .id = KVM_REG_PPC_OR_TSR,
        .addr = (uintptr_t) &bits,
    };

    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
}

int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
{
    CPUState *cs = CPU(cpu);
    uint32_t bits = tsr_bits;
    struct kvm_one_reg reg = {
        .id = KVM_REG_PPC_CLEAR_TSR,
        .addr = (uintptr_t) &bits,
    };

    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
}

int kvmppc_set_tcr(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    CPUPPCState *env = &cpu->env;
    uint32_t tcr = env->spr[SPR_BOOKE_TCR];

    struct kvm_one_reg reg = {
        .id = KVM_REG_PPC_TCR,
        .addr = (uintptr_t) &tcr,
1839 }; 1840 1841 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 1842 } 1843 1844 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu) 1845 { 1846 CPUState *cs = CPU(cpu); 1847 int ret; 1848 1849 if (!kvm_enabled()) { 1850 return -1; 1851 } 1852 1853 if (!cap_ppc_watchdog) { 1854 printf("warning: KVM does not support watchdog"); 1855 return -1; 1856 } 1857 1858 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0); 1859 if (ret < 0) { 1860 fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n", 1861 __func__, strerror(-ret)); 1862 return ret; 1863 } 1864 1865 return ret; 1866 } 1867 1868 static int read_cpuinfo(const char *field, char *value, int len) 1869 { 1870 FILE *f; 1871 int ret = -1; 1872 int field_len = strlen(field); 1873 char line[512]; 1874 1875 f = fopen("/proc/cpuinfo", "r"); 1876 if (!f) { 1877 return -1; 1878 } 1879 1880 do { 1881 if (!fgets(line, sizeof(line), f)) { 1882 break; 1883 } 1884 if (!strncmp(line, field, field_len)) { 1885 pstrcpy(value, len, line); 1886 ret = 0; 1887 break; 1888 } 1889 } while(*line); 1890 1891 fclose(f); 1892 1893 return ret; 1894 } 1895 1896 uint32_t kvmppc_get_tbfreq(void) 1897 { 1898 char line[512]; 1899 char *ns; 1900 uint32_t retval = NANOSECONDS_PER_SECOND; 1901 1902 if (read_cpuinfo("timebase", line, sizeof(line))) { 1903 return retval; 1904 } 1905 1906 if (!(ns = strchr(line, ':'))) { 1907 return retval; 1908 } 1909 1910 ns++; 1911 1912 return atoi(ns); 1913 } 1914 1915 bool kvmppc_get_host_serial(char **value) 1916 { 1917 return g_file_get_contents("/proc/device-tree/system-id", value, NULL, 1918 NULL); 1919 } 1920 1921 bool kvmppc_get_host_model(char **value) 1922 { 1923 return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL); 1924 } 1925 1926 /* Try to find a device tree node for a CPU with clock-frequency property */ 1927 static int kvmppc_find_cpu_dt(char *buf, int buf_len) 1928 { 1929 struct dirent *dirp; 1930 DIR *dp; 1931 1932 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) { 1933 printf("Can't open directory " PROC_DEVTREE_CPU "\n"); 1934 return -1; 1935 } 1936 1937 buf[0] = '\0'; 1938 while ((dirp = readdir(dp)) != NULL) { 1939 FILE *f; 1940 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU, 1941 dirp->d_name); 1942 f = fopen(buf, "r"); 1943 if (f) { 1944 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name); 1945 fclose(f); 1946 break; 1947 } 1948 buf[0] = '\0'; 1949 } 1950 closedir(dp); 1951 if (buf[0] == '\0') { 1952 printf("Unknown host!\n"); 1953 return -1; 1954 } 1955 1956 return 0; 1957 } 1958 1959 static uint64_t kvmppc_read_int_dt(const char *filename) 1960 { 1961 union { 1962 uint32_t v32; 1963 uint64_t v64; 1964 } u; 1965 FILE *f; 1966 int len; 1967 1968 f = fopen(filename, "rb"); 1969 if (!f) { 1970 return -1; 1971 } 1972 1973 len = fread(&u, 1, sizeof(u), f); 1974 fclose(f); 1975 switch (len) { 1976 case 4: 1977 /* property is a 32-bit quantity */ 1978 return be32_to_cpu(u.v32); 1979 case 8: 1980 return be64_to_cpu(u.v64); 1981 } 1982 1983 return 0; 1984 } 1985 1986 /* Read a CPU node property from the host device tree that's a single 1987 * integer (32-bit or 64-bit). 
Returns 0 if anything goes wrong 1988 * (can't find or open the property, or doesn't understand the 1989 * format) */ 1990 static uint64_t kvmppc_read_int_cpu_dt(const char *propname) 1991 { 1992 char buf[PATH_MAX], *tmp; 1993 uint64_t val; 1994 1995 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) { 1996 return -1; 1997 } 1998 1999 tmp = g_strdup_printf("%s/%s", buf, propname); 2000 val = kvmppc_read_int_dt(tmp); 2001 g_free(tmp); 2002 2003 return val; 2004 } 2005 2006 uint64_t kvmppc_get_clockfreq(void) 2007 { 2008 return kvmppc_read_int_cpu_dt("clock-frequency"); 2009 } 2010 2011 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo) 2012 { 2013 PowerPCCPU *cpu = ppc_env_get_cpu(env); 2014 CPUState *cs = CPU(cpu); 2015 2016 if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) && 2017 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) { 2018 return 0; 2019 } 2020 2021 return 1; 2022 } 2023 2024 int kvmppc_get_hasidle(CPUPPCState *env) 2025 { 2026 struct kvm_ppc_pvinfo pvinfo; 2027 2028 if (!kvmppc_get_pvinfo(env, &pvinfo) && 2029 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) { 2030 return 1; 2031 } 2032 2033 return 0; 2034 } 2035 2036 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len) 2037 { 2038 uint32_t *hc = (uint32_t*)buf; 2039 struct kvm_ppc_pvinfo pvinfo; 2040 2041 if (!kvmppc_get_pvinfo(env, &pvinfo)) { 2042 memcpy(buf, pvinfo.hcall, buf_len); 2043 return 0; 2044 } 2045 2046 /* 2047 * Fallback to always fail hypercalls regardless of endianness: 2048 * 2049 * tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian) 2050 * li r3, -1 2051 * b .+8 (becomes nop in wrong endian) 2052 * bswap32(li r3, -1) 2053 */ 2054 2055 hc[0] = cpu_to_be32(0x08000048); 2056 hc[1] = cpu_to_be32(0x3860ffff); 2057 hc[2] = cpu_to_be32(0x48000008); 2058 hc[3] = cpu_to_be32(bswap32(0x3860ffff)); 2059 2060 return 1; 2061 } 2062 2063 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall) 2064 { 2065 return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1); 2066 } 2067 2068 void kvmppc_enable_logical_ci_hcalls(void) 2069 { 2070 /* 2071 * FIXME: it would be nice if we could detect the cases where 2072 * we're using a device which requires the in kernel 2073 * implementation of these hcalls, but the kernel lacks them and 2074 * produce a warning. 
2075 */ 2076 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD); 2077 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE); 2078 } 2079 2080 void kvmppc_enable_set_mode_hcall(void) 2081 { 2082 kvmppc_enable_hcall(kvm_state, H_SET_MODE); 2083 } 2084 2085 void kvmppc_enable_clear_ref_mod_hcalls(void) 2086 { 2087 kvmppc_enable_hcall(kvm_state, H_CLEAR_REF); 2088 kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD); 2089 } 2090 2091 void kvmppc_set_papr(PowerPCCPU *cpu) 2092 { 2093 CPUState *cs = CPU(cpu); 2094 int ret; 2095 2096 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0); 2097 if (ret) { 2098 error_report("This vCPU type or KVM version does not support PAPR"); 2099 exit(1); 2100 } 2101 2102 /* Update the capability flag so we sync the right information 2103 * with kvm */ 2104 cap_papr = 1; 2105 } 2106 2107 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr) 2108 { 2109 return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr); 2110 } 2111 2112 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy) 2113 { 2114 CPUState *cs = CPU(cpu); 2115 int ret; 2116 2117 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy); 2118 if (ret && mpic_proxy) { 2119 error_report("This KVM version does not support EPR"); 2120 exit(1); 2121 } 2122 } 2123 2124 int kvmppc_smt_threads(void) 2125 { 2126 return cap_ppc_smt ? cap_ppc_smt : 1; 2127 } 2128 2129 int kvmppc_set_smt_threads(int smt) 2130 { 2131 int ret; 2132 2133 ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_SMT, 0, smt, 0); 2134 if (!ret) { 2135 cap_ppc_smt = smt; 2136 } 2137 return ret; 2138 } 2139 2140 void kvmppc_hint_smt_possible(Error **errp) 2141 { 2142 int i; 2143 GString *g; 2144 char *s; 2145 2146 assert(kvm_enabled()); 2147 if (cap_ppc_smt_possible) { 2148 g = g_string_new("Available VSMT modes:"); 2149 for (i = 63; i >= 0; i--) { 2150 if ((1UL << i) & cap_ppc_smt_possible) { 2151 g_string_append_printf(g, " %lu", (1UL << i)); 2152 } 2153 } 2154 s = g_string_free(g, false); 2155 error_append_hint(errp, "%s.\n", s); 2156 g_free(s); 2157 } else { 2158 error_append_hint(errp, 2159 "This KVM seems to be too old to support VSMT.\n"); 2160 } 2161 } 2162 2163 2164 #ifdef TARGET_PPC64 2165 off_t kvmppc_alloc_rma(void **rma) 2166 { 2167 off_t size; 2168 int fd; 2169 struct kvm_allocate_rma ret; 2170 2171 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported 2172 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but 2173 * not necessary on this hardware 2174 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware 2175 * 2176 * FIXME: We should allow the user to force contiguous RMA 2177 * allocation in the cap_ppc_rma==1 case. 
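As things stand, the early return below only performs the allocation when the hardware actually requires a contiguous RMA (cap_ppc_rma == 2).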
2178 */ 2179 if (cap_ppc_rma < 2) { 2180 return 0; 2181 } 2182 2183 fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret); 2184 if (fd < 0) { 2185 fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n", 2186 strerror(errno)); 2187 return -1; 2188 } 2189 2190 size = MIN(ret.rma_size, 256ul << 20); 2191 2192 *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); 2193 if (*rma == MAP_FAILED) { 2194 fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno)); 2195 return -1; 2196 }; 2197 2198 return size; 2199 } 2200 2201 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift) 2202 { 2203 struct kvm_ppc_smmu_info info; 2204 long rampagesize, best_page_shift; 2205 int i; 2206 2207 if (cap_ppc_rma >= 2) { 2208 return current_size; 2209 } 2210 2211 /* Find the largest hardware supported page size that's less than 2212 * or equal to the (logical) backing page size of guest RAM */ 2213 kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info); 2214 rampagesize = qemu_getrampagesize(); 2215 best_page_shift = 0; 2216 2217 for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) { 2218 struct kvm_ppc_one_seg_page_size *sps = &info.sps[i]; 2219 2220 if (!sps->page_shift) { 2221 continue; 2222 } 2223 2224 if ((sps->page_shift > best_page_shift) 2225 && ((1UL << sps->page_shift) <= rampagesize)) { 2226 best_page_shift = sps->page_shift; 2227 } 2228 } 2229 2230 return MIN(current_size, 2231 1ULL << (best_page_shift + hash_shift - 7)); 2232 } 2233 #endif 2234 2235 bool kvmppc_spapr_use_multitce(void) 2236 { 2237 return cap_spapr_multitce; 2238 } 2239 2240 int kvmppc_spapr_enable_inkernel_multitce(void) 2241 { 2242 int ret; 2243 2244 ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0, 2245 H_PUT_TCE_INDIRECT, 1); 2246 if (!ret) { 2247 ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0, 2248 H_STUFF_TCE, 1); 2249 } 2250 2251 return ret; 2252 } 2253 2254 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift, 2255 uint64_t bus_offset, uint32_t nb_table, 2256 int *pfd, bool need_vfio) 2257 { 2258 long len; 2259 int fd; 2260 void *table; 2261 2262 /* Must set fd to -1 so we don't try to munmap when called for 2263 * destroying the table, which the upper layers -will- do 2264 */ 2265 *pfd = -1; 2266 if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) { 2267 return NULL; 2268 } 2269 2270 if (cap_spapr_tce_64) { 2271 struct kvm_create_spapr_tce_64 args = { 2272 .liobn = liobn, 2273 .page_shift = page_shift, 2274 .offset = bus_offset >> page_shift, 2275 .size = nb_table, 2276 .flags = 0 2277 }; 2278 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE_64, &args); 2279 if (fd < 0) { 2280 fprintf(stderr, 2281 "KVM: Failed to create TCE64 table for liobn 0x%x\n", 2282 liobn); 2283 return NULL; 2284 } 2285 } else if (cap_spapr_tce) { 2286 uint64_t window_size = (uint64_t) nb_table << page_shift; 2287 struct kvm_create_spapr_tce args = { 2288 .liobn = liobn, 2289 .window_size = window_size, 2290 }; 2291 if ((window_size != args.window_size) || bus_offset) { 2292 return NULL; 2293 } 2294 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args); 2295 if (fd < 0) { 2296 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n", 2297 liobn); 2298 return NULL; 2299 } 2300 } else { 2301 return NULL; 2302 } 2303 2304 len = nb_table * sizeof(uint64_t); 2305 /* FIXME: round this up to page size */ 2306 2307 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); 2308 if (table == MAP_FAILED) { 2309 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n", 
2310 liobn); 2311 close(fd); 2312 return NULL; 2313 } 2314 2315 *pfd = fd; 2316 return table; 2317 } 2318 2319 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table) 2320 { 2321 long len; 2322 2323 if (fd < 0) { 2324 return -1; 2325 } 2326 2327 len = nb_table * sizeof(uint64_t); 2328 if ((munmap(table, len) < 0) || 2329 (close(fd) < 0)) { 2330 fprintf(stderr, "KVM: Unexpected error removing TCE table: %s", 2331 strerror(errno)); 2332 /* Leak the table */ 2333 } 2334 2335 return 0; 2336 } 2337 2338 int kvmppc_reset_htab(int shift_hint) 2339 { 2340 uint32_t shift = shift_hint; 2341 2342 if (!kvm_enabled()) { 2343 /* Full emulation, tell caller to allocate htab itself */ 2344 return 0; 2345 } 2346 if (kvm_vm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) { 2347 int ret; 2348 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift); 2349 if (ret == -ENOTTY) { 2350 /* At least some versions of PR KVM advertise the 2351 * capability, but don't implement the ioctl(). Oops. 2352 * Return 0 so that we allocate the htab in qemu, as is 2353 * correct for PR. */ 2354 return 0; 2355 } else if (ret < 0) { 2356 return ret; 2357 } 2358 return shift; 2359 } 2360 2361 /* We have a kernel that predates the htab reset calls. For PR 2362 * KVM, we need to allocate the htab ourselves, for an HV KVM of 2363 * this era, it has allocated a 16MB fixed size hash table already. */ 2364 if (kvmppc_is_pr(kvm_state)) { 2365 /* PR - tell caller to allocate htab */ 2366 return 0; 2367 } else { 2368 /* HV - assume 16MB kernel allocated htab */ 2369 return 24; 2370 } 2371 } 2372 2373 static inline uint32_t mfpvr(void) 2374 { 2375 uint32_t pvr; 2376 2377 asm ("mfpvr %0" 2378 : "=r"(pvr)); 2379 return pvr; 2380 } 2381 2382 static void alter_insns(uint64_t *word, uint64_t flags, bool on) 2383 { 2384 if (on) { 2385 *word |= flags; 2386 } else { 2387 *word &= ~flags; 2388 } 2389 } 2390 2391 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data) 2392 { 2393 PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc); 2394 uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size"); 2395 uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size"); 2396 2397 /* Now fix up the class with information we can query from the host */ 2398 pcc->pvr = mfpvr(); 2399 2400 alter_insns(&pcc->insns_flags, PPC_ALTIVEC, 2401 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_ALTIVEC); 2402 alter_insns(&pcc->insns_flags2, PPC2_VSX, 2403 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_VSX); 2404 alter_insns(&pcc->insns_flags2, PPC2_DFP, 2405 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_DFP); 2406 2407 if (dcache_size != -1) { 2408 pcc->l1_dcache_size = dcache_size; 2409 } 2410 2411 if (icache_size != -1) { 2412 pcc->l1_icache_size = icache_size; 2413 } 2414 2415 #if defined(TARGET_PPC64) 2416 pcc->radix_page_info = kvm_get_radix_page_info(); 2417 2418 if ((pcc->pvr & 0xffffff00) == CPU_POWERPC_POWER9_DD1) { 2419 /* 2420 * POWER9 DD1 has some bugs which make it not really ISA 3.00 2421 * compliant. More importantly, advertising ISA 3.00 2422 * architected mode may prevent guests from activating 2423 * necessary DD1 workarounds. 
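The masking below therefore strips the 2.05 through 3.00 compat modes from pcr_supported, so a guest cannot be put into an architected compatibility mode on a DD1 host.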
2424 */ 2425 pcc->pcr_supported &= ~(PCR_COMPAT_3_00 | PCR_COMPAT_2_07 2426 | PCR_COMPAT_2_06 | PCR_COMPAT_2_05); 2427 } 2428 #endif /* defined(TARGET_PPC64) */ 2429 } 2430 2431 bool kvmppc_has_cap_epr(void) 2432 { 2433 return cap_epr; 2434 } 2435 2436 bool kvmppc_has_cap_fixup_hcalls(void) 2437 { 2438 return cap_fixup_hcalls; 2439 } 2440 2441 bool kvmppc_has_cap_htm(void) 2442 { 2443 return cap_htm; 2444 } 2445 2446 bool kvmppc_has_cap_mmu_radix(void) 2447 { 2448 return cap_mmu_radix; 2449 } 2450 2451 bool kvmppc_has_cap_mmu_hash_v3(void) 2452 { 2453 return cap_mmu_hash_v3; 2454 } 2455 2456 static void kvmppc_get_cpu_characteristics(KVMState *s) 2457 { 2458 struct kvm_ppc_cpu_char c; 2459 int ret; 2460 2461 /* Assume broken */ 2462 cap_ppc_safe_cache = 0; 2463 cap_ppc_safe_bounds_check = 0; 2464 cap_ppc_safe_indirect_branch = 0; 2465 2466 ret = kvm_vm_check_extension(s, KVM_CAP_PPC_GET_CPU_CHAR); 2467 if (!ret) { 2468 return; 2469 } 2470 ret = kvm_vm_ioctl(s, KVM_PPC_GET_CPU_CHAR, &c); 2471 if (ret < 0) { 2472 return; 2473 } 2474 /* Parse and set cap_ppc_safe_cache */ 2475 if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_L1D_FLUSH_PR) { 2476 cap_ppc_safe_cache = 2; 2477 } else if ((c.character & c.character_mask & H_CPU_CHAR_L1D_THREAD_PRIV) && 2478 (c.character & c.character_mask 2479 & (H_CPU_CHAR_L1D_FLUSH_ORI30 | H_CPU_CHAR_L1D_FLUSH_TRIG2))) { 2480 cap_ppc_safe_cache = 1; 2481 } 2482 /* Parse and set cap_ppc_safe_bounds_check */ 2483 if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_BNDS_CHK_SPEC_BAR) { 2484 cap_ppc_safe_bounds_check = 2; 2485 } else if (c.character & c.character_mask & H_CPU_CHAR_SPEC_BAR_ORI31) { 2486 cap_ppc_safe_bounds_check = 1; 2487 } 2488 /* Parse and set cap_ppc_safe_indirect_branch */ 2489 if (c.character & c.character_mask & H_CPU_CHAR_CACHE_COUNT_DIS) { 2490 cap_ppc_safe_indirect_branch = SPAPR_CAP_FIXED_CCD; 2491 } else if (c.character & c.character_mask & H_CPU_CHAR_BCCTRL_SERIALISED) { 2492 cap_ppc_safe_indirect_branch = SPAPR_CAP_FIXED_IBS; 2493 } 2494 } 2495 2496 int kvmppc_get_cap_safe_cache(void) 2497 { 2498 return cap_ppc_safe_cache; 2499 } 2500 2501 int kvmppc_get_cap_safe_bounds_check(void) 2502 { 2503 return cap_ppc_safe_bounds_check; 2504 } 2505 2506 int kvmppc_get_cap_safe_indirect_branch(void) 2507 { 2508 return cap_ppc_safe_indirect_branch; 2509 } 2510 2511 bool kvmppc_has_cap_spapr_vfio(void) 2512 { 2513 return cap_spapr_vfio; 2514 } 2515 2516 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void) 2517 { 2518 uint32_t host_pvr = mfpvr(); 2519 PowerPCCPUClass *pvr_pcc; 2520 2521 pvr_pcc = ppc_cpu_class_by_pvr(host_pvr); 2522 if (pvr_pcc == NULL) { 2523 pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr); 2524 } 2525 2526 return pvr_pcc; 2527 } 2528 2529 static int kvm_ppc_register_host_cpu_type(MachineState *ms) 2530 { 2531 TypeInfo type_info = { 2532 .name = TYPE_HOST_POWERPC_CPU, 2533 .class_init = kvmppc_host_cpu_class_init, 2534 }; 2535 MachineClass *mc = MACHINE_GET_CLASS(ms); 2536 PowerPCCPUClass *pvr_pcc; 2537 ObjectClass *oc; 2538 DeviceClass *dc; 2539 int i; 2540 2541 pvr_pcc = kvm_ppc_get_host_cpu_class(); 2542 if (pvr_pcc == NULL) { 2543 return -1; 2544 } 2545 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc)); 2546 type_register(&type_info); 2547 if (object_dynamic_cast(OBJECT(ms), TYPE_SPAPR_MACHINE)) { 2548 /* override TCG default cpu type with 'host' cpu model */ 2549 mc->default_cpu_type = TYPE_HOST_POWERPC_CPU; 2550 } 2551 2552 oc = object_class_by_name(type_info.name); 2553 g_assert(oc); 2554 2555 /* 2556 * Update 
generic CPU family class alias (e.g. on a POWER8NVL host, 2557 * we want "POWER8" to be a "family" alias that points to the current 2558 * host CPU type, too) 2559 */ 2560 dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc)); 2561 for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) { 2562 if (strcasecmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) { 2563 char *suffix; 2564 2565 ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc)); 2566 suffix = strstr(ppc_cpu_aliases[i].model, POWERPC_CPU_TYPE_SUFFIX); 2567 if (suffix) { 2568 *suffix = 0; 2569 } 2570 break; 2571 } 2572 } 2573 2574 return 0; 2575 } 2576 2577 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function) 2578 { 2579 struct kvm_rtas_token_args args = { 2580 .token = token, 2581 }; 2582 2583 if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) { 2584 return -ENOENT; 2585 } 2586 2587 strncpy(args.name, function, sizeof(args.name)); 2588 2589 return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args); 2590 } 2591 2592 int kvmppc_get_htab_fd(bool write, uint64_t index, Error **errp) 2593 { 2594 struct kvm_get_htab_fd s = { 2595 .flags = write ? KVM_GET_HTAB_WRITE : 0, 2596 .start_index = index, 2597 }; 2598 int ret; 2599 2600 if (!cap_htab_fd) { 2601 error_setg(errp, "KVM version doesn't support %s the HPT", 2602 write ? "writing" : "reading"); 2603 return -ENOTSUP; 2604 } 2605 2606 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s); 2607 if (ret < 0) { 2608 error_setg(errp, "Unable to open fd for %s HPT %s KVM: %s", 2609 write ? "writing" : "reading", write ? "to" : "from", 2610 strerror(errno)); 2611 return -errno; 2612 } 2613 2614 return ret; 2615 } 2616 2617 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns) 2618 { 2619 int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); 2620 uint8_t buf[bufsize]; 2621 ssize_t rc; 2622 2623 do { 2624 rc = read(fd, buf, bufsize); 2625 if (rc < 0) { 2626 fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n", 2627 strerror(errno)); 2628 return rc; 2629 } else if (rc) { 2630 uint8_t *buffer = buf; 2631 ssize_t n = rc; 2632 while (n) { 2633 struct kvm_get_htab_header *head = 2634 (struct kvm_get_htab_header *) buffer; 2635 size_t chunksize = sizeof(*head) + 2636 HASH_PTE_SIZE_64 * head->n_valid; 2637 2638 qemu_put_be32(f, head->index); 2639 qemu_put_be16(f, head->n_valid); 2640 qemu_put_be16(f, head->n_invalid); 2641 qemu_put_buffer(f, (void *)(head + 1), 2642 HASH_PTE_SIZE_64 * head->n_valid); 2643 2644 buffer += chunksize; 2645 n -= chunksize; 2646 } 2647 } 2648 } while ((rc != 0) 2649 && ((max_ns < 0) 2650 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns))); 2651 2652 return (rc == 0) ? 
1 : 0; 2653 } 2654 2655 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index, 2656 uint16_t n_valid, uint16_t n_invalid) 2657 { 2658 struct kvm_get_htab_header *buf; 2659 size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64; 2660 ssize_t rc; 2661 2662 buf = alloca(chunksize); 2663 buf->index = index; 2664 buf->n_valid = n_valid; 2665 buf->n_invalid = n_invalid; 2666 2667 qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid); 2668 2669 rc = write(fd, buf, chunksize); 2670 if (rc < 0) { 2671 fprintf(stderr, "Error writing KVM hash table: %s\n", 2672 strerror(errno)); 2673 return rc; 2674 } 2675 if (rc != chunksize) { 2676 /* We should never get a short write on a single chunk */ 2677 fprintf(stderr, "Short write, restoring KVM hash table\n"); 2678 return -1; 2679 } 2680 return 0; 2681 } 2682 2683 bool kvm_arch_stop_on_emulation_error(CPUState *cpu) 2684 { 2685 return true; 2686 } 2687 2688 void kvm_arch_init_irq_routing(KVMState *s) 2689 { 2690 } 2691 2692 void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n) 2693 { 2694 int fd, rc; 2695 int i; 2696 2697 fd = kvmppc_get_htab_fd(false, ptex, &error_abort); 2698 2699 i = 0; 2700 while (i < n) { 2701 struct kvm_get_htab_header *hdr; 2702 int m = n < HPTES_PER_GROUP ? n : HPTES_PER_GROUP; 2703 char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64]; 2704 2705 rc = read(fd, buf, sizeof(buf)); 2706 if (rc < 0) { 2707 hw_error("kvmppc_read_hptes: Unable to read HPTEs"); 2708 } 2709 2710 hdr = (struct kvm_get_htab_header *)buf; 2711 while ((i < n) && ((char *)hdr < (buf + rc))) { 2712 int invalid = hdr->n_invalid, valid = hdr->n_valid; 2713 2714 if (hdr->index != (ptex + i)) { 2715 hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32 2716 " != (%"HWADDR_PRIu" + %d", hdr->index, ptex, i); 2717 } 2718 2719 if (n - i < valid) { 2720 valid = n - i; 2721 } 2722 memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * valid); 2723 i += valid; 2724 2725 if ((n - i) < invalid) { 2726 invalid = n - i; 2727 } 2728 memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64); 2729 i += invalid; 2730 2731 hdr = (struct kvm_get_htab_header *) 2732 ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid); 2733 } 2734 } 2735 2736 close(fd); 2737 } 2738 2739 void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1) 2740 { 2741 int fd, rc; 2742 struct { 2743 struct kvm_get_htab_header hdr; 2744 uint64_t pte0; 2745 uint64_t pte1; 2746 } buf; 2747 2748 fd = kvmppc_get_htab_fd(true, 0 /* Ignored */, &error_abort); 2749 2750 buf.hdr.n_valid = 1; 2751 buf.hdr.n_invalid = 0; 2752 buf.hdr.index = ptex; 2753 buf.pte0 = cpu_to_be64(pte0); 2754 buf.pte1 = cpu_to_be64(pte1); 2755 2756 rc = write(fd, &buf, sizeof(buf)); 2757 if (rc != sizeof(buf)) { 2758 hw_error("kvmppc_write_hpte: Unable to update KVM HPT"); 2759 } 2760 close(fd); 2761 } 2762 2763 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route, 2764 uint64_t address, uint32_t data, PCIDevice *dev) 2765 { 2766 return 0; 2767 } 2768 2769 int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route, 2770 int vector, PCIDevice *dev) 2771 { 2772 return 0; 2773 } 2774 2775 int kvm_arch_release_virq_post(int virq) 2776 { 2777 return 0; 2778 } 2779 2780 int kvm_arch_msi_data_to_gsi(uint32_t data) 2781 { 2782 return data & 0xffff; 2783 } 2784 2785 int kvmppc_enable_hwrng(void) 2786 { 2787 if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) { 2788 return -1; 2789 } 2790 2791 return kvmppc_enable_hcall(kvm_state, H_RANDOM); 2792 } 2793 2794 void 
kvmppc_check_papr_resize_hpt(Error **errp) 2795 { 2796 if (!kvm_enabled()) { 2797 return; /* No KVM, we're good */ 2798 } 2799 2800 if (cap_resize_hpt) { 2801 return; /* Kernel has explicit support, we're good */ 2802 } 2803 2804 /* Otherwise fallback on looking for PR KVM */ 2805 if (kvmppc_is_pr(kvm_state)) { 2806 return; 2807 } 2808 2809 error_setg(errp, 2810 "Hash page table resizing not available with this KVM version"); 2811 } 2812 2813 int kvmppc_resize_hpt_prepare(PowerPCCPU *cpu, target_ulong flags, int shift) 2814 { 2815 CPUState *cs = CPU(cpu); 2816 struct kvm_ppc_resize_hpt rhpt = { 2817 .flags = flags, 2818 .shift = shift, 2819 }; 2820 2821 if (!cap_resize_hpt) { 2822 return -ENOSYS; 2823 } 2824 2825 return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_PREPARE, &rhpt); 2826 } 2827 2828 int kvmppc_resize_hpt_commit(PowerPCCPU *cpu, target_ulong flags, int shift) 2829 { 2830 CPUState *cs = CPU(cpu); 2831 struct kvm_ppc_resize_hpt rhpt = { 2832 .flags = flags, 2833 .shift = shift, 2834 }; 2835 2836 if (!cap_resize_hpt) { 2837 return -ENOSYS; 2838 } 2839 2840 return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_COMMIT, &rhpt); 2841 } 2842 2843 /* 2844 * This is a helper function to detect a post migration scenario 2845 * in which a guest, running as KVM-HV, freezes in cpu_post_load because 2846 * the guest kernel can't handle a PVR value other than the actual host 2847 * PVR in KVM_SET_SREGS, even if pvr_match() returns true. 2848 * 2849 * If we don't have cap_ppc_pvr_compat and we're not running in PR 2850 * (so, we're HV), return true. The workaround itself is done in 2851 * cpu_post_load. 2852 * 2853 * The order here is important: we'll only check for KVM PR as a 2854 * fallback if the guest kernel can't handle the situation itself. 2855 * We need to avoid as much as possible querying the running KVM type 2856 * in QEMU level. 2857 */ 2858 bool kvmppc_pvr_workaround_required(PowerPCCPU *cpu) 2859 { 2860 CPUState *cs = CPU(cpu); 2861 2862 if (!kvm_enabled()) { 2863 return false; 2864 } 2865 2866 if (cap_ppc_pvr_compat) { 2867 return false; 2868 } 2869 2870 return !kvmppc_is_pr(cs->kvm_state); 2871 } 2872
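/*
 * For illustration: one way a caller might drain the hash page table via the
 * fd-based interface above (a sketch only; "f", "bufsize" and "max_ns" are
 * placeholders the caller would supply).  kvmppc_save_htab() returns a
 * negative value if reading the fd fails, 0 if it stopped because the max_ns
 * time budget ran out, and 1 once the whole HPT stream has been consumed:
 *
 *     Error *local_err = NULL;
 *     int fd = kvmppc_get_htab_fd(false, 0, &local_err);
 *     int rc;
 *
 *     if (fd < 0) {
 *         error_report_err(local_err);
 *         return fd;
 *     }
 *     do {
 *         rc = kvmppc_save_htab(f, fd, bufsize, max_ns);
 *     } while (rc == 0);
 *     close(fd);
 *     return (rc < 0) ? rc : 0;
 */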