1 /* 2 * PowerPC implementation of KVM hooks 3 * 4 * Copyright IBM Corp. 2007 5 * Copyright (C) 2011 Freescale Semiconductor, Inc. 6 * 7 * Authors: 8 * Jerone Young <jyoung5@us.ibm.com> 9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com> 10 * Hollis Blanchard <hollisb@us.ibm.com> 11 * 12 * This work is licensed under the terms of the GNU GPL, version 2 or later. 13 * See the COPYING file in the top-level directory. 14 * 15 */ 16 17 #include "qemu/osdep.h" 18 #include <dirent.h> 19 #include <sys/ioctl.h> 20 #include <sys/vfs.h> 21 22 #include <linux/kvm.h> 23 24 #include "qemu-common.h" 25 #include "qapi/error.h" 26 #include "qemu/error-report.h" 27 #include "cpu.h" 28 #include "cpu-models.h" 29 #include "qemu/timer.h" 30 #include "sysemu/sysemu.h" 31 #include "sysemu/hw_accel.h" 32 #include "kvm_ppc.h" 33 #include "sysemu/cpus.h" 34 #include "sysemu/device_tree.h" 35 #include "mmu-hash64.h" 36 37 #include "hw/sysbus.h" 38 #include "hw/ppc/spapr.h" 39 #include "hw/ppc/spapr_vio.h" 40 #include "hw/ppc/spapr_cpu_core.h" 41 #include "hw/ppc/ppc.h" 42 #include "sysemu/watchdog.h" 43 #include "trace.h" 44 #include "exec/gdbstub.h" 45 #include "exec/memattrs.h" 46 #include "exec/ram_addr.h" 47 #include "sysemu/hostmem.h" 48 #include "qemu/cutils.h" 49 #include "qemu/mmap-alloc.h" 50 #if defined(TARGET_PPC64) 51 #include "hw/ppc/spapr_cpu_core.h" 52 #endif 53 #include "elf.h" 54 #include "sysemu/kvm_int.h" 55 56 //#define DEBUG_KVM 57 58 #ifdef DEBUG_KVM 59 #define DPRINTF(fmt, ...) \ 60 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0) 61 #else 62 #define DPRINTF(fmt, ...) \ 63 do { } while (0) 64 #endif 65 66 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/" 67 68 const KVMCapabilityInfo kvm_arch_required_capabilities[] = { 69 KVM_CAP_LAST_INFO 70 }; 71 72 static int cap_interrupt_unset = false; 73 static int cap_interrupt_level = false; 74 static int cap_segstate; 75 static int cap_booke_sregs; 76 static int cap_ppc_smt; 77 static int cap_ppc_smt_possible; 78 static int cap_ppc_rma; 79 static int cap_spapr_tce; 80 static int cap_spapr_tce_64; 81 static int cap_spapr_multitce; 82 static int cap_spapr_vfio; 83 static int cap_hior; 84 static int cap_one_reg; 85 static int cap_epr; 86 static int cap_ppc_watchdog; 87 static int cap_papr; 88 static int cap_htab_fd; 89 static int cap_fixup_hcalls; 90 static int cap_htm; /* Hardware transactional memory support */ 91 static int cap_mmu_radix; 92 static int cap_mmu_hash_v3; 93 static int cap_resize_hpt; 94 static int cap_ppc_pvr_compat; 95 96 static uint32_t debug_inst_opcode; 97 98 /* XXX We have a race condition where we actually have a level triggered 99 * interrupt, but the infrastructure can't expose that yet, so the guest 100 * takes but ignores it, goes to sleep and never gets notified that there's 101 * still an interrupt pending. 102 * 103 * As a quick workaround, let's just wake up again 20 ms after we injected 104 * an interrupt. That way we can assure that we're always reinjecting 105 * interrupts in case the guest swallowed them. 106 */ 107 static QEMUTimer *idle_timer; 108 109 static void kvm_kick_cpu(void *opaque) 110 { 111 PowerPCCPU *cpu = opaque; 112 113 qemu_cpu_kick(CPU(cpu)); 114 } 115 116 /* Check whether we are running with KVM-PR (instead of KVM-HV). This 117 * should only be used for fallback tests - generally we should use 118 * explicit capabilities for the features we want, rather than 119 * assuming what is/isn't available depending on the KVM variant. 
*/ 120 static bool kvmppc_is_pr(KVMState *ks) 121 { 122 /* Assume KVM-PR if the GET_PVINFO capability is available */ 123 return kvm_vm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0; 124 } 125 126 static int kvm_ppc_register_host_cpu_type(void); 127 128 int kvm_arch_init(MachineState *ms, KVMState *s) 129 { 130 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ); 131 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL); 132 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE); 133 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS); 134 cap_ppc_smt_possible = kvm_check_extension(s, KVM_CAP_PPC_SMT_POSSIBLE); 135 cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA); 136 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE); 137 cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64); 138 cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE); 139 cap_spapr_vfio = false; 140 cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG); 141 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR); 142 cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR); 143 cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG); 144 /* Note: we don't set cap_papr here, because this capability is 145 * only activated after this by kvmppc_set_papr() */ 146 cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD); 147 cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL); 148 cap_ppc_smt = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT); 149 cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM); 150 cap_mmu_radix = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_RADIX); 151 cap_mmu_hash_v3 = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_HASH_V3); 152 cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT); 153 /* 154 * Note: setting it to false because there is not such capability 155 * in KVM at this moment. 156 * 157 * TODO: call kvm_vm_check_extension() with the right capability 158 * after the kernel starts implementing it.*/ 159 cap_ppc_pvr_compat = false; 160 161 if (!cap_interrupt_level) { 162 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the " 163 "VM to stall at times!\n"); 164 } 165 166 kvm_ppc_register_host_cpu_type(); 167 168 return 0; 169 } 170 171 int kvm_arch_irqchip_create(MachineState *ms, KVMState *s) 172 { 173 return 0; 174 } 175 176 static int kvm_arch_sync_sregs(PowerPCCPU *cpu) 177 { 178 CPUPPCState *cenv = &cpu->env; 179 CPUState *cs = CPU(cpu); 180 struct kvm_sregs sregs; 181 int ret; 182 183 if (cenv->excp_model == POWERPC_EXCP_BOOKE) { 184 /* What we're really trying to say is "if we're on BookE, we use 185 the native PVR for now". This is the only sane way to check 186 it though, so we potentially confuse users that they can run 187 BookE guests on BookS. 
Let's hope nobody dares enough :) */
        return 0;
    } else {
        if (!cap_segstate) {
            fprintf(stderr, "kvm error: missing PVR setting capability\n");
            return -ENOSYS;
        }
    }

    ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
    if (ret) {
        return ret;
    }

    sregs.pvr = cenv->spr[SPR_PVR];
    return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
}

/* Set up a shared TLB array with KVM */
static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_book3e_206_tlb_params params = {};
    struct kvm_config_tlb cfg = {};
    unsigned int entries = 0;
    int ret, i;

    if (!kvm_enabled() ||
        !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
        return 0;
    }

    assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);

    for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
        params.tlb_sizes[i] = booke206_tlb_size(env, i);
        params.tlb_ways[i] = booke206_tlb_ways(env, i);
        entries += params.tlb_sizes[i];
    }

    assert(entries == env->nb_tlb);
    assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));

    env->tlb_dirty = true;

    cfg.array = (uintptr_t)env->tlb.tlbm;
    cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
    cfg.params = (uintptr_t)&params;
    cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;

    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
    if (ret < 0) {
        fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
                __func__, strerror(-ret));
        return ret;
    }

    env->kvm_sw_tlb = true;
    return 0;
}


#if defined(TARGET_PPC64)
static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
                                       struct kvm_ppc_smmu_info *info)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);

    memset(info, 0, sizeof(*info));

    /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
     * need to "guess" what the supported page sizes are.
     *
     * For that to work we make a few assumptions:
     *
     * - Check whether we are running "PR" KVM which only supports 4K
     *   and 16M pages, but supports them regardless of the backing
     *   store characteristics. We also don't support 1T segments.
     *
     *   This is safe as if HV KVM ever supports that capability or PR
     *   KVM grows support for more page/segment sizes, those versions
     *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
     *   will not hit this fallback
     *
     * - Else we are running HV KVM. This means we only support page
     *   sizes that fit in the backing store. Additionally we only
     *   advertise 64K pages if the processor is ARCH 2.06 and we assume
     *   P7 encodings for the SLB and hash table. Here too, we assume
     *   support for any newer processor will mean a kernel that
     *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
     *   this fallback.
280 */ 281 if (kvmppc_is_pr(cs->kvm_state)) { 282 /* No flags */ 283 info->flags = 0; 284 info->slb_size = 64; 285 286 /* Standard 4k base page size segment */ 287 info->sps[0].page_shift = 12; 288 info->sps[0].slb_enc = 0; 289 info->sps[0].enc[0].page_shift = 12; 290 info->sps[0].enc[0].pte_enc = 0; 291 292 /* Standard 16M large page size segment */ 293 info->sps[1].page_shift = 24; 294 info->sps[1].slb_enc = SLB_VSID_L; 295 info->sps[1].enc[0].page_shift = 24; 296 info->sps[1].enc[0].pte_enc = 0; 297 } else { 298 int i = 0; 299 300 /* HV KVM has backing store size restrictions */ 301 info->flags = KVM_PPC_PAGE_SIZES_REAL; 302 303 if (env->mmu_model & POWERPC_MMU_1TSEG) { 304 info->flags |= KVM_PPC_1T_SEGMENTS; 305 } 306 307 if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 || 308 POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) { 309 info->slb_size = 32; 310 } else { 311 info->slb_size = 64; 312 } 313 314 /* Standard 4k base page size segment */ 315 info->sps[i].page_shift = 12; 316 info->sps[i].slb_enc = 0; 317 info->sps[i].enc[0].page_shift = 12; 318 info->sps[i].enc[0].pte_enc = 0; 319 i++; 320 321 /* 64K on MMU 2.06 and later */ 322 if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 || 323 POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) { 324 info->sps[i].page_shift = 16; 325 info->sps[i].slb_enc = 0x110; 326 info->sps[i].enc[0].page_shift = 16; 327 info->sps[i].enc[0].pte_enc = 1; 328 i++; 329 } 330 331 /* Standard 16M large page size segment */ 332 info->sps[i].page_shift = 24; 333 info->sps[i].slb_enc = SLB_VSID_L; 334 info->sps[i].enc[0].page_shift = 24; 335 info->sps[i].enc[0].pte_enc = 0; 336 } 337 } 338 339 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info) 340 { 341 CPUState *cs = CPU(cpu); 342 int ret; 343 344 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) { 345 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info); 346 if (ret == 0) { 347 return; 348 } 349 } 350 351 kvm_get_fallback_smmu_info(cpu, info); 352 } 353 354 struct ppc_radix_page_info *kvm_get_radix_page_info(void) 355 { 356 KVMState *s = KVM_STATE(current_machine->accelerator); 357 struct ppc_radix_page_info *radix_page_info; 358 struct kvm_ppc_rmmu_info rmmu_info; 359 int i; 360 361 if (!kvm_check_extension(s, KVM_CAP_PPC_MMU_RADIX)) { 362 return NULL; 363 } 364 if (kvm_vm_ioctl(s, KVM_PPC_GET_RMMU_INFO, &rmmu_info)) { 365 return NULL; 366 } 367 radix_page_info = g_malloc0(sizeof(*radix_page_info)); 368 radix_page_info->count = 0; 369 for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) { 370 if (rmmu_info.ap_encodings[i]) { 371 radix_page_info->entries[i] = rmmu_info.ap_encodings[i]; 372 radix_page_info->count++; 373 } 374 } 375 return radix_page_info; 376 } 377 378 target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu, 379 bool radix, bool gtse, 380 uint64_t proc_tbl) 381 { 382 CPUState *cs = CPU(cpu); 383 int ret; 384 uint64_t flags = 0; 385 struct kvm_ppc_mmuv3_cfg cfg = { 386 .process_table = proc_tbl, 387 }; 388 389 if (radix) { 390 flags |= KVM_PPC_MMUV3_RADIX; 391 } 392 if (gtse) { 393 flags |= KVM_PPC_MMUV3_GTSE; 394 } 395 cfg.flags = flags; 396 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_CONFIGURE_V3_MMU, &cfg); 397 switch (ret) { 398 case 0: 399 return H_SUCCESS; 400 case -EINVAL: 401 return H_PARAMETER; 402 case -ENODEV: 403 return H_NOT_AVAILABLE; 404 default: 405 return H_HARDWARE; 406 } 407 } 408 409 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift) 410 { 411 if (!(flags & 
KVM_PPC_PAGE_SIZES_REAL)) { 412 return true; 413 } 414 415 return (1ul << shift) <= rampgsize; 416 } 417 418 static long max_cpu_page_size; 419 420 static void kvm_fixup_page_sizes(PowerPCCPU *cpu) 421 { 422 static struct kvm_ppc_smmu_info smmu_info; 423 static bool has_smmu_info; 424 CPUPPCState *env = &cpu->env; 425 int iq, ik, jq, jk; 426 bool has_64k_pages = false; 427 428 /* We only handle page sizes for 64-bit server guests for now */ 429 if (!(env->mmu_model & POWERPC_MMU_64)) { 430 return; 431 } 432 433 /* Collect MMU info from kernel if not already */ 434 if (!has_smmu_info) { 435 kvm_get_smmu_info(cpu, &smmu_info); 436 has_smmu_info = true; 437 } 438 439 if (!max_cpu_page_size) { 440 max_cpu_page_size = qemu_getrampagesize(); 441 } 442 443 /* Convert to QEMU form */ 444 memset(&env->sps, 0, sizeof(env->sps)); 445 446 /* If we have HV KVM, we need to forbid CI large pages if our 447 * host page size is smaller than 64K. 448 */ 449 if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) { 450 env->ci_large_pages = getpagesize() >= 0x10000; 451 } 452 453 /* 454 * XXX This loop should be an entry wide AND of the capabilities that 455 * the selected CPU has with the capabilities that KVM supports. 456 */ 457 for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) { 458 struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq]; 459 struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik]; 460 461 if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size, 462 ksps->page_shift)) { 463 continue; 464 } 465 qsps->page_shift = ksps->page_shift; 466 qsps->slb_enc = ksps->slb_enc; 467 for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) { 468 if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size, 469 ksps->enc[jk].page_shift)) { 470 continue; 471 } 472 if (ksps->enc[jk].page_shift == 16) { 473 has_64k_pages = true; 474 } 475 qsps->enc[jq].page_shift = ksps->enc[jk].page_shift; 476 qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc; 477 if (++jq >= PPC_PAGE_SIZES_MAX_SZ) { 478 break; 479 } 480 } 481 if (++iq >= PPC_PAGE_SIZES_MAX_SZ) { 482 break; 483 } 484 } 485 env->slb_nr = smmu_info.slb_size; 486 if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) { 487 env->mmu_model &= ~POWERPC_MMU_1TSEG; 488 } 489 if (!has_64k_pages) { 490 env->mmu_model &= ~POWERPC_MMU_64K; 491 } 492 } 493 494 bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path) 495 { 496 Object *mem_obj = object_resolve_path(obj_path, NULL); 497 char *mempath = object_property_get_str(mem_obj, "mem-path", NULL); 498 long pagesize; 499 500 if (mempath) { 501 pagesize = qemu_mempath_getpagesize(mempath); 502 g_free(mempath); 503 } else { 504 pagesize = getpagesize(); 505 } 506 507 return pagesize >= max_cpu_page_size; 508 } 509 510 #else /* defined (TARGET_PPC64) */ 511 512 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu) 513 { 514 } 515 516 bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path) 517 { 518 return true; 519 } 520 521 #endif /* !defined (TARGET_PPC64) */ 522 523 unsigned long kvm_arch_vcpu_id(CPUState *cpu) 524 { 525 return POWERPC_CPU(cpu)->vcpu_id; 526 } 527 528 /* e500 supports 2 h/w breakpoint and 2 watchpoint. 529 * book3s supports only 1 watchpoint, so array size 530 * of 4 is sufficient for now. 
 */
#define MAX_HW_BKPTS 4

static struct HWBreakpoint {
    target_ulong addr;
    int type;
} hw_debug_points[MAX_HW_BKPTS];

static CPUWatchpoint hw_watchpoint;

/* Default there is no breakpoint and watchpoint supported */
static int max_hw_breakpoint;
static int max_hw_watchpoint;
static int nb_hw_breakpoint;
static int nb_hw_watchpoint;

static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
{
    if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
        max_hw_breakpoint = 2;
        max_hw_watchpoint = 2;
    }

    if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
        fprintf(stderr, "Error initializing h/w breakpoints\n");
        return;
    }
}

int kvm_arch_init_vcpu(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *cenv = &cpu->env;
    int ret;

    /* Gather server mmu info from KVM and update the CPU state */
    kvm_fixup_page_sizes(cpu);

    /* Synchronize sregs with kvm */
    ret = kvm_arch_sync_sregs(cpu);
    if (ret) {
        if (ret == -EINVAL) {
            error_report("Register sync failed... If you're using kvm-hv.ko,"
                         " only \"-cpu host\" is possible");
        }
        return ret;
    }

    idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);

    switch (cenv->mmu_model) {
    case POWERPC_MMU_BOOKE206:
        /* This target supports access to KVM's guest TLB */
        ret = kvm_booke206_tlb_init(cpu);
        break;
    case POWERPC_MMU_2_07:
        if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
            /* KVM-HV has transactional memory on POWER8 also without the
             * KVM_CAP_PPC_HTM extension, so enable it here instead as
             * long as it's available to userspace on the host. */
            if (qemu_getauxval(AT_HWCAP2) & PPC_FEATURE2_HAS_HTM) {
                cap_htm = true;
            }
        }
        break;
    default:
        break;
    }

    kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
    kvmppc_hw_debug_points_init(cenv);

    return ret;
}

static void kvm_sw_tlb_put(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_dirty_tlb dirty_tlb;
    unsigned char *bitmap;
    int ret;

    if (!env->kvm_sw_tlb) {
        return;
    }

    bitmap = g_malloc((env->nb_tlb + 7) / 8);
    memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);

    dirty_tlb.bitmap = (uintptr_t)bitmap;
    dirty_tlb.num_dirty = env->nb_tlb;

    ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
    if (ret) {
        fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
                __func__, strerror(-ret));
    }

    g_free(bitmap);
}

static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    union {
        uint32_t u32;
        uint64_t u64;
    } val;
    struct kvm_one_reg reg = {
        .id = id,
        .addr = (uintptr_t) &val,
    };
    int ret;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret != 0) {
        trace_kvm_failed_spr_get(spr, strerror(errno));
    } else {
        switch (id & KVM_REG_SIZE_MASK) {
        case KVM_REG_SIZE_U32:
            env->spr[spr] = val.u32;
            break;

        case KVM_REG_SIZE_U64:
            env->spr[spr] = val.u64;
            break;

        default:
            /* Don't handle this size yet */
            abort();
        }
    }
}
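
/* Counterpart of kvm_get_one_spr(): write one SPR value to KVM through the
 * ONE_REG interface, using the transfer size encoded in the register id. */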
static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    union {
        uint32_t u32;
        uint64_t u64;
    } val;
    struct kvm_one_reg reg = {
        .id = id,
        .addr = (uintptr_t) &val,
    };
    int ret;

    switch (id & KVM_REG_SIZE_MASK) {
    case KVM_REG_SIZE_U32:
        val.u32 = env->spr[spr];
        break;

    case KVM_REG_SIZE_U64:
        val.u64 = env->spr[spr];
        break;

    default:
        /* Don't handle this size yet */
        abort();
    }

    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret != 0) {
        trace_kvm_failed_spr_set(spr, strerror(errno));
    }
}

static int kvm_put_fp(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int i;
    int ret;

    if (env->insns_flags & PPC_FLOAT) {
        uint64_t fpscr = env->fpscr;
        bool vsx = !!(env->insns_flags2 & PPC2_VSX);

        reg.id = KVM_REG_PPC_FPSCR;
        reg.addr = (uintptr_t)&fpscr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            uint64_t vsr[2];

#ifdef HOST_WORDS_BIGENDIAN
            vsr[0] = float64_val(env->fpr[i]);
            vsr[1] = env->vsr[i];
#else
            vsr[0] = env->vsr[i];
            vsr[1] = float64_val(env->fpr[i]);
#endif
            reg.addr = (uintptr_t) &vsr;
            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);

            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
                        i, strerror(errno));
                return ret;
            }
        }
    }

    if (env->insns_flags & PPC_ALTIVEC) {
        reg.id = KVM_REG_PPC_VSCR;
        reg.addr = (uintptr_t)&env->vscr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            reg.id = KVM_REG_PPC_VR(i);
            reg.addr = (uintptr_t)&env->avr[i];
            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
                return ret;
            }
        }
    }

    return 0;
}

static int kvm_get_fp(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int i;
    int ret;

    if (env->insns_flags & PPC_FLOAT) {
        uint64_t fpscr;
        bool vsx = !!(env->insns_flags2 & PPC2_VSX);

        reg.id = KVM_REG_PPC_FPSCR;
        reg.addr = (uintptr_t)&fpscr;
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
            return ret;
        } else {
            env->fpscr = fpscr;
        }

        for (i = 0; i < 32; i++) {
            uint64_t vsr[2];

            reg.addr = (uintptr_t) &vsr;
            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);

            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
            if (ret < 0) {
"VSR" : "FPR", i, strerror(errno)); 798 return ret; 799 } else { 800 #ifdef HOST_WORDS_BIGENDIAN 801 env->fpr[i] = vsr[0]; 802 if (vsx) { 803 env->vsr[i] = vsr[1]; 804 } 805 #else 806 env->fpr[i] = vsr[1]; 807 if (vsx) { 808 env->vsr[i] = vsr[0]; 809 } 810 #endif 811 } 812 } 813 } 814 815 if (env->insns_flags & PPC_ALTIVEC) { 816 reg.id = KVM_REG_PPC_VSCR; 817 reg.addr = (uintptr_t)&env->vscr; 818 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 819 if (ret < 0) { 820 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno)); 821 return ret; 822 } 823 824 for (i = 0; i < 32; i++) { 825 reg.id = KVM_REG_PPC_VR(i); 826 reg.addr = (uintptr_t)&env->avr[i]; 827 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 828 if (ret < 0) { 829 DPRINTF("Unable to get VR%d from KVM: %s\n", 830 i, strerror(errno)); 831 return ret; 832 } 833 } 834 } 835 836 return 0; 837 } 838 839 #if defined(TARGET_PPC64) 840 static int kvm_get_vpa(CPUState *cs) 841 { 842 PowerPCCPU *cpu = POWERPC_CPU(cs); 843 CPUPPCState *env = &cpu->env; 844 struct kvm_one_reg reg; 845 int ret; 846 847 reg.id = KVM_REG_PPC_VPA_ADDR; 848 reg.addr = (uintptr_t)&env->vpa_addr; 849 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 850 if (ret < 0) { 851 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno)); 852 return ret; 853 } 854 855 assert((uintptr_t)&env->slb_shadow_size 856 == ((uintptr_t)&env->slb_shadow_addr + 8)); 857 reg.id = KVM_REG_PPC_VPA_SLB; 858 reg.addr = (uintptr_t)&env->slb_shadow_addr; 859 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 860 if (ret < 0) { 861 DPRINTF("Unable to get SLB shadow state from KVM: %s\n", 862 strerror(errno)); 863 return ret; 864 } 865 866 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8)); 867 reg.id = KVM_REG_PPC_VPA_DTL; 868 reg.addr = (uintptr_t)&env->dtl_addr; 869 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 870 if (ret < 0) { 871 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n", 872 strerror(errno)); 873 return ret; 874 } 875 876 return 0; 877 } 878 879 static int kvm_put_vpa(CPUState *cs) 880 { 881 PowerPCCPU *cpu = POWERPC_CPU(cs); 882 CPUPPCState *env = &cpu->env; 883 struct kvm_one_reg reg; 884 int ret; 885 886 /* SLB shadow or DTL can't be registered unless a master VPA is 887 * registered. That means when restoring state, if a VPA *is* 888 * registered, we need to set that up first. 
     * If not, we need to deregister the others before deregistering
     * the master VPA */
    assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));

    if (env->vpa_addr) {
        reg.id = KVM_REG_PPC_VPA_ADDR;
        reg.addr = (uintptr_t)&env->vpa_addr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
            return ret;
        }
    }

    assert((uintptr_t)&env->slb_shadow_size
           == ((uintptr_t)&env->slb_shadow_addr + 8));
    reg.id = KVM_REG_PPC_VPA_SLB;
    reg.addr = (uintptr_t)&env->slb_shadow_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
        return ret;
    }

    assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
    reg.id = KVM_REG_PPC_VPA_DTL;
    reg.addr = (uintptr_t)&env->dtl_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
                strerror(errno));
        return ret;
    }

    if (!env->vpa_addr) {
        reg.id = KVM_REG_PPC_VPA_ADDR;
        reg.addr = (uintptr_t)&env->vpa_addr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
            return ret;
        }
    }

    return 0;
}
#endif /* TARGET_PPC64 */
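
/* Push the Book3S segment state (PVR, SDR1, SLB entries, segment registers
 * and BATs) from the QEMU CPU state into KVM via KVM_SET_SREGS. */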
int kvmppc_put_books_sregs(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    struct kvm_sregs sregs;
    int i;

    sregs.pvr = env->spr[SPR_PVR];

    sregs.u.s.sdr1 = env->spr[SPR_SDR1];

    /* Sync SLB */
#ifdef TARGET_PPC64
    for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
        sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
        if (env->slb[i].esid & SLB_ESID_V) {
            sregs.u.s.ppc64.slb[i].slbe |= i;
        }
        sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
    }
#endif

    /* Sync SRs */
    for (i = 0; i < 16; i++) {
        sregs.u.s.ppc32.sr[i] = env->sr[i];
    }

    /* Sync BATs */
    for (i = 0; i < 8; i++) {
        /* Beware. We have to swap upper and lower bits here */
        sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
            | env->DBAT[1][i];
        sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
            | env->IBAT[1][i];
    }

    return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
}

int kvm_arch_put_registers(CPUState *cs, int level)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_regs regs;
    int ret;
    int i;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    regs.ctr = env->ctr;
    regs.lr = env->lr;
    regs.xer = cpu_read_xer(env);
    regs.msr = env->msr;
    regs.pc = env->nip;

    regs.srr0 = env->spr[SPR_SRR0];
    regs.srr1 = env->spr[SPR_SRR1];

    regs.sprg0 = env->spr[SPR_SPRG0];
    regs.sprg1 = env->spr[SPR_SPRG1];
    regs.sprg2 = env->spr[SPR_SPRG2];
    regs.sprg3 = env->spr[SPR_SPRG3];
    regs.sprg4 = env->spr[SPR_SPRG4];
    regs.sprg5 = env->spr[SPR_SPRG5];
    regs.sprg6 = env->spr[SPR_SPRG6];
    regs.sprg7 = env->spr[SPR_SPRG7];

    regs.pid = env->spr[SPR_BOOKE_PID];

    for (i = 0; i < 32; i++) {
        regs.gpr[i] = env->gpr[i];
    }

    regs.cr = 0;
    for (i = 0; i < 8; i++) {
        regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
    }

    ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    kvm_put_fp(cs);

    if (env->tlb_dirty) {
        kvm_sw_tlb_put(cpu);
        env->tlb_dirty = false;
    }

    if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
        ret = kvmppc_put_books_sregs(cpu);
        if (ret < 0) {
            return ret;
        }
    }

    if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
        kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
    }

    if (cap_one_reg) {
        int i;

        /* We deliberately ignore errors here, for kernels which have
         * the ONE_REG calls, but don't support the specific
         * registers, there's a reasonable chance things will still
         * work, at least until we try to migrate.
*/ 1044 for (i = 0; i < 1024; i++) { 1045 uint64_t id = env->spr_cb[i].one_reg_id; 1046 1047 if (id != 0) { 1048 kvm_put_one_spr(cs, id, i); 1049 } 1050 } 1051 1052 #ifdef TARGET_PPC64 1053 if (msr_ts) { 1054 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) { 1055 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]); 1056 } 1057 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) { 1058 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]); 1059 } 1060 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr); 1061 kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr); 1062 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr); 1063 kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr); 1064 kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr); 1065 kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr); 1066 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave); 1067 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr); 1068 kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr); 1069 kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar); 1070 } 1071 1072 if (cap_papr) { 1073 if (kvm_put_vpa(cs) < 0) { 1074 DPRINTF("Warning: Unable to set VPA information to KVM\n"); 1075 } 1076 } 1077 1078 kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset); 1079 #endif /* TARGET_PPC64 */ 1080 } 1081 1082 return ret; 1083 } 1084 1085 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor) 1086 { 1087 env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR]; 1088 } 1089 1090 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu) 1091 { 1092 CPUPPCState *env = &cpu->env; 1093 struct kvm_sregs sregs; 1094 int ret; 1095 1096 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs); 1097 if (ret < 0) { 1098 return ret; 1099 } 1100 1101 if (sregs.u.e.features & KVM_SREGS_E_BASE) { 1102 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0; 1103 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1; 1104 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr; 1105 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear; 1106 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr; 1107 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr; 1108 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr; 1109 env->spr[SPR_DECR] = sregs.u.e.dec; 1110 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff; 1111 env->spr[SPR_TBU] = sregs.u.e.tb >> 32; 1112 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave; 1113 } 1114 1115 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) { 1116 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir; 1117 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0; 1118 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1; 1119 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar; 1120 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr; 1121 } 1122 1123 if (sregs.u.e.features & KVM_SREGS_E_64) { 1124 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr; 1125 } 1126 1127 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) { 1128 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8; 1129 } 1130 1131 if (sregs.u.e.features & KVM_SREGS_E_IVOR) { 1132 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0]; 1133 kvm_sync_excp(env, POWERPC_EXCP_CRITICAL, SPR_BOOKE_IVOR0); 1134 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1]; 1135 kvm_sync_excp(env, POWERPC_EXCP_MCHECK, SPR_BOOKE_IVOR1); 1136 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2]; 1137 kvm_sync_excp(env, POWERPC_EXCP_DSI, SPR_BOOKE_IVOR2); 1138 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3]; 1139 kvm_sync_excp(env, POWERPC_EXCP_ISI, SPR_BOOKE_IVOR3); 1140 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4]; 1141 kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL, SPR_BOOKE_IVOR4); 1142 
env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5]; 1143 kvm_sync_excp(env, POWERPC_EXCP_ALIGN, SPR_BOOKE_IVOR5); 1144 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6]; 1145 kvm_sync_excp(env, POWERPC_EXCP_PROGRAM, SPR_BOOKE_IVOR6); 1146 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7]; 1147 kvm_sync_excp(env, POWERPC_EXCP_FPU, SPR_BOOKE_IVOR7); 1148 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8]; 1149 kvm_sync_excp(env, POWERPC_EXCP_SYSCALL, SPR_BOOKE_IVOR8); 1150 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9]; 1151 kvm_sync_excp(env, POWERPC_EXCP_APU, SPR_BOOKE_IVOR9); 1152 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10]; 1153 kvm_sync_excp(env, POWERPC_EXCP_DECR, SPR_BOOKE_IVOR10); 1154 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11]; 1155 kvm_sync_excp(env, POWERPC_EXCP_FIT, SPR_BOOKE_IVOR11); 1156 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12]; 1157 kvm_sync_excp(env, POWERPC_EXCP_WDT, SPR_BOOKE_IVOR12); 1158 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13]; 1159 kvm_sync_excp(env, POWERPC_EXCP_DTLB, SPR_BOOKE_IVOR13); 1160 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14]; 1161 kvm_sync_excp(env, POWERPC_EXCP_ITLB, SPR_BOOKE_IVOR14); 1162 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15]; 1163 kvm_sync_excp(env, POWERPC_EXCP_DEBUG, SPR_BOOKE_IVOR15); 1164 1165 if (sregs.u.e.features & KVM_SREGS_E_SPE) { 1166 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0]; 1167 kvm_sync_excp(env, POWERPC_EXCP_SPEU, SPR_BOOKE_IVOR32); 1168 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1]; 1169 kvm_sync_excp(env, POWERPC_EXCP_EFPDI, SPR_BOOKE_IVOR33); 1170 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2]; 1171 kvm_sync_excp(env, POWERPC_EXCP_EFPRI, SPR_BOOKE_IVOR34); 1172 } 1173 1174 if (sregs.u.e.features & KVM_SREGS_E_PM) { 1175 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3]; 1176 kvm_sync_excp(env, POWERPC_EXCP_EPERFM, SPR_BOOKE_IVOR35); 1177 } 1178 1179 if (sregs.u.e.features & KVM_SREGS_E_PC) { 1180 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4]; 1181 kvm_sync_excp(env, POWERPC_EXCP_DOORI, SPR_BOOKE_IVOR36); 1182 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5]; 1183 kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37); 1184 } 1185 } 1186 1187 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) { 1188 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0; 1189 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1; 1190 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2; 1191 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff; 1192 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4; 1193 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6; 1194 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32; 1195 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg; 1196 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0]; 1197 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1]; 1198 } 1199 1200 if (sregs.u.e.features & KVM_SREGS_EXP) { 1201 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr; 1202 } 1203 1204 if (sregs.u.e.features & KVM_SREGS_E_PD) { 1205 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc; 1206 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc; 1207 } 1208 1209 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) { 1210 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr; 1211 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar; 1212 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0; 1213 1214 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) { 1215 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1; 1216 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2; 1217 } 1218 } 1219 1220 return 0; 1221 } 1222 1223 static int 
kvmppc_get_books_sregs(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    struct kvm_sregs sregs;
    int ret;
    int i;

    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
    if (ret < 0) {
        return ret;
    }

    if (!cpu->vhyp) {
        ppc_store_sdr1(env, sregs.u.s.sdr1);
    }

    /* Sync SLB */
#ifdef TARGET_PPC64
    /*
     * The packed SLB array we get from KVM_GET_SREGS only contains
     * information about valid entries. So we flush our internal copy
     * to get rid of stale ones, then put all valid SLB entries back
     * in.
     */
    memset(env->slb, 0, sizeof(env->slb));
    for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
        target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
        target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
        /*
         * Only restore valid entries
         */
        if (rb & SLB_ESID_V) {
            ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
        }
    }
#endif

    /* Sync SRs */
    for (i = 0; i < 16; i++) {
        env->sr[i] = sregs.u.s.ppc32.sr[i];
    }

    /* Sync BATs */
    for (i = 0; i < 8; i++) {
        env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
        env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
        env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
        env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
    }

    return 0;
}

int kvm_arch_get_registers(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_regs regs;
    uint32_t cr;
    int i, ret;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    cr = regs.cr;
    for (i = 7; i >= 0; i--) {
        env->crf[i] = cr & 15;
        cr >>= 4;
    }

    env->ctr = regs.ctr;
    env->lr = regs.lr;
    cpu_write_xer(env, regs.xer);
    env->msr = regs.msr;
    env->nip = regs.pc;

    env->spr[SPR_SRR0] = regs.srr0;
    env->spr[SPR_SRR1] = regs.srr1;

    env->spr[SPR_SPRG0] = regs.sprg0;
    env->spr[SPR_SPRG1] = regs.sprg1;
    env->spr[SPR_SPRG2] = regs.sprg2;
    env->spr[SPR_SPRG3] = regs.sprg3;
    env->spr[SPR_SPRG4] = regs.sprg4;
    env->spr[SPR_SPRG5] = regs.sprg5;
    env->spr[SPR_SPRG6] = regs.sprg6;
    env->spr[SPR_SPRG7] = regs.sprg7;

    env->spr[SPR_BOOKE_PID] = regs.pid;

    for (i = 0; i < 32; i++) {
        env->gpr[i] = regs.gpr[i];
    }

    kvm_get_fp(cs);

    if (cap_booke_sregs) {
        ret = kvmppc_get_booke_sregs(cpu);
        if (ret < 0) {
            return ret;
        }
    }

    if (cap_segstate) {
        ret = kvmppc_get_books_sregs(cpu);
        if (ret < 0) {
            return ret;
        }
    }

    if (cap_hior) {
        kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
    }

    if (cap_one_reg) {
        int i;

        /* We deliberately ignore errors here, for kernels which have
         * the ONE_REG calls, but don't support the specific
         * registers, there's a reasonable chance things will still
         * work, at least until we try to migrate.
*/ 1344 for (i = 0; i < 1024; i++) { 1345 uint64_t id = env->spr_cb[i].one_reg_id; 1346 1347 if (id != 0) { 1348 kvm_get_one_spr(cs, id, i); 1349 } 1350 } 1351 1352 #ifdef TARGET_PPC64 1353 if (msr_ts) { 1354 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) { 1355 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]); 1356 } 1357 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) { 1358 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]); 1359 } 1360 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr); 1361 kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr); 1362 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr); 1363 kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr); 1364 kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr); 1365 kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr); 1366 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave); 1367 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr); 1368 kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr); 1369 kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar); 1370 } 1371 1372 if (cap_papr) { 1373 if (kvm_get_vpa(cs) < 0) { 1374 DPRINTF("Warning: Unable to get VPA information from KVM\n"); 1375 } 1376 } 1377 1378 kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset); 1379 #endif 1380 } 1381 1382 return 0; 1383 } 1384 1385 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level) 1386 { 1387 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET; 1388 1389 if (irq != PPC_INTERRUPT_EXT) { 1390 return 0; 1391 } 1392 1393 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) { 1394 return 0; 1395 } 1396 1397 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq); 1398 1399 return 0; 1400 } 1401 1402 #if defined(TARGET_PPCEMB) 1403 #define PPC_INPUT_INT PPC40x_INPUT_INT 1404 #elif defined(TARGET_PPC64) 1405 #define PPC_INPUT_INT PPC970_INPUT_INT 1406 #else 1407 #define PPC_INPUT_INT PPC6xx_INPUT_INT 1408 #endif 1409 1410 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run) 1411 { 1412 PowerPCCPU *cpu = POWERPC_CPU(cs); 1413 CPUPPCState *env = &cpu->env; 1414 int r; 1415 unsigned irq; 1416 1417 qemu_mutex_lock_iothread(); 1418 1419 /* PowerPC QEMU tracks the various core input pins (interrupt, critical 1420 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */ 1421 if (!cap_interrupt_level && 1422 run->ready_for_interrupt_injection && 1423 (cs->interrupt_request & CPU_INTERRUPT_HARD) && 1424 (env->irq_input_state & (1<<PPC_INPUT_INT))) 1425 { 1426 /* For now KVM disregards the 'irq' argument. However, in the 1427 * future KVM could cache it in-kernel to avoid a heavyweight exit 1428 * when reading the UIC. 1429 */ 1430 irq = KVM_INTERRUPT_SET; 1431 1432 DPRINTF("injected interrupt %d\n", irq); 1433 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq); 1434 if (r < 0) { 1435 printf("cpu %d fail inject %x\n", cs->cpu_index, irq); 1436 } 1437 1438 /* Always wake up soon in case the interrupt was level based */ 1439 timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 1440 (NANOSECONDS_PER_SECOND / 50)); 1441 } 1442 1443 /* We don't know if there are more interrupts pending after this. However, 1444 * the guest will return to userspace in the course of handling this one 1445 * anyways, so we will get a chance to deliver the rest. 
*/ 1446 1447 qemu_mutex_unlock_iothread(); 1448 } 1449 1450 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run) 1451 { 1452 return MEMTXATTRS_UNSPECIFIED; 1453 } 1454 1455 int kvm_arch_process_async_events(CPUState *cs) 1456 { 1457 return cs->halted; 1458 } 1459 1460 static int kvmppc_handle_halt(PowerPCCPU *cpu) 1461 { 1462 CPUState *cs = CPU(cpu); 1463 CPUPPCState *env = &cpu->env; 1464 1465 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) { 1466 cs->halted = 1; 1467 cs->exception_index = EXCP_HLT; 1468 } 1469 1470 return 0; 1471 } 1472 1473 /* map dcr access to existing qemu dcr emulation */ 1474 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data) 1475 { 1476 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0) 1477 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn); 1478 1479 return 0; 1480 } 1481 1482 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data) 1483 { 1484 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0) 1485 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn); 1486 1487 return 0; 1488 } 1489 1490 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp) 1491 { 1492 /* Mixed endian case is not handled */ 1493 uint32_t sc = debug_inst_opcode; 1494 1495 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 1496 sizeof(sc), 0) || 1497 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) { 1498 return -EINVAL; 1499 } 1500 1501 return 0; 1502 } 1503 1504 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp) 1505 { 1506 uint32_t sc; 1507 1508 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) || 1509 sc != debug_inst_opcode || 1510 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 1511 sizeof(sc), 1)) { 1512 return -EINVAL; 1513 } 1514 1515 return 0; 1516 } 1517 1518 static int find_hw_breakpoint(target_ulong addr, int type) 1519 { 1520 int n; 1521 1522 assert((nb_hw_breakpoint + nb_hw_watchpoint) 1523 <= ARRAY_SIZE(hw_debug_points)); 1524 1525 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) { 1526 if (hw_debug_points[n].addr == addr && 1527 hw_debug_points[n].type == type) { 1528 return n; 1529 } 1530 } 1531 1532 return -1; 1533 } 1534 1535 static int find_hw_watchpoint(target_ulong addr, int *flag) 1536 { 1537 int n; 1538 1539 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS); 1540 if (n >= 0) { 1541 *flag = BP_MEM_ACCESS; 1542 return n; 1543 } 1544 1545 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE); 1546 if (n >= 0) { 1547 *flag = BP_MEM_WRITE; 1548 return n; 1549 } 1550 1551 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ); 1552 if (n >= 0) { 1553 *flag = BP_MEM_READ; 1554 return n; 1555 } 1556 1557 return -1; 1558 } 1559 1560 int kvm_arch_insert_hw_breakpoint(target_ulong addr, 1561 target_ulong len, int type) 1562 { 1563 if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) { 1564 return -ENOBUFS; 1565 } 1566 1567 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr; 1568 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type; 1569 1570 switch (type) { 1571 case GDB_BREAKPOINT_HW: 1572 if (nb_hw_breakpoint >= max_hw_breakpoint) { 1573 return -ENOBUFS; 1574 } 1575 1576 if (find_hw_breakpoint(addr, type) >= 0) { 1577 return -EEXIST; 1578 } 1579 1580 nb_hw_breakpoint++; 1581 break; 1582 1583 case GDB_WATCHPOINT_WRITE: 1584 case GDB_WATCHPOINT_READ: 1585 case GDB_WATCHPOINT_ACCESS: 1586 if (nb_hw_watchpoint >= max_hw_watchpoint) { 1587 
return -ENOBUFS; 1588 } 1589 1590 if (find_hw_breakpoint(addr, type) >= 0) { 1591 return -EEXIST; 1592 } 1593 1594 nb_hw_watchpoint++; 1595 break; 1596 1597 default: 1598 return -ENOSYS; 1599 } 1600 1601 return 0; 1602 } 1603 1604 int kvm_arch_remove_hw_breakpoint(target_ulong addr, 1605 target_ulong len, int type) 1606 { 1607 int n; 1608 1609 n = find_hw_breakpoint(addr, type); 1610 if (n < 0) { 1611 return -ENOENT; 1612 } 1613 1614 switch (type) { 1615 case GDB_BREAKPOINT_HW: 1616 nb_hw_breakpoint--; 1617 break; 1618 1619 case GDB_WATCHPOINT_WRITE: 1620 case GDB_WATCHPOINT_READ: 1621 case GDB_WATCHPOINT_ACCESS: 1622 nb_hw_watchpoint--; 1623 break; 1624 1625 default: 1626 return -ENOSYS; 1627 } 1628 hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint]; 1629 1630 return 0; 1631 } 1632 1633 void kvm_arch_remove_all_hw_breakpoints(void) 1634 { 1635 nb_hw_breakpoint = nb_hw_watchpoint = 0; 1636 } 1637 1638 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg) 1639 { 1640 int n; 1641 1642 /* Software Breakpoint updates */ 1643 if (kvm_sw_breakpoints_active(cs)) { 1644 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP; 1645 } 1646 1647 assert((nb_hw_breakpoint + nb_hw_watchpoint) 1648 <= ARRAY_SIZE(hw_debug_points)); 1649 assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp)); 1650 1651 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) { 1652 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP; 1653 memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp)); 1654 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) { 1655 switch (hw_debug_points[n].type) { 1656 case GDB_BREAKPOINT_HW: 1657 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT; 1658 break; 1659 case GDB_WATCHPOINT_WRITE: 1660 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE; 1661 break; 1662 case GDB_WATCHPOINT_READ: 1663 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ; 1664 break; 1665 case GDB_WATCHPOINT_ACCESS: 1666 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE | 1667 KVMPPC_DEBUG_WATCH_READ; 1668 break; 1669 default: 1670 cpu_abort(cs, "Unsupported breakpoint type\n"); 1671 } 1672 dbg->arch.bp[n].addr = hw_debug_points[n].addr; 1673 } 1674 } 1675 } 1676 1677 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run) 1678 { 1679 CPUState *cs = CPU(cpu); 1680 CPUPPCState *env = &cpu->env; 1681 struct kvm_debug_exit_arch *arch_info = &run->debug.arch; 1682 int handle = 0; 1683 int n; 1684 int flag = 0; 1685 1686 if (cs->singlestep_enabled) { 1687 handle = 1; 1688 } else if (arch_info->status) { 1689 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) { 1690 if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) { 1691 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW); 1692 if (n >= 0) { 1693 handle = 1; 1694 } 1695 } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ | 1696 KVMPPC_DEBUG_WATCH_WRITE)) { 1697 n = find_hw_watchpoint(arch_info->address, &flag); 1698 if (n >= 0) { 1699 handle = 1; 1700 cs->watchpoint_hit = &hw_watchpoint; 1701 hw_watchpoint.vaddr = hw_debug_points[n].addr; 1702 hw_watchpoint.flags = flag; 1703 } 1704 } 1705 } 1706 } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) { 1707 handle = 1; 1708 } else { 1709 /* QEMU is not able to handle debug exception, so inject 1710 * program exception to guest; 1711 * Yes program exception NOT debug exception !! 1712 * When QEMU is using debug resources then debug exception must 1713 * be always set. To achieve this we set MSR_DE and also set 1714 * MSRP_DEP so guest cannot change MSR_DE. 
         * When emulating debug resource for guest we want guest
         * to control MSR_DE (enable/disable debug interrupt on need).
         * Supporting both configurations is NOT possible.
         * So the result is that we cannot share debug resources
         * between QEMU and Guest on BOOKE architecture.
         * In the current design QEMU gets the priority over guest,
         * this means that if QEMU is using debug resources then guest
         * cannot use them;
         * For software breakpoint QEMU uses a privileged instruction;
         * So there cannot be any reason that we are here for guest
         * set debug exception, only possibility is guest executed a
         * privileged / illegal instruction and that's why we are
         * injecting a program interrupt.
         */

        cpu_synchronize_state(cs);
        /* env->nip is PC, so increment this by 4 to use
         * ppc_cpu_do_interrupt(), which sets srr0 = env->nip - 4.
         */
        env->nip += 4;
        cs->exception_index = POWERPC_EXCP_PROGRAM;
        env->error_code = POWERPC_EXCP_INVAL;
        ppc_cpu_do_interrupt(cs);
    }

    return handle;
}

int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    int ret;

    qemu_mutex_lock_iothread();

    switch (run->exit_reason) {
    case KVM_EXIT_DCR:
        if (run->dcr.is_write) {
            DPRINTF("handle dcr write\n");
            ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
        } else {
            DPRINTF("handle dcr read\n");
            ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
        }
        break;
    case KVM_EXIT_HLT:
        DPRINTF("handle halt\n");
        ret = kvmppc_handle_halt(cpu);
        break;
#if defined(TARGET_PPC64)
    case KVM_EXIT_PAPR_HCALL:
        DPRINTF("handle PAPR hypercall\n");
        run->papr_hcall.ret = spapr_hypercall(cpu,
                                              run->papr_hcall.nr,
                                              run->papr_hcall.args);
        ret = 0;
        break;
#endif
    case KVM_EXIT_EPR:
        DPRINTF("handle epr\n");
        run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
        ret = 0;
        break;
    case KVM_EXIT_WATCHDOG:
        DPRINTF("handle watchdog expiry\n");
        watchdog_perform_action();
        ret = 0;
        break;

    case KVM_EXIT_DEBUG:
        DPRINTF("handle debug exception\n");
        if (kvm_handle_debug(cpu, run)) {
            ret = EXCP_DEBUG;
            break;
        }
        /* re-enter, this exception was guest-internal */
        ret = 0;
        break;

    default:
        fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
        ret = -1;
        break;
    }

    qemu_mutex_unlock_iothread();
    return ret;
}

int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
{
    CPUState *cs = CPU(cpu);
    uint32_t bits = tsr_bits;
    struct kvm_one_reg reg = {
        .id = KVM_REG_PPC_OR_TSR,
        .addr = (uintptr_t) &bits,
    };

    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
}

int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
{
    CPUState *cs = CPU(cpu);
    uint32_t bits = tsr_bits;
    struct kvm_one_reg reg = {
        .id = KVM_REG_PPC_CLEAR_TSR,
        .addr = (uintptr_t) &bits,
    };

    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
}
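
/* Sync the guest's BookE timer control register (TCR) value into KVM, so the
 * in-kernel timer/watchdog emulation matches what the guest programmed. */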
int kvmppc_set_tcr(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    CPUPPCState *env = &cpu->env;
    uint32_t tcr = env->spr[SPR_BOOKE_TCR];

    struct kvm_one_reg reg = {
        .id = KVM_REG_PPC_TCR,
        .addr = (uintptr_t) &tcr,
    };

    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
}

int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    int ret;

    if (!kvm_enabled()) {
        return -1;
    }

    if (!cap_ppc_watchdog) {
        printf("warning: KVM does not support watchdog");
        return -1;
    }

    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
    if (ret < 0) {
        fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
                __func__, strerror(-ret));
        return ret;
    }

    return ret;
}

static int read_cpuinfo(const char *field, char *value, int len)
{
    FILE *f;
    int ret = -1;
    int field_len = strlen(field);
    char line[512];

    f = fopen("/proc/cpuinfo", "r");
    if (!f) {
        return -1;
    }

    do {
        if (!fgets(line, sizeof(line), f)) {
            break;
        }
        if (!strncmp(line, field, field_len)) {
            pstrcpy(value, len, line);
            ret = 0;
            break;
        }
    } while (*line);

    fclose(f);

    return ret;
}

uint32_t kvmppc_get_tbfreq(void)
{
    char line[512];
    char *ns;
    uint32_t retval = NANOSECONDS_PER_SECOND;

    if (read_cpuinfo("timebase", line, sizeof(line))) {
        return retval;
    }

    if (!(ns = strchr(line, ':'))) {
        return retval;
    }

    ns++;

    return atoi(ns);
}

bool kvmppc_get_host_serial(char **value)
{
    return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
                               NULL);
}

bool kvmppc_get_host_model(char **value)
{
    return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
}

/* Try to find a device tree node for a CPU with clock-frequency property */
static int kvmppc_find_cpu_dt(char *buf, int buf_len)
{
    struct dirent *dirp;
    DIR *dp;

    if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
        printf("Can't open directory " PROC_DEVTREE_CPU "\n");
        return -1;
    }

    buf[0] = '\0';
    while ((dirp = readdir(dp)) != NULL) {
        FILE *f;
        snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
                 dirp->d_name);
        f = fopen(buf, "r");
        if (f) {
            snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
            fclose(f);
            break;
        }
        buf[0] = '\0';
    }
    closedir(dp);
    if (buf[0] == '\0') {
        printf("Unknown host!\n");
        return -1;
    }

    return 0;
}

static uint64_t kvmppc_read_int_dt(const char *filename)
{
    union {
        uint32_t v32;
        uint64_t v64;
    } u;
    FILE *f;
    int len;

    f = fopen(filename, "rb");
    if (!f) {
        return -1;
    }

    len = fread(&u, 1, sizeof(u), f);
    fclose(f);
    switch (len) {
    case 4:
        /* property is a 32-bit quantity */
        return be32_to_cpu(u.v32);
    case 8:
        return be64_to_cpu(u.v64);
    }

    return 0;
}

/* Read a CPU node property from the host device tree that's a single
 * integer (32-bit or 64-bit).
Returns 0 if anything goes wrong 1988 * (can't find or open the property, or doesn't understand the 1989 * format) */ 1990 static uint64_t kvmppc_read_int_cpu_dt(const char *propname) 1991 { 1992 char buf[PATH_MAX], *tmp; 1993 uint64_t val; 1994 1995 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) { 1996 return -1; 1997 } 1998 1999 tmp = g_strdup_printf("%s/%s", buf, propname); 2000 val = kvmppc_read_int_dt(tmp); 2001 g_free(tmp); 2002 2003 return val; 2004 } 2005 2006 uint64_t kvmppc_get_clockfreq(void) 2007 { 2008 return kvmppc_read_int_cpu_dt("clock-frequency"); 2009 } 2010 2011 uint32_t kvmppc_get_vmx(void) 2012 { 2013 return kvmppc_read_int_cpu_dt("ibm,vmx"); 2014 } 2015 2016 uint32_t kvmppc_get_dfp(void) 2017 { 2018 return kvmppc_read_int_cpu_dt("ibm,dfp"); 2019 } 2020 2021 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo) 2022 { 2023 PowerPCCPU *cpu = ppc_env_get_cpu(env); 2024 CPUState *cs = CPU(cpu); 2025 2026 if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) && 2027 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) { 2028 return 0; 2029 } 2030 2031 return 1; 2032 } 2033 2034 int kvmppc_get_hasidle(CPUPPCState *env) 2035 { 2036 struct kvm_ppc_pvinfo pvinfo; 2037 2038 if (!kvmppc_get_pvinfo(env, &pvinfo) && 2039 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) { 2040 return 1; 2041 } 2042 2043 return 0; 2044 } 2045 2046 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len) 2047 { 2048 uint32_t *hc = (uint32_t*)buf; 2049 struct kvm_ppc_pvinfo pvinfo; 2050 2051 if (!kvmppc_get_pvinfo(env, &pvinfo)) { 2052 memcpy(buf, pvinfo.hcall, buf_len); 2053 return 0; 2054 } 2055 2056 /* 2057 * Fallback to always fail hypercalls regardless of endianness: 2058 * 2059 * tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian) 2060 * li r3, -1 2061 * b .+8 (becomes nop in wrong endian) 2062 * bswap32(li r3, -1) 2063 */ 2064 2065 hc[0] = cpu_to_be32(0x08000048); 2066 hc[1] = cpu_to_be32(0x3860ffff); 2067 hc[2] = cpu_to_be32(0x48000008); 2068 hc[3] = cpu_to_be32(bswap32(0x3860ffff)); 2069 2070 return 1; 2071 } 2072 2073 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall) 2074 { 2075 return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1); 2076 } 2077 2078 void kvmppc_enable_logical_ci_hcalls(void) 2079 { 2080 /* 2081 * FIXME: it would be nice if we could detect the cases where 2082 * we're using a device which requires the in kernel 2083 * implementation of these hcalls, but the kernel lacks them and 2084 * produce a warning. 
void kvmppc_enable_logical_ci_hcalls(void)
{
    /*
     * FIXME: it would be nice if we could detect the cases where
     * we're using a device which requires the in-kernel
     * implementation of these hcalls, but the kernel lacks them,
     * and produce a warning.
     */
    kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
    kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
}

void kvmppc_enable_set_mode_hcall(void)
{
    kvmppc_enable_hcall(kvm_state, H_SET_MODE);
}

void kvmppc_enable_clear_ref_mod_hcalls(void)
{
    kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
    kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
}

void kvmppc_set_papr(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    int ret;

    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
    if (ret) {
        error_report("This vCPU type or KVM version does not support PAPR");
        exit(1);
    }

    /* Update the capability flag so we sync the right information
     * with kvm */
    cap_papr = 1;
}

int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr)
{
    return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr);
}

void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
{
    CPUState *cs = CPU(cpu);
    int ret;

    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
    if (ret && mpic_proxy) {
        error_report("This KVM version does not support EPR");
        exit(1);
    }
}

int kvmppc_smt_threads(void)
{
    return cap_ppc_smt ? cap_ppc_smt : 1;
}

int kvmppc_set_smt_threads(int smt)
{
    int ret;

    ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_SMT, 0, smt, 0);
    if (!ret) {
        cap_ppc_smt = smt;
    }
    return ret;
}

void kvmppc_hint_smt_possible(Error **errp)
{
    int i;
    GString *g;
    char *s;

    assert(kvm_enabled());
    if (cap_ppc_smt_possible) {
        g = g_string_new("Available VSMT modes:");
        for (i = 63; i >= 0; i--) {
            if ((1UL << i) & cap_ppc_smt_possible) {
                g_string_append_printf(g, " %lu", (1UL << i));
            }
        }
        s = g_string_free(g, false);
        error_append_hint(errp, "%s.\n", s);
        g_free(s);
    } else {
        error_append_hint(errp,
                          "This KVM seems to be too old to support VSMT.\n");
    }
}


#ifdef TARGET_PPC64
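/*
 * Allocate and map a contiguous Real Mode Area through KVM when the
 * host requires one.  Returns the mapped size, 0 if no special RMA
 * allocation is needed, or -1 on failure.
 */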
off_t kvmppc_alloc_rma(void **rma)
{
    off_t size;
    int fd;
    struct kvm_allocate_rma ret;

    /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
     * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
     * not necessary on this hardware
     * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
     *
     * FIXME: We should allow the user to force contiguous RMA
     * allocation in the cap_ppc_rma==1 case.
     */
    if (cap_ppc_rma < 2) {
        return 0;
    }

    fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
    if (fd < 0) {
        fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
                strerror(errno));
        return -1;
    }

    size = MIN(ret.rma_size, 256ul << 20);

    *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
    if (*rma == MAP_FAILED) {
        fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
        return -1;
    }

    return size;
}

uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
{
    struct kvm_ppc_smmu_info info;
    long rampagesize, best_page_shift;
    int i;

    if (cap_ppc_rma >= 2) {
        return current_size;
    }

    /* Find the largest hardware supported page size that's less than
     * or equal to the (logical) backing page size of guest RAM */
    kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
    rampagesize = qemu_getrampagesize();
    best_page_shift = 0;

    for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
        struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];

        if (!sps->page_shift) {
            continue;
        }

        if ((sps->page_shift > best_page_shift)
            && ((1UL << sps->page_shift) <= rampagesize)) {
            best_page_shift = sps->page_shift;
        }
    }

    return MIN(current_size,
               1ULL << (best_page_shift + hash_shift - 7));
}
#endif

bool kvmppc_spapr_use_multitce(void)
{
    return cap_spapr_multitce;
}

int kvmppc_spapr_enable_inkernel_multitce(void)
{
    int ret;

    ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
                            H_PUT_TCE_INDIRECT, 1);
    if (!ret) {
        ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
                                H_STUFF_TCE, 1);
    }

    return ret;
}

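/*
 * Create a TCE (guest IOMMU translation) table in the kernel for the
 * given LIOBN and mmap it into QEMU.  Returns the mapped table, with
 * the backing fd in *pfd, or NULL when the table cannot be created in
 * the kernel (callers then fall back to a userspace table).
 */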
void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
                              uint64_t bus_offset, uint32_t nb_table,
                              int *pfd, bool need_vfio)
{
    long len;
    int fd;
    void *table;

    /* Must set fd to -1 so we don't try to munmap when called for
     * destroying the table, which the upper layers -will- do
     */
    *pfd = -1;
    if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
        return NULL;
    }

    if (cap_spapr_tce_64) {
        struct kvm_create_spapr_tce_64 args = {
            .liobn = liobn,
            .page_shift = page_shift,
            .offset = bus_offset >> page_shift,
            .size = nb_table,
            .flags = 0
        };
        fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE_64, &args);
        if (fd < 0) {
            fprintf(stderr,
                    "KVM: Failed to create TCE64 table for liobn 0x%x\n",
                    liobn);
            return NULL;
        }
    } else if (cap_spapr_tce) {
        uint64_t window_size = (uint64_t) nb_table << page_shift;
        struct kvm_create_spapr_tce args = {
            .liobn = liobn,
            .window_size = window_size,
        };
        if ((window_size != args.window_size) || bus_offset) {
            return NULL;
        }
        fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
        if (fd < 0) {
            fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
                    liobn);
            return NULL;
        }
    } else {
        return NULL;
    }

    len = nb_table * sizeof(uint64_t);
    /* FIXME: round this up to page size */

    table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
    if (table == MAP_FAILED) {
        fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
                liobn);
        close(fd);
        return NULL;
    }

    *pfd = fd;
    return table;
}

int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
{
    long len;

    if (fd < 0) {
        return -1;
    }

    len = nb_table * sizeof(uint64_t);
    if ((munmap(table, len) < 0) ||
        (close(fd) < 0)) {
        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s\n",
                strerror(errno));
        /* Leak the table */
    }

    return 0;
}

int kvmppc_reset_htab(int shift_hint)
{
    uint32_t shift = shift_hint;

    if (!kvm_enabled()) {
        /* Full emulation, tell caller to allocate htab itself */
        return 0;
    }
    if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
        int ret;
        ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
        if (ret == -ENOTTY) {
            /* At least some versions of PR KVM advertise the
             * capability, but don't implement the ioctl().  Oops.
             * Return 0 so that we allocate the htab in qemu, as is
             * correct for PR. */
            return 0;
        } else if (ret < 0) {
            return ret;
        }
        return shift;
    }

    /* We have a kernel that predates the htab reset calls.  For PR
     * KVM, we need to allocate the htab ourselves, for an HV KVM of
     * this era, it has allocated a 16MB fixed size hash table already. */
    if (kvmppc_is_pr(kvm_state)) {
        /* PR - tell caller to allocate htab */
        return 0;
    } else {
        /* HV - assume 16MB kernel allocated htab */
        return 24;
    }
}

static inline uint32_t mfpvr(void)
{
    uint32_t pvr;

    asm ("mfpvr %0"
         : "=r"(pvr));
    return pvr;
}

static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    if (on) {
        *word |= flags;
    } else {
        *word &= ~flags;
    }
}

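/*
 * Class init for the "host" CPU type: start from the PVR-matched class
 * and patch in what the host actually advertises (AltiVec/VSX, DFP,
 * cache sizes, and on ppc64 the radix page info).
 */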
static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
{
    PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
    uint32_t vmx = kvmppc_get_vmx();
    uint32_t dfp = kvmppc_get_dfp();
    uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
    uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");

    /* Now fix up the class with information we can query from the host */
    pcc->pvr = mfpvr();

    if (vmx != -1) {
        /* Only override when we know what the host supports */
        alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
        alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
    }
    if (dfp != -1) {
        /* Only override when we know what the host supports */
        alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
    }

    if (dcache_size != -1) {
        pcc->l1_dcache_size = dcache_size;
    }

    if (icache_size != -1) {
        pcc->l1_icache_size = icache_size;
    }

#if defined(TARGET_PPC64)
    pcc->radix_page_info = kvm_get_radix_page_info();

    if ((pcc->pvr & 0xffffff00) == CPU_POWERPC_POWER9_DD1) {
        /*
         * POWER9 DD1 has some bugs which make it not really ISA 3.00
         * compliant. More importantly, advertising ISA 3.00
         * architected mode may prevent guests from activating
         * necessary DD1 workarounds.
         */
        pcc->pcr_supported &= ~(PCR_COMPAT_3_00 | PCR_COMPAT_2_07
                                | PCR_COMPAT_2_06 | PCR_COMPAT_2_05);
    }
#endif /* defined(TARGET_PPC64) */
}

bool kvmppc_has_cap_epr(void)
{
    return cap_epr;
}

bool kvmppc_has_cap_htab_fd(void)
{
    return cap_htab_fd;
}

bool kvmppc_has_cap_fixup_hcalls(void)
{
    return cap_fixup_hcalls;
}

bool kvmppc_has_cap_htm(void)
{
    return cap_htm;
}

bool kvmppc_has_cap_mmu_radix(void)
{
    return cap_mmu_radix;
}

bool kvmppc_has_cap_mmu_hash_v3(void)
{
    return cap_mmu_hash_v3;
}

PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
{
    uint32_t host_pvr = mfpvr();
    PowerPCCPUClass *pvr_pcc;

    pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
    if (pvr_pcc == NULL) {
        pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
    }

    return pvr_pcc;
}

static int kvm_ppc_register_host_cpu_type(void)
{
    TypeInfo type_info = {
        .name = TYPE_HOST_POWERPC_CPU,
        .class_init = kvmppc_host_cpu_class_init,
    };
    PowerPCCPUClass *pvr_pcc;
    ObjectClass *oc;
    DeviceClass *dc;
    int i;

    pvr_pcc = kvm_ppc_get_host_cpu_class();
    if (pvr_pcc == NULL) {
        return -1;
    }
    type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
    type_register(&type_info);

    oc = object_class_by_name(type_info.name);
    g_assert(oc);

#if defined(TARGET_PPC64)
    type_info.name = g_strdup_printf("%s-"TYPE_SPAPR_CPU_CORE, "host");
    type_info.parent = TYPE_SPAPR_CPU_CORE;
    type_info.instance_size = sizeof(sPAPRCPUCore);
    type_info.instance_init = NULL;
    type_info.class_init = spapr_cpu_core_class_init;
    type_info.class_data = (void *) "host";
    type_register(&type_info);
    g_free((void *)type_info.name);
#endif

    /*
     * Update generic CPU family class alias (e.g. on a POWER8NVL host,
     * we want "POWER8" to be a "family" alias that points to the current
     * host CPU type, too)
     */
    dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc));
    for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) {
        if (strcasecmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) {
            char *suffix;

            ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc));
            suffix = strstr(ppc_cpu_aliases[i].model, POWERPC_CPU_TYPE_SUFFIX);
            if (suffix) {
                *suffix = 0;
            }
            break;
        }
    }

    return 0;
}

int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
{
    struct kvm_rtas_token_args args = {
        .token = token,
    };

    if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
        return -ENOENT;
    }

    strncpy(args.name, function, sizeof(args.name));

    return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
}

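/*
 * Get a file descriptor from KVM for streaming the guest hash page
 * table, either for reading it out (as done by kvmppc_save_htab()
 * below) or for writing it back on the destination, depending on
 * 'write'.
 */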
int kvmppc_get_htab_fd(bool write)
{
    struct kvm_get_htab_fd s = {
        .flags = write ? KVM_GET_HTAB_WRITE : 0,
        .start_index = 0,
    };

    if (!cap_htab_fd) {
        fprintf(stderr, "KVM version doesn't support saving the hash table\n");
        return -1;
    }

    return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
}

int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
{
    int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    uint8_t buf[bufsize];
    ssize_t rc;

    do {
        rc = read(fd, buf, bufsize);
        if (rc < 0) {
            fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
                    strerror(errno));
            return rc;
        } else if (rc) {
            uint8_t *buffer = buf;
            ssize_t n = rc;
            while (n) {
                struct kvm_get_htab_header *head =
                    (struct kvm_get_htab_header *) buffer;
                size_t chunksize = sizeof(*head) +
                    HASH_PTE_SIZE_64 * head->n_valid;

                qemu_put_be32(f, head->index);
                qemu_put_be16(f, head->n_valid);
                qemu_put_be16(f, head->n_invalid);
                qemu_put_buffer(f, (void *)(head + 1),
                                HASH_PTE_SIZE_64 * head->n_valid);

                buffer += chunksize;
                n -= chunksize;
            }
        }
    } while ((rc != 0)
             && ((max_ns < 0)
                 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));

    return (rc == 0) ? 1 : 0;
}

int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
                           uint16_t n_valid, uint16_t n_invalid)
{
    struct kvm_get_htab_header *buf;
    size_t chunksize = sizeof(*buf) + n_valid * HASH_PTE_SIZE_64;
    ssize_t rc;

    buf = alloca(chunksize);
    buf->index = index;
    buf->n_valid = n_valid;
    buf->n_invalid = n_invalid;

    qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64 * n_valid);

    rc = write(fd, buf, chunksize);
    if (rc < 0) {
        fprintf(stderr, "Error writing KVM hash table: %s\n",
                strerror(errno));
        return rc;
    }
    if (rc != chunksize) {
        /* We should never get a short write on a single chunk */
        fprintf(stderr, "Short write, restoring KVM hash table\n");
        return -1;
    }
    return 0;
}

bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
{
    return true;
}

void kvm_arch_init_irq_routing(KVMState *s)
{
}

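/*
 * Read 'n' HPTEs starting at index 'ptex' from KVM through the HTAB fd
 * into 'hptes'.  Ranges reported as invalid by the kernel are
 * zero-filled.
 */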
void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n)
{
    struct kvm_get_htab_fd ghf = {
        .flags = 0,
        .start_index = ptex,
    };
    int fd, rc;
    int i;

    fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
    if (fd < 0) {
        hw_error("kvmppc_read_hptes: Unable to open HPT fd");
    }

    i = 0;
    while (i < n) {
        struct kvm_get_htab_header *hdr;
        int m = n < HPTES_PER_GROUP ? n : HPTES_PER_GROUP;
        char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64];

        rc = read(fd, buf, sizeof(buf));
        if (rc < 0) {
            hw_error("kvmppc_read_hptes: Unable to read HPTEs");
        }

        hdr = (struct kvm_get_htab_header *)buf;
        while ((i < n) && ((char *)hdr < (buf + rc))) {
            int invalid = hdr->n_invalid;

            if (hdr->index != (ptex + i)) {
                hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32
                         " != (%"HWADDR_PRIu" + %d)", hdr->index, ptex, i);
            }

            memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * hdr->n_valid);
            i += hdr->n_valid;

            if ((n - i) < invalid) {
                invalid = n - i;
            }
            memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64);
            i += hdr->n_invalid;

            hdr = (struct kvm_get_htab_header *)
                ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid);
        }
    }

    close(fd);
}

void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1)
{
    int fd, rc;
    struct kvm_get_htab_fd ghf;
    struct {
        struct kvm_get_htab_header hdr;
        uint64_t pte0;
        uint64_t pte1;
    } buf;

    ghf.flags = 0;
    ghf.start_index = 0;     /* Ignored */
    fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
    if (fd < 0) {
        hw_error("kvmppc_write_hpte: Unable to open HPT fd");
    }

    buf.hdr.n_valid = 1;
    buf.hdr.n_invalid = 0;
    buf.hdr.index = ptex;
    buf.pte0 = cpu_to_be64(pte0);
    buf.pte1 = cpu_to_be64(pte1);

    rc = write(fd, &buf, sizeof(buf));
    if (rc != sizeof(buf)) {
        hw_error("kvmppc_write_hpte: Unable to update KVM HPT");
    }
    close(fd);
}

int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
                             uint64_t address, uint32_t data, PCIDevice *dev)
{
    return 0;
}

int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
                                int vector, PCIDevice *dev)
{
    return 0;
}

int kvm_arch_release_virq_post(int virq)
{
    return 0;
}

int kvm_arch_msi_data_to_gsi(uint32_t data)
{
    return data & 0xffff;
}

int kvmppc_enable_hwrng(void)
{
    if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
        return -1;
    }

    return kvmppc_enable_hcall(kvm_state, H_RANDOM);
}

void kvmppc_check_papr_resize_hpt(Error **errp)
{
    if (!kvm_enabled()) {
        return; /* No KVM, we're good */
    }

    if (cap_resize_hpt) {
        return; /* Kernel has explicit support, we're good */
    }

    /* Otherwise fallback on looking for PR KVM */
    if (kvmppc_is_pr(kvm_state)) {
        return;
    }

    error_setg(errp,
               "Hash page table resizing not available with this KVM version");
}

int kvmppc_resize_hpt_prepare(PowerPCCPU *cpu, target_ulong flags, int shift)
{
    CPUState *cs = CPU(cpu);
    struct kvm_ppc_resize_hpt rhpt = {
        .flags = flags,
        .shift = shift,
    };

    if (!cap_resize_hpt) {
        return -ENOSYS;
    }

    return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_PREPARE, &rhpt);
}

int kvmppc_resize_hpt_commit(PowerPCCPU *cpu, target_ulong flags, int shift)
{
    CPUState *cs = CPU(cpu);
    struct kvm_ppc_resize_hpt rhpt = {
        .flags = flags,
        .shift = shift,
    };

    if (!cap_resize_hpt) {
        return -ENOSYS;
    }

    return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_COMMIT, &rhpt);
}

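/*
 * Per-vCPU helper for kvmppc_update_sdr1(): update env->spr[SPR_SDR1]
 * and push it to KVM through the Book S sregs.  As the comment below
 * notes, only PR KVM actually consumes this value.
 */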
static void kvmppc_pivot_hpt_cpu(CPUState *cs, run_on_cpu_data arg)
{
    target_ulong sdr1 = arg.target_ptr;
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;

    /* This is just for the benefit of PR KVM */
    cpu_synchronize_state(cs);
    env->spr[SPR_SDR1] = sdr1;
    if (kvmppc_put_books_sregs(cpu) < 0) {
        error_report("Unable to update SDR1 in KVM");
        exit(1);
    }
}

void kvmppc_update_sdr1(target_ulong sdr1)
{
    CPUState *cs;

    CPU_FOREACH(cs) {
        run_on_cpu(cs, kvmppc_pivot_hpt_cpu, RUN_ON_CPU_TARGET_PTR(sdr1));
    }
}

/*
 * This is a helper function to detect a post migration scenario
 * in which a guest, running as KVM-HV, freezes in cpu_post_load because
 * the guest kernel can't handle a PVR value other than the actual host
 * PVR in KVM_SET_SREGS, even if pvr_match() returns true.
 *
 * If we don't have cap_ppc_pvr_compat and we're not running in PR
 * (so, we're HV), return true. The workaround itself is done in
 * cpu_post_load.
 *
 * The order here is important: we'll only check for KVM PR as a
 * fallback if the guest kernel can't handle the situation itself.
 * We need to avoid as much as possible querying the running KVM type
 * at the QEMU level.
 */
bool kvmppc_pvr_workaround_required(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);

    if (!kvm_enabled()) {
        return false;
    }

    if (cap_ppc_pvr_compat) {
        return false;
    }

    return !kvmppc_is_pr(cs->kvm_state);
}