1 /* 2 * PowerPC implementation of KVM hooks 3 * 4 * Copyright IBM Corp. 2007 5 * Copyright (C) 2011 Freescale Semiconductor, Inc. 6 * 7 * Authors: 8 * Jerone Young <jyoung5@us.ibm.com> 9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com> 10 * Hollis Blanchard <hollisb@us.ibm.com> 11 * 12 * This work is licensed under the terms of the GNU GPL, version 2 or later. 13 * See the COPYING file in the top-level directory. 14 * 15 */ 16 17 #include "qemu/osdep.h" 18 #include <dirent.h> 19 #include <sys/ioctl.h> 20 #include <sys/vfs.h> 21 22 #include <linux/kvm.h> 23 24 #include "qemu-common.h" 25 #include "qemu/error-report.h" 26 #include "cpu.h" 27 #include "qemu/timer.h" 28 #include "sysemu/sysemu.h" 29 #include "sysemu/kvm.h" 30 #include "sysemu/numa.h" 31 #include "kvm_ppc.h" 32 #include "sysemu/cpus.h" 33 #include "sysemu/device_tree.h" 34 #include "mmu-hash64.h" 35 36 #include "hw/sysbus.h" 37 #include "hw/ppc/spapr.h" 38 #include "hw/ppc/spapr_vio.h" 39 #include "hw/ppc/spapr_cpu_core.h" 40 #include "hw/ppc/ppc.h" 41 #include "sysemu/watchdog.h" 42 #include "trace.h" 43 #include "exec/gdbstub.h" 44 #include "exec/memattrs.h" 45 #include "sysemu/hostmem.h" 46 #include "qemu/cutils.h" 47 #if defined(TARGET_PPC64) 48 #include "hw/ppc/spapr_cpu_core.h" 49 #endif 50 51 //#define DEBUG_KVM 52 53 #ifdef DEBUG_KVM 54 #define DPRINTF(fmt, ...) \ 55 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0) 56 #else 57 #define DPRINTF(fmt, ...) \ 58 do { } while (0) 59 #endif 60 61 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/" 62 63 const KVMCapabilityInfo kvm_arch_required_capabilities[] = { 64 KVM_CAP_LAST_INFO 65 }; 66 67 static int cap_interrupt_unset = false; 68 static int cap_interrupt_level = false; 69 static int cap_segstate; 70 static int cap_booke_sregs; 71 static int cap_ppc_smt; 72 static int cap_ppc_rma; 73 static int cap_spapr_tce; 74 static int cap_spapr_multitce; 75 static int cap_spapr_vfio; 76 static int cap_hior; 77 static int cap_one_reg; 78 static int cap_epr; 79 static int cap_ppc_watchdog; 80 static int cap_papr; 81 static int cap_htab_fd; 82 static int cap_fixup_hcalls; 83 static int cap_htm; /* Hardware transactional memory support */ 84 85 static uint32_t debug_inst_opcode; 86 87 /* XXX We have a race condition where we actually have a level triggered 88 * interrupt, but the infrastructure can't expose that yet, so the guest 89 * takes but ignores it, goes to sleep and never gets notified that there's 90 * still an interrupt pending. 91 * 92 * As a quick workaround, let's just wake up again 20 ms after we injected 93 * an interrupt. That way we can assure that we're always reinjecting 94 * interrupts in case the guest swallowed them. 95 */ 96 static QEMUTimer *idle_timer; 97 98 static void kvm_kick_cpu(void *opaque) 99 { 100 PowerPCCPU *cpu = opaque; 101 102 qemu_cpu_kick(CPU(cpu)); 103 } 104 105 /* Check whether we are running with KVM-PR (instead of KVM-HV). This 106 * should only be used for fallback tests - generally we should use 107 * explicit capabilities for the features we want, rather than 108 * assuming what is/isn't available depending on the KVM variant. */ 109 static bool kvmppc_is_pr(KVMState *ks) 110 { 111 /* Assume KVM-PR if the GET_PVINFO capability is available */ 112 return kvm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0; 113 } 114 115 static int kvm_ppc_register_host_cpu_type(void); 116 117 int kvm_arch_init(MachineState *ms, KVMState *s) 118 { 119 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ); 120 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL); 121 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE); 122 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS); 123 cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT); 124 cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA); 125 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE); 126 cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE); 127 cap_spapr_vfio = false; 128 cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG); 129 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR); 130 cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR); 131 cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG); 132 /* Note: we don't set cap_papr here, because this capability is 133 * only activated after this by kvmppc_set_papr() */ 134 cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD); 135 cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL); 136 cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM); 137 138 if (!cap_interrupt_level) { 139 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the " 140 "VM to stall at times!\n"); 141 } 142 143 kvm_ppc_register_host_cpu_type(); 144 145 return 0; 146 } 147 148 int kvm_arch_irqchip_create(MachineState *ms, KVMState *s) 149 { 150 return 0; 151 } 152 153 static int kvm_arch_sync_sregs(PowerPCCPU *cpu) 154 { 155 CPUPPCState *cenv = &cpu->env; 156 CPUState *cs = CPU(cpu); 157 struct kvm_sregs sregs; 158 int ret; 159 160 if (cenv->excp_model == POWERPC_EXCP_BOOKE) { 161 /* What we're really trying to say is "if we're on BookE, we use 162 the native PVR for now". This is the only sane way to check 163 it though, so we potentially confuse users that they can run 164 BookE guests on BookS. Let's hope nobody dares enough :) */ 165 return 0; 166 } else { 167 if (!cap_segstate) { 168 fprintf(stderr, "kvm error: missing PVR setting capability\n"); 169 return -ENOSYS; 170 } 171 } 172 173 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs); 174 if (ret) { 175 return ret; 176 } 177 178 sregs.pvr = cenv->spr[SPR_PVR]; 179 return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs); 180 } 181 182 /* Set up a shared TLB array with KVM */ 183 static int kvm_booke206_tlb_init(PowerPCCPU *cpu) 184 { 185 CPUPPCState *env = &cpu->env; 186 CPUState *cs = CPU(cpu); 187 struct kvm_book3e_206_tlb_params params = {}; 188 struct kvm_config_tlb cfg = {}; 189 unsigned int entries = 0; 190 int ret, i; 191 192 if (!kvm_enabled() || 193 !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) { 194 return 0; 195 } 196 197 assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN); 198 199 for (i = 0; i < BOOKE206_MAX_TLBN; i++) { 200 params.tlb_sizes[i] = booke206_tlb_size(env, i); 201 params.tlb_ways[i] = booke206_tlb_ways(env, i); 202 entries += params.tlb_sizes[i]; 203 } 204 205 assert(entries == env->nb_tlb); 206 assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t)); 207 208 env->tlb_dirty = true; 209 210 cfg.array = (uintptr_t)env->tlb.tlbm; 211 cfg.array_len = sizeof(ppcmas_tlb_t) * entries; 212 cfg.params = (uintptr_t)¶ms; 213 cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV; 214 215 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg); 216 if (ret < 0) { 217 fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n", 218 __func__, strerror(-ret)); 219 return ret; 220 } 221 222 env->kvm_sw_tlb = true; 223 return 0; 224 } 225 226 227 #if defined(TARGET_PPC64) 228 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu, 229 struct kvm_ppc_smmu_info *info) 230 { 231 CPUPPCState *env = &cpu->env; 232 CPUState *cs = CPU(cpu); 233 234 memset(info, 0, sizeof(*info)); 235 236 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so 237 * need to "guess" what the supported page sizes are. 238 * 239 * For that to work we make a few assumptions: 240 * 241 * - Check whether we are running "PR" KVM which only supports 4K 242 * and 16M pages, but supports them regardless of the backing 243 * store characteritics. We also don't support 1T segments. 244 * 245 * This is safe as if HV KVM ever supports that capability or PR 246 * KVM grows supports for more page/segment sizes, those versions 247 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we 248 * will not hit this fallback 249 * 250 * - Else we are running HV KVM. This means we only support page 251 * sizes that fit in the backing store. Additionally we only 252 * advertize 64K pages if the processor is ARCH 2.06 and we assume 253 * P7 encodings for the SLB and hash table. Here too, we assume 254 * support for any newer processor will mean a kernel that 255 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit 256 * this fallback. 257 */ 258 if (kvmppc_is_pr(cs->kvm_state)) { 259 /* No flags */ 260 info->flags = 0; 261 info->slb_size = 64; 262 263 /* Standard 4k base page size segment */ 264 info->sps[0].page_shift = 12; 265 info->sps[0].slb_enc = 0; 266 info->sps[0].enc[0].page_shift = 12; 267 info->sps[0].enc[0].pte_enc = 0; 268 269 /* Standard 16M large page size segment */ 270 info->sps[1].page_shift = 24; 271 info->sps[1].slb_enc = SLB_VSID_L; 272 info->sps[1].enc[0].page_shift = 24; 273 info->sps[1].enc[0].pte_enc = 0; 274 } else { 275 int i = 0; 276 277 /* HV KVM has backing store size restrictions */ 278 info->flags = KVM_PPC_PAGE_SIZES_REAL; 279 280 if (env->mmu_model & POWERPC_MMU_1TSEG) { 281 info->flags |= KVM_PPC_1T_SEGMENTS; 282 } 283 284 if (env->mmu_model == POWERPC_MMU_2_06 || 285 env->mmu_model == POWERPC_MMU_2_07) { 286 info->slb_size = 32; 287 } else { 288 info->slb_size = 64; 289 } 290 291 /* Standard 4k base page size segment */ 292 info->sps[i].page_shift = 12; 293 info->sps[i].slb_enc = 0; 294 info->sps[i].enc[0].page_shift = 12; 295 info->sps[i].enc[0].pte_enc = 0; 296 i++; 297 298 /* 64K on MMU 2.06 and later */ 299 if (env->mmu_model == POWERPC_MMU_2_06 || 300 env->mmu_model == POWERPC_MMU_2_07) { 301 info->sps[i].page_shift = 16; 302 info->sps[i].slb_enc = 0x110; 303 info->sps[i].enc[0].page_shift = 16; 304 info->sps[i].enc[0].pte_enc = 1; 305 i++; 306 } 307 308 /* Standard 16M large page size segment */ 309 info->sps[i].page_shift = 24; 310 info->sps[i].slb_enc = SLB_VSID_L; 311 info->sps[i].enc[0].page_shift = 24; 312 info->sps[i].enc[0].pte_enc = 0; 313 } 314 } 315 316 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info) 317 { 318 CPUState *cs = CPU(cpu); 319 int ret; 320 321 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) { 322 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info); 323 if (ret == 0) { 324 return; 325 } 326 } 327 328 kvm_get_fallback_smmu_info(cpu, info); 329 } 330 331 static long gethugepagesize(const char *mem_path) 332 { 333 struct statfs fs; 334 int ret; 335 336 do { 337 ret = statfs(mem_path, &fs); 338 } while (ret != 0 && errno == EINTR); 339 340 if (ret != 0) { 341 fprintf(stderr, "Couldn't statfs() memory path: %s\n", 342 strerror(errno)); 343 exit(1); 344 } 345 346 #define HUGETLBFS_MAGIC 0x958458f6 347 348 if (fs.f_type != HUGETLBFS_MAGIC) { 349 /* Explicit mempath, but it's ordinary pages */ 350 return getpagesize(); 351 } 352 353 /* It's hugepage, return the huge page size */ 354 return fs.f_bsize; 355 } 356 357 /* 358 * FIXME TOCTTOU: this iterates over memory backends' mem-path, which 359 * may or may not name the same files / on the same filesystem now as 360 * when we actually open and map them. Iterate over the file 361 * descriptors instead, and use qemu_fd_getpagesize(). 362 */ 363 static int find_max_supported_pagesize(Object *obj, void *opaque) 364 { 365 char *mem_path; 366 long *hpsize_min = opaque; 367 368 if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) { 369 mem_path = object_property_get_str(obj, "mem-path", NULL); 370 if (mem_path) { 371 long hpsize = gethugepagesize(mem_path); 372 if (hpsize < *hpsize_min) { 373 *hpsize_min = hpsize; 374 } 375 } else { 376 *hpsize_min = getpagesize(); 377 } 378 } 379 380 return 0; 381 } 382 383 static long getrampagesize(void) 384 { 385 long hpsize = LONG_MAX; 386 long mainrampagesize; 387 Object *memdev_root; 388 389 if (mem_path) { 390 mainrampagesize = gethugepagesize(mem_path); 391 } else { 392 mainrampagesize = getpagesize(); 393 } 394 395 /* it's possible we have memory-backend objects with 396 * hugepage-backed RAM. these may get mapped into system 397 * address space via -numa parameters or memory hotplug 398 * hooks. we want to take these into account, but we 399 * also want to make sure these supported hugepage 400 * sizes are applicable across the entire range of memory 401 * we may boot from, so we take the min across all 402 * backends, and assume normal pages in cases where a 403 * backend isn't backed by hugepages. 404 */ 405 memdev_root = object_resolve_path("/objects", NULL); 406 if (memdev_root) { 407 object_child_foreach(memdev_root, find_max_supported_pagesize, &hpsize); 408 } 409 if (hpsize == LONG_MAX) { 410 /* No additional memory regions found ==> Report main RAM page size */ 411 return mainrampagesize; 412 } 413 414 /* If NUMA is disabled or the NUMA nodes are not backed with a 415 * memory-backend, then there is at least one node using "normal" RAM, 416 * so if its page size is smaller we have got to report that size instead. 417 */ 418 if (hpsize > mainrampagesize && 419 (nb_numa_nodes == 0 || numa_info[0].node_memdev == NULL)) { 420 static bool warned; 421 if (!warned) { 422 error_report("Huge page support disabled (n/a for main memory)."); 423 warned = true; 424 } 425 return mainrampagesize; 426 } 427 428 return hpsize; 429 } 430 431 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift) 432 { 433 if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) { 434 return true; 435 } 436 437 return (1ul << shift) <= rampgsize; 438 } 439 440 static void kvm_fixup_page_sizes(PowerPCCPU *cpu) 441 { 442 static struct kvm_ppc_smmu_info smmu_info; 443 static bool has_smmu_info; 444 CPUPPCState *env = &cpu->env; 445 long rampagesize; 446 int iq, ik, jq, jk; 447 bool has_64k_pages = false; 448 449 /* We only handle page sizes for 64-bit server guests for now */ 450 if (!(env->mmu_model & POWERPC_MMU_64)) { 451 return; 452 } 453 454 /* Collect MMU info from kernel if not already */ 455 if (!has_smmu_info) { 456 kvm_get_smmu_info(cpu, &smmu_info); 457 has_smmu_info = true; 458 } 459 460 rampagesize = getrampagesize(); 461 462 /* Convert to QEMU form */ 463 memset(&env->sps, 0, sizeof(env->sps)); 464 465 /* If we have HV KVM, we need to forbid CI large pages if our 466 * host page size is smaller than 64K. 467 */ 468 if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) { 469 env->ci_large_pages = getpagesize() >= 0x10000; 470 } 471 472 /* 473 * XXX This loop should be an entry wide AND of the capabilities that 474 * the selected CPU has with the capabilities that KVM supports. 475 */ 476 for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) { 477 struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq]; 478 struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik]; 479 480 if (!kvm_valid_page_size(smmu_info.flags, rampagesize, 481 ksps->page_shift)) { 482 continue; 483 } 484 qsps->page_shift = ksps->page_shift; 485 qsps->slb_enc = ksps->slb_enc; 486 for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) { 487 if (!kvm_valid_page_size(smmu_info.flags, rampagesize, 488 ksps->enc[jk].page_shift)) { 489 continue; 490 } 491 if (ksps->enc[jk].page_shift == 16) { 492 has_64k_pages = true; 493 } 494 qsps->enc[jq].page_shift = ksps->enc[jk].page_shift; 495 qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc; 496 if (++jq >= PPC_PAGE_SIZES_MAX_SZ) { 497 break; 498 } 499 } 500 if (++iq >= PPC_PAGE_SIZES_MAX_SZ) { 501 break; 502 } 503 } 504 env->slb_nr = smmu_info.slb_size; 505 if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) { 506 env->mmu_model &= ~POWERPC_MMU_1TSEG; 507 } 508 if (!has_64k_pages) { 509 env->mmu_model &= ~POWERPC_MMU_64K; 510 } 511 } 512 #else /* defined (TARGET_PPC64) */ 513 514 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu) 515 { 516 } 517 518 #endif /* !defined (TARGET_PPC64) */ 519 520 unsigned long kvm_arch_vcpu_id(CPUState *cpu) 521 { 522 return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu)); 523 } 524 525 /* e500 supports 2 h/w breakpoint and 2 watchpoint. 526 * book3s supports only 1 watchpoint, so array size 527 * of 4 is sufficient for now. 528 */ 529 #define MAX_HW_BKPTS 4 530 531 static struct HWBreakpoint { 532 target_ulong addr; 533 int type; 534 } hw_debug_points[MAX_HW_BKPTS]; 535 536 static CPUWatchpoint hw_watchpoint; 537 538 /* Default there is no breakpoint and watchpoint supported */ 539 static int max_hw_breakpoint; 540 static int max_hw_watchpoint; 541 static int nb_hw_breakpoint; 542 static int nb_hw_watchpoint; 543 544 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv) 545 { 546 if (cenv->excp_model == POWERPC_EXCP_BOOKE) { 547 max_hw_breakpoint = 2; 548 max_hw_watchpoint = 2; 549 } 550 551 if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) { 552 fprintf(stderr, "Error initializing h/w breakpoints\n"); 553 return; 554 } 555 } 556 557 int kvm_arch_init_vcpu(CPUState *cs) 558 { 559 PowerPCCPU *cpu = POWERPC_CPU(cs); 560 CPUPPCState *cenv = &cpu->env; 561 int ret; 562 563 /* Gather server mmu info from KVM and update the CPU state */ 564 kvm_fixup_page_sizes(cpu); 565 566 /* Synchronize sregs with kvm */ 567 ret = kvm_arch_sync_sregs(cpu); 568 if (ret) { 569 if (ret == -EINVAL) { 570 error_report("Register sync failed... If you're using kvm-hv.ko," 571 " only \"-cpu host\" is possible"); 572 } 573 return ret; 574 } 575 576 idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu); 577 578 switch (cenv->mmu_model) { 579 case POWERPC_MMU_BOOKE206: 580 /* This target supports access to KVM's guest TLB */ 581 ret = kvm_booke206_tlb_init(cpu); 582 break; 583 case POWERPC_MMU_2_07: 584 if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) { 585 /* KVM-HV has transactional memory on POWER8 also without the 586 * KVM_CAP_PPC_HTM extension, so enable it here instead. */ 587 cap_htm = true; 588 } 589 break; 590 default: 591 break; 592 } 593 594 kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode); 595 kvmppc_hw_debug_points_init(cenv); 596 597 return ret; 598 } 599 600 static void kvm_sw_tlb_put(PowerPCCPU *cpu) 601 { 602 CPUPPCState *env = &cpu->env; 603 CPUState *cs = CPU(cpu); 604 struct kvm_dirty_tlb dirty_tlb; 605 unsigned char *bitmap; 606 int ret; 607 608 if (!env->kvm_sw_tlb) { 609 return; 610 } 611 612 bitmap = g_malloc((env->nb_tlb + 7) / 8); 613 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8); 614 615 dirty_tlb.bitmap = (uintptr_t)bitmap; 616 dirty_tlb.num_dirty = env->nb_tlb; 617 618 ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb); 619 if (ret) { 620 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n", 621 __func__, strerror(-ret)); 622 } 623 624 g_free(bitmap); 625 } 626 627 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr) 628 { 629 PowerPCCPU *cpu = POWERPC_CPU(cs); 630 CPUPPCState *env = &cpu->env; 631 union { 632 uint32_t u32; 633 uint64_t u64; 634 } val; 635 struct kvm_one_reg reg = { 636 .id = id, 637 .addr = (uintptr_t) &val, 638 }; 639 int ret; 640 641 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 642 if (ret != 0) { 643 trace_kvm_failed_spr_get(spr, strerror(errno)); 644 } else { 645 switch (id & KVM_REG_SIZE_MASK) { 646 case KVM_REG_SIZE_U32: 647 env->spr[spr] = val.u32; 648 break; 649 650 case KVM_REG_SIZE_U64: 651 env->spr[spr] = val.u64; 652 break; 653 654 default: 655 /* Don't handle this size yet */ 656 abort(); 657 } 658 } 659 } 660 661 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr) 662 { 663 PowerPCCPU *cpu = POWERPC_CPU(cs); 664 CPUPPCState *env = &cpu->env; 665 union { 666 uint32_t u32; 667 uint64_t u64; 668 } val; 669 struct kvm_one_reg reg = { 670 .id = id, 671 .addr = (uintptr_t) &val, 672 }; 673 int ret; 674 675 switch (id & KVM_REG_SIZE_MASK) { 676 case KVM_REG_SIZE_U32: 677 val.u32 = env->spr[spr]; 678 break; 679 680 case KVM_REG_SIZE_U64: 681 val.u64 = env->spr[spr]; 682 break; 683 684 default: 685 /* Don't handle this size yet */ 686 abort(); 687 } 688 689 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 690 if (ret != 0) { 691 trace_kvm_failed_spr_set(spr, strerror(errno)); 692 } 693 } 694 695 static int kvm_put_fp(CPUState *cs) 696 { 697 PowerPCCPU *cpu = POWERPC_CPU(cs); 698 CPUPPCState *env = &cpu->env; 699 struct kvm_one_reg reg; 700 int i; 701 int ret; 702 703 if (env->insns_flags & PPC_FLOAT) { 704 uint64_t fpscr = env->fpscr; 705 bool vsx = !!(env->insns_flags2 & PPC2_VSX); 706 707 reg.id = KVM_REG_PPC_FPSCR; 708 reg.addr = (uintptr_t)&fpscr; 709 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 710 if (ret < 0) { 711 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno)); 712 return ret; 713 } 714 715 for (i = 0; i < 32; i++) { 716 uint64_t vsr[2]; 717 718 #ifdef HOST_WORDS_BIGENDIAN 719 vsr[0] = float64_val(env->fpr[i]); 720 vsr[1] = env->vsr[i]; 721 #else 722 vsr[0] = env->vsr[i]; 723 vsr[1] = float64_val(env->fpr[i]); 724 #endif 725 reg.addr = (uintptr_t) &vsr; 726 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i); 727 728 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 729 if (ret < 0) { 730 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR", 731 i, strerror(errno)); 732 return ret; 733 } 734 } 735 } 736 737 if (env->insns_flags & PPC_ALTIVEC) { 738 reg.id = KVM_REG_PPC_VSCR; 739 reg.addr = (uintptr_t)&env->vscr; 740 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 741 if (ret < 0) { 742 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno)); 743 return ret; 744 } 745 746 for (i = 0; i < 32; i++) { 747 reg.id = KVM_REG_PPC_VR(i); 748 reg.addr = (uintptr_t)&env->avr[i]; 749 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 750 if (ret < 0) { 751 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno)); 752 return ret; 753 } 754 } 755 } 756 757 return 0; 758 } 759 760 static int kvm_get_fp(CPUState *cs) 761 { 762 PowerPCCPU *cpu = POWERPC_CPU(cs); 763 CPUPPCState *env = &cpu->env; 764 struct kvm_one_reg reg; 765 int i; 766 int ret; 767 768 if (env->insns_flags & PPC_FLOAT) { 769 uint64_t fpscr; 770 bool vsx = !!(env->insns_flags2 & PPC2_VSX); 771 772 reg.id = KVM_REG_PPC_FPSCR; 773 reg.addr = (uintptr_t)&fpscr; 774 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 775 if (ret < 0) { 776 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno)); 777 return ret; 778 } else { 779 env->fpscr = fpscr; 780 } 781 782 for (i = 0; i < 32; i++) { 783 uint64_t vsr[2]; 784 785 reg.addr = (uintptr_t) &vsr; 786 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i); 787 788 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 789 if (ret < 0) { 790 DPRINTF("Unable to get %s%d from KVM: %s\n", 791 vsx ? "VSR" : "FPR", i, strerror(errno)); 792 return ret; 793 } else { 794 #ifdef HOST_WORDS_BIGENDIAN 795 env->fpr[i] = vsr[0]; 796 if (vsx) { 797 env->vsr[i] = vsr[1]; 798 } 799 #else 800 env->fpr[i] = vsr[1]; 801 if (vsx) { 802 env->vsr[i] = vsr[0]; 803 } 804 #endif 805 } 806 } 807 } 808 809 if (env->insns_flags & PPC_ALTIVEC) { 810 reg.id = KVM_REG_PPC_VSCR; 811 reg.addr = (uintptr_t)&env->vscr; 812 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 813 if (ret < 0) { 814 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno)); 815 return ret; 816 } 817 818 for (i = 0; i < 32; i++) { 819 reg.id = KVM_REG_PPC_VR(i); 820 reg.addr = (uintptr_t)&env->avr[i]; 821 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 822 if (ret < 0) { 823 DPRINTF("Unable to get VR%d from KVM: %s\n", 824 i, strerror(errno)); 825 return ret; 826 } 827 } 828 } 829 830 return 0; 831 } 832 833 #if defined(TARGET_PPC64) 834 static int kvm_get_vpa(CPUState *cs) 835 { 836 PowerPCCPU *cpu = POWERPC_CPU(cs); 837 CPUPPCState *env = &cpu->env; 838 struct kvm_one_reg reg; 839 int ret; 840 841 reg.id = KVM_REG_PPC_VPA_ADDR; 842 reg.addr = (uintptr_t)&env->vpa_addr; 843 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 844 if (ret < 0) { 845 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno)); 846 return ret; 847 } 848 849 assert((uintptr_t)&env->slb_shadow_size 850 == ((uintptr_t)&env->slb_shadow_addr + 8)); 851 reg.id = KVM_REG_PPC_VPA_SLB; 852 reg.addr = (uintptr_t)&env->slb_shadow_addr; 853 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 854 if (ret < 0) { 855 DPRINTF("Unable to get SLB shadow state from KVM: %s\n", 856 strerror(errno)); 857 return ret; 858 } 859 860 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8)); 861 reg.id = KVM_REG_PPC_VPA_DTL; 862 reg.addr = (uintptr_t)&env->dtl_addr; 863 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 864 if (ret < 0) { 865 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n", 866 strerror(errno)); 867 return ret; 868 } 869 870 return 0; 871 } 872 873 static int kvm_put_vpa(CPUState *cs) 874 { 875 PowerPCCPU *cpu = POWERPC_CPU(cs); 876 CPUPPCState *env = &cpu->env; 877 struct kvm_one_reg reg; 878 int ret; 879 880 /* SLB shadow or DTL can't be registered unless a master VPA is 881 * registered. That means when restoring state, if a VPA *is* 882 * registered, we need to set that up first. If not, we need to 883 * deregister the others before deregistering the master VPA */ 884 assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr)); 885 886 if (env->vpa_addr) { 887 reg.id = KVM_REG_PPC_VPA_ADDR; 888 reg.addr = (uintptr_t)&env->vpa_addr; 889 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 890 if (ret < 0) { 891 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno)); 892 return ret; 893 } 894 } 895 896 assert((uintptr_t)&env->slb_shadow_size 897 == ((uintptr_t)&env->slb_shadow_addr + 8)); 898 reg.id = KVM_REG_PPC_VPA_SLB; 899 reg.addr = (uintptr_t)&env->slb_shadow_addr; 900 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 901 if (ret < 0) { 902 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno)); 903 return ret; 904 } 905 906 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8)); 907 reg.id = KVM_REG_PPC_VPA_DTL; 908 reg.addr = (uintptr_t)&env->dtl_addr; 909 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 910 if (ret < 0) { 911 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n", 912 strerror(errno)); 913 return ret; 914 } 915 916 if (!env->vpa_addr) { 917 reg.id = KVM_REG_PPC_VPA_ADDR; 918 reg.addr = (uintptr_t)&env->vpa_addr; 919 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 920 if (ret < 0) { 921 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno)); 922 return ret; 923 } 924 } 925 926 return 0; 927 } 928 #endif /* TARGET_PPC64 */ 929 930 int kvmppc_put_books_sregs(PowerPCCPU *cpu) 931 { 932 CPUPPCState *env = &cpu->env; 933 struct kvm_sregs sregs; 934 int i; 935 936 sregs.pvr = env->spr[SPR_PVR]; 937 938 sregs.u.s.sdr1 = env->spr[SPR_SDR1]; 939 940 /* Sync SLB */ 941 #ifdef TARGET_PPC64 942 for (i = 0; i < ARRAY_SIZE(env->slb); i++) { 943 sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid; 944 if (env->slb[i].esid & SLB_ESID_V) { 945 sregs.u.s.ppc64.slb[i].slbe |= i; 946 } 947 sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid; 948 } 949 #endif 950 951 /* Sync SRs */ 952 for (i = 0; i < 16; i++) { 953 sregs.u.s.ppc32.sr[i] = env->sr[i]; 954 } 955 956 /* Sync BATs */ 957 for (i = 0; i < 8; i++) { 958 /* Beware. We have to swap upper and lower bits here */ 959 sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32) 960 | env->DBAT[1][i]; 961 sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32) 962 | env->IBAT[1][i]; 963 } 964 965 return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs); 966 } 967 968 int kvm_arch_put_registers(CPUState *cs, int level) 969 { 970 PowerPCCPU *cpu = POWERPC_CPU(cs); 971 CPUPPCState *env = &cpu->env; 972 struct kvm_regs regs; 973 int ret; 974 int i; 975 976 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, ®s); 977 if (ret < 0) { 978 return ret; 979 } 980 981 regs.ctr = env->ctr; 982 regs.lr = env->lr; 983 regs.xer = cpu_read_xer(env); 984 regs.msr = env->msr; 985 regs.pc = env->nip; 986 987 regs.srr0 = env->spr[SPR_SRR0]; 988 regs.srr1 = env->spr[SPR_SRR1]; 989 990 regs.sprg0 = env->spr[SPR_SPRG0]; 991 regs.sprg1 = env->spr[SPR_SPRG1]; 992 regs.sprg2 = env->spr[SPR_SPRG2]; 993 regs.sprg3 = env->spr[SPR_SPRG3]; 994 regs.sprg4 = env->spr[SPR_SPRG4]; 995 regs.sprg5 = env->spr[SPR_SPRG5]; 996 regs.sprg6 = env->spr[SPR_SPRG6]; 997 regs.sprg7 = env->spr[SPR_SPRG7]; 998 999 regs.pid = env->spr[SPR_BOOKE_PID]; 1000 1001 for (i = 0;i < 32; i++) 1002 regs.gpr[i] = env->gpr[i]; 1003 1004 regs.cr = 0; 1005 for (i = 0; i < 8; i++) { 1006 regs.cr |= (env->crf[i] & 15) << (4 * (7 - i)); 1007 } 1008 1009 ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, ®s); 1010 if (ret < 0) 1011 return ret; 1012 1013 kvm_put_fp(cs); 1014 1015 if (env->tlb_dirty) { 1016 kvm_sw_tlb_put(cpu); 1017 env->tlb_dirty = false; 1018 } 1019 1020 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) { 1021 ret = kvmppc_put_books_sregs(cpu); 1022 if (ret < 0) { 1023 return ret; 1024 } 1025 } 1026 1027 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) { 1028 kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR); 1029 } 1030 1031 if (cap_one_reg) { 1032 int i; 1033 1034 /* We deliberately ignore errors here, for kernels which have 1035 * the ONE_REG calls, but don't support the specific 1036 * registers, there's a reasonable chance things will still 1037 * work, at least until we try to migrate. */ 1038 for (i = 0; i < 1024; i++) { 1039 uint64_t id = env->spr_cb[i].one_reg_id; 1040 1041 if (id != 0) { 1042 kvm_put_one_spr(cs, id, i); 1043 } 1044 } 1045 1046 #ifdef TARGET_PPC64 1047 if (msr_ts) { 1048 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) { 1049 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]); 1050 } 1051 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) { 1052 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]); 1053 } 1054 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr); 1055 kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr); 1056 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr); 1057 kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr); 1058 kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr); 1059 kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr); 1060 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave); 1061 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr); 1062 kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr); 1063 kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar); 1064 } 1065 1066 if (cap_papr) { 1067 if (kvm_put_vpa(cs) < 0) { 1068 DPRINTF("Warning: Unable to set VPA information to KVM\n"); 1069 } 1070 } 1071 1072 kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset); 1073 #endif /* TARGET_PPC64 */ 1074 } 1075 1076 return ret; 1077 } 1078 1079 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor) 1080 { 1081 env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR]; 1082 } 1083 1084 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu) 1085 { 1086 CPUPPCState *env = &cpu->env; 1087 struct kvm_sregs sregs; 1088 int ret; 1089 1090 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs); 1091 if (ret < 0) { 1092 return ret; 1093 } 1094 1095 if (sregs.u.e.features & KVM_SREGS_E_BASE) { 1096 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0; 1097 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1; 1098 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr; 1099 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear; 1100 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr; 1101 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr; 1102 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr; 1103 env->spr[SPR_DECR] = sregs.u.e.dec; 1104 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff; 1105 env->spr[SPR_TBU] = sregs.u.e.tb >> 32; 1106 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave; 1107 } 1108 1109 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) { 1110 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir; 1111 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0; 1112 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1; 1113 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar; 1114 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr; 1115 } 1116 1117 if (sregs.u.e.features & KVM_SREGS_E_64) { 1118 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr; 1119 } 1120 1121 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) { 1122 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8; 1123 } 1124 1125 if (sregs.u.e.features & KVM_SREGS_E_IVOR) { 1126 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0]; 1127 kvm_sync_excp(env, POWERPC_EXCP_CRITICAL, SPR_BOOKE_IVOR0); 1128 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1]; 1129 kvm_sync_excp(env, POWERPC_EXCP_MCHECK, SPR_BOOKE_IVOR1); 1130 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2]; 1131 kvm_sync_excp(env, POWERPC_EXCP_DSI, SPR_BOOKE_IVOR2); 1132 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3]; 1133 kvm_sync_excp(env, POWERPC_EXCP_ISI, SPR_BOOKE_IVOR3); 1134 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4]; 1135 kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL, SPR_BOOKE_IVOR4); 1136 env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5]; 1137 kvm_sync_excp(env, POWERPC_EXCP_ALIGN, SPR_BOOKE_IVOR5); 1138 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6]; 1139 kvm_sync_excp(env, POWERPC_EXCP_PROGRAM, SPR_BOOKE_IVOR6); 1140 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7]; 1141 kvm_sync_excp(env, POWERPC_EXCP_FPU, SPR_BOOKE_IVOR7); 1142 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8]; 1143 kvm_sync_excp(env, POWERPC_EXCP_SYSCALL, SPR_BOOKE_IVOR8); 1144 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9]; 1145 kvm_sync_excp(env, POWERPC_EXCP_APU, SPR_BOOKE_IVOR9); 1146 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10]; 1147 kvm_sync_excp(env, POWERPC_EXCP_DECR, SPR_BOOKE_IVOR10); 1148 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11]; 1149 kvm_sync_excp(env, POWERPC_EXCP_FIT, SPR_BOOKE_IVOR11); 1150 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12]; 1151 kvm_sync_excp(env, POWERPC_EXCP_WDT, SPR_BOOKE_IVOR12); 1152 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13]; 1153 kvm_sync_excp(env, POWERPC_EXCP_DTLB, SPR_BOOKE_IVOR13); 1154 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14]; 1155 kvm_sync_excp(env, POWERPC_EXCP_ITLB, SPR_BOOKE_IVOR14); 1156 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15]; 1157 kvm_sync_excp(env, POWERPC_EXCP_DEBUG, SPR_BOOKE_IVOR15); 1158 1159 if (sregs.u.e.features & KVM_SREGS_E_SPE) { 1160 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0]; 1161 kvm_sync_excp(env, POWERPC_EXCP_SPEU, SPR_BOOKE_IVOR32); 1162 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1]; 1163 kvm_sync_excp(env, POWERPC_EXCP_EFPDI, SPR_BOOKE_IVOR33); 1164 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2]; 1165 kvm_sync_excp(env, POWERPC_EXCP_EFPRI, SPR_BOOKE_IVOR34); 1166 } 1167 1168 if (sregs.u.e.features & KVM_SREGS_E_PM) { 1169 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3]; 1170 kvm_sync_excp(env, POWERPC_EXCP_EPERFM, SPR_BOOKE_IVOR35); 1171 } 1172 1173 if (sregs.u.e.features & KVM_SREGS_E_PC) { 1174 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4]; 1175 kvm_sync_excp(env, POWERPC_EXCP_DOORI, SPR_BOOKE_IVOR36); 1176 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5]; 1177 kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37); 1178 } 1179 } 1180 1181 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) { 1182 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0; 1183 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1; 1184 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2; 1185 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff; 1186 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4; 1187 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6; 1188 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32; 1189 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg; 1190 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0]; 1191 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1]; 1192 } 1193 1194 if (sregs.u.e.features & KVM_SREGS_EXP) { 1195 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr; 1196 } 1197 1198 if (sregs.u.e.features & KVM_SREGS_E_PD) { 1199 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc; 1200 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc; 1201 } 1202 1203 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) { 1204 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr; 1205 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar; 1206 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0; 1207 1208 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) { 1209 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1; 1210 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2; 1211 } 1212 } 1213 1214 return 0; 1215 } 1216 1217 static int kvmppc_get_books_sregs(PowerPCCPU *cpu) 1218 { 1219 CPUPPCState *env = &cpu->env; 1220 struct kvm_sregs sregs; 1221 int ret; 1222 int i; 1223 1224 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs); 1225 if (ret < 0) { 1226 return ret; 1227 } 1228 1229 if (!env->external_htab) { 1230 ppc_store_sdr1(env, sregs.u.s.sdr1); 1231 } 1232 1233 /* Sync SLB */ 1234 #ifdef TARGET_PPC64 1235 /* 1236 * The packed SLB array we get from KVM_GET_SREGS only contains 1237 * information about valid entries. So we flush our internal copy 1238 * to get rid of stale ones, then put all valid SLB entries back 1239 * in. 1240 */ 1241 memset(env->slb, 0, sizeof(env->slb)); 1242 for (i = 0; i < ARRAY_SIZE(env->slb); i++) { 1243 target_ulong rb = sregs.u.s.ppc64.slb[i].slbe; 1244 target_ulong rs = sregs.u.s.ppc64.slb[i].slbv; 1245 /* 1246 * Only restore valid entries 1247 */ 1248 if (rb & SLB_ESID_V) { 1249 ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs); 1250 } 1251 } 1252 #endif 1253 1254 /* Sync SRs */ 1255 for (i = 0; i < 16; i++) { 1256 env->sr[i] = sregs.u.s.ppc32.sr[i]; 1257 } 1258 1259 /* Sync BATs */ 1260 for (i = 0; i < 8; i++) { 1261 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff; 1262 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32; 1263 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff; 1264 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32; 1265 } 1266 1267 return 0; 1268 } 1269 1270 int kvm_arch_get_registers(CPUState *cs) 1271 { 1272 PowerPCCPU *cpu = POWERPC_CPU(cs); 1273 CPUPPCState *env = &cpu->env; 1274 struct kvm_regs regs; 1275 uint32_t cr; 1276 int i, ret; 1277 1278 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, ®s); 1279 if (ret < 0) 1280 return ret; 1281 1282 cr = regs.cr; 1283 for (i = 7; i >= 0; i--) { 1284 env->crf[i] = cr & 15; 1285 cr >>= 4; 1286 } 1287 1288 env->ctr = regs.ctr; 1289 env->lr = regs.lr; 1290 cpu_write_xer(env, regs.xer); 1291 env->msr = regs.msr; 1292 env->nip = regs.pc; 1293 1294 env->spr[SPR_SRR0] = regs.srr0; 1295 env->spr[SPR_SRR1] = regs.srr1; 1296 1297 env->spr[SPR_SPRG0] = regs.sprg0; 1298 env->spr[SPR_SPRG1] = regs.sprg1; 1299 env->spr[SPR_SPRG2] = regs.sprg2; 1300 env->spr[SPR_SPRG3] = regs.sprg3; 1301 env->spr[SPR_SPRG4] = regs.sprg4; 1302 env->spr[SPR_SPRG5] = regs.sprg5; 1303 env->spr[SPR_SPRG6] = regs.sprg6; 1304 env->spr[SPR_SPRG7] = regs.sprg7; 1305 1306 env->spr[SPR_BOOKE_PID] = regs.pid; 1307 1308 for (i = 0;i < 32; i++) 1309 env->gpr[i] = regs.gpr[i]; 1310 1311 kvm_get_fp(cs); 1312 1313 if (cap_booke_sregs) { 1314 ret = kvmppc_get_booke_sregs(cpu); 1315 if (ret < 0) { 1316 return ret; 1317 } 1318 } 1319 1320 if (cap_segstate) { 1321 ret = kvmppc_get_books_sregs(cpu); 1322 if (ret < 0) { 1323 return ret; 1324 } 1325 } 1326 1327 if (cap_hior) { 1328 kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR); 1329 } 1330 1331 if (cap_one_reg) { 1332 int i; 1333 1334 /* We deliberately ignore errors here, for kernels which have 1335 * the ONE_REG calls, but don't support the specific 1336 * registers, there's a reasonable chance things will still 1337 * work, at least until we try to migrate. */ 1338 for (i = 0; i < 1024; i++) { 1339 uint64_t id = env->spr_cb[i].one_reg_id; 1340 1341 if (id != 0) { 1342 kvm_get_one_spr(cs, id, i); 1343 } 1344 } 1345 1346 #ifdef TARGET_PPC64 1347 if (msr_ts) { 1348 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) { 1349 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]); 1350 } 1351 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) { 1352 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]); 1353 } 1354 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr); 1355 kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr); 1356 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr); 1357 kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr); 1358 kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr); 1359 kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr); 1360 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave); 1361 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr); 1362 kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr); 1363 kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar); 1364 } 1365 1366 if (cap_papr) { 1367 if (kvm_get_vpa(cs) < 0) { 1368 DPRINTF("Warning: Unable to get VPA information from KVM\n"); 1369 } 1370 } 1371 1372 kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset); 1373 #endif 1374 } 1375 1376 return 0; 1377 } 1378 1379 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level) 1380 { 1381 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET; 1382 1383 if (irq != PPC_INTERRUPT_EXT) { 1384 return 0; 1385 } 1386 1387 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) { 1388 return 0; 1389 } 1390 1391 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq); 1392 1393 return 0; 1394 } 1395 1396 #if defined(TARGET_PPCEMB) 1397 #define PPC_INPUT_INT PPC40x_INPUT_INT 1398 #elif defined(TARGET_PPC64) 1399 #define PPC_INPUT_INT PPC970_INPUT_INT 1400 #else 1401 #define PPC_INPUT_INT PPC6xx_INPUT_INT 1402 #endif 1403 1404 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run) 1405 { 1406 PowerPCCPU *cpu = POWERPC_CPU(cs); 1407 CPUPPCState *env = &cpu->env; 1408 int r; 1409 unsigned irq; 1410 1411 qemu_mutex_lock_iothread(); 1412 1413 /* PowerPC QEMU tracks the various core input pins (interrupt, critical 1414 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */ 1415 if (!cap_interrupt_level && 1416 run->ready_for_interrupt_injection && 1417 (cs->interrupt_request & CPU_INTERRUPT_HARD) && 1418 (env->irq_input_state & (1<<PPC_INPUT_INT))) 1419 { 1420 /* For now KVM disregards the 'irq' argument. However, in the 1421 * future KVM could cache it in-kernel to avoid a heavyweight exit 1422 * when reading the UIC. 1423 */ 1424 irq = KVM_INTERRUPT_SET; 1425 1426 DPRINTF("injected interrupt %d\n", irq); 1427 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq); 1428 if (r < 0) { 1429 printf("cpu %d fail inject %x\n", cs->cpu_index, irq); 1430 } 1431 1432 /* Always wake up soon in case the interrupt was level based */ 1433 timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 1434 (NANOSECONDS_PER_SECOND / 50)); 1435 } 1436 1437 /* We don't know if there are more interrupts pending after this. However, 1438 * the guest will return to userspace in the course of handling this one 1439 * anyways, so we will get a chance to deliver the rest. */ 1440 1441 qemu_mutex_unlock_iothread(); 1442 } 1443 1444 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run) 1445 { 1446 return MEMTXATTRS_UNSPECIFIED; 1447 } 1448 1449 int kvm_arch_process_async_events(CPUState *cs) 1450 { 1451 return cs->halted; 1452 } 1453 1454 static int kvmppc_handle_halt(PowerPCCPU *cpu) 1455 { 1456 CPUState *cs = CPU(cpu); 1457 CPUPPCState *env = &cpu->env; 1458 1459 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) { 1460 cs->halted = 1; 1461 cs->exception_index = EXCP_HLT; 1462 } 1463 1464 return 0; 1465 } 1466 1467 /* map dcr access to existing qemu dcr emulation */ 1468 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data) 1469 { 1470 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0) 1471 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn); 1472 1473 return 0; 1474 } 1475 1476 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data) 1477 { 1478 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0) 1479 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn); 1480 1481 return 0; 1482 } 1483 1484 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp) 1485 { 1486 /* Mixed endian case is not handled */ 1487 uint32_t sc = debug_inst_opcode; 1488 1489 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 1490 sizeof(sc), 0) || 1491 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) { 1492 return -EINVAL; 1493 } 1494 1495 return 0; 1496 } 1497 1498 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp) 1499 { 1500 uint32_t sc; 1501 1502 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) || 1503 sc != debug_inst_opcode || 1504 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 1505 sizeof(sc), 1)) { 1506 return -EINVAL; 1507 } 1508 1509 return 0; 1510 } 1511 1512 static int find_hw_breakpoint(target_ulong addr, int type) 1513 { 1514 int n; 1515 1516 assert((nb_hw_breakpoint + nb_hw_watchpoint) 1517 <= ARRAY_SIZE(hw_debug_points)); 1518 1519 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) { 1520 if (hw_debug_points[n].addr == addr && 1521 hw_debug_points[n].type == type) { 1522 return n; 1523 } 1524 } 1525 1526 return -1; 1527 } 1528 1529 static int find_hw_watchpoint(target_ulong addr, int *flag) 1530 { 1531 int n; 1532 1533 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS); 1534 if (n >= 0) { 1535 *flag = BP_MEM_ACCESS; 1536 return n; 1537 } 1538 1539 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE); 1540 if (n >= 0) { 1541 *flag = BP_MEM_WRITE; 1542 return n; 1543 } 1544 1545 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ); 1546 if (n >= 0) { 1547 *flag = BP_MEM_READ; 1548 return n; 1549 } 1550 1551 return -1; 1552 } 1553 1554 int kvm_arch_insert_hw_breakpoint(target_ulong addr, 1555 target_ulong len, int type) 1556 { 1557 if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) { 1558 return -ENOBUFS; 1559 } 1560 1561 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr; 1562 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type; 1563 1564 switch (type) { 1565 case GDB_BREAKPOINT_HW: 1566 if (nb_hw_breakpoint >= max_hw_breakpoint) { 1567 return -ENOBUFS; 1568 } 1569 1570 if (find_hw_breakpoint(addr, type) >= 0) { 1571 return -EEXIST; 1572 } 1573 1574 nb_hw_breakpoint++; 1575 break; 1576 1577 case GDB_WATCHPOINT_WRITE: 1578 case GDB_WATCHPOINT_READ: 1579 case GDB_WATCHPOINT_ACCESS: 1580 if (nb_hw_watchpoint >= max_hw_watchpoint) { 1581 return -ENOBUFS; 1582 } 1583 1584 if (find_hw_breakpoint(addr, type) >= 0) { 1585 return -EEXIST; 1586 } 1587 1588 nb_hw_watchpoint++; 1589 break; 1590 1591 default: 1592 return -ENOSYS; 1593 } 1594 1595 return 0; 1596 } 1597 1598 int kvm_arch_remove_hw_breakpoint(target_ulong addr, 1599 target_ulong len, int type) 1600 { 1601 int n; 1602 1603 n = find_hw_breakpoint(addr, type); 1604 if (n < 0) { 1605 return -ENOENT; 1606 } 1607 1608 switch (type) { 1609 case GDB_BREAKPOINT_HW: 1610 nb_hw_breakpoint--; 1611 break; 1612 1613 case GDB_WATCHPOINT_WRITE: 1614 case GDB_WATCHPOINT_READ: 1615 case GDB_WATCHPOINT_ACCESS: 1616 nb_hw_watchpoint--; 1617 break; 1618 1619 default: 1620 return -ENOSYS; 1621 } 1622 hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint]; 1623 1624 return 0; 1625 } 1626 1627 void kvm_arch_remove_all_hw_breakpoints(void) 1628 { 1629 nb_hw_breakpoint = nb_hw_watchpoint = 0; 1630 } 1631 1632 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg) 1633 { 1634 int n; 1635 1636 /* Software Breakpoint updates */ 1637 if (kvm_sw_breakpoints_active(cs)) { 1638 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP; 1639 } 1640 1641 assert((nb_hw_breakpoint + nb_hw_watchpoint) 1642 <= ARRAY_SIZE(hw_debug_points)); 1643 assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp)); 1644 1645 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) { 1646 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP; 1647 memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp)); 1648 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) { 1649 switch (hw_debug_points[n].type) { 1650 case GDB_BREAKPOINT_HW: 1651 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT; 1652 break; 1653 case GDB_WATCHPOINT_WRITE: 1654 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE; 1655 break; 1656 case GDB_WATCHPOINT_READ: 1657 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ; 1658 break; 1659 case GDB_WATCHPOINT_ACCESS: 1660 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE | 1661 KVMPPC_DEBUG_WATCH_READ; 1662 break; 1663 default: 1664 cpu_abort(cs, "Unsupported breakpoint type\n"); 1665 } 1666 dbg->arch.bp[n].addr = hw_debug_points[n].addr; 1667 } 1668 } 1669 } 1670 1671 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run) 1672 { 1673 CPUState *cs = CPU(cpu); 1674 CPUPPCState *env = &cpu->env; 1675 struct kvm_debug_exit_arch *arch_info = &run->debug.arch; 1676 int handle = 0; 1677 int n; 1678 int flag = 0; 1679 1680 if (cs->singlestep_enabled) { 1681 handle = 1; 1682 } else if (arch_info->status) { 1683 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) { 1684 if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) { 1685 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW); 1686 if (n >= 0) { 1687 handle = 1; 1688 } 1689 } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ | 1690 KVMPPC_DEBUG_WATCH_WRITE)) { 1691 n = find_hw_watchpoint(arch_info->address, &flag); 1692 if (n >= 0) { 1693 handle = 1; 1694 cs->watchpoint_hit = &hw_watchpoint; 1695 hw_watchpoint.vaddr = hw_debug_points[n].addr; 1696 hw_watchpoint.flags = flag; 1697 } 1698 } 1699 } 1700 } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) { 1701 handle = 1; 1702 } else { 1703 /* QEMU is not able to handle debug exception, so inject 1704 * program exception to guest; 1705 * Yes program exception NOT debug exception !! 1706 * When QEMU is using debug resources then debug exception must 1707 * be always set. To achieve this we set MSR_DE and also set 1708 * MSRP_DEP so guest cannot change MSR_DE. 1709 * When emulating debug resource for guest we want guest 1710 * to control MSR_DE (enable/disable debug interrupt on need). 1711 * Supporting both configurations are NOT possible. 1712 * So the result is that we cannot share debug resources 1713 * between QEMU and Guest on BOOKE architecture. 1714 * In the current design QEMU gets the priority over guest, 1715 * this means that if QEMU is using debug resources then guest 1716 * cannot use them; 1717 * For software breakpoint QEMU uses a privileged instruction; 1718 * So there cannot be any reason that we are here for guest 1719 * set debug exception, only possibility is guest executed a 1720 * privileged / illegal instruction and that's why we are 1721 * injecting a program interrupt. 1722 */ 1723 1724 cpu_synchronize_state(cs); 1725 /* env->nip is PC, so increment this by 4 to use 1726 * ppc_cpu_do_interrupt(), which set srr0 = env->nip - 4. 1727 */ 1728 env->nip += 4; 1729 cs->exception_index = POWERPC_EXCP_PROGRAM; 1730 env->error_code = POWERPC_EXCP_INVAL; 1731 ppc_cpu_do_interrupt(cs); 1732 } 1733 1734 return handle; 1735 } 1736 1737 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) 1738 { 1739 PowerPCCPU *cpu = POWERPC_CPU(cs); 1740 CPUPPCState *env = &cpu->env; 1741 int ret; 1742 1743 qemu_mutex_lock_iothread(); 1744 1745 switch (run->exit_reason) { 1746 case KVM_EXIT_DCR: 1747 if (run->dcr.is_write) { 1748 DPRINTF("handle dcr write\n"); 1749 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data); 1750 } else { 1751 DPRINTF("handle dcr read\n"); 1752 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data); 1753 } 1754 break; 1755 case KVM_EXIT_HLT: 1756 DPRINTF("handle halt\n"); 1757 ret = kvmppc_handle_halt(cpu); 1758 break; 1759 #if defined(TARGET_PPC64) 1760 case KVM_EXIT_PAPR_HCALL: 1761 DPRINTF("handle PAPR hypercall\n"); 1762 run->papr_hcall.ret = spapr_hypercall(cpu, 1763 run->papr_hcall.nr, 1764 run->papr_hcall.args); 1765 ret = 0; 1766 break; 1767 #endif 1768 case KVM_EXIT_EPR: 1769 DPRINTF("handle epr\n"); 1770 run->epr.epr = ldl_phys(cs->as, env->mpic_iack); 1771 ret = 0; 1772 break; 1773 case KVM_EXIT_WATCHDOG: 1774 DPRINTF("handle watchdog expiry\n"); 1775 watchdog_perform_action(); 1776 ret = 0; 1777 break; 1778 1779 case KVM_EXIT_DEBUG: 1780 DPRINTF("handle debug exception\n"); 1781 if (kvm_handle_debug(cpu, run)) { 1782 ret = EXCP_DEBUG; 1783 break; 1784 } 1785 /* re-enter, this exception was guest-internal */ 1786 ret = 0; 1787 break; 1788 1789 default: 1790 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason); 1791 ret = -1; 1792 break; 1793 } 1794 1795 qemu_mutex_unlock_iothread(); 1796 return ret; 1797 } 1798 1799 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits) 1800 { 1801 CPUState *cs = CPU(cpu); 1802 uint32_t bits = tsr_bits; 1803 struct kvm_one_reg reg = { 1804 .id = KVM_REG_PPC_OR_TSR, 1805 .addr = (uintptr_t) &bits, 1806 }; 1807 1808 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 1809 } 1810 1811 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits) 1812 { 1813 1814 CPUState *cs = CPU(cpu); 1815 uint32_t bits = tsr_bits; 1816 struct kvm_one_reg reg = { 1817 .id = KVM_REG_PPC_CLEAR_TSR, 1818 .addr = (uintptr_t) &bits, 1819 }; 1820 1821 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 1822 } 1823 1824 int kvmppc_set_tcr(PowerPCCPU *cpu) 1825 { 1826 CPUState *cs = CPU(cpu); 1827 CPUPPCState *env = &cpu->env; 1828 uint32_t tcr = env->spr[SPR_BOOKE_TCR]; 1829 1830 struct kvm_one_reg reg = { 1831 .id = KVM_REG_PPC_TCR, 1832 .addr = (uintptr_t) &tcr, 1833 }; 1834 1835 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 1836 } 1837 1838 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu) 1839 { 1840 CPUState *cs = CPU(cpu); 1841 int ret; 1842 1843 if (!kvm_enabled()) { 1844 return -1; 1845 } 1846 1847 if (!cap_ppc_watchdog) { 1848 printf("warning: KVM does not support watchdog"); 1849 return -1; 1850 } 1851 1852 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0); 1853 if (ret < 0) { 1854 fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n", 1855 __func__, strerror(-ret)); 1856 return ret; 1857 } 1858 1859 return ret; 1860 } 1861 1862 static int read_cpuinfo(const char *field, char *value, int len) 1863 { 1864 FILE *f; 1865 int ret = -1; 1866 int field_len = strlen(field); 1867 char line[512]; 1868 1869 f = fopen("/proc/cpuinfo", "r"); 1870 if (!f) { 1871 return -1; 1872 } 1873 1874 do { 1875 if (!fgets(line, sizeof(line), f)) { 1876 break; 1877 } 1878 if (!strncmp(line, field, field_len)) { 1879 pstrcpy(value, len, line); 1880 ret = 0; 1881 break; 1882 } 1883 } while(*line); 1884 1885 fclose(f); 1886 1887 return ret; 1888 } 1889 1890 uint32_t kvmppc_get_tbfreq(void) 1891 { 1892 char line[512]; 1893 char *ns; 1894 uint32_t retval = NANOSECONDS_PER_SECOND; 1895 1896 if (read_cpuinfo("timebase", line, sizeof(line))) { 1897 return retval; 1898 } 1899 1900 if (!(ns = strchr(line, ':'))) { 1901 return retval; 1902 } 1903 1904 ns++; 1905 1906 return atoi(ns); 1907 } 1908 1909 bool kvmppc_get_host_serial(char **value) 1910 { 1911 return g_file_get_contents("/proc/device-tree/system-id", value, NULL, 1912 NULL); 1913 } 1914 1915 bool kvmppc_get_host_model(char **value) 1916 { 1917 return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL); 1918 } 1919 1920 /* Try to find a device tree node for a CPU with clock-frequency property */ 1921 static int kvmppc_find_cpu_dt(char *buf, int buf_len) 1922 { 1923 struct dirent *dirp; 1924 DIR *dp; 1925 1926 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) { 1927 printf("Can't open directory " PROC_DEVTREE_CPU "\n"); 1928 return -1; 1929 } 1930 1931 buf[0] = '\0'; 1932 while ((dirp = readdir(dp)) != NULL) { 1933 FILE *f; 1934 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU, 1935 dirp->d_name); 1936 f = fopen(buf, "r"); 1937 if (f) { 1938 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name); 1939 fclose(f); 1940 break; 1941 } 1942 buf[0] = '\0'; 1943 } 1944 closedir(dp); 1945 if (buf[0] == '\0') { 1946 printf("Unknown host!\n"); 1947 return -1; 1948 } 1949 1950 return 0; 1951 } 1952 1953 static uint64_t kvmppc_read_int_dt(const char *filename) 1954 { 1955 union { 1956 uint32_t v32; 1957 uint64_t v64; 1958 } u; 1959 FILE *f; 1960 int len; 1961 1962 f = fopen(filename, "rb"); 1963 if (!f) { 1964 return -1; 1965 } 1966 1967 len = fread(&u, 1, sizeof(u), f); 1968 fclose(f); 1969 switch (len) { 1970 case 4: 1971 /* property is a 32-bit quantity */ 1972 return be32_to_cpu(u.v32); 1973 case 8: 1974 return be64_to_cpu(u.v64); 1975 } 1976 1977 return 0; 1978 } 1979 1980 /* Read a CPU node property from the host device tree that's a single 1981 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong 1982 * (can't find or open the property, or doesn't understand the 1983 * format) */ 1984 static uint64_t kvmppc_read_int_cpu_dt(const char *propname) 1985 { 1986 char buf[PATH_MAX], *tmp; 1987 uint64_t val; 1988 1989 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) { 1990 return -1; 1991 } 1992 1993 tmp = g_strdup_printf("%s/%s", buf, propname); 1994 val = kvmppc_read_int_dt(tmp); 1995 g_free(tmp); 1996 1997 return val; 1998 } 1999 2000 uint64_t kvmppc_get_clockfreq(void) 2001 { 2002 return kvmppc_read_int_cpu_dt("clock-frequency"); 2003 } 2004 2005 uint32_t kvmppc_get_vmx(void) 2006 { 2007 return kvmppc_read_int_cpu_dt("ibm,vmx"); 2008 } 2009 2010 uint32_t kvmppc_get_dfp(void) 2011 { 2012 return kvmppc_read_int_cpu_dt("ibm,dfp"); 2013 } 2014 2015 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo) 2016 { 2017 PowerPCCPU *cpu = ppc_env_get_cpu(env); 2018 CPUState *cs = CPU(cpu); 2019 2020 if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) && 2021 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) { 2022 return 0; 2023 } 2024 2025 return 1; 2026 } 2027 2028 int kvmppc_get_hasidle(CPUPPCState *env) 2029 { 2030 struct kvm_ppc_pvinfo pvinfo; 2031 2032 if (!kvmppc_get_pvinfo(env, &pvinfo) && 2033 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) { 2034 return 1; 2035 } 2036 2037 return 0; 2038 } 2039 2040 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len) 2041 { 2042 uint32_t *hc = (uint32_t*)buf; 2043 struct kvm_ppc_pvinfo pvinfo; 2044 2045 if (!kvmppc_get_pvinfo(env, &pvinfo)) { 2046 memcpy(buf, pvinfo.hcall, buf_len); 2047 return 0; 2048 } 2049 2050 /* 2051 * Fallback to always fail hypercalls regardless of endianness: 2052 * 2053 * tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian) 2054 * li r3, -1 2055 * b .+8 (becomes nop in wrong endian) 2056 * bswap32(li r3, -1) 2057 */ 2058 2059 hc[0] = cpu_to_be32(0x08000048); 2060 hc[1] = cpu_to_be32(0x3860ffff); 2061 hc[2] = cpu_to_be32(0x48000008); 2062 hc[3] = cpu_to_be32(bswap32(0x3860ffff)); 2063 2064 return 1; 2065 } 2066 2067 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall) 2068 { 2069 return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1); 2070 } 2071 2072 void kvmppc_enable_logical_ci_hcalls(void) 2073 { 2074 /* 2075 * FIXME: it would be nice if we could detect the cases where 2076 * we're using a device which requires the in kernel 2077 * implementation of these hcalls, but the kernel lacks them and 2078 * produce a warning. 2079 */ 2080 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD); 2081 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE); 2082 } 2083 2084 void kvmppc_enable_set_mode_hcall(void) 2085 { 2086 kvmppc_enable_hcall(kvm_state, H_SET_MODE); 2087 } 2088 2089 void kvmppc_enable_clear_ref_mod_hcalls(void) 2090 { 2091 kvmppc_enable_hcall(kvm_state, H_CLEAR_REF); 2092 kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD); 2093 } 2094 2095 void kvmppc_set_papr(PowerPCCPU *cpu) 2096 { 2097 CPUState *cs = CPU(cpu); 2098 int ret; 2099 2100 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0); 2101 if (ret) { 2102 error_report("This vCPU type or KVM version does not support PAPR"); 2103 exit(1); 2104 } 2105 2106 /* Update the capability flag so we sync the right information 2107 * with kvm */ 2108 cap_papr = 1; 2109 } 2110 2111 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t cpu_version) 2112 { 2113 return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &cpu_version); 2114 } 2115 2116 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy) 2117 { 2118 CPUState *cs = CPU(cpu); 2119 int ret; 2120 2121 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy); 2122 if (ret && mpic_proxy) { 2123 error_report("This KVM version does not support EPR"); 2124 exit(1); 2125 } 2126 } 2127 2128 int kvmppc_smt_threads(void) 2129 { 2130 return cap_ppc_smt ? cap_ppc_smt : 1; 2131 } 2132 2133 #ifdef TARGET_PPC64 2134 off_t kvmppc_alloc_rma(void **rma) 2135 { 2136 off_t size; 2137 int fd; 2138 struct kvm_allocate_rma ret; 2139 2140 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported 2141 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but 2142 * not necessary on this hardware 2143 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware 2144 * 2145 * FIXME: We should allow the user to force contiguous RMA 2146 * allocation in the cap_ppc_rma==1 case. 2147 */ 2148 if (cap_ppc_rma < 2) { 2149 return 0; 2150 } 2151 2152 fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret); 2153 if (fd < 0) { 2154 fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n", 2155 strerror(errno)); 2156 return -1; 2157 } 2158 2159 size = MIN(ret.rma_size, 256ul << 20); 2160 2161 *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); 2162 if (*rma == MAP_FAILED) { 2163 fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno)); 2164 return -1; 2165 }; 2166 2167 return size; 2168 } 2169 2170 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift) 2171 { 2172 struct kvm_ppc_smmu_info info; 2173 long rampagesize, best_page_shift; 2174 int i; 2175 2176 if (cap_ppc_rma >= 2) { 2177 return current_size; 2178 } 2179 2180 /* Find the largest hardware supported page size that's less than 2181 * or equal to the (logical) backing page size of guest RAM */ 2182 kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info); 2183 rampagesize = getrampagesize(); 2184 best_page_shift = 0; 2185 2186 for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) { 2187 struct kvm_ppc_one_seg_page_size *sps = &info.sps[i]; 2188 2189 if (!sps->page_shift) { 2190 continue; 2191 } 2192 2193 if ((sps->page_shift > best_page_shift) 2194 && ((1UL << sps->page_shift) <= rampagesize)) { 2195 best_page_shift = sps->page_shift; 2196 } 2197 } 2198 2199 return MIN(current_size, 2200 1ULL << (best_page_shift + hash_shift - 7)); 2201 } 2202 #endif 2203 2204 bool kvmppc_spapr_use_multitce(void) 2205 { 2206 return cap_spapr_multitce; 2207 } 2208 2209 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd, 2210 bool need_vfio) 2211 { 2212 struct kvm_create_spapr_tce args = { 2213 .liobn = liobn, 2214 .window_size = window_size, 2215 }; 2216 long len; 2217 int fd; 2218 void *table; 2219 2220 /* Must set fd to -1 so we don't try to munmap when called for 2221 * destroying the table, which the upper layers -will- do 2222 */ 2223 *pfd = -1; 2224 if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) { 2225 return NULL; 2226 } 2227 2228 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args); 2229 if (fd < 0) { 2230 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n", 2231 liobn); 2232 return NULL; 2233 } 2234 2235 len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t); 2236 /* FIXME: round this up to page size */ 2237 2238 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); 2239 if (table == MAP_FAILED) { 2240 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n", 2241 liobn); 2242 close(fd); 2243 return NULL; 2244 } 2245 2246 *pfd = fd; 2247 return table; 2248 } 2249 2250 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table) 2251 { 2252 long len; 2253 2254 if (fd < 0) { 2255 return -1; 2256 } 2257 2258 len = nb_table * sizeof(uint64_t); 2259 if ((munmap(table, len) < 0) || 2260 (close(fd) < 0)) { 2261 fprintf(stderr, "KVM: Unexpected error removing TCE table: %s", 2262 strerror(errno)); 2263 /* Leak the table */ 2264 } 2265 2266 return 0; 2267 } 2268 2269 int kvmppc_reset_htab(int shift_hint) 2270 { 2271 uint32_t shift = shift_hint; 2272 2273 if (!kvm_enabled()) { 2274 /* Full emulation, tell caller to allocate htab itself */ 2275 return 0; 2276 } 2277 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) { 2278 int ret; 2279 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift); 2280 if (ret == -ENOTTY) { 2281 /* At least some versions of PR KVM advertise the 2282 * capability, but don't implement the ioctl(). Oops. 2283 * Return 0 so that we allocate the htab in qemu, as is 2284 * correct for PR. */ 2285 return 0; 2286 } else if (ret < 0) { 2287 return ret; 2288 } 2289 return shift; 2290 } 2291 2292 /* We have a kernel that predates the htab reset calls. For PR 2293 * KVM, we need to allocate the htab ourselves, for an HV KVM of 2294 * this era, it has allocated a 16MB fixed size hash table already. */ 2295 if (kvmppc_is_pr(kvm_state)) { 2296 /* PR - tell caller to allocate htab */ 2297 return 0; 2298 } else { 2299 /* HV - assume 16MB kernel allocated htab */ 2300 return 24; 2301 } 2302 } 2303 2304 static inline uint32_t mfpvr(void) 2305 { 2306 uint32_t pvr; 2307 2308 asm ("mfpvr %0" 2309 : "=r"(pvr)); 2310 return pvr; 2311 } 2312 2313 static void alter_insns(uint64_t *word, uint64_t flags, bool on) 2314 { 2315 if (on) { 2316 *word |= flags; 2317 } else { 2318 *word &= ~flags; 2319 } 2320 } 2321 2322 static void kvmppc_host_cpu_initfn(Object *obj) 2323 { 2324 assert(kvm_enabled()); 2325 } 2326 2327 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data) 2328 { 2329 DeviceClass *dc = DEVICE_CLASS(oc); 2330 PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc); 2331 uint32_t vmx = kvmppc_get_vmx(); 2332 uint32_t dfp = kvmppc_get_dfp(); 2333 uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size"); 2334 uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size"); 2335 2336 /* Now fix up the class with information we can query from the host */ 2337 pcc->pvr = mfpvr(); 2338 2339 if (vmx != -1) { 2340 /* Only override when we know what the host supports */ 2341 alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0); 2342 alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1); 2343 } 2344 if (dfp != -1) { 2345 /* Only override when we know what the host supports */ 2346 alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp); 2347 } 2348 2349 if (dcache_size != -1) { 2350 pcc->l1_dcache_size = dcache_size; 2351 } 2352 2353 if (icache_size != -1) { 2354 pcc->l1_icache_size = icache_size; 2355 } 2356 2357 /* Reason: kvmppc_host_cpu_initfn() dies when !kvm_enabled() */ 2358 dc->cannot_destroy_with_object_finalize_yet = true; 2359 } 2360 2361 bool kvmppc_has_cap_epr(void) 2362 { 2363 return cap_epr; 2364 } 2365 2366 bool kvmppc_has_cap_htab_fd(void) 2367 { 2368 return cap_htab_fd; 2369 } 2370 2371 bool kvmppc_has_cap_fixup_hcalls(void) 2372 { 2373 return cap_fixup_hcalls; 2374 } 2375 2376 bool kvmppc_has_cap_htm(void) 2377 { 2378 return cap_htm; 2379 } 2380 2381 static PowerPCCPUClass *ppc_cpu_get_family_class(PowerPCCPUClass *pcc) 2382 { 2383 ObjectClass *oc = OBJECT_CLASS(pcc); 2384 2385 while (oc && !object_class_is_abstract(oc)) { 2386 oc = object_class_get_parent(oc); 2387 } 2388 assert(oc); 2389 2390 return POWERPC_CPU_CLASS(oc); 2391 } 2392 2393 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void) 2394 { 2395 uint32_t host_pvr = mfpvr(); 2396 PowerPCCPUClass *pvr_pcc; 2397 2398 pvr_pcc = ppc_cpu_class_by_pvr(host_pvr); 2399 if (pvr_pcc == NULL) { 2400 pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr); 2401 } 2402 2403 return pvr_pcc; 2404 } 2405 2406 static int kvm_ppc_register_host_cpu_type(void) 2407 { 2408 TypeInfo type_info = { 2409 .name = TYPE_HOST_POWERPC_CPU, 2410 .instance_init = kvmppc_host_cpu_initfn, 2411 .class_init = kvmppc_host_cpu_class_init, 2412 }; 2413 PowerPCCPUClass *pvr_pcc; 2414 DeviceClass *dc; 2415 2416 pvr_pcc = kvm_ppc_get_host_cpu_class(); 2417 if (pvr_pcc == NULL) { 2418 return -1; 2419 } 2420 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc)); 2421 type_register(&type_info); 2422 2423 /* Register generic family CPU class for a family */ 2424 pvr_pcc = ppc_cpu_get_family_class(pvr_pcc); 2425 dc = DEVICE_CLASS(pvr_pcc); 2426 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc)); 2427 type_info.name = g_strdup_printf("%s-"TYPE_POWERPC_CPU, dc->desc); 2428 type_register(&type_info); 2429 2430 #if defined(TARGET_PPC64) 2431 type_info.name = g_strdup_printf("%s-"TYPE_SPAPR_CPU_CORE, "host"); 2432 type_info.parent = TYPE_SPAPR_CPU_CORE, 2433 type_info.instance_size = sizeof(sPAPRCPUCore); 2434 type_info.instance_init = NULL; 2435 type_info.class_init = spapr_cpu_core_class_init; 2436 type_info.class_data = (void *) "host"; 2437 type_register(&type_info); 2438 g_free((void *)type_info.name); 2439 2440 /* Register generic spapr CPU family class for current host CPU type */ 2441 type_info.name = g_strdup_printf("%s-"TYPE_SPAPR_CPU_CORE, dc->desc); 2442 type_info.class_data = (void *) dc->desc; 2443 type_register(&type_info); 2444 g_free((void *)type_info.name); 2445 #endif 2446 2447 return 0; 2448 } 2449 2450 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function) 2451 { 2452 struct kvm_rtas_token_args args = { 2453 .token = token, 2454 }; 2455 2456 if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) { 2457 return -ENOENT; 2458 } 2459 2460 strncpy(args.name, function, sizeof(args.name)); 2461 2462 return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args); 2463 } 2464 2465 int kvmppc_get_htab_fd(bool write) 2466 { 2467 struct kvm_get_htab_fd s = { 2468 .flags = write ? KVM_GET_HTAB_WRITE : 0, 2469 .start_index = 0, 2470 }; 2471 2472 if (!cap_htab_fd) { 2473 fprintf(stderr, "KVM version doesn't support saving the hash table\n"); 2474 return -1; 2475 } 2476 2477 return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s); 2478 } 2479 2480 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns) 2481 { 2482 int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); 2483 uint8_t buf[bufsize]; 2484 ssize_t rc; 2485 2486 do { 2487 rc = read(fd, buf, bufsize); 2488 if (rc < 0) { 2489 fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n", 2490 strerror(errno)); 2491 return rc; 2492 } else if (rc) { 2493 uint8_t *buffer = buf; 2494 ssize_t n = rc; 2495 while (n) { 2496 struct kvm_get_htab_header *head = 2497 (struct kvm_get_htab_header *) buffer; 2498 size_t chunksize = sizeof(*head) + 2499 HASH_PTE_SIZE_64 * head->n_valid; 2500 2501 qemu_put_be32(f, head->index); 2502 qemu_put_be16(f, head->n_valid); 2503 qemu_put_be16(f, head->n_invalid); 2504 qemu_put_buffer(f, (void *)(head + 1), 2505 HASH_PTE_SIZE_64 * head->n_valid); 2506 2507 buffer += chunksize; 2508 n -= chunksize; 2509 } 2510 } 2511 } while ((rc != 0) 2512 && ((max_ns < 0) 2513 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns))); 2514 2515 return (rc == 0) ? 1 : 0; 2516 } 2517 2518 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index, 2519 uint16_t n_valid, uint16_t n_invalid) 2520 { 2521 struct kvm_get_htab_header *buf; 2522 size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64; 2523 ssize_t rc; 2524 2525 buf = alloca(chunksize); 2526 buf->index = index; 2527 buf->n_valid = n_valid; 2528 buf->n_invalid = n_invalid; 2529 2530 qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid); 2531 2532 rc = write(fd, buf, chunksize); 2533 if (rc < 0) { 2534 fprintf(stderr, "Error writing KVM hash table: %s\n", 2535 strerror(errno)); 2536 return rc; 2537 } 2538 if (rc != chunksize) { 2539 /* We should never get a short write on a single chunk */ 2540 fprintf(stderr, "Short write, restoring KVM hash table\n"); 2541 return -1; 2542 } 2543 return 0; 2544 } 2545 2546 bool kvm_arch_stop_on_emulation_error(CPUState *cpu) 2547 { 2548 return true; 2549 } 2550 2551 int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr) 2552 { 2553 return 1; 2554 } 2555 2556 int kvm_arch_on_sigbus(int code, void *addr) 2557 { 2558 return 1; 2559 } 2560 2561 void kvm_arch_init_irq_routing(KVMState *s) 2562 { 2563 } 2564 2565 struct kvm_get_htab_buf { 2566 struct kvm_get_htab_header header; 2567 /* 2568 * We require one extra byte for read 2569 */ 2570 target_ulong hpte[(HPTES_PER_GROUP * 2) + 1]; 2571 }; 2572 2573 uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index) 2574 { 2575 int htab_fd; 2576 struct kvm_get_htab_fd ghf; 2577 struct kvm_get_htab_buf *hpte_buf; 2578 2579 ghf.flags = 0; 2580 ghf.start_index = pte_index; 2581 htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf); 2582 if (htab_fd < 0) { 2583 goto error_out; 2584 } 2585 2586 hpte_buf = g_malloc0(sizeof(*hpte_buf)); 2587 /* 2588 * Read the hpte group 2589 */ 2590 if (read(htab_fd, hpte_buf, sizeof(*hpte_buf)) < 0) { 2591 goto out_close; 2592 } 2593 2594 close(htab_fd); 2595 return (uint64_t)(uintptr_t) hpte_buf->hpte; 2596 2597 out_close: 2598 g_free(hpte_buf); 2599 close(htab_fd); 2600 error_out: 2601 return 0; 2602 } 2603 2604 void kvmppc_hash64_free_pteg(uint64_t token) 2605 { 2606 struct kvm_get_htab_buf *htab_buf; 2607 2608 htab_buf = container_of((void *)(uintptr_t) token, struct kvm_get_htab_buf, 2609 hpte); 2610 g_free(htab_buf); 2611 return; 2612 } 2613 2614 void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong pte_index, 2615 target_ulong pte0, target_ulong pte1) 2616 { 2617 int htab_fd; 2618 struct kvm_get_htab_fd ghf; 2619 struct kvm_get_htab_buf hpte_buf; 2620 2621 ghf.flags = 0; 2622 ghf.start_index = 0; /* Ignored */ 2623 htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf); 2624 if (htab_fd < 0) { 2625 goto error_out; 2626 } 2627 2628 hpte_buf.header.n_valid = 1; 2629 hpte_buf.header.n_invalid = 0; 2630 hpte_buf.header.index = pte_index; 2631 hpte_buf.hpte[0] = pte0; 2632 hpte_buf.hpte[1] = pte1; 2633 /* 2634 * Write the hpte entry. 2635 * CAUTION: write() has the warn_unused_result attribute. Hence we 2636 * need to check the return value, even though we do nothing. 2637 */ 2638 if (write(htab_fd, &hpte_buf, sizeof(hpte_buf)) < 0) { 2639 goto out_close; 2640 } 2641 2642 out_close: 2643 close(htab_fd); 2644 return; 2645 2646 error_out: 2647 return; 2648 } 2649 2650 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route, 2651 uint64_t address, uint32_t data, PCIDevice *dev) 2652 { 2653 return 0; 2654 } 2655 2656 int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route, 2657 int vector, PCIDevice *dev) 2658 { 2659 return 0; 2660 } 2661 2662 int kvm_arch_release_virq_post(int virq) 2663 { 2664 return 0; 2665 } 2666 2667 int kvm_arch_msi_data_to_gsi(uint32_t data) 2668 { 2669 return data & 0xffff; 2670 } 2671 2672 int kvmppc_enable_hwrng(void) 2673 { 2674 if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) { 2675 return -1; 2676 } 2677 2678 return kvmppc_enable_hcall(kvm_state, H_RANDOM); 2679 } 2680