1 /* 2 * PowerPC implementation of KVM hooks 3 * 4 * Copyright IBM Corp. 2007 5 * Copyright (C) 2011 Freescale Semiconductor, Inc. 6 * 7 * Authors: 8 * Jerone Young <jyoung5@us.ibm.com> 9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com> 10 * Hollis Blanchard <hollisb@us.ibm.com> 11 * 12 * This work is licensed under the terms of the GNU GPL, version 2 or later. 13 * See the COPYING file in the top-level directory. 14 * 15 */ 16 17 #include "qemu/osdep.h" 18 #include <dirent.h> 19 #include <sys/ioctl.h> 20 #include <sys/vfs.h> 21 22 #include <linux/kvm.h> 23 24 #include "qemu-common.h" 25 #include "qemu/error-report.h" 26 #include "cpu.h" 27 #include "cpu-models.h" 28 #include "qemu/timer.h" 29 #include "sysemu/sysemu.h" 30 #include "sysemu/hw_accel.h" 31 #include "sysemu/numa.h" 32 #include "kvm_ppc.h" 33 #include "sysemu/cpus.h" 34 #include "sysemu/device_tree.h" 35 #include "mmu-hash64.h" 36 37 #include "hw/sysbus.h" 38 #include "hw/ppc/spapr.h" 39 #include "hw/ppc/spapr_vio.h" 40 #include "hw/ppc/spapr_cpu_core.h" 41 #include "hw/ppc/ppc.h" 42 #include "sysemu/watchdog.h" 43 #include "trace.h" 44 #include "exec/gdbstub.h" 45 #include "exec/memattrs.h" 46 #include "sysemu/hostmem.h" 47 #include "qemu/cutils.h" 48 #if defined(TARGET_PPC64) 49 #include "hw/ppc/spapr_cpu_core.h" 50 #endif 51 52 //#define DEBUG_KVM 53 54 #ifdef DEBUG_KVM 55 #define DPRINTF(fmt, ...) \ 56 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0) 57 #else 58 #define DPRINTF(fmt, ...) \ 59 do { } while (0) 60 #endif 61 62 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/" 63 64 const KVMCapabilityInfo kvm_arch_required_capabilities[] = { 65 KVM_CAP_LAST_INFO 66 }; 67 68 static int cap_interrupt_unset = false; 69 static int cap_interrupt_level = false; 70 static int cap_segstate; 71 static int cap_booke_sregs; 72 static int cap_ppc_smt; 73 static int cap_ppc_rma; 74 static int cap_spapr_tce; 75 static int cap_spapr_multitce; 76 static int cap_spapr_vfio; 77 static int cap_hior; 78 static int cap_one_reg; 79 static int cap_epr; 80 static int cap_ppc_watchdog; 81 static int cap_papr; 82 static int cap_htab_fd; 83 static int cap_fixup_hcalls; 84 static int cap_htm; /* Hardware transactional memory support */ 85 86 static uint32_t debug_inst_opcode; 87 88 /* XXX We have a race condition where we actually have a level triggered 89 * interrupt, but the infrastructure can't expose that yet, so the guest 90 * takes but ignores it, goes to sleep and never gets notified that there's 91 * still an interrupt pending. 92 * 93 * As a quick workaround, let's just wake up again 20 ms after we injected 94 * an interrupt. That way we can assure that we're always reinjecting 95 * interrupts in case the guest swallowed them. 96 */ 97 static QEMUTimer *idle_timer; 98 99 static void kvm_kick_cpu(void *opaque) 100 { 101 PowerPCCPU *cpu = opaque; 102 103 qemu_cpu_kick(CPU(cpu)); 104 } 105 106 /* Check whether we are running with KVM-PR (instead of KVM-HV). This 107 * should only be used for fallback tests - generally we should use 108 * explicit capabilities for the features we want, rather than 109 * assuming what is/isn't available depending on the KVM variant. 
*/ 110 static bool kvmppc_is_pr(KVMState *ks) 111 { 112 /* Assume KVM-PR if the GET_PVINFO capability is available */ 113 return kvm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0; 114 } 115 116 static int kvm_ppc_register_host_cpu_type(void); 117 118 int kvm_arch_init(MachineState *ms, KVMState *s) 119 { 120 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ); 121 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL); 122 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE); 123 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS); 124 cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT); 125 cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA); 126 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE); 127 cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE); 128 cap_spapr_vfio = false; 129 cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG); 130 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR); 131 cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR); 132 cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG); 133 /* Note: we don't set cap_papr here, because this capability is 134 * only activated after this by kvmppc_set_papr() */ 135 cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD); 136 cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL); 137 cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM); 138 139 if (!cap_interrupt_level) { 140 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the " 141 "VM to stall at times!\n"); 142 } 143 144 kvm_ppc_register_host_cpu_type(); 145 146 return 0; 147 } 148 149 int kvm_arch_irqchip_create(MachineState *ms, KVMState *s) 150 { 151 return 0; 152 } 153 154 static int kvm_arch_sync_sregs(PowerPCCPU *cpu) 155 { 156 CPUPPCState *cenv = &cpu->env; 157 CPUState *cs = CPU(cpu); 158 struct kvm_sregs sregs; 159 int ret; 160 161 if (cenv->excp_model == POWERPC_EXCP_BOOKE) { 162 /* What we're really trying to say is "if we're on BookE, we use 163 the native PVR for now". This is the only sane way to check 164 it though, so we potentially confuse users that they can run 165 BookE guests on BookS. 
Let's hope nobody dares enough :) */ 166 return 0; 167 } else { 168 if (!cap_segstate) { 169 fprintf(stderr, "kvm error: missing PVR setting capability\n"); 170 return -ENOSYS; 171 } 172 } 173 174 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs); 175 if (ret) { 176 return ret; 177 } 178 179 sregs.pvr = cenv->spr[SPR_PVR]; 180 return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs); 181 } 182 183 /* Set up a shared TLB array with KVM */ 184 static int kvm_booke206_tlb_init(PowerPCCPU *cpu) 185 { 186 CPUPPCState *env = &cpu->env; 187 CPUState *cs = CPU(cpu); 188 struct kvm_book3e_206_tlb_params params = {}; 189 struct kvm_config_tlb cfg = {}; 190 unsigned int entries = 0; 191 int ret, i; 192 193 if (!kvm_enabled() || 194 !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) { 195 return 0; 196 } 197 198 assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN); 199 200 for (i = 0; i < BOOKE206_MAX_TLBN; i++) { 201 params.tlb_sizes[i] = booke206_tlb_size(env, i); 202 params.tlb_ways[i] = booke206_tlb_ways(env, i); 203 entries += params.tlb_sizes[i]; 204 } 205 206 assert(entries == env->nb_tlb); 207 assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t)); 208 209 env->tlb_dirty = true; 210 211 cfg.array = (uintptr_t)env->tlb.tlbm; 212 cfg.array_len = sizeof(ppcmas_tlb_t) * entries; 213 cfg.params = (uintptr_t)¶ms; 214 cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV; 215 216 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg); 217 if (ret < 0) { 218 fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n", 219 __func__, strerror(-ret)); 220 return ret; 221 } 222 223 env->kvm_sw_tlb = true; 224 return 0; 225 } 226 227 228 #if defined(TARGET_PPC64) 229 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu, 230 struct kvm_ppc_smmu_info *info) 231 { 232 CPUPPCState *env = &cpu->env; 233 CPUState *cs = CPU(cpu); 234 235 memset(info, 0, sizeof(*info)); 236 237 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so 238 * need to "guess" what the supported page sizes are. 239 * 240 * For that to work we make a few assumptions: 241 * 242 * - Check whether we are running "PR" KVM which only supports 4K 243 * and 16M pages, but supports them regardless of the backing 244 * store characteritics. We also don't support 1T segments. 245 * 246 * This is safe as if HV KVM ever supports that capability or PR 247 * KVM grows supports for more page/segment sizes, those versions 248 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we 249 * will not hit this fallback 250 * 251 * - Else we are running HV KVM. This means we only support page 252 * sizes that fit in the backing store. Additionally we only 253 * advertize 64K pages if the processor is ARCH 2.06 and we assume 254 * P7 encodings for the SLB and hash table. Here too, we assume 255 * support for any newer processor will mean a kernel that 256 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit 257 * this fallback. 
258 */ 259 if (kvmppc_is_pr(cs->kvm_state)) { 260 /* No flags */ 261 info->flags = 0; 262 info->slb_size = 64; 263 264 /* Standard 4k base page size segment */ 265 info->sps[0].page_shift = 12; 266 info->sps[0].slb_enc = 0; 267 info->sps[0].enc[0].page_shift = 12; 268 info->sps[0].enc[0].pte_enc = 0; 269 270 /* Standard 16M large page size segment */ 271 info->sps[1].page_shift = 24; 272 info->sps[1].slb_enc = SLB_VSID_L; 273 info->sps[1].enc[0].page_shift = 24; 274 info->sps[1].enc[0].pte_enc = 0; 275 } else { 276 int i = 0; 277 278 /* HV KVM has backing store size restrictions */ 279 info->flags = KVM_PPC_PAGE_SIZES_REAL; 280 281 if (env->mmu_model & POWERPC_MMU_1TSEG) { 282 info->flags |= KVM_PPC_1T_SEGMENTS; 283 } 284 285 if (env->mmu_model == POWERPC_MMU_2_06 || 286 env->mmu_model == POWERPC_MMU_2_07) { 287 info->slb_size = 32; 288 } else { 289 info->slb_size = 64; 290 } 291 292 /* Standard 4k base page size segment */ 293 info->sps[i].page_shift = 12; 294 info->sps[i].slb_enc = 0; 295 info->sps[i].enc[0].page_shift = 12; 296 info->sps[i].enc[0].pte_enc = 0; 297 i++; 298 299 /* 64K on MMU 2.06 and later */ 300 if (env->mmu_model == POWERPC_MMU_2_06 || 301 env->mmu_model == POWERPC_MMU_2_07) { 302 info->sps[i].page_shift = 16; 303 info->sps[i].slb_enc = 0x110; 304 info->sps[i].enc[0].page_shift = 16; 305 info->sps[i].enc[0].pte_enc = 1; 306 i++; 307 } 308 309 /* Standard 16M large page size segment */ 310 info->sps[i].page_shift = 24; 311 info->sps[i].slb_enc = SLB_VSID_L; 312 info->sps[i].enc[0].page_shift = 24; 313 info->sps[i].enc[0].pte_enc = 0; 314 } 315 } 316 317 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info) 318 { 319 CPUState *cs = CPU(cpu); 320 int ret; 321 322 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) { 323 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info); 324 if (ret == 0) { 325 return; 326 } 327 } 328 329 kvm_get_fallback_smmu_info(cpu, info); 330 } 331 332 static long gethugepagesize(const char *mem_path) 333 { 334 struct statfs fs; 335 int ret; 336 337 do { 338 ret = statfs(mem_path, &fs); 339 } while (ret != 0 && errno == EINTR); 340 341 if (ret != 0) { 342 fprintf(stderr, "Couldn't statfs() memory path: %s\n", 343 strerror(errno)); 344 exit(1); 345 } 346 347 #define HUGETLBFS_MAGIC 0x958458f6 348 349 if (fs.f_type != HUGETLBFS_MAGIC) { 350 /* Explicit mempath, but it's ordinary pages */ 351 return getpagesize(); 352 } 353 354 /* It's hugepage, return the huge page size */ 355 return fs.f_bsize; 356 } 357 358 /* 359 * FIXME TOCTTOU: this iterates over memory backends' mem-path, which 360 * may or may not name the same files / on the same filesystem now as 361 * when we actually open and map them. Iterate over the file 362 * descriptors instead, and use qemu_fd_getpagesize(). 
363 */ 364 static int find_max_supported_pagesize(Object *obj, void *opaque) 365 { 366 char *mem_path; 367 long *hpsize_min = opaque; 368 369 if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) { 370 mem_path = object_property_get_str(obj, "mem-path", NULL); 371 if (mem_path) { 372 long hpsize = gethugepagesize(mem_path); 373 if (hpsize < *hpsize_min) { 374 *hpsize_min = hpsize; 375 } 376 } else { 377 *hpsize_min = getpagesize(); 378 } 379 } 380 381 return 0; 382 } 383 384 static long getrampagesize(void) 385 { 386 long hpsize = LONG_MAX; 387 long mainrampagesize; 388 Object *memdev_root; 389 390 if (mem_path) { 391 mainrampagesize = gethugepagesize(mem_path); 392 } else { 393 mainrampagesize = getpagesize(); 394 } 395 396 /* it's possible we have memory-backend objects with 397 * hugepage-backed RAM. these may get mapped into system 398 * address space via -numa parameters or memory hotplug 399 * hooks. we want to take these into account, but we 400 * also want to make sure these supported hugepage 401 * sizes are applicable across the entire range of memory 402 * we may boot from, so we take the min across all 403 * backends, and assume normal pages in cases where a 404 * backend isn't backed by hugepages. 405 */ 406 memdev_root = object_resolve_path("/objects", NULL); 407 if (memdev_root) { 408 object_child_foreach(memdev_root, find_max_supported_pagesize, &hpsize); 409 } 410 if (hpsize == LONG_MAX) { 411 /* No additional memory regions found ==> Report main RAM page size */ 412 return mainrampagesize; 413 } 414 415 /* If NUMA is disabled or the NUMA nodes are not backed with a 416 * memory-backend, then there is at least one node using "normal" RAM, 417 * so if its page size is smaller we have got to report that size instead. 418 */ 419 if (hpsize > mainrampagesize && 420 (nb_numa_nodes == 0 || numa_info[0].node_memdev == NULL)) { 421 static bool warned; 422 if (!warned) { 423 error_report("Huge page support disabled (n/a for main memory)."); 424 warned = true; 425 } 426 return mainrampagesize; 427 } 428 429 return hpsize; 430 } 431 432 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift) 433 { 434 if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) { 435 return true; 436 } 437 438 return (1ul << shift) <= rampgsize; 439 } 440 441 static long max_cpu_page_size; 442 443 static void kvm_fixup_page_sizes(PowerPCCPU *cpu) 444 { 445 static struct kvm_ppc_smmu_info smmu_info; 446 static bool has_smmu_info; 447 CPUPPCState *env = &cpu->env; 448 int iq, ik, jq, jk; 449 bool has_64k_pages = false; 450 451 /* We only handle page sizes for 64-bit server guests for now */ 452 if (!(env->mmu_model & POWERPC_MMU_64)) { 453 return; 454 } 455 456 /* Collect MMU info from kernel if not already */ 457 if (!has_smmu_info) { 458 kvm_get_smmu_info(cpu, &smmu_info); 459 has_smmu_info = true; 460 } 461 462 if (!max_cpu_page_size) { 463 max_cpu_page_size = getrampagesize(); 464 } 465 466 /* Convert to QEMU form */ 467 memset(&env->sps, 0, sizeof(env->sps)); 468 469 /* If we have HV KVM, we need to forbid CI large pages if our 470 * host page size is smaller than 64K. 471 */ 472 if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) { 473 env->ci_large_pages = getpagesize() >= 0x10000; 474 } 475 476 /* 477 * XXX This loop should be an entry wide AND of the capabilities that 478 * the selected CPU has with the capabilities that KVM supports. 
479 */ 480 for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) { 481 struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq]; 482 struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik]; 483 484 if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size, 485 ksps->page_shift)) { 486 continue; 487 } 488 qsps->page_shift = ksps->page_shift; 489 qsps->slb_enc = ksps->slb_enc; 490 for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) { 491 if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size, 492 ksps->enc[jk].page_shift)) { 493 continue; 494 } 495 if (ksps->enc[jk].page_shift == 16) { 496 has_64k_pages = true; 497 } 498 qsps->enc[jq].page_shift = ksps->enc[jk].page_shift; 499 qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc; 500 if (++jq >= PPC_PAGE_SIZES_MAX_SZ) { 501 break; 502 } 503 } 504 if (++iq >= PPC_PAGE_SIZES_MAX_SZ) { 505 break; 506 } 507 } 508 env->slb_nr = smmu_info.slb_size; 509 if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) { 510 env->mmu_model &= ~POWERPC_MMU_1TSEG; 511 } 512 if (!has_64k_pages) { 513 env->mmu_model &= ~POWERPC_MMU_64K; 514 } 515 } 516 517 bool kvmppc_is_mem_backend_page_size_ok(char *obj_path) 518 { 519 Object *mem_obj = object_resolve_path(obj_path, NULL); 520 char *mempath = object_property_get_str(mem_obj, "mem-path", NULL); 521 long pagesize; 522 523 if (mempath) { 524 pagesize = gethugepagesize(mempath); 525 } else { 526 pagesize = getpagesize(); 527 } 528 529 return pagesize >= max_cpu_page_size; 530 } 531 532 #else /* defined (TARGET_PPC64) */ 533 534 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu) 535 { 536 } 537 538 bool kvmppc_is_mem_backend_page_size_ok(char *obj_path) 539 { 540 return true; 541 } 542 543 #endif /* !defined (TARGET_PPC64) */ 544 545 unsigned long kvm_arch_vcpu_id(CPUState *cpu) 546 { 547 return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu)); 548 } 549 550 /* e500 supports 2 h/w breakpoint and 2 watchpoint. 551 * book3s supports only 1 watchpoint, so array size 552 * of 4 is sufficient for now. 553 */ 554 #define MAX_HW_BKPTS 4 555 556 static struct HWBreakpoint { 557 target_ulong addr; 558 int type; 559 } hw_debug_points[MAX_HW_BKPTS]; 560 561 static CPUWatchpoint hw_watchpoint; 562 563 /* Default there is no breakpoint and watchpoint supported */ 564 static int max_hw_breakpoint; 565 static int max_hw_watchpoint; 566 static int nb_hw_breakpoint; 567 static int nb_hw_watchpoint; 568 569 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv) 570 { 571 if (cenv->excp_model == POWERPC_EXCP_BOOKE) { 572 max_hw_breakpoint = 2; 573 max_hw_watchpoint = 2; 574 } 575 576 if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) { 577 fprintf(stderr, "Error initializing h/w breakpoints\n"); 578 return; 579 } 580 } 581 582 int kvm_arch_init_vcpu(CPUState *cs) 583 { 584 PowerPCCPU *cpu = POWERPC_CPU(cs); 585 CPUPPCState *cenv = &cpu->env; 586 int ret; 587 588 /* Gather server mmu info from KVM and update the CPU state */ 589 kvm_fixup_page_sizes(cpu); 590 591 /* Synchronize sregs with kvm */ 592 ret = kvm_arch_sync_sregs(cpu); 593 if (ret) { 594 if (ret == -EINVAL) { 595 error_report("Register sync failed... 
If you're using kvm-hv.ko," 596 " only \"-cpu host\" is possible"); 597 } 598 return ret; 599 } 600 601 idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu); 602 603 switch (cenv->mmu_model) { 604 case POWERPC_MMU_BOOKE206: 605 /* This target supports access to KVM's guest TLB */ 606 ret = kvm_booke206_tlb_init(cpu); 607 break; 608 case POWERPC_MMU_2_07: 609 if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) { 610 /* KVM-HV has transactional memory on POWER8 also without the 611 * KVM_CAP_PPC_HTM extension, so enable it here instead. */ 612 cap_htm = true; 613 } 614 break; 615 default: 616 break; 617 } 618 619 kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode); 620 kvmppc_hw_debug_points_init(cenv); 621 622 return ret; 623 } 624 625 static void kvm_sw_tlb_put(PowerPCCPU *cpu) 626 { 627 CPUPPCState *env = &cpu->env; 628 CPUState *cs = CPU(cpu); 629 struct kvm_dirty_tlb dirty_tlb; 630 unsigned char *bitmap; 631 int ret; 632 633 if (!env->kvm_sw_tlb) { 634 return; 635 } 636 637 bitmap = g_malloc((env->nb_tlb + 7) / 8); 638 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8); 639 640 dirty_tlb.bitmap = (uintptr_t)bitmap; 641 dirty_tlb.num_dirty = env->nb_tlb; 642 643 ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb); 644 if (ret) { 645 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n", 646 __func__, strerror(-ret)); 647 } 648 649 g_free(bitmap); 650 } 651 652 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr) 653 { 654 PowerPCCPU *cpu = POWERPC_CPU(cs); 655 CPUPPCState *env = &cpu->env; 656 union { 657 uint32_t u32; 658 uint64_t u64; 659 } val; 660 struct kvm_one_reg reg = { 661 .id = id, 662 .addr = (uintptr_t) &val, 663 }; 664 int ret; 665 666 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 667 if (ret != 0) { 668 trace_kvm_failed_spr_get(spr, strerror(errno)); 669 } else { 670 switch (id & KVM_REG_SIZE_MASK) { 671 case KVM_REG_SIZE_U32: 672 env->spr[spr] = val.u32; 673 break; 674 675 case KVM_REG_SIZE_U64: 676 env->spr[spr] = val.u64; 677 break; 678 679 default: 680 /* Don't handle this size yet */ 681 abort(); 682 } 683 } 684 } 685 686 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr) 687 { 688 PowerPCCPU *cpu = POWERPC_CPU(cs); 689 CPUPPCState *env = &cpu->env; 690 union { 691 uint32_t u32; 692 uint64_t u64; 693 } val; 694 struct kvm_one_reg reg = { 695 .id = id, 696 .addr = (uintptr_t) &val, 697 }; 698 int ret; 699 700 switch (id & KVM_REG_SIZE_MASK) { 701 case KVM_REG_SIZE_U32: 702 val.u32 = env->spr[spr]; 703 break; 704 705 case KVM_REG_SIZE_U64: 706 val.u64 = env->spr[spr]; 707 break; 708 709 default: 710 /* Don't handle this size yet */ 711 abort(); 712 } 713 714 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 715 if (ret != 0) { 716 trace_kvm_failed_spr_set(spr, strerror(errno)); 717 } 718 } 719 720 static int kvm_put_fp(CPUState *cs) 721 { 722 PowerPCCPU *cpu = POWERPC_CPU(cs); 723 CPUPPCState *env = &cpu->env; 724 struct kvm_one_reg reg; 725 int i; 726 int ret; 727 728 if (env->insns_flags & PPC_FLOAT) { 729 uint64_t fpscr = env->fpscr; 730 bool vsx = !!(env->insns_flags2 & PPC2_VSX); 731 732 reg.id = KVM_REG_PPC_FPSCR; 733 reg.addr = (uintptr_t)&fpscr; 734 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 735 if (ret < 0) { 736 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno)); 737 return ret; 738 } 739 740 for (i = 0; i < 32; i++) { 741 uint64_t vsr[2]; 742 743 #ifdef HOST_WORDS_BIGENDIAN 744 vsr[0] = float64_val(env->fpr[i]); 745 vsr[1] = env->vsr[i]; 746 #else 747 vsr[0] = env->vsr[i]; 748 vsr[1] = float64_val(env->fpr[i]); 749 #endif 750 
reg.addr = (uintptr_t) &vsr; 751 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i); 752 753 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 754 if (ret < 0) { 755 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR", 756 i, strerror(errno)); 757 return ret; 758 } 759 } 760 } 761 762 if (env->insns_flags & PPC_ALTIVEC) { 763 reg.id = KVM_REG_PPC_VSCR; 764 reg.addr = (uintptr_t)&env->vscr; 765 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 766 if (ret < 0) { 767 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno)); 768 return ret; 769 } 770 771 for (i = 0; i < 32; i++) { 772 reg.id = KVM_REG_PPC_VR(i); 773 reg.addr = (uintptr_t)&env->avr[i]; 774 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 775 if (ret < 0) { 776 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno)); 777 return ret; 778 } 779 } 780 } 781 782 return 0; 783 } 784 785 static int kvm_get_fp(CPUState *cs) 786 { 787 PowerPCCPU *cpu = POWERPC_CPU(cs); 788 CPUPPCState *env = &cpu->env; 789 struct kvm_one_reg reg; 790 int i; 791 int ret; 792 793 if (env->insns_flags & PPC_FLOAT) { 794 uint64_t fpscr; 795 bool vsx = !!(env->insns_flags2 & PPC2_VSX); 796 797 reg.id = KVM_REG_PPC_FPSCR; 798 reg.addr = (uintptr_t)&fpscr; 799 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 800 if (ret < 0) { 801 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno)); 802 return ret; 803 } else { 804 env->fpscr = fpscr; 805 } 806 807 for (i = 0; i < 32; i++) { 808 uint64_t vsr[2]; 809 810 reg.addr = (uintptr_t) &vsr; 811 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i); 812 813 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 814 if (ret < 0) { 815 DPRINTF("Unable to get %s%d from KVM: %s\n", 816 vsx ? "VSR" : "FPR", i, strerror(errno)); 817 return ret; 818 } else { 819 #ifdef HOST_WORDS_BIGENDIAN 820 env->fpr[i] = vsr[0]; 821 if (vsx) { 822 env->vsr[i] = vsr[1]; 823 } 824 #else 825 env->fpr[i] = vsr[1]; 826 if (vsx) { 827 env->vsr[i] = vsr[0]; 828 } 829 #endif 830 } 831 } 832 } 833 834 if (env->insns_flags & PPC_ALTIVEC) { 835 reg.id = KVM_REG_PPC_VSCR; 836 reg.addr = (uintptr_t)&env->vscr; 837 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 838 if (ret < 0) { 839 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno)); 840 return ret; 841 } 842 843 for (i = 0; i < 32; i++) { 844 reg.id = KVM_REG_PPC_VR(i); 845 reg.addr = (uintptr_t)&env->avr[i]; 846 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 847 if (ret < 0) { 848 DPRINTF("Unable to get VR%d from KVM: %s\n", 849 i, strerror(errno)); 850 return ret; 851 } 852 } 853 } 854 855 return 0; 856 } 857 858 #if defined(TARGET_PPC64) 859 static int kvm_get_vpa(CPUState *cs) 860 { 861 PowerPCCPU *cpu = POWERPC_CPU(cs); 862 CPUPPCState *env = &cpu->env; 863 struct kvm_one_reg reg; 864 int ret; 865 866 reg.id = KVM_REG_PPC_VPA_ADDR; 867 reg.addr = (uintptr_t)&env->vpa_addr; 868 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 869 if (ret < 0) { 870 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno)); 871 return ret; 872 } 873 874 assert((uintptr_t)&env->slb_shadow_size 875 == ((uintptr_t)&env->slb_shadow_addr + 8)); 876 reg.id = KVM_REG_PPC_VPA_SLB; 877 reg.addr = (uintptr_t)&env->slb_shadow_addr; 878 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 879 if (ret < 0) { 880 DPRINTF("Unable to get SLB shadow state from KVM: %s\n", 881 strerror(errno)); 882 return ret; 883 } 884 885 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8)); 886 reg.id = KVM_REG_PPC_VPA_DTL; 887 reg.addr = (uintptr_t)&env->dtl_addr; 888 ret = kvm_vcpu_ioctl(cs, 
KVM_GET_ONE_REG, ®); 889 if (ret < 0) { 890 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n", 891 strerror(errno)); 892 return ret; 893 } 894 895 return 0; 896 } 897 898 static int kvm_put_vpa(CPUState *cs) 899 { 900 PowerPCCPU *cpu = POWERPC_CPU(cs); 901 CPUPPCState *env = &cpu->env; 902 struct kvm_one_reg reg; 903 int ret; 904 905 /* SLB shadow or DTL can't be registered unless a master VPA is 906 * registered. That means when restoring state, if a VPA *is* 907 * registered, we need to set that up first. If not, we need to 908 * deregister the others before deregistering the master VPA */ 909 assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr)); 910 911 if (env->vpa_addr) { 912 reg.id = KVM_REG_PPC_VPA_ADDR; 913 reg.addr = (uintptr_t)&env->vpa_addr; 914 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 915 if (ret < 0) { 916 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno)); 917 return ret; 918 } 919 } 920 921 assert((uintptr_t)&env->slb_shadow_size 922 == ((uintptr_t)&env->slb_shadow_addr + 8)); 923 reg.id = KVM_REG_PPC_VPA_SLB; 924 reg.addr = (uintptr_t)&env->slb_shadow_addr; 925 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 926 if (ret < 0) { 927 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno)); 928 return ret; 929 } 930 931 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8)); 932 reg.id = KVM_REG_PPC_VPA_DTL; 933 reg.addr = (uintptr_t)&env->dtl_addr; 934 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 935 if (ret < 0) { 936 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n", 937 strerror(errno)); 938 return ret; 939 } 940 941 if (!env->vpa_addr) { 942 reg.id = KVM_REG_PPC_VPA_ADDR; 943 reg.addr = (uintptr_t)&env->vpa_addr; 944 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 945 if (ret < 0) { 946 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno)); 947 return ret; 948 } 949 } 950 951 return 0; 952 } 953 #endif /* TARGET_PPC64 */ 954 955 int kvmppc_put_books_sregs(PowerPCCPU *cpu) 956 { 957 CPUPPCState *env = &cpu->env; 958 struct kvm_sregs sregs; 959 int i; 960 961 sregs.pvr = env->spr[SPR_PVR]; 962 963 sregs.u.s.sdr1 = env->spr[SPR_SDR1]; 964 965 /* Sync SLB */ 966 #ifdef TARGET_PPC64 967 for (i = 0; i < ARRAY_SIZE(env->slb); i++) { 968 sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid; 969 if (env->slb[i].esid & SLB_ESID_V) { 970 sregs.u.s.ppc64.slb[i].slbe |= i; 971 } 972 sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid; 973 } 974 #endif 975 976 /* Sync SRs */ 977 for (i = 0; i < 16; i++) { 978 sregs.u.s.ppc32.sr[i] = env->sr[i]; 979 } 980 981 /* Sync BATs */ 982 for (i = 0; i < 8; i++) { 983 /* Beware. 
We have to swap upper and lower bits here */ 984 sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32) 985 | env->DBAT[1][i]; 986 sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32) 987 | env->IBAT[1][i]; 988 } 989 990 return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs); 991 } 992 993 int kvm_arch_put_registers(CPUState *cs, int level) 994 { 995 PowerPCCPU *cpu = POWERPC_CPU(cs); 996 CPUPPCState *env = &cpu->env; 997 struct kvm_regs regs; 998 int ret; 999 int i; 1000 1001 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, ®s); 1002 if (ret < 0) { 1003 return ret; 1004 } 1005 1006 regs.ctr = env->ctr; 1007 regs.lr = env->lr; 1008 regs.xer = cpu_read_xer(env); 1009 regs.msr = env->msr; 1010 regs.pc = env->nip; 1011 1012 regs.srr0 = env->spr[SPR_SRR0]; 1013 regs.srr1 = env->spr[SPR_SRR1]; 1014 1015 regs.sprg0 = env->spr[SPR_SPRG0]; 1016 regs.sprg1 = env->spr[SPR_SPRG1]; 1017 regs.sprg2 = env->spr[SPR_SPRG2]; 1018 regs.sprg3 = env->spr[SPR_SPRG3]; 1019 regs.sprg4 = env->spr[SPR_SPRG4]; 1020 regs.sprg5 = env->spr[SPR_SPRG5]; 1021 regs.sprg6 = env->spr[SPR_SPRG6]; 1022 regs.sprg7 = env->spr[SPR_SPRG7]; 1023 1024 regs.pid = env->spr[SPR_BOOKE_PID]; 1025 1026 for (i = 0;i < 32; i++) 1027 regs.gpr[i] = env->gpr[i]; 1028 1029 regs.cr = 0; 1030 for (i = 0; i < 8; i++) { 1031 regs.cr |= (env->crf[i] & 15) << (4 * (7 - i)); 1032 } 1033 1034 ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, ®s); 1035 if (ret < 0) 1036 return ret; 1037 1038 kvm_put_fp(cs); 1039 1040 if (env->tlb_dirty) { 1041 kvm_sw_tlb_put(cpu); 1042 env->tlb_dirty = false; 1043 } 1044 1045 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) { 1046 ret = kvmppc_put_books_sregs(cpu); 1047 if (ret < 0) { 1048 return ret; 1049 } 1050 } 1051 1052 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) { 1053 kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR); 1054 } 1055 1056 if (cap_one_reg) { 1057 int i; 1058 1059 /* We deliberately ignore errors here, for kernels which have 1060 * the ONE_REG calls, but don't support the specific 1061 * registers, there's a reasonable chance things will still 1062 * work, at least until we try to migrate. 
*/ 1063 for (i = 0; i < 1024; i++) { 1064 uint64_t id = env->spr_cb[i].one_reg_id; 1065 1066 if (id != 0) { 1067 kvm_put_one_spr(cs, id, i); 1068 } 1069 } 1070 1071 #ifdef TARGET_PPC64 1072 if (msr_ts) { 1073 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) { 1074 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]); 1075 } 1076 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) { 1077 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]); 1078 } 1079 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr); 1080 kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr); 1081 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr); 1082 kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr); 1083 kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr); 1084 kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr); 1085 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave); 1086 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr); 1087 kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr); 1088 kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar); 1089 } 1090 1091 if (cap_papr) { 1092 if (kvm_put_vpa(cs) < 0) { 1093 DPRINTF("Warning: Unable to set VPA information to KVM\n"); 1094 } 1095 } 1096 1097 kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset); 1098 #endif /* TARGET_PPC64 */ 1099 } 1100 1101 return ret; 1102 } 1103 1104 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor) 1105 { 1106 env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR]; 1107 } 1108 1109 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu) 1110 { 1111 CPUPPCState *env = &cpu->env; 1112 struct kvm_sregs sregs; 1113 int ret; 1114 1115 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs); 1116 if (ret < 0) { 1117 return ret; 1118 } 1119 1120 if (sregs.u.e.features & KVM_SREGS_E_BASE) { 1121 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0; 1122 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1; 1123 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr; 1124 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear; 1125 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr; 1126 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr; 1127 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr; 1128 env->spr[SPR_DECR] = sregs.u.e.dec; 1129 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff; 1130 env->spr[SPR_TBU] = sregs.u.e.tb >> 32; 1131 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave; 1132 } 1133 1134 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) { 1135 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir; 1136 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0; 1137 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1; 1138 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar; 1139 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr; 1140 } 1141 1142 if (sregs.u.e.features & KVM_SREGS_E_64) { 1143 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr; 1144 } 1145 1146 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) { 1147 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8; 1148 } 1149 1150 if (sregs.u.e.features & KVM_SREGS_E_IVOR) { 1151 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0]; 1152 kvm_sync_excp(env, POWERPC_EXCP_CRITICAL, SPR_BOOKE_IVOR0); 1153 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1]; 1154 kvm_sync_excp(env, POWERPC_EXCP_MCHECK, SPR_BOOKE_IVOR1); 1155 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2]; 1156 kvm_sync_excp(env, POWERPC_EXCP_DSI, SPR_BOOKE_IVOR2); 1157 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3]; 1158 kvm_sync_excp(env, POWERPC_EXCP_ISI, SPR_BOOKE_IVOR3); 1159 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4]; 1160 kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL, SPR_BOOKE_IVOR4); 1161 
env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5]; 1162 kvm_sync_excp(env, POWERPC_EXCP_ALIGN, SPR_BOOKE_IVOR5); 1163 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6]; 1164 kvm_sync_excp(env, POWERPC_EXCP_PROGRAM, SPR_BOOKE_IVOR6); 1165 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7]; 1166 kvm_sync_excp(env, POWERPC_EXCP_FPU, SPR_BOOKE_IVOR7); 1167 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8]; 1168 kvm_sync_excp(env, POWERPC_EXCP_SYSCALL, SPR_BOOKE_IVOR8); 1169 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9]; 1170 kvm_sync_excp(env, POWERPC_EXCP_APU, SPR_BOOKE_IVOR9); 1171 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10]; 1172 kvm_sync_excp(env, POWERPC_EXCP_DECR, SPR_BOOKE_IVOR10); 1173 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11]; 1174 kvm_sync_excp(env, POWERPC_EXCP_FIT, SPR_BOOKE_IVOR11); 1175 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12]; 1176 kvm_sync_excp(env, POWERPC_EXCP_WDT, SPR_BOOKE_IVOR12); 1177 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13]; 1178 kvm_sync_excp(env, POWERPC_EXCP_DTLB, SPR_BOOKE_IVOR13); 1179 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14]; 1180 kvm_sync_excp(env, POWERPC_EXCP_ITLB, SPR_BOOKE_IVOR14); 1181 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15]; 1182 kvm_sync_excp(env, POWERPC_EXCP_DEBUG, SPR_BOOKE_IVOR15); 1183 1184 if (sregs.u.e.features & KVM_SREGS_E_SPE) { 1185 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0]; 1186 kvm_sync_excp(env, POWERPC_EXCP_SPEU, SPR_BOOKE_IVOR32); 1187 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1]; 1188 kvm_sync_excp(env, POWERPC_EXCP_EFPDI, SPR_BOOKE_IVOR33); 1189 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2]; 1190 kvm_sync_excp(env, POWERPC_EXCP_EFPRI, SPR_BOOKE_IVOR34); 1191 } 1192 1193 if (sregs.u.e.features & KVM_SREGS_E_PM) { 1194 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3]; 1195 kvm_sync_excp(env, POWERPC_EXCP_EPERFM, SPR_BOOKE_IVOR35); 1196 } 1197 1198 if (sregs.u.e.features & KVM_SREGS_E_PC) { 1199 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4]; 1200 kvm_sync_excp(env, POWERPC_EXCP_DOORI, SPR_BOOKE_IVOR36); 1201 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5]; 1202 kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37); 1203 } 1204 } 1205 1206 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) { 1207 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0; 1208 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1; 1209 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2; 1210 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff; 1211 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4; 1212 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6; 1213 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32; 1214 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg; 1215 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0]; 1216 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1]; 1217 } 1218 1219 if (sregs.u.e.features & KVM_SREGS_EXP) { 1220 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr; 1221 } 1222 1223 if (sregs.u.e.features & KVM_SREGS_E_PD) { 1224 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc; 1225 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc; 1226 } 1227 1228 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) { 1229 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr; 1230 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar; 1231 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0; 1232 1233 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) { 1234 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1; 1235 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2; 1236 } 1237 } 1238 1239 return 0; 1240 } 1241 1242 static int 
kvmppc_get_books_sregs(PowerPCCPU *cpu) 1243 { 1244 CPUPPCState *env = &cpu->env; 1245 struct kvm_sregs sregs; 1246 int ret; 1247 int i; 1248 1249 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs); 1250 if (ret < 0) { 1251 return ret; 1252 } 1253 1254 if (!env->external_htab) { 1255 ppc_store_sdr1(env, sregs.u.s.sdr1); 1256 } 1257 1258 /* Sync SLB */ 1259 #ifdef TARGET_PPC64 1260 /* 1261 * The packed SLB array we get from KVM_GET_SREGS only contains 1262 * information about valid entries. So we flush our internal copy 1263 * to get rid of stale ones, then put all valid SLB entries back 1264 * in. 1265 */ 1266 memset(env->slb, 0, sizeof(env->slb)); 1267 for (i = 0; i < ARRAY_SIZE(env->slb); i++) { 1268 target_ulong rb = sregs.u.s.ppc64.slb[i].slbe; 1269 target_ulong rs = sregs.u.s.ppc64.slb[i].slbv; 1270 /* 1271 * Only restore valid entries 1272 */ 1273 if (rb & SLB_ESID_V) { 1274 ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs); 1275 } 1276 } 1277 #endif 1278 1279 /* Sync SRs */ 1280 for (i = 0; i < 16; i++) { 1281 env->sr[i] = sregs.u.s.ppc32.sr[i]; 1282 } 1283 1284 /* Sync BATs */ 1285 for (i = 0; i < 8; i++) { 1286 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff; 1287 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32; 1288 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff; 1289 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32; 1290 } 1291 1292 return 0; 1293 } 1294 1295 int kvm_arch_get_registers(CPUState *cs) 1296 { 1297 PowerPCCPU *cpu = POWERPC_CPU(cs); 1298 CPUPPCState *env = &cpu->env; 1299 struct kvm_regs regs; 1300 uint32_t cr; 1301 int i, ret; 1302 1303 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, ®s); 1304 if (ret < 0) 1305 return ret; 1306 1307 cr = regs.cr; 1308 for (i = 7; i >= 0; i--) { 1309 env->crf[i] = cr & 15; 1310 cr >>= 4; 1311 } 1312 1313 env->ctr = regs.ctr; 1314 env->lr = regs.lr; 1315 cpu_write_xer(env, regs.xer); 1316 env->msr = regs.msr; 1317 env->nip = regs.pc; 1318 1319 env->spr[SPR_SRR0] = regs.srr0; 1320 env->spr[SPR_SRR1] = regs.srr1; 1321 1322 env->spr[SPR_SPRG0] = regs.sprg0; 1323 env->spr[SPR_SPRG1] = regs.sprg1; 1324 env->spr[SPR_SPRG2] = regs.sprg2; 1325 env->spr[SPR_SPRG3] = regs.sprg3; 1326 env->spr[SPR_SPRG4] = regs.sprg4; 1327 env->spr[SPR_SPRG5] = regs.sprg5; 1328 env->spr[SPR_SPRG6] = regs.sprg6; 1329 env->spr[SPR_SPRG7] = regs.sprg7; 1330 1331 env->spr[SPR_BOOKE_PID] = regs.pid; 1332 1333 for (i = 0;i < 32; i++) 1334 env->gpr[i] = regs.gpr[i]; 1335 1336 kvm_get_fp(cs); 1337 1338 if (cap_booke_sregs) { 1339 ret = kvmppc_get_booke_sregs(cpu); 1340 if (ret < 0) { 1341 return ret; 1342 } 1343 } 1344 1345 if (cap_segstate) { 1346 ret = kvmppc_get_books_sregs(cpu); 1347 if (ret < 0) { 1348 return ret; 1349 } 1350 } 1351 1352 if (cap_hior) { 1353 kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR); 1354 } 1355 1356 if (cap_one_reg) { 1357 int i; 1358 1359 /* We deliberately ignore errors here, for kernels which have 1360 * the ONE_REG calls, but don't support the specific 1361 * registers, there's a reasonable chance things will still 1362 * work, at least until we try to migrate. 
*/ 1363 for (i = 0; i < 1024; i++) { 1364 uint64_t id = env->spr_cb[i].one_reg_id; 1365 1366 if (id != 0) { 1367 kvm_get_one_spr(cs, id, i); 1368 } 1369 } 1370 1371 #ifdef TARGET_PPC64 1372 if (msr_ts) { 1373 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) { 1374 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]); 1375 } 1376 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) { 1377 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]); 1378 } 1379 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr); 1380 kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr); 1381 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr); 1382 kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr); 1383 kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr); 1384 kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr); 1385 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave); 1386 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr); 1387 kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr); 1388 kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar); 1389 } 1390 1391 if (cap_papr) { 1392 if (kvm_get_vpa(cs) < 0) { 1393 DPRINTF("Warning: Unable to get VPA information from KVM\n"); 1394 } 1395 } 1396 1397 kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset); 1398 #endif 1399 } 1400 1401 return 0; 1402 } 1403 1404 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level) 1405 { 1406 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET; 1407 1408 if (irq != PPC_INTERRUPT_EXT) { 1409 return 0; 1410 } 1411 1412 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) { 1413 return 0; 1414 } 1415 1416 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq); 1417 1418 return 0; 1419 } 1420 1421 #if defined(TARGET_PPCEMB) 1422 #define PPC_INPUT_INT PPC40x_INPUT_INT 1423 #elif defined(TARGET_PPC64) 1424 #define PPC_INPUT_INT PPC970_INPUT_INT 1425 #else 1426 #define PPC_INPUT_INT PPC6xx_INPUT_INT 1427 #endif 1428 1429 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run) 1430 { 1431 PowerPCCPU *cpu = POWERPC_CPU(cs); 1432 CPUPPCState *env = &cpu->env; 1433 int r; 1434 unsigned irq; 1435 1436 qemu_mutex_lock_iothread(); 1437 1438 /* PowerPC QEMU tracks the various core input pins (interrupt, critical 1439 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */ 1440 if (!cap_interrupt_level && 1441 run->ready_for_interrupt_injection && 1442 (cs->interrupt_request & CPU_INTERRUPT_HARD) && 1443 (env->irq_input_state & (1<<PPC_INPUT_INT))) 1444 { 1445 /* For now KVM disregards the 'irq' argument. However, in the 1446 * future KVM could cache it in-kernel to avoid a heavyweight exit 1447 * when reading the UIC. 1448 */ 1449 irq = KVM_INTERRUPT_SET; 1450 1451 DPRINTF("injected interrupt %d\n", irq); 1452 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq); 1453 if (r < 0) { 1454 printf("cpu %d fail inject %x\n", cs->cpu_index, irq); 1455 } 1456 1457 /* Always wake up soon in case the interrupt was level based */ 1458 timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 1459 (NANOSECONDS_PER_SECOND / 50)); 1460 } 1461 1462 /* We don't know if there are more interrupts pending after this. However, 1463 * the guest will return to userspace in the course of handling this one 1464 * anyways, so we will get a chance to deliver the rest. 
*/ 1465 1466 qemu_mutex_unlock_iothread(); 1467 } 1468 1469 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run) 1470 { 1471 return MEMTXATTRS_UNSPECIFIED; 1472 } 1473 1474 int kvm_arch_process_async_events(CPUState *cs) 1475 { 1476 return cs->halted; 1477 } 1478 1479 static int kvmppc_handle_halt(PowerPCCPU *cpu) 1480 { 1481 CPUState *cs = CPU(cpu); 1482 CPUPPCState *env = &cpu->env; 1483 1484 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) { 1485 cs->halted = 1; 1486 cs->exception_index = EXCP_HLT; 1487 } 1488 1489 return 0; 1490 } 1491 1492 /* map dcr access to existing qemu dcr emulation */ 1493 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data) 1494 { 1495 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0) 1496 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn); 1497 1498 return 0; 1499 } 1500 1501 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data) 1502 { 1503 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0) 1504 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn); 1505 1506 return 0; 1507 } 1508 1509 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp) 1510 { 1511 /* Mixed endian case is not handled */ 1512 uint32_t sc = debug_inst_opcode; 1513 1514 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 1515 sizeof(sc), 0) || 1516 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) { 1517 return -EINVAL; 1518 } 1519 1520 return 0; 1521 } 1522 1523 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp) 1524 { 1525 uint32_t sc; 1526 1527 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) || 1528 sc != debug_inst_opcode || 1529 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 1530 sizeof(sc), 1)) { 1531 return -EINVAL; 1532 } 1533 1534 return 0; 1535 } 1536 1537 static int find_hw_breakpoint(target_ulong addr, int type) 1538 { 1539 int n; 1540 1541 assert((nb_hw_breakpoint + nb_hw_watchpoint) 1542 <= ARRAY_SIZE(hw_debug_points)); 1543 1544 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) { 1545 if (hw_debug_points[n].addr == addr && 1546 hw_debug_points[n].type == type) { 1547 return n; 1548 } 1549 } 1550 1551 return -1; 1552 } 1553 1554 static int find_hw_watchpoint(target_ulong addr, int *flag) 1555 { 1556 int n; 1557 1558 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS); 1559 if (n >= 0) { 1560 *flag = BP_MEM_ACCESS; 1561 return n; 1562 } 1563 1564 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE); 1565 if (n >= 0) { 1566 *flag = BP_MEM_WRITE; 1567 return n; 1568 } 1569 1570 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ); 1571 if (n >= 0) { 1572 *flag = BP_MEM_READ; 1573 return n; 1574 } 1575 1576 return -1; 1577 } 1578 1579 int kvm_arch_insert_hw_breakpoint(target_ulong addr, 1580 target_ulong len, int type) 1581 { 1582 if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) { 1583 return -ENOBUFS; 1584 } 1585 1586 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr; 1587 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type; 1588 1589 switch (type) { 1590 case GDB_BREAKPOINT_HW: 1591 if (nb_hw_breakpoint >= max_hw_breakpoint) { 1592 return -ENOBUFS; 1593 } 1594 1595 if (find_hw_breakpoint(addr, type) >= 0) { 1596 return -EEXIST; 1597 } 1598 1599 nb_hw_breakpoint++; 1600 break; 1601 1602 case GDB_WATCHPOINT_WRITE: 1603 case GDB_WATCHPOINT_READ: 1604 case GDB_WATCHPOINT_ACCESS: 1605 if (nb_hw_watchpoint >= max_hw_watchpoint) { 1606 
return -ENOBUFS; 1607 } 1608 1609 if (find_hw_breakpoint(addr, type) >= 0) { 1610 return -EEXIST; 1611 } 1612 1613 nb_hw_watchpoint++; 1614 break; 1615 1616 default: 1617 return -ENOSYS; 1618 } 1619 1620 return 0; 1621 } 1622 1623 int kvm_arch_remove_hw_breakpoint(target_ulong addr, 1624 target_ulong len, int type) 1625 { 1626 int n; 1627 1628 n = find_hw_breakpoint(addr, type); 1629 if (n < 0) { 1630 return -ENOENT; 1631 } 1632 1633 switch (type) { 1634 case GDB_BREAKPOINT_HW: 1635 nb_hw_breakpoint--; 1636 break; 1637 1638 case GDB_WATCHPOINT_WRITE: 1639 case GDB_WATCHPOINT_READ: 1640 case GDB_WATCHPOINT_ACCESS: 1641 nb_hw_watchpoint--; 1642 break; 1643 1644 default: 1645 return -ENOSYS; 1646 } 1647 hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint]; 1648 1649 return 0; 1650 } 1651 1652 void kvm_arch_remove_all_hw_breakpoints(void) 1653 { 1654 nb_hw_breakpoint = nb_hw_watchpoint = 0; 1655 } 1656 1657 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg) 1658 { 1659 int n; 1660 1661 /* Software Breakpoint updates */ 1662 if (kvm_sw_breakpoints_active(cs)) { 1663 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP; 1664 } 1665 1666 assert((nb_hw_breakpoint + nb_hw_watchpoint) 1667 <= ARRAY_SIZE(hw_debug_points)); 1668 assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp)); 1669 1670 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) { 1671 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP; 1672 memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp)); 1673 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) { 1674 switch (hw_debug_points[n].type) { 1675 case GDB_BREAKPOINT_HW: 1676 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT; 1677 break; 1678 case GDB_WATCHPOINT_WRITE: 1679 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE; 1680 break; 1681 case GDB_WATCHPOINT_READ: 1682 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ; 1683 break; 1684 case GDB_WATCHPOINT_ACCESS: 1685 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE | 1686 KVMPPC_DEBUG_WATCH_READ; 1687 break; 1688 default: 1689 cpu_abort(cs, "Unsupported breakpoint type\n"); 1690 } 1691 dbg->arch.bp[n].addr = hw_debug_points[n].addr; 1692 } 1693 } 1694 } 1695 1696 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run) 1697 { 1698 CPUState *cs = CPU(cpu); 1699 CPUPPCState *env = &cpu->env; 1700 struct kvm_debug_exit_arch *arch_info = &run->debug.arch; 1701 int handle = 0; 1702 int n; 1703 int flag = 0; 1704 1705 if (cs->singlestep_enabled) { 1706 handle = 1; 1707 } else if (arch_info->status) { 1708 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) { 1709 if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) { 1710 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW); 1711 if (n >= 0) { 1712 handle = 1; 1713 } 1714 } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ | 1715 KVMPPC_DEBUG_WATCH_WRITE)) { 1716 n = find_hw_watchpoint(arch_info->address, &flag); 1717 if (n >= 0) { 1718 handle = 1; 1719 cs->watchpoint_hit = &hw_watchpoint; 1720 hw_watchpoint.vaddr = hw_debug_points[n].addr; 1721 hw_watchpoint.flags = flag; 1722 } 1723 } 1724 } 1725 } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) { 1726 handle = 1; 1727 } else { 1728 /* QEMU is not able to handle debug exception, so inject 1729 * program exception to guest; 1730 * Yes program exception NOT debug exception !! 1731 * When QEMU is using debug resources then debug exception must 1732 * be always set. To achieve this we set MSR_DE and also set 1733 * MSRP_DEP so guest cannot change MSR_DE. 
1734 * When emulating debug resource for guest we want guest 1735 * to control MSR_DE (enable/disable debug interrupt on need). 1736 * Supporting both configurations are NOT possible. 1737 * So the result is that we cannot share debug resources 1738 * between QEMU and Guest on BOOKE architecture. 1739 * In the current design QEMU gets the priority over guest, 1740 * this means that if QEMU is using debug resources then guest 1741 * cannot use them; 1742 * For software breakpoint QEMU uses a privileged instruction; 1743 * So there cannot be any reason that we are here for guest 1744 * set debug exception, only possibility is guest executed a 1745 * privileged / illegal instruction and that's why we are 1746 * injecting a program interrupt. 1747 */ 1748 1749 cpu_synchronize_state(cs); 1750 /* env->nip is PC, so increment this by 4 to use 1751 * ppc_cpu_do_interrupt(), which set srr0 = env->nip - 4. 1752 */ 1753 env->nip += 4; 1754 cs->exception_index = POWERPC_EXCP_PROGRAM; 1755 env->error_code = POWERPC_EXCP_INVAL; 1756 ppc_cpu_do_interrupt(cs); 1757 } 1758 1759 return handle; 1760 } 1761 1762 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) 1763 { 1764 PowerPCCPU *cpu = POWERPC_CPU(cs); 1765 CPUPPCState *env = &cpu->env; 1766 int ret; 1767 1768 qemu_mutex_lock_iothread(); 1769 1770 switch (run->exit_reason) { 1771 case KVM_EXIT_DCR: 1772 if (run->dcr.is_write) { 1773 DPRINTF("handle dcr write\n"); 1774 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data); 1775 } else { 1776 DPRINTF("handle dcr read\n"); 1777 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data); 1778 } 1779 break; 1780 case KVM_EXIT_HLT: 1781 DPRINTF("handle halt\n"); 1782 ret = kvmppc_handle_halt(cpu); 1783 break; 1784 #if defined(TARGET_PPC64) 1785 case KVM_EXIT_PAPR_HCALL: 1786 DPRINTF("handle PAPR hypercall\n"); 1787 run->papr_hcall.ret = spapr_hypercall(cpu, 1788 run->papr_hcall.nr, 1789 run->papr_hcall.args); 1790 ret = 0; 1791 break; 1792 #endif 1793 case KVM_EXIT_EPR: 1794 DPRINTF("handle epr\n"); 1795 run->epr.epr = ldl_phys(cs->as, env->mpic_iack); 1796 ret = 0; 1797 break; 1798 case KVM_EXIT_WATCHDOG: 1799 DPRINTF("handle watchdog expiry\n"); 1800 watchdog_perform_action(); 1801 ret = 0; 1802 break; 1803 1804 case KVM_EXIT_DEBUG: 1805 DPRINTF("handle debug exception\n"); 1806 if (kvm_handle_debug(cpu, run)) { 1807 ret = EXCP_DEBUG; 1808 break; 1809 } 1810 /* re-enter, this exception was guest-internal */ 1811 ret = 0; 1812 break; 1813 1814 default: 1815 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason); 1816 ret = -1; 1817 break; 1818 } 1819 1820 qemu_mutex_unlock_iothread(); 1821 return ret; 1822 } 1823 1824 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits) 1825 { 1826 CPUState *cs = CPU(cpu); 1827 uint32_t bits = tsr_bits; 1828 struct kvm_one_reg reg = { 1829 .id = KVM_REG_PPC_OR_TSR, 1830 .addr = (uintptr_t) &bits, 1831 }; 1832 1833 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 1834 } 1835 1836 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits) 1837 { 1838 1839 CPUState *cs = CPU(cpu); 1840 uint32_t bits = tsr_bits; 1841 struct kvm_one_reg reg = { 1842 .id = KVM_REG_PPC_CLEAR_TSR, 1843 .addr = (uintptr_t) &bits, 1844 }; 1845 1846 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 1847 } 1848 1849 int kvmppc_set_tcr(PowerPCCPU *cpu) 1850 { 1851 CPUState *cs = CPU(cpu); 1852 CPUPPCState *env = &cpu->env; 1853 uint32_t tcr = env->spr[SPR_BOOKE_TCR]; 1854 1855 struct kvm_one_reg reg = { 1856 .id = KVM_REG_PPC_TCR, 1857 .addr = (uintptr_t) &tcr, 
1858 }; 1859 1860 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 1861 } 1862 1863 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu) 1864 { 1865 CPUState *cs = CPU(cpu); 1866 int ret; 1867 1868 if (!kvm_enabled()) { 1869 return -1; 1870 } 1871 1872 if (!cap_ppc_watchdog) { 1873 printf("warning: KVM does not support watchdog"); 1874 return -1; 1875 } 1876 1877 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0); 1878 if (ret < 0) { 1879 fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n", 1880 __func__, strerror(-ret)); 1881 return ret; 1882 } 1883 1884 return ret; 1885 } 1886 1887 static int read_cpuinfo(const char *field, char *value, int len) 1888 { 1889 FILE *f; 1890 int ret = -1; 1891 int field_len = strlen(field); 1892 char line[512]; 1893 1894 f = fopen("/proc/cpuinfo", "r"); 1895 if (!f) { 1896 return -1; 1897 } 1898 1899 do { 1900 if (!fgets(line, sizeof(line), f)) { 1901 break; 1902 } 1903 if (!strncmp(line, field, field_len)) { 1904 pstrcpy(value, len, line); 1905 ret = 0; 1906 break; 1907 } 1908 } while(*line); 1909 1910 fclose(f); 1911 1912 return ret; 1913 } 1914 1915 uint32_t kvmppc_get_tbfreq(void) 1916 { 1917 char line[512]; 1918 char *ns; 1919 uint32_t retval = NANOSECONDS_PER_SECOND; 1920 1921 if (read_cpuinfo("timebase", line, sizeof(line))) { 1922 return retval; 1923 } 1924 1925 if (!(ns = strchr(line, ':'))) { 1926 return retval; 1927 } 1928 1929 ns++; 1930 1931 return atoi(ns); 1932 } 1933 1934 bool kvmppc_get_host_serial(char **value) 1935 { 1936 return g_file_get_contents("/proc/device-tree/system-id", value, NULL, 1937 NULL); 1938 } 1939 1940 bool kvmppc_get_host_model(char **value) 1941 { 1942 return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL); 1943 } 1944 1945 /* Try to find a device tree node for a CPU with clock-frequency property */ 1946 static int kvmppc_find_cpu_dt(char *buf, int buf_len) 1947 { 1948 struct dirent *dirp; 1949 DIR *dp; 1950 1951 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) { 1952 printf("Can't open directory " PROC_DEVTREE_CPU "\n"); 1953 return -1; 1954 } 1955 1956 buf[0] = '\0'; 1957 while ((dirp = readdir(dp)) != NULL) { 1958 FILE *f; 1959 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU, 1960 dirp->d_name); 1961 f = fopen(buf, "r"); 1962 if (f) { 1963 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name); 1964 fclose(f); 1965 break; 1966 } 1967 buf[0] = '\0'; 1968 } 1969 closedir(dp); 1970 if (buf[0] == '\0') { 1971 printf("Unknown host!\n"); 1972 return -1; 1973 } 1974 1975 return 0; 1976 } 1977 1978 static uint64_t kvmppc_read_int_dt(const char *filename) 1979 { 1980 union { 1981 uint32_t v32; 1982 uint64_t v64; 1983 } u; 1984 FILE *f; 1985 int len; 1986 1987 f = fopen(filename, "rb"); 1988 if (!f) { 1989 return -1; 1990 } 1991 1992 len = fread(&u, 1, sizeof(u), f); 1993 fclose(f); 1994 switch (len) { 1995 case 4: 1996 /* property is a 32-bit quantity */ 1997 return be32_to_cpu(u.v32); 1998 case 8: 1999 return be64_to_cpu(u.v64); 2000 } 2001 2002 return 0; 2003 } 2004 2005 /* Read a CPU node property from the host device tree that's a single 2006 * integer (32-bit or 64-bit). 
Returns 0 if anything goes wrong 2007 * (can't find or open the property, or doesn't understand the 2008 * format) */ 2009 static uint64_t kvmppc_read_int_cpu_dt(const char *propname) 2010 { 2011 char buf[PATH_MAX], *tmp; 2012 uint64_t val; 2013 2014 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) { 2015 return -1; 2016 } 2017 2018 tmp = g_strdup_printf("%s/%s", buf, propname); 2019 val = kvmppc_read_int_dt(tmp); 2020 g_free(tmp); 2021 2022 return val; 2023 } 2024 2025 uint64_t kvmppc_get_clockfreq(void) 2026 { 2027 return kvmppc_read_int_cpu_dt("clock-frequency"); 2028 } 2029 2030 uint32_t kvmppc_get_vmx(void) 2031 { 2032 return kvmppc_read_int_cpu_dt("ibm,vmx"); 2033 } 2034 2035 uint32_t kvmppc_get_dfp(void) 2036 { 2037 return kvmppc_read_int_cpu_dt("ibm,dfp"); 2038 } 2039 2040 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo) 2041 { 2042 PowerPCCPU *cpu = ppc_env_get_cpu(env); 2043 CPUState *cs = CPU(cpu); 2044 2045 if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) && 2046 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) { 2047 return 0; 2048 } 2049 2050 return 1; 2051 } 2052 2053 int kvmppc_get_hasidle(CPUPPCState *env) 2054 { 2055 struct kvm_ppc_pvinfo pvinfo; 2056 2057 if (!kvmppc_get_pvinfo(env, &pvinfo) && 2058 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) { 2059 return 1; 2060 } 2061 2062 return 0; 2063 } 2064 2065 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len) 2066 { 2067 uint32_t *hc = (uint32_t*)buf; 2068 struct kvm_ppc_pvinfo pvinfo; 2069 2070 if (!kvmppc_get_pvinfo(env, &pvinfo)) { 2071 memcpy(buf, pvinfo.hcall, buf_len); 2072 return 0; 2073 } 2074 2075 /* 2076 * Fallback to always fail hypercalls regardless of endianness: 2077 * 2078 * tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian) 2079 * li r3, -1 2080 * b .+8 (becomes nop in wrong endian) 2081 * bswap32(li r3, -1) 2082 */ 2083 2084 hc[0] = cpu_to_be32(0x08000048); 2085 hc[1] = cpu_to_be32(0x3860ffff); 2086 hc[2] = cpu_to_be32(0x48000008); 2087 hc[3] = cpu_to_be32(bswap32(0x3860ffff)); 2088 2089 return 1; 2090 } 2091 2092 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall) 2093 { 2094 return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1); 2095 } 2096 2097 void kvmppc_enable_logical_ci_hcalls(void) 2098 { 2099 /* 2100 * FIXME: it would be nice if we could detect the cases where 2101 * we're using a device which requires the in kernel 2102 * implementation of these hcalls, but the kernel lacks them and 2103 * produce a warning. 
void kvmppc_enable_logical_ci_hcalls(void)
{
    /*
     * FIXME: it would be nice if we could detect the cases where
     * we're using a device which requires the in kernel
     * implementation of these hcalls, but the kernel lacks them and
     * produce a warning.
     */
    kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
    kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
}

void kvmppc_enable_set_mode_hcall(void)
{
    kvmppc_enable_hcall(kvm_state, H_SET_MODE);
}

void kvmppc_enable_clear_ref_mod_hcalls(void)
{
    kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
    kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
}

void kvmppc_set_papr(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    int ret;

    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
    if (ret) {
        error_report("This vCPU type or KVM version does not support PAPR");
        exit(1);
    }

    /* Update the capability flag so we sync the right information
     * with kvm */
    cap_papr = 1;
}

int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr)
{
    return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr);
}

void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
{
    CPUState *cs = CPU(cpu);
    int ret;

    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
    if (ret && mpic_proxy) {
        error_report("This KVM version does not support EPR");
        exit(1);
    }
}

int kvmppc_smt_threads(void)
{
    return cap_ppc_smt ? cap_ppc_smt : 1;
}
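/*
 * Illustrative sketch (not from the original file): the per-vCPU PAPR and
 * EPR toggles above are typically applied right after a vCPU is realized.
 * The helper name below is hypothetical.
 */
#if 0 /* example only */
static void example_papr_vcpu_setup(PowerPCCPU *cpu)
{
    /* Switch the vCPU into PAPR (pseries) mode; exits on failure. */
    kvmppc_set_papr(cpu);

    /* KVM reports its supported SMT mode; 1 means no threading support. */
    fprintf(stderr, "example: host KVM supports %d SMT threads per core\n",
            kvmppc_smt_threads());
}
#endif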
#ifdef TARGET_PPC64
off_t kvmppc_alloc_rma(void **rma)
{
    off_t size;
    int fd;
    struct kvm_allocate_rma ret;

    /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
     * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
     *                      not necessary on this hardware
     * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
     *
     * FIXME: We should allow the user to force contiguous RMA
     * allocation in the cap_ppc_rma==1 case.
     */
    if (cap_ppc_rma < 2) {
        return 0;
    }

    fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
    if (fd < 0) {
        fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
                strerror(errno));
        return -1;
    }

    size = MIN(ret.rma_size, 256ul << 20);

    *rma = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    if (*rma == MAP_FAILED) {
        fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
        return -1;
    }

    return size;
}

uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
{
    struct kvm_ppc_smmu_info info;
    long rampagesize, best_page_shift;
    int i;

    if (cap_ppc_rma >= 2) {
        return current_size;
    }

    /* Find the largest hardware supported page size that's less than
     * or equal to the (logical) backing page size of guest RAM */
    kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
    rampagesize = getrampagesize();
    best_page_shift = 0;

    for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
        struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];

        if (!sps->page_shift) {
            continue;
        }

        if ((sps->page_shift > best_page_shift)
            && ((1UL << sps->page_shift) <= rampagesize)) {
            best_page_shift = sps->page_shift;
        }
    }

    return MIN(current_size,
               1ULL << (best_page_shift + hash_shift - 7));
}
#endif

bool kvmppc_spapr_use_multitce(void)
{
    return cap_spapr_multitce;
}

void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
                              bool need_vfio)
{
    struct kvm_create_spapr_tce args = {
        .liobn = liobn,
        .window_size = window_size,
    };
    long len;
    int fd;
    void *table;

    /* Must set fd to -1 so we don't try to munmap when called for
     * destroying the table, which the upper layers -will- do
     */
    *pfd = -1;
    if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
        return NULL;
    }

    fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
    if (fd < 0) {
        fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
                liobn);
        return NULL;
    }

    len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
    /* FIXME: round this up to page size */

    table = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    if (table == MAP_FAILED) {
        fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
                liobn);
        close(fd);
        return NULL;
    }

    *pfd = fd;
    return table;
}

int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
{
    long len;

    if (fd < 0) {
        return -1;
    }

    len = nb_table * sizeof(uint64_t);
    if ((munmap(table, len) < 0) ||
        (close(fd) < 0)) {
        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
                strerror(errno));
        /* Leak the table */
    }

    return 0;
}
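/*
 * Illustrative sketch (not from the original file): the TCE table exported
 * by KVM_CREATE_SPAPR_TCE holds one 64-bit entry per IOMMU page, so a DMA
 * window of window_size bytes maps to
 * (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t) bytes of table, as
 * computed above.  Hypothetical usage of the create/remove pair (liobn and
 * window size are example values):
 */
#if 0 /* example only */
static void example_tce_table(void)
{
    int fd = -1;
    uint32_t window_size = 0x10000000;   /* 256 MiB DMA window */
    uint32_t nb_table = window_size / SPAPR_TCE_PAGE_SIZE;
    uint64_t *table;

    table = kvmppc_create_spapr_tce(0x80000000, window_size, &fd, false);
    if (!table) {
        return;                          /* fall back to a userspace table */
    }

    /* ... guest runs, the kernel updates the shared table in place ... */

    kvmppc_remove_spapr_tce(table, fd, nb_table);
}
#endif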
int kvmppc_reset_htab(int shift_hint)
{
    uint32_t shift = shift_hint;

    if (!kvm_enabled()) {
        /* Full emulation, tell caller to allocate htab itself */
        return 0;
    }
    if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
        int ret;
        ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
        if (ret == -ENOTTY) {
            /* At least some versions of PR KVM advertise the
             * capability, but don't implement the ioctl().  Oops.
             * Return 0 so that we allocate the htab in qemu, as is
             * correct for PR. */
            return 0;
        } else if (ret < 0) {
            return ret;
        }
        return shift;
    }

    /* We have a kernel that predates the htab reset calls.  For PR
     * KVM, we need to allocate the htab ourselves, for an HV KVM of
     * this era, it has allocated a 16MB fixed size hash table already. */
    if (kvmppc_is_pr(kvm_state)) {
        /* PR - tell caller to allocate htab */
        return 0;
    } else {
        /* HV - assume 16MB kernel allocated htab */
        return 24;
    }
}

static inline uint32_t mfpvr(void)
{
    uint32_t pvr;

    asm ("mfpvr %0"
         : "=r"(pvr));
    return pvr;
}

static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    if (on) {
        *word |= flags;
    } else {
        *word &= ~flags;
    }
}

static void kvmppc_host_cpu_initfn(Object *obj)
{
    assert(kvm_enabled());
}

static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(oc);
    PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
    uint32_t vmx = kvmppc_get_vmx();
    uint32_t dfp = kvmppc_get_dfp();
    uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
    uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");

    /* Now fix up the class with information we can query from the host */
    pcc->pvr = mfpvr();

    if (vmx != -1) {
        /* Only override when we know what the host supports */
        alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
        alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
    }
    if (dfp != -1) {
        /* Only override when we know what the host supports */
        alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
    }

    if (dcache_size != -1) {
        pcc->l1_dcache_size = dcache_size;
    }

    if (icache_size != -1) {
        pcc->l1_icache_size = icache_size;
    }

    /* Reason: kvmppc_host_cpu_initfn() dies when !kvm_enabled() */
    dc->cannot_destroy_with_object_finalize_yet = true;
}

bool kvmppc_has_cap_epr(void)
{
    return cap_epr;
}

bool kvmppc_has_cap_htab_fd(void)
{
    return cap_htab_fd;
}

bool kvmppc_has_cap_fixup_hcalls(void)
{
    return cap_fixup_hcalls;
}

bool kvmppc_has_cap_htm(void)
{
    return cap_htm;
}

static PowerPCCPUClass *ppc_cpu_get_family_class(PowerPCCPUClass *pcc)
{
    ObjectClass *oc = OBJECT_CLASS(pcc);

    while (oc && !object_class_is_abstract(oc)) {
        oc = object_class_get_parent(oc);
    }
    assert(oc);

    return POWERPC_CPU_CLASS(oc);
}

PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
{
    uint32_t host_pvr = mfpvr();
    PowerPCCPUClass *pvr_pcc;

    pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
    if (pvr_pcc == NULL) {
        pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
    }

    return pvr_pcc;
}
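/*
 * Illustrative sketch (not from the original file): host CPU class lookup
 * first tries an exact PVR match and then falls back to a masked match, so
 * a host revision without an exact model entry still resolves to its CPU
 * family class.  Hypothetical use:
 */
#if 0 /* example only */
static void example_report_host_cpu(void)
{
    PowerPCCPUClass *pcc = kvm_ppc_get_host_cpu_class();

    if (pcc) {
        fprintf(stderr, "example: host PVR 0x%08x maps to class %s\n",
                mfpvr(), object_class_get_name(OBJECT_CLASS(pcc)));
    }
}
#endif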
static int kvm_ppc_register_host_cpu_type(void)
{
    TypeInfo type_info = {
        .name = TYPE_HOST_POWERPC_CPU,
        .instance_init = kvmppc_host_cpu_initfn,
        .class_init = kvmppc_host_cpu_class_init,
    };
    PowerPCCPUClass *pvr_pcc;
    DeviceClass *dc;
    int i;

    pvr_pcc = kvm_ppc_get_host_cpu_class();
    if (pvr_pcc == NULL) {
        return -1;
    }
    type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
    type_register(&type_info);

#if defined(TARGET_PPC64)
    type_info.name = g_strdup_printf("%s-" TYPE_SPAPR_CPU_CORE, "host");
    type_info.parent = TYPE_SPAPR_CPU_CORE;
    type_info.instance_size = sizeof(sPAPRCPUCore);
    type_info.instance_init = NULL;
    type_info.class_init = spapr_cpu_core_class_init;
    type_info.class_data = (void *) "host";
    type_register(&type_info);
    g_free((void *)type_info.name);
#endif

    /*
     * Update generic CPU family class alias (e.g. on a POWER8NVL host,
     * we want "POWER8" to be a "family" alias that points to the current
     * host CPU type, too)
     */
    dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc));
    for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) {
        if (strcmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) {
            ObjectClass *oc = OBJECT_CLASS(pvr_pcc);
            char *suffix;

            ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc));
            suffix = strstr(ppc_cpu_aliases[i].model, "-" TYPE_POWERPC_CPU);
            if (suffix) {
                *suffix = 0;
            }
            ppc_cpu_aliases[i].oc = oc;
            break;
        }
    }

    return 0;
}

int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
{
    struct kvm_rtas_token_args args = {
        .token = token,
    };

    if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
        return -ENOENT;
    }

    strncpy(args.name, function, sizeof(args.name));

    return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
}

int kvmppc_get_htab_fd(bool write)
{
    struct kvm_get_htab_fd s = {
        .flags = write ? KVM_GET_HTAB_WRITE : 0,
        .start_index = 0,
    };

    if (!cap_htab_fd) {
        fprintf(stderr, "KVM version doesn't support saving the hash table\n");
        return -1;
    }

    return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
}
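/*
 * Illustrative sketch (not from the original file): data read from the HTAB
 * fd is a sequence of variable-sized chunks, each a struct
 * kvm_get_htab_header (index, n_valid, n_invalid) followed by n_valid HPTEs
 * of HASH_PTE_SIZE_64 bytes; this is how kvmppc_save_htab() below walks its
 * read buffer.
 */
#if 0 /* example only */
static size_t example_htab_chunk_size(const struct kvm_get_htab_header *head)
{
    /* Bytes consumed by one chunk in the HTAB fd stream. */
    return sizeof(*head) + (size_t)head->n_valid * HASH_PTE_SIZE_64;
}
#endif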
int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
{
    int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    uint8_t buf[bufsize];
    ssize_t rc;

    do {
        rc = read(fd, buf, bufsize);
        if (rc < 0) {
            fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
                    strerror(errno));
            return rc;
        } else if (rc) {
            uint8_t *buffer = buf;
            ssize_t n = rc;
            while (n) {
                struct kvm_get_htab_header *head =
                    (struct kvm_get_htab_header *) buffer;
                size_t chunksize = sizeof(*head) +
                    HASH_PTE_SIZE_64 * head->n_valid;

                qemu_put_be32(f, head->index);
                qemu_put_be16(f, head->n_valid);
                qemu_put_be16(f, head->n_invalid);
                qemu_put_buffer(f, (void *)(head + 1),
                                HASH_PTE_SIZE_64 * head->n_valid);

                buffer += chunksize;
                n -= chunksize;
            }
        }
    } while ((rc != 0)
             && ((max_ns < 0)
                 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));

    return (rc == 0) ? 1 : 0;
}

int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
                           uint16_t n_valid, uint16_t n_invalid)
{
    struct kvm_get_htab_header *buf;
    size_t chunksize = sizeof(*buf) + n_valid * HASH_PTE_SIZE_64;
    ssize_t rc;

    buf = alloca(chunksize);
    buf->index = index;
    buf->n_valid = n_valid;
    buf->n_invalid = n_invalid;

    qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64 * n_valid);

    rc = write(fd, buf, chunksize);
    if (rc < 0) {
        fprintf(stderr, "Error writing KVM hash table: %s\n",
                strerror(errno));
        return rc;
    }
    if (rc != chunksize) {
        /* We should never get a short write on a single chunk */
        fprintf(stderr, "Short write, restoring KVM hash table\n");
        return -1;
    }
    return 0;
}

bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
{
    return true;
}

int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
{
    return 1;
}

int kvm_arch_on_sigbus(int code, void *addr)
{
    return 1;
}

void kvm_arch_init_irq_routing(KVMState *s)
{
}

struct kvm_get_htab_buf {
    struct kvm_get_htab_header header;
    /*
     * We require one extra byte for read
     */
    target_ulong hpte[(HPTES_PER_GROUP * 2) + 1];
};

uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index)
{
    int htab_fd;
    struct kvm_get_htab_fd ghf;
    struct kvm_get_htab_buf *hpte_buf;

    ghf.flags = 0;
    ghf.start_index = pte_index;
    htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
    if (htab_fd < 0) {
        goto error_out;
    }

    hpte_buf = g_malloc0(sizeof(*hpte_buf));
    /*
     * Read the hpte group
     */
    if (read(htab_fd, hpte_buf, sizeof(*hpte_buf)) < 0) {
        goto out_close;
    }

    close(htab_fd);
    return (uint64_t)(uintptr_t) hpte_buf->hpte;

out_close:
    g_free(hpte_buf);
    close(htab_fd);
error_out:
    return 0;
}

void kvmppc_hash64_free_pteg(uint64_t token)
{
    struct kvm_get_htab_buf *htab_buf;

    htab_buf = container_of((void *)(uintptr_t) token, struct kvm_get_htab_buf,
                            hpte);
    g_free(htab_buf);
    return;
}
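/*
 * Illustrative sketch (not from the original file): kvmppc_hash64_read_pteg()
 * hands back a token that is really a pointer to the hpte[] array inside a
 * heap-allocated kvm_get_htab_buf, so every successful read must be paired
 * with kvmppc_hash64_free_pteg() to avoid leaking that buffer.
 */
#if 0 /* example only */
static void example_read_pteg(PowerPCCPU *cpu, target_ulong pte_index)
{
    uint64_t token = kvmppc_hash64_read_pteg(cpu, pte_index);

    if (token) {
        target_ulong *hptes = (target_ulong *)(uintptr_t)token;

        /* hptes[0]/hptes[1] hold the first PTE of the group ... */
        (void)hptes;
        kvmppc_hash64_free_pteg(token);
    }
}
#endif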
void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong pte_index,
                             target_ulong pte0, target_ulong pte1)
{
    int htab_fd;
    struct kvm_get_htab_fd ghf;
    struct kvm_get_htab_buf hpte_buf;

    ghf.flags = 0;
    ghf.start_index = 0;     /* Ignored */
    htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
    if (htab_fd < 0) {
        goto error_out;
    }

    hpte_buf.header.n_valid = 1;
    hpte_buf.header.n_invalid = 0;
    hpte_buf.header.index = pte_index;
    hpte_buf.hpte[0] = pte0;
    hpte_buf.hpte[1] = pte1;
    /*
     * Write the hpte entry.
     * CAUTION: write() has the warn_unused_result attribute. Hence we
     * need to check the return value, even though we do nothing.
     */
    if (write(htab_fd, &hpte_buf, sizeof(hpte_buf)) < 0) {
        goto out_close;
    }

out_close:
    close(htab_fd);
    return;

error_out:
    return;
}

int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
                             uint64_t address, uint32_t data, PCIDevice *dev)
{
    return 0;
}

int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
                                int vector, PCIDevice *dev)
{
    return 0;
}

int kvm_arch_release_virq_post(int virq)
{
    return 0;
}

int kvm_arch_msi_data_to_gsi(uint32_t data)
{
    return data & 0xffff;
}

int kvmppc_enable_hwrng(void)
{
    if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
        return -1;
    }

    return kvmppc_enable_hcall(kvm_state, H_RANDOM);
}
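/*
 * Illustrative sketch (not from the original file): the in-kernel H_RANDOM
 * path is only available when KVM advertises KVM_CAP_PPC_HWRNG, which is
 * exactly what kvmppc_enable_hwrng() checks before enabling the hcall.
 * Hypothetical caller:
 */
#if 0 /* example only */
static bool example_try_kernel_rng(void)
{
    /* Returns -1 without KVM or without hardware RNG support. */
    return kvmppc_enable_hwrng() == 0;
}
#endif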