1 /* 2 * QEMU KVM support 3 * 4 * Copyright (C) 2006-2008 Qumranet Technologies 5 * Copyright IBM, Corp. 2008 6 * 7 * Authors: 8 * Anthony Liguori <aliguori@us.ibm.com> 9 * 10 * This work is licensed under the terms of the GNU GPL, version 2 or later. 11 * See the COPYING file in the top-level directory. 12 * 13 */ 14 15 #include "qemu/osdep.h" 16 #include "qapi/qapi-events-run-state.h" 17 #include "qapi/error.h" 18 #include <sys/ioctl.h> 19 #include <sys/utsname.h> 20 21 #include <linux/kvm.h> 22 #include "standard-headers/asm-x86/kvm_para.h" 23 24 #include "cpu.h" 25 #include "host-cpu.h" 26 #include "sysemu/sysemu.h" 27 #include "sysemu/hw_accel.h" 28 #include "sysemu/kvm_int.h" 29 #include "sysemu/runstate.h" 30 #include "kvm_i386.h" 31 #include "sev.h" 32 #include "hyperv.h" 33 #include "hyperv-proto.h" 34 35 #include "exec/gdbstub.h" 36 #include "qemu/host-utils.h" 37 #include "qemu/main-loop.h" 38 #include "qemu/config-file.h" 39 #include "qemu/error-report.h" 40 #include "qemu/memalign.h" 41 #include "hw/i386/x86.h" 42 #include "hw/i386/apic.h" 43 #include "hw/i386/apic_internal.h" 44 #include "hw/i386/apic-msidef.h" 45 #include "hw/i386/intel_iommu.h" 46 #include "hw/i386/x86-iommu.h" 47 #include "hw/i386/e820_memory_layout.h" 48 49 #include "hw/pci/pci.h" 50 #include "hw/pci/msi.h" 51 #include "hw/pci/msix.h" 52 #include "migration/blocker.h" 53 #include "exec/memattrs.h" 54 #include "trace.h" 55 56 //#define DEBUG_KVM 57 58 #ifdef DEBUG_KVM 59 #define DPRINTF(fmt, ...) \ 60 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0) 61 #else 62 #define DPRINTF(fmt, ...) \ 63 do { } while (0) 64 #endif 65 66 /* From arch/x86/kvm/lapic.h */ 67 #define KVM_APIC_BUS_CYCLE_NS 1 68 #define KVM_APIC_BUS_FREQUENCY (1000000000ULL / KVM_APIC_BUS_CYCLE_NS) 69 70 #define MSR_KVM_WALL_CLOCK 0x11 71 #define MSR_KVM_SYSTEM_TIME 0x12 72 73 /* A 4096-byte buffer can hold the 8-byte kvm_msrs header, plus 74 * 255 kvm_msr_entry structs */ 75 #define MSR_BUF_SIZE 4096 76 77 static void kvm_init_msrs(X86CPU *cpu); 78 79 const KVMCapabilityInfo kvm_arch_required_capabilities[] = { 80 KVM_CAP_INFO(SET_TSS_ADDR), 81 KVM_CAP_INFO(EXT_CPUID), 82 KVM_CAP_INFO(MP_STATE), 83 KVM_CAP_LAST_INFO 84 }; 85 86 static bool has_msr_star; 87 static bool has_msr_hsave_pa; 88 static bool has_msr_tsc_aux; 89 static bool has_msr_tsc_adjust; 90 static bool has_msr_tsc_deadline; 91 static bool has_msr_feature_control; 92 static bool has_msr_misc_enable; 93 static bool has_msr_smbase; 94 static bool has_msr_bndcfgs; 95 static int lm_capable_kernel; 96 static bool has_msr_hv_hypercall; 97 static bool has_msr_hv_crash; 98 static bool has_msr_hv_reset; 99 static bool has_msr_hv_vpindex; 100 static bool hv_vpindex_settable; 101 static bool has_msr_hv_runtime; 102 static bool has_msr_hv_synic; 103 static bool has_msr_hv_stimer; 104 static bool has_msr_hv_frequencies; 105 static bool has_msr_hv_reenlightenment; 106 static bool has_msr_xss; 107 static bool has_msr_umwait; 108 static bool has_msr_spec_ctrl; 109 static bool has_tsc_scale_msr; 110 static bool has_msr_tsx_ctrl; 111 static bool has_msr_virt_ssbd; 112 static bool has_msr_smi_count; 113 static bool has_msr_arch_capabs; 114 static bool has_msr_core_capabs; 115 static bool has_msr_vmx_vmfunc; 116 static bool has_msr_ucode_rev; 117 static bool has_msr_vmx_procbased_ctls2; 118 static bool has_msr_perf_capabs; 119 static bool has_msr_pkrs; 120 121 static uint32_t has_architectural_pmu_version; 122 static uint32_t num_architectural_pmu_gp_counters; 123 static uint32_t num_architectural_pmu_fixed_counters; 124 125 static int has_xsave; 126 static int has_xcrs; 127 static int has_pit_state2; 128 static int has_sregs2; 129 static int has_exception_payload; 130 131 static bool has_msr_mcg_ext_ctl; 132 133 static struct kvm_cpuid2 *cpuid_cache; 134 static struct kvm_cpuid2 *hv_cpuid_cache; 135 static struct kvm_msr_list *kvm_feature_msrs; 136 137 #define BUS_LOCK_SLICE_TIME 1000000000ULL /* ns */ 138 static RateLimit bus_lock_ratelimit_ctrl; 139 140 int kvm_has_pit_state2(void) 141 { 142 return has_pit_state2; 143 } 144 145 bool kvm_has_smm(void) 146 { 147 return kvm_vm_check_extension(kvm_state, KVM_CAP_X86_SMM); 148 } 149 150 bool kvm_has_adjust_clock_stable(void) 151 { 152 int ret = kvm_check_extension(kvm_state, KVM_CAP_ADJUST_CLOCK); 153 154 return (ret == KVM_CLOCK_TSC_STABLE); 155 } 156 157 bool kvm_has_adjust_clock(void) 158 { 159 return kvm_check_extension(kvm_state, KVM_CAP_ADJUST_CLOCK); 160 } 161 162 bool kvm_has_exception_payload(void) 163 { 164 return has_exception_payload; 165 } 166 167 static bool kvm_x2apic_api_set_flags(uint64_t flags) 168 { 169 KVMState *s = KVM_STATE(current_accel()); 170 171 return !kvm_vm_enable_cap(s, KVM_CAP_X2APIC_API, 0, flags); 172 } 173 174 #define MEMORIZE(fn, _result) \ 175 ({ \ 176 static bool _memorized; \ 177 \ 178 if (_memorized) { \ 179 return _result; \ 180 } \ 181 _memorized = true; \ 182 _result = fn; \ 183 }) 184 185 static bool has_x2apic_api; 186 187 bool kvm_has_x2apic_api(void) 188 { 189 return has_x2apic_api; 190 } 191 192 bool kvm_enable_x2apic(void) 193 { 194 return MEMORIZE( 195 kvm_x2apic_api_set_flags(KVM_X2APIC_API_USE_32BIT_IDS | 196 KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK), 197 has_x2apic_api); 198 } 199 200 bool kvm_hv_vpindex_settable(void) 201 { 202 return hv_vpindex_settable; 203 } 204 205 static int kvm_get_tsc(CPUState *cs) 206 { 207 X86CPU *cpu = X86_CPU(cs); 208 CPUX86State *env = &cpu->env; 209 struct { 210 struct kvm_msrs info; 211 struct kvm_msr_entry entries[1]; 212 } msr_data = {}; 213 int ret; 214 215 if (env->tsc_valid) { 216 return 0; 217 } 218 219 memset(&msr_data, 0, sizeof(msr_data)); 220 msr_data.info.nmsrs = 1; 221 msr_data.entries[0].index = MSR_IA32_TSC; 222 env->tsc_valid = !runstate_is_running(); 223 224 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MSRS, &msr_data); 225 if (ret < 0) { 226 return ret; 227 } 228 229 assert(ret == 1); 230 env->tsc = msr_data.entries[0].data; 231 return 0; 232 } 233 234 static inline void do_kvm_synchronize_tsc(CPUState *cpu, run_on_cpu_data arg) 235 { 236 kvm_get_tsc(cpu); 237 } 238 239 void kvm_synchronize_all_tsc(void) 240 { 241 CPUState *cpu; 242 243 if (kvm_enabled()) { 244 CPU_FOREACH(cpu) { 245 run_on_cpu(cpu, do_kvm_synchronize_tsc, RUN_ON_CPU_NULL); 246 } 247 } 248 } 249 250 static struct kvm_cpuid2 *try_get_cpuid(KVMState *s, int max) 251 { 252 struct kvm_cpuid2 *cpuid; 253 int r, size; 254 255 size = sizeof(*cpuid) + max * sizeof(*cpuid->entries); 256 cpuid = g_malloc0(size); 257 cpuid->nent = max; 258 r = kvm_ioctl(s, KVM_GET_SUPPORTED_CPUID, cpuid); 259 if (r == 0 && cpuid->nent >= max) { 260 r = -E2BIG; 261 } 262 if (r < 0) { 263 if (r == -E2BIG) { 264 g_free(cpuid); 265 return NULL; 266 } else { 267 fprintf(stderr, "KVM_GET_SUPPORTED_CPUID failed: %s\n", 268 strerror(-r)); 269 exit(1); 270 } 271 } 272 return cpuid; 273 } 274 275 /* Run KVM_GET_SUPPORTED_CPUID ioctl(), allocating a buffer large enough 276 * for all entries. 277 */ 278 static struct kvm_cpuid2 *get_supported_cpuid(KVMState *s) 279 { 280 struct kvm_cpuid2 *cpuid; 281 int max = 1; 282 283 if (cpuid_cache != NULL) { 284 return cpuid_cache; 285 } 286 while ((cpuid = try_get_cpuid(s, max)) == NULL) { 287 max *= 2; 288 } 289 cpuid_cache = cpuid; 290 return cpuid; 291 } 292 293 static bool host_tsx_broken(void) 294 { 295 int family, model, stepping;\ 296 char vendor[CPUID_VENDOR_SZ + 1]; 297 298 host_cpu_vendor_fms(vendor, &family, &model, &stepping); 299 300 /* Check if we are running on a Haswell host known to have broken TSX */ 301 return !strcmp(vendor, CPUID_VENDOR_INTEL) && 302 (family == 6) && 303 ((model == 63 && stepping < 4) || 304 model == 60 || model == 69 || model == 70); 305 } 306 307 /* Returns the value for a specific register on the cpuid entry 308 */ 309 static uint32_t cpuid_entry_get_reg(struct kvm_cpuid_entry2 *entry, int reg) 310 { 311 uint32_t ret = 0; 312 switch (reg) { 313 case R_EAX: 314 ret = entry->eax; 315 break; 316 case R_EBX: 317 ret = entry->ebx; 318 break; 319 case R_ECX: 320 ret = entry->ecx; 321 break; 322 case R_EDX: 323 ret = entry->edx; 324 break; 325 } 326 return ret; 327 } 328 329 /* Find matching entry for function/index on kvm_cpuid2 struct 330 */ 331 static struct kvm_cpuid_entry2 *cpuid_find_entry(struct kvm_cpuid2 *cpuid, 332 uint32_t function, 333 uint32_t index) 334 { 335 int i; 336 for (i = 0; i < cpuid->nent; ++i) { 337 if (cpuid->entries[i].function == function && 338 cpuid->entries[i].index == index) { 339 return &cpuid->entries[i]; 340 } 341 } 342 /* not found: */ 343 return NULL; 344 } 345 346 uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function, 347 uint32_t index, int reg) 348 { 349 struct kvm_cpuid2 *cpuid; 350 uint32_t ret = 0; 351 uint32_t cpuid_1_edx; 352 353 cpuid = get_supported_cpuid(s); 354 355 struct kvm_cpuid_entry2 *entry = cpuid_find_entry(cpuid, function, index); 356 if (entry) { 357 ret = cpuid_entry_get_reg(entry, reg); 358 } 359 360 /* Fixups for the data returned by KVM, below */ 361 362 if (function == 1 && reg == R_EDX) { 363 /* KVM before 2.6.30 misreports the following features */ 364 ret |= CPUID_MTRR | CPUID_PAT | CPUID_MCE | CPUID_MCA; 365 } else if (function == 1 && reg == R_ECX) { 366 /* We can set the hypervisor flag, even if KVM does not return it on 367 * GET_SUPPORTED_CPUID 368 */ 369 ret |= CPUID_EXT_HYPERVISOR; 370 /* tsc-deadline flag is not returned by GET_SUPPORTED_CPUID, but it 371 * can be enabled if the kernel has KVM_CAP_TSC_DEADLINE_TIMER, 372 * and the irqchip is in the kernel. 373 */ 374 if (kvm_irqchip_in_kernel() && 375 kvm_check_extension(s, KVM_CAP_TSC_DEADLINE_TIMER)) { 376 ret |= CPUID_EXT_TSC_DEADLINE_TIMER; 377 } 378 379 /* x2apic is reported by GET_SUPPORTED_CPUID, but it can't be enabled 380 * without the in-kernel irqchip 381 */ 382 if (!kvm_irqchip_in_kernel()) { 383 ret &= ~CPUID_EXT_X2APIC; 384 } 385 386 if (enable_cpu_pm) { 387 int disable_exits = kvm_check_extension(s, 388 KVM_CAP_X86_DISABLE_EXITS); 389 390 if (disable_exits & KVM_X86_DISABLE_EXITS_MWAIT) { 391 ret |= CPUID_EXT_MONITOR; 392 } 393 } 394 } else if (function == 6 && reg == R_EAX) { 395 ret |= CPUID_6_EAX_ARAT; /* safe to allow because of emulated APIC */ 396 } else if (function == 7 && index == 0 && reg == R_EBX) { 397 if (host_tsx_broken()) { 398 ret &= ~(CPUID_7_0_EBX_RTM | CPUID_7_0_EBX_HLE); 399 } 400 } else if (function == 7 && index == 0 && reg == R_EDX) { 401 /* 402 * Linux v4.17-v4.20 incorrectly return ARCH_CAPABILITIES on SVM hosts. 403 * We can detect the bug by checking if MSR_IA32_ARCH_CAPABILITIES is 404 * returned by KVM_GET_MSR_INDEX_LIST. 405 */ 406 if (!has_msr_arch_capabs) { 407 ret &= ~CPUID_7_0_EDX_ARCH_CAPABILITIES; 408 } 409 } else if (function == 0x80000001 && reg == R_ECX) { 410 /* 411 * It's safe to enable TOPOEXT even if it's not returned by 412 * GET_SUPPORTED_CPUID. Unconditionally enabling TOPOEXT here allows 413 * us to keep CPU models including TOPOEXT runnable on older kernels. 414 */ 415 ret |= CPUID_EXT3_TOPOEXT; 416 } else if (function == 0x80000001 && reg == R_EDX) { 417 /* On Intel, kvm returns cpuid according to the Intel spec, 418 * so add missing bits according to the AMD spec: 419 */ 420 cpuid_1_edx = kvm_arch_get_supported_cpuid(s, 1, 0, R_EDX); 421 ret |= cpuid_1_edx & CPUID_EXT2_AMD_ALIASES; 422 } else if (function == KVM_CPUID_FEATURES && reg == R_EAX) { 423 /* kvm_pv_unhalt is reported by GET_SUPPORTED_CPUID, but it can't 424 * be enabled without the in-kernel irqchip 425 */ 426 if (!kvm_irqchip_in_kernel()) { 427 ret &= ~(1U << KVM_FEATURE_PV_UNHALT); 428 } 429 if (kvm_irqchip_is_split()) { 430 ret |= 1U << KVM_FEATURE_MSI_EXT_DEST_ID; 431 } 432 } else if (function == KVM_CPUID_FEATURES && reg == R_EDX) { 433 ret |= 1U << KVM_HINTS_REALTIME; 434 } 435 436 return ret; 437 } 438 439 uint64_t kvm_arch_get_supported_msr_feature(KVMState *s, uint32_t index) 440 { 441 struct { 442 struct kvm_msrs info; 443 struct kvm_msr_entry entries[1]; 444 } msr_data = {}; 445 uint64_t value; 446 uint32_t ret, can_be_one, must_be_one; 447 448 if (kvm_feature_msrs == NULL) { /* Host doesn't support feature MSRs */ 449 return 0; 450 } 451 452 /* Check if requested MSR is supported feature MSR */ 453 int i; 454 for (i = 0; i < kvm_feature_msrs->nmsrs; i++) 455 if (kvm_feature_msrs->indices[i] == index) { 456 break; 457 } 458 if (i == kvm_feature_msrs->nmsrs) { 459 return 0; /* if the feature MSR is not supported, simply return 0 */ 460 } 461 462 msr_data.info.nmsrs = 1; 463 msr_data.entries[0].index = index; 464 465 ret = kvm_ioctl(s, KVM_GET_MSRS, &msr_data); 466 if (ret != 1) { 467 error_report("KVM get MSR (index=0x%x) feature failed, %s", 468 index, strerror(-ret)); 469 exit(1); 470 } 471 472 value = msr_data.entries[0].data; 473 switch (index) { 474 case MSR_IA32_VMX_PROCBASED_CTLS2: 475 if (!has_msr_vmx_procbased_ctls2) { 476 /* KVM forgot to add these bits for some time, do this ourselves. */ 477 if (kvm_arch_get_supported_cpuid(s, 0xD, 1, R_ECX) & 478 CPUID_XSAVE_XSAVES) { 479 value |= (uint64_t)VMX_SECONDARY_EXEC_XSAVES << 32; 480 } 481 if (kvm_arch_get_supported_cpuid(s, 1, 0, R_ECX) & 482 CPUID_EXT_RDRAND) { 483 value |= (uint64_t)VMX_SECONDARY_EXEC_RDRAND_EXITING << 32; 484 } 485 if (kvm_arch_get_supported_cpuid(s, 7, 0, R_EBX) & 486 CPUID_7_0_EBX_INVPCID) { 487 value |= (uint64_t)VMX_SECONDARY_EXEC_ENABLE_INVPCID << 32; 488 } 489 if (kvm_arch_get_supported_cpuid(s, 7, 0, R_EBX) & 490 CPUID_7_0_EBX_RDSEED) { 491 value |= (uint64_t)VMX_SECONDARY_EXEC_RDSEED_EXITING << 32; 492 } 493 if (kvm_arch_get_supported_cpuid(s, 0x80000001, 0, R_EDX) & 494 CPUID_EXT2_RDTSCP) { 495 value |= (uint64_t)VMX_SECONDARY_EXEC_RDTSCP << 32; 496 } 497 } 498 /* fall through */ 499 case MSR_IA32_VMX_TRUE_PINBASED_CTLS: 500 case MSR_IA32_VMX_TRUE_PROCBASED_CTLS: 501 case MSR_IA32_VMX_TRUE_ENTRY_CTLS: 502 case MSR_IA32_VMX_TRUE_EXIT_CTLS: 503 /* 504 * Return true for bits that can be one, but do not have to be one. 505 * The SDM tells us which bits could have a "must be one" setting, 506 * so we can do the opposite transformation in make_vmx_msr_value. 507 */ 508 must_be_one = (uint32_t)value; 509 can_be_one = (uint32_t)(value >> 32); 510 return can_be_one & ~must_be_one; 511 512 default: 513 return value; 514 } 515 } 516 517 static int kvm_get_mce_cap_supported(KVMState *s, uint64_t *mce_cap, 518 int *max_banks) 519 { 520 int r; 521 522 r = kvm_check_extension(s, KVM_CAP_MCE); 523 if (r > 0) { 524 *max_banks = r; 525 return kvm_ioctl(s, KVM_X86_GET_MCE_CAP_SUPPORTED, mce_cap); 526 } 527 return -ENOSYS; 528 } 529 530 static void kvm_mce_inject(X86CPU *cpu, hwaddr paddr, int code) 531 { 532 CPUState *cs = CPU(cpu); 533 CPUX86State *env = &cpu->env; 534 uint64_t status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN | 535 MCI_STATUS_MISCV | MCI_STATUS_ADDRV | MCI_STATUS_S; 536 uint64_t mcg_status = MCG_STATUS_MCIP; 537 int flags = 0; 538 539 if (code == BUS_MCEERR_AR) { 540 status |= MCI_STATUS_AR | 0x134; 541 mcg_status |= MCG_STATUS_EIPV; 542 } else { 543 status |= 0xc0; 544 mcg_status |= MCG_STATUS_RIPV; 545 } 546 547 flags = cpu_x86_support_mca_broadcast(env) ? MCE_INJECT_BROADCAST : 0; 548 /* We need to read back the value of MSR_EXT_MCG_CTL that was set by the 549 * guest kernel back into env->mcg_ext_ctl. 550 */ 551 cpu_synchronize_state(cs); 552 if (env->mcg_ext_ctl & MCG_EXT_CTL_LMCE_EN) { 553 mcg_status |= MCG_STATUS_LMCE; 554 flags = 0; 555 } 556 557 cpu_x86_inject_mce(NULL, cpu, 9, status, mcg_status, paddr, 558 (MCM_ADDR_PHYS << 6) | 0xc, flags); 559 } 560 561 static void emit_hypervisor_memory_failure(MemoryFailureAction action, bool ar) 562 { 563 MemoryFailureFlags mff = {.action_required = ar, .recursive = false}; 564 565 qapi_event_send_memory_failure(MEMORY_FAILURE_RECIPIENT_HYPERVISOR, action, 566 &mff); 567 } 568 569 static void hardware_memory_error(void *host_addr) 570 { 571 emit_hypervisor_memory_failure(MEMORY_FAILURE_ACTION_FATAL, true); 572 error_report("QEMU got Hardware memory error at addr %p", host_addr); 573 exit(1); 574 } 575 576 void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr) 577 { 578 X86CPU *cpu = X86_CPU(c); 579 CPUX86State *env = &cpu->env; 580 ram_addr_t ram_addr; 581 hwaddr paddr; 582 583 /* If we get an action required MCE, it has been injected by KVM 584 * while the VM was running. An action optional MCE instead should 585 * be coming from the main thread, which qemu_init_sigbus identifies 586 * as the "early kill" thread. 587 */ 588 assert(code == BUS_MCEERR_AR || code == BUS_MCEERR_AO); 589 590 if ((env->mcg_cap & MCG_SER_P) && addr) { 591 ram_addr = qemu_ram_addr_from_host(addr); 592 if (ram_addr != RAM_ADDR_INVALID && 593 kvm_physical_memory_addr_from_host(c->kvm_state, addr, &paddr)) { 594 kvm_hwpoison_page_add(ram_addr); 595 kvm_mce_inject(cpu, paddr, code); 596 597 /* 598 * Use different logging severity based on error type. 599 * If there is additional MCE reporting on the hypervisor, QEMU VA 600 * could be another source to identify the PA and MCE details. 601 */ 602 if (code == BUS_MCEERR_AR) { 603 error_report("Guest MCE Memory Error at QEMU addr %p and " 604 "GUEST addr 0x%" HWADDR_PRIx " of type %s injected", 605 addr, paddr, "BUS_MCEERR_AR"); 606 } else { 607 warn_report("Guest MCE Memory Error at QEMU addr %p and " 608 "GUEST addr 0x%" HWADDR_PRIx " of type %s injected", 609 addr, paddr, "BUS_MCEERR_AO"); 610 } 611 612 return; 613 } 614 615 if (code == BUS_MCEERR_AO) { 616 warn_report("Hardware memory error at addr %p of type %s " 617 "for memory used by QEMU itself instead of guest system!", 618 addr, "BUS_MCEERR_AO"); 619 } 620 } 621 622 if (code == BUS_MCEERR_AR) { 623 hardware_memory_error(addr); 624 } 625 626 /* Hope we are lucky for AO MCE, just notify a event */ 627 emit_hypervisor_memory_failure(MEMORY_FAILURE_ACTION_IGNORE, false); 628 } 629 630 static void kvm_reset_exception(CPUX86State *env) 631 { 632 env->exception_nr = -1; 633 env->exception_pending = 0; 634 env->exception_injected = 0; 635 env->exception_has_payload = false; 636 env->exception_payload = 0; 637 } 638 639 static void kvm_queue_exception(CPUX86State *env, 640 int32_t exception_nr, 641 uint8_t exception_has_payload, 642 uint64_t exception_payload) 643 { 644 assert(env->exception_nr == -1); 645 assert(!env->exception_pending); 646 assert(!env->exception_injected); 647 assert(!env->exception_has_payload); 648 649 env->exception_nr = exception_nr; 650 651 if (has_exception_payload) { 652 env->exception_pending = 1; 653 654 env->exception_has_payload = exception_has_payload; 655 env->exception_payload = exception_payload; 656 } else { 657 env->exception_injected = 1; 658 659 if (exception_nr == EXCP01_DB) { 660 assert(exception_has_payload); 661 env->dr[6] = exception_payload; 662 } else if (exception_nr == EXCP0E_PAGE) { 663 assert(exception_has_payload); 664 env->cr[2] = exception_payload; 665 } else { 666 assert(!exception_has_payload); 667 } 668 } 669 } 670 671 static int kvm_inject_mce_oldstyle(X86CPU *cpu) 672 { 673 CPUX86State *env = &cpu->env; 674 675 if (!kvm_has_vcpu_events() && env->exception_nr == EXCP12_MCHK) { 676 unsigned int bank, bank_num = env->mcg_cap & 0xff; 677 struct kvm_x86_mce mce; 678 679 kvm_reset_exception(env); 680 681 /* 682 * There must be at least one bank in use if an MCE is pending. 683 * Find it and use its values for the event injection. 684 */ 685 for (bank = 0; bank < bank_num; bank++) { 686 if (env->mce_banks[bank * 4 + 1] & MCI_STATUS_VAL) { 687 break; 688 } 689 } 690 assert(bank < bank_num); 691 692 mce.bank = bank; 693 mce.status = env->mce_banks[bank * 4 + 1]; 694 mce.mcg_status = env->mcg_status; 695 mce.addr = env->mce_banks[bank * 4 + 2]; 696 mce.misc = env->mce_banks[bank * 4 + 3]; 697 698 return kvm_vcpu_ioctl(CPU(cpu), KVM_X86_SET_MCE, &mce); 699 } 700 return 0; 701 } 702 703 static void cpu_update_state(void *opaque, bool running, RunState state) 704 { 705 CPUX86State *env = opaque; 706 707 if (running) { 708 env->tsc_valid = false; 709 } 710 } 711 712 unsigned long kvm_arch_vcpu_id(CPUState *cs) 713 { 714 X86CPU *cpu = X86_CPU(cs); 715 return cpu->apic_id; 716 } 717 718 #ifndef KVM_CPUID_SIGNATURE_NEXT 719 #define KVM_CPUID_SIGNATURE_NEXT 0x40000100 720 #endif 721 722 static bool hyperv_enabled(X86CPU *cpu) 723 { 724 return kvm_check_extension(kvm_state, KVM_CAP_HYPERV) > 0 && 725 ((cpu->hyperv_spinlock_attempts != HYPERV_SPINLOCK_NEVER_NOTIFY) || 726 cpu->hyperv_features || cpu->hyperv_passthrough); 727 } 728 729 /* 730 * Check whether target_freq is within conservative 731 * ntp correctable bounds (250ppm) of freq 732 */ 733 static inline bool freq_within_bounds(int freq, int target_freq) 734 { 735 int max_freq = freq + (freq * 250 / 1000000); 736 int min_freq = freq - (freq * 250 / 1000000); 737 738 if (target_freq >= min_freq && target_freq <= max_freq) { 739 return true; 740 } 741 742 return false; 743 } 744 745 static int kvm_arch_set_tsc_khz(CPUState *cs) 746 { 747 X86CPU *cpu = X86_CPU(cs); 748 CPUX86State *env = &cpu->env; 749 int r, cur_freq; 750 bool set_ioctl = false; 751 752 if (!env->tsc_khz) { 753 return 0; 754 } 755 756 cur_freq = kvm_check_extension(cs->kvm_state, KVM_CAP_GET_TSC_KHZ) ? 757 kvm_vcpu_ioctl(cs, KVM_GET_TSC_KHZ) : -ENOTSUP; 758 759 /* 760 * If TSC scaling is supported, attempt to set TSC frequency. 761 */ 762 if (kvm_check_extension(cs->kvm_state, KVM_CAP_TSC_CONTROL)) { 763 set_ioctl = true; 764 } 765 766 /* 767 * If desired TSC frequency is within bounds of NTP correction, 768 * attempt to set TSC frequency. 769 */ 770 if (cur_freq != -ENOTSUP && freq_within_bounds(cur_freq, env->tsc_khz)) { 771 set_ioctl = true; 772 } 773 774 r = set_ioctl ? 775 kvm_vcpu_ioctl(cs, KVM_SET_TSC_KHZ, env->tsc_khz) : 776 -ENOTSUP; 777 778 if (r < 0) { 779 /* When KVM_SET_TSC_KHZ fails, it's an error only if the current 780 * TSC frequency doesn't match the one we want. 781 */ 782 cur_freq = kvm_check_extension(cs->kvm_state, KVM_CAP_GET_TSC_KHZ) ? 783 kvm_vcpu_ioctl(cs, KVM_GET_TSC_KHZ) : 784 -ENOTSUP; 785 if (cur_freq <= 0 || cur_freq != env->tsc_khz) { 786 warn_report("TSC frequency mismatch between " 787 "VM (%" PRId64 " kHz) and host (%d kHz), " 788 "and TSC scaling unavailable", 789 env->tsc_khz, cur_freq); 790 return r; 791 } 792 } 793 794 return 0; 795 } 796 797 static bool tsc_is_stable_and_known(CPUX86State *env) 798 { 799 if (!env->tsc_khz) { 800 return false; 801 } 802 return (env->features[FEAT_8000_0007_EDX] & CPUID_APM_INVTSC) 803 || env->user_tsc_khz; 804 } 805 806 static struct { 807 const char *desc; 808 struct { 809 uint32_t func; 810 int reg; 811 uint32_t bits; 812 } flags[2]; 813 uint64_t dependencies; 814 } kvm_hyperv_properties[] = { 815 [HYPERV_FEAT_RELAXED] = { 816 .desc = "relaxed timing (hv-relaxed)", 817 .flags = { 818 {.func = HV_CPUID_ENLIGHTMENT_INFO, .reg = R_EAX, 819 .bits = HV_RELAXED_TIMING_RECOMMENDED} 820 } 821 }, 822 [HYPERV_FEAT_VAPIC] = { 823 .desc = "virtual APIC (hv-vapic)", 824 .flags = { 825 {.func = HV_CPUID_FEATURES, .reg = R_EAX, 826 .bits = HV_APIC_ACCESS_AVAILABLE} 827 } 828 }, 829 [HYPERV_FEAT_TIME] = { 830 .desc = "clocksources (hv-time)", 831 .flags = { 832 {.func = HV_CPUID_FEATURES, .reg = R_EAX, 833 .bits = HV_TIME_REF_COUNT_AVAILABLE | HV_REFERENCE_TSC_AVAILABLE} 834 } 835 }, 836 [HYPERV_FEAT_CRASH] = { 837 .desc = "crash MSRs (hv-crash)", 838 .flags = { 839 {.func = HV_CPUID_FEATURES, .reg = R_EDX, 840 .bits = HV_GUEST_CRASH_MSR_AVAILABLE} 841 } 842 }, 843 [HYPERV_FEAT_RESET] = { 844 .desc = "reset MSR (hv-reset)", 845 .flags = { 846 {.func = HV_CPUID_FEATURES, .reg = R_EAX, 847 .bits = HV_RESET_AVAILABLE} 848 } 849 }, 850 [HYPERV_FEAT_VPINDEX] = { 851 .desc = "VP_INDEX MSR (hv-vpindex)", 852 .flags = { 853 {.func = HV_CPUID_FEATURES, .reg = R_EAX, 854 .bits = HV_VP_INDEX_AVAILABLE} 855 } 856 }, 857 [HYPERV_FEAT_RUNTIME] = { 858 .desc = "VP_RUNTIME MSR (hv-runtime)", 859 .flags = { 860 {.func = HV_CPUID_FEATURES, .reg = R_EAX, 861 .bits = HV_VP_RUNTIME_AVAILABLE} 862 } 863 }, 864 [HYPERV_FEAT_SYNIC] = { 865 .desc = "synthetic interrupt controller (hv-synic)", 866 .flags = { 867 {.func = HV_CPUID_FEATURES, .reg = R_EAX, 868 .bits = HV_SYNIC_AVAILABLE} 869 } 870 }, 871 [HYPERV_FEAT_STIMER] = { 872 .desc = "synthetic timers (hv-stimer)", 873 .flags = { 874 {.func = HV_CPUID_FEATURES, .reg = R_EAX, 875 .bits = HV_SYNTIMERS_AVAILABLE} 876 }, 877 .dependencies = BIT(HYPERV_FEAT_SYNIC) | BIT(HYPERV_FEAT_TIME) 878 }, 879 [HYPERV_FEAT_FREQUENCIES] = { 880 .desc = "frequency MSRs (hv-frequencies)", 881 .flags = { 882 {.func = HV_CPUID_FEATURES, .reg = R_EAX, 883 .bits = HV_ACCESS_FREQUENCY_MSRS}, 884 {.func = HV_CPUID_FEATURES, .reg = R_EDX, 885 .bits = HV_FREQUENCY_MSRS_AVAILABLE} 886 } 887 }, 888 [HYPERV_FEAT_REENLIGHTENMENT] = { 889 .desc = "reenlightenment MSRs (hv-reenlightenment)", 890 .flags = { 891 {.func = HV_CPUID_FEATURES, .reg = R_EAX, 892 .bits = HV_ACCESS_REENLIGHTENMENTS_CONTROL} 893 } 894 }, 895 [HYPERV_FEAT_TLBFLUSH] = { 896 .desc = "paravirtualized TLB flush (hv-tlbflush)", 897 .flags = { 898 {.func = HV_CPUID_ENLIGHTMENT_INFO, .reg = R_EAX, 899 .bits = HV_REMOTE_TLB_FLUSH_RECOMMENDED | 900 HV_EX_PROCESSOR_MASKS_RECOMMENDED} 901 }, 902 .dependencies = BIT(HYPERV_FEAT_VPINDEX) 903 }, 904 [HYPERV_FEAT_EVMCS] = { 905 .desc = "enlightened VMCS (hv-evmcs)", 906 .flags = { 907 {.func = HV_CPUID_ENLIGHTMENT_INFO, .reg = R_EAX, 908 .bits = HV_ENLIGHTENED_VMCS_RECOMMENDED} 909 }, 910 .dependencies = BIT(HYPERV_FEAT_VAPIC) 911 }, 912 [HYPERV_FEAT_IPI] = { 913 .desc = "paravirtualized IPI (hv-ipi)", 914 .flags = { 915 {.func = HV_CPUID_ENLIGHTMENT_INFO, .reg = R_EAX, 916 .bits = HV_CLUSTER_IPI_RECOMMENDED | 917 HV_EX_PROCESSOR_MASKS_RECOMMENDED} 918 }, 919 .dependencies = BIT(HYPERV_FEAT_VPINDEX) 920 }, 921 [HYPERV_FEAT_STIMER_DIRECT] = { 922 .desc = "direct mode synthetic timers (hv-stimer-direct)", 923 .flags = { 924 {.func = HV_CPUID_FEATURES, .reg = R_EDX, 925 .bits = HV_STIMER_DIRECT_MODE_AVAILABLE} 926 }, 927 .dependencies = BIT(HYPERV_FEAT_STIMER) 928 }, 929 [HYPERV_FEAT_AVIC] = { 930 .desc = "AVIC/APICv support (hv-avic/hv-apicv)", 931 .flags = { 932 {.func = HV_CPUID_ENLIGHTMENT_INFO, .reg = R_EAX, 933 .bits = HV_DEPRECATING_AEOI_RECOMMENDED} 934 } 935 }, 936 }; 937 938 static struct kvm_cpuid2 *try_get_hv_cpuid(CPUState *cs, int max, 939 bool do_sys_ioctl) 940 { 941 struct kvm_cpuid2 *cpuid; 942 int r, size; 943 944 size = sizeof(*cpuid) + max * sizeof(*cpuid->entries); 945 cpuid = g_malloc0(size); 946 cpuid->nent = max; 947 948 if (do_sys_ioctl) { 949 r = kvm_ioctl(kvm_state, KVM_GET_SUPPORTED_HV_CPUID, cpuid); 950 } else { 951 r = kvm_vcpu_ioctl(cs, KVM_GET_SUPPORTED_HV_CPUID, cpuid); 952 } 953 if (r == 0 && cpuid->nent >= max) { 954 r = -E2BIG; 955 } 956 if (r < 0) { 957 if (r == -E2BIG) { 958 g_free(cpuid); 959 return NULL; 960 } else { 961 fprintf(stderr, "KVM_GET_SUPPORTED_HV_CPUID failed: %s\n", 962 strerror(-r)); 963 exit(1); 964 } 965 } 966 return cpuid; 967 } 968 969 /* 970 * Run KVM_GET_SUPPORTED_HV_CPUID ioctl(), allocating a buffer large enough 971 * for all entries. 972 */ 973 static struct kvm_cpuid2 *get_supported_hv_cpuid(CPUState *cs) 974 { 975 struct kvm_cpuid2 *cpuid; 976 /* 0x40000000..0x40000005, 0x4000000A, 0x40000080..0x40000080 leaves */ 977 int max = 10; 978 int i; 979 bool do_sys_ioctl; 980 981 do_sys_ioctl = 982 kvm_check_extension(kvm_state, KVM_CAP_SYS_HYPERV_CPUID) > 0; 983 984 /* 985 * Non-empty KVM context is needed when KVM_CAP_SYS_HYPERV_CPUID is 986 * unsupported, kvm_hyperv_expand_features() checks for that. 987 */ 988 assert(do_sys_ioctl || cs->kvm_state); 989 990 /* 991 * When the buffer is too small, KVM_GET_SUPPORTED_HV_CPUID fails with 992 * -E2BIG, however, it doesn't report back the right size. Keep increasing 993 * it and re-trying until we succeed. 994 */ 995 while ((cpuid = try_get_hv_cpuid(cs, max, do_sys_ioctl)) == NULL) { 996 max++; 997 } 998 999 /* 1000 * KVM_GET_SUPPORTED_HV_CPUID does not set EVMCS CPUID bit before 1001 * KVM_CAP_HYPERV_ENLIGHTENED_VMCS is enabled but we want to get the 1002 * information early, just check for the capability and set the bit 1003 * manually. 1004 */ 1005 if (!do_sys_ioctl && kvm_check_extension(cs->kvm_state, 1006 KVM_CAP_HYPERV_ENLIGHTENED_VMCS) > 0) { 1007 for (i = 0; i < cpuid->nent; i++) { 1008 if (cpuid->entries[i].function == HV_CPUID_ENLIGHTMENT_INFO) { 1009 cpuid->entries[i].eax |= HV_ENLIGHTENED_VMCS_RECOMMENDED; 1010 } 1011 } 1012 } 1013 1014 return cpuid; 1015 } 1016 1017 /* 1018 * When KVM_GET_SUPPORTED_HV_CPUID is not supported we fill CPUID feature 1019 * leaves from KVM_CAP_HYPERV* and present MSRs data. 1020 */ 1021 static struct kvm_cpuid2 *get_supported_hv_cpuid_legacy(CPUState *cs) 1022 { 1023 X86CPU *cpu = X86_CPU(cs); 1024 struct kvm_cpuid2 *cpuid; 1025 struct kvm_cpuid_entry2 *entry_feat, *entry_recomm; 1026 1027 /* HV_CPUID_FEATURES, HV_CPUID_ENLIGHTMENT_INFO */ 1028 cpuid = g_malloc0(sizeof(*cpuid) + 2 * sizeof(*cpuid->entries)); 1029 cpuid->nent = 2; 1030 1031 /* HV_CPUID_VENDOR_AND_MAX_FUNCTIONS */ 1032 entry_feat = &cpuid->entries[0]; 1033 entry_feat->function = HV_CPUID_FEATURES; 1034 1035 entry_recomm = &cpuid->entries[1]; 1036 entry_recomm->function = HV_CPUID_ENLIGHTMENT_INFO; 1037 entry_recomm->ebx = cpu->hyperv_spinlock_attempts; 1038 1039 if (kvm_check_extension(cs->kvm_state, KVM_CAP_HYPERV) > 0) { 1040 entry_feat->eax |= HV_HYPERCALL_AVAILABLE; 1041 entry_feat->eax |= HV_APIC_ACCESS_AVAILABLE; 1042 entry_feat->edx |= HV_CPU_DYNAMIC_PARTITIONING_AVAILABLE; 1043 entry_recomm->eax |= HV_RELAXED_TIMING_RECOMMENDED; 1044 entry_recomm->eax |= HV_APIC_ACCESS_RECOMMENDED; 1045 } 1046 1047 if (kvm_check_extension(cs->kvm_state, KVM_CAP_HYPERV_TIME) > 0) { 1048 entry_feat->eax |= HV_TIME_REF_COUNT_AVAILABLE; 1049 entry_feat->eax |= HV_REFERENCE_TSC_AVAILABLE; 1050 } 1051 1052 if (has_msr_hv_frequencies) { 1053 entry_feat->eax |= HV_ACCESS_FREQUENCY_MSRS; 1054 entry_feat->edx |= HV_FREQUENCY_MSRS_AVAILABLE; 1055 } 1056 1057 if (has_msr_hv_crash) { 1058 entry_feat->edx |= HV_GUEST_CRASH_MSR_AVAILABLE; 1059 } 1060 1061 if (has_msr_hv_reenlightenment) { 1062 entry_feat->eax |= HV_ACCESS_REENLIGHTENMENTS_CONTROL; 1063 } 1064 1065 if (has_msr_hv_reset) { 1066 entry_feat->eax |= HV_RESET_AVAILABLE; 1067 } 1068 1069 if (has_msr_hv_vpindex) { 1070 entry_feat->eax |= HV_VP_INDEX_AVAILABLE; 1071 } 1072 1073 if (has_msr_hv_runtime) { 1074 entry_feat->eax |= HV_VP_RUNTIME_AVAILABLE; 1075 } 1076 1077 if (has_msr_hv_synic) { 1078 unsigned int cap = cpu->hyperv_synic_kvm_only ? 1079 KVM_CAP_HYPERV_SYNIC : KVM_CAP_HYPERV_SYNIC2; 1080 1081 if (kvm_check_extension(cs->kvm_state, cap) > 0) { 1082 entry_feat->eax |= HV_SYNIC_AVAILABLE; 1083 } 1084 } 1085 1086 if (has_msr_hv_stimer) { 1087 entry_feat->eax |= HV_SYNTIMERS_AVAILABLE; 1088 } 1089 1090 if (kvm_check_extension(cs->kvm_state, 1091 KVM_CAP_HYPERV_TLBFLUSH) > 0) { 1092 entry_recomm->eax |= HV_REMOTE_TLB_FLUSH_RECOMMENDED; 1093 entry_recomm->eax |= HV_EX_PROCESSOR_MASKS_RECOMMENDED; 1094 } 1095 1096 if (kvm_check_extension(cs->kvm_state, 1097 KVM_CAP_HYPERV_ENLIGHTENED_VMCS) > 0) { 1098 entry_recomm->eax |= HV_ENLIGHTENED_VMCS_RECOMMENDED; 1099 } 1100 1101 if (kvm_check_extension(cs->kvm_state, 1102 KVM_CAP_HYPERV_SEND_IPI) > 0) { 1103 entry_recomm->eax |= HV_CLUSTER_IPI_RECOMMENDED; 1104 entry_recomm->eax |= HV_EX_PROCESSOR_MASKS_RECOMMENDED; 1105 } 1106 1107 return cpuid; 1108 } 1109 1110 static uint32_t hv_cpuid_get_host(CPUState *cs, uint32_t func, int reg) 1111 { 1112 struct kvm_cpuid_entry2 *entry; 1113 struct kvm_cpuid2 *cpuid; 1114 1115 if (hv_cpuid_cache) { 1116 cpuid = hv_cpuid_cache; 1117 } else { 1118 if (kvm_check_extension(kvm_state, KVM_CAP_HYPERV_CPUID) > 0) { 1119 cpuid = get_supported_hv_cpuid(cs); 1120 } else { 1121 /* 1122 * 'cs->kvm_state' may be NULL when Hyper-V features are expanded 1123 * before KVM context is created but this is only done when 1124 * KVM_CAP_SYS_HYPERV_CPUID is supported and it implies 1125 * KVM_CAP_HYPERV_CPUID. 1126 */ 1127 assert(cs->kvm_state); 1128 1129 cpuid = get_supported_hv_cpuid_legacy(cs); 1130 } 1131 hv_cpuid_cache = cpuid; 1132 } 1133 1134 if (!cpuid) { 1135 return 0; 1136 } 1137 1138 entry = cpuid_find_entry(cpuid, func, 0); 1139 if (!entry) { 1140 return 0; 1141 } 1142 1143 return cpuid_entry_get_reg(entry, reg); 1144 } 1145 1146 static bool hyperv_feature_supported(CPUState *cs, int feature) 1147 { 1148 uint32_t func, bits; 1149 int i, reg; 1150 1151 for (i = 0; i < ARRAY_SIZE(kvm_hyperv_properties[feature].flags); i++) { 1152 1153 func = kvm_hyperv_properties[feature].flags[i].func; 1154 reg = kvm_hyperv_properties[feature].flags[i].reg; 1155 bits = kvm_hyperv_properties[feature].flags[i].bits; 1156 1157 if (!func) { 1158 continue; 1159 } 1160 1161 if ((hv_cpuid_get_host(cs, func, reg) & bits) != bits) { 1162 return false; 1163 } 1164 } 1165 1166 return true; 1167 } 1168 1169 /* Checks that all feature dependencies are enabled */ 1170 static bool hv_feature_check_deps(X86CPU *cpu, int feature, Error **errp) 1171 { 1172 uint64_t deps; 1173 int dep_feat; 1174 1175 deps = kvm_hyperv_properties[feature].dependencies; 1176 while (deps) { 1177 dep_feat = ctz64(deps); 1178 if (!(hyperv_feat_enabled(cpu, dep_feat))) { 1179 error_setg(errp, "Hyper-V %s requires Hyper-V %s", 1180 kvm_hyperv_properties[feature].desc, 1181 kvm_hyperv_properties[dep_feat].desc); 1182 return false; 1183 } 1184 deps &= ~(1ull << dep_feat); 1185 } 1186 1187 return true; 1188 } 1189 1190 static uint32_t hv_build_cpuid_leaf(CPUState *cs, uint32_t func, int reg) 1191 { 1192 X86CPU *cpu = X86_CPU(cs); 1193 uint32_t r = 0; 1194 int i, j; 1195 1196 for (i = 0; i < ARRAY_SIZE(kvm_hyperv_properties); i++) { 1197 if (!hyperv_feat_enabled(cpu, i)) { 1198 continue; 1199 } 1200 1201 for (j = 0; j < ARRAY_SIZE(kvm_hyperv_properties[i].flags); j++) { 1202 if (kvm_hyperv_properties[i].flags[j].func != func) { 1203 continue; 1204 } 1205 if (kvm_hyperv_properties[i].flags[j].reg != reg) { 1206 continue; 1207 } 1208 1209 r |= kvm_hyperv_properties[i].flags[j].bits; 1210 } 1211 } 1212 1213 return r; 1214 } 1215 1216 /* 1217 * Expand Hyper-V CPU features. In partucular, check that all the requested 1218 * features are supported by the host and the sanity of the configuration 1219 * (that all the required dependencies are included). Also, this takes care 1220 * of 'hv_passthrough' mode and fills the environment with all supported 1221 * Hyper-V features. 1222 */ 1223 bool kvm_hyperv_expand_features(X86CPU *cpu, Error **errp) 1224 { 1225 CPUState *cs = CPU(cpu); 1226 Error *local_err = NULL; 1227 int feat; 1228 1229 if (!hyperv_enabled(cpu)) 1230 return true; 1231 1232 /* 1233 * When kvm_hyperv_expand_features is called at CPU feature expansion 1234 * time per-CPU kvm_state is not available yet so we can only proceed 1235 * when KVM_CAP_SYS_HYPERV_CPUID is supported. 1236 */ 1237 if (!cs->kvm_state && 1238 !kvm_check_extension(kvm_state, KVM_CAP_SYS_HYPERV_CPUID)) 1239 return true; 1240 1241 if (cpu->hyperv_passthrough) { 1242 cpu->hyperv_vendor_id[0] = 1243 hv_cpuid_get_host(cs, HV_CPUID_VENDOR_AND_MAX_FUNCTIONS, R_EBX); 1244 cpu->hyperv_vendor_id[1] = 1245 hv_cpuid_get_host(cs, HV_CPUID_VENDOR_AND_MAX_FUNCTIONS, R_ECX); 1246 cpu->hyperv_vendor_id[2] = 1247 hv_cpuid_get_host(cs, HV_CPUID_VENDOR_AND_MAX_FUNCTIONS, R_EDX); 1248 cpu->hyperv_vendor = g_realloc(cpu->hyperv_vendor, 1249 sizeof(cpu->hyperv_vendor_id) + 1); 1250 memcpy(cpu->hyperv_vendor, cpu->hyperv_vendor_id, 1251 sizeof(cpu->hyperv_vendor_id)); 1252 cpu->hyperv_vendor[sizeof(cpu->hyperv_vendor_id)] = 0; 1253 1254 cpu->hyperv_interface_id[0] = 1255 hv_cpuid_get_host(cs, HV_CPUID_INTERFACE, R_EAX); 1256 cpu->hyperv_interface_id[1] = 1257 hv_cpuid_get_host(cs, HV_CPUID_INTERFACE, R_EBX); 1258 cpu->hyperv_interface_id[2] = 1259 hv_cpuid_get_host(cs, HV_CPUID_INTERFACE, R_ECX); 1260 cpu->hyperv_interface_id[3] = 1261 hv_cpuid_get_host(cs, HV_CPUID_INTERFACE, R_EDX); 1262 1263 cpu->hyperv_ver_id_build = 1264 hv_cpuid_get_host(cs, HV_CPUID_VERSION, R_EAX); 1265 cpu->hyperv_ver_id_major = 1266 hv_cpuid_get_host(cs, HV_CPUID_VERSION, R_EBX) >> 16; 1267 cpu->hyperv_ver_id_minor = 1268 hv_cpuid_get_host(cs, HV_CPUID_VERSION, R_EBX) & 0xffff; 1269 cpu->hyperv_ver_id_sp = 1270 hv_cpuid_get_host(cs, HV_CPUID_VERSION, R_ECX); 1271 cpu->hyperv_ver_id_sb = 1272 hv_cpuid_get_host(cs, HV_CPUID_VERSION, R_EDX) >> 24; 1273 cpu->hyperv_ver_id_sn = 1274 hv_cpuid_get_host(cs, HV_CPUID_VERSION, R_EDX) & 0xffffff; 1275 1276 cpu->hv_max_vps = hv_cpuid_get_host(cs, HV_CPUID_IMPLEMENT_LIMITS, 1277 R_EAX); 1278 cpu->hyperv_limits[0] = 1279 hv_cpuid_get_host(cs, HV_CPUID_IMPLEMENT_LIMITS, R_EBX); 1280 cpu->hyperv_limits[1] = 1281 hv_cpuid_get_host(cs, HV_CPUID_IMPLEMENT_LIMITS, R_ECX); 1282 cpu->hyperv_limits[2] = 1283 hv_cpuid_get_host(cs, HV_CPUID_IMPLEMENT_LIMITS, R_EDX); 1284 1285 cpu->hyperv_spinlock_attempts = 1286 hv_cpuid_get_host(cs, HV_CPUID_ENLIGHTMENT_INFO, R_EBX); 1287 1288 /* 1289 * Mark feature as enabled in 'cpu->hyperv_features' as 1290 * hv_build_cpuid_leaf() uses this info to build guest CPUIDs. 1291 */ 1292 for (feat = 0; feat < ARRAY_SIZE(kvm_hyperv_properties); feat++) { 1293 if (hyperv_feature_supported(cs, feat)) { 1294 cpu->hyperv_features |= BIT(feat); 1295 } 1296 } 1297 } else { 1298 /* Check features availability and dependencies */ 1299 for (feat = 0; feat < ARRAY_SIZE(kvm_hyperv_properties); feat++) { 1300 /* If the feature was not requested skip it. */ 1301 if (!hyperv_feat_enabled(cpu, feat)) { 1302 continue; 1303 } 1304 1305 /* Check if the feature is supported by KVM */ 1306 if (!hyperv_feature_supported(cs, feat)) { 1307 error_setg(errp, "Hyper-V %s is not supported by kernel", 1308 kvm_hyperv_properties[feat].desc); 1309 return false; 1310 } 1311 1312 /* Check dependencies */ 1313 if (!hv_feature_check_deps(cpu, feat, &local_err)) { 1314 error_propagate(errp, local_err); 1315 return false; 1316 } 1317 } 1318 } 1319 1320 /* Additional dependencies not covered by kvm_hyperv_properties[] */ 1321 if (hyperv_feat_enabled(cpu, HYPERV_FEAT_SYNIC) && 1322 !cpu->hyperv_synic_kvm_only && 1323 !hyperv_feat_enabled(cpu, HYPERV_FEAT_VPINDEX)) { 1324 error_setg(errp, "Hyper-V %s requires Hyper-V %s", 1325 kvm_hyperv_properties[HYPERV_FEAT_SYNIC].desc, 1326 kvm_hyperv_properties[HYPERV_FEAT_VPINDEX].desc); 1327 return false; 1328 } 1329 1330 return true; 1331 } 1332 1333 /* 1334 * Fill in Hyper-V CPUIDs. Returns the number of entries filled in cpuid_ent. 1335 */ 1336 static int hyperv_fill_cpuids(CPUState *cs, 1337 struct kvm_cpuid_entry2 *cpuid_ent) 1338 { 1339 X86CPU *cpu = X86_CPU(cs); 1340 struct kvm_cpuid_entry2 *c; 1341 uint32_t cpuid_i = 0; 1342 1343 c = &cpuid_ent[cpuid_i++]; 1344 c->function = HV_CPUID_VENDOR_AND_MAX_FUNCTIONS; 1345 c->eax = hyperv_feat_enabled(cpu, HYPERV_FEAT_EVMCS) ? 1346 HV_CPUID_NESTED_FEATURES : HV_CPUID_IMPLEMENT_LIMITS; 1347 c->ebx = cpu->hyperv_vendor_id[0]; 1348 c->ecx = cpu->hyperv_vendor_id[1]; 1349 c->edx = cpu->hyperv_vendor_id[2]; 1350 1351 c = &cpuid_ent[cpuid_i++]; 1352 c->function = HV_CPUID_INTERFACE; 1353 c->eax = cpu->hyperv_interface_id[0]; 1354 c->ebx = cpu->hyperv_interface_id[1]; 1355 c->ecx = cpu->hyperv_interface_id[2]; 1356 c->edx = cpu->hyperv_interface_id[3]; 1357 1358 c = &cpuid_ent[cpuid_i++]; 1359 c->function = HV_CPUID_VERSION; 1360 c->eax = cpu->hyperv_ver_id_build; 1361 c->ebx = (uint32_t)cpu->hyperv_ver_id_major << 16 | 1362 cpu->hyperv_ver_id_minor; 1363 c->ecx = cpu->hyperv_ver_id_sp; 1364 c->edx = (uint32_t)cpu->hyperv_ver_id_sb << 24 | 1365 (cpu->hyperv_ver_id_sn & 0xffffff); 1366 1367 c = &cpuid_ent[cpuid_i++]; 1368 c->function = HV_CPUID_FEATURES; 1369 c->eax = hv_build_cpuid_leaf(cs, HV_CPUID_FEATURES, R_EAX); 1370 c->ebx = hv_build_cpuid_leaf(cs, HV_CPUID_FEATURES, R_EBX); 1371 c->edx = hv_build_cpuid_leaf(cs, HV_CPUID_FEATURES, R_EDX); 1372 1373 /* Unconditionally required with any Hyper-V enlightenment */ 1374 c->eax |= HV_HYPERCALL_AVAILABLE; 1375 1376 /* SynIC and Vmbus devices require messages/signals hypercalls */ 1377 if (hyperv_feat_enabled(cpu, HYPERV_FEAT_SYNIC) && 1378 !cpu->hyperv_synic_kvm_only) { 1379 c->ebx |= HV_POST_MESSAGES | HV_SIGNAL_EVENTS; 1380 } 1381 1382 1383 /* Not exposed by KVM but needed to make CPU hotplug in Windows work */ 1384 c->edx |= HV_CPU_DYNAMIC_PARTITIONING_AVAILABLE; 1385 1386 c = &cpuid_ent[cpuid_i++]; 1387 c->function = HV_CPUID_ENLIGHTMENT_INFO; 1388 c->eax = hv_build_cpuid_leaf(cs, HV_CPUID_ENLIGHTMENT_INFO, R_EAX); 1389 c->ebx = cpu->hyperv_spinlock_attempts; 1390 1391 if (hyperv_feat_enabled(cpu, HYPERV_FEAT_VAPIC) && 1392 !hyperv_feat_enabled(cpu, HYPERV_FEAT_AVIC)) { 1393 c->eax |= HV_APIC_ACCESS_RECOMMENDED; 1394 } 1395 1396 if (cpu->hyperv_no_nonarch_cs == ON_OFF_AUTO_ON) { 1397 c->eax |= HV_NO_NONARCH_CORESHARING; 1398 } else if (cpu->hyperv_no_nonarch_cs == ON_OFF_AUTO_AUTO) { 1399 c->eax |= hv_cpuid_get_host(cs, HV_CPUID_ENLIGHTMENT_INFO, R_EAX) & 1400 HV_NO_NONARCH_CORESHARING; 1401 } 1402 1403 c = &cpuid_ent[cpuid_i++]; 1404 c->function = HV_CPUID_IMPLEMENT_LIMITS; 1405 c->eax = cpu->hv_max_vps; 1406 c->ebx = cpu->hyperv_limits[0]; 1407 c->ecx = cpu->hyperv_limits[1]; 1408 c->edx = cpu->hyperv_limits[2]; 1409 1410 if (hyperv_feat_enabled(cpu, HYPERV_FEAT_EVMCS)) { 1411 uint32_t function; 1412 1413 /* Create zeroed 0x40000006..0x40000009 leaves */ 1414 for (function = HV_CPUID_IMPLEMENT_LIMITS + 1; 1415 function < HV_CPUID_NESTED_FEATURES; function++) { 1416 c = &cpuid_ent[cpuid_i++]; 1417 c->function = function; 1418 } 1419 1420 c = &cpuid_ent[cpuid_i++]; 1421 c->function = HV_CPUID_NESTED_FEATURES; 1422 c->eax = cpu->hyperv_nested[0]; 1423 } 1424 1425 return cpuid_i; 1426 } 1427 1428 static Error *hv_passthrough_mig_blocker; 1429 static Error *hv_no_nonarch_cs_mig_blocker; 1430 1431 /* Checks that the exposed eVMCS version range is supported by KVM */ 1432 static bool evmcs_version_supported(uint16_t evmcs_version, 1433 uint16_t supported_evmcs_version) 1434 { 1435 uint8_t min_version = evmcs_version & 0xff; 1436 uint8_t max_version = evmcs_version >> 8; 1437 uint8_t min_supported_version = supported_evmcs_version & 0xff; 1438 uint8_t max_supported_version = supported_evmcs_version >> 8; 1439 1440 return (min_version >= min_supported_version) && 1441 (max_version <= max_supported_version); 1442 } 1443 1444 #define DEFAULT_EVMCS_VERSION ((1 << 8) | 1) 1445 1446 static int hyperv_init_vcpu(X86CPU *cpu) 1447 { 1448 CPUState *cs = CPU(cpu); 1449 Error *local_err = NULL; 1450 int ret; 1451 1452 if (cpu->hyperv_passthrough && hv_passthrough_mig_blocker == NULL) { 1453 error_setg(&hv_passthrough_mig_blocker, 1454 "'hv-passthrough' CPU flag prevents migration, use explicit" 1455 " set of hv-* flags instead"); 1456 ret = migrate_add_blocker(hv_passthrough_mig_blocker, &local_err); 1457 if (ret < 0) { 1458 error_report_err(local_err); 1459 return ret; 1460 } 1461 } 1462 1463 if (cpu->hyperv_no_nonarch_cs == ON_OFF_AUTO_AUTO && 1464 hv_no_nonarch_cs_mig_blocker == NULL) { 1465 error_setg(&hv_no_nonarch_cs_mig_blocker, 1466 "'hv-no-nonarch-coresharing=auto' CPU flag prevents migration" 1467 " use explicit 'hv-no-nonarch-coresharing=on' instead (but" 1468 " make sure SMT is disabled and/or that vCPUs are properly" 1469 " pinned)"); 1470 ret = migrate_add_blocker(hv_no_nonarch_cs_mig_blocker, &local_err); 1471 if (ret < 0) { 1472 error_report_err(local_err); 1473 return ret; 1474 } 1475 } 1476 1477 if (hyperv_feat_enabled(cpu, HYPERV_FEAT_VPINDEX) && !hv_vpindex_settable) { 1478 /* 1479 * the kernel doesn't support setting vp_index; assert that its value 1480 * is in sync 1481 */ 1482 struct { 1483 struct kvm_msrs info; 1484 struct kvm_msr_entry entries[1]; 1485 } msr_data = { 1486 .info.nmsrs = 1, 1487 .entries[0].index = HV_X64_MSR_VP_INDEX, 1488 }; 1489 1490 ret = kvm_vcpu_ioctl(cs, KVM_GET_MSRS, &msr_data); 1491 if (ret < 0) { 1492 return ret; 1493 } 1494 assert(ret == 1); 1495 1496 if (msr_data.entries[0].data != hyperv_vp_index(CPU(cpu))) { 1497 error_report("kernel's vp_index != QEMU's vp_index"); 1498 return -ENXIO; 1499 } 1500 } 1501 1502 if (hyperv_feat_enabled(cpu, HYPERV_FEAT_SYNIC)) { 1503 uint32_t synic_cap = cpu->hyperv_synic_kvm_only ? 1504 KVM_CAP_HYPERV_SYNIC : KVM_CAP_HYPERV_SYNIC2; 1505 ret = kvm_vcpu_enable_cap(cs, synic_cap, 0); 1506 if (ret < 0) { 1507 error_report("failed to turn on HyperV SynIC in KVM: %s", 1508 strerror(-ret)); 1509 return ret; 1510 } 1511 1512 if (!cpu->hyperv_synic_kvm_only) { 1513 ret = hyperv_x86_synic_add(cpu); 1514 if (ret < 0) { 1515 error_report("failed to create HyperV SynIC: %s", 1516 strerror(-ret)); 1517 return ret; 1518 } 1519 } 1520 } 1521 1522 if (hyperv_feat_enabled(cpu, HYPERV_FEAT_EVMCS)) { 1523 uint16_t evmcs_version = DEFAULT_EVMCS_VERSION; 1524 uint16_t supported_evmcs_version; 1525 1526 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_HYPERV_ENLIGHTENED_VMCS, 0, 1527 (uintptr_t)&supported_evmcs_version); 1528 1529 /* 1530 * KVM is required to support EVMCS ver.1. as that's what 'hv-evmcs' 1531 * option sets. Note: we hardcode the maximum supported eVMCS version 1532 * to '1' as well so 'hv-evmcs' feature is migratable even when (and if) 1533 * ver.2 is implemented. A new option (e.g. 'hv-evmcs=2') will then have 1534 * to be added. 1535 */ 1536 if (ret < 0) { 1537 error_report("Hyper-V %s is not supported by kernel", 1538 kvm_hyperv_properties[HYPERV_FEAT_EVMCS].desc); 1539 return ret; 1540 } 1541 1542 if (!evmcs_version_supported(evmcs_version, supported_evmcs_version)) { 1543 error_report("eVMCS version range [%d..%d] is not supported by " 1544 "kernel (supported: [%d..%d])", evmcs_version & 0xff, 1545 evmcs_version >> 8, supported_evmcs_version & 0xff, 1546 supported_evmcs_version >> 8); 1547 return -ENOTSUP; 1548 } 1549 1550 cpu->hyperv_nested[0] = evmcs_version; 1551 } 1552 1553 if (cpu->hyperv_enforce_cpuid) { 1554 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_HYPERV_ENFORCE_CPUID, 0, 1); 1555 if (ret < 0) { 1556 error_report("failed to enable KVM_CAP_HYPERV_ENFORCE_CPUID: %s", 1557 strerror(-ret)); 1558 return ret; 1559 } 1560 } 1561 1562 return 0; 1563 } 1564 1565 static Error *invtsc_mig_blocker; 1566 1567 #define KVM_MAX_CPUID_ENTRIES 100 1568 1569 int kvm_arch_init_vcpu(CPUState *cs) 1570 { 1571 struct { 1572 struct kvm_cpuid2 cpuid; 1573 struct kvm_cpuid_entry2 entries[KVM_MAX_CPUID_ENTRIES]; 1574 } cpuid_data; 1575 /* 1576 * The kernel defines these structs with padding fields so there 1577 * should be no extra padding in our cpuid_data struct. 1578 */ 1579 QEMU_BUILD_BUG_ON(sizeof(cpuid_data) != 1580 sizeof(struct kvm_cpuid2) + 1581 sizeof(struct kvm_cpuid_entry2) * KVM_MAX_CPUID_ENTRIES); 1582 1583 X86CPU *cpu = X86_CPU(cs); 1584 CPUX86State *env = &cpu->env; 1585 uint32_t limit, i, j, cpuid_i; 1586 uint32_t unused; 1587 struct kvm_cpuid_entry2 *c; 1588 uint32_t signature[3]; 1589 int kvm_base = KVM_CPUID_SIGNATURE; 1590 int max_nested_state_len; 1591 int r; 1592 Error *local_err = NULL; 1593 1594 memset(&cpuid_data, 0, sizeof(cpuid_data)); 1595 1596 cpuid_i = 0; 1597 1598 r = kvm_arch_set_tsc_khz(cs); 1599 if (r < 0) { 1600 return r; 1601 } 1602 1603 /* vcpu's TSC frequency is either specified by user, or following 1604 * the value used by KVM if the former is not present. In the 1605 * latter case, we query it from KVM and record in env->tsc_khz, 1606 * so that vcpu's TSC frequency can be migrated later via this field. 1607 */ 1608 if (!env->tsc_khz) { 1609 r = kvm_check_extension(cs->kvm_state, KVM_CAP_GET_TSC_KHZ) ? 1610 kvm_vcpu_ioctl(cs, KVM_GET_TSC_KHZ) : 1611 -ENOTSUP; 1612 if (r > 0) { 1613 env->tsc_khz = r; 1614 } 1615 } 1616 1617 env->apic_bus_freq = KVM_APIC_BUS_FREQUENCY; 1618 1619 /* 1620 * kvm_hyperv_expand_features() is called here for the second time in case 1621 * KVM_CAP_SYS_HYPERV_CPUID is not supported. While we can't possibly handle 1622 * 'query-cpu-model-expansion' in this case as we don't have a KVM vCPU to 1623 * check which Hyper-V enlightenments are supported and which are not, we 1624 * can still proceed and check/expand Hyper-V enlightenments here so legacy 1625 * behavior is preserved. 1626 */ 1627 if (!kvm_hyperv_expand_features(cpu, &local_err)) { 1628 error_report_err(local_err); 1629 return -ENOSYS; 1630 } 1631 1632 if (hyperv_enabled(cpu)) { 1633 r = hyperv_init_vcpu(cpu); 1634 if (r) { 1635 return r; 1636 } 1637 1638 cpuid_i = hyperv_fill_cpuids(cs, cpuid_data.entries); 1639 kvm_base = KVM_CPUID_SIGNATURE_NEXT; 1640 has_msr_hv_hypercall = true; 1641 } 1642 1643 if (cpu->expose_kvm) { 1644 memcpy(signature, "KVMKVMKVM\0\0\0", 12); 1645 c = &cpuid_data.entries[cpuid_i++]; 1646 c->function = KVM_CPUID_SIGNATURE | kvm_base; 1647 c->eax = KVM_CPUID_FEATURES | kvm_base; 1648 c->ebx = signature[0]; 1649 c->ecx = signature[1]; 1650 c->edx = signature[2]; 1651 1652 c = &cpuid_data.entries[cpuid_i++]; 1653 c->function = KVM_CPUID_FEATURES | kvm_base; 1654 c->eax = env->features[FEAT_KVM]; 1655 c->edx = env->features[FEAT_KVM_HINTS]; 1656 } 1657 1658 cpu_x86_cpuid(env, 0, 0, &limit, &unused, &unused, &unused); 1659 1660 if (cpu->kvm_pv_enforce_cpuid) { 1661 r = kvm_vcpu_enable_cap(cs, KVM_CAP_ENFORCE_PV_FEATURE_CPUID, 0, 1); 1662 if (r < 0) { 1663 fprintf(stderr, 1664 "failed to enable KVM_CAP_ENFORCE_PV_FEATURE_CPUID: %s", 1665 strerror(-r)); 1666 abort(); 1667 } 1668 } 1669 1670 for (i = 0; i <= limit; i++) { 1671 if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { 1672 fprintf(stderr, "unsupported level value: 0x%x\n", limit); 1673 abort(); 1674 } 1675 c = &cpuid_data.entries[cpuid_i++]; 1676 1677 switch (i) { 1678 case 2: { 1679 /* Keep reading function 2 till all the input is received */ 1680 int times; 1681 1682 c->function = i; 1683 c->flags = KVM_CPUID_FLAG_STATEFUL_FUNC | 1684 KVM_CPUID_FLAG_STATE_READ_NEXT; 1685 cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx); 1686 times = c->eax & 0xff; 1687 1688 for (j = 1; j < times; ++j) { 1689 if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { 1690 fprintf(stderr, "cpuid_data is full, no space for " 1691 "cpuid(eax:2):eax & 0xf = 0x%x\n", times); 1692 abort(); 1693 } 1694 c = &cpuid_data.entries[cpuid_i++]; 1695 c->function = i; 1696 c->flags = KVM_CPUID_FLAG_STATEFUL_FUNC; 1697 cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx); 1698 } 1699 break; 1700 } 1701 case 0x1f: 1702 if (env->nr_dies < 2) { 1703 break; 1704 } 1705 /* fallthrough */ 1706 case 4: 1707 case 0xb: 1708 case 0xd: 1709 for (j = 0; ; j++) { 1710 if (i == 0xd && j == 64) { 1711 break; 1712 } 1713 1714 if (i == 0x1f && j == 64) { 1715 break; 1716 } 1717 1718 c->function = i; 1719 c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX; 1720 c->index = j; 1721 cpu_x86_cpuid(env, i, j, &c->eax, &c->ebx, &c->ecx, &c->edx); 1722 1723 if (i == 4 && c->eax == 0) { 1724 break; 1725 } 1726 if (i == 0xb && !(c->ecx & 0xff00)) { 1727 break; 1728 } 1729 if (i == 0x1f && !(c->ecx & 0xff00)) { 1730 break; 1731 } 1732 if (i == 0xd && c->eax == 0) { 1733 continue; 1734 } 1735 if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { 1736 fprintf(stderr, "cpuid_data is full, no space for " 1737 "cpuid(eax:0x%x,ecx:0x%x)\n", i, j); 1738 abort(); 1739 } 1740 c = &cpuid_data.entries[cpuid_i++]; 1741 } 1742 break; 1743 case 0x7: 1744 case 0x12: 1745 for (j = 0; ; j++) { 1746 c->function = i; 1747 c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX; 1748 c->index = j; 1749 cpu_x86_cpuid(env, i, j, &c->eax, &c->ebx, &c->ecx, &c->edx); 1750 1751 if (j > 1 && (c->eax & 0xf) != 1) { 1752 break; 1753 } 1754 1755 if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { 1756 fprintf(stderr, "cpuid_data is full, no space for " 1757 "cpuid(eax:0x12,ecx:0x%x)\n", j); 1758 abort(); 1759 } 1760 c = &cpuid_data.entries[cpuid_i++]; 1761 } 1762 break; 1763 case 0x14: { 1764 uint32_t times; 1765 1766 c->function = i; 1767 c->index = 0; 1768 c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX; 1769 cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx); 1770 times = c->eax; 1771 1772 for (j = 1; j <= times; ++j) { 1773 if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { 1774 fprintf(stderr, "cpuid_data is full, no space for " 1775 "cpuid(eax:0x%x,ecx:0x%x)\n", i, j); 1776 abort(); 1777 } 1778 c = &cpuid_data.entries[cpuid_i++]; 1779 c->function = i; 1780 c->index = j; 1781 c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX; 1782 cpu_x86_cpuid(env, i, j, &c->eax, &c->ebx, &c->ecx, &c->edx); 1783 } 1784 break; 1785 } 1786 default: 1787 c->function = i; 1788 c->flags = 0; 1789 cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx); 1790 if (!c->eax && !c->ebx && !c->ecx && !c->edx) { 1791 /* 1792 * KVM already returns all zeroes if a CPUID entry is missing, 1793 * so we can omit it and avoid hitting KVM's 80-entry limit. 1794 */ 1795 cpuid_i--; 1796 } 1797 break; 1798 } 1799 } 1800 1801 if (limit >= 0x0a) { 1802 uint32_t eax, edx; 1803 1804 cpu_x86_cpuid(env, 0x0a, 0, &eax, &unused, &unused, &edx); 1805 1806 has_architectural_pmu_version = eax & 0xff; 1807 if (has_architectural_pmu_version > 0) { 1808 num_architectural_pmu_gp_counters = (eax & 0xff00) >> 8; 1809 1810 /* Shouldn't be more than 32, since that's the number of bits 1811 * available in EBX to tell us _which_ counters are available. 1812 * Play it safe. 1813 */ 1814 if (num_architectural_pmu_gp_counters > MAX_GP_COUNTERS) { 1815 num_architectural_pmu_gp_counters = MAX_GP_COUNTERS; 1816 } 1817 1818 if (has_architectural_pmu_version > 1) { 1819 num_architectural_pmu_fixed_counters = edx & 0x1f; 1820 1821 if (num_architectural_pmu_fixed_counters > MAX_FIXED_COUNTERS) { 1822 num_architectural_pmu_fixed_counters = MAX_FIXED_COUNTERS; 1823 } 1824 } 1825 } 1826 } 1827 1828 cpu_x86_cpuid(env, 0x80000000, 0, &limit, &unused, &unused, &unused); 1829 1830 for (i = 0x80000000; i <= limit; i++) { 1831 if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { 1832 fprintf(stderr, "unsupported xlevel value: 0x%x\n", limit); 1833 abort(); 1834 } 1835 c = &cpuid_data.entries[cpuid_i++]; 1836 1837 switch (i) { 1838 case 0x8000001d: 1839 /* Query for all AMD cache information leaves */ 1840 for (j = 0; ; j++) { 1841 c->function = i; 1842 c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX; 1843 c->index = j; 1844 cpu_x86_cpuid(env, i, j, &c->eax, &c->ebx, &c->ecx, &c->edx); 1845 1846 if (c->eax == 0) { 1847 break; 1848 } 1849 if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { 1850 fprintf(stderr, "cpuid_data is full, no space for " 1851 "cpuid(eax:0x%x,ecx:0x%x)\n", i, j); 1852 abort(); 1853 } 1854 c = &cpuid_data.entries[cpuid_i++]; 1855 } 1856 break; 1857 default: 1858 c->function = i; 1859 c->flags = 0; 1860 cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx); 1861 if (!c->eax && !c->ebx && !c->ecx && !c->edx) { 1862 /* 1863 * KVM already returns all zeroes if a CPUID entry is missing, 1864 * so we can omit it and avoid hitting KVM's 80-entry limit. 1865 */ 1866 cpuid_i--; 1867 } 1868 break; 1869 } 1870 } 1871 1872 /* Call Centaur's CPUID instructions they are supported. */ 1873 if (env->cpuid_xlevel2 > 0) { 1874 cpu_x86_cpuid(env, 0xC0000000, 0, &limit, &unused, &unused, &unused); 1875 1876 for (i = 0xC0000000; i <= limit; i++) { 1877 if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { 1878 fprintf(stderr, "unsupported xlevel2 value: 0x%x\n", limit); 1879 abort(); 1880 } 1881 c = &cpuid_data.entries[cpuid_i++]; 1882 1883 c->function = i; 1884 c->flags = 0; 1885 cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx); 1886 } 1887 } 1888 1889 cpuid_data.cpuid.nent = cpuid_i; 1890 1891 if (((env->cpuid_version >> 8)&0xF) >= 6 1892 && (env->features[FEAT_1_EDX] & (CPUID_MCE | CPUID_MCA)) == 1893 (CPUID_MCE | CPUID_MCA) 1894 && kvm_check_extension(cs->kvm_state, KVM_CAP_MCE) > 0) { 1895 uint64_t mcg_cap, unsupported_caps; 1896 int banks; 1897 int ret; 1898 1899 ret = kvm_get_mce_cap_supported(cs->kvm_state, &mcg_cap, &banks); 1900 if (ret < 0) { 1901 fprintf(stderr, "kvm_get_mce_cap_supported: %s", strerror(-ret)); 1902 return ret; 1903 } 1904 1905 if (banks < (env->mcg_cap & MCG_CAP_BANKS_MASK)) { 1906 error_report("kvm: Unsupported MCE bank count (QEMU = %d, KVM = %d)", 1907 (int)(env->mcg_cap & MCG_CAP_BANKS_MASK), banks); 1908 return -ENOTSUP; 1909 } 1910 1911 unsupported_caps = env->mcg_cap & ~(mcg_cap | MCG_CAP_BANKS_MASK); 1912 if (unsupported_caps) { 1913 if (unsupported_caps & MCG_LMCE_P) { 1914 error_report("kvm: LMCE not supported"); 1915 return -ENOTSUP; 1916 } 1917 warn_report("Unsupported MCG_CAP bits: 0x%" PRIx64, 1918 unsupported_caps); 1919 } 1920 1921 env->mcg_cap &= mcg_cap | MCG_CAP_BANKS_MASK; 1922 ret = kvm_vcpu_ioctl(cs, KVM_X86_SETUP_MCE, &env->mcg_cap); 1923 if (ret < 0) { 1924 fprintf(stderr, "KVM_X86_SETUP_MCE: %s", strerror(-ret)); 1925 return ret; 1926 } 1927 } 1928 1929 cpu->vmsentry = qemu_add_vm_change_state_handler(cpu_update_state, env); 1930 1931 c = cpuid_find_entry(&cpuid_data.cpuid, 1, 0); 1932 if (c) { 1933 has_msr_feature_control = !!(c->ecx & CPUID_EXT_VMX) || 1934 !!(c->ecx & CPUID_EXT_SMX); 1935 } 1936 1937 c = cpuid_find_entry(&cpuid_data.cpuid, 7, 0); 1938 if (c && (c->ebx & CPUID_7_0_EBX_SGX)) { 1939 has_msr_feature_control = true; 1940 } 1941 1942 if (env->mcg_cap & MCG_LMCE_P) { 1943 has_msr_mcg_ext_ctl = has_msr_feature_control = true; 1944 } 1945 1946 if (!env->user_tsc_khz) { 1947 if ((env->features[FEAT_8000_0007_EDX] & CPUID_APM_INVTSC) && 1948 invtsc_mig_blocker == NULL) { 1949 error_setg(&invtsc_mig_blocker, 1950 "State blocked by non-migratable CPU device" 1951 " (invtsc flag)"); 1952 r = migrate_add_blocker(invtsc_mig_blocker, &local_err); 1953 if (r < 0) { 1954 error_report_err(local_err); 1955 return r; 1956 } 1957 } 1958 } 1959 1960 if (cpu->vmware_cpuid_freq 1961 /* Guests depend on 0x40000000 to detect this feature, so only expose 1962 * it if KVM exposes leaf 0x40000000. (Conflicts with Hyper-V) */ 1963 && cpu->expose_kvm 1964 && kvm_base == KVM_CPUID_SIGNATURE 1965 /* TSC clock must be stable and known for this feature. */ 1966 && tsc_is_stable_and_known(env)) { 1967 1968 c = &cpuid_data.entries[cpuid_i++]; 1969 c->function = KVM_CPUID_SIGNATURE | 0x10; 1970 c->eax = env->tsc_khz; 1971 c->ebx = env->apic_bus_freq / 1000; /* Hz to KHz */ 1972 c->ecx = c->edx = 0; 1973 1974 c = cpuid_find_entry(&cpuid_data.cpuid, kvm_base, 0); 1975 c->eax = MAX(c->eax, KVM_CPUID_SIGNATURE | 0x10); 1976 } 1977 1978 cpuid_data.cpuid.nent = cpuid_i; 1979 1980 cpuid_data.cpuid.padding = 0; 1981 r = kvm_vcpu_ioctl(cs, KVM_SET_CPUID2, &cpuid_data); 1982 if (r) { 1983 goto fail; 1984 } 1985 1986 if (has_xsave) { 1987 env->xsave_buf_len = sizeof(struct kvm_xsave); 1988 env->xsave_buf = qemu_memalign(4096, env->xsave_buf_len); 1989 memset(env->xsave_buf, 0, env->xsave_buf_len); 1990 1991 /* 1992 * The allocated storage must be large enough for all of the 1993 * possible XSAVE state components. 1994 */ 1995 assert(kvm_arch_get_supported_cpuid(kvm_state, 0xd, 0, R_ECX) 1996 <= env->xsave_buf_len); 1997 } 1998 1999 max_nested_state_len = kvm_max_nested_state_length(); 2000 if (max_nested_state_len > 0) { 2001 assert(max_nested_state_len >= offsetof(struct kvm_nested_state, data)); 2002 2003 if (cpu_has_vmx(env) || cpu_has_svm(env)) { 2004 struct kvm_vmx_nested_state_hdr *vmx_hdr; 2005 2006 env->nested_state = g_malloc0(max_nested_state_len); 2007 env->nested_state->size = max_nested_state_len; 2008 2009 if (cpu_has_vmx(env)) { 2010 env->nested_state->format = KVM_STATE_NESTED_FORMAT_VMX; 2011 vmx_hdr = &env->nested_state->hdr.vmx; 2012 vmx_hdr->vmxon_pa = -1ull; 2013 vmx_hdr->vmcs12_pa = -1ull; 2014 } else { 2015 env->nested_state->format = KVM_STATE_NESTED_FORMAT_SVM; 2016 } 2017 } 2018 } 2019 2020 cpu->kvm_msr_buf = g_malloc0(MSR_BUF_SIZE); 2021 2022 if (!(env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_RDTSCP)) { 2023 has_msr_tsc_aux = false; 2024 } 2025 2026 kvm_init_msrs(cpu); 2027 2028 return 0; 2029 2030 fail: 2031 migrate_del_blocker(invtsc_mig_blocker); 2032 2033 return r; 2034 } 2035 2036 int kvm_arch_destroy_vcpu(CPUState *cs) 2037 { 2038 X86CPU *cpu = X86_CPU(cs); 2039 CPUX86State *env = &cpu->env; 2040 2041 if (cpu->kvm_msr_buf) { 2042 g_free(cpu->kvm_msr_buf); 2043 cpu->kvm_msr_buf = NULL; 2044 } 2045 2046 if (env->nested_state) { 2047 g_free(env->nested_state); 2048 env->nested_state = NULL; 2049 } 2050 2051 qemu_del_vm_change_state_handler(cpu->vmsentry); 2052 2053 return 0; 2054 } 2055 2056 void kvm_arch_reset_vcpu(X86CPU *cpu) 2057 { 2058 CPUX86State *env = &cpu->env; 2059 2060 env->xcr0 = 1; 2061 if (kvm_irqchip_in_kernel()) { 2062 env->mp_state = cpu_is_bsp(cpu) ? KVM_MP_STATE_RUNNABLE : 2063 KVM_MP_STATE_UNINITIALIZED; 2064 } else { 2065 env->mp_state = KVM_MP_STATE_RUNNABLE; 2066 } 2067 2068 if (hyperv_feat_enabled(cpu, HYPERV_FEAT_SYNIC)) { 2069 int i; 2070 for (i = 0; i < ARRAY_SIZE(env->msr_hv_synic_sint); i++) { 2071 env->msr_hv_synic_sint[i] = HV_SINT_MASKED; 2072 } 2073 2074 hyperv_x86_synic_reset(cpu); 2075 } 2076 /* enabled by default */ 2077 env->poll_control_msr = 1; 2078 2079 sev_es_set_reset_vector(CPU(cpu)); 2080 } 2081 2082 void kvm_arch_do_init_vcpu(X86CPU *cpu) 2083 { 2084 CPUX86State *env = &cpu->env; 2085 2086 /* APs get directly into wait-for-SIPI state. */ 2087 if (env->mp_state == KVM_MP_STATE_UNINITIALIZED) { 2088 env->mp_state = KVM_MP_STATE_INIT_RECEIVED; 2089 } 2090 } 2091 2092 static int kvm_get_supported_feature_msrs(KVMState *s) 2093 { 2094 int ret = 0; 2095 2096 if (kvm_feature_msrs != NULL) { 2097 return 0; 2098 } 2099 2100 if (!kvm_check_extension(s, KVM_CAP_GET_MSR_FEATURES)) { 2101 return 0; 2102 } 2103 2104 struct kvm_msr_list msr_list; 2105 2106 msr_list.nmsrs = 0; 2107 ret = kvm_ioctl(s, KVM_GET_MSR_FEATURE_INDEX_LIST, &msr_list); 2108 if (ret < 0 && ret != -E2BIG) { 2109 error_report("Fetch KVM feature MSR list failed: %s", 2110 strerror(-ret)); 2111 return ret; 2112 } 2113 2114 assert(msr_list.nmsrs > 0); 2115 kvm_feature_msrs = (struct kvm_msr_list *) \ 2116 g_malloc0(sizeof(msr_list) + 2117 msr_list.nmsrs * sizeof(msr_list.indices[0])); 2118 2119 kvm_feature_msrs->nmsrs = msr_list.nmsrs; 2120 ret = kvm_ioctl(s, KVM_GET_MSR_FEATURE_INDEX_LIST, kvm_feature_msrs); 2121 2122 if (ret < 0) { 2123 error_report("Fetch KVM feature MSR list failed: %s", 2124 strerror(-ret)); 2125 g_free(kvm_feature_msrs); 2126 kvm_feature_msrs = NULL; 2127 return ret; 2128 } 2129 2130 return 0; 2131 } 2132 2133 static int kvm_get_supported_msrs(KVMState *s) 2134 { 2135 int ret = 0; 2136 struct kvm_msr_list msr_list, *kvm_msr_list; 2137 2138 /* 2139 * Obtain MSR list from KVM. These are the MSRs that we must 2140 * save/restore. 2141 */ 2142 msr_list.nmsrs = 0; 2143 ret = kvm_ioctl(s, KVM_GET_MSR_INDEX_LIST, &msr_list); 2144 if (ret < 0 && ret != -E2BIG) { 2145 return ret; 2146 } 2147 /* 2148 * Old kernel modules had a bug and could write beyond the provided 2149 * memory. Allocate at least a safe amount of 1K. 2150 */ 2151 kvm_msr_list = g_malloc0(MAX(1024, sizeof(msr_list) + 2152 msr_list.nmsrs * 2153 sizeof(msr_list.indices[0]))); 2154 2155 kvm_msr_list->nmsrs = msr_list.nmsrs; 2156 ret = kvm_ioctl(s, KVM_GET_MSR_INDEX_LIST, kvm_msr_list); 2157 if (ret >= 0) { 2158 int i; 2159 2160 for (i = 0; i < kvm_msr_list->nmsrs; i++) { 2161 switch (kvm_msr_list->indices[i]) { 2162 case MSR_STAR: 2163 has_msr_star = true; 2164 break; 2165 case MSR_VM_HSAVE_PA: 2166 has_msr_hsave_pa = true; 2167 break; 2168 case MSR_TSC_AUX: 2169 has_msr_tsc_aux = true; 2170 break; 2171 case MSR_TSC_ADJUST: 2172 has_msr_tsc_adjust = true; 2173 break; 2174 case MSR_IA32_TSCDEADLINE: 2175 has_msr_tsc_deadline = true; 2176 break; 2177 case MSR_IA32_SMBASE: 2178 has_msr_smbase = true; 2179 break; 2180 case MSR_SMI_COUNT: 2181 has_msr_smi_count = true; 2182 break; 2183 case MSR_IA32_MISC_ENABLE: 2184 has_msr_misc_enable = true; 2185 break; 2186 case MSR_IA32_BNDCFGS: 2187 has_msr_bndcfgs = true; 2188 break; 2189 case MSR_IA32_XSS: 2190 has_msr_xss = true; 2191 break; 2192 case MSR_IA32_UMWAIT_CONTROL: 2193 has_msr_umwait = true; 2194 break; 2195 case HV_X64_MSR_CRASH_CTL: 2196 has_msr_hv_crash = true; 2197 break; 2198 case HV_X64_MSR_RESET: 2199 has_msr_hv_reset = true; 2200 break; 2201 case HV_X64_MSR_VP_INDEX: 2202 has_msr_hv_vpindex = true; 2203 break; 2204 case HV_X64_MSR_VP_RUNTIME: 2205 has_msr_hv_runtime = true; 2206 break; 2207 case HV_X64_MSR_SCONTROL: 2208 has_msr_hv_synic = true; 2209 break; 2210 case HV_X64_MSR_STIMER0_CONFIG: 2211 has_msr_hv_stimer = true; 2212 break; 2213 case HV_X64_MSR_TSC_FREQUENCY: 2214 has_msr_hv_frequencies = true; 2215 break; 2216 case HV_X64_MSR_REENLIGHTENMENT_CONTROL: 2217 has_msr_hv_reenlightenment = true; 2218 break; 2219 case MSR_IA32_SPEC_CTRL: 2220 has_msr_spec_ctrl = true; 2221 break; 2222 case MSR_AMD64_TSC_RATIO: 2223 has_tsc_scale_msr = true; 2224 break; 2225 case MSR_IA32_TSX_CTRL: 2226 has_msr_tsx_ctrl = true; 2227 break; 2228 case MSR_VIRT_SSBD: 2229 has_msr_virt_ssbd = true; 2230 break; 2231 case MSR_IA32_ARCH_CAPABILITIES: 2232 has_msr_arch_capabs = true; 2233 break; 2234 case MSR_IA32_CORE_CAPABILITY: 2235 has_msr_core_capabs = true; 2236 break; 2237 case MSR_IA32_PERF_CAPABILITIES: 2238 has_msr_perf_capabs = true; 2239 break; 2240 case MSR_IA32_VMX_VMFUNC: 2241 has_msr_vmx_vmfunc = true; 2242 break; 2243 case MSR_IA32_UCODE_REV: 2244 has_msr_ucode_rev = true; 2245 break; 2246 case MSR_IA32_VMX_PROCBASED_CTLS2: 2247 has_msr_vmx_procbased_ctls2 = true; 2248 break; 2249 case MSR_IA32_PKRS: 2250 has_msr_pkrs = true; 2251 break; 2252 } 2253 } 2254 } 2255 2256 g_free(kvm_msr_list); 2257 2258 return ret; 2259 } 2260 2261 static Notifier smram_machine_done; 2262 static KVMMemoryListener smram_listener; 2263 static AddressSpace smram_address_space; 2264 static MemoryRegion smram_as_root; 2265 static MemoryRegion smram_as_mem; 2266 2267 static void register_smram_listener(Notifier *n, void *unused) 2268 { 2269 MemoryRegion *smram = 2270 (MemoryRegion *) object_resolve_path("/machine/smram", NULL); 2271 2272 /* Outer container... */ 2273 memory_region_init(&smram_as_root, OBJECT(kvm_state), "mem-container-smram", ~0ull); 2274 memory_region_set_enabled(&smram_as_root, true); 2275 2276 /* ... with two regions inside: normal system memory with low 2277 * priority, and... 2278 */ 2279 memory_region_init_alias(&smram_as_mem, OBJECT(kvm_state), "mem-smram", 2280 get_system_memory(), 0, ~0ull); 2281 memory_region_add_subregion_overlap(&smram_as_root, 0, &smram_as_mem, 0); 2282 memory_region_set_enabled(&smram_as_mem, true); 2283 2284 if (smram) { 2285 /* ... SMRAM with higher priority */ 2286 memory_region_add_subregion_overlap(&smram_as_root, 0, smram, 10); 2287 memory_region_set_enabled(smram, true); 2288 } 2289 2290 address_space_init(&smram_address_space, &smram_as_root, "KVM-SMRAM"); 2291 kvm_memory_listener_register(kvm_state, &smram_listener, 2292 &smram_address_space, 1, "kvm-smram"); 2293 } 2294 2295 int kvm_arch_init(MachineState *ms, KVMState *s) 2296 { 2297 uint64_t identity_base = 0xfffbc000; 2298 uint64_t shadow_mem; 2299 int ret; 2300 struct utsname utsname; 2301 Error *local_err = NULL; 2302 2303 /* 2304 * Initialize SEV context, if required 2305 * 2306 * If no memory encryption is requested (ms->cgs == NULL) this is 2307 * a no-op. 2308 * 2309 * It's also a no-op if a non-SEV confidential guest support 2310 * mechanism is selected. SEV is the only mechanism available to 2311 * select on x86 at present, so this doesn't arise, but if new 2312 * mechanisms are supported in future (e.g. TDX), they'll need 2313 * their own initialization either here or elsewhere. 2314 */ 2315 ret = sev_kvm_init(ms->cgs, &local_err); 2316 if (ret < 0) { 2317 error_report_err(local_err); 2318 return ret; 2319 } 2320 2321 if (!kvm_check_extension(s, KVM_CAP_IRQ_ROUTING)) { 2322 error_report("kvm: KVM_CAP_IRQ_ROUTING not supported by KVM"); 2323 return -ENOTSUP; 2324 } 2325 2326 has_xsave = kvm_check_extension(s, KVM_CAP_XSAVE); 2327 has_xcrs = kvm_check_extension(s, KVM_CAP_XCRS); 2328 has_pit_state2 = kvm_check_extension(s, KVM_CAP_PIT_STATE2); 2329 has_sregs2 = kvm_check_extension(s, KVM_CAP_SREGS2) > 0; 2330 2331 hv_vpindex_settable = kvm_check_extension(s, KVM_CAP_HYPERV_VP_INDEX); 2332 2333 has_exception_payload = kvm_check_extension(s, KVM_CAP_EXCEPTION_PAYLOAD); 2334 if (has_exception_payload) { 2335 ret = kvm_vm_enable_cap(s, KVM_CAP_EXCEPTION_PAYLOAD, 0, true); 2336 if (ret < 0) { 2337 error_report("kvm: Failed to enable exception payload cap: %s", 2338 strerror(-ret)); 2339 return ret; 2340 } 2341 } 2342 2343 ret = kvm_get_supported_msrs(s); 2344 if (ret < 0) { 2345 return ret; 2346 } 2347 2348 kvm_get_supported_feature_msrs(s); 2349 2350 uname(&utsname); 2351 lm_capable_kernel = strcmp(utsname.machine, "x86_64") == 0; 2352 2353 /* 2354 * On older Intel CPUs, KVM uses vm86 mode to emulate 16-bit code directly. 2355 * In order to use vm86 mode, an EPT identity map and a TSS are needed. 2356 * Since these must be part of guest physical memory, we need to allocate 2357 * them, both by setting their start addresses in the kernel and by 2358 * creating a corresponding e820 entry. We need 4 pages before the BIOS. 2359 * 2360 * Older KVM versions may not support setting the identity map base. In 2361 * that case we need to stick with the default, i.e. a 256K maximum BIOS 2362 * size. 2363 */ 2364 if (kvm_check_extension(s, KVM_CAP_SET_IDENTITY_MAP_ADDR)) { 2365 /* Allows up to 16M BIOSes. */ 2366 identity_base = 0xfeffc000; 2367 2368 ret = kvm_vm_ioctl(s, KVM_SET_IDENTITY_MAP_ADDR, &identity_base); 2369 if (ret < 0) { 2370 return ret; 2371 } 2372 } 2373 2374 /* Set TSS base one page after EPT identity map. */ 2375 ret = kvm_vm_ioctl(s, KVM_SET_TSS_ADDR, identity_base + 0x1000); 2376 if (ret < 0) { 2377 return ret; 2378 } 2379 2380 /* Tell fw_cfg to notify the BIOS to reserve the range. */ 2381 ret = e820_add_entry(identity_base, 0x4000, E820_RESERVED); 2382 if (ret < 0) { 2383 fprintf(stderr, "e820_add_entry() table is full\n"); 2384 return ret; 2385 } 2386 2387 shadow_mem = object_property_get_int(OBJECT(s), "kvm-shadow-mem", &error_abort); 2388 if (shadow_mem != -1) { 2389 shadow_mem /= 4096; 2390 ret = kvm_vm_ioctl(s, KVM_SET_NR_MMU_PAGES, shadow_mem); 2391 if (ret < 0) { 2392 return ret; 2393 } 2394 } 2395 2396 if (kvm_check_extension(s, KVM_CAP_X86_SMM) && 2397 object_dynamic_cast(OBJECT(ms), TYPE_X86_MACHINE) && 2398 x86_machine_is_smm_enabled(X86_MACHINE(ms))) { 2399 smram_machine_done.notify = register_smram_listener; 2400 qemu_add_machine_init_done_notifier(&smram_machine_done); 2401 } 2402 2403 if (enable_cpu_pm) { 2404 int disable_exits = kvm_check_extension(s, KVM_CAP_X86_DISABLE_EXITS); 2405 int ret; 2406 2407 /* Work around for kernel header with a typo. TODO: fix header and drop. */ 2408 #if defined(KVM_X86_DISABLE_EXITS_HTL) && !defined(KVM_X86_DISABLE_EXITS_HLT) 2409 #define KVM_X86_DISABLE_EXITS_HLT KVM_X86_DISABLE_EXITS_HTL 2410 #endif 2411 if (disable_exits) { 2412 disable_exits &= (KVM_X86_DISABLE_EXITS_MWAIT | 2413 KVM_X86_DISABLE_EXITS_HLT | 2414 KVM_X86_DISABLE_EXITS_PAUSE | 2415 KVM_X86_DISABLE_EXITS_CSTATE); 2416 } 2417 2418 ret = kvm_vm_enable_cap(s, KVM_CAP_X86_DISABLE_EXITS, 0, 2419 disable_exits); 2420 if (ret < 0) { 2421 error_report("kvm: guest stopping CPU not supported: %s", 2422 strerror(-ret)); 2423 } 2424 } 2425 2426 if (object_dynamic_cast(OBJECT(ms), TYPE_X86_MACHINE)) { 2427 X86MachineState *x86ms = X86_MACHINE(ms); 2428 2429 if (x86ms->bus_lock_ratelimit > 0) { 2430 ret = kvm_check_extension(s, KVM_CAP_X86_BUS_LOCK_EXIT); 2431 if (!(ret & KVM_BUS_LOCK_DETECTION_EXIT)) { 2432 error_report("kvm: bus lock detection unsupported"); 2433 return -ENOTSUP; 2434 } 2435 ret = kvm_vm_enable_cap(s, KVM_CAP_X86_BUS_LOCK_EXIT, 0, 2436 KVM_BUS_LOCK_DETECTION_EXIT); 2437 if (ret < 0) { 2438 error_report("kvm: Failed to enable bus lock detection cap: %s", 2439 strerror(-ret)); 2440 return ret; 2441 } 2442 ratelimit_init(&bus_lock_ratelimit_ctrl); 2443 ratelimit_set_speed(&bus_lock_ratelimit_ctrl, 2444 x86ms->bus_lock_ratelimit, BUS_LOCK_SLICE_TIME); 2445 } 2446 } 2447 2448 return 0; 2449 } 2450 2451 static void set_v8086_seg(struct kvm_segment *lhs, const SegmentCache *rhs) 2452 { 2453 lhs->selector = rhs->selector; 2454 lhs->base = rhs->base; 2455 lhs->limit = rhs->limit; 2456 lhs->type = 3; 2457 lhs->present = 1; 2458 lhs->dpl = 3; 2459 lhs->db = 0; 2460 lhs->s = 1; 2461 lhs->l = 0; 2462 lhs->g = 0; 2463 lhs->avl = 0; 2464 lhs->unusable = 0; 2465 } 2466 2467 static void set_seg(struct kvm_segment *lhs, const SegmentCache *rhs) 2468 { 2469 unsigned flags = rhs->flags; 2470 lhs->selector = rhs->selector; 2471 lhs->base = rhs->base; 2472 lhs->limit = rhs->limit; 2473 lhs->type = (flags >> DESC_TYPE_SHIFT) & 15; 2474 lhs->present = (flags & DESC_P_MASK) != 0; 2475 lhs->dpl = (flags >> DESC_DPL_SHIFT) & 3; 2476 lhs->db = (flags >> DESC_B_SHIFT) & 1; 2477 lhs->s = (flags & DESC_S_MASK) != 0; 2478 lhs->l = (flags >> DESC_L_SHIFT) & 1; 2479 lhs->g = (flags & DESC_G_MASK) != 0; 2480 lhs->avl = (flags & DESC_AVL_MASK) != 0; 2481 lhs->unusable = !lhs->present; 2482 lhs->padding = 0; 2483 } 2484 2485 static void get_seg(SegmentCache *lhs, const struct kvm_segment *rhs) 2486 { 2487 lhs->selector = rhs->selector; 2488 lhs->base = rhs->base; 2489 lhs->limit = rhs->limit; 2490 lhs->flags = (rhs->type << DESC_TYPE_SHIFT) | 2491 ((rhs->present && !rhs->unusable) * DESC_P_MASK) | 2492 (rhs->dpl << DESC_DPL_SHIFT) | 2493 (rhs->db << DESC_B_SHIFT) | 2494 (rhs->s * DESC_S_MASK) | 2495 (rhs->l << DESC_L_SHIFT) | 2496 (rhs->g * DESC_G_MASK) | 2497 (rhs->avl * DESC_AVL_MASK); 2498 } 2499 2500 static void kvm_getput_reg(__u64 *kvm_reg, target_ulong *qemu_reg, int set) 2501 { 2502 if (set) { 2503 *kvm_reg = *qemu_reg; 2504 } else { 2505 *qemu_reg = *kvm_reg; 2506 } 2507 } 2508 2509 static int kvm_getput_regs(X86CPU *cpu, int set) 2510 { 2511 CPUX86State *env = &cpu->env; 2512 struct kvm_regs regs; 2513 int ret = 0; 2514 2515 if (!set) { 2516 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_REGS, ®s); 2517 if (ret < 0) { 2518 return ret; 2519 } 2520 } 2521 2522 kvm_getput_reg(®s.rax, &env->regs[R_EAX], set); 2523 kvm_getput_reg(®s.rbx, &env->regs[R_EBX], set); 2524 kvm_getput_reg(®s.rcx, &env->regs[R_ECX], set); 2525 kvm_getput_reg(®s.rdx, &env->regs[R_EDX], set); 2526 kvm_getput_reg(®s.rsi, &env->regs[R_ESI], set); 2527 kvm_getput_reg(®s.rdi, &env->regs[R_EDI], set); 2528 kvm_getput_reg(®s.rsp, &env->regs[R_ESP], set); 2529 kvm_getput_reg(®s.rbp, &env->regs[R_EBP], set); 2530 #ifdef TARGET_X86_64 2531 kvm_getput_reg(®s.r8, &env->regs[8], set); 2532 kvm_getput_reg(®s.r9, &env->regs[9], set); 2533 kvm_getput_reg(®s.r10, &env->regs[10], set); 2534 kvm_getput_reg(®s.r11, &env->regs[11], set); 2535 kvm_getput_reg(®s.r12, &env->regs[12], set); 2536 kvm_getput_reg(®s.r13, &env->regs[13], set); 2537 kvm_getput_reg(®s.r14, &env->regs[14], set); 2538 kvm_getput_reg(®s.r15, &env->regs[15], set); 2539 #endif 2540 2541 kvm_getput_reg(®s.rflags, &env->eflags, set); 2542 kvm_getput_reg(®s.rip, &env->eip, set); 2543 2544 if (set) { 2545 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_REGS, ®s); 2546 } 2547 2548 return ret; 2549 } 2550 2551 static int kvm_put_fpu(X86CPU *cpu) 2552 { 2553 CPUX86State *env = &cpu->env; 2554 struct kvm_fpu fpu; 2555 int i; 2556 2557 memset(&fpu, 0, sizeof fpu); 2558 fpu.fsw = env->fpus & ~(7 << 11); 2559 fpu.fsw |= (env->fpstt & 7) << 11; 2560 fpu.fcw = env->fpuc; 2561 fpu.last_opcode = env->fpop; 2562 fpu.last_ip = env->fpip; 2563 fpu.last_dp = env->fpdp; 2564 for (i = 0; i < 8; ++i) { 2565 fpu.ftwx |= (!env->fptags[i]) << i; 2566 } 2567 memcpy(fpu.fpr, env->fpregs, sizeof env->fpregs); 2568 for (i = 0; i < CPU_NB_REGS; i++) { 2569 stq_p(&fpu.xmm[i][0], env->xmm_regs[i].ZMM_Q(0)); 2570 stq_p(&fpu.xmm[i][8], env->xmm_regs[i].ZMM_Q(1)); 2571 } 2572 fpu.mxcsr = env->mxcsr; 2573 2574 return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_FPU, &fpu); 2575 } 2576 2577 static int kvm_put_xsave(X86CPU *cpu) 2578 { 2579 CPUX86State *env = &cpu->env; 2580 void *xsave = env->xsave_buf; 2581 2582 if (!has_xsave) { 2583 return kvm_put_fpu(cpu); 2584 } 2585 x86_cpu_xsave_all_areas(cpu, xsave, env->xsave_buf_len); 2586 2587 return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_XSAVE, xsave); 2588 } 2589 2590 static int kvm_put_xcrs(X86CPU *cpu) 2591 { 2592 CPUX86State *env = &cpu->env; 2593 struct kvm_xcrs xcrs = {}; 2594 2595 if (!has_xcrs) { 2596 return 0; 2597 } 2598 2599 xcrs.nr_xcrs = 1; 2600 xcrs.flags = 0; 2601 xcrs.xcrs[0].xcr = 0; 2602 xcrs.xcrs[0].value = env->xcr0; 2603 return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_XCRS, &xcrs); 2604 } 2605 2606 static int kvm_put_sregs(X86CPU *cpu) 2607 { 2608 CPUX86State *env = &cpu->env; 2609 struct kvm_sregs sregs; 2610 2611 /* 2612 * The interrupt_bitmap is ignored because KVM_SET_SREGS is 2613 * always followed by KVM_SET_VCPU_EVENTS. 2614 */ 2615 memset(sregs.interrupt_bitmap, 0, sizeof(sregs.interrupt_bitmap)); 2616 2617 if ((env->eflags & VM_MASK)) { 2618 set_v8086_seg(&sregs.cs, &env->segs[R_CS]); 2619 set_v8086_seg(&sregs.ds, &env->segs[R_DS]); 2620 set_v8086_seg(&sregs.es, &env->segs[R_ES]); 2621 set_v8086_seg(&sregs.fs, &env->segs[R_FS]); 2622 set_v8086_seg(&sregs.gs, &env->segs[R_GS]); 2623 set_v8086_seg(&sregs.ss, &env->segs[R_SS]); 2624 } else { 2625 set_seg(&sregs.cs, &env->segs[R_CS]); 2626 set_seg(&sregs.ds, &env->segs[R_DS]); 2627 set_seg(&sregs.es, &env->segs[R_ES]); 2628 set_seg(&sregs.fs, &env->segs[R_FS]); 2629 set_seg(&sregs.gs, &env->segs[R_GS]); 2630 set_seg(&sregs.ss, &env->segs[R_SS]); 2631 } 2632 2633 set_seg(&sregs.tr, &env->tr); 2634 set_seg(&sregs.ldt, &env->ldt); 2635 2636 sregs.idt.limit = env->idt.limit; 2637 sregs.idt.base = env->idt.base; 2638 memset(sregs.idt.padding, 0, sizeof sregs.idt.padding); 2639 sregs.gdt.limit = env->gdt.limit; 2640 sregs.gdt.base = env->gdt.base; 2641 memset(sregs.gdt.padding, 0, sizeof sregs.gdt.padding); 2642 2643 sregs.cr0 = env->cr[0]; 2644 sregs.cr2 = env->cr[2]; 2645 sregs.cr3 = env->cr[3]; 2646 sregs.cr4 = env->cr[4]; 2647 2648 sregs.cr8 = cpu_get_apic_tpr(cpu->apic_state); 2649 sregs.apic_base = cpu_get_apic_base(cpu->apic_state); 2650 2651 sregs.efer = env->efer; 2652 2653 return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs); 2654 } 2655 2656 static int kvm_put_sregs2(X86CPU *cpu) 2657 { 2658 CPUX86State *env = &cpu->env; 2659 struct kvm_sregs2 sregs; 2660 int i; 2661 2662 sregs.flags = 0; 2663 2664 if ((env->eflags & VM_MASK)) { 2665 set_v8086_seg(&sregs.cs, &env->segs[R_CS]); 2666 set_v8086_seg(&sregs.ds, &env->segs[R_DS]); 2667 set_v8086_seg(&sregs.es, &env->segs[R_ES]); 2668 set_v8086_seg(&sregs.fs, &env->segs[R_FS]); 2669 set_v8086_seg(&sregs.gs, &env->segs[R_GS]); 2670 set_v8086_seg(&sregs.ss, &env->segs[R_SS]); 2671 } else { 2672 set_seg(&sregs.cs, &env->segs[R_CS]); 2673 set_seg(&sregs.ds, &env->segs[R_DS]); 2674 set_seg(&sregs.es, &env->segs[R_ES]); 2675 set_seg(&sregs.fs, &env->segs[R_FS]); 2676 set_seg(&sregs.gs, &env->segs[R_GS]); 2677 set_seg(&sregs.ss, &env->segs[R_SS]); 2678 } 2679 2680 set_seg(&sregs.tr, &env->tr); 2681 set_seg(&sregs.ldt, &env->ldt); 2682 2683 sregs.idt.limit = env->idt.limit; 2684 sregs.idt.base = env->idt.base; 2685 memset(sregs.idt.padding, 0, sizeof sregs.idt.padding); 2686 sregs.gdt.limit = env->gdt.limit; 2687 sregs.gdt.base = env->gdt.base; 2688 memset(sregs.gdt.padding, 0, sizeof sregs.gdt.padding); 2689 2690 sregs.cr0 = env->cr[0]; 2691 sregs.cr2 = env->cr[2]; 2692 sregs.cr3 = env->cr[3]; 2693 sregs.cr4 = env->cr[4]; 2694 2695 sregs.cr8 = cpu_get_apic_tpr(cpu->apic_state); 2696 sregs.apic_base = cpu_get_apic_base(cpu->apic_state); 2697 2698 sregs.efer = env->efer; 2699 2700 if (env->pdptrs_valid) { 2701 for (i = 0; i < 4; i++) { 2702 sregs.pdptrs[i] = env->pdptrs[i]; 2703 } 2704 sregs.flags |= KVM_SREGS2_FLAGS_PDPTRS_VALID; 2705 } 2706 2707 return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS2, &sregs); 2708 } 2709 2710 2711 static void kvm_msr_buf_reset(X86CPU *cpu) 2712 { 2713 memset(cpu->kvm_msr_buf, 0, MSR_BUF_SIZE); 2714 } 2715 2716 static void kvm_msr_entry_add(X86CPU *cpu, uint32_t index, uint64_t value) 2717 { 2718 struct kvm_msrs *msrs = cpu->kvm_msr_buf; 2719 void *limit = ((void *)msrs) + MSR_BUF_SIZE; 2720 struct kvm_msr_entry *entry = &msrs->entries[msrs->nmsrs]; 2721 2722 assert((void *)(entry + 1) <= limit); 2723 2724 entry->index = index; 2725 entry->reserved = 0; 2726 entry->data = value; 2727 msrs->nmsrs++; 2728 } 2729 2730 static int kvm_put_one_msr(X86CPU *cpu, int index, uint64_t value) 2731 { 2732 kvm_msr_buf_reset(cpu); 2733 kvm_msr_entry_add(cpu, index, value); 2734 2735 return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MSRS, cpu->kvm_msr_buf); 2736 } 2737 2738 void kvm_put_apicbase(X86CPU *cpu, uint64_t value) 2739 { 2740 int ret; 2741 2742 ret = kvm_put_one_msr(cpu, MSR_IA32_APICBASE, value); 2743 assert(ret == 1); 2744 } 2745 2746 static int kvm_put_tscdeadline_msr(X86CPU *cpu) 2747 { 2748 CPUX86State *env = &cpu->env; 2749 int ret; 2750 2751 if (!has_msr_tsc_deadline) { 2752 return 0; 2753 } 2754 2755 ret = kvm_put_one_msr(cpu, MSR_IA32_TSCDEADLINE, env->tsc_deadline); 2756 if (ret < 0) { 2757 return ret; 2758 } 2759 2760 assert(ret == 1); 2761 return 0; 2762 } 2763 2764 /* 2765 * Provide a separate write service for the feature control MSR in order to 2766 * kick the VCPU out of VMXON or even guest mode on reset. This has to be done 2767 * before writing any other state because forcibly leaving nested mode 2768 * invalidates the VCPU state. 2769 */ 2770 static int kvm_put_msr_feature_control(X86CPU *cpu) 2771 { 2772 int ret; 2773 2774 if (!has_msr_feature_control) { 2775 return 0; 2776 } 2777 2778 ret = kvm_put_one_msr(cpu, MSR_IA32_FEATURE_CONTROL, 2779 cpu->env.msr_ia32_feature_control); 2780 if (ret < 0) { 2781 return ret; 2782 } 2783 2784 assert(ret == 1); 2785 return 0; 2786 } 2787 2788 static uint64_t make_vmx_msr_value(uint32_t index, uint32_t features) 2789 { 2790 uint32_t default1, can_be_one, can_be_zero; 2791 uint32_t must_be_one; 2792 2793 switch (index) { 2794 case MSR_IA32_VMX_TRUE_PINBASED_CTLS: 2795 default1 = 0x00000016; 2796 break; 2797 case MSR_IA32_VMX_TRUE_PROCBASED_CTLS: 2798 default1 = 0x0401e172; 2799 break; 2800 case MSR_IA32_VMX_TRUE_ENTRY_CTLS: 2801 default1 = 0x000011ff; 2802 break; 2803 case MSR_IA32_VMX_TRUE_EXIT_CTLS: 2804 default1 = 0x00036dff; 2805 break; 2806 case MSR_IA32_VMX_PROCBASED_CTLS2: 2807 default1 = 0; 2808 break; 2809 default: 2810 abort(); 2811 } 2812 2813 /* If a feature bit is set, the control can be either set or clear. 2814 * Otherwise the value is limited to either 0 or 1 by default1. 2815 */ 2816 can_be_one = features | default1; 2817 can_be_zero = features | ~default1; 2818 must_be_one = ~can_be_zero; 2819 2820 /* 2821 * Bit 0:31 -> 0 if the control bit can be zero (i.e. 1 if it must be one). 2822 * Bit 32:63 -> 1 if the control bit can be one. 2823 */ 2824 return must_be_one | (((uint64_t)can_be_one) << 32); 2825 } 2826 2827 static void kvm_msr_entry_add_vmx(X86CPU *cpu, FeatureWordArray f) 2828 { 2829 uint64_t kvm_vmx_basic = 2830 kvm_arch_get_supported_msr_feature(kvm_state, 2831 MSR_IA32_VMX_BASIC); 2832 2833 if (!kvm_vmx_basic) { 2834 /* If the kernel doesn't support VMX feature (kvm_intel.nested=0), 2835 * then kvm_vmx_basic will be 0 and KVM_SET_MSR will fail. 2836 */ 2837 return; 2838 } 2839 2840 uint64_t kvm_vmx_misc = 2841 kvm_arch_get_supported_msr_feature(kvm_state, 2842 MSR_IA32_VMX_MISC); 2843 uint64_t kvm_vmx_ept_vpid = 2844 kvm_arch_get_supported_msr_feature(kvm_state, 2845 MSR_IA32_VMX_EPT_VPID_CAP); 2846 2847 /* 2848 * If the guest is 64-bit, a value of 1 is allowed for the host address 2849 * space size vmexit control. 2850 */ 2851 uint64_t fixed_vmx_exit = f[FEAT_8000_0001_EDX] & CPUID_EXT2_LM 2852 ? (uint64_t)VMX_VM_EXIT_HOST_ADDR_SPACE_SIZE << 32 : 0; 2853 2854 /* 2855 * Bits 0-30, 32-44 and 50-53 come from the host. KVM should 2856 * not change them for backwards compatibility. 2857 */ 2858 uint64_t fixed_vmx_basic = kvm_vmx_basic & 2859 (MSR_VMX_BASIC_VMCS_REVISION_MASK | 2860 MSR_VMX_BASIC_VMXON_REGION_SIZE_MASK | 2861 MSR_VMX_BASIC_VMCS_MEM_TYPE_MASK); 2862 2863 /* 2864 * Same for bits 0-4 and 25-27. Bits 16-24 (CR3 target count) can 2865 * change in the future but are always zero for now, clear them to be 2866 * future proof. Bits 32-63 in theory could change, though KVM does 2867 * not support dual-monitor treatment and probably never will; mask 2868 * them out as well. 2869 */ 2870 uint64_t fixed_vmx_misc = kvm_vmx_misc & 2871 (MSR_VMX_MISC_PREEMPTION_TIMER_SHIFT_MASK | 2872 MSR_VMX_MISC_MAX_MSR_LIST_SIZE_MASK); 2873 2874 /* 2875 * EPT memory types should not change either, so we do not bother 2876 * adding features for them. 2877 */ 2878 uint64_t fixed_vmx_ept_mask = 2879 (f[FEAT_VMX_SECONDARY_CTLS] & VMX_SECONDARY_EXEC_ENABLE_EPT ? 2880 MSR_VMX_EPT_UC | MSR_VMX_EPT_WB : 0); 2881 uint64_t fixed_vmx_ept_vpid = kvm_vmx_ept_vpid & fixed_vmx_ept_mask; 2882 2883 kvm_msr_entry_add(cpu, MSR_IA32_VMX_TRUE_PROCBASED_CTLS, 2884 make_vmx_msr_value(MSR_IA32_VMX_TRUE_PROCBASED_CTLS, 2885 f[FEAT_VMX_PROCBASED_CTLS])); 2886 kvm_msr_entry_add(cpu, MSR_IA32_VMX_TRUE_PINBASED_CTLS, 2887 make_vmx_msr_value(MSR_IA32_VMX_TRUE_PINBASED_CTLS, 2888 f[FEAT_VMX_PINBASED_CTLS])); 2889 kvm_msr_entry_add(cpu, MSR_IA32_VMX_TRUE_EXIT_CTLS, 2890 make_vmx_msr_value(MSR_IA32_VMX_TRUE_EXIT_CTLS, 2891 f[FEAT_VMX_EXIT_CTLS]) | fixed_vmx_exit); 2892 kvm_msr_entry_add(cpu, MSR_IA32_VMX_TRUE_ENTRY_CTLS, 2893 make_vmx_msr_value(MSR_IA32_VMX_TRUE_ENTRY_CTLS, 2894 f[FEAT_VMX_ENTRY_CTLS])); 2895 kvm_msr_entry_add(cpu, MSR_IA32_VMX_PROCBASED_CTLS2, 2896 make_vmx_msr_value(MSR_IA32_VMX_PROCBASED_CTLS2, 2897 f[FEAT_VMX_SECONDARY_CTLS])); 2898 kvm_msr_entry_add(cpu, MSR_IA32_VMX_EPT_VPID_CAP, 2899 f[FEAT_VMX_EPT_VPID_CAPS] | fixed_vmx_ept_vpid); 2900 kvm_msr_entry_add(cpu, MSR_IA32_VMX_BASIC, 2901 f[FEAT_VMX_BASIC] | fixed_vmx_basic); 2902 kvm_msr_entry_add(cpu, MSR_IA32_VMX_MISC, 2903 f[FEAT_VMX_MISC] | fixed_vmx_misc); 2904 if (has_msr_vmx_vmfunc) { 2905 kvm_msr_entry_add(cpu, MSR_IA32_VMX_VMFUNC, f[FEAT_VMX_VMFUNC]); 2906 } 2907 2908 /* 2909 * Just to be safe, write these with constant values. The CRn_FIXED1 2910 * MSRs are generated by KVM based on the vCPU's CPUID. 2911 */ 2912 kvm_msr_entry_add(cpu, MSR_IA32_VMX_CR0_FIXED0, 2913 CR0_PE_MASK | CR0_PG_MASK | CR0_NE_MASK); 2914 kvm_msr_entry_add(cpu, MSR_IA32_VMX_CR4_FIXED0, 2915 CR4_VMXE_MASK); 2916 2917 if (f[FEAT_VMX_SECONDARY_CTLS] & VMX_SECONDARY_EXEC_TSC_SCALING) { 2918 /* TSC multiplier (0x2032). */ 2919 kvm_msr_entry_add(cpu, MSR_IA32_VMX_VMCS_ENUM, 0x32); 2920 } else { 2921 /* Preemption timer (0x482E). */ 2922 kvm_msr_entry_add(cpu, MSR_IA32_VMX_VMCS_ENUM, 0x2E); 2923 } 2924 } 2925 2926 static void kvm_msr_entry_add_perf(X86CPU *cpu, FeatureWordArray f) 2927 { 2928 uint64_t kvm_perf_cap = 2929 kvm_arch_get_supported_msr_feature(kvm_state, 2930 MSR_IA32_PERF_CAPABILITIES); 2931 2932 if (kvm_perf_cap) { 2933 kvm_msr_entry_add(cpu, MSR_IA32_PERF_CAPABILITIES, 2934 kvm_perf_cap & f[FEAT_PERF_CAPABILITIES]); 2935 } 2936 } 2937 2938 static int kvm_buf_set_msrs(X86CPU *cpu) 2939 { 2940 int ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MSRS, cpu->kvm_msr_buf); 2941 if (ret < 0) { 2942 return ret; 2943 } 2944 2945 if (ret < cpu->kvm_msr_buf->nmsrs) { 2946 struct kvm_msr_entry *e = &cpu->kvm_msr_buf->entries[ret]; 2947 error_report("error: failed to set MSR 0x%" PRIx32 " to 0x%" PRIx64, 2948 (uint32_t)e->index, (uint64_t)e->data); 2949 } 2950 2951 assert(ret == cpu->kvm_msr_buf->nmsrs); 2952 return 0; 2953 } 2954 2955 static void kvm_init_msrs(X86CPU *cpu) 2956 { 2957 CPUX86State *env = &cpu->env; 2958 2959 kvm_msr_buf_reset(cpu); 2960 if (has_msr_arch_capabs) { 2961 kvm_msr_entry_add(cpu, MSR_IA32_ARCH_CAPABILITIES, 2962 env->features[FEAT_ARCH_CAPABILITIES]); 2963 } 2964 2965 if (has_msr_core_capabs) { 2966 kvm_msr_entry_add(cpu, MSR_IA32_CORE_CAPABILITY, 2967 env->features[FEAT_CORE_CAPABILITY]); 2968 } 2969 2970 if (has_msr_perf_capabs && cpu->enable_pmu) { 2971 kvm_msr_entry_add_perf(cpu, env->features); 2972 } 2973 2974 if (has_msr_ucode_rev) { 2975 kvm_msr_entry_add(cpu, MSR_IA32_UCODE_REV, cpu->ucode_rev); 2976 } 2977 2978 /* 2979 * Older kernels do not include VMX MSRs in KVM_GET_MSR_INDEX_LIST, but 2980 * all kernels with MSR features should have them. 2981 */ 2982 if (kvm_feature_msrs && cpu_has_vmx(env)) { 2983 kvm_msr_entry_add_vmx(cpu, env->features); 2984 } 2985 2986 assert(kvm_buf_set_msrs(cpu) == 0); 2987 } 2988 2989 static int kvm_put_msrs(X86CPU *cpu, int level) 2990 { 2991 CPUX86State *env = &cpu->env; 2992 int i; 2993 2994 kvm_msr_buf_reset(cpu); 2995 2996 kvm_msr_entry_add(cpu, MSR_IA32_SYSENTER_CS, env->sysenter_cs); 2997 kvm_msr_entry_add(cpu, MSR_IA32_SYSENTER_ESP, env->sysenter_esp); 2998 kvm_msr_entry_add(cpu, MSR_IA32_SYSENTER_EIP, env->sysenter_eip); 2999 kvm_msr_entry_add(cpu, MSR_PAT, env->pat); 3000 if (has_msr_star) { 3001 kvm_msr_entry_add(cpu, MSR_STAR, env->star); 3002 } 3003 if (has_msr_hsave_pa) { 3004 kvm_msr_entry_add(cpu, MSR_VM_HSAVE_PA, env->vm_hsave); 3005 } 3006 if (has_msr_tsc_aux) { 3007 kvm_msr_entry_add(cpu, MSR_TSC_AUX, env->tsc_aux); 3008 } 3009 if (has_msr_tsc_adjust) { 3010 kvm_msr_entry_add(cpu, MSR_TSC_ADJUST, env->tsc_adjust); 3011 } 3012 if (has_msr_misc_enable) { 3013 kvm_msr_entry_add(cpu, MSR_IA32_MISC_ENABLE, 3014 env->msr_ia32_misc_enable); 3015 } 3016 if (has_msr_smbase) { 3017 kvm_msr_entry_add(cpu, MSR_IA32_SMBASE, env->smbase); 3018 } 3019 if (has_msr_smi_count) { 3020 kvm_msr_entry_add(cpu, MSR_SMI_COUNT, env->msr_smi_count); 3021 } 3022 if (has_msr_pkrs) { 3023 kvm_msr_entry_add(cpu, MSR_IA32_PKRS, env->pkrs); 3024 } 3025 if (has_msr_bndcfgs) { 3026 kvm_msr_entry_add(cpu, MSR_IA32_BNDCFGS, env->msr_bndcfgs); 3027 } 3028 if (has_msr_xss) { 3029 kvm_msr_entry_add(cpu, MSR_IA32_XSS, env->xss); 3030 } 3031 if (has_msr_umwait) { 3032 kvm_msr_entry_add(cpu, MSR_IA32_UMWAIT_CONTROL, env->umwait); 3033 } 3034 if (has_msr_spec_ctrl) { 3035 kvm_msr_entry_add(cpu, MSR_IA32_SPEC_CTRL, env->spec_ctrl); 3036 } 3037 if (has_tsc_scale_msr) { 3038 kvm_msr_entry_add(cpu, MSR_AMD64_TSC_RATIO, env->amd_tsc_scale_msr); 3039 } 3040 3041 if (has_msr_tsx_ctrl) { 3042 kvm_msr_entry_add(cpu, MSR_IA32_TSX_CTRL, env->tsx_ctrl); 3043 } 3044 if (has_msr_virt_ssbd) { 3045 kvm_msr_entry_add(cpu, MSR_VIRT_SSBD, env->virt_ssbd); 3046 } 3047 3048 #ifdef TARGET_X86_64 3049 if (lm_capable_kernel) { 3050 kvm_msr_entry_add(cpu, MSR_CSTAR, env->cstar); 3051 kvm_msr_entry_add(cpu, MSR_KERNELGSBASE, env->kernelgsbase); 3052 kvm_msr_entry_add(cpu, MSR_FMASK, env->fmask); 3053 kvm_msr_entry_add(cpu, MSR_LSTAR, env->lstar); 3054 } 3055 #endif 3056 3057 /* 3058 * The following MSRs have side effects on the guest or are too heavy 3059 * for normal writeback. Limit them to reset or full state updates. 3060 */ 3061 if (level >= KVM_PUT_RESET_STATE) { 3062 kvm_msr_entry_add(cpu, MSR_IA32_TSC, env->tsc); 3063 kvm_msr_entry_add(cpu, MSR_KVM_SYSTEM_TIME, env->system_time_msr); 3064 kvm_msr_entry_add(cpu, MSR_KVM_WALL_CLOCK, env->wall_clock_msr); 3065 if (env->features[FEAT_KVM] & (1 << KVM_FEATURE_ASYNC_PF_INT)) { 3066 kvm_msr_entry_add(cpu, MSR_KVM_ASYNC_PF_INT, env->async_pf_int_msr); 3067 } 3068 if (env->features[FEAT_KVM] & (1 << KVM_FEATURE_ASYNC_PF)) { 3069 kvm_msr_entry_add(cpu, MSR_KVM_ASYNC_PF_EN, env->async_pf_en_msr); 3070 } 3071 if (env->features[FEAT_KVM] & (1 << KVM_FEATURE_PV_EOI)) { 3072 kvm_msr_entry_add(cpu, MSR_KVM_PV_EOI_EN, env->pv_eoi_en_msr); 3073 } 3074 if (env->features[FEAT_KVM] & (1 << KVM_FEATURE_STEAL_TIME)) { 3075 kvm_msr_entry_add(cpu, MSR_KVM_STEAL_TIME, env->steal_time_msr); 3076 } 3077 3078 if (env->features[FEAT_KVM] & (1 << KVM_FEATURE_POLL_CONTROL)) { 3079 kvm_msr_entry_add(cpu, MSR_KVM_POLL_CONTROL, env->poll_control_msr); 3080 } 3081 3082 if (has_architectural_pmu_version > 0) { 3083 if (has_architectural_pmu_version > 1) { 3084 /* Stop the counter. */ 3085 kvm_msr_entry_add(cpu, MSR_CORE_PERF_FIXED_CTR_CTRL, 0); 3086 kvm_msr_entry_add(cpu, MSR_CORE_PERF_GLOBAL_CTRL, 0); 3087 } 3088 3089 /* Set the counter values. */ 3090 for (i = 0; i < num_architectural_pmu_fixed_counters; i++) { 3091 kvm_msr_entry_add(cpu, MSR_CORE_PERF_FIXED_CTR0 + i, 3092 env->msr_fixed_counters[i]); 3093 } 3094 for (i = 0; i < num_architectural_pmu_gp_counters; i++) { 3095 kvm_msr_entry_add(cpu, MSR_P6_PERFCTR0 + i, 3096 env->msr_gp_counters[i]); 3097 kvm_msr_entry_add(cpu, MSR_P6_EVNTSEL0 + i, 3098 env->msr_gp_evtsel[i]); 3099 } 3100 if (has_architectural_pmu_version > 1) { 3101 kvm_msr_entry_add(cpu, MSR_CORE_PERF_GLOBAL_STATUS, 3102 env->msr_global_status); 3103 kvm_msr_entry_add(cpu, MSR_CORE_PERF_GLOBAL_OVF_CTRL, 3104 env->msr_global_ovf_ctrl); 3105 3106 /* Now start the PMU. */ 3107 kvm_msr_entry_add(cpu, MSR_CORE_PERF_FIXED_CTR_CTRL, 3108 env->msr_fixed_ctr_ctrl); 3109 kvm_msr_entry_add(cpu, MSR_CORE_PERF_GLOBAL_CTRL, 3110 env->msr_global_ctrl); 3111 } 3112 } 3113 /* 3114 * Hyper-V partition-wide MSRs: to avoid clearing them on cpu hot-add, 3115 * only sync them to KVM on the first cpu 3116 */ 3117 if (current_cpu == first_cpu) { 3118 if (has_msr_hv_hypercall) { 3119 kvm_msr_entry_add(cpu, HV_X64_MSR_GUEST_OS_ID, 3120 env->msr_hv_guest_os_id); 3121 kvm_msr_entry_add(cpu, HV_X64_MSR_HYPERCALL, 3122 env->msr_hv_hypercall); 3123 } 3124 if (hyperv_feat_enabled(cpu, HYPERV_FEAT_TIME)) { 3125 kvm_msr_entry_add(cpu, HV_X64_MSR_REFERENCE_TSC, 3126 env->msr_hv_tsc); 3127 } 3128 if (hyperv_feat_enabled(cpu, HYPERV_FEAT_REENLIGHTENMENT)) { 3129 kvm_msr_entry_add(cpu, HV_X64_MSR_REENLIGHTENMENT_CONTROL, 3130 env->msr_hv_reenlightenment_control); 3131 kvm_msr_entry_add(cpu, HV_X64_MSR_TSC_EMULATION_CONTROL, 3132 env->msr_hv_tsc_emulation_control); 3133 kvm_msr_entry_add(cpu, HV_X64_MSR_TSC_EMULATION_STATUS, 3134 env->msr_hv_tsc_emulation_status); 3135 } 3136 } 3137 if (hyperv_feat_enabled(cpu, HYPERV_FEAT_VAPIC)) { 3138 kvm_msr_entry_add(cpu, HV_X64_MSR_APIC_ASSIST_PAGE, 3139 env->msr_hv_vapic); 3140 } 3141 if (has_msr_hv_crash) { 3142 int j; 3143 3144 for (j = 0; j < HV_CRASH_PARAMS; j++) 3145 kvm_msr_entry_add(cpu, HV_X64_MSR_CRASH_P0 + j, 3146 env->msr_hv_crash_params[j]); 3147 3148 kvm_msr_entry_add(cpu, HV_X64_MSR_CRASH_CTL, HV_CRASH_CTL_NOTIFY); 3149 } 3150 if (has_msr_hv_runtime) { 3151 kvm_msr_entry_add(cpu, HV_X64_MSR_VP_RUNTIME, env->msr_hv_runtime); 3152 } 3153 if (hyperv_feat_enabled(cpu, HYPERV_FEAT_VPINDEX) 3154 && hv_vpindex_settable) { 3155 kvm_msr_entry_add(cpu, HV_X64_MSR_VP_INDEX, 3156 hyperv_vp_index(CPU(cpu))); 3157 } 3158 if (hyperv_feat_enabled(cpu, HYPERV_FEAT_SYNIC)) { 3159 int j; 3160 3161 kvm_msr_entry_add(cpu, HV_X64_MSR_SVERSION, HV_SYNIC_VERSION); 3162 3163 kvm_msr_entry_add(cpu, HV_X64_MSR_SCONTROL, 3164 env->msr_hv_synic_control); 3165 kvm_msr_entry_add(cpu, HV_X64_MSR_SIEFP, 3166 env->msr_hv_synic_evt_page); 3167 kvm_msr_entry_add(cpu, HV_X64_MSR_SIMP, 3168 env->msr_hv_synic_msg_page); 3169 3170 for (j = 0; j < ARRAY_SIZE(env->msr_hv_synic_sint); j++) { 3171 kvm_msr_entry_add(cpu, HV_X64_MSR_SINT0 + j, 3172 env->msr_hv_synic_sint[j]); 3173 } 3174 } 3175 if (has_msr_hv_stimer) { 3176 int j; 3177 3178 for (j = 0; j < ARRAY_SIZE(env->msr_hv_stimer_config); j++) { 3179 kvm_msr_entry_add(cpu, HV_X64_MSR_STIMER0_CONFIG + j * 2, 3180 env->msr_hv_stimer_config[j]); 3181 } 3182 3183 for (j = 0; j < ARRAY_SIZE(env->msr_hv_stimer_count); j++) { 3184 kvm_msr_entry_add(cpu, HV_X64_MSR_STIMER0_COUNT + j * 2, 3185 env->msr_hv_stimer_count[j]); 3186 } 3187 } 3188 if (env->features[FEAT_1_EDX] & CPUID_MTRR) { 3189 uint64_t phys_mask = MAKE_64BIT_MASK(0, cpu->phys_bits); 3190 3191 kvm_msr_entry_add(cpu, MSR_MTRRdefType, env->mtrr_deftype); 3192 kvm_msr_entry_add(cpu, MSR_MTRRfix64K_00000, env->mtrr_fixed[0]); 3193 kvm_msr_entry_add(cpu, MSR_MTRRfix16K_80000, env->mtrr_fixed[1]); 3194 kvm_msr_entry_add(cpu, MSR_MTRRfix16K_A0000, env->mtrr_fixed[2]); 3195 kvm_msr_entry_add(cpu, MSR_MTRRfix4K_C0000, env->mtrr_fixed[3]); 3196 kvm_msr_entry_add(cpu, MSR_MTRRfix4K_C8000, env->mtrr_fixed[4]); 3197 kvm_msr_entry_add(cpu, MSR_MTRRfix4K_D0000, env->mtrr_fixed[5]); 3198 kvm_msr_entry_add(cpu, MSR_MTRRfix4K_D8000, env->mtrr_fixed[6]); 3199 kvm_msr_entry_add(cpu, MSR_MTRRfix4K_E0000, env->mtrr_fixed[7]); 3200 kvm_msr_entry_add(cpu, MSR_MTRRfix4K_E8000, env->mtrr_fixed[8]); 3201 kvm_msr_entry_add(cpu, MSR_MTRRfix4K_F0000, env->mtrr_fixed[9]); 3202 kvm_msr_entry_add(cpu, MSR_MTRRfix4K_F8000, env->mtrr_fixed[10]); 3203 for (i = 0; i < MSR_MTRRcap_VCNT; i++) { 3204 /* The CPU GPs if we write to a bit above the physical limit of 3205 * the host CPU (and KVM emulates that) 3206 */ 3207 uint64_t mask = env->mtrr_var[i].mask; 3208 mask &= phys_mask; 3209 3210 kvm_msr_entry_add(cpu, MSR_MTRRphysBase(i), 3211 env->mtrr_var[i].base); 3212 kvm_msr_entry_add(cpu, MSR_MTRRphysMask(i), mask); 3213 } 3214 } 3215 if (env->features[FEAT_7_0_EBX] & CPUID_7_0_EBX_INTEL_PT) { 3216 int addr_num = kvm_arch_get_supported_cpuid(kvm_state, 3217 0x14, 1, R_EAX) & 0x7; 3218 3219 kvm_msr_entry_add(cpu, MSR_IA32_RTIT_CTL, 3220 env->msr_rtit_ctrl); 3221 kvm_msr_entry_add(cpu, MSR_IA32_RTIT_STATUS, 3222 env->msr_rtit_status); 3223 kvm_msr_entry_add(cpu, MSR_IA32_RTIT_OUTPUT_BASE, 3224 env->msr_rtit_output_base); 3225 kvm_msr_entry_add(cpu, MSR_IA32_RTIT_OUTPUT_MASK, 3226 env->msr_rtit_output_mask); 3227 kvm_msr_entry_add(cpu, MSR_IA32_RTIT_CR3_MATCH, 3228 env->msr_rtit_cr3_match); 3229 for (i = 0; i < addr_num; i++) { 3230 kvm_msr_entry_add(cpu, MSR_IA32_RTIT_ADDR0_A + i, 3231 env->msr_rtit_addrs[i]); 3232 } 3233 } 3234 3235 if (env->features[FEAT_7_0_ECX] & CPUID_7_0_ECX_SGX_LC) { 3236 kvm_msr_entry_add(cpu, MSR_IA32_SGXLEPUBKEYHASH0, 3237 env->msr_ia32_sgxlepubkeyhash[0]); 3238 kvm_msr_entry_add(cpu, MSR_IA32_SGXLEPUBKEYHASH1, 3239 env->msr_ia32_sgxlepubkeyhash[1]); 3240 kvm_msr_entry_add(cpu, MSR_IA32_SGXLEPUBKEYHASH2, 3241 env->msr_ia32_sgxlepubkeyhash[2]); 3242 kvm_msr_entry_add(cpu, MSR_IA32_SGXLEPUBKEYHASH3, 3243 env->msr_ia32_sgxlepubkeyhash[3]); 3244 } 3245 3246 /* Note: MSR_IA32_FEATURE_CONTROL is written separately, see 3247 * kvm_put_msr_feature_control. */ 3248 } 3249 3250 if (env->mcg_cap) { 3251 int i; 3252 3253 kvm_msr_entry_add(cpu, MSR_MCG_STATUS, env->mcg_status); 3254 kvm_msr_entry_add(cpu, MSR_MCG_CTL, env->mcg_ctl); 3255 if (has_msr_mcg_ext_ctl) { 3256 kvm_msr_entry_add(cpu, MSR_MCG_EXT_CTL, env->mcg_ext_ctl); 3257 } 3258 for (i = 0; i < (env->mcg_cap & 0xff) * 4; i++) { 3259 kvm_msr_entry_add(cpu, MSR_MC0_CTL + i, env->mce_banks[i]); 3260 } 3261 } 3262 3263 return kvm_buf_set_msrs(cpu); 3264 } 3265 3266 3267 static int kvm_get_fpu(X86CPU *cpu) 3268 { 3269 CPUX86State *env = &cpu->env; 3270 struct kvm_fpu fpu; 3271 int i, ret; 3272 3273 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_FPU, &fpu); 3274 if (ret < 0) { 3275 return ret; 3276 } 3277 3278 env->fpstt = (fpu.fsw >> 11) & 7; 3279 env->fpus = fpu.fsw; 3280 env->fpuc = fpu.fcw; 3281 env->fpop = fpu.last_opcode; 3282 env->fpip = fpu.last_ip; 3283 env->fpdp = fpu.last_dp; 3284 for (i = 0; i < 8; ++i) { 3285 env->fptags[i] = !((fpu.ftwx >> i) & 1); 3286 } 3287 memcpy(env->fpregs, fpu.fpr, sizeof env->fpregs); 3288 for (i = 0; i < CPU_NB_REGS; i++) { 3289 env->xmm_regs[i].ZMM_Q(0) = ldq_p(&fpu.xmm[i][0]); 3290 env->xmm_regs[i].ZMM_Q(1) = ldq_p(&fpu.xmm[i][8]); 3291 } 3292 env->mxcsr = fpu.mxcsr; 3293 3294 return 0; 3295 } 3296 3297 static int kvm_get_xsave(X86CPU *cpu) 3298 { 3299 CPUX86State *env = &cpu->env; 3300 void *xsave = env->xsave_buf; 3301 int ret; 3302 3303 if (!has_xsave) { 3304 return kvm_get_fpu(cpu); 3305 } 3306 3307 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_XSAVE, xsave); 3308 if (ret < 0) { 3309 return ret; 3310 } 3311 x86_cpu_xrstor_all_areas(cpu, xsave, env->xsave_buf_len); 3312 3313 return 0; 3314 } 3315 3316 static int kvm_get_xcrs(X86CPU *cpu) 3317 { 3318 CPUX86State *env = &cpu->env; 3319 int i, ret; 3320 struct kvm_xcrs xcrs; 3321 3322 if (!has_xcrs) { 3323 return 0; 3324 } 3325 3326 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_XCRS, &xcrs); 3327 if (ret < 0) { 3328 return ret; 3329 } 3330 3331 for (i = 0; i < xcrs.nr_xcrs; i++) { 3332 /* Only support xcr0 now */ 3333 if (xcrs.xcrs[i].xcr == 0) { 3334 env->xcr0 = xcrs.xcrs[i].value; 3335 break; 3336 } 3337 } 3338 return 0; 3339 } 3340 3341 static int kvm_get_sregs(X86CPU *cpu) 3342 { 3343 CPUX86State *env = &cpu->env; 3344 struct kvm_sregs sregs; 3345 int ret; 3346 3347 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs); 3348 if (ret < 0) { 3349 return ret; 3350 } 3351 3352 /* 3353 * The interrupt_bitmap is ignored because KVM_GET_SREGS is 3354 * always preceded by KVM_GET_VCPU_EVENTS. 3355 */ 3356 3357 get_seg(&env->segs[R_CS], &sregs.cs); 3358 get_seg(&env->segs[R_DS], &sregs.ds); 3359 get_seg(&env->segs[R_ES], &sregs.es); 3360 get_seg(&env->segs[R_FS], &sregs.fs); 3361 get_seg(&env->segs[R_GS], &sregs.gs); 3362 get_seg(&env->segs[R_SS], &sregs.ss); 3363 3364 get_seg(&env->tr, &sregs.tr); 3365 get_seg(&env->ldt, &sregs.ldt); 3366 3367 env->idt.limit = sregs.idt.limit; 3368 env->idt.base = sregs.idt.base; 3369 env->gdt.limit = sregs.gdt.limit; 3370 env->gdt.base = sregs.gdt.base; 3371 3372 env->cr[0] = sregs.cr0; 3373 env->cr[2] = sregs.cr2; 3374 env->cr[3] = sregs.cr3; 3375 env->cr[4] = sregs.cr4; 3376 3377 env->efer = sregs.efer; 3378 3379 /* changes to apic base and cr8/tpr are read back via kvm_arch_post_run */ 3380 x86_update_hflags(env); 3381 3382 return 0; 3383 } 3384 3385 static int kvm_get_sregs2(X86CPU *cpu) 3386 { 3387 CPUX86State *env = &cpu->env; 3388 struct kvm_sregs2 sregs; 3389 int i, ret; 3390 3391 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS2, &sregs); 3392 if (ret < 0) { 3393 return ret; 3394 } 3395 3396 get_seg(&env->segs[R_CS], &sregs.cs); 3397 get_seg(&env->segs[R_DS], &sregs.ds); 3398 get_seg(&env->segs[R_ES], &sregs.es); 3399 get_seg(&env->segs[R_FS], &sregs.fs); 3400 get_seg(&env->segs[R_GS], &sregs.gs); 3401 get_seg(&env->segs[R_SS], &sregs.ss); 3402 3403 get_seg(&env->tr, &sregs.tr); 3404 get_seg(&env->ldt, &sregs.ldt); 3405 3406 env->idt.limit = sregs.idt.limit; 3407 env->idt.base = sregs.idt.base; 3408 env->gdt.limit = sregs.gdt.limit; 3409 env->gdt.base = sregs.gdt.base; 3410 3411 env->cr[0] = sregs.cr0; 3412 env->cr[2] = sregs.cr2; 3413 env->cr[3] = sregs.cr3; 3414 env->cr[4] = sregs.cr4; 3415 3416 env->efer = sregs.efer; 3417 3418 env->pdptrs_valid = sregs.flags & KVM_SREGS2_FLAGS_PDPTRS_VALID; 3419 3420 if (env->pdptrs_valid) { 3421 for (i = 0; i < 4; i++) { 3422 env->pdptrs[i] = sregs.pdptrs[i]; 3423 } 3424 } 3425 3426 /* changes to apic base and cr8/tpr are read back via kvm_arch_post_run */ 3427 x86_update_hflags(env); 3428 3429 return 0; 3430 } 3431 3432 static int kvm_get_msrs(X86CPU *cpu) 3433 { 3434 CPUX86State *env = &cpu->env; 3435 struct kvm_msr_entry *msrs = cpu->kvm_msr_buf->entries; 3436 int ret, i; 3437 uint64_t mtrr_top_bits; 3438 3439 kvm_msr_buf_reset(cpu); 3440 3441 kvm_msr_entry_add(cpu, MSR_IA32_SYSENTER_CS, 0); 3442 kvm_msr_entry_add(cpu, MSR_IA32_SYSENTER_ESP, 0); 3443 kvm_msr_entry_add(cpu, MSR_IA32_SYSENTER_EIP, 0); 3444 kvm_msr_entry_add(cpu, MSR_PAT, 0); 3445 if (has_msr_star) { 3446 kvm_msr_entry_add(cpu, MSR_STAR, 0); 3447 } 3448 if (has_msr_hsave_pa) { 3449 kvm_msr_entry_add(cpu, MSR_VM_HSAVE_PA, 0); 3450 } 3451 if (has_msr_tsc_aux) { 3452 kvm_msr_entry_add(cpu, MSR_TSC_AUX, 0); 3453 } 3454 if (has_msr_tsc_adjust) { 3455 kvm_msr_entry_add(cpu, MSR_TSC_ADJUST, 0); 3456 } 3457 if (has_msr_tsc_deadline) { 3458 kvm_msr_entry_add(cpu, MSR_IA32_TSCDEADLINE, 0); 3459 } 3460 if (has_msr_misc_enable) { 3461 kvm_msr_entry_add(cpu, MSR_IA32_MISC_ENABLE, 0); 3462 } 3463 if (has_msr_smbase) { 3464 kvm_msr_entry_add(cpu, MSR_IA32_SMBASE, 0); 3465 } 3466 if (has_msr_smi_count) { 3467 kvm_msr_entry_add(cpu, MSR_SMI_COUNT, 0); 3468 } 3469 if (has_msr_feature_control) { 3470 kvm_msr_entry_add(cpu, MSR_IA32_FEATURE_CONTROL, 0); 3471 } 3472 if (has_msr_pkrs) { 3473 kvm_msr_entry_add(cpu, MSR_IA32_PKRS, 0); 3474 } 3475 if (has_msr_bndcfgs) { 3476 kvm_msr_entry_add(cpu, MSR_IA32_BNDCFGS, 0); 3477 } 3478 if (has_msr_xss) { 3479 kvm_msr_entry_add(cpu, MSR_IA32_XSS, 0); 3480 } 3481 if (has_msr_umwait) { 3482 kvm_msr_entry_add(cpu, MSR_IA32_UMWAIT_CONTROL, 0); 3483 } 3484 if (has_msr_spec_ctrl) { 3485 kvm_msr_entry_add(cpu, MSR_IA32_SPEC_CTRL, 0); 3486 } 3487 if (has_tsc_scale_msr) { 3488 kvm_msr_entry_add(cpu, MSR_AMD64_TSC_RATIO, 0); 3489 } 3490 3491 if (has_msr_tsx_ctrl) { 3492 kvm_msr_entry_add(cpu, MSR_IA32_TSX_CTRL, 0); 3493 } 3494 if (has_msr_virt_ssbd) { 3495 kvm_msr_entry_add(cpu, MSR_VIRT_SSBD, 0); 3496 } 3497 if (!env->tsc_valid) { 3498 kvm_msr_entry_add(cpu, MSR_IA32_TSC, 0); 3499 env->tsc_valid = !runstate_is_running(); 3500 } 3501 3502 #ifdef TARGET_X86_64 3503 if (lm_capable_kernel) { 3504 kvm_msr_entry_add(cpu, MSR_CSTAR, 0); 3505 kvm_msr_entry_add(cpu, MSR_KERNELGSBASE, 0); 3506 kvm_msr_entry_add(cpu, MSR_FMASK, 0); 3507 kvm_msr_entry_add(cpu, MSR_LSTAR, 0); 3508 } 3509 #endif 3510 kvm_msr_entry_add(cpu, MSR_KVM_SYSTEM_TIME, 0); 3511 kvm_msr_entry_add(cpu, MSR_KVM_WALL_CLOCK, 0); 3512 if (env->features[FEAT_KVM] & (1 << KVM_FEATURE_ASYNC_PF_INT)) { 3513 kvm_msr_entry_add(cpu, MSR_KVM_ASYNC_PF_INT, 0); 3514 } 3515 if (env->features[FEAT_KVM] & (1 << KVM_FEATURE_ASYNC_PF)) { 3516 kvm_msr_entry_add(cpu, MSR_KVM_ASYNC_PF_EN, 0); 3517 } 3518 if (env->features[FEAT_KVM] & (1 << KVM_FEATURE_PV_EOI)) { 3519 kvm_msr_entry_add(cpu, MSR_KVM_PV_EOI_EN, 0); 3520 } 3521 if (env->features[FEAT_KVM] & (1 << KVM_FEATURE_STEAL_TIME)) { 3522 kvm_msr_entry_add(cpu, MSR_KVM_STEAL_TIME, 0); 3523 } 3524 if (env->features[FEAT_KVM] & (1 << KVM_FEATURE_POLL_CONTROL)) { 3525 kvm_msr_entry_add(cpu, MSR_KVM_POLL_CONTROL, 1); 3526 } 3527 if (has_architectural_pmu_version > 0) { 3528 if (has_architectural_pmu_version > 1) { 3529 kvm_msr_entry_add(cpu, MSR_CORE_PERF_FIXED_CTR_CTRL, 0); 3530 kvm_msr_entry_add(cpu, MSR_CORE_PERF_GLOBAL_CTRL, 0); 3531 kvm_msr_entry_add(cpu, MSR_CORE_PERF_GLOBAL_STATUS, 0); 3532 kvm_msr_entry_add(cpu, MSR_CORE_PERF_GLOBAL_OVF_CTRL, 0); 3533 } 3534 for (i = 0; i < num_architectural_pmu_fixed_counters; i++) { 3535 kvm_msr_entry_add(cpu, MSR_CORE_PERF_FIXED_CTR0 + i, 0); 3536 } 3537 for (i = 0; i < num_architectural_pmu_gp_counters; i++) { 3538 kvm_msr_entry_add(cpu, MSR_P6_PERFCTR0 + i, 0); 3539 kvm_msr_entry_add(cpu, MSR_P6_EVNTSEL0 + i, 0); 3540 } 3541 } 3542 3543 if (env->mcg_cap) { 3544 kvm_msr_entry_add(cpu, MSR_MCG_STATUS, 0); 3545 kvm_msr_entry_add(cpu, MSR_MCG_CTL, 0); 3546 if (has_msr_mcg_ext_ctl) { 3547 kvm_msr_entry_add(cpu, MSR_MCG_EXT_CTL, 0); 3548 } 3549 for (i = 0; i < (env->mcg_cap & 0xff) * 4; i++) { 3550 kvm_msr_entry_add(cpu, MSR_MC0_CTL + i, 0); 3551 } 3552 } 3553 3554 if (has_msr_hv_hypercall) { 3555 kvm_msr_entry_add(cpu, HV_X64_MSR_HYPERCALL, 0); 3556 kvm_msr_entry_add(cpu, HV_X64_MSR_GUEST_OS_ID, 0); 3557 } 3558 if (hyperv_feat_enabled(cpu, HYPERV_FEAT_VAPIC)) { 3559 kvm_msr_entry_add(cpu, HV_X64_MSR_APIC_ASSIST_PAGE, 0); 3560 } 3561 if (hyperv_feat_enabled(cpu, HYPERV_FEAT_TIME)) { 3562 kvm_msr_entry_add(cpu, HV_X64_MSR_REFERENCE_TSC, 0); 3563 } 3564 if (hyperv_feat_enabled(cpu, HYPERV_FEAT_REENLIGHTENMENT)) { 3565 kvm_msr_entry_add(cpu, HV_X64_MSR_REENLIGHTENMENT_CONTROL, 0); 3566 kvm_msr_entry_add(cpu, HV_X64_MSR_TSC_EMULATION_CONTROL, 0); 3567 kvm_msr_entry_add(cpu, HV_X64_MSR_TSC_EMULATION_STATUS, 0); 3568 } 3569 if (has_msr_hv_crash) { 3570 int j; 3571 3572 for (j = 0; j < HV_CRASH_PARAMS; j++) { 3573 kvm_msr_entry_add(cpu, HV_X64_MSR_CRASH_P0 + j, 0); 3574 } 3575 } 3576 if (has_msr_hv_runtime) { 3577 kvm_msr_entry_add(cpu, HV_X64_MSR_VP_RUNTIME, 0); 3578 } 3579 if (hyperv_feat_enabled(cpu, HYPERV_FEAT_SYNIC)) { 3580 uint32_t msr; 3581 3582 kvm_msr_entry_add(cpu, HV_X64_MSR_SCONTROL, 0); 3583 kvm_msr_entry_add(cpu, HV_X64_MSR_SIEFP, 0); 3584 kvm_msr_entry_add(cpu, HV_X64_MSR_SIMP, 0); 3585 for (msr = HV_X64_MSR_SINT0; msr <= HV_X64_MSR_SINT15; msr++) { 3586 kvm_msr_entry_add(cpu, msr, 0); 3587 } 3588 } 3589 if (has_msr_hv_stimer) { 3590 uint32_t msr; 3591 3592 for (msr = HV_X64_MSR_STIMER0_CONFIG; msr <= HV_X64_MSR_STIMER3_COUNT; 3593 msr++) { 3594 kvm_msr_entry_add(cpu, msr, 0); 3595 } 3596 } 3597 if (env->features[FEAT_1_EDX] & CPUID_MTRR) { 3598 kvm_msr_entry_add(cpu, MSR_MTRRdefType, 0); 3599 kvm_msr_entry_add(cpu, MSR_MTRRfix64K_00000, 0); 3600 kvm_msr_entry_add(cpu, MSR_MTRRfix16K_80000, 0); 3601 kvm_msr_entry_add(cpu, MSR_MTRRfix16K_A0000, 0); 3602 kvm_msr_entry_add(cpu, MSR_MTRRfix4K_C0000, 0); 3603 kvm_msr_entry_add(cpu, MSR_MTRRfix4K_C8000, 0); 3604 kvm_msr_entry_add(cpu, MSR_MTRRfix4K_D0000, 0); 3605 kvm_msr_entry_add(cpu, MSR_MTRRfix4K_D8000, 0); 3606 kvm_msr_entry_add(cpu, MSR_MTRRfix4K_E0000, 0); 3607 kvm_msr_entry_add(cpu, MSR_MTRRfix4K_E8000, 0); 3608 kvm_msr_entry_add(cpu, MSR_MTRRfix4K_F0000, 0); 3609 kvm_msr_entry_add(cpu, MSR_MTRRfix4K_F8000, 0); 3610 for (i = 0; i < MSR_MTRRcap_VCNT; i++) { 3611 kvm_msr_entry_add(cpu, MSR_MTRRphysBase(i), 0); 3612 kvm_msr_entry_add(cpu, MSR_MTRRphysMask(i), 0); 3613 } 3614 } 3615 3616 if (env->features[FEAT_7_0_EBX] & CPUID_7_0_EBX_INTEL_PT) { 3617 int addr_num = 3618 kvm_arch_get_supported_cpuid(kvm_state, 0x14, 1, R_EAX) & 0x7; 3619 3620 kvm_msr_entry_add(cpu, MSR_IA32_RTIT_CTL, 0); 3621 kvm_msr_entry_add(cpu, MSR_IA32_RTIT_STATUS, 0); 3622 kvm_msr_entry_add(cpu, MSR_IA32_RTIT_OUTPUT_BASE, 0); 3623 kvm_msr_entry_add(cpu, MSR_IA32_RTIT_OUTPUT_MASK, 0); 3624 kvm_msr_entry_add(cpu, MSR_IA32_RTIT_CR3_MATCH, 0); 3625 for (i = 0; i < addr_num; i++) { 3626 kvm_msr_entry_add(cpu, MSR_IA32_RTIT_ADDR0_A + i, 0); 3627 } 3628 } 3629 3630 if (env->features[FEAT_7_0_ECX] & CPUID_7_0_ECX_SGX_LC) { 3631 kvm_msr_entry_add(cpu, MSR_IA32_SGXLEPUBKEYHASH0, 0); 3632 kvm_msr_entry_add(cpu, MSR_IA32_SGXLEPUBKEYHASH1, 0); 3633 kvm_msr_entry_add(cpu, MSR_IA32_SGXLEPUBKEYHASH2, 0); 3634 kvm_msr_entry_add(cpu, MSR_IA32_SGXLEPUBKEYHASH3, 0); 3635 } 3636 3637 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MSRS, cpu->kvm_msr_buf); 3638 if (ret < 0) { 3639 return ret; 3640 } 3641 3642 if (ret < cpu->kvm_msr_buf->nmsrs) { 3643 struct kvm_msr_entry *e = &cpu->kvm_msr_buf->entries[ret]; 3644 error_report("error: failed to get MSR 0x%" PRIx32, 3645 (uint32_t)e->index); 3646 } 3647 3648 assert(ret == cpu->kvm_msr_buf->nmsrs); 3649 /* 3650 * MTRR masks: Each mask consists of 5 parts 3651 * a 10..0: must be zero 3652 * b 11 : valid bit 3653 * c n-1.12: actual mask bits 3654 * d 51..n: reserved must be zero 3655 * e 63.52: reserved must be zero 3656 * 3657 * 'n' is the number of physical bits supported by the CPU and is 3658 * apparently always <= 52. We know our 'n' but don't know what 3659 * the destinations 'n' is; it might be smaller, in which case 3660 * it masks (c) on loading. It might be larger, in which case 3661 * we fill 'd' so that d..c is consistent irrespetive of the 'n' 3662 * we're migrating to. 3663 */ 3664 3665 if (cpu->fill_mtrr_mask) { 3666 QEMU_BUILD_BUG_ON(TARGET_PHYS_ADDR_SPACE_BITS > 52); 3667 assert(cpu->phys_bits <= TARGET_PHYS_ADDR_SPACE_BITS); 3668 mtrr_top_bits = MAKE_64BIT_MASK(cpu->phys_bits, 52 - cpu->phys_bits); 3669 } else { 3670 mtrr_top_bits = 0; 3671 } 3672 3673 for (i = 0; i < ret; i++) { 3674 uint32_t index = msrs[i].index; 3675 switch (index) { 3676 case MSR_IA32_SYSENTER_CS: 3677 env->sysenter_cs = msrs[i].data; 3678 break; 3679 case MSR_IA32_SYSENTER_ESP: 3680 env->sysenter_esp = msrs[i].data; 3681 break; 3682 case MSR_IA32_SYSENTER_EIP: 3683 env->sysenter_eip = msrs[i].data; 3684 break; 3685 case MSR_PAT: 3686 env->pat = msrs[i].data; 3687 break; 3688 case MSR_STAR: 3689 env->star = msrs[i].data; 3690 break; 3691 #ifdef TARGET_X86_64 3692 case MSR_CSTAR: 3693 env->cstar = msrs[i].data; 3694 break; 3695 case MSR_KERNELGSBASE: 3696 env->kernelgsbase = msrs[i].data; 3697 break; 3698 case MSR_FMASK: 3699 env->fmask = msrs[i].data; 3700 break; 3701 case MSR_LSTAR: 3702 env->lstar = msrs[i].data; 3703 break; 3704 #endif 3705 case MSR_IA32_TSC: 3706 env->tsc = msrs[i].data; 3707 break; 3708 case MSR_TSC_AUX: 3709 env->tsc_aux = msrs[i].data; 3710 break; 3711 case MSR_TSC_ADJUST: 3712 env->tsc_adjust = msrs[i].data; 3713 break; 3714 case MSR_IA32_TSCDEADLINE: 3715 env->tsc_deadline = msrs[i].data; 3716 break; 3717 case MSR_VM_HSAVE_PA: 3718 env->vm_hsave = msrs[i].data; 3719 break; 3720 case MSR_KVM_SYSTEM_TIME: 3721 env->system_time_msr = msrs[i].data; 3722 break; 3723 case MSR_KVM_WALL_CLOCK: 3724 env->wall_clock_msr = msrs[i].data; 3725 break; 3726 case MSR_MCG_STATUS: 3727 env->mcg_status = msrs[i].data; 3728 break; 3729 case MSR_MCG_CTL: 3730 env->mcg_ctl = msrs[i].data; 3731 break; 3732 case MSR_MCG_EXT_CTL: 3733 env->mcg_ext_ctl = msrs[i].data; 3734 break; 3735 case MSR_IA32_MISC_ENABLE: 3736 env->msr_ia32_misc_enable = msrs[i].data; 3737 break; 3738 case MSR_IA32_SMBASE: 3739 env->smbase = msrs[i].data; 3740 break; 3741 case MSR_SMI_COUNT: 3742 env->msr_smi_count = msrs[i].data; 3743 break; 3744 case MSR_IA32_FEATURE_CONTROL: 3745 env->msr_ia32_feature_control = msrs[i].data; 3746 break; 3747 case MSR_IA32_BNDCFGS: 3748 env->msr_bndcfgs = msrs[i].data; 3749 break; 3750 case MSR_IA32_XSS: 3751 env->xss = msrs[i].data; 3752 break; 3753 case MSR_IA32_UMWAIT_CONTROL: 3754 env->umwait = msrs[i].data; 3755 break; 3756 case MSR_IA32_PKRS: 3757 env->pkrs = msrs[i].data; 3758 break; 3759 default: 3760 if (msrs[i].index >= MSR_MC0_CTL && 3761 msrs[i].index < MSR_MC0_CTL + (env->mcg_cap & 0xff) * 4) { 3762 env->mce_banks[msrs[i].index - MSR_MC0_CTL] = msrs[i].data; 3763 } 3764 break; 3765 case MSR_KVM_ASYNC_PF_EN: 3766 env->async_pf_en_msr = msrs[i].data; 3767 break; 3768 case MSR_KVM_ASYNC_PF_INT: 3769 env->async_pf_int_msr = msrs[i].data; 3770 break; 3771 case MSR_KVM_PV_EOI_EN: 3772 env->pv_eoi_en_msr = msrs[i].data; 3773 break; 3774 case MSR_KVM_STEAL_TIME: 3775 env->steal_time_msr = msrs[i].data; 3776 break; 3777 case MSR_KVM_POLL_CONTROL: { 3778 env->poll_control_msr = msrs[i].data; 3779 break; 3780 } 3781 case MSR_CORE_PERF_FIXED_CTR_CTRL: 3782 env->msr_fixed_ctr_ctrl = msrs[i].data; 3783 break; 3784 case MSR_CORE_PERF_GLOBAL_CTRL: 3785 env->msr_global_ctrl = msrs[i].data; 3786 break; 3787 case MSR_CORE_PERF_GLOBAL_STATUS: 3788 env->msr_global_status = msrs[i].data; 3789 break; 3790 case MSR_CORE_PERF_GLOBAL_OVF_CTRL: 3791 env->msr_global_ovf_ctrl = msrs[i].data; 3792 break; 3793 case MSR_CORE_PERF_FIXED_CTR0 ... MSR_CORE_PERF_FIXED_CTR0 + MAX_FIXED_COUNTERS - 1: 3794 env->msr_fixed_counters[index - MSR_CORE_PERF_FIXED_CTR0] = msrs[i].data; 3795 break; 3796 case MSR_P6_PERFCTR0 ... MSR_P6_PERFCTR0 + MAX_GP_COUNTERS - 1: 3797 env->msr_gp_counters[index - MSR_P6_PERFCTR0] = msrs[i].data; 3798 break; 3799 case MSR_P6_EVNTSEL0 ... MSR_P6_EVNTSEL0 + MAX_GP_COUNTERS - 1: 3800 env->msr_gp_evtsel[index - MSR_P6_EVNTSEL0] = msrs[i].data; 3801 break; 3802 case HV_X64_MSR_HYPERCALL: 3803 env->msr_hv_hypercall = msrs[i].data; 3804 break; 3805 case HV_X64_MSR_GUEST_OS_ID: 3806 env->msr_hv_guest_os_id = msrs[i].data; 3807 break; 3808 case HV_X64_MSR_APIC_ASSIST_PAGE: 3809 env->msr_hv_vapic = msrs[i].data; 3810 break; 3811 case HV_X64_MSR_REFERENCE_TSC: 3812 env->msr_hv_tsc = msrs[i].data; 3813 break; 3814 case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4: 3815 env->msr_hv_crash_params[index - HV_X64_MSR_CRASH_P0] = msrs[i].data; 3816 break; 3817 case HV_X64_MSR_VP_RUNTIME: 3818 env->msr_hv_runtime = msrs[i].data; 3819 break; 3820 case HV_X64_MSR_SCONTROL: 3821 env->msr_hv_synic_control = msrs[i].data; 3822 break; 3823 case HV_X64_MSR_SIEFP: 3824 env->msr_hv_synic_evt_page = msrs[i].data; 3825 break; 3826 case HV_X64_MSR_SIMP: 3827 env->msr_hv_synic_msg_page = msrs[i].data; 3828 break; 3829 case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15: 3830 env->msr_hv_synic_sint[index - HV_X64_MSR_SINT0] = msrs[i].data; 3831 break; 3832 case HV_X64_MSR_STIMER0_CONFIG: 3833 case HV_X64_MSR_STIMER1_CONFIG: 3834 case HV_X64_MSR_STIMER2_CONFIG: 3835 case HV_X64_MSR_STIMER3_CONFIG: 3836 env->msr_hv_stimer_config[(index - HV_X64_MSR_STIMER0_CONFIG)/2] = 3837 msrs[i].data; 3838 break; 3839 case HV_X64_MSR_STIMER0_COUNT: 3840 case HV_X64_MSR_STIMER1_COUNT: 3841 case HV_X64_MSR_STIMER2_COUNT: 3842 case HV_X64_MSR_STIMER3_COUNT: 3843 env->msr_hv_stimer_count[(index - HV_X64_MSR_STIMER0_COUNT)/2] = 3844 msrs[i].data; 3845 break; 3846 case HV_X64_MSR_REENLIGHTENMENT_CONTROL: 3847 env->msr_hv_reenlightenment_control = msrs[i].data; 3848 break; 3849 case HV_X64_MSR_TSC_EMULATION_CONTROL: 3850 env->msr_hv_tsc_emulation_control = msrs[i].data; 3851 break; 3852 case HV_X64_MSR_TSC_EMULATION_STATUS: 3853 env->msr_hv_tsc_emulation_status = msrs[i].data; 3854 break; 3855 case MSR_MTRRdefType: 3856 env->mtrr_deftype = msrs[i].data; 3857 break; 3858 case MSR_MTRRfix64K_00000: 3859 env->mtrr_fixed[0] = msrs[i].data; 3860 break; 3861 case MSR_MTRRfix16K_80000: 3862 env->mtrr_fixed[1] = msrs[i].data; 3863 break; 3864 case MSR_MTRRfix16K_A0000: 3865 env->mtrr_fixed[2] = msrs[i].data; 3866 break; 3867 case MSR_MTRRfix4K_C0000: 3868 env->mtrr_fixed[3] = msrs[i].data; 3869 break; 3870 case MSR_MTRRfix4K_C8000: 3871 env->mtrr_fixed[4] = msrs[i].data; 3872 break; 3873 case MSR_MTRRfix4K_D0000: 3874 env->mtrr_fixed[5] = msrs[i].data; 3875 break; 3876 case MSR_MTRRfix4K_D8000: 3877 env->mtrr_fixed[6] = msrs[i].data; 3878 break; 3879 case MSR_MTRRfix4K_E0000: 3880 env->mtrr_fixed[7] = msrs[i].data; 3881 break; 3882 case MSR_MTRRfix4K_E8000: 3883 env->mtrr_fixed[8] = msrs[i].data; 3884 break; 3885 case MSR_MTRRfix4K_F0000: 3886 env->mtrr_fixed[9] = msrs[i].data; 3887 break; 3888 case MSR_MTRRfix4K_F8000: 3889 env->mtrr_fixed[10] = msrs[i].data; 3890 break; 3891 case MSR_MTRRphysBase(0) ... MSR_MTRRphysMask(MSR_MTRRcap_VCNT - 1): 3892 if (index & 1) { 3893 env->mtrr_var[MSR_MTRRphysIndex(index)].mask = msrs[i].data | 3894 mtrr_top_bits; 3895 } else { 3896 env->mtrr_var[MSR_MTRRphysIndex(index)].base = msrs[i].data; 3897 } 3898 break; 3899 case MSR_IA32_SPEC_CTRL: 3900 env->spec_ctrl = msrs[i].data; 3901 break; 3902 case MSR_AMD64_TSC_RATIO: 3903 env->amd_tsc_scale_msr = msrs[i].data; 3904 break; 3905 case MSR_IA32_TSX_CTRL: 3906 env->tsx_ctrl = msrs[i].data; 3907 break; 3908 case MSR_VIRT_SSBD: 3909 env->virt_ssbd = msrs[i].data; 3910 break; 3911 case MSR_IA32_RTIT_CTL: 3912 env->msr_rtit_ctrl = msrs[i].data; 3913 break; 3914 case MSR_IA32_RTIT_STATUS: 3915 env->msr_rtit_status = msrs[i].data; 3916 break; 3917 case MSR_IA32_RTIT_OUTPUT_BASE: 3918 env->msr_rtit_output_base = msrs[i].data; 3919 break; 3920 case MSR_IA32_RTIT_OUTPUT_MASK: 3921 env->msr_rtit_output_mask = msrs[i].data; 3922 break; 3923 case MSR_IA32_RTIT_CR3_MATCH: 3924 env->msr_rtit_cr3_match = msrs[i].data; 3925 break; 3926 case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B: 3927 env->msr_rtit_addrs[index - MSR_IA32_RTIT_ADDR0_A] = msrs[i].data; 3928 break; 3929 case MSR_IA32_SGXLEPUBKEYHASH0 ... MSR_IA32_SGXLEPUBKEYHASH3: 3930 env->msr_ia32_sgxlepubkeyhash[index - MSR_IA32_SGXLEPUBKEYHASH0] = 3931 msrs[i].data; 3932 break; 3933 } 3934 } 3935 3936 return 0; 3937 } 3938 3939 static int kvm_put_mp_state(X86CPU *cpu) 3940 { 3941 struct kvm_mp_state mp_state = { .mp_state = cpu->env.mp_state }; 3942 3943 return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MP_STATE, &mp_state); 3944 } 3945 3946 static int kvm_get_mp_state(X86CPU *cpu) 3947 { 3948 CPUState *cs = CPU(cpu); 3949 CPUX86State *env = &cpu->env; 3950 struct kvm_mp_state mp_state; 3951 int ret; 3952 3953 ret = kvm_vcpu_ioctl(cs, KVM_GET_MP_STATE, &mp_state); 3954 if (ret < 0) { 3955 return ret; 3956 } 3957 env->mp_state = mp_state.mp_state; 3958 if (kvm_irqchip_in_kernel()) { 3959 cs->halted = (mp_state.mp_state == KVM_MP_STATE_HALTED); 3960 } 3961 return 0; 3962 } 3963 3964 static int kvm_get_apic(X86CPU *cpu) 3965 { 3966 DeviceState *apic = cpu->apic_state; 3967 struct kvm_lapic_state kapic; 3968 int ret; 3969 3970 if (apic && kvm_irqchip_in_kernel()) { 3971 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_LAPIC, &kapic); 3972 if (ret < 0) { 3973 return ret; 3974 } 3975 3976 kvm_get_apic_state(apic, &kapic); 3977 } 3978 return 0; 3979 } 3980 3981 static int kvm_put_vcpu_events(X86CPU *cpu, int level) 3982 { 3983 CPUState *cs = CPU(cpu); 3984 CPUX86State *env = &cpu->env; 3985 struct kvm_vcpu_events events = {}; 3986 3987 if (!kvm_has_vcpu_events()) { 3988 return 0; 3989 } 3990 3991 events.flags = 0; 3992 3993 if (has_exception_payload) { 3994 events.flags |= KVM_VCPUEVENT_VALID_PAYLOAD; 3995 events.exception.pending = env->exception_pending; 3996 events.exception_has_payload = env->exception_has_payload; 3997 events.exception_payload = env->exception_payload; 3998 } 3999 events.exception.nr = env->exception_nr; 4000 events.exception.injected = env->exception_injected; 4001 events.exception.has_error_code = env->has_error_code; 4002 events.exception.error_code = env->error_code; 4003 4004 events.interrupt.injected = (env->interrupt_injected >= 0); 4005 events.interrupt.nr = env->interrupt_injected; 4006 events.interrupt.soft = env->soft_interrupt; 4007 4008 events.nmi.injected = env->nmi_injected; 4009 events.nmi.pending = env->nmi_pending; 4010 events.nmi.masked = !!(env->hflags2 & HF2_NMI_MASK); 4011 4012 events.sipi_vector = env->sipi_vector; 4013 4014 if (has_msr_smbase) { 4015 events.smi.smm = !!(env->hflags & HF_SMM_MASK); 4016 events.smi.smm_inside_nmi = !!(env->hflags2 & HF2_SMM_INSIDE_NMI_MASK); 4017 if (kvm_irqchip_in_kernel()) { 4018 /* As soon as these are moved to the kernel, remove them 4019 * from cs->interrupt_request. 4020 */ 4021 events.smi.pending = cs->interrupt_request & CPU_INTERRUPT_SMI; 4022 events.smi.latched_init = cs->interrupt_request & CPU_INTERRUPT_INIT; 4023 cs->interrupt_request &= ~(CPU_INTERRUPT_INIT | CPU_INTERRUPT_SMI); 4024 } else { 4025 /* Keep these in cs->interrupt_request. */ 4026 events.smi.pending = 0; 4027 events.smi.latched_init = 0; 4028 } 4029 /* Stop SMI delivery on old machine types to avoid a reboot 4030 * on an inward migration of an old VM. 4031 */ 4032 if (!cpu->kvm_no_smi_migration) { 4033 events.flags |= KVM_VCPUEVENT_VALID_SMM; 4034 } 4035 } 4036 4037 if (level >= KVM_PUT_RESET_STATE) { 4038 events.flags |= KVM_VCPUEVENT_VALID_NMI_PENDING; 4039 if (env->mp_state == KVM_MP_STATE_SIPI_RECEIVED) { 4040 events.flags |= KVM_VCPUEVENT_VALID_SIPI_VECTOR; 4041 } 4042 } 4043 4044 return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_VCPU_EVENTS, &events); 4045 } 4046 4047 static int kvm_get_vcpu_events(X86CPU *cpu) 4048 { 4049 CPUX86State *env = &cpu->env; 4050 struct kvm_vcpu_events events; 4051 int ret; 4052 4053 if (!kvm_has_vcpu_events()) { 4054 return 0; 4055 } 4056 4057 memset(&events, 0, sizeof(events)); 4058 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_VCPU_EVENTS, &events); 4059 if (ret < 0) { 4060 return ret; 4061 } 4062 4063 if (events.flags & KVM_VCPUEVENT_VALID_PAYLOAD) { 4064 env->exception_pending = events.exception.pending; 4065 env->exception_has_payload = events.exception_has_payload; 4066 env->exception_payload = events.exception_payload; 4067 } else { 4068 env->exception_pending = 0; 4069 env->exception_has_payload = false; 4070 } 4071 env->exception_injected = events.exception.injected; 4072 env->exception_nr = 4073 (env->exception_pending || env->exception_injected) ? 4074 events.exception.nr : -1; 4075 env->has_error_code = events.exception.has_error_code; 4076 env->error_code = events.exception.error_code; 4077 4078 env->interrupt_injected = 4079 events.interrupt.injected ? events.interrupt.nr : -1; 4080 env->soft_interrupt = events.interrupt.soft; 4081 4082 env->nmi_injected = events.nmi.injected; 4083 env->nmi_pending = events.nmi.pending; 4084 if (events.nmi.masked) { 4085 env->hflags2 |= HF2_NMI_MASK; 4086 } else { 4087 env->hflags2 &= ~HF2_NMI_MASK; 4088 } 4089 4090 if (events.flags & KVM_VCPUEVENT_VALID_SMM) { 4091 if (events.smi.smm) { 4092 env->hflags |= HF_SMM_MASK; 4093 } else { 4094 env->hflags &= ~HF_SMM_MASK; 4095 } 4096 if (events.smi.pending) { 4097 cpu_interrupt(CPU(cpu), CPU_INTERRUPT_SMI); 4098 } else { 4099 cpu_reset_interrupt(CPU(cpu), CPU_INTERRUPT_SMI); 4100 } 4101 if (events.smi.smm_inside_nmi) { 4102 env->hflags2 |= HF2_SMM_INSIDE_NMI_MASK; 4103 } else { 4104 env->hflags2 &= ~HF2_SMM_INSIDE_NMI_MASK; 4105 } 4106 if (events.smi.latched_init) { 4107 cpu_interrupt(CPU(cpu), CPU_INTERRUPT_INIT); 4108 } else { 4109 cpu_reset_interrupt(CPU(cpu), CPU_INTERRUPT_INIT); 4110 } 4111 } 4112 4113 env->sipi_vector = events.sipi_vector; 4114 4115 return 0; 4116 } 4117 4118 static int kvm_guest_debug_workarounds(X86CPU *cpu) 4119 { 4120 CPUState *cs = CPU(cpu); 4121 CPUX86State *env = &cpu->env; 4122 int ret = 0; 4123 unsigned long reinject_trap = 0; 4124 4125 if (!kvm_has_vcpu_events()) { 4126 if (env->exception_nr == EXCP01_DB) { 4127 reinject_trap = KVM_GUESTDBG_INJECT_DB; 4128 } else if (env->exception_injected == EXCP03_INT3) { 4129 reinject_trap = KVM_GUESTDBG_INJECT_BP; 4130 } 4131 kvm_reset_exception(env); 4132 } 4133 4134 /* 4135 * Kernels before KVM_CAP_X86_ROBUST_SINGLESTEP overwrote flags.TF 4136 * injected via SET_GUEST_DEBUG while updating GP regs. Work around this 4137 * by updating the debug state once again if single-stepping is on. 4138 * Another reason to call kvm_update_guest_debug here is a pending debug 4139 * trap raise by the guest. On kernels without SET_VCPU_EVENTS we have to 4140 * reinject them via SET_GUEST_DEBUG. 4141 */ 4142 if (reinject_trap || 4143 (!kvm_has_robust_singlestep() && cs->singlestep_enabled)) { 4144 ret = kvm_update_guest_debug(cs, reinject_trap); 4145 } 4146 return ret; 4147 } 4148 4149 static int kvm_put_debugregs(X86CPU *cpu) 4150 { 4151 CPUX86State *env = &cpu->env; 4152 struct kvm_debugregs dbgregs; 4153 int i; 4154 4155 if (!kvm_has_debugregs()) { 4156 return 0; 4157 } 4158 4159 memset(&dbgregs, 0, sizeof(dbgregs)); 4160 for (i = 0; i < 4; i++) { 4161 dbgregs.db[i] = env->dr[i]; 4162 } 4163 dbgregs.dr6 = env->dr[6]; 4164 dbgregs.dr7 = env->dr[7]; 4165 dbgregs.flags = 0; 4166 4167 return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_DEBUGREGS, &dbgregs); 4168 } 4169 4170 static int kvm_get_debugregs(X86CPU *cpu) 4171 { 4172 CPUX86State *env = &cpu->env; 4173 struct kvm_debugregs dbgregs; 4174 int i, ret; 4175 4176 if (!kvm_has_debugregs()) { 4177 return 0; 4178 } 4179 4180 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_DEBUGREGS, &dbgregs); 4181 if (ret < 0) { 4182 return ret; 4183 } 4184 for (i = 0; i < 4; i++) { 4185 env->dr[i] = dbgregs.db[i]; 4186 } 4187 env->dr[4] = env->dr[6] = dbgregs.dr6; 4188 env->dr[5] = env->dr[7] = dbgregs.dr7; 4189 4190 return 0; 4191 } 4192 4193 static int kvm_put_nested_state(X86CPU *cpu) 4194 { 4195 CPUX86State *env = &cpu->env; 4196 int max_nested_state_len = kvm_max_nested_state_length(); 4197 4198 if (!env->nested_state) { 4199 return 0; 4200 } 4201 4202 /* 4203 * Copy flags that are affected by reset from env->hflags and env->hflags2. 4204 */ 4205 if (env->hflags & HF_GUEST_MASK) { 4206 env->nested_state->flags |= KVM_STATE_NESTED_GUEST_MODE; 4207 } else { 4208 env->nested_state->flags &= ~KVM_STATE_NESTED_GUEST_MODE; 4209 } 4210 4211 /* Don't set KVM_STATE_NESTED_GIF_SET on VMX as it is illegal */ 4212 if (cpu_has_svm(env) && (env->hflags2 & HF2_GIF_MASK)) { 4213 env->nested_state->flags |= KVM_STATE_NESTED_GIF_SET; 4214 } else { 4215 env->nested_state->flags &= ~KVM_STATE_NESTED_GIF_SET; 4216 } 4217 4218 assert(env->nested_state->size <= max_nested_state_len); 4219 return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_NESTED_STATE, env->nested_state); 4220 } 4221 4222 static int kvm_get_nested_state(X86CPU *cpu) 4223 { 4224 CPUX86State *env = &cpu->env; 4225 int max_nested_state_len = kvm_max_nested_state_length(); 4226 int ret; 4227 4228 if (!env->nested_state) { 4229 return 0; 4230 } 4231 4232 /* 4233 * It is possible that migration restored a smaller size into 4234 * nested_state->hdr.size than what our kernel support. 4235 * We preserve migration origin nested_state->hdr.size for 4236 * call to KVM_SET_NESTED_STATE but wish that our next call 4237 * to KVM_GET_NESTED_STATE will use max size our kernel support. 4238 */ 4239 env->nested_state->size = max_nested_state_len; 4240 4241 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_NESTED_STATE, env->nested_state); 4242 if (ret < 0) { 4243 return ret; 4244 } 4245 4246 /* 4247 * Copy flags that are affected by reset to env->hflags and env->hflags2. 4248 */ 4249 if (env->nested_state->flags & KVM_STATE_NESTED_GUEST_MODE) { 4250 env->hflags |= HF_GUEST_MASK; 4251 } else { 4252 env->hflags &= ~HF_GUEST_MASK; 4253 } 4254 4255 /* Keep HF2_GIF_MASK set on !SVM as x86_cpu_pending_interrupt() needs it */ 4256 if (cpu_has_svm(env)) { 4257 if (env->nested_state->flags & KVM_STATE_NESTED_GIF_SET) { 4258 env->hflags2 |= HF2_GIF_MASK; 4259 } else { 4260 env->hflags2 &= ~HF2_GIF_MASK; 4261 } 4262 } 4263 4264 return ret; 4265 } 4266 4267 int kvm_arch_put_registers(CPUState *cpu, int level) 4268 { 4269 X86CPU *x86_cpu = X86_CPU(cpu); 4270 int ret; 4271 4272 assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu)); 4273 4274 /* must be before kvm_put_nested_state so that EFER.SVME is set */ 4275 ret = has_sregs2 ? kvm_put_sregs2(x86_cpu) : kvm_put_sregs(x86_cpu); 4276 if (ret < 0) { 4277 return ret; 4278 } 4279 4280 if (level >= KVM_PUT_RESET_STATE) { 4281 ret = kvm_put_nested_state(x86_cpu); 4282 if (ret < 0) { 4283 return ret; 4284 } 4285 4286 ret = kvm_put_msr_feature_control(x86_cpu); 4287 if (ret < 0) { 4288 return ret; 4289 } 4290 } 4291 4292 if (level == KVM_PUT_FULL_STATE) { 4293 /* We don't check for kvm_arch_set_tsc_khz() errors here, 4294 * because TSC frequency mismatch shouldn't abort migration, 4295 * unless the user explicitly asked for a more strict TSC 4296 * setting (e.g. using an explicit "tsc-freq" option). 4297 */ 4298 kvm_arch_set_tsc_khz(cpu); 4299 } 4300 4301 ret = kvm_getput_regs(x86_cpu, 1); 4302 if (ret < 0) { 4303 return ret; 4304 } 4305 ret = kvm_put_xsave(x86_cpu); 4306 if (ret < 0) { 4307 return ret; 4308 } 4309 ret = kvm_put_xcrs(x86_cpu); 4310 if (ret < 0) { 4311 return ret; 4312 } 4313 /* must be before kvm_put_msrs */ 4314 ret = kvm_inject_mce_oldstyle(x86_cpu); 4315 if (ret < 0) { 4316 return ret; 4317 } 4318 ret = kvm_put_msrs(x86_cpu, level); 4319 if (ret < 0) { 4320 return ret; 4321 } 4322 ret = kvm_put_vcpu_events(x86_cpu, level); 4323 if (ret < 0) { 4324 return ret; 4325 } 4326 if (level >= KVM_PUT_RESET_STATE) { 4327 ret = kvm_put_mp_state(x86_cpu); 4328 if (ret < 0) { 4329 return ret; 4330 } 4331 } 4332 4333 ret = kvm_put_tscdeadline_msr(x86_cpu); 4334 if (ret < 0) { 4335 return ret; 4336 } 4337 ret = kvm_put_debugregs(x86_cpu); 4338 if (ret < 0) { 4339 return ret; 4340 } 4341 /* must be last */ 4342 ret = kvm_guest_debug_workarounds(x86_cpu); 4343 if (ret < 0) { 4344 return ret; 4345 } 4346 return 0; 4347 } 4348 4349 int kvm_arch_get_registers(CPUState *cs) 4350 { 4351 X86CPU *cpu = X86_CPU(cs); 4352 int ret; 4353 4354 assert(cpu_is_stopped(cs) || qemu_cpu_is_self(cs)); 4355 4356 ret = kvm_get_vcpu_events(cpu); 4357 if (ret < 0) { 4358 goto out; 4359 } 4360 /* 4361 * KVM_GET_MPSTATE can modify CS and RIP, call it before 4362 * KVM_GET_REGS and KVM_GET_SREGS. 4363 */ 4364 ret = kvm_get_mp_state(cpu); 4365 if (ret < 0) { 4366 goto out; 4367 } 4368 ret = kvm_getput_regs(cpu, 0); 4369 if (ret < 0) { 4370 goto out; 4371 } 4372 ret = kvm_get_xsave(cpu); 4373 if (ret < 0) { 4374 goto out; 4375 } 4376 ret = kvm_get_xcrs(cpu); 4377 if (ret < 0) { 4378 goto out; 4379 } 4380 ret = has_sregs2 ? kvm_get_sregs2(cpu) : kvm_get_sregs(cpu); 4381 if (ret < 0) { 4382 goto out; 4383 } 4384 ret = kvm_get_msrs(cpu); 4385 if (ret < 0) { 4386 goto out; 4387 } 4388 ret = kvm_get_apic(cpu); 4389 if (ret < 0) { 4390 goto out; 4391 } 4392 ret = kvm_get_debugregs(cpu); 4393 if (ret < 0) { 4394 goto out; 4395 } 4396 ret = kvm_get_nested_state(cpu); 4397 if (ret < 0) { 4398 goto out; 4399 } 4400 ret = 0; 4401 out: 4402 cpu_sync_bndcs_hflags(&cpu->env); 4403 return ret; 4404 } 4405 4406 void kvm_arch_pre_run(CPUState *cpu, struct kvm_run *run) 4407 { 4408 X86CPU *x86_cpu = X86_CPU(cpu); 4409 CPUX86State *env = &x86_cpu->env; 4410 int ret; 4411 4412 /* Inject NMI */ 4413 if (cpu->interrupt_request & (CPU_INTERRUPT_NMI | CPU_INTERRUPT_SMI)) { 4414 if (cpu->interrupt_request & CPU_INTERRUPT_NMI) { 4415 qemu_mutex_lock_iothread(); 4416 cpu->interrupt_request &= ~CPU_INTERRUPT_NMI; 4417 qemu_mutex_unlock_iothread(); 4418 DPRINTF("injected NMI\n"); 4419 ret = kvm_vcpu_ioctl(cpu, KVM_NMI); 4420 if (ret < 0) { 4421 fprintf(stderr, "KVM: injection failed, NMI lost (%s)\n", 4422 strerror(-ret)); 4423 } 4424 } 4425 if (cpu->interrupt_request & CPU_INTERRUPT_SMI) { 4426 qemu_mutex_lock_iothread(); 4427 cpu->interrupt_request &= ~CPU_INTERRUPT_SMI; 4428 qemu_mutex_unlock_iothread(); 4429 DPRINTF("injected SMI\n"); 4430 ret = kvm_vcpu_ioctl(cpu, KVM_SMI); 4431 if (ret < 0) { 4432 fprintf(stderr, "KVM: injection failed, SMI lost (%s)\n", 4433 strerror(-ret)); 4434 } 4435 } 4436 } 4437 4438 if (!kvm_pic_in_kernel()) { 4439 qemu_mutex_lock_iothread(); 4440 } 4441 4442 /* Force the VCPU out of its inner loop to process any INIT requests 4443 * or (for userspace APIC, but it is cheap to combine the checks here) 4444 * pending TPR access reports. 4445 */ 4446 if (cpu->interrupt_request & (CPU_INTERRUPT_INIT | CPU_INTERRUPT_TPR)) { 4447 if ((cpu->interrupt_request & CPU_INTERRUPT_INIT) && 4448 !(env->hflags & HF_SMM_MASK)) { 4449 cpu->exit_request = 1; 4450 } 4451 if (cpu->interrupt_request & CPU_INTERRUPT_TPR) { 4452 cpu->exit_request = 1; 4453 } 4454 } 4455 4456 if (!kvm_pic_in_kernel()) { 4457 /* Try to inject an interrupt if the guest can accept it */ 4458 if (run->ready_for_interrupt_injection && 4459 (cpu->interrupt_request & CPU_INTERRUPT_HARD) && 4460 (env->eflags & IF_MASK)) { 4461 int irq; 4462 4463 cpu->interrupt_request &= ~CPU_INTERRUPT_HARD; 4464 irq = cpu_get_pic_interrupt(env); 4465 if (irq >= 0) { 4466 struct kvm_interrupt intr; 4467 4468 intr.irq = irq; 4469 DPRINTF("injected interrupt %d\n", irq); 4470 ret = kvm_vcpu_ioctl(cpu, KVM_INTERRUPT, &intr); 4471 if (ret < 0) { 4472 fprintf(stderr, 4473 "KVM: injection failed, interrupt lost (%s)\n", 4474 strerror(-ret)); 4475 } 4476 } 4477 } 4478 4479 /* If we have an interrupt but the guest is not ready to receive an 4480 * interrupt, request an interrupt window exit. This will 4481 * cause a return to userspace as soon as the guest is ready to 4482 * receive interrupts. */ 4483 if ((cpu->interrupt_request & CPU_INTERRUPT_HARD)) { 4484 run->request_interrupt_window = 1; 4485 } else { 4486 run->request_interrupt_window = 0; 4487 } 4488 4489 DPRINTF("setting tpr\n"); 4490 run->cr8 = cpu_get_apic_tpr(x86_cpu->apic_state); 4491 4492 qemu_mutex_unlock_iothread(); 4493 } 4494 } 4495 4496 static void kvm_rate_limit_on_bus_lock(void) 4497 { 4498 uint64_t delay_ns = ratelimit_calculate_delay(&bus_lock_ratelimit_ctrl, 1); 4499 4500 if (delay_ns) { 4501 g_usleep(delay_ns / SCALE_US); 4502 } 4503 } 4504 4505 MemTxAttrs kvm_arch_post_run(CPUState *cpu, struct kvm_run *run) 4506 { 4507 X86CPU *x86_cpu = X86_CPU(cpu); 4508 CPUX86State *env = &x86_cpu->env; 4509 4510 if (run->flags & KVM_RUN_X86_SMM) { 4511 env->hflags |= HF_SMM_MASK; 4512 } else { 4513 env->hflags &= ~HF_SMM_MASK; 4514 } 4515 if (run->if_flag) { 4516 env->eflags |= IF_MASK; 4517 } else { 4518 env->eflags &= ~IF_MASK; 4519 } 4520 if (run->flags & KVM_RUN_X86_BUS_LOCK) { 4521 kvm_rate_limit_on_bus_lock(); 4522 } 4523 4524 /* We need to protect the apic state against concurrent accesses from 4525 * different threads in case the userspace irqchip is used. */ 4526 if (!kvm_irqchip_in_kernel()) { 4527 qemu_mutex_lock_iothread(); 4528 } 4529 cpu_set_apic_tpr(x86_cpu->apic_state, run->cr8); 4530 cpu_set_apic_base(x86_cpu->apic_state, run->apic_base); 4531 if (!kvm_irqchip_in_kernel()) { 4532 qemu_mutex_unlock_iothread(); 4533 } 4534 return cpu_get_mem_attrs(env); 4535 } 4536 4537 int kvm_arch_process_async_events(CPUState *cs) 4538 { 4539 X86CPU *cpu = X86_CPU(cs); 4540 CPUX86State *env = &cpu->env; 4541 4542 if (cs->interrupt_request & CPU_INTERRUPT_MCE) { 4543 /* We must not raise CPU_INTERRUPT_MCE if it's not supported. */ 4544 assert(env->mcg_cap); 4545 4546 cs->interrupt_request &= ~CPU_INTERRUPT_MCE; 4547 4548 kvm_cpu_synchronize_state(cs); 4549 4550 if (env->exception_nr == EXCP08_DBLE) { 4551 /* this means triple fault */ 4552 qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); 4553 cs->exit_request = 1; 4554 return 0; 4555 } 4556 kvm_queue_exception(env, EXCP12_MCHK, 0, 0); 4557 env->has_error_code = 0; 4558 4559 cs->halted = 0; 4560 if (kvm_irqchip_in_kernel() && env->mp_state == KVM_MP_STATE_HALTED) { 4561 env->mp_state = KVM_MP_STATE_RUNNABLE; 4562 } 4563 } 4564 4565 if ((cs->interrupt_request & CPU_INTERRUPT_INIT) && 4566 !(env->hflags & HF_SMM_MASK)) { 4567 kvm_cpu_synchronize_state(cs); 4568 do_cpu_init(cpu); 4569 } 4570 4571 if (kvm_irqchip_in_kernel()) { 4572 return 0; 4573 } 4574 4575 if (cs->interrupt_request & CPU_INTERRUPT_POLL) { 4576 cs->interrupt_request &= ~CPU_INTERRUPT_POLL; 4577 apic_poll_irq(cpu->apic_state); 4578 } 4579 if (((cs->interrupt_request & CPU_INTERRUPT_HARD) && 4580 (env->eflags & IF_MASK)) || 4581 (cs->interrupt_request & CPU_INTERRUPT_NMI)) { 4582 cs->halted = 0; 4583 } 4584 if (cs->interrupt_request & CPU_INTERRUPT_SIPI) { 4585 kvm_cpu_synchronize_state(cs); 4586 do_cpu_sipi(cpu); 4587 } 4588 if (cs->interrupt_request & CPU_INTERRUPT_TPR) { 4589 cs->interrupt_request &= ~CPU_INTERRUPT_TPR; 4590 kvm_cpu_synchronize_state(cs); 4591 apic_handle_tpr_access_report(cpu->apic_state, env->eip, 4592 env->tpr_access_type); 4593 } 4594 4595 return cs->halted; 4596 } 4597 4598 static int kvm_handle_halt(X86CPU *cpu) 4599 { 4600 CPUState *cs = CPU(cpu); 4601 CPUX86State *env = &cpu->env; 4602 4603 if (!((cs->interrupt_request & CPU_INTERRUPT_HARD) && 4604 (env->eflags & IF_MASK)) && 4605 !(cs->interrupt_request & CPU_INTERRUPT_NMI)) { 4606 cs->halted = 1; 4607 return EXCP_HLT; 4608 } 4609 4610 return 0; 4611 } 4612 4613 static int kvm_handle_tpr_access(X86CPU *cpu) 4614 { 4615 CPUState *cs = CPU(cpu); 4616 struct kvm_run *run = cs->kvm_run; 4617 4618 apic_handle_tpr_access_report(cpu->apic_state, run->tpr_access.rip, 4619 run->tpr_access.is_write ? TPR_ACCESS_WRITE 4620 : TPR_ACCESS_READ); 4621 return 1; 4622 } 4623 4624 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp) 4625 { 4626 static const uint8_t int3 = 0xcc; 4627 4628 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 1, 0) || 4629 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&int3, 1, 1)) { 4630 return -EINVAL; 4631 } 4632 return 0; 4633 } 4634 4635 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp) 4636 { 4637 uint8_t int3; 4638 4639 if (cpu_memory_rw_debug(cs, bp->pc, &int3, 1, 0)) { 4640 return -EINVAL; 4641 } 4642 if (int3 != 0xcc) { 4643 return 0; 4644 } 4645 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 1, 1)) { 4646 return -EINVAL; 4647 } 4648 return 0; 4649 } 4650 4651 static struct { 4652 target_ulong addr; 4653 int len; 4654 int type; 4655 } hw_breakpoint[4]; 4656 4657 static int nb_hw_breakpoint; 4658 4659 static int find_hw_breakpoint(target_ulong addr, int len, int type) 4660 { 4661 int n; 4662 4663 for (n = 0; n < nb_hw_breakpoint; n++) { 4664 if (hw_breakpoint[n].addr == addr && hw_breakpoint[n].type == type && 4665 (hw_breakpoint[n].len == len || len == -1)) { 4666 return n; 4667 } 4668 } 4669 return -1; 4670 } 4671 4672 int kvm_arch_insert_hw_breakpoint(target_ulong addr, 4673 target_ulong len, int type) 4674 { 4675 switch (type) { 4676 case GDB_BREAKPOINT_HW: 4677 len = 1; 4678 break; 4679 case GDB_WATCHPOINT_WRITE: 4680 case GDB_WATCHPOINT_ACCESS: 4681 switch (len) { 4682 case 1: 4683 break; 4684 case 2: 4685 case 4: 4686 case 8: 4687 if (addr & (len - 1)) { 4688 return -EINVAL; 4689 } 4690 break; 4691 default: 4692 return -EINVAL; 4693 } 4694 break; 4695 default: 4696 return -ENOSYS; 4697 } 4698 4699 if (nb_hw_breakpoint == 4) { 4700 return -ENOBUFS; 4701 } 4702 if (find_hw_breakpoint(addr, len, type) >= 0) { 4703 return -EEXIST; 4704 } 4705 hw_breakpoint[nb_hw_breakpoint].addr = addr; 4706 hw_breakpoint[nb_hw_breakpoint].len = len; 4707 hw_breakpoint[nb_hw_breakpoint].type = type; 4708 nb_hw_breakpoint++; 4709 4710 return 0; 4711 } 4712 4713 int kvm_arch_remove_hw_breakpoint(target_ulong addr, 4714 target_ulong len, int type) 4715 { 4716 int n; 4717 4718 n = find_hw_breakpoint(addr, (type == GDB_BREAKPOINT_HW) ? 1 : len, type); 4719 if (n < 0) { 4720 return -ENOENT; 4721 } 4722 nb_hw_breakpoint--; 4723 hw_breakpoint[n] = hw_breakpoint[nb_hw_breakpoint]; 4724 4725 return 0; 4726 } 4727 4728 void kvm_arch_remove_all_hw_breakpoints(void) 4729 { 4730 nb_hw_breakpoint = 0; 4731 } 4732 4733 static CPUWatchpoint hw_watchpoint; 4734 4735 static int kvm_handle_debug(X86CPU *cpu, 4736 struct kvm_debug_exit_arch *arch_info) 4737 { 4738 CPUState *cs = CPU(cpu); 4739 CPUX86State *env = &cpu->env; 4740 int ret = 0; 4741 int n; 4742 4743 if (arch_info->exception == EXCP01_DB) { 4744 if (arch_info->dr6 & DR6_BS) { 4745 if (cs->singlestep_enabled) { 4746 ret = EXCP_DEBUG; 4747 } 4748 } else { 4749 for (n = 0; n < 4; n++) { 4750 if (arch_info->dr6 & (1 << n)) { 4751 switch ((arch_info->dr7 >> (16 + n*4)) & 0x3) { 4752 case 0x0: 4753 ret = EXCP_DEBUG; 4754 break; 4755 case 0x1: 4756 ret = EXCP_DEBUG; 4757 cs->watchpoint_hit = &hw_watchpoint; 4758 hw_watchpoint.vaddr = hw_breakpoint[n].addr; 4759 hw_watchpoint.flags = BP_MEM_WRITE; 4760 break; 4761 case 0x3: 4762 ret = EXCP_DEBUG; 4763 cs->watchpoint_hit = &hw_watchpoint; 4764 hw_watchpoint.vaddr = hw_breakpoint[n].addr; 4765 hw_watchpoint.flags = BP_MEM_ACCESS; 4766 break; 4767 } 4768 } 4769 } 4770 } 4771 } else if (kvm_find_sw_breakpoint(cs, arch_info->pc)) { 4772 ret = EXCP_DEBUG; 4773 } 4774 if (ret == 0) { 4775 cpu_synchronize_state(cs); 4776 assert(env->exception_nr == -1); 4777 4778 /* pass to guest */ 4779 kvm_queue_exception(env, arch_info->exception, 4780 arch_info->exception == EXCP01_DB, 4781 arch_info->dr6); 4782 env->has_error_code = 0; 4783 } 4784 4785 return ret; 4786 } 4787 4788 void kvm_arch_update_guest_debug(CPUState *cpu, struct kvm_guest_debug *dbg) 4789 { 4790 const uint8_t type_code[] = { 4791 [GDB_BREAKPOINT_HW] = 0x0, 4792 [GDB_WATCHPOINT_WRITE] = 0x1, 4793 [GDB_WATCHPOINT_ACCESS] = 0x3 4794 }; 4795 const uint8_t len_code[] = { 4796 [1] = 0x0, [2] = 0x1, [4] = 0x3, [8] = 0x2 4797 }; 4798 int n; 4799 4800 if (kvm_sw_breakpoints_active(cpu)) { 4801 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP; 4802 } 4803 if (nb_hw_breakpoint > 0) { 4804 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP; 4805 dbg->arch.debugreg[7] = 0x0600; 4806 for (n = 0; n < nb_hw_breakpoint; n++) { 4807 dbg->arch.debugreg[n] = hw_breakpoint[n].addr; 4808 dbg->arch.debugreg[7] |= (2 << (n * 2)) | 4809 (type_code[hw_breakpoint[n].type] << (16 + n*4)) | 4810 ((uint32_t)len_code[hw_breakpoint[n].len] << (18 + n*4)); 4811 } 4812 } 4813 } 4814 4815 static bool has_sgx_provisioning; 4816 4817 static bool __kvm_enable_sgx_provisioning(KVMState *s) 4818 { 4819 int fd, ret; 4820 4821 if (!kvm_vm_check_extension(s, KVM_CAP_SGX_ATTRIBUTE)) { 4822 return false; 4823 } 4824 4825 fd = qemu_open_old("/dev/sgx_provision", O_RDONLY); 4826 if (fd < 0) { 4827 return false; 4828 } 4829 4830 ret = kvm_vm_enable_cap(s, KVM_CAP_SGX_ATTRIBUTE, 0, fd); 4831 if (ret) { 4832 error_report("Could not enable SGX PROVISIONKEY: %s", strerror(-ret)); 4833 exit(1); 4834 } 4835 close(fd); 4836 return true; 4837 } 4838 4839 bool kvm_enable_sgx_provisioning(KVMState *s) 4840 { 4841 return MEMORIZE(__kvm_enable_sgx_provisioning(s), has_sgx_provisioning); 4842 } 4843 4844 static bool host_supports_vmx(void) 4845 { 4846 uint32_t ecx, unused; 4847 4848 host_cpuid(1, 0, &unused, &unused, &ecx, &unused); 4849 return ecx & CPUID_EXT_VMX; 4850 } 4851 4852 #define VMX_INVALID_GUEST_STATE 0x80000021 4853 4854 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) 4855 { 4856 X86CPU *cpu = X86_CPU(cs); 4857 uint64_t code; 4858 int ret; 4859 4860 switch (run->exit_reason) { 4861 case KVM_EXIT_HLT: 4862 DPRINTF("handle_hlt\n"); 4863 qemu_mutex_lock_iothread(); 4864 ret = kvm_handle_halt(cpu); 4865 qemu_mutex_unlock_iothread(); 4866 break; 4867 case KVM_EXIT_SET_TPR: 4868 ret = 0; 4869 break; 4870 case KVM_EXIT_TPR_ACCESS: 4871 qemu_mutex_lock_iothread(); 4872 ret = kvm_handle_tpr_access(cpu); 4873 qemu_mutex_unlock_iothread(); 4874 break; 4875 case KVM_EXIT_FAIL_ENTRY: 4876 code = run->fail_entry.hardware_entry_failure_reason; 4877 fprintf(stderr, "KVM: entry failed, hardware error 0x%" PRIx64 "\n", 4878 code); 4879 if (host_supports_vmx() && code == VMX_INVALID_GUEST_STATE) { 4880 fprintf(stderr, 4881 "\nIf you're running a guest on an Intel machine without " 4882 "unrestricted mode\n" 4883 "support, the failure can be most likely due to the guest " 4884 "entering an invalid\n" 4885 "state for Intel VT. For example, the guest maybe running " 4886 "in big real mode\n" 4887 "which is not supported on less recent Intel processors." 4888 "\n\n"); 4889 } 4890 ret = -1; 4891 break; 4892 case KVM_EXIT_EXCEPTION: 4893 fprintf(stderr, "KVM: exception %d exit (error code 0x%x)\n", 4894 run->ex.exception, run->ex.error_code); 4895 ret = -1; 4896 break; 4897 case KVM_EXIT_DEBUG: 4898 DPRINTF("kvm_exit_debug\n"); 4899 qemu_mutex_lock_iothread(); 4900 ret = kvm_handle_debug(cpu, &run->debug.arch); 4901 qemu_mutex_unlock_iothread(); 4902 break; 4903 case KVM_EXIT_HYPERV: 4904 ret = kvm_hv_handle_exit(cpu, &run->hyperv); 4905 break; 4906 case KVM_EXIT_IOAPIC_EOI: 4907 ioapic_eoi_broadcast(run->eoi.vector); 4908 ret = 0; 4909 break; 4910 case KVM_EXIT_X86_BUS_LOCK: 4911 /* already handled in kvm_arch_post_run */ 4912 ret = 0; 4913 break; 4914 default: 4915 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason); 4916 ret = -1; 4917 break; 4918 } 4919 4920 return ret; 4921 } 4922 4923 bool kvm_arch_stop_on_emulation_error(CPUState *cs) 4924 { 4925 X86CPU *cpu = X86_CPU(cs); 4926 CPUX86State *env = &cpu->env; 4927 4928 kvm_cpu_synchronize_state(cs); 4929 return !(env->cr[0] & CR0_PE_MASK) || 4930 ((env->segs[R_CS].selector & 3) != 3); 4931 } 4932 4933 void kvm_arch_init_irq_routing(KVMState *s) 4934 { 4935 /* We know at this point that we're using the in-kernel 4936 * irqchip, so we can use irqfds, and on x86 we know 4937 * we can use msi via irqfd and GSI routing. 4938 */ 4939 kvm_msi_via_irqfd_allowed = true; 4940 kvm_gsi_routing_allowed = true; 4941 4942 if (kvm_irqchip_is_split()) { 4943 int i; 4944 4945 /* If the ioapic is in QEMU and the lapics are in KVM, reserve 4946 MSI routes for signaling interrupts to the local apics. */ 4947 for (i = 0; i < IOAPIC_NUM_PINS; i++) { 4948 if (kvm_irqchip_add_msi_route(s, 0, NULL) < 0) { 4949 error_report("Could not enable split IRQ mode."); 4950 exit(1); 4951 } 4952 } 4953 } 4954 } 4955 4956 int kvm_arch_irqchip_create(KVMState *s) 4957 { 4958 int ret; 4959 if (kvm_kernel_irqchip_split()) { 4960 ret = kvm_vm_enable_cap(s, KVM_CAP_SPLIT_IRQCHIP, 0, 24); 4961 if (ret) { 4962 error_report("Could not enable split irqchip mode: %s", 4963 strerror(-ret)); 4964 exit(1); 4965 } else { 4966 DPRINTF("Enabled KVM_CAP_SPLIT_IRQCHIP\n"); 4967 kvm_split_irqchip = true; 4968 return 1; 4969 } 4970 } else { 4971 return 0; 4972 } 4973 } 4974 4975 uint64_t kvm_swizzle_msi_ext_dest_id(uint64_t address) 4976 { 4977 CPUX86State *env; 4978 uint64_t ext_id; 4979 4980 if (!first_cpu) { 4981 return address; 4982 } 4983 env = &X86_CPU(first_cpu)->env; 4984 if (!(env->features[FEAT_KVM] & (1 << KVM_FEATURE_MSI_EXT_DEST_ID))) { 4985 return address; 4986 } 4987 4988 /* 4989 * If the remappable format bit is set, or the upper bits are 4990 * already set in address_hi, or the low extended bits aren't 4991 * there anyway, do nothing. 4992 */ 4993 ext_id = address & (0xff << MSI_ADDR_DEST_IDX_SHIFT); 4994 if (!ext_id || (ext_id & (1 << MSI_ADDR_DEST_IDX_SHIFT)) || (address >> 32)) { 4995 return address; 4996 } 4997 4998 address &= ~ext_id; 4999 address |= ext_id << 35; 5000 return address; 5001 } 5002 5003 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route, 5004 uint64_t address, uint32_t data, PCIDevice *dev) 5005 { 5006 X86IOMMUState *iommu = x86_iommu_get_default(); 5007 5008 if (iommu) { 5009 X86IOMMUClass *class = X86_IOMMU_DEVICE_GET_CLASS(iommu); 5010 5011 if (class->int_remap) { 5012 int ret; 5013 MSIMessage src, dst; 5014 5015 src.address = route->u.msi.address_hi; 5016 src.address <<= VTD_MSI_ADDR_HI_SHIFT; 5017 src.address |= route->u.msi.address_lo; 5018 src.data = route->u.msi.data; 5019 5020 ret = class->int_remap(iommu, &src, &dst, dev ? \ 5021 pci_requester_id(dev) : \ 5022 X86_IOMMU_SID_INVALID); 5023 if (ret) { 5024 trace_kvm_x86_fixup_msi_error(route->gsi); 5025 return 1; 5026 } 5027 5028 /* 5029 * Handled untranslated compatibilty format interrupt with 5030 * extended destination ID in the low bits 11-5. */ 5031 dst.address = kvm_swizzle_msi_ext_dest_id(dst.address); 5032 5033 route->u.msi.address_hi = dst.address >> VTD_MSI_ADDR_HI_SHIFT; 5034 route->u.msi.address_lo = dst.address & VTD_MSI_ADDR_LO_MASK; 5035 route->u.msi.data = dst.data; 5036 return 0; 5037 } 5038 } 5039 5040 address = kvm_swizzle_msi_ext_dest_id(address); 5041 route->u.msi.address_hi = address >> VTD_MSI_ADDR_HI_SHIFT; 5042 route->u.msi.address_lo = address & VTD_MSI_ADDR_LO_MASK; 5043 return 0; 5044 } 5045 5046 typedef struct MSIRouteEntry MSIRouteEntry; 5047 5048 struct MSIRouteEntry { 5049 PCIDevice *dev; /* Device pointer */ 5050 int vector; /* MSI/MSIX vector index */ 5051 int virq; /* Virtual IRQ index */ 5052 QLIST_ENTRY(MSIRouteEntry) list; 5053 }; 5054 5055 /* List of used GSI routes */ 5056 static QLIST_HEAD(, MSIRouteEntry) msi_route_list = \ 5057 QLIST_HEAD_INITIALIZER(msi_route_list); 5058 5059 static void kvm_update_msi_routes_all(void *private, bool global, 5060 uint32_t index, uint32_t mask) 5061 { 5062 int cnt = 0, vector; 5063 MSIRouteEntry *entry; 5064 MSIMessage msg; 5065 PCIDevice *dev; 5066 5067 /* TODO: explicit route update */ 5068 QLIST_FOREACH(entry, &msi_route_list, list) { 5069 cnt++; 5070 vector = entry->vector; 5071 dev = entry->dev; 5072 if (msix_enabled(dev) && !msix_is_masked(dev, vector)) { 5073 msg = msix_get_message(dev, vector); 5074 } else if (msi_enabled(dev) && !msi_is_masked(dev, vector)) { 5075 msg = msi_get_message(dev, vector); 5076 } else { 5077 /* 5078 * Either MSI/MSIX is disabled for the device, or the 5079 * specific message was masked out. Skip this one. 5080 */ 5081 continue; 5082 } 5083 kvm_irqchip_update_msi_route(kvm_state, entry->virq, msg, dev); 5084 } 5085 kvm_irqchip_commit_routes(kvm_state); 5086 trace_kvm_x86_update_msi_routes(cnt); 5087 } 5088 5089 int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route, 5090 int vector, PCIDevice *dev) 5091 { 5092 static bool notify_list_inited = false; 5093 MSIRouteEntry *entry; 5094 5095 if (!dev) { 5096 /* These are (possibly) IOAPIC routes only used for split 5097 * kernel irqchip mode, while what we are housekeeping are 5098 * PCI devices only. */ 5099 return 0; 5100 } 5101 5102 entry = g_new0(MSIRouteEntry, 1); 5103 entry->dev = dev; 5104 entry->vector = vector; 5105 entry->virq = route->gsi; 5106 QLIST_INSERT_HEAD(&msi_route_list, entry, list); 5107 5108 trace_kvm_x86_add_msi_route(route->gsi); 5109 5110 if (!notify_list_inited) { 5111 /* For the first time we do add route, add ourselves into 5112 * IOMMU's IEC notify list if needed. */ 5113 X86IOMMUState *iommu = x86_iommu_get_default(); 5114 if (iommu) { 5115 x86_iommu_iec_register_notifier(iommu, 5116 kvm_update_msi_routes_all, 5117 NULL); 5118 } 5119 notify_list_inited = true; 5120 } 5121 return 0; 5122 } 5123 5124 int kvm_arch_release_virq_post(int virq) 5125 { 5126 MSIRouteEntry *entry, *next; 5127 QLIST_FOREACH_SAFE(entry, &msi_route_list, list, next) { 5128 if (entry->virq == virq) { 5129 trace_kvm_x86_remove_msi_route(virq); 5130 QLIST_REMOVE(entry, list); 5131 g_free(entry); 5132 break; 5133 } 5134 } 5135 return 0; 5136 } 5137 5138 int kvm_arch_msi_data_to_gsi(uint32_t data) 5139 { 5140 abort(); 5141 } 5142 5143 bool kvm_has_waitpkg(void) 5144 { 5145 return has_msr_umwait; 5146 } 5147 5148 bool kvm_arch_cpu_check_are_resettable(void) 5149 { 5150 return !sev_es_enabled(); 5151 } 5152