/* Copyright 2008 IBM Corporation
 *           2008 Red Hat, Inc.
 * Copyright 2011 Intel Corporation
 * Copyright 2016 Veertu, Inc.
 * Copyright 2017 The Android Open Source Project
 *
 * QEMU Hypervisor.framework support
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <http://www.gnu.org/licenses/>.
 *
 * This file contains code under public domain from the hvdos project:
 * https://github.com/mist64/hvdos
 *
 * Parts Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "qemu/osdep.h"
#include "qemu-common.h"
#include "qemu/error-report.h"

#include "sysemu/hvf.h"
#include "sysemu/runstate.h"
#include "hvf-i386.h"
#include "vmcs.h"
#include "vmx.h"
#include "x86.h"
#include "x86_descr.h"
#include "x86_mmu.h"
#include "x86_decode.h"
#include "x86_emu.h"
#include "x86_task.h"
#include "x86hvf.h"

#include <Hypervisor/hv.h>
#include <Hypervisor/hv_vmx.h>
#include <sys/sysctl.h>

#include "hw/i386/apic_internal.h"
#include "qemu/main-loop.h"
#include "qemu/accel.h"
#include "target/i386/cpu.h"

#include "hvf-accel-ops.h"

HVFState *hvf_state;

static void assert_hvf_ok(hv_return_t ret)
{
    if (ret == HV_SUCCESS) {
        return;
    }

    switch (ret) {
    case HV_ERROR:
        error_report("Error: HV_ERROR");
        break;
    case HV_BUSY:
        error_report("Error: HV_BUSY");
        break;
    case HV_BAD_ARGUMENT:
        error_report("Error: HV_BAD_ARGUMENT");
        break;
    case HV_NO_RESOURCES:
        error_report("Error: HV_NO_RESOURCES");
        break;
    case HV_NO_DEVICE:
        error_report("Error: HV_NO_DEVICE");
        break;
    case HV_UNSUPPORTED:
        error_report("Error: HV_UNSUPPORTED");
        break;
    default:
        error_report("Unknown Error");
    }

    abort();
}

/* Memory slots */
hvf_slot *hvf_find_overlap_slot(uint64_t start, uint64_t size)
{
    hvf_slot *slot;
    int x;
    for (x = 0; x < hvf_state->num_slots; ++x) {
        slot = &hvf_state->slots[x];
        if (slot->size && start < (slot->start + slot->size) &&
            (start + size) > slot->start) {
            return slot;
        }
    }
    return NULL;
}

struct mac_slot {
    int present;
    uint64_t size;
    uint64_t gpa_start;
    uint64_t gva;
};

struct mac_slot mac_slots[32];

static int do_hvf_set_memory(hvf_slot *slot, hv_memory_flags_t flags)
{
    struct mac_slot *macslot;
    hv_return_t ret;

    macslot = &mac_slots[slot->slot_id];

    if (macslot->present) {
        if (macslot->size != slot->size) {
            macslot->present = 0;
            ret = hv_vm_unmap(macslot->gpa_start, macslot->size);
            assert_hvf_ok(ret);
        }
    }

    if (!slot->size) {
        return 0;
    }

    macslot->present = 1;
    macslot->gpa_start = slot->start;
    macslot->size = slot->size;
    ret = hv_vm_map((hv_uvaddr_t)slot->mem, slot->start, slot->size, flags);
    assert_hvf_ok(ret);
    return 0;
}

void hvf_set_phys_mem(MemoryRegionSection *section, bool add)
{
    hvf_slot *mem;
    MemoryRegion *area = section->mr;
    bool writeable = !area->readonly && !area->rom_device;
    hv_memory_flags_t flags;

    if (!memory_region_is_ram(area)) {
        if (writeable) {
            return;
        } else if (!memory_region_is_romd(area)) {
            /*
             * If the memory device is not in romd_mode, then we actually want
             * to remove the hvf memory slot so all accesses will trap.
             */
            add = false;
        }
    }

    mem = hvf_find_overlap_slot(
            section->offset_within_address_space,
            int128_get64(section->size));

    if (mem && add) {
        if (mem->size == int128_get64(section->size) &&
            mem->start == section->offset_within_address_space &&
            mem->mem == (memory_region_get_ram_ptr(area) +
            section->offset_within_region)) {
            return; /* Same region was attempted to register, go away. */
        }
    }

    /* Region needs to be reset. Set the size to 0 and remap it. */
    if (mem) {
        mem->size = 0;
        if (do_hvf_set_memory(mem, 0)) {
            error_report("Failed to reset overlapping slot");
            abort();
        }
    }

    if (!add) {
        return;
    }

    if (area->readonly ||
        (!memory_region_is_ram(area) && memory_region_is_romd(area))) {
        flags = HV_MEMORY_READ | HV_MEMORY_EXEC;
    } else {
        flags = HV_MEMORY_READ | HV_MEMORY_WRITE | HV_MEMORY_EXEC;
    }

    /* Now make a new slot. */
    int x;

    for (x = 0; x < hvf_state->num_slots; ++x) {
        mem = &hvf_state->slots[x];
        if (!mem->size) {
            break;
        }
    }

    if (x == hvf_state->num_slots) {
        error_report("No free slots");
        abort();
    }

    mem->size = int128_get64(section->size);
    mem->mem = memory_region_get_ram_ptr(area) + section->offset_within_region;
    mem->start = section->offset_within_address_space;
    mem->region = area;

    if (do_hvf_set_memory(mem, flags)) {
        error_report("Error registering new memory slot");
        abort();
    }
}

void vmx_update_tpr(CPUState *cpu)
{
    /* TODO: need to integrate APIC handling */
    X86CPU *x86_cpu = X86_CPU(cpu);
    int tpr = cpu_get_apic_tpr(x86_cpu->apic_state) << 4;
    int irr = apic_get_highest_priority_irr(x86_cpu->apic_state);

    wreg(cpu->hvf_fd, HV_X86_TPR, tpr);
    if (irr == -1) {
        wvmcs(cpu->hvf_fd, VMCS_TPR_THRESHOLD, 0);
    } else {
        wvmcs(cpu->hvf_fd, VMCS_TPR_THRESHOLD, (irr > tpr) ? tpr >> 4 :
              irr >> 4);
    }
}

static void update_apic_tpr(CPUState *cpu)
{
    X86CPU *x86_cpu = X86_CPU(cpu);
    int tpr = rreg(cpu->hvf_fd, HV_X86_TPR) >> 4;
    cpu_set_apic_tpr(x86_cpu->apic_state, tpr);
}

#define VECTORING_INFO_VECTOR_MASK 0xff

void hvf_handle_io(CPUArchState *env, uint16_t port, void *buffer,
                  int direction, int size, int count)
{
    int i;
    uint8_t *ptr = buffer;

    for (i = 0; i < count; i++) {
        address_space_rw(&address_space_io, port, MEMTXATTRS_UNSPECIFIED,
                         ptr, size,
                         direction);
        ptr += size;
    }
}

static void do_hvf_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
{
    if (!cpu->vcpu_dirty) {
        hvf_get_registers(cpu);
        cpu->vcpu_dirty = true;
    }
}

void hvf_cpu_synchronize_state(CPUState *cpu)
{
    if (!cpu->vcpu_dirty) {
        run_on_cpu(cpu, do_hvf_cpu_synchronize_state, RUN_ON_CPU_NULL);
    }
}

static void do_hvf_cpu_synchronize_post_reset(CPUState *cpu,
                                              run_on_cpu_data arg)
{
    hvf_put_registers(cpu);
    cpu->vcpu_dirty = false;
}

void hvf_cpu_synchronize_post_reset(CPUState *cpu)
{
    run_on_cpu(cpu, do_hvf_cpu_synchronize_post_reset, RUN_ON_CPU_NULL);
}

static void do_hvf_cpu_synchronize_post_init(CPUState *cpu,
                                             run_on_cpu_data arg)
{
    hvf_put_registers(cpu);
    cpu->vcpu_dirty = false;
}

void hvf_cpu_synchronize_post_init(CPUState *cpu)
{
    run_on_cpu(cpu, do_hvf_cpu_synchronize_post_init, RUN_ON_CPU_NULL);
}

static void do_hvf_cpu_synchronize_pre_loadvm(CPUState *cpu,
                                              run_on_cpu_data arg)
{
    cpu->vcpu_dirty = true;
}

void hvf_cpu_synchronize_pre_loadvm(CPUState *cpu)
{
    run_on_cpu(cpu, do_hvf_cpu_synchronize_pre_loadvm, RUN_ON_CPU_NULL);
}

static bool ept_emulation_fault(hvf_slot *slot, uint64_t gpa, uint64_t ept_qual)
{
    int read, write;

    /* EPT fault on an instruction fetch doesn't make sense here */
    if (ept_qual & EPT_VIOLATION_INST_FETCH) {
        return false;
    }

    /* EPT fault must be a read fault or a write fault */
    read = ept_qual & EPT_VIOLATION_DATA_READ ? 1 : 0;
    write = ept_qual & EPT_VIOLATION_DATA_WRITE ? 1 : 0;
    if ((read | write) == 0) {
        return false;
    }

    if (write && slot) {
        if (slot->flags & HVF_SLOT_LOG) {
            memory_region_set_dirty(slot->region, gpa - slot->start, 1);
            hv_vm_protect((hv_gpaddr_t)slot->start, (size_t)slot->size,
                          HV_MEMORY_READ | HV_MEMORY_WRITE);
        }
    }

    /*
     * The EPT violation must have been caused by accessing a
     * guest-physical address that is a translation of a guest-linear
     * address.
     */
    if ((ept_qual & EPT_VIOLATION_GLA_VALID) == 0 ||
        (ept_qual & EPT_VIOLATION_XLAT_VALID) == 0) {
        return false;
    }

    if (!slot) {
        return true;
    }
    if (!memory_region_is_ram(slot->region) &&
        !(read && memory_region_is_romd(slot->region))) {
        return true;
    }
    return false;
}

static void hvf_set_dirty_tracking(MemoryRegionSection *section, bool on)
{
    hvf_slot *slot;

    slot = hvf_find_overlap_slot(
            section->offset_within_address_space,
            int128_get64(section->size));

    /* protect region against writes; begin tracking it */
    if (on) {
        slot->flags |= HVF_SLOT_LOG;
        hv_vm_protect((hv_gpaddr_t)slot->start, (size_t)slot->size,
                      HV_MEMORY_READ);
    /* stop tracking region */
    } else {
        slot->flags &= ~HVF_SLOT_LOG;
        hv_vm_protect((hv_gpaddr_t)slot->start, (size_t)slot->size,
                      HV_MEMORY_READ | HV_MEMORY_WRITE);
    }
}

static void hvf_log_start(MemoryListener *listener,
                          MemoryRegionSection *section, int old, int new)
{
    if (old != 0) {
        return;
    }

    hvf_set_dirty_tracking(section, 1);
}

static void hvf_log_stop(MemoryListener *listener,
                         MemoryRegionSection *section, int old, int new)
{
    if (new != 0) {
        return;
    }

    hvf_set_dirty_tracking(section, 0);
}

static void hvf_log_sync(MemoryListener *listener,
                         MemoryRegionSection *section)
{
    /*
     * sync of dirty pages is handled elsewhere; just make sure we keep
     * tracking the region.
420 */ 421 hvf_set_dirty_tracking(section, 1); 422 } 423 424 static void hvf_region_add(MemoryListener *listener, 425 MemoryRegionSection *section) 426 { 427 hvf_set_phys_mem(section, true); 428 } 429 430 static void hvf_region_del(MemoryListener *listener, 431 MemoryRegionSection *section) 432 { 433 hvf_set_phys_mem(section, false); 434 } 435 436 static MemoryListener hvf_memory_listener = { 437 .priority = 10, 438 .region_add = hvf_region_add, 439 .region_del = hvf_region_del, 440 .log_start = hvf_log_start, 441 .log_stop = hvf_log_stop, 442 .log_sync = hvf_log_sync, 443 }; 444 445 void hvf_vcpu_destroy(CPUState *cpu) 446 { 447 X86CPU *x86_cpu = X86_CPU(cpu); 448 CPUX86State *env = &x86_cpu->env; 449 450 hv_return_t ret = hv_vcpu_destroy((hv_vcpuid_t)cpu->hvf_fd); 451 g_free(env->hvf_mmio_buf); 452 assert_hvf_ok(ret); 453 } 454 455 static void dummy_signal(int sig) 456 { 457 } 458 459 static void init_tsc_freq(CPUX86State *env) 460 { 461 size_t length; 462 uint64_t tsc_freq; 463 464 if (env->tsc_khz != 0) { 465 return; 466 } 467 468 length = sizeof(uint64_t); 469 if (sysctlbyname("machdep.tsc.frequency", &tsc_freq, &length, NULL, 0)) { 470 return; 471 } 472 env->tsc_khz = tsc_freq / 1000; /* Hz to KHz */ 473 } 474 475 static void init_apic_bus_freq(CPUX86State *env) 476 { 477 size_t length; 478 uint64_t bus_freq; 479 480 if (env->apic_bus_freq != 0) { 481 return; 482 } 483 484 length = sizeof(uint64_t); 485 if (sysctlbyname("hw.busfrequency", &bus_freq, &length, NULL, 0)) { 486 return; 487 } 488 env->apic_bus_freq = bus_freq; 489 } 490 491 static inline bool tsc_is_known(CPUX86State *env) 492 { 493 return env->tsc_khz != 0; 494 } 495 496 static inline bool apic_bus_freq_is_known(CPUX86State *env) 497 { 498 return env->apic_bus_freq != 0; 499 } 500 501 int hvf_init_vcpu(CPUState *cpu) 502 { 503 504 X86CPU *x86cpu = X86_CPU(cpu); 505 CPUX86State *env = &x86cpu->env; 506 int r; 507 508 /* init cpu signals */ 509 sigset_t set; 510 struct sigaction sigact; 511 512 memset(&sigact, 0, sizeof(sigact)); 513 sigact.sa_handler = dummy_signal; 514 sigaction(SIG_IPI, &sigact, NULL); 515 516 pthread_sigmask(SIG_BLOCK, NULL, &set); 517 sigdelset(&set, SIG_IPI); 518 519 init_emu(); 520 init_decoder(); 521 522 hvf_state->hvf_caps = g_new0(struct hvf_vcpu_caps, 1); 523 env->hvf_mmio_buf = g_new(char, 4096); 524 525 if (x86cpu->vmware_cpuid_freq) { 526 init_tsc_freq(env); 527 init_apic_bus_freq(env); 528 529 if (!tsc_is_known(env) || !apic_bus_freq_is_known(env)) { 530 error_report("vmware-cpuid-freq: feature couldn't be enabled"); 531 } 532 } 533 534 r = hv_vcpu_create((hv_vcpuid_t *)&cpu->hvf_fd, HV_VCPU_DEFAULT); 535 cpu->vcpu_dirty = 1; 536 assert_hvf_ok(r); 537 538 if (hv_vmx_read_capability(HV_VMX_CAP_PINBASED, 539 &hvf_state->hvf_caps->vmx_cap_pinbased)) { 540 abort(); 541 } 542 if (hv_vmx_read_capability(HV_VMX_CAP_PROCBASED, 543 &hvf_state->hvf_caps->vmx_cap_procbased)) { 544 abort(); 545 } 546 if (hv_vmx_read_capability(HV_VMX_CAP_PROCBASED2, 547 &hvf_state->hvf_caps->vmx_cap_procbased2)) { 548 abort(); 549 } 550 if (hv_vmx_read_capability(HV_VMX_CAP_ENTRY, 551 &hvf_state->hvf_caps->vmx_cap_entry)) { 552 abort(); 553 } 554 555 /* set VMCS control fields */ 556 wvmcs(cpu->hvf_fd, VMCS_PIN_BASED_CTLS, 557 cap2ctrl(hvf_state->hvf_caps->vmx_cap_pinbased, 558 VMCS_PIN_BASED_CTLS_EXTINT | 559 VMCS_PIN_BASED_CTLS_NMI | 560 VMCS_PIN_BASED_CTLS_VNMI)); 561 wvmcs(cpu->hvf_fd, VMCS_PRI_PROC_BASED_CTLS, 562 cap2ctrl(hvf_state->hvf_caps->vmx_cap_procbased, 563 VMCS_PRI_PROC_BASED_CTLS_HLT | 564 
                   VMCS_PRI_PROC_BASED_CTLS_MWAIT |
                   VMCS_PRI_PROC_BASED_CTLS_TSC_OFFSET |
                   VMCS_PRI_PROC_BASED_CTLS_TPR_SHADOW) |
          VMCS_PRI_PROC_BASED_CTLS_SEC_CONTROL);
    wvmcs(cpu->hvf_fd, VMCS_SEC_PROC_BASED_CTLS,
          cap2ctrl(hvf_state->hvf_caps->vmx_cap_procbased2,
                   VMCS_PRI_PROC_BASED2_CTLS_APIC_ACCESSES));

    wvmcs(cpu->hvf_fd, VMCS_ENTRY_CTLS, cap2ctrl(hvf_state->hvf_caps->vmx_cap_entry,
          0));
    wvmcs(cpu->hvf_fd, VMCS_EXCEPTION_BITMAP, 0); /* Double fault */

    wvmcs(cpu->hvf_fd, VMCS_TPR_THRESHOLD, 0);

    x86cpu = X86_CPU(cpu);
    x86cpu->env.xsave_buf = qemu_memalign(4096, 4096);

    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_STAR, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_LSTAR, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_CSTAR, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_FMASK, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_FSBASE, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_GSBASE, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_KERNELGSBASE, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_TSC_AUX, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_TSC, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_SYSENTER_CS, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_SYSENTER_EIP, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_SYSENTER_ESP, 1);

    return 0;
}

static void hvf_store_events(CPUState *cpu, uint32_t ins_len, uint64_t idtvec_info)
{
    X86CPU *x86_cpu = X86_CPU(cpu);
    CPUX86State *env = &x86_cpu->env;

    env->exception_nr = -1;
    env->exception_pending = 0;
    env->exception_injected = 0;
    env->interrupt_injected = -1;
    env->nmi_injected = false;
    env->ins_len = 0;
    env->has_error_code = false;
    if (idtvec_info & VMCS_IDT_VEC_VALID) {
        switch (idtvec_info & VMCS_IDT_VEC_TYPE) {
        case VMCS_IDT_VEC_HWINTR:
        case VMCS_IDT_VEC_SWINTR:
            env->interrupt_injected = idtvec_info & VMCS_IDT_VEC_VECNUM;
            break;
        case VMCS_IDT_VEC_NMI:
            env->nmi_injected = true;
            break;
        case VMCS_IDT_VEC_HWEXCEPTION:
        case VMCS_IDT_VEC_SWEXCEPTION:
            env->exception_nr = idtvec_info & VMCS_IDT_VEC_VECNUM;
            env->exception_injected = 1;
            break;
        case VMCS_IDT_VEC_PRIV_SWEXCEPTION:
        default:
            abort();
        }
        if ((idtvec_info & VMCS_IDT_VEC_TYPE) == VMCS_IDT_VEC_SWEXCEPTION ||
            (idtvec_info & VMCS_IDT_VEC_TYPE) == VMCS_IDT_VEC_SWINTR) {
            env->ins_len = ins_len;
        }
        if (idtvec_info & VMCS_IDT_VEC_ERRCODE_VALID) {
            env->has_error_code = true;
            env->error_code = rvmcs(cpu->hvf_fd, VMCS_IDT_VECTORING_ERROR);
        }
    }
    if ((rvmcs(cpu->hvf_fd, VMCS_GUEST_INTERRUPTIBILITY) &
        VMCS_INTERRUPTIBILITY_NMI_BLOCKING)) {
        env->hflags2 |= HF2_NMI_MASK;
    } else {
        env->hflags2 &= ~HF2_NMI_MASK;
    }
    if (rvmcs(cpu->hvf_fd, VMCS_GUEST_INTERRUPTIBILITY) &
        (VMCS_INTERRUPTIBILITY_STI_BLOCKING |
         VMCS_INTERRUPTIBILITY_MOVSS_BLOCKING)) {
        env->hflags |= HF_INHIBIT_IRQ_MASK;
    } else {
        env->hflags &= ~HF_INHIBIT_IRQ_MASK;
    }
}

static void hvf_cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
                              uint32_t *eax, uint32_t *ebx,
                              uint32_t *ecx, uint32_t *edx)
{
    /*
     * A wrapper that extends cpu_x86_cpuid with the 0x40000000 and 0x40000010
     * leaves; leaves 0x40000001-0x4000000F are filled with zeros.
     * Provides vmware-cpuid-freq support to hvf.
     *
     * Note: leaf 0x40000000 does not expose HVF,
     * leaving the hypervisor signature empty.
     */

    if (index < 0x40000000 || index > 0x40000010 ||
        !tsc_is_known(env) || !apic_bus_freq_is_known(env)) {

        cpu_x86_cpuid(env, index, count, eax, ebx, ecx, edx);
        return;
    }

    switch (index) {
    case 0x40000000:
        *eax = 0x40000010;    /* Max available cpuid leaf */
        *ebx = 0;             /* Leave signature empty */
        *ecx = 0;
        *edx = 0;
        break;
    case 0x40000010:
        *eax = env->tsc_khz;
        *ebx = env->apic_bus_freq / 1000; /* Hz to KHz */
        *ecx = 0;
        *edx = 0;
        break;
    default:
        *eax = 0;
        *ebx = 0;
        *ecx = 0;
        *edx = 0;
        break;
    }
}

int hvf_vcpu_exec(CPUState *cpu)
{
    X86CPU *x86_cpu = X86_CPU(cpu);
    CPUX86State *env = &x86_cpu->env;
    int ret = 0;
    uint64_t rip = 0;

    if (hvf_process_events(cpu)) {
        return EXCP_HLT;
    }

    do {
        if (cpu->vcpu_dirty) {
            hvf_put_registers(cpu);
            cpu->vcpu_dirty = false;
        }

        if (hvf_inject_interrupts(cpu)) {
            return EXCP_INTERRUPT;
        }
        vmx_update_tpr(cpu);

        qemu_mutex_unlock_iothread();
        if (!cpu_is_bsp(X86_CPU(cpu)) && cpu->halted) {
            qemu_mutex_lock_iothread();
            return EXCP_HLT;
        }

        hv_return_t r = hv_vcpu_run(cpu->hvf_fd);
        assert_hvf_ok(r);

        /* handle VMEXIT */
        uint64_t exit_reason = rvmcs(cpu->hvf_fd, VMCS_EXIT_REASON);
        uint64_t exit_qual = rvmcs(cpu->hvf_fd, VMCS_EXIT_QUALIFICATION);
        uint32_t ins_len = (uint32_t)rvmcs(cpu->hvf_fd,
                                           VMCS_EXIT_INSTRUCTION_LENGTH);

        uint64_t idtvec_info = rvmcs(cpu->hvf_fd, VMCS_IDT_VECTORING_INFO);

        hvf_store_events(cpu, ins_len, idtvec_info);
        rip = rreg(cpu->hvf_fd, HV_X86_RIP);
        env->eflags = rreg(cpu->hvf_fd, HV_X86_RFLAGS);

        qemu_mutex_lock_iothread();

        update_apic_tpr(cpu);
        current_cpu = cpu;

        ret = 0;
        switch (exit_reason) {
        case EXIT_REASON_HLT: {
            macvm_set_rip(cpu, rip + ins_len);
            if (!((cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
                (env->eflags & IF_MASK))
                && !(cpu->interrupt_request & CPU_INTERRUPT_NMI) &&
                !(idtvec_info & VMCS_IDT_VEC_VALID)) {
                cpu->halted = 1;
                ret = EXCP_HLT;
                break;
            }
            ret = EXCP_INTERRUPT;
            break;
        }
        case EXIT_REASON_MWAIT: {
            ret = EXCP_INTERRUPT;
            break;
        }
        /* Need to check if MMIO or unmapped fault */
        case EXIT_REASON_EPT_FAULT:
        {
            hvf_slot *slot;
            uint64_t gpa = rvmcs(cpu->hvf_fd, VMCS_GUEST_PHYSICAL_ADDRESS);

            if (((idtvec_info & VMCS_IDT_VEC_VALID) == 0) &&
                ((exit_qual & EXIT_QUAL_NMIUDTI) != 0)) {
                vmx_set_nmi_blocking(cpu);
            }

            slot = hvf_find_overlap_slot(gpa, 1);
            /* mmio */
            if (ept_emulation_fault(slot, gpa, exit_qual)) {
                struct x86_decode decode;

                load_regs(cpu);
                decode_instruction(env, &decode);
                exec_instruction(env, &decode);
                store_regs(cpu);
                break;
            }
            break;
        }
        case EXIT_REASON_INOUT:
        {
            uint32_t in = (exit_qual & 8) != 0;
            uint32_t size = (exit_qual & 7) + 1;
            uint32_t string = (exit_qual & 16) != 0;
            uint32_t port = exit_qual >> 16;
            /*uint32_t rep = (exit_qual & 0x20) != 0;*/

            if (!string && in) {
                uint64_t val = 0;
                load_regs(cpu);
                hvf_handle_io(env, port, &val, 0, size, 1);
                if (size == 1) {
                    AL(env) = val;
                } else if (size == 2) {
                    AX(env) = val;
                } else if (size == 4) {
                    RAX(env) = (uint32_t)val;
                } else {
                    RAX(env) = (uint64_t)val;
                }
                env->eip += ins_len;
                store_regs(cpu);
                break;
            } else if (!string && !in) {
                RAX(env) = rreg(cpu->hvf_fd, HV_X86_RAX);
                hvf_handle_io(env, port, &RAX(env), 1, size, 1);
                macvm_set_rip(cpu, rip + ins_len);
                break;
            }
            struct x86_decode decode;

            load_regs(cpu);
            decode_instruction(env, &decode);
            assert(ins_len == decode.len);
            exec_instruction(env, &decode);
            store_regs(cpu);

            break;
        }
        case EXIT_REASON_CPUID: {
            uint32_t rax = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RAX);
            uint32_t rbx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RBX);
            uint32_t rcx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RCX);
            uint32_t rdx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RDX);

            if (rax == 1) {
                /* CPUID1.ecx.OSXSAVE needs to know CR4 */
                env->cr[4] = rvmcs(cpu->hvf_fd, VMCS_GUEST_CR4);
            }
            hvf_cpu_x86_cpuid(env, rax, rcx, &rax, &rbx, &rcx, &rdx);

            wreg(cpu->hvf_fd, HV_X86_RAX, rax);
            wreg(cpu->hvf_fd, HV_X86_RBX, rbx);
            wreg(cpu->hvf_fd, HV_X86_RCX, rcx);
            wreg(cpu->hvf_fd, HV_X86_RDX, rdx);

            macvm_set_rip(cpu, rip + ins_len);
            break;
        }
        case EXIT_REASON_XSETBV: {
            X86CPU *x86_cpu = X86_CPU(cpu);
            CPUX86State *env = &x86_cpu->env;
            uint32_t eax = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RAX);
            uint32_t ecx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RCX);
            uint32_t edx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RDX);

            if (ecx) {
                macvm_set_rip(cpu, rip + ins_len);
                break;
            }
            env->xcr0 = ((uint64_t)edx << 32) | eax;
            wreg(cpu->hvf_fd, HV_X86_XCR0, env->xcr0 | 1);
            macvm_set_rip(cpu, rip + ins_len);
            break;
        }
        case EXIT_REASON_INTR_WINDOW:
            vmx_clear_int_window_exiting(cpu);
            ret = EXCP_INTERRUPT;
            break;
        case EXIT_REASON_NMI_WINDOW:
            vmx_clear_nmi_window_exiting(cpu);
            ret = EXCP_INTERRUPT;
            break;
        case EXIT_REASON_EXT_INTR:
            /* force exit and allow io handling */
            ret = EXCP_INTERRUPT;
            break;
        case EXIT_REASON_RDMSR:
        case EXIT_REASON_WRMSR:
        {
            load_regs(cpu);
            if (exit_reason == EXIT_REASON_RDMSR) {
                simulate_rdmsr(cpu);
            } else {
                simulate_wrmsr(cpu);
            }
            env->eip += ins_len;
            store_regs(cpu);
            break;
        }
        case EXIT_REASON_CR_ACCESS: {
            int cr;
            int reg;

            load_regs(cpu);
            cr = exit_qual & 15;
            reg = (exit_qual >> 8) & 15;

            switch (cr) {
            case 0x0: {
                macvm_set_cr0(cpu->hvf_fd, RRX(env, reg));
                break;
            }
            case 4: {
                macvm_set_cr4(cpu->hvf_fd, RRX(env, reg));
                break;
            }
            case 8: {
                X86CPU *x86_cpu = X86_CPU(cpu);
                if (exit_qual & 0x10) {
                    RRX(env, reg) = cpu_get_apic_tpr(x86_cpu->apic_state);
                } else {
                    int tpr = RRX(env, reg);
                    cpu_set_apic_tpr(x86_cpu->apic_state, tpr);
                    ret = EXCP_INTERRUPT;
                }
                break;
            }
            default:
                error_report("Unrecognized CR %d", cr);
                abort();
            }
            env->eip += ins_len;
            store_regs(cpu);
            break;
        }
        case EXIT_REASON_APIC_ACCESS: { /* TODO */
            struct x86_decode decode;

            load_regs(cpu);
            decode_instruction(env, &decode);
            exec_instruction(env, &decode);
            store_regs(cpu);
            break;
        }
        case EXIT_REASON_TPR: {
            ret = 1;
            break;
        }
        case EXIT_REASON_TASK_SWITCH: {
            uint64_t vinfo = rvmcs(cpu->hvf_fd, VMCS_IDT_VECTORING_INFO);
            x68_segment_selector sel = {.sel = exit_qual & 0xffff};
            vmx_handle_task_switch(cpu, sel, (exit_qual >> 30) & 0x3,
             vinfo & VMCS_INTR_VALID, vinfo & VECTORING_INFO_VECTOR_MASK, vinfo
             & VMCS_INTR_T_MASK);
            break;
        }
        case EXIT_REASON_TRIPLE_FAULT: {
            qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
            ret = EXCP_INTERRUPT;
            break;
        }
        case EXIT_REASON_RDPMC:
            wreg(cpu->hvf_fd, HV_X86_RAX, 0);
            wreg(cpu->hvf_fd, HV_X86_RDX, 0);
            macvm_set_rip(cpu, rip + ins_len);
            break;
        case VMX_REASON_VMCALL:
            env->exception_nr = EXCP0D_GPF;
            env->exception_injected = 1;
            env->has_error_code = true;
            env->error_code = 0;
            break;
        default:
            error_report("%llx: unhandled exit %llx", rip, exit_reason);
        }
    } while (ret == 0);

    return ret;
}

bool hvf_allowed;

static int hvf_accel_init(MachineState *ms)
{
    int x;
    hv_return_t ret;
    HVFState *s;

    ret = hv_vm_create(HV_VM_DEFAULT);
    assert_hvf_ok(ret);

    s = g_new0(HVFState, 1);

    s->num_slots = 32;
    for (x = 0; x < s->num_slots; ++x) {
        s->slots[x].size = 0;
        s->slots[x].slot_id = x;
    }

    hvf_state = s;
    memory_listener_register(&hvf_memory_listener, &address_space_memory);
    return 0;
}

static void hvf_accel_class_init(ObjectClass *oc, void *data)
{
    AccelClass *ac = ACCEL_CLASS(oc);
    ac->name = "HVF";
    ac->init_machine = hvf_accel_init;
    ac->allowed = &hvf_allowed;
}

static const TypeInfo hvf_accel_type = {
    .name = TYPE_HVF_ACCEL,
    .parent = TYPE_ACCEL,
    .class_init = hvf_accel_class_init,
};

static void hvf_type_init(void)
{
    type_register_static(&hvf_accel_type);
}

type_init(hvf_type_init);