/* Copyright 2008 IBM Corporation
 *           2008 Red Hat, Inc.
 * Copyright 2011 Intel Corporation
 * Copyright 2016 Veertu, Inc.
 * Copyright 2017 The Android Open Source Project
 *
 * QEMU Hypervisor.framework support
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this program; if not, see <http://www.gnu.org/licenses/>.
 *
 * This file contains code under public domain from the hvdos project:
 * https://github.com/mist64/hvdos
 *
 * Parts Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "qemu/error-report.h"

#include "sysemu/hvf.h"
#include "hvf-i386.h"
#include "vmcs.h"
#include "vmx.h"
#include "x86.h"
#include "x86_descr.h"
#include "x86_mmu.h"
#include "x86_decode.h"
#include "x86_emu.h"
#include "x86_task.h"
#include "x86hvf.h"

#include <Hypervisor/hv.h>
#include <Hypervisor/hv_vmx.h>

#include "exec/address-spaces.h"
#include "hw/i386/apic_internal.h"
#include "hw/boards.h"
#include "qemu/main-loop.h"
#include "sysemu/accel.h"
#include "sysemu/sysemu.h"
#include "target/i386/cpu.h"

HVFState *hvf_state;

static void assert_hvf_ok(hv_return_t ret)
{
    if (ret == HV_SUCCESS) {
        return;
    }

    switch (ret) {
    case HV_ERROR:
        error_report("Error: HV_ERROR");
        break;
    case HV_BUSY:
        error_report("Error: HV_BUSY");
        break;
    case HV_BAD_ARGUMENT:
        error_report("Error: HV_BAD_ARGUMENT");
        break;
    case HV_NO_RESOURCES:
        error_report("Error: HV_NO_RESOURCES");
        break;
    case HV_NO_DEVICE:
        error_report("Error: HV_NO_DEVICE");
        break;
    case HV_UNSUPPORTED:
        error_report("Error: HV_UNSUPPORTED");
        break;
    default:
        error_report("Unknown Error");
    }

    abort();
}

/* Memory slots */
hvf_slot *hvf_find_overlap_slot(uint64_t start, uint64_t end)
{
    hvf_slot *slot;
    int x;
    for (x = 0; x < hvf_state->num_slots; ++x) {
        slot = &hvf_state->slots[x];
        if (slot->size && start < (slot->start + slot->size) &&
            end > slot->start) {
            return slot;
        }
    }
    return NULL;
}

struct mac_slot {
    int present;
    uint64_t size;
    uint64_t gpa_start;
    uint64_t gva;
};

struct mac_slot mac_slots[32];
#define ALIGN(x, y)  (((x) + (y) - 1) & ~((y) - 1))

static int do_hvf_set_memory(hvf_slot *slot)
{
    struct mac_slot *macslot;
    hv_memory_flags_t flags;
    hv_return_t ret;

    macslot = &mac_slots[slot->slot_id];

    if (macslot->present) {
        if (macslot->size != slot->size) {
            macslot->present = 0;
            ret = hv_vm_unmap(macslot->gpa_start, macslot->size);
            assert_hvf_ok(ret);
        }
    }

    if (!slot->size) {
        return 0;
    }

    flags = HV_MEMORY_READ | HV_MEMORY_WRITE | HV_MEMORY_EXEC;

    macslot->present = 1;
    macslot->gpa_start = slot->start;
    macslot->size = slot->size;
    ret = hv_vm_map((hv_uvaddr_t)slot->mem, slot->start, slot->size, flags);
    assert_hvf_ok(ret);
    return 0;
}

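/*
 * Keep the HVF slot table in sync with a MemoryRegionSection: reset any
 * overlapping slot, then (when adding) claim a free slot and map the RAM
 * backing the section into the guest physical address space.
 */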
void hvf_set_phys_mem(MemoryRegionSection *section, bool add)
{
    hvf_slot *mem;
    MemoryRegion *area = section->mr;

    if (!memory_region_is_ram(area)) {
        return;
    }

    mem = hvf_find_overlap_slot(
            section->offset_within_address_space,
            section->offset_within_address_space + int128_get64(section->size));

    if (mem && add) {
        if (mem->size == int128_get64(section->size) &&
            mem->start == section->offset_within_address_space &&
            mem->mem == (memory_region_get_ram_ptr(area) +
            section->offset_within_region)) {
            return; /* Same region was attempted to register, go away. */
        }
    }

    /* Region needs to be reset; set the size to 0 and remap it. */
    if (mem) {
        mem->size = 0;
        if (do_hvf_set_memory(mem)) {
            error_report("Failed to reset overlapping slot");
            abort();
        }
    }

    if (!add) {
        return;
    }

    /* Now make a new slot. */
    int x;

    for (x = 0; x < hvf_state->num_slots; ++x) {
        mem = &hvf_state->slots[x];
        if (!mem->size) {
            break;
        }
    }

    if (x == hvf_state->num_slots) {
        error_report("No free slots");
        abort();
    }

    mem->size = int128_get64(section->size);
    mem->mem = memory_region_get_ram_ptr(area) + section->offset_within_region;
    mem->start = section->offset_within_address_space;
    mem->region = area;

    if (do_hvf_set_memory(mem)) {
        error_report("Error registering new memory slot");
        abort();
    }
}

void vmx_update_tpr(CPUState *cpu)
{
    /* TODO: need to integrate APIC handling */
    X86CPU *x86_cpu = X86_CPU(cpu);
    int tpr = cpu_get_apic_tpr(x86_cpu->apic_state) << 4;
    int irr = apic_get_highest_priority_irr(x86_cpu->apic_state);

    wreg(cpu->hvf_fd, HV_X86_TPR, tpr);
    if (irr == -1) {
        wvmcs(cpu->hvf_fd, VMCS_TPR_THRESHOLD, 0);
    } else {
        wvmcs(cpu->hvf_fd, VMCS_TPR_THRESHOLD, (irr > tpr) ? tpr >> 4 :
              irr >> 4);
    }
}

void update_apic_tpr(CPUState *cpu)
{
    X86CPU *x86_cpu = X86_CPU(cpu);
    int tpr = rreg(cpu->hvf_fd, HV_X86_TPR) >> 4;
    cpu_set_apic_tpr(x86_cpu->apic_state, tpr);
}

#define VECTORING_INFO_VECTOR_MASK 0xff

static void hvf_handle_interrupt(CPUState *cpu, int mask)
{
    cpu->interrupt_request |= mask;
    if (!qemu_cpu_is_self(cpu)) {
        qemu_cpu_kick(cpu);
    }
}

void hvf_handle_io(CPUArchState *env, uint16_t port, void *buffer,
                   int direction, int size, int count)
{
    int i;
    uint8_t *ptr = buffer;

    for (i = 0; i < count; i++) {
        address_space_rw(&address_space_io, port, MEMTXATTRS_UNSPECIFIED,
                         ptr, size, direction);
        ptr += size;
    }
}

/* TODO: synchronize vcpu state */
static void do_hvf_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
{
    CPUState *cpu_state = cpu;
    if (cpu_state->vcpu_dirty == 0) {
        hvf_get_registers(cpu_state);
    }

    cpu_state->vcpu_dirty = 1;
}

void hvf_cpu_synchronize_state(CPUState *cpu_state)
{
    if (cpu_state->vcpu_dirty == 0) {
        run_on_cpu(cpu_state, do_hvf_cpu_synchronize_state, RUN_ON_CPU_NULL);
    }
}

static void do_hvf_cpu_synchronize_post_reset(CPUState *cpu, run_on_cpu_data arg)
{
    CPUState *cpu_state = cpu;
    hvf_put_registers(cpu_state);
    cpu_state->vcpu_dirty = false;
}

void hvf_cpu_synchronize_post_reset(CPUState *cpu_state)
{
    run_on_cpu(cpu_state, do_hvf_cpu_synchronize_post_reset, RUN_ON_CPU_NULL);
}

void _hvf_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg)
{
    CPUState *cpu_state = cpu;
    hvf_put_registers(cpu_state);
    cpu_state->vcpu_dirty = false;
}

void hvf_cpu_synchronize_post_init(CPUState *cpu_state)
{
    run_on_cpu(cpu_state, _hvf_cpu_synchronize_post_init, RUN_ON_CPU_NULL);
}

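/*
 * Decide whether an EPT violation has to go through the instruction
 * emulator. Write faults on dirty-logged slots are resolved here by marking
 * the page dirty and restoring write access; only data accesses that hit no
 * RAM slot at all (MMIO) report true.
 */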
static bool ept_emulation_fault(hvf_slot *slot, uint64_t gpa, uint64_t ept_qual)
{
    int read, write;

    /* EPT fault on an instruction fetch doesn't make sense here */
    if (ept_qual & EPT_VIOLATION_INST_FETCH) {
        return false;
    }

    /* EPT fault must be a read fault or a write fault */
    read = ept_qual & EPT_VIOLATION_DATA_READ ? 1 : 0;
    write = ept_qual & EPT_VIOLATION_DATA_WRITE ? 1 : 0;
    if ((read | write) == 0) {
        return false;
    }

    if (write && slot) {
        if (slot->flags & HVF_SLOT_LOG) {
            memory_region_set_dirty(slot->region, gpa - slot->start, 1);
            hv_vm_protect((hv_gpaddr_t)slot->start, (size_t)slot->size,
                          HV_MEMORY_READ | HV_MEMORY_WRITE);
        }
    }

    /*
     * The EPT violation must have been caused by accessing a
     * guest-physical address that is a translation of a guest-linear
     * address.
     */
    if ((ept_qual & EPT_VIOLATION_GLA_VALID) == 0 ||
        (ept_qual & EPT_VIOLATION_XLAT_VALID) == 0) {
        return false;
    }

    return !slot;
}

static void hvf_set_dirty_tracking(MemoryRegionSection *section, bool on)
{
    hvf_slot *slot;

    slot = hvf_find_overlap_slot(
            section->offset_within_address_space,
            section->offset_within_address_space + int128_get64(section->size));

    /* protect region against writes; begin tracking it */
    if (on) {
        slot->flags |= HVF_SLOT_LOG;
        hv_vm_protect((hv_gpaddr_t)slot->start, (size_t)slot->size,
                      HV_MEMORY_READ);
    /* stop tracking region */
    } else {
        slot->flags &= ~HVF_SLOT_LOG;
        hv_vm_protect((hv_gpaddr_t)slot->start, (size_t)slot->size,
                      HV_MEMORY_READ | HV_MEMORY_WRITE);
    }
}

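/*
 * Dirty-logging hooks: while logging is enabled for a section, the backing
 * slot stays write-protected so that guest writes fault into
 * ept_emulation_fault() and get marked dirty; when logging stops, write
 * access is restored.
 */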
static void hvf_log_start(MemoryListener *listener,
                          MemoryRegionSection *section, int old, int new)
{
    if (old != 0) {
        return;
    }

    hvf_set_dirty_tracking(section, 1);
}

static void hvf_log_stop(MemoryListener *listener,
                         MemoryRegionSection *section, int old, int new)
{
    if (new != 0) {
        return;
    }

    hvf_set_dirty_tracking(section, 0);
}

static void hvf_log_sync(MemoryListener *listener,
                         MemoryRegionSection *section)
{
    /*
     * sync of dirty pages is handled elsewhere; just make sure we keep
     * tracking the region.
     */
    hvf_set_dirty_tracking(section, 1);
}

static void hvf_region_add(MemoryListener *listener,
                           MemoryRegionSection *section)
{
    hvf_set_phys_mem(section, true);
}

static void hvf_region_del(MemoryListener *listener,
                           MemoryRegionSection *section)
{
    hvf_set_phys_mem(section, false);
}

static MemoryListener hvf_memory_listener = {
    .priority = 10,
    .region_add = hvf_region_add,
    .region_del = hvf_region_del,
    .log_start = hvf_log_start,
    .log_stop = hvf_log_stop,
    .log_sync = hvf_log_sync,
};

void hvf_reset_vcpu(CPUState *cpu)
{
    /*
     * TODO: this shouldn't be needed; there is already a call to
     * cpu_synchronize_all_post_reset in vl.c.
     */
    wvmcs(cpu->hvf_fd, VMCS_ENTRY_CTLS, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_IA32_EFER, 0);
    macvm_set_cr0(cpu->hvf_fd, 0x60000010);

    wvmcs(cpu->hvf_fd, VMCS_CR4_MASK, CR4_VMXE_MASK);
    wvmcs(cpu->hvf_fd, VMCS_CR4_SHADOW, 0x0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_CR4, CR4_VMXE_MASK);

    /* set VMCS guest state fields */
    wvmcs(cpu->hvf_fd, VMCS_GUEST_CS_SELECTOR, 0xf000);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_CS_LIMIT, 0xffff);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_CS_ACCESS_RIGHTS, 0x9b);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_CS_BASE, 0xffff0000);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_DS_SELECTOR, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_DS_LIMIT, 0xffff);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_DS_ACCESS_RIGHTS, 0x93);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_DS_BASE, 0);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_ES_SELECTOR, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_ES_LIMIT, 0xffff);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_ES_ACCESS_RIGHTS, 0x93);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_ES_BASE, 0);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_FS_SELECTOR, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_FS_LIMIT, 0xffff);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_FS_ACCESS_RIGHTS, 0x93);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_FS_BASE, 0);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_GS_SELECTOR, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_GS_LIMIT, 0xffff);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_GS_ACCESS_RIGHTS, 0x93);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_GS_BASE, 0);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_SS_SELECTOR, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_SS_LIMIT, 0xffff);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_SS_ACCESS_RIGHTS, 0x93);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_SS_BASE, 0);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_LDTR_SELECTOR, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_LDTR_LIMIT, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_LDTR_ACCESS_RIGHTS, 0x10000);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_LDTR_BASE, 0);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_TR_SELECTOR, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_TR_LIMIT, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_TR_ACCESS_RIGHTS, 0x83);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_TR_BASE, 0);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_GDTR_LIMIT, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_GDTR_BASE, 0);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_IDTR_LIMIT, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_IDTR_BASE, 0);

    /*wvmcs(cpu->hvf_fd, VMCS_GUEST_CR2, 0x0);*/
    wvmcs(cpu->hvf_fd, VMCS_GUEST_CR3, 0x0);

    wreg(cpu->hvf_fd, HV_X86_RIP, 0xfff0);
    wreg(cpu->hvf_fd, HV_X86_RDX, 0x623);
    wreg(cpu->hvf_fd, HV_X86_RFLAGS, 0x2);
    wreg(cpu->hvf_fd, HV_X86_RSP, 0x0);
    wreg(cpu->hvf_fd, HV_X86_RAX, 0x0);
    wreg(cpu->hvf_fd, HV_X86_RBX, 0x0);
    wreg(cpu->hvf_fd, HV_X86_RCX, 0x0);
    wreg(cpu->hvf_fd, HV_X86_RSI, 0x0);
    wreg(cpu->hvf_fd, HV_X86_RDI, 0x0);
    wreg(cpu->hvf_fd, HV_X86_RBP, 0x0);

    for (int i = 0; i < 8; i++) {
        wreg(cpu->hvf_fd, HV_X86_R8 + i, 0x0);
    }

    hv_vm_sync_tsc(0);
    cpu->halted = 0;
    hv_vcpu_invalidate_tlb(cpu->hvf_fd);
    hv_vcpu_flush(cpu->hvf_fd);
}

void hvf_vcpu_destroy(CPUState *cpu)
{
    hv_return_t ret = hv_vcpu_destroy((hv_vcpuid_t)cpu->hvf_fd);
    assert_hvf_ok(ret);
}

static void dummy_signal(int sig)
{
}

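/*
 * Create and configure a Hypervisor.framework vCPU: set up the emulator
 * state, read the VMX capabilities, program the VMCS control fields and
 * let the guest access the listed MSRs natively.
 */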
int hvf_init_vcpu(CPUState *cpu)
{
    X86CPU *x86cpu = X86_CPU(cpu);
    CPUX86State *env = &x86cpu->env;
    int r;

    /* init cpu signals */
    sigset_t set;
    struct sigaction sigact;

    memset(&sigact, 0, sizeof(sigact));
    sigact.sa_handler = dummy_signal;
    sigaction(SIG_IPI, &sigact, NULL);

    pthread_sigmask(SIG_BLOCK, NULL, &set);
    sigdelset(&set, SIG_IPI);

    init_emu();
    init_decoder();

    hvf_state->hvf_caps = g_new0(struct hvf_vcpu_caps, 1);
    env->hvf_emul = g_new0(HVFX86EmulatorState, 1);

    r = hv_vcpu_create((hv_vcpuid_t *)&cpu->hvf_fd, HV_VCPU_DEFAULT);
    cpu->vcpu_dirty = 1;
    assert_hvf_ok(r);

    if (hv_vmx_read_capability(HV_VMX_CAP_PINBASED,
        &hvf_state->hvf_caps->vmx_cap_pinbased)) {
        abort();
    }
    if (hv_vmx_read_capability(HV_VMX_CAP_PROCBASED,
        &hvf_state->hvf_caps->vmx_cap_procbased)) {
        abort();
    }
    if (hv_vmx_read_capability(HV_VMX_CAP_PROCBASED2,
        &hvf_state->hvf_caps->vmx_cap_procbased2)) {
        abort();
    }
    if (hv_vmx_read_capability(HV_VMX_CAP_ENTRY,
        &hvf_state->hvf_caps->vmx_cap_entry)) {
        abort();
    }

    /* set VMCS control fields */
    wvmcs(cpu->hvf_fd, VMCS_PIN_BASED_CTLS,
          cap2ctrl(hvf_state->hvf_caps->vmx_cap_pinbased,
                   VMCS_PIN_BASED_CTLS_EXTINT |
                   VMCS_PIN_BASED_CTLS_NMI |
                   VMCS_PIN_BASED_CTLS_VNMI));
    wvmcs(cpu->hvf_fd, VMCS_PRI_PROC_BASED_CTLS,
          cap2ctrl(hvf_state->hvf_caps->vmx_cap_procbased,
                   VMCS_PRI_PROC_BASED_CTLS_HLT |
                   VMCS_PRI_PROC_BASED_CTLS_MWAIT |
                   VMCS_PRI_PROC_BASED_CTLS_TSC_OFFSET |
                   VMCS_PRI_PROC_BASED_CTLS_TPR_SHADOW) |
          VMCS_PRI_PROC_BASED_CTLS_SEC_CONTROL);
    wvmcs(cpu->hvf_fd, VMCS_SEC_PROC_BASED_CTLS,
          cap2ctrl(hvf_state->hvf_caps->vmx_cap_procbased2,
                   VMCS_PRI_PROC_BASED2_CTLS_APIC_ACCESSES));

    wvmcs(cpu->hvf_fd, VMCS_ENTRY_CTLS,
          cap2ctrl(hvf_state->hvf_caps->vmx_cap_entry, 0));
    wvmcs(cpu->hvf_fd, VMCS_EXCEPTION_BITMAP, 0); /* Double fault */

    wvmcs(cpu->hvf_fd, VMCS_TPR_THRESHOLD, 0);

    hvf_reset_vcpu(cpu);

    x86cpu = X86_CPU(cpu);
    x86cpu->env.xsave_buf = qemu_memalign(4096, 4096);

    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_STAR, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_LSTAR, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_CSTAR, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_FMASK, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_FSBASE, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_GSBASE, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_KERNELGSBASE, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_TSC_AUX, 1);
    /*hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_TSC, 1);*/
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_SYSENTER_CS, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_SYSENTER_EIP, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_SYSENTER_ESP, 1);

    return 0;
}

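/*
 * Record the event (interrupt, NMI or exception) that was being delivered
 * when the VM exit occurred, along with the guest interruptibility state,
 * so that it can be re-injected on the next VM entry.
 */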
static void hvf_store_events(CPUState *cpu, uint32_t ins_len, uint64_t idtvec_info)
{
    X86CPU *x86_cpu = X86_CPU(cpu);
    CPUX86State *env = &x86_cpu->env;

    env->exception_injected = -1;
    env->interrupt_injected = -1;
    env->nmi_injected = false;
    if (idtvec_info & VMCS_IDT_VEC_VALID) {
        switch (idtvec_info & VMCS_IDT_VEC_TYPE) {
        case VMCS_IDT_VEC_HWINTR:
        case VMCS_IDT_VEC_SWINTR:
            env->interrupt_injected = idtvec_info & VMCS_IDT_VEC_VECNUM;
            break;
        case VMCS_IDT_VEC_NMI:
            env->nmi_injected = true;
            break;
        case VMCS_IDT_VEC_HWEXCEPTION:
        case VMCS_IDT_VEC_SWEXCEPTION:
            env->exception_injected = idtvec_info & VMCS_IDT_VEC_VECNUM;
            break;
        case VMCS_IDT_VEC_PRIV_SWEXCEPTION:
        default:
            abort();
        }
        if ((idtvec_info & VMCS_IDT_VEC_TYPE) == VMCS_IDT_VEC_SWEXCEPTION ||
            (idtvec_info & VMCS_IDT_VEC_TYPE) == VMCS_IDT_VEC_SWINTR) {
            env->ins_len = ins_len;
        }
        if (idtvec_info & VMCS_INTR_DEL_ERRCODE) {
            env->has_error_code = true;
            env->error_code = rvmcs(cpu->hvf_fd, VMCS_IDT_VECTORING_ERROR);
        }
    }
    if ((rvmcs(cpu->hvf_fd, VMCS_GUEST_INTERRUPTIBILITY) &
        VMCS_INTERRUPTIBILITY_NMI_BLOCKING)) {
        env->hflags2 |= HF2_NMI_MASK;
    } else {
        env->hflags2 &= ~HF2_NMI_MASK;
    }
    if (rvmcs(cpu->hvf_fd, VMCS_GUEST_INTERRUPTIBILITY) &
        (VMCS_INTERRUPTIBILITY_STI_BLOCKING |
         VMCS_INTERRUPTIBILITY_MOVSS_BLOCKING)) {
        env->hflags |= HF_INHIBIT_IRQ_MASK;
    } else {
        env->hflags &= ~HF_INHIBIT_IRQ_MASK;
    }
}

int hvf_vcpu_exec(CPUState *cpu)
{
    X86CPU *x86_cpu = X86_CPU(cpu);
    CPUX86State *env = &x86_cpu->env;
    int ret = 0;
    uint64_t rip = 0;

    cpu->halted = 0;

    if (hvf_process_events(cpu)) {
        return EXCP_HLT;
    }

    do {
        if (cpu->vcpu_dirty) {
            hvf_put_registers(cpu);
            cpu->vcpu_dirty = false;
        }

        if (hvf_inject_interrupts(cpu)) {
            return EXCP_INTERRUPT;
        }
        vmx_update_tpr(cpu);

        qemu_mutex_unlock_iothread();
        if (!cpu_is_bsp(X86_CPU(cpu)) && cpu->halted) {
            qemu_mutex_lock_iothread();
            return EXCP_HLT;
        }

        hv_return_t r = hv_vcpu_run(cpu->hvf_fd);
        assert_hvf_ok(r);

        /* handle VMEXIT */
        uint64_t exit_reason = rvmcs(cpu->hvf_fd, VMCS_EXIT_REASON);
        uint64_t exit_qual = rvmcs(cpu->hvf_fd, VMCS_EXIT_QUALIFICATION);
        uint32_t ins_len = (uint32_t)rvmcs(cpu->hvf_fd,
                                           VMCS_EXIT_INSTRUCTION_LENGTH);

        uint64_t idtvec_info = rvmcs(cpu->hvf_fd, VMCS_IDT_VECTORING_INFO);

        hvf_store_events(cpu, ins_len, idtvec_info);
        rip = rreg(cpu->hvf_fd, HV_X86_RIP);
        RFLAGS(env) = rreg(cpu->hvf_fd, HV_X86_RFLAGS);
        env->eflags = RFLAGS(env);

        qemu_mutex_lock_iothread();

        update_apic_tpr(cpu);
        current_cpu = cpu;

        ret = 0;
        switch (exit_reason) {
        case EXIT_REASON_HLT: {
            macvm_set_rip(cpu, rip + ins_len);
            if (!((cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
                (EFLAGS(env) & IF_MASK))
                && !(cpu->interrupt_request & CPU_INTERRUPT_NMI) &&
                !(idtvec_info & VMCS_IDT_VEC_VALID)) {
                cpu->halted = 1;
                ret = EXCP_HLT;
                break;
            }
            ret = EXCP_INTERRUPT;
            break;
        }
        case EXIT_REASON_MWAIT: {
            ret = EXCP_INTERRUPT;
            break;
        }
        /* Need to check if MMIO or unmapped fault */
        case EXIT_REASON_EPT_FAULT:
        {
            hvf_slot *slot;
            uint64_t gpa = rvmcs(cpu->hvf_fd, VMCS_GUEST_PHYSICAL_ADDRESS);

            if (((idtvec_info & VMCS_IDT_VEC_VALID) == 0) &&
                ((exit_qual & EXIT_QUAL_NMIUDTI) != 0)) {
                vmx_set_nmi_blocking(cpu);
            }

            slot = hvf_find_overlap_slot(gpa, gpa);
            /* mmio */
            if (ept_emulation_fault(slot, gpa, exit_qual)) {
                struct x86_decode decode;

                load_regs(cpu);
                env->hvf_emul->fetch_rip = rip;

                decode_instruction(env, &decode);
                exec_instruction(env, &decode);
                store_regs(cpu);
                break;
            }
            break;
        }
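        /*
         * Port I/O: non-string IN/OUT are handled directly through
         * hvf_handle_io(); string variants fall through to the instruction
         * emulator.
         */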
        case EXIT_REASON_INOUT:
        {
            uint32_t in = (exit_qual & 8) != 0;
            uint32_t size = (exit_qual & 7) + 1;
            uint32_t string = (exit_qual & 16) != 0;
            uint32_t port = exit_qual >> 16;
            /*uint32_t rep = (exit_qual & 0x20) != 0;*/

            if (!string && in) {
                uint64_t val = 0;
                load_regs(cpu);
                hvf_handle_io(env, port, &val, 0, size, 1);
                if (size == 1) {
                    AL(env) = val;
                } else if (size == 2) {
                    AX(env) = val;
                } else if (size == 4) {
                    RAX(env) = (uint32_t)val;
                } else {
                    RAX(env) = (uint64_t)val;
                }
                RIP(env) += ins_len;
                store_regs(cpu);
                break;
            } else if (!string && !in) {
                RAX(env) = rreg(cpu->hvf_fd, HV_X86_RAX);
                hvf_handle_io(env, port, &RAX(env), 1, size, 1);
                macvm_set_rip(cpu, rip + ins_len);
                break;
            }
            struct x86_decode decode;

            load_regs(cpu);
            env->hvf_emul->fetch_rip = rip;

            decode_instruction(env, &decode);
            assert(ins_len == decode.len);
            exec_instruction(env, &decode);
            store_regs(cpu);

            break;
        }
        case EXIT_REASON_CPUID: {
            uint32_t rax = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RAX);
            uint32_t rbx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RBX);
            uint32_t rcx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RCX);
            uint32_t rdx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RDX);

            cpu_x86_cpuid(env, rax, rcx, &rax, &rbx, &rcx, &rdx);

            wreg(cpu->hvf_fd, HV_X86_RAX, rax);
            wreg(cpu->hvf_fd, HV_X86_RBX, rbx);
            wreg(cpu->hvf_fd, HV_X86_RCX, rcx);
            wreg(cpu->hvf_fd, HV_X86_RDX, rdx);

            macvm_set_rip(cpu, rip + ins_len);
            break;
        }
        case EXIT_REASON_XSETBV: {
            X86CPU *x86_cpu = X86_CPU(cpu);
            CPUX86State *env = &x86_cpu->env;
            uint32_t eax = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RAX);
            uint32_t ecx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RCX);
            uint32_t edx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RDX);

            if (ecx) {
                macvm_set_rip(cpu, rip + ins_len);
                break;
            }
            env->xcr0 = ((uint64_t)edx << 32) | eax;
            wreg(cpu->hvf_fd, HV_X86_XCR0, env->xcr0 | 1);
            macvm_set_rip(cpu, rip + ins_len);
            break;
        }
        case EXIT_REASON_INTR_WINDOW:
            vmx_clear_int_window_exiting(cpu);
            ret = EXCP_INTERRUPT;
            break;
        case EXIT_REASON_NMI_WINDOW:
            vmx_clear_nmi_window_exiting(cpu);
            ret = EXCP_INTERRUPT;
            break;
        case EXIT_REASON_EXT_INTR:
            /* force exit and allow io handling */
            ret = EXCP_INTERRUPT;
            break;
        case EXIT_REASON_RDMSR:
        case EXIT_REASON_WRMSR:
        {
            load_regs(cpu);
            if (exit_reason == EXIT_REASON_RDMSR) {
                simulate_rdmsr(cpu);
            } else {
                simulate_wrmsr(cpu);
            }
            RIP(env) += rvmcs(cpu->hvf_fd, VMCS_EXIT_INSTRUCTION_LENGTH);
            store_regs(cpu);
            break;
        }
        case EXIT_REASON_CR_ACCESS: {
            int cr;
            int reg;

            load_regs(cpu);
            cr = exit_qual & 15;
            reg = (exit_qual >> 8) & 15;

            switch (cr) {
            case 0x0: {
                macvm_set_cr0(cpu->hvf_fd, RRX(env, reg));
                break;
            }
            case 4: {
                macvm_set_cr4(cpu->hvf_fd, RRX(env, reg));
                break;
            }
            case 8: {
                X86CPU *x86_cpu = X86_CPU(cpu);
                if (exit_qual & 0x10) {
                    RRX(env, reg) = cpu_get_apic_tpr(x86_cpu->apic_state);
                } else {
                    int tpr = RRX(env, reg);
                    cpu_set_apic_tpr(x86_cpu->apic_state, tpr);
                    ret = EXCP_INTERRUPT;
                }
                break;
            }
            default:
                error_report("Unrecognized CR %d", cr);
                abort();
            }
            RIP(env) += ins_len;
            store_regs(cpu);
            break;
        }
        case EXIT_REASON_APIC_ACCESS: { /* TODO */
            struct x86_decode decode;

            load_regs(cpu);
            env->hvf_emul->fetch_rip = rip;

            decode_instruction(env, &decode);
            exec_instruction(env, &decode);
            store_regs(cpu);
            break;
        }
        case EXIT_REASON_TPR: {
            ret = 1;
            break;
        }
        case EXIT_REASON_TASK_SWITCH: {
            uint64_t vinfo = rvmcs(cpu->hvf_fd, VMCS_IDT_VECTORING_INFO);
            x68_segment_selector sel = {.sel = exit_qual & 0xffff};
            vmx_handle_task_switch(cpu, sel, (exit_qual >> 30) & 0x3,
                                   vinfo & VMCS_INTR_VALID,
                                   vinfo & VECTORING_INFO_VECTOR_MASK,
                                   vinfo & VMCS_INTR_T_MASK);
            break;
        }
        case EXIT_REASON_TRIPLE_FAULT: {
            qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
            ret = EXCP_INTERRUPT;
            break;
        }
        case EXIT_REASON_RDPMC:
            wreg(cpu->hvf_fd, HV_X86_RAX, 0);
            wreg(cpu->hvf_fd, HV_X86_RDX, 0);
            macvm_set_rip(cpu, rip + ins_len);
            break;
        case VMX_REASON_VMCALL:
            env->exception_injected = EXCP0D_GPF;
            env->has_error_code = true;
            env->error_code = 0;
            break;
        default:
            error_report("%llx: unhandled exit %llx", rip, exit_reason);
        }
    } while (ret == 0);

    return ret;
}

bool hvf_allowed;

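/*
 * Accelerator setup: create the VM, initialize the fixed-size slot table
 * and register the memory listener that keeps the HVF mappings in sync
 * with the QEMU memory map.
 */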
error_report("Unrecognized CR %d", cr); 879 abort(); 880 } 881 RIP(env) += ins_len; 882 store_regs(cpu); 883 break; 884 } 885 case EXIT_REASON_APIC_ACCESS: { /* TODO */ 886 struct x86_decode decode; 887 888 load_regs(cpu); 889 env->hvf_emul->fetch_rip = rip; 890 891 decode_instruction(env, &decode); 892 exec_instruction(env, &decode); 893 store_regs(cpu); 894 break; 895 } 896 case EXIT_REASON_TPR: { 897 ret = 1; 898 break; 899 } 900 case EXIT_REASON_TASK_SWITCH: { 901 uint64_t vinfo = rvmcs(cpu->hvf_fd, VMCS_IDT_VECTORING_INFO); 902 x68_segment_selector sel = {.sel = exit_qual & 0xffff}; 903 vmx_handle_task_switch(cpu, sel, (exit_qual >> 30) & 0x3, 904 vinfo & VMCS_INTR_VALID, vinfo & VECTORING_INFO_VECTOR_MASK, vinfo 905 & VMCS_INTR_T_MASK); 906 break; 907 } 908 case EXIT_REASON_TRIPLE_FAULT: { 909 qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); 910 ret = EXCP_INTERRUPT; 911 break; 912 } 913 case EXIT_REASON_RDPMC: 914 wreg(cpu->hvf_fd, HV_X86_RAX, 0); 915 wreg(cpu->hvf_fd, HV_X86_RDX, 0); 916 macvm_set_rip(cpu, rip + ins_len); 917 break; 918 case VMX_REASON_VMCALL: 919 env->exception_injected = EXCP0D_GPF; 920 env->has_error_code = true; 921 env->error_code = 0; 922 break; 923 default: 924 error_report("%llx: unhandled exit %llx", rip, exit_reason); 925 } 926 } while (ret == 0); 927 928 return ret; 929 } 930 931 bool hvf_allowed; 932 933 static int hvf_accel_init(MachineState *ms) 934 { 935 int x; 936 hv_return_t ret; 937 HVFState *s; 938 939 ret = hv_vm_create(HV_VM_DEFAULT); 940 assert_hvf_ok(ret); 941 942 s = g_new0(HVFState, 1); 943 944 s->num_slots = 32; 945 for (x = 0; x < s->num_slots; ++x) { 946 s->slots[x].size = 0; 947 s->slots[x].slot_id = x; 948 } 949 950 hvf_state = s; 951 cpu_interrupt_handler = hvf_handle_interrupt; 952 memory_listener_register(&hvf_memory_listener, &address_space_memory); 953 return 0; 954 } 955 956 static void hvf_accel_class_init(ObjectClass *oc, void *data) 957 { 958 AccelClass *ac = ACCEL_CLASS(oc); 959 ac->name = "HVF"; 960 ac->init_machine = hvf_accel_init; 961 ac->allowed = &hvf_allowed; 962 } 963 964 static const TypeInfo hvf_accel_type = { 965 .name = TYPE_HVF_ACCEL, 966 .parent = TYPE_ACCEL, 967 .class_init = hvf_accel_class_init, 968 }; 969 970 static void hvf_type_init(void) 971 { 972 type_register_static(&hvf_accel_type); 973 } 974 975 type_init(hvf_type_init); 976