/* Copyright 2008 IBM Corporation
 *           2008 Red Hat, Inc.
 * Copyright 2011 Intel Corporation
 * Copyright 2016 Veertu, Inc.
 * Copyright 2017 The Android Open Source Project
 *
 * QEMU Hypervisor.framework support
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <http://www.gnu.org/licenses/>.
 *
 * This file contains code under public domain from the hvdos project:
 * https://github.com/mist64/hvdos
 *
 * Parts Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "qemu/osdep.h"
#include "qemu-common.h"
#include "qemu/error-report.h"

#include "sysemu/hvf.h"
#include "hvf-i386.h"
#include "vmcs.h"
#include "vmx.h"
#include "x86.h"
#include "x86_descr.h"
#include "x86_mmu.h"
#include "x86_decode.h"
#include "x86_emu.h"
#include "x86_task.h"
#include "x86hvf.h"

#include <Hypervisor/hv.h>
#include <Hypervisor/hv_vmx.h>

#include "exec/address-spaces.h"
#include "hw/i386/apic_internal.h"
#include "hw/boards.h"
#include "qemu/main-loop.h"
#include "sysemu/accel.h"
#include "sysemu/sysemu.h"
#include "target/i386/cpu.h"

HVFState *hvf_state;

static void assert_hvf_ok(hv_return_t ret)
{
    if (ret == HV_SUCCESS) {
        return;
    }

    switch (ret) {
    case HV_ERROR:
        error_report("Error: HV_ERROR");
        break;
    case HV_BUSY:
        error_report("Error: HV_BUSY");
        break;
    case HV_BAD_ARGUMENT:
        error_report("Error: HV_BAD_ARGUMENT");
        break;
    case HV_NO_RESOURCES:
        error_report("Error: HV_NO_RESOURCES");
        break;
    case HV_NO_DEVICE:
        error_report("Error: HV_NO_DEVICE");
        break;
    case HV_UNSUPPORTED:
        error_report("Error: HV_UNSUPPORTED");
        break;
    default:
        error_report("Unknown Error");
    }

    abort();
}

/* Memory slots */
hvf_slot *hvf_find_overlap_slot(uint64_t start, uint64_t end)
{
    hvf_slot *slot;
    int x;
    for (x = 0; x < hvf_state->num_slots; ++x) {
        slot = &hvf_state->slots[x];
        if (slot->size && start < (slot->start + slot->size) &&
            end > slot->start) {
            return slot;
        }
    }
    return NULL;
}

struct mac_slot {
    int present;
    uint64_t size;
    uint64_t gpa_start;
    uint64_t gva;
};

struct mac_slot mac_slots[32];
#define ALIGN(x, y)  (((x) + (y) - 1) & ~((y) - 1))

static int do_hvf_set_memory(hvf_slot *slot)
{
    struct mac_slot *macslot;
    hv_memory_flags_t flags;
    hv_return_t ret;

    macslot = &mac_slots[slot->slot_id];

    if (macslot->present) {
        if (macslot->size != slot->size) {
            macslot->present = 0;
            ret = hv_vm_unmap(macslot->gpa_start, macslot->size);
            assert_hvf_ok(ret);
        }
    }

    if (!slot->size) {
        return 0;
    }

    flags = HV_MEMORY_READ | HV_MEMORY_WRITE | HV_MEMORY_EXEC;

    macslot->present = 1;
    macslot->gpa_start = slot->start;
    macslot->size = slot->size;
    ret = hv_vm_map((hv_uvaddr_t)slot->mem, slot->start, slot->size, flags);
    assert_hvf_ok(ret);
    return 0;
}

void hvf_set_phys_mem(MemoryRegionSection *section, bool add)
{
    hvf_slot *mem;
    MemoryRegion *area = section->mr;

    if (!memory_region_is_ram(area)) {
        return;
    }

    mem = hvf_find_overlap_slot(
            section->offset_within_address_space,
            section->offset_within_address_space + int128_get64(section->size));

    if (mem && add) {
        if (mem->size == int128_get64(section->size) &&
            mem->start == section->offset_within_address_space &&
            mem->mem == (memory_region_get_ram_ptr(area) +
            section->offset_within_region)) {
            return; /* Same region was attempted to register, go away. */
        }
    }

    /* Region needs to be reset. set the size to 0 and remap it. */
    if (mem) {
        mem->size = 0;
        if (do_hvf_set_memory(mem)) {
            error_report("Failed to reset overlapping slot");
            abort();
        }
    }

    if (!add) {
        return;
    }

    /* Now make a new slot. */
    int x;

    for (x = 0; x < hvf_state->num_slots; ++x) {
        mem = &hvf_state->slots[x];
        if (!mem->size) {
            break;
        }
    }

    if (x == hvf_state->num_slots) {
        error_report("No free slots");
        abort();
    }

    mem->size = int128_get64(section->size);
    mem->mem = memory_region_get_ram_ptr(area) + section->offset_within_region;
    mem->start = section->offset_within_address_space;
    mem->region = area;

    if (do_hvf_set_memory(mem)) {
        error_report("Error registering new memory slot");
        abort();
    }
}

void vmx_update_tpr(CPUState *cpu)
{
    /* TODO: need to integrate APIC handling */
    X86CPU *x86_cpu = X86_CPU(cpu);
    int tpr = cpu_get_apic_tpr(x86_cpu->apic_state) << 4;
    int irr = apic_get_highest_priority_irr(x86_cpu->apic_state);

    wreg(cpu->hvf_fd, HV_X86_TPR, tpr);
    if (irr == -1) {
        wvmcs(cpu->hvf_fd, VMCS_TPR_THRESHOLD, 0);
    } else {
        wvmcs(cpu->hvf_fd, VMCS_TPR_THRESHOLD, (irr > tpr) ? tpr >> 4 :
              irr >> 4);
    }
}

void update_apic_tpr(CPUState *cpu)
{
    X86CPU *x86_cpu = X86_CPU(cpu);
    int tpr = rreg(cpu->hvf_fd, HV_X86_TPR) >> 4;
    cpu_set_apic_tpr(x86_cpu->apic_state, tpr);
}

#define VECTORING_INFO_VECTOR_MASK 0xff

static void hvf_handle_interrupt(CPUState *cpu, int mask)
{
    cpu->interrupt_request |= mask;
    if (!qemu_cpu_is_self(cpu)) {
        qemu_cpu_kick(cpu);
    }
}

void hvf_handle_io(CPUArchState *env, uint16_t port, void *buffer,
                   int direction, int size, int count)
{
    int i;
    uint8_t *ptr = buffer;

    for (i = 0; i < count; i++) {
        address_space_rw(&address_space_io, port, MEMTXATTRS_UNSPECIFIED,
                         ptr, size,
                         direction);
        ptr += size;
    }
}

/* TODO: synchronize vcpu state */
static void do_hvf_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
{
    CPUState *cpu_state = cpu;
    if (cpu_state->vcpu_dirty == 0) {
        hvf_get_registers(cpu_state);
    }

    cpu_state->vcpu_dirty = 1;
}

void hvf_cpu_synchronize_state(CPUState *cpu_state)
{
    if (cpu_state->vcpu_dirty == 0) {
        run_on_cpu(cpu_state, do_hvf_cpu_synchronize_state, RUN_ON_CPU_NULL);
    }
}

static void do_hvf_cpu_synchronize_post_reset(CPUState *cpu, run_on_cpu_data arg)
{
    CPUState *cpu_state = cpu;
    hvf_put_registers(cpu_state);
    cpu_state->vcpu_dirty = false;
}

void hvf_cpu_synchronize_post_reset(CPUState *cpu_state)
{
    run_on_cpu(cpu_state, do_hvf_cpu_synchronize_post_reset, RUN_ON_CPU_NULL);
}

void _hvf_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg)
{
    CPUState *cpu_state = cpu;
    hvf_put_registers(cpu_state);
    cpu_state->vcpu_dirty = false;
}

void hvf_cpu_synchronize_post_init(CPUState *cpu_state)
{
    run_on_cpu(cpu_state, _hvf_cpu_synchronize_post_init, RUN_ON_CPU_NULL);
}

/* Returns true when the faulting access has to be emulated as MMIO. */
static bool ept_emulation_fault(hvf_slot *slot, uint64_t gpa, uint64_t ept_qual)
{
    int read, write;

    /* EPT fault on an instruction fetch doesn't make sense here */
    if (ept_qual & EPT_VIOLATION_INST_FETCH) {
        return false;
    }

    /* EPT fault must be a read fault or a write fault */
    read = ept_qual & EPT_VIOLATION_DATA_READ ? 1 : 0;
    write = ept_qual & EPT_VIOLATION_DATA_WRITE ? 1 : 0;
    if ((read | write) == 0) {
        return false;
    }

    if (write && slot) {
        if (slot->flags & HVF_SLOT_LOG) {
            memory_region_set_dirty(slot->region, gpa - slot->start, 1);
            hv_vm_protect((hv_gpaddr_t)slot->start, (size_t)slot->size,
                          HV_MEMORY_READ | HV_MEMORY_WRITE);
        }
    }

    /*
     * The EPT violation must have been caused by accessing a
     * guest-physical address that is a translation of a guest-linear
     * address.
     */
    if ((ept_qual & EPT_VIOLATION_GLA_VALID) == 0 ||
        (ept_qual & EPT_VIOLATION_XLAT_VALID) == 0) {
        return false;
    }

    return !slot;
}

static void hvf_set_dirty_tracking(MemoryRegionSection *section, bool on)
{
    hvf_slot *slot;

    slot = hvf_find_overlap_slot(
            section->offset_within_address_space,
            section->offset_within_address_space + int128_get64(section->size));

    /* protect region against writes; begin tracking it */
    if (on) {
        slot->flags |= HVF_SLOT_LOG;
        hv_vm_protect((hv_gpaddr_t)slot->start, (size_t)slot->size,
                      HV_MEMORY_READ);
    /* stop tracking region */
    } else {
        slot->flags &= ~HVF_SLOT_LOG;
        hv_vm_protect((hv_gpaddr_t)slot->start, (size_t)slot->size,
                      HV_MEMORY_READ | HV_MEMORY_WRITE);
    }
}

static void hvf_log_start(MemoryListener *listener,
                          MemoryRegionSection *section, int old, int new)
{
    if (old != 0) {
        return;
    }

    hvf_set_dirty_tracking(section, 1);
}

static void hvf_log_stop(MemoryListener *listener,
                         MemoryRegionSection *section, int old, int new)
{
    if (new != 0) {
        return;
    }

    hvf_set_dirty_tracking(section, 0);
}

static void hvf_log_sync(MemoryListener *listener,
                         MemoryRegionSection *section)
{
    /*
     * sync of dirty pages is handled elsewhere; just make sure we keep
     * tracking the region.
     */
    hvf_set_dirty_tracking(section, 1);
}

static void hvf_region_add(MemoryListener *listener,
                           MemoryRegionSection *section)
{
    hvf_set_phys_mem(section, true);
}

static void hvf_region_del(MemoryListener *listener,
                           MemoryRegionSection *section)
{
    hvf_set_phys_mem(section, false);
}

static MemoryListener hvf_memory_listener = {
    .priority = 10,
    .region_add = hvf_region_add,
    .region_del = hvf_region_del,
    .log_start = hvf_log_start,
    .log_stop = hvf_log_stop,
    .log_sync = hvf_log_sync,
};

void hvf_reset_vcpu(CPUState *cpu)
{
    /* TODO: this shouldn't be needed; there is already a call to
     * cpu_synchronize_all_post_reset in vl.c
     */
    wvmcs(cpu->hvf_fd, VMCS_ENTRY_CTLS, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_IA32_EFER, 0);
    macvm_set_cr0(cpu->hvf_fd, 0x60000010);

    wvmcs(cpu->hvf_fd, VMCS_CR4_MASK, CR4_VMXE_MASK);
    wvmcs(cpu->hvf_fd, VMCS_CR4_SHADOW, 0x0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_CR4, CR4_VMXE_MASK);

    /* set VMCS guest state fields */
    wvmcs(cpu->hvf_fd, VMCS_GUEST_CS_SELECTOR, 0xf000);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_CS_LIMIT, 0xffff);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_CS_ACCESS_RIGHTS, 0x9b);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_CS_BASE, 0xffff0000);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_DS_SELECTOR, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_DS_LIMIT, 0xffff);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_DS_ACCESS_RIGHTS, 0x93);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_DS_BASE, 0);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_ES_SELECTOR, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_ES_LIMIT, 0xffff);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_ES_ACCESS_RIGHTS, 0x93);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_ES_BASE, 0);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_FS_SELECTOR, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_FS_LIMIT, 0xffff);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_FS_ACCESS_RIGHTS, 0x93);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_FS_BASE, 0);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_GS_SELECTOR, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_GS_LIMIT, 0xffff);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_GS_ACCESS_RIGHTS, 0x93);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_GS_BASE, 0);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_SS_SELECTOR, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_SS_LIMIT, 0xffff);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_SS_ACCESS_RIGHTS, 0x93);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_SS_BASE, 0);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_LDTR_SELECTOR, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_LDTR_LIMIT, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_LDTR_ACCESS_RIGHTS, 0x10000);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_LDTR_BASE, 0);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_TR_SELECTOR, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_TR_LIMIT, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_TR_ACCESS_RIGHTS, 0x83);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_TR_BASE, 0);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_GDTR_LIMIT, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_GDTR_BASE, 0);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_IDTR_LIMIT, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_IDTR_BASE, 0);

    /*wvmcs(cpu->hvf_fd, VMCS_GUEST_CR2, 0x0);*/
    wvmcs(cpu->hvf_fd, VMCS_GUEST_CR3, 0x0);

    wreg(cpu->hvf_fd, HV_X86_RIP, 0xfff0);
    wreg(cpu->hvf_fd, HV_X86_RDX, 0x623);
    wreg(cpu->hvf_fd, HV_X86_RFLAGS, 0x2);
    wreg(cpu->hvf_fd, HV_X86_RSP, 0x0);
    wreg(cpu->hvf_fd, HV_X86_RAX, 0x0);
    wreg(cpu->hvf_fd, HV_X86_RBX, 0x0);
    wreg(cpu->hvf_fd, HV_X86_RCX, 0x0);
    wreg(cpu->hvf_fd, HV_X86_RSI, 0x0);
    wreg(cpu->hvf_fd, HV_X86_RDI, 0x0);
    wreg(cpu->hvf_fd, HV_X86_RBP, 0x0);

    for (int i = 0; i < 8; i++) {
        wreg(cpu->hvf_fd, HV_X86_R8 + i, 0x0);
    }

    hv_vm_sync_tsc(0);
    hv_vcpu_invalidate_tlb(cpu->hvf_fd);
    hv_vcpu_flush(cpu->hvf_fd);
}

void hvf_vcpu_destroy(CPUState *cpu)
{
    hv_return_t ret = hv_vcpu_destroy((hv_vcpuid_t)cpu->hvf_fd);
    assert_hvf_ok(ret);
}

static void dummy_signal(int sig)
{
}

int hvf_init_vcpu(CPUState *cpu)
{
    X86CPU *x86cpu = X86_CPU(cpu);
    CPUX86State *env = &x86cpu->env;
    int r;

    /* init cpu signals */
    sigset_t set;
    struct sigaction sigact;

    memset(&sigact, 0, sizeof(sigact));
    sigact.sa_handler = dummy_signal;
    sigaction(SIG_IPI, &sigact, NULL);

    pthread_sigmask(SIG_BLOCK, NULL, &set);
    sigdelset(&set, SIG_IPI);

    init_emu();
    init_decoder();

    hvf_state->hvf_caps = g_new0(struct hvf_vcpu_caps, 1);
    env->hvf_emul = g_new0(HVFX86EmulatorState, 1);

    r = hv_vcpu_create((hv_vcpuid_t *)&cpu->hvf_fd, HV_VCPU_DEFAULT);
    cpu->vcpu_dirty = 1;
    assert_hvf_ok(r);

    if (hv_vmx_read_capability(HV_VMX_CAP_PINBASED,
                               &hvf_state->hvf_caps->vmx_cap_pinbased)) {
        abort();
    }
    if (hv_vmx_read_capability(HV_VMX_CAP_PROCBASED,
                               &hvf_state->hvf_caps->vmx_cap_procbased)) {
        abort();
    }
    if (hv_vmx_read_capability(HV_VMX_CAP_PROCBASED2,
                               &hvf_state->hvf_caps->vmx_cap_procbased2)) {
        abort();
    }
    if (hv_vmx_read_capability(HV_VMX_CAP_ENTRY,
                               &hvf_state->hvf_caps->vmx_cap_entry)) {
        abort();
    }

    /* set VMCS control fields */
    wvmcs(cpu->hvf_fd, VMCS_PIN_BASED_CTLS,
          cap2ctrl(hvf_state->hvf_caps->vmx_cap_pinbased,
                   VMCS_PIN_BASED_CTLS_EXTINT |
                   VMCS_PIN_BASED_CTLS_NMI |
                   VMCS_PIN_BASED_CTLS_VNMI));
    wvmcs(cpu->hvf_fd, VMCS_PRI_PROC_BASED_CTLS,
          cap2ctrl(hvf_state->hvf_caps->vmx_cap_procbased,
                   VMCS_PRI_PROC_BASED_CTLS_HLT |
                   VMCS_PRI_PROC_BASED_CTLS_MWAIT |
                   VMCS_PRI_PROC_BASED_CTLS_TSC_OFFSET |
                   VMCS_PRI_PROC_BASED_CTLS_TPR_SHADOW) |
          VMCS_PRI_PROC_BASED_CTLS_SEC_CONTROL);
    wvmcs(cpu->hvf_fd, VMCS_SEC_PROC_BASED_CTLS,
          cap2ctrl(hvf_state->hvf_caps->vmx_cap_procbased2,
                   VMCS_PRI_PROC_BASED2_CTLS_APIC_ACCESSES));

    wvmcs(cpu->hvf_fd, VMCS_ENTRY_CTLS,
          cap2ctrl(hvf_state->hvf_caps->vmx_cap_entry, 0));
    wvmcs(cpu->hvf_fd, VMCS_EXCEPTION_BITMAP, 0); /* Double fault */

    wvmcs(cpu->hvf_fd, VMCS_TPR_THRESHOLD, 0);

    x86cpu = X86_CPU(cpu);
    x86cpu->env.xsave_buf = qemu_memalign(4096, 4096);

    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_STAR, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_LSTAR, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_CSTAR, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_FMASK, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_FSBASE, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_GSBASE, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_KERNELGSBASE, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_TSC_AUX, 1);
    /*hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_TSC, 1);*/
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_SYSENTER_CS, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_SYSENTER_EIP, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_SYSENTER_ESP, 1);

    return 0;
}

/* Record events that were being delivered when the VM exit occurred. */
static void hvf_store_events(CPUState *cpu, uint32_t ins_len, uint64_t idtvec_info)
{
    X86CPU *x86_cpu = X86_CPU(cpu);
    CPUX86State *env = &x86_cpu->env;

    env->exception_injected = -1;
    env->interrupt_injected = -1;
    env->nmi_injected = false;
    if (idtvec_info & VMCS_IDT_VEC_VALID) {
        switch (idtvec_info & VMCS_IDT_VEC_TYPE) {
        case VMCS_IDT_VEC_HWINTR:
        case VMCS_IDT_VEC_SWINTR:
            env->interrupt_injected = idtvec_info & VMCS_IDT_VEC_VECNUM;
            break;
        case VMCS_IDT_VEC_NMI:
            env->nmi_injected = true;
            break;
        case VMCS_IDT_VEC_HWEXCEPTION:
        case VMCS_IDT_VEC_SWEXCEPTION:
            env->exception_injected = idtvec_info & VMCS_IDT_VEC_VECNUM;
            break;
        case VMCS_IDT_VEC_PRIV_SWEXCEPTION:
        default:
            abort();
        }
        if ((idtvec_info & VMCS_IDT_VEC_TYPE) == VMCS_IDT_VEC_SWEXCEPTION ||
            (idtvec_info & VMCS_IDT_VEC_TYPE) == VMCS_IDT_VEC_SWINTR) {
            env->ins_len = ins_len;
        }
        if (idtvec_info & VMCS_INTR_DEL_ERRCODE) {
            env->has_error_code = true;
            env->error_code = rvmcs(cpu->hvf_fd, VMCS_IDT_VECTORING_ERROR);
        }
    }
    if ((rvmcs(cpu->hvf_fd, VMCS_GUEST_INTERRUPTIBILITY) &
        VMCS_INTERRUPTIBILITY_NMI_BLOCKING)) {
        env->hflags2 |= HF2_NMI_MASK;
    } else {
        env->hflags2 &= ~HF2_NMI_MASK;
    }
    if (rvmcs(cpu->hvf_fd, VMCS_GUEST_INTERRUPTIBILITY) &
        (VMCS_INTERRUPTIBILITY_STI_BLOCKING |
         VMCS_INTERRUPTIBILITY_MOVSS_BLOCKING)) {
        env->hflags |= HF_INHIBIT_IRQ_MASK;
    } else {
        env->hflags &= ~HF_INHIBIT_IRQ_MASK;
    }
}

int hvf_vcpu_exec(CPUState *cpu)
{
    X86CPU *x86_cpu = X86_CPU(cpu);
    CPUX86State *env = &x86_cpu->env;
    int ret = 0;
    uint64_t rip = 0;

    if (hvf_process_events(cpu)) {
        return EXCP_HLT;
    }

    do {
        if (cpu->vcpu_dirty) {
            hvf_put_registers(cpu);
            cpu->vcpu_dirty = false;
        }

        if (hvf_inject_interrupts(cpu)) {
            return EXCP_INTERRUPT;
        }
        vmx_update_tpr(cpu);

        qemu_mutex_unlock_iothread();
        if (!cpu_is_bsp(X86_CPU(cpu)) && cpu->halted) {
            qemu_mutex_lock_iothread();
            return EXCP_HLT;
        }

        hv_return_t r = hv_vcpu_run(cpu->hvf_fd);
        assert_hvf_ok(r);

        /* handle VMEXIT */
        uint64_t exit_reason = rvmcs(cpu->hvf_fd, VMCS_EXIT_REASON);
        uint64_t exit_qual = rvmcs(cpu->hvf_fd, VMCS_EXIT_QUALIFICATION);
        uint32_t ins_len = (uint32_t)rvmcs(cpu->hvf_fd,
                                           VMCS_EXIT_INSTRUCTION_LENGTH);

        uint64_t idtvec_info = rvmcs(cpu->hvf_fd, VMCS_IDT_VECTORING_INFO);

        hvf_store_events(cpu, ins_len, idtvec_info);
        rip = rreg(cpu->hvf_fd, HV_X86_RIP);
        RFLAGS(env) = rreg(cpu->hvf_fd, HV_X86_RFLAGS);
        env->eflags = RFLAGS(env);

        qemu_mutex_lock_iothread();

        update_apic_tpr(cpu);
        current_cpu = cpu;

        ret = 0;
        switch (exit_reason) {
        case EXIT_REASON_HLT: {
            macvm_set_rip(cpu, rip + ins_len);
            if (!((cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
                (EFLAGS(env) & IF_MASK))
                && !(cpu->interrupt_request & CPU_INTERRUPT_NMI) &&
                !(idtvec_info & VMCS_IDT_VEC_VALID)) {
                cpu->halted = 1;
                ret = EXCP_HLT;
                break;
            }
            ret = EXCP_INTERRUPT;
            break;
        }
        case EXIT_REASON_MWAIT: {
            ret = EXCP_INTERRUPT;
            break;
        }
        /* Need to check if MMIO or unmapped fault */
        case EXIT_REASON_EPT_FAULT:
        {
            hvf_slot *slot;
            uint64_t gpa = rvmcs(cpu->hvf_fd, VMCS_GUEST_PHYSICAL_ADDRESS);

            if (((idtvec_info & VMCS_IDT_VEC_VALID) == 0) &&
                ((exit_qual & EXIT_QUAL_NMIUDTI) != 0)) {
                vmx_set_nmi_blocking(cpu);
            }

            slot = hvf_find_overlap_slot(gpa, gpa);
            /* mmio */
            if (ept_emulation_fault(slot, gpa, exit_qual)) {
                struct x86_decode decode;

                load_regs(cpu);
                env->hvf_emul->fetch_rip = rip;

                decode_instruction(env, &decode);
                exec_instruction(env, &decode);
                store_regs(cpu);
                break;
            }
            break;
        }
        case EXIT_REASON_INOUT:
        {
            uint32_t in = (exit_qual & 8) != 0;
            uint32_t size = (exit_qual & 7) + 1;
            uint32_t string = (exit_qual & 16) != 0;
            uint32_t port = exit_qual >> 16;
            /*uint32_t rep = (exit_qual & 0x20) != 0;*/

            if (!string && in) {
                uint64_t val = 0;
                load_regs(cpu);
                hvf_handle_io(env, port, &val, 0, size, 1);
                if (size == 1) {
                    AL(env) = val;
                } else if (size == 2) {
                    AX(env) = val;
                } else if (size == 4) {
                    RAX(env) = (uint32_t)val;
                } else {
                    RAX(env) = (uint64_t)val;
                }
                RIP(env) += ins_len;
                store_regs(cpu);
                break;
            } else if (!string && !in) {
                RAX(env) = rreg(cpu->hvf_fd, HV_X86_RAX);
                hvf_handle_io(env, port, &RAX(env), 1, size, 1);
                macvm_set_rip(cpu, rip + ins_len);
                break;
            }
            struct x86_decode decode;

            load_regs(cpu);
            env->hvf_emul->fetch_rip = rip;

            decode_instruction(env, &decode);
            assert(ins_len == decode.len);
            exec_instruction(env, &decode);
            store_regs(cpu);

            break;
        }
        case EXIT_REASON_CPUID: {
            uint32_t rax = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RAX);
            uint32_t rbx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RBX);
            uint32_t rcx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RCX);
            uint32_t rdx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RDX);

            cpu_x86_cpuid(env, rax, rcx, &rax, &rbx, &rcx, &rdx);

            wreg(cpu->hvf_fd, HV_X86_RAX, rax);
            wreg(cpu->hvf_fd, HV_X86_RBX, rbx);
            wreg(cpu->hvf_fd, HV_X86_RCX, rcx);
            wreg(cpu->hvf_fd, HV_X86_RDX, rdx);

            macvm_set_rip(cpu, rip + ins_len);
            break;
        }
        case EXIT_REASON_XSETBV: {
            X86CPU *x86_cpu = X86_CPU(cpu);
            CPUX86State *env = &x86_cpu->env;
            uint32_t eax = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RAX);
            uint32_t ecx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RCX);
            uint32_t edx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RDX);

            if (ecx) {
                macvm_set_rip(cpu, rip + ins_len);
                break;
            }
            env->xcr0 = ((uint64_t)edx << 32) | eax;
            wreg(cpu->hvf_fd, HV_X86_XCR0, env->xcr0 | 1);
            macvm_set_rip(cpu, rip + ins_len);
            break;
        }
        case EXIT_REASON_INTR_WINDOW:
            vmx_clear_int_window_exiting(cpu);
            ret = EXCP_INTERRUPT;
            break;
        case EXIT_REASON_NMI_WINDOW:
            vmx_clear_nmi_window_exiting(cpu);
            ret = EXCP_INTERRUPT;
            break;
        case EXIT_REASON_EXT_INTR:
            /* force exit and allow io handling */
            ret = EXCP_INTERRUPT;
            break;
        case EXIT_REASON_RDMSR:
        case EXIT_REASON_WRMSR:
        {
            load_regs(cpu);
            if (exit_reason == EXIT_REASON_RDMSR) {
                simulate_rdmsr(cpu);
            } else {
                simulate_wrmsr(cpu);
            }
            RIP(env) += rvmcs(cpu->hvf_fd, VMCS_EXIT_INSTRUCTION_LENGTH);
            store_regs(cpu);
            break;
        }
        case EXIT_REASON_CR_ACCESS: {
            int cr;
            int reg;

            load_regs(cpu);
            cr = exit_qual & 15;
            reg = (exit_qual >> 8) & 15;

            switch (cr) {
            case 0x0: {
                macvm_set_cr0(cpu->hvf_fd, RRX(env, reg));
                break;
            }
            case 4: {
                macvm_set_cr4(cpu->hvf_fd, RRX(env, reg));
                break;
            }
            case 8: {
                X86CPU *x86_cpu = X86_CPU(cpu);
                if (exit_qual & 0x10) {
                    RRX(env, reg) = cpu_get_apic_tpr(x86_cpu->apic_state);
                } else {
                    int tpr = RRX(env, reg);
                    cpu_set_apic_tpr(x86_cpu->apic_state, tpr);
                    ret = EXCP_INTERRUPT;
                }
                break;
            }
            default:
                error_report("Unrecognized CR %d", cr);
                abort();
            }
            RIP(env) += ins_len;
            store_regs(cpu);
            break;
        }
        case EXIT_REASON_APIC_ACCESS: { /* TODO */
            struct x86_decode decode;

            load_regs(cpu);
            env->hvf_emul->fetch_rip = rip;

            decode_instruction(env, &decode);
            exec_instruction(env, &decode);
            store_regs(cpu);
            break;
        }
        case EXIT_REASON_TPR: {
            ret = 1;
            break;
        }
        case EXIT_REASON_TASK_SWITCH: {
            uint64_t vinfo = rvmcs(cpu->hvf_fd, VMCS_IDT_VECTORING_INFO);
            x68_segment_selector sel = {.sel = exit_qual & 0xffff};
            vmx_handle_task_switch(cpu, sel, (exit_qual >> 30) & 0x3,
                                   vinfo & VMCS_INTR_VALID,
                                   vinfo & VECTORING_INFO_VECTOR_MASK,
                                   vinfo & VMCS_INTR_T_MASK);
            break;
        }
        case EXIT_REASON_TRIPLE_FAULT: {
            qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
            ret = EXCP_INTERRUPT;
            break;
        }
        case EXIT_REASON_RDPMC:
            wreg(cpu->hvf_fd, HV_X86_RAX, 0);
            wreg(cpu->hvf_fd, HV_X86_RDX, 0);
            macvm_set_rip(cpu, rip + ins_len);
            break;
        case VMX_REASON_VMCALL:
            env->exception_injected = EXCP0D_GPF;
            env->has_error_code = true;
            env->error_code = 0;
            break;
        default:
            error_report("%llx: unhandled exit %llx", rip, exit_reason);
        }
    } while (ret == 0);

    return ret;
}

bool hvf_allowed;

static int hvf_accel_init(MachineState *ms)
{
    int x;
    hv_return_t ret;
    HVFState *s;

    ret = hv_vm_create(HV_VM_DEFAULT);
    assert_hvf_ok(ret);

    s = g_new0(HVFState, 1);

    s->num_slots = 32;
    for (x = 0; x < s->num_slots; ++x) {
        s->slots[x].size = 0;
        s->slots[x].slot_id = x;
    }

    hvf_state = s;
    cpu_interrupt_handler = hvf_handle_interrupt;
    memory_listener_register(&hvf_memory_listener, &address_space_memory);
    return 0;
}

static void hvf_accel_class_init(ObjectClass *oc, void *data)
{
    AccelClass *ac = ACCEL_CLASS(oc);
    ac->name = "HVF";
    ac->init_machine = hvf_accel_init;
    ac->allowed = &hvf_allowed;
}

static const TypeInfo hvf_accel_type = {
    .name = TYPE_HVF_ACCEL,
    .parent = TYPE_ACCEL,
    .class_init = hvf_accel_class_init,
};

static void hvf_type_init(void)
{
    type_register_static(&hvf_accel_type);
}

type_init(hvf_type_init);