/* Copyright 2008 IBM Corporation
 *           2008 Red Hat, Inc.
 * Copyright 2011 Intel Corporation
 * Copyright 2016 Veertu, Inc.
 * Copyright 2017 The Android Open Source Project
 *
 * QEMU Hypervisor.framework support
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <http://www.gnu.org/licenses/>.
 *
 * This file contains code under public domain from the hvdos project:
 * https://github.com/mist64/hvdos
 *
 * Parts Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "qemu/osdep.h"
#include "qemu-common.h"
#include "qemu/error-report.h"

#include "sysemu/hvf.h"
#include "hvf-i386.h"
#include "vmcs.h"
#include "vmx.h"
#include "x86.h"
#include "x86_descr.h"
#include "x86_mmu.h"
#include "x86_decode.h"
#include "x86_emu.h"
#include "x86_task.h"
#include "x86hvf.h"

#include <Hypervisor/hv.h>
#include <Hypervisor/hv_vmx.h>

#include "exec/address-spaces.h"
#include "hw/i386/apic_internal.h"
#include "hw/boards.h"
#include "qemu/main-loop.h"
#include "sysemu/accel.h"
#include "sysemu/sysemu.h"
#include "target/i386/cpu.h"

HVFState *hvf_state;

static void assert_hvf_ok(hv_return_t ret)
{
    if (ret == HV_SUCCESS) {
        return;
    }

    switch (ret) {
    case HV_ERROR:
        error_report("Error: HV_ERROR");
        break;
    case HV_BUSY:
        error_report("Error: HV_BUSY");
        break;
    case HV_BAD_ARGUMENT:
        error_report("Error: HV_BAD_ARGUMENT");
        break;
    case HV_NO_RESOURCES:
        error_report("Error: HV_NO_RESOURCES");
        break;
    case HV_NO_DEVICE:
        error_report("Error: HV_NO_DEVICE");
        break;
    case HV_UNSUPPORTED:
        error_report("Error: HV_UNSUPPORTED");
        break;
    default:
        error_report("Unknown Error");
    }

    abort();
}

/* Memory slots */
hvf_slot *hvf_find_overlap_slot(uint64_t start, uint64_t end)
{
    hvf_slot *slot;
    int x;
    for (x = 0; x < hvf_state->num_slots; ++x) {
        slot = &hvf_state->slots[x];
        if (slot->size && start < (slot->start + slot->size) &&
            end > slot->start) {
            return slot;
        }
    }
    return NULL;
}

struct mac_slot {
    int present;
    uint64_t size;
    uint64_t gpa_start;
    uint64_t gva;
};

struct mac_slot mac_slots[32];
#define ALIGN(x, y)  (((x) + (y) - 1) & ~((y) - 1))

static int do_hvf_set_memory(hvf_slot *slot)
{
    struct mac_slot *macslot;
    hv_memory_flags_t flags;
    hv_return_t ret;

    macslot = &mac_slots[slot->slot_id];

    if (macslot->present) {
        if (macslot->size != slot->size) {
            macslot->present = 0;
            ret = hv_vm_unmap(macslot->gpa_start, macslot->size);
            assert_hvf_ok(ret);
        }
    }

    if (!slot->size) {
        return 0;
    }

    flags = HV_MEMORY_READ | HV_MEMORY_WRITE | HV_MEMORY_EXEC;

    macslot->present = 1;
    macslot->gpa_start = slot->start;
    macslot->size = slot->size;
    ret = hv_vm_map((hv_uvaddr_t)slot->mem, slot->start, slot->size, flags);
    assert_hvf_ok(ret);
    return 0;
}

void hvf_set_phys_mem(MemoryRegionSection *section, bool add)
{
    hvf_slot *mem;
    MemoryRegion *area = section->mr;

    if (!memory_region_is_ram(area)) {
        return;
    }

    mem = hvf_find_overlap_slot(
            section->offset_within_address_space,
            section->offset_within_address_space + int128_get64(section->size));

    if (mem && add) {
        if (mem->size == int128_get64(section->size) &&
            mem->start == section->offset_within_address_space &&
            mem->mem == (memory_region_get_ram_ptr(area) +
            section->offset_within_region)) {
            return; /* Same region was attempted to register, go away. */
        }
    }

    /* Region needs to be reset. set the size to 0 and remap it. */
    if (mem) {
        mem->size = 0;
        if (do_hvf_set_memory(mem)) {
            error_report("Failed to reset overlapping slot");
            abort();
        }
    }

    if (!add) {
        return;
    }

    /* Now make a new slot. */
    int x;

    for (x = 0; x < hvf_state->num_slots; ++x) {
        mem = &hvf_state->slots[x];
        if (!mem->size) {
            break;
        }
    }

    if (x == hvf_state->num_slots) {
        error_report("No free slots");
        abort();
    }

    mem->size = int128_get64(section->size);
    mem->mem = memory_region_get_ram_ptr(area) + section->offset_within_region;
    mem->start = section->offset_within_address_space;
    mem->region = area;

    if (do_hvf_set_memory(mem)) {
        error_report("Error registering new memory slot");
        abort();
    }
}

void vmx_update_tpr(CPUState *cpu)
{
    /* TODO: need to integrate APIC handling */
    X86CPU *x86_cpu = X86_CPU(cpu);
    int tpr = cpu_get_apic_tpr(x86_cpu->apic_state) << 4;
    int irr = apic_get_highest_priority_irr(x86_cpu->apic_state);

    wreg(cpu->hvf_fd, HV_X86_TPR, tpr);
    if (irr == -1) {
        wvmcs(cpu->hvf_fd, VMCS_TPR_THRESHOLD, 0);
    } else {
        wvmcs(cpu->hvf_fd, VMCS_TPR_THRESHOLD, (irr > tpr) ? tpr >> 4 :
              irr >> 4);
    }
}

void update_apic_tpr(CPUState *cpu)
{
    X86CPU *x86_cpu = X86_CPU(cpu);
    int tpr = rreg(cpu->hvf_fd, HV_X86_TPR) >> 4;
    cpu_set_apic_tpr(x86_cpu->apic_state, tpr);
}

#define VECTORING_INFO_VECTOR_MASK 0xff

static void hvf_handle_interrupt(CPUState *cpu, int mask)
{
    cpu->interrupt_request |= mask;
    if (!qemu_cpu_is_self(cpu)) {
        qemu_cpu_kick(cpu);
    }
}

void hvf_handle_io(CPUArchState *env, uint16_t port, void *buffer,
                   int direction, int size, int count)
{
    int i;
    uint8_t *ptr = buffer;

    for (i = 0; i < count; i++) {
        address_space_rw(&address_space_io, port, MEMTXATTRS_UNSPECIFIED,
                         ptr, size,
                         direction);
        ptr += size;
    }
}

/* TODO: synchronize vcpu state */
static void do_hvf_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
{
    CPUState *cpu_state = cpu;
    if (cpu_state->vcpu_dirty == 0) {
        hvf_get_registers(cpu_state);
    }

    cpu_state->vcpu_dirty = 1;
}

void hvf_cpu_synchronize_state(CPUState *cpu_state)
{
    if (cpu_state->vcpu_dirty == 0) {
        run_on_cpu(cpu_state, do_hvf_cpu_synchronize_state, RUN_ON_CPU_NULL);
    }
}

static void do_hvf_cpu_synchronize_post_reset(CPUState *cpu, run_on_cpu_data arg)
{
    CPUState *cpu_state = cpu;
    hvf_put_registers(cpu_state);
    cpu_state->vcpu_dirty = false;
}

void hvf_cpu_synchronize_post_reset(CPUState *cpu_state)
{
    run_on_cpu(cpu_state, do_hvf_cpu_synchronize_post_reset, RUN_ON_CPU_NULL);
}

void _hvf_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg)
{
    CPUState *cpu_state = cpu;
    hvf_put_registers(cpu_state);
    cpu_state->vcpu_dirty = false;
}

void hvf_cpu_synchronize_post_init(CPUState *cpu_state)
{
    run_on_cpu(cpu_state, _hvf_cpu_synchronize_post_init, RUN_ON_CPU_NULL);
}

static bool ept_emulation_fault(hvf_slot *slot, uint64_t gpa, uint64_t ept_qual)
{
    int read, write;

    /* EPT fault on an instruction fetch doesn't make sense here */
    if (ept_qual & EPT_VIOLATION_INST_FETCH) {
        return false;
    }

    /* EPT fault must be a read fault or a write fault */
    read = ept_qual & EPT_VIOLATION_DATA_READ ? 1 : 0;
    write = ept_qual & EPT_VIOLATION_DATA_WRITE ? 1 : 0;
    if ((read | write) == 0) {
        return false;
    }

    if (write && slot) {
        if (slot->flags & HVF_SLOT_LOG) {
            memory_region_set_dirty(slot->region, gpa - slot->start, 1);
            hv_vm_protect((hv_gpaddr_t)slot->start, (size_t)slot->size,
                          HV_MEMORY_READ | HV_MEMORY_WRITE);
        }
    }

    /*
     * The EPT violation must have been caused by accessing a
     * guest-physical address that is a translation of a guest-linear
     * address.
     */
    if ((ept_qual & EPT_VIOLATION_GLA_VALID) == 0 ||
        (ept_qual & EPT_VIOLATION_XLAT_VALID) == 0) {
        return false;
    }

    return !slot;
}

static void hvf_set_dirty_tracking(MemoryRegionSection *section, bool on)
{
    hvf_slot *slot;

    slot = hvf_find_overlap_slot(
            section->offset_within_address_space,
            section->offset_within_address_space + int128_get64(section->size));

    /* protect region against writes; begin tracking it */
    if (on) {
        slot->flags |= HVF_SLOT_LOG;
        hv_vm_protect((hv_gpaddr_t)slot->start, (size_t)slot->size,
                      HV_MEMORY_READ);
    /* stop tracking region */
    } else {
        slot->flags &= ~HVF_SLOT_LOG;
        hv_vm_protect((hv_gpaddr_t)slot->start, (size_t)slot->size,
                      HV_MEMORY_READ | HV_MEMORY_WRITE);
    }
}

static void hvf_log_start(MemoryListener *listener,
                          MemoryRegionSection *section, int old, int new)
{
    if (old != 0) {
        return;
    }

    hvf_set_dirty_tracking(section, 1);
}

static void hvf_log_stop(MemoryListener *listener,
                         MemoryRegionSection *section, int old, int new)
{
    if (new != 0) {
        return;
    }

    hvf_set_dirty_tracking(section, 0);
}

static void hvf_log_sync(MemoryListener *listener,
                         MemoryRegionSection *section)
{
    /*
     * sync of dirty pages is handled elsewhere; just make sure we keep
     * tracking the region.
     */
    hvf_set_dirty_tracking(section, 1);
}

static void hvf_region_add(MemoryListener *listener,
                           MemoryRegionSection *section)
{
    hvf_set_phys_mem(section, true);
}

static void hvf_region_del(MemoryListener *listener,
                           MemoryRegionSection *section)
{
    hvf_set_phys_mem(section, false);
}

static MemoryListener hvf_memory_listener = {
    .priority = 10,
    .region_add = hvf_region_add,
    .region_del = hvf_region_del,
    .log_start = hvf_log_start,
    .log_stop = hvf_log_stop,
    .log_sync = hvf_log_sync,
};

void hvf_reset_vcpu(CPUState *cpu) {

    /* TODO: this shouldn't be needed; there is already a call to
     * cpu_synchronize_all_post_reset in vl.c
     */
    wvmcs(cpu->hvf_fd, VMCS_ENTRY_CTLS, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_IA32_EFER, 0);
    macvm_set_cr0(cpu->hvf_fd, 0x60000010);

    wvmcs(cpu->hvf_fd, VMCS_CR4_MASK, CR4_VMXE_MASK);
    wvmcs(cpu->hvf_fd, VMCS_CR4_SHADOW, 0x0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_CR4, CR4_VMXE_MASK);

    /* set VMCS guest state fields */
    wvmcs(cpu->hvf_fd, VMCS_GUEST_CS_SELECTOR, 0xf000);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_CS_LIMIT, 0xffff);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_CS_ACCESS_RIGHTS, 0x9b);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_CS_BASE, 0xffff0000);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_DS_SELECTOR, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_DS_LIMIT, 0xffff);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_DS_ACCESS_RIGHTS, 0x93);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_DS_BASE, 0);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_ES_SELECTOR, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_ES_LIMIT, 0xffff);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_ES_ACCESS_RIGHTS, 0x93);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_ES_BASE, 0);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_FS_SELECTOR, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_FS_LIMIT, 0xffff);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_FS_ACCESS_RIGHTS, 0x93);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_FS_BASE, 0);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_GS_SELECTOR, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_GS_LIMIT, 0xffff);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_GS_ACCESS_RIGHTS, 0x93);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_GS_BASE, 0);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_SS_SELECTOR, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_SS_LIMIT, 0xffff);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_SS_ACCESS_RIGHTS, 0x93);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_SS_BASE, 0);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_LDTR_SELECTOR, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_LDTR_LIMIT, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_LDTR_ACCESS_RIGHTS, 0x10000);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_LDTR_BASE, 0);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_TR_SELECTOR, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_TR_LIMIT, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_TR_ACCESS_RIGHTS, 0x83);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_TR_BASE, 0);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_GDTR_LIMIT, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_GDTR_BASE, 0);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_IDTR_LIMIT, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_IDTR_BASE, 0);

    /*wvmcs(cpu->hvf_fd, VMCS_GUEST_CR2, 0x0);*/
    wvmcs(cpu->hvf_fd, VMCS_GUEST_CR3, 0x0);

    wreg(cpu->hvf_fd, HV_X86_RIP, 0xfff0);
    wreg(cpu->hvf_fd, HV_X86_RDX, 0x623);
    wreg(cpu->hvf_fd, HV_X86_RFLAGS, 0x2);
    wreg(cpu->hvf_fd, HV_X86_RSP, 0x0);
    wreg(cpu->hvf_fd, HV_X86_RAX, 0x0);
    wreg(cpu->hvf_fd, HV_X86_RBX, 0x0);
    wreg(cpu->hvf_fd, HV_X86_RCX, 0x0);
    wreg(cpu->hvf_fd, HV_X86_RSI, 0x0);
    wreg(cpu->hvf_fd, HV_X86_RDI, 0x0);
    wreg(cpu->hvf_fd, HV_X86_RBP, 0x0);

    for (int i = 0; i < 8; i++) {
        wreg(cpu->hvf_fd, HV_X86_R8 + i, 0x0);
    }

    hv_vm_sync_tsc(0);
    hv_vcpu_invalidate_tlb(cpu->hvf_fd);
    hv_vcpu_flush(cpu->hvf_fd);
}

void hvf_vcpu_destroy(CPUState *cpu)
{
    hv_return_t ret = hv_vcpu_destroy((hv_vcpuid_t)cpu->hvf_fd);
    assert_hvf_ok(ret);
}

static void dummy_signal(int sig)
{
}

int hvf_init_vcpu(CPUState *cpu)
{

    X86CPU *x86cpu = X86_CPU(cpu);
    CPUX86State *env = &x86cpu->env;
    int r;

    /* init cpu signals */
    sigset_t set;
    struct sigaction sigact;

    memset(&sigact, 0, sizeof(sigact));
    sigact.sa_handler = dummy_signal;
    sigaction(SIG_IPI, &sigact, NULL);

    pthread_sigmask(SIG_BLOCK, NULL, &set);
    sigdelset(&set, SIG_IPI);

    init_emu();
    init_decoder();

    hvf_state->hvf_caps = g_new0(struct hvf_vcpu_caps, 1);
    env->hvf_emul = g_new0(HVFX86EmulatorState, 1);

    r = hv_vcpu_create((hv_vcpuid_t *)&cpu->hvf_fd, HV_VCPU_DEFAULT);
    cpu->vcpu_dirty = 1;
    assert_hvf_ok(r);

    if (hv_vmx_read_capability(HV_VMX_CAP_PINBASED,
                               &hvf_state->hvf_caps->vmx_cap_pinbased)) {
        abort();
    }
    if (hv_vmx_read_capability(HV_VMX_CAP_PROCBASED,
                               &hvf_state->hvf_caps->vmx_cap_procbased)) {
        abort();
    }
    if (hv_vmx_read_capability(HV_VMX_CAP_PROCBASED2,
                               &hvf_state->hvf_caps->vmx_cap_procbased2)) {
        abort();
    }
    if (hv_vmx_read_capability(HV_VMX_CAP_ENTRY,
                               &hvf_state->hvf_caps->vmx_cap_entry)) {
        abort();
    }

    /* set VMCS control fields */
    wvmcs(cpu->hvf_fd, VMCS_PIN_BASED_CTLS,
          cap2ctrl(hvf_state->hvf_caps->vmx_cap_pinbased,
                   VMCS_PIN_BASED_CTLS_EXTINT |
                   VMCS_PIN_BASED_CTLS_NMI |
                   VMCS_PIN_BASED_CTLS_VNMI));
    wvmcs(cpu->hvf_fd, VMCS_PRI_PROC_BASED_CTLS,
          cap2ctrl(hvf_state->hvf_caps->vmx_cap_procbased,
                   VMCS_PRI_PROC_BASED_CTLS_HLT |
                   VMCS_PRI_PROC_BASED_CTLS_MWAIT |
                   VMCS_PRI_PROC_BASED_CTLS_TSC_OFFSET |
                   VMCS_PRI_PROC_BASED_CTLS_TPR_SHADOW) |
          VMCS_PRI_PROC_BASED_CTLS_SEC_CONTROL);
    wvmcs(cpu->hvf_fd, VMCS_SEC_PROC_BASED_CTLS,
          cap2ctrl(hvf_state->hvf_caps->vmx_cap_procbased2,
                   VMCS_PRI_PROC_BASED2_CTLS_APIC_ACCESSES));

    wvmcs(cpu->hvf_fd, VMCS_ENTRY_CTLS, cap2ctrl(hvf_state->hvf_caps->vmx_cap_entry,
          0));
    wvmcs(cpu->hvf_fd, VMCS_EXCEPTION_BITMAP, 0); /* Double fault */

    wvmcs(cpu->hvf_fd, VMCS_TPR_THRESHOLD, 0);

    x86cpu = X86_CPU(cpu);
    x86cpu->env.xsave_buf = qemu_memalign(4096, 4096);

    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_STAR, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_LSTAR, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_CSTAR, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_FMASK, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_FSBASE, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_GSBASE, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_KERNELGSBASE, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_TSC_AUX, 1);
    /*hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_TSC, 1);*/
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_SYSENTER_CS, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_SYSENTER_EIP, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_SYSENTER_ESP, 1);

    return 0;
}

static void hvf_store_events(CPUState *cpu, uint32_t ins_len, uint64_t idtvec_info)
{
    X86CPU *x86_cpu = X86_CPU(cpu);
    CPUX86State *env = &x86_cpu->env;

    env->exception_injected = -1;
    env->interrupt_injected = -1;
    env->nmi_injected = false;
    if (idtvec_info & VMCS_IDT_VEC_VALID) {
        switch (idtvec_info & VMCS_IDT_VEC_TYPE) {
        case VMCS_IDT_VEC_HWINTR:
        case VMCS_IDT_VEC_SWINTR:
            env->interrupt_injected = idtvec_info & VMCS_IDT_VEC_VECNUM;
            break;
        case VMCS_IDT_VEC_NMI:
            env->nmi_injected = true;
            break;
        case VMCS_IDT_VEC_HWEXCEPTION:
        case VMCS_IDT_VEC_SWEXCEPTION:
            env->exception_injected = idtvec_info & VMCS_IDT_VEC_VECNUM;
            break;
        case VMCS_IDT_VEC_PRIV_SWEXCEPTION:
        default:
            abort();
        }
        if ((idtvec_info & VMCS_IDT_VEC_TYPE) == VMCS_IDT_VEC_SWEXCEPTION ||
            (idtvec_info & VMCS_IDT_VEC_TYPE) == VMCS_IDT_VEC_SWINTR) {
            env->ins_len = ins_len;
        }
        if (idtvec_info & VMCS_INTR_DEL_ERRCODE) {
            env->has_error_code = true;
            env->error_code = rvmcs(cpu->hvf_fd, VMCS_IDT_VECTORING_ERROR);
        }
    }
    if ((rvmcs(cpu->hvf_fd, VMCS_GUEST_INTERRUPTIBILITY) &
        VMCS_INTERRUPTIBILITY_NMI_BLOCKING)) {
        env->hflags2 |= HF2_NMI_MASK;
    } else {
        env->hflags2 &= ~HF2_NMI_MASK;
    }
    if (rvmcs(cpu->hvf_fd, VMCS_GUEST_INTERRUPTIBILITY) &
        (VMCS_INTERRUPTIBILITY_STI_BLOCKING |
         VMCS_INTERRUPTIBILITY_MOVSS_BLOCKING)) {
        env->hflags |= HF_INHIBIT_IRQ_MASK;
    } else {
        env->hflags &= ~HF_INHIBIT_IRQ_MASK;
    }
}

int hvf_vcpu_exec(CPUState *cpu)
{
    X86CPU *x86_cpu = X86_CPU(cpu);
    CPUX86State *env = &x86_cpu->env;
    int ret = 0;
    uint64_t rip = 0;

    if (hvf_process_events(cpu)) {
        return EXCP_HLT;
    }

    do {
        if (cpu->vcpu_dirty) {
            hvf_put_registers(cpu);
            cpu->vcpu_dirty = false;
        }

        if (hvf_inject_interrupts(cpu)) {
            return EXCP_INTERRUPT;
        }
        vmx_update_tpr(cpu);

        qemu_mutex_unlock_iothread();
        if (!cpu_is_bsp(X86_CPU(cpu)) && cpu->halted) {
            qemu_mutex_lock_iothread();
            return EXCP_HLT;
        }

        hv_return_t r = hv_vcpu_run(cpu->hvf_fd);
        assert_hvf_ok(r);

        /* handle VMEXIT */
        uint64_t exit_reason = rvmcs(cpu->hvf_fd, VMCS_EXIT_REASON);
        uint64_t exit_qual = rvmcs(cpu->hvf_fd, VMCS_EXIT_QUALIFICATION);
        uint32_t ins_len = (uint32_t)rvmcs(cpu->hvf_fd,
                                           VMCS_EXIT_INSTRUCTION_LENGTH);

        uint64_t idtvec_info = rvmcs(cpu->hvf_fd, VMCS_IDT_VECTORING_INFO);

        hvf_store_events(cpu, ins_len, idtvec_info);
        rip = rreg(cpu->hvf_fd, HV_X86_RIP);
        RFLAGS(env) = rreg(cpu->hvf_fd, HV_X86_RFLAGS);
        env->eflags = RFLAGS(env);

        qemu_mutex_lock_iothread();

        update_apic_tpr(cpu);
        current_cpu = cpu;

        ret = 0;
        switch (exit_reason) {
        case EXIT_REASON_HLT: {
            macvm_set_rip(cpu, rip + ins_len);
            if (!((cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
                (EFLAGS(env) & IF_MASK))
                && !(cpu->interrupt_request & CPU_INTERRUPT_NMI) &&
                !(idtvec_info & VMCS_IDT_VEC_VALID)) {
                cpu->halted = 1;
                ret = EXCP_HLT;
                break;
            }
            ret = EXCP_INTERRUPT;
            break;
        }
        case EXIT_REASON_MWAIT: {
            ret = EXCP_INTERRUPT;
            break;
        }
        /* Need to check if MMIO or unmapped fault */
        case EXIT_REASON_EPT_FAULT:
        {
            hvf_slot *slot;
            uint64_t gpa = rvmcs(cpu->hvf_fd, VMCS_GUEST_PHYSICAL_ADDRESS);

            if (((idtvec_info & VMCS_IDT_VEC_VALID) == 0) &&
                ((exit_qual & EXIT_QUAL_NMIUDTI) != 0)) {
                vmx_set_nmi_blocking(cpu);
            }

            slot = hvf_find_overlap_slot(gpa, gpa);
            /* mmio */
            if (ept_emulation_fault(slot, gpa, exit_qual)) {
                struct x86_decode decode;

                load_regs(cpu);
                env->hvf_emul->fetch_rip = rip;

                decode_instruction(env, &decode);
                exec_instruction(env, &decode);
                store_regs(cpu);
                break;
            }
            break;
        }
        case EXIT_REASON_INOUT:
        {
            uint32_t in = (exit_qual & 8) != 0;
            uint32_t size = (exit_qual & 7) + 1;
            uint32_t string = (exit_qual & 16) != 0;
            uint32_t port = exit_qual >> 16;
            /*uint32_t rep = (exit_qual & 0x20) != 0;*/

            if (!string && in) {
                uint64_t val = 0;
                load_regs(cpu);
                hvf_handle_io(env, port, &val, 0, size, 1);
                if (size == 1) {
                    AL(env) = val;
                } else if (size == 2) {
                    AX(env) = val;
                } else if (size == 4) {
                    RAX(env) = (uint32_t)val;
                } else {
                    RAX(env) = (uint64_t)val;
                }
                RIP(env) += ins_len;
                store_regs(cpu);
                break;
            } else if (!string && !in) {
                RAX(env) = rreg(cpu->hvf_fd, HV_X86_RAX);
                hvf_handle_io(env, port, &RAX(env), 1, size, 1);
                macvm_set_rip(cpu, rip + ins_len);
                break;
            }
            struct x86_decode decode;

            load_regs(cpu);
            env->hvf_emul->fetch_rip = rip;

            decode_instruction(env, &decode);
            assert(ins_len == decode.len);
            exec_instruction(env, &decode);
            store_regs(cpu);

            break;
        }
        case EXIT_REASON_CPUID: {
            uint32_t rax = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RAX);
            uint32_t rbx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RBX);
            uint32_t rcx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RCX);
            uint32_t rdx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RDX);

            cpu_x86_cpuid(env, rax, rcx, &rax, &rbx, &rcx, &rdx);

            wreg(cpu->hvf_fd, HV_X86_RAX, rax);
            wreg(cpu->hvf_fd, HV_X86_RBX, rbx);
            wreg(cpu->hvf_fd, HV_X86_RCX, rcx);
            wreg(cpu->hvf_fd, HV_X86_RDX, rdx);

            macvm_set_rip(cpu, rip + ins_len);
            break;
        }
        case EXIT_REASON_XSETBV: {
            X86CPU *x86_cpu = X86_CPU(cpu);
            CPUX86State *env = &x86_cpu->env;
            uint32_t eax = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RAX);
            uint32_t ecx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RCX);
            uint32_t edx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RDX);

            if (ecx) {
                macvm_set_rip(cpu, rip + ins_len);
                break;
            }
            env->xcr0 = ((uint64_t)edx << 32) | eax;
            wreg(cpu->hvf_fd, HV_X86_XCR0, env->xcr0 | 1);
            macvm_set_rip(cpu, rip + ins_len);
            break;
        }
        case EXIT_REASON_INTR_WINDOW:
            vmx_clear_int_window_exiting(cpu);
            ret = EXCP_INTERRUPT;
            break;
        case EXIT_REASON_NMI_WINDOW:
            vmx_clear_nmi_window_exiting(cpu);
            ret = EXCP_INTERRUPT;
            break;
        case EXIT_REASON_EXT_INTR:
            /* force exit and allow io handling */
            ret = EXCP_INTERRUPT;
            break;
        case EXIT_REASON_RDMSR:
        case EXIT_REASON_WRMSR:
        {
            load_regs(cpu);
            if (exit_reason == EXIT_REASON_RDMSR) {
                simulate_rdmsr(cpu);
            } else {
                simulate_wrmsr(cpu);
            }
            RIP(env) += rvmcs(cpu->hvf_fd, VMCS_EXIT_INSTRUCTION_LENGTH);
            store_regs(cpu);
            break;
        }
        case EXIT_REASON_CR_ACCESS: {
            int cr;
            int reg;

            load_regs(cpu);
            cr = exit_qual & 15;
            reg = (exit_qual >> 8) & 15;

            switch (cr) {
            case 0x0: {
                macvm_set_cr0(cpu->hvf_fd, RRX(env, reg));
                break;
            }
            case 4: {
                macvm_set_cr4(cpu->hvf_fd, RRX(env, reg));
                break;
            }
            case 8: {
                X86CPU *x86_cpu = X86_CPU(cpu);
                if (exit_qual & 0x10) {
                    RRX(env, reg) = cpu_get_apic_tpr(x86_cpu->apic_state);
                } else {
                    int tpr = RRX(env, reg);
                    cpu_set_apic_tpr(x86_cpu->apic_state, tpr);
                    ret = EXCP_INTERRUPT;
                }
                break;
            }
            default:
                error_report("Unrecognized CR %d", cr);
                abort();
            }
            RIP(env) += ins_len;
            store_regs(cpu);
            break;
        }
        case EXIT_REASON_APIC_ACCESS: { /* TODO */
            struct x86_decode decode;

            load_regs(cpu);
            env->hvf_emul->fetch_rip = rip;

            decode_instruction(env, &decode);
            exec_instruction(env, &decode);
            store_regs(cpu);
            break;
        }
        case EXIT_REASON_TPR: {
            ret = 1;
            break;
        }
        case EXIT_REASON_TASK_SWITCH: {
            uint64_t vinfo = rvmcs(cpu->hvf_fd, VMCS_IDT_VECTORING_INFO);
            x68_segment_selector sel = {.sel = exit_qual & 0xffff};
            vmx_handle_task_switch(cpu, sel, (exit_qual >> 30) & 0x3,
             vinfo & VMCS_INTR_VALID, vinfo & VECTORING_INFO_VECTOR_MASK, vinfo
             & VMCS_INTR_T_MASK);
            break;
        }
        case EXIT_REASON_TRIPLE_FAULT: {
            qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
            ret = EXCP_INTERRUPT;
            break;
        }
        case EXIT_REASON_RDPMC:
            wreg(cpu->hvf_fd, HV_X86_RAX, 0);
            wreg(cpu->hvf_fd, HV_X86_RDX, 0);
            macvm_set_rip(cpu, rip + ins_len);
            break;
        case VMX_REASON_VMCALL:
            env->exception_injected = EXCP0D_GPF;
            env->has_error_code = true;
            env->error_code = 0;
            break;
        default:
            error_report("%llx: unhandled exit %llx", rip, exit_reason);
        }
    } while (ret == 0);

    return ret;
}

bool hvf_allowed;

static int hvf_accel_init(MachineState *ms)
{
    int x;
    hv_return_t ret;
    HVFState *s;

    ret = hv_vm_create(HV_VM_DEFAULT);
    assert_hvf_ok(ret);

    s = g_new0(HVFState, 1);

    s->num_slots = 32;
    for (x = 0; x < s->num_slots; ++x) {
        s->slots[x].size = 0;
        s->slots[x].slot_id = x;
    }

    hvf_state = s;
    cpu_interrupt_handler = hvf_handle_interrupt;
    memory_listener_register(&hvf_memory_listener, &address_space_memory);
    return 0;
}

static void hvf_accel_class_init(ObjectClass *oc, void *data)
{
    AccelClass *ac = ACCEL_CLASS(oc);
    ac->name = "HVF";
    ac->init_machine = hvf_accel_init;
    ac->allowed = &hvf_allowed;
}

static const TypeInfo hvf_accel_type = {
    .name = TYPE_HVF_ACCEL,
    .parent = TYPE_ACCEL,
    .class_init = hvf_accel_class_init,
};

static void hvf_type_init(void)
{
    type_register_static(&hvf_accel_type);
}

type_init(hvf_type_init);