/* Copyright 2008 IBM Corporation
 * 2008 Red Hat, Inc.
 * Copyright 2011 Intel Corporation
 * Copyright 2016 Veertu, Inc.
 * Copyright 2017 The Android Open Source Project
 *
 * QEMU Hypervisor.framework support
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <http://www.gnu.org/licenses/>.
 *
 * This file contains code under public domain from the hvdos project:
 * https://github.com/mist64/hvdos
 *
 * Parts Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "qemu/osdep.h"
#include "qemu-common.h"
#include "qemu/error-report.h"

#include "sysemu/hvf.h"
#include "hvf-i386.h"
#include "vmcs.h"
#include "vmx.h"
#include "x86.h"
#include "x86_descr.h"
#include "x86_mmu.h"
#include "x86_decode.h"
#include "x86_emu.h"
#include "x86_task.h"
#include "x86hvf.h"

#include <Hypervisor/hv.h>
#include <Hypervisor/hv_vmx.h>

#include "exec/address-spaces.h"
#include "hw/i386/apic_internal.h"
#include "hw/boards.h"
#include "qemu/main-loop.h"
#include "sysemu/accel.h"
#include "sysemu/sysemu.h"
#include "target/i386/cpu.h"

HVFState *hvf_state;

static void assert_hvf_ok(hv_return_t ret)
{
    if (ret == HV_SUCCESS) {
        return;
    }

    switch (ret) {
    case HV_ERROR:
        error_report("Error: HV_ERROR");
        break;
    case HV_BUSY:
        error_report("Error: HV_BUSY");
        break;
    case HV_BAD_ARGUMENT:
        error_report("Error: HV_BAD_ARGUMENT");
        break;
    case HV_NO_RESOURCES:
        error_report("Error: HV_NO_RESOURCES");
        break;
    case HV_NO_DEVICE:
        error_report("Error: HV_NO_DEVICE");
        break;
    case HV_UNSUPPORTED:
        error_report("Error: HV_UNSUPPORTED");
        break;
    default:
        error_report("Unknown Error");
    }

    abort();
}

/* Memory slots */
hvf_slot *hvf_find_overlap_slot(uint64_t start, uint64_t end)
{
    hvf_slot *slot;
    int x;
    for (x = 0; x < hvf_state->num_slots; ++x) {
        slot = &hvf_state->slots[x];
        if (slot->size && start < (slot->start + slot->size) &&
            end > slot->start) {
            return slot;
        }
    }
    return NULL;
}

struct mac_slot {
    int present;
    uint64_t size;
    uint64_t gpa_start;
    uint64_t gva;
};

struct mac_slot mac_slots[32];
#define ALIGN(x, y) (((x) + (y) - 1) & ~((y) - 1))

static int do_hvf_set_memory(hvf_slot *slot)
{
    struct mac_slot *macslot;
    hv_memory_flags_t flags;
    hv_return_t ret;

    macslot = &mac_slots[slot->slot_id];

    if (macslot->present) {
        if (macslot->size != slot->size) {
            macslot->present = 0;
            ret = hv_vm_unmap(macslot->gpa_start, macslot->size);
            assert_hvf_ok(ret);
        }
    }

    if (!slot->size) {
        return 0;
    }

    flags = HV_MEMORY_READ | HV_MEMORY_WRITE | HV_MEMORY_EXEC;

    macslot->present = 1;
    macslot->gpa_start = slot->start;
    macslot->size = slot->size;
    ret = hv_vm_map((hv_uvaddr_t)slot->mem, slot->start, slot->size, flags);
    assert_hvf_ok(ret);
    return 0;
}

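/*
 * Map or unmap the RAM backing a MemoryRegionSection into the guest
 * physical address space. Called from the MemoryListener region_add
 * and region_del hooks below.
 */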
void hvf_set_phys_mem(MemoryRegionSection *section, bool add)
{
    hvf_slot *mem;
    MemoryRegion *area = section->mr;

    if (!memory_region_is_ram(area)) {
        return;
    }

    mem = hvf_find_overlap_slot(
            section->offset_within_address_space,
            section->offset_within_address_space + int128_get64(section->size));

    if (mem && add) {
        if (mem->size == int128_get64(section->size) &&
            mem->start == section->offset_within_address_space &&
            mem->mem == (memory_region_get_ram_ptr(area) +
            section->offset_within_region)) {
            return; /* Same region is already registered; nothing to do. */
        }
    }

    /* Region needs to be reset: set the size to 0 and remap it. */
    if (mem) {
        mem->size = 0;
        if (do_hvf_set_memory(mem)) {
            error_report("Failed to reset overlapping slot");
            abort();
        }
    }

    if (!add) {
        return;
    }

    /* Now make a new slot. */
    int x;

    for (x = 0; x < hvf_state->num_slots; ++x) {
        mem = &hvf_state->slots[x];
        if (!mem->size) {
            break;
        }
    }

    if (x == hvf_state->num_slots) {
        error_report("No free slots");
        abort();
    }

    mem->size = int128_get64(section->size);
    mem->mem = memory_region_get_ram_ptr(area) + section->offset_within_region;
    mem->start = section->offset_within_address_space;
    mem->region = area;

    if (do_hvf_set_memory(mem)) {
        error_report("Error registering new memory slot");
        abort();
    }
}

void vmx_update_tpr(CPUState *cpu)
{
    /* TODO: need to integrate APIC handling */
    X86CPU *x86_cpu = X86_CPU(cpu);
    int tpr = cpu_get_apic_tpr(x86_cpu->apic_state) << 4;
    int irr = apic_get_highest_priority_irr(x86_cpu->apic_state);

    wreg(cpu->hvf_fd, HV_X86_TPR, tpr);
    if (irr == -1) {
        wvmcs(cpu->hvf_fd, VMCS_TPR_THRESHOLD, 0);
    } else {
        wvmcs(cpu->hvf_fd, VMCS_TPR_THRESHOLD, (irr > tpr) ? tpr >> 4 :
              irr >> 4);
    }
}

void update_apic_tpr(CPUState *cpu)
{
    X86CPU *x86_cpu = X86_CPU(cpu);
    int tpr = rreg(cpu->hvf_fd, HV_X86_TPR) >> 4;
    cpu_set_apic_tpr(x86_cpu->apic_state, tpr);
}

#define VECTORING_INFO_VECTOR_MASK 0xff

static void hvf_handle_interrupt(CPUState *cpu, int mask)
{
    cpu->interrupt_request |= mask;
    if (!qemu_cpu_is_self(cpu)) {
        qemu_cpu_kick(cpu);
    }
}

void hvf_handle_io(CPUArchState *env, uint16_t port, void *buffer,
                   int direction, int size, int count)
{
    int i;
    uint8_t *ptr = buffer;

    for (i = 0; i < count; i++) {
        address_space_rw(&address_space_io, port, MEMTXATTRS_UNSPECIFIED,
                         ptr, size,
                         direction);
        ptr += size;
    }
}

/* TODO: synchronize vcpu state */
static void do_hvf_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
{
    CPUState *cpu_state = cpu;
    if (cpu_state->vcpu_dirty == 0) {
        hvf_get_registers(cpu_state);
    }

    cpu_state->vcpu_dirty = 1;
}

void hvf_cpu_synchronize_state(CPUState *cpu_state)
{
    if (cpu_state->vcpu_dirty == 0) {
        run_on_cpu(cpu_state, do_hvf_cpu_synchronize_state, RUN_ON_CPU_NULL);
    }
}

static void do_hvf_cpu_synchronize_post_reset(CPUState *cpu, run_on_cpu_data arg)
{
    CPUState *cpu_state = cpu;
    hvf_put_registers(cpu_state);
    cpu_state->vcpu_dirty = false;
}

void hvf_cpu_synchronize_post_reset(CPUState *cpu_state)
{
    run_on_cpu(cpu_state, do_hvf_cpu_synchronize_post_reset, RUN_ON_CPU_NULL);
}

void _hvf_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg)
{
    CPUState *cpu_state = cpu;
    hvf_put_registers(cpu_state);
    cpu_state->vcpu_dirty = false;
}

void hvf_cpu_synchronize_post_init(CPUState *cpu_state)
{
    run_on_cpu(cpu_state, _hvf_cpu_synchronize_post_init, RUN_ON_CPU_NULL);
}

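/*
 * Decide whether an EPT violation should be handled by the instruction
 * emulator. Returns true for MMIO-style accesses (a data read or write
 * to a guest-physical address with no RAM slot behind it); returns false
 * for instruction fetches, for dirty-logging write faults (which are
 * resolved here by re-opening the slot for writing), and for faults that
 * did not come from a guest-linear-address translation.
 */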
static bool ept_emulation_fault(hvf_slot *slot, uint64_t gpa, uint64_t ept_qual)
{
    int read, write;

    /* EPT fault on an instruction fetch doesn't make sense here */
    if (ept_qual & EPT_VIOLATION_INST_FETCH) {
        return false;
    }

    /* EPT fault must be a read fault or a write fault */
    read = ept_qual & EPT_VIOLATION_DATA_READ ? 1 : 0;
    write = ept_qual & EPT_VIOLATION_DATA_WRITE ? 1 : 0;
    if ((read | write) == 0) {
        return false;
    }

    if (write && slot) {
        if (slot->flags & HVF_SLOT_LOG) {
            memory_region_set_dirty(slot->region, gpa - slot->start, 1);
            hv_vm_protect((hv_gpaddr_t)slot->start, (size_t)slot->size,
                          HV_MEMORY_READ | HV_MEMORY_WRITE);
        }
    }

    /*
     * The EPT violation must have been caused by accessing a
     * guest-physical address that is a translation of a guest-linear
     * address.
     */
    if ((ept_qual & EPT_VIOLATION_GLA_VALID) == 0 ||
        (ept_qual & EPT_VIOLATION_XLAT_VALID) == 0) {
        return false;
    }

    return !slot;
}

static void hvf_set_dirty_tracking(MemoryRegionSection *section, bool on)
{
    hvf_slot *slot;

    slot = hvf_find_overlap_slot(
            section->offset_within_address_space,
            section->offset_within_address_space + int128_get64(section->size));

    /* protect region against writes; begin tracking it */
    if (on) {
        slot->flags |= HVF_SLOT_LOG;
        hv_vm_protect((hv_gpaddr_t)slot->start, (size_t)slot->size,
                      HV_MEMORY_READ);
    /* stop tracking region */
    } else {
        slot->flags &= ~HVF_SLOT_LOG;
        hv_vm_protect((hv_gpaddr_t)slot->start, (size_t)slot->size,
                      HV_MEMORY_READ | HV_MEMORY_WRITE);
    }
}

static void hvf_log_start(MemoryListener *listener,
                          MemoryRegionSection *section, int old, int new)
{
    if (old != 0) {
        return;
    }

    hvf_set_dirty_tracking(section, 1);
}

static void hvf_log_stop(MemoryListener *listener,
                         MemoryRegionSection *section, int old, int new)
{
    if (new != 0) {
        return;
    }

    hvf_set_dirty_tracking(section, 0);
}

static void hvf_log_sync(MemoryListener *listener,
                         MemoryRegionSection *section)
{
    /*
     * sync of dirty pages is handled elsewhere; just make sure we keep
     * tracking the region.
     */
    hvf_set_dirty_tracking(section, 1);
}

static void hvf_region_add(MemoryListener *listener,
                           MemoryRegionSection *section)
{
    hvf_set_phys_mem(section, true);
}

static void hvf_region_del(MemoryListener *listener,
                           MemoryRegionSection *section)
{
    hvf_set_phys_mem(section, false);
}

static MemoryListener hvf_memory_listener = {
    .priority = 10,
    .region_add = hvf_region_add,
    .region_del = hvf_region_del,
    .log_start = hvf_log_start,
    .log_stop = hvf_log_stop,
    .log_sync = hvf_log_sync,
};

void hvf_reset_vcpu(CPUState *cpu)
{
    /* TODO: this shouldn't be needed; there is already a call to
     * cpu_synchronize_all_post_reset in vl.c
     */
    wvmcs(cpu->hvf_fd, VMCS_ENTRY_CTLS, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_IA32_EFER, 0);
    /* CR0 architectural reset value: ET, CD and NW set, paging disabled. */
    macvm_set_cr0(cpu->hvf_fd, 0x60000010);

    wvmcs(cpu->hvf_fd, VMCS_CR4_MASK, CR4_VMXE_MASK);
    wvmcs(cpu->hvf_fd, VMCS_CR4_SHADOW, 0x0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_CR4, CR4_VMXE_MASK);

    /* set VMCS guest state fields */
    wvmcs(cpu->hvf_fd, VMCS_GUEST_CS_SELECTOR, 0xf000);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_CS_LIMIT, 0xffff);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_CS_ACCESS_RIGHTS, 0x9b);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_CS_BASE, 0xffff0000);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_DS_SELECTOR, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_DS_LIMIT, 0xffff);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_DS_ACCESS_RIGHTS, 0x93);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_DS_BASE, 0);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_ES_SELECTOR, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_ES_LIMIT, 0xffff);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_ES_ACCESS_RIGHTS, 0x93);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_ES_BASE, 0);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_FS_SELECTOR, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_FS_LIMIT, 0xffff);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_FS_ACCESS_RIGHTS, 0x93);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_FS_BASE, 0);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_GS_SELECTOR, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_GS_LIMIT, 0xffff);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_GS_ACCESS_RIGHTS, 0x93);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_GS_BASE, 0);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_SS_SELECTOR, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_SS_LIMIT, 0xffff);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_SS_ACCESS_RIGHTS, 0x93);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_SS_BASE, 0);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_LDTR_SELECTOR, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_LDTR_LIMIT, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_LDTR_ACCESS_RIGHTS, 0x10000);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_LDTR_BASE, 0);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_TR_SELECTOR, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_TR_LIMIT, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_TR_ACCESS_RIGHTS, 0x83);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_TR_BASE, 0);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_GDTR_LIMIT, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_GDTR_BASE, 0);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_IDTR_LIMIT, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_IDTR_BASE, 0);

    /*wvmcs(cpu->hvf_fd, VMCS_GUEST_CR2, 0x0);*/
    wvmcs(cpu->hvf_fd, VMCS_GUEST_CR3, 0x0);

    /* CS base 0xffff0000 + RIP 0xfff0 places execution at the reset vector. */
    wreg(cpu->hvf_fd, HV_X86_RIP, 0xfff0);
    wreg(cpu->hvf_fd, HV_X86_RDX, 0x623);
    wreg(cpu->hvf_fd, HV_X86_RFLAGS, 0x2);
    wreg(cpu->hvf_fd, HV_X86_RSP, 0x0);
    wreg(cpu->hvf_fd, HV_X86_RAX, 0x0);
    wreg(cpu->hvf_fd, HV_X86_RBX, 0x0);
    wreg(cpu->hvf_fd, HV_X86_RCX, 0x0);
    wreg(cpu->hvf_fd, HV_X86_RSI, 0x0);
    wreg(cpu->hvf_fd, HV_X86_RDI, 0x0);
    wreg(cpu->hvf_fd, HV_X86_RBP, 0x0);

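    /* Clear the remaining general-purpose registers, R8 through R15. */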
    for (int i = 0; i < 8; i++) {
        wreg(cpu->hvf_fd, HV_X86_R8 + i, 0x0);
    }

    hv_vm_sync_tsc(0);
    hv_vcpu_invalidate_tlb(cpu->hvf_fd);
    hv_vcpu_flush(cpu->hvf_fd);
}

void hvf_vcpu_destroy(CPUState *cpu)
{
    hv_return_t ret = hv_vcpu_destroy((hv_vcpuid_t)cpu->hvf_fd);
    assert_hvf_ok(ret);
}

static void dummy_signal(int sig)
{
}

int hvf_init_vcpu(CPUState *cpu)
{
    X86CPU *x86cpu = X86_CPU(cpu);
    CPUX86State *env = &x86cpu->env;
    int r;

    /* init cpu signals */
    sigset_t set;
    struct sigaction sigact;

    memset(&sigact, 0, sizeof(sigact));
    sigact.sa_handler = dummy_signal;
    sigaction(SIG_IPI, &sigact, NULL);

    pthread_sigmask(SIG_BLOCK, NULL, &set);
    sigdelset(&set, SIG_IPI);

    init_emu();
    init_decoder();

    hvf_state->hvf_caps = g_new0(struct hvf_vcpu_caps, 1);
    env->hvf_emul = g_new0(HVFX86EmulatorState, 1);

    r = hv_vcpu_create((hv_vcpuid_t *)&cpu->hvf_fd, HV_VCPU_DEFAULT);
    cpu->vcpu_dirty = 1;
    assert_hvf_ok(r);

    if (hv_vmx_read_capability(HV_VMX_CAP_PINBASED,
        &hvf_state->hvf_caps->vmx_cap_pinbased)) {
        abort();
    }
    if (hv_vmx_read_capability(HV_VMX_CAP_PROCBASED,
        &hvf_state->hvf_caps->vmx_cap_procbased)) {
        abort();
    }
    if (hv_vmx_read_capability(HV_VMX_CAP_PROCBASED2,
        &hvf_state->hvf_caps->vmx_cap_procbased2)) {
        abort();
    }
    if (hv_vmx_read_capability(HV_VMX_CAP_ENTRY,
        &hvf_state->hvf_caps->vmx_cap_entry)) {
        abort();
    }

    /* set VMCS control fields */
    wvmcs(cpu->hvf_fd, VMCS_PIN_BASED_CTLS,
          cap2ctrl(hvf_state->hvf_caps->vmx_cap_pinbased,
                   VMCS_PIN_BASED_CTLS_EXTINT |
                   VMCS_PIN_BASED_CTLS_NMI |
                   VMCS_PIN_BASED_CTLS_VNMI));
    wvmcs(cpu->hvf_fd, VMCS_PRI_PROC_BASED_CTLS,
          cap2ctrl(hvf_state->hvf_caps->vmx_cap_procbased,
                   VMCS_PRI_PROC_BASED_CTLS_HLT |
                   VMCS_PRI_PROC_BASED_CTLS_MWAIT |
                   VMCS_PRI_PROC_BASED_CTLS_TSC_OFFSET |
                   VMCS_PRI_PROC_BASED_CTLS_TPR_SHADOW) |
          VMCS_PRI_PROC_BASED_CTLS_SEC_CONTROL);
    wvmcs(cpu->hvf_fd, VMCS_SEC_PROC_BASED_CTLS,
          cap2ctrl(hvf_state->hvf_caps->vmx_cap_procbased2,
                   VMCS_PRI_PROC_BASED2_CTLS_APIC_ACCESSES));

    wvmcs(cpu->hvf_fd, VMCS_ENTRY_CTLS, cap2ctrl(hvf_state->hvf_caps->vmx_cap_entry,
          0));
    wvmcs(cpu->hvf_fd, VMCS_EXCEPTION_BITMAP, 0); /* Double fault */

    wvmcs(cpu->hvf_fd, VMCS_TPR_THRESHOLD, 0);

    x86cpu = X86_CPU(cpu);
    x86cpu->env.xsave_buf = qemu_memalign(4096, 4096);

    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_STAR, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_LSTAR, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_CSTAR, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_FMASK, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_FSBASE, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_GSBASE, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_KERNELGSBASE, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_TSC_AUX, 1);
    /*hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_TSC, 1);*/
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_SYSENTER_CS, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_SYSENTER_EIP, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_SYSENTER_ESP, 1);

    return 0;
}

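/*
 * Record the interrupt/exception state left pending by a VM exit (from the
 * IDT-vectoring info field and the guest interruptibility state) into
 * CPUX86State, so that injection can be retried on the next VM entry.
 */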
static void hvf_store_events(CPUState *cpu, uint32_t ins_len, uint64_t idtvec_info)
{
    X86CPU *x86_cpu = X86_CPU(cpu);
    CPUX86State *env = &x86_cpu->env;

    env->exception_nr = -1;
    env->exception_pending = 0;
    env->exception_injected = 0;
    env->interrupt_injected = -1;
    env->nmi_injected = false;
    if (idtvec_info & VMCS_IDT_VEC_VALID) {
        switch (idtvec_info & VMCS_IDT_VEC_TYPE) {
        case VMCS_IDT_VEC_HWINTR:
        case VMCS_IDT_VEC_SWINTR:
            env->interrupt_injected = idtvec_info & VMCS_IDT_VEC_VECNUM;
            break;
        case VMCS_IDT_VEC_NMI:
            env->nmi_injected = true;
            break;
        case VMCS_IDT_VEC_HWEXCEPTION:
        case VMCS_IDT_VEC_SWEXCEPTION:
            env->exception_nr = idtvec_info & VMCS_IDT_VEC_VECNUM;
            env->exception_injected = 1;
            break;
        case VMCS_IDT_VEC_PRIV_SWEXCEPTION:
        default:
            abort();
        }
        if ((idtvec_info & VMCS_IDT_VEC_TYPE) == VMCS_IDT_VEC_SWEXCEPTION ||
            (idtvec_info & VMCS_IDT_VEC_TYPE) == VMCS_IDT_VEC_SWINTR) {
            env->ins_len = ins_len;
        }
        if (idtvec_info & VMCS_INTR_DEL_ERRCODE) {
            env->has_error_code = true;
            env->error_code = rvmcs(cpu->hvf_fd, VMCS_IDT_VECTORING_ERROR);
        }
    }
    if ((rvmcs(cpu->hvf_fd, VMCS_GUEST_INTERRUPTIBILITY) &
        VMCS_INTERRUPTIBILITY_NMI_BLOCKING)) {
        env->hflags2 |= HF2_NMI_MASK;
    } else {
        env->hflags2 &= ~HF2_NMI_MASK;
    }
    if (rvmcs(cpu->hvf_fd, VMCS_GUEST_INTERRUPTIBILITY) &
        (VMCS_INTERRUPTIBILITY_STI_BLOCKING |
         VMCS_INTERRUPTIBILITY_MOVSS_BLOCKING)) {
        env->hflags |= HF_INHIBIT_IRQ_MASK;
    } else {
        env->hflags &= ~HF_INHIBIT_IRQ_MASK;
    }
}

int hvf_vcpu_exec(CPUState *cpu)
{
    X86CPU *x86_cpu = X86_CPU(cpu);
    CPUX86State *env = &x86_cpu->env;
    int ret = 0;
    uint64_t rip = 0;

    if (hvf_process_events(cpu)) {
        return EXCP_HLT;
    }

    do {
        if (cpu->vcpu_dirty) {
            hvf_put_registers(cpu);
            cpu->vcpu_dirty = false;
        }

        if (hvf_inject_interrupts(cpu)) {
            return EXCP_INTERRUPT;
        }
        vmx_update_tpr(cpu);

        qemu_mutex_unlock_iothread();
        if (!cpu_is_bsp(X86_CPU(cpu)) && cpu->halted) {
            qemu_mutex_lock_iothread();
            return EXCP_HLT;
        }

        hv_return_t r = hv_vcpu_run(cpu->hvf_fd);
        assert_hvf_ok(r);

        /* handle VMEXIT */
        uint64_t exit_reason = rvmcs(cpu->hvf_fd, VMCS_EXIT_REASON);
        uint64_t exit_qual = rvmcs(cpu->hvf_fd, VMCS_EXIT_QUALIFICATION);
        uint32_t ins_len = (uint32_t)rvmcs(cpu->hvf_fd,
                                           VMCS_EXIT_INSTRUCTION_LENGTH);

        uint64_t idtvec_info = rvmcs(cpu->hvf_fd, VMCS_IDT_VECTORING_INFO);

        hvf_store_events(cpu, ins_len, idtvec_info);
        rip = rreg(cpu->hvf_fd, HV_X86_RIP);
        RFLAGS(env) = rreg(cpu->hvf_fd, HV_X86_RFLAGS);
        env->eflags = RFLAGS(env);

        qemu_mutex_lock_iothread();

        update_apic_tpr(cpu);
        current_cpu = cpu;

        ret = 0;
        switch (exit_reason) {
        case EXIT_REASON_HLT: {
            macvm_set_rip(cpu, rip + ins_len);
            if (!((cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
                (EFLAGS(env) & IF_MASK))
                && !(cpu->interrupt_request & CPU_INTERRUPT_NMI) &&
                !(idtvec_info & VMCS_IDT_VEC_VALID)) {
                cpu->halted = 1;
                ret = EXCP_HLT;
                break;
            }
            ret = EXCP_INTERRUPT;
            break;
        }
        case EXIT_REASON_MWAIT: {
            ret = EXCP_INTERRUPT;
            break;
        }
        /* Need to check if MMIO or unmapped fault */
        case EXIT_REASON_EPT_FAULT:
        {
            hvf_slot *slot;
            uint64_t gpa = rvmcs(cpu->hvf_fd, VMCS_GUEST_PHYSICAL_ADDRESS);

            if (((idtvec_info & VMCS_IDT_VEC_VALID) == 0) &&
                ((exit_qual & EXIT_QUAL_NMIUDTI) != 0)) {
                vmx_set_nmi_blocking(cpu);
            }

            slot = hvf_find_overlap_slot(gpa, gpa);
            /* mmio */
            if (ept_emulation_fault(slot, gpa, exit_qual)) {
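                /*
                 * No RAM slot covers this guest-physical address, so treat
                 * the access as MMIO: fetch the faulting instruction at the
                 * guest RIP, decode it and emulate it in software.
                 */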
                struct x86_decode decode;

                load_regs(cpu);
                env->hvf_emul->fetch_rip = rip;

                decode_instruction(env, &decode);
                exec_instruction(env, &decode);
                store_regs(cpu);
                break;
            }
            break;
        }
        case EXIT_REASON_INOUT:
        {
            uint32_t in = (exit_qual & 8) != 0;
            uint32_t size = (exit_qual & 7) + 1;
            uint32_t string = (exit_qual & 16) != 0;
            uint32_t port = exit_qual >> 16;
            /*uint32_t rep = (exit_qual & 0x20) != 0;*/

            if (!string && in) {
                uint64_t val = 0;
                load_regs(cpu);
                hvf_handle_io(env, port, &val, 0, size, 1);
                if (size == 1) {
                    AL(env) = val;
                } else if (size == 2) {
                    AX(env) = val;
                } else if (size == 4) {
                    RAX(env) = (uint32_t)val;
                } else {
                    RAX(env) = (uint64_t)val;
                }
                RIP(env) += ins_len;
                store_regs(cpu);
                break;
            } else if (!string && !in) {
                RAX(env) = rreg(cpu->hvf_fd, HV_X86_RAX);
                hvf_handle_io(env, port, &RAX(env), 1, size, 1);
                macvm_set_rip(cpu, rip + ins_len);
                break;
            }
            struct x86_decode decode;

            load_regs(cpu);
            env->hvf_emul->fetch_rip = rip;

            decode_instruction(env, &decode);
            assert(ins_len == decode.len);
            exec_instruction(env, &decode);
            store_regs(cpu);

            break;
        }
        case EXIT_REASON_CPUID: {
            uint32_t rax = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RAX);
            uint32_t rbx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RBX);
            uint32_t rcx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RCX);
            uint32_t rdx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RDX);

            cpu_x86_cpuid(env, rax, rcx, &rax, &rbx, &rcx, &rdx);

            wreg(cpu->hvf_fd, HV_X86_RAX, rax);
            wreg(cpu->hvf_fd, HV_X86_RBX, rbx);
            wreg(cpu->hvf_fd, HV_X86_RCX, rcx);
            wreg(cpu->hvf_fd, HV_X86_RDX, rdx);

            macvm_set_rip(cpu, rip + ins_len);
            break;
        }
        case EXIT_REASON_XSETBV: {
            X86CPU *x86_cpu = X86_CPU(cpu);
            CPUX86State *env = &x86_cpu->env;
            uint32_t eax = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RAX);
            uint32_t ecx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RCX);
            uint32_t edx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RDX);

            if (ecx) {
                macvm_set_rip(cpu, rip + ins_len);
                break;
            }
            env->xcr0 = ((uint64_t)edx << 32) | eax;
            wreg(cpu->hvf_fd, HV_X86_XCR0, env->xcr0 | 1);
            macvm_set_rip(cpu, rip + ins_len);
            break;
        }
        case EXIT_REASON_INTR_WINDOW:
            vmx_clear_int_window_exiting(cpu);
            ret = EXCP_INTERRUPT;
            break;
        case EXIT_REASON_NMI_WINDOW:
            vmx_clear_nmi_window_exiting(cpu);
            ret = EXCP_INTERRUPT;
            break;
        case EXIT_REASON_EXT_INTR:
            /* force exit and allow io handling */
            ret = EXCP_INTERRUPT;
            break;
        case EXIT_REASON_RDMSR:
        case EXIT_REASON_WRMSR:
        {
            load_regs(cpu);
            if (exit_reason == EXIT_REASON_RDMSR) {
                simulate_rdmsr(cpu);
            } else {
                simulate_wrmsr(cpu);
            }
            RIP(env) += rvmcs(cpu->hvf_fd, VMCS_EXIT_INSTRUCTION_LENGTH);
            store_regs(cpu);
            break;
        }
        case EXIT_REASON_CR_ACCESS: {
            int cr;
            int reg;

            load_regs(cpu);
            cr = exit_qual & 15;
            reg = (exit_qual >> 8) & 15;

            switch (cr) {
            case 0x0: {
                macvm_set_cr0(cpu->hvf_fd, RRX(env, reg));
                break;
            }
            case 4: {
                macvm_set_cr4(cpu->hvf_fd, RRX(env, reg));
                break;
            }
            case 8: {
                X86CPU *x86_cpu = X86_CPU(cpu);
                if (exit_qual & 0x10) {
                    RRX(env, reg) = cpu_get_apic_tpr(x86_cpu->apic_state);
                } else {
                    int tpr = RRX(env, reg);
                    cpu_set_apic_tpr(x86_cpu->apic_state, tpr);
                    ret = EXCP_INTERRUPT;
                }
                break;
            }
            default:
error_report("Unrecognized CR %d", cr); 878 abort(); 879 } 880 RIP(env) += ins_len; 881 store_regs(cpu); 882 break; 883 } 884 case EXIT_REASON_APIC_ACCESS: { /* TODO */ 885 struct x86_decode decode; 886 887 load_regs(cpu); 888 env->hvf_emul->fetch_rip = rip; 889 890 decode_instruction(env, &decode); 891 exec_instruction(env, &decode); 892 store_regs(cpu); 893 break; 894 } 895 case EXIT_REASON_TPR: { 896 ret = 1; 897 break; 898 } 899 case EXIT_REASON_TASK_SWITCH: { 900 uint64_t vinfo = rvmcs(cpu->hvf_fd, VMCS_IDT_VECTORING_INFO); 901 x68_segment_selector sel = {.sel = exit_qual & 0xffff}; 902 vmx_handle_task_switch(cpu, sel, (exit_qual >> 30) & 0x3, 903 vinfo & VMCS_INTR_VALID, vinfo & VECTORING_INFO_VECTOR_MASK, vinfo 904 & VMCS_INTR_T_MASK); 905 break; 906 } 907 case EXIT_REASON_TRIPLE_FAULT: { 908 qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); 909 ret = EXCP_INTERRUPT; 910 break; 911 } 912 case EXIT_REASON_RDPMC: 913 wreg(cpu->hvf_fd, HV_X86_RAX, 0); 914 wreg(cpu->hvf_fd, HV_X86_RDX, 0); 915 macvm_set_rip(cpu, rip + ins_len); 916 break; 917 case VMX_REASON_VMCALL: 918 env->exception_nr = EXCP0D_GPF; 919 env->exception_injected = 1; 920 env->has_error_code = true; 921 env->error_code = 0; 922 break; 923 default: 924 error_report("%llx: unhandled exit %llx", rip, exit_reason); 925 } 926 } while (ret == 0); 927 928 return ret; 929 } 930 931 bool hvf_allowed; 932 933 static int hvf_accel_init(MachineState *ms) 934 { 935 int x; 936 hv_return_t ret; 937 HVFState *s; 938 939 ret = hv_vm_create(HV_VM_DEFAULT); 940 assert_hvf_ok(ret); 941 942 s = g_new0(HVFState, 1); 943 944 s->num_slots = 32; 945 for (x = 0; x < s->num_slots; ++x) { 946 s->slots[x].size = 0; 947 s->slots[x].slot_id = x; 948 } 949 950 hvf_state = s; 951 cpu_interrupt_handler = hvf_handle_interrupt; 952 memory_listener_register(&hvf_memory_listener, &address_space_memory); 953 return 0; 954 } 955 956 static void hvf_accel_class_init(ObjectClass *oc, void *data) 957 { 958 AccelClass *ac = ACCEL_CLASS(oc); 959 ac->name = "HVF"; 960 ac->init_machine = hvf_accel_init; 961 ac->allowed = &hvf_allowed; 962 } 963 964 static const TypeInfo hvf_accel_type = { 965 .name = TYPE_HVF_ACCEL, 966 .parent = TYPE_ACCEL, 967 .class_init = hvf_accel_class_init, 968 }; 969 970 static void hvf_type_init(void) 971 { 972 type_register_static(&hvf_accel_type); 973 } 974 975 type_init(hvf_type_init); 976