1 /* 2 * Copyright (c) 2003-2008 Fabrice Bellard 3 * Copyright (C) 2016 Veertu Inc, 4 * Copyright (C) 2017 Google Inc, 5 * 6 * This program is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * This program is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with this program; if not, see <http://www.gnu.org/licenses/>. 18 */ 19 20 #include "qemu/osdep.h" 21 22 #include "x86hvf.h" 23 #include "vmx.h" 24 #include "vmcs.h" 25 #include "cpu.h" 26 #include "x86_descr.h" 27 #include "x86_decode.h" 28 #include "sysemu/hw_accel.h" 29 30 #include "hw/i386/apic_internal.h" 31 32 #include <Hypervisor/hv.h> 33 #include <Hypervisor/hv_vmx.h> 34 35 void hvf_set_segment(CPUState *cs, struct vmx_segment *vmx_seg, 36 SegmentCache *qseg, bool is_tr) 37 { 38 vmx_seg->sel = qseg->selector; 39 vmx_seg->base = qseg->base; 40 vmx_seg->limit = qseg->limit; 41 42 if (!qseg->selector && !x86_is_real(cs) && !is_tr) { 43 /* the TR register is usable after processor reset despite 44 * having a null selector */ 45 vmx_seg->ar = 1 << 16; 46 return; 47 } 48 vmx_seg->ar = (qseg->flags >> DESC_TYPE_SHIFT) & 0xf; 49 vmx_seg->ar |= ((qseg->flags >> DESC_G_SHIFT) & 1) << 15; 50 vmx_seg->ar |= ((qseg->flags >> DESC_B_SHIFT) & 1) << 14; 51 vmx_seg->ar |= ((qseg->flags >> DESC_L_SHIFT) & 1) << 13; 52 vmx_seg->ar |= ((qseg->flags >> DESC_AVL_SHIFT) & 1) << 12; 53 vmx_seg->ar |= ((qseg->flags >> DESC_P_SHIFT) & 1) << 7; 54 vmx_seg->ar |= ((qseg->flags >> DESC_DPL_SHIFT) & 3) << 5; 55 vmx_seg->ar |= ((qseg->flags >> DESC_S_SHIFT) & 1) << 4; 56 } 57 58 void hvf_get_segment(SegmentCache *qseg, struct vmx_segment *vmx_seg) 59 { 60 qseg->limit = vmx_seg->limit; 61 qseg->base = vmx_seg->base; 62 qseg->selector = vmx_seg->sel; 63 qseg->flags = ((vmx_seg->ar & 0xf) << DESC_TYPE_SHIFT) | 64 (((vmx_seg->ar >> 4) & 1) << DESC_S_SHIFT) | 65 (((vmx_seg->ar >> 5) & 3) << DESC_DPL_SHIFT) | 66 (((vmx_seg->ar >> 7) & 1) << DESC_P_SHIFT) | 67 (((vmx_seg->ar >> 12) & 1) << DESC_AVL_SHIFT) | 68 (((vmx_seg->ar >> 13) & 1) << DESC_L_SHIFT) | 69 (((vmx_seg->ar >> 14) & 1) << DESC_B_SHIFT) | 70 (((vmx_seg->ar >> 15) & 1) << DESC_G_SHIFT); 71 } 72 73 void hvf_put_xsave(CPUState *cs) 74 { 75 void *xsave = X86_CPU(cs)->env.xsave_buf; 76 uint32_t xsave_len = X86_CPU(cs)->env.xsave_buf_len; 77 78 x86_cpu_xsave_all_areas(X86_CPU(cs), xsave, xsave_len); 79 80 if (hv_vcpu_write_fpstate(cs->accel->fd, xsave, xsave_len)) { 81 abort(); 82 } 83 } 84 85 static void hvf_put_segments(CPUState *cs) 86 { 87 CPUX86State *env = &X86_CPU(cs)->env; 88 struct vmx_segment seg; 89 90 wvmcs(cs->accel->fd, VMCS_GUEST_IDTR_LIMIT, env->idt.limit); 91 wvmcs(cs->accel->fd, VMCS_GUEST_IDTR_BASE, env->idt.base); 92 93 wvmcs(cs->accel->fd, VMCS_GUEST_GDTR_LIMIT, env->gdt.limit); 94 wvmcs(cs->accel->fd, VMCS_GUEST_GDTR_BASE, env->gdt.base); 95 96 /* wvmcs(cs->accel->fd, VMCS_GUEST_CR2, env->cr[2]); */ 97 wvmcs(cs->accel->fd, VMCS_GUEST_CR3, env->cr[3]); 98 vmx_update_tpr(cs); 99 wvmcs(cs->accel->fd, VMCS_GUEST_IA32_EFER, env->efer); 100 101 macvm_set_cr4(cs->accel->fd, env->cr[4]); 102 macvm_set_cr0(cs->accel->fd, env->cr[0]); 103 104 hvf_set_segment(cs, &seg, &env->segs[R_CS], false); 105 vmx_write_segment_descriptor(cs, &seg, R_CS); 106 107 hvf_set_segment(cs, &seg, &env->segs[R_DS], false); 108 vmx_write_segment_descriptor(cs, &seg, R_DS); 109 110 hvf_set_segment(cs, &seg, &env->segs[R_ES], false); 111 vmx_write_segment_descriptor(cs, &seg, R_ES); 112 113 hvf_set_segment(cs, &seg, &env->segs[R_SS], false); 114 vmx_write_segment_descriptor(cs, &seg, R_SS); 115 116 hvf_set_segment(cs, &seg, &env->segs[R_FS], false); 117 vmx_write_segment_descriptor(cs, &seg, R_FS); 118 119 hvf_set_segment(cs, &seg, &env->segs[R_GS], false); 120 vmx_write_segment_descriptor(cs, &seg, R_GS); 121 122 hvf_set_segment(cs, &seg, &env->tr, true); 123 vmx_write_segment_descriptor(cs, &seg, R_TR); 124 125 hvf_set_segment(cs, &seg, &env->ldt, false); 126 vmx_write_segment_descriptor(cs, &seg, R_LDTR); 127 } 128 129 void hvf_put_msrs(CPUState *cs) 130 { 131 CPUX86State *env = &X86_CPU(cs)->env; 132 133 hv_vcpu_write_msr(cs->accel->fd, MSR_IA32_SYSENTER_CS, 134 env->sysenter_cs); 135 hv_vcpu_write_msr(cs->accel->fd, MSR_IA32_SYSENTER_ESP, 136 env->sysenter_esp); 137 hv_vcpu_write_msr(cs->accel->fd, MSR_IA32_SYSENTER_EIP, 138 env->sysenter_eip); 139 140 hv_vcpu_write_msr(cs->accel->fd, MSR_STAR, env->star); 141 142 #ifdef TARGET_X86_64 143 hv_vcpu_write_msr(cs->accel->fd, MSR_CSTAR, env->cstar); 144 hv_vcpu_write_msr(cs->accel->fd, MSR_KERNELGSBASE, env->kernelgsbase); 145 hv_vcpu_write_msr(cs->accel->fd, MSR_FMASK, env->fmask); 146 hv_vcpu_write_msr(cs->accel->fd, MSR_LSTAR, env->lstar); 147 #endif 148 149 hv_vcpu_write_msr(cs->accel->fd, MSR_GSBASE, env->segs[R_GS].base); 150 hv_vcpu_write_msr(cs->accel->fd, MSR_FSBASE, env->segs[R_FS].base); 151 } 152 153 154 void hvf_get_xsave(CPUState *cs) 155 { 156 void *xsave = X86_CPU(cs)->env.xsave_buf; 157 uint32_t xsave_len = X86_CPU(cs)->env.xsave_buf_len; 158 159 if (hv_vcpu_read_fpstate(cs->accel->fd, xsave, xsave_len)) { 160 abort(); 161 } 162 163 x86_cpu_xrstor_all_areas(X86_CPU(cs), xsave, xsave_len); 164 } 165 166 static void hvf_get_segments(CPUState *cs) 167 { 168 CPUX86State *env = &X86_CPU(cs)->env; 169 170 struct vmx_segment seg; 171 172 env->interrupt_injected = -1; 173 174 vmx_read_segment_descriptor(cs, &seg, R_CS); 175 hvf_get_segment(&env->segs[R_CS], &seg); 176 177 vmx_read_segment_descriptor(cs, &seg, R_DS); 178 hvf_get_segment(&env->segs[R_DS], &seg); 179 180 vmx_read_segment_descriptor(cs, &seg, R_ES); 181 hvf_get_segment(&env->segs[R_ES], &seg); 182 183 vmx_read_segment_descriptor(cs, &seg, R_FS); 184 hvf_get_segment(&env->segs[R_FS], &seg); 185 186 vmx_read_segment_descriptor(cs, &seg, R_GS); 187 hvf_get_segment(&env->segs[R_GS], &seg); 188 189 vmx_read_segment_descriptor(cs, &seg, R_SS); 190 hvf_get_segment(&env->segs[R_SS], &seg); 191 192 vmx_read_segment_descriptor(cs, &seg, R_TR); 193 hvf_get_segment(&env->tr, &seg); 194 195 vmx_read_segment_descriptor(cs, &seg, R_LDTR); 196 hvf_get_segment(&env->ldt, &seg); 197 198 env->idt.limit = rvmcs(cs->accel->fd, VMCS_GUEST_IDTR_LIMIT); 199 env->idt.base = rvmcs(cs->accel->fd, VMCS_GUEST_IDTR_BASE); 200 env->gdt.limit = rvmcs(cs->accel->fd, VMCS_GUEST_GDTR_LIMIT); 201 env->gdt.base = rvmcs(cs->accel->fd, VMCS_GUEST_GDTR_BASE); 202 203 env->cr[0] = rvmcs(cs->accel->fd, VMCS_GUEST_CR0); 204 env->cr[2] = 0; 205 env->cr[3] = rvmcs(cs->accel->fd, VMCS_GUEST_CR3); 206 env->cr[4] = rvmcs(cs->accel->fd, VMCS_GUEST_CR4); 207 208 env->efer = rvmcs(cs->accel->fd, VMCS_GUEST_IA32_EFER); 209 } 210 211 void hvf_get_msrs(CPUState *cs) 212 { 213 CPUX86State *env = &X86_CPU(cs)->env; 214 uint64_t tmp; 215 216 hv_vcpu_read_msr(cs->accel->fd, MSR_IA32_SYSENTER_CS, &tmp); 217 env->sysenter_cs = tmp; 218 219 hv_vcpu_read_msr(cs->accel->fd, MSR_IA32_SYSENTER_ESP, &tmp); 220 env->sysenter_esp = tmp; 221 222 hv_vcpu_read_msr(cs->accel->fd, MSR_IA32_SYSENTER_EIP, &tmp); 223 env->sysenter_eip = tmp; 224 225 hv_vcpu_read_msr(cs->accel->fd, MSR_STAR, &env->star); 226 227 #ifdef TARGET_X86_64 228 hv_vcpu_read_msr(cs->accel->fd, MSR_CSTAR, &env->cstar); 229 hv_vcpu_read_msr(cs->accel->fd, MSR_KERNELGSBASE, &env->kernelgsbase); 230 hv_vcpu_read_msr(cs->accel->fd, MSR_FMASK, &env->fmask); 231 hv_vcpu_read_msr(cs->accel->fd, MSR_LSTAR, &env->lstar); 232 #endif 233 234 hv_vcpu_read_msr(cs->accel->fd, MSR_IA32_APICBASE, &tmp); 235 236 env->tsc = rdtscp() + rvmcs(cs->accel->fd, VMCS_TSC_OFFSET); 237 } 238 239 int hvf_put_registers(CPUState *cs) 240 { 241 X86CPU *x86cpu = X86_CPU(cs); 242 CPUX86State *env = &x86cpu->env; 243 244 wreg(cs->accel->fd, HV_X86_RAX, env->regs[R_EAX]); 245 wreg(cs->accel->fd, HV_X86_RBX, env->regs[R_EBX]); 246 wreg(cs->accel->fd, HV_X86_RCX, env->regs[R_ECX]); 247 wreg(cs->accel->fd, HV_X86_RDX, env->regs[R_EDX]); 248 wreg(cs->accel->fd, HV_X86_RBP, env->regs[R_EBP]); 249 wreg(cs->accel->fd, HV_X86_RSP, env->regs[R_ESP]); 250 wreg(cs->accel->fd, HV_X86_RSI, env->regs[R_ESI]); 251 wreg(cs->accel->fd, HV_X86_RDI, env->regs[R_EDI]); 252 wreg(cs->accel->fd, HV_X86_R8, env->regs[8]); 253 wreg(cs->accel->fd, HV_X86_R9, env->regs[9]); 254 wreg(cs->accel->fd, HV_X86_R10, env->regs[10]); 255 wreg(cs->accel->fd, HV_X86_R11, env->regs[11]); 256 wreg(cs->accel->fd, HV_X86_R12, env->regs[12]); 257 wreg(cs->accel->fd, HV_X86_R13, env->regs[13]); 258 wreg(cs->accel->fd, HV_X86_R14, env->regs[14]); 259 wreg(cs->accel->fd, HV_X86_R15, env->regs[15]); 260 wreg(cs->accel->fd, HV_X86_RFLAGS, env->eflags); 261 wreg(cs->accel->fd, HV_X86_RIP, env->eip); 262 263 wreg(cs->accel->fd, HV_X86_XCR0, env->xcr0); 264 265 hvf_put_xsave(cs); 266 267 hvf_put_segments(cs); 268 269 hvf_put_msrs(cs); 270 271 wreg(cs->accel->fd, HV_X86_DR0, env->dr[0]); 272 wreg(cs->accel->fd, HV_X86_DR1, env->dr[1]); 273 wreg(cs->accel->fd, HV_X86_DR2, env->dr[2]); 274 wreg(cs->accel->fd, HV_X86_DR3, env->dr[3]); 275 wreg(cs->accel->fd, HV_X86_DR4, env->dr[4]); 276 wreg(cs->accel->fd, HV_X86_DR5, env->dr[5]); 277 wreg(cs->accel->fd, HV_X86_DR6, env->dr[6]); 278 wreg(cs->accel->fd, HV_X86_DR7, env->dr[7]); 279 280 return 0; 281 } 282 283 int hvf_get_registers(CPUState *cs) 284 { 285 X86CPU *x86cpu = X86_CPU(cs); 286 CPUX86State *env = &x86cpu->env; 287 288 env->regs[R_EAX] = rreg(cs->accel->fd, HV_X86_RAX); 289 env->regs[R_EBX] = rreg(cs->accel->fd, HV_X86_RBX); 290 env->regs[R_ECX] = rreg(cs->accel->fd, HV_X86_RCX); 291 env->regs[R_EDX] = rreg(cs->accel->fd, HV_X86_RDX); 292 env->regs[R_EBP] = rreg(cs->accel->fd, HV_X86_RBP); 293 env->regs[R_ESP] = rreg(cs->accel->fd, HV_X86_RSP); 294 env->regs[R_ESI] = rreg(cs->accel->fd, HV_X86_RSI); 295 env->regs[R_EDI] = rreg(cs->accel->fd, HV_X86_RDI); 296 env->regs[8] = rreg(cs->accel->fd, HV_X86_R8); 297 env->regs[9] = rreg(cs->accel->fd, HV_X86_R9); 298 env->regs[10] = rreg(cs->accel->fd, HV_X86_R10); 299 env->regs[11] = rreg(cs->accel->fd, HV_X86_R11); 300 env->regs[12] = rreg(cs->accel->fd, HV_X86_R12); 301 env->regs[13] = rreg(cs->accel->fd, HV_X86_R13); 302 env->regs[14] = rreg(cs->accel->fd, HV_X86_R14); 303 env->regs[15] = rreg(cs->accel->fd, HV_X86_R15); 304 305 env->eflags = rreg(cs->accel->fd, HV_X86_RFLAGS); 306 env->eip = rreg(cs->accel->fd, HV_X86_RIP); 307 308 hvf_get_xsave(cs); 309 env->xcr0 = rreg(cs->accel->fd, HV_X86_XCR0); 310 311 hvf_get_segments(cs); 312 hvf_get_msrs(cs); 313 314 env->dr[0] = rreg(cs->accel->fd, HV_X86_DR0); 315 env->dr[1] = rreg(cs->accel->fd, HV_X86_DR1); 316 env->dr[2] = rreg(cs->accel->fd, HV_X86_DR2); 317 env->dr[3] = rreg(cs->accel->fd, HV_X86_DR3); 318 env->dr[4] = rreg(cs->accel->fd, HV_X86_DR4); 319 env->dr[5] = rreg(cs->accel->fd, HV_X86_DR5); 320 env->dr[6] = rreg(cs->accel->fd, HV_X86_DR6); 321 env->dr[7] = rreg(cs->accel->fd, HV_X86_DR7); 322 323 x86_update_hflags(env); 324 return 0; 325 } 326 327 static void vmx_set_int_window_exiting(CPUState *cs) 328 { 329 uint64_t val; 330 val = rvmcs(cs->accel->fd, VMCS_PRI_PROC_BASED_CTLS); 331 wvmcs(cs->accel->fd, VMCS_PRI_PROC_BASED_CTLS, val | 332 VMCS_PRI_PROC_BASED_CTLS_INT_WINDOW_EXITING); 333 } 334 335 void vmx_clear_int_window_exiting(CPUState *cs) 336 { 337 uint64_t val; 338 val = rvmcs(cs->accel->fd, VMCS_PRI_PROC_BASED_CTLS); 339 wvmcs(cs->accel->fd, VMCS_PRI_PROC_BASED_CTLS, val & 340 ~VMCS_PRI_PROC_BASED_CTLS_INT_WINDOW_EXITING); 341 } 342 343 bool hvf_inject_interrupts(CPUState *cs) 344 { 345 X86CPU *x86cpu = X86_CPU(cs); 346 CPUX86State *env = &x86cpu->env; 347 348 uint8_t vector; 349 uint64_t intr_type; 350 bool have_event = true; 351 if (env->interrupt_injected != -1) { 352 vector = env->interrupt_injected; 353 if (env->ins_len) { 354 intr_type = VMCS_INTR_T_SWINTR; 355 } else { 356 intr_type = VMCS_INTR_T_HWINTR; 357 } 358 } else if (env->exception_nr != -1) { 359 vector = env->exception_nr; 360 if (vector == EXCP03_INT3 || vector == EXCP04_INTO) { 361 intr_type = VMCS_INTR_T_SWEXCEPTION; 362 } else { 363 intr_type = VMCS_INTR_T_HWEXCEPTION; 364 } 365 } else if (env->nmi_injected) { 366 vector = EXCP02_NMI; 367 intr_type = VMCS_INTR_T_NMI; 368 } else { 369 have_event = false; 370 } 371 372 uint64_t info = 0; 373 if (have_event) { 374 info = vector | intr_type | VMCS_INTR_VALID; 375 uint64_t reason = rvmcs(cs->accel->fd, VMCS_EXIT_REASON); 376 if (env->nmi_injected && reason != EXIT_REASON_TASK_SWITCH) { 377 vmx_clear_nmi_blocking(cs); 378 } 379 380 if (!(env->hflags2 & HF2_NMI_MASK) || intr_type != VMCS_INTR_T_NMI) { 381 info &= ~(1 << 12); /* clear undefined bit */ 382 if (intr_type == VMCS_INTR_T_SWINTR || 383 intr_type == VMCS_INTR_T_SWEXCEPTION) { 384 wvmcs(cs->accel->fd, VMCS_ENTRY_INST_LENGTH, env->ins_len); 385 } 386 387 if (env->has_error_code) { 388 wvmcs(cs->accel->fd, VMCS_ENTRY_EXCEPTION_ERROR, 389 env->error_code); 390 /* Indicate that VMCS_ENTRY_EXCEPTION_ERROR is valid */ 391 info |= VMCS_INTR_DEL_ERRCODE; 392 } 393 /*printf("reinject %lx err %d\n", info, err);*/ 394 wvmcs(cs->accel->fd, VMCS_ENTRY_INTR_INFO, info); 395 }; 396 } 397 398 if (cs->interrupt_request & CPU_INTERRUPT_NMI) { 399 if (!(env->hflags2 & HF2_NMI_MASK) && !(info & VMCS_INTR_VALID)) { 400 cs->interrupt_request &= ~CPU_INTERRUPT_NMI; 401 info = VMCS_INTR_VALID | VMCS_INTR_T_NMI | EXCP02_NMI; 402 wvmcs(cs->accel->fd, VMCS_ENTRY_INTR_INFO, info); 403 } else { 404 vmx_set_nmi_window_exiting(cs); 405 } 406 } 407 408 if (!(env->hflags & HF_INHIBIT_IRQ_MASK) && 409 (cs->interrupt_request & CPU_INTERRUPT_HARD) && 410 (env->eflags & IF_MASK) && !(info & VMCS_INTR_VALID)) { 411 int line = cpu_get_pic_interrupt(env); 412 cs->interrupt_request &= ~CPU_INTERRUPT_HARD; 413 if (line >= 0) { 414 wvmcs(cs->accel->fd, VMCS_ENTRY_INTR_INFO, line | 415 VMCS_INTR_VALID | VMCS_INTR_T_HWINTR); 416 } 417 } 418 if (cs->interrupt_request & CPU_INTERRUPT_HARD) { 419 vmx_set_int_window_exiting(cs); 420 } 421 return (cs->interrupt_request 422 & (CPU_INTERRUPT_INIT | CPU_INTERRUPT_TPR)); 423 } 424 425 int hvf_process_events(CPUState *cs) 426 { 427 X86CPU *cpu = X86_CPU(cs); 428 CPUX86State *env = &cpu->env; 429 430 if (!cs->vcpu_dirty) { 431 /* light weight sync for CPU_INTERRUPT_HARD and IF_MASK */ 432 env->eflags = rreg(cs->accel->fd, HV_X86_RFLAGS); 433 } 434 435 if (cs->interrupt_request & CPU_INTERRUPT_INIT) { 436 cpu_synchronize_state(cs); 437 do_cpu_init(cpu); 438 } 439 440 if (cs->interrupt_request & CPU_INTERRUPT_POLL) { 441 cs->interrupt_request &= ~CPU_INTERRUPT_POLL; 442 apic_poll_irq(cpu->apic_state); 443 } 444 if (((cs->interrupt_request & CPU_INTERRUPT_HARD) && 445 (env->eflags & IF_MASK)) || 446 (cs->interrupt_request & CPU_INTERRUPT_NMI)) { 447 cs->halted = 0; 448 } 449 if (cs->interrupt_request & CPU_INTERRUPT_SIPI) { 450 cpu_synchronize_state(cs); 451 do_cpu_sipi(cpu); 452 } 453 if (cs->interrupt_request & CPU_INTERRUPT_TPR) { 454 cs->interrupt_request &= ~CPU_INTERRUPT_TPR; 455 cpu_synchronize_state(cs); 456 apic_handle_tpr_access_report(cpu->apic_state, env->eip, 457 env->tpr_access_type); 458 } 459 return cs->halted; 460 } 461