// SPDX-License-Identifier: GPL-2.0-only
/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * AMD SVM support
 *
 * Copyright (C) 2006 Qumranet, Inc.
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 *
 * Authors:
 *   Yaniv Kamay  <yaniv@qumranet.com>
 *   Avi Kivity   <avi@qumranet.com>
 */

#define pr_fmt(fmt) "SVM: " fmt

#include <linux/kvm_types.h>
#include <linux/kvm_host.h>
#include <linux/kernel.h>

#include <asm/msr-index.h>
#include <asm/debugreg.h>

#include "kvm_emulate.h"
#include "trace.h"
#include "mmu.h"
#include "x86.h"
#include "svm.h"

static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
				       struct x86_exception *fault)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	if (svm->vmcb->control.exit_code != SVM_EXIT_NPF) {
		/*
		 * TODO: track the cause of the nested page fault, and
		 * correctly fill in the high bits of exit_info_1.
		 */
		svm->vmcb->control.exit_code = SVM_EXIT_NPF;
		svm->vmcb->control.exit_code_hi = 0;
		svm->vmcb->control.exit_info_1 = (1ULL << 32);
		svm->vmcb->control.exit_info_2 = fault->address;
	}

	svm->vmcb->control.exit_info_1 &= ~0xffffffffULL;
	svm->vmcb->control.exit_info_1 |= fault->error_code;

	/*
	 * The present bit is always zero for page structure faults on real
	 * hardware.
	 */
	if (svm->vmcb->control.exit_info_1 & (2ULL << 32))
		svm->vmcb->control.exit_info_1 &= ~1;

	nested_svm_vmexit(svm);
}

static u64 nested_svm_get_tdp_pdptr(struct kvm_vcpu *vcpu, int index)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	u64 cr3 = svm->nested.nested_cr3;
	u64 pdpte;
	int ret;

	ret = kvm_vcpu_read_guest_page(vcpu, gpa_to_gfn(__sme_clr(cr3)), &pdpte,
				       offset_in_page(cr3) + index * 8, 8);
	if (ret)
		return 0;
	return pdpte;
}

static unsigned long nested_svm_get_tdp_cr3(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	return svm->nested.nested_cr3;
}

static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
{
	WARN_ON(mmu_is_nested(vcpu));

	vcpu->arch.mmu = &vcpu->arch.guest_mmu;
	kvm_init_shadow_mmu(vcpu);
	vcpu->arch.mmu->get_guest_pgd = nested_svm_get_tdp_cr3;
	vcpu->arch.mmu->get_pdptr = nested_svm_get_tdp_pdptr;
	vcpu->arch.mmu->inject_page_fault = nested_svm_inject_npf_exit;
	vcpu->arch.mmu->shadow_root_level = kvm_x86_ops.get_tdp_level(vcpu);
	reset_shadow_zero_bits_mask(vcpu, vcpu->arch.mmu);
	vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu;
}

static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu)
{
	vcpu->arch.mmu = &vcpu->arch.root_mmu;
	vcpu->arch.walk_mmu = &vcpu->arch.root_mmu;
}
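/*
 * Recompute the intercept bits programmed into the active VMCB while L2
 * runs: start from the intercepts KVM itself needs (saved in hsave before
 * VMRUN), adjust the ones L0 does not care about in guest mode, and OR in
 * the intercepts L1 requested in the nested VMCB.
 */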
void recalc_intercepts(struct vcpu_svm *svm)
{
	struct vmcb_control_area *c, *h;
	struct nested_state *g;

	mark_dirty(svm->vmcb, VMCB_INTERCEPTS);

	if (!is_guest_mode(&svm->vcpu))
		return;

	c = &svm->vmcb->control;
	h = &svm->nested.hsave->control;
	g = &svm->nested;

	c->intercept_cr = h->intercept_cr;
	c->intercept_dr = h->intercept_dr;
	c->intercept_exceptions = h->intercept_exceptions;
	c->intercept = h->intercept;

	if (svm->vcpu.arch.hflags & HF_VINTR_MASK) {
		/* We only want the cr8 intercept bits of L1 */
		c->intercept_cr &= ~(1U << INTERCEPT_CR8_READ);
		c->intercept_cr &= ~(1U << INTERCEPT_CR8_WRITE);

		/*
		 * Once running L2 with HF_VINTR_MASK, EFLAGS.IF does not
		 * affect any interrupt we may want to inject; therefore,
		 * interrupt window vmexits are irrelevant to L0.
		 */
		c->intercept &= ~(1ULL << INTERCEPT_VINTR);
	}

	/* We don't want to see VMMCALLs from a nested guest */
	c->intercept &= ~(1ULL << INTERCEPT_VMMCALL);

	c->intercept_cr |= g->intercept_cr;
	c->intercept_dr |= g->intercept_dr;
	c->intercept_exceptions |= g->intercept_exceptions;
	c->intercept |= g->intercept;
}

static void copy_vmcb_control_area(struct vmcb *dst_vmcb, struct vmcb *from_vmcb)
{
	struct vmcb_control_area *dst = &dst_vmcb->control;
	struct vmcb_control_area *from = &from_vmcb->control;

	dst->intercept_cr = from->intercept_cr;
	dst->intercept_dr = from->intercept_dr;
	dst->intercept_exceptions = from->intercept_exceptions;
	dst->intercept = from->intercept;
	dst->iopm_base_pa = from->iopm_base_pa;
	dst->msrpm_base_pa = from->msrpm_base_pa;
	dst->tsc_offset = from->tsc_offset;
	dst->asid = from->asid;
	dst->tlb_ctl = from->tlb_ctl;
	dst->int_ctl = from->int_ctl;
	dst->int_vector = from->int_vector;
	dst->int_state = from->int_state;
	dst->exit_code = from->exit_code;
	dst->exit_code_hi = from->exit_code_hi;
	dst->exit_info_1 = from->exit_info_1;
	dst->exit_info_2 = from->exit_info_2;
	dst->exit_int_info = from->exit_int_info;
	dst->exit_int_info_err = from->exit_int_info_err;
	dst->nested_ctl = from->nested_ctl;
	dst->event_inj = from->event_inj;
	dst->event_inj_err = from->event_inj_err;
	dst->nested_cr3 = from->nested_cr3;
	dst->virt_ext = from->virt_ext;
	dst->pause_filter_count = from->pause_filter_count;
	dst->pause_filter_thresh = from->pause_filter_thresh;
}

static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
{
	/*
	 * This function merges the MSR permission bitmaps of KVM and the
	 * nested VMCB.  It is optimized so that it only merges the parts
	 * where the KVM MSR permission bitmap may contain zero bits.
	 */
	int i;

	if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT)))
		return true;

	for (i = 0; i < MSRPM_OFFSETS; i++) {
		u32 value, p;
		u64 offset;

		if (msrpm_offsets[i] == 0xffffffff)
			break;

		p = msrpm_offsets[i];
		offset = svm->nested.vmcb_msrpm + (p * 4);

		if (kvm_vcpu_read_guest(&svm->vcpu, offset, &value, 4))
			return false;

		svm->nested.msrpm[p] = svm->msrpm[p] | value;
	}

	svm->vmcb->control.msrpm_base_pa = __sme_set(__pa(svm->nested.msrpm));

	return true;
}
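/*
 * Basic consistency checks on the nested VMCB before entering guest mode;
 * if any of them fails, the caller reports SVM_EXIT_ERR in the nested VMCB
 * and does not enter L2.
 */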
static bool nested_vmcb_checks(struct vmcb *vmcb)
{
	if ((vmcb->save.efer & EFER_SVME) == 0)
		return false;

	if ((vmcb->control.intercept & (1ULL << INTERCEPT_VMRUN)) == 0)
		return false;

	if (vmcb->control.asid == 0)
		return false;

	if ((vmcb->control.nested_ctl & SVM_NESTED_CTL_NP_ENABLE) &&
	    !npt_enabled)
		return false;

	return true;
}
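/*
 * Load the L2 (nested) guest state from nested_vmcb into the current VMCB,
 * set up the nested NPT MMU if nested paging is enabled, and switch the
 * vcpu into guest mode.
 */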
void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
			  struct vmcb *nested_vmcb, struct kvm_host_map *map)
{
	bool evaluate_pending_interrupts =
		is_intercept(svm, INTERCEPT_VINTR) ||
		is_intercept(svm, INTERCEPT_IRET);

	if (kvm_get_rflags(&svm->vcpu) & X86_EFLAGS_IF)
		svm->vcpu.arch.hflags |= HF_HIF_MASK;
	else
		svm->vcpu.arch.hflags &= ~HF_HIF_MASK;

	if (nested_vmcb->control.nested_ctl & SVM_NESTED_CTL_NP_ENABLE) {
		svm->nested.nested_cr3 = nested_vmcb->control.nested_cr3;
		nested_svm_init_mmu_context(&svm->vcpu);
	}

	/* Load the nested guest state */
	svm->vmcb->save.es = nested_vmcb->save.es;
	svm->vmcb->save.cs = nested_vmcb->save.cs;
	svm->vmcb->save.ss = nested_vmcb->save.ss;
	svm->vmcb->save.ds = nested_vmcb->save.ds;
	svm->vmcb->save.gdtr = nested_vmcb->save.gdtr;
	svm->vmcb->save.idtr = nested_vmcb->save.idtr;
	kvm_set_rflags(&svm->vcpu, nested_vmcb->save.rflags);
	svm_set_efer(&svm->vcpu, nested_vmcb->save.efer);
	svm_set_cr0(&svm->vcpu, nested_vmcb->save.cr0);
	svm_set_cr4(&svm->vcpu, nested_vmcb->save.cr4);
	if (npt_enabled) {
		svm->vmcb->save.cr3 = nested_vmcb->save.cr3;
		svm->vcpu.arch.cr3 = nested_vmcb->save.cr3;
	} else
		(void)kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3);

	/* Guest paging mode is active - reset mmu */
	kvm_mmu_reset_context(&svm->vcpu);

	svm->vmcb->save.cr2 = svm->vcpu.arch.cr2 = nested_vmcb->save.cr2;
	kvm_rax_write(&svm->vcpu, nested_vmcb->save.rax);
	kvm_rsp_write(&svm->vcpu, nested_vmcb->save.rsp);
	kvm_rip_write(&svm->vcpu, nested_vmcb->save.rip);

	/* In case we don't even reach vcpu_run, the fields are not updated */
	svm->vmcb->save.rax = nested_vmcb->save.rax;
	svm->vmcb->save.rsp = nested_vmcb->save.rsp;
	svm->vmcb->save.rip = nested_vmcb->save.rip;
	svm->vmcb->save.dr7 = nested_vmcb->save.dr7;
	svm->vcpu.arch.dr6 = nested_vmcb->save.dr6;
	svm->vmcb->save.cpl = nested_vmcb->save.cpl;

	svm->nested.vmcb_msrpm = nested_vmcb->control.msrpm_base_pa & ~0x0fffULL;
	svm->nested.vmcb_iopm = nested_vmcb->control.iopm_base_pa & ~0x0fffULL;

	/* cache intercepts */
	svm->nested.intercept_cr = nested_vmcb->control.intercept_cr;
	svm->nested.intercept_dr = nested_vmcb->control.intercept_dr;
	svm->nested.intercept_exceptions = nested_vmcb->control.intercept_exceptions;
	svm->nested.intercept = nested_vmcb->control.intercept;

	svm_flush_tlb(&svm->vcpu, true);
	svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK;
	if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK)
		svm->vcpu.arch.hflags |= HF_VINTR_MASK;
	else
		svm->vcpu.arch.hflags &= ~HF_VINTR_MASK;

	svm->vcpu.arch.tsc_offset += nested_vmcb->control.tsc_offset;
	svm->vmcb->control.tsc_offset = svm->vcpu.arch.tsc_offset;

	svm->vmcb->control.virt_ext = nested_vmcb->control.virt_ext;
	svm->vmcb->control.int_vector = nested_vmcb->control.int_vector;
	svm->vmcb->control.int_state = nested_vmcb->control.int_state;
	svm->vmcb->control.event_inj = nested_vmcb->control.event_inj;
	svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err;

	svm->vmcb->control.pause_filter_count =
		nested_vmcb->control.pause_filter_count;
	svm->vmcb->control.pause_filter_thresh =
		nested_vmcb->control.pause_filter_thresh;

	kvm_vcpu_unmap(&svm->vcpu, map, true);

	/* Enter Guest-Mode */
	enter_guest_mode(&svm->vcpu);

	/*
	 * Merge guest and host intercepts - must be called with vcpu in
	 * guest-mode to take effect here
	 */
	recalc_intercepts(svm);

	svm->nested.vmcb = vmcb_gpa;

	/*
	 * If L1 had a pending IRQ/NMI before executing VMRUN,
	 * which wasn't delivered because it was disallowed (e.g.
	 * interrupts disabled), L0 needs to evaluate if this pending
	 * event should cause an exit from L2 to L1 or be delivered
	 * directly to L2.
	 *
	 * Usually this would be handled by the processor noticing an
	 * IRQ/NMI window request.  However, VMRUN can unblock interrupts
	 * by implicitly setting GIF, so force L0 to perform pending event
	 * evaluation by requesting a KVM_REQ_EVENT.
	 */
	enable_gif(svm);
	if (unlikely(evaluate_pending_interrupts))
		kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);

	mark_all_dirty(svm->vmcb);
}
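/*
 * Handle the VMRUN intercept: map the VMCB that L1's RAX points to, validate
 * it, save the current (L1) state into hsave and switch to L2 via
 * enter_svm_guest_mode().
 */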
int nested_svm_vmrun(struct vcpu_svm *svm)
{
	int ret;
	struct vmcb *nested_vmcb;
	struct vmcb *hsave = svm->nested.hsave;
	struct vmcb *vmcb = svm->vmcb;
	struct kvm_host_map map;
	u64 vmcb_gpa;

	vmcb_gpa = svm->vmcb->save.rax;

	ret = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(vmcb_gpa), &map);
	if (ret == -EINVAL) {
		kvm_inject_gp(&svm->vcpu, 0);
		return 1;
	} else if (ret) {
		return kvm_skip_emulated_instruction(&svm->vcpu);
	}

	ret = kvm_skip_emulated_instruction(&svm->vcpu);

	nested_vmcb = map.hva;

	if (!nested_vmcb_checks(nested_vmcb)) {
		nested_vmcb->control.exit_code = SVM_EXIT_ERR;
		nested_vmcb->control.exit_code_hi = 0;
		nested_vmcb->control.exit_info_1 = 0;
		nested_vmcb->control.exit_info_2 = 0;

		kvm_vcpu_unmap(&svm->vcpu, &map, true);

		return ret;
	}

	trace_kvm_nested_vmrun(svm->vmcb->save.rip, vmcb_gpa,
			       nested_vmcb->save.rip,
			       nested_vmcb->control.int_ctl,
			       nested_vmcb->control.event_inj,
			       nested_vmcb->control.nested_ctl);

	trace_kvm_nested_intercepts(nested_vmcb->control.intercept_cr & 0xffff,
				    nested_vmcb->control.intercept_cr >> 16,
				    nested_vmcb->control.intercept_exceptions,
				    nested_vmcb->control.intercept);

	/* Clear internal status */
	kvm_clear_exception_queue(&svm->vcpu);
	kvm_clear_interrupt_queue(&svm->vcpu);

	/*
	 * Save the old vmcb, so we don't need to pick what we save, but can
	 * restore everything when a VMEXIT occurs
	 */
	hsave->save.es = vmcb->save.es;
	hsave->save.cs = vmcb->save.cs;
	hsave->save.ss = vmcb->save.ss;
	hsave->save.ds = vmcb->save.ds;
	hsave->save.gdtr = vmcb->save.gdtr;
	hsave->save.idtr = vmcb->save.idtr;
	hsave->save.efer = svm->vcpu.arch.efer;
	hsave->save.cr0 = kvm_read_cr0(&svm->vcpu);
	hsave->save.cr4 = svm->vcpu.arch.cr4;
	hsave->save.rflags = kvm_get_rflags(&svm->vcpu);
	hsave->save.rip = kvm_rip_read(&svm->vcpu);
	hsave->save.rsp = vmcb->save.rsp;
	hsave->save.rax = vmcb->save.rax;
	if (npt_enabled)
		hsave->save.cr3 = vmcb->save.cr3;
	else
		hsave->save.cr3 = kvm_read_cr3(&svm->vcpu);

	copy_vmcb_control_area(hsave, vmcb);

	enter_svm_guest_mode(svm, vmcb_gpa, nested_vmcb, &map);

	if (!nested_svm_vmrun_msrpm(svm)) {
		svm->vmcb->control.exit_code = SVM_EXIT_ERR;
		svm->vmcb->control.exit_code_hi = 0;
		svm->vmcb->control.exit_info_1 = 0;
		svm->vmcb->control.exit_info_2 = 0;

		nested_svm_vmexit(svm);
	}

	return ret;
}

void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
{
	to_vmcb->save.fs = from_vmcb->save.fs;
	to_vmcb->save.gs = from_vmcb->save.gs;
	to_vmcb->save.tr = from_vmcb->save.tr;
	to_vmcb->save.ldtr = from_vmcb->save.ldtr;
	to_vmcb->save.kernel_gs_base = from_vmcb->save.kernel_gs_base;
	to_vmcb->save.star = from_vmcb->save.star;
	to_vmcb->save.lstar = from_vmcb->save.lstar;
	to_vmcb->save.cstar = from_vmcb->save.cstar;
	to_vmcb->save.sfmask = from_vmcb->save.sfmask;
	to_vmcb->save.sysenter_cs = from_vmcb->save.sysenter_cs;
	to_vmcb->save.sysenter_esp = from_vmcb->save.sysenter_esp;
	to_vmcb->save.sysenter_eip = from_vmcb->save.sysenter_eip;
}
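/*
 * Emulate a #VMEXIT from L2 to L1: copy L2's state and the exit information
 * into the nested VMCB, restore the L1 state saved in hsave, and leave
 * guest mode.
 */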
int nested_svm_vmexit(struct vcpu_svm *svm)
{
	int rc;
	struct vmcb *nested_vmcb;
	struct vmcb *hsave = svm->nested.hsave;
	struct vmcb *vmcb = svm->vmcb;
	struct kvm_host_map map;

	trace_kvm_nested_vmexit_inject(vmcb->control.exit_code,
				       vmcb->control.exit_info_1,
				       vmcb->control.exit_info_2,
				       vmcb->control.exit_int_info,
				       vmcb->control.exit_int_info_err,
				       KVM_ISA_SVM);

	rc = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(svm->nested.vmcb), &map);
	if (rc) {
		if (rc == -EINVAL)
			kvm_inject_gp(&svm->vcpu, 0);
		return 1;
	}

	nested_vmcb = map.hva;

	/* Exit Guest-Mode */
	leave_guest_mode(&svm->vcpu);
	svm->nested.vmcb = 0;

	/* Give the current vmcb to the guest */
	disable_gif(svm);

	nested_vmcb->save.es = vmcb->save.es;
	nested_vmcb->save.cs = vmcb->save.cs;
	nested_vmcb->save.ss = vmcb->save.ss;
	nested_vmcb->save.ds = vmcb->save.ds;
	nested_vmcb->save.gdtr = vmcb->save.gdtr;
	nested_vmcb->save.idtr = vmcb->save.idtr;
	nested_vmcb->save.efer = svm->vcpu.arch.efer;
	nested_vmcb->save.cr0 = kvm_read_cr0(&svm->vcpu);
	nested_vmcb->save.cr3 = kvm_read_cr3(&svm->vcpu);
	nested_vmcb->save.cr2 = vmcb->save.cr2;
	nested_vmcb->save.cr4 = svm->vcpu.arch.cr4;
	nested_vmcb->save.rflags = kvm_get_rflags(&svm->vcpu);
	nested_vmcb->save.rip = vmcb->save.rip;
	nested_vmcb->save.rsp = vmcb->save.rsp;
	nested_vmcb->save.rax = vmcb->save.rax;
	nested_vmcb->save.dr7 = vmcb->save.dr7;
	nested_vmcb->save.dr6 = svm->vcpu.arch.dr6;
	nested_vmcb->save.cpl = vmcb->save.cpl;

	nested_vmcb->control.int_ctl = vmcb->control.int_ctl;
	nested_vmcb->control.int_vector = vmcb->control.int_vector;
	nested_vmcb->control.int_state = vmcb->control.int_state;
	nested_vmcb->control.exit_code = vmcb->control.exit_code;
	nested_vmcb->control.exit_code_hi = vmcb->control.exit_code_hi;
	nested_vmcb->control.exit_info_1 = vmcb->control.exit_info_1;
	nested_vmcb->control.exit_info_2 = vmcb->control.exit_info_2;
	nested_vmcb->control.exit_int_info = vmcb->control.exit_int_info;
	nested_vmcb->control.exit_int_info_err = vmcb->control.exit_int_info_err;

	if (svm->nrips_enabled)
		nested_vmcb->control.next_rip = vmcb->control.next_rip;

	/*
	 * If we emulate a VMRUN/#VMEXIT in the same host #vmexit cycle we have
	 * to make sure that we do not lose injected events. So check event_inj
	 * here and copy it to exit_int_info if it is valid.
	 * Exit_int_info and event_inj can't both be valid because the case
	 * below only happens on a VMRUN instruction intercept which has
	 * no valid exit_int_info set.
	 */
	if (vmcb->control.event_inj & SVM_EVTINJ_VALID) {
		struct vmcb_control_area *nc = &nested_vmcb->control;

		nc->exit_int_info = vmcb->control.event_inj;
		nc->exit_int_info_err = vmcb->control.event_inj_err;
	}

	nested_vmcb->control.tlb_ctl = 0;
	nested_vmcb->control.event_inj = 0;
	nested_vmcb->control.event_inj_err = 0;

	nested_vmcb->control.pause_filter_count =
		svm->vmcb->control.pause_filter_count;
	nested_vmcb->control.pause_filter_thresh =
		svm->vmcb->control.pause_filter_thresh;

	/* We always set V_INTR_MASKING and remember the old value in hflags */
	if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
		nested_vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK;

	/* Restore the original control entries */
	copy_vmcb_control_area(vmcb, hsave);

	svm->vcpu.arch.tsc_offset = svm->vmcb->control.tsc_offset;
	kvm_clear_exception_queue(&svm->vcpu);
	kvm_clear_interrupt_queue(&svm->vcpu);

	svm->nested.nested_cr3 = 0;

	/* Restore selected save entries */
	svm->vmcb->save.es = hsave->save.es;
	svm->vmcb->save.cs = hsave->save.cs;
	svm->vmcb->save.ss = hsave->save.ss;
	svm->vmcb->save.ds = hsave->save.ds;
	svm->vmcb->save.gdtr = hsave->save.gdtr;
	svm->vmcb->save.idtr = hsave->save.idtr;
	kvm_set_rflags(&svm->vcpu, hsave->save.rflags);
	svm_set_efer(&svm->vcpu, hsave->save.efer);
	svm_set_cr0(&svm->vcpu, hsave->save.cr0 | X86_CR0_PE);
	svm_set_cr4(&svm->vcpu, hsave->save.cr4);
	if (npt_enabled) {
		svm->vmcb->save.cr3 = hsave->save.cr3;
		svm->vcpu.arch.cr3 = hsave->save.cr3;
	} else {
		(void)kvm_set_cr3(&svm->vcpu, hsave->save.cr3);
	}
	kvm_rax_write(&svm->vcpu, hsave->save.rax);
	kvm_rsp_write(&svm->vcpu, hsave->save.rsp);
	kvm_rip_write(&svm->vcpu, hsave->save.rip);
	svm->vmcb->save.dr7 = 0;
	svm->vmcb->save.cpl = 0;
	svm->vmcb->control.exit_int_info = 0;

	mark_all_dirty(svm->vmcb);

	kvm_vcpu_unmap(&svm->vcpu, &map, true);

	nested_svm_uninit_mmu_context(&svm->vcpu);
	kvm_mmu_reset_context(&svm->vcpu);
	kvm_mmu_load(&svm->vcpu);

	/*
	 * Drop what we picked up for L2 via svm_complete_interrupts() so it
	 * doesn't end up in L1.
	 */
	svm->vcpu.arch.nmi_injected = false;
	kvm_clear_exception_queue(&svm->vcpu);
	kvm_clear_interrupt_queue(&svm->vcpu);

	return 0;
}
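/*
 * Consult L1's MSR permission bitmap for the MSR in RCX to decide whether
 * this MSR intercept must be forwarded to L1 (NESTED_EXIT_DONE) or can be
 * handled by L0 (NESTED_EXIT_HOST).
 */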
static int nested_svm_exit_handled_msr(struct vcpu_svm *svm)
{
	u32 offset, msr, value;
	int write, mask;

	if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT)))
		return NESTED_EXIT_HOST;

	msr = svm->vcpu.arch.regs[VCPU_REGS_RCX];
	offset = svm_msrpm_offset(msr);
	write = svm->vmcb->control.exit_info_1 & 1;
	mask = 1 << ((2 * (msr & 0xf)) + write);

	if (offset == MSR_INVALID)
		return NESTED_EXIT_DONE;

	/* Offset is in 32 bit units but we need it in byte units */
	offset *= 4;

	if (kvm_vcpu_read_guest(&svm->vcpu, svm->nested.vmcb_msrpm + offset, &value, 4))
		return NESTED_EXIT_DONE;

	return (value & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
}
/* DB exceptions for our internal use must not cause vmexit */
static int nested_svm_intercept_db(struct vcpu_svm *svm)
{
	unsigned long dr6 = svm->vmcb->save.dr6;

	/* Always catch it and pass it to userspace if debugging.  */
	if (svm->vcpu.guest_debug &
	    (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
		return NESTED_EXIT_HOST;

	/* if we're not singlestepping, it's not ours */
	if (!svm->nmi_singlestep)
		goto reflected_db;

	/* if it's not a singlestep exception, it's not ours */
	if (!(dr6 & DR6_BS))
		goto reflected_db;

	/* if the guest is singlestepping, it should get the vmexit */
	if (svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF) {
		disable_nmi_singlestep(svm);
		goto reflected_db;
	}

	/* it's ours, the nested hypervisor must not see this one */
	return NESTED_EXIT_HOST;

reflected_db:
	/*
	 * Synchronize guest DR6 here just like in kvm_deliver_exception_payload;
	 * it will be moved into the nested VMCB by nested_svm_vmexit.  Once
	 * exceptions are moved to svm_check_nested_events, all this stuff
	 * will just go away and we can just return NESTED_EXIT_HOST
	 * unconditionally.  db_interception will queue the exception, which
	 * will be processed by svm_check_nested_events if a nested vmexit is
	 * required, and we will just use kvm_deliver_exception_payload to copy
	 * the payload to DR6 before vmexit.
	 */
	WARN_ON(svm->vcpu.arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT);
	svm->vcpu.arch.dr6 &= ~(DR_TRAP_BITS | DR6_RTM);
	svm->vcpu.arch.dr6 |= dr6 & ~DR6_FIXED_1;
	return NESTED_EXIT_DONE;
}

static int nested_svm_intercept_ioio(struct vcpu_svm *svm)
{
	unsigned port, size, iopm_len;
	u16 val, mask;
	u8 start_bit;
	u64 gpa;

	if (!(svm->nested.intercept & (1ULL << INTERCEPT_IOIO_PROT)))
		return NESTED_EXIT_HOST;

	port = svm->vmcb->control.exit_info_1 >> 16;
	size = (svm->vmcb->control.exit_info_1 & SVM_IOIO_SIZE_MASK) >>
		SVM_IOIO_SIZE_SHIFT;
	gpa = svm->nested.vmcb_iopm + (port / 8);
	start_bit = port % 8;
	iopm_len = (start_bit + size > 8) ? 2 : 1;
	mask = (0xf >> (4 - size)) << start_bit;
	val = 0;

	if (kvm_vcpu_read_guest(&svm->vcpu, gpa, &val, iopm_len))
		return NESTED_EXIT_DONE;

	return (val & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
}
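/*
 * Check whether the current exit while running L2 matches an intercept that
 * L1 requested in the nested VMCB; returns NESTED_EXIT_DONE if the exit must
 * be reflected to L1 and NESTED_EXIT_HOST if L0 handles it itself.
 */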
static int nested_svm_intercept(struct vcpu_svm *svm)
{
	u32 exit_code = svm->vmcb->control.exit_code;
	int vmexit = NESTED_EXIT_HOST;

	switch (exit_code) {
	case SVM_EXIT_MSR:
		vmexit = nested_svm_exit_handled_msr(svm);
		break;
	case SVM_EXIT_IOIO:
		vmexit = nested_svm_intercept_ioio(svm);
		break;
	case SVM_EXIT_READ_CR0 ... SVM_EXIT_WRITE_CR8: {
		u32 bit = 1U << (exit_code - SVM_EXIT_READ_CR0);
		if (svm->nested.intercept_cr & bit)
			vmexit = NESTED_EXIT_DONE;
		break;
	}
	case SVM_EXIT_READ_DR0 ... SVM_EXIT_WRITE_DR7: {
		u32 bit = 1U << (exit_code - SVM_EXIT_READ_DR0);
		if (svm->nested.intercept_dr & bit)
			vmexit = NESTED_EXIT_DONE;
		break;
	}
	case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: {
		u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE);
		if (svm->nested.intercept_exceptions & excp_bits) {
			if (exit_code == SVM_EXIT_EXCP_BASE + DB_VECTOR)
				vmexit = nested_svm_intercept_db(svm);
			else if (exit_code == SVM_EXIT_EXCP_BASE + BP_VECTOR &&
				 svm->vcpu.guest_debug & KVM_GUESTDBG_USE_SW_BP)
				vmexit = NESTED_EXIT_HOST;
			else
				vmexit = NESTED_EXIT_DONE;
		}
		/* async page fault always causes a vmexit */
		else if ((exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR) &&
			 svm->vcpu.arch.exception.nested_apf != 0)
			vmexit = NESTED_EXIT_DONE;
		break;
	}
	case SVM_EXIT_ERR: {
		vmexit = NESTED_EXIT_DONE;
		break;
	}
	default: {
		u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR);
		if (svm->nested.intercept & exit_bits)
			vmexit = NESTED_EXIT_DONE;
	}
	}

	return vmexit;
}

int nested_svm_exit_handled(struct vcpu_svm *svm)
{
	int vmexit;

	vmexit = nested_svm_intercept(svm);

	if (vmexit == NESTED_EXIT_DONE)
		nested_svm_vmexit(svm);

	return vmexit;
}

int nested_svm_check_permissions(struct vcpu_svm *svm)
{
	if (!(svm->vcpu.arch.efer & EFER_SVME) ||
	    !is_paging(&svm->vcpu)) {
		kvm_queue_exception(&svm->vcpu, UD_VECTOR);
		return 1;
	}

	if (svm->vmcb->save.cpl) {
		kvm_inject_gp(&svm->vcpu, 0);
		return 1;
	}

	return 0;
}
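/*
 * Called when an exception is queued for L2: if L1 intercepts the exception,
 * fill in the corresponding #VMEXIT information in the current VMCB and mark
 * the nested vmexit as pending (exit_required).
 */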
int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
			       bool has_error_code, u32 error_code)
{
	int vmexit;

	if (!is_guest_mode(&svm->vcpu))
		return 0;

	vmexit = nested_svm_intercept(svm);
	if (vmexit != NESTED_EXIT_DONE)
		return 0;

	svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr;
	svm->vmcb->control.exit_code_hi = 0;
	svm->vmcb->control.exit_info_1 = error_code;

	/*
	 * EXITINFO2 is undefined for all exception intercepts other
	 * than #PF.
	 */
	if (svm->vcpu.arch.exception.nested_apf)
		svm->vmcb->control.exit_info_2 = svm->vcpu.arch.apf.nested_apf_token;
	else if (svm->vcpu.arch.exception.has_payload)
		svm->vmcb->control.exit_info_2 = svm->vcpu.arch.exception.payload;
	else
		svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;

	svm->nested.exit_required = true;
	return vmexit;
}

static void nested_svm_intr(struct vcpu_svm *svm)
{
	svm->vmcb->control.exit_code = SVM_EXIT_INTR;
	svm->vmcb->control.exit_info_1 = 0;
	svm->vmcb->control.exit_info_2 = 0;

	/* nested_svm_vmexit() is called later from handle_exit() */
	svm->nested.exit_required = true;
	trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip);
}

static bool nested_exit_on_intr(struct vcpu_svm *svm)
{
	return (svm->nested.intercept & 1ULL);
}

int svm_check_nested_events(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	bool block_nested_events =
		kvm_event_needs_reinjection(vcpu) || svm->nested.exit_required;

	if (kvm_cpu_has_interrupt(vcpu) && nested_exit_on_intr(svm)) {
		if (block_nested_events)
			return -EBUSY;
		nested_svm_intr(svm);
		return 0;
	}

	return 0;
}

int nested_svm_exit_special(struct vcpu_svm *svm)
{
	u32 exit_code = svm->vmcb->control.exit_code;

	switch (exit_code) {
	case SVM_EXIT_INTR:
	case SVM_EXIT_NMI:
	case SVM_EXIT_EXCP_BASE + MC_VECTOR:
		return NESTED_EXIT_HOST;
	case SVM_EXIT_NPF:
		/* For now we are always handling NPFs when using them */
		if (npt_enabled)
			return NESTED_EXIT_HOST;
		break;
	case SVM_EXIT_EXCP_BASE + PF_VECTOR:
		/* When we're shadowing, trap PFs, but not async PF */
		if (!npt_enabled && svm->vcpu.arch.apf.host_apf_reason == 0)
			return NESTED_EXIT_HOST;
		break;
	default:
		break;
	}

	return NESTED_EXIT_CONTINUE;
}