1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (C) 2018, Red Hat, Inc. 4 * 5 * Tests for Enlightened VMCS, including nested guest state. 6 */ 7 #define _GNU_SOURCE /* for program_invocation_short_name */ 8 #include <fcntl.h> 9 #include <stdio.h> 10 #include <stdlib.h> 11 #include <string.h> 12 #include <sys/ioctl.h> 13 #include <linux/bitmap.h> 14 15 #include "test_util.h" 16 17 #include "kvm_util.h" 18 19 #include "hyperv.h" 20 #include "vmx.h" 21 22 static int ud_count; 23 24 static void guest_ud_handler(struct ex_regs *regs) 25 { 26 ud_count++; 27 regs->rip += 3; /* VMLAUNCH */ 28 } 29 30 static void guest_nmi_handler(struct ex_regs *regs) 31 { 32 } 33 34 static inline void rdmsr_from_l2(uint32_t msr) 35 { 36 /* Currently, L1 doesn't preserve GPRs during vmexits. */ 37 __asm__ __volatile__ ("rdmsr" : : "c"(msr) : 38 "rax", "rbx", "rdx", "rsi", "rdi", "r8", "r9", 39 "r10", "r11", "r12", "r13", "r14", "r15"); 40 } 41 42 /* Exit to L1 from L2 with RDMSR instruction */ 43 void l2_guest_code(void) 44 { 45 u64 unused; 46 47 GUEST_SYNC(7); 48 49 GUEST_SYNC(8); 50 51 /* Forced exit to L1 upon restore */ 52 GUEST_SYNC(9); 53 54 vmcall(); 55 56 /* MSR-Bitmap tests */ 57 rdmsr_from_l2(MSR_FS_BASE); /* intercepted */ 58 rdmsr_from_l2(MSR_FS_BASE); /* intercepted */ 59 rdmsr_from_l2(MSR_GS_BASE); /* not intercepted */ 60 vmcall(); 61 rdmsr_from_l2(MSR_GS_BASE); /* intercepted */ 62 63 /* L2 TLB flush tests */ 64 hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | HV_HYPERCALL_FAST_BIT, 0x0, 65 HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES | HV_FLUSH_ALL_PROCESSORS); 66 rdmsr_from_l2(MSR_FS_BASE); 67 /* 68 * Note: hypercall status (RAX) is not preserved correctly by L1 after 69 * synthetic vmexit, use unchecked version. 70 */ 71 __hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | HV_HYPERCALL_FAST_BIT, 0x0, 72 HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES | HV_FLUSH_ALL_PROCESSORS, 73 &unused); 74 75 /* Done, exit to L1 and never come back. */ 76 vmcall(); 77 } 78 79 void guest_code(struct vmx_pages *vmx_pages, struct hyperv_test_pages *hv_pages, 80 vm_vaddr_t hv_hcall_page_gpa) 81 { 82 #define L2_GUEST_STACK_SIZE 64 83 unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; 84 85 wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID); 86 wrmsr(HV_X64_MSR_HYPERCALL, hv_hcall_page_gpa); 87 88 x2apic_enable(); 89 90 GUEST_SYNC(1); 91 GUEST_SYNC(2); 92 93 enable_vp_assist(hv_pages->vp_assist_gpa, hv_pages->vp_assist); 94 evmcs_enable(); 95 96 GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); 97 GUEST_SYNC(3); 98 GUEST_ASSERT(load_evmcs(hv_pages)); 99 GUEST_ASSERT(vmptrstz() == hv_pages->enlightened_vmcs_gpa); 100 101 GUEST_SYNC(4); 102 GUEST_ASSERT(vmptrstz() == hv_pages->enlightened_vmcs_gpa); 103 104 prepare_vmcs(vmx_pages, l2_guest_code, 105 &l2_guest_stack[L2_GUEST_STACK_SIZE]); 106 107 GUEST_SYNC(5); 108 GUEST_ASSERT(vmptrstz() == hv_pages->enlightened_vmcs_gpa); 109 current_evmcs->revision_id = -1u; 110 GUEST_ASSERT(vmlaunch()); 111 current_evmcs->revision_id = EVMCS_VERSION; 112 GUEST_SYNC(6); 113 114 vmwrite(PIN_BASED_VM_EXEC_CONTROL, vmreadz(PIN_BASED_VM_EXEC_CONTROL) | 115 PIN_BASED_NMI_EXITING); 116 117 /* L2 TLB flush setup */ 118 current_evmcs->partition_assist_page = hv_pages->partition_assist_gpa; 119 current_evmcs->hv_enlightenments_control.nested_flush_hypercall = 1; 120 current_evmcs->hv_vm_id = 1; 121 current_evmcs->hv_vp_id = 1; 122 current_vp_assist->nested_control.features.directhypercall = 1; 123 *(u32 *)(hv_pages->partition_assist) = 0; 124 125 GUEST_ASSERT(!vmlaunch()); 126 GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_EXCEPTION_NMI); 127 GUEST_ASSERT_EQ((vmreadz(VM_EXIT_INTR_INFO) & 0xff), NMI_VECTOR); 128 GUEST_ASSERT(vmptrstz() == hv_pages->enlightened_vmcs_gpa); 129 130 /* 131 * NMI forces L2->L1 exit, resuming L2 and hope that EVMCS is 132 * up-to-date (RIP points where it should and not at the beginning 133 * of l2_guest_code(). GUEST_SYNC(9) checkes that. 134 */ 135 GUEST_ASSERT(!vmresume()); 136 137 GUEST_SYNC(10); 138 139 GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); 140 current_evmcs->guest_rip += 3; /* vmcall */ 141 142 /* Intercept RDMSR 0xc0000100 */ 143 vmwrite(CPU_BASED_VM_EXEC_CONTROL, vmreadz(CPU_BASED_VM_EXEC_CONTROL) | 144 CPU_BASED_USE_MSR_BITMAPS); 145 __set_bit(MSR_FS_BASE & 0x1fff, vmx_pages->msr + 0x400); 146 GUEST_ASSERT(!vmresume()); 147 GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_MSR_READ); 148 current_evmcs->guest_rip += 2; /* rdmsr */ 149 150 /* Enable enlightened MSR bitmap */ 151 current_evmcs->hv_enlightenments_control.msr_bitmap = 1; 152 GUEST_ASSERT(!vmresume()); 153 GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_MSR_READ); 154 current_evmcs->guest_rip += 2; /* rdmsr */ 155 156 /* Intercept RDMSR 0xc0000101 without telling KVM about it */ 157 __set_bit(MSR_GS_BASE & 0x1fff, vmx_pages->msr + 0x400); 158 /* Make sure HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP is set */ 159 current_evmcs->hv_clean_fields |= HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP; 160 GUEST_ASSERT(!vmresume()); 161 /* Make sure we don't see EXIT_REASON_MSR_READ here so eMSR bitmap works */ 162 GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); 163 current_evmcs->guest_rip += 3; /* vmcall */ 164 165 /* Now tell KVM we've changed MSR-Bitmap */ 166 current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP; 167 GUEST_ASSERT(!vmresume()); 168 GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_MSR_READ); 169 current_evmcs->guest_rip += 2; /* rdmsr */ 170 171 /* 172 * L2 TLB flush test. First VMCALL should be handled directly by L0, 173 * no VMCALL exit expected. 174 */ 175 GUEST_ASSERT(!vmresume()); 176 GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_MSR_READ); 177 current_evmcs->guest_rip += 2; /* rdmsr */ 178 /* Enable synthetic vmexit */ 179 *(u32 *)(hv_pages->partition_assist) = 1; 180 GUEST_ASSERT(!vmresume()); 181 GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == HV_VMX_SYNTHETIC_EXIT_REASON_TRAP_AFTER_FLUSH); 182 183 GUEST_ASSERT(!vmresume()); 184 GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); 185 GUEST_SYNC(11); 186 187 /* Try enlightened vmptrld with an incorrect GPA */ 188 evmcs_vmptrld(0xdeadbeef, hv_pages->enlightened_vmcs); 189 GUEST_ASSERT(vmlaunch()); 190 GUEST_ASSERT(ud_count == 1); 191 GUEST_DONE(); 192 } 193 194 void inject_nmi(struct kvm_vcpu *vcpu) 195 { 196 struct kvm_vcpu_events events; 197 198 vcpu_events_get(vcpu, &events); 199 200 events.nmi.pending = 1; 201 events.flags |= KVM_VCPUEVENT_VALID_NMI_PENDING; 202 203 vcpu_events_set(vcpu, &events); 204 } 205 206 static struct kvm_vcpu *save_restore_vm(struct kvm_vm *vm, 207 struct kvm_vcpu *vcpu) 208 { 209 struct kvm_regs regs1, regs2; 210 struct kvm_x86_state *state; 211 212 state = vcpu_save_state(vcpu); 213 memset(®s1, 0, sizeof(regs1)); 214 vcpu_regs_get(vcpu, ®s1); 215 216 kvm_vm_release(vm); 217 218 /* Restore state in a new VM. */ 219 vcpu = vm_recreate_with_one_vcpu(vm); 220 vcpu_set_hv_cpuid(vcpu); 221 vcpu_enable_evmcs(vcpu); 222 vcpu_load_state(vcpu, state); 223 kvm_x86_state_cleanup(state); 224 225 memset(®s2, 0, sizeof(regs2)); 226 vcpu_regs_get(vcpu, ®s2); 227 TEST_ASSERT(!memcmp(®s1, ®s2, sizeof(regs2)), 228 "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx", 229 (ulong) regs2.rdi, (ulong) regs2.rsi); 230 return vcpu; 231 } 232 233 int main(int argc, char *argv[]) 234 { 235 vm_vaddr_t vmx_pages_gva = 0, hv_pages_gva = 0; 236 vm_vaddr_t hcall_page; 237 238 struct kvm_vcpu *vcpu; 239 struct kvm_vm *vm; 240 struct ucall uc; 241 int stage; 242 243 vm = vm_create_with_one_vcpu(&vcpu, guest_code); 244 245 TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); 246 TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE)); 247 TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)); 248 249 hcall_page = vm_vaddr_alloc_pages(vm, 1); 250 memset(addr_gva2hva(vm, hcall_page), 0x0, getpagesize()); 251 252 vcpu_set_hv_cpuid(vcpu); 253 vcpu_enable_evmcs(vcpu); 254 255 vcpu_alloc_vmx(vm, &vmx_pages_gva); 256 vcpu_alloc_hyperv_test_pages(vm, &hv_pages_gva); 257 vcpu_args_set(vcpu, 3, vmx_pages_gva, hv_pages_gva, addr_gva2gpa(vm, hcall_page)); 258 vcpu_set_msr(vcpu, HV_X64_MSR_VP_INDEX, vcpu->id); 259 260 vm_init_descriptor_tables(vm); 261 vcpu_init_descriptor_tables(vcpu); 262 vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler); 263 vm_install_exception_handler(vm, NMI_VECTOR, guest_nmi_handler); 264 265 pr_info("Running L1 which uses EVMCS to run L2\n"); 266 267 for (stage = 1;; stage++) { 268 vcpu_run(vcpu); 269 TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); 270 271 switch (get_ucall(vcpu, &uc)) { 272 case UCALL_ABORT: 273 REPORT_GUEST_ASSERT(uc); 274 /* NOT REACHED */ 275 case UCALL_SYNC: 276 break; 277 case UCALL_DONE: 278 goto done; 279 default: 280 TEST_FAIL("Unknown ucall %lu", uc.cmd); 281 } 282 283 /* UCALL_SYNC is handled here. */ 284 TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") && 285 uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx", 286 stage, (ulong)uc.args[1]); 287 288 vcpu = save_restore_vm(vm, vcpu); 289 290 /* Force immediate L2->L1 exit before resuming */ 291 if (stage == 8) { 292 pr_info("Injecting NMI into L1 before L2 had a chance to run after restore\n"); 293 inject_nmi(vcpu); 294 } 295 296 /* 297 * Do KVM_GET_NESTED_STATE/KVM_SET_NESTED_STATE for a freshly 298 * restored VM (before the first KVM_RUN) to check that 299 * KVM_STATE_NESTED_EVMCS is not lost. 300 */ 301 if (stage == 9) { 302 pr_info("Trying extra KVM_GET_NESTED_STATE/KVM_SET_NESTED_STATE cycle\n"); 303 vcpu = save_restore_vm(vm, vcpu); 304 } 305 } 306 307 done: 308 kvm_vm_free(vm); 309 } 310