// SPDX-License-Identifier: GPL-2.0-only
/*
 * KVM_GET/SET_* tests
 *
 * Copyright (C) 2018, Red Hat, Inc.
 *
 * Tests for vCPU state save/restore, including nested guest state.
 */
#define _GNU_SOURCE /* for program_invocation_short_name */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>

#include "test_util.h"

#include "kvm_util.h"
#include "processor.h"
#include "vmx.h"
#include "svm_util.h"

#define L2_GUEST_STACK_SIZE 256

void svm_l2_guest_code(void)
{
	GUEST_SYNC(4);
	/* Exit to L1 */
	vmcall();
	GUEST_SYNC(6);
	/* Done, exit to L1 and never come back. */
	vmcall();
}

static void svm_l1_guest_code(struct svm_test_data *svm)
{
	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
	struct vmcb *vmcb = svm->vmcb;

	GUEST_ASSERT(svm->vmcb_gpa);
	/* Prepare for L2 execution. */
	generic_svm_setup(svm, svm_l2_guest_code,
			  &l2_guest_stack[L2_GUEST_STACK_SIZE]);

	GUEST_SYNC(3);
	run_guest(vmcb, svm->vmcb_gpa);
	GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
	GUEST_SYNC(5);
	vmcb->save.rip += 3;
	run_guest(vmcb, svm->vmcb_gpa);
	GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
	GUEST_SYNC(7);
}

void vmx_l2_guest_code(void)
{
	GUEST_SYNC(6);

	/* Exit to L1 */
	vmcall();

	/* L1 has now set up a shadow VMCS for us. */
	GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee);
	GUEST_SYNC(10);
	GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee);
	GUEST_ASSERT(!vmwrite(GUEST_RIP, 0xc0fffee));
	GUEST_SYNC(11);
	GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0fffee);
	GUEST_ASSERT(!vmwrite(GUEST_RIP, 0xc0ffffee));
	GUEST_SYNC(12);

	/* Done, exit to L1 and never come back. */
	vmcall();
}
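
/*
 * L1 VMX code: enter L2 via VMLAUNCH/VMRESUME, then hand L2 a shadow VMCS
 * and verify that the current VMCS's "launched" state and the shadow VMCS
 * contents survive the host-side save/restore performed at each GUEST_SYNC
 * stage.
 */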
static void vmx_l1_guest_code(struct vmx_pages *vmx_pages)
{
	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];

	GUEST_ASSERT(vmx_pages->vmcs_gpa);
	GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
	GUEST_SYNC(3);
	GUEST_ASSERT(load_vmcs(vmx_pages));
	GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);

	GUEST_SYNC(4);
	GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);

	prepare_vmcs(vmx_pages, vmx_l2_guest_code,
		     &l2_guest_stack[L2_GUEST_STACK_SIZE]);

	GUEST_SYNC(5);
	GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
	GUEST_ASSERT(!vmlaunch());
	GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);

	/* Check that the launched state is preserved. */
	GUEST_ASSERT(vmlaunch());

	GUEST_ASSERT(!vmresume());
	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);

	GUEST_SYNC(7);
	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);

	GUEST_ASSERT(!vmresume());
	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);

	vmwrite(GUEST_RIP, vmreadz(GUEST_RIP) + 3);

	vmwrite(SECONDARY_VM_EXEC_CONTROL, SECONDARY_EXEC_SHADOW_VMCS);
	vmwrite(VMCS_LINK_POINTER, vmx_pages->shadow_vmcs_gpa);

	GUEST_ASSERT(!vmptrld(vmx_pages->shadow_vmcs_gpa));
	GUEST_ASSERT(vmlaunch());
	GUEST_SYNC(8);
	GUEST_ASSERT(vmlaunch());
	GUEST_ASSERT(vmresume());

	vmwrite(GUEST_RIP, 0xc0ffee);
	GUEST_SYNC(9);
	GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee);

	GUEST_ASSERT(!vmptrld(vmx_pages->vmcs_gpa));
	GUEST_ASSERT(!vmresume());
	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);

	GUEST_ASSERT(!vmptrld(vmx_pages->shadow_vmcs_gpa));
	GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffffee);
	GUEST_ASSERT(vmlaunch());
	GUEST_ASSERT(vmresume());
	GUEST_SYNC(13);
	GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffffee);
	GUEST_ASSERT(vmlaunch());
	GUEST_ASSERT(vmresume());
}
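
/*
 * Top-level guest code: sync at stage 1, take every supported XSAVE feature
 * out of its "init" state, sync at stage 2, then run the nested SVM or VMX
 * L1 code (stages 3 and up) if a nested test data area was provided.
 */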
static void __attribute__((__flatten__)) guest_code(void *arg)
{
	GUEST_SYNC(1);

	if (this_cpu_has(X86_FEATURE_XSAVE)) {
		uint64_t supported_xcr0 = this_cpu_supported_xcr0();
		uint8_t buffer[4096];

		memset(buffer, 0xcc, sizeof(buffer));

		set_cr4(get_cr4() | X86_CR4_OSXSAVE);
		GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSXSAVE));

		xsetbv(0, xgetbv(0) | supported_xcr0);

		/*
		 * Modify state for all supported xfeatures to take them out of
		 * their "init" state, i.e. to make them show up in XSTATE_BV.
		 *
		 * Note off-by-default features, e.g. AMX, are out of scope for
		 * this particular testcase as they have a different ABI.
		 */
		GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_FP);
		asm volatile ("fincstp");

		GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_SSE);
		asm volatile ("vmovdqu %0, %%xmm0" :: "m" (buffer));

		if (supported_xcr0 & XFEATURE_MASK_YMM)
			asm volatile ("vmovdqu %0, %%ymm0" :: "m" (buffer));

		if (supported_xcr0 & XFEATURE_MASK_AVX512) {
			asm volatile ("kmovq %0, %%k1" :: "r" (-1ull));
			asm volatile ("vmovupd %0, %%zmm0" :: "m" (buffer));
			asm volatile ("vmovupd %0, %%zmm16" :: "m" (buffer));
		}

		if (this_cpu_has(X86_FEATURE_MPX)) {
			uint64_t bounds[2] = { 10, 0xffffffffull };
			uint64_t output[2] = { };

			GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_BNDREGS);
			GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_BNDCSR);

			/*
			 * Don't bother trying to get BNDCSR into the INUSE
			 * state.  MSR_IA32_BNDCFGS doesn't count as it isn't
			 * managed via XSAVE/XRSTOR, and BNDCFGU can only be
			 * modified by XRSTOR.  Stuffing XSTATE_BV in the host
			 * is simpler than doing XRSTOR here in the guest.
			 *
			 * However, temporarily enable MPX in BNDCFGS so that
			 * BNDMOV actually loads BND1.  If MPX isn't *fully*
			 * enabled, all MPX instructions are treated as NOPs.
			 *
			 * Hand encode "bndmov (%rax),%bnd1" as support for MPX
			 * mnemonics/registers has been removed from gcc and
			 * clang (and was never fully supported by clang).
			 */
			wrmsr(MSR_IA32_BNDCFGS, BIT_ULL(0));
			asm volatile (".byte 0x66,0x0f,0x1a,0x08" :: "a" (bounds));
			/*
			 * Hand encode "bndmov %bnd1, (%rax)" to sanity check
			 * that BND1 actually got loaded.
			 */
			asm volatile (".byte 0x66,0x0f,0x1b,0x08" :: "a" (output));
			wrmsr(MSR_IA32_BNDCFGS, 0);

			GUEST_ASSERT_EQ(bounds[0], output[0]);
			GUEST_ASSERT_EQ(bounds[1], output[1]);
		}
		if (this_cpu_has(X86_FEATURE_PKU)) {
			GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_PKRU);
			set_cr4(get_cr4() | X86_CR4_PKE);
			GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSPKE));

			wrpkru(-1u);
		}
	}

	GUEST_SYNC(2);

	if (arg) {
		if (this_cpu_has(X86_FEATURE_SVM))
			svm_l1_guest_code(arg);
		else
			vmx_l1_guest_code(arg);
	}

	GUEST_DONE();
}

int main(int argc, char *argv[])
{
	uint64_t *xstate_bv, saved_xstate_bv;
	vm_vaddr_t nested_gva = 0;
	struct kvm_cpuid2 empty_cpuid = {};
	struct kvm_regs regs1, regs2;
	struct kvm_vcpu *vcpu, *vcpuN;
	struct kvm_vm *vm;
	struct kvm_x86_state *state;
	struct ucall uc;
	int stage;

	/* Create VM */
	vm = vm_create_with_one_vcpu(&vcpu, guest_code);

	vcpu_regs_get(vcpu, &regs1);

	if (kvm_has_cap(KVM_CAP_NESTED_STATE)) {
		if (kvm_cpu_has(X86_FEATURE_SVM))
			vcpu_alloc_svm(vm, &nested_gva);
		else if (kvm_cpu_has(X86_FEATURE_VMX))
			vcpu_alloc_vmx(vm, &nested_gva);
	}

	if (!nested_gva)
		pr_info("will skip nested state checks\n");

	vcpu_args_set(vcpu, 1, nested_gva);

	for (stage = 1;; stage++) {
		vcpu_run(vcpu);
		TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);

		switch (get_ucall(vcpu, &uc)) {
		case UCALL_ABORT:
			REPORT_GUEST_ASSERT(uc);
			/* NOT REACHED */
		case UCALL_SYNC:
			break;
		case UCALL_DONE:
			goto done;
		default:
			TEST_FAIL("Unknown ucall %lu", uc.cmd);
		}

		/* UCALL_SYNC is handled here. */
		TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
			    uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx",
			    stage, (ulong)uc.args[1]);

		state = vcpu_save_state(vcpu);
		memset(&regs1, 0, sizeof(regs1));
		vcpu_regs_get(vcpu, &regs1);

		kvm_vm_release(vm);

		/* Restore state in a new VM. */
		vcpu = vm_recreate_with_one_vcpu(vm);
		vcpu_load_state(vcpu, state);

		/*
		 * Restore XSAVE state in a dummy vCPU, first without doing
		 * KVM_SET_CPUID2, and then with an empty guest CPUID.  Except
		 * for off-by-default xfeatures, e.g. AMX, KVM is supposed to
		 * allow KVM_SET_XSAVE regardless of guest CPUID.  Manually
		 * load only XSAVE state, MSRs in particular have a much more
		 * convoluted ABI.
		 *
		 * Load two versions of XSAVE state: one with the actual guest
		 * XSAVE state, and one with all supported features forced "on"
		 * in xstate_bv, e.g. to ensure that KVM allows loading all
		 * supported features, even if something goes awry in saving
		 * the original snapshot.
		 */
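		/*
		 * XSTATE_BV occupies the first 8 bytes of the XSAVE header,
		 * which starts at byte offset 512 of the XSAVE area.
		 */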
		xstate_bv = (void *)&((uint8_t *)state->xsave->region)[512];
		saved_xstate_bv = *xstate_bv;

		vcpuN = __vm_vcpu_add(vm, vcpu->id + 1);
		vcpu_xsave_set(vcpuN, state->xsave);
		*xstate_bv = kvm_cpu_supported_xcr0();
		vcpu_xsave_set(vcpuN, state->xsave);

		vcpu_init_cpuid(vcpuN, &empty_cpuid);
		vcpu_xsave_set(vcpuN, state->xsave);
		*xstate_bv = saved_xstate_bv;
		vcpu_xsave_set(vcpuN, state->xsave);

		kvm_x86_state_cleanup(state);

		memset(&regs2, 0, sizeof(regs2));
		vcpu_regs_get(vcpu, &regs2);
		TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
			    "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
			    (ulong) regs2.rdi, (ulong) regs2.rsi);
	}

done:
	kvm_vm_free(vm);
}