// SPDX-License-Identifier: GPL-2.0-only
/*
 * tools/testing/selftests/kvm/lib/x86_64/vmx.c
 *
 * Copyright (C) 2018, Google LLC.
 */

#include <asm/msr-index.h>

#include "test_util.h"
#include "kvm_util.h"
#include "processor.h"
#include "vmx.h"

#define PAGE_SHIFT_4K  12

#define KVM_EPT_PAGE_TABLE_MIN_PADDR 0x1c0000

bool enable_evmcs;

struct hv_enlightened_vmcs *current_evmcs;
struct hv_vp_assist_page *current_vp_assist;

struct eptPageTableEntry {
	uint64_t readable:1;
	uint64_t writable:1;
	uint64_t executable:1;
	uint64_t memory_type:3;
	uint64_t ignore_pat:1;
	uint64_t page_size:1;
	uint64_t accessed:1;
	uint64_t dirty:1;
	uint64_t ignored_11_10:2;
	uint64_t address:40;
	uint64_t ignored_62_52:11;
	uint64_t suppress_ve:1;
};

struct eptPageTablePointer {
	uint64_t memory_type:3;
	uint64_t page_walk_length:3;
	uint64_t ad_enabled:1;
	uint64_t reserved_11_07:5;
	uint64_t address:40;
	uint64_t reserved_63_52:12;
};

int vcpu_enable_evmcs(struct kvm_vcpu *vcpu)
{
	uint16_t evmcs_ver;

	vcpu_enable_cap(vcpu, KVM_CAP_HYPERV_ENLIGHTENED_VMCS,
			(unsigned long)&evmcs_ver);

	/* KVM should return supported EVMCS version range */
	TEST_ASSERT(((evmcs_ver >> 8) >= (evmcs_ver & 0xff)) &&
		    (evmcs_ver & 0xff) > 0,
		    "Incorrect EVMCS version range: %x:%x\n",
		    evmcs_ver & 0xff, evmcs_ver >> 8);

	return evmcs_ver;
}

/* Allocate memory regions for nested VMX tests.
 *
 * Input Args:
 *   vm - The VM to allocate guest-virtual addresses in.
 *
 * Output Args:
 *   p_vmx_gva - The guest virtual address for the struct vmx_pages.
 *
 * Return:
 *   Pointer to structure with the addresses of the VMX areas.
 */
struct vmx_pages *
vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva)
{
	vm_vaddr_t vmx_gva = vm_vaddr_alloc_page(vm);
	struct vmx_pages *vmx = addr_gva2hva(vm, vmx_gva);

	/* Setup of a region of guest memory for the vmxon region. */
	vmx->vmxon = (void *)vm_vaddr_alloc_page(vm);
	vmx->vmxon_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmxon);
	vmx->vmxon_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmxon);

	/* Setup of a region of guest memory for a vmcs. */
	vmx->vmcs = (void *)vm_vaddr_alloc_page(vm);
	vmx->vmcs_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmcs);
	vmx->vmcs_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmcs);

	/* Setup of a region of guest memory for the MSR bitmap. */
	vmx->msr = (void *)vm_vaddr_alloc_page(vm);
	vmx->msr_hva = addr_gva2hva(vm, (uintptr_t)vmx->msr);
	vmx->msr_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->msr);
	memset(vmx->msr_hva, 0, getpagesize());

	/* Setup of a region of guest memory for the shadow VMCS. */
	vmx->shadow_vmcs = (void *)vm_vaddr_alloc_page(vm);
	vmx->shadow_vmcs_hva = addr_gva2hva(vm, (uintptr_t)vmx->shadow_vmcs);
	vmx->shadow_vmcs_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->shadow_vmcs);

	/* Setup of a region of guest memory for the VMREAD and VMWRITE bitmaps. */
	vmx->vmread = (void *)vm_vaddr_alloc_page(vm);
	vmx->vmread_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmread);
	vmx->vmread_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmread);
	memset(vmx->vmread_hva, 0, getpagesize());

	vmx->vmwrite = (void *)vm_vaddr_alloc_page(vm);
	vmx->vmwrite_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmwrite);
	vmx->vmwrite_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmwrite);
	memset(vmx->vmwrite_hva, 0, getpagesize());

	*p_vmx_gva = vmx_gva;
	return vmx;
}
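
/*
 * Illustrative sketch (not part of this library): typical host-side use of
 * vcpu_alloc_vmx() in a nested VMX selftest.  The guest_code symbol and the
 * single-argument convention are assumptions of this example, not
 * requirements of the API, and helper signatures may differ across kernel
 * versions.
 *
 *	vm_vaddr_t vmx_gva;
 *	struct kvm_vcpu *vcpu;
 *	struct kvm_vm *vm = vm_create_with_one_vcpu(&vcpu, guest_code);
 *
 *	// Allocate the VMX regions and hand their GVA to the guest.
 *	vcpu_alloc_vmx(vm, &vmx_gva);
 *	vcpu_args_set(vcpu, 1, vmx_gva);
 *	vcpu_run(vcpu);
 */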

bool prepare_for_vmx_operation(struct vmx_pages *vmx)
{
	uint64_t feature_control;
	uint64_t required;
	unsigned long cr0;
	unsigned long cr4;

	/*
	 * Ensure bits in CR0 and CR4 are valid in VMX operation:
	 * - Bit X is 1 in _FIXED0: bit X is fixed to 1 in CRx.
	 * - Bit X is 0 in _FIXED1: bit X is fixed to 0 in CRx.
	 */
	__asm__ __volatile__("mov %%cr0, %0" : "=r"(cr0) : : "memory");
	cr0 &= rdmsr(MSR_IA32_VMX_CR0_FIXED1);
	cr0 |= rdmsr(MSR_IA32_VMX_CR0_FIXED0);
	__asm__ __volatile__("mov %0, %%cr0" : : "r"(cr0) : "memory");

	__asm__ __volatile__("mov %%cr4, %0" : "=r"(cr4) : : "memory");
	cr4 &= rdmsr(MSR_IA32_VMX_CR4_FIXED1);
	cr4 |= rdmsr(MSR_IA32_VMX_CR4_FIXED0);
	/* Enable VMX operation */
	cr4 |= X86_CR4_VMXE;
	__asm__ __volatile__("mov %0, %%cr4" : : "r"(cr4) : "memory");

	/*
	 * Configure IA32_FEATURE_CONTROL MSR to allow VMXON:
	 *  Bit 0: Lock bit. If clear, VMXON causes a #GP.
	 *  Bit 2: Enables VMXON outside of SMX operation. If clear, VMXON
	 *	   outside of SMX causes a #GP.
	 */
	required = FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX;
	required |= FEAT_CTL_LOCKED;
	feature_control = rdmsr(MSR_IA32_FEAT_CTL);
	if ((feature_control & required) != required)
		wrmsr(MSR_IA32_FEAT_CTL, feature_control | required);

	/* Enter VMX root operation. */
	*(uint32_t *)(vmx->vmxon) = vmcs_revision();
	if (vmxon(vmx->vmxon_gpa))
		return false;

	return true;
}

bool load_vmcs(struct vmx_pages *vmx)
{
	/* Load a VMCS. */
	*(uint32_t *)(vmx->vmcs) = vmcs_revision();
	if (vmclear(vmx->vmcs_gpa))
		return false;

	if (vmptrld(vmx->vmcs_gpa))
		return false;

	/* Setup shadow VMCS, do not load it yet. */
	*(uint32_t *)(vmx->shadow_vmcs) = vmcs_revision() | 0x80000000ul;
	if (vmclear(vmx->shadow_vmcs_gpa))
		return false;

	return true;
}
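
/*
 * Illustrative sketch (not part of this library): typical guest-side use of
 * prepare_for_vmx_operation() and load_vmcs().  The vmx_pages pointer is the
 * one allocated by vcpu_alloc_vmx() and passed in via vcpu_args_set(); the
 * guest-code layout here is an assumption of the example.
 *
 *	static void guest_code(struct vmx_pages *vmx_pages)
 *	{
 *		// Enter VMX root operation and make the VMCS current.
 *		GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
 *		GUEST_ASSERT(load_vmcs(vmx_pages));
 *		// ... prepare_vmcs() and vmlaunch() would follow ...
 *	}
 */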

static bool ept_vpid_cap_supported(uint64_t mask)
{
	return rdmsr(MSR_IA32_VMX_EPT_VPID_CAP) & mask;
}

bool ept_1g_pages_supported(void)
{
	return ept_vpid_cap_supported(VMX_EPT_VPID_CAP_1G_PAGES);
}

/*
 * Initialize the control fields to the most basic settings possible.
 */
static inline void init_vmcs_control_fields(struct vmx_pages *vmx)
{
	uint32_t sec_exec_ctl = 0;

	vmwrite(VIRTUAL_PROCESSOR_ID, 0);
	vmwrite(POSTED_INTR_NV, 0);

	vmwrite(PIN_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_TRUE_PINBASED_CTLS));

	if (vmx->eptp_gpa) {
		uint64_t ept_paddr;
		struct eptPageTablePointer eptp = {
			.memory_type = VMX_BASIC_MEM_TYPE_WB,
			.page_walk_length = 3, /* + 1 */
			.ad_enabled = ept_vpid_cap_supported(VMX_EPT_VPID_CAP_AD_BITS),
			.address = vmx->eptp_gpa >> PAGE_SHIFT_4K,
		};

		memcpy(&ept_paddr, &eptp, sizeof(ept_paddr));
		vmwrite(EPT_POINTER, ept_paddr);
		sec_exec_ctl |= SECONDARY_EXEC_ENABLE_EPT;
	}

	if (!vmwrite(SECONDARY_VM_EXEC_CONTROL, sec_exec_ctl))
		vmwrite(CPU_BASED_VM_EXEC_CONTROL,
			rdmsr(MSR_IA32_VMX_TRUE_PROCBASED_CTLS) | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS);
	else {
		vmwrite(CPU_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_TRUE_PROCBASED_CTLS));
		GUEST_ASSERT(!sec_exec_ctl);
	}

	vmwrite(EXCEPTION_BITMAP, 0);
	vmwrite(PAGE_FAULT_ERROR_CODE_MASK, 0);
	vmwrite(PAGE_FAULT_ERROR_CODE_MATCH, -1); /* Never match */
	vmwrite(CR3_TARGET_COUNT, 0);
	vmwrite(VM_EXIT_CONTROLS, rdmsr(MSR_IA32_VMX_EXIT_CTLS) |
		VM_EXIT_HOST_ADDR_SPACE_SIZE);	  /* 64-bit host */
	vmwrite(VM_EXIT_MSR_STORE_COUNT, 0);
	vmwrite(VM_EXIT_MSR_LOAD_COUNT, 0);
	vmwrite(VM_ENTRY_CONTROLS, rdmsr(MSR_IA32_VMX_ENTRY_CTLS) |
		VM_ENTRY_IA32E_MODE);		  /* 64-bit guest */
	vmwrite(VM_ENTRY_MSR_LOAD_COUNT, 0);
	vmwrite(VM_ENTRY_INTR_INFO_FIELD, 0);
	vmwrite(TPR_THRESHOLD, 0);

	vmwrite(CR0_GUEST_HOST_MASK, 0);
	vmwrite(CR4_GUEST_HOST_MASK, 0);
	vmwrite(CR0_READ_SHADOW, get_cr0());
	vmwrite(CR4_READ_SHADOW, get_cr4());

	vmwrite(MSR_BITMAP, vmx->msr_gpa);
	vmwrite(VMREAD_BITMAP, vmx->vmread_gpa);
	vmwrite(VMWRITE_BITMAP, vmx->vmwrite_gpa);
}

/*
 * Initialize the host state fields based on the current host state, with
 * the exception of HOST_RSP and HOST_RIP, which should be set by vmlaunch
 * or vmresume.
 */
static inline void init_vmcs_host_state(void)
{
	uint32_t exit_controls = vmreadz(VM_EXIT_CONTROLS);

	vmwrite(HOST_ES_SELECTOR, get_es());
	vmwrite(HOST_CS_SELECTOR, get_cs());
	vmwrite(HOST_SS_SELECTOR, get_ss());
	vmwrite(HOST_DS_SELECTOR, get_ds());
	vmwrite(HOST_FS_SELECTOR, get_fs());
	vmwrite(HOST_GS_SELECTOR, get_gs());
	vmwrite(HOST_TR_SELECTOR, get_tr());

	if (exit_controls & VM_EXIT_LOAD_IA32_PAT)
		vmwrite(HOST_IA32_PAT, rdmsr(MSR_IA32_CR_PAT));
	if (exit_controls & VM_EXIT_LOAD_IA32_EFER)
		vmwrite(HOST_IA32_EFER, rdmsr(MSR_EFER));
	if (exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
		vmwrite(HOST_IA32_PERF_GLOBAL_CTRL,
			rdmsr(MSR_CORE_PERF_GLOBAL_CTRL));

	vmwrite(HOST_IA32_SYSENTER_CS, rdmsr(MSR_IA32_SYSENTER_CS));

	vmwrite(HOST_CR0, get_cr0());
	vmwrite(HOST_CR3, get_cr3());
	vmwrite(HOST_CR4, get_cr4());
	vmwrite(HOST_FS_BASE, rdmsr(MSR_FS_BASE));
	vmwrite(HOST_GS_BASE, rdmsr(MSR_GS_BASE));
	vmwrite(HOST_TR_BASE,
		get_desc64_base((struct desc64 *)(get_gdt().address + get_tr())));
	vmwrite(HOST_GDTR_BASE, get_gdt().address);
	vmwrite(HOST_IDTR_BASE, get_idt().address);
	vmwrite(HOST_IA32_SYSENTER_ESP, rdmsr(MSR_IA32_SYSENTER_ESP));
	vmwrite(HOST_IA32_SYSENTER_EIP, rdmsr(MSR_IA32_SYSENTER_EIP));
}

/*
 * Initialize the guest state fields essentially as a clone of
 * the host state fields. Some host state fields have fixed
 * values, and we set the corresponding guest state fields accordingly.
 */
static inline void init_vmcs_guest_state(void *rip, void *rsp)
{
	vmwrite(GUEST_ES_SELECTOR, vmreadz(HOST_ES_SELECTOR));
	vmwrite(GUEST_CS_SELECTOR, vmreadz(HOST_CS_SELECTOR));
	vmwrite(GUEST_SS_SELECTOR, vmreadz(HOST_SS_SELECTOR));
	vmwrite(GUEST_DS_SELECTOR, vmreadz(HOST_DS_SELECTOR));
	vmwrite(GUEST_FS_SELECTOR, vmreadz(HOST_FS_SELECTOR));
	vmwrite(GUEST_GS_SELECTOR, vmreadz(HOST_GS_SELECTOR));
	vmwrite(GUEST_LDTR_SELECTOR, 0);
	vmwrite(GUEST_TR_SELECTOR, vmreadz(HOST_TR_SELECTOR));
	vmwrite(GUEST_INTR_STATUS, 0);
	vmwrite(GUEST_PML_INDEX, 0);

	vmwrite(VMCS_LINK_POINTER, -1ll);
	vmwrite(GUEST_IA32_DEBUGCTL, 0);
	vmwrite(GUEST_IA32_PAT, vmreadz(HOST_IA32_PAT));
	vmwrite(GUEST_IA32_EFER, vmreadz(HOST_IA32_EFER));
	vmwrite(GUEST_IA32_PERF_GLOBAL_CTRL,
		vmreadz(HOST_IA32_PERF_GLOBAL_CTRL));

	vmwrite(GUEST_ES_LIMIT, -1);
	vmwrite(GUEST_CS_LIMIT, -1);
	vmwrite(GUEST_SS_LIMIT, -1);
	vmwrite(GUEST_DS_LIMIT, -1);
	vmwrite(GUEST_FS_LIMIT, -1);
	vmwrite(GUEST_GS_LIMIT, -1);
	vmwrite(GUEST_LDTR_LIMIT, -1);
	vmwrite(GUEST_TR_LIMIT, 0x67);
	vmwrite(GUEST_GDTR_LIMIT, 0xffff);
	vmwrite(GUEST_IDTR_LIMIT, 0xffff);
	vmwrite(GUEST_ES_AR_BYTES,
		vmreadz(GUEST_ES_SELECTOR) == 0 ? 0x10000 : 0xc093);
	vmwrite(GUEST_CS_AR_BYTES, 0xa09b);
	vmwrite(GUEST_SS_AR_BYTES, 0xc093);
	vmwrite(GUEST_DS_AR_BYTES,
		vmreadz(GUEST_DS_SELECTOR) == 0 ? 0x10000 : 0xc093);
	vmwrite(GUEST_FS_AR_BYTES,
		vmreadz(GUEST_FS_SELECTOR) == 0 ? 0x10000 : 0xc093);
	vmwrite(GUEST_GS_AR_BYTES,
		vmreadz(GUEST_GS_SELECTOR) == 0 ? 0x10000 : 0xc093);
	vmwrite(GUEST_LDTR_AR_BYTES, 0x10000);
	vmwrite(GUEST_TR_AR_BYTES, 0x8b);
	vmwrite(GUEST_INTERRUPTIBILITY_INFO, 0);
	vmwrite(GUEST_ACTIVITY_STATE, 0);
	vmwrite(GUEST_SYSENTER_CS, vmreadz(HOST_IA32_SYSENTER_CS));
	vmwrite(VMX_PREEMPTION_TIMER_VALUE, 0);

	vmwrite(GUEST_CR0, vmreadz(HOST_CR0));
	vmwrite(GUEST_CR3, vmreadz(HOST_CR3));
	vmwrite(GUEST_CR4, vmreadz(HOST_CR4));
	vmwrite(GUEST_ES_BASE, 0);
	vmwrite(GUEST_CS_BASE, 0);
	vmwrite(GUEST_SS_BASE, 0);
	vmwrite(GUEST_DS_BASE, 0);
	vmwrite(GUEST_FS_BASE, vmreadz(HOST_FS_BASE));
	vmwrite(GUEST_GS_BASE, vmreadz(HOST_GS_BASE));
	vmwrite(GUEST_LDTR_BASE, 0);
	vmwrite(GUEST_TR_BASE, vmreadz(HOST_TR_BASE));
	vmwrite(GUEST_GDTR_BASE, vmreadz(HOST_GDTR_BASE));
	vmwrite(GUEST_IDTR_BASE, vmreadz(HOST_IDTR_BASE));
	vmwrite(GUEST_DR7, 0x400);
	vmwrite(GUEST_RSP, (uint64_t)rsp);
	vmwrite(GUEST_RIP, (uint64_t)rip);
	vmwrite(GUEST_RFLAGS, 2);
	vmwrite(GUEST_PENDING_DBG_EXCEPTIONS, 0);
	vmwrite(GUEST_SYSENTER_ESP, vmreadz(HOST_IA32_SYSENTER_ESP));
	vmwrite(GUEST_SYSENTER_EIP, vmreadz(HOST_IA32_SYSENTER_EIP));
}

/*
 * Initialize the current VMCS: basic control fields, host state taken from
 * the current (L1) context, and guest state that begins execution at
 * guest_rip with its stack at guest_rsp.
 */
void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp)
{
	init_vmcs_control_fields(vmx);
	init_vmcs_host_state();
	init_vmcs_guest_state(guest_rip, guest_rsp);
}
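
/*
 * Illustrative sketch (not part of this library): launching an L2 guest
 * after prepare_vmcs().  l2_guest_code, the test-defined L2_GUEST_STACK_SIZE
 * array, and the VMCALL-based exit are assumptions of this example, taken
 * from common selftest patterns.
 *
 *	unsigned long l2_stack[L2_GUEST_STACK_SIZE];
 *
 *	prepare_vmcs(vmx_pages, l2_guest_code, &l2_stack[L2_GUEST_STACK_SIZE]);
 *	GUEST_ASSERT(!vmlaunch());
 *	// After the L2 guest executes VMCALL, control returns here (L1).
 *	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
 */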

static void nested_create_pte(struct kvm_vm *vm,
			      struct eptPageTableEntry *pte,
			      uint64_t nested_paddr,
			      uint64_t paddr,
			      int current_level,
			      int target_level)
{
	if (!pte->readable) {
		pte->writable = true;
		pte->readable = true;
		pte->executable = true;
		pte->page_size = (current_level == target_level);
		if (pte->page_size)
			pte->address = paddr >> vm->page_shift;
		else
			pte->address = vm_alloc_page_table(vm) >> vm->page_shift;
	} else {
		/*
		 * Entry already present. Assert that the caller doesn't want
		 * a hugepage at this level, and that there isn't a hugepage at
		 * this level.
		 */
		TEST_ASSERT(current_level != target_level,
			    "Cannot create hugepage at level: %u, nested_paddr: 0x%lx\n",
			    current_level, nested_paddr);
		TEST_ASSERT(!pte->page_size,
			    "Cannot create page table at level: %u, nested_paddr: 0x%lx\n",
			    current_level, nested_paddr);
	}
}

/* Create an EPT mapping for a single page of size PG_LEVEL_SIZE(target_level). */
void __nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
		     uint64_t nested_paddr, uint64_t paddr, int target_level)
{
	const uint64_t page_size = PG_LEVEL_SIZE(target_level);
	struct eptPageTableEntry *pt = vmx->eptp_hva, *pte;
	uint16_t index;

	TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
		    "unknown or unsupported guest mode, mode: 0x%x", vm->mode);

	TEST_ASSERT((nested_paddr >> 48) == 0,
		    "Nested physical address 0x%lx requires 5-level paging",
		    nested_paddr);
	TEST_ASSERT((nested_paddr % page_size) == 0,
		    "Nested physical address not on page boundary,\n"
		    "  nested_paddr: 0x%lx page_size: 0x%lx",
		    nested_paddr, page_size);
	TEST_ASSERT((nested_paddr >> vm->page_shift) <= vm->max_gfn,
		    "Nested physical address beyond maximum supported,\n"
		    "  nested_paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
		    nested_paddr, vm->max_gfn, vm->page_size);
	TEST_ASSERT((paddr % page_size) == 0,
		    "Physical address not on page boundary,\n"
		    "  paddr: 0x%lx page_size: 0x%lx",
		    paddr, page_size);
	TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
		    "Physical address beyond maximum supported,\n"
		    "  paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
		    paddr, vm->max_gfn, vm->page_size);

	for (int level = PG_LEVEL_512G; level >= PG_LEVEL_4K; level--) {
		index = (nested_paddr >> PG_LEVEL_SHIFT(level)) & 0x1ffu;
		pte = &pt[index];

		nested_create_pte(vm, pte, nested_paddr, paddr, level, target_level);

		if (pte->page_size)
			break;

		pt = addr_gpa2hva(vm, pte->address * vm->page_size);
	}

	/*
	 * For now mark these as accessed and dirty because the only
	 * testcase we have needs that. Can be reconsidered later.
	 */
	pte->accessed = true;
	pte->dirty = true;
}

void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
		   uint64_t nested_paddr, uint64_t paddr)
{
	__nested_pg_map(vmx, vm, nested_paddr, paddr, PG_LEVEL_4K);
}

/*
 * Map a range of EPT guest physical addresses to the VM's physical addresses
 *
 * Input Args:
 *   vm - Virtual Machine
 *   nested_paddr - Nested guest physical address to map
 *   paddr - VM Physical Address
 *   size - The size of the range to map
 *   level - The level at which to map the range
 *
 * Output Args: None
 *
 * Return: None
 *
 * Within the VM given by vm, creates a nested guest translation for the
 * page range starting at nested_paddr to the page range starting at paddr.
 */
void __nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
		  uint64_t nested_paddr, uint64_t paddr, uint64_t size,
		  int level)
{
	size_t page_size = PG_LEVEL_SIZE(level);
	size_t npages = size / page_size;

	TEST_ASSERT(nested_paddr + size > nested_paddr, "Nested paddr overflow");
	TEST_ASSERT(paddr + size > paddr, "Paddr overflow");

	while (npages--) {
		__nested_pg_map(vmx, vm, nested_paddr, paddr, level);
		nested_paddr += page_size;
		paddr += page_size;
	}
}

void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
		uint64_t nested_paddr, uint64_t paddr, uint64_t size)
{
	__nested_map(vmx, vm, nested_paddr, paddr, size, PG_LEVEL_4K);
}

/* Prepare an identity extended page table that maps all the
 * physical pages in VM.
 */
void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm,
			uint32_t memslot)
{
	sparsebit_idx_t i, last;
	struct userspace_mem_region *region =
		memslot2region(vm, memslot);

	i = (region->region.guest_phys_addr >> vm->page_shift) - 1;
	last = i + (region->region.memory_size >> vm->page_shift);
	for (;;) {
		i = sparsebit_next_clear(region->unused_phy_pages, i);
		if (i > last)
			break;

		nested_map(vmx, vm,
			   (uint64_t)i << vm->page_shift,
			   (uint64_t)i << vm->page_shift,
			   1 << vm->page_shift);
	}
}

/* Identity map a region with 1GiB Pages. */
void nested_identity_map_1g(struct vmx_pages *vmx, struct kvm_vm *vm,
			    uint64_t addr, uint64_t size)
{
	__nested_map(vmx, vm, addr, addr, size, PG_LEVEL_1G);
}

bool kvm_cpu_has_ept(void)
{
	uint64_t ctrl;

	ctrl = kvm_get_feature_msr(MSR_IA32_VMX_TRUE_PROCBASED_CTLS) >> 32;
	if (!(ctrl & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS))
		return false;

	ctrl = kvm_get_feature_msr(MSR_IA32_VMX_PROCBASED_CTLS2) >> 32;
	return ctrl & SECONDARY_EXEC_ENABLE_EPT;
}

void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm,
		  uint32_t eptp_memslot)
{
	TEST_ASSERT(kvm_cpu_has_ept(), "KVM doesn't support nested EPT");

	vmx->eptp = (void *)vm_vaddr_alloc_page(vm);
	vmx->eptp_hva = addr_gva2hva(vm, (uintptr_t)vmx->eptp);
	vmx->eptp_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->eptp);
}

void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm)
{
	vmx->apic_access = (void *)vm_vaddr_alloc_page(vm);
	vmx->apic_access_hva = addr_gva2hva(vm, (uintptr_t)vmx->apic_access);
	vmx->apic_access_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->apic_access);
}
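
/*
 * Illustrative sketch (not part of this library): host-side EPT setup for a
 * nested test.  The use of TEST_REQUIRE, memslot 0, and identity-mapping the
 * whole default memslot are assumptions of this example.
 *
 *	vm_vaddr_t vmx_gva;
 *	struct vmx_pages *vmx;
 *
 *	TEST_REQUIRE(kvm_cpu_has_ept());
 *
 *	vmx = vcpu_alloc_vmx(vm, &vmx_gva);
 *	prepare_eptp(vmx, vm, 0);
 *	// Identity-map the pages backing memslot 0 with 4K EPT entries.
 *	nested_map_memslot(vmx, vm, 0);
 *	// init_vmcs_control_fields() later enables SECONDARY_EXEC_ENABLE_EPT
 *	// because vmx->eptp_gpa is now non-zero.
 */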