1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * tools/testing/selftests/kvm/lib/x86_64/processor.c 4 * 5 * Copyright (C) 2018, Google LLC. 6 */ 7 8 #include "test_util.h" 9 #include "kvm_util.h" 10 #include "processor.h" 11 12 #ifndef NUM_INTERRUPTS 13 #define NUM_INTERRUPTS 256 14 #endif 15 16 #define DEFAULT_CODE_SELECTOR 0x8 17 #define DEFAULT_DATA_SELECTOR 0x10 18 19 #define MAX_NR_CPUID_ENTRIES 100 20 21 vm_vaddr_t exception_handlers; 22 23 static void regs_dump(FILE *stream, struct kvm_regs *regs, uint8_t indent) 24 { 25 fprintf(stream, "%*srax: 0x%.16llx rbx: 0x%.16llx " 26 "rcx: 0x%.16llx rdx: 0x%.16llx\n", 27 indent, "", 28 regs->rax, regs->rbx, regs->rcx, regs->rdx); 29 fprintf(stream, "%*srsi: 0x%.16llx rdi: 0x%.16llx " 30 "rsp: 0x%.16llx rbp: 0x%.16llx\n", 31 indent, "", 32 regs->rsi, regs->rdi, regs->rsp, regs->rbp); 33 fprintf(stream, "%*sr8: 0x%.16llx r9: 0x%.16llx " 34 "r10: 0x%.16llx r11: 0x%.16llx\n", 35 indent, "", 36 regs->r8, regs->r9, regs->r10, regs->r11); 37 fprintf(stream, "%*sr12: 0x%.16llx r13: 0x%.16llx " 38 "r14: 0x%.16llx r15: 0x%.16llx\n", 39 indent, "", 40 regs->r12, regs->r13, regs->r14, regs->r15); 41 fprintf(stream, "%*srip: 0x%.16llx rfl: 0x%.16llx\n", 42 indent, "", 43 regs->rip, regs->rflags); 44 } 45 46 static void segment_dump(FILE *stream, struct kvm_segment *segment, 47 uint8_t indent) 48 { 49 fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.8x " 50 "selector: 0x%.4x type: 0x%.2x\n", 51 indent, "", segment->base, segment->limit, 52 segment->selector, segment->type); 53 fprintf(stream, "%*spresent: 0x%.2x dpl: 0x%.2x " 54 "db: 0x%.2x s: 0x%.2x l: 0x%.2x\n", 55 indent, "", segment->present, segment->dpl, 56 segment->db, segment->s, segment->l); 57 fprintf(stream, "%*sg: 0x%.2x avl: 0x%.2x " 58 "unusable: 0x%.2x padding: 0x%.2x\n", 59 indent, "", segment->g, segment->avl, 60 segment->unusable, segment->padding); 61 } 62 63 static void dtable_dump(FILE *stream, struct kvm_dtable *dtable, 64 uint8_t indent) 65 { 66 fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.4x " 67 "padding: 0x%.4x 0x%.4x 0x%.4x\n", 68 indent, "", dtable->base, dtable->limit, 69 dtable->padding[0], dtable->padding[1], dtable->padding[2]); 70 } 71 72 static void sregs_dump(FILE *stream, struct kvm_sregs *sregs, uint8_t indent) 73 { 74 unsigned int i; 75 76 fprintf(stream, "%*scs:\n", indent, ""); 77 segment_dump(stream, &sregs->cs, indent + 2); 78 fprintf(stream, "%*sds:\n", indent, ""); 79 segment_dump(stream, &sregs->ds, indent + 2); 80 fprintf(stream, "%*ses:\n", indent, ""); 81 segment_dump(stream, &sregs->es, indent + 2); 82 fprintf(stream, "%*sfs:\n", indent, ""); 83 segment_dump(stream, &sregs->fs, indent + 2); 84 fprintf(stream, "%*sgs:\n", indent, ""); 85 segment_dump(stream, &sregs->gs, indent + 2); 86 fprintf(stream, "%*sss:\n", indent, ""); 87 segment_dump(stream, &sregs->ss, indent + 2); 88 fprintf(stream, "%*str:\n", indent, ""); 89 segment_dump(stream, &sregs->tr, indent + 2); 90 fprintf(stream, "%*sldt:\n", indent, ""); 91 segment_dump(stream, &sregs->ldt, indent + 2); 92 93 fprintf(stream, "%*sgdt:\n", indent, ""); 94 dtable_dump(stream, &sregs->gdt, indent + 2); 95 fprintf(stream, "%*sidt:\n", indent, ""); 96 dtable_dump(stream, &sregs->idt, indent + 2); 97 98 fprintf(stream, "%*scr0: 0x%.16llx cr2: 0x%.16llx " 99 "cr3: 0x%.16llx cr4: 0x%.16llx\n", 100 indent, "", 101 sregs->cr0, sregs->cr2, sregs->cr3, sregs->cr4); 102 fprintf(stream, "%*scr8: 0x%.16llx efer: 0x%.16llx " 103 "apic_base: 0x%.16llx\n", 104 indent, "", 105 sregs->cr8, sregs->efer, sregs->apic_base); 106 107 fprintf(stream, "%*sinterrupt_bitmap:\n", indent, ""); 108 for (i = 0; i < (KVM_NR_INTERRUPTS + 63) / 64; i++) { 109 fprintf(stream, "%*s%.16llx\n", indent + 2, "", 110 sregs->interrupt_bitmap[i]); 111 } 112 } 113 114 void virt_arch_pgd_alloc(struct kvm_vm *vm) 115 { 116 TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use " 117 "unknown or unsupported guest mode, mode: 0x%x", vm->mode); 118 119 /* If needed, create page map l4 table. */ 120 if (!vm->pgd_created) { 121 vm->pgd = vm_alloc_page_table(vm); 122 vm->pgd_created = true; 123 } 124 } 125 126 static void *virt_get_pte(struct kvm_vm *vm, uint64_t pt_pfn, uint64_t vaddr, 127 int level) 128 { 129 uint64_t *page_table = addr_gpa2hva(vm, pt_pfn << vm->page_shift); 130 int index = (vaddr >> PG_LEVEL_SHIFT(level)) & 0x1ffu; 131 132 return &page_table[index]; 133 } 134 135 static uint64_t *virt_create_upper_pte(struct kvm_vm *vm, 136 uint64_t pt_pfn, 137 uint64_t vaddr, 138 uint64_t paddr, 139 int current_level, 140 int target_level) 141 { 142 uint64_t *pte = virt_get_pte(vm, pt_pfn, vaddr, current_level); 143 144 if (!(*pte & PTE_PRESENT_MASK)) { 145 *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK; 146 if (current_level == target_level) 147 *pte |= PTE_LARGE_MASK | (paddr & PHYSICAL_PAGE_MASK); 148 else 149 *pte |= vm_alloc_page_table(vm) & PHYSICAL_PAGE_MASK; 150 } else { 151 /* 152 * Entry already present. Assert that the caller doesn't want 153 * a hugepage at this level, and that there isn't a hugepage at 154 * this level. 155 */ 156 TEST_ASSERT(current_level != target_level, 157 "Cannot create hugepage at level: %u, vaddr: 0x%lx\n", 158 current_level, vaddr); 159 TEST_ASSERT(!(*pte & PTE_LARGE_MASK), 160 "Cannot create page table at level: %u, vaddr: 0x%lx\n", 161 current_level, vaddr); 162 } 163 return pte; 164 } 165 166 void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level) 167 { 168 const uint64_t pg_size = PG_LEVEL_SIZE(level); 169 uint64_t *pml4e, *pdpe, *pde; 170 uint64_t *pte; 171 172 TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, 173 "Unknown or unsupported guest mode, mode: 0x%x", vm->mode); 174 175 TEST_ASSERT((vaddr % pg_size) == 0, 176 "Virtual address not aligned,\n" 177 "vaddr: 0x%lx page size: 0x%lx", vaddr, pg_size); 178 TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, (vaddr >> vm->page_shift)), 179 "Invalid virtual address, vaddr: 0x%lx", vaddr); 180 TEST_ASSERT((paddr % pg_size) == 0, 181 "Physical address not aligned,\n" 182 " paddr: 0x%lx page size: 0x%lx", paddr, pg_size); 183 TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn, 184 "Physical address beyond maximum supported,\n" 185 " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", 186 paddr, vm->max_gfn, vm->page_size); 187 188 /* 189 * Allocate upper level page tables, if not already present. Return 190 * early if a hugepage was created. 191 */ 192 pml4e = virt_create_upper_pte(vm, vm->pgd >> vm->page_shift, 193 vaddr, paddr, PG_LEVEL_512G, level); 194 if (*pml4e & PTE_LARGE_MASK) 195 return; 196 197 pdpe = virt_create_upper_pte(vm, PTE_GET_PFN(*pml4e), vaddr, paddr, PG_LEVEL_1G, level); 198 if (*pdpe & PTE_LARGE_MASK) 199 return; 200 201 pde = virt_create_upper_pte(vm, PTE_GET_PFN(*pdpe), vaddr, paddr, PG_LEVEL_2M, level); 202 if (*pde & PTE_LARGE_MASK) 203 return; 204 205 /* Fill in page table entry. */ 206 pte = virt_get_pte(vm, PTE_GET_PFN(*pde), vaddr, PG_LEVEL_4K); 207 TEST_ASSERT(!(*pte & PTE_PRESENT_MASK), 208 "PTE already present for 4k page at vaddr: 0x%lx\n", vaddr); 209 *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK | (paddr & PHYSICAL_PAGE_MASK); 210 } 211 212 void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) 213 { 214 __virt_pg_map(vm, vaddr, paddr, PG_LEVEL_4K); 215 } 216 217 static uint64_t *_vm_get_page_table_entry(struct kvm_vm *vm, 218 struct kvm_vcpu *vcpu, 219 uint64_t vaddr) 220 { 221 uint16_t index[4]; 222 uint64_t *pml4e, *pdpe, *pde; 223 uint64_t *pte; 224 struct kvm_sregs sregs; 225 uint64_t rsvd_mask = 0; 226 227 /* Set the high bits in the reserved mask. */ 228 if (vm->pa_bits < 52) 229 rsvd_mask = GENMASK_ULL(51, vm->pa_bits); 230 231 /* 232 * SDM vol 3, fig 4-11 "Formats of CR3 and Paging-Structure Entries 233 * with 4-Level Paging and 5-Level Paging". 234 * If IA32_EFER.NXE = 0 and the P flag of a paging-structure entry is 1, 235 * the XD flag (bit 63) is reserved. 236 */ 237 vcpu_sregs_get(vcpu, &sregs); 238 if ((sregs.efer & EFER_NX) == 0) { 239 rsvd_mask |= PTE_NX_MASK; 240 } 241 242 TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use " 243 "unknown or unsupported guest mode, mode: 0x%x", vm->mode); 244 TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, 245 (vaddr >> vm->page_shift)), 246 "Invalid virtual address, vaddr: 0x%lx", 247 vaddr); 248 /* 249 * Based on the mode check above there are 48 bits in the vaddr, so 250 * shift 16 to sign extend the last bit (bit-47), 251 */ 252 TEST_ASSERT(vaddr == (((int64_t)vaddr << 16) >> 16), 253 "Canonical check failed. The virtual address is invalid."); 254 255 index[0] = (vaddr >> 12) & 0x1ffu; 256 index[1] = (vaddr >> 21) & 0x1ffu; 257 index[2] = (vaddr >> 30) & 0x1ffu; 258 index[3] = (vaddr >> 39) & 0x1ffu; 259 260 pml4e = addr_gpa2hva(vm, vm->pgd); 261 TEST_ASSERT(pml4e[index[3]] & PTE_PRESENT_MASK, 262 "Expected pml4e to be present for gva: 0x%08lx", vaddr); 263 TEST_ASSERT((pml4e[index[3]] & (rsvd_mask | PTE_LARGE_MASK)) == 0, 264 "Unexpected reserved bits set."); 265 266 pdpe = addr_gpa2hva(vm, PTE_GET_PFN(pml4e[index[3]]) * vm->page_size); 267 TEST_ASSERT(pdpe[index[2]] & PTE_PRESENT_MASK, 268 "Expected pdpe to be present for gva: 0x%08lx", vaddr); 269 TEST_ASSERT(!(pdpe[index[2]] & PTE_LARGE_MASK), 270 "Expected pdpe to map a pde not a 1-GByte page."); 271 TEST_ASSERT((pdpe[index[2]] & rsvd_mask) == 0, 272 "Unexpected reserved bits set."); 273 274 pde = addr_gpa2hva(vm, PTE_GET_PFN(pdpe[index[2]]) * vm->page_size); 275 TEST_ASSERT(pde[index[1]] & PTE_PRESENT_MASK, 276 "Expected pde to be present for gva: 0x%08lx", vaddr); 277 TEST_ASSERT(!(pde[index[1]] & PTE_LARGE_MASK), 278 "Expected pde to map a pte not a 2-MByte page."); 279 TEST_ASSERT((pde[index[1]] & rsvd_mask) == 0, 280 "Unexpected reserved bits set."); 281 282 pte = addr_gpa2hva(vm, PTE_GET_PFN(pde[index[1]]) * vm->page_size); 283 TEST_ASSERT(pte[index[0]] & PTE_PRESENT_MASK, 284 "Expected pte to be present for gva: 0x%08lx", vaddr); 285 286 return &pte[index[0]]; 287 } 288 289 uint64_t vm_get_page_table_entry(struct kvm_vm *vm, struct kvm_vcpu *vcpu, 290 uint64_t vaddr) 291 { 292 uint64_t *pte = _vm_get_page_table_entry(vm, vcpu, vaddr); 293 294 return *(uint64_t *)pte; 295 } 296 297 void vm_set_page_table_entry(struct kvm_vm *vm, struct kvm_vcpu *vcpu, 298 uint64_t vaddr, uint64_t pte) 299 { 300 uint64_t *new_pte = _vm_get_page_table_entry(vm, vcpu, vaddr); 301 302 *(uint64_t *)new_pte = pte; 303 } 304 305 void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) 306 { 307 uint64_t *pml4e, *pml4e_start; 308 uint64_t *pdpe, *pdpe_start; 309 uint64_t *pde, *pde_start; 310 uint64_t *pte, *pte_start; 311 312 if (!vm->pgd_created) 313 return; 314 315 fprintf(stream, "%*s " 316 " no\n", indent, ""); 317 fprintf(stream, "%*s index hvaddr gpaddr " 318 "addr w exec dirty\n", 319 indent, ""); 320 pml4e_start = (uint64_t *) addr_gpa2hva(vm, vm->pgd); 321 for (uint16_t n1 = 0; n1 <= 0x1ffu; n1++) { 322 pml4e = &pml4e_start[n1]; 323 if (!(*pml4e & PTE_PRESENT_MASK)) 324 continue; 325 fprintf(stream, "%*spml4e 0x%-3zx %p 0x%-12lx 0x%-10llx %u " 326 " %u\n", 327 indent, "", 328 pml4e - pml4e_start, pml4e, 329 addr_hva2gpa(vm, pml4e), PTE_GET_PFN(*pml4e), 330 !!(*pml4e & PTE_WRITABLE_MASK), !!(*pml4e & PTE_NX_MASK)); 331 332 pdpe_start = addr_gpa2hva(vm, *pml4e & PHYSICAL_PAGE_MASK); 333 for (uint16_t n2 = 0; n2 <= 0x1ffu; n2++) { 334 pdpe = &pdpe_start[n2]; 335 if (!(*pdpe & PTE_PRESENT_MASK)) 336 continue; 337 fprintf(stream, "%*spdpe 0x%-3zx %p 0x%-12lx 0x%-10llx " 338 "%u %u\n", 339 indent, "", 340 pdpe - pdpe_start, pdpe, 341 addr_hva2gpa(vm, pdpe), 342 PTE_GET_PFN(*pdpe), !!(*pdpe & PTE_WRITABLE_MASK), 343 !!(*pdpe & PTE_NX_MASK)); 344 345 pde_start = addr_gpa2hva(vm, *pdpe & PHYSICAL_PAGE_MASK); 346 for (uint16_t n3 = 0; n3 <= 0x1ffu; n3++) { 347 pde = &pde_start[n3]; 348 if (!(*pde & PTE_PRESENT_MASK)) 349 continue; 350 fprintf(stream, "%*spde 0x%-3zx %p " 351 "0x%-12lx 0x%-10llx %u %u\n", 352 indent, "", pde - pde_start, pde, 353 addr_hva2gpa(vm, pde), 354 PTE_GET_PFN(*pde), !!(*pde & PTE_WRITABLE_MASK), 355 !!(*pde & PTE_NX_MASK)); 356 357 pte_start = addr_gpa2hva(vm, *pde & PHYSICAL_PAGE_MASK); 358 for (uint16_t n4 = 0; n4 <= 0x1ffu; n4++) { 359 pte = &pte_start[n4]; 360 if (!(*pte & PTE_PRESENT_MASK)) 361 continue; 362 fprintf(stream, "%*spte 0x%-3zx %p " 363 "0x%-12lx 0x%-10llx %u %u " 364 " %u 0x%-10lx\n", 365 indent, "", 366 pte - pte_start, pte, 367 addr_hva2gpa(vm, pte), 368 PTE_GET_PFN(*pte), 369 !!(*pte & PTE_WRITABLE_MASK), 370 !!(*pte & PTE_NX_MASK), 371 !!(*pte & PTE_DIRTY_MASK), 372 ((uint64_t) n1 << 27) 373 | ((uint64_t) n2 << 18) 374 | ((uint64_t) n3 << 9) 375 | ((uint64_t) n4)); 376 } 377 } 378 } 379 } 380 } 381 382 /* 383 * Set Unusable Segment 384 * 385 * Input Args: None 386 * 387 * Output Args: 388 * segp - Pointer to segment register 389 * 390 * Return: None 391 * 392 * Sets the segment register pointed to by @segp to an unusable state. 393 */ 394 static void kvm_seg_set_unusable(struct kvm_segment *segp) 395 { 396 memset(segp, 0, sizeof(*segp)); 397 segp->unusable = true; 398 } 399 400 static void kvm_seg_fill_gdt_64bit(struct kvm_vm *vm, struct kvm_segment *segp) 401 { 402 void *gdt = addr_gva2hva(vm, vm->gdt); 403 struct desc64 *desc = gdt + (segp->selector >> 3) * 8; 404 405 desc->limit0 = segp->limit & 0xFFFF; 406 desc->base0 = segp->base & 0xFFFF; 407 desc->base1 = segp->base >> 16; 408 desc->type = segp->type; 409 desc->s = segp->s; 410 desc->dpl = segp->dpl; 411 desc->p = segp->present; 412 desc->limit1 = segp->limit >> 16; 413 desc->avl = segp->avl; 414 desc->l = segp->l; 415 desc->db = segp->db; 416 desc->g = segp->g; 417 desc->base2 = segp->base >> 24; 418 if (!segp->s) 419 desc->base3 = segp->base >> 32; 420 } 421 422 423 /* 424 * Set Long Mode Flat Kernel Code Segment 425 * 426 * Input Args: 427 * vm - VM whose GDT is being filled, or NULL to only write segp 428 * selector - selector value 429 * 430 * Output Args: 431 * segp - Pointer to KVM segment 432 * 433 * Return: None 434 * 435 * Sets up the KVM segment pointed to by @segp, to be a code segment 436 * with the selector value given by @selector. 437 */ 438 static void kvm_seg_set_kernel_code_64bit(struct kvm_vm *vm, uint16_t selector, 439 struct kvm_segment *segp) 440 { 441 memset(segp, 0, sizeof(*segp)); 442 segp->selector = selector; 443 segp->limit = 0xFFFFFFFFu; 444 segp->s = 0x1; /* kTypeCodeData */ 445 segp->type = 0x08 | 0x01 | 0x02; /* kFlagCode | kFlagCodeAccessed 446 * | kFlagCodeReadable 447 */ 448 segp->g = true; 449 segp->l = true; 450 segp->present = 1; 451 if (vm) 452 kvm_seg_fill_gdt_64bit(vm, segp); 453 } 454 455 /* 456 * Set Long Mode Flat Kernel Data Segment 457 * 458 * Input Args: 459 * vm - VM whose GDT is being filled, or NULL to only write segp 460 * selector - selector value 461 * 462 * Output Args: 463 * segp - Pointer to KVM segment 464 * 465 * Return: None 466 * 467 * Sets up the KVM segment pointed to by @segp, to be a data segment 468 * with the selector value given by @selector. 469 */ 470 static void kvm_seg_set_kernel_data_64bit(struct kvm_vm *vm, uint16_t selector, 471 struct kvm_segment *segp) 472 { 473 memset(segp, 0, sizeof(*segp)); 474 segp->selector = selector; 475 segp->limit = 0xFFFFFFFFu; 476 segp->s = 0x1; /* kTypeCodeData */ 477 segp->type = 0x00 | 0x01 | 0x02; /* kFlagData | kFlagDataAccessed 478 * | kFlagDataWritable 479 */ 480 segp->g = true; 481 segp->present = true; 482 if (vm) 483 kvm_seg_fill_gdt_64bit(vm, segp); 484 } 485 486 vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) 487 { 488 uint16_t index[4]; 489 uint64_t *pml4e, *pdpe, *pde; 490 uint64_t *pte; 491 492 TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use " 493 "unknown or unsupported guest mode, mode: 0x%x", vm->mode); 494 495 index[0] = (gva >> 12) & 0x1ffu; 496 index[1] = (gva >> 21) & 0x1ffu; 497 index[2] = (gva >> 30) & 0x1ffu; 498 index[3] = (gva >> 39) & 0x1ffu; 499 500 if (!vm->pgd_created) 501 goto unmapped_gva; 502 pml4e = addr_gpa2hva(vm, vm->pgd); 503 if (!(pml4e[index[3]] & PTE_PRESENT_MASK)) 504 goto unmapped_gva; 505 506 pdpe = addr_gpa2hva(vm, PTE_GET_PFN(pml4e[index[3]]) * vm->page_size); 507 if (!(pdpe[index[2]] & PTE_PRESENT_MASK)) 508 goto unmapped_gva; 509 510 pde = addr_gpa2hva(vm, PTE_GET_PFN(pdpe[index[2]]) * vm->page_size); 511 if (!(pde[index[1]] & PTE_PRESENT_MASK)) 512 goto unmapped_gva; 513 514 pte = addr_gpa2hva(vm, PTE_GET_PFN(pde[index[1]]) * vm->page_size); 515 if (!(pte[index[0]] & PTE_PRESENT_MASK)) 516 goto unmapped_gva; 517 518 return (PTE_GET_PFN(pte[index[0]]) * vm->page_size) + (gva & ~PAGE_MASK); 519 520 unmapped_gva: 521 TEST_FAIL("No mapping for vm virtual address, gva: 0x%lx", gva); 522 exit(EXIT_FAILURE); 523 } 524 525 static void kvm_setup_gdt(struct kvm_vm *vm, struct kvm_dtable *dt) 526 { 527 if (!vm->gdt) 528 vm->gdt = vm_vaddr_alloc_page(vm); 529 530 dt->base = vm->gdt; 531 dt->limit = getpagesize(); 532 } 533 534 static void kvm_setup_tss_64bit(struct kvm_vm *vm, struct kvm_segment *segp, 535 int selector) 536 { 537 if (!vm->tss) 538 vm->tss = vm_vaddr_alloc_page(vm); 539 540 memset(segp, 0, sizeof(*segp)); 541 segp->base = vm->tss; 542 segp->limit = 0x67; 543 segp->selector = selector; 544 segp->type = 0xb; 545 segp->present = 1; 546 kvm_seg_fill_gdt_64bit(vm, segp); 547 } 548 549 static void vcpu_setup(struct kvm_vm *vm, struct kvm_vcpu *vcpu) 550 { 551 struct kvm_sregs sregs; 552 553 /* Set mode specific system register values. */ 554 vcpu_sregs_get(vcpu, &sregs); 555 556 sregs.idt.limit = 0; 557 558 kvm_setup_gdt(vm, &sregs.gdt); 559 560 switch (vm->mode) { 561 case VM_MODE_PXXV48_4K: 562 sregs.cr0 = X86_CR0_PE | X86_CR0_NE | X86_CR0_PG; 563 sregs.cr4 |= X86_CR4_PAE | X86_CR4_OSFXSR; 564 sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX); 565 566 kvm_seg_set_unusable(&sregs.ldt); 567 kvm_seg_set_kernel_code_64bit(vm, DEFAULT_CODE_SELECTOR, &sregs.cs); 568 kvm_seg_set_kernel_data_64bit(vm, DEFAULT_DATA_SELECTOR, &sregs.ds); 569 kvm_seg_set_kernel_data_64bit(vm, DEFAULT_DATA_SELECTOR, &sregs.es); 570 kvm_setup_tss_64bit(vm, &sregs.tr, 0x18); 571 break; 572 573 default: 574 TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode); 575 } 576 577 sregs.cr3 = vm->pgd; 578 vcpu_sregs_set(vcpu, &sregs); 579 } 580 581 void __vm_xsave_require_permission(int bit, const char *name) 582 { 583 int kvm_fd; 584 u64 bitmask; 585 long rc; 586 struct kvm_device_attr attr = { 587 .group = 0, 588 .attr = KVM_X86_XCOMP_GUEST_SUPP, 589 .addr = (unsigned long) &bitmask 590 }; 591 592 TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XFD)); 593 594 kvm_fd = open_kvm_dev_path_or_exit(); 595 rc = __kvm_ioctl(kvm_fd, KVM_GET_DEVICE_ATTR, &attr); 596 close(kvm_fd); 597 598 if (rc == -1 && (errno == ENXIO || errno == EINVAL)) 599 __TEST_REQUIRE(0, "KVM_X86_XCOMP_GUEST_SUPP not supported"); 600 601 TEST_ASSERT(rc == 0, "KVM_GET_DEVICE_ATTR(0, KVM_X86_XCOMP_GUEST_SUPP) error: %ld", rc); 602 603 __TEST_REQUIRE(bitmask & (1ULL << bit), 604 "Required XSAVE feature '%s' not supported", name); 605 606 TEST_REQUIRE(!syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM, bit)); 607 608 rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_GUEST_PERM, &bitmask); 609 TEST_ASSERT(rc == 0, "prctl(ARCH_GET_XCOMP_GUEST_PERM) error: %ld", rc); 610 TEST_ASSERT(bitmask & (1ULL << bit), 611 "prctl(ARCH_REQ_XCOMP_GUEST_PERM) failure bitmask=0x%lx", 612 bitmask); 613 } 614 615 struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, 616 void *guest_code) 617 { 618 struct kvm_mp_state mp_state; 619 struct kvm_regs regs; 620 vm_vaddr_t stack_vaddr; 621 struct kvm_vcpu *vcpu; 622 623 stack_vaddr = vm_vaddr_alloc(vm, DEFAULT_STACK_PGS * getpagesize(), 624 DEFAULT_GUEST_STACK_VADDR_MIN); 625 626 vcpu = __vm_vcpu_add(vm, vcpu_id); 627 vcpu_init_cpuid(vcpu, kvm_get_supported_cpuid()); 628 vcpu_setup(vm, vcpu); 629 630 /* Setup guest general purpose registers */ 631 vcpu_regs_get(vcpu, ®s); 632 regs.rflags = regs.rflags | 0x2; 633 regs.rsp = stack_vaddr + (DEFAULT_STACK_PGS * getpagesize()); 634 regs.rip = (unsigned long) guest_code; 635 vcpu_regs_set(vcpu, ®s); 636 637 /* Setup the MP state */ 638 mp_state.mp_state = 0; 639 vcpu_mp_state_set(vcpu, &mp_state); 640 641 return vcpu; 642 } 643 644 struct kvm_vcpu *vm_arch_vcpu_recreate(struct kvm_vm *vm, uint32_t vcpu_id) 645 { 646 struct kvm_vcpu *vcpu = __vm_vcpu_add(vm, vcpu_id); 647 648 vcpu_init_cpuid(vcpu, kvm_get_supported_cpuid()); 649 650 return vcpu; 651 } 652 653 void vcpu_arch_free(struct kvm_vcpu *vcpu) 654 { 655 if (vcpu->cpuid) 656 free(vcpu->cpuid); 657 } 658 659 const struct kvm_cpuid2 *kvm_get_supported_cpuid(void) 660 { 661 static struct kvm_cpuid2 *cpuid; 662 int kvm_fd; 663 664 if (cpuid) 665 return cpuid; 666 667 cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES); 668 kvm_fd = open_kvm_dev_path_or_exit(); 669 670 kvm_ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID, cpuid); 671 672 close(kvm_fd); 673 return cpuid; 674 } 675 676 bool kvm_cpuid_has(const struct kvm_cpuid2 *cpuid, 677 struct kvm_x86_cpu_feature feature) 678 { 679 const struct kvm_cpuid_entry2 *entry; 680 int i; 681 682 for (i = 0; i < cpuid->nent; i++) { 683 entry = &cpuid->entries[i]; 684 685 /* 686 * The output registers in kvm_cpuid_entry2 are in alphabetical 687 * order, but kvm_x86_cpu_feature matches that mess, so yay 688 * pointer shenanigans! 689 */ 690 if (entry->function == feature.function && 691 entry->index == feature.index) 692 return (&entry->eax)[feature.reg] & BIT(feature.bit); 693 } 694 695 return false; 696 } 697 698 uint64_t kvm_get_feature_msr(uint64_t msr_index) 699 { 700 struct { 701 struct kvm_msrs header; 702 struct kvm_msr_entry entry; 703 } buffer = {}; 704 int r, kvm_fd; 705 706 buffer.header.nmsrs = 1; 707 buffer.entry.index = msr_index; 708 kvm_fd = open_kvm_dev_path_or_exit(); 709 710 r = __kvm_ioctl(kvm_fd, KVM_GET_MSRS, &buffer.header); 711 TEST_ASSERT(r == 1, KVM_IOCTL_ERROR(KVM_GET_MSRS, r)); 712 713 close(kvm_fd); 714 return buffer.entry.data; 715 } 716 717 void vcpu_init_cpuid(struct kvm_vcpu *vcpu, const struct kvm_cpuid2 *cpuid) 718 { 719 TEST_ASSERT(cpuid != vcpu->cpuid, "@cpuid can't be the vCPU's CPUID"); 720 721 /* Allow overriding the default CPUID. */ 722 if (vcpu->cpuid && vcpu->cpuid->nent < cpuid->nent) { 723 free(vcpu->cpuid); 724 vcpu->cpuid = NULL; 725 } 726 727 if (!vcpu->cpuid) 728 vcpu->cpuid = allocate_kvm_cpuid2(cpuid->nent); 729 730 memcpy(vcpu->cpuid, cpuid, kvm_cpuid2_size(cpuid->nent)); 731 vcpu_set_cpuid(vcpu); 732 } 733 734 void vcpu_set_cpuid_maxphyaddr(struct kvm_vcpu *vcpu, uint8_t maxphyaddr) 735 { 736 struct kvm_cpuid_entry2 *entry = vcpu_get_cpuid_entry(vcpu, 0x80000008); 737 738 entry->eax = (entry->eax & ~0xff) | maxphyaddr; 739 vcpu_set_cpuid(vcpu); 740 } 741 742 void vcpu_clear_cpuid_entry(struct kvm_vcpu *vcpu, uint32_t function) 743 { 744 struct kvm_cpuid_entry2 *entry = vcpu_get_cpuid_entry(vcpu, function); 745 746 entry->eax = 0; 747 entry->ebx = 0; 748 entry->ecx = 0; 749 entry->edx = 0; 750 vcpu_set_cpuid(vcpu); 751 } 752 753 void vcpu_set_or_clear_cpuid_feature(struct kvm_vcpu *vcpu, 754 struct kvm_x86_cpu_feature feature, 755 bool set) 756 { 757 struct kvm_cpuid_entry2 *entry; 758 u32 *reg; 759 760 entry = __vcpu_get_cpuid_entry(vcpu, feature.function, feature.index); 761 reg = (&entry->eax) + feature.reg; 762 763 if (set) 764 *reg |= BIT(feature.bit); 765 else 766 *reg &= ~BIT(feature.bit); 767 768 vcpu_set_cpuid(vcpu); 769 } 770 771 uint64_t vcpu_get_msr(struct kvm_vcpu *vcpu, uint64_t msr_index) 772 { 773 struct { 774 struct kvm_msrs header; 775 struct kvm_msr_entry entry; 776 } buffer = {}; 777 778 buffer.header.nmsrs = 1; 779 buffer.entry.index = msr_index; 780 781 vcpu_msrs_get(vcpu, &buffer.header); 782 783 return buffer.entry.data; 784 } 785 786 int _vcpu_set_msr(struct kvm_vcpu *vcpu, uint64_t msr_index, uint64_t msr_value) 787 { 788 struct { 789 struct kvm_msrs header; 790 struct kvm_msr_entry entry; 791 } buffer = {}; 792 793 memset(&buffer, 0, sizeof(buffer)); 794 buffer.header.nmsrs = 1; 795 buffer.entry.index = msr_index; 796 buffer.entry.data = msr_value; 797 798 return __vcpu_ioctl(vcpu, KVM_SET_MSRS, &buffer.header); 799 } 800 801 void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...) 802 { 803 va_list ap; 804 struct kvm_regs regs; 805 806 TEST_ASSERT(num >= 1 && num <= 6, "Unsupported number of args,\n" 807 " num: %u\n", 808 num); 809 810 va_start(ap, num); 811 vcpu_regs_get(vcpu, ®s); 812 813 if (num >= 1) 814 regs.rdi = va_arg(ap, uint64_t); 815 816 if (num >= 2) 817 regs.rsi = va_arg(ap, uint64_t); 818 819 if (num >= 3) 820 regs.rdx = va_arg(ap, uint64_t); 821 822 if (num >= 4) 823 regs.rcx = va_arg(ap, uint64_t); 824 825 if (num >= 5) 826 regs.r8 = va_arg(ap, uint64_t); 827 828 if (num >= 6) 829 regs.r9 = va_arg(ap, uint64_t); 830 831 vcpu_regs_set(vcpu, ®s); 832 va_end(ap); 833 } 834 835 void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent) 836 { 837 struct kvm_regs regs; 838 struct kvm_sregs sregs; 839 840 fprintf(stream, "%*svCPU ID: %u\n", indent, "", vcpu->id); 841 842 fprintf(stream, "%*sregs:\n", indent + 2, ""); 843 vcpu_regs_get(vcpu, ®s); 844 regs_dump(stream, ®s, indent + 4); 845 846 fprintf(stream, "%*ssregs:\n", indent + 2, ""); 847 vcpu_sregs_get(vcpu, &sregs); 848 sregs_dump(stream, &sregs, indent + 4); 849 } 850 851 static struct kvm_msr_list *__kvm_get_msr_index_list(bool feature_msrs) 852 { 853 struct kvm_msr_list *list; 854 struct kvm_msr_list nmsrs; 855 int kvm_fd, r; 856 857 kvm_fd = open_kvm_dev_path_or_exit(); 858 859 nmsrs.nmsrs = 0; 860 if (!feature_msrs) 861 r = __kvm_ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, &nmsrs); 862 else 863 r = __kvm_ioctl(kvm_fd, KVM_GET_MSR_FEATURE_INDEX_LIST, &nmsrs); 864 865 TEST_ASSERT(r == -1 && errno == E2BIG, 866 "Expected -E2BIG, got rc: %i errno: %i (%s)", 867 r, errno, strerror(errno)); 868 869 list = malloc(sizeof(*list) + nmsrs.nmsrs * sizeof(list->indices[0])); 870 TEST_ASSERT(list, "-ENOMEM when allocating MSR index list"); 871 list->nmsrs = nmsrs.nmsrs; 872 873 if (!feature_msrs) 874 kvm_ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, list); 875 else 876 kvm_ioctl(kvm_fd, KVM_GET_MSR_FEATURE_INDEX_LIST, list); 877 close(kvm_fd); 878 879 TEST_ASSERT(list->nmsrs == nmsrs.nmsrs, 880 "Number of MSRs in list changed, was %d, now %d", 881 nmsrs.nmsrs, list->nmsrs); 882 return list; 883 } 884 885 const struct kvm_msr_list *kvm_get_msr_index_list(void) 886 { 887 static const struct kvm_msr_list *list; 888 889 if (!list) 890 list = __kvm_get_msr_index_list(false); 891 return list; 892 } 893 894 895 const struct kvm_msr_list *kvm_get_feature_msr_index_list(void) 896 { 897 static const struct kvm_msr_list *list; 898 899 if (!list) 900 list = __kvm_get_msr_index_list(true); 901 return list; 902 } 903 904 bool kvm_msr_is_in_save_restore_list(uint32_t msr_index) 905 { 906 const struct kvm_msr_list *list = kvm_get_msr_index_list(); 907 int i; 908 909 for (i = 0; i < list->nmsrs; ++i) { 910 if (list->indices[i] == msr_index) 911 return true; 912 } 913 914 return false; 915 } 916 917 static void vcpu_save_xsave_state(struct kvm_vcpu *vcpu, 918 struct kvm_x86_state *state) 919 { 920 int size = vm_check_cap(vcpu->vm, KVM_CAP_XSAVE2); 921 922 if (size) { 923 state->xsave = malloc(size); 924 vcpu_xsave2_get(vcpu, state->xsave); 925 } else { 926 state->xsave = malloc(sizeof(struct kvm_xsave)); 927 vcpu_xsave_get(vcpu, state->xsave); 928 } 929 } 930 931 struct kvm_x86_state *vcpu_save_state(struct kvm_vcpu *vcpu) 932 { 933 const struct kvm_msr_list *msr_list = kvm_get_msr_index_list(); 934 struct kvm_x86_state *state; 935 int i; 936 937 static int nested_size = -1; 938 939 if (nested_size == -1) { 940 nested_size = kvm_check_cap(KVM_CAP_NESTED_STATE); 941 TEST_ASSERT(nested_size <= sizeof(state->nested_), 942 "Nested state size too big, %i > %zi", 943 nested_size, sizeof(state->nested_)); 944 } 945 946 /* 947 * When KVM exits to userspace with KVM_EXIT_IO, KVM guarantees 948 * guest state is consistent only after userspace re-enters the 949 * kernel with KVM_RUN. Complete IO prior to migrating state 950 * to a new VM. 951 */ 952 vcpu_run_complete_io(vcpu); 953 954 state = malloc(sizeof(*state) + msr_list->nmsrs * sizeof(state->msrs.entries[0])); 955 956 vcpu_events_get(vcpu, &state->events); 957 vcpu_mp_state_get(vcpu, &state->mp_state); 958 vcpu_regs_get(vcpu, &state->regs); 959 vcpu_save_xsave_state(vcpu, state); 960 961 if (kvm_has_cap(KVM_CAP_XCRS)) 962 vcpu_xcrs_get(vcpu, &state->xcrs); 963 964 vcpu_sregs_get(vcpu, &state->sregs); 965 966 if (nested_size) { 967 state->nested.size = sizeof(state->nested_); 968 969 vcpu_nested_state_get(vcpu, &state->nested); 970 TEST_ASSERT(state->nested.size <= nested_size, 971 "Nested state size too big, %i (KVM_CHECK_CAP gave %i)", 972 state->nested.size, nested_size); 973 } else { 974 state->nested.size = 0; 975 } 976 977 state->msrs.nmsrs = msr_list->nmsrs; 978 for (i = 0; i < msr_list->nmsrs; i++) 979 state->msrs.entries[i].index = msr_list->indices[i]; 980 vcpu_msrs_get(vcpu, &state->msrs); 981 982 vcpu_debugregs_get(vcpu, &state->debugregs); 983 984 return state; 985 } 986 987 void vcpu_load_state(struct kvm_vcpu *vcpu, struct kvm_x86_state *state) 988 { 989 vcpu_sregs_set(vcpu, &state->sregs); 990 vcpu_msrs_set(vcpu, &state->msrs); 991 992 if (kvm_has_cap(KVM_CAP_XCRS)) 993 vcpu_xcrs_set(vcpu, &state->xcrs); 994 995 vcpu_xsave_set(vcpu, state->xsave); 996 vcpu_events_set(vcpu, &state->events); 997 vcpu_mp_state_set(vcpu, &state->mp_state); 998 vcpu_debugregs_set(vcpu, &state->debugregs); 999 vcpu_regs_set(vcpu, &state->regs); 1000 1001 if (state->nested.size) 1002 vcpu_nested_state_set(vcpu, &state->nested); 1003 } 1004 1005 void kvm_x86_state_cleanup(struct kvm_x86_state *state) 1006 { 1007 free(state->xsave); 1008 free(state); 1009 } 1010 1011 static bool cpu_vendor_string_is(const char *vendor) 1012 { 1013 const uint32_t *chunk = (const uint32_t *)vendor; 1014 uint32_t eax, ebx, ecx, edx; 1015 1016 cpuid(0, &eax, &ebx, &ecx, &edx); 1017 return (ebx == chunk[0] && edx == chunk[1] && ecx == chunk[2]); 1018 } 1019 1020 bool is_intel_cpu(void) 1021 { 1022 return cpu_vendor_string_is("GenuineIntel"); 1023 } 1024 1025 /* 1026 * Exclude early K5 samples with a vendor string of "AMDisbetter!" 1027 */ 1028 bool is_amd_cpu(void) 1029 { 1030 return cpu_vendor_string_is("AuthenticAMD"); 1031 } 1032 1033 void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits) 1034 { 1035 const struct kvm_cpuid_entry2 *entry; 1036 bool pae; 1037 1038 /* SDM 4.1.4 */ 1039 if (kvm_get_cpuid_max_extended() < 0x80000008) { 1040 pae = kvm_get_supported_cpuid_entry(1)->edx & (1 << 6); 1041 *pa_bits = pae ? 36 : 32; 1042 *va_bits = 32; 1043 } else { 1044 entry = kvm_get_supported_cpuid_entry(0x80000008); 1045 *pa_bits = entry->eax & 0xff; 1046 *va_bits = (entry->eax >> 8) & 0xff; 1047 } 1048 } 1049 1050 struct idt_entry { 1051 uint16_t offset0; 1052 uint16_t selector; 1053 uint16_t ist : 3; 1054 uint16_t : 5; 1055 uint16_t type : 4; 1056 uint16_t : 1; 1057 uint16_t dpl : 2; 1058 uint16_t p : 1; 1059 uint16_t offset1; 1060 uint32_t offset2; uint32_t reserved; 1061 }; 1062 1063 static void set_idt_entry(struct kvm_vm *vm, int vector, unsigned long addr, 1064 int dpl, unsigned short selector) 1065 { 1066 struct idt_entry *base = 1067 (struct idt_entry *)addr_gva2hva(vm, vm->idt); 1068 struct idt_entry *e = &base[vector]; 1069 1070 memset(e, 0, sizeof(*e)); 1071 e->offset0 = addr; 1072 e->selector = selector; 1073 e->ist = 0; 1074 e->type = 14; 1075 e->dpl = dpl; 1076 e->p = 1; 1077 e->offset1 = addr >> 16; 1078 e->offset2 = addr >> 32; 1079 } 1080 1081 1082 static bool kvm_fixup_exception(struct ex_regs *regs) 1083 { 1084 if (regs->r9 != KVM_EXCEPTION_MAGIC || regs->rip != regs->r10) 1085 return false; 1086 1087 if (regs->vector == DE_VECTOR) 1088 return false; 1089 1090 regs->rip = regs->r11; 1091 regs->r9 = regs->vector; 1092 return true; 1093 } 1094 1095 void kvm_exit_unexpected_vector(uint32_t value) 1096 { 1097 ucall(UCALL_UNHANDLED, 1, value); 1098 } 1099 1100 void route_exception(struct ex_regs *regs) 1101 { 1102 typedef void(*handler)(struct ex_regs *); 1103 handler *handlers = (handler *)exception_handlers; 1104 1105 if (handlers && handlers[regs->vector]) { 1106 handlers[regs->vector](regs); 1107 return; 1108 } 1109 1110 if (kvm_fixup_exception(regs)) 1111 return; 1112 1113 kvm_exit_unexpected_vector(regs->vector); 1114 } 1115 1116 void vm_init_descriptor_tables(struct kvm_vm *vm) 1117 { 1118 extern void *idt_handlers; 1119 int i; 1120 1121 vm->idt = vm_vaddr_alloc_page(vm); 1122 vm->handlers = vm_vaddr_alloc_page(vm); 1123 /* Handlers have the same address in both address spaces.*/ 1124 for (i = 0; i < NUM_INTERRUPTS; i++) 1125 set_idt_entry(vm, i, (unsigned long)(&idt_handlers)[i], 0, 1126 DEFAULT_CODE_SELECTOR); 1127 } 1128 1129 void vcpu_init_descriptor_tables(struct kvm_vcpu *vcpu) 1130 { 1131 struct kvm_vm *vm = vcpu->vm; 1132 struct kvm_sregs sregs; 1133 1134 vcpu_sregs_get(vcpu, &sregs); 1135 sregs.idt.base = vm->idt; 1136 sregs.idt.limit = NUM_INTERRUPTS * sizeof(struct idt_entry) - 1; 1137 sregs.gdt.base = vm->gdt; 1138 sregs.gdt.limit = getpagesize() - 1; 1139 kvm_seg_set_kernel_data_64bit(NULL, DEFAULT_DATA_SELECTOR, &sregs.gs); 1140 vcpu_sregs_set(vcpu, &sregs); 1141 *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers; 1142 } 1143 1144 void vm_install_exception_handler(struct kvm_vm *vm, int vector, 1145 void (*handler)(struct ex_regs *)) 1146 { 1147 vm_vaddr_t *handlers = (vm_vaddr_t *)addr_gva2hva(vm, vm->handlers); 1148 1149 handlers[vector] = (vm_vaddr_t)handler; 1150 } 1151 1152 void assert_on_unhandled_exception(struct kvm_vcpu *vcpu) 1153 { 1154 struct ucall uc; 1155 1156 if (get_ucall(vcpu, &uc) == UCALL_UNHANDLED) { 1157 uint64_t vector = uc.args[0]; 1158 1159 TEST_FAIL("Unexpected vectored event in guest (vector:0x%lx)", 1160 vector); 1161 } 1162 } 1163 1164 const struct kvm_cpuid_entry2 *get_cpuid_entry(const struct kvm_cpuid2 *cpuid, 1165 uint32_t function, uint32_t index) 1166 { 1167 int i; 1168 1169 for (i = 0; i < cpuid->nent; i++) { 1170 if (cpuid->entries[i].function == function && 1171 cpuid->entries[i].index == index) 1172 return &cpuid->entries[i]; 1173 } 1174 1175 TEST_FAIL("CPUID function 0x%x index 0x%x not found ", function, index); 1176 1177 return NULL; 1178 } 1179 1180 uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2, 1181 uint64_t a3) 1182 { 1183 uint64_t r; 1184 1185 asm volatile("vmcall" 1186 : "=a"(r) 1187 : "a"(nr), "b"(a0), "c"(a1), "d"(a2), "S"(a3)); 1188 return r; 1189 } 1190 1191 const struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void) 1192 { 1193 static struct kvm_cpuid2 *cpuid; 1194 int kvm_fd; 1195 1196 if (cpuid) 1197 return cpuid; 1198 1199 cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES); 1200 kvm_fd = open_kvm_dev_path_or_exit(); 1201 1202 kvm_ioctl(kvm_fd, KVM_GET_SUPPORTED_HV_CPUID, cpuid); 1203 1204 close(kvm_fd); 1205 return cpuid; 1206 } 1207 1208 void vcpu_set_hv_cpuid(struct kvm_vcpu *vcpu) 1209 { 1210 static struct kvm_cpuid2 *cpuid_full; 1211 const struct kvm_cpuid2 *cpuid_sys, *cpuid_hv; 1212 int i, nent = 0; 1213 1214 if (!cpuid_full) { 1215 cpuid_sys = kvm_get_supported_cpuid(); 1216 cpuid_hv = kvm_get_supported_hv_cpuid(); 1217 1218 cpuid_full = allocate_kvm_cpuid2(cpuid_sys->nent + cpuid_hv->nent); 1219 if (!cpuid_full) { 1220 perror("malloc"); 1221 abort(); 1222 } 1223 1224 /* Need to skip KVM CPUID leaves 0x400000xx */ 1225 for (i = 0; i < cpuid_sys->nent; i++) { 1226 if (cpuid_sys->entries[i].function >= 0x40000000 && 1227 cpuid_sys->entries[i].function < 0x40000100) 1228 continue; 1229 cpuid_full->entries[nent] = cpuid_sys->entries[i]; 1230 nent++; 1231 } 1232 1233 memcpy(&cpuid_full->entries[nent], cpuid_hv->entries, 1234 cpuid_hv->nent * sizeof(struct kvm_cpuid_entry2)); 1235 cpuid_full->nent = nent + cpuid_hv->nent; 1236 } 1237 1238 vcpu_init_cpuid(vcpu, cpuid_full); 1239 } 1240 1241 const struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vcpu *vcpu) 1242 { 1243 struct kvm_cpuid2 *cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES); 1244 1245 vcpu_ioctl(vcpu, KVM_GET_SUPPORTED_HV_CPUID, cpuid); 1246 1247 return cpuid; 1248 } 1249 1250 unsigned long vm_compute_max_gfn(struct kvm_vm *vm) 1251 { 1252 const unsigned long num_ht_pages = 12 << (30 - vm->page_shift); /* 12 GiB */ 1253 unsigned long ht_gfn, max_gfn, max_pfn; 1254 uint32_t eax, ebx, ecx, edx, max_ext_leaf; 1255 1256 max_gfn = (1ULL << (vm->pa_bits - vm->page_shift)) - 1; 1257 1258 /* Avoid reserved HyperTransport region on AMD processors. */ 1259 if (!is_amd_cpu()) 1260 return max_gfn; 1261 1262 /* On parts with <40 physical address bits, the area is fully hidden */ 1263 if (vm->pa_bits < 40) 1264 return max_gfn; 1265 1266 /* Before family 17h, the HyperTransport area is just below 1T. */ 1267 ht_gfn = (1 << 28) - num_ht_pages; 1268 cpuid(1, &eax, &ebx, &ecx, &edx); 1269 if (x86_family(eax) < 0x17) 1270 goto done; 1271 1272 /* 1273 * Otherwise it's at the top of the physical address space, possibly 1274 * reduced due to SME by bits 11:6 of CPUID[0x8000001f].EBX. Use 1275 * the old conservative value if MAXPHYADDR is not enumerated. 1276 */ 1277 cpuid(0x80000000, &eax, &ebx, &ecx, &edx); 1278 max_ext_leaf = eax; 1279 if (max_ext_leaf < 0x80000008) 1280 goto done; 1281 1282 cpuid(0x80000008, &eax, &ebx, &ecx, &edx); 1283 max_pfn = (1ULL << ((eax & 0xff) - vm->page_shift)) - 1; 1284 if (max_ext_leaf >= 0x8000001f) { 1285 cpuid(0x8000001f, &eax, &ebx, &ecx, &edx); 1286 max_pfn >>= (ebx >> 6) & 0x3f; 1287 } 1288 1289 ht_gfn = max_pfn - num_ht_pages; 1290 done: 1291 return min(max_gfn, ht_gfn - 1); 1292 } 1293 1294 /* Returns true if kvm_intel was loaded with unrestricted_guest=1. */ 1295 bool vm_is_unrestricted_guest(struct kvm_vm *vm) 1296 { 1297 char val = 'N'; 1298 size_t count; 1299 FILE *f; 1300 1301 /* Ensure that a KVM vendor-specific module is loaded. */ 1302 if (vm == NULL) 1303 close(open_kvm_dev_path_or_exit()); 1304 1305 f = fopen("/sys/module/kvm_intel/parameters/unrestricted_guest", "r"); 1306 if (f) { 1307 count = fread(&val, sizeof(char), 1, f); 1308 TEST_ASSERT(count == 1, "Unable to read from param file."); 1309 fclose(f); 1310 } 1311 1312 return val == 'Y'; 1313 } 1314