// SPDX-License-Identifier: GPL-2.0-only
/*
 * tools/testing/selftests/kvm/lib/x86_64/processor.c
 *
 * Copyright (C) 2018, Google LLC.
 */

#define _GNU_SOURCE /* for program_invocation_name */

#include "test_util.h"
#include "kvm_util.h"
#include "../kvm_util_internal.h"
#include "processor.h"

/* Minimum physical address used for virtual translation tables. */
#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000

/* Virtual translation table structure declarations */
struct pageMapL4Entry {
	uint64_t present:1;
	uint64_t writable:1;
	uint64_t user:1;
	uint64_t write_through:1;
	uint64_t cache_disable:1;
	uint64_t accessed:1;
	uint64_t ignored_06:1;
	uint64_t page_size:1;
	uint64_t ignored_11_08:4;
	uint64_t address:40;
	uint64_t ignored_62_52:11;
	uint64_t execute_disable:1;
};

struct pageDirectoryPointerEntry {
	uint64_t present:1;
	uint64_t writable:1;
	uint64_t user:1;
	uint64_t write_through:1;
	uint64_t cache_disable:1;
	uint64_t accessed:1;
	uint64_t ignored_06:1;
	uint64_t page_size:1;
	uint64_t ignored_11_08:4;
	uint64_t address:40;
	uint64_t ignored_62_52:11;
	uint64_t execute_disable:1;
};

struct pageDirectoryEntry {
	uint64_t present:1;
	uint64_t writable:1;
	uint64_t user:1;
	uint64_t write_through:1;
	uint64_t cache_disable:1;
	uint64_t accessed:1;
	uint64_t ignored_06:1;
	uint64_t page_size:1;
	uint64_t ignored_11_08:4;
	uint64_t address:40;
	uint64_t ignored_62_52:11;
	uint64_t execute_disable:1;
};

struct pageTableEntry {
	uint64_t present:1;
	uint64_t writable:1;
	uint64_t user:1;
	uint64_t write_through:1;
	uint64_t cache_disable:1;
	uint64_t accessed:1;
	uint64_t dirty:1;
	uint64_t reserved_07:1;
	uint64_t global:1;
	uint64_t ignored_11_09:3;
	uint64_t address:40;
	uint64_t ignored_62_52:11;
	uint64_t execute_disable:1;
};
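/*
 * For reference, the page table walkers below split a canonical 48-bit
 * guest virtual address into four 9-bit table indices plus a 12-bit page
 * offset:
 *
 *   index[3] = (vaddr >> 39) & 0x1ffu;	PML4 entry, bits 47:39
 *   index[2] = (vaddr >> 30) & 0x1ffu;	PDPT entry, bits 38:30
 *   index[1] = (vaddr >> 21) & 0x1ffu;	PD entry,   bits 29:21
 *   index[0] = (vaddr >> 12) & 0x1ffu;	PT entry,   bits 20:12
 *
 * e.g. vaddr 0x40200000 yields index[3] = 0, index[2] = 1, index[1] = 1,
 * index[0] = 0 and page offset 0.
 */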
/* Register Dump
 *
 * Input Args:
 *   indent - Left margin indent amount
 *   regs - Register state
 *
 * Output Args:
 *   stream - Output FILE stream
 *
 * Return: None
 *
 * Dumps the state of the registers given by regs, to the FILE stream
 * given by stream.
 */
void regs_dump(FILE *stream, struct kvm_regs *regs,
	       uint8_t indent)
{
	fprintf(stream, "%*srax: 0x%.16llx rbx: 0x%.16llx "
		"rcx: 0x%.16llx rdx: 0x%.16llx\n",
		indent, "",
		regs->rax, regs->rbx, regs->rcx, regs->rdx);
	fprintf(stream, "%*srsi: 0x%.16llx rdi: 0x%.16llx "
		"rsp: 0x%.16llx rbp: 0x%.16llx\n",
		indent, "",
		regs->rsi, regs->rdi, regs->rsp, regs->rbp);
	fprintf(stream, "%*sr8:  0x%.16llx r9:  0x%.16llx "
		"r10: 0x%.16llx r11: 0x%.16llx\n",
		indent, "",
		regs->r8, regs->r9, regs->r10, regs->r11);
	fprintf(stream, "%*sr12: 0x%.16llx r13: 0x%.16llx "
		"r14: 0x%.16llx r15: 0x%.16llx\n",
		indent, "",
		regs->r12, regs->r13, regs->r14, regs->r15);
	fprintf(stream, "%*srip: 0x%.16llx rfl: 0x%.16llx\n",
		indent, "",
		regs->rip, regs->rflags);
}

/* Segment Dump
 *
 * Input Args:
 *   indent - Left margin indent amount
 *   segment - KVM segment
 *
 * Output Args:
 *   stream - Output FILE stream
 *
 * Return: None
 *
 * Dumps the state of the KVM segment given by segment, to the FILE stream
 * given by stream.
 */
static void segment_dump(FILE *stream, struct kvm_segment *segment,
			 uint8_t indent)
{
	fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.8x "
		"selector: 0x%.4x type: 0x%.2x\n",
		indent, "", segment->base, segment->limit,
		segment->selector, segment->type);
	fprintf(stream, "%*spresent: 0x%.2x dpl: 0x%.2x "
		"db: 0x%.2x s: 0x%.2x l: 0x%.2x\n",
		indent, "", segment->present, segment->dpl,
		segment->db, segment->s, segment->l);
	fprintf(stream, "%*sg: 0x%.2x avl: 0x%.2x "
		"unusable: 0x%.2x padding: 0x%.2x\n",
		indent, "", segment->g, segment->avl,
		segment->unusable, segment->padding);
}

/* dtable Dump
 *
 * Input Args:
 *   indent - Left margin indent amount
 *   dtable - KVM dtable
 *
 * Output Args:
 *   stream - Output FILE stream
 *
 * Return: None
 *
 * Dumps the state of the KVM dtable given by dtable, to the FILE stream
 * given by stream.
 */
static void dtable_dump(FILE *stream, struct kvm_dtable *dtable,
			uint8_t indent)
{
	fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.4x "
		"padding: 0x%.4x 0x%.4x 0x%.4x\n",
		indent, "", dtable->base, dtable->limit,
		dtable->padding[0], dtable->padding[1], dtable->padding[2]);
}

/* System Register Dump
 *
 * Input Args:
 *   indent - Left margin indent amount
 *   sregs - System registers
 *
 * Output Args:
 *   stream - Output FILE stream
 *
 * Return: None
 *
 * Dumps the state of the system registers given by sregs, to the FILE stream
 * given by stream.
 */
void sregs_dump(FILE *stream, struct kvm_sregs *sregs,
		uint8_t indent)
{
	unsigned int i;

	fprintf(stream, "%*scs:\n", indent, "");
	segment_dump(stream, &sregs->cs, indent + 2);
	fprintf(stream, "%*sds:\n", indent, "");
	segment_dump(stream, &sregs->ds, indent + 2);
	fprintf(stream, "%*ses:\n", indent, "");
	segment_dump(stream, &sregs->es, indent + 2);
	fprintf(stream, "%*sfs:\n", indent, "");
	segment_dump(stream, &sregs->fs, indent + 2);
	fprintf(stream, "%*sgs:\n", indent, "");
	segment_dump(stream, &sregs->gs, indent + 2);
	fprintf(stream, "%*sss:\n", indent, "");
	segment_dump(stream, &sregs->ss, indent + 2);
	fprintf(stream, "%*str:\n", indent, "");
	segment_dump(stream, &sregs->tr, indent + 2);
	fprintf(stream, "%*sldt:\n", indent, "");
	segment_dump(stream, &sregs->ldt, indent + 2);

	fprintf(stream, "%*sgdt:\n", indent, "");
	dtable_dump(stream, &sregs->gdt, indent + 2);
	fprintf(stream, "%*sidt:\n", indent, "");
	dtable_dump(stream, &sregs->idt, indent + 2);

	fprintf(stream, "%*scr0: 0x%.16llx cr2: 0x%.16llx "
		"cr3: 0x%.16llx cr4: 0x%.16llx\n",
		indent, "",
		sregs->cr0, sregs->cr2, sregs->cr3, sregs->cr4);
	fprintf(stream, "%*scr8: 0x%.16llx efer: 0x%.16llx "
		"apic_base: 0x%.16llx\n",
		indent, "",
		sregs->cr8, sregs->efer, sregs->apic_base);

	fprintf(stream, "%*sinterrupt_bitmap:\n", indent, "");
	for (i = 0; i < (KVM_NR_INTERRUPTS + 63) / 64; i++) {
		fprintf(stream, "%*s%.16llx\n", indent + 2, "",
			sregs->interrupt_bitmap[i]);
	}
}
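/*
 * Usage sketch (hypothetical): dump the full register state of vCPU 0 to
 * stderr when debugging a failing test:
 *
 *   struct kvm_regs regs;
 *   struct kvm_sregs sregs;
 *
 *   vcpu_regs_get(vm, 0, &regs);
 *   regs_dump(stderr, &regs, 2);
 *   vcpu_sregs_get(vm, 0, &sregs);
 *   sregs_dump(stderr, &sregs, 2);
 */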
void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot)
{
	TEST_ASSERT(vm->mode == VM_MODE_P52V48_4K, "Attempt to use "
		"unknown or unsupported guest mode, mode: 0x%x", vm->mode);

	/* If needed, create the page map l4 table. */
	if (!vm->pgd_created) {
		vm_paddr_t paddr = vm_phy_page_alloc(vm,
			KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot);
		vm->pgd = paddr;
		vm->pgd_created = true;
	}
}

/* VM Virtual Page Map
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vaddr - VM Virtual Address
 *   paddr - VM Physical Address
 *   pgd_memslot - Memory region slot for new virtual translation tables
 *
 * Output Args: None
 *
 * Return: None
 *
 * Within the VM given by vm, creates a virtual translation for the page
 * starting at vaddr to the page starting at paddr.
 */
void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
		 uint32_t pgd_memslot)
{
	uint16_t index[4];
	struct pageMapL4Entry *pml4e;

	TEST_ASSERT(vm->mode == VM_MODE_P52V48_4K, "Attempt to use "
		"unknown or unsupported guest mode, mode: 0x%x", vm->mode);

	TEST_ASSERT((vaddr % vm->page_size) == 0,
		"Virtual address not on page boundary,\n"
		"  vaddr: 0x%lx vm->page_size: 0x%x",
		vaddr, vm->page_size);
	TEST_ASSERT(sparsebit_is_set(vm->vpages_valid,
		(vaddr >> vm->page_shift)),
		"Invalid virtual address, vaddr: 0x%lx",
		vaddr);
	TEST_ASSERT((paddr % vm->page_size) == 0,
		"Physical address not on page boundary,\n"
		"  paddr: 0x%lx vm->page_size: 0x%x",
		paddr, vm->page_size);
	TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
		"Physical address beyond maximum supported,\n"
		"  paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
		paddr, vm->max_gfn, vm->page_size);

	index[0] = (vaddr >> 12) & 0x1ffu;
	index[1] = (vaddr >> 21) & 0x1ffu;
	index[2] = (vaddr >> 30) & 0x1ffu;
	index[3] = (vaddr >> 39) & 0x1ffu;

	/* Allocate page directory pointer table if not present. */
	pml4e = addr_gpa2hva(vm, vm->pgd);
	if (!pml4e[index[3]].present) {
		pml4e[index[3]].address = vm_phy_page_alloc(vm,
			KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot)
			>> vm->page_shift;
		pml4e[index[3]].writable = true;
		pml4e[index[3]].present = true;
	}

	/* Allocate page directory table if not present. */
	struct pageDirectoryPointerEntry *pdpe;
	pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size);
	if (!pdpe[index[2]].present) {
		pdpe[index[2]].address = vm_phy_page_alloc(vm,
			KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot)
			>> vm->page_shift;
		pdpe[index[2]].writable = true;
		pdpe[index[2]].present = true;
	}

	/* Allocate page table if not present. */
	struct pageDirectoryEntry *pde;
	pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size);
	if (!pde[index[1]].present) {
		pde[index[1]].address = vm_phy_page_alloc(vm,
			KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot)
			>> vm->page_shift;
		pde[index[1]].writable = true;
		pde[index[1]].present = true;
	}

	/* Fill in page table entry. */
	struct pageTableEntry *pte;
	pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size);
	pte[index[0]].address = paddr >> vm->page_shift;
	pte[index[0]].writable = true;
	pte[index[0]].present = true;
}
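/*
 * Usage sketch (hypothetical): allocate one guest physical page and
 * identity-map it in memory slot 0, so guest code can dereference the
 * guest physical address directly:
 *
 *   vm_paddr_t paddr = vm_phy_page_alloc(vm,
 *           KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0);
 *
 *   virt_pg_map(vm, paddr, paddr, 0);
 */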
/* Virtual Translation Tables Dump
 *
 * Input Args:
 *   vm - Virtual Machine
 *   indent - Left margin indent amount
 *
 * Output Args:
 *   stream - Output FILE stream
 *
 * Return: None
 *
 * Dumps to the FILE stream given by stream, the contents of all the
 * virtual translation tables for the VM given by vm.
 */
void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
{
	struct pageMapL4Entry *pml4e, *pml4e_start;
	struct pageDirectoryPointerEntry *pdpe, *pdpe_start;
	struct pageDirectoryEntry *pde, *pde_start;
	struct pageTableEntry *pte, *pte_start;

	if (!vm->pgd_created)
		return;

	fprintf(stream, "%*s                                          "
		"                no\n", indent, "");
	fprintf(stream, "%*s      index hvaddr         gpaddr         "
		"addr         w exec dirty\n",
		indent, "");
	pml4e_start = (struct pageMapL4Entry *) addr_gpa2hva(vm, vm->pgd);
	for (uint16_t n1 = 0; n1 <= 0x1ffu; n1++) {
		pml4e = &pml4e_start[n1];
		if (!pml4e->present)
			continue;
		fprintf(stream, "%*spml4e 0x%-3zx %p 0x%-12lx 0x%-10lx %u "
			" %u\n",
			indent, "",
			pml4e - pml4e_start, pml4e,
			addr_hva2gpa(vm, pml4e), (uint64_t) pml4e->address,
			pml4e->writable, pml4e->execute_disable);

		pdpe_start = addr_gpa2hva(vm, pml4e->address * vm->page_size);
		for (uint16_t n2 = 0; n2 <= 0x1ffu; n2++) {
			pdpe = &pdpe_start[n2];
			if (!pdpe->present)
				continue;
			fprintf(stream, "%*spdpe  0x%-3zx %p 0x%-12lx 0x%-10lx "
				"%u  %u\n",
				indent, "",
				pdpe - pdpe_start, pdpe,
				addr_hva2gpa(vm, pdpe),
				(uint64_t) pdpe->address, pdpe->writable,
				pdpe->execute_disable);

			pde_start = addr_gpa2hva(vm,
				pdpe->address * vm->page_size);
			for (uint16_t n3 = 0; n3 <= 0x1ffu; n3++) {
				pde = &pde_start[n3];
				if (!pde->present)
					continue;
				fprintf(stream, "%*spde   0x%-3zx %p "
					"0x%-12lx 0x%-10lx %u  %u\n",
					indent, "", pde - pde_start, pde,
					addr_hva2gpa(vm, pde),
					(uint64_t) pde->address, pde->writable,
					pde->execute_disable);

				pte_start = addr_gpa2hva(vm,
					pde->address * vm->page_size);
				for (uint16_t n4 = 0; n4 <= 0x1ffu; n4++) {
					pte = &pte_start[n4];
					if (!pte->present)
						continue;
					fprintf(stream, "%*spte   0x%-3zx %p "
						"0x%-12lx 0x%-10lx %u  %u "
						"    %u    0x%-10lx\n",
						indent, "",
						pte - pte_start, pte,
						addr_hva2gpa(vm, pte),
						(uint64_t) pte->address,
						pte->writable,
						pte->execute_disable,
						pte->dirty,
						((uint64_t) n1 << 27)
						| ((uint64_t) n2 << 18)
						| ((uint64_t) n3 << 9)
						| ((uint64_t) n4));
				}
			}
		}
	}
}
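/*
 * Usage sketch (hypothetical): print every present translation of a VM to
 * stderr with a two-space left margin:
 *
 *   virt_dump(stderr, vm, 2);
 */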
/* Set Unusable Segment
 *
 * Input Args: None
 *
 * Output Args:
 *   segp - Pointer to segment register
 *
 * Return: None
 *
 * Sets the segment register pointed to by segp to an unusable state.
 */
static void kvm_seg_set_unusable(struct kvm_segment *segp)
{
	memset(segp, 0, sizeof(*segp));
	segp->unusable = true;
}

static void kvm_seg_fill_gdt_64bit(struct kvm_vm *vm, struct kvm_segment *segp)
{
	void *gdt = addr_gva2hva(vm, vm->gdt);
	struct desc64 *desc = gdt + (segp->selector >> 3) * 8;

	desc->limit0 = segp->limit & 0xFFFF;
	desc->base0 = segp->base & 0xFFFF;
	desc->base1 = segp->base >> 16;
	desc->s = segp->s;
	desc->type = segp->type;
	desc->dpl = segp->dpl;
	desc->p = segp->present;
	desc->limit1 = segp->limit >> 16;
	desc->l = segp->l;
	desc->db = segp->db;
	desc->g = segp->g;
	desc->base2 = segp->base >> 24;
	if (!segp->s)
		desc->base3 = segp->base >> 32;
}

/* Set Long Mode Flat Kernel Code Segment
 *
 * Input Args:
 *   vm - VM whose GDT is being filled, or NULL to only write segp
 *   selector - selector value
 *
 * Output Args:
 *   segp - Pointer to KVM segment
 *
 * Return: None
 *
 * Sets up the KVM segment pointed to by segp, to be a code segment
 * with the selector value given by selector.
 */
static void kvm_seg_set_kernel_code_64bit(struct kvm_vm *vm, uint16_t selector,
					  struct kvm_segment *segp)
{
	memset(segp, 0, sizeof(*segp));
	segp->selector = selector;
	segp->limit = 0xFFFFFFFFu;
	segp->s = 0x1; /* kTypeCodeData */
	segp->type = 0x08 | 0x01 | 0x02; /* kFlagCode | kFlagCodeAccessed
					  * | kFlagCodeReadable
					  */
	segp->g = true;
	segp->l = true;
	segp->present = 1;
	if (vm)
		kvm_seg_fill_gdt_64bit(vm, segp);
}

/* Set Long Mode Flat Kernel Data Segment
 *
 * Input Args:
 *   vm - VM whose GDT is being filled, or NULL to only write segp
 *   selector - selector value
 *
 * Output Args:
 *   segp - Pointer to KVM segment
 *
 * Return: None
 *
 * Sets up the KVM segment pointed to by segp, to be a data segment
 * with the selector value given by selector.
 */
static void kvm_seg_set_kernel_data_64bit(struct kvm_vm *vm, uint16_t selector,
					  struct kvm_segment *segp)
{
	memset(segp, 0, sizeof(*segp));
	segp->selector = selector;
	segp->limit = 0xFFFFFFFFu;
	segp->s = 0x1; /* kTypeCodeData */
	segp->type = 0x00 | 0x01 | 0x02; /* kFlagData | kFlagDataAccessed
					  * | kFlagDataWritable
					  */
	segp->g = true;
	segp->present = true;
	if (vm)
		kvm_seg_fill_gdt_64bit(vm, segp);
}
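/*
 * Usage sketch (hypothetical): build a flat 64-bit code segment without
 * touching any VM's GDT by passing a NULL vm:
 *
 *   struct kvm_segment cs;
 *
 *   kvm_seg_set_kernel_code_64bit(NULL, 0x8, &cs);
 */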
/* Address Guest Virtual to Guest Physical
 *
 * Input Args:
 *   vm - Virtual Machine
 *   gva - VM virtual address
 *
 * Output Args: None
 *
 * Return:
 *   Equivalent VM physical address
 *
 * Translates the VM virtual address given by gva to a VM physical
 * address, within the VM given by vm.  A TEST_ASSERT failure occurs
 * if no mapping exists for the VM virtual address.
 */
vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
{
	uint16_t index[4];
	struct pageMapL4Entry *pml4e;
	struct pageDirectoryPointerEntry *pdpe;
	struct pageDirectoryEntry *pde;
	struct pageTableEntry *pte;

	TEST_ASSERT(vm->mode == VM_MODE_P52V48_4K, "Attempt to use "
		"unknown or unsupported guest mode, mode: 0x%x", vm->mode);

	index[0] = (gva >> 12) & 0x1ffu;
	index[1] = (gva >> 21) & 0x1ffu;
	index[2] = (gva >> 30) & 0x1ffu;
	index[3] = (gva >> 39) & 0x1ffu;

	if (!vm->pgd_created)
		goto unmapped_gva;
	pml4e = addr_gpa2hva(vm, vm->pgd);
	if (!pml4e[index[3]].present)
		goto unmapped_gva;

	pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size);
	if (!pdpe[index[2]].present)
		goto unmapped_gva;

	pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size);
	if (!pde[index[1]].present)
		goto unmapped_gva;

	pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size);
	if (!pte[index[0]].present)
		goto unmapped_gva;

	return (pte[index[0]].address * vm->page_size) + (gva & 0xfffu);

unmapped_gva:
	TEST_ASSERT(false, "No mapping for vm virtual address, "
		"gva: 0x%lx", gva);
	exit(EXIT_FAILURE);
}
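/*
 * Usage sketch (hypothetical): translate a guest virtual address handed out
 * by vm_vaddr_alloc() back to the host memory that backs it:
 *
 *   vm_vaddr_t gva = vm_vaddr_alloc(vm, getpagesize(),
 *                                   KVM_UTIL_MIN_VADDR, 0, 0);
 *   vm_paddr_t gpa = addr_gva2gpa(vm, gva);
 *   void *hva = addr_gpa2hva(vm, gpa);
 */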
static void kvm_setup_gdt(struct kvm_vm *vm, struct kvm_dtable *dt, int gdt_memslot,
			  int pgd_memslot)
{
	if (!vm->gdt)
		vm->gdt = vm_vaddr_alloc(vm, getpagesize(),
			KVM_UTIL_MIN_VADDR, gdt_memslot, pgd_memslot);

	dt->base = vm->gdt;
	dt->limit = getpagesize();
}

static void kvm_setup_tss_64bit(struct kvm_vm *vm, struct kvm_segment *segp,
				int selector, int gdt_memslot,
				int pgd_memslot)
{
	if (!vm->tss)
		vm->tss = vm_vaddr_alloc(vm, getpagesize(),
			KVM_UTIL_MIN_VADDR, gdt_memslot, pgd_memslot);

	memset(segp, 0, sizeof(*segp));
	segp->base = vm->tss;
	segp->limit = 0x67;
	segp->selector = selector;
	segp->type = 0xb;
	segp->present = 1;
	kvm_seg_fill_gdt_64bit(vm, segp);
}

void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_memslot)
{
	struct kvm_sregs sregs;

	/* Set mode specific system register values. */
	vcpu_sregs_get(vm, vcpuid, &sregs);

	sregs.idt.limit = 0;

	kvm_setup_gdt(vm, &sregs.gdt, gdt_memslot, pgd_memslot);

	switch (vm->mode) {
	case VM_MODE_P52V48_4K:
		sregs.cr0 = X86_CR0_PE | X86_CR0_NE | X86_CR0_PG;
		sregs.cr4 |= X86_CR4_PAE | X86_CR4_OSFXSR;
		sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX);

		kvm_seg_set_unusable(&sregs.ldt);
		kvm_seg_set_kernel_code_64bit(vm, 0x8, &sregs.cs);
		kvm_seg_set_kernel_data_64bit(vm, 0x10, &sregs.ds);
		kvm_seg_set_kernel_data_64bit(vm, 0x10, &sregs.es);
		kvm_setup_tss_64bit(vm, &sregs.tr, 0x18, gdt_memslot, pgd_memslot);
		break;

	default:
		TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", vm->mode);
	}

	sregs.cr3 = vm->pgd;
	vcpu_sregs_set(vm, vcpuid, &sregs);
}

/* Adds a vCPU with reasonable defaults (i.e. a stack)
 *
 * Input Args:
 *   vcpuid - The id of the VCPU to add to the VM.
 *   guest_code - The vCPU's entry point
 */
void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
{
	struct kvm_mp_state mp_state;
	struct kvm_regs regs;
	vm_vaddr_t stack_vaddr;

	stack_vaddr = vm_vaddr_alloc(vm, DEFAULT_STACK_PGS * getpagesize(),
				     DEFAULT_GUEST_STACK_VADDR_MIN, 0, 0);

	/* Create VCPU */
	vm_vcpu_add(vm, vcpuid, 0, 0);

	/* Setup guest general purpose registers */
	vcpu_regs_get(vm, vcpuid, &regs);
	regs.rflags = regs.rflags | 0x2;
	regs.rsp = stack_vaddr + (DEFAULT_STACK_PGS * getpagesize());
	regs.rip = (unsigned long) guest_code;
	vcpu_regs_set(vm, vcpuid, &regs);

	/* Setup the MP state */
	mp_state.mp_state = 0;
	vcpu_set_mp_state(vm, vcpuid, &mp_state);
}

/* Allocate an instance of struct kvm_cpuid2
 *
 * Input Args: None
 *
 * Output Args: None
 *
 * Return: A pointer to the allocated struct.  The caller is responsible
 * for freeing this struct.
 *
 * Since kvm_cpuid2 uses a 0-length array to allow the size of the
 * array to be decided at allocation time, allocation is slightly
 * complicated.  This function uses a reasonable default length for
 * the array and performs the appropriate allocation.
 */
static struct kvm_cpuid2 *allocate_kvm_cpuid2(void)
{
	struct kvm_cpuid2 *cpuid;
	int nent = 100;
	size_t size;

	size = sizeof(*cpuid);
	size += nent * sizeof(struct kvm_cpuid_entry2);
	cpuid = malloc(size);
	if (!cpuid) {
		perror("malloc");
		abort();
	}

	cpuid->nent = nent;

	return cpuid;
}

/* KVM Supported CPUID Get
 *
 * Input Args: None
 *
 * Output Args: None
 *
 * Return: The supported KVM CPUID
 *
 * Get the guest CPUID supported by KVM.
 */
struct kvm_cpuid2 *kvm_get_supported_cpuid(void)
{
	static struct kvm_cpuid2 *cpuid;
	int ret;
	int kvm_fd;

	if (cpuid)
		return cpuid;

	cpuid = allocate_kvm_cpuid2();
	kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
	if (kvm_fd < 0)
		exit(KSFT_SKIP);

	ret = ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID, cpuid);
	TEST_ASSERT(ret == 0, "KVM_GET_SUPPORTED_CPUID failed %d %d\n",
		ret, errno);

	close(kvm_fd);
	return cpuid;
}

/* Locate a cpuid entry.
 *
 * Input Args:
 *   function: The function of the cpuid entry to find.
 *   index: The index of the cpuid entry to find.
 *
 * Output Args: None
 *
 * Return: A pointer to the cpuid entry.  Never returns NULL.
 */
struct kvm_cpuid_entry2 *
kvm_get_supported_cpuid_index(uint32_t function, uint32_t index)
{
	struct kvm_cpuid2 *cpuid;
	struct kvm_cpuid_entry2 *entry = NULL;
	int i;

	cpuid = kvm_get_supported_cpuid();
	for (i = 0; i < cpuid->nent; i++) {
		if (cpuid->entries[i].function == function &&
		    cpuid->entries[i].index == index) {
			entry = &cpuid->entries[i];
			break;
		}
	}

	TEST_ASSERT(entry, "Guest CPUID entry not found: (EAX=%x, ECX=%x).",
		function, index);
	return entry;
}
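/*
 * Usage sketch (hypothetical): check whether KVM exposes x2APIC
 * (CPUID.01H:ECX bit 21) before a test relies on it:
 *
 *   struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_index(1, 0);
 *
 *   if (!(entry->ecx & (1u << 21)))
 *           ...skip the test...
 */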
/* VM VCPU CPUID Set
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vcpuid - VCPU id
 *   cpuid - The CPUID values to set.
 *
 * Output Args: None
 *
 * Return: void
 *
 * Set the VCPU's CPUID.
 */
void vcpu_set_cpuid(struct kvm_vm *vm,
		    uint32_t vcpuid, struct kvm_cpuid2 *cpuid)
{
	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
	int rc;

	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);

	rc = ioctl(vcpu->fd, KVM_SET_CPUID2, cpuid);
	TEST_ASSERT(rc == 0, "KVM_SET_CPUID2 failed, rc: %i errno: %i",
		rc, errno);
}

/* Create a VM with reasonable defaults
 *
 * Input Args:
 *   vcpuid - The id of the single VCPU to add to the VM.
 *   extra_mem_pages - Number of extra pages of guest memory to add
 *                     (this determines how much extra space is needed
 *                     for the page tables built from memory slot 0)
 *   guest_code - The vCPU's entry point
 *
 * Output Args: None
 *
 * Return:
 *   Pointer to opaque structure that describes the created VM.
 */
struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
				 void *guest_code)
{
	struct kvm_vm *vm;
	/*
	 * For x86 the maximum page table size for a memory region
	 * will be when only 4K pages are used.  In that case the
	 * total extra size for page tables (for extra N pages) will
	 * be: N/512+N/512^2+N/512^3+..., which is definitely smaller
	 * than N/512*2.
	 */
	uint64_t extra_pg_pages = extra_mem_pages / 512 * 2;

	/* Create VM */
	vm = vm_create(VM_MODE_P52V48_4K,
		       DEFAULT_GUEST_PHY_PAGES + extra_pg_pages,
		       O_RDWR);

	/* Setup guest code */
	kvm_vm_elf_load(vm, program_invocation_name, 0, 0);

	/* Setup IRQ Chip */
	vm_create_irqchip(vm);

	/* Add the first vCPU. */
	vm_vcpu_add_default(vm, vcpuid, guest_code);

	return vm;
}

/* VCPU Get MSR
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vcpuid - VCPU ID
 *   msr_index - Index of MSR
 *
 * Output Args: None
 *
 * Return: On success, value of the MSR.  On failure a TEST_ASSERT is produced.
 *
 * Get value of MSR for VCPU.
 */
uint64_t vcpu_get_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index)
{
	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
	struct {
		struct kvm_msrs header;
		struct kvm_msr_entry entry;
	} buffer = {};
	int r;

	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
	buffer.header.nmsrs = 1;
	buffer.entry.index = msr_index;
	r = ioctl(vcpu->fd, KVM_GET_MSRS, &buffer.header);
	TEST_ASSERT(r == 1, "KVM_GET_MSRS IOCTL failed,\n"
		"  rc: %i errno: %i", r, errno);

	return buffer.entry.data;
}
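/*
 * Usage sketch (hypothetical): read IA32_EFER (MSR index 0xc0000080) on
 * vCPU 0 and write it back unchanged via vcpu_set_msr() below:
 *
 *   uint64_t efer = vcpu_get_msr(vm, 0, 0xc0000080);
 *
 *   vcpu_set_msr(vm, 0, 0xc0000080, efer);
 */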
/* VCPU Set MSR
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vcpuid - VCPU ID
 *   msr_index - Index of MSR
 *   msr_value - New value of MSR
 *
 * Output Args: None
 *
 * Return: On success, nothing.  On failure a TEST_ASSERT is produced.
 *
 * Set value of MSR for VCPU.
 */
void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index,
		  uint64_t msr_value)
{
	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
	struct {
		struct kvm_msrs header;
		struct kvm_msr_entry entry;
	} buffer = {};
	int r;

	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
	memset(&buffer, 0, sizeof(buffer));
	buffer.header.nmsrs = 1;
	buffer.entry.index = msr_index;
	buffer.entry.data = msr_value;
	r = ioctl(vcpu->fd, KVM_SET_MSRS, &buffer.header);
	TEST_ASSERT(r == 1, "KVM_SET_MSRS IOCTL failed,\n"
		"  rc: %i errno: %i", r, errno);
}

/* VM VCPU Args Set
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vcpuid - VCPU ID
 *   num - number of arguments
 *   ... - arguments, each of type uint64_t
 *
 * Output Args: None
 *
 * Return: None
 *
 * Sets the first num function input arguments to the values
 * given as variable args.  Each of the variable args is expected to
 * be of type uint64_t.
 */
void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...)
{
	va_list ap;
	struct kvm_regs regs;

	TEST_ASSERT(num >= 1 && num <= 6, "Unsupported number of args,\n"
		"  num: %u\n",
		num);

	va_start(ap, num);
	vcpu_regs_get(vm, vcpuid, &regs);

	if (num >= 1)
		regs.rdi = va_arg(ap, uint64_t);

	if (num >= 2)
		regs.rsi = va_arg(ap, uint64_t);

	if (num >= 3)
		regs.rdx = va_arg(ap, uint64_t);

	if (num >= 4)
		regs.rcx = va_arg(ap, uint64_t);

	if (num >= 5)
		regs.r8 = va_arg(ap, uint64_t);

	if (num >= 6)
		regs.r9 = va_arg(ap, uint64_t);

	vcpu_regs_set(vm, vcpuid, &regs);
	va_end(ap);
}
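/*
 * For reference: the registers written above are the System V AMD64
 * integer argument registers (rdi, rsi, rdx, rcx, r8, r9), so a guest
 * entry point declared as
 *
 *   static void guest_code(uint64_t a, uint64_t b);
 *
 * receives a in rdi and b in rsi after a hypothetical
 * vcpu_args_set(vm, 0, 2, a, b).
 */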
/*
 * VM VCPU Dump
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vcpuid - VCPU ID
 *   indent - Left margin indent amount
 *
 * Output Args:
 *   stream - Output FILE stream
 *
 * Return: None
 *
 * Dumps the current state of the VCPU specified by vcpuid, within the VM
 * given by vm, to the FILE stream given by stream.
 */
void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent)
{
	struct kvm_regs regs;
	struct kvm_sregs sregs;

	fprintf(stream, "%*scpuid: %u\n", indent, "", vcpuid);

	fprintf(stream, "%*sregs:\n", indent + 2, "");
	vcpu_regs_get(vm, vcpuid, &regs);
	regs_dump(stream, &regs, indent + 4);

	fprintf(stream, "%*ssregs:\n", indent + 2, "");
	vcpu_sregs_get(vm, vcpuid, &sregs);
	sregs_dump(stream, &sregs, indent + 4);
}

struct kvm_x86_state {
	struct kvm_vcpu_events events;
	struct kvm_mp_state mp_state;
	struct kvm_regs regs;
	struct kvm_xsave xsave;
	struct kvm_xcrs xcrs;
	struct kvm_sregs sregs;
	struct kvm_debugregs debugregs;
	union {
		struct kvm_nested_state nested;
		char nested_[16384];
	};
	struct kvm_msrs msrs;
};

static int kvm_get_num_msrs(struct kvm_vm *vm)
{
	struct kvm_msr_list nmsrs;
	int r;

	nmsrs.nmsrs = 0;
	r = ioctl(vm->kvm_fd, KVM_GET_MSR_INDEX_LIST, &nmsrs);
	TEST_ASSERT(r == -1 && errno == E2BIG,
		"Unexpected result from KVM_GET_MSR_INDEX_LIST probe, r: %i",
		r);

	return nmsrs.nmsrs;
}
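/*
 * For reference: KVM_GET_MSR_INDEX_LIST fails with E2BIG when the caller's
 * list is too small and writes the required count back to nmsrs, so probing
 * with nmsrs = 0 (as above) is the standard way to size the index list
 * before the real query in vcpu_save_state() below.
 */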
struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid)
{
	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
	struct kvm_msr_list *list;
	struct kvm_x86_state *state;
	int nmsrs, r, i;
	static int nested_size = -1;

	if (nested_size == -1) {
		nested_size = kvm_check_cap(KVM_CAP_NESTED_STATE);
		TEST_ASSERT(nested_size <= sizeof(state->nested_),
			"Nested state size too big, %i > %zi",
			nested_size, sizeof(state->nested_));
	}

	/*
	 * When KVM exits to userspace with KVM_EXIT_IO, KVM guarantees
	 * guest state is consistent only after userspace re-enters the
	 * kernel with KVM_RUN.  Complete IO prior to migrating state
	 * to a new VM.
	 */
	vcpu_run_complete_io(vm, vcpuid);

	nmsrs = kvm_get_num_msrs(vm);
	list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0]));
	list->nmsrs = nmsrs;
	r = ioctl(vm->kvm_fd, KVM_GET_MSR_INDEX_LIST, list);
	TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MSR_INDEX_LIST, r: %i",
		r);

	state = malloc(sizeof(*state) + nmsrs * sizeof(state->msrs.entries[0]));
	r = ioctl(vcpu->fd, KVM_GET_VCPU_EVENTS, &state->events);
	TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_VCPU_EVENTS, r: %i",
		r);

	r = ioctl(vcpu->fd, KVM_GET_MP_STATE, &state->mp_state);
	TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MP_STATE, r: %i",
		r);

	r = ioctl(vcpu->fd, KVM_GET_REGS, &state->regs);
	TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_REGS, r: %i",
		r);

	r = ioctl(vcpu->fd, KVM_GET_XSAVE, &state->xsave);
	TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_XSAVE, r: %i",
		r);

	r = ioctl(vcpu->fd, KVM_GET_XCRS, &state->xcrs);
	TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_XCRS, r: %i",
		r);

	r = ioctl(vcpu->fd, KVM_GET_SREGS, &state->sregs);
	TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_SREGS, r: %i",
		r);

	if (nested_size) {
		state->nested.size = sizeof(state->nested_);
		r = ioctl(vcpu->fd, KVM_GET_NESTED_STATE, &state->nested);
		TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_NESTED_STATE, r: %i",
			r);
		TEST_ASSERT(state->nested.size <= nested_size,
			"Nested state size too big, %i (KVM_CHECK_CAP gave %i)",
			state->nested.size, nested_size);
	} else
		state->nested.size = 0;

	state->msrs.nmsrs = nmsrs;
	for (i = 0; i < nmsrs; i++)
		state->msrs.entries[i].index = list->indices[i];
	r = ioctl(vcpu->fd, KVM_GET_MSRS, &state->msrs);
	TEST_ASSERT(r == nmsrs, "Unexpected result from KVM_GET_MSRS, r: %i (failed at %x)",
		r, r == nmsrs ? -1 : list->indices[r]);

	r = ioctl(vcpu->fd, KVM_GET_DEBUGREGS, &state->debugregs);
	TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_DEBUGREGS, r: %i",
		r);

	free(list);
	return state;
}
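/*
 * Usage sketch (hypothetical): the save half of a migration-style test;
 * a freshly created destination VM restores the state with
 * vcpu_load_state() below:
 *
 *   struct kvm_x86_state *state = vcpu_save_state(vm, 0);
 *
 *   ...create the destination VM and its vCPU 0...
 *   vcpu_load_state(dest_vm, 0, state);
 *   free(state);
 */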
void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_x86_state *state)
{
	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
	int r;

	r = ioctl(vcpu->fd, KVM_SET_XSAVE, &state->xsave);
	TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_XSAVE, r: %i",
		r);

	r = ioctl(vcpu->fd, KVM_SET_XCRS, &state->xcrs);
	TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_XCRS, r: %i",
		r);

	r = ioctl(vcpu->fd, KVM_SET_SREGS, &state->sregs);
	TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_SREGS, r: %i",
		r);

	r = ioctl(vcpu->fd, KVM_SET_MSRS, &state->msrs);
	TEST_ASSERT(r == state->msrs.nmsrs, "Unexpected result from KVM_SET_MSRS, r: %i (failed at %x)",
		r, r == state->msrs.nmsrs ? -1 : state->msrs.entries[r].index);

	r = ioctl(vcpu->fd, KVM_SET_VCPU_EVENTS, &state->events);
	TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_VCPU_EVENTS, r: %i",
		r);

	r = ioctl(vcpu->fd, KVM_SET_MP_STATE, &state->mp_state);
	TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_MP_STATE, r: %i",
		r);

	r = ioctl(vcpu->fd, KVM_SET_DEBUGREGS, &state->debugregs);
	TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_DEBUGREGS, r: %i",
		r);

	r = ioctl(vcpu->fd, KVM_SET_REGS, &state->regs);
	TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_REGS, r: %i",
		r);

	if (state->nested.size) {
		r = ioctl(vcpu->fd, KVM_SET_NESTED_STATE, &state->nested);
		TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_NESTED_STATE, r: %i",
			r);
	}
}

bool is_intel_cpu(void)
{
	int eax, ebx, ecx, edx;
	const uint32_t *chunk;
	const int leaf = 0;

	__asm__ __volatile__(
		"cpuid"
		: /* output */ "=a"(eax), "=b"(ebx),
		  "=c"(ecx), "=d"(edx)
		: /* input */ "0"(leaf), "2"(0));

	chunk = (const uint32_t *)("GenuineIntel");
	return (ebx == chunk[0] && edx == chunk[1] && ecx == chunk[2]);
}
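/*
 * For reference: CPUID leaf 0 returns the CPU vendor string in
 * EBX:EDX:ECX order, so the three dwords of "GenuineIntel" compare as
 * "Genu" (ebx), "ineI" (edx) and "ntel" (ecx), matching the chunk[]
 * comparison above.
 */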