/*
 * tools/testing/selftests/kvm/lib/x86_64/processor.c
 *
 * Copyright (C) 2018, Google LLC.
 *
 * This work is licensed under the terms of the GNU GPL, version 2.
 */

#define _GNU_SOURCE /* for program_invocation_name */

#include "test_util.h"
#include "kvm_util.h"
#include "../kvm_util_internal.h"
#include "processor.h"

/* Minimum physical address used for virtual translation tables. */
#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000

/* Virtual translation table structure declarations */
struct pageMapL4Entry {
	uint64_t present:1;
	uint64_t writable:1;
	uint64_t user:1;
	uint64_t write_through:1;
	uint64_t cache_disable:1;
	uint64_t accessed:1;
	uint64_t ignored_06:1;
	uint64_t page_size:1;
	uint64_t ignored_11_08:4;
	uint64_t address:40;
	uint64_t ignored_62_52:11;
	uint64_t execute_disable:1;
};

struct pageDirectoryPointerEntry {
	uint64_t present:1;
	uint64_t writable:1;
	uint64_t user:1;
	uint64_t write_through:1;
	uint64_t cache_disable:1;
	uint64_t accessed:1;
	uint64_t ignored_06:1;
	uint64_t page_size:1;
	uint64_t ignored_11_08:4;
	uint64_t address:40;
	uint64_t ignored_62_52:11;
	uint64_t execute_disable:1;
};

struct pageDirectoryEntry {
	uint64_t present:1;
	uint64_t writable:1;
	uint64_t user:1;
	uint64_t write_through:1;
	uint64_t cache_disable:1;
	uint64_t accessed:1;
	uint64_t ignored_06:1;
	uint64_t page_size:1;
	uint64_t ignored_11_08:4;
	uint64_t address:40;
	uint64_t ignored_62_52:11;
	uint64_t execute_disable:1;
};

struct pageTableEntry {
	uint64_t present:1;
	uint64_t writable:1;
	uint64_t user:1;
	uint64_t write_through:1;
	uint64_t cache_disable:1;
	uint64_t accessed:1;
	uint64_t dirty:1;
	uint64_t reserved_07:1;
	uint64_t global:1;
	uint64_t ignored_11_09:3;
	uint64_t address:40;
	uint64_t ignored_62_52:11;
	uint64_t execute_disable:1;
};

/* Register Dump
 *
 * Input Args:
 *   indent - Left margin indent amount
 *   regs - Register state to dump
 *
 * Output Args:
 *   stream - Output FILE stream
 *
 * Return: None
 *
 * Dumps the state of the registers given by regs, to the FILE stream
 * given by stream.
 */
void regs_dump(FILE *stream, struct kvm_regs *regs,
	       uint8_t indent)
{
	fprintf(stream, "%*srax: 0x%.16llx rbx: 0x%.16llx "
		"rcx: 0x%.16llx rdx: 0x%.16llx\n",
		indent, "",
		regs->rax, regs->rbx, regs->rcx, regs->rdx);
	fprintf(stream, "%*srsi: 0x%.16llx rdi: 0x%.16llx "
		"rsp: 0x%.16llx rbp: 0x%.16llx\n",
		indent, "",
		regs->rsi, regs->rdi, regs->rsp, regs->rbp);
	fprintf(stream, "%*sr8:  0x%.16llx r9:  0x%.16llx "
		"r10: 0x%.16llx r11: 0x%.16llx\n",
		indent, "",
		regs->r8, regs->r9, regs->r10, regs->r11);
	fprintf(stream, "%*sr12: 0x%.16llx r13: 0x%.16llx "
		"r14: 0x%.16llx r15: 0x%.16llx\n",
		indent, "",
		regs->r12, regs->r13, regs->r14, regs->r15);
	fprintf(stream, "%*srip: 0x%.16llx rfl: 0x%.16llx\n",
		indent, "",
		regs->rip, regs->rflags);
}
/* Segment Dump
 *
 * Input Args:
 *   indent - Left margin indent amount
 *   segment - KVM segment
 *
 * Output Args:
 *   stream - Output FILE stream
 *
 * Return: None
 *
 * Dumps the state of the KVM segment given by segment, to the FILE stream
 * given by stream.
 */
static void segment_dump(FILE *stream, struct kvm_segment *segment,
			 uint8_t indent)
{
	fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.8x "
		"selector: 0x%.4x type: 0x%.2x\n",
		indent, "", segment->base, segment->limit,
		segment->selector, segment->type);
	fprintf(stream, "%*spresent: 0x%.2x dpl: 0x%.2x "
		"db: 0x%.2x s: 0x%.2x l: 0x%.2x\n",
		indent, "", segment->present, segment->dpl,
		segment->db, segment->s, segment->l);
	fprintf(stream, "%*sg: 0x%.2x avl: 0x%.2x "
		"unusable: 0x%.2x padding: 0x%.2x\n",
		indent, "", segment->g, segment->avl,
		segment->unusable, segment->padding);
}

/* dtable Dump
 *
 * Input Args:
 *   indent - Left margin indent amount
 *   dtable - KVM dtable
 *
 * Output Args:
 *   stream - Output FILE stream
 *
 * Return: None
 *
 * Dumps the state of the KVM dtable given by dtable, to the FILE stream
 * given by stream.
 */
static void dtable_dump(FILE *stream, struct kvm_dtable *dtable,
			uint8_t indent)
{
	fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.4x "
		"padding: 0x%.4x 0x%.4x 0x%.4x\n",
		indent, "", dtable->base, dtable->limit,
		dtable->padding[0], dtable->padding[1], dtable->padding[2]);
}

/* System Register Dump
 *
 * Input Args:
 *   indent - Left margin indent amount
 *   sregs - System registers
 *
 * Output Args:
 *   stream - Output FILE stream
 *
 * Return: None
 *
 * Dumps the state of the system registers given by sregs, to the FILE stream
 * given by stream.
 */
void sregs_dump(FILE *stream, struct kvm_sregs *sregs,
		uint8_t indent)
{
	unsigned int i;

	fprintf(stream, "%*scs:\n", indent, "");
	segment_dump(stream, &sregs->cs, indent + 2);
	fprintf(stream, "%*sds:\n", indent, "");
	segment_dump(stream, &sregs->ds, indent + 2);
	fprintf(stream, "%*ses:\n", indent, "");
	segment_dump(stream, &sregs->es, indent + 2);
	fprintf(stream, "%*sfs:\n", indent, "");
	segment_dump(stream, &sregs->fs, indent + 2);
	fprintf(stream, "%*sgs:\n", indent, "");
	segment_dump(stream, &sregs->gs, indent + 2);
	fprintf(stream, "%*sss:\n", indent, "");
	segment_dump(stream, &sregs->ss, indent + 2);
	fprintf(stream, "%*str:\n", indent, "");
	segment_dump(stream, &sregs->tr, indent + 2);
	fprintf(stream, "%*sldt:\n", indent, "");
	segment_dump(stream, &sregs->ldt, indent + 2);

	fprintf(stream, "%*sgdt:\n", indent, "");
	dtable_dump(stream, &sregs->gdt, indent + 2);
	fprintf(stream, "%*sidt:\n", indent, "");
	dtable_dump(stream, &sregs->idt, indent + 2);

	fprintf(stream, "%*scr0: 0x%.16llx cr2: 0x%.16llx "
		"cr3: 0x%.16llx cr4: 0x%.16llx\n",
		indent, "",
		sregs->cr0, sregs->cr2, sregs->cr3, sregs->cr4);
	fprintf(stream, "%*scr8: 0x%.16llx efer: 0x%.16llx "
		"apic_base: 0x%.16llx\n",
		indent, "",
		sregs->cr8, sregs->efer, sregs->apic_base);

	fprintf(stream, "%*sinterrupt_bitmap:\n", indent, "");
	for (i = 0; i < (KVM_NR_INTERRUPTS + 63) / 64; i++) {
		fprintf(stream, "%*s%.16llx\n", indent + 2, "",
			sregs->interrupt_bitmap[i]);
	}
}
void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot)
{
	TEST_ASSERT(vm->mode == VM_MODE_P52V48_4K, "Attempt to use "
		"unknown or unsupported guest mode, mode: 0x%x", vm->mode);

	/* If needed, create page map l4 table. */
	if (!vm->pgd_created) {
		vm_paddr_t paddr = vm_phy_page_alloc(vm,
			KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot);
		vm->pgd = paddr;
		vm->pgd_created = true;
	}
}

/* VM Virtual Page Map
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vaddr - VM Virtual Address
 *   paddr - VM Physical Address
 *   pgd_memslot - Memory region slot for new virtual translation tables
 *
 * Output Args: None
 *
 * Return: None
 *
 * Within the VM given by vm, creates a virtual translation for the page
 * starting at vaddr to the page starting at paddr.
 */
void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
		 uint32_t pgd_memslot)
{
	uint16_t index[4];
	struct pageMapL4Entry *pml4e;

	TEST_ASSERT(vm->mode == VM_MODE_P52V48_4K, "Attempt to use "
		"unknown or unsupported guest mode, mode: 0x%x", vm->mode);

	TEST_ASSERT((vaddr % vm->page_size) == 0,
		"Virtual address not on page boundary,\n"
		"  vaddr: 0x%lx vm->page_size: 0x%x",
		vaddr, vm->page_size);
	TEST_ASSERT(sparsebit_is_set(vm->vpages_valid,
		(vaddr >> vm->page_shift)),
		"Invalid virtual address, vaddr: 0x%lx",
		vaddr);
	TEST_ASSERT((paddr % vm->page_size) == 0,
		"Physical address not on page boundary,\n"
		"  paddr: 0x%lx vm->page_size: 0x%x",
		paddr, vm->page_size);
	TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
		"Physical address beyond maximum supported,\n"
		"  paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
		paddr, vm->max_gfn, vm->page_size);

	/* 9-bit table index for each of the four 4K paging levels. */
	index[0] = (vaddr >> 12) & 0x1ffu;
	index[1] = (vaddr >> 21) & 0x1ffu;
	index[2] = (vaddr >> 30) & 0x1ffu;
	index[3] = (vaddr >> 39) & 0x1ffu;

	/* Allocate page directory pointer table if not present. */
	pml4e = addr_gpa2hva(vm, vm->pgd);
	if (!pml4e[index[3]].present) {
		pml4e[index[3]].address = vm_phy_page_alloc(vm,
			KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot)
			>> vm->page_shift;
		pml4e[index[3]].writable = true;
		pml4e[index[3]].present = true;
	}

	/* Allocate page directory table if not present. */
	struct pageDirectoryPointerEntry *pdpe;
	pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size);
	if (!pdpe[index[2]].present) {
		pdpe[index[2]].address = vm_phy_page_alloc(vm,
			KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot)
			>> vm->page_shift;
		pdpe[index[2]].writable = true;
		pdpe[index[2]].present = true;
	}

	/* Allocate page table if not present. */
	struct pageDirectoryEntry *pde;
	pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size);
	if (!pde[index[1]].present) {
		pde[index[1]].address = vm_phy_page_alloc(vm,
			KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot)
			>> vm->page_shift;
		pde[index[1]].writable = true;
		pde[index[1]].present = true;
	}

	/* Fill in page table entry. */
	struct pageTableEntry *pte;
	pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size);
	pte[index[0]].address = paddr >> vm->page_shift;
	pte[index[0]].writable = true;
	pte[index[0]].present = true;
}
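/*
 * Example (illustrative, not part of the library): map one 4 KiB page
 * for the guest, with hypothetical page-aligned guest_vaddr/guest_paddr
 * values chosen by the caller and backed by memslot 0:
 *
 *	virt_pgd_alloc(vm, 0);
 *	virt_pg_map(vm, guest_vaddr, guest_paddr, 0);
 *
 * Intermediate PML4/PDPT/PD tables are allocated on demand from
 * KVM_GUEST_PAGE_TABLE_MIN_PADDR upward in the same memslot.
 */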
/* Virtual Translation Tables Dump
 *
 * Input Args:
 *   vm - Virtual Machine
 *   indent - Left margin indent amount
 *
 * Output Args:
 *   stream - Output FILE stream
 *
 * Return: None
 *
 * Dumps to the FILE stream given by stream, the contents of all the
 * virtual translation tables for the VM given by vm.
 */
void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
{
	struct pageMapL4Entry *pml4e, *pml4e_start;
	struct pageDirectoryPointerEntry *pdpe, *pdpe_start;
	struct pageDirectoryEntry *pde, *pde_start;
	struct pageTableEntry *pte, *pte_start;

	if (!vm->pgd_created)
		return;

	fprintf(stream, "%*s                                          "
		"                no\n", indent, "");
	fprintf(stream, "%*s      index hvaddr         gpaddr         "
		"addr         w exec dirty\n",
		indent, "");
	pml4e_start = (struct pageMapL4Entry *) addr_gpa2hva(vm,
		vm->pgd);
	for (uint16_t n1 = 0; n1 <= 0x1ffu; n1++) {
		pml4e = &pml4e_start[n1];
		if (!pml4e->present)
			continue;
		fprintf(stream, "%*spml4e 0x%-3zx %p 0x%-12lx 0x%-10lx %u "
			" %u\n",
			indent, "",
			pml4e - pml4e_start, pml4e,
			addr_hva2gpa(vm, pml4e), (uint64_t) pml4e->address,
			pml4e->writable, pml4e->execute_disable);

		pdpe_start = addr_gpa2hva(vm, pml4e->address
			* vm->page_size);
		for (uint16_t n2 = 0; n2 <= 0x1ffu; n2++) {
			pdpe = &pdpe_start[n2];
			if (!pdpe->present)
				continue;
			fprintf(stream, "%*spdpe  0x%-3zx %p 0x%-12lx 0x%-10lx "
				"%u  %u\n",
				indent, "",
				pdpe - pdpe_start, pdpe,
				addr_hva2gpa(vm, pdpe),
				(uint64_t) pdpe->address, pdpe->writable,
				pdpe->execute_disable);

			pde_start = addr_gpa2hva(vm,
				pdpe->address * vm->page_size);
			for (uint16_t n3 = 0; n3 <= 0x1ffu; n3++) {
				pde = &pde_start[n3];
				if (!pde->present)
					continue;
				fprintf(stream, "%*spde   0x%-3zx %p "
					"0x%-12lx 0x%-10lx %u  %u\n",
					indent, "", pde - pde_start, pde,
					addr_hva2gpa(vm, pde),
					(uint64_t) pde->address, pde->writable,
					pde->execute_disable);

				pte_start = addr_gpa2hva(vm,
					pde->address * vm->page_size);
				for (uint16_t n4 = 0; n4 <= 0x1ffu; n4++) {
					pte = &pte_start[n4];
					if (!pte->present)
						continue;
					fprintf(stream, "%*spte   0x%-3zx %p "
						"0x%-12lx 0x%-10lx %u  %u "
						"    %u    0x%-10lx\n",
						indent, "",
						pte - pte_start, pte,
						addr_hva2gpa(vm, pte),
						(uint64_t) pte->address,
						pte->writable,
						pte->execute_disable,
						pte->dirty,
						/* virtual page number */
						((uint64_t) n1 << 27)
						| ((uint64_t) n2 << 18)
						| ((uint64_t) n3 << 9)
						| ((uint64_t) n4));
				}
			}
		}
	}
}
/* Set Unusable Segment
 *
 * Input Args: None
 *
 * Output Args:
 *   segp - Pointer to segment register
 *
 * Return: None
 *
 * Sets the segment register pointed to by segp to an unusable state.
 */
static void kvm_seg_set_unusable(struct kvm_segment *segp)
{
	memset(segp, 0, sizeof(*segp));
	segp->unusable = true;
}

static void kvm_seg_fill_gdt_64bit(struct kvm_vm *vm, struct kvm_segment *segp)
{
	void *gdt = addr_gva2hva(vm, vm->gdt);
	struct desc64 *desc = gdt + (segp->selector >> 3) * 8;

	desc->limit0 = segp->limit & 0xFFFF;
	desc->base0 = segp->base & 0xFFFF;
	desc->base1 = segp->base >> 16;
	desc->s = segp->s;
	desc->type = segp->type;
	desc->dpl = segp->dpl;
	desc->p = segp->present;
	desc->limit1 = segp->limit >> 16;
	desc->l = segp->l;
	desc->db = segp->db;
	desc->g = segp->g;
	desc->base2 = segp->base >> 24;
	if (!segp->s)
		desc->base3 = segp->base >> 32;
}

/* Set Long Mode Flat Kernel Code Segment
 *
 * Input Args:
 *   vm - VM whose GDT is being filled, or NULL to only write segp
 *   selector - selector value
 *
 * Output Args:
 *   segp - Pointer to KVM segment
 *
 * Return: None
 *
 * Sets up the KVM segment pointed to by segp, to be a code segment
 * with the selector value given by selector.
 */
static void kvm_seg_set_kernel_code_64bit(struct kvm_vm *vm, uint16_t selector,
	struct kvm_segment *segp)
{
	memset(segp, 0, sizeof(*segp));
	segp->selector = selector;
	segp->limit = 0xFFFFFFFFu;
	segp->s = 0x1; /* kTypeCodeData */
	segp->type = 0x08 | 0x01 | 0x02; /* kFlagCode | kFlagCodeAccessed
					  * | kFlagCodeReadable
					  */
	segp->g = true;
	segp->l = true;
	segp->present = 1;
	if (vm)
		kvm_seg_fill_gdt_64bit(vm, segp);
}

/* Set Long Mode Flat Kernel Data Segment
 *
 * Input Args:
 *   vm - VM whose GDT is being filled, or NULL to only write segp
 *   selector - selector value
 *
 * Output Args:
 *   segp - Pointer to KVM segment
 *
 * Return: None
 *
 * Sets up the KVM segment pointed to by segp, to be a data segment
 * with the selector value given by selector.
 */
static void kvm_seg_set_kernel_data_64bit(struct kvm_vm *vm, uint16_t selector,
	struct kvm_segment *segp)
{
	memset(segp, 0, sizeof(*segp));
	segp->selector = selector;
	segp->limit = 0xFFFFFFFFu;
	segp->s = 0x1; /* kTypeCodeData */
	segp->type = 0x00 | 0x01 | 0x02; /* kFlagData | kFlagDataAccessed
					  * | kFlagDataWritable
					  */
	segp->g = true;
	segp->present = true;
	if (vm)
		kvm_seg_fill_gdt_64bit(vm, segp);
}
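/*
 * With the flat segments above, vcpu_setup() below (the only caller that
 * passes a non-NULL vm) ends up with a GDT laid out as follows, where
 * selector >> 3 picks the descriptor index:
 *
 *	index 0 (selector 0x00): null descriptor
 *	index 1 (selector 0x08): 64-bit kernel code segment
 *	index 2 (selector 0x10): kernel data segment
 *	index 3 (selector 0x18): 64-bit TSS (16-byte system descriptor,
 *	                         hence the base3 write for !s above)
 */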
/* Address Guest Virtual to Guest Physical
 *
 * Input Args:
 *   vm - Virtual Machine
 *   gva - VM virtual address
 *
 * Output Args: None
 *
 * Return:
 *   Equivalent VM physical address
 *
 * Translates the VM virtual address given by gva to a VM physical
 * address, by walking the virtual translation tables of the VM given
 * by vm.  A TEST_ASSERT failure occurs if no mapping for the VM
 * virtual address exists.
 */
vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
{
	uint16_t index[4];
	struct pageMapL4Entry *pml4e;
	struct pageDirectoryPointerEntry *pdpe;
	struct pageDirectoryEntry *pde;
	struct pageTableEntry *pte;

	TEST_ASSERT(vm->mode == VM_MODE_P52V48_4K, "Attempt to use "
		"unknown or unsupported guest mode, mode: 0x%x", vm->mode);

	index[0] = (gva >> 12) & 0x1ffu;
	index[1] = (gva >> 21) & 0x1ffu;
	index[2] = (gva >> 30) & 0x1ffu;
	index[3] = (gva >> 39) & 0x1ffu;

	if (!vm->pgd_created)
		goto unmapped_gva;
	pml4e = addr_gpa2hva(vm, vm->pgd);
	if (!pml4e[index[3]].present)
		goto unmapped_gva;

	pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size);
	if (!pdpe[index[2]].present)
		goto unmapped_gva;

	pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size);
	if (!pde[index[1]].present)
		goto unmapped_gva;

	pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size);
	if (!pte[index[0]].present)
		goto unmapped_gva;

	return (pte[index[0]].address * vm->page_size) + (gva & 0xfffu);

unmapped_gva:
	TEST_ASSERT(false, "No mapping for vm virtual address, "
		"gva: 0x%lx", gva);
}

static void kvm_setup_gdt(struct kvm_vm *vm, struct kvm_dtable *dt, int gdt_memslot,
			  int pgd_memslot)
{
	if (!vm->gdt)
		vm->gdt = vm_vaddr_alloc(vm, getpagesize(),
			KVM_UTIL_MIN_VADDR, gdt_memslot, pgd_memslot);

	dt->base = vm->gdt;
	dt->limit = getpagesize();
}

static void kvm_setup_tss_64bit(struct kvm_vm *vm, struct kvm_segment *segp,
				int selector, int gdt_memslot,
				int pgd_memslot)
{
	if (!vm->tss)
		vm->tss = vm_vaddr_alloc(vm, getpagesize(),
			KVM_UTIL_MIN_VADDR, gdt_memslot, pgd_memslot);

	memset(segp, 0, sizeof(*segp));
	segp->base = vm->tss;
	segp->limit = 0x67;
	segp->selector = selector;
	segp->type = 0xb;
	segp->present = 1;
	kvm_seg_fill_gdt_64bit(vm, segp);
}

void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_memslot)
{
	struct kvm_sregs sregs;

	/* Set mode specific system register values. */
	vcpu_sregs_get(vm, vcpuid, &sregs);

	sregs.idt.limit = 0;

	kvm_setup_gdt(vm, &sregs.gdt, gdt_memslot, pgd_memslot);

	switch (vm->mode) {
	case VM_MODE_P52V48_4K:
		sregs.cr0 = X86_CR0_PE | X86_CR0_NE | X86_CR0_PG;
		sregs.cr4 |= X86_CR4_PAE | X86_CR4_OSFXSR;
		sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX);

		kvm_seg_set_unusable(&sregs.ldt);
		kvm_seg_set_kernel_code_64bit(vm, 0x8, &sregs.cs);
		kvm_seg_set_kernel_data_64bit(vm, 0x10, &sregs.ds);
		kvm_seg_set_kernel_data_64bit(vm, 0x10, &sregs.es);
		kvm_setup_tss_64bit(vm, &sregs.tr, 0x18, gdt_memslot, pgd_memslot);
		break;

	default:
		TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", vm->mode);
	}

	sregs.cr3 = vm->pgd;
	vcpu_sregs_set(vm, vcpuid, &sregs);
}
/* Adds a vCPU with reasonable defaults (i.e., a stack)
 *
 * Input Args:
 *   vcpuid - The id of the VCPU to add to the VM.
 *   guest_code - The vCPU's entry point
 */
void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
{
	struct kvm_mp_state mp_state;
	struct kvm_regs regs;
	vm_vaddr_t stack_vaddr;

	stack_vaddr = vm_vaddr_alloc(vm, DEFAULT_STACK_PGS * getpagesize(),
		DEFAULT_GUEST_STACK_VADDR_MIN, 0, 0);

	/* Create VCPU */
	vm_vcpu_add(vm, vcpuid, 0, 0);

	/* Setup guest general purpose registers */
	vcpu_regs_get(vm, vcpuid, &regs);
	regs.rflags = regs.rflags | 0x2; /* RFLAGS bit 1 is reserved, always set */
	regs.rsp = stack_vaddr + (DEFAULT_STACK_PGS * getpagesize());
	regs.rip = (unsigned long) guest_code;
	vcpu_regs_set(vm, vcpuid, &regs);

	/* Setup the MP state */
	mp_state.mp_state = 0;
	vcpu_set_mp_state(vm, vcpuid, &mp_state);
}

/* Allocate an instance of struct kvm_cpuid2
 *
 * Input Args: None
 *
 * Output Args: None
 *
 * Return: A pointer to the allocated struct. The caller is responsible
 * for freeing this struct.
 *
 * Since kvm_cpuid2 uses a 0-length array to allow the size of the
 * array to be decided at allocation time, allocation is slightly
 * complicated. This function uses a reasonable default length for
 * the array and performs the appropriate allocation.
 */
static struct kvm_cpuid2 *allocate_kvm_cpuid2(void)
{
	struct kvm_cpuid2 *cpuid;
	int nent = 100;
	size_t size;

	size = sizeof(*cpuid);
	size += nent * sizeof(struct kvm_cpuid_entry2);
	cpuid = malloc(size);
	if (!cpuid) {
		perror("malloc");
		abort();
	}

	cpuid->nent = nent;

	return cpuid;
}

/* KVM Supported CPUID Get
 *
 * Input Args: None
 *
 * Output Args: None
 *
 * Return: The supported KVM CPUID
 *
 * Get the guest CPUID supported by KVM.
 */
struct kvm_cpuid2 *kvm_get_supported_cpuid(void)
{
	static struct kvm_cpuid2 *cpuid;
	int ret;
	int kvm_fd;

	if (cpuid)
		return cpuid;

	cpuid = allocate_kvm_cpuid2();
	kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
	if (kvm_fd < 0)
		exit(KSFT_SKIP);

	ret = ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID, cpuid);
	TEST_ASSERT(ret == 0, "KVM_GET_SUPPORTED_CPUID failed %d %d\n",
		ret, errno);

	close(kvm_fd);
	return cpuid;
}

/* Locate a cpuid entry.
 *
 * Input Args:
 *   function: The function of the cpuid entry to find.
 *   index: The index of the cpuid entry to find.
 *
 * Output Args: None
 *
 * Return: A pointer to the cpuid entry. Never returns NULL.
 */
struct kvm_cpuid_entry2 *
kvm_get_supported_cpuid_index(uint32_t function, uint32_t index)
{
	struct kvm_cpuid2 *cpuid;
	struct kvm_cpuid_entry2 *entry = NULL;
	int i;

	cpuid = kvm_get_supported_cpuid();
	for (i = 0; i < cpuid->nent; i++) {
		if (cpuid->entries[i].function == function &&
		    cpuid->entries[i].index == index) {
			entry = &cpuid->entries[i];
			break;
		}
	}

	TEST_ASSERT(entry, "Guest CPUID entry not found: (EAX=%x, ECX=%x).",
		function, index);
	return entry;
}
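/*
 * Example (illustrative): check a KVM-supported feature bit before
 * relying on it in a guest, here x2APIC (CPUID.01H:ECX bit 21):
 *
 *	struct kvm_cpuid_entry2 *entry;
 *
 *	entry = kvm_get_supported_cpuid_index(1, 0);
 *	if (!(entry->ecx & (1u << 21)))
 *		... skip the test, x2APIC is not supported ...
 */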
/* VM VCPU CPUID Set
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vcpuid - VCPU id
 *   cpuid - The CPUID values to set.
 *
 * Output Args: None
 *
 * Return: void
 *
 * Set the VCPU's CPUID.
 */
void vcpu_set_cpuid(struct kvm_vm *vm,
		uint32_t vcpuid, struct kvm_cpuid2 *cpuid)
{
	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
	int rc;

	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);

	rc = ioctl(vcpu->fd, KVM_SET_CPUID2, cpuid);
	TEST_ASSERT(rc == 0, "KVM_SET_CPUID2 failed, rc: %i errno: %i",
		rc, errno);
}

/* Create a VM with reasonable defaults
 *
 * Input Args:
 *   vcpuid - The id of the single VCPU to add to the VM.
 *   extra_mem_pages - The number of extra memory pages to add (this
 *                     decides how much extra space is needed to set
 *                     up the page tables using memslot 0)
 *   guest_code - The vCPU's entry point
 *
 * Output Args: None
 *
 * Return:
 *   Pointer to opaque structure that describes the created VM.
 */
struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
				 void *guest_code)
{
	struct kvm_vm *vm;
	/*
	 * For x86 the maximum page table size for a memory region
	 * will be when only 4K pages are used.  In that case the
	 * total extra size for page tables (for extra N pages) will
	 * be: N/512+N/512^2+N/512^3+... which is definitely smaller
	 * than N/512*2.
	 */
	uint64_t extra_pg_pages = extra_mem_pages / 512 * 2;

	/* Create VM */
	vm = vm_create(VM_MODE_P52V48_4K,
		       DEFAULT_GUEST_PHY_PAGES + extra_pg_pages,
		       O_RDWR);

	/* Setup guest code */
	kvm_vm_elf_load(vm, program_invocation_name, 0, 0);

	/* Setup IRQ Chip */
	vm_create_irqchip(vm);

	/* Add the first vCPU. */
	vm_vcpu_add_default(vm, vcpuid, guest_code);

	return vm;
}

/* VCPU Get MSR
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vcpuid - VCPU ID
 *   msr_index - Index of MSR
 *
 * Output Args: None
 *
 * Return: On success, value of the MSR. On failure a TEST_ASSERT is produced.
 *
 * Get value of MSR for VCPU.
 */
uint64_t vcpu_get_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index)
{
	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
	struct {
		struct kvm_msrs header;
		struct kvm_msr_entry entry;
	} buffer = {};
	int r;

	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
	buffer.header.nmsrs = 1;
	buffer.entry.index = msr_index;
	r = ioctl(vcpu->fd, KVM_GET_MSRS, &buffer.header);
	TEST_ASSERT(r == 1, "KVM_GET_MSRS IOCTL failed,\n"
		"  rc: %i errno: %i", r, errno);

	return buffer.entry.data;
}
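/*
 * Example (illustrative): read-modify-write an MSR for vCPU 0, here
 * IA32_EFER (MSR index 0xc0000080), setting the NX-enable bit:
 *
 *	uint64_t efer = vcpu_get_msr(vm, 0, 0xc0000080);
 *
 *	vcpu_set_msr(vm, 0, 0xc0000080, efer | EFER_NX);
 */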
/* VCPU Set MSR
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vcpuid - VCPU ID
 *   msr_index - Index of MSR
 *   msr_value - New value of MSR
 *
 * Output Args: None
 *
 * Return: On success, nothing. On failure a TEST_ASSERT is produced.
 *
 * Set value of MSR for VCPU.
 */
void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index,
	uint64_t msr_value)
{
	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
	struct {
		struct kvm_msrs header;
		struct kvm_msr_entry entry;
	} buffer = {};
	int r;

	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
	buffer.header.nmsrs = 1;
	buffer.entry.index = msr_index;
	buffer.entry.data = msr_value;
	r = ioctl(vcpu->fd, KVM_SET_MSRS, &buffer.header);
	TEST_ASSERT(r == 1, "KVM_SET_MSRS IOCTL failed,\n"
		"  rc: %i errno: %i", r, errno);
}

/* VM VCPU Args Set
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vcpuid - VCPU ID
 *   num - number of arguments
 *   ... - arguments, each of type uint64_t
 *
 * Output Args: None
 *
 * Return: None
 *
 * Sets the first num function input arguments to the values
 * given as variable args.  Each of the variable args is expected to
 * be of type uint64_t.
 */
void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...)
{
	va_list ap;
	struct kvm_regs regs;

	TEST_ASSERT(num >= 1 && num <= 6, "Unsupported number of args,\n"
		"  num: %u\n",
		num);

	va_start(ap, num);
	vcpu_regs_get(vm, vcpuid, &regs);

	if (num >= 1)
		regs.rdi = va_arg(ap, uint64_t);

	if (num >= 2)
		regs.rsi = va_arg(ap, uint64_t);

	if (num >= 3)
		regs.rdx = va_arg(ap, uint64_t);

	if (num >= 4)
		regs.rcx = va_arg(ap, uint64_t);

	if (num >= 5)
		regs.r8 = va_arg(ap, uint64_t);

	if (num >= 6)
		regs.r9 = va_arg(ap, uint64_t);

	vcpu_regs_set(vm, vcpuid, &regs);
	va_end(ap);
}
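/*
 * The registers above follow the x86_64 SysV calling convention
 * (rdi, rsi, rdx, rcx, r8, r9), so a guest entry point declared as
 *
 *	static void guest_code(uint64_t arg0, uint64_t arg1);
 *
 * receives the values set by (illustrative example)
 *
 *	vcpu_args_set(vm, vcpuid, 2, arg0, arg1);
 */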
/*
 * VM VCPU Dump
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vcpuid - VCPU ID
 *   indent - Left margin indent amount
 *
 * Output Args:
 *   stream - Output FILE stream
 *
 * Return: None
 *
 * Dumps the current state of the VCPU specified by vcpuid, within the VM
 * given by vm, to the FILE stream given by stream.
 */
void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent)
{
	struct kvm_regs regs;
	struct kvm_sregs sregs;

	fprintf(stream, "%*scpuid: %u\n", indent, "", vcpuid);

	fprintf(stream, "%*sregs:\n", indent + 2, "");
	vcpu_regs_get(vm, vcpuid, &regs);
	regs_dump(stream, &regs, indent + 4);

	fprintf(stream, "%*ssregs:\n", indent + 2, "");
	vcpu_sregs_get(vm, vcpuid, &sregs);
	sregs_dump(stream, &sregs, indent + 4);
}

struct kvm_x86_state {
	struct kvm_vcpu_events events;
	struct kvm_mp_state mp_state;
	struct kvm_regs regs;
	struct kvm_xsave xsave;
	struct kvm_xcrs xcrs;
	struct kvm_sregs sregs;
	struct kvm_debugregs debugregs;
	union {
		struct kvm_nested_state nested;
		char nested_[16384];
	};
	struct kvm_msrs msrs;
};

static int kvm_get_num_msrs(struct kvm_vm *vm)
{
	struct kvm_msr_list nmsrs;
	int r;

	/*
	 * Probe with an empty list: KVM fails the ioctl with E2BIG and
	 * writes the number of supported MSRs into nmsrs.nmsrs.
	 */
	nmsrs.nmsrs = 0;
	r = ioctl(vm->kvm_fd, KVM_GET_MSR_INDEX_LIST, &nmsrs);
	TEST_ASSERT(r == -1 && errno == E2BIG, "Unexpected result from KVM_GET_MSR_INDEX_LIST probe, r: %i",
		r);

	return nmsrs.nmsrs;
}
struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid)
{
	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
	struct kvm_msr_list *list;
	struct kvm_x86_state *state;
	int nmsrs, r, i;
	static int nested_size = -1;

	if (nested_size == -1) {
		nested_size = kvm_check_cap(KVM_CAP_NESTED_STATE);
		TEST_ASSERT(nested_size <= sizeof(state->nested_),
			"Nested state size too big, %i > %zi",
			nested_size, sizeof(state->nested_));
	}

	/*
	 * When KVM exits to userspace with KVM_EXIT_IO, KVM guarantees
	 * guest state is consistent only after userspace re-enters the
	 * kernel with KVM_RUN.  Complete IO prior to migrating state
	 * to a new VM.
	 */
	vcpu_run_complete_io(vm, vcpuid);

	nmsrs = kvm_get_num_msrs(vm);
	list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0]));
	list->nmsrs = nmsrs;
	r = ioctl(vm->kvm_fd, KVM_GET_MSR_INDEX_LIST, list);
	TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MSR_INDEX_LIST, r: %i",
		r);

	state = malloc(sizeof(*state) + nmsrs * sizeof(state->msrs.entries[0]));
	r = ioctl(vcpu->fd, KVM_GET_VCPU_EVENTS, &state->events);
	TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_VCPU_EVENTS, r: %i",
		r);

	r = ioctl(vcpu->fd, KVM_GET_MP_STATE, &state->mp_state);
	TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MP_STATE, r: %i",
		r);

	r = ioctl(vcpu->fd, KVM_GET_REGS, &state->regs);
	TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_REGS, r: %i",
		r);

	r = ioctl(vcpu->fd, KVM_GET_XSAVE, &state->xsave);
	TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_XSAVE, r: %i",
		r);

	r = ioctl(vcpu->fd, KVM_GET_XCRS, &state->xcrs);
	TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_XCRS, r: %i",
		r);

	r = ioctl(vcpu->fd, KVM_GET_SREGS, &state->sregs);
	TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_SREGS, r: %i",
		r);

	if (nested_size) {
		state->nested.size = sizeof(state->nested_);
		r = ioctl(vcpu->fd, KVM_GET_NESTED_STATE, &state->nested);
		TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_NESTED_STATE, r: %i",
			r);
		TEST_ASSERT(state->nested.size <= nested_size,
			"Nested state size too big, %i (KVM_CHECK_CAP gave %i)",
			state->nested.size, nested_size);
	} else
		state->nested.size = 0;

	state->msrs.nmsrs = nmsrs;
	for (i = 0; i < nmsrs; i++)
		state->msrs.entries[i].index = list->indices[i];
	r = ioctl(vcpu->fd, KVM_GET_MSRS, &state->msrs);
	TEST_ASSERT(r == nmsrs, "Unexpected result from KVM_GET_MSRS, r: %i (failed at %x)",
		r, r == nmsrs ? -1 : list->indices[r]);

	r = ioctl(vcpu->fd, KVM_GET_DEBUGREGS, &state->debugregs);
	TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_DEBUGREGS, r: %i",
		r);

	free(list);
	return state;
}
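/*
 * A minimal save/restore sketch (illustrative, mirroring how the state
 * tests migrate a vCPU to a freshly recreated VM; assumes the
 * kvm_vm_release()/kvm_vm_restart() helpers from kvm_util):
 *
 *	struct kvm_x86_state *state = vcpu_save_state(vm, vcpuid);
 *
 *	kvm_vm_release(vm);
 *	kvm_vm_restart(vm, O_RDWR);
 *	vm_vcpu_add(vm, vcpuid, 0, 0);
 *	vcpu_load_state(vm, vcpuid, state);
 *	free(state);
 */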
void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_x86_state *state)
{
	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
	int r;

	r = ioctl(vcpu->fd, KVM_SET_XSAVE, &state->xsave);
	TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_XSAVE, r: %i",
		r);

	r = ioctl(vcpu->fd, KVM_SET_XCRS, &state->xcrs);
	TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_XCRS, r: %i",
		r);

	r = ioctl(vcpu->fd, KVM_SET_SREGS, &state->sregs);
	TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_SREGS, r: %i",
		r);

	r = ioctl(vcpu->fd, KVM_SET_MSRS, &state->msrs);
	TEST_ASSERT(r == state->msrs.nmsrs, "Unexpected result from KVM_SET_MSRS, r: %i (failed at %x)",
		r, r == state->msrs.nmsrs ? -1 : state->msrs.entries[r].index);

	r = ioctl(vcpu->fd, KVM_SET_VCPU_EVENTS, &state->events);
	TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_VCPU_EVENTS, r: %i",
		r);

	r = ioctl(vcpu->fd, KVM_SET_MP_STATE, &state->mp_state);
	TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_MP_STATE, r: %i",
		r);

	r = ioctl(vcpu->fd, KVM_SET_DEBUGREGS, &state->debugregs);
	TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_DEBUGREGS, r: %i",
		r);

	r = ioctl(vcpu->fd, KVM_SET_REGS, &state->regs);
	TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_REGS, r: %i",
		r);

	if (state->nested.size) {
		r = ioctl(vcpu->fd, KVM_SET_NESTED_STATE, &state->nested);
		TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_NESTED_STATE, r: %i",
			r);
	}
}