// SPDX-License-Identifier: GPL-2.0-only
/*
 * tools/testing/selftests/kvm/lib/x86_64/processor.c
 *
 * Copyright (C) 2018, Google LLC.
 */

#define _GNU_SOURCE /* for program_invocation_name */

#include "test_util.h"
#include "kvm_util.h"
#include "../kvm_util_internal.h"
#include "processor.h"

/* Minimum physical address used for virtual translation tables. */
#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000

/* Virtual translation table structure declarations */
struct pageMapL4Entry {
	uint64_t present:1;
	uint64_t writable:1;
	uint64_t user:1;
	uint64_t write_through:1;
	uint64_t cache_disable:1;
	uint64_t accessed:1;
	uint64_t ignored_06:1;
	uint64_t page_size:1;
	uint64_t ignored_11_08:4;
	uint64_t address:40;
	uint64_t ignored_62_52:11;
	uint64_t execute_disable:1;
};

struct pageDirectoryPointerEntry {
	uint64_t present:1;
	uint64_t writable:1;
	uint64_t user:1;
	uint64_t write_through:1;
	uint64_t cache_disable:1;
	uint64_t accessed:1;
	uint64_t ignored_06:1;
	uint64_t page_size:1;
	uint64_t ignored_11_08:4;
	uint64_t address:40;
	uint64_t ignored_62_52:11;
	uint64_t execute_disable:1;
};

struct pageDirectoryEntry {
	uint64_t present:1;
	uint64_t writable:1;
	uint64_t user:1;
	uint64_t write_through:1;
	uint64_t cache_disable:1;
	uint64_t accessed:1;
	uint64_t ignored_06:1;
	uint64_t page_size:1;
	uint64_t ignored_11_08:4;
	uint64_t address:40;
	uint64_t ignored_62_52:11;
	uint64_t execute_disable:1;
};

struct pageTableEntry {
	uint64_t present:1;
	uint64_t writable:1;
	uint64_t user:1;
	uint64_t write_through:1;
	uint64_t cache_disable:1;
	uint64_t accessed:1;
	uint64_t dirty:1;
	uint64_t reserved_07:1;
	uint64_t global:1;
	uint64_t ignored_11_09:3;
	uint64_t address:40;
	uint64_t ignored_62_52:11;
	uint64_t execute_disable:1;
};

/* Register Dump
 *
 * Input Args:
 *   indent - Left margin indent amount
 *   regs - registers to dump
 *
 * Output Args:
 *   stream - Output FILE stream
 *
 * Return: None
 *
 * Dumps the state of the registers given by regs, to the FILE stream
 * given by stream.
 */
void regs_dump(FILE *stream, struct kvm_regs *regs,
	       uint8_t indent)
{
	fprintf(stream, "%*srax: 0x%.16llx rbx: 0x%.16llx "
		"rcx: 0x%.16llx rdx: 0x%.16llx\n",
		indent, "",
		regs->rax, regs->rbx, regs->rcx, regs->rdx);
	fprintf(stream, "%*srsi: 0x%.16llx rdi: 0x%.16llx "
		"rsp: 0x%.16llx rbp: 0x%.16llx\n",
		indent, "",
		regs->rsi, regs->rdi, regs->rsp, regs->rbp);
	fprintf(stream, "%*sr8:  0x%.16llx r9:  0x%.16llx "
		"r10: 0x%.16llx r11: 0x%.16llx\n",
		indent, "",
		regs->r8, regs->r9, regs->r10, regs->r11);
	fprintf(stream, "%*sr12: 0x%.16llx r13: 0x%.16llx "
		"r14: 0x%.16llx r15: 0x%.16llx\n",
		indent, "",
		regs->r12, regs->r13, regs->r14, regs->r15);
	fprintf(stream, "%*srip: 0x%.16llx rfl: 0x%.16llx\n",
		indent, "",
		regs->rip, regs->rflags);
}

/* Segment Dump
 *
 * Input Args:
 *   indent - Left margin indent amount
 *   segment - KVM segment to dump
 *
 * Output Args:
 *   stream - Output FILE stream
 *
 * Return: None
 *
 * Dumps the state of the KVM segment given by segment, to the FILE stream
 * given by stream.
 */
static void segment_dump(FILE *stream, struct kvm_segment *segment,
			 uint8_t indent)
{
	fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.8x "
		"selector: 0x%.4x type: 0x%.2x\n",
		indent, "", segment->base, segment->limit,
		segment->selector, segment->type);
	fprintf(stream, "%*spresent: 0x%.2x dpl: 0x%.2x "
		"db: 0x%.2x s: 0x%.2x l: 0x%.2x\n",
		indent, "", segment->present, segment->dpl,
		segment->db, segment->s, segment->l);
	fprintf(stream, "%*sg: 0x%.2x avl: 0x%.2x "
		"unusable: 0x%.2x padding: 0x%.2x\n",
		indent, "", segment->g, segment->avl,
		segment->unusable, segment->padding);
}

/* dtable Dump
 *
 * Input Args:
 *   indent - Left margin indent amount
 *   dtable - KVM dtable to dump
 *
 * Output Args:
 *   stream - Output FILE stream
 *
 * Return: None
 *
 * Dumps the state of the KVM dtable given by dtable, to the FILE stream
 * given by stream.
 */
static void dtable_dump(FILE *stream, struct kvm_dtable *dtable,
			uint8_t indent)
{
	fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.4x "
		"padding: 0x%.4x 0x%.4x 0x%.4x\n",
		indent, "", dtable->base, dtable->limit,
		dtable->padding[0], dtable->padding[1], dtable->padding[2]);
}

/* System Register Dump
 *
 * Input Args:
 *   indent - Left margin indent amount
 *   sregs - System registers to dump
 *
 * Output Args:
 *   stream - Output FILE stream
 *
 * Return: None
 *
 * Dumps the state of the system registers given by sregs, to the FILE stream
 * given by stream.
 */
void sregs_dump(FILE *stream, struct kvm_sregs *sregs,
		uint8_t indent)
{
	unsigned int i;

	fprintf(stream, "%*scs:\n", indent, "");
	segment_dump(stream, &sregs->cs, indent + 2);
	fprintf(stream, "%*sds:\n", indent, "");
	segment_dump(stream, &sregs->ds, indent + 2);
	fprintf(stream, "%*ses:\n", indent, "");
	segment_dump(stream, &sregs->es, indent + 2);
	fprintf(stream, "%*sfs:\n", indent, "");
	segment_dump(stream, &sregs->fs, indent + 2);
	fprintf(stream, "%*sgs:\n", indent, "");
	segment_dump(stream, &sregs->gs, indent + 2);
	fprintf(stream, "%*sss:\n", indent, "");
	segment_dump(stream, &sregs->ss, indent + 2);
	fprintf(stream, "%*str:\n", indent, "");
	segment_dump(stream, &sregs->tr, indent + 2);
	fprintf(stream, "%*sldt:\n", indent, "");
	segment_dump(stream, &sregs->ldt, indent + 2);

	fprintf(stream, "%*sgdt:\n", indent, "");
	dtable_dump(stream, &sregs->gdt, indent + 2);
	fprintf(stream, "%*sidt:\n", indent, "");
	dtable_dump(stream, &sregs->idt, indent + 2);

	fprintf(stream, "%*scr0: 0x%.16llx cr2: 0x%.16llx "
		"cr3: 0x%.16llx cr4: 0x%.16llx\n",
		indent, "",
		sregs->cr0, sregs->cr2, sregs->cr3, sregs->cr4);
	fprintf(stream, "%*scr8: 0x%.16llx efer: 0x%.16llx "
		"apic_base: 0x%.16llx\n",
		indent, "",
		sregs->cr8, sregs->efer, sregs->apic_base);

	fprintf(stream, "%*sinterrupt_bitmap:\n", indent, "");
	for (i = 0; i < (KVM_NR_INTERRUPTS + 63) / 64; i++) {
		fprintf(stream, "%*s%.16llx\n", indent + 2, "",
			sregs->interrupt_bitmap[i]);
	}
}

void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot)
{
	TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
		"unknown or unsupported guest mode, mode: 0x%x", vm->mode);

	/* If needed, create page map l4 table. */
	if (!vm->pgd_created) {
		vm_paddr_t paddr = vm_phy_page_alloc(vm,
			KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot);
		vm->pgd = paddr;
		vm->pgd_created = true;
	}
}

/* VM Virtual Page Map
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vaddr - VM Virtual Address
 *   paddr - VM Physical Address
 *   pgd_memslot - Memory region slot for new virtual translation tables
 *
 * Output Args: None
 *
 * Return: None
 *
 * Within the VM given by vm, creates a virtual translation for the page
 * starting at vaddr to the page starting at paddr.
 */
void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
		 uint32_t pgd_memslot)
{
	uint16_t index[4];
	struct pageMapL4Entry *pml4e;

	TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
		"unknown or unsupported guest mode, mode: 0x%x", vm->mode);

	TEST_ASSERT((vaddr % vm->page_size) == 0,
		"Virtual address not on page boundary,\n"
		"  vaddr: 0x%lx vm->page_size: 0x%x",
		vaddr, vm->page_size);
	TEST_ASSERT(sparsebit_is_set(vm->vpages_valid,
		(vaddr >> vm->page_shift)),
		"Invalid virtual address, vaddr: 0x%lx",
		vaddr);
	TEST_ASSERT((paddr % vm->page_size) == 0,
		"Physical address not on page boundary,\n"
		"  paddr: 0x%lx vm->page_size: 0x%x",
		paddr, vm->page_size);
	TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
		"Physical address beyond maximum supported,\n"
		"  paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
		paddr, vm->max_gfn, vm->page_size);

	/* Split the canonical address into the four 9-bit table indices. */
	index[0] = (vaddr >> 12) & 0x1ffu;
	index[1] = (vaddr >> 21) & 0x1ffu;
	index[2] = (vaddr >> 30) & 0x1ffu;
	index[3] = (vaddr >> 39) & 0x1ffu;

	/* Allocate page directory pointer table if not present. */
	pml4e = addr_gpa2hva(vm, vm->pgd);
	if (!pml4e[index[3]].present) {
		pml4e[index[3]].address = vm_phy_page_alloc(vm,
			KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot)
			>> vm->page_shift;
		pml4e[index[3]].writable = true;
		pml4e[index[3]].present = true;
	}

	/* Allocate page directory table if not present. */
	struct pageDirectoryPointerEntry *pdpe;
	pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size);
	if (!pdpe[index[2]].present) {
		pdpe[index[2]].address = vm_phy_page_alloc(vm,
			KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot)
			>> vm->page_shift;
		pdpe[index[2]].writable = true;
		pdpe[index[2]].present = true;
	}

	/* Allocate page table if not present. */
	struct pageDirectoryEntry *pde;
	pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size);
	if (!pde[index[1]].present) {
		pde[index[1]].address = vm_phy_page_alloc(vm,
			KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot)
			>> vm->page_shift;
		pde[index[1]].writable = true;
		pde[index[1]].present = true;
	}

	/* Fill in page table entry. */
	struct pageTableEntry *pte;
	pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size);
	pte[index[0]].address = paddr >> vm->page_shift;
	pte[index[0]].writable = true;
	pte[index[0]].present = 1;
}
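
/*
 * Illustrative sketch, not part of the library: identity-map a single
 * 4K page with the two helpers above.  The address 0x100000 is an
 * example value, not a requirement of the API; virt_pg_map() asserts
 * that the virtual page lies in the VM's valid virtual address range
 * and that the physical page is below vm->max_gfn.
 */
static void __attribute__((unused))
example_identity_map_page(struct kvm_vm *vm)
{
	uint64_t addr = 0x100000;	/* example page, 4K aligned */

	/* Ensure the PML4 table exists before installing a mapping. */
	virt_pgd_alloc(vm, 0);

	/* Map GVA 0x100000 -> GPA 0x100000, carving tables from memslot 0. */
	virt_pg_map(vm, addr, addr, 0);
}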

/* Virtual Translation Tables Dump
 *
 * Input Args:
 *   vm - Virtual Machine
 *   indent - Left margin indent amount
 *
 * Output Args:
 *   stream - Output FILE stream
 *
 * Return: None
 *
 * Dumps to the FILE stream given by stream, the contents of all the
 * virtual translation tables for the VM given by vm.
 */
void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
{
	struct pageMapL4Entry *pml4e, *pml4e_start;
	struct pageDirectoryPointerEntry *pdpe, *pdpe_start;
	struct pageDirectoryEntry *pde, *pde_start;
	struct pageTableEntry *pte, *pte_start;

	if (!vm->pgd_created)
		return;

	fprintf(stream, "%*s                                          "
		"                no\n", indent, "");
	fprintf(stream, "%*s      index hvaddr         gpaddr         "
		"addr         w exec dirty\n",
		indent, "");
	pml4e_start = (struct pageMapL4Entry *) addr_gpa2hva(vm,
		vm->pgd);
	for (uint16_t n1 = 0; n1 <= 0x1ffu; n1++) {
		pml4e = &pml4e_start[n1];
		if (!pml4e->present)
			continue;
		fprintf(stream, "%*spml4e 0x%-3zx %p 0x%-12lx 0x%-10lx %u "
			" %u\n",
			indent, "",
			pml4e - pml4e_start, pml4e,
			addr_hva2gpa(vm, pml4e), (uint64_t) pml4e->address,
			pml4e->writable, pml4e->execute_disable);

		pdpe_start = addr_gpa2hva(vm, pml4e->address
			* vm->page_size);
		for (uint16_t n2 = 0; n2 <= 0x1ffu; n2++) {
			pdpe = &pdpe_start[n2];
			if (!pdpe->present)
				continue;
			fprintf(stream, "%*spdpe  0x%-3zx %p 0x%-12lx 0x%-10lx "
				"%u  %u\n",
				indent, "",
				pdpe - pdpe_start, pdpe,
				addr_hva2gpa(vm, pdpe),
				(uint64_t) pdpe->address, pdpe->writable,
				pdpe->execute_disable);

			pde_start = addr_gpa2hva(vm,
				pdpe->address * vm->page_size);
			for (uint16_t n3 = 0; n3 <= 0x1ffu; n3++) {
				pde = &pde_start[n3];
				if (!pde->present)
					continue;
				fprintf(stream, "%*spde   0x%-3zx %p "
					"0x%-12lx 0x%-10lx %u  %u\n",
					indent, "", pde - pde_start, pde,
					addr_hva2gpa(vm, pde),
					(uint64_t) pde->address, pde->writable,
					pde->execute_disable);

				pte_start = addr_gpa2hva(vm,
					pde->address * vm->page_size);
				for (uint16_t n4 = 0; n4 <= 0x1ffu; n4++) {
					pte = &pte_start[n4];
					if (!pte->present)
						continue;
					fprintf(stream, "%*spte   0x%-3zx %p "
						"0x%-12lx 0x%-10lx %u  %u "
						"    %u    0x%-10lx\n",
						indent, "",
						pte - pte_start, pte,
						addr_hva2gpa(vm, pte),
						(uint64_t) pte->address,
						pte->writable,
						pte->execute_disable,
						pte->dirty,
						((uint64_t) n1 << 27)
						| ((uint64_t) n2 << 18)
						| ((uint64_t) n3 << 9)
						| ((uint64_t) n4));
				}
			}
		}
	}
}

/* Set Unusable Segment
 *
 * Input Args: None
 *
 * Output Args:
 *   segp - Pointer to segment register
 *
 * Return: None
 *
 * Sets the segment register pointed to by segp to an unusable state.
 */
static void kvm_seg_set_unusable(struct kvm_segment *segp)
{
	memset(segp, 0, sizeof(*segp));
	segp->unusable = true;
}

static void kvm_seg_fill_gdt_64bit(struct kvm_vm *vm, struct kvm_segment *segp)
{
	void *gdt = addr_gva2hva(vm, vm->gdt);
	struct desc64 *desc = gdt + (segp->selector >> 3) * 8;

	desc->limit0 = segp->limit & 0xFFFF;
	desc->base0 = segp->base & 0xFFFF;
	desc->base1 = segp->base >> 16;
	desc->s = segp->s;
	desc->type = segp->type;
	desc->dpl = segp->dpl;
	desc->p = segp->present;
	desc->limit1 = segp->limit >> 16;
	desc->l = segp->l;
	desc->db = segp->db;
	desc->g = segp->g;
	desc->base2 = segp->base >> 24;
	if (!segp->s)
		desc->base3 = segp->base >> 32;
}

/* Set Long Mode Flat Kernel Code Segment
 *
 * Input Args:
 *   vm - VM whose GDT is being filled, or NULL to only write segp
 *   selector - selector value
 *
 * Output Args:
 *   segp - Pointer to KVM segment
 *
 * Return: None
 *
 * Sets up the KVM segment pointed to by segp, to be a code segment
 * with the selector value given by selector.
 */
static void kvm_seg_set_kernel_code_64bit(struct kvm_vm *vm, uint16_t selector,
					  struct kvm_segment *segp)
{
	memset(segp, 0, sizeof(*segp));
	segp->selector = selector;
	segp->limit = 0xFFFFFFFFu;
	segp->s = 0x1; /* kTypeCodeData */
	segp->type = 0x08 | 0x01 | 0x02; /* kFlagCode | kFlagCodeAccessed
					  * | kFlagCodeReadable
					  */
	segp->g = true;
	segp->l = true;
	segp->present = 1;
	if (vm)
		kvm_seg_fill_gdt_64bit(vm, segp);
}

/* Set Long Mode Flat Kernel Data Segment
 *
 * Input Args:
 *   vm - VM whose GDT is being filled, or NULL to only write segp
 *   selector - selector value
 *
 * Output Args:
 *   segp - Pointer to KVM segment
 *
 * Return: None
 *
 * Sets up the KVM segment pointed to by segp, to be a data segment
 * with the selector value given by selector.
 */
static void kvm_seg_set_kernel_data_64bit(struct kvm_vm *vm, uint16_t selector,
					  struct kvm_segment *segp)
{
	memset(segp, 0, sizeof(*segp));
	segp->selector = selector;
	segp->limit = 0xFFFFFFFFu;
	segp->s = 0x1; /* kTypeCodeData */
	segp->type = 0x00 | 0x01 | 0x02; /* kFlagData | kFlagDataAccessed
					  * | kFlagDataWritable
					  */
	segp->g = true;
	segp->present = true;
	if (vm)
		kvm_seg_fill_gdt_64bit(vm, segp);
}
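
/*
 * Illustrative sketch, not part of the library: a selector's low three
 * bits hold the RPL and table-indicator bit, so kvm_seg_fill_gdt_64bit()
 * above locates a descriptor at (selector >> 3) * 8 bytes into the GDT.
 * The selectors in the comment are the example values installed by
 * vcpu_setup() below.
 */
static inline uint64_t __attribute__((unused))
example_gdt_offset(uint16_t selector)
{
	/* 0x8 -> entry 1 (code), 0x10 -> entry 2 (data), 0x18 -> entry 3 (TSS) */
	return (uint64_t)(selector >> 3) * 8;
}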

/* Address Guest Virtual to Guest Physical
 *
 * Input Args:
 *   vm - Virtual Machine
 *   gva - VM virtual address
 *
 * Output Args: None
 *
 * Return:
 *   Equivalent VM physical address
 *
 * Walks the virtual translation tables of the VM given by vm, to
 * translate the VM virtual address given by gva to a VM physical
 * address.  A TEST_ASSERT failure occurs if no mapping for the VM
 * virtual address exists.
 */
vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
{
	uint16_t index[4];
	struct pageMapL4Entry *pml4e;
	struct pageDirectoryPointerEntry *pdpe;
	struct pageDirectoryEntry *pde;
	struct pageTableEntry *pte;

	TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
		"unknown or unsupported guest mode, mode: 0x%x", vm->mode);

	index[0] = (gva >> 12) & 0x1ffu;
	index[1] = (gva >> 21) & 0x1ffu;
	index[2] = (gva >> 30) & 0x1ffu;
	index[3] = (gva >> 39) & 0x1ffu;

	if (!vm->pgd_created)
		goto unmapped_gva;
	pml4e = addr_gpa2hva(vm, vm->pgd);
	if (!pml4e[index[3]].present)
		goto unmapped_gva;

	pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size);
	if (!pdpe[index[2]].present)
		goto unmapped_gva;

	pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size);
	if (!pde[index[1]].present)
		goto unmapped_gva;

	pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size);
	if (!pte[index[0]].present)
		goto unmapped_gva;

	return (pte[index[0]].address * vm->page_size) + (gva & 0xfffu);

unmapped_gva:
	TEST_ASSERT(false, "No mapping for vm virtual address, "
		    "gva: 0x%lx", gva);
	exit(EXIT_FAILURE);
}
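
/*
 * Illustrative sketch, not part of the library: the final translation
 * above combines the PTE's page-frame address with the low 12 offset
 * bits of the GVA, so a round trip through addr_gva2gpa() preserves the
 * page offset.  Assumes gva was previously mapped with virt_pg_map().
 */
static void __attribute__((unused))
example_check_translation(struct kvm_vm *vm, vm_vaddr_t gva)
{
	vm_paddr_t gpa = addr_gva2gpa(vm, gva);

	TEST_ASSERT((gpa & 0xfffu) == (gva & 0xfffu),
		    "Page offset not preserved, gva: 0x%lx gpa: 0x%lx",
		    (uint64_t)gva, (uint64_t)gpa);
}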

static void kvm_setup_gdt(struct kvm_vm *vm, struct kvm_dtable *dt, int gdt_memslot,
			  int pgd_memslot)
{
	if (!vm->gdt)
		vm->gdt = vm_vaddr_alloc(vm, getpagesize(),
			KVM_UTIL_MIN_VADDR, gdt_memslot, pgd_memslot);

	dt->base = vm->gdt;
	dt->limit = getpagesize();
}

static void kvm_setup_tss_64bit(struct kvm_vm *vm, struct kvm_segment *segp,
				int selector, int gdt_memslot,
				int pgd_memslot)
{
	if (!vm->tss)
		vm->tss = vm_vaddr_alloc(vm, getpagesize(),
			KVM_UTIL_MIN_VADDR, gdt_memslot, pgd_memslot);

	memset(segp, 0, sizeof(*segp));
	segp->base = vm->tss;
	segp->limit = 0x67;
	segp->selector = selector;
	segp->type = 0xb;
	segp->present = 1;
	kvm_seg_fill_gdt_64bit(vm, segp);
}

static void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_memslot)
{
	struct kvm_sregs sregs;

	/* Set mode specific system register values. */
	vcpu_sregs_get(vm, vcpuid, &sregs);

	sregs.idt.limit = 0;

	kvm_setup_gdt(vm, &sregs.gdt, gdt_memslot, pgd_memslot);

	switch (vm->mode) {
	case VM_MODE_PXXV48_4K:
		sregs.cr0 = X86_CR0_PE | X86_CR0_NE | X86_CR0_PG;
		sregs.cr4 |= X86_CR4_PAE | X86_CR4_OSFXSR;
		sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX);

		kvm_seg_set_unusable(&sregs.ldt);
		kvm_seg_set_kernel_code_64bit(vm, 0x8, &sregs.cs);
		kvm_seg_set_kernel_data_64bit(vm, 0x10, &sregs.ds);
		kvm_seg_set_kernel_data_64bit(vm, 0x10, &sregs.es);
		kvm_setup_tss_64bit(vm, &sregs.tr, 0x18, gdt_memslot, pgd_memslot);
		break;

	default:
		TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", vm->mode);
	}

	sregs.cr3 = vm->pgd;
	vcpu_sregs_set(vm, vcpuid, &sregs);
}

/* Adds a vCPU with reasonable defaults (i.e., a stack)
 *
 * Input Args:
 *   vcpuid - The id of the VCPU to add to the VM.
 *   guest_code - The vCPU's entry point
 */
void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
{
	struct kvm_mp_state mp_state;
	struct kvm_regs regs;
	vm_vaddr_t stack_vaddr;

	stack_vaddr = vm_vaddr_alloc(vm, DEFAULT_STACK_PGS * getpagesize(),
				     DEFAULT_GUEST_STACK_VADDR_MIN, 0, 0);

	/* Create VCPU */
	vm_vcpu_add(vm, vcpuid);
	vcpu_setup(vm, vcpuid, 0, 0);

	/* Setup guest general purpose registers */
	vcpu_regs_get(vm, vcpuid, &regs);
	regs.rflags = regs.rflags | 0x2;
	regs.rsp = stack_vaddr + (DEFAULT_STACK_PGS * getpagesize());
	regs.rip = (unsigned long) guest_code;
	vcpu_regs_set(vm, vcpuid, &regs);

	/* Setup the MP state */
	mp_state.mp_state = 0;
	vcpu_set_mp_state(vm, vcpuid, &mp_state);
}

/* Allocate an instance of struct kvm_cpuid2
 *
 * Input Args: None
 *
 * Output Args: None
 *
 * Return: A pointer to the allocated struct. The caller is responsible
 * for freeing this struct.
 *
 * Since kvm_cpuid2 uses a 0-length array to allow the size of the
 * array to be decided at allocation time, allocation is slightly
 * complicated.  This function uses a reasonable default length for
 * the array and performs the appropriate allocation.
 */
static struct kvm_cpuid2 *allocate_kvm_cpuid2(void)
{
	struct kvm_cpuid2 *cpuid;
	int nent = 100;
	size_t size;

	size = sizeof(*cpuid);
	size += nent * sizeof(struct kvm_cpuid_entry2);
	cpuid = malloc(size);
	if (!cpuid) {
		perror("malloc");
		abort();
	}

	cpuid->nent = nent;

	return cpuid;
}

/* KVM Supported CPUID Get
 *
 * Input Args: None
 *
 * Output Args: None
 *
 * Return: The supported KVM CPUID
 *
 * Get the guest CPUID supported by KVM.
 */
struct kvm_cpuid2 *kvm_get_supported_cpuid(void)
{
	static struct kvm_cpuid2 *cpuid;
	int ret;
	int kvm_fd;

	if (cpuid)
		return cpuid;

	cpuid = allocate_kvm_cpuid2();
	kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
	if (kvm_fd < 0)
		exit(KSFT_SKIP);

	ret = ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID, cpuid);
	TEST_ASSERT(ret == 0, "KVM_GET_SUPPORTED_CPUID failed %d %d\n",
		    ret, errno);

	close(kvm_fd);
	return cpuid;
}

/* Locate a cpuid entry.
 *
 * Input Args:
 *   function: The function of the cpuid entry to find.
 *   index: The index of the cpuid entry to find.
 *
 * Output Args: None
 *
 * Return: A pointer to the cpuid entry. Never returns NULL.
 */
struct kvm_cpuid_entry2 *
kvm_get_supported_cpuid_index(uint32_t function, uint32_t index)
{
	struct kvm_cpuid2 *cpuid;
	struct kvm_cpuid_entry2 *entry = NULL;
	int i;

	cpuid = kvm_get_supported_cpuid();
	for (i = 0; i < cpuid->nent; i++) {
		if (cpuid->entries[i].function == function &&
		    cpuid->entries[i].index == index) {
			entry = &cpuid->entries[i];
			break;
		}
	}

	TEST_ASSERT(entry, "Guest CPUID entry not found: (EAX=%x, ECX=%x).",
		    function, index);
	return entry;
}
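
/*
 * Illustrative sketch, not part of the library: query a single feature
 * bit via kvm_get_supported_cpuid_index().  CPUID.1:ECX bit 5 (VMX) is
 * used purely as an example; consult the SDM for the leaf, index and
 * bit a test actually needs.
 */
static bool __attribute__((unused))
example_cpu_has_vmx(void)
{
	struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_index(1, 0);

	return entry->ecx & (1u << 5);
}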

/* VM VCPU CPUID Set
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vcpuid - VCPU id
 *   cpuid - The CPUID values to set.
 *
 * Output Args: None
 *
 * Return: void
 *
 * Set the VCPU's CPUID.
 */
void vcpu_set_cpuid(struct kvm_vm *vm,
		    uint32_t vcpuid, struct kvm_cpuid2 *cpuid)
{
	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
	int rc;

	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);

	rc = ioctl(vcpu->fd, KVM_SET_CPUID2, cpuid);
	TEST_ASSERT(rc == 0, "KVM_SET_CPUID2 failed, rc: %i errno: %i",
		    rc, errno);
}

/* Create a VM with reasonable defaults
 *
 * Input Args:
 *   vcpuid - The id of the single VCPU to add to the VM.
 *   extra_mem_pages - The number of extra 4K pages of guest memory to
 *                     add (this decides how much extra space is needed
 *                     for page tables, which are carved out of memory
 *                     slot 0)
 *   guest_code - The vCPU's entry point
 *
 * Output Args: None
 *
 * Return:
 *   Pointer to opaque structure that describes the created VM.
 */
struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
				 void *guest_code)
{
	struct kvm_vm *vm;
	/*
	 * For x86 the maximum page table size for a memory region
	 * will be when only 4K pages are used.  In that case the
	 * total extra size for page tables (for extra N pages) will
	 * be: N/512+N/512^2+N/512^3+... which is definitely smaller
	 * than N/512*2.
	 */
	uint64_t extra_pg_pages = extra_mem_pages / 512 * 2;

	/* Create VM */
	vm = vm_create(VM_MODE_DEFAULT,
		       DEFAULT_GUEST_PHY_PAGES + extra_pg_pages,
		       O_RDWR);

	/* Setup guest code */
	kvm_vm_elf_load(vm, program_invocation_name, 0, 0);

	/* Setup IRQ Chip */
	vm_create_irqchip(vm);

	/* Add the first vCPU. */
	vm_vcpu_add_default(vm, vcpuid, guest_code);

	return vm;
}
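
/*
 * Illustrative sketch, not part of the library: the usual test flow
 * built on vm_create_default() is create, run, tear down.  guest_main
 * here stands in for a test's guest entry point.
 */
static void __attribute__((unused))
example_run_test(void *guest_main)
{
	struct kvm_vm *vm;

	/* vCPU 0, no extra guest memory beyond the defaults. */
	vm = vm_create_default(0, 0, guest_main);
	vcpu_run(vm, 0);
	kvm_vm_free(vm);
}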

/* VCPU Get MSR
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vcpuid - VCPU ID
 *   msr_index - Index of MSR
 *
 * Output Args: None
 *
 * Return: On success, value of the MSR. On failure a TEST_ASSERT is produced.
 *
 * Get value of MSR for VCPU.
 */
uint64_t vcpu_get_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index)
{
	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
	struct {
		struct kvm_msrs header;
		struct kvm_msr_entry entry;
	} buffer = {};
	int r;

	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
	buffer.header.nmsrs = 1;
	buffer.entry.index = msr_index;
	r = ioctl(vcpu->fd, KVM_GET_MSRS, &buffer.header);
	TEST_ASSERT(r == 1, "KVM_GET_MSRS IOCTL failed,\n"
		"  rc: %i errno: %i", r, errno);

	return buffer.entry.data;
}

/* VCPU Set MSR
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vcpuid - VCPU ID
 *   msr_index - Index of MSR
 *   msr_value - New value of MSR
 *
 * Output Args: None
 *
 * Return: On success, nothing. On failure a TEST_ASSERT is produced.
 *
 * Set value of MSR for VCPU.
 */
void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index,
		  uint64_t msr_value)
{
	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
	struct {
		struct kvm_msrs header;
		struct kvm_msr_entry entry;
	} buffer = {};
	int r;

	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
	buffer.header.nmsrs = 1;
	buffer.entry.index = msr_index;
	buffer.entry.data = msr_value;
	r = ioctl(vcpu->fd, KVM_SET_MSRS, &buffer.header);
	TEST_ASSERT(r == 1, "KVM_SET_MSRS IOCTL failed,\n"
		"  rc: %i errno: %i", r, errno);
}

/* VM VCPU Args Set
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vcpuid - VCPU ID
 *   num - number of arguments
 *   ... - arguments, each of type uint64_t
 *
 * Output Args: None
 *
 * Return: None
 *
 * Sets the first num function input arguments to the values
 * given as variable args.  Each of the variable args is expected to
 * be of type uint64_t.
 */
void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...)
{
	va_list ap;
	struct kvm_regs regs;

	TEST_ASSERT(num >= 1 && num <= 6, "Unsupported number of args,\n"
		    "  num: %u\n",
		    num);

	va_start(ap, num);
	vcpu_regs_get(vm, vcpuid, &regs);

	if (num >= 1)
		regs.rdi = va_arg(ap, uint64_t);

	if (num >= 2)
		regs.rsi = va_arg(ap, uint64_t);

	if (num >= 3)
		regs.rdx = va_arg(ap, uint64_t);

	if (num >= 4)
		regs.rcx = va_arg(ap, uint64_t);

	if (num >= 5)
		regs.r8 = va_arg(ap, uint64_t);

	if (num >= 6)
		regs.r9 = va_arg(ap, uint64_t);

	vcpu_regs_set(vm, vcpuid, &regs);
	va_end(ap);
}
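
/*
 * Illustrative sketch, not part of the library: vcpu_args_set() follows
 * the System V AMD64 calling convention (rdi, rsi, rdx, rcx, r8, r9),
 * so a guest entry point declared as
 *
 *	void guest_code(uint64_t a, uint64_t b);
 *
 * receives its arguments when the host does the following:
 */
static void __attribute__((unused))
example_set_guest_args(struct kvm_vm *vm, uint64_t a, uint64_t b)
{
	/* a lands in rdi, b in rsi. */
	vcpu_args_set(vm, 0, 2, a, b);
}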

/*
 * VM VCPU Dump
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vcpuid - VCPU ID
 *   indent - Left margin indent amount
 *
 * Output Args:
 *   stream - Output FILE stream
 *
 * Return: None
 *
 * Dumps the current state of the VCPU specified by vcpuid, within the VM
 * given by vm, to the FILE stream given by stream.
 */
void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent)
{
	struct kvm_regs regs;
	struct kvm_sregs sregs;

	fprintf(stream, "%*scpuid: %u\n", indent, "", vcpuid);

	fprintf(stream, "%*sregs:\n", indent + 2, "");
	vcpu_regs_get(vm, vcpuid, &regs);
	regs_dump(stream, &regs, indent + 4);

	fprintf(stream, "%*ssregs:\n", indent + 2, "");
	vcpu_sregs_get(vm, vcpuid, &sregs);
	sregs_dump(stream, &sregs, indent + 4);
}

struct kvm_x86_state {
	struct kvm_vcpu_events events;
	struct kvm_mp_state mp_state;
	struct kvm_regs regs;
	struct kvm_xsave xsave;
	struct kvm_xcrs xcrs;
	struct kvm_sregs sregs;
	struct kvm_debugregs debugregs;
	union {
		struct kvm_nested_state nested;
		char nested_[16384];
	};
	struct kvm_msrs msrs;
};

static int kvm_get_num_msrs(struct kvm_vm *vm)
{
	struct kvm_msr_list nmsrs;
	int r;

	/*
	 * Probe with nmsrs = 0: KVM fails the ioctl with E2BIG and
	 * writes back the number of MSRs it supports.
	 */
	nmsrs.nmsrs = 0;
	r = ioctl(vm->kvm_fd, KVM_GET_MSR_INDEX_LIST, &nmsrs);
	TEST_ASSERT(r == -1 && errno == E2BIG, "Unexpected result from KVM_GET_MSR_INDEX_LIST probe, r: %i",
		    r);

	return nmsrs.nmsrs;
}

struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid)
{
	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
	struct kvm_msr_list *list;
	struct kvm_x86_state *state;
	int nmsrs, r, i;
	static int nested_size = -1;

	if (nested_size == -1) {
		nested_size = kvm_check_cap(KVM_CAP_NESTED_STATE);
		TEST_ASSERT(nested_size <= sizeof(state->nested_),
			    "Nested state size too big, %i > %zi",
			    nested_size, sizeof(state->nested_));
	}

	/*
	 * When KVM exits to userspace with KVM_EXIT_IO, KVM guarantees
	 * guest state is consistent only after userspace re-enters the
	 * kernel with KVM_RUN.  Complete IO prior to migrating state
	 * to a new VM.
	 */
	vcpu_run_complete_io(vm, vcpuid);

	nmsrs = kvm_get_num_msrs(vm);
	list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0]));
	list->nmsrs = nmsrs;
	r = ioctl(vm->kvm_fd, KVM_GET_MSR_INDEX_LIST, list);
	TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MSR_INDEX_LIST, r: %i",
		    r);

	state = malloc(sizeof(*state) + nmsrs * sizeof(state->msrs.entries[0]));
	r = ioctl(vcpu->fd, KVM_GET_VCPU_EVENTS, &state->events);
	TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_VCPU_EVENTS, r: %i",
		    r);

	r = ioctl(vcpu->fd, KVM_GET_MP_STATE, &state->mp_state);
	TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MP_STATE, r: %i",
		    r);

	r = ioctl(vcpu->fd, KVM_GET_REGS, &state->regs);
	TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_REGS, r: %i",
		    r);

	r = ioctl(vcpu->fd, KVM_GET_XSAVE, &state->xsave);
	TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_XSAVE, r: %i",
		    r);

	if (kvm_check_cap(KVM_CAP_XCRS)) {
		r = ioctl(vcpu->fd, KVM_GET_XCRS, &state->xcrs);
		TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_XCRS, r: %i",
			    r);
	}

	r = ioctl(vcpu->fd, KVM_GET_SREGS, &state->sregs);
	TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_SREGS, r: %i",
		    r);

	if (nested_size) {
		state->nested.size = sizeof(state->nested_);
		r = ioctl(vcpu->fd, KVM_GET_NESTED_STATE, &state->nested);
		TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_NESTED_STATE, r: %i",
			    r);
		TEST_ASSERT(state->nested.size <= nested_size,
			    "Nested state size too big, %i (KVM_CHECK_CAP gave %i)",
			    state->nested.size, nested_size);
	} else
		state->nested.size = 0;

	state->msrs.nmsrs = nmsrs;
	for (i = 0; i < nmsrs; i++)
		state->msrs.entries[i].index = list->indices[i];
	r = ioctl(vcpu->fd, KVM_GET_MSRS, &state->msrs);
	TEST_ASSERT(r == nmsrs, "Unexpected result from KVM_GET_MSRS, r: %i (failed at %x)",
		    r, r == nmsrs ? -1 : list->indices[r]);

	r = ioctl(vcpu->fd, KVM_GET_DEBUGREGS, &state->debugregs);
	TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_DEBUGREGS, r: %i",
		    r);

	free(list);
	return state;
}
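
/*
 * Illustrative sketch, not part of the library, of the save/restore
 * pattern these two helpers support: snapshot a vCPU, recreate the VM
 * (e.g., to model live migration), then restore the snapshot into the
 * new vCPU.  VM and vCPU recreation is elided here; see
 * x86_64/state_test.c for a complete example.
 */
static void __attribute__((unused))
example_migrate_vcpu_state(struct kvm_vm *vm, uint32_t vcpuid)
{
	struct kvm_x86_state *state;

	state = vcpu_save_state(vm, vcpuid);
	/* ... destroy and recreate the VM and vCPU here ... */
	vcpu_load_state(vm, vcpuid, state);
	free(state);	/* vcpu_save_state() mallocs; the caller frees. */
}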

void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_x86_state *state)
{
	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
	int r;

	r = ioctl(vcpu->fd, KVM_SET_XSAVE, &state->xsave);
	TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_XSAVE, r: %i",
		    r);

	if (kvm_check_cap(KVM_CAP_XCRS)) {
		r = ioctl(vcpu->fd, KVM_SET_XCRS, &state->xcrs);
		TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_XCRS, r: %i",
			    r);
	}

	r = ioctl(vcpu->fd, KVM_SET_SREGS, &state->sregs);
	TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_SREGS, r: %i",
		    r);

	r = ioctl(vcpu->fd, KVM_SET_MSRS, &state->msrs);
	TEST_ASSERT(r == state->msrs.nmsrs, "Unexpected result from KVM_SET_MSRS, r: %i (failed at %x)",
		    r, r == state->msrs.nmsrs ? -1 : state->msrs.entries[r].index);

	r = ioctl(vcpu->fd, KVM_SET_VCPU_EVENTS, &state->events);
	TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_VCPU_EVENTS, r: %i",
		    r);

	r = ioctl(vcpu->fd, KVM_SET_MP_STATE, &state->mp_state);
	TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_MP_STATE, r: %i",
		    r);

	r = ioctl(vcpu->fd, KVM_SET_DEBUGREGS, &state->debugregs);
	TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_DEBUGREGS, r: %i",
		    r);

	r = ioctl(vcpu->fd, KVM_SET_REGS, &state->regs);
	TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_REGS, r: %i",
		    r);

	if (state->nested.size) {
		r = ioctl(vcpu->fd, KVM_SET_NESTED_STATE, &state->nested);
		TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_NESTED_STATE, r: %i",
			    r);
	}
}

bool is_intel_cpu(void)
{
	int eax, ebx, ecx, edx;
	const uint32_t *chunk;
	const int leaf = 0;

	__asm__ __volatile__(
		"cpuid"
		: /* output */ "=a"(eax), "=b"(ebx),
		  "=c"(ecx), "=d"(edx)
		: /* input */ "0"(leaf), "2"(0));

	chunk = (const uint32_t *)("GenuineIntel");
	return (ebx == chunk[0] && edx == chunk[1] && ecx == chunk[2]);
}

uint32_t kvm_get_cpuid_max(void)
{
	return kvm_get_supported_cpuid_entry(0x80000000)->eax;
}

void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits)
{
	struct kvm_cpuid_entry2 *entry;
	bool pae;

	/* SDM 4.1.4 */
	if (kvm_get_cpuid_max() < 0x80000008) {
		pae = kvm_get_supported_cpuid_entry(1)->edx & (1 << 6);
		*pa_bits = pae ? 36 : 32;
		*va_bits = 32;
	} else {
		entry = kvm_get_supported_cpuid_entry(0x80000008);
		*pa_bits = entry->eax & 0xff;
		*va_bits = (entry->eax >> 8) & 0xff;
	}
}
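
/*
 * Illustrative sketch, not part of the library: derive the highest
 * addressable guest physical page number from the physical address
 * width reported above, assuming 4K pages.
 */
static uint64_t __attribute__((unused))
example_max_gfn(void)
{
	unsigned int pa_bits, va_bits;

	kvm_get_cpu_address_width(&pa_bits, &va_bits);
	return (1ULL << (pa_bits - 12)) - 1;	/* 12 = log2(4K page size) */
}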