// SPDX-License-Identifier: GPL-2.0-only
/*
 * tools/testing/selftests/kvm/lib/x86_64/processor.c
 *
 * Copyright (C) 2018, Google LLC.
 */

#define _GNU_SOURCE /* for program_invocation_name */

#include "test_util.h"
#include "kvm_util.h"
#include "../kvm_util_internal.h"
#include "processor.h"

/* Minimum physical address used for virtual translation tables. */
#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000

/* Virtual translation table structure declarations */
struct pageMapL4Entry {
	uint64_t present:1;
	uint64_t writable:1;
	uint64_t user:1;
	uint64_t write_through:1;
	uint64_t cache_disable:1;
	uint64_t accessed:1;
	uint64_t ignored_06:1;
	uint64_t page_size:1;
	uint64_t ignored_11_08:4;
	uint64_t address:40;
	uint64_t ignored_62_52:11;
	uint64_t execute_disable:1;
};

struct pageDirectoryPointerEntry {
	uint64_t present:1;
	uint64_t writable:1;
	uint64_t user:1;
	uint64_t write_through:1;
	uint64_t cache_disable:1;
	uint64_t accessed:1;
	uint64_t ignored_06:1;
	uint64_t page_size:1;
	uint64_t ignored_11_08:4;
	uint64_t address:40;
	uint64_t ignored_62_52:11;
	uint64_t execute_disable:1;
};

struct pageDirectoryEntry {
	uint64_t present:1;
	uint64_t writable:1;
	uint64_t user:1;
	uint64_t write_through:1;
	uint64_t cache_disable:1;
	uint64_t accessed:1;
	uint64_t ignored_06:1;
	uint64_t page_size:1;
	uint64_t ignored_11_08:4;
	uint64_t address:40;
	uint64_t ignored_62_52:11;
	uint64_t execute_disable:1;
};

struct pageTableEntry {
	uint64_t present:1;
	uint64_t writable:1;
	uint64_t user:1;
	uint64_t write_through:1;
	uint64_t cache_disable:1;
	uint64_t accessed:1;
	uint64_t dirty:1;
	uint64_t reserved_07:1;
	uint64_t global:1;
	uint64_t ignored_11_09:3;
	uint64_t address:40;
	uint64_t ignored_62_52:11;
	uint64_t execute_disable:1;
};

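/*
 * Each translation level above is indexed by a 9-bit slice of the 48-bit
 * canonical guest virtual address: bits 47:39 select the PML4 entry,
 * bits 38:30 the page directory pointer entry, bits 29:21 the page
 * directory entry, and bits 20:12 the page table entry.  A minimal sketch
 * of the decomposition virt_pg_map() performs below, with "gva" a
 * placeholder canonical address:
 *
 *	uint64_t gva = 0x00007fffdeadb000;
 *	uint16_t index[4];
 *
 *	index[0] = (gva >> 12) & 0x1ffu;
 *	index[1] = (gva >> 21) & 0x1ffu;
 *	index[2] = (gva >> 30) & 0x1ffu;
 *	index[3] = (gva >> 39) & 0x1ffu;
 */
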
void regs_dump(FILE *stream, struct kvm_regs *regs,
	       uint8_t indent)
{
	fprintf(stream, "%*srax: 0x%.16llx rbx: 0x%.16llx "
		"rcx: 0x%.16llx rdx: 0x%.16llx\n",
		indent, "",
		regs->rax, regs->rbx, regs->rcx, regs->rdx);
	fprintf(stream, "%*srsi: 0x%.16llx rdi: 0x%.16llx "
		"rsp: 0x%.16llx rbp: 0x%.16llx\n",
		indent, "",
		regs->rsi, regs->rdi, regs->rsp, regs->rbp);
	fprintf(stream, "%*sr8:  0x%.16llx r9:  0x%.16llx "
		"r10: 0x%.16llx r11: 0x%.16llx\n",
		indent, "",
		regs->r8, regs->r9, regs->r10, regs->r11);
	fprintf(stream, "%*sr12: 0x%.16llx r13: 0x%.16llx "
		"r14: 0x%.16llx r15: 0x%.16llx\n",
		indent, "",
		regs->r12, regs->r13, regs->r14, regs->r15);
	fprintf(stream, "%*srip: 0x%.16llx rfl: 0x%.16llx\n",
		indent, "",
		regs->rip, regs->rflags);
}

/*
 * Segment Dump
 *
 * Input Args:
 *   stream - Output FILE stream
 *   segment - KVM segment
 *   indent - Left margin indent amount
 *
 * Output Args: None
 *
 * Return: None
 *
 * Dumps the state of the KVM segment given by @segment, to the FILE stream
 * given by @stream.
 */
static void segment_dump(FILE *stream, struct kvm_segment *segment,
			 uint8_t indent)
{
	fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.8x "
		"selector: 0x%.4x type: 0x%.2x\n",
		indent, "", segment->base, segment->limit,
		segment->selector, segment->type);
	fprintf(stream, "%*spresent: 0x%.2x dpl: 0x%.2x "
		"db: 0x%.2x s: 0x%.2x l: 0x%.2x\n",
		indent, "", segment->present, segment->dpl,
		segment->db, segment->s, segment->l);
	fprintf(stream, "%*sg: 0x%.2x avl: 0x%.2x "
		"unusable: 0x%.2x padding: 0x%.2x\n",
		indent, "", segment->g, segment->avl,
		segment->unusable, segment->padding);
}

/*
 * dtable Dump
 *
 * Input Args:
 *   stream - Output FILE stream
 *   dtable - KVM dtable
 *   indent - Left margin indent amount
 *
 * Output Args: None
 *
 * Return: None
 *
 * Dumps the state of the KVM dtable given by @dtable, to the FILE stream
 * given by @stream.
 */
static void dtable_dump(FILE *stream, struct kvm_dtable *dtable,
			uint8_t indent)
{
	fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.4x "
		"padding: 0x%.4x 0x%.4x 0x%.4x\n",
		indent, "", dtable->base, dtable->limit,
		dtable->padding[0], dtable->padding[1], dtable->padding[2]);
}

void sregs_dump(FILE *stream, struct kvm_sregs *sregs,
		uint8_t indent)
{
	unsigned int i;

	fprintf(stream, "%*scs:\n", indent, "");
	segment_dump(stream, &sregs->cs, indent + 2);
	fprintf(stream, "%*sds:\n", indent, "");
	segment_dump(stream, &sregs->ds, indent + 2);
	fprintf(stream, "%*ses:\n", indent, "");
	segment_dump(stream, &sregs->es, indent + 2);
	fprintf(stream, "%*sfs:\n", indent, "");
	segment_dump(stream, &sregs->fs, indent + 2);
	fprintf(stream, "%*sgs:\n", indent, "");
	segment_dump(stream, &sregs->gs, indent + 2);
	fprintf(stream, "%*sss:\n", indent, "");
	segment_dump(stream, &sregs->ss, indent + 2);
	fprintf(stream, "%*str:\n", indent, "");
	segment_dump(stream, &sregs->tr, indent + 2);
	fprintf(stream, "%*sldt:\n", indent, "");
	segment_dump(stream, &sregs->ldt, indent + 2);

	fprintf(stream, "%*sgdt:\n", indent, "");
	dtable_dump(stream, &sregs->gdt, indent + 2);
	fprintf(stream, "%*sidt:\n", indent, "");
	dtable_dump(stream, &sregs->idt, indent + 2);

	fprintf(stream, "%*scr0: 0x%.16llx cr2: 0x%.16llx "
		"cr3: 0x%.16llx cr4: 0x%.16llx\n",
		indent, "",
		sregs->cr0, sregs->cr2, sregs->cr3, sregs->cr4);
	fprintf(stream, "%*scr8: 0x%.16llx efer: 0x%.16llx "
		"apic_base: 0x%.16llx\n",
		indent, "",
		sregs->cr8, sregs->efer, sregs->apic_base);

	fprintf(stream, "%*sinterrupt_bitmap:\n", indent, "");
	for (i = 0; i < (KVM_NR_INTERRUPTS + 63) / 64; i++) {
		fprintf(stream, "%*s%.16llx\n", indent + 2, "",
			sregs->interrupt_bitmap[i]);
	}
}

void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot)
{
	TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
		"unknown or unsupported guest mode, mode: 0x%x", vm->mode);

	/* If needed, create page map l4 table. */
	if (!vm->pgd_created) {
		vm_paddr_t paddr = vm_phy_page_alloc(vm,
			KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot);
		vm->pgd = paddr;
		vm->pgd_created = true;
	}
}

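/*
 * virt_pgd_alloc() is idempotent: the first call allocates the PML4 page
 * and later calls are no-ops, so tests may call it once per VM before
 * building any mappings.  A minimal sketch (memslot 0 is a placeholder):
 *
 *	virt_pgd_alloc(vm, 0);
 */
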
void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
		 uint32_t pgd_memslot)
{
	uint16_t index[4];
	struct pageMapL4Entry *pml4e;

	TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
		"unknown or unsupported guest mode, mode: 0x%x", vm->mode);

	TEST_ASSERT((vaddr % vm->page_size) == 0,
		"Virtual address not on page boundary,\n"
		"  vaddr: 0x%lx vm->page_size: 0x%x",
		vaddr, vm->page_size);
	TEST_ASSERT(sparsebit_is_set(vm->vpages_valid,
		(vaddr >> vm->page_shift)),
		"Invalid virtual address, vaddr: 0x%lx",
		vaddr);
	TEST_ASSERT((paddr % vm->page_size) == 0,
		"Physical address not on page boundary,\n"
		"  paddr: 0x%lx vm->page_size: 0x%x",
		paddr, vm->page_size);
	TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
		"Physical address beyond maximum supported,\n"
		"  paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
		paddr, vm->max_gfn, vm->page_size);

	index[0] = (vaddr >> 12) & 0x1ffu;
	index[1] = (vaddr >> 21) & 0x1ffu;
	index[2] = (vaddr >> 30) & 0x1ffu;
	index[3] = (vaddr >> 39) & 0x1ffu;

	/* Allocate page directory pointer table if not present. */
	pml4e = addr_gpa2hva(vm, vm->pgd);
	if (!pml4e[index[3]].present) {
		pml4e[index[3]].address = vm_phy_page_alloc(vm,
			KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot)
			>> vm->page_shift;
		pml4e[index[3]].writable = true;
		pml4e[index[3]].present = true;
	}

	/* Allocate page directory table if not present. */
	struct pageDirectoryPointerEntry *pdpe;
	pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size);
	if (!pdpe[index[2]].present) {
		pdpe[index[2]].address = vm_phy_page_alloc(vm,
			KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot)
			>> vm->page_shift;
		pdpe[index[2]].writable = true;
		pdpe[index[2]].present = true;
	}

	/* Allocate page table if not present. */
	struct pageDirectoryEntry *pde;
	pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size);
	if (!pde[index[1]].present) {
		pde[index[1]].address = vm_phy_page_alloc(vm,
			KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot)
			>> vm->page_shift;
		pde[index[1]].writable = true;
		pde[index[1]].present = true;
	}

	/* Fill in page table entry. */
	struct pageTableEntry *pte;
	pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size);
	pte[index[0]].address = paddr >> vm->page_shift;
	pte[index[0]].writable = true;
	pte[index[0]].present = 1;
}

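/*
 * A minimal usage sketch: back one page-aligned guest virtual page with a
 * freshly allocated physical page, then read the translation back through
 * addr_gva2gpa() from later in this file ("gva" is a placeholder):
 *
 *	vm_paddr_t gpa;
 *
 *	gpa = vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0);
 *	virt_pg_map(vm, gva, gpa, 0);
 *	TEST_ASSERT(addr_gva2gpa(vm, gva) == gpa, "Mapping not installed");
 */
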
void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
{
	struct pageMapL4Entry *pml4e, *pml4e_start;
	struct pageDirectoryPointerEntry *pdpe, *pdpe_start;
	struct pageDirectoryEntry *pde, *pde_start;
	struct pageTableEntry *pte, *pte_start;

	if (!vm->pgd_created)
		return;

	fprintf(stream, "%*s                                          "
		"                no\n", indent, "");
	fprintf(stream, "%*s      index hvaddr         gpaddr         "
		"addr         w exec dirty\n",
		indent, "");
	pml4e_start = (struct pageMapL4Entry *) addr_gpa2hva(vm,
		vm->pgd);
	for (uint16_t n1 = 0; n1 <= 0x1ffu; n1++) {
		pml4e = &pml4e_start[n1];
		if (!pml4e->present)
			continue;
		fprintf(stream, "%*spml4e 0x%-3zx %p 0x%-12lx 0x%-10lx %u "
			" %u\n",
			indent, "",
			pml4e - pml4e_start, pml4e,
			addr_hva2gpa(vm, pml4e), (uint64_t) pml4e->address,
			pml4e->writable, pml4e->execute_disable);

		pdpe_start = addr_gpa2hva(vm, pml4e->address
			* vm->page_size);
		for (uint16_t n2 = 0; n2 <= 0x1ffu; n2++) {
			pdpe = &pdpe_start[n2];
			if (!pdpe->present)
				continue;
			fprintf(stream, "%*spdpe  0x%-3zx %p 0x%-12lx 0x%-10lx "
				"%u  %u\n",
				indent, "",
				pdpe - pdpe_start, pdpe,
				addr_hva2gpa(vm, pdpe),
				(uint64_t) pdpe->address, pdpe->writable,
				pdpe->execute_disable);

			pde_start = addr_gpa2hva(vm,
				pdpe->address * vm->page_size);
			for (uint16_t n3 = 0; n3 <= 0x1ffu; n3++) {
				pde = &pde_start[n3];
				if (!pde->present)
					continue;
				fprintf(stream, "%*spde   0x%-3zx %p "
					"0x%-12lx 0x%-10lx %u  %u\n",
					indent, "", pde - pde_start, pde,
					addr_hva2gpa(vm, pde),
					(uint64_t) pde->address, pde->writable,
					pde->execute_disable);

				pte_start = addr_gpa2hva(vm,
					pde->address * vm->page_size);
				for (uint16_t n4 = 0; n4 <= 0x1ffu; n4++) {
					pte = &pte_start[n4];
					if (!pte->present)
						continue;
					fprintf(stream, "%*spte   0x%-3zx %p "
						"0x%-12lx 0x%-10lx %u  %u "
						"    %u    0x%-10lx\n",
						indent, "",
						pte - pte_start, pte,
						addr_hva2gpa(vm, pte),
						(uint64_t) pte->address,
						pte->writable,
						pte->execute_disable,
						pte->dirty,
						((uint64_t) n1 << 27)
						| ((uint64_t) n2 << 18)
						| ((uint64_t) n3 << 9)
						| ((uint64_t) n4));
				}
			}
		}
	}
}

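/*
 * A minimal sketch of dumping every present mapping of a VM to stderr
 * with a two-space left margin, e.g. while debugging a translation
 * failure:
 *
 *	virt_dump(stderr, vm, 2);
 */
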
/*
 * Set Unusable Segment
 *
 * Input Args: None
 *
 * Output Args:
 *   segp - Pointer to segment register
 *
 * Return: None
 *
 * Sets the segment register pointed to by @segp to an unusable state.
 */
static void kvm_seg_set_unusable(struct kvm_segment *segp)
{
	memset(segp, 0, sizeof(*segp));
	segp->unusable = true;
}

static void kvm_seg_fill_gdt_64bit(struct kvm_vm *vm, struct kvm_segment *segp)
{
	void *gdt = addr_gva2hva(vm, vm->gdt);
	struct desc64 *desc = gdt + (segp->selector >> 3) * 8;

	desc->limit0 = segp->limit & 0xFFFF;
	desc->base0 = segp->base & 0xFFFF;
	desc->base1 = segp->base >> 16;
	desc->s = segp->s;
	desc->type = segp->type;
	desc->dpl = segp->dpl;
	desc->p = segp->present;
	desc->limit1 = segp->limit >> 16;
	desc->l = segp->l;
	desc->db = segp->db;
	desc->g = segp->g;
	desc->base2 = segp->base >> 24;
	if (!segp->s)
		desc->base3 = segp->base >> 32;
}

/*
 * Set Long Mode Flat Kernel Code Segment
 *
 * Input Args:
 *   vm - VM whose GDT is being filled, or NULL to only write segp
 *   selector - selector value
 *
 * Output Args:
 *   segp - Pointer to KVM segment
 *
 * Return: None
 *
 * Sets up the KVM segment pointed to by @segp, to be a code segment
 * with the selector value given by @selector.
 */
static void kvm_seg_set_kernel_code_64bit(struct kvm_vm *vm, uint16_t selector,
					  struct kvm_segment *segp)
{
	memset(segp, 0, sizeof(*segp));
	segp->selector = selector;
	segp->limit = 0xFFFFFFFFu;
	segp->s = 0x1; /* kTypeCodeData */
	segp->type = 0x08 | 0x01 | 0x02; /* kFlagCode | kFlagCodeAccessed
					  * | kFlagCodeReadable
					  */
	segp->g = true;
	segp->l = true;
	segp->present = 1;
	if (vm)
		kvm_seg_fill_gdt_64bit(vm, segp);
}

/*
 * Set Long Mode Flat Kernel Data Segment
 *
 * Input Args:
 *   vm - VM whose GDT is being filled, or NULL to only write segp
 *   selector - selector value
 *
 * Output Args:
 *   segp - Pointer to KVM segment
 *
 * Return: None
 *
 * Sets up the KVM segment pointed to by @segp, to be a data segment
 * with the selector value given by @selector.
 */
static void kvm_seg_set_kernel_data_64bit(struct kvm_vm *vm, uint16_t selector,
					  struct kvm_segment *segp)
{
	memset(segp, 0, sizeof(*segp));
	segp->selector = selector;
	segp->limit = 0xFFFFFFFFu;
	segp->s = 0x1; /* kTypeCodeData */
	segp->type = 0x00 | 0x01 | 0x02; /* kFlagData | kFlagDataAccessed
					  * | kFlagDataWritable
					  */
	segp->g = true;
	segp->present = true;
	if (vm)
		kvm_seg_fill_gdt_64bit(vm, segp);
}

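/*
 * Per the doc comments above, a NULL vm fills in only the kvm_segment and
 * touches no GDT.  A minimal sketch of building a detached flat code
 * segment description for selector 0x8:
 *
 *	struct kvm_segment seg;
 *
 *	kvm_seg_set_kernel_code_64bit(NULL, 0x8, &seg);
 */
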
vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
{
	uint16_t index[4];
	struct pageMapL4Entry *pml4e;
	struct pageDirectoryPointerEntry *pdpe;
	struct pageDirectoryEntry *pde;
	struct pageTableEntry *pte;

	TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
		"unknown or unsupported guest mode, mode: 0x%x", vm->mode);

	index[0] = (gva >> 12) & 0x1ffu;
	index[1] = (gva >> 21) & 0x1ffu;
	index[2] = (gva >> 30) & 0x1ffu;
	index[3] = (gva >> 39) & 0x1ffu;

	if (!vm->pgd_created)
		goto unmapped_gva;
	pml4e = addr_gpa2hva(vm, vm->pgd);
	if (!pml4e[index[3]].present)
		goto unmapped_gva;

	pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size);
	if (!pdpe[index[2]].present)
		goto unmapped_gva;

	pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size);
	if (!pde[index[1]].present)
		goto unmapped_gva;

	pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size);
	if (!pte[index[0]].present)
		goto unmapped_gva;

	return (pte[index[0]].address * vm->page_size) + (gva & 0xfffu);

unmapped_gva:
	TEST_FAIL("No mapping for vm virtual address, gva: 0x%lx", gva);
	exit(EXIT_FAILURE);
}

static void kvm_setup_gdt(struct kvm_vm *vm, struct kvm_dtable *dt, int gdt_memslot,
			  int pgd_memslot)
{
	if (!vm->gdt)
		vm->gdt = vm_vaddr_alloc(vm, getpagesize(),
			KVM_UTIL_MIN_VADDR, gdt_memslot, pgd_memslot);

	dt->base = vm->gdt;
	dt->limit = getpagesize();
}

static void kvm_setup_tss_64bit(struct kvm_vm *vm, struct kvm_segment *segp,
				int selector, int gdt_memslot,
				int pgd_memslot)
{
	if (!vm->tss)
		vm->tss = vm_vaddr_alloc(vm, getpagesize(),
			KVM_UTIL_MIN_VADDR, gdt_memslot, pgd_memslot);

	memset(segp, 0, sizeof(*segp));
	segp->base = vm->tss;
	segp->limit = 0x67;
	segp->selector = selector;
	segp->type = 0xb;
	segp->present = 1;
	kvm_seg_fill_gdt_64bit(vm, segp);
}

static void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_memslot)
{
	struct kvm_sregs sregs;

	/* Set mode specific system register values. */
	vcpu_sregs_get(vm, vcpuid, &sregs);

	sregs.idt.limit = 0;

	kvm_setup_gdt(vm, &sregs.gdt, gdt_memslot, pgd_memslot);

	switch (vm->mode) {
	case VM_MODE_PXXV48_4K:
		sregs.cr0 = X86_CR0_PE | X86_CR0_NE | X86_CR0_PG;
		sregs.cr4 |= X86_CR4_PAE | X86_CR4_OSFXSR;
		sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX);

		kvm_seg_set_unusable(&sregs.ldt);
		kvm_seg_set_kernel_code_64bit(vm, 0x8, &sregs.cs);
		kvm_seg_set_kernel_data_64bit(vm, 0x10, &sregs.ds);
		kvm_seg_set_kernel_data_64bit(vm, 0x10, &sregs.es);
		kvm_setup_tss_64bit(vm, &sregs.tr, 0x18, gdt_memslot, pgd_memslot);
		break;

	default:
		TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode);
	}

	sregs.cr3 = vm->pgd;
	vcpu_sregs_set(vm, vcpuid, &sregs);
}

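/*
 * Resulting GDT layout installed by vcpu_setup() for VM_MODE_PXXV48_4K,
 * given the selectors used above (entry 0 remains the null descriptor):
 *
 *	0x00	null descriptor
 *	0x08	flat 64-bit kernel code segment (cs)
 *	0x10	flat 64-bit kernel data segment (ds, es)
 *	0x18	64-bit TSS descriptor (tr)
 */
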
void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
{
	struct kvm_mp_state mp_state;
	struct kvm_regs regs;
	vm_vaddr_t stack_vaddr;
	stack_vaddr = vm_vaddr_alloc(vm, DEFAULT_STACK_PGS * getpagesize(),
				     DEFAULT_GUEST_STACK_VADDR_MIN, 0, 0);

	/* Create VCPU */
	vm_vcpu_add(vm, vcpuid);
	vcpu_setup(vm, vcpuid, 0, 0);

	/* Setup guest general purpose registers */
	vcpu_regs_get(vm, vcpuid, &regs);
	regs.rflags = regs.rflags | 0x2;
	regs.rsp = stack_vaddr + (DEFAULT_STACK_PGS * getpagesize());
	regs.rip = (unsigned long) guest_code;
	vcpu_regs_set(vm, vcpuid, &regs);

	/* Setup the MP state */
	mp_state.mp_state = 0;
	vcpu_set_mp_state(vm, vcpuid, &mp_state);
}

/*
 * Allocate an instance of struct kvm_cpuid2
 *
 * Input Args: None
 *
 * Output Args: None
 *
 * Return: A pointer to the allocated struct. The caller is responsible
 * for freeing this struct.
 *
 * Since kvm_cpuid2 uses a 0-length array to allow the size of the
 * array to be decided at allocation time, allocation is slightly
 * complicated. This function uses a reasonable default length for
 * the array and performs the appropriate allocation.
 */
static struct kvm_cpuid2 *allocate_kvm_cpuid2(void)
{
	struct kvm_cpuid2 *cpuid;
	int nent = 100;
	size_t size;

	size = sizeof(*cpuid);
	size += nent * sizeof(struct kvm_cpuid_entry2);
	cpuid = malloc(size);
	if (!cpuid) {
		perror("malloc");
		abort();
	}

	cpuid->nent = nent;

	return cpuid;
}

/*
 * KVM Supported CPUID Get
 *
 * Input Args: None
 *
 * Output Args: None
 *
 * Return: The supported KVM CPUID
 *
 * Get the guest CPUID supported by KVM.
 */
struct kvm_cpuid2 *kvm_get_supported_cpuid(void)
{
	static struct kvm_cpuid2 *cpuid;
	int ret;
	int kvm_fd;

	if (cpuid)
		return cpuid;

	cpuid = allocate_kvm_cpuid2();
	kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
	if (kvm_fd < 0)
		exit(KSFT_SKIP);

	ret = ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID, cpuid);
	TEST_ASSERT(ret == 0, "KVM_GET_SUPPORTED_CPUID failed %d %d\n",
		    ret, errno);

	close(kvm_fd);
	return cpuid;
}

/*
 * Locate a cpuid entry.
 *
 * Input Args:
 *   function: The function of the cpuid entry to find.
 *   index: The index of the cpuid entry.
 *
 * Output Args: None
 *
 * Return: A pointer to the cpuid entry. Never returns NULL.
 */
struct kvm_cpuid_entry2 *
kvm_get_supported_cpuid_index(uint32_t function, uint32_t index)
{
	struct kvm_cpuid2 *cpuid;
	struct kvm_cpuid_entry2 *entry = NULL;
	int i;

	cpuid = kvm_get_supported_cpuid();
	for (i = 0; i < cpuid->nent; i++) {
		if (cpuid->entries[i].function == function &&
		    cpuid->entries[i].index == index) {
			entry = &cpuid->entries[i];
			break;
		}
	}

	TEST_ASSERT(entry, "Guest CPUID entry not found: (EAX=%x, ECX=%x).",
		    function, index);
	return entry;
}

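/*
 * A minimal sketch of probing a feature bit through the helpers above,
 * mirroring the CPUID.1:EDX.PAE[bit 6] check in kvm_get_cpu_address_width()
 * at the bottom of this file (kvm_get_supported_cpuid_entry() is the
 * leaf-only wrapper used elsewhere in this file):
 *
 *	bool pae = kvm_get_supported_cpuid_entry(1)->edx & (1 << 6);
 */
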
/*
 * VM VCPU CPUID Set
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vcpuid - VCPU id
 *   cpuid - The CPUID values to set.
 *
 * Output Args: None
 *
 * Return: void
 *
 * Set the VCPU's CPUID.
 */
void vcpu_set_cpuid(struct kvm_vm *vm,
		uint32_t vcpuid, struct kvm_cpuid2 *cpuid)
{
	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
	int rc;

	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);

	rc = ioctl(vcpu->fd, KVM_SET_CPUID2, cpuid);
	TEST_ASSERT(rc == 0, "KVM_SET_CPUID2 failed, rc: %i errno: %i",
		    rc, errno);
}

struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
				 void *guest_code)
{
	struct kvm_vm *vm;
	/*
	 * For x86 the maximum page table size for a memory region
	 * will be when only 4K pages are used. In that case the
	 * total extra size for page tables (for extra N pages) will
	 * be: N/512+N/512^2+N/512^3+... which is definitely smaller
	 * than N/512*2.
	 */
	uint64_t extra_pg_pages = extra_mem_pages / 512 * 2;

	/* Create VM */
	vm = vm_create(VM_MODE_DEFAULT,
		       DEFAULT_GUEST_PHY_PAGES + extra_pg_pages,
		       O_RDWR);

	/* Setup guest code */
	kvm_vm_elf_load(vm, program_invocation_name, 0, 0);

	/* Setup IRQ Chip */
	vm_create_irqchip(vm);

	/* Add the first vCPU. */
	vm_vcpu_add_default(vm, vcpuid, guest_code);

	return vm;
}

/*
 * VCPU Get MSR
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vcpuid - VCPU ID
 *   msr_index - Index of MSR
 *
 * Output Args: None
 *
 * Return: On success, value of the MSR. On failure a TEST_ASSERT is produced.
 *
 * Get value of MSR for VCPU.
 */
uint64_t vcpu_get_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index)
{
	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
	struct {
		struct kvm_msrs header;
		struct kvm_msr_entry entry;
	} buffer = {};
	int r;

	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
	buffer.header.nmsrs = 1;
	buffer.entry.index = msr_index;
	r = ioctl(vcpu->fd, KVM_GET_MSRS, &buffer.header);
	TEST_ASSERT(r == 1, "KVM_GET_MSRS IOCTL failed,\n"
		"  rc: %i errno: %i", r, errno);

	return buffer.entry.data;
}

/*
 * _VCPU Set MSR
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vcpuid - VCPU ID
 *   msr_index - Index of MSR
 *   msr_value - New value of MSR
 *
 * Output Args: None
 *
 * Return: The result of KVM_SET_MSRS.
 *
 * Sets the value of an MSR for the given VCPU.
 */
int _vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index,
		  uint64_t msr_value)
{
	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
	struct {
		struct kvm_msrs header;
		struct kvm_msr_entry entry;
	} buffer = {};
	int r;

	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
	buffer.header.nmsrs = 1;
	buffer.entry.index = msr_index;
	buffer.entry.data = msr_value;
	r = ioctl(vcpu->fd, KVM_SET_MSRS, &buffer.header);
	return r;
}

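/*
 * KVM_SET_MSRS returns the number of entries it processed, so with
 * nmsrs == 1 the raw helper above lets a test distinguish a rejected
 * write (0) from an accepted one (1).  A minimal negative-test sketch,
 * with "msr_index" and "val" placeholders for a write the test expects
 * KVM to refuse:
 *
 *	r = _vcpu_set_msr(vm, 0, msr_index, val);
 *	TEST_ASSERT(r == 0, "Expected KVM to reject the MSR write");
 */
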
/*
 * VCPU Set MSR
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vcpuid - VCPU ID
 *   msr_index - Index of MSR
 *   msr_value - New value of MSR
 *
 * Output Args: None
 *
 * Return: On success, nothing. On failure a TEST_ASSERT is produced.
 *
 * Set value of MSR for VCPU.
 */
void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index,
		  uint64_t msr_value)
{
	int r;

	r = _vcpu_set_msr(vm, vcpuid, msr_index, msr_value);
	TEST_ASSERT(r == 1, "KVM_SET_MSRS IOCTL failed,\n"
		"  rc: %i errno: %i", r, errno);
}

void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...)
{
	va_list ap;
	struct kvm_regs regs;

	TEST_ASSERT(num >= 1 && num <= 6, "Unsupported number of args,\n"
		    "  num: %u\n",
		    num);

	va_start(ap, num);
	vcpu_regs_get(vm, vcpuid, &regs);

	if (num >= 1)
		regs.rdi = va_arg(ap, uint64_t);

	if (num >= 2)
		regs.rsi = va_arg(ap, uint64_t);

	if (num >= 3)
		regs.rdx = va_arg(ap, uint64_t);

	if (num >= 4)
		regs.rcx = va_arg(ap, uint64_t);

	if (num >= 5)
		regs.r8 = va_arg(ap, uint64_t);

	if (num >= 6)
		regs.r9 = va_arg(ap, uint64_t);

	vcpu_regs_set(vm, vcpuid, &regs);
	va_end(ap);
}

void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent)
{
	struct kvm_regs regs;
	struct kvm_sregs sregs;

	fprintf(stream, "%*scpuid: %u\n", indent, "", vcpuid);

	fprintf(stream, "%*sregs:\n", indent + 2, "");
	vcpu_regs_get(vm, vcpuid, &regs);
	regs_dump(stream, &regs, indent + 4);

	fprintf(stream, "%*ssregs:\n", indent + 2, "");
	vcpu_sregs_get(vm, vcpuid, &sregs);
	sregs_dump(stream, &sregs, indent + 4);
}

struct kvm_x86_state {
	struct kvm_vcpu_events events;
	struct kvm_mp_state mp_state;
	struct kvm_regs regs;
	struct kvm_xsave xsave;
	struct kvm_xcrs xcrs;
	struct kvm_sregs sregs;
	struct kvm_debugregs debugregs;
	union {
		struct kvm_nested_state nested;
		char nested_[16384];
	};
	struct kvm_msrs msrs;
};

static int kvm_get_num_msrs_fd(int kvm_fd)
{
	struct kvm_msr_list nmsrs;
	int r;

	nmsrs.nmsrs = 0;
	r = ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, &nmsrs);
	TEST_ASSERT(r == -1 && errno == E2BIG, "Unexpected result from KVM_GET_MSR_INDEX_LIST probe, r: %i",
		    r);

	return nmsrs.nmsrs;
}

static int kvm_get_num_msrs(struct kvm_vm *vm)
{
	return kvm_get_num_msrs_fd(vm->kvm_fd);
}

struct kvm_msr_list *kvm_get_msr_index_list(void)
{
	struct kvm_msr_list *list;
	int nmsrs, r, kvm_fd;

	kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
	if (kvm_fd < 0)
		exit(KSFT_SKIP);

	nmsrs = kvm_get_num_msrs_fd(kvm_fd);
	list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0]));
	list->nmsrs = nmsrs;
	r = ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, list);
	close(kvm_fd);

	TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MSR_INDEX_LIST, r: %i",
		    r);

	return list;
}

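/*
 * The returned list is heap allocated with its indices filled in and is
 * owned by the caller.  A minimal sketch of walking and releasing it:
 *
 *	struct kvm_msr_list *list = kvm_get_msr_index_list();
 *	int i;
 *
 *	for (i = 0; i < list->nmsrs; i++)
 *		printf("MSR 0x%x\n", list->indices[i]);
 *	free(list);
 */
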
struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid)
{
	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
	struct kvm_msr_list *list;
	struct kvm_x86_state *state;
	int nmsrs, r, i;
	static int nested_size = -1;

	if (nested_size == -1) {
		nested_size = kvm_check_cap(KVM_CAP_NESTED_STATE);
		TEST_ASSERT(nested_size <= sizeof(state->nested_),
			    "Nested state size too big, %i > %zi",
			    nested_size, sizeof(state->nested_));
	}

	/*
	 * When KVM exits to userspace with KVM_EXIT_IO, KVM guarantees
	 * guest state is consistent only after userspace re-enters the
	 * kernel with KVM_RUN.  Complete IO prior to migrating state
	 * to a new VM.
	 */
	vcpu_run_complete_io(vm, vcpuid);

	nmsrs = kvm_get_num_msrs(vm);
	list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0]));
	list->nmsrs = nmsrs;
	r = ioctl(vm->kvm_fd, KVM_GET_MSR_INDEX_LIST, list);
	TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MSR_INDEX_LIST, r: %i",
		    r);

	state = malloc(sizeof(*state) + nmsrs * sizeof(state->msrs.entries[0]));
	r = ioctl(vcpu->fd, KVM_GET_VCPU_EVENTS, &state->events);
	TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_VCPU_EVENTS, r: %i",
		    r);

	r = ioctl(vcpu->fd, KVM_GET_MP_STATE, &state->mp_state);
	TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MP_STATE, r: %i",
		    r);

	r = ioctl(vcpu->fd, KVM_GET_REGS, &state->regs);
	TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_REGS, r: %i",
		    r);

	r = ioctl(vcpu->fd, KVM_GET_XSAVE, &state->xsave);
	TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_XSAVE, r: %i",
		    r);

	if (kvm_check_cap(KVM_CAP_XCRS)) {
		r = ioctl(vcpu->fd, KVM_GET_XCRS, &state->xcrs);
		TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_XCRS, r: %i",
			    r);
	}

	r = ioctl(vcpu->fd, KVM_GET_SREGS, &state->sregs);
	TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_SREGS, r: %i",
		    r);

	if (nested_size) {
		state->nested.size = sizeof(state->nested_);
		r = ioctl(vcpu->fd, KVM_GET_NESTED_STATE, &state->nested);
		TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_NESTED_STATE, r: %i",
			    r);
		TEST_ASSERT(state->nested.size <= nested_size,
			    "Nested state size too big, %i (KVM_CHECK_CAP gave %i)",
			    state->nested.size, nested_size);
	} else
		state->nested.size = 0;

	state->msrs.nmsrs = nmsrs;
	for (i = 0; i < nmsrs; i++)
		state->msrs.entries[i].index = list->indices[i];
	r = ioctl(vcpu->fd, KVM_GET_MSRS, &state->msrs);
	TEST_ASSERT(r == nmsrs, "Unexpected result from KVM_GET_MSRS, r: %i (failed MSR was 0x%x)",
		    r, r == nmsrs ? -1 : list->indices[r]);

	r = ioctl(vcpu->fd, KVM_GET_DEBUGREGS, &state->debugregs);
	TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_DEBUGREGS, r: %i",
		    r);

	free(list);
	return state;
}

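/*
 * vcpu_save_state() pairs with vcpu_load_state() below for migration-style
 * tests.  A minimal sketch, assuming the kvm_vm_release()/kvm_vm_restart()
 * helpers from kvm_util.h are used to rebuild the VM around the same guest
 * memory:
 *
 *	struct kvm_x86_state *state = vcpu_save_state(vm, 0);
 *
 *	kvm_vm_release(vm);
 *	kvm_vm_restart(vm, O_RDWR);
 *	vm_vcpu_add(vm, 0);
 *	vcpu_set_cpuid(vm, 0, kvm_get_supported_cpuid());
 *	vcpu_load_state(vm, 0, state);
 *	free(state);
 */
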
void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_x86_state *state)
{
	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
	int r;

	r = ioctl(vcpu->fd, KVM_SET_XSAVE, &state->xsave);
	TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_XSAVE, r: %i",
		    r);

	if (kvm_check_cap(KVM_CAP_XCRS)) {
		r = ioctl(vcpu->fd, KVM_SET_XCRS, &state->xcrs);
		TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_XCRS, r: %i",
			    r);
	}

	r = ioctl(vcpu->fd, KVM_SET_SREGS, &state->sregs);
	TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_SREGS, r: %i",
		    r);

	r = ioctl(vcpu->fd, KVM_SET_MSRS, &state->msrs);
	TEST_ASSERT(r == state->msrs.nmsrs, "Unexpected result from KVM_SET_MSRS, r: %i (failed at %x)",
		    r, r == state->msrs.nmsrs ? -1 : state->msrs.entries[r].index);

	r = ioctl(vcpu->fd, KVM_SET_VCPU_EVENTS, &state->events);
	TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_VCPU_EVENTS, r: %i",
		    r);

	r = ioctl(vcpu->fd, KVM_SET_MP_STATE, &state->mp_state);
	TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_MP_STATE, r: %i",
		    r);

	r = ioctl(vcpu->fd, KVM_SET_DEBUGREGS, &state->debugregs);
	TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_DEBUGREGS, r: %i",
		    r);

	r = ioctl(vcpu->fd, KVM_SET_REGS, &state->regs);
	TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_REGS, r: %i",
		    r);

	if (state->nested.size) {
		r = ioctl(vcpu->fd, KVM_SET_NESTED_STATE, &state->nested);
		TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_NESTED_STATE, r: %i",
			    r);
	}
}

bool is_intel_cpu(void)
{
	int eax, ebx, ecx, edx;
	const uint32_t *chunk;
	const int leaf = 0;

	__asm__ __volatile__(
		"cpuid"
		: /* output */ "=a"(eax), "=b"(ebx),
		  "=c"(ecx), "=d"(edx)
		: /* input */ "0"(leaf), "2"(0));

	chunk = (const uint32_t *)("GenuineIntel");
	return (ebx == chunk[0] && edx == chunk[1] && ecx == chunk[2]);
}

uint32_t kvm_get_cpuid_max_basic(void)
{
	return kvm_get_supported_cpuid_entry(0)->eax;
}

uint32_t kvm_get_cpuid_max_extended(void)
{
	return kvm_get_supported_cpuid_entry(0x80000000)->eax;
}

void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits)
{
	struct kvm_cpuid_entry2 *entry;
	bool pae;

	/* SDM 4.1.4 */
	if (kvm_get_cpuid_max_extended() < 0x80000008) {
		pae = kvm_get_supported_cpuid_entry(1)->edx & (1 << 6);
		*pa_bits = pae ? 36 : 32;
		*va_bits = 32;
	} else {
		entry = kvm_get_supported_cpuid_entry(0x80000008);
		*pa_bits = entry->eax & 0xff;
		*va_bits = (entry->eax >> 8) & 0xff;
	}
}
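
/*
 * A minimal sketch of turning the reported physical-address width into
 * the highest guest physical address a test may touch:
 *
 *	unsigned int pa_bits, va_bits;
 *	uint64_t max_gpa;
 *
 *	kvm_get_cpu_address_width(&pa_bits, &va_bits);
 *	max_gpa = (1ull << pa_bits) - 1;
 */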