// SPDX-License-Identifier: GPL-2.0-only
/*
 * tools/testing/selftests/kvm/lib/x86_64/processor.c
 *
 * Copyright (C) 2018, Google LLC.
 */

#include "test_util.h"
#include "kvm_util.h"
#include "processor.h"

#ifndef NUM_INTERRUPTS
#define NUM_INTERRUPTS 256
#endif

#define DEFAULT_CODE_SELECTOR 0x8
#define DEFAULT_DATA_SELECTOR 0x10

#define MAX_NR_CPUID_ENTRIES 100

vm_vaddr_t exception_handlers;

static void regs_dump(FILE *stream, struct kvm_regs *regs, uint8_t indent)
{
	fprintf(stream, "%*srax: 0x%.16llx rbx: 0x%.16llx "
		"rcx: 0x%.16llx rdx: 0x%.16llx\n",
		indent, "",
		regs->rax, regs->rbx, regs->rcx, regs->rdx);
	fprintf(stream, "%*srsi: 0x%.16llx rdi: 0x%.16llx "
		"rsp: 0x%.16llx rbp: 0x%.16llx\n",
		indent, "",
		regs->rsi, regs->rdi, regs->rsp, regs->rbp);
	fprintf(stream, "%*sr8:  0x%.16llx r9:  0x%.16llx "
		"r10: 0x%.16llx r11: 0x%.16llx\n",
		indent, "",
		regs->r8, regs->r9, regs->r10, regs->r11);
	fprintf(stream, "%*sr12: 0x%.16llx r13: 0x%.16llx "
		"r14: 0x%.16llx r15: 0x%.16llx\n",
		indent, "",
		regs->r12, regs->r13, regs->r14, regs->r15);
	fprintf(stream, "%*srip: 0x%.16llx rfl: 0x%.16llx\n",
		indent, "",
		regs->rip, regs->rflags);
}

static void segment_dump(FILE *stream, struct kvm_segment *segment,
			 uint8_t indent)
{
	fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.8x "
		"selector: 0x%.4x type: 0x%.2x\n",
		indent, "", segment->base, segment->limit,
		segment->selector, segment->type);
	fprintf(stream, "%*spresent: 0x%.2x dpl: 0x%.2x "
		"db: 0x%.2x s: 0x%.2x l: 0x%.2x\n",
		indent, "", segment->present, segment->dpl,
		segment->db, segment->s, segment->l);
	fprintf(stream, "%*sg: 0x%.2x avl: 0x%.2x "
		"unusable: 0x%.2x padding: 0x%.2x\n",
		indent, "", segment->g, segment->avl,
		segment->unusable, segment->padding);
}

static void dtable_dump(FILE *stream, struct kvm_dtable *dtable,
			uint8_t indent)
{
	fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.4x "
		"padding: 0x%.4x 0x%.4x 0x%.4x\n",
		indent, "", dtable->base, dtable->limit,
		dtable->padding[0], dtable->padding[1], dtable->padding[2]);
}

static void sregs_dump(FILE *stream, struct kvm_sregs *sregs, uint8_t indent)
{
	unsigned int i;

	fprintf(stream, "%*scs:\n", indent, "");
	segment_dump(stream, &sregs->cs, indent + 2);
	fprintf(stream, "%*sds:\n", indent, "");
	segment_dump(stream, &sregs->ds, indent + 2);
	fprintf(stream, "%*ses:\n", indent, "");
	segment_dump(stream, &sregs->es, indent + 2);
	fprintf(stream, "%*sfs:\n", indent, "");
	segment_dump(stream, &sregs->fs, indent + 2);
	fprintf(stream, "%*sgs:\n", indent, "");
	segment_dump(stream, &sregs->gs, indent + 2);
	fprintf(stream, "%*sss:\n", indent, "");
	segment_dump(stream, &sregs->ss, indent + 2);
	fprintf(stream, "%*str:\n", indent, "");
	segment_dump(stream, &sregs->tr, indent + 2);
	fprintf(stream, "%*sldt:\n", indent, "");
	segment_dump(stream, &sregs->ldt, indent + 2);

	fprintf(stream, "%*sgdt:\n", indent, "");
	dtable_dump(stream, &sregs->gdt, indent + 2);
	fprintf(stream, "%*sidt:\n", indent, "");
	dtable_dump(stream, &sregs->idt, indent + 2);

	fprintf(stream, "%*scr0: 0x%.16llx cr2: 0x%.16llx "
		"cr3: 0x%.16llx cr4: 0x%.16llx\n",
		indent, "",
		sregs->cr0, sregs->cr2, sregs->cr3, sregs->cr4);
	fprintf(stream, "%*scr8: 0x%.16llx efer: 0x%.16llx "
		"apic_base: 0x%.16llx\n",
		indent, "",
		sregs->cr8, sregs->efer, sregs->apic_base);

	fprintf(stream, "%*sinterrupt_bitmap:\n", indent, "");
	for (i = 0; i < (KVM_NR_INTERRUPTS + 63) / 64; i++) {
		fprintf(stream, "%*s%.16llx\n", indent + 2, "",
			sregs->interrupt_bitmap[i]);
	}
}

void virt_arch_pgd_alloc(struct kvm_vm *vm)
{
	TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
		"unknown or unsupported guest mode, mode: 0x%x", vm->mode);

	/* If needed, create page map l4 table. */
	if (!vm->pgd_created) {
		vm->pgd = vm_alloc_page_table(vm);
		vm->pgd_created = true;
	}
}

static void *virt_get_pte(struct kvm_vm *vm, uint64_t pt_pfn, uint64_t vaddr,
			  int level)
{
	uint64_t *page_table = addr_gpa2hva(vm, pt_pfn << vm->page_shift);
	int index = (vaddr >> PG_LEVEL_SHIFT(level)) & 0x1ffu;

	return &page_table[index];
}

static uint64_t *virt_create_upper_pte(struct kvm_vm *vm,
				       uint64_t pt_pfn,
				       uint64_t vaddr,
				       uint64_t paddr,
				       int current_level,
				       int target_level)
{
	uint64_t *pte = virt_get_pte(vm, pt_pfn, vaddr, current_level);

	if (!(*pte & PTE_PRESENT_MASK)) {
		*pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK;
		if (current_level == target_level)
			*pte |= PTE_LARGE_MASK | (paddr & PHYSICAL_PAGE_MASK);
		else
			*pte |= vm_alloc_page_table(vm) & PHYSICAL_PAGE_MASK;
	} else {
		/*
		 * Entry already present.  Assert that the caller doesn't want
		 * a hugepage at this level, and that there isn't a hugepage at
		 * this level.
		 */
		TEST_ASSERT(current_level != target_level,
			    "Cannot create hugepage at level: %u, vaddr: 0x%lx\n",
			    current_level, vaddr);
		TEST_ASSERT(!(*pte & PTE_LARGE_MASK),
			    "Cannot create page table at level: %u, vaddr: 0x%lx\n",
			    current_level, vaddr);
	}
	return pte;
}

void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level)
{
	const uint64_t pg_size = PG_LEVEL_SIZE(level);
	uint64_t *pml4e, *pdpe, *pde;
	uint64_t *pte;

	TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K,
		    "Unknown or unsupported guest mode, mode: 0x%x", vm->mode);

	TEST_ASSERT((vaddr % pg_size) == 0,
		    "Virtual address not aligned,\n"
		    "vaddr: 0x%lx page size: 0x%lx", vaddr, pg_size);
	TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, (vaddr >> vm->page_shift)),
		    "Invalid virtual address, vaddr: 0x%lx", vaddr);
	TEST_ASSERT((paddr % pg_size) == 0,
		    "Physical address not aligned,\n"
		    "  paddr: 0x%lx page size: 0x%lx", paddr, pg_size);
	TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
		    "Physical address beyond maximum supported,\n"
		    "  paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
		    paddr, vm->max_gfn, vm->page_size);

	/*
	 * Allocate upper level page tables, if not already present.  Return
	 * early if a hugepage was created.
	 */
	pml4e = virt_create_upper_pte(vm, vm->pgd >> vm->page_shift,
				      vaddr, paddr, PG_LEVEL_512G, level);
	if (*pml4e & PTE_LARGE_MASK)
		return;

	pdpe = virt_create_upper_pte(vm, PTE_GET_PFN(*pml4e), vaddr, paddr, PG_LEVEL_1G, level);
	if (*pdpe & PTE_LARGE_MASK)
		return;

	pde = virt_create_upper_pte(vm, PTE_GET_PFN(*pdpe), vaddr, paddr, PG_LEVEL_2M, level);
	if (*pde & PTE_LARGE_MASK)
		return;

	/* Fill in page table entry. */
	pte = virt_get_pte(vm, PTE_GET_PFN(*pde), vaddr, PG_LEVEL_4K);
	TEST_ASSERT(!(*pte & PTE_PRESENT_MASK),
		    "PTE already present for 4k page at vaddr: 0x%lx\n", vaddr);
	*pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK | (paddr & PHYSICAL_PAGE_MASK);
}

void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
{
	__virt_pg_map(vm, vaddr, paddr, PG_LEVEL_4K);
}
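
/*
 * Example usage (illustrative sketch, not part of this library): map a 4K
 * page and a 2M hugepage into the guest.  virt_pg_map() is the arch-neutral
 * wrapper around virt_arch_pg_map(); the addresses below are made up and
 * assume the caller has already provided backing guest physical memory.
 *
 *	virt_pg_map(vm, 0x400000, 0x400000);			// 4K mapping
 *	__virt_pg_map(vm, 0x40000000, 0x40000000, PG_LEVEL_2M);	// 2M hugepage
 */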

static uint64_t *_vm_get_page_table_entry(struct kvm_vm *vm,
					  struct kvm_vcpu *vcpu,
					  uint64_t vaddr)
{
	uint16_t index[4];
	uint64_t *pml4e, *pdpe, *pde;
	uint64_t *pte;
	struct kvm_sregs sregs;
	uint64_t rsvd_mask = 0;

	/* Set the high bits in the reserved mask. */
	if (vm->pa_bits < 52)
		rsvd_mask = GENMASK_ULL(51, vm->pa_bits);

	/*
	 * SDM vol 3, fig 4-11 "Formats of CR3 and Paging-Structure Entries
	 * with 4-Level Paging and 5-Level Paging".
	 * If IA32_EFER.NXE = 0 and the P flag of a paging-structure entry is 1,
	 * the XD flag (bit 63) is reserved.
	 */
	vcpu_sregs_get(vcpu, &sregs);
	if ((sregs.efer & EFER_NX) == 0)
		rsvd_mask |= PTE_NX_MASK;

	TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
		"unknown or unsupported guest mode, mode: 0x%x", vm->mode);
	TEST_ASSERT(sparsebit_is_set(vm->vpages_valid,
		(vaddr >> vm->page_shift)),
		"Invalid virtual address, vaddr: 0x%lx",
		vaddr);
	/*
	 * Based on the mode check above there are 48 bits in the vaddr, so
	 * shift 16 to sign extend the last bit (bit-47).
	 */
	TEST_ASSERT(vaddr == (((int64_t)vaddr << 16) >> 16),
		"Canonical check failed.  The virtual address is invalid.");

	index[0] = (vaddr >> 12) & 0x1ffu;
	index[1] = (vaddr >> 21) & 0x1ffu;
	index[2] = (vaddr >> 30) & 0x1ffu;
	index[3] = (vaddr >> 39) & 0x1ffu;

	pml4e = addr_gpa2hva(vm, vm->pgd);
	TEST_ASSERT(pml4e[index[3]] & PTE_PRESENT_MASK,
		"Expected pml4e to be present for gva: 0x%08lx", vaddr);
	TEST_ASSERT((pml4e[index[3]] & (rsvd_mask | PTE_LARGE_MASK)) == 0,
		"Unexpected reserved bits set.");

	pdpe = addr_gpa2hva(vm, PTE_GET_PFN(pml4e[index[3]]) * vm->page_size);
	TEST_ASSERT(pdpe[index[2]] & PTE_PRESENT_MASK,
		"Expected pdpe to be present for gva: 0x%08lx", vaddr);
	TEST_ASSERT(!(pdpe[index[2]] & PTE_LARGE_MASK),
		"Expected pdpe to map a pde not a 1-GByte page.");
	TEST_ASSERT((pdpe[index[2]] & rsvd_mask) == 0,
		"Unexpected reserved bits set.");

	pde = addr_gpa2hva(vm, PTE_GET_PFN(pdpe[index[2]]) * vm->page_size);
	TEST_ASSERT(pde[index[1]] & PTE_PRESENT_MASK,
		"Expected pde to be present for gva: 0x%08lx", vaddr);
	TEST_ASSERT(!(pde[index[1]] & PTE_LARGE_MASK),
		"Expected pde to map a pte not a 2-MByte page.");
	TEST_ASSERT((pde[index[1]] & rsvd_mask) == 0,
		"Unexpected reserved bits set.");

	pte = addr_gpa2hva(vm, PTE_GET_PFN(pde[index[1]]) * vm->page_size);
	TEST_ASSERT(pte[index[0]] & PTE_PRESENT_MASK,
		"Expected pte to be present for gva: 0x%08lx", vaddr);

	return &pte[index[0]];
}

uint64_t vm_get_page_table_entry(struct kvm_vm *vm, struct kvm_vcpu *vcpu,
				 uint64_t vaddr)
{
	uint64_t *pte = _vm_get_page_table_entry(vm, vcpu, vaddr);

	return *pte;
}

void vm_set_page_table_entry(struct kvm_vm *vm, struct kvm_vcpu *vcpu,
			     uint64_t vaddr, uint64_t pte)
{
	uint64_t *new_pte = _vm_get_page_table_entry(vm, vcpu, vaddr);

	*new_pte = pte;
}
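
/*
 * Example usage (illustrative sketch): clear the writable bit in the PTE
 * backing a guest virtual address, e.g. to force a write-protection fault on
 * the guest's next write.  "vcpu" and "gva" are assumed to come from the
 * calling test.
 *
 *	uint64_t pte = vm_get_page_table_entry(vm, vcpu, gva);
 *
 *	vm_set_page_table_entry(vm, vcpu, gva, pte & ~PTE_WRITABLE_MASK);
 */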

void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
{
	uint64_t *pml4e, *pml4e_start;
	uint64_t *pdpe, *pdpe_start;
	uint64_t *pde, *pde_start;
	uint64_t *pte, *pte_start;

	if (!vm->pgd_created)
		return;

	fprintf(stream, "%*s                                          "
		"                no\n", indent, "");
	fprintf(stream, "%*s      index hvaddr         gpaddr         "
		"addr         w exec dirty\n",
		indent, "");
	pml4e_start = (uint64_t *) addr_gpa2hva(vm, vm->pgd);
	for (uint16_t n1 = 0; n1 <= 0x1ffu; n1++) {
		pml4e = &pml4e_start[n1];
		if (!(*pml4e & PTE_PRESENT_MASK))
			continue;
		fprintf(stream, "%*spml4e 0x%-3zx %p 0x%-12lx 0x%-10llx %u "
			" %u\n",
			indent, "",
			pml4e - pml4e_start, pml4e,
			addr_hva2gpa(vm, pml4e), PTE_GET_PFN(*pml4e),
			!!(*pml4e & PTE_WRITABLE_MASK), !!(*pml4e & PTE_NX_MASK));

		pdpe_start = addr_gpa2hva(vm, *pml4e & PHYSICAL_PAGE_MASK);
		for (uint16_t n2 = 0; n2 <= 0x1ffu; n2++) {
			pdpe = &pdpe_start[n2];
			if (!(*pdpe & PTE_PRESENT_MASK))
				continue;
			fprintf(stream, "%*spdpe  0x%-3zx %p 0x%-12lx 0x%-10llx "
				"%u  %u\n",
				indent, "",
				pdpe - pdpe_start, pdpe,
				addr_hva2gpa(vm, pdpe),
				PTE_GET_PFN(*pdpe), !!(*pdpe & PTE_WRITABLE_MASK),
				!!(*pdpe & PTE_NX_MASK));

			pde_start = addr_gpa2hva(vm, *pdpe & PHYSICAL_PAGE_MASK);
			for (uint16_t n3 = 0; n3 <= 0x1ffu; n3++) {
				pde = &pde_start[n3];
				if (!(*pde & PTE_PRESENT_MASK))
					continue;
				fprintf(stream, "%*spde   0x%-3zx %p "
					"0x%-12lx 0x%-10llx %u  %u\n",
					indent, "", pde - pde_start, pde,
					addr_hva2gpa(vm, pde),
					PTE_GET_PFN(*pde), !!(*pde & PTE_WRITABLE_MASK),
					!!(*pde & PTE_NX_MASK));

				pte_start = addr_gpa2hva(vm, *pde & PHYSICAL_PAGE_MASK);
				for (uint16_t n4 = 0; n4 <= 0x1ffu; n4++) {
					pte = &pte_start[n4];
					if (!(*pte & PTE_PRESENT_MASK))
						continue;
					fprintf(stream, "%*spte   0x%-3zx %p "
						"0x%-12lx 0x%-10llx %u  %u "
						"    %u    0x%-10lx\n",
						indent, "",
						pte - pte_start, pte,
						addr_hva2gpa(vm, pte),
						PTE_GET_PFN(*pte),
						!!(*pte & PTE_WRITABLE_MASK),
						!!(*pte & PTE_NX_MASK),
						!!(*pte & PTE_DIRTY_MASK),
						((uint64_t) n1 << 27)
							| ((uint64_t) n2 << 18)
							| ((uint64_t) n3 << 9)
							| ((uint64_t) n4));
				}
			}
		}
	}
}

/*
 * Set Unusable Segment
 *
 * Input Args: None
 *
 * Output Args:
 *   segp - Pointer to segment register
 *
 * Return: None
 *
 * Sets the segment register pointed to by @segp to an unusable state.
 */
static void kvm_seg_set_unusable(struct kvm_segment *segp)
{
	memset(segp, 0, sizeof(*segp));
	segp->unusable = true;
}

static void kvm_seg_fill_gdt_64bit(struct kvm_vm *vm, struct kvm_segment *segp)
{
	void *gdt = addr_gva2hva(vm, vm->gdt);
	struct desc64 *desc = gdt + (segp->selector >> 3) * 8;

	desc->limit0 = segp->limit & 0xFFFF;
	desc->base0 = segp->base & 0xFFFF;
	desc->base1 = segp->base >> 16;
	desc->type = segp->type;
	desc->s = segp->s;
	desc->dpl = segp->dpl;
	desc->p = segp->present;
	desc->limit1 = segp->limit >> 16;
	desc->avl = segp->avl;
	desc->l = segp->l;
	desc->db = segp->db;
	desc->g = segp->g;
	desc->base2 = segp->base >> 24;
	if (!segp->s)
		desc->base3 = segp->base >> 32;
}


/*
 * Set Long Mode Flat Kernel Code Segment
 *
 * Input Args:
 *   vm - VM whose GDT is being filled, or NULL to only write segp
 *   selector - selector value
 *
 * Output Args:
 *   segp - Pointer to KVM segment
 *
 * Return: None
 *
 * Sets up the KVM segment pointed to by @segp, to be a code segment
 * with the selector value given by @selector.
 */
static void kvm_seg_set_kernel_code_64bit(struct kvm_vm *vm, uint16_t selector,
	struct kvm_segment *segp)
{
	memset(segp, 0, sizeof(*segp));
	segp->selector = selector;
	segp->limit = 0xFFFFFFFFu;
	segp->s = 0x1; /* kTypeCodeData */
	segp->type = 0x08 | 0x01 | 0x02; /* kFlagCode | kFlagCodeAccessed
					  * | kFlagCodeReadable
					  */
	segp->g = true;
	segp->l = true;
	segp->present = 1;
	if (vm)
		kvm_seg_fill_gdt_64bit(vm, segp);
}

/*
 * Set Long Mode Flat Kernel Data Segment
 *
 * Input Args:
 *   vm - VM whose GDT is being filled, or NULL to only write segp
 *   selector - selector value
 *
 * Output Args:
 *   segp - Pointer to KVM segment
 *
 * Return: None
 *
 * Sets up the KVM segment pointed to by @segp, to be a data segment
 * with the selector value given by @selector.
 */
static void kvm_seg_set_kernel_data_64bit(struct kvm_vm *vm, uint16_t selector,
	struct kvm_segment *segp)
{
	memset(segp, 0, sizeof(*segp));
	segp->selector = selector;
	segp->limit = 0xFFFFFFFFu;
	segp->s = 0x1; /* kTypeCodeData */
	segp->type = 0x00 | 0x01 | 0x02; /* kFlagData | kFlagDataAccessed
					  * | kFlagDataWritable
					  */
	segp->g = true;
	segp->present = true;
	if (vm)
		kvm_seg_fill_gdt_64bit(vm, segp);
}

vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
{
	uint16_t index[4];
	uint64_t *pml4e, *pdpe, *pde;
	uint64_t *pte;

	TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
		"unknown or unsupported guest mode, mode: 0x%x", vm->mode);

	index[0] = (gva >> 12) & 0x1ffu;
	index[1] = (gva >> 21) & 0x1ffu;
	index[2] = (gva >> 30) & 0x1ffu;
	index[3] = (gva >> 39) & 0x1ffu;

	if (!vm->pgd_created)
		goto unmapped_gva;
	pml4e = addr_gpa2hva(vm, vm->pgd);
	if (!(pml4e[index[3]] & PTE_PRESENT_MASK))
		goto unmapped_gva;

	pdpe = addr_gpa2hva(vm, PTE_GET_PFN(pml4e[index[3]]) * vm->page_size);
	if (!(pdpe[index[2]] & PTE_PRESENT_MASK))
		goto unmapped_gva;

	pde = addr_gpa2hva(vm, PTE_GET_PFN(pdpe[index[2]]) * vm->page_size);
	if (!(pde[index[1]] & PTE_PRESENT_MASK))
		goto unmapped_gva;

	pte = addr_gpa2hva(vm, PTE_GET_PFN(pde[index[1]]) * vm->page_size);
	if (!(pte[index[0]] & PTE_PRESENT_MASK))
		goto unmapped_gva;

	return (PTE_GET_PFN(pte[index[0]]) * vm->page_size) + (gva & ~PAGE_MASK);

unmapped_gva:
	TEST_FAIL("No mapping for vm virtual address, gva: 0x%lx", gva);
	exit(EXIT_FAILURE);
}
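
/*
 * Example usage (illustrative sketch): translate a guest virtual address to
 * a guest physical address and then to a host pointer, e.g. to poke guest
 * data from the host side of a test.  addr_gva2gpa() is the arch-neutral
 * wrapper around addr_arch_gva2gpa(); "gva" is assumed to be mapped.
 *
 *	vm_paddr_t gpa = addr_gva2gpa(vm, gva);
 *	uint64_t *hva = addr_gpa2hva(vm, gpa);
 *
 *	*hva = 0xdeadbeef;	// visible to the guest at "gva"
 */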

static void kvm_setup_gdt(struct kvm_vm *vm, struct kvm_dtable *dt)
{
	if (!vm->gdt)
		vm->gdt = vm_vaddr_alloc_page(vm);

	dt->base = vm->gdt;
	dt->limit = getpagesize();
}

static void kvm_setup_tss_64bit(struct kvm_vm *vm, struct kvm_segment *segp,
				int selector)
{
	if (!vm->tss)
		vm->tss = vm_vaddr_alloc_page(vm);

	memset(segp, 0, sizeof(*segp));
	segp->base = vm->tss;
	segp->limit = 0x67;
	segp->selector = selector;
	segp->type = 0xb;
	segp->present = 1;
	kvm_seg_fill_gdt_64bit(vm, segp);
}

static void vcpu_setup(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
{
	struct kvm_sregs sregs;

	/* Set mode specific system register values. */
	vcpu_sregs_get(vcpu, &sregs);

	sregs.idt.limit = 0;

	kvm_setup_gdt(vm, &sregs.gdt);

	switch (vm->mode) {
	case VM_MODE_PXXV48_4K:
		sregs.cr0 = X86_CR0_PE | X86_CR0_NE | X86_CR0_PG;
		sregs.cr4 |= X86_CR4_PAE | X86_CR4_OSFXSR;
		sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX);

		kvm_seg_set_unusable(&sregs.ldt);
		kvm_seg_set_kernel_code_64bit(vm, DEFAULT_CODE_SELECTOR, &sregs.cs);
		kvm_seg_set_kernel_data_64bit(vm, DEFAULT_DATA_SELECTOR, &sregs.ds);
		kvm_seg_set_kernel_data_64bit(vm, DEFAULT_DATA_SELECTOR, &sregs.es);
		kvm_setup_tss_64bit(vm, &sregs.tr, 0x18);
		break;

	default:
		TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode);
	}

	sregs.cr3 = vm->pgd;
	vcpu_sregs_set(vcpu, &sregs);
}

void __vm_xsave_require_permission(int bit, const char *name)
{
	int kvm_fd;
	u64 bitmask;
	long rc;
	struct kvm_device_attr attr = {
		.group = 0,
		.attr = KVM_X86_XCOMP_GUEST_SUPP,
		.addr = (unsigned long) &bitmask
	};

	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XFD));

	kvm_fd = open_kvm_dev_path_or_exit();
	rc = __kvm_ioctl(kvm_fd, KVM_GET_DEVICE_ATTR, &attr);
	close(kvm_fd);

	if (rc == -1 && (errno == ENXIO || errno == EINVAL))
		__TEST_REQUIRE(0, "KVM_X86_XCOMP_GUEST_SUPP not supported");

	TEST_ASSERT(rc == 0, "KVM_GET_DEVICE_ATTR(0, KVM_X86_XCOMP_GUEST_SUPP) error: %ld", rc);

	__TEST_REQUIRE(bitmask & (1ULL << bit),
		       "Required XSAVE feature '%s' not supported", name);

	TEST_REQUIRE(!syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM, bit));

	rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_GUEST_PERM, &bitmask);
	TEST_ASSERT(rc == 0, "prctl(ARCH_GET_XCOMP_GUEST_PERM) error: %ld", rc);
	TEST_ASSERT(bitmask & (1ULL << bit),
		    "prctl(ARCH_REQ_XCOMP_GUEST_PERM) failure bitmask=0x%lx",
		    bitmask);
}
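
/*
 * Example usage (illustrative sketch): a test that needs a permission-gated
 * XSAVE component, e.g. AMX tile data (XSTATE component 18), calls this
 * before creating the VM so arch_prctl() grants guest permission.  The
 * XSTATE_XTILE_DATA_BIT name below is the caller's own definition, not
 * something provided by this file.
 *
 *	#define XSTATE_XTILE_DATA_BIT	18
 *
 *	__vm_xsave_require_permission(XSTATE_XTILE_DATA_BIT, "XTILE_DATA");
 */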

struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
				  void *guest_code)
{
	struct kvm_mp_state mp_state;
	struct kvm_regs regs;
	vm_vaddr_t stack_vaddr;
	struct kvm_vcpu *vcpu;

	stack_vaddr = vm_vaddr_alloc(vm, DEFAULT_STACK_PGS * getpagesize(),
				     DEFAULT_GUEST_STACK_VADDR_MIN);

	vcpu = __vm_vcpu_add(vm, vcpu_id);
	vcpu_init_cpuid(vcpu, kvm_get_supported_cpuid());
	vcpu_setup(vm, vcpu);

	/* Setup guest general purpose registers */
	vcpu_regs_get(vcpu, &regs);
	regs.rflags = regs.rflags | 0x2;
	regs.rsp = stack_vaddr + (DEFAULT_STACK_PGS * getpagesize());
	regs.rip = (unsigned long) guest_code;
	vcpu_regs_set(vcpu, &regs);

	/* Setup the MP state */
	mp_state.mp_state = 0;
	vcpu_mp_state_set(vcpu, &mp_state);

	return vcpu;
}
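
/*
 * Example usage (illustrative sketch): tests normally reach this helper via
 * the arch-neutral wrappers rather than calling it directly, e.g.
 *
 *	struct kvm_vcpu *vcpu;
 *	struct kvm_vm *vm = vm_create_with_one_vcpu(&vcpu, guest_code);
 *
 *	vcpu_run(vcpu);
 *
 * where guest_code() is the test's guest entry point.
 */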

struct kvm_vcpu *vm_arch_vcpu_recreate(struct kvm_vm *vm, uint32_t vcpu_id)
{
	struct kvm_vcpu *vcpu = __vm_vcpu_add(vm, vcpu_id);

	vcpu_init_cpuid(vcpu, kvm_get_supported_cpuid());

	return vcpu;
}

void vcpu_arch_free(struct kvm_vcpu *vcpu)
{
	if (vcpu->cpuid)
		free(vcpu->cpuid);
}

const struct kvm_cpuid2 *kvm_get_supported_cpuid(void)
{
	static struct kvm_cpuid2 *cpuid;
	int kvm_fd;

	if (cpuid)
		return cpuid;

	cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES);
	kvm_fd = open_kvm_dev_path_or_exit();

	kvm_ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID, cpuid);

	close(kvm_fd);
	return cpuid;
}

bool kvm_cpuid_has(const struct kvm_cpuid2 *cpuid,
		   struct kvm_x86_cpu_feature feature)
{
	const struct kvm_cpuid_entry2 *entry;
	int i;

	for (i = 0; i < cpuid->nent; i++) {
		entry = &cpuid->entries[i];

		/*
		 * The output registers in kvm_cpuid_entry2 are in alphabetical
		 * order, but kvm_x86_cpu_feature matches that mess, so yay
		 * pointer shenanigans!
		 */
		if (entry->function == feature.function &&
		    entry->index == feature.index)
			return (&entry->eax)[feature.reg] & BIT(feature.bit);
	}

	return false;
}
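
/*
 * Example usage (illustrative sketch): most tests query features through the
 * kvm_cpu_has() wrapper in processor.h, which feeds the supported CPUID into
 * kvm_cpuid_has(), e.g. to skip a test when XSAVE is unavailable:
 *
 *	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XSAVE));
 */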

uint64_t kvm_get_feature_msr(uint64_t msr_index)
{
	struct {
		struct kvm_msrs header;
		struct kvm_msr_entry entry;
	} buffer = {};
	int r, kvm_fd;

	buffer.header.nmsrs = 1;
	buffer.entry.index = msr_index;
	kvm_fd = open_kvm_dev_path_or_exit();

	r = __kvm_ioctl(kvm_fd, KVM_GET_MSRS, &buffer.header);
	TEST_ASSERT(r == 1, KVM_IOCTL_ERROR(KVM_GET_MSRS, r));

	close(kvm_fd);
	return buffer.entry.data;
}

void vcpu_init_cpuid(struct kvm_vcpu *vcpu, const struct kvm_cpuid2 *cpuid)
{
	TEST_ASSERT(cpuid != vcpu->cpuid, "@cpuid can't be the vCPU's CPUID");

	/* Allow overriding the default CPUID. */
	if (vcpu->cpuid && vcpu->cpuid->nent < cpuid->nent) {
		free(vcpu->cpuid);
		vcpu->cpuid = NULL;
	}

	if (!vcpu->cpuid)
		vcpu->cpuid = allocate_kvm_cpuid2(cpuid->nent);

	memcpy(vcpu->cpuid, cpuid, kvm_cpuid2_size(cpuid->nent));
	vcpu_set_cpuid(vcpu);
}

void vcpu_set_cpuid_maxphyaddr(struct kvm_vcpu *vcpu, uint8_t maxphyaddr)
{
	struct kvm_cpuid_entry2 *entry = vcpu_get_cpuid_entry(vcpu, 0x80000008);

	entry->eax = (entry->eax & ~0xff) | maxphyaddr;
	vcpu_set_cpuid(vcpu);
}

void vcpu_clear_cpuid_entry(struct kvm_vcpu *vcpu, uint32_t function)
{
	struct kvm_cpuid_entry2 *entry = vcpu_get_cpuid_entry(vcpu, function);

	entry->eax = 0;
	entry->ebx = 0;
	entry->ecx = 0;
	entry->edx = 0;
	vcpu_set_cpuid(vcpu);
}

void vcpu_set_or_clear_cpuid_feature(struct kvm_vcpu *vcpu,
				     struct kvm_x86_cpu_feature feature,
				     bool set)
{
	struct kvm_cpuid_entry2 *entry;
	u32 *reg;

	entry = __vcpu_get_cpuid_entry(vcpu, feature.function, feature.index);
	reg = (&entry->eax) + feature.reg;

	if (set)
		*reg |= BIT(feature.bit);
	else
		*reg &= ~BIT(feature.bit);

	vcpu_set_cpuid(vcpu);
}
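
/*
 * Example usage (illustrative sketch): hide a feature from the guest's CPUID
 * after the vCPU has been created, e.g. to verify the guest copes with the
 * feature being absent:
 *
 *	vcpu_set_or_clear_cpuid_feature(vcpu, X86_FEATURE_XSAVE, false);
 */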

uint64_t vcpu_get_msr(struct kvm_vcpu *vcpu, uint64_t msr_index)
{
	struct {
		struct kvm_msrs header;
		struct kvm_msr_entry entry;
	} buffer = {};

	buffer.header.nmsrs = 1;
	buffer.entry.index = msr_index;

	vcpu_msrs_get(vcpu, &buffer.header);

	return buffer.entry.data;
}

int _vcpu_set_msr(struct kvm_vcpu *vcpu, uint64_t msr_index, uint64_t msr_value)
{
	struct {
		struct kvm_msrs header;
		struct kvm_msr_entry entry;
	} buffer = {};

	buffer.header.nmsrs = 1;
	buffer.entry.index = msr_index;
	buffer.entry.data = msr_value;

	return __vcpu_ioctl(vcpu, KVM_SET_MSRS, &buffer.header);
}
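
/*
 * Example usage (illustrative sketch): read and write a guest MSR from the
 * host side of a test.  vcpu_set_msr() (in processor.h) wraps _vcpu_set_msr()
 * and asserts that exactly one MSR was written.
 *
 *	uint64_t efer = vcpu_get_msr(vcpu, MSR_EFER);
 *
 *	vcpu_set_msr(vcpu, MSR_EFER, efer & ~EFER_NX);
 */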

void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...)
{
	va_list ap;
	struct kvm_regs regs;

	TEST_ASSERT(num >= 1 && num <= 6, "Unsupported number of args,\n"
		    "  num: %u\n",
		    num);

	va_start(ap, num);
	vcpu_regs_get(vcpu, &regs);

	if (num >= 1)
		regs.rdi = va_arg(ap, uint64_t);

	if (num >= 2)
		regs.rsi = va_arg(ap, uint64_t);

	if (num >= 3)
		regs.rdx = va_arg(ap, uint64_t);

	if (num >= 4)
		regs.rcx = va_arg(ap, uint64_t);

	if (num >= 5)
		regs.r8 = va_arg(ap, uint64_t);

	if (num >= 6)
		regs.r9 = va_arg(ap, uint64_t);

	vcpu_regs_set(vcpu, &regs);
	va_end(ap);
}
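
/*
 * Example usage (illustrative sketch): the registers above follow the x86-64
 * SysV calling convention, so up to six values can be handed to the guest as
 * ordinary function arguments.  The guest_code() signature is the test's own.
 *
 *	static void guest_code(uint64_t base_gpa, uint64_t nr_pages)
 *	{
 *		GUEST_ASSERT(nr_pages > 0);
 *		GUEST_DONE();
 *	}
 *
 *	vcpu_args_set(vcpu, 2, base_gpa, nr_pages);
 */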

void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent)
{
	struct kvm_regs regs;
	struct kvm_sregs sregs;

	fprintf(stream, "%*svCPU ID: %u\n", indent, "", vcpu->id);

	fprintf(stream, "%*sregs:\n", indent + 2, "");
	vcpu_regs_get(vcpu, &regs);
	regs_dump(stream, &regs, indent + 4);

	fprintf(stream, "%*ssregs:\n", indent + 2, "");
	vcpu_sregs_get(vcpu, &sregs);
	sregs_dump(stream, &sregs, indent + 4);
}

static struct kvm_msr_list *__kvm_get_msr_index_list(bool feature_msrs)
{
	struct kvm_msr_list *list;
	struct kvm_msr_list nmsrs;
	int kvm_fd, r;

	kvm_fd = open_kvm_dev_path_or_exit();

	nmsrs.nmsrs = 0;
	if (!feature_msrs)
		r = __kvm_ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, &nmsrs);
	else
		r = __kvm_ioctl(kvm_fd, KVM_GET_MSR_FEATURE_INDEX_LIST, &nmsrs);

	TEST_ASSERT(r == -1 && errno == E2BIG,
		    "Expected -E2BIG, got rc: %i errno: %i (%s)",
		    r, errno, strerror(errno));

	list = malloc(sizeof(*list) + nmsrs.nmsrs * sizeof(list->indices[0]));
	TEST_ASSERT(list, "-ENOMEM when allocating MSR index list");
	list->nmsrs = nmsrs.nmsrs;

	if (!feature_msrs)
		kvm_ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, list);
	else
		kvm_ioctl(kvm_fd, KVM_GET_MSR_FEATURE_INDEX_LIST, list);
	close(kvm_fd);

	TEST_ASSERT(list->nmsrs == nmsrs.nmsrs,
		    "Number of MSRs in list changed, was %d, now %d",
		    nmsrs.nmsrs, list->nmsrs);
	return list;
}

const struct kvm_msr_list *kvm_get_msr_index_list(void)
{
	static const struct kvm_msr_list *list;

	if (!list)
		list = __kvm_get_msr_index_list(false);
	return list;
}


const struct kvm_msr_list *kvm_get_feature_msr_index_list(void)
{
	static const struct kvm_msr_list *list;

	if (!list)
		list = __kvm_get_msr_index_list(true);
	return list;
}

bool kvm_msr_is_in_save_restore_list(uint32_t msr_index)
{
	const struct kvm_msr_list *list = kvm_get_msr_index_list();
	int i;

	for (i = 0; i < list->nmsrs; ++i) {
		if (list->indices[i] == msr_index)
			return true;
	}

	return false;
}

static void vcpu_save_xsave_state(struct kvm_vcpu *vcpu,
				  struct kvm_x86_state *state)
{
	int size = vm_check_cap(vcpu->vm, KVM_CAP_XSAVE2);

	if (size) {
		state->xsave = malloc(size);
		vcpu_xsave2_get(vcpu, state->xsave);
	} else {
		state->xsave = malloc(sizeof(struct kvm_xsave));
		vcpu_xsave_get(vcpu, state->xsave);
	}
}

struct kvm_x86_state *vcpu_save_state(struct kvm_vcpu *vcpu)
{
	const struct kvm_msr_list *msr_list = kvm_get_msr_index_list();
	struct kvm_x86_state *state;
	int i;

	static int nested_size = -1;

	if (nested_size == -1) {
		nested_size = kvm_check_cap(KVM_CAP_NESTED_STATE);
		TEST_ASSERT(nested_size <= sizeof(state->nested_),
			    "Nested state size too big, %i > %zi",
			    nested_size, sizeof(state->nested_));
	}

	/*
	 * When KVM exits to userspace with KVM_EXIT_IO, KVM guarantees
	 * guest state is consistent only after userspace re-enters the
	 * kernel with KVM_RUN.  Complete IO prior to migrating state
	 * to a new VM.
	 */
	vcpu_run_complete_io(vcpu);

	state = malloc(sizeof(*state) + msr_list->nmsrs * sizeof(state->msrs.entries[0]));

	vcpu_events_get(vcpu, &state->events);
	vcpu_mp_state_get(vcpu, &state->mp_state);
	vcpu_regs_get(vcpu, &state->regs);
	vcpu_save_xsave_state(vcpu, state);

	if (kvm_has_cap(KVM_CAP_XCRS))
		vcpu_xcrs_get(vcpu, &state->xcrs);

	vcpu_sregs_get(vcpu, &state->sregs);

	if (nested_size) {
		state->nested.size = sizeof(state->nested_);

		vcpu_nested_state_get(vcpu, &state->nested);
		TEST_ASSERT(state->nested.size <= nested_size,
			    "Nested state size too big, %i (KVM_CHECK_CAP gave %i)",
			    state->nested.size, nested_size);
	} else {
		state->nested.size = 0;
	}

	state->msrs.nmsrs = msr_list->nmsrs;
	for (i = 0; i < msr_list->nmsrs; i++)
		state->msrs.entries[i].index = msr_list->indices[i];
	vcpu_msrs_get(vcpu, &state->msrs);

	vcpu_debugregs_get(vcpu, &state->debugregs);

	return state;
}

void vcpu_load_state(struct kvm_vcpu *vcpu, struct kvm_x86_state *state)
{
	vcpu_sregs_set(vcpu, &state->sregs);
	vcpu_msrs_set(vcpu, &state->msrs);

	if (kvm_has_cap(KVM_CAP_XCRS))
		vcpu_xcrs_set(vcpu, &state->xcrs);

	vcpu_xsave_set(vcpu, state->xsave);
	vcpu_events_set(vcpu, &state->events);
	vcpu_mp_state_set(vcpu, &state->mp_state);
	vcpu_debugregs_set(vcpu, &state->debugregs);
	vcpu_regs_set(vcpu, &state->regs);

	if (state->nested.size)
		vcpu_nested_state_set(vcpu, &state->nested);
}
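
/*
 * Example usage (illustrative sketch): the save/load pair above is intended
 * for intra-host "migration" tests, which snapshot a vCPU, tear down and
 * recreate the VM fd, then resume from the saved state.  kvm_vm_release()
 * and vm_recreate_with_one_vcpu() live in the arch-neutral library.
 *
 *	struct kvm_x86_state *state = vcpu_save_state(vcpu);
 *
 *	kvm_vm_release(vm);
 *	vcpu = vm_recreate_with_one_vcpu(vm);
 *	vcpu_load_state(vcpu, state);
 *	kvm_x86_state_cleanup(state);
 */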

void kvm_x86_state_cleanup(struct kvm_x86_state *state)
{
	free(state->xsave);
	free(state);
}

static bool cpu_vendor_string_is(const char *vendor)
{
	const uint32_t *chunk = (const uint32_t *)vendor;
	uint32_t eax, ebx, ecx, edx;

	cpuid(0, &eax, &ebx, &ecx, &edx);
	return (ebx == chunk[0] && edx == chunk[1] && ecx == chunk[2]);
}

bool is_intel_cpu(void)
{
	return cpu_vendor_string_is("GenuineIntel");
}

/*
 * Exclude early K5 samples with a vendor string of "AMDisbetter!"
 */
bool is_amd_cpu(void)
{
	return cpu_vendor_string_is("AuthenticAMD");
}

void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits)
{
	const struct kvm_cpuid_entry2 *entry;
	bool pae;

	/* SDM 4.1.4 */
	if (kvm_get_cpuid_max_extended() < 0x80000008) {
		pae = kvm_get_supported_cpuid_entry(1)->edx & (1 << 6);
		*pa_bits = pae ? 36 : 32;
		*va_bits = 32;
	} else {
		entry = kvm_get_supported_cpuid_entry(0x80000008);
		*pa_bits = entry->eax & 0xff;
		*va_bits = (entry->eax >> 8) & 0xff;
	}
}

struct idt_entry {
	uint16_t offset0;
	uint16_t selector;
	uint16_t ist : 3;
	uint16_t : 5;
	uint16_t type : 4;
	uint16_t : 1;
	uint16_t dpl : 2;
	uint16_t p : 1;
	uint16_t offset1;
	uint32_t offset2;
	uint32_t reserved;
};

static void set_idt_entry(struct kvm_vm *vm, int vector, unsigned long addr,
			  int dpl, unsigned short selector)
{
	struct idt_entry *base =
		(struct idt_entry *)addr_gva2hva(vm, vm->idt);
	struct idt_entry *e = &base[vector];

	memset(e, 0, sizeof(*e));
	e->offset0 = addr;
	e->selector = selector;
	e->ist = 0;
	e->type = 14;
	e->dpl = dpl;
	e->p = 1;
	e->offset1 = addr >> 16;
	e->offset2 = addr >> 32;
}


static bool kvm_fixup_exception(struct ex_regs *regs)
{
	if (regs->r9 != KVM_EXCEPTION_MAGIC || regs->rip != regs->r10)
		return false;

	if (regs->vector == DE_VECTOR)
		return false;

	regs->rip = regs->r11;
	regs->r9 = regs->vector;
	return true;
}

void kvm_exit_unexpected_vector(uint32_t value)
{
	ucall(UCALL_UNHANDLED, 1, value);
}

void route_exception(struct ex_regs *regs)
{
	typedef void(*handler)(struct ex_regs *);
	handler *handlers = (handler *)exception_handlers;

	if (handlers && handlers[regs->vector]) {
		handlers[regs->vector](regs);
		return;
	}

	if (kvm_fixup_exception(regs))
		return;

	kvm_exit_unexpected_vector(regs->vector);
}

void vm_init_descriptor_tables(struct kvm_vm *vm)
{
	extern void *idt_handlers;
	int i;

	vm->idt = vm_vaddr_alloc_page(vm);
	vm->handlers = vm_vaddr_alloc_page(vm);
	/* Handlers have the same address in both address spaces. */
	for (i = 0; i < NUM_INTERRUPTS; i++)
		set_idt_entry(vm, i, (unsigned long)(&idt_handlers)[i], 0,
			DEFAULT_CODE_SELECTOR);
}

void vcpu_init_descriptor_tables(struct kvm_vcpu *vcpu)
{
	struct kvm_vm *vm = vcpu->vm;
	struct kvm_sregs sregs;

	vcpu_sregs_get(vcpu, &sregs);
	sregs.idt.base = vm->idt;
	sregs.idt.limit = NUM_INTERRUPTS * sizeof(struct idt_entry) - 1;
	sregs.gdt.base = vm->gdt;
	sregs.gdt.limit = getpagesize() - 1;
	kvm_seg_set_kernel_data_64bit(NULL, DEFAULT_DATA_SELECTOR, &sregs.gs);
	vcpu_sregs_set(vcpu, &sregs);
	*(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers;
}

void vm_install_exception_handler(struct kvm_vm *vm, int vector,
			       void (*handler)(struct ex_regs *))
{
	vm_vaddr_t *handlers = (vm_vaddr_t *)addr_gva2hva(vm, vm->handlers);

	handlers[vector] = (vm_vaddr_t)handler;
}
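
/*
 * Example usage (illustrative sketch): install a guest #GP handler.  The
 * handler runs in the guest, so it is typically defined next to guest_code()
 * in the test; guest_gp_handler() below is a hypothetical name.
 *
 *	static void guest_gp_handler(struct ex_regs *regs)
 *	{
 *		GUEST_SYNC(regs->vector);
 *	}
 *
 *	vm_init_descriptor_tables(vm);
 *	vcpu_init_descriptor_tables(vcpu);
 *	vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler);
 */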

void assert_on_unhandled_exception(struct kvm_vcpu *vcpu)
{
	struct ucall uc;

	if (get_ucall(vcpu, &uc) == UCALL_UNHANDLED) {
		uint64_t vector = uc.args[0];

		TEST_FAIL("Unexpected vectored event in guest (vector:0x%lx)",
			  vector);
	}
}

const struct kvm_cpuid_entry2 *get_cpuid_entry(const struct kvm_cpuid2 *cpuid,
					       uint32_t function, uint32_t index)
{
	int i;

	for (i = 0; i < cpuid->nent; i++) {
		if (cpuid->entries[i].function == function &&
		    cpuid->entries[i].index == index)
			return &cpuid->entries[i];
	}

	TEST_FAIL("CPUID function 0x%x index 0x%x not found", function, index);

	return NULL;
}

uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2,
		       uint64_t a3)
{
	uint64_t r;

	asm volatile("vmcall"
		     : "=a"(r)
		     : "a"(nr), "b"(a0), "c"(a1), "d"(a2), "S"(a3));
	return r;
}
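
/*
 * Example usage (illustrative sketch): issue a hypercall from guest code and
 * check KVM's return value.  The hypercall number and argument names below
 * are placeholders; unused arguments can simply be passed as zero.
 *
 *	uint64_t ret = kvm_hypercall(KVM_HC_SEND_IPI, bitmap_low, bitmap_high,
 *				     min_apic_id, 0);
 *	GUEST_ASSERT(ret == nr_vcpus_kicked);
 */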

const struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void)
{
	static struct kvm_cpuid2 *cpuid;
	int kvm_fd;

	if (cpuid)
		return cpuid;

	cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES);
	kvm_fd = open_kvm_dev_path_or_exit();

	kvm_ioctl(kvm_fd, KVM_GET_SUPPORTED_HV_CPUID, cpuid);

	close(kvm_fd);
	return cpuid;
}

void vcpu_set_hv_cpuid(struct kvm_vcpu *vcpu)
{
	static struct kvm_cpuid2 *cpuid_full;
	const struct kvm_cpuid2 *cpuid_sys, *cpuid_hv;
	int i, nent = 0;

	if (!cpuid_full) {
		cpuid_sys = kvm_get_supported_cpuid();
		cpuid_hv = kvm_get_supported_hv_cpuid();

		cpuid_full = allocate_kvm_cpuid2(cpuid_sys->nent + cpuid_hv->nent);
		if (!cpuid_full) {
			perror("malloc");
			abort();
		}

		/* Need to skip KVM CPUID leaves 0x400000xx */
		for (i = 0; i < cpuid_sys->nent; i++) {
			if (cpuid_sys->entries[i].function >= 0x40000000 &&
			    cpuid_sys->entries[i].function < 0x40000100)
				continue;
			cpuid_full->entries[nent] = cpuid_sys->entries[i];
			nent++;
		}

		memcpy(&cpuid_full->entries[nent], cpuid_hv->entries,
		       cpuid_hv->nent * sizeof(struct kvm_cpuid_entry2));
		cpuid_full->nent = nent + cpuid_hv->nent;
	}

	vcpu_init_cpuid(vcpu, cpuid_full);
}

const struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vcpu *vcpu)
{
	struct kvm_cpuid2 *cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES);

	vcpu_ioctl(vcpu, KVM_GET_SUPPORTED_HV_CPUID, cpuid);

	return cpuid;
}

unsigned long vm_compute_max_gfn(struct kvm_vm *vm)
{
	const unsigned long num_ht_pages = 12 << (30 - vm->page_shift); /* 12 GiB */
	unsigned long ht_gfn, max_gfn, max_pfn;
	uint32_t eax, ebx, ecx, edx, max_ext_leaf;

	max_gfn = (1ULL << (vm->pa_bits - vm->page_shift)) - 1;

	/* Avoid reserved HyperTransport region on AMD processors.  */
	if (!is_amd_cpu())
		return max_gfn;

	/* On parts with <40 physical address bits, the area is fully hidden */
	if (vm->pa_bits < 40)
		return max_gfn;

	/* Before family 17h, the HyperTransport area is just below 1T.  */
	ht_gfn = (1 << 28) - num_ht_pages;
	cpuid(1, &eax, &ebx, &ecx, &edx);
	if (x86_family(eax) < 0x17)
		goto done;

	/*
	 * Otherwise it's at the top of the physical address space, possibly
	 * reduced due to SME by bits 11:6 of CPUID[0x8000001f].EBX.  Use
	 * the old conservative value if MAXPHYADDR is not enumerated.
	 */
	cpuid(0x80000000, &eax, &ebx, &ecx, &edx);
	max_ext_leaf = eax;
	if (max_ext_leaf < 0x80000008)
		goto done;

	cpuid(0x80000008, &eax, &ebx, &ecx, &edx);
	max_pfn = (1ULL << ((eax & 0xff) - vm->page_shift)) - 1;
	if (max_ext_leaf >= 0x8000001f) {
		cpuid(0x8000001f, &eax, &ebx, &ecx, &edx);
		max_pfn >>= (ebx >> 6) & 0x3f;
	}

	ht_gfn = max_pfn - num_ht_pages;
done:
	return min(max_gfn, ht_gfn - 1);
}

/* Returns true if kvm_intel was loaded with unrestricted_guest=1. */
bool vm_is_unrestricted_guest(struct kvm_vm *vm)
{
	char val = 'N';
	size_t count;
	FILE *f;

	/* Ensure that a KVM vendor-specific module is loaded. */
	if (vm == NULL)
		close(open_kvm_dev_path_or_exit());

	f = fopen("/sys/module/kvm_intel/parameters/unrestricted_guest", "r");
	if (f) {
		count = fread(&val, sizeof(char), 1, f);
		TEST_ASSERT(count == 1, "Unable to read from param file.");
		fclose(f);
	}

	return val == 'Y';
}