xref: /openbmc/linux/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c (revision 1ac731c529cd4d6adbce134754b51ff7d822b145)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * vmx_nested_tsc_scaling_test
4  *
5  * Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
6  *
7  * This test case verifies that nested TSC scaling behaves as expected when
8  * both L1 and L2 are scaled using different ratios. For this test we scale
9  * L1 down and scale L2 up.
10  */
11 
12 #include <time.h>
13 
14 #include "kvm_util.h"
15 #include "vmx.h"
16 #include "kselftest.h"
17 
/* Factor by which L2's TSC runs faster than L1's, as observed from L1. */
#define L2_SCALE_FACTOR 4ULL

/*
 * Values L1 writes into the VMCS for L2. The multiplier is the scale factor
 * shifted left by 48 bits; presumably this matches the fixed-point layout of
 * the VMCS TSC multiplier field (confirm against the Intel SDM).
 */
#define TSC_OFFSET_L2 ((uint64_t) -33125236320908)
#define TSC_MULTIPLIER_L2 (L2_SCALE_FACTOR << 48)

/* Number of unsigned long slots in the stack L1 hands to L2. */
#define L2_GUEST_STACK_SIZE 64

/* First argument of every UCALL_SYNC the guests issue in this test. */
enum {
	USLEEP,		/* host should sleep for the given number of seconds */
	UCHECK_L1,	/* host should validate a frequency measured by L1 */
	UCHECK_L2,	/* host should validate a frequency measured by L2 */
};

/* Guest-side helpers that raise the sync ucalls described above. */
#define GUEST_SLEEP(sec)         ucall(UCALL_SYNC, 2, USLEEP, sec)
#define GUEST_CHECK(level, freq) ucall(UCALL_SYNC, 2, level, freq)
29 
30 
/*
 * Assert that a measured TSC frequency lies strictly inside a window of
 * +/- 1% around the expected frequency. The slack absorbs the latency of
 * taking the two TSC readings the measurement is derived from.
 *
 * @actual:   frequency that was measured
 * @expected: frequency the measurement should be close to
 */
static void compare_tsc_freq(uint64_t actual, uint64_t expected)
{
	/* 1% of the expected value, rounded down by integer division */
	const uint64_t margin = expected / 100;
	const uint64_t thresh_low = expected - margin;
	const uint64_t thresh_high = expected + margin;

	/* Both bounds are exclusive. */
	TEST_ASSERT(thresh_low < actual,
		    "TSC freq is expected to be between %"PRIu64" and %"PRIu64
		    " but it actually is %"PRIu64,
		    thresh_low, thresh_high, actual);

	TEST_ASSERT(thresh_high > actual,
		    "TSC freq is expected to be between %"PRIu64" and %"PRIu64
		    " but it actually is %"PRIu64,
		    thresh_low, thresh_high, actual);
}
54 
check_tsc_freq(int level)55 static void check_tsc_freq(int level)
56 {
57 	uint64_t tsc_start, tsc_end, tsc_freq;
58 
59 	/*
60 	 * Reading the TSC twice with about a second's difference should give
61 	 * us an approximation of the TSC frequency from the guest's
62 	 * perspective. Now, this won't be completely accurate, but it should
63 	 * be good enough for the purposes of this test.
64 	 */
65 	tsc_start = rdmsr(MSR_IA32_TSC);
66 	GUEST_SLEEP(1);
67 	tsc_end = rdmsr(MSR_IA32_TSC);
68 
69 	tsc_freq = tsc_end - tsc_start;
70 
71 	GUEST_CHECK(level, tsc_freq);
72 }
73 
l2_guest_code(void)74 static void l2_guest_code(void)
75 {
76 	check_tsc_freq(UCHECK_L2);
77 
78 	/* exit to L1 */
79 	__asm__ __volatile__("vmcall");
80 }
81 
l1_guest_code(struct vmx_pages * vmx_pages)82 static void l1_guest_code(struct vmx_pages *vmx_pages)
83 {
84 	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
85 	uint32_t control;
86 
87 	/* check that L1's frequency looks alright before launching L2 */
88 	check_tsc_freq(UCHECK_L1);
89 
90 	GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
91 	GUEST_ASSERT(load_vmcs(vmx_pages));
92 
93 	/* prepare the VMCS for L2 execution */
94 	prepare_vmcs(vmx_pages, l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
95 
96 	/* enable TSC offsetting and TSC scaling for L2 */
97 	control = vmreadz(CPU_BASED_VM_EXEC_CONTROL);
98 	control |= CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_USE_TSC_OFFSETTING;
99 	vmwrite(CPU_BASED_VM_EXEC_CONTROL, control);
100 
101 	control = vmreadz(SECONDARY_VM_EXEC_CONTROL);
102 	control |= SECONDARY_EXEC_TSC_SCALING;
103 	vmwrite(SECONDARY_VM_EXEC_CONTROL, control);
104 
105 	vmwrite(TSC_OFFSET, TSC_OFFSET_L2);
106 	vmwrite(TSC_MULTIPLIER, TSC_MULTIPLIER_L2);
107 	vmwrite(TSC_MULTIPLIER_HIGH, TSC_MULTIPLIER_L2 >> 32);
108 
109 	/* launch L2 */
110 	GUEST_ASSERT(!vmlaunch());
111 	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
112 
113 	/* check that L1's frequency still looks good */
114 	check_tsc_freq(UCHECK_L1);
115 
116 	GUEST_DONE();
117 }
118 
/*
 * Report whether the host kernel is currently using "tsc" as its
 * clocksource, which this test uses as its indication that the TSC is
 * stable enough to measure against.
 *
 * The first line of the sysfs attribute must be exactly "tsc", optionally
 * followed by a newline. A name that merely starts with "tsc" is rejected.
 *
 * Return: true if the current clocksource is "tsc"; false otherwise,
 *         including when the sysfs attribute cannot be opened or read.
 */
static bool system_has_stable_tsc(void)
{
	static const char tsc_name[] = "tsc";
	const size_t name_len = sizeof(tsc_name) - 1;
	bool tsc_is_stable = false;
	char buf[64];
	FILE *fp;

	fp = fopen("/sys/devices/system/clocksource/clocksource0/current_clocksource", "r");
	if (fp == NULL)
		return false;

	/*
	 * Read the whole name rather than just its first three characters,
	 * then require the name to end right after "tsc". fgets() always
	 * NUL-terminates, so buf[name_len] is valid once the prefix matched.
	 */
	if (fgets(buf, sizeof(buf), fp))
		tsc_is_stable = !strncmp(buf, tsc_name, name_len) &&
				(buf[name_len] == '\n' || buf[name_len] == '\0');

	fclose(fp);
	return tsc_is_stable;
}
135 
main(int argc,char * argv[])136 int main(int argc, char *argv[])
137 {
138 	struct kvm_vcpu *vcpu;
139 	struct kvm_vm *vm;
140 	vm_vaddr_t vmx_pages_gva;
141 
142 	uint64_t tsc_start, tsc_end;
143 	uint64_t tsc_khz;
144 	uint64_t l1_scale_factor;
145 	uint64_t l0_tsc_freq = 0;
146 	uint64_t l1_tsc_freq = 0;
147 	uint64_t l2_tsc_freq = 0;
148 
149 	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
150 	TEST_REQUIRE(kvm_has_cap(KVM_CAP_TSC_CONTROL));
151 	TEST_REQUIRE(system_has_stable_tsc());
152 
153 	/*
154 	 * We set L1's scale factor to be a random number from 2 to 10.
155 	 * Ideally we would do the same for L2's factor but that one is
156 	 * referenced by both main() and l1_guest_code() and using a global
157 	 * variable does not work.
158 	 */
159 	srand(time(NULL));
160 	l1_scale_factor = (rand() % 9) + 2;
161 	printf("L1's scale down factor is: %"PRIu64"\n", l1_scale_factor);
162 	printf("L2's scale up factor is: %llu\n", L2_SCALE_FACTOR);
163 
164 	tsc_start = rdtsc();
165 	sleep(1);
166 	tsc_end = rdtsc();
167 
168 	l0_tsc_freq = tsc_end - tsc_start;
169 	printf("real TSC frequency is around: %"PRIu64"\n", l0_tsc_freq);
170 
171 	vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
172 	vcpu_alloc_vmx(vm, &vmx_pages_gva);
173 	vcpu_args_set(vcpu, 1, vmx_pages_gva);
174 
175 	tsc_khz = __vcpu_ioctl(vcpu, KVM_GET_TSC_KHZ, NULL);
176 	TEST_ASSERT(tsc_khz != -1, "vcpu ioctl KVM_GET_TSC_KHZ failed");
177 
178 	/* scale down L1's TSC frequency */
179 	vcpu_ioctl(vcpu, KVM_SET_TSC_KHZ, (void *) (tsc_khz / l1_scale_factor));
180 
181 	for (;;) {
182 		struct ucall uc;
183 
184 		vcpu_run(vcpu);
185 		TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
186 
187 		switch (get_ucall(vcpu, &uc)) {
188 		case UCALL_ABORT:
189 			REPORT_GUEST_ASSERT(uc);
190 		case UCALL_SYNC:
191 			switch (uc.args[0]) {
192 			case USLEEP:
193 				sleep(uc.args[1]);
194 				break;
195 			case UCHECK_L1:
196 				l1_tsc_freq = uc.args[1];
197 				printf("L1's TSC frequency is around: %"PRIu64
198 				       "\n", l1_tsc_freq);
199 
200 				compare_tsc_freq(l1_tsc_freq,
201 						 l0_tsc_freq / l1_scale_factor);
202 				break;
203 			case UCHECK_L2:
204 				l2_tsc_freq = uc.args[1];
205 				printf("L2's TSC frequency is around: %"PRIu64
206 				       "\n", l2_tsc_freq);
207 
208 				compare_tsc_freq(l2_tsc_freq,
209 						 l1_tsc_freq * L2_SCALE_FACTOR);
210 				break;
211 			}
212 			break;
213 		case UCALL_DONE:
214 			goto done;
215 		default:
216 			TEST_FAIL("Unknown ucall %lu", uc.cmd);
217 		}
218 	}
219 
220 done:
221 	kvm_vm_free(vm);
222 	return 0;
223 }
224