1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * vmx_nested_tsc_scaling_test
4  *
5  * Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
6  *
7  * This test case verifies that nested TSC scaling behaves as expected when
8  * both L1 and L2 are scaled using different ratios. For this test we scale
9  * L1 down and scale L2 up.
10  */
11 
12 #include <time.h>
13 
14 #include "kvm_util.h"
15 #include "vmx.h"
16 #include "kselftest.h"
17 
18 
/* ID of the single vCPU created and run by main() */
#define VCPU_ID 0

/* L2 is scaled up (from L1's perspective) by this factor */
#define L2_SCALE_FACTOR 4ULL

/*
 * Values that l1_guest_code() writes into the TSC_OFFSET and TSC_MULTIPLIER
 * VMCS fields before launching L2.
 * NOTE(review): the shift by 48 presumably reflects a fixed-point multiplier
 * format with 48 fractional bits - confirm against the Intel SDM.
 */
#define TSC_OFFSET_L2 ((uint64_t) -33125236320908)
#define TSC_MULTIPLIER_L2 (L2_SCALE_FACTOR << 48)

/* Number of unsigned long elements in the stack that L1 hands to L2 */
#define L2_GUEST_STACK_SIZE 64

/*
 * Sub-commands carried as the first argument of a UCALL_SYNC from the guest;
 * main() dispatches on them. USLEEP asks the host to sleep for the given
 * number of seconds, UCHECK_L1/UCHECK_L2 hand a measured TSC frequency to
 * the host for validation.
 */
enum { USLEEP, UCHECK_L1, UCHECK_L2 };
#define GUEST_SLEEP(sec)         ucall(UCALL_SYNC, 2, USLEEP, sec)
#define GUEST_CHECK(level, freq) ucall(UCALL_SYNC, 2, level, freq)
32 
33 
/*
 * Assert that a measured TSC frequency lies strictly inside a window of
 * +/- 1% around the frequency we expect to see. The slack absorbs the
 * delays inherent in taking the two TSC readings the measurement is
 * derived from.
 *
 * @actual:   frequency that was measured
 * @expected: frequency the measurement is compared against
 */
static void compare_tsc_freq(uint64_t actual, uint64_t expected)
{
	const uint64_t slack = expected / 100;
	const uint64_t thresh_low = expected - slack;
	const uint64_t thresh_high = expected + slack;

	/* lower bound of the window (exclusive) */
	TEST_ASSERT(thresh_low < actual,
		"TSC freq is expected to be between %"PRIu64" and %"PRIu64
		" but it actually is %"PRIu64,
		thresh_low, thresh_high, actual);

	/* upper bound of the window (exclusive) */
	TEST_ASSERT(thresh_high > actual,
		"TSC freq is expected to be between %"PRIu64" and %"PRIu64
		" but it actually is %"PRIu64,
		thresh_low, thresh_high, actual);
}
57 
58 static void check_tsc_freq(int level)
59 {
60 	uint64_t tsc_start, tsc_end, tsc_freq;
61 
62 	/*
63 	 * Reading the TSC twice with about a second's difference should give
64 	 * us an approximation of the TSC frequency from the guest's
65 	 * perspective. Now, this won't be completely accurate, but it should
66 	 * be good enough for the purposes of this test.
67 	 */
68 	tsc_start = rdmsr(MSR_IA32_TSC);
69 	GUEST_SLEEP(1);
70 	tsc_end = rdmsr(MSR_IA32_TSC);
71 
72 	tsc_freq = tsc_end - tsc_start;
73 
74 	GUEST_CHECK(level, tsc_freq);
75 }
76 
/*
 * Code executed by the nested (L2) guest: measure and report the TSC
 * frequency as L2 sees it, then hand control back to L1.
 */
static void l2_guest_code(void)
{
	/* the host validates this measurement against L1's scaled frequency */
	check_tsc_freq(UCHECK_L2);

	/* exit to L1; l1_guest_code() expects EXIT_REASON_VMCALL after launch */
	__asm__ __volatile__("vmcall");
}
84 
/*
 * Code executed by the L1 guest: check L1's own TSC frequency, set up and
 * launch L2 with TSC offsetting and TSC scaling enabled, then check L1's
 * frequency once more after L2 has exited.
 *
 * @vmx_pages: VMX pages set up by the host (see vcpu_alloc_vmx() in main())
 */
static void l1_guest_code(struct vmx_pages *vmx_pages)
{
	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
	uint32_t control;

	/* check that L1's frequency looks alright before launching L2 */
	check_tsc_freq(UCHECK_L1);

	GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
	GUEST_ASSERT(load_vmcs(vmx_pages));

	/*
	 * prepare the VMCS for L2 execution; the address one past the end of
	 * the array is passed as L2's stack (presumably because the stack
	 * grows downwards - confirm against prepare_vmcs())
	 */
	prepare_vmcs(vmx_pages, l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]);

	/*
	 * enable TSC offsetting and TSC scaling for L2
	 * NOTE(review): MSR bitmaps are presumably enabled so that L2's RDMSR
	 * of the TSC does not cause an exit to L1 - confirm.
	 */
	control = vmreadz(CPU_BASED_VM_EXEC_CONTROL);
	control |= CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_USE_TSC_OFFSETTING;
	vmwrite(CPU_BASED_VM_EXEC_CONTROL, control);

	control = vmreadz(SECONDARY_VM_EXEC_CONTROL);
	control |= SECONDARY_EXEC_TSC_SCALING;
	vmwrite(SECONDARY_VM_EXEC_CONTROL, control);

	/* program L2's offset and multiplier; the _HIGH write takes bits 63:32 */
	vmwrite(TSC_OFFSET, TSC_OFFSET_L2);
	vmwrite(TSC_MULTIPLIER, TSC_MULTIPLIER_L2);
	vmwrite(TSC_MULTIPLIER_HIGH, TSC_MULTIPLIER_L2 >> 32);

	/* launch L2; it is expected to come back via its vmcall */
	GUEST_ASSERT(!vmlaunch());
	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);

	/* check that L1's frequency still looks good */
	check_tsc_freq(UCHECK_L1);

	GUEST_DONE();
}
121 
122 static void tsc_scaling_check_supported(void)
123 {
124 	if (!kvm_check_cap(KVM_CAP_TSC_CONTROL)) {
125 		print_skip("TSC scaling not supported by the HW");
126 		exit(KSFT_SKIP);
127 	}
128 }
129 
130 static void stable_tsc_check_supported(void)
131 {
132 	FILE *fp;
133 	char buf[4];
134 
135 	fp = fopen("/sys/devices/system/clocksource/clocksource0/current_clocksource", "r");
136 	if (fp == NULL)
137 		goto skip_test;
138 
139 	if (fgets(buf, sizeof(buf), fp) == NULL)
140 		goto skip_test;
141 
142 	if (strncmp(buf, "tsc", sizeof(buf)))
143 		goto skip_test;
144 
145 	return;
146 skip_test:
147 	print_skip("Kernel does not use TSC clocksource - assuming that host TSC is not stable");
148 	exit(KSFT_SKIP);
149 }
150 
151 int main(int argc, char *argv[])
152 {
153 	struct kvm_vm *vm;
154 	vm_vaddr_t vmx_pages_gva;
155 
156 	uint64_t tsc_start, tsc_end;
157 	uint64_t tsc_khz;
158 	uint64_t l1_scale_factor;
159 	uint64_t l0_tsc_freq = 0;
160 	uint64_t l1_tsc_freq = 0;
161 	uint64_t l2_tsc_freq = 0;
162 
163 	nested_vmx_check_supported();
164 	tsc_scaling_check_supported();
165 	stable_tsc_check_supported();
166 
167 	/*
168 	 * We set L1's scale factor to be a random number from 2 to 10.
169 	 * Ideally we would do the same for L2's factor but that one is
170 	 * referenced by both main() and l1_guest_code() and using a global
171 	 * variable does not work.
172 	 */
173 	srand(time(NULL));
174 	l1_scale_factor = (rand() % 9) + 2;
175 	printf("L1's scale down factor is: %"PRIu64"\n", l1_scale_factor);
176 	printf("L2's scale up factor is: %llu\n", L2_SCALE_FACTOR);
177 
178 	tsc_start = rdtsc();
179 	sleep(1);
180 	tsc_end = rdtsc();
181 
182 	l0_tsc_freq = tsc_end - tsc_start;
183 	printf("real TSC frequency is around: %"PRIu64"\n", l0_tsc_freq);
184 
185 	vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code);
186 	vcpu_alloc_vmx(vm, &vmx_pages_gva);
187 	vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
188 
189 	tsc_khz = _vcpu_ioctl(vm, VCPU_ID, KVM_GET_TSC_KHZ, NULL);
190 	TEST_ASSERT(tsc_khz != -1, "vcpu ioctl KVM_GET_TSC_KHZ failed");
191 
192 	/* scale down L1's TSC frequency */
193 	vcpu_ioctl(vm, VCPU_ID, KVM_SET_TSC_KHZ,
194 		  (void *) (tsc_khz / l1_scale_factor));
195 
196 	for (;;) {
197 		volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID);
198 		struct ucall uc;
199 
200 		vcpu_run(vm, VCPU_ID);
201 		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
202 			    "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
203 			    run->exit_reason,
204 			    exit_reason_str(run->exit_reason));
205 
206 		switch (get_ucall(vm, VCPU_ID, &uc)) {
207 		case UCALL_ABORT:
208 			TEST_FAIL("%s", (const char *) uc.args[0]);
209 		case UCALL_SYNC:
210 			switch (uc.args[0]) {
211 			case USLEEP:
212 				sleep(uc.args[1]);
213 				break;
214 			case UCHECK_L1:
215 				l1_tsc_freq = uc.args[1];
216 				printf("L1's TSC frequency is around: %"PRIu64
217 				       "\n", l1_tsc_freq);
218 
219 				compare_tsc_freq(l1_tsc_freq,
220 						 l0_tsc_freq / l1_scale_factor);
221 				break;
222 			case UCHECK_L2:
223 				l2_tsc_freq = uc.args[1];
224 				printf("L2's TSC frequency is around: %"PRIu64
225 				       "\n", l2_tsc_freq);
226 
227 				compare_tsc_freq(l2_tsc_freq,
228 						 l1_tsc_freq * L2_SCALE_FACTOR);
229 				break;
230 			}
231 			break;
232 		case UCALL_DONE:
233 			goto done;
234 		default:
235 			TEST_FAIL("Unknown ucall %lu", uc.cmd);
236 		}
237 	}
238 
239 done:
240 	kvm_vm_free(vm);
241 	return 0;
242 }
243