1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
 * Usage: to be run via nx_huge_pages_test.sh, which does the necessary
4  * environment setup and teardown
5  *
6  * Copyright (C) 2022, Google LLC.
7  */
8 
9 #define _GNU_SOURCE
10 
11 #include <fcntl.h>
12 #include <stdint.h>
13 #include <time.h>
14 
15 #include <test_util.h>
16 #include "kvm_util.h"
17 #include "processor.h"
18 
/* Memslot used for the hugepage-backed test region added in run_test(). */
#define HPAGE_SLOT		10
#define HPAGE_GPA		(4UL << 30) /* 4G prevents collision w/ slot 0 */
#define HPAGE_GVA		HPAGE_GPA /* GVA is arbitrary, so use GPA. */
/* Number of base pages that make up one 2MiB huge page. */
#define PAGES_PER_2MB_HUGE_PAGE 512
/* Size of the test region in pages: the guest touches three huge pages. */
#define HPAGE_SLOT_NPAGES	(3 * PAGES_PER_2MB_HUGE_PAGE)
24 
25 /*
26  * Passed by nx_huge_pages_test.sh to provide an easy warning if this test is
27  * being run without it.
28  */
29 #define MAGIC_TOKEN 887563923
30 
31 /*
32  * x86 opcode for the return instruction. Used to call into, and then
33  * immediately return from, memory backed with hugepages.
34  */
35 #define RETURN_OPCODE 0xC3
36 
/*
 * Treat @target as the address of a function that takes no arguments and
 * returns nothing, and call it.
 */
static void guest_do_CALL(uint64_t target)
{
	void (*entry)(void) = (void (*)(void))target;

	entry();
}
42 
43 /*
44  * Exit the VM after each memory access so that the userspace component of the
45  * test can make assertions about the pages backing the VM.
46  *
47  * See the below for an explanation of how each access should affect the
48  * backing mappings.
49  */
guest_code(void)50 void guest_code(void)
51 {
52 	uint64_t hpage_1 = HPAGE_GVA;
53 	uint64_t hpage_2 = hpage_1 + (PAGE_SIZE * 512);
54 	uint64_t hpage_3 = hpage_2 + (PAGE_SIZE * 512);
55 
56 	READ_ONCE(*(uint64_t *)hpage_1);
57 	GUEST_SYNC(1);
58 
59 	READ_ONCE(*(uint64_t *)hpage_2);
60 	GUEST_SYNC(2);
61 
62 	guest_do_CALL(hpage_1);
63 	GUEST_SYNC(3);
64 
65 	guest_do_CALL(hpage_3);
66 	GUEST_SYNC(4);
67 
68 	READ_ONCE(*(uint64_t *)hpage_1);
69 	GUEST_SYNC(5);
70 
71 	READ_ONCE(*(uint64_t *)hpage_3);
72 	GUEST_SYNC(6);
73 }
74 
/*
 * Assert that the VM's "pages_2m" stat currently equals @expected_pages_2m.
 *
 * The stat is sampled exactly once so that the value compared and the value
 * reported on failure are the same.
 */
static void check_2m_page_count(struct kvm_vm *vm, int expected_pages_2m)
{
	const int actual_pages_2m = vm_get_stat(vm, "pages_2m");

	TEST_ASSERT(actual_pages_2m == expected_pages_2m,
		    "Unexpected 2m page count. Expected %d, got %d",
		    expected_pages_2m, actual_pages_2m);
}
85 
/*
 * Assert that the VM's "nx_lpage_splits" stat currently equals
 * @expected_splits.
 *
 * The stat is sampled exactly once so that the value compared and the value
 * reported on failure are the same.
 */
static void check_split_count(struct kvm_vm *vm, int expected_splits)
{
	const int actual_splits = vm_get_stat(vm, "nx_lpage_splits");

	TEST_ASSERT(actual_splits == expected_splits,
		    "Unexpected NX huge page split count. Expected %d, got %d",
		    expected_splits, actual_splits);
}
96 
/*
 * Sleep for five reclaim periods.  run_test() uses this to give the recovery
 * thread time to run before re-checking the VM's stats.
 *
 * @reclaim_period_ms: the NX reclaim period, in milliseconds.
 */
static void wait_for_reclaim(int reclaim_period_ms)
{
	struct timespec ts;
	int64_t reclaim_wait_ms;

	/* Widen before multiplying so a large period cannot overflow int. */
	reclaim_wait_ms = (int64_t)reclaim_period_ms * 5;
	ts.tv_sec = reclaim_wait_ms / 1000;
	ts.tv_nsec = (reclaim_wait_ms % 1000) * 1000000;

	/*
	 * A signal can cut the sleep short; nanosleep() then stores the time
	 * still owed in its second argument.  Keep sleeping until the full
	 * wait has elapsed, otherwise the caller's checks could run before
	 * the wait is over.
	 */
	while (nanosleep(&ts, &ts) == -1 && errno == EINTR)
		;
}
107 
run_test(int reclaim_period_ms,bool disable_nx_huge_pages,bool reboot_permissions)108 void run_test(int reclaim_period_ms, bool disable_nx_huge_pages,
109 	      bool reboot_permissions)
110 {
111 	struct kvm_vcpu *vcpu;
112 	struct kvm_vm *vm;
113 	uint64_t nr_bytes;
114 	void *hva;
115 	int r;
116 
117 	vm = vm_create(1);
118 
119 	if (disable_nx_huge_pages) {
120 		r = __vm_disable_nx_huge_pages(vm);
121 		if (reboot_permissions) {
122 			TEST_ASSERT(!r, "Disabling NX huge pages should succeed if process has reboot permissions");
123 		} else {
124 			TEST_ASSERT(r == -1 && errno == EPERM,
125 				    "This process should not have permission to disable NX huge pages");
126 			return;
127 		}
128 	}
129 
130 	vcpu = vm_vcpu_add(vm, 0, guest_code);
131 
132 	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS_HUGETLB,
133 				    HPAGE_GPA, HPAGE_SLOT,
134 				    HPAGE_SLOT_NPAGES, 0);
135 
136 	nr_bytes = HPAGE_SLOT_NPAGES * vm->page_size;
137 
138 	/*
139 	 * Ensure that KVM can map HPAGE_SLOT with huge pages by mapping the
140 	 * region into the guest with 2MiB pages whenever TDP is disabled (i.e.
141 	 * whenever KVM is shadowing the guest page tables).
142 	 *
143 	 * When TDP is enabled, KVM should be able to map HPAGE_SLOT with huge
144 	 * pages irrespective of the guest page size, so map with 4KiB pages
145 	 * to test that that is the case.
146 	 */
147 	if (kvm_is_tdp_enabled())
148 		virt_map_level(vm, HPAGE_GVA, HPAGE_GPA, nr_bytes, PG_LEVEL_4K);
149 	else
150 		virt_map_level(vm, HPAGE_GVA, HPAGE_GPA, nr_bytes, PG_LEVEL_2M);
151 
152 	hva = addr_gpa2hva(vm, HPAGE_GPA);
153 	memset(hva, RETURN_OPCODE, nr_bytes);
154 
155 	check_2m_page_count(vm, 0);
156 	check_split_count(vm, 0);
157 
158 	/*
159 	 * The guest code will first read from the first hugepage, resulting
160 	 * in a huge page mapping being created.
161 	 */
162 	vcpu_run(vcpu);
163 	check_2m_page_count(vm, 1);
164 	check_split_count(vm, 0);
165 
166 	/*
167 	 * Then the guest code will read from the second hugepage, resulting
168 	 * in another huge page mapping being created.
169 	 */
170 	vcpu_run(vcpu);
171 	check_2m_page_count(vm, 2);
172 	check_split_count(vm, 0);
173 
174 	/*
175 	 * Next, the guest will execute from the first huge page, causing it
176 	 * to be remapped at 4k.
177 	 *
178 	 * If NX huge pages are disabled, this should have no effect.
179 	 */
180 	vcpu_run(vcpu);
181 	check_2m_page_count(vm, disable_nx_huge_pages ? 2 : 1);
182 	check_split_count(vm, disable_nx_huge_pages ? 0 : 1);
183 
184 	/*
185 	 * Executing from the third huge page (previously unaccessed) will
186 	 * cause part to be mapped at 4k.
187 	 *
188 	 * If NX huge pages are disabled, it should be mapped at 2M.
189 	 */
190 	vcpu_run(vcpu);
191 	check_2m_page_count(vm, disable_nx_huge_pages ? 3 : 1);
192 	check_split_count(vm, disable_nx_huge_pages ? 0 : 2);
193 
194 	/* Reading from the first huge page again should have no effect. */
195 	vcpu_run(vcpu);
196 	check_2m_page_count(vm, disable_nx_huge_pages ? 3 : 1);
197 	check_split_count(vm, disable_nx_huge_pages ? 0 : 2);
198 
199 	/* Give recovery thread time to run. */
200 	wait_for_reclaim(reclaim_period_ms);
201 
202 	/*
203 	 * Now that the reclaimer has run, all the split pages should be gone.
204 	 *
205 	 * If NX huge pages are disabled, the relaimer will not run, so
206 	 * nothing should change from here on.
207 	 */
208 	check_2m_page_count(vm, disable_nx_huge_pages ? 3 : 1);
209 	check_split_count(vm, 0);
210 
211 	/*
212 	 * The 4k mapping on hpage 3 should have been removed, so check that
213 	 * reading from it causes a huge page mapping to be installed.
214 	 */
215 	vcpu_run(vcpu);
216 	check_2m_page_count(vm, disable_nx_huge_pages ? 3 : 2);
217 	check_split_count(vm, 0);
218 
219 	kvm_vm_free(vm);
220 }
221 
/*
 * Print the usage message to stdout and exit with status 0.
 *
 * @name: program name shown in the usage line (main() passes argv[0]).
 */
static void help(char *name)
{
	puts("");
	/* List every option accepted by main()'s getopt string, including -r. */
	printf("usage: %s [-h] [-p period_ms] [-t token] [-r]\n", name);
	puts("");
	printf(" -p: The NX reclaim period in milliseconds.\n");
	printf(" -t: The magic token to indicate environment setup is done.\n");
	printf(" -r: The test has reboot permissions and can disable NX huge pages.\n");
	puts("");
	exit(0);
}
233 
main(int argc,char ** argv)234 int main(int argc, char **argv)
235 {
236 	int reclaim_period_ms = 0, token = 0, opt;
237 	bool reboot_permissions = false;
238 
239 	while ((opt = getopt(argc, argv, "hp:t:r")) != -1) {
240 		switch (opt) {
241 		case 'p':
242 			reclaim_period_ms = atoi_positive("Reclaim period", optarg);
243 			break;
244 		case 't':
245 			token = atoi_paranoid(optarg);
246 			break;
247 		case 'r':
248 			reboot_permissions = true;
249 			break;
250 		case 'h':
251 		default:
252 			help(argv[0]);
253 			break;
254 		}
255 	}
256 
257 	TEST_REQUIRE(kvm_has_cap(KVM_CAP_VM_DISABLE_NX_HUGE_PAGES));
258 
259 	__TEST_REQUIRE(token == MAGIC_TOKEN,
260 		       "This test must be run with the magic token %d.\n"
261 		       "This is done by nx_huge_pages_test.sh, which\n"
262 		       "also handles environment setup for the test.");
263 
264 	run_test(reclaim_period_ms, false, reboot_permissions);
265 	run_test(reclaim_period_ms, true, reboot_permissions);
266 
267 	return 0;
268 }
269 
270