// SPDX-License-Identifier: GPL-2.0-only
/*
 * Usage: to be run via nx_huge_pages_test.sh, which does the necessary
 * environment setup and teardown.
 *
 * Copyright (C) 2022, Google LLC.
 */
88448ec59SBen Gardon 
98448ec59SBen Gardon #define _GNU_SOURCE
108448ec59SBen Gardon 
118448ec59SBen Gardon #include <fcntl.h>
128448ec59SBen Gardon #include <stdint.h>
138448ec59SBen Gardon #include <time.h>
148448ec59SBen Gardon 
158448ec59SBen Gardon #include <test_util.h>
168448ec59SBen Gardon #include "kvm_util.h"
178448ec59SBen Gardon #include "processor.h"
188448ec59SBen Gardon 
198448ec59SBen Gardon #define HPAGE_SLOT		10
208448ec59SBen Gardon #define HPAGE_GPA		(4UL << 30) /* 4G prevents collision w/ slot 0 */
218448ec59SBen Gardon #define HPAGE_GVA		HPAGE_GPA /* GVA is arbitrary, so use GPA. */
228448ec59SBen Gardon #define PAGES_PER_2MB_HUGE_PAGE 512
238448ec59SBen Gardon #define HPAGE_SLOT_NPAGES	(3 * PAGES_PER_2MB_HUGE_PAGE)
248448ec59SBen Gardon 
258448ec59SBen Gardon /*
268448ec59SBen Gardon  * Passed by nx_huge_pages_test.sh to provide an easy warning if this test is
278448ec59SBen Gardon  * being run without it.
288448ec59SBen Gardon  */
298448ec59SBen Gardon #define MAGIC_TOKEN 887563923
308448ec59SBen Gardon 
318448ec59SBen Gardon /*
328448ec59SBen Gardon  * x86 opcode for the return instruction. Used to call into, and then
338448ec59SBen Gardon  * immediately return from, memory backed with hugepages.
348448ec59SBen Gardon  */
358448ec59SBen Gardon #define RETURN_OPCODE 0xC3
368448ec59SBen Gardon 
/*
 * Interpret @target as the address of a function that takes no arguments and
 * returns nothing, and call it.
 */
static void guest_do_CALL(uint64_t target)
{
	void (*entry)(void) = (void (*)(void))target;

	entry();
}
428448ec59SBen Gardon 
438448ec59SBen Gardon /*
448448ec59SBen Gardon  * Exit the VM after each memory access so that the userspace component of the
458448ec59SBen Gardon  * test can make assertions about the pages backing the VM.
468448ec59SBen Gardon  *
478448ec59SBen Gardon  * See the below for an explanation of how each access should affect the
488448ec59SBen Gardon  * backing mappings.
498448ec59SBen Gardon  */
guest_code(void)508448ec59SBen Gardon void guest_code(void)
518448ec59SBen Gardon {
528448ec59SBen Gardon 	uint64_t hpage_1 = HPAGE_GVA;
538448ec59SBen Gardon 	uint64_t hpage_2 = hpage_1 + (PAGE_SIZE * 512);
548448ec59SBen Gardon 	uint64_t hpage_3 = hpage_2 + (PAGE_SIZE * 512);
558448ec59SBen Gardon 
568448ec59SBen Gardon 	READ_ONCE(*(uint64_t *)hpage_1);
578448ec59SBen Gardon 	GUEST_SYNC(1);
588448ec59SBen Gardon 
598448ec59SBen Gardon 	READ_ONCE(*(uint64_t *)hpage_2);
608448ec59SBen Gardon 	GUEST_SYNC(2);
618448ec59SBen Gardon 
628448ec59SBen Gardon 	guest_do_CALL(hpage_1);
638448ec59SBen Gardon 	GUEST_SYNC(3);
648448ec59SBen Gardon 
658448ec59SBen Gardon 	guest_do_CALL(hpage_3);
668448ec59SBen Gardon 	GUEST_SYNC(4);
678448ec59SBen Gardon 
688448ec59SBen Gardon 	READ_ONCE(*(uint64_t *)hpage_1);
698448ec59SBen Gardon 	GUEST_SYNC(5);
708448ec59SBen Gardon 
718448ec59SBen Gardon 	READ_ONCE(*(uint64_t *)hpage_3);
728448ec59SBen Gardon 	GUEST_SYNC(6);
738448ec59SBen Gardon }
748448ec59SBen Gardon 
/*
 * Fail the test unless the VM's "pages_2m" stat equals @expected_pages_2m.
 *
 * The asserted expression is spelled exactly as before in case TEST_ASSERT()
 * includes its text in the failure report.
 */
static void check_2m_page_count(struct kvm_vm *vm, int expected_pages_2m)
{
	int actual_pages_2m = vm_get_stat(vm, "pages_2m");

	TEST_ASSERT(actual_pages_2m == expected_pages_2m,
		    "Unexpected 2m page count. Expected %d, got %d",
		    expected_pages_2m, actual_pages_2m);
}
858448ec59SBen Gardon 
/*
 * Fail the test unless the VM's "nx_lpage_splits" stat equals
 * @expected_splits.
 *
 * The asserted expression is spelled exactly as before in case TEST_ASSERT()
 * includes its text in the failure report.
 */
static void check_split_count(struct kvm_vm *vm, int expected_splits)
{
	int actual_splits = vm_get_stat(vm, "nx_lpage_splits");

	TEST_ASSERT(actual_splits == expected_splits,
		    "Unexpected NX huge page split count. Expected %d, got %d",
		    expected_splits, actual_splits);
}
968448ec59SBen Gardon 
/*
 * Sleep for five reclaim periods so that the NX huge page recovery worker
 * has had time to run before the caller inspects the VM stats.
 *
 * @reclaim_period_ms: the recovery period, in milliseconds.
 */
static void wait_for_reclaim(int reclaim_period_ms)
{
	/* Widen before multiplying so a large period cannot overflow int. */
	long reclaim_wait_ms = (long)reclaim_period_ms * 5;
	struct timespec ts = {
		.tv_sec = reclaim_wait_ms / 1000,
		.tv_nsec = (reclaim_wait_ms % 1000) * 1000000L,
	};

	/*
	 * A signal can cut nanosleep() short; resume with the remaining time
	 * so the caller never checks the stats early.  POSIX allows the
	 * request and remainder arguments to point at the same object.
	 */
	while (nanosleep(&ts, &ts) == -1 && errno == EINTR)
		;
}
1078448ec59SBen Gardon 
run_test(int reclaim_period_ms,bool disable_nx_huge_pages,bool reboot_permissions)108b774da3fSBen Gardon void run_test(int reclaim_period_ms, bool disable_nx_huge_pages,
109b774da3fSBen Gardon 	      bool reboot_permissions)
1108448ec59SBen Gardon {
1118448ec59SBen Gardon 	struct kvm_vcpu *vcpu;
1128448ec59SBen Gardon 	struct kvm_vm *vm;
113458e9874SDavid Matlack 	uint64_t nr_bytes;
1148448ec59SBen Gardon 	void *hva;
115b774da3fSBen Gardon 	int r;
1168448ec59SBen Gardon 
1178448ec59SBen Gardon 	vm = vm_create(1);
118b774da3fSBen Gardon 
119b774da3fSBen Gardon 	if (disable_nx_huge_pages) {
120b774da3fSBen Gardon 		r = __vm_disable_nx_huge_pages(vm);
121b774da3fSBen Gardon 		if (reboot_permissions) {
122b774da3fSBen Gardon 			TEST_ASSERT(!r, "Disabling NX huge pages should succeed if process has reboot permissions");
123b774da3fSBen Gardon 		} else {
124b774da3fSBen Gardon 			TEST_ASSERT(r == -1 && errno == EPERM,
125b774da3fSBen Gardon 				    "This process should not have permission to disable NX huge pages");
126b774da3fSBen Gardon 			return;
127b774da3fSBen Gardon 		}
128b774da3fSBen Gardon 	}
129b774da3fSBen Gardon 
1308448ec59SBen Gardon 	vcpu = vm_vcpu_add(vm, 0, guest_code);
1318448ec59SBen Gardon 
1328448ec59SBen Gardon 	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS_HUGETLB,
1338448ec59SBen Gardon 				    HPAGE_GPA, HPAGE_SLOT,
1348448ec59SBen Gardon 				    HPAGE_SLOT_NPAGES, 0);
1358448ec59SBen Gardon 
136458e9874SDavid Matlack 	nr_bytes = HPAGE_SLOT_NPAGES * vm->page_size;
137458e9874SDavid Matlack 
138458e9874SDavid Matlack 	/*
139458e9874SDavid Matlack 	 * Ensure that KVM can map HPAGE_SLOT with huge pages by mapping the
140458e9874SDavid Matlack 	 * region into the guest with 2MiB pages whenever TDP is disabled (i.e.
141458e9874SDavid Matlack 	 * whenever KVM is shadowing the guest page tables).
142458e9874SDavid Matlack 	 *
143458e9874SDavid Matlack 	 * When TDP is enabled, KVM should be able to map HPAGE_SLOT with huge
144458e9874SDavid Matlack 	 * pages irrespective of the guest page size, so map with 4KiB pages
145458e9874SDavid Matlack 	 * to test that that is the case.
146458e9874SDavid Matlack 	 */
147458e9874SDavid Matlack 	if (kvm_is_tdp_enabled())
148458e9874SDavid Matlack 		virt_map_level(vm, HPAGE_GVA, HPAGE_GPA, nr_bytes, PG_LEVEL_4K);
149458e9874SDavid Matlack 	else
150458e9874SDavid Matlack 		virt_map_level(vm, HPAGE_GVA, HPAGE_GPA, nr_bytes, PG_LEVEL_2M);
1518448ec59SBen Gardon 
1528448ec59SBen Gardon 	hva = addr_gpa2hva(vm, HPAGE_GPA);
153458e9874SDavid Matlack 	memset(hva, RETURN_OPCODE, nr_bytes);
1548448ec59SBen Gardon 
1558448ec59SBen Gardon 	check_2m_page_count(vm, 0);
1568448ec59SBen Gardon 	check_split_count(vm, 0);
1578448ec59SBen Gardon 
1588448ec59SBen Gardon 	/*
1598448ec59SBen Gardon 	 * The guest code will first read from the first hugepage, resulting
1608448ec59SBen Gardon 	 * in a huge page mapping being created.
1618448ec59SBen Gardon 	 */
1628448ec59SBen Gardon 	vcpu_run(vcpu);
1638448ec59SBen Gardon 	check_2m_page_count(vm, 1);
1648448ec59SBen Gardon 	check_split_count(vm, 0);
1658448ec59SBen Gardon 
1668448ec59SBen Gardon 	/*
1678448ec59SBen Gardon 	 * Then the guest code will read from the second hugepage, resulting
1688448ec59SBen Gardon 	 * in another huge page mapping being created.
1698448ec59SBen Gardon 	 */
1708448ec59SBen Gardon 	vcpu_run(vcpu);
1718448ec59SBen Gardon 	check_2m_page_count(vm, 2);
1728448ec59SBen Gardon 	check_split_count(vm, 0);
1738448ec59SBen Gardon 
1748448ec59SBen Gardon 	/*
1758448ec59SBen Gardon 	 * Next, the guest will execute from the first huge page, causing it
1768448ec59SBen Gardon 	 * to be remapped at 4k.
177b774da3fSBen Gardon 	 *
178b774da3fSBen Gardon 	 * If NX huge pages are disabled, this should have no effect.
1798448ec59SBen Gardon 	 */
1808448ec59SBen Gardon 	vcpu_run(vcpu);
181b774da3fSBen Gardon 	check_2m_page_count(vm, disable_nx_huge_pages ? 2 : 1);
182b774da3fSBen Gardon 	check_split_count(vm, disable_nx_huge_pages ? 0 : 1);
1838448ec59SBen Gardon 
1848448ec59SBen Gardon 	/*
1858448ec59SBen Gardon 	 * Executing from the third huge page (previously unaccessed) will
1868448ec59SBen Gardon 	 * cause part to be mapped at 4k.
187b774da3fSBen Gardon 	 *
188b774da3fSBen Gardon 	 * If NX huge pages are disabled, it should be mapped at 2M.
1898448ec59SBen Gardon 	 */
1908448ec59SBen Gardon 	vcpu_run(vcpu);
191b774da3fSBen Gardon 	check_2m_page_count(vm, disable_nx_huge_pages ? 3 : 1);
192b774da3fSBen Gardon 	check_split_count(vm, disable_nx_huge_pages ? 0 : 2);
1938448ec59SBen Gardon 
1948448ec59SBen Gardon 	/* Reading from the first huge page again should have no effect. */
1958448ec59SBen Gardon 	vcpu_run(vcpu);
196b774da3fSBen Gardon 	check_2m_page_count(vm, disable_nx_huge_pages ? 3 : 1);
197b774da3fSBen Gardon 	check_split_count(vm, disable_nx_huge_pages ? 0 : 2);
1988448ec59SBen Gardon 
1998448ec59SBen Gardon 	/* Give recovery thread time to run. */
2008448ec59SBen Gardon 	wait_for_reclaim(reclaim_period_ms);
2018448ec59SBen Gardon 
2028448ec59SBen Gardon 	/*
2038448ec59SBen Gardon 	 * Now that the reclaimer has run, all the split pages should be gone.
204b774da3fSBen Gardon 	 *
205b774da3fSBen Gardon 	 * If NX huge pages are disabled, the relaimer will not run, so
206b774da3fSBen Gardon 	 * nothing should change from here on.
2078448ec59SBen Gardon 	 */
208b774da3fSBen Gardon 	check_2m_page_count(vm, disable_nx_huge_pages ? 3 : 1);
2098448ec59SBen Gardon 	check_split_count(vm, 0);
2108448ec59SBen Gardon 
2118448ec59SBen Gardon 	/*
2128448ec59SBen Gardon 	 * The 4k mapping on hpage 3 should have been removed, so check that
2138448ec59SBen Gardon 	 * reading from it causes a huge page mapping to be installed.
2148448ec59SBen Gardon 	 */
2158448ec59SBen Gardon 	vcpu_run(vcpu);
216b774da3fSBen Gardon 	check_2m_page_count(vm, disable_nx_huge_pages ? 3 : 2);
2178448ec59SBen Gardon 	check_split_count(vm, 0);
2188448ec59SBen Gardon 
2198448ec59SBen Gardon 	kvm_vm_free(vm);
220b774da3fSBen Gardon }
221b774da3fSBen Gardon 
/*
 * Print the command line usage for the test and exit with status 0.
 *
 * @name: program name to show in the usage line (argv[0] in main()).
 */
static void help(char *name)
{
	puts("");
	/* List every option accepted by main()'s getopt() string, -r included. */
	printf("usage: %s [-h] [-p period_ms] [-t token] [-r]\n", name);
	puts("");
	printf(" -p: The NX reclaim period in milliseconds.\n");
	printf(" -t: The magic token to indicate environment setup is done.\n");
	printf(" -r: The test has reboot permissions and can disable NX huge pages.\n");
	puts("");
	exit(0);
}
233b774da3fSBen Gardon 
main(int argc,char ** argv)234b774da3fSBen Gardon int main(int argc, char **argv)
235b774da3fSBen Gardon {
236b774da3fSBen Gardon 	int reclaim_period_ms = 0, token = 0, opt;
237b774da3fSBen Gardon 	bool reboot_permissions = false;
238b774da3fSBen Gardon 
239b774da3fSBen Gardon 	while ((opt = getopt(argc, argv, "hp:t:r")) != -1) {
240b774da3fSBen Gardon 		switch (opt) {
241b774da3fSBen Gardon 		case 'p':
242c2c46b10SVipin Sharma 			reclaim_period_ms = atoi_positive("Reclaim period", optarg);
243b774da3fSBen Gardon 			break;
244b774da3fSBen Gardon 		case 't':
245018ea2d7SVipin Sharma 			token = atoi_paranoid(optarg);
246b774da3fSBen Gardon 			break;
247b774da3fSBen Gardon 		case 'r':
248b774da3fSBen Gardon 			reboot_permissions = true;
249b774da3fSBen Gardon 			break;
250b774da3fSBen Gardon 		case 'h':
251b774da3fSBen Gardon 		default:
252b774da3fSBen Gardon 			help(argv[0]);
253b774da3fSBen Gardon 			break;
254b774da3fSBen Gardon 		}
255b774da3fSBen Gardon 	}
256b774da3fSBen Gardon 
2575f5651c6SOliver Upton 	TEST_REQUIRE(kvm_has_cap(KVM_CAP_VM_DISABLE_NX_HUGE_PAGES));
258b774da3fSBen Gardon 
2595f5651c6SOliver Upton 	__TEST_REQUIRE(token == MAGIC_TOKEN,
2605f5651c6SOliver Upton 		       "This test must be run with the magic token %d.\n"
2615f5651c6SOliver Upton 		       "This is done by nx_huge_pages_test.sh, which\n"
2625f5651c6SOliver Upton 		       "also handles environment setup for the test.");
263b774da3fSBen Gardon 
264b774da3fSBen Gardon 	run_test(reclaim_period_ms, false, reboot_permissions);
265b774da3fSBen Gardon 	run_test(reclaim_period_ms, true, reboot_permissions);
2668448ec59SBen Gardon 
2678448ec59SBen Gardon 	return 0;
2688448ec59SBen Gardon }
2698448ec59SBen Gardon 
270