1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * KVM demand paging test
4  * Adapted from dirty_log_test.c
5  *
6  * Copyright (C) 2018, Red Hat, Inc.
7  * Copyright (C) 2019, Google, Inc.
8  */
9 
10 #define _GNU_SOURCE /* for pipe2 */
11 
12 #include <inttypes.h>
13 #include <stdio.h>
14 #include <stdlib.h>
15 #include <time.h>
16 #include <poll.h>
17 #include <pthread.h>
18 #include <linux/userfaultfd.h>
19 #include <sys/syscall.h>
20 
21 #include "kvm_util.h"
22 #include "test_util.h"
23 #include "memstress.h"
24 #include "guest_modes.h"
25 #include "userfaultfd_util.h"
26 
27 #ifdef __NR_userfaultfd
28 
/* Number of vCPUs to run; overridden by the -v option in main(). */
static int nr_vcpus = 1;
/* Per-vCPU guest memory size; overridden by the -b option in main(). */
static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE;

/*
 * Length used for every UFFDIO_COPY/UFFDIO_CONTINUE and prefault copy.
 * Set in run_test() from get_backing_src_pagesz() for the chosen backing.
 */
static size_t demand_paging_size;
/*
 * Source buffer of demand_paging_size bytes, filled with 0xAB in run_test(),
 * that is copied into guest memory when paging in.
 */
static char *guest_data_prototype;
34 
35 static void vcpu_worker(struct memstress_vcpu_args *vcpu_args)
36 {
37 	struct kvm_vcpu *vcpu = vcpu_args->vcpu;
38 	int vcpu_idx = vcpu_args->vcpu_idx;
39 	struct kvm_run *run = vcpu->run;
40 	struct timespec start;
41 	struct timespec ts_diff;
42 	int ret;
43 
44 	clock_gettime(CLOCK_MONOTONIC, &start);
45 
46 	/* Let the guest access its memory */
47 	ret = _vcpu_run(vcpu);
48 	TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret);
49 	if (get_ucall(vcpu, NULL) != UCALL_SYNC) {
50 		TEST_ASSERT(false,
51 			    "Invalid guest sync status: exit_reason=%s\n",
52 			    exit_reason_str(run->exit_reason));
53 	}
54 
55 	ts_diff = timespec_elapsed(start);
56 	PER_VCPU_DEBUG("vCPU %d execution time: %ld.%.9lds\n", vcpu_idx,
57 		       ts_diff.tv_sec, ts_diff.tv_nsec);
58 }
59 
60 static int handle_uffd_page_request(int uffd_mode, int uffd,
61 		struct uffd_msg *msg)
62 {
63 	pid_t tid = syscall(__NR_gettid);
64 	uint64_t addr = msg->arg.pagefault.address;
65 	struct timespec start;
66 	struct timespec ts_diff;
67 	int r;
68 
69 	clock_gettime(CLOCK_MONOTONIC, &start);
70 
71 	if (uffd_mode == UFFDIO_REGISTER_MODE_MISSING) {
72 		struct uffdio_copy copy;
73 
74 		copy.src = (uint64_t)guest_data_prototype;
75 		copy.dst = addr;
76 		copy.len = demand_paging_size;
77 		copy.mode = 0;
78 
79 		r = ioctl(uffd, UFFDIO_COPY, &copy);
80 		if (r == -1) {
81 			pr_info("Failed UFFDIO_COPY in 0x%lx from thread %d with errno: %d\n",
82 				addr, tid, errno);
83 			return r;
84 		}
85 	} else if (uffd_mode == UFFDIO_REGISTER_MODE_MINOR) {
86 		struct uffdio_continue cont = {0};
87 
88 		cont.range.start = addr;
89 		cont.range.len = demand_paging_size;
90 
91 		r = ioctl(uffd, UFFDIO_CONTINUE, &cont);
92 		if (r == -1) {
93 			pr_info("Failed UFFDIO_CONTINUE in 0x%lx from thread %d with errno: %d\n",
94 				addr, tid, errno);
95 			return r;
96 		}
97 	} else {
98 		TEST_FAIL("Invalid uffd mode %d", uffd_mode);
99 	}
100 
101 	ts_diff = timespec_elapsed(start);
102 
103 	PER_PAGE_DEBUG("UFFD page-in %d \t%ld ns\n", tid,
104 		       timespec_to_ns(ts_diff));
105 	PER_PAGE_DEBUG("Paged in %ld bytes at 0x%lx from thread %d\n",
106 		       demand_paging_size, addr, tid);
107 
108 	return 0;
109 }
110 
/* Options collected by main() and passed to run_test() for each guest mode. */
struct test_params {
	/* UFFDIO_REGISTER_MODE_MISSING or _MINOR; 0 means userfaultfd is not used. */
	int uffd_mode;
	/* Delay (-d, in usec per the help text) passed to uffd_setup_demand_paging(). */
	useconds_t uffd_delay;
	/* Memory backing type (-s); also determines demand_paging_size. */
	enum vm_mem_backing_src_type src_type;
	/* Passed to memstress_create_vm(); true by default, cleared by -o. */
	bool partition_vcpu_memory_access;
};
117 
118 static void prefault_mem(void *alias, uint64_t len)
119 {
120 	size_t p;
121 
122 	TEST_ASSERT(alias != NULL, "Alias required for minor faults");
123 	for (p = 0; p < (len / demand_paging_size); ++p) {
124 		memcpy(alias + (p * demand_paging_size),
125 		       guest_data_prototype, demand_paging_size);
126 	}
127 }
128 
129 static void run_test(enum vm_guest_mode mode, void *arg)
130 {
131 	struct test_params *p = arg;
132 	struct uffd_desc **uffd_descs = NULL;
133 	struct timespec start;
134 	struct timespec ts_diff;
135 	struct kvm_vm *vm;
136 	int i;
137 
138 	vm = memstress_create_vm(mode, nr_vcpus, guest_percpu_mem_size, 1,
139 				 p->src_type, p->partition_vcpu_memory_access);
140 
141 	demand_paging_size = get_backing_src_pagesz(p->src_type);
142 
143 	guest_data_prototype = malloc(demand_paging_size);
144 	TEST_ASSERT(guest_data_prototype,
145 		    "Failed to allocate buffer for guest data pattern");
146 	memset(guest_data_prototype, 0xAB, demand_paging_size);
147 
148 	if (p->uffd_mode) {
149 		uffd_descs = malloc(nr_vcpus * sizeof(struct uffd_desc *));
150 		TEST_ASSERT(uffd_descs, "Memory allocation failed");
151 
152 		for (i = 0; i < nr_vcpus; i++) {
153 			struct memstress_vcpu_args *vcpu_args;
154 			void *vcpu_hva;
155 			void *vcpu_alias;
156 
157 			vcpu_args = &memstress_args.vcpu_args[i];
158 
159 			/* Cache the host addresses of the region */
160 			vcpu_hva = addr_gpa2hva(vm, vcpu_args->gpa);
161 			vcpu_alias = addr_gpa2alias(vm, vcpu_args->gpa);
162 
163 			prefault_mem(vcpu_alias,
164 				vcpu_args->pages * memstress_args.guest_page_size);
165 
166 			/*
167 			 * Set up user fault fd to handle demand paging
168 			 * requests.
169 			 */
170 			uffd_descs[i] = uffd_setup_demand_paging(
171 				p->uffd_mode, p->uffd_delay, vcpu_hva,
172 				vcpu_args->pages * memstress_args.guest_page_size,
173 				&handle_uffd_page_request);
174 		}
175 	}
176 
177 	pr_info("Finished creating vCPUs and starting uffd threads\n");
178 
179 	clock_gettime(CLOCK_MONOTONIC, &start);
180 	memstress_start_vcpu_threads(nr_vcpus, vcpu_worker);
181 	pr_info("Started all vCPUs\n");
182 
183 	memstress_join_vcpu_threads(nr_vcpus);
184 	ts_diff = timespec_elapsed(start);
185 	pr_info("All vCPU threads joined\n");
186 
187 	if (p->uffd_mode) {
188 		/* Tell the user fault fd handler threads to quit */
189 		for (i = 0; i < nr_vcpus; i++)
190 			uffd_stop_demand_paging(uffd_descs[i]);
191 	}
192 
193 	pr_info("Total guest execution time: %ld.%.9lds\n",
194 		ts_diff.tv_sec, ts_diff.tv_nsec);
195 	pr_info("Overall demand paging rate: %f pgs/sec\n",
196 		memstress_args.vcpu_args[0].pages * nr_vcpus /
197 		((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / NSEC_PER_SEC));
198 
199 	memstress_destroy_vm(vm);
200 
201 	free(guest_data_prototype);
202 	if (p->uffd_mode)
203 		free(uffd_descs);
204 }
205 
/*
 * Print the usage line and a description of every option to stdout, then
 * terminate the process with exit status 0. @name is printed as the program
 * name in the usage line.
 */
static void help(char *name)
{
	putchar('\n');
	printf("usage: %s [-h] [-m vm_mode] [-u uffd_mode] [-d uffd_delay_usec]\n"
	       "          [-b memory] [-s type] [-v vcpus] [-o]\n", name);
	guest_modes_help();
	fputs(" -u: use userfaultfd to handle vCPU page faults. Mode is a\n"
	      "     UFFD registration mode: 'MISSING' or 'MINOR'.\n", stdout);
	fputs(" -d: add a delay in usec to the User Fault\n"
	      "     FD handler to simulate demand paging\n"
	      "     overheads. Ignored without -u.\n", stdout);
	fputs(" -b: specify the size of the memory region which should be\n"
	      "     demand paged by each vCPU. e.g. 10M or 3G.\n"
	      "     Default: 1G\n", stdout);
	backing_src_help("-s");
	fputs(" -v: specify the number of vCPUs to run.\n", stdout);
	fputs(" -o: Overlap guest memory accesses instead of partitioning\n"
	      "     them into a separate region of memory for each vCPU.\n", stdout);
	putchar('\n');
	exit(0);
}
227 
228 int main(int argc, char *argv[])
229 {
230 	int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS);
231 	struct test_params p = {
232 		.src_type = DEFAULT_VM_MEM_SRC,
233 		.partition_vcpu_memory_access = true,
234 	};
235 	int opt;
236 
237 	guest_modes_append_default();
238 
239 	while ((opt = getopt(argc, argv, "hm:u:d:b:s:v:o")) != -1) {
240 		switch (opt) {
241 		case 'm':
242 			guest_modes_cmdline(optarg);
243 			break;
244 		case 'u':
245 			if (!strcmp("MISSING", optarg))
246 				p.uffd_mode = UFFDIO_REGISTER_MODE_MISSING;
247 			else if (!strcmp("MINOR", optarg))
248 				p.uffd_mode = UFFDIO_REGISTER_MODE_MINOR;
249 			TEST_ASSERT(p.uffd_mode, "UFFD mode must be 'MISSING' or 'MINOR'.");
250 			break;
251 		case 'd':
252 			p.uffd_delay = strtoul(optarg, NULL, 0);
253 			TEST_ASSERT(p.uffd_delay >= 0, "A negative UFFD delay is not supported.");
254 			break;
255 		case 'b':
256 			guest_percpu_mem_size = parse_size(optarg);
257 			break;
258 		case 's':
259 			p.src_type = parse_backing_src_type(optarg);
260 			break;
261 		case 'v':
262 			nr_vcpus = atoi_positive("Number of vCPUs", optarg);
263 			TEST_ASSERT(nr_vcpus <= max_vcpus,
264 				    "Invalid number of vcpus, must be between 1 and %d", max_vcpus);
265 			break;
266 		case 'o':
267 			p.partition_vcpu_memory_access = false;
268 			break;
269 		case 'h':
270 		default:
271 			help(argv[0]);
272 			break;
273 		}
274 	}
275 
276 	if (p.uffd_mode == UFFDIO_REGISTER_MODE_MINOR &&
277 	    !backing_src_is_shared(p.src_type)) {
278 		TEST_FAIL("userfaultfd MINOR mode requires shared memory; pick a different -s");
279 	}
280 
281 	for_each_guest_mode(run_test, &p);
282 
283 	return 0;
284 }
285 
286 #else /* __NR_userfaultfd */
287 
288 #warning "missing __NR_userfaultfd definition"
289 
/*
 * Fallback entry point compiled when __NR_userfaultfd is not defined:
 * print a skip message and return KSFT_SKIP without running anything.
 */
int main(void)
{
	print_skip("__NR_userfaultfd must be present for userfaultfd test");
	return KSFT_SKIP;
}
295 
296 #endif /* __NR_userfaultfd */
297