1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Userfaultfd unit tests.
4  *
5  *  Copyright (C) 2015-2023  Red Hat, Inc.
6  */
7 
8 #include "uffd-common.h"
9 
10 #ifdef __NR_userfaultfd
11 
/* The unit test doesn't need a large or random size, make it 32MB for now */
#define  UFFD_TEST_MEM_SIZE               (32UL << 20)

/* One flag bit per memory type a test case can target (see mem_types[]) */
#define  MEM_ANON                         BIT_ULL(0)
#define  MEM_SHMEM                        BIT_ULL(1)
#define  MEM_SHMEM_PRIVATE                BIT_ULL(2)
#define  MEM_HUGETLB                      BIT_ULL(3)
#define  MEM_HUGETLB_PRIVATE              BIT_ULL(4)

/* Union of every memory type flag above */
#define  MEM_ALL  (MEM_ANON | MEM_SHMEM | MEM_SHMEM_PRIVATE | \
		   MEM_HUGETLB | MEM_HUGETLB_PRIVATE)
23 
/* Describes one kind of memory backing that the unit tests can run on */
struct mem_type {
	/* Human-readable name, printed in the "<test> on <mem>" banner */
	const char *name;
	/* One of the MEM_* bits; matched against a test case's mem_targets */
	unsigned int mem_flag;
	/* Operations table installed into the global uffd_test_ops */
	uffd_test_ops_t *mem_ops;
	/* Value installed into the global map_shared */
	bool shared;
};
typedef struct mem_type mem_type_t;
31 
/*
 * Table of every memory type known to the unit tests.  main() walks this
 * table for each test case and runs the test on the entries whose mem_flag
 * is selected by the test's mem_targets mask.
 */
mem_type_t mem_types[] = {
	{
		.name = "anon",
		.mem_flag = MEM_ANON,
		.mem_ops = &anon_uffd_test_ops,
		.shared = false,
	},
	{
		.name = "shmem",
		.mem_flag = MEM_SHMEM,
		.mem_ops = &shmem_uffd_test_ops,
		.shared = true,
	},
	{
		/* Same ops as "shmem", but with shared == false */
		.name = "shmem-private",
		.mem_flag = MEM_SHMEM_PRIVATE,
		.mem_ops = &shmem_uffd_test_ops,
		.shared = false,
	},
	{
		.name = "hugetlb",
		.mem_flag = MEM_HUGETLB,
		.mem_ops = &hugetlb_uffd_test_ops,
		.shared = true,
	},
	{
		/* Same ops as "hugetlb", but with shared == false */
		.name = "hugetlb-private",
		.mem_flag = MEM_HUGETLB_PRIVATE,
		.mem_ops = &hugetlb_uffd_test_ops,
		.shared = false,
	},
};
64 
/* Arguments to be passed over to each uffd unit test */
struct uffd_test_args {
	/* Memory type the test is currently running on */
	mem_type_t *mem_type;
};
typedef struct uffd_test_args uffd_test_args_t;
70 
/*
 * Signature of a unit test body.  It returns nothing; the tests in this file
 * report their outcome by calling uffd_test_pass(), uffd_test_fail() or
 * uffd_test_skip().
 */
typedef void (*uffd_test_fn)(uffd_test_args_t *);
73 
/* One entry of the uffd_tests[] table */
typedef struct {
	/* Test name, printed in the "<test> on <mem>" banner */
	const char *name;
	/* Test body */
	uffd_test_fn uffd_fn;
	/* Mask of MEM_* flags selecting the memory types to run on */
	unsigned int mem_targets;
	/* UFFD_FEATURE_* bits that must all be available, else the test is skipped */
	uint64_t uffd_feature_required;
} uffd_test_case_t;
80 
/* Print the one-line pass/skip/fail summary taken from the ksft counters */
static void uffd_test_report(void)
{
	printf("Userfaults unit tests: pass=%u, skip=%u, fail=%u (total=%u)\n",
	       ksft_get_pass_cnt(),
	       ksft_get_xskip_cnt(),
	       ksft_get_fail_cnt(),
	       ksft_test_num());
}
89 
/*
 * Mark the current test as passed: terminates the line started by
 * uffd_test_start() with "done" and bumps the ksft pass counter.
 */
static void uffd_test_pass(void)
{
	printf("done\n");
	ksft_inc_pass_cnt();
}
95 
/*
 * Print the "Testing <description>... " banner for a test.  The arguments
 * are handed to printf() as-is, so the first one is a format string.  No
 * newline is printed here; stdout is flushed so the banner is visible while
 * the test runs.
 */
#define  uffd_test_start(...)  do {		\
		printf("Testing ");		\
		printf(__VA_ARGS__);		\
		printf("... ");			\
		fflush(stdout);			\
	} while (0)
102 
/*
 * Mark the current test as failed: prints "failed [reason: ...]" where the
 * reason is produced by printf() from the macro arguments (first argument is
 * the format string), then bumps the ksft fail counter.
 */
#define  uffd_test_fail(...)  do {		\
		printf("failed [reason: ");	\
		printf(__VA_ARGS__);		\
		printf("]\n");			\
		ksft_inc_fail_cnt();		\
	} while (0)
109 
/*
 * Mark the current test as skipped: prints "skipped [reason: ...]" where the
 * reason is produced by printf() from the macro arguments (first argument is
 * the format string), then bumps the ksft xskip counter.
 */
#define  uffd_test_skip(...)  do {		\
		printf("skipped [reason: ");	\
		printf(__VA_ARGS__);		\
		printf("]\n");			\
		ksft_inc_xskip_cnt();		\
	} while (0)
116 
117 /*
118  * Returns 1 if specific userfaultfd supported, 0 otherwise.  Note, we'll
119  * return 1 even if some test failed as long as uffd supported, because in
120  * that case we still want to proceed with the rest uffd unit tests.
121  */
122 static int test_uffd_api(bool use_dev)
123 {
124 	struct uffdio_api uffdio_api;
125 	int uffd;
126 
127 	uffd_test_start("UFFDIO_API (with %s)",
128 			use_dev ? "/dev/userfaultfd" : "syscall");
129 
130 	if (use_dev)
131 		uffd = uffd_open_dev(UFFD_FLAGS);
132 	else
133 		uffd = uffd_open_sys(UFFD_FLAGS);
134 	if (uffd < 0) {
135 		uffd_test_skip("cannot open userfaultfd handle");
136 		return 0;
137 	}
138 
139 	/* Test wrong UFFD_API */
140 	uffdio_api.api = 0xab;
141 	uffdio_api.features = 0;
142 	if (ioctl(uffd, UFFDIO_API, &uffdio_api) == 0) {
143 		uffd_test_fail("UFFDIO_API should fail with wrong api but didn't");
144 		goto out;
145 	}
146 
147 	/* Test wrong feature bit */
148 	uffdio_api.api = UFFD_API;
149 	uffdio_api.features = BIT_ULL(63);
150 	if (ioctl(uffd, UFFDIO_API, &uffdio_api) == 0) {
151 		uffd_test_fail("UFFDIO_API should fail with wrong feature but didn't");
152 		goto out;
153 	}
154 
155 	/* Test normal UFFDIO_API */
156 	uffdio_api.api = UFFD_API;
157 	uffdio_api.features = 0;
158 	if (ioctl(uffd, UFFDIO_API, &uffdio_api)) {
159 		uffd_test_fail("UFFDIO_API should succeed but failed");
160 		goto out;
161 	}
162 
163 	/* Test double requests of UFFDIO_API with a random feature set */
164 	uffdio_api.features = BIT_ULL(0);
165 	if (ioctl(uffd, UFFDIO_API, &uffdio_api) == 0) {
166 		uffd_test_fail("UFFDIO_API should reject initialized uffd");
167 		goto out;
168 	}
169 
170 	uffd_test_pass();
171 out:
172 	close(uffd);
173 	/* We have a valid uffd handle */
174 	return 1;
175 }
176 
177 /*
178  * This function initializes the global variables.  TODO: remove global
179  * vars and then remove this.
180  */
181 static int
182 uffd_setup_environment(uffd_test_args_t *args, uffd_test_case_t *test,
183 		       mem_type_t *mem_type, const char **errmsg)
184 {
185 	map_shared = mem_type->shared;
186 	uffd_test_ops = mem_type->mem_ops;
187 
188 	if (mem_type->mem_flag & (MEM_HUGETLB_PRIVATE | MEM_HUGETLB))
189 		page_size = default_huge_page_size();
190 	else
191 		page_size = psize();
192 
193 	nr_pages = UFFD_TEST_MEM_SIZE / page_size;
194 	/* TODO: remove this global var.. it's so ugly */
195 	nr_cpus = 1;
196 
197 	/* Initialize test arguments */
198 	args->mem_type = mem_type;
199 
200 	return uffd_test_ctx_init(test->uffd_feature_required, errmsg);
201 }
202 
203 static bool uffd_feature_supported(uffd_test_case_t *test)
204 {
205 	uint64_t features;
206 
207 	if (uffd_get_features(&features))
208 		return false;
209 
210 	return (features & test->uffd_feature_required) ==
211 	    test->uffd_feature_required;
212 }
213 
/*
 * Open /proc/self/pagemap read-only and return the file descriptor.  A
 * failure to open is reported through err().
 */
static int pagemap_open(void)
{
	int fd = open("/proc/self/pagemap", O_RDONLY);

	if (fd < 0)
		err("open pagemap");

	return fd;
}
223 
/*
 * Report through err() when the uffd-wp bit (PM_UFFD_WP) of the pagemap
 * entry @value does not match the expected state @wp.
 *
 * This is a macro rather than a function so that __LINE__ works in err(),
 * i.e. refers to the caller.  Both arguments are parenthesized so that
 * compound expressions can be passed safely; note @value is evaluated again
 * when the error message is formatted.
 */
#define  pagemap_check_wp(value, wp) do {				\
		if (!!((value) & PM_UFFD_WP) != (wp))			\
			err("pagemap uffd-wp bit error: 0x%"PRIx64, (value)); \
	} while (0)
229 
230 static int pagemap_test_fork(bool present)
231 {
232 	pid_t child = fork();
233 	uint64_t value;
234 	int fd, result;
235 
236 	if (!child) {
237 		/* Open the pagemap fd of the child itself */
238 		fd = pagemap_open();
239 		value = pagemap_get_entry(fd, area_dst);
240 		/*
241 		 * After fork() uffd-wp bit should be gone as long as we're
242 		 * without UFFD_FEATURE_EVENT_FORK
243 		 */
244 		pagemap_check_wp(value, false);
245 		/* Succeed */
246 		exit(0);
247 	}
248 	waitpid(child, &result, 0);
249 	return result;
250 }
251 
252 static void uffd_wp_unpopulated_test(uffd_test_args_t *args)
253 {
254 	uint64_t value;
255 	int pagemap_fd;
256 
257 	if (uffd_register(uffd, area_dst, nr_pages * page_size,
258 			  false, true, false))
259 		err("register failed");
260 
261 	pagemap_fd = pagemap_open();
262 
263 	/* Test applying pte marker to anon unpopulated */
264 	wp_range(uffd, (uint64_t)area_dst, page_size, true);
265 	value = pagemap_get_entry(pagemap_fd, area_dst);
266 	pagemap_check_wp(value, true);
267 
268 	/* Test unprotect on anon pte marker */
269 	wp_range(uffd, (uint64_t)area_dst, page_size, false);
270 	value = pagemap_get_entry(pagemap_fd, area_dst);
271 	pagemap_check_wp(value, false);
272 
273 	/* Test zap on anon marker */
274 	wp_range(uffd, (uint64_t)area_dst, page_size, true);
275 	if (madvise(area_dst, page_size, MADV_DONTNEED))
276 		err("madvise(MADV_DONTNEED) failed");
277 	value = pagemap_get_entry(pagemap_fd, area_dst);
278 	pagemap_check_wp(value, false);
279 
280 	/* Test fault in after marker removed */
281 	*area_dst = 1;
282 	value = pagemap_get_entry(pagemap_fd, area_dst);
283 	pagemap_check_wp(value, false);
284 	/* Drop it to make pte none again */
285 	if (madvise(area_dst, page_size, MADV_DONTNEED))
286 		err("madvise(MADV_DONTNEED) failed");
287 
288 	/* Test read-zero-page upon pte marker */
289 	wp_range(uffd, (uint64_t)area_dst, page_size, true);
290 	*(volatile char *)area_dst;
291 	/* Drop it to make pte none again */
292 	if (madvise(area_dst, page_size, MADV_DONTNEED))
293 		err("madvise(MADV_DONTNEED) failed");
294 
295 	uffd_test_pass();
296 }
297 
298 static void uffd_pagemap_test(uffd_test_args_t *args)
299 {
300 	int pagemap_fd;
301 	uint64_t value;
302 
303 	if (uffd_register(uffd, area_dst, nr_pages * page_size,
304 			  false, true, false))
305 		err("register failed");
306 
307 	pagemap_fd = pagemap_open();
308 
309 	/* Touch the page */
310 	*area_dst = 1;
311 	wp_range(uffd, (uint64_t)area_dst, page_size, true);
312 	value = pagemap_get_entry(pagemap_fd, area_dst);
313 	pagemap_check_wp(value, true);
314 	/* Make sure uffd-wp bit dropped when fork */
315 	if (pagemap_test_fork(true))
316 		err("Detected stall uffd-wp bit in child");
317 
318 	/* Exclusive required or PAGEOUT won't work */
319 	if (!(value & PM_MMAP_EXCLUSIVE))
320 		err("multiple mapping detected: 0x%"PRIx64, value);
321 
322 	if (madvise(area_dst, page_size, MADV_PAGEOUT))
323 		err("madvise(MADV_PAGEOUT) failed");
324 
325 	/* Uffd-wp should persist even swapped out */
326 	value = pagemap_get_entry(pagemap_fd, area_dst);
327 	pagemap_check_wp(value, true);
328 	/* Make sure uffd-wp bit dropped when fork */
329 	if (pagemap_test_fork(false))
330 		err("Detected stall uffd-wp bit in child");
331 
332 	/* Unprotect; this tests swap pte modifications */
333 	wp_range(uffd, (uint64_t)area_dst, page_size, false);
334 	value = pagemap_get_entry(pagemap_fd, area_dst);
335 	pagemap_check_wp(value, false);
336 
337 	/* Fault in the page from disk */
338 	*area_dst = 2;
339 	value = pagemap_get_entry(pagemap_fd, area_dst);
340 	pagemap_check_wp(value, false);
341 
342 	close(pagemap_fd);
343 	uffd_test_pass();
344 }
345 
346 static void check_memory_contents(char *p)
347 {
348 	unsigned long i, j;
349 	uint8_t expected_byte;
350 
351 	for (i = 0; i < nr_pages; ++i) {
352 		expected_byte = ~((uint8_t)(i % ((uint8_t)-1)));
353 		for (j = 0; j < page_size; j++) {
354 			uint8_t v = *(uint8_t *)(p + (i * page_size) + j);
355 			if (v != expected_byte)
356 				err("unexpected page contents");
357 		}
358 	}
359 }
360 
/*
 * Common body of the MINOR-mode tests.
 *
 * @test_collapse: once all faults are resolved, additionally run
 *                 MADV_COLLAPSE on area_dst_alias and verify the contents
 *                 again.  A failing MADV_COLLAPSE turns the test into a skip.
 * @test_wp: passed to uffd_register() and, through args.apply_wp, to the
 *           poll thread.
 *
 * The two options must not be combined (asserted below).  The outcome is
 * reported through uffd_test_pass()/uffd_test_fail()/uffd_test_skip().
 */
static void uffd_minor_test_common(bool test_collapse, bool test_wp)
{
	unsigned long p;
	pthread_t uffd_mon;
	char c;
	struct uffd_args args = { 0 };

	/*
	 * NOTE: MADV_COLLAPSE is not yet compatible with WP, so testing
	 * both do not make much sense.
	 */
	assert(!(test_collapse && test_wp));

	if (uffd_register(uffd, area_dst_alias, nr_pages * page_size,
			  /* NOTE! MADV_COLLAPSE may not work with uffd-wp */
			  false, test_wp, true))
		err("register failure");

	/*
	 * After registering with UFFD, populate the non-UFFD-registered side of
	 * the shared mapping. This should *not* trigger any UFFD minor faults.
	 */
	for (p = 0; p < nr_pages; ++p)
		memset(area_dst + (p * page_size), p % ((uint8_t)-1),
		       page_size);

	args.apply_wp = test_wp;
	if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args))
		err("uffd_poll_thread create");

	/*
	 * Read each of the pages back using the UFFD-registered mapping. We
	 * expect that the first time we touch a page, it will result in a minor
	 * fault. uffd_poll_thread will resolve the fault by bit-flipping the
	 * page's contents, and then issuing a CONTINUE ioctl.
	 */
	check_memory_contents(area_dst_alias);

	/* Write one byte to the pipe, then wait for the poll thread to finish */
	if (write(pipefd[1], &c, sizeof(c)) != sizeof(c))
		err("pipe write");
	if (pthread_join(uffd_mon, NULL))
		err("join() failed");

	if (test_collapse) {
		if (madvise(area_dst_alias, nr_pages * page_size,
			    MADV_COLLAPSE)) {
			/* It's fine to fail for this one... */
			uffd_test_skip("MADV_COLLAPSE failed");
			return;
		}

		uffd_test_ops->check_pmd_mapping(area_dst,
						 nr_pages * page_size /
						 read_pmd_pagesize());
		/*
		 * This won't cause uffd-fault - it purely just makes sure there
		 * was no corruption.
		 */
		check_memory_contents(area_dst_alias);
	}

	/* Exactly one minor fault per page and no missing fault is expected */
	if (args.missing_faults != 0 || args.minor_faults != nr_pages)
		uffd_test_fail("stats check error");
	else
		uffd_test_pass();
}
427 
428 void uffd_minor_test(uffd_test_args_t *args)
429 {
430 	uffd_minor_test_common(false, false);
431 }
432 
433 void uffd_minor_wp_test(uffd_test_args_t *args)
434 {
435 	uffd_minor_test_common(false, true);
436 }
437 
438 void uffd_minor_collapse_test(uffd_test_args_t *args)
439 {
440 	uffd_minor_test_common(true, false);
441 }
442 
/*
 * Jump buffer for recovering from SIGBUS, and the pointer through which the
 * signal handler reaches it.  The handler only jumps when sigbuf is non-NULL.
 */
static sigjmp_buf jbuf, *sigbuf;
444 
445 static void sighndl(int sig, siginfo_t *siginfo, void *ptr)
446 {
447 	if (sig == SIGBUS) {
448 		if (sigbuf)
449 			siglongjmp(*sigbuf, 1);
450 		abort();
451 	}
452 }
453 
/*
 * For non-cooperative userfaultfd test we fork() a process that will
 * generate pagefaults, will mremap the area monitored by the
 * userfaultfd and at last this process will release the monitored
 * area.
 * For the anonymous and shared memory the area is divided into two
 * parts, the first part is accessed before mremap, and the second
 * part is accessed after mremap. Since hugetlbfs does not support
 * mremap, the entire monitored area is accessed in a single pass for
 * HUGETLB_TEST.
 * The release of the pages currently generates event for shmem and
 * anonymous memory (UFFD_EVENT_REMOVE), hence it is not checked
 * for hugetlb.
 * For signal test(UFFD_FEATURE_SIGBUS), signal_test = 1, we register
 * monitored area, generate pagefaults and test that signal is delivered.
 * Use UFFDIO_COPY to allocate missing page and retry. For signal_test = 2
 * test robustness use case - we release monitored area, fork a process
 * that will generate pagefaults and verify signal is generated.
 * This also tests UFFD_FEATURE_EVENT_FORK event along with the signal
 * feature. Using monitor thread, verify no userfault events are generated.
 *
 * NOTE(review): the code below calls mremap() unconditionally when
 * signal_test == 0; the hugetlb remark above does not match any branch
 * visible in this function - confirm whether it is stale.
 *
 * @signal_test: 0 runs the access/mremap/release flow; non-zero installs the
 *               SIGBUS handler and only walks the first half of the area
 *               (1 resolves each signal in place, 2 counts it and moves on).
 * @wp: forwarded to copy_page() when a MISSING signal is resolved.
 *
 * Returns 0 on success.  In signal mode, returns non-zero when the number of
 * counted signals differs from the number of pages walked.
 */
static int faulting_process(int signal_test, bool wp)
{
	unsigned long nr, i;
	unsigned long long count;
	unsigned long split_nr_pages;
	unsigned long lastnr;
	struct sigaction act;
	volatile unsigned long signalled = 0;

	/* The first half (rounded up) is accessed before the mremap() below */
	split_nr_pages = (nr_pages + 1) / 2;

	if (signal_test) {
		sigbuf = &jbuf;
		memset(&act, 0, sizeof(act));
		act.sa_sigaction = sighndl;
		act.sa_flags = SA_SIGINFO;
		if (sigaction(SIGBUS, &act, 0))
			err("sigaction");
		/* No page has raised a signal yet */
		lastnr = (unsigned long)-1;
	}

	for (nr = 0; nr < split_nr_pages; nr++) {
		/* Modified after sigsetjmp() and read again after the jump */
		volatile int steps = 1;
		unsigned long offset = nr * page_size;

		if (signal_test) {
			/* Non-zero return: we got here from the SIGBUS handler */
			if (sigsetjmp(*sigbuf, 1) != 0) {
				if (steps == 1 && nr == lastnr)
					err("Signal repeated");

				lastnr = nr;
				if (signal_test == 1) {
					if (steps == 1) {
						/* This is a MISSING request */
						steps++;
						if (copy_page(uffd, offset, wp))
							signalled++;
					} else {
						/* This is a WP request */
						assert(steps == 2);
						wp_range(uffd,
							 (__u64)area_dst +
							 offset,
							 page_size, false);
					}
				} else {
					/* Count the signal and skip this page */
					signalled++;
					continue;
				}
			}
		}

		count = *area_count(area_dst, nr);
		if (count != count_verify[nr])
			err("nr %lu memory corruption %llu %llu\n",
			    nr, count, count_verify[nr]);
		/*
		 * Trigger write protection if there is by writing
		 * the same value back.
		 */
		*area_count(area_dst, nr) = count;
	}

	/* Signal mode ends here: succeed iff every page walked was counted */
	if (signal_test)
		return signalled != split_nr_pages;

	/* Move the monitored area on top of area_src */
	area_dst = mremap(area_dst, nr_pages * page_size,  nr_pages * page_size,
			  MREMAP_MAYMOVE | MREMAP_FIXED, area_src);
	if (area_dst == MAP_FAILED)
		err("mremap");
	/* Reset area_src since we just clobbered it */
	area_src = NULL;

	/* Second half: accessed through the remapped area */
	for (; nr < nr_pages; nr++) {
		count = *area_count(area_dst, nr);
		if (count != count_verify[nr]) {
			err("nr %lu memory corruption %llu %llu\n",
			    nr, count, count_verify[nr]);
		}
		/*
		 * Trigger write protection if there is by writing
		 * the same value back.
		 */
		*area_count(area_dst, nr) = count;
	}

	uffd_test_ops->release_pages(area_dst);

	/* After the release every byte of the area must read back as zero */
	for (nr = 0; nr < nr_pages; nr++)
		for (i = 0; i < page_size; i++)
			if (*(area_dst + nr * page_size + i) != 0)
				err("page %lu offset %lu is not zero", nr, i);

	return 0;
}
570 
571 static void uffd_sigbus_test_common(bool wp)
572 {
573 	unsigned long userfaults;
574 	pthread_t uffd_mon;
575 	pid_t pid;
576 	int err;
577 	char c;
578 	struct uffd_args args = { 0 };
579 
580 	fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK);
581 
582 	if (uffd_register(uffd, area_dst, nr_pages * page_size,
583 			  true, wp, false))
584 		err("register failure");
585 
586 	if (faulting_process(1, wp))
587 		err("faulting process failed");
588 
589 	uffd_test_ops->release_pages(area_dst);
590 
591 	args.apply_wp = wp;
592 	if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args))
593 		err("uffd_poll_thread create");
594 
595 	pid = fork();
596 	if (pid < 0)
597 		err("fork");
598 
599 	if (!pid)
600 		exit(faulting_process(2, wp));
601 
602 	waitpid(pid, &err, 0);
603 	if (err)
604 		err("faulting process failed");
605 	if (write(pipefd[1], &c, sizeof(c)) != sizeof(c))
606 		err("pipe write");
607 	if (pthread_join(uffd_mon, (void **)&userfaults))
608 		err("pthread_join()");
609 
610 	if (userfaults)
611 		uffd_test_fail("Signal test failed, userfaults: %ld", userfaults);
612 	else
613 		uffd_test_pass();
614 }
615 
/* "sigbus" test entry: SIGBUS test without write-protect.  @args is unused. */
static void uffd_sigbus_test(uffd_test_args_t *args)
{
	uffd_sigbus_test_common(false);
}
620 
/* "sigbus-wp" test entry: SIGBUS test with write-protect.  @args is unused. */
static void uffd_sigbus_wp_test(uffd_test_args_t *args)
{
	uffd_sigbus_test_common(true);
}
625 
626 static void uffd_events_test_common(bool wp)
627 {
628 	pthread_t uffd_mon;
629 	pid_t pid;
630 	int err;
631 	char c;
632 	struct uffd_args args = { 0 };
633 
634 	fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK);
635 	if (uffd_register(uffd, area_dst, nr_pages * page_size,
636 			  true, wp, false))
637 		err("register failure");
638 
639 	args.apply_wp = wp;
640 	if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args))
641 		err("uffd_poll_thread create");
642 
643 	pid = fork();
644 	if (pid < 0)
645 		err("fork");
646 
647 	if (!pid)
648 		exit(faulting_process(0, wp));
649 
650 	waitpid(pid, &err, 0);
651 	if (err)
652 		err("faulting process failed");
653 	if (write(pipefd[1], &c, sizeof(c)) != sizeof(c))
654 		err("pipe write");
655 	if (pthread_join(uffd_mon, NULL))
656 		err("pthread_join()");
657 
658 	if (args.missing_faults != nr_pages)
659 		uffd_test_fail("Fault counts wrong");
660 	else
661 		uffd_test_pass();
662 }
663 
/* "events" test entry: event test without write-protect.  @args is unused. */
static void uffd_events_test(uffd_test_args_t *args)
{
	uffd_events_test_common(false);
}
668 
/* "events-wp" test entry: event test with write-protect.  @args is unused. */
static void uffd_events_wp_test(uffd_test_args_t *args)
{
	uffd_events_test_common(true);
}
673 
674 static void retry_uffdio_zeropage(int ufd,
675 				  struct uffdio_zeropage *uffdio_zeropage)
676 {
677 	uffd_test_ops->alias_mapping(&uffdio_zeropage->range.start,
678 				     uffdio_zeropage->range.len,
679 				     0);
680 	if (ioctl(ufd, UFFDIO_ZEROPAGE, uffdio_zeropage)) {
681 		if (uffdio_zeropage->zeropage != -EEXIST)
682 			err("UFFDIO_ZEROPAGE error: %"PRId64,
683 			    (int64_t)uffdio_zeropage->zeropage);
684 	} else {
685 		err("UFFDIO_ZEROPAGE error: %"PRId64,
686 		    (int64_t)uffdio_zeropage->zeropage);
687 	}
688 }
689 
690 static bool do_uffdio_zeropage(int ufd, bool has_zeropage)
691 {
692 	struct uffdio_zeropage uffdio_zeropage = { 0 };
693 	int ret;
694 	__s64 res;
695 
696 	uffdio_zeropage.range.start = (unsigned long) area_dst;
697 	uffdio_zeropage.range.len = page_size;
698 	uffdio_zeropage.mode = 0;
699 	ret = ioctl(ufd, UFFDIO_ZEROPAGE, &uffdio_zeropage);
700 	res = uffdio_zeropage.zeropage;
701 	if (ret) {
702 		/* real retval in ufdio_zeropage.zeropage */
703 		if (has_zeropage)
704 			err("UFFDIO_ZEROPAGE error: %"PRId64, (int64_t)res);
705 		else if (res != -EINVAL)
706 			err("UFFDIO_ZEROPAGE not -EINVAL");
707 	} else if (has_zeropage) {
708 		if (res != page_size)
709 			err("UFFDIO_ZEROPAGE unexpected size");
710 		else
711 			retry_uffdio_zeropage(ufd, &uffdio_zeropage);
712 		return true;
713 	} else
714 		err("UFFDIO_ZEROPAGE succeeded");
715 
716 	return false;
717 }
718 
719 /*
720  * Registers a range with MISSING mode only for zeropage test.  Return true
721  * if UFFDIO_ZEROPAGE supported, false otherwise. Can't use uffd_register()
722  * because we want to detect .ioctls along the way.
723  */
724 static bool
725 uffd_register_detect_zeropage(int uffd, void *addr, uint64_t len)
726 {
727 	uint64_t ioctls = 0;
728 
729 	if (uffd_register_with_ioctls(uffd, addr, len, true,
730 				      false, false, &ioctls))
731 		err("zeropage register fail");
732 
733 	return ioctls & (1 << _UFFDIO_ZEROPAGE);
734 }
735 
736 /* exercise UFFDIO_ZEROPAGE */
737 static void uffd_zeropage_test(uffd_test_args_t *args)
738 {
739 	bool has_zeropage;
740 	int i;
741 
742 	has_zeropage = uffd_register_detect_zeropage(uffd, area_dst, page_size);
743 	if (area_dst_alias)
744 		/* Ignore the retval; we already have it */
745 		uffd_register_detect_zeropage(uffd, area_dst_alias, page_size);
746 
747 	if (do_uffdio_zeropage(uffd, has_zeropage))
748 		for (i = 0; i < page_size; i++)
749 			if (area_dst[i] != 0)
750 				err("data non-zero at offset %d\n", i);
751 
752 	if (uffd_unregister(uffd, area_dst, page_size))
753 		err("unregister");
754 
755 	if (area_dst_alias && uffd_unregister(uffd, area_dst_alias, page_size))
756 		err("unregister");
757 
758 	uffd_test_pass();
759 }
760 
761 /*
762  * Test the returned uffdio_register.ioctls with different register modes.
763  * Note that _UFFDIO_ZEROPAGE is tested separately in the zeropage test.
764  */
765 static void
766 do_register_ioctls_test(uffd_test_args_t *args, bool miss, bool wp, bool minor)
767 {
768 	uint64_t ioctls = 0, expected = BIT_ULL(_UFFDIO_WAKE);
769 	mem_type_t *mem_type = args->mem_type;
770 	int ret;
771 
772 	ret = uffd_register_with_ioctls(uffd, area_dst, page_size,
773 					miss, wp, minor, &ioctls);
774 
775 	/*
776 	 * Handle special cases of UFFDIO_REGISTER here where it should
777 	 * just fail with -EINVAL first..
778 	 *
779 	 * Case 1: register MINOR on anon
780 	 * Case 2: register with no mode selected
781 	 */
782 	if ((minor && (mem_type->mem_flag == MEM_ANON)) ||
783 	    (!miss && !wp && !minor)) {
784 		if (ret != -EINVAL)
785 			err("register (miss=%d, wp=%d, minor=%d) failed "
786 			    "with wrong errno=%d", miss, wp, minor, ret);
787 		return;
788 	}
789 
790 	/* UFFDIO_REGISTER should succeed, then check ioctls returned */
791 	if (miss)
792 		expected |= BIT_ULL(_UFFDIO_COPY);
793 	if (wp)
794 		expected |= BIT_ULL(_UFFDIO_WRITEPROTECT);
795 	if (minor)
796 		expected |= BIT_ULL(_UFFDIO_CONTINUE);
797 
798 	if ((ioctls & expected) != expected)
799 		err("unexpected uffdio_register.ioctls "
800 		    "(miss=%d, wp=%d, minor=%d): expected=0x%"PRIx64", "
801 		    "returned=0x%"PRIx64, miss, wp, minor, expected, ioctls);
802 
803 	if (uffd_unregister(uffd, area_dst, page_size))
804 		err("unregister");
805 }
806 
807 static void uffd_register_ioctls_test(uffd_test_args_t *args)
808 {
809 	int miss, wp, minor;
810 
811 	for (miss = 0; miss <= 1; miss++)
812 		for (wp = 0; wp <= 1; wp++)
813 			for (minor = 0; minor <= 1; minor++)
814 				do_register_ioctls_test(args, miss, wp, minor);
815 
816 	uffd_test_pass();
817 }
818 
/*
 * Table of all unit tests.  main() runs every entry once per memory type
 * selected by .mem_targets, and skips it when the features listed in
 * .uffd_feature_required are not all available.
 */
uffd_test_case_t uffd_tests[] = {
	{
		/* Test returned uffdio_register.ioctls. */
		.name = "register-ioctls",
		.uffd_fn = uffd_register_ioctls_test,
		.mem_targets = MEM_ALL,
		.uffd_feature_required = UFFD_FEATURE_MISSING_HUGETLBFS |
		UFFD_FEATURE_MISSING_SHMEM |
		UFFD_FEATURE_PAGEFAULT_FLAG_WP |
		UFFD_FEATURE_WP_HUGETLBFS_SHMEM |
		UFFD_FEATURE_MINOR_HUGETLBFS |
		UFFD_FEATURE_MINOR_SHMEM,
	},
	{
		/* Exercise UFFDIO_ZEROPAGE; no feature bit is required */
		.name = "zeropage",
		.uffd_fn = uffd_zeropage_test,
		.mem_targets = MEM_ALL,
		.uffd_feature_required = 0,
	},
	{
		/* uffd-wp bit as seen through pagemap; anon only */
		.name = "pagemap",
		.uffd_fn = uffd_pagemap_test,
		.mem_targets = MEM_ANON,
		.uffd_feature_required = UFFD_FEATURE_PAGEFAULT_FLAG_WP,
	},
	{
		/* Write-protecting never-populated memory; anon only */
		.name = "wp-unpopulated",
		.uffd_fn = uffd_wp_unpopulated_test,
		.mem_targets = MEM_ANON,
		.uffd_feature_required =
		UFFD_FEATURE_PAGEFAULT_FLAG_WP | UFFD_FEATURE_WP_UNPOPULATED,
	},
	{
		.name = "minor",
		.uffd_fn = uffd_minor_test,
		.mem_targets = MEM_SHMEM | MEM_HUGETLB,
		.uffd_feature_required =
		UFFD_FEATURE_MINOR_HUGETLBFS | UFFD_FEATURE_MINOR_SHMEM,
	},
	{
		.name = "minor-wp",
		.uffd_fn = uffd_minor_wp_test,
		.mem_targets = MEM_SHMEM | MEM_HUGETLB,
		.uffd_feature_required =
		UFFD_FEATURE_MINOR_HUGETLBFS | UFFD_FEATURE_MINOR_SHMEM |
		UFFD_FEATURE_PAGEFAULT_FLAG_WP |
		/*
		 * HACK: here we leveraged WP_UNPOPULATED to detect whether
		 * minor mode supports wr-protect.  There's no feature flag
		 * for it so this is the best we can test against.
		 */
		UFFD_FEATURE_WP_UNPOPULATED,
	},
	{
		.name = "minor-collapse",
		.uffd_fn = uffd_minor_collapse_test,
		/* MADV_COLLAPSE only works with shmem */
		.mem_targets = MEM_SHMEM,
		/* We can't test MADV_COLLAPSE, so try our luck */
		.uffd_feature_required = UFFD_FEATURE_MINOR_SHMEM,
	},
	{
		.name = "sigbus",
		.uffd_fn = uffd_sigbus_test,
		.mem_targets = MEM_ALL,
		.uffd_feature_required = UFFD_FEATURE_SIGBUS |
		UFFD_FEATURE_EVENT_FORK,
	},
	{
		/* Same as "sigbus", with write-protect on top */
		.name = "sigbus-wp",
		.uffd_fn = uffd_sigbus_wp_test,
		.mem_targets = MEM_ALL,
		.uffd_feature_required = UFFD_FEATURE_SIGBUS |
		UFFD_FEATURE_EVENT_FORK | UFFD_FEATURE_PAGEFAULT_FLAG_WP,
	},
	{
		.name = "events",
		.uffd_fn = uffd_events_test,
		.mem_targets = MEM_ALL,
		.uffd_feature_required = UFFD_FEATURE_EVENT_FORK |
		UFFD_FEATURE_EVENT_REMAP | UFFD_FEATURE_EVENT_REMOVE,
	},
	{
		/* Same as "events", with write-protect on top */
		.name = "events-wp",
		.uffd_fn = uffd_events_wp_test,
		.mem_targets = MEM_ALL,
		.uffd_feature_required = UFFD_FEATURE_EVENT_FORK |
		UFFD_FEATURE_EVENT_REMAP | UFFD_FEATURE_EVENT_REMOVE |
		UFFD_FEATURE_PAGEFAULT_FLAG_WP |
		UFFD_FEATURE_WP_HUGETLBFS_SHMEM,
	},
};
911 
912 int main(int argc, char *argv[])
913 {
914 	int n_tests = sizeof(uffd_tests) / sizeof(uffd_test_case_t);
915 	int n_mems = sizeof(mem_types) / sizeof(mem_type_t);
916 	uffd_test_case_t *test;
917 	mem_type_t *mem_type;
918 	uffd_test_args_t args;
919 	char test_name[128];
920 	const char *errmsg;
921 	int has_uffd;
922 	int i, j;
923 
924 	has_uffd = test_uffd_api(false);
925 	has_uffd |= test_uffd_api(true);
926 
927 	if (!has_uffd) {
928 		printf("Userfaultfd not supported or unprivileged, skip all tests\n");
929 		exit(KSFT_SKIP);
930 	}
931 
932 	for (i = 0; i < n_tests; i++) {
933 		test = &uffd_tests[i];
934 		for (j = 0; j < n_mems; j++) {
935 			mem_type = &mem_types[j];
936 			if (!(test->mem_targets & mem_type->mem_flag))
937 				continue;
938 			snprintf(test_name, sizeof(test_name),
939 				 "%s on %s", test->name, mem_type->name);
940 
941 			uffd_test_start(test_name);
942 			if (!uffd_feature_supported(test)) {
943 				uffd_test_skip("feature missing");
944 				continue;
945 			}
946 			if (uffd_setup_environment(&args, test, mem_type,
947 						   &errmsg)) {
948 				uffd_test_skip(errmsg);
949 				continue;
950 			}
951 			test->uffd_fn(&args);
952 		}
953 	}
954 
955 	uffd_test_report();
956 
957 	return ksft_get_fail_cnt() ? KSFT_FAIL : KSFT_PASS;
958 }
959 
960 #else /* __NR_userfaultfd */
961 
962 #warning "missing __NR_userfaultfd definition"
963 
964 int main(void)
965 {
966 	printf("Skipping %s (missing __NR_userfaultfd)\n", __file__);
967 	return KSFT_SKIP;
968 }
969 
970 #endif /* __NR_userfaultfd */
971