xref: /openbmc/linux/tools/testing/selftests/mm/uffd-unit-tests.c (revision 83775e158a3d2dc437132ab357ed6c9214ef0ae9)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Userfaultfd unit tests.
4  *
5  *  Copyright (C) 2015-2023  Red Hat, Inc.
6  */
7 
8 #include "uffd-common.h"
9 
10 #include "../../../../mm/gup_test.h"
11 
12 #ifdef __NR_userfaultfd
13 
/*
 * A unit test needs neither a large nor a randomized area: a fixed 32MB
 * region is enough for now.
 */
#define  UFFD_TEST_MEM_SIZE               (32UL << 20)

/* One bit per memory type; used by mem_type.mem_flag and test mem_targets */
#define  MEM_ANON                         BIT_ULL(0)
#define  MEM_SHMEM                        BIT_ULL(1)
#define  MEM_SHMEM_PRIVATE                BIT_ULL(2)
#define  MEM_HUGETLB                      BIT_ULL(3)
#define  MEM_HUGETLB_PRIVATE              BIT_ULL(4)

/* Union of every memory type bit defined above */
#define  MEM_ALL  (MEM_ANON |			\
		   MEM_SHMEM |			\
		   MEM_SHMEM_PRIVATE |		\
		   MEM_HUGETLB |		\
		   MEM_HUGETLB_PRIVATE)
25 
26 struct mem_type {
27 	const char *name;
28 	unsigned int mem_flag;
29 	uffd_test_ops_t *mem_ops;
30 	bool shared;
31 };
32 typedef struct mem_type mem_type_t;
33 
34 mem_type_t mem_types[] = {
35 	{
36 		.name = "anon",
37 		.mem_flag = MEM_ANON,
38 		.mem_ops = &anon_uffd_test_ops,
39 		.shared = false,
40 	},
41 	{
42 		.name = "shmem",
43 		.mem_flag = MEM_SHMEM,
44 		.mem_ops = &shmem_uffd_test_ops,
45 		.shared = true,
46 	},
47 	{
48 		.name = "shmem-private",
49 		.mem_flag = MEM_SHMEM_PRIVATE,
50 		.mem_ops = &shmem_uffd_test_ops,
51 		.shared = false,
52 	},
53 	{
54 		.name = "hugetlb",
55 		.mem_flag = MEM_HUGETLB,
56 		.mem_ops = &hugetlb_uffd_test_ops,
57 		.shared = true,
58 	},
59 	{
60 		.name = "hugetlb-private",
61 		.mem_flag = MEM_HUGETLB_PRIVATE,
62 		.mem_ops = &hugetlb_uffd_test_ops,
63 		.shared = false,
64 	},
65 };
66 
67 /* Arguments to be passed over to each uffd unit test */
68 struct uffd_test_args {
69 	mem_type_t *mem_type;
70 };
71 typedef struct uffd_test_args uffd_test_args_t;
72 
73 /* Returns: UFFD_TEST_* */
74 typedef void (*uffd_test_fn)(uffd_test_args_t *);
75 
76 typedef struct {
77 	const char *name;
78 	uffd_test_fn uffd_fn;
79 	unsigned int mem_targets;
80 	uint64_t uffd_feature_required;
81 } uffd_test_case_t;
82 
/* Print the one-line summary of the kselftest pass/skip/fail counters. */
static void uffd_test_report(void)
{
	const char *summary_fmt =
		"Userfaults unit tests: pass=%u, skip=%u, fail=%u (total=%u)\n";

	printf(summary_fmt,
	       ksft_get_pass_cnt(), ksft_get_xskip_cnt(),
	       ksft_get_fail_cnt(), ksft_test_num());
}
91 
/* Finish the current "Testing ..." line and count the test as passed. */
static void uffd_test_pass(void)
{
	fputs("done\n", stdout);
	ksft_inc_pass_cnt();
}
97 
/*
 * Print the "Testing <printf-style description>... " banner and flush it,
 * so it is already visible while the test runs.
 */
#define  uffd_test_start(...)  do {		\
		fputs("Testing ", stdout);	\
		printf(__VA_ARGS__);		\
		fputs("... ", stdout);		\
		fflush(stdout);			\
	} while (0)
104 
/* Report a failure with a printf-style reason and bump the fail counter. */
#define  uffd_test_fail(...)  do {			\
		fputs("failed [reason: ", stdout);	\
		printf(__VA_ARGS__);			\
		fputs("]\n", stdout);			\
		ksft_inc_fail_cnt();			\
	} while (0)
111 
/* Report a skip with a printf-style reason and bump the xskip counter. */
#define  uffd_test_skip(...)  do {			\
		fputs("skipped [reason: ", stdout);	\
		printf(__VA_ARGS__);			\
		fputs("]\n", stdout);			\
		ksft_inc_xskip_cnt();			\
	} while (0)
118 
119 /*
120  * Returns 1 if specific userfaultfd supported, 0 otherwise.  Note, we'll
121  * return 1 even if some test failed as long as uffd supported, because in
122  * that case we still want to proceed with the rest uffd unit tests.
123  */
124 static int test_uffd_api(bool use_dev)
125 {
126 	struct uffdio_api uffdio_api;
127 	int uffd;
128 
129 	uffd_test_start("UFFDIO_API (with %s)",
130 			use_dev ? "/dev/userfaultfd" : "syscall");
131 
132 	if (use_dev)
133 		uffd = uffd_open_dev(UFFD_FLAGS);
134 	else
135 		uffd = uffd_open_sys(UFFD_FLAGS);
136 	if (uffd < 0) {
137 		uffd_test_skip("cannot open userfaultfd handle");
138 		return 0;
139 	}
140 
141 	/* Test wrong UFFD_API */
142 	uffdio_api.api = 0xab;
143 	uffdio_api.features = 0;
144 	if (ioctl(uffd, UFFDIO_API, &uffdio_api) == 0) {
145 		uffd_test_fail("UFFDIO_API should fail with wrong api but didn't");
146 		goto out;
147 	}
148 
149 	/* Test wrong feature bit */
150 	uffdio_api.api = UFFD_API;
151 	uffdio_api.features = BIT_ULL(63);
152 	if (ioctl(uffd, UFFDIO_API, &uffdio_api) == 0) {
153 		uffd_test_fail("UFFDIO_API should fail with wrong feature but didn't");
154 		goto out;
155 	}
156 
157 	/* Test normal UFFDIO_API */
158 	uffdio_api.api = UFFD_API;
159 	uffdio_api.features = 0;
160 	if (ioctl(uffd, UFFDIO_API, &uffdio_api)) {
161 		uffd_test_fail("UFFDIO_API should succeed but failed");
162 		goto out;
163 	}
164 
165 	/* Test double requests of UFFDIO_API with a random feature set */
166 	uffdio_api.features = BIT_ULL(0);
167 	if (ioctl(uffd, UFFDIO_API, &uffdio_api) == 0) {
168 		uffd_test_fail("UFFDIO_API should reject initialized uffd");
169 		goto out;
170 	}
171 
172 	uffd_test_pass();
173 out:
174 	close(uffd);
175 	/* We have a valid uffd handle */
176 	return 1;
177 }
178 
179 /*
180  * This function initializes the global variables.  TODO: remove global
181  * vars and then remove this.
182  */
183 static int
184 uffd_setup_environment(uffd_test_args_t *args, uffd_test_case_t *test,
185 		       mem_type_t *mem_type, const char **errmsg)
186 {
187 	map_shared = mem_type->shared;
188 	uffd_test_ops = mem_type->mem_ops;
189 
190 	if (mem_type->mem_flag & (MEM_HUGETLB_PRIVATE | MEM_HUGETLB))
191 		page_size = default_huge_page_size();
192 	else
193 		page_size = psize();
194 
195 	nr_pages = UFFD_TEST_MEM_SIZE / page_size;
196 	/* TODO: remove this global var.. it's so ugly */
197 	nr_cpus = 1;
198 
199 	/* Initialize test arguments */
200 	args->mem_type = mem_type;
201 
202 	return uffd_test_ctx_init(test->uffd_feature_required, errmsg);
203 }
204 
205 static bool uffd_feature_supported(uffd_test_case_t *test)
206 {
207 	uint64_t features;
208 
209 	if (uffd_get_features(&features))
210 		return false;
211 
212 	return (features & test->uffd_feature_required) ==
213 	    test->uffd_feature_required;
214 }
215 
/*
 * Open /proc/self/pagemap read-only and return the file descriptor.
 * A failure to open is reported through err().
 */
static int pagemap_open(void)
{
	int pagemap_fd;

	pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
	if (pagemap_fd >= 0)
		return pagemap_fd;

	err("open pagemap");
	return pagemap_fd;
}
225 
/*
 * Check that the PM_UFFD_WP bit of the pagemap entry @value matches the
 * expected state @wp, and report through err() otherwise.
 *
 * This is a macro so that __LINE__ works in err().  Both arguments are
 * parenthesized so that expressions such as "a | b" or "x || y" can be
 * passed safely.
 */
#define  pagemap_check_wp(value, wp) do {				\
		if (!!((value) & PM_UFFD_WP) != (wp))			\
			err("pagemap uffd-wp bit error: 0x%"PRIx64, (value)); \
	} while (0)
231 
/* Data shared between pagemap_test_fork() and fork_event_consumer() */
typedef struct {
	/* uffd on which the UFFD_EVENT_FORK message is read */
	int parent_uffd;
	/* uffd delivered by the fork event; filled in by the consumer */
	int child_uffd;
} fork_event_args;
235 
236 static void *fork_event_consumer(void *data)
237 {
238 	fork_event_args *args = data;
239 	struct uffd_msg msg = { 0 };
240 
241 	/* Read until a full msg received */
242 	while (uffd_read_msg(args->parent_uffd, &msg));
243 
244 	if (msg.event != UFFD_EVENT_FORK)
245 		err("wrong message: %u\n", msg.event);
246 
247 	/* Just to be properly freed later */
248 	args->child_uffd = msg.arg.fork.ufd;
249 	return NULL;
250 }
251 
/* State kept between a pin_pages() and the matching unpin_pages() */
typedef struct {
	/* fd of the gup_test debugfs file opened by pin_pages() */
	int gup_fd;
	/* true while pages are pinned through gup_fd */
	bool pinned;
} pin_args;
256 
257 /*
258  * Returns 0 if succeed, <0 for errors.  pin_pages() needs to be paired
259  * with unpin_pages().  Currently it needs to be RO longterm pin to satisfy
260  * all needs of the test cases (e.g., trigger unshare, trigger fork() early
261  * CoW, etc.).
262  */
263 static int pin_pages(pin_args *args, void *buffer, size_t size)
264 {
265 	struct pin_longterm_test test = {
266 		.addr = (uintptr_t)buffer,
267 		.size = size,
268 		/* Read-only pins */
269 		.flags = 0,
270 	};
271 
272 	if (args->pinned)
273 		err("already pinned");
274 
275 	args->gup_fd = open("/sys/kernel/debug/gup_test", O_RDWR);
276 	if (args->gup_fd < 0)
277 		return -errno;
278 
279 	if (ioctl(args->gup_fd, PIN_LONGTERM_TEST_START, &test)) {
280 		/* Even if gup_test existed, can be an old gup_test / kernel */
281 		close(args->gup_fd);
282 		return -errno;
283 	}
284 	args->pinned = true;
285 	return 0;
286 }
287 
288 static void unpin_pages(pin_args *args)
289 {
290 	if (!args->pinned)
291 		err("unpin without pin first");
292 	if (ioctl(args->gup_fd, PIN_LONGTERM_TEST_STOP))
293 		err("PIN_LONGTERM_TEST_STOP");
294 	close(args->gup_fd);
295 	args->pinned = false;
296 }
297 
298 static int pagemap_test_fork(int uffd, bool with_event, bool test_pin)
299 {
300 	fork_event_args args = { .parent_uffd = uffd, .child_uffd = -1 };
301 	pthread_t thread;
302 	pid_t child;
303 	uint64_t value;
304 	int fd, result;
305 
306 	/* Prepare a thread to resolve EVENT_FORK */
307 	if (with_event) {
308 		if (pthread_create(&thread, NULL, fork_event_consumer, &args))
309 			err("pthread_create()");
310 	}
311 
312 	child = fork();
313 	if (!child) {
314 		/* Open the pagemap fd of the child itself */
315 		pin_args args = {};
316 
317 		fd = pagemap_open();
318 
319 		if (test_pin && pin_pages(&args, area_dst, page_size))
320 			/*
321 			 * Normally when reach here we have pinned in
322 			 * previous tests, so shouldn't fail anymore
323 			 */
324 			err("pin page failed in child");
325 
326 		value = pagemap_get_entry(fd, area_dst);
327 		/*
328 		 * After fork(), we should handle uffd-wp bit differently:
329 		 *
330 		 * (1) when with EVENT_FORK, it should persist
331 		 * (2) when without EVENT_FORK, it should be dropped
332 		 */
333 		pagemap_check_wp(value, with_event);
334 		if (test_pin)
335 			unpin_pages(&args);
336 		/* Succeed */
337 		exit(0);
338 	}
339 	waitpid(child, &result, 0);
340 
341 	if (with_event) {
342 		if (pthread_join(thread, NULL))
343 			err("pthread_join()");
344 		if (args.child_uffd < 0)
345 			err("Didn't receive child uffd");
346 		close(args.child_uffd);
347 	}
348 
349 	return result;
350 }
351 
352 static void uffd_wp_unpopulated_test(uffd_test_args_t *args)
353 {
354 	uint64_t value;
355 	int pagemap_fd;
356 
357 	if (uffd_register(uffd, area_dst, nr_pages * page_size,
358 			  false, true, false))
359 		err("register failed");
360 
361 	pagemap_fd = pagemap_open();
362 
363 	/* Test applying pte marker to anon unpopulated */
364 	wp_range(uffd, (uint64_t)area_dst, page_size, true);
365 	value = pagemap_get_entry(pagemap_fd, area_dst);
366 	pagemap_check_wp(value, true);
367 
368 	/* Test unprotect on anon pte marker */
369 	wp_range(uffd, (uint64_t)area_dst, page_size, false);
370 	value = pagemap_get_entry(pagemap_fd, area_dst);
371 	pagemap_check_wp(value, false);
372 
373 	/* Test zap on anon marker */
374 	wp_range(uffd, (uint64_t)area_dst, page_size, true);
375 	if (madvise(area_dst, page_size, MADV_DONTNEED))
376 		err("madvise(MADV_DONTNEED) failed");
377 	value = pagemap_get_entry(pagemap_fd, area_dst);
378 	pagemap_check_wp(value, false);
379 
380 	/* Test fault in after marker removed */
381 	*area_dst = 1;
382 	value = pagemap_get_entry(pagemap_fd, area_dst);
383 	pagemap_check_wp(value, false);
384 	/* Drop it to make pte none again */
385 	if (madvise(area_dst, page_size, MADV_DONTNEED))
386 		err("madvise(MADV_DONTNEED) failed");
387 
388 	/* Test read-zero-page upon pte marker */
389 	wp_range(uffd, (uint64_t)area_dst, page_size, true);
390 	*(volatile char *)area_dst;
391 	/* Drop it to make pte none again */
392 	if (madvise(area_dst, page_size, MADV_DONTNEED))
393 		err("madvise(MADV_DONTNEED) failed");
394 
395 	uffd_test_pass();
396 }
397 
398 static void uffd_wp_fork_test_common(uffd_test_args_t *args,
399 				     bool with_event)
400 {
401 	int pagemap_fd;
402 	uint64_t value;
403 
404 	if (uffd_register(uffd, area_dst, nr_pages * page_size,
405 			  false, true, false))
406 		err("register failed");
407 
408 	pagemap_fd = pagemap_open();
409 
410 	/* Touch the page */
411 	*area_dst = 1;
412 	wp_range(uffd, (uint64_t)area_dst, page_size, true);
413 	value = pagemap_get_entry(pagemap_fd, area_dst);
414 	pagemap_check_wp(value, true);
415 	if (pagemap_test_fork(uffd, with_event, false)) {
416 		uffd_test_fail("Detected %s uffd-wp bit in child in present pte",
417 			       with_event ? "missing" : "stall");
418 		goto out;
419 	}
420 
421 	/*
422 	 * This is an attempt for zapping the pgtable so as to test the
423 	 * markers.
424 	 *
425 	 * For private mappings, PAGEOUT will only work on exclusive ptes
426 	 * (PM_MMAP_EXCLUSIVE) which we should satisfy.
427 	 *
428 	 * For shared, PAGEOUT may not work.  Use DONTNEED instead which
429 	 * plays a similar role of zapping (rather than freeing the page)
430 	 * to expose pte markers.
431 	 */
432 	if (args->mem_type->shared) {
433 		if (madvise(area_dst, page_size, MADV_DONTNEED))
434 			err("MADV_DONTNEED");
435 	} else {
436 		/*
437 		 * NOTE: ignore retval because private-hugetlb doesn't yet
438 		 * support swapping, so it could fail.
439 		 */
440 		madvise(area_dst, page_size, MADV_PAGEOUT);
441 	}
442 
443 	/* Uffd-wp should persist even swapped out */
444 	value = pagemap_get_entry(pagemap_fd, area_dst);
445 	pagemap_check_wp(value, true);
446 	if (pagemap_test_fork(uffd, with_event, false)) {
447 		uffd_test_fail("Detected %s uffd-wp bit in child in zapped pte",
448 			       with_event ? "missing" : "stall");
449 		goto out;
450 	}
451 
452 	/* Unprotect; this tests swap pte modifications */
453 	wp_range(uffd, (uint64_t)area_dst, page_size, false);
454 	value = pagemap_get_entry(pagemap_fd, area_dst);
455 	pagemap_check_wp(value, false);
456 
457 	/* Fault in the page from disk */
458 	*area_dst = 2;
459 	value = pagemap_get_entry(pagemap_fd, area_dst);
460 	pagemap_check_wp(value, false);
461 	uffd_test_pass();
462 out:
463 	if (uffd_unregister(uffd, area_dst, nr_pages * page_size))
464 		err("unregister failed");
465 	close(pagemap_fd);
466 }
467 
468 static void uffd_wp_fork_test(uffd_test_args_t *args)
469 {
470 	uffd_wp_fork_test_common(args, false);
471 }
472 
473 static void uffd_wp_fork_with_event_test(uffd_test_args_t *args)
474 {
475 	uffd_wp_fork_test_common(args, true);
476 }
477 
478 static void uffd_wp_fork_pin_test_common(uffd_test_args_t *args,
479 					 bool with_event)
480 {
481 	int pagemap_fd;
482 	pin_args pin_args = {};
483 
484 	if (uffd_register(uffd, area_dst, page_size, false, true, false))
485 		err("register failed");
486 
487 	pagemap_fd = pagemap_open();
488 
489 	/* Touch the page */
490 	*area_dst = 1;
491 	wp_range(uffd, (uint64_t)area_dst, page_size, true);
492 
493 	/*
494 	 * 1. First pin, then fork().  This tests fork() special path when
495 	 * doing early CoW if the page is private.
496 	 */
497 	if (pin_pages(&pin_args, area_dst, page_size)) {
498 		uffd_test_skip("Possibly CONFIG_GUP_TEST missing "
499 			       "or unprivileged");
500 		close(pagemap_fd);
501 		uffd_unregister(uffd, area_dst, page_size);
502 		return;
503 	}
504 
505 	if (pagemap_test_fork(uffd, with_event, false)) {
506 		uffd_test_fail("Detected %s uffd-wp bit in early CoW of fork()",
507 			       with_event ? "missing" : "stall");
508 		unpin_pages(&pin_args);
509 		goto out;
510 	}
511 
512 	unpin_pages(&pin_args);
513 
514 	/*
515 	 * 2. First fork(), then pin (in the child, where test_pin==true).
516 	 * This tests COR, aka, page unsharing on private memories.
517 	 */
518 	if (pagemap_test_fork(uffd, with_event, true)) {
519 		uffd_test_fail("Detected %s uffd-wp bit when RO pin",
520 			       with_event ? "missing" : "stall");
521 		goto out;
522 	}
523 	uffd_test_pass();
524 out:
525 	if (uffd_unregister(uffd, area_dst, page_size))
526 		err("register failed");
527 	close(pagemap_fd);
528 }
529 
530 static void uffd_wp_fork_pin_test(uffd_test_args_t *args)
531 {
532 	uffd_wp_fork_pin_test_common(args, false);
533 }
534 
535 static void uffd_wp_fork_pin_with_event_test(uffd_test_args_t *args)
536 {
537 	uffd_wp_fork_pin_test_common(args, true);
538 }
539 
540 static void check_memory_contents(char *p)
541 {
542 	unsigned long i, j;
543 	uint8_t expected_byte;
544 
545 	for (i = 0; i < nr_pages; ++i) {
546 		expected_byte = ~((uint8_t)(i % ((uint8_t)-1)));
547 		for (j = 0; j < page_size; j++) {
548 			uint8_t v = *(uint8_t *)(p + (i * page_size) + j);
549 			if (v != expected_byte)
550 				err("unexpected page contents");
551 		}
552 	}
553 }
554 
555 static void uffd_minor_test_common(bool test_collapse, bool test_wp)
556 {
557 	unsigned long p;
558 	pthread_t uffd_mon;
559 	char c;
560 	struct uffd_args args = { 0 };
561 
562 	/*
563 	 * NOTE: MADV_COLLAPSE is not yet compatible with WP, so testing
564 	 * both do not make much sense.
565 	 */
566 	assert(!(test_collapse && test_wp));
567 
568 	if (uffd_register(uffd, area_dst_alias, nr_pages * page_size,
569 			  /* NOTE! MADV_COLLAPSE may not work with uffd-wp */
570 			  false, test_wp, true))
571 		err("register failure");
572 
573 	/*
574 	 * After registering with UFFD, populate the non-UFFD-registered side of
575 	 * the shared mapping. This should *not* trigger any UFFD minor faults.
576 	 */
577 	for (p = 0; p < nr_pages; ++p)
578 		memset(area_dst + (p * page_size), p % ((uint8_t)-1),
579 		       page_size);
580 
581 	args.apply_wp = test_wp;
582 	if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args))
583 		err("uffd_poll_thread create");
584 
585 	/*
586 	 * Read each of the pages back using the UFFD-registered mapping. We
587 	 * expect that the first time we touch a page, it will result in a minor
588 	 * fault. uffd_poll_thread will resolve the fault by bit-flipping the
589 	 * page's contents, and then issuing a CONTINUE ioctl.
590 	 */
591 	check_memory_contents(area_dst_alias);
592 
593 	if (write(pipefd[1], &c, sizeof(c)) != sizeof(c))
594 		err("pipe write");
595 	if (pthread_join(uffd_mon, NULL))
596 		err("join() failed");
597 
598 	if (test_collapse) {
599 		if (madvise(area_dst_alias, nr_pages * page_size,
600 			    MADV_COLLAPSE)) {
601 			/* It's fine to fail for this one... */
602 			uffd_test_skip("MADV_COLLAPSE failed");
603 			return;
604 		}
605 
606 		uffd_test_ops->check_pmd_mapping(area_dst,
607 						 nr_pages * page_size /
608 						 read_pmd_pagesize());
609 		/*
610 		 * This won't cause uffd-fault - it purely just makes sure there
611 		 * was no corruption.
612 		 */
613 		check_memory_contents(area_dst_alias);
614 	}
615 
616 	if (args.missing_faults != 0 || args.minor_faults != nr_pages)
617 		uffd_test_fail("stats check error");
618 	else
619 		uffd_test_pass();
620 }
621 
622 void uffd_minor_test(uffd_test_args_t *args)
623 {
624 	uffd_minor_test_common(false, false);
625 }
626 
627 void uffd_minor_wp_test(uffd_test_args_t *args)
628 {
629 	uffd_minor_test_common(false, true);
630 }
631 
632 void uffd_minor_collapse_test(uffd_test_args_t *args)
633 {
634 	uffd_minor_test_common(true, false);
635 }
636 
637 static sigjmp_buf jbuf, *sigbuf;
638 
639 static void sighndl(int sig, siginfo_t *siginfo, void *ptr)
640 {
641 	if (sig == SIGBUS) {
642 		if (sigbuf)
643 			siglongjmp(*sigbuf, 1);
644 		abort();
645 	}
646 }
647 
648 /*
649  * For non-cooperative userfaultfd test we fork() a process that will
650  * generate pagefaults, will mremap the area monitored by the
651  * userfaultfd and at last this process will release the monitored
652  * area.
653  * For the anonymous and shared memory the area is divided into two
654  * parts, the first part is accessed before mremap, and the second
655  * part is accessed after mremap. Since hugetlbfs does not support
656  * mremap, the entire monitored area is accessed in a single pass for
657  * HUGETLB_TEST.
658  * The release of the pages currently generates event for shmem and
659  * anonymous memory (UFFD_EVENT_REMOVE), hence it is not checked
660  * for hugetlb.
661  * For signal test(UFFD_FEATURE_SIGBUS), signal_test = 1, we register
662  * monitored area, generate pagefaults and test that signal is delivered.
663  * Use UFFDIO_COPY to allocate missing page and retry. For signal_test = 2
664  * test robustness use case - we release monitored area, fork a process
665  * that will generate pagefaults and verify signal is generated.
666  * This also tests UFFD_FEATURE_EVENT_FORK event along with the signal
667  * feature. Using monitor thread, verify no userfault events are generated.
668  */
669 static int faulting_process(int signal_test, bool wp)
670 {
671 	unsigned long nr, i;
672 	unsigned long long count;
673 	unsigned long split_nr_pages;
674 	unsigned long lastnr;
675 	struct sigaction act;
676 	volatile unsigned long signalled = 0;
677 
678 	split_nr_pages = (nr_pages + 1) / 2;
679 
680 	if (signal_test) {
681 		sigbuf = &jbuf;
682 		memset(&act, 0, sizeof(act));
683 		act.sa_sigaction = sighndl;
684 		act.sa_flags = SA_SIGINFO;
685 		if (sigaction(SIGBUS, &act, 0))
686 			err("sigaction");
687 		lastnr = (unsigned long)-1;
688 	}
689 
690 	for (nr = 0; nr < split_nr_pages; nr++) {
691 		volatile int steps = 1;
692 		unsigned long offset = nr * page_size;
693 
694 		if (signal_test) {
695 			if (sigsetjmp(*sigbuf, 1) != 0) {
696 				if (steps == 1 && nr == lastnr)
697 					err("Signal repeated");
698 
699 				lastnr = nr;
700 				if (signal_test == 1) {
701 					if (steps == 1) {
702 						/* This is a MISSING request */
703 						steps++;
704 						if (copy_page(uffd, offset, wp))
705 							signalled++;
706 					} else {
707 						/* This is a WP request */
708 						assert(steps == 2);
709 						wp_range(uffd,
710 							 (__u64)area_dst +
711 							 offset,
712 							 page_size, false);
713 					}
714 				} else {
715 					signalled++;
716 					continue;
717 				}
718 			}
719 		}
720 
721 		count = *area_count(area_dst, nr);
722 		if (count != count_verify[nr])
723 			err("nr %lu memory corruption %llu %llu\n",
724 			    nr, count, count_verify[nr]);
725 		/*
726 		 * Trigger write protection if there is by writing
727 		 * the same value back.
728 		 */
729 		*area_count(area_dst, nr) = count;
730 	}
731 
732 	if (signal_test)
733 		return signalled != split_nr_pages;
734 
735 	area_dst = mremap(area_dst, nr_pages * page_size,  nr_pages * page_size,
736 			  MREMAP_MAYMOVE | MREMAP_FIXED, area_src);
737 	if (area_dst == MAP_FAILED)
738 		err("mremap");
739 	/* Reset area_src since we just clobbered it */
740 	area_src = NULL;
741 
742 	for (; nr < nr_pages; nr++) {
743 		count = *area_count(area_dst, nr);
744 		if (count != count_verify[nr]) {
745 			err("nr %lu memory corruption %llu %llu\n",
746 			    nr, count, count_verify[nr]);
747 		}
748 		/*
749 		 * Trigger write protection if there is by writing
750 		 * the same value back.
751 		 */
752 		*area_count(area_dst, nr) = count;
753 	}
754 
755 	uffd_test_ops->release_pages(area_dst);
756 
757 	for (nr = 0; nr < nr_pages; nr++)
758 		for (i = 0; i < page_size; i++)
759 			if (*(area_dst + nr * page_size + i) != 0)
760 				err("page %lu offset %lu is not zero", nr, i);
761 
762 	return 0;
763 }
764 
765 static void uffd_sigbus_test_common(bool wp)
766 {
767 	unsigned long userfaults;
768 	pthread_t uffd_mon;
769 	pid_t pid;
770 	int err;
771 	char c;
772 	struct uffd_args args = { 0 };
773 
774 	fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK);
775 
776 	if (uffd_register(uffd, area_dst, nr_pages * page_size,
777 			  true, wp, false))
778 		err("register failure");
779 
780 	if (faulting_process(1, wp))
781 		err("faulting process failed");
782 
783 	uffd_test_ops->release_pages(area_dst);
784 
785 	args.apply_wp = wp;
786 	if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args))
787 		err("uffd_poll_thread create");
788 
789 	pid = fork();
790 	if (pid < 0)
791 		err("fork");
792 
793 	if (!pid)
794 		exit(faulting_process(2, wp));
795 
796 	waitpid(pid, &err, 0);
797 	if (err)
798 		err("faulting process failed");
799 	if (write(pipefd[1], &c, sizeof(c)) != sizeof(c))
800 		err("pipe write");
801 	if (pthread_join(uffd_mon, (void **)&userfaults))
802 		err("pthread_join()");
803 
804 	if (userfaults)
805 		uffd_test_fail("Signal test failed, userfaults: %ld", userfaults);
806 	else
807 		uffd_test_pass();
808 }
809 
810 static void uffd_sigbus_test(uffd_test_args_t *args)
811 {
812 	uffd_sigbus_test_common(false);
813 }
814 
815 static void uffd_sigbus_wp_test(uffd_test_args_t *args)
816 {
817 	uffd_sigbus_test_common(true);
818 }
819 
820 static void uffd_events_test_common(bool wp)
821 {
822 	pthread_t uffd_mon;
823 	pid_t pid;
824 	int err;
825 	char c;
826 	struct uffd_args args = { 0 };
827 
828 	fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK);
829 	if (uffd_register(uffd, area_dst, nr_pages * page_size,
830 			  true, wp, false))
831 		err("register failure");
832 
833 	args.apply_wp = wp;
834 	if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args))
835 		err("uffd_poll_thread create");
836 
837 	pid = fork();
838 	if (pid < 0)
839 		err("fork");
840 
841 	if (!pid)
842 		exit(faulting_process(0, wp));
843 
844 	waitpid(pid, &err, 0);
845 	if (err)
846 		err("faulting process failed");
847 	if (write(pipefd[1], &c, sizeof(c)) != sizeof(c))
848 		err("pipe write");
849 	if (pthread_join(uffd_mon, NULL))
850 		err("pthread_join()");
851 
852 	if (args.missing_faults != nr_pages)
853 		uffd_test_fail("Fault counts wrong");
854 	else
855 		uffd_test_pass();
856 }
857 
858 static void uffd_events_test(uffd_test_args_t *args)
859 {
860 	uffd_events_test_common(false);
861 }
862 
863 static void uffd_events_wp_test(uffd_test_args_t *args)
864 {
865 	uffd_events_test_common(true);
866 }
867 
868 static void retry_uffdio_zeropage(int ufd,
869 				  struct uffdio_zeropage *uffdio_zeropage)
870 {
871 	uffd_test_ops->alias_mapping(&uffdio_zeropage->range.start,
872 				     uffdio_zeropage->range.len,
873 				     0);
874 	if (ioctl(ufd, UFFDIO_ZEROPAGE, uffdio_zeropage)) {
875 		if (uffdio_zeropage->zeropage != -EEXIST)
876 			err("UFFDIO_ZEROPAGE error: %"PRId64,
877 			    (int64_t)uffdio_zeropage->zeropage);
878 	} else {
879 		err("UFFDIO_ZEROPAGE error: %"PRId64,
880 		    (int64_t)uffdio_zeropage->zeropage);
881 	}
882 }
883 
884 static bool do_uffdio_zeropage(int ufd, bool has_zeropage)
885 {
886 	struct uffdio_zeropage uffdio_zeropage = { 0 };
887 	int ret;
888 	__s64 res;
889 
890 	uffdio_zeropage.range.start = (unsigned long) area_dst;
891 	uffdio_zeropage.range.len = page_size;
892 	uffdio_zeropage.mode = 0;
893 	ret = ioctl(ufd, UFFDIO_ZEROPAGE, &uffdio_zeropage);
894 	res = uffdio_zeropage.zeropage;
895 	if (ret) {
896 		/* real retval in ufdio_zeropage.zeropage */
897 		if (has_zeropage)
898 			err("UFFDIO_ZEROPAGE error: %"PRId64, (int64_t)res);
899 		else if (res != -EINVAL)
900 			err("UFFDIO_ZEROPAGE not -EINVAL");
901 	} else if (has_zeropage) {
902 		if (res != page_size)
903 			err("UFFDIO_ZEROPAGE unexpected size");
904 		else
905 			retry_uffdio_zeropage(ufd, &uffdio_zeropage);
906 		return true;
907 	} else
908 		err("UFFDIO_ZEROPAGE succeeded");
909 
910 	return false;
911 }
912 
913 /*
914  * Registers a range with MISSING mode only for zeropage test.  Return true
915  * if UFFDIO_ZEROPAGE supported, false otherwise. Can't use uffd_register()
916  * because we want to detect .ioctls along the way.
917  */
918 static bool
919 uffd_register_detect_zeropage(int uffd, void *addr, uint64_t len)
920 {
921 	uint64_t ioctls = 0;
922 
923 	if (uffd_register_with_ioctls(uffd, addr, len, true,
924 				      false, false, &ioctls))
925 		err("zeropage register fail");
926 
927 	return ioctls & (1 << _UFFDIO_ZEROPAGE);
928 }
929 
930 /* exercise UFFDIO_ZEROPAGE */
931 static void uffd_zeropage_test(uffd_test_args_t *args)
932 {
933 	bool has_zeropage;
934 	int i;
935 
936 	has_zeropage = uffd_register_detect_zeropage(uffd, area_dst, page_size);
937 	if (area_dst_alias)
938 		/* Ignore the retval; we already have it */
939 		uffd_register_detect_zeropage(uffd, area_dst_alias, page_size);
940 
941 	if (do_uffdio_zeropage(uffd, has_zeropage))
942 		for (i = 0; i < page_size; i++)
943 			if (area_dst[i] != 0)
944 				err("data non-zero at offset %d\n", i);
945 
946 	if (uffd_unregister(uffd, area_dst, page_size))
947 		err("unregister");
948 
949 	if (area_dst_alias && uffd_unregister(uffd, area_dst_alias, page_size))
950 		err("unregister");
951 
952 	uffd_test_pass();
953 }
954 
955 /*
956  * Test the returned uffdio_register.ioctls with different register modes.
957  * Note that _UFFDIO_ZEROPAGE is tested separately in the zeropage test.
958  */
959 static void
960 do_register_ioctls_test(uffd_test_args_t *args, bool miss, bool wp, bool minor)
961 {
962 	uint64_t ioctls = 0, expected = BIT_ULL(_UFFDIO_WAKE);
963 	mem_type_t *mem_type = args->mem_type;
964 	int ret;
965 
966 	ret = uffd_register_with_ioctls(uffd, area_dst, page_size,
967 					miss, wp, minor, &ioctls);
968 
969 	/*
970 	 * Handle special cases of UFFDIO_REGISTER here where it should
971 	 * just fail with -EINVAL first..
972 	 *
973 	 * Case 1: register MINOR on anon
974 	 * Case 2: register with no mode selected
975 	 */
976 	if ((minor && (mem_type->mem_flag == MEM_ANON)) ||
977 	    (!miss && !wp && !minor)) {
978 		if (ret != -EINVAL)
979 			err("register (miss=%d, wp=%d, minor=%d) failed "
980 			    "with wrong errno=%d", miss, wp, minor, ret);
981 		return;
982 	}
983 
984 	/* UFFDIO_REGISTER should succeed, then check ioctls returned */
985 	if (miss)
986 		expected |= BIT_ULL(_UFFDIO_COPY);
987 	if (wp)
988 		expected |= BIT_ULL(_UFFDIO_WRITEPROTECT);
989 	if (minor)
990 		expected |= BIT_ULL(_UFFDIO_CONTINUE);
991 
992 	if ((ioctls & expected) != expected)
993 		err("unexpected uffdio_register.ioctls "
994 		    "(miss=%d, wp=%d, minor=%d): expected=0x%"PRIx64", "
995 		    "returned=0x%"PRIx64, miss, wp, minor, expected, ioctls);
996 
997 	if (uffd_unregister(uffd, area_dst, page_size))
998 		err("unregister");
999 }
1000 
1001 static void uffd_register_ioctls_test(uffd_test_args_t *args)
1002 {
1003 	int miss, wp, minor;
1004 
1005 	for (miss = 0; miss <= 1; miss++)
1006 		for (wp = 0; wp <= 1; wp++)
1007 			for (minor = 0; minor <= 1; minor++)
1008 				do_register_ioctls_test(args, miss, wp, minor);
1009 
1010 	uffd_test_pass();
1011 }
1012 
1013 uffd_test_case_t uffd_tests[] = {
1014 	{
1015 		/* Test returned uffdio_register.ioctls. */
1016 		.name = "register-ioctls",
1017 		.uffd_fn = uffd_register_ioctls_test,
1018 		.mem_targets = MEM_ALL,
1019 		.uffd_feature_required = UFFD_FEATURE_MISSING_HUGETLBFS |
1020 		UFFD_FEATURE_MISSING_SHMEM |
1021 		UFFD_FEATURE_PAGEFAULT_FLAG_WP |
1022 		UFFD_FEATURE_WP_HUGETLBFS_SHMEM |
1023 		UFFD_FEATURE_MINOR_HUGETLBFS |
1024 		UFFD_FEATURE_MINOR_SHMEM,
1025 	},
1026 	{
1027 		.name = "zeropage",
1028 		.uffd_fn = uffd_zeropage_test,
1029 		.mem_targets = MEM_ALL,
1030 		.uffd_feature_required = 0,
1031 	},
1032 	{
1033 		.name = "wp-fork",
1034 		.uffd_fn = uffd_wp_fork_test,
1035 		.mem_targets = MEM_ALL,
1036 		.uffd_feature_required = UFFD_FEATURE_PAGEFAULT_FLAG_WP |
1037 		UFFD_FEATURE_WP_HUGETLBFS_SHMEM,
1038 	},
1039 	{
1040 		.name = "wp-fork-with-event",
1041 		.uffd_fn = uffd_wp_fork_with_event_test,
1042 		.mem_targets = MEM_ALL,
1043 		.uffd_feature_required = UFFD_FEATURE_PAGEFAULT_FLAG_WP |
1044 		UFFD_FEATURE_WP_HUGETLBFS_SHMEM |
1045 		/* when set, child process should inherit uffd-wp bits */
1046 		UFFD_FEATURE_EVENT_FORK,
1047 	},
1048 	{
1049 		.name = "wp-fork-pin",
1050 		.uffd_fn = uffd_wp_fork_pin_test,
1051 		.mem_targets = MEM_ALL,
1052 		.uffd_feature_required = UFFD_FEATURE_PAGEFAULT_FLAG_WP |
1053 		UFFD_FEATURE_WP_HUGETLBFS_SHMEM,
1054 	},
1055 	{
1056 		.name = "wp-fork-pin-with-event",
1057 		.uffd_fn = uffd_wp_fork_pin_with_event_test,
1058 		.mem_targets = MEM_ALL,
1059 		.uffd_feature_required = UFFD_FEATURE_PAGEFAULT_FLAG_WP |
1060 		UFFD_FEATURE_WP_HUGETLBFS_SHMEM |
1061 		/* when set, child process should inherit uffd-wp bits */
1062 		UFFD_FEATURE_EVENT_FORK,
1063 	},
1064 	{
1065 		.name = "wp-unpopulated",
1066 		.uffd_fn = uffd_wp_unpopulated_test,
1067 		.mem_targets = MEM_ANON,
1068 		.uffd_feature_required =
1069 		UFFD_FEATURE_PAGEFAULT_FLAG_WP | UFFD_FEATURE_WP_UNPOPULATED,
1070 	},
1071 	{
1072 		.name = "minor",
1073 		.uffd_fn = uffd_minor_test,
1074 		.mem_targets = MEM_SHMEM | MEM_HUGETLB,
1075 		.uffd_feature_required =
1076 		UFFD_FEATURE_MINOR_HUGETLBFS | UFFD_FEATURE_MINOR_SHMEM,
1077 	},
1078 	{
1079 		.name = "minor-wp",
1080 		.uffd_fn = uffd_minor_wp_test,
1081 		.mem_targets = MEM_SHMEM | MEM_HUGETLB,
1082 		.uffd_feature_required =
1083 		UFFD_FEATURE_MINOR_HUGETLBFS | UFFD_FEATURE_MINOR_SHMEM |
1084 		UFFD_FEATURE_PAGEFAULT_FLAG_WP |
1085 		/*
1086 		 * HACK: here we leveraged WP_UNPOPULATED to detect whether
1087 		 * minor mode supports wr-protect.  There's no feature flag
1088 		 * for it so this is the best we can test against.
1089 		 */
1090 		UFFD_FEATURE_WP_UNPOPULATED,
1091 	},
1092 	{
1093 		.name = "minor-collapse",
1094 		.uffd_fn = uffd_minor_collapse_test,
1095 		/* MADV_COLLAPSE only works with shmem */
1096 		.mem_targets = MEM_SHMEM,
1097 		/* We can't test MADV_COLLAPSE, so try our luck */
1098 		.uffd_feature_required = UFFD_FEATURE_MINOR_SHMEM,
1099 	},
1100 	{
1101 		.name = "sigbus",
1102 		.uffd_fn = uffd_sigbus_test,
1103 		.mem_targets = MEM_ALL,
1104 		.uffd_feature_required = UFFD_FEATURE_SIGBUS |
1105 		UFFD_FEATURE_EVENT_FORK,
1106 	},
1107 	{
1108 		.name = "sigbus-wp",
1109 		.uffd_fn = uffd_sigbus_wp_test,
1110 		.mem_targets = MEM_ALL,
1111 		.uffd_feature_required = UFFD_FEATURE_SIGBUS |
1112 		UFFD_FEATURE_EVENT_FORK | UFFD_FEATURE_PAGEFAULT_FLAG_WP,
1113 	},
1114 	{
1115 		.name = "events",
1116 		.uffd_fn = uffd_events_test,
1117 		.mem_targets = MEM_ALL,
1118 		.uffd_feature_required = UFFD_FEATURE_EVENT_FORK |
1119 		UFFD_FEATURE_EVENT_REMAP | UFFD_FEATURE_EVENT_REMOVE,
1120 	},
1121 	{
1122 		.name = "events-wp",
1123 		.uffd_fn = uffd_events_wp_test,
1124 		.mem_targets = MEM_ALL,
1125 		.uffd_feature_required = UFFD_FEATURE_EVENT_FORK |
1126 		UFFD_FEATURE_EVENT_REMAP | UFFD_FEATURE_EVENT_REMOVE |
1127 		UFFD_FEATURE_PAGEFAULT_FLAG_WP |
1128 		UFFD_FEATURE_WP_HUGETLBFS_SHMEM,
1129 	},
1130 };
1131 
1132 static void usage(const char *prog)
1133 {
1134 	printf("usage: %s [-f TESTNAME]\n", prog);
1135 	puts("");
1136 	puts(" -f: test name to filter (e.g., event)");
1137 	puts(" -h: show the help msg");
1138 	puts(" -l: list tests only");
1139 	puts("");
1140 	exit(KSFT_FAIL);
1141 }
1142 
1143 int main(int argc, char *argv[])
1144 {
1145 	int n_tests = sizeof(uffd_tests) / sizeof(uffd_test_case_t);
1146 	int n_mems = sizeof(mem_types) / sizeof(mem_type_t);
1147 	const char *test_filter = NULL;
1148 	bool list_only = false;
1149 	uffd_test_case_t *test;
1150 	mem_type_t *mem_type;
1151 	uffd_test_args_t args;
1152 	char test_name[128];
1153 	const char *errmsg;
1154 	int has_uffd, opt;
1155 	int i, j;
1156 
1157 	while ((opt = getopt(argc, argv, "f:hl")) != -1) {
1158 		switch (opt) {
1159 		case 'f':
1160 			test_filter = optarg;
1161 			break;
1162 		case 'l':
1163 			list_only = true;
1164 			break;
1165 		case 'h':
1166 		default:
1167 			/* Unknown */
1168 			usage(argv[0]);
1169 			break;
1170 		}
1171 	}
1172 
1173 	if (!test_filter && !list_only) {
1174 		has_uffd = test_uffd_api(false);
1175 		has_uffd |= test_uffd_api(true);
1176 
1177 		if (!has_uffd) {
1178 			printf("Userfaultfd not supported or unprivileged, skip all tests\n");
1179 			exit(KSFT_SKIP);
1180 		}
1181 	}
1182 
1183 	for (i = 0; i < n_tests; i++) {
1184 		test = &uffd_tests[i];
1185 		if (test_filter && !strstr(test->name, test_filter))
1186 			continue;
1187 		if (list_only) {
1188 			printf("%s\n", test->name);
1189 			continue;
1190 		}
1191 		for (j = 0; j < n_mems; j++) {
1192 			mem_type = &mem_types[j];
1193 			if (!(test->mem_targets & mem_type->mem_flag))
1194 				continue;
1195 			snprintf(test_name, sizeof(test_name),
1196 				 "%s on %s", test->name, mem_type->name);
1197 
1198 			uffd_test_start(test_name);
1199 			if (!uffd_feature_supported(test)) {
1200 				uffd_test_skip("feature missing");
1201 				continue;
1202 			}
1203 			if (uffd_setup_environment(&args, test, mem_type,
1204 						   &errmsg)) {
1205 				uffd_test_skip(errmsg);
1206 				continue;
1207 			}
1208 			test->uffd_fn(&args);
1209 		}
1210 	}
1211 
1212 	if (!list_only)
1213 		uffd_test_report();
1214 
1215 	return ksft_get_fail_cnt() ? KSFT_FAIL : KSFT_PASS;
1216 }
1217 
1218 #else /* __NR_userfaultfd */
1219 
1220 #warning "missing __NR_userfaultfd definition"
1221 
1222 int main(void)
1223 {
1224 	printf("Skipping %s (missing __NR_userfaultfd)\n", __file__);
1225 	return KSFT_SKIP;
1226 }
1227 
1228 #endif /* __NR_userfaultfd */
1229