1 // SPDX-License-Identifier: GPL-2.0
2 #define _GNU_SOURCE
3 #define __EXPORTED_HEADERS__
4 
5 #include <errno.h>
6 #include <inttypes.h>
7 #include <limits.h>
8 #include <linux/falloc.h>
9 #include <fcntl.h>
10 #include <linux/memfd.h>
11 #include <sched.h>
12 #include <stdio.h>
13 #include <stdlib.h>
14 #include <signal.h>
15 #include <string.h>
16 #include <sys/mman.h>
17 #include <sys/stat.h>
18 #include <sys/syscall.h>
19 #include <sys/wait.h>
20 #include <unistd.h>
21 
22 #include "common.h"
23 
/* Banner prefixes printed by the tests: regular run and hugetlbfs run. */
#define MEMFD_STR	"memfd:"
#define MEMFD_HUGE_STR	"memfd-hugetlb:"
/* Banner suffix main() passes to the share tests for the shared file-table run. */
#define SHARED_FT_STR	"(shared file-table)"

/* Default memfd size in bytes; main() overrides it for hugetlbfs runs. */
#define MFD_DEF_SIZE 8192
/* Size in bytes of the stack allocated for clone()d helper children. */
#define STACK_SIZE 65536

/* Local definition of the exec seal bit (presumably for older headers - confirm). */
#define F_SEAL_EXEC	0x0020

/* Seal set test_exec_seal() expects after sealing an executable memfd with F_SEAL_EXEC. */
#define F_WX_SEALS (F_SEAL_SHRINK | \
		    F_SEAL_GROW | \
		    F_SEAL_WRITE | \
		    F_SEAL_FUTURE_WRITE | \
		    F_SEAL_EXEC)

/* Local definition of the memfd_create() flag (presumably for older headers - confirm). */
#define MFD_NOEXEC_SEAL	0x0008U

/*
 * Defaults used when hugetlbfs is not being tested; main() replaces both
 * values when invoked with the "hugetlbfs" argument.
 */
static size_t mfd_def_size = MFD_DEF_SIZE;
static const char *memfd_str = MEMFD_STR;
/* Forward declarations: test_sysctl_child() uses these before their definitions. */
static pid_t spawn_newpid_thread(unsigned int flags, int (*fn)(void *));
static int newpid_thread_fn2(void *arg);
static void join_newpid_thread(pid_t pid);
49 
/*
 * Read the /proc/self/fd symlink of @fd into @buf, storing at most
 * @bufsize - 1 bytes and always NUL-terminating the result.
 * Aborts if the path cannot be formatted or the link cannot be read.
 *
 * Returns the number of bytes stored, not counting the terminator.
 */
static ssize_t fd2name(int fd, char *buf, size_t bufsize)
{
	char link_path[PATH_MAX];
	ssize_t len;

	if (snprintf(link_path, PATH_MAX, "/proc/self/fd/%d", fd) < 0) {
		printf("snprintf(%d) failed on %m\n", fd);
		abort();
	}

	/* Hold one byte back so the terminator below always fits. */
	len = readlink(link_path, buf, bufsize - 1);
	if (len == -1) {
		printf("readlink(%s) failed %m\n", link_path);
		abort();
	}

	buf[len] = '\0';
	return len;
}
73 
/*
 * Create a memfd called @name with @flags and resize it to @sz bytes.
 * Aborts if either the creation or the ftruncate() fails.
 *
 * Returns the new file descriptor.
 */
static int mfd_assert_new(const char *name, loff_t sz, unsigned int flags)
{
	int mfd = sys_memfd_create(name, flags);

	if (mfd < 0) {
		printf("memfd_create(\"%s\", %u) failed: %m\n",
		       name, flags);
		abort();
	}

	if (ftruncate(mfd, sz) < 0) {
		printf("ftruncate(%llu) failed: %m\n", (unsigned long long)sz);
		abort();
	}

	return mfd;
}
93 
/*
 * Write @val to /proc/sys/vm/memfd_noexec and abort if the file cannot be
 * opened or the write fails. The descriptor is closed before returning so
 * repeated calls do not accumulate open files.
 */
static void sysctl_assert_write(const char *val)
{
	int fd = open("/proc/sys/vm/memfd_noexec", O_WRONLY | O_CLOEXEC);

	if (fd < 0) {
		printf("open sysctl failed\n");
		abort();
	}

	if (write(fd, val, strlen(val)) < 0) {
		printf("write sysctl failed\n");
		abort();
	}

	close(fd);
}
108 
/*
 * Try to write @val to /proc/sys/vm/memfd_noexec and abort if the write is
 * accepted (a failure is the expected outcome). Also aborts if the file
 * cannot be opened. The descriptor is closed before returning so repeated
 * calls do not accumulate open files.
 */
static void sysctl_fail_write(const char *val)
{
	int fd = open("/proc/sys/vm/memfd_noexec", O_WRONLY | O_CLOEXEC);

	if (fd < 0) {
		printf("open sysctl failed\n");
		abort();
	}

	if (write(fd, val, strlen(val)) >= 0) {
		printf("write sysctl %s succeeded, but failure expected\n",
				val);
		abort();
	}

	close(fd);
}
124 
/*
 * Open a second, read-write descriptor for @fd_in through its
 * /proc/self/fd entry. Aborts if the open fails.
 *
 * Returns the newly opened descriptor.
 */
static int mfd_assert_reopen_fd(int fd_in)
{
	char proc_path[100];
	int reopened;

	sprintf(proc_path, "/proc/self/fd/%d", fd_in);

	reopened = open(proc_path, O_RDWR);
	if (reopened >= 0)
		return reopened;

	printf("re-open of existing fd %d failed\n", fd_in);
	abort();
}
140 
/*
 * Expect memfd creation with @name and @flags to be rejected.
 * If a descriptor is returned anyway, it is closed and the test aborts.
 */
static void mfd_fail_new(const char *name, unsigned int flags)
{
	int mfd = sys_memfd_create(name, flags);

	if (mfd < 0)
		return;

	printf("memfd_create(\"%s\", %u) succeeded, but failure expected\n",
	       name, flags);
	close(mfd);
	abort();
}
153 
/*
 * Query the seals of @fd with F_GET_SEALS, aborting if the fcntl() fails.
 *
 * Returns the seal bitmask.
 */
static unsigned int mfd_assert_get_seals(int fd)
{
	int seals = fcntl(fd, F_GET_SEALS);

	if (seals >= 0)
		return (unsigned int)seals;

	printf("GET_SEALS(%d) failed: %m\n", fd);
	abort();
}
166 
/*
 * Abort unless the seals currently set on @fd are exactly @seals.
 * The file's /proc name is looked up first so it can be shown in the
 * diagnostic.
 */
static void mfd_assert_has_seals(int fd, unsigned int seals)
{
	char buf[PATH_MAX];
	unsigned int s;

	fd2name(fd, buf, PATH_MAX);

	s = mfd_assert_get_seals(fd);
	if (s != seals) {
		printf("%u != %u = GET_SEALS(%s)\n", seals, s, buf);
		abort();
	}
}
180 
/*
 * Add @seals to @fd with F_ADD_SEALS and abort if that is refused.
 * The seals present beforehand are fetched only for the diagnostic.
 */
static void mfd_assert_add_seals(int fd, unsigned int seals)
{
	unsigned int before = mfd_assert_get_seals(fd);

	if (fcntl(fd, F_ADD_SEALS, seals) < 0) {
		printf("ADD_SEALS(%d, %u -> %u) failed: %m\n", fd, before, seals);
		abort();
	}
}
193 
/*
 * Expect F_ADD_SEALS with @seals to be refused on @fd; abort if it is
 * accepted. The current seals are read only for the diagnostic, and a
 * failing F_GET_SEALS is reported as 0 rather than treated as an error.
 */
static void mfd_fail_add_seals(int fd, unsigned int seals)
{
	int ret = fcntl(fd, F_GET_SEALS);
	unsigned int before = ret < 0 ? 0 : (unsigned int)ret;

	ret = fcntl(fd, F_ADD_SEALS, seals);
	if (ret < 0)
		return;

	printf("ADD_SEALS(%d, %u -> %u) didn't fail as expected\n",
			fd, before, seals);
	abort();
}
212 
/*
 * Abort unless fstat() succeeds on @fd and reports a file size of
 * exactly @size bytes.
 */
static void mfd_assert_size(int fd, size_t size)
{
	struct stat info;

	if (fstat(fd, &info) < 0) {
		printf("fstat(%d) failed: %m\n", fd);
		abort();
	}

	if (info.st_size != size) {
		printf("wrong file size %lld, but expected %lld\n",
		       (long long)info.st_size, (long long)size);
		abort();
	}
}
228 
/*
 * Duplicate @fd with dup(), aborting on failure.
 *
 * Returns the duplicated descriptor.
 */
static int mfd_assert_dup(int fd)
{
	int copy = dup(fd);

	if (copy >= 0)
		return copy;

	printf("dup(%d) failed: %m\n", fd);
	abort();
}
241 
242 static void *mfd_assert_mmap_shared(int fd)
243 {
244 	void *p;
245 
246 	p = mmap(NULL,
247 		 mfd_def_size,
248 		 PROT_READ | PROT_WRITE,
249 		 MAP_SHARED,
250 		 fd,
251 		 0);
252 	if (p == MAP_FAILED) {
253 		printf("mmap() failed: %m\n");
254 		abort();
255 	}
256 
257 	return p;
258 }
259 
260 static void *mfd_assert_mmap_private(int fd)
261 {
262 	void *p;
263 
264 	p = mmap(NULL,
265 		 mfd_def_size,
266 		 PROT_READ,
267 		 MAP_PRIVATE,
268 		 fd,
269 		 0);
270 	if (p == MAP_FAILED) {
271 		printf("mmap() failed: %m\n");
272 		abort();
273 	}
274 
275 	return p;
276 }
277 
/*
 * Open the /proc/self/fd entry of @fd with the given @flags and @mode.
 * Aborts if the open fails.
 *
 * Returns the newly opened descriptor.
 */
static int mfd_assert_open(int fd, int flags, mode_t mode)
{
	char proc_path[512];
	int opened;

	sprintf(proc_path, "/proc/self/fd/%d", fd);

	opened = open(proc_path, flags, mode);
	if (opened >= 0)
		return opened;

	printf("open(%s) failed: %m\n", proc_path);
	abort();
}
292 
/*
 * Expect opening the /proc/self/fd entry of @fd with @flags and @mode to
 * fail; abort if the open succeeds.
 */
static void mfd_fail_open(int fd, int flags, mode_t mode)
{
	char proc_path[512];

	sprintf(proc_path, "/proc/self/fd/%d", fd);

	if (open(proc_path, flags, mode) < 0)
		return;

	printf("open(%s) didn't fail as expected\n", proc_path);
	abort();
}
305 
306 static void mfd_assert_read(int fd)
307 {
308 	char buf[16];
309 	void *p;
310 	ssize_t l;
311 
312 	l = read(fd, buf, sizeof(buf));
313 	if (l != sizeof(buf)) {
314 		printf("read() failed: %m\n");
315 		abort();
316 	}
317 
318 	/* verify PROT_READ *is* allowed */
319 	p = mmap(NULL,
320 		 mfd_def_size,
321 		 PROT_READ,
322 		 MAP_PRIVATE,
323 		 fd,
324 		 0);
325 	if (p == MAP_FAILED) {
326 		printf("mmap() failed: %m\n");
327 		abort();
328 	}
329 	munmap(p, mfd_def_size);
330 
331 	/* verify MAP_PRIVATE is *always* allowed (even writable) */
332 	p = mmap(NULL,
333 		 mfd_def_size,
334 		 PROT_READ | PROT_WRITE,
335 		 MAP_PRIVATE,
336 		 fd,
337 		 0);
338 	if (p == MAP_FAILED) {
339 		printf("mmap() failed: %m\n");
340 		abort();
341 	}
342 	munmap(p, mfd_def_size);
343 }
344 
345 /* Test that PROT_READ + MAP_SHARED mappings work. */
346 static void mfd_assert_read_shared(int fd)
347 {
348 	void *p;
349 
350 	/* verify PROT_READ and MAP_SHARED *is* allowed */
351 	p = mmap(NULL,
352 		 mfd_def_size,
353 		 PROT_READ,
354 		 MAP_SHARED,
355 		 fd,
356 		 0);
357 	if (p == MAP_FAILED) {
358 		printf("mmap() failed: %m\n");
359 		abort();
360 	}
361 	munmap(p, mfd_def_size);
362 }
363 
364 static void mfd_assert_fork_private_write(int fd)
365 {
366 	int *p;
367 	pid_t pid;
368 
369 	p = mmap(NULL,
370 		 mfd_def_size,
371 		 PROT_READ | PROT_WRITE,
372 		 MAP_PRIVATE,
373 		 fd,
374 		 0);
375 	if (p == MAP_FAILED) {
376 		printf("mmap() failed: %m\n");
377 		abort();
378 	}
379 
380 	p[0] = 22;
381 
382 	pid = fork();
383 	if (pid == 0) {
384 		p[0] = 33;
385 		exit(0);
386 	} else {
387 		waitpid(pid, NULL, 0);
388 
389 		if (p[0] != 22) {
390 			printf("MAP_PRIVATE copy-on-write failed: %m\n");
391 			abort();
392 		}
393 	}
394 
395 	munmap(p, mfd_def_size);
396 }
397 
398 static void mfd_assert_write(int fd)
399 {
400 	ssize_t l;
401 	void *p;
402 	int r;
403 
404 	/*
405 	 * huegtlbfs does not support write, but we want to
406 	 * verify everything else here.
407 	 */
408 	if (!hugetlbfs_test) {
409 		/* verify write() succeeds */
410 		l = write(fd, "\0\0\0\0", 4);
411 		if (l != 4) {
412 			printf("write() failed: %m\n");
413 			abort();
414 		}
415 	}
416 
417 	/* verify PROT_READ | PROT_WRITE is allowed */
418 	p = mmap(NULL,
419 		 mfd_def_size,
420 		 PROT_READ | PROT_WRITE,
421 		 MAP_SHARED,
422 		 fd,
423 		 0);
424 	if (p == MAP_FAILED) {
425 		printf("mmap() failed: %m\n");
426 		abort();
427 	}
428 	*(char *)p = 0;
429 	munmap(p, mfd_def_size);
430 
431 	/* verify PROT_WRITE is allowed */
432 	p = mmap(NULL,
433 		 mfd_def_size,
434 		 PROT_WRITE,
435 		 MAP_SHARED,
436 		 fd,
437 		 0);
438 	if (p == MAP_FAILED) {
439 		printf("mmap() failed: %m\n");
440 		abort();
441 	}
442 	*(char *)p = 0;
443 	munmap(p, mfd_def_size);
444 
445 	/* verify PROT_READ with MAP_SHARED is allowed and a following
446 	 * mprotect(PROT_WRITE) allows writing */
447 	p = mmap(NULL,
448 		 mfd_def_size,
449 		 PROT_READ,
450 		 MAP_SHARED,
451 		 fd,
452 		 0);
453 	if (p == MAP_FAILED) {
454 		printf("mmap() failed: %m\n");
455 		abort();
456 	}
457 
458 	r = mprotect(p, mfd_def_size, PROT_READ | PROT_WRITE);
459 	if (r < 0) {
460 		printf("mprotect() failed: %m\n");
461 		abort();
462 	}
463 
464 	*(char *)p = 0;
465 	munmap(p, mfd_def_size);
466 
467 	/* verify PUNCH_HOLE works */
468 	r = fallocate(fd,
469 		      FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
470 		      0,
471 		      mfd_def_size);
472 	if (r < 0) {
473 		printf("fallocate(PUNCH_HOLE) failed: %m\n");
474 		abort();
475 	}
476 }
477 
478 static void mfd_fail_write(int fd)
479 {
480 	ssize_t l;
481 	void *p;
482 	int r;
483 
484 	/* verify write() fails */
485 	l = write(fd, "data", 4);
486 	if (l != -EPERM) {
487 		printf("expected EPERM on write(), but got %d: %m\n", (int)l);
488 		abort();
489 	}
490 
491 	/* verify PROT_READ | PROT_WRITE is not allowed */
492 	p = mmap(NULL,
493 		 mfd_def_size,
494 		 PROT_READ | PROT_WRITE,
495 		 MAP_SHARED,
496 		 fd,
497 		 0);
498 	if (p != MAP_FAILED) {
499 		printf("mmap() didn't fail as expected\n");
500 		abort();
501 	}
502 
503 	/* verify PROT_WRITE is not allowed */
504 	p = mmap(NULL,
505 		 mfd_def_size,
506 		 PROT_WRITE,
507 		 MAP_SHARED,
508 		 fd,
509 		 0);
510 	if (p != MAP_FAILED) {
511 		printf("mmap() didn't fail as expected\n");
512 		abort();
513 	}
514 
515 	/* Verify PROT_READ with MAP_SHARED with a following mprotect is not
516 	 * allowed. Note that for r/w the kernel already prevents the mmap. */
517 	p = mmap(NULL,
518 		 mfd_def_size,
519 		 PROT_READ,
520 		 MAP_SHARED,
521 		 fd,
522 		 0);
523 	if (p != MAP_FAILED) {
524 		r = mprotect(p, mfd_def_size, PROT_READ | PROT_WRITE);
525 		if (r >= 0) {
526 			printf("mmap()+mprotect() didn't fail as expected\n");
527 			abort();
528 		}
529 		munmap(p, mfd_def_size);
530 	}
531 
532 	/* verify PUNCH_HOLE fails */
533 	r = fallocate(fd,
534 		      FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
535 		      0,
536 		      mfd_def_size);
537 	if (r >= 0) {
538 		printf("fallocate(PUNCH_HOLE) didn't fail as expected\n");
539 		abort();
540 	}
541 }
542 
543 static void mfd_assert_shrink(int fd)
544 {
545 	int r, fd2;
546 
547 	r = ftruncate(fd, mfd_def_size / 2);
548 	if (r < 0) {
549 		printf("ftruncate(SHRINK) failed: %m\n");
550 		abort();
551 	}
552 
553 	mfd_assert_size(fd, mfd_def_size / 2);
554 
555 	fd2 = mfd_assert_open(fd,
556 			      O_RDWR | O_CREAT | O_TRUNC,
557 			      S_IRUSR | S_IWUSR);
558 	close(fd2);
559 
560 	mfd_assert_size(fd, 0);
561 }
562 
563 static void mfd_fail_shrink(int fd)
564 {
565 	int r;
566 
567 	r = ftruncate(fd, mfd_def_size / 2);
568 	if (r >= 0) {
569 		printf("ftruncate(SHRINK) didn't fail as expected\n");
570 		abort();
571 	}
572 
573 	mfd_fail_open(fd,
574 		      O_RDWR | O_CREAT | O_TRUNC,
575 		      S_IRUSR | S_IWUSR);
576 }
577 
578 static void mfd_assert_grow(int fd)
579 {
580 	int r;
581 
582 	r = ftruncate(fd, mfd_def_size * 2);
583 	if (r < 0) {
584 		printf("ftruncate(GROW) failed: %m\n");
585 		abort();
586 	}
587 
588 	mfd_assert_size(fd, mfd_def_size * 2);
589 
590 	r = fallocate(fd,
591 		      0,
592 		      0,
593 		      mfd_def_size * 4);
594 	if (r < 0) {
595 		printf("fallocate(ALLOC) failed: %m\n");
596 		abort();
597 	}
598 
599 	mfd_assert_size(fd, mfd_def_size * 4);
600 }
601 
602 static void mfd_fail_grow(int fd)
603 {
604 	int r;
605 
606 	r = ftruncate(fd, mfd_def_size * 2);
607 	if (r >= 0) {
608 		printf("ftruncate(GROW) didn't fail as expected\n");
609 		abort();
610 	}
611 
612 	r = fallocate(fd,
613 		      0,
614 		      0,
615 		      mfd_def_size * 4);
616 	if (r >= 0) {
617 		printf("fallocate(ALLOC) didn't fail as expected\n");
618 		abort();
619 	}
620 }
621 
622 static void mfd_assert_grow_write(int fd)
623 {
624 	static char *buf;
625 	ssize_t l;
626 
627 	/* hugetlbfs does not support write */
628 	if (hugetlbfs_test)
629 		return;
630 
631 	buf = malloc(mfd_def_size * 8);
632 	if (!buf) {
633 		printf("malloc(%zu) failed: %m\n", mfd_def_size * 8);
634 		abort();
635 	}
636 
637 	l = pwrite(fd, buf, mfd_def_size * 8, 0);
638 	if (l != (mfd_def_size * 8)) {
639 		printf("pwrite() failed: %m\n");
640 		abort();
641 	}
642 
643 	mfd_assert_size(fd, mfd_def_size * 8);
644 }
645 
646 static void mfd_fail_grow_write(int fd)
647 {
648 	static char *buf;
649 	ssize_t l;
650 
651 	/* hugetlbfs does not support write */
652 	if (hugetlbfs_test)
653 		return;
654 
655 	buf = malloc(mfd_def_size * 8);
656 	if (!buf) {
657 		printf("malloc(%zu) failed: %m\n", mfd_def_size * 8);
658 		abort();
659 	}
660 
661 	l = pwrite(fd, buf, mfd_def_size * 8, 0);
662 	if (l == (mfd_def_size * 8)) {
663 		printf("pwrite() didn't fail as expected\n");
664 		abort();
665 	}
666 }
667 
/*
 * Abort unless the permission bits (st_mode & 07777) of @fd equal @mode.
 * The file's /proc name is looked up first so it can be shown in the
 * diagnostics.
 */
static void mfd_assert_mode(int fd, int mode)
{
	struct stat st;
	char buf[PATH_MAX];

	fd2name(fd, buf, PATH_MAX);

	if (fstat(fd, &st) < 0) {
		printf("fstat(%s) failed: %m\n", buf);
		abort();
	}

	if ((st.st_mode & 07777) != mode) {
		printf("fstat(%s) wrong file mode 0%04o, but expected 0%04o\n",
		       buf, (int)st.st_mode & 07777, mode);
		abort();
	}
}
687 
/*
 * Change the permission bits of @fd to @mode with fchmod() and confirm
 * through mfd_assert_mode() that they took effect. Aborts on failure.
 */
static void mfd_assert_chmod(int fd, int mode)
{
	char buf[PATH_MAX];

	fd2name(fd, buf, PATH_MAX);

	if (fchmod(fd, mode) < 0) {
		printf("fchmod(%s, 0%04o) failed: %m\n", buf, mode);
		abort();
	}

	mfd_assert_mode(fd, mode);
}
702 
/*
 * Expect fchmod(@fd, @mode) to be refused and the permission bits to stay
 * as they were before the attempt. Aborts if the initial fstat() fails,
 * if fchmod() succeeds, or if the mode bits changed.
 */
static void mfd_fail_chmod(int fd, int mode)
{
	struct stat st;
	char buf[PATH_MAX];

	fd2name(fd, buf, PATH_MAX);

	/* remember the current mode so it can be compared afterwards */
	if (fstat(fd, &st) < 0) {
		printf("fstat(%s) failed: %m\n", buf);
		abort();
	}

	if (fchmod(fd, mode) == 0) {
		printf("fchmod(%s, 0%04o) didn't fail as expected\n",
		       buf, mode);
		abort();
	}

	/* verify that file mode bits did not change */
	mfd_assert_mode(fd, st.st_mode & 07777);
}
725 
/*
 * Entry point of the idle helper child: waits in sigwait() for SIGTERM
 * and then returns 0. @arg is unused.
 */
static int idle_thread_fn(void *arg)
{
	sigset_t term_only;
	int received;

	/* placeholder waiter; SIGTERM terminates us anyway */
	sigemptyset(&term_only);
	sigaddset(&term_only, SIGTERM);
	sigwait(&term_only, &received);

	return 0;
}
738 
739 static pid_t spawn_idle_thread(unsigned int flags)
740 {
741 	uint8_t *stack;
742 	pid_t pid;
743 
744 	stack = malloc(STACK_SIZE);
745 	if (!stack) {
746 		printf("malloc(STACK_SIZE) failed: %m\n");
747 		abort();
748 	}
749 
750 	pid = clone(idle_thread_fn,
751 		    stack + STACK_SIZE,
752 		    SIGCHLD | flags,
753 		    NULL);
754 	if (pid < 0) {
755 		printf("clone() failed: %m\n");
756 		abort();
757 	}
758 
759 	return pid;
760 }
761 
/*
 * Stop the idle child @pid by sending it SIGTERM, then reap it.
 * The child's exit status is not inspected.
 */
static void join_idle_thread(pid_t pid)
{
	int *no_status = NULL;

	kill(pid, SIGTERM);
	waitpid(pid, no_status, 0);
}
767 
/*
 * Test memfd_create() syscall
 * Verify syscall-argument validation: a NULL name and over-long names must
 * be rejected, unknown or conflicting flag bits must be rejected, and the
 * MFD_CLOEXEC / MFD_ALLOW_SEALING combinations must be accepted.
 */
static void test_create(void)
{
	char buf[2048];
	int fd;

	printf("%s CREATE\n", memfd_str);

	/* test NULL name */
	mfd_fail_new(NULL, 0);

	/* test over-long name (not zero-terminated) */
	memset(buf, 0xff, sizeof(buf));
	mfd_fail_new(buf, 0);

	/* test over-long zero-terminated name */
	memset(buf, 0xff, sizeof(buf));
	buf[sizeof(buf) - 1] = 0;
	mfd_fail_new(buf, 0);

	/* verify "" is a valid name */
	fd = mfd_assert_new("", 0, 0);
	close(fd);

	/* verify flag words containing invalid bits are rejected */
	mfd_fail_new("", 0x0100);
	mfd_fail_new("", ~MFD_CLOEXEC);
	mfd_fail_new("", ~MFD_ALLOW_SEALING);
	mfd_fail_new("", ~0);
	mfd_fail_new("", 0x80000000U);

	/* verify EXEC and NOEXEC_SEAL can't both be set */
	mfd_fail_new("", MFD_EXEC | MFD_NOEXEC_SEAL);

	/* verify MFD_CLOEXEC is allowed */
	fd = mfd_assert_new("", 0, MFD_CLOEXEC);
	close(fd);

	/* verify MFD_ALLOW_SEALING is allowed */
	fd = mfd_assert_new("", 0, MFD_ALLOW_SEALING);
	close(fd);

	/* verify MFD_ALLOW_SEALING | MFD_CLOEXEC is allowed */
	fd = mfd_assert_new("", 0, MFD_ALLOW_SEALING | MFD_CLOEXEC);
	close(fd);
}
818 
819 /*
820  * Test basic sealing
821  * A very basic sealing test to see whether setting/retrieving seals works.
822  */
823 static void test_basic(void)
824 {
825 	int fd;
826 
827 	printf("%s BASIC\n", memfd_str);
828 
829 	fd = mfd_assert_new("kern_memfd_basic",
830 			    mfd_def_size,
831 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
832 
833 	/* add basic seals */
834 	mfd_assert_has_seals(fd, 0);
835 	mfd_assert_add_seals(fd, F_SEAL_SHRINK |
836 				 F_SEAL_WRITE);
837 	mfd_assert_has_seals(fd, F_SEAL_SHRINK |
838 				 F_SEAL_WRITE);
839 
840 	/* add them again */
841 	mfd_assert_add_seals(fd, F_SEAL_SHRINK |
842 				 F_SEAL_WRITE);
843 	mfd_assert_has_seals(fd, F_SEAL_SHRINK |
844 				 F_SEAL_WRITE);
845 
846 	/* add more seals and seal against sealing */
847 	mfd_assert_add_seals(fd, F_SEAL_GROW | F_SEAL_SEAL);
848 	mfd_assert_has_seals(fd, F_SEAL_SHRINK |
849 				 F_SEAL_GROW |
850 				 F_SEAL_WRITE |
851 				 F_SEAL_SEAL);
852 
853 	/* verify that sealing no longer works */
854 	mfd_fail_add_seals(fd, F_SEAL_GROW);
855 	mfd_fail_add_seals(fd, 0);
856 
857 	close(fd);
858 
859 	/* verify sealing does not work without MFD_ALLOW_SEALING */
860 	fd = mfd_assert_new("kern_memfd_basic",
861 			    mfd_def_size,
862 			    MFD_CLOEXEC);
863 	mfd_assert_has_seals(fd, F_SEAL_SEAL);
864 	mfd_fail_add_seals(fd, F_SEAL_SHRINK |
865 			       F_SEAL_GROW |
866 			       F_SEAL_WRITE);
867 	mfd_assert_has_seals(fd, F_SEAL_SEAL);
868 	close(fd);
869 }
870 
871 /*
872  * Test SEAL_WRITE
873  * Test whether SEAL_WRITE actually prevents modifications.
874  */
875 static void test_seal_write(void)
876 {
877 	int fd;
878 
879 	printf("%s SEAL-WRITE\n", memfd_str);
880 
881 	fd = mfd_assert_new("kern_memfd_seal_write",
882 			    mfd_def_size,
883 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
884 	mfd_assert_has_seals(fd, 0);
885 	mfd_assert_add_seals(fd, F_SEAL_WRITE);
886 	mfd_assert_has_seals(fd, F_SEAL_WRITE);
887 
888 	mfd_assert_read(fd);
889 	mfd_fail_write(fd);
890 	mfd_assert_shrink(fd);
891 	mfd_assert_grow(fd);
892 	mfd_fail_grow_write(fd);
893 
894 	close(fd);
895 }
896 
897 /*
898  * Test SEAL_FUTURE_WRITE
899  * Test whether SEAL_FUTURE_WRITE actually prevents modifications.
900  */
901 static void test_seal_future_write(void)
902 {
903 	int fd, fd2;
904 	void *p;
905 
906 	printf("%s SEAL-FUTURE-WRITE\n", memfd_str);
907 
908 	fd = mfd_assert_new("kern_memfd_seal_future_write",
909 			    mfd_def_size,
910 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
911 
912 	p = mfd_assert_mmap_shared(fd);
913 
914 	mfd_assert_has_seals(fd, 0);
915 
916 	mfd_assert_add_seals(fd, F_SEAL_FUTURE_WRITE);
917 	mfd_assert_has_seals(fd, F_SEAL_FUTURE_WRITE);
918 
919 	/* read should pass, writes should fail */
920 	mfd_assert_read(fd);
921 	mfd_assert_read_shared(fd);
922 	mfd_fail_write(fd);
923 
924 	fd2 = mfd_assert_reopen_fd(fd);
925 	/* read should pass, writes should still fail */
926 	mfd_assert_read(fd2);
927 	mfd_assert_read_shared(fd2);
928 	mfd_fail_write(fd2);
929 
930 	mfd_assert_fork_private_write(fd);
931 
932 	munmap(p, mfd_def_size);
933 	close(fd2);
934 	close(fd);
935 }
936 
937 /*
938  * Test SEAL_SHRINK
939  * Test whether SEAL_SHRINK actually prevents shrinking
940  */
941 static void test_seal_shrink(void)
942 {
943 	int fd;
944 
945 	printf("%s SEAL-SHRINK\n", memfd_str);
946 
947 	fd = mfd_assert_new("kern_memfd_seal_shrink",
948 			    mfd_def_size,
949 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
950 	mfd_assert_has_seals(fd, 0);
951 	mfd_assert_add_seals(fd, F_SEAL_SHRINK);
952 	mfd_assert_has_seals(fd, F_SEAL_SHRINK);
953 
954 	mfd_assert_read(fd);
955 	mfd_assert_write(fd);
956 	mfd_fail_shrink(fd);
957 	mfd_assert_grow(fd);
958 	mfd_assert_grow_write(fd);
959 
960 	close(fd);
961 }
962 
963 /*
964  * Test SEAL_GROW
965  * Test whether SEAL_GROW actually prevents growing
966  */
967 static void test_seal_grow(void)
968 {
969 	int fd;
970 
971 	printf("%s SEAL-GROW\n", memfd_str);
972 
973 	fd = mfd_assert_new("kern_memfd_seal_grow",
974 			    mfd_def_size,
975 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
976 	mfd_assert_has_seals(fd, 0);
977 	mfd_assert_add_seals(fd, F_SEAL_GROW);
978 	mfd_assert_has_seals(fd, F_SEAL_GROW);
979 
980 	mfd_assert_read(fd);
981 	mfd_assert_write(fd);
982 	mfd_assert_shrink(fd);
983 	mfd_fail_grow(fd);
984 	mfd_fail_grow_write(fd);
985 
986 	close(fd);
987 }
988 
989 /*
990  * Test SEAL_SHRINK | SEAL_GROW
991  * Test whether SEAL_SHRINK | SEAL_GROW actually prevents resizing
992  */
993 static void test_seal_resize(void)
994 {
995 	int fd;
996 
997 	printf("%s SEAL-RESIZE\n", memfd_str);
998 
999 	fd = mfd_assert_new("kern_memfd_seal_resize",
1000 			    mfd_def_size,
1001 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1002 	mfd_assert_has_seals(fd, 0);
1003 	mfd_assert_add_seals(fd, F_SEAL_SHRINK | F_SEAL_GROW);
1004 	mfd_assert_has_seals(fd, F_SEAL_SHRINK | F_SEAL_GROW);
1005 
1006 	mfd_assert_read(fd);
1007 	mfd_assert_write(fd);
1008 	mfd_fail_shrink(fd);
1009 	mfd_fail_grow(fd);
1010 	mfd_fail_grow_write(fd);
1011 
1012 	close(fd);
1013 }
1014 
/*
 * Test SEAL_EXEC
 * Test fd is created with exec and allow sealing.
 * chmod() cannot change x bits after sealing.
 *
 * Two scenarios are run:
 *  1. The file is made non-executable (0644) before F_SEAL_EXEC is added.
 *     Only F_SEAL_EXEC is expected afterwards; chmod() calls that keep the
 *     x bits clear must succeed, calls that set any x bit must fail, and
 *     the file must remain writable.
 *  2. The file is still executable (0700) when F_SEAL_EXEC is added.
 *     The full F_WX_SEALS set is expected afterwards, and both chmod() and
 *     writes must fail.
 */
static void test_exec_seal(void)
{
	int fd;

	printf("%s SEAL-EXEC\n", memfd_str);

	printf("%s	Apply SEAL_EXEC\n", memfd_str);
	fd = mfd_assert_new("kern_memfd_seal_exec",
			    mfd_def_size,
			    MFD_CLOEXEC | MFD_ALLOW_SEALING | MFD_EXEC);

	/* created executable; drop the x bits before sealing */
	mfd_assert_mode(fd, 0777);
	mfd_assert_chmod(fd, 0644);

	mfd_assert_has_seals(fd, 0);
	mfd_assert_add_seals(fd, F_SEAL_EXEC);
	mfd_assert_has_seals(fd, F_SEAL_EXEC);

	/* modes without x bits are accepted, modes with any x bit are not */
	mfd_assert_chmod(fd, 0600);
	mfd_fail_chmod(fd, 0777);
	mfd_fail_chmod(fd, 0670);
	mfd_fail_chmod(fd, 0605);
	mfd_fail_chmod(fd, 0700);
	mfd_fail_chmod(fd, 0100);
	mfd_assert_chmod(fd, 0666);
	mfd_assert_write(fd);
	close(fd);

	printf("%s	Apply ALL_SEALS\n", memfd_str);
	fd = mfd_assert_new("kern_memfd_seal_exec",
			    mfd_def_size,
			    MFD_CLOEXEC | MFD_ALLOW_SEALING | MFD_EXEC);

	/* keep the owner x bit set this time */
	mfd_assert_mode(fd, 0777);
	mfd_assert_chmod(fd, 0700);

	/* only F_SEAL_EXEC is requested, but all of F_WX_SEALS is expected */
	mfd_assert_has_seals(fd, 0);
	mfd_assert_add_seals(fd, F_SEAL_EXEC);
	mfd_assert_has_seals(fd, F_WX_SEALS);

	mfd_fail_chmod(fd, 0711);
	mfd_fail_chmod(fd, 0600);
	mfd_fail_write(fd);
	close(fd);
}
1065 
1066 /*
1067  * Test EXEC_NO_SEAL
1068  * Test fd is created with exec and not allow sealing.
1069  */
1070 static void test_exec_no_seal(void)
1071 {
1072 	int fd;
1073 
1074 	printf("%s EXEC_NO_SEAL\n", memfd_str);
1075 
1076 	/* Create with EXEC but without ALLOW_SEALING */
1077 	fd = mfd_assert_new("kern_memfd_exec_no_sealing",
1078 			    mfd_def_size,
1079 			    MFD_CLOEXEC | MFD_EXEC);
1080 	mfd_assert_mode(fd, 0777);
1081 	mfd_assert_has_seals(fd, F_SEAL_SEAL);
1082 	mfd_assert_chmod(fd, 0666);
1083 	close(fd);
1084 }
1085 
1086 /*
1087  * Test memfd_create with MFD_NOEXEC flag
1088  */
1089 static void test_noexec_seal(void)
1090 {
1091 	int fd;
1092 
1093 	printf("%s NOEXEC_SEAL\n", memfd_str);
1094 
1095 	/* Create with NOEXEC and ALLOW_SEALING */
1096 	fd = mfd_assert_new("kern_memfd_noexec",
1097 			    mfd_def_size,
1098 			    MFD_CLOEXEC | MFD_ALLOW_SEALING | MFD_NOEXEC_SEAL);
1099 	mfd_assert_mode(fd, 0666);
1100 	mfd_assert_has_seals(fd, F_SEAL_EXEC);
1101 	mfd_fail_chmod(fd, 0777);
1102 	close(fd);
1103 
1104 	/* Create with NOEXEC but without ALLOW_SEALING */
1105 	fd = mfd_assert_new("kern_memfd_noexec",
1106 			    mfd_def_size,
1107 			    MFD_CLOEXEC | MFD_NOEXEC_SEAL);
1108 	mfd_assert_mode(fd, 0666);
1109 	mfd_assert_has_seals(fd, F_SEAL_EXEC);
1110 	mfd_fail_chmod(fd, 0777);
1111 	close(fd);
1112 }
1113 
/*
 * Body of the sysctl test; test_sysctl() runs it in a child created with
 * CLONE_NEWPID. It steps /proc/sys/vm/memfd_noexec through "0", "1" and
 * "2" and checks what memfd_create() without MFD_EXEC / MFD_NOEXEC_SEAL
 * produces at each setting, as well as that writing a lower value back
 * fails once the setting has been raised.
 */
static void test_sysctl_child(void)
{
	int fd;
	int pid;

	/* setting 0: expect an executable (0777), unsealed file */
	printf("%s sysctl 0\n", memfd_str);
	sysctl_assert_write("0");
	fd = mfd_assert_new("kern_memfd_sysctl_0",
			    mfd_def_size,
			    MFD_CLOEXEC | MFD_ALLOW_SEALING);

	mfd_assert_mode(fd, 0777);
	mfd_assert_has_seals(fd, 0);
	mfd_assert_chmod(fd, 0644);
	close(fd);

	/* setting 1: expect mode 0666 with F_SEAL_EXEC already applied */
	printf("%s sysctl 1\n", memfd_str);
	sysctl_assert_write("1");
	fd = mfd_assert_new("kern_memfd_sysctl_1",
			    mfd_def_size,
			    MFD_CLOEXEC | MFD_ALLOW_SEALING);

	/* run the same checks from a further CLONE_NEWPID child */
	printf("%s child ns\n", memfd_str);
	pid = spawn_newpid_thread(CLONE_NEWPID, newpid_thread_fn2);
	join_newpid_thread(pid);

	mfd_assert_mode(fd, 0666);
	mfd_assert_has_seals(fd, F_SEAL_EXEC);
	mfd_fail_chmod(fd, 0777);
	/* going back from 1 to 0 must be refused */
	sysctl_fail_write("0");
	close(fd);

	/*
	 * setting 2: creation without MFD_NOEXEC_SEAL must fail, with or
	 * without MFD_EXEC; creation with MFD_NOEXEC_SEAL must still work
	 */
	printf("%s sysctl 2\n", memfd_str);
	sysctl_assert_write("2");
	mfd_fail_new("kern_memfd_sysctl_2",
		MFD_CLOEXEC | MFD_ALLOW_SEALING);
	mfd_fail_new("kern_memfd_sysctl_2_MFD_EXEC",
		MFD_CLOEXEC | MFD_EXEC);
	fd = mfd_assert_new("", 0, MFD_NOEXEC_SEAL);
	close(fd);

	/* lowering the setting from 2 must be refused as well */
	sysctl_fail_write("0");
	sysctl_fail_write("1");
}
1158 
/*
 * clone() entry point that runs test_sysctl_child() and reports success.
 * @arg is unused.
 */
static int newpid_thread_fn(void *arg)
{
	(void)arg;

	test_sysctl_child();

	return 0;
}
1164 
1165 static void test_sysctl_child2(void)
1166 {
1167 	int fd;
1168 
1169 	sysctl_fail_write("0");
1170 	fd = mfd_assert_new("kern_memfd_sysctl_1",
1171 			    mfd_def_size,
1172 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1173 
1174 	mfd_assert_mode(fd, 0666);
1175 	mfd_assert_has_seals(fd, F_SEAL_EXEC);
1176 	mfd_fail_chmod(fd, 0777);
1177 	close(fd);
1178 }
1179 
/*
 * clone() entry point that runs test_sysctl_child2() and reports success.
 * @arg is unused.
 */
static int newpid_thread_fn2(void *arg)
{
	(void)arg;

	test_sysctl_child2();

	return 0;
}
1185 static pid_t spawn_newpid_thread(unsigned int flags, int (*fn)(void *))
1186 {
1187 	uint8_t *stack;
1188 	pid_t pid;
1189 
1190 	stack = malloc(STACK_SIZE);
1191 	if (!stack) {
1192 		printf("malloc(STACK_SIZE) failed: %m\n");
1193 		abort();
1194 	}
1195 
1196 	pid = clone(fn,
1197 		    stack + STACK_SIZE,
1198 		    SIGCHLD | flags,
1199 		    NULL);
1200 	if (pid < 0) {
1201 		printf("clone() failed: %m\n");
1202 		abort();
1203 	}
1204 
1205 	return pid;
1206 }
1207 
/*
 * Reap the child @pid and propagate its failure.
 *
 * The child's assertion helpers report failure by calling abort(), which
 * only terminates the child. Its wait status is therefore checked here:
 * the parent aborts too if waitpid() fails, if the child exited with a
 * non-zero status, or if it was killed by a signal. Without that check a
 * failing child would go unnoticed and the run would still be reported
 * as successful.
 */
static void join_newpid_thread(pid_t pid)
{
	int wstatus;

	if (waitpid(pid, &wstatus, 0) < 0) {
		printf("newpid thread: waitpid() failed: %m\n");
		abort();
	}

	if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) != 0) {
		printf("newpid thread: exited with non-zero error code %d\n",
		       WEXITSTATUS(wstatus));
		abort();
	}

	if (WIFSIGNALED(wstatus)) {
		printf("newpid thread: killed by signal %d\n",
		       WTERMSIG(wstatus));
		abort();
	}
}
1212 
1213 /*
1214  * Test sysctl
1215  * A very basic sealing test to see whether setting/retrieving seals works.
1216  */
1217 static void test_sysctl(void)
1218 {
1219 	int pid = spawn_newpid_thread(CLONE_NEWPID, newpid_thread_fn);
1220 
1221 	join_newpid_thread(pid);
1222 }
1223 
1224 /*
1225  * Test sharing via dup()
1226  * Test that seals are shared between dupped FDs and they're all equal.
1227  */
1228 static void test_share_dup(char *banner, char *b_suffix)
1229 {
1230 	int fd, fd2;
1231 
1232 	printf("%s %s %s\n", memfd_str, banner, b_suffix);
1233 
1234 	fd = mfd_assert_new("kern_memfd_share_dup",
1235 			    mfd_def_size,
1236 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1237 	mfd_assert_has_seals(fd, 0);
1238 
1239 	fd2 = mfd_assert_dup(fd);
1240 	mfd_assert_has_seals(fd2, 0);
1241 
1242 	mfd_assert_add_seals(fd, F_SEAL_WRITE);
1243 	mfd_assert_has_seals(fd, F_SEAL_WRITE);
1244 	mfd_assert_has_seals(fd2, F_SEAL_WRITE);
1245 
1246 	mfd_assert_add_seals(fd2, F_SEAL_SHRINK);
1247 	mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK);
1248 	mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK);
1249 
1250 	mfd_assert_add_seals(fd, F_SEAL_SEAL);
1251 	mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK | F_SEAL_SEAL);
1252 	mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK | F_SEAL_SEAL);
1253 
1254 	mfd_fail_add_seals(fd, F_SEAL_GROW);
1255 	mfd_fail_add_seals(fd2, F_SEAL_GROW);
1256 	mfd_fail_add_seals(fd, F_SEAL_SEAL);
1257 	mfd_fail_add_seals(fd2, F_SEAL_SEAL);
1258 
1259 	close(fd2);
1260 
1261 	mfd_fail_add_seals(fd, F_SEAL_GROW);
1262 	close(fd);
1263 }
1264 
1265 /*
1266  * Test sealing with active mmap()s
1267  * Modifying seals is only allowed if no other mmap() refs exist.
1268  */
1269 static void test_share_mmap(char *banner, char *b_suffix)
1270 {
1271 	int fd;
1272 	void *p;
1273 
1274 	printf("%s %s %s\n", memfd_str,  banner, b_suffix);
1275 
1276 	fd = mfd_assert_new("kern_memfd_share_mmap",
1277 			    mfd_def_size,
1278 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1279 	mfd_assert_has_seals(fd, 0);
1280 
1281 	/* shared/writable ref prevents sealing WRITE, but allows others */
1282 	p = mfd_assert_mmap_shared(fd);
1283 	mfd_fail_add_seals(fd, F_SEAL_WRITE);
1284 	mfd_assert_has_seals(fd, 0);
1285 	mfd_assert_add_seals(fd, F_SEAL_SHRINK);
1286 	mfd_assert_has_seals(fd, F_SEAL_SHRINK);
1287 	munmap(p, mfd_def_size);
1288 
1289 	/* readable ref allows sealing */
1290 	p = mfd_assert_mmap_private(fd);
1291 	mfd_assert_add_seals(fd, F_SEAL_WRITE);
1292 	mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK);
1293 	munmap(p, mfd_def_size);
1294 
1295 	close(fd);
1296 }
1297 
/*
 * Test sealing with open(/proc/self/fd/%d)
 * Via /proc we can get access to a separate file-context for the same memfd.
 * This is *not* like dup(), but like a real separate open(). Make sure the
 * semantics are as expected and we correctly check for RDONLY / WRONLY / RDWR.
 *
 * @banner and @b_suffix are only printed in the test banner.
 */
static void test_share_open(char *banner, char *b_suffix)
{
	int fd, fd2;

	printf("%s %s %s\n", memfd_str, banner, b_suffix);

	fd = mfd_assert_new("kern_memfd_share_open",
			    mfd_def_size,
			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
	mfd_assert_has_seals(fd, 0);

	/* seals added through one descriptor must be visible on the other */
	fd2 = mfd_assert_open(fd, O_RDWR, 0);
	mfd_assert_add_seals(fd, F_SEAL_WRITE);
	mfd_assert_has_seals(fd, F_SEAL_WRITE);
	mfd_assert_has_seals(fd2, F_SEAL_WRITE);

	mfd_assert_add_seals(fd2, F_SEAL_SHRINK);
	mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK);
	mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK);

	/* replace fd with a read-only open obtained via fd2 */
	close(fd);
	fd = mfd_assert_open(fd2, O_RDONLY, 0);

	/* adding seals through the read-only descriptor must fail */
	mfd_fail_add_seals(fd, F_SEAL_SEAL);
	mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK);
	mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK);

	/* replace fd2 with a read-write open obtained via the read-only fd */
	close(fd2);
	fd2 = mfd_assert_open(fd, O_RDWR, 0);

	/* the read-write descriptor may add seals again */
	mfd_assert_add_seals(fd2, F_SEAL_SEAL);
	mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK | F_SEAL_SEAL);
	mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK | F_SEAL_SEAL);

	close(fd2);
	close(fd);
}
1341 
1342 /*
1343  * Test sharing via fork()
1344  * Test whether seal-modifications work as expected with forked childs.
1345  */
1346 static void test_share_fork(char *banner, char *b_suffix)
1347 {
1348 	int fd;
1349 	pid_t pid;
1350 
1351 	printf("%s %s %s\n", memfd_str, banner, b_suffix);
1352 
1353 	fd = mfd_assert_new("kern_memfd_share_fork",
1354 			    mfd_def_size,
1355 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1356 	mfd_assert_has_seals(fd, 0);
1357 
1358 	pid = spawn_idle_thread(0);
1359 	mfd_assert_add_seals(fd, F_SEAL_SEAL);
1360 	mfd_assert_has_seals(fd, F_SEAL_SEAL);
1361 
1362 	mfd_fail_add_seals(fd, F_SEAL_WRITE);
1363 	mfd_assert_has_seals(fd, F_SEAL_SEAL);
1364 
1365 	join_idle_thread(pid);
1366 
1367 	mfd_fail_add_seals(fd, F_SEAL_WRITE);
1368 	mfd_assert_has_seals(fd, F_SEAL_SEAL);
1369 
1370 	close(fd);
1371 }
1372 
/*
 * Test driver.
 *
 * With no argument the tests run with the default size and banner prefix.
 * With the single argument "hugetlbfs", hugetlbfs_test is set, the banner
 * prefix switches to MEMFD_HUGE_STR and the default size becomes two huge
 * pages. Any other single argument aborts.
 *
 * Every test aborts the process on failure, so reaching the final printf
 * and returning 0 means all checks in this process passed.
 */
int main(int argc, char **argv)
{
	pid_t pid;

	if (argc == 2) {
		if (!strcmp(argv[1], "hugetlbfs")) {
			unsigned long hpage_size = default_huge_page_size();

			if (!hpage_size) {
				printf("Unable to determine huge page size\n");
				abort();
			}

			hugetlbfs_test = 1;
			memfd_str = MEMFD_HUGE_STR;
			mfd_def_size = hpage_size * 2;
		} else {
			printf("Unknown option: %s\n", argv[1]);
			abort();
		}
	}

	/* creation and exec-related flag tests */
	test_create();
	test_basic();
	test_exec_seal();
	test_exec_no_seal();
	test_noexec_seal();

	/* one test per seal type */
	test_seal_write();
	test_seal_future_write();
	test_seal_shrink();
	test_seal_grow();
	test_seal_resize();

	/* seal sharing between descriptors, mappings and children */
	test_share_dup("SHARE-DUP", "");
	test_share_mmap("SHARE-MMAP", "");
	test_share_open("SHARE-OPEN", "");
	test_share_fork("SHARE-FORK", "");

	/* Run test-suite in a multi-threaded environment with a shared
	 * file-table. */
	pid = spawn_idle_thread(CLONE_FILES | CLONE_FS | CLONE_VM);
	test_share_dup("SHARE-DUP", SHARED_FT_STR);
	test_share_mmap("SHARE-MMAP", SHARED_FT_STR);
	test_share_open("SHARE-OPEN", SHARED_FT_STR);
	test_share_fork("SHARE-FORK", SHARED_FT_STR);
	join_idle_thread(pid);

	/* runs last, after all sealing and sharing tests */
	test_sysctl();

	printf("memfd: DONE\n");

	return 0;
}
1427