1 // SPDX-License-Identifier: GPL-2.0
2 #define _GNU_SOURCE
3 #define __EXPORTED_HEADERS__
4 
5 #include <errno.h>
6 #include <inttypes.h>
7 #include <limits.h>
8 #include <linux/falloc.h>
9 #include <fcntl.h>
10 #include <linux/memfd.h>
11 #include <sched.h>
12 #include <stdio.h>
13 #include <stdlib.h>
14 #include <signal.h>
15 #include <string.h>
16 #include <sys/mman.h>
17 #include <sys/stat.h>
18 #include <sys/syscall.h>
19 #include <sys/wait.h>
20 #include <unistd.h>
21 
22 #include "common.h"
23 
/* Banner prefixes printed in front of every test name. */
#define MEMFD_STR	"memfd:"
#define MEMFD_HUGE_STR	"memfd-hugetlb:"
#define SHARED_FT_STR	"(shared file-table)"

/* Default memfd size in bytes, and the stack size handed to clone(). */
#define MFD_DEF_SIZE 8192
#define STACK_SIZE 65536

/* Local definition of the exec seal bit. */
#define F_SEAL_EXEC	0x0020

/* Every seal except F_SEAL_SEAL, i.e. all write/resize seals plus exec. */
#define F_WX_SEALS (F_SEAL_SHRINK | F_SEAL_GROW | F_SEAL_WRITE | \
		    F_SEAL_FUTURE_WRITE | F_SEAL_EXEC)

/* Local definition of the memfd_create() "no exec, sealed" flag. */
#define MFD_NOEXEC_SEAL	0x0008U

/*
 * hugetlbfs is not tested by default; main() overrides both values when
 * the "hugetlbfs" option is given.
 */
static size_t mfd_def_size = MFD_DEF_SIZE;
static const char *memfd_str = MEMFD_STR;
46 
/*
 * Resolve the /proc/self/fd symlink of @fd into @buf.
 *
 * At most @bufsize - 1 bytes are read so that the result can always be
 * NUL-terminated. Returns the number of bytes stored (terminator excluded).
 * Aborts if the proc path cannot be formatted or the link cannot be read.
 */
static ssize_t fd2name(int fd, char *buf, size_t bufsize)
{
	char link_path[PATH_MAX];
	ssize_t len;

	if (snprintf(link_path, PATH_MAX, "/proc/self/fd/%d", fd) < 0) {
		printf("snprintf(%d) failed on %m\n", fd);
		abort();
	}

	/* readlink() does not terminate the string; keep one byte for that */
	len = readlink(link_path, buf, bufsize - 1);
	if (len == -1) {
		printf("readlink(%s) failed %m\n", link_path);
		abort();
	}

	buf[len] = '\0';
	return len;
}
70 
/*
 * Create a memfd called @name with @flags and resize it to @sz bytes.
 * Returns the new file descriptor; aborts if either step fails.
 */
static int mfd_assert_new(const char *name, loff_t sz, unsigned int flags)
{
	int fd = sys_memfd_create(name, flags);

	if (fd < 0) {
		printf("memfd_create(\"%s\", %u) failed: %m\n",
		       name, flags);
		abort();
	}

	if (ftruncate(fd, sz) < 0) {
		printf("ftruncate(%llu) failed: %m\n", (unsigned long long)sz);
		abort();
	}

	return fd;
}
90 
/*
 * Write @val to /proc/sys/vm/memfd_noexec and abort if the sysctl cannot be
 * opened or the write is rejected.
 */
static void sysctl_assert_write(const char *val)
{
	int fd = open("/proc/sys/vm/memfd_noexec", O_WRONLY | O_CLOEXEC);

	if (fd < 0) {
		printf("open sysctl failed\n");
		abort();
	}

	if (write(fd, val, strlen(val)) < 0) {
		printf("write sysctl failed\n");
		abort();
	}

	/* do not leak one descriptor per sysctl update */
	close(fd);
}
105 
/*
 * Try to write @val to /proc/sys/vm/memfd_noexec and abort if the write is
 * accepted; the caller expects the kernel to reject it.
 */
static void sysctl_fail_write(const char *val)
{
	int fd = open("/proc/sys/vm/memfd_noexec", O_WRONLY | O_CLOEXEC);

	if (fd < 0) {
		printf("open sysctl failed\n");
		abort();
	}

	if (write(fd, val, strlen(val)) >= 0) {
		printf("write sysctl %s succeeded, but failure expected\n",
				val);
		abort();
	}

	/* do not leak one descriptor per rejected update */
	close(fd);
}
121 
/*
 * Open a second, read-write file description for @fd_in through its
 * /proc/self/fd entry. Returns the new descriptor; aborts on failure.
 */
static int mfd_assert_reopen_fd(int fd_in)
{
	char proc_path[100];
	int reopened;

	sprintf(proc_path, "/proc/self/fd/%d", fd_in);

	reopened = open(proc_path, O_RDWR);
	if (reopened >= 0)
		return reopened;

	printf("re-open of existing fd %d failed\n", fd_in);
	abort();
}
137 
/*
 * Call memfd_create() with @name and @flags and abort if it unexpectedly
 * succeeds.
 */
static void mfd_fail_new(const char *name, unsigned int flags)
{
	int fd = sys_memfd_create(name, flags);

	if (fd < 0)
		return;

	printf("memfd_create(\"%s\", %u) succeeded, but failure expected\n",
	       name, flags);
	close(fd);
	abort();
}
150 
/*
 * Return the seal set currently reported by F_GET_SEALS for @fd; aborts if
 * the query fails.
 */
static unsigned int mfd_assert_get_seals(int fd)
{
	int seals = fcntl(fd, F_GET_SEALS);

	if (seals < 0) {
		printf("GET_SEALS(%d) failed: %m\n", fd);
		abort();
	}

	return (unsigned int)seals;
}
163 
164 static void mfd_assert_has_seals(int fd, unsigned int seals)
165 {
166 	char buf[PATH_MAX];
167 	int nbytes;
168 	unsigned int s;
169 	fd2name(fd, buf, PATH_MAX);
170 
171 	s = mfd_assert_get_seals(fd);
172 	if (s != seals) {
173 		printf("%u != %u = GET_SEALS(%s)\n", seals, s, buf);
174 		abort();
175 	}
176 }
177 
/*
 * Add @seals to @fd via F_ADD_SEALS and abort if the kernel refuses. The
 * seal set in place before the call is only used for the failure message.
 */
static void mfd_assert_add_seals(int fd, unsigned int seals)
{
	unsigned int before = mfd_assert_get_seals(fd);

	if (fcntl(fd, F_ADD_SEALS, seals) < 0) {
		printf("ADD_SEALS(%d, %u -> %u) failed: %m\n", fd, before, seals);
		abort();
	}
}
190 
/*
 * Try to add @seals to @fd and abort if the kernel accepts them. The
 * current seal set is read best-effort (0 if it cannot be queried) and is
 * only used for the failure message.
 */
static void mfd_fail_add_seals(int fd, unsigned int seals)
{
	int cur = fcntl(fd, F_GET_SEALS);
	unsigned int before = (cur < 0) ? 0 : (unsigned int)cur;

	if (fcntl(fd, F_ADD_SEALS, seals) < 0)
		return;

	printf("ADD_SEALS(%d, %u -> %u) didn't fail as expected\n",
			fd, before, seals);
	abort();
}
209 
/*
 * Abort unless fstat() reports that @fd is exactly @size bytes long.
 */
static void mfd_assert_size(int fd, size_t size)
{
	struct stat info;

	if (fstat(fd, &info) < 0) {
		printf("fstat(%d) failed: %m\n", fd);
		abort();
	}

	if (info.st_size != size) {
		printf("wrong file size %lld, but expected %lld\n",
		       (long long)info.st_size, (long long)size);
		abort();
	}
}
225 
/*
 * Duplicate @fd with dup() and return the copy; aborts on failure.
 */
static int mfd_assert_dup(int fd)
{
	int copy = dup(fd);

	if (copy >= 0)
		return copy;

	printf("dup(%d) failed: %m\n", fd);
	abort();
}
238 
239 static void *mfd_assert_mmap_shared(int fd)
240 {
241 	void *p;
242 
243 	p = mmap(NULL,
244 		 mfd_def_size,
245 		 PROT_READ | PROT_WRITE,
246 		 MAP_SHARED,
247 		 fd,
248 		 0);
249 	if (p == MAP_FAILED) {
250 		printf("mmap() failed: %m\n");
251 		abort();
252 	}
253 
254 	return p;
255 }
256 
257 static void *mfd_assert_mmap_private(int fd)
258 {
259 	void *p;
260 
261 	p = mmap(NULL,
262 		 mfd_def_size,
263 		 PROT_READ,
264 		 MAP_PRIVATE,
265 		 fd,
266 		 0);
267 	if (p == MAP_FAILED) {
268 		printf("mmap() failed: %m\n");
269 		abort();
270 	}
271 
272 	return p;
273 }
274 
/*
 * Open the /proc/self/fd entry of @fd with the given open(2) @flags and
 * @mode. Returns the new descriptor; aborts on failure.
 */
static int mfd_assert_open(int fd, int flags, mode_t mode)
{
	char proc_path[512];
	int opened;

	sprintf(proc_path, "/proc/self/fd/%d", fd);

	opened = open(proc_path, flags, mode);
	if (opened >= 0)
		return opened;

	printf("open(%s) failed: %m\n", proc_path);
	abort();
}
289 
/*
 * Try to open the /proc/self/fd entry of @fd with @flags and @mode and
 * abort if the open unexpectedly succeeds.
 */
static void mfd_fail_open(int fd, int flags, mode_t mode)
{
	char proc_path[512];

	sprintf(proc_path, "/proc/self/fd/%d", fd);

	if (open(proc_path, flags, mode) < 0)
		return;

	printf("open(%s) didn't fail as expected\n", proc_path);
	abort();
}
302 
303 static void mfd_assert_read(int fd)
304 {
305 	char buf[16];
306 	void *p;
307 	ssize_t l;
308 
309 	l = read(fd, buf, sizeof(buf));
310 	if (l != sizeof(buf)) {
311 		printf("read() failed: %m\n");
312 		abort();
313 	}
314 
315 	/* verify PROT_READ *is* allowed */
316 	p = mmap(NULL,
317 		 mfd_def_size,
318 		 PROT_READ,
319 		 MAP_PRIVATE,
320 		 fd,
321 		 0);
322 	if (p == MAP_FAILED) {
323 		printf("mmap() failed: %m\n");
324 		abort();
325 	}
326 	munmap(p, mfd_def_size);
327 
328 	/* verify MAP_PRIVATE is *always* allowed (even writable) */
329 	p = mmap(NULL,
330 		 mfd_def_size,
331 		 PROT_READ | PROT_WRITE,
332 		 MAP_PRIVATE,
333 		 fd,
334 		 0);
335 	if (p == MAP_FAILED) {
336 		printf("mmap() failed: %m\n");
337 		abort();
338 	}
339 	munmap(p, mfd_def_size);
340 }
341 
342 /* Test that PROT_READ + MAP_SHARED mappings work. */
343 static void mfd_assert_read_shared(int fd)
344 {
345 	void *p;
346 
347 	/* verify PROT_READ and MAP_SHARED *is* allowed */
348 	p = mmap(NULL,
349 		 mfd_def_size,
350 		 PROT_READ,
351 		 MAP_SHARED,
352 		 fd,
353 		 0);
354 	if (p == MAP_FAILED) {
355 		printf("mmap() failed: %m\n");
356 		abort();
357 	}
358 	munmap(p, mfd_def_size);
359 }
360 
361 static void mfd_assert_fork_private_write(int fd)
362 {
363 	int *p;
364 	pid_t pid;
365 
366 	p = mmap(NULL,
367 		 mfd_def_size,
368 		 PROT_READ | PROT_WRITE,
369 		 MAP_PRIVATE,
370 		 fd,
371 		 0);
372 	if (p == MAP_FAILED) {
373 		printf("mmap() failed: %m\n");
374 		abort();
375 	}
376 
377 	p[0] = 22;
378 
379 	pid = fork();
380 	if (pid == 0) {
381 		p[0] = 33;
382 		exit(0);
383 	} else {
384 		waitpid(pid, NULL, 0);
385 
386 		if (p[0] != 22) {
387 			printf("MAP_PRIVATE copy-on-write failed: %m\n");
388 			abort();
389 		}
390 	}
391 
392 	munmap(p, mfd_def_size);
393 }
394 
395 static void mfd_assert_write(int fd)
396 {
397 	ssize_t l;
398 	void *p;
399 	int r;
400 
401 	/*
402 	 * huegtlbfs does not support write, but we want to
403 	 * verify everything else here.
404 	 */
405 	if (!hugetlbfs_test) {
406 		/* verify write() succeeds */
407 		l = write(fd, "\0\0\0\0", 4);
408 		if (l != 4) {
409 			printf("write() failed: %m\n");
410 			abort();
411 		}
412 	}
413 
414 	/* verify PROT_READ | PROT_WRITE is allowed */
415 	p = mmap(NULL,
416 		 mfd_def_size,
417 		 PROT_READ | PROT_WRITE,
418 		 MAP_SHARED,
419 		 fd,
420 		 0);
421 	if (p == MAP_FAILED) {
422 		printf("mmap() failed: %m\n");
423 		abort();
424 	}
425 	*(char *)p = 0;
426 	munmap(p, mfd_def_size);
427 
428 	/* verify PROT_WRITE is allowed */
429 	p = mmap(NULL,
430 		 mfd_def_size,
431 		 PROT_WRITE,
432 		 MAP_SHARED,
433 		 fd,
434 		 0);
435 	if (p == MAP_FAILED) {
436 		printf("mmap() failed: %m\n");
437 		abort();
438 	}
439 	*(char *)p = 0;
440 	munmap(p, mfd_def_size);
441 
442 	/* verify PROT_READ with MAP_SHARED is allowed and a following
443 	 * mprotect(PROT_WRITE) allows writing */
444 	p = mmap(NULL,
445 		 mfd_def_size,
446 		 PROT_READ,
447 		 MAP_SHARED,
448 		 fd,
449 		 0);
450 	if (p == MAP_FAILED) {
451 		printf("mmap() failed: %m\n");
452 		abort();
453 	}
454 
455 	r = mprotect(p, mfd_def_size, PROT_READ | PROT_WRITE);
456 	if (r < 0) {
457 		printf("mprotect() failed: %m\n");
458 		abort();
459 	}
460 
461 	*(char *)p = 0;
462 	munmap(p, mfd_def_size);
463 
464 	/* verify PUNCH_HOLE works */
465 	r = fallocate(fd,
466 		      FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
467 		      0,
468 		      mfd_def_size);
469 	if (r < 0) {
470 		printf("fallocate(PUNCH_HOLE) failed: %m\n");
471 		abort();
472 	}
473 }
474 
475 static void mfd_fail_write(int fd)
476 {
477 	ssize_t l;
478 	void *p;
479 	int r;
480 
481 	/* verify write() fails */
482 	l = write(fd, "data", 4);
483 	if (l != -EPERM) {
484 		printf("expected EPERM on write(), but got %d: %m\n", (int)l);
485 		abort();
486 	}
487 
488 	/* verify PROT_READ | PROT_WRITE is not allowed */
489 	p = mmap(NULL,
490 		 mfd_def_size,
491 		 PROT_READ | PROT_WRITE,
492 		 MAP_SHARED,
493 		 fd,
494 		 0);
495 	if (p != MAP_FAILED) {
496 		printf("mmap() didn't fail as expected\n");
497 		abort();
498 	}
499 
500 	/* verify PROT_WRITE is not allowed */
501 	p = mmap(NULL,
502 		 mfd_def_size,
503 		 PROT_WRITE,
504 		 MAP_SHARED,
505 		 fd,
506 		 0);
507 	if (p != MAP_FAILED) {
508 		printf("mmap() didn't fail as expected\n");
509 		abort();
510 	}
511 
512 	/* Verify PROT_READ with MAP_SHARED with a following mprotect is not
513 	 * allowed. Note that for r/w the kernel already prevents the mmap. */
514 	p = mmap(NULL,
515 		 mfd_def_size,
516 		 PROT_READ,
517 		 MAP_SHARED,
518 		 fd,
519 		 0);
520 	if (p != MAP_FAILED) {
521 		r = mprotect(p, mfd_def_size, PROT_READ | PROT_WRITE);
522 		if (r >= 0) {
523 			printf("mmap()+mprotect() didn't fail as expected\n");
524 			abort();
525 		}
526 		munmap(p, mfd_def_size);
527 	}
528 
529 	/* verify PUNCH_HOLE fails */
530 	r = fallocate(fd,
531 		      FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
532 		      0,
533 		      mfd_def_size);
534 	if (r >= 0) {
535 		printf("fallocate(PUNCH_HOLE) didn't fail as expected\n");
536 		abort();
537 	}
538 }
539 
540 static void mfd_assert_shrink(int fd)
541 {
542 	int r, fd2;
543 
544 	r = ftruncate(fd, mfd_def_size / 2);
545 	if (r < 0) {
546 		printf("ftruncate(SHRINK) failed: %m\n");
547 		abort();
548 	}
549 
550 	mfd_assert_size(fd, mfd_def_size / 2);
551 
552 	fd2 = mfd_assert_open(fd,
553 			      O_RDWR | O_CREAT | O_TRUNC,
554 			      S_IRUSR | S_IWUSR);
555 	close(fd2);
556 
557 	mfd_assert_size(fd, 0);
558 }
559 
560 static void mfd_fail_shrink(int fd)
561 {
562 	int r;
563 
564 	r = ftruncate(fd, mfd_def_size / 2);
565 	if (r >= 0) {
566 		printf("ftruncate(SHRINK) didn't fail as expected\n");
567 		abort();
568 	}
569 
570 	mfd_fail_open(fd,
571 		      O_RDWR | O_CREAT | O_TRUNC,
572 		      S_IRUSR | S_IWUSR);
573 }
574 
575 static void mfd_assert_grow(int fd)
576 {
577 	int r;
578 
579 	r = ftruncate(fd, mfd_def_size * 2);
580 	if (r < 0) {
581 		printf("ftruncate(GROW) failed: %m\n");
582 		abort();
583 	}
584 
585 	mfd_assert_size(fd, mfd_def_size * 2);
586 
587 	r = fallocate(fd,
588 		      0,
589 		      0,
590 		      mfd_def_size * 4);
591 	if (r < 0) {
592 		printf("fallocate(ALLOC) failed: %m\n");
593 		abort();
594 	}
595 
596 	mfd_assert_size(fd, mfd_def_size * 4);
597 }
598 
599 static void mfd_fail_grow(int fd)
600 {
601 	int r;
602 
603 	r = ftruncate(fd, mfd_def_size * 2);
604 	if (r >= 0) {
605 		printf("ftruncate(GROW) didn't fail as expected\n");
606 		abort();
607 	}
608 
609 	r = fallocate(fd,
610 		      0,
611 		      0,
612 		      mfd_def_size * 4);
613 	if (r >= 0) {
614 		printf("fallocate(ALLOC) didn't fail as expected\n");
615 		abort();
616 	}
617 }
618 
619 static void mfd_assert_grow_write(int fd)
620 {
621 	static char *buf;
622 	ssize_t l;
623 
624 	/* hugetlbfs does not support write */
625 	if (hugetlbfs_test)
626 		return;
627 
628 	buf = malloc(mfd_def_size * 8);
629 	if (!buf) {
630 		printf("malloc(%zu) failed: %m\n", mfd_def_size * 8);
631 		abort();
632 	}
633 
634 	l = pwrite(fd, buf, mfd_def_size * 8, 0);
635 	if (l != (mfd_def_size * 8)) {
636 		printf("pwrite() failed: %m\n");
637 		abort();
638 	}
639 
640 	mfd_assert_size(fd, mfd_def_size * 8);
641 }
642 
643 static void mfd_fail_grow_write(int fd)
644 {
645 	static char *buf;
646 	ssize_t l;
647 
648 	/* hugetlbfs does not support write */
649 	if (hugetlbfs_test)
650 		return;
651 
652 	buf = malloc(mfd_def_size * 8);
653 	if (!buf) {
654 		printf("malloc(%zu) failed: %m\n", mfd_def_size * 8);
655 		abort();
656 	}
657 
658 	l = pwrite(fd, buf, mfd_def_size * 8, 0);
659 	if (l == (mfd_def_size * 8)) {
660 		printf("pwrite() didn't fail as expected\n");
661 		abort();
662 	}
663 }
664 
665 static void mfd_assert_mode(int fd, int mode)
666 {
667 	struct stat st;
668 	char buf[PATH_MAX];
669 	int nbytes;
670 
671 	fd2name(fd, buf, PATH_MAX);
672 
673 	if (fstat(fd, &st) < 0) {
674 		printf("fstat(%s) failed: %m\n", buf);
675 		abort();
676 	}
677 
678 	if ((st.st_mode & 07777) != mode) {
679 		printf("fstat(%s) wrong file mode 0%04o, but expected 0%04o\n",
680 		       buf, (int)st.st_mode & 07777, mode);
681 		abort();
682 	}
683 }
684 
685 static void mfd_assert_chmod(int fd, int mode)
686 {
687 	char buf[PATH_MAX];
688 	int nbytes;
689 
690 	fd2name(fd, buf, PATH_MAX);
691 
692 	if (fchmod(fd, mode) < 0) {
693 		printf("fchmod(%s, 0%04o) failed: %m\n", buf, mode);
694 		abort();
695 	}
696 
697 	mfd_assert_mode(fd, mode);
698 }
699 
700 static void mfd_fail_chmod(int fd, int mode)
701 {
702 	struct stat st;
703 	char buf[PATH_MAX];
704 	int nbytes;
705 
706 	fd2name(fd, buf, PATH_MAX);
707 
708 	if (fstat(fd, &st) < 0) {
709 		printf("fstat(%s) failed: %m\n", buf);
710 		abort();
711 	}
712 
713 	if (fchmod(fd, mode) == 0) {
714 		printf("fchmod(%s, 0%04o) didn't fail as expected\n",
715 		       buf, mode);
716 		abort();
717 	}
718 
719 	/* verify that file mode bits did not change */
720 	mfd_assert_mode(fd, st.st_mode & 07777);
721 }
722 
/*
 * clone() entry point that does nothing but wait for SIGTERM.
 * @arg is unused. Returns 0 once sigwait() returns.
 */
static int idle_thread_fn(void *arg)
{
	sigset_t term_only;
	int caught;

	/* dummy waiter; SIGTERM terminates us anyway */
	sigemptyset(&term_only);
	sigaddset(&term_only, SIGTERM);
	sigwait(&term_only, &caught);

	return 0;
}
735 
736 static pid_t spawn_idle_thread(unsigned int flags)
737 {
738 	uint8_t *stack;
739 	pid_t pid;
740 
741 	stack = malloc(STACK_SIZE);
742 	if (!stack) {
743 		printf("malloc(STACK_SIZE) failed: %m\n");
744 		abort();
745 	}
746 
747 	pid = clone(idle_thread_fn,
748 		    stack + STACK_SIZE,
749 		    SIGCHLD | flags,
750 		    NULL);
751 	if (pid < 0) {
752 		printf("clone() failed: %m\n");
753 		abort();
754 	}
755 
756 	return pid;
757 }
758 
/*
 * Stop the idle task @pid by sending it SIGTERM and wait until it has been
 * reaped. Neither return value is inspected.
 */
static void join_idle_thread(pid_t pid)
{
	(void)kill(pid, SIGTERM);
	(void)waitpid(pid, NULL, 0);
}
764 
765 /*
766  * Test memfd_create() syscall
767  * Verify syscall-argument validation, including name checks, flag validation
768  * and more.
769  */
770 static void test_create(void)
771 {
772 	char buf[2048];
773 	int fd;
774 
775 	printf("%s CREATE\n", memfd_str);
776 
777 	/* test NULL name */
778 	mfd_fail_new(NULL, 0);
779 
780 	/* test over-long name (not zero-terminated) */
781 	memset(buf, 0xff, sizeof(buf));
782 	mfd_fail_new(buf, 0);
783 
784 	/* test over-long zero-terminated name */
785 	memset(buf, 0xff, sizeof(buf));
786 	buf[sizeof(buf) - 1] = 0;
787 	mfd_fail_new(buf, 0);
788 
789 	/* verify "" is a valid name */
790 	fd = mfd_assert_new("", 0, 0);
791 	close(fd);
792 
793 	/* verify invalid O_* open flags */
794 	mfd_fail_new("", 0x0100);
795 	mfd_fail_new("", ~MFD_CLOEXEC);
796 	mfd_fail_new("", ~MFD_ALLOW_SEALING);
797 	mfd_fail_new("", ~0);
798 	mfd_fail_new("", 0x80000000U);
799 
800 	/* verify EXEC and NOEXEC_SEAL can't both be set */
801 	mfd_fail_new("", MFD_EXEC | MFD_NOEXEC_SEAL);
802 
803 	/* verify MFD_CLOEXEC is allowed */
804 	fd = mfd_assert_new("", 0, MFD_CLOEXEC);
805 	close(fd);
806 
807 	/* verify MFD_ALLOW_SEALING is allowed */
808 	fd = mfd_assert_new("", 0, MFD_ALLOW_SEALING);
809 	close(fd);
810 
811 	/* verify MFD_ALLOW_SEALING | MFD_CLOEXEC is allowed */
812 	fd = mfd_assert_new("", 0, MFD_ALLOW_SEALING | MFD_CLOEXEC);
813 	close(fd);
814 }
815 
816 /*
817  * Test basic sealing
818  * A very basic sealing test to see whether setting/retrieving seals works.
819  */
820 static void test_basic(void)
821 {
822 	int fd;
823 
824 	printf("%s BASIC\n", memfd_str);
825 
826 	fd = mfd_assert_new("kern_memfd_basic",
827 			    mfd_def_size,
828 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
829 
830 	/* add basic seals */
831 	mfd_assert_has_seals(fd, 0);
832 	mfd_assert_add_seals(fd, F_SEAL_SHRINK |
833 				 F_SEAL_WRITE);
834 	mfd_assert_has_seals(fd, F_SEAL_SHRINK |
835 				 F_SEAL_WRITE);
836 
837 	/* add them again */
838 	mfd_assert_add_seals(fd, F_SEAL_SHRINK |
839 				 F_SEAL_WRITE);
840 	mfd_assert_has_seals(fd, F_SEAL_SHRINK |
841 				 F_SEAL_WRITE);
842 
843 	/* add more seals and seal against sealing */
844 	mfd_assert_add_seals(fd, F_SEAL_GROW | F_SEAL_SEAL);
845 	mfd_assert_has_seals(fd, F_SEAL_SHRINK |
846 				 F_SEAL_GROW |
847 				 F_SEAL_WRITE |
848 				 F_SEAL_SEAL);
849 
850 	/* verify that sealing no longer works */
851 	mfd_fail_add_seals(fd, F_SEAL_GROW);
852 	mfd_fail_add_seals(fd, 0);
853 
854 	close(fd);
855 
856 	/* verify sealing does not work without MFD_ALLOW_SEALING */
857 	fd = mfd_assert_new("kern_memfd_basic",
858 			    mfd_def_size,
859 			    MFD_CLOEXEC);
860 	mfd_assert_has_seals(fd, F_SEAL_SEAL);
861 	mfd_fail_add_seals(fd, F_SEAL_SHRINK |
862 			       F_SEAL_GROW |
863 			       F_SEAL_WRITE);
864 	mfd_assert_has_seals(fd, F_SEAL_SEAL);
865 	close(fd);
866 }
867 
868 /*
869  * Test SEAL_WRITE
870  * Test whether SEAL_WRITE actually prevents modifications.
871  */
872 static void test_seal_write(void)
873 {
874 	int fd;
875 
876 	printf("%s SEAL-WRITE\n", memfd_str);
877 
878 	fd = mfd_assert_new("kern_memfd_seal_write",
879 			    mfd_def_size,
880 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
881 	mfd_assert_has_seals(fd, 0);
882 	mfd_assert_add_seals(fd, F_SEAL_WRITE);
883 	mfd_assert_has_seals(fd, F_SEAL_WRITE);
884 
885 	mfd_assert_read(fd);
886 	mfd_fail_write(fd);
887 	mfd_assert_shrink(fd);
888 	mfd_assert_grow(fd);
889 	mfd_fail_grow_write(fd);
890 
891 	close(fd);
892 }
893 
894 /*
895  * Test SEAL_FUTURE_WRITE
896  * Test whether SEAL_FUTURE_WRITE actually prevents modifications.
897  */
898 static void test_seal_future_write(void)
899 {
900 	int fd, fd2;
901 	void *p;
902 
903 	printf("%s SEAL-FUTURE-WRITE\n", memfd_str);
904 
905 	fd = mfd_assert_new("kern_memfd_seal_future_write",
906 			    mfd_def_size,
907 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
908 
909 	p = mfd_assert_mmap_shared(fd);
910 
911 	mfd_assert_has_seals(fd, 0);
912 
913 	mfd_assert_add_seals(fd, F_SEAL_FUTURE_WRITE);
914 	mfd_assert_has_seals(fd, F_SEAL_FUTURE_WRITE);
915 
916 	/* read should pass, writes should fail */
917 	mfd_assert_read(fd);
918 	mfd_assert_read_shared(fd);
919 	mfd_fail_write(fd);
920 
921 	fd2 = mfd_assert_reopen_fd(fd);
922 	/* read should pass, writes should still fail */
923 	mfd_assert_read(fd2);
924 	mfd_assert_read_shared(fd2);
925 	mfd_fail_write(fd2);
926 
927 	mfd_assert_fork_private_write(fd);
928 
929 	munmap(p, mfd_def_size);
930 	close(fd2);
931 	close(fd);
932 }
933 
934 /*
935  * Test SEAL_SHRINK
936  * Test whether SEAL_SHRINK actually prevents shrinking
937  */
938 static void test_seal_shrink(void)
939 {
940 	int fd;
941 
942 	printf("%s SEAL-SHRINK\n", memfd_str);
943 
944 	fd = mfd_assert_new("kern_memfd_seal_shrink",
945 			    mfd_def_size,
946 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
947 	mfd_assert_has_seals(fd, 0);
948 	mfd_assert_add_seals(fd, F_SEAL_SHRINK);
949 	mfd_assert_has_seals(fd, F_SEAL_SHRINK);
950 
951 	mfd_assert_read(fd);
952 	mfd_assert_write(fd);
953 	mfd_fail_shrink(fd);
954 	mfd_assert_grow(fd);
955 	mfd_assert_grow_write(fd);
956 
957 	close(fd);
958 }
959 
960 /*
961  * Test SEAL_GROW
962  * Test whether SEAL_GROW actually prevents growing
963  */
964 static void test_seal_grow(void)
965 {
966 	int fd;
967 
968 	printf("%s SEAL-GROW\n", memfd_str);
969 
970 	fd = mfd_assert_new("kern_memfd_seal_grow",
971 			    mfd_def_size,
972 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
973 	mfd_assert_has_seals(fd, 0);
974 	mfd_assert_add_seals(fd, F_SEAL_GROW);
975 	mfd_assert_has_seals(fd, F_SEAL_GROW);
976 
977 	mfd_assert_read(fd);
978 	mfd_assert_write(fd);
979 	mfd_assert_shrink(fd);
980 	mfd_fail_grow(fd);
981 	mfd_fail_grow_write(fd);
982 
983 	close(fd);
984 }
985 
986 /*
987  * Test SEAL_SHRINK | SEAL_GROW
988  * Test whether SEAL_SHRINK | SEAL_GROW actually prevents resizing
989  */
990 static void test_seal_resize(void)
991 {
992 	int fd;
993 
994 	printf("%s SEAL-RESIZE\n", memfd_str);
995 
996 	fd = mfd_assert_new("kern_memfd_seal_resize",
997 			    mfd_def_size,
998 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
999 	mfd_assert_has_seals(fd, 0);
1000 	mfd_assert_add_seals(fd, F_SEAL_SHRINK | F_SEAL_GROW);
1001 	mfd_assert_has_seals(fd, F_SEAL_SHRINK | F_SEAL_GROW);
1002 
1003 	mfd_assert_read(fd);
1004 	mfd_assert_write(fd);
1005 	mfd_fail_shrink(fd);
1006 	mfd_fail_grow(fd);
1007 	mfd_fail_grow_write(fd);
1008 
1009 	close(fd);
1010 }
1011 
1012 /*
1013  * Test SEAL_EXEC
1014  * Test fd is created with exec and allow sealing.
1015  * chmod() cannot change x bits after sealing.
1016  */
1017 static void test_exec_seal(void)
1018 {
1019 	int fd;
1020 
1021 	printf("%s SEAL-EXEC\n", memfd_str);
1022 
1023 	printf("%s	Apply SEAL_EXEC\n", memfd_str);
1024 	fd = mfd_assert_new("kern_memfd_seal_exec",
1025 			    mfd_def_size,
1026 			    MFD_CLOEXEC | MFD_ALLOW_SEALING | MFD_EXEC);
1027 
1028 	mfd_assert_mode(fd, 0777);
1029 	mfd_assert_chmod(fd, 0644);
1030 
1031 	mfd_assert_has_seals(fd, 0);
1032 	mfd_assert_add_seals(fd, F_SEAL_EXEC);
1033 	mfd_assert_has_seals(fd, F_SEAL_EXEC);
1034 
1035 	mfd_assert_chmod(fd, 0600);
1036 	mfd_fail_chmod(fd, 0777);
1037 	mfd_fail_chmod(fd, 0670);
1038 	mfd_fail_chmod(fd, 0605);
1039 	mfd_fail_chmod(fd, 0700);
1040 	mfd_fail_chmod(fd, 0100);
1041 	mfd_assert_chmod(fd, 0666);
1042 	mfd_assert_write(fd);
1043 	close(fd);
1044 
1045 	printf("%s	Apply ALL_SEALS\n", memfd_str);
1046 	fd = mfd_assert_new("kern_memfd_seal_exec",
1047 			    mfd_def_size,
1048 			    MFD_CLOEXEC | MFD_ALLOW_SEALING | MFD_EXEC);
1049 
1050 	mfd_assert_mode(fd, 0777);
1051 	mfd_assert_chmod(fd, 0700);
1052 
1053 	mfd_assert_has_seals(fd, 0);
1054 	mfd_assert_add_seals(fd, F_SEAL_EXEC);
1055 	mfd_assert_has_seals(fd, F_WX_SEALS);
1056 
1057 	mfd_fail_chmod(fd, 0711);
1058 	mfd_fail_chmod(fd, 0600);
1059 	mfd_fail_write(fd);
1060 	close(fd);
1061 }
1062 
1063 /*
1064  * Test EXEC_NO_SEAL
1065  * Test fd is created with exec and not allow sealing.
1066  */
1067 static void test_exec_no_seal(void)
1068 {
1069 	int fd;
1070 
1071 	printf("%s EXEC_NO_SEAL\n", memfd_str);
1072 
1073 	/* Create with EXEC but without ALLOW_SEALING */
1074 	fd = mfd_assert_new("kern_memfd_exec_no_sealing",
1075 			    mfd_def_size,
1076 			    MFD_CLOEXEC | MFD_EXEC);
1077 	mfd_assert_mode(fd, 0777);
1078 	mfd_assert_has_seals(fd, F_SEAL_SEAL);
1079 	mfd_assert_chmod(fd, 0666);
1080 	close(fd);
1081 }
1082 
1083 /*
1084  * Test memfd_create with MFD_NOEXEC flag
1085  */
1086 static void test_noexec_seal(void)
1087 {
1088 	int fd;
1089 
1090 	printf("%s NOEXEC_SEAL\n", memfd_str);
1091 
1092 	/* Create with NOEXEC and ALLOW_SEALING */
1093 	fd = mfd_assert_new("kern_memfd_noexec",
1094 			    mfd_def_size,
1095 			    MFD_CLOEXEC | MFD_ALLOW_SEALING | MFD_NOEXEC_SEAL);
1096 	mfd_assert_mode(fd, 0666);
1097 	mfd_assert_has_seals(fd, F_SEAL_EXEC);
1098 	mfd_fail_chmod(fd, 0777);
1099 	close(fd);
1100 
1101 	/* Create with NOEXEC but without ALLOW_SEALING */
1102 	fd = mfd_assert_new("kern_memfd_noexec",
1103 			    mfd_def_size,
1104 			    MFD_CLOEXEC | MFD_NOEXEC_SEAL);
1105 	mfd_assert_mode(fd, 0666);
1106 	mfd_assert_has_seals(fd, F_SEAL_EXEC);
1107 	mfd_fail_chmod(fd, 0777);
1108 	close(fd);
1109 }
1110 
1111 static void test_sysctl_child(void)
1112 {
1113 	int fd;
1114 
1115 	printf("%s sysctl 0\n", memfd_str);
1116 	sysctl_assert_write("0");
1117 	fd = mfd_assert_new("kern_memfd_sysctl_0",
1118 			    mfd_def_size,
1119 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1120 
1121 	mfd_assert_mode(fd, 0777);
1122 	mfd_assert_has_seals(fd, 0);
1123 	mfd_assert_chmod(fd, 0644);
1124 	close(fd);
1125 
1126 	printf("%s sysctl 1\n", memfd_str);
1127 	sysctl_assert_write("1");
1128 	fd = mfd_assert_new("kern_memfd_sysctl_1",
1129 			    mfd_def_size,
1130 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1131 
1132 	mfd_assert_mode(fd, 0666);
1133 	mfd_assert_has_seals(fd, F_SEAL_EXEC);
1134 	mfd_fail_chmod(fd, 0777);
1135 	sysctl_fail_write("0");
1136 	close(fd);
1137 
1138 	printf("%s sysctl 2\n", memfd_str);
1139 	sysctl_assert_write("2");
1140 	mfd_fail_new("kern_memfd_sysctl_2",
1141 		MFD_CLOEXEC | MFD_ALLOW_SEALING);
1142 	sysctl_fail_write("0");
1143 	sysctl_fail_write("1");
1144 }
1145 
/*
 * clone() entry point that runs test_sysctl_child(); @arg is unused.
 * Always returns 0.
 */
static int newpid_thread_fn(void *arg)
{
	(void)arg;

	test_sysctl_child();

	return 0;
}
1151 
1152 static void test_sysctl_child2(void)
1153 {
1154 	int fd;
1155 
1156 	sysctl_fail_write("0");
1157 	fd = mfd_assert_new("kern_memfd_sysctl_1",
1158 			    mfd_def_size,
1159 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1160 
1161 	mfd_assert_mode(fd, 0666);
1162 	mfd_assert_has_seals(fd, F_SEAL_EXEC);
1163 	mfd_fail_chmod(fd, 0777);
1164 	close(fd);
1165 }
1166 
/*
 * clone() entry point that runs test_sysctl_child2(); @arg is unused.
 * Always returns 0.
 */
static int newpid_thread_fn2(void *arg)
{
	(void)arg;

	test_sysctl_child2();

	return 0;
}
1172 static pid_t spawn_newpid_thread(unsigned int flags, int (*fn)(void *))
1173 {
1174 	uint8_t *stack;
1175 	pid_t pid;
1176 
1177 	stack = malloc(STACK_SIZE);
1178 	if (!stack) {
1179 		printf("malloc(STACK_SIZE) failed: %m\n");
1180 		abort();
1181 	}
1182 
1183 	pid = clone(fn,
1184 		    stack + STACK_SIZE,
1185 		    SIGCHLD | flags,
1186 		    NULL);
1187 	if (pid < 0) {
1188 		printf("clone() failed: %m\n");
1189 		abort();
1190 	}
1191 
1192 	return pid;
1193 }
1194 
/*
 * Wait for the child @pid and abort unless it exited with status 0.
 *
 * The child runs assertions that report failure through abort() (or a
 * non-zero exit); if its status were discarded, such failures would go
 * unnoticed and the test would still report success.
 */
static void join_newpid_thread(pid_t pid)
{
	int wstatus;

	if (waitpid(pid, &wstatus, 0) < 0) {
		printf("newpid thread: waitpid() failed: %m\n");
		abort();
	}

	if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) != 0) {
		printf("newpid thread: exited with non-zero error code %d\n",
		       WEXITSTATUS(wstatus));
		abort();
	}

	if (WIFSIGNALED(wstatus)) {
		printf("newpid thread: killed by signal %d\n",
		       WTERMSIG(wstatus));
		abort();
	}
}
1199 
1200 /*
1201  * Test sysctl
1202  * A very basic sealing test to see whether setting/retrieving seals works.
1203  */
1204 static void test_sysctl(void)
1205 {
1206 	int pid = spawn_newpid_thread(CLONE_NEWPID, newpid_thread_fn);
1207 
1208 	join_newpid_thread(pid);
1209 
1210 	printf("%s child ns\n", memfd_str);
1211 	sysctl_assert_write("1");
1212 
1213 	pid = spawn_newpid_thread(CLONE_NEWPID, newpid_thread_fn2);
1214 	join_newpid_thread(pid);
1215 }
1216 
1217 /*
1218  * Test sharing via dup()
1219  * Test that seals are shared between dupped FDs and they're all equal.
1220  */
1221 static void test_share_dup(char *banner, char *b_suffix)
1222 {
1223 	int fd, fd2;
1224 
1225 	printf("%s %s %s\n", memfd_str, banner, b_suffix);
1226 
1227 	fd = mfd_assert_new("kern_memfd_share_dup",
1228 			    mfd_def_size,
1229 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1230 	mfd_assert_has_seals(fd, 0);
1231 
1232 	fd2 = mfd_assert_dup(fd);
1233 	mfd_assert_has_seals(fd2, 0);
1234 
1235 	mfd_assert_add_seals(fd, F_SEAL_WRITE);
1236 	mfd_assert_has_seals(fd, F_SEAL_WRITE);
1237 	mfd_assert_has_seals(fd2, F_SEAL_WRITE);
1238 
1239 	mfd_assert_add_seals(fd2, F_SEAL_SHRINK);
1240 	mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK);
1241 	mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK);
1242 
1243 	mfd_assert_add_seals(fd, F_SEAL_SEAL);
1244 	mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK | F_SEAL_SEAL);
1245 	mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK | F_SEAL_SEAL);
1246 
1247 	mfd_fail_add_seals(fd, F_SEAL_GROW);
1248 	mfd_fail_add_seals(fd2, F_SEAL_GROW);
1249 	mfd_fail_add_seals(fd, F_SEAL_SEAL);
1250 	mfd_fail_add_seals(fd2, F_SEAL_SEAL);
1251 
1252 	close(fd2);
1253 
1254 	mfd_fail_add_seals(fd, F_SEAL_GROW);
1255 	close(fd);
1256 }
1257 
1258 /*
1259  * Test sealing with active mmap()s
1260  * Modifying seals is only allowed if no other mmap() refs exist.
1261  */
1262 static void test_share_mmap(char *banner, char *b_suffix)
1263 {
1264 	int fd;
1265 	void *p;
1266 
1267 	printf("%s %s %s\n", memfd_str,  banner, b_suffix);
1268 
1269 	fd = mfd_assert_new("kern_memfd_share_mmap",
1270 			    mfd_def_size,
1271 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1272 	mfd_assert_has_seals(fd, 0);
1273 
1274 	/* shared/writable ref prevents sealing WRITE, but allows others */
1275 	p = mfd_assert_mmap_shared(fd);
1276 	mfd_fail_add_seals(fd, F_SEAL_WRITE);
1277 	mfd_assert_has_seals(fd, 0);
1278 	mfd_assert_add_seals(fd, F_SEAL_SHRINK);
1279 	mfd_assert_has_seals(fd, F_SEAL_SHRINK);
1280 	munmap(p, mfd_def_size);
1281 
1282 	/* readable ref allows sealing */
1283 	p = mfd_assert_mmap_private(fd);
1284 	mfd_assert_add_seals(fd, F_SEAL_WRITE);
1285 	mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK);
1286 	munmap(p, mfd_def_size);
1287 
1288 	close(fd);
1289 }
1290 
1291 /*
1292  * Test sealing with open(/proc/self/fd/%d)
1293  * Via /proc we can get access to a separate file-context for the same memfd.
1294  * This is *not* like dup(), but like a real separate open(). Make sure the
1295  * semantics are as expected and we correctly check for RDONLY / WRONLY / RDWR.
1296  */
1297 static void test_share_open(char *banner, char *b_suffix)
1298 {
1299 	int fd, fd2;
1300 
1301 	printf("%s %s %s\n", memfd_str, banner, b_suffix);
1302 
1303 	fd = mfd_assert_new("kern_memfd_share_open",
1304 			    mfd_def_size,
1305 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1306 	mfd_assert_has_seals(fd, 0);
1307 
1308 	fd2 = mfd_assert_open(fd, O_RDWR, 0);
1309 	mfd_assert_add_seals(fd, F_SEAL_WRITE);
1310 	mfd_assert_has_seals(fd, F_SEAL_WRITE);
1311 	mfd_assert_has_seals(fd2, F_SEAL_WRITE);
1312 
1313 	mfd_assert_add_seals(fd2, F_SEAL_SHRINK);
1314 	mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK);
1315 	mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK);
1316 
1317 	close(fd);
1318 	fd = mfd_assert_open(fd2, O_RDONLY, 0);
1319 
1320 	mfd_fail_add_seals(fd, F_SEAL_SEAL);
1321 	mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK);
1322 	mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK);
1323 
1324 	close(fd2);
1325 	fd2 = mfd_assert_open(fd, O_RDWR, 0);
1326 
1327 	mfd_assert_add_seals(fd2, F_SEAL_SEAL);
1328 	mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK | F_SEAL_SEAL);
1329 	mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK | F_SEAL_SEAL);
1330 
1331 	close(fd2);
1332 	close(fd);
1333 }
1334 
1335 /*
1336  * Test sharing via fork()
1337  * Test whether seal-modifications work as expected with forked childs.
1338  */
1339 static void test_share_fork(char *banner, char *b_suffix)
1340 {
1341 	int fd;
1342 	pid_t pid;
1343 
1344 	printf("%s %s %s\n", memfd_str, banner, b_suffix);
1345 
1346 	fd = mfd_assert_new("kern_memfd_share_fork",
1347 			    mfd_def_size,
1348 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1349 	mfd_assert_has_seals(fd, 0);
1350 
1351 	pid = spawn_idle_thread(0);
1352 	mfd_assert_add_seals(fd, F_SEAL_SEAL);
1353 	mfd_assert_has_seals(fd, F_SEAL_SEAL);
1354 
1355 	mfd_fail_add_seals(fd, F_SEAL_WRITE);
1356 	mfd_assert_has_seals(fd, F_SEAL_SEAL);
1357 
1358 	join_idle_thread(pid);
1359 
1360 	mfd_fail_add_seals(fd, F_SEAL_WRITE);
1361 	mfd_assert_has_seals(fd, F_SEAL_SEAL);
1362 
1363 	close(fd);
1364 }
1365 
1366 int main(int argc, char **argv)
1367 {
1368 	pid_t pid;
1369 
1370 	if (argc == 2) {
1371 		if (!strcmp(argv[1], "hugetlbfs")) {
1372 			unsigned long hpage_size = default_huge_page_size();
1373 
1374 			if (!hpage_size) {
1375 				printf("Unable to determine huge page size\n");
1376 				abort();
1377 			}
1378 
1379 			hugetlbfs_test = 1;
1380 			memfd_str = MEMFD_HUGE_STR;
1381 			mfd_def_size = hpage_size * 2;
1382 		} else {
1383 			printf("Unknown option: %s\n", argv[1]);
1384 			abort();
1385 		}
1386 	}
1387 
1388 	test_create();
1389 	test_basic();
1390 	test_exec_seal();
1391 	test_exec_no_seal();
1392 	test_noexec_seal();
1393 
1394 	test_seal_write();
1395 	test_seal_future_write();
1396 	test_seal_shrink();
1397 	test_seal_grow();
1398 	test_seal_resize();
1399 
1400 	test_share_dup("SHARE-DUP", "");
1401 	test_share_mmap("SHARE-MMAP", "");
1402 	test_share_open("SHARE-OPEN", "");
1403 	test_share_fork("SHARE-FORK", "");
1404 
1405 	/* Run test-suite in a multi-threaded environment with a shared
1406 	 * file-table. */
1407 	pid = spawn_idle_thread(CLONE_FILES | CLONE_FS | CLONE_VM);
1408 	test_share_dup("SHARE-DUP", SHARED_FT_STR);
1409 	test_share_mmap("SHARE-MMAP", SHARED_FT_STR);
1410 	test_share_open("SHARE-OPEN", SHARED_FT_STR);
1411 	test_share_fork("SHARE-FORK", SHARED_FT_STR);
1412 	join_idle_thread(pid);
1413 
1414 	test_sysctl();
1415 
1416 	printf("memfd: DONE\n");
1417 
1418 	return 0;
1419 }
1420