1 // SPDX-License-Identifier: GPL-2.0
2 #define _GNU_SOURCE
3 #define __EXPORTED_HEADERS__
4 
5 #include <errno.h>
6 #include <inttypes.h>
7 #include <limits.h>
8 #include <linux/falloc.h>
9 #include <fcntl.h>
10 #include <linux/memfd.h>
11 #include <sched.h>
12 #include <stdio.h>
13 #include <stdlib.h>
14 #include <signal.h>
15 #include <string.h>
16 #include <sys/mman.h>
17 #include <sys/stat.h>
18 #include <sys/syscall.h>
19 #include <sys/wait.h>
20 #include <unistd.h>
21 #include <ctype.h>
22 
23 #include "common.h"
24 
/* Default value of memfd_str, the label printed at the start of each test banner. */
#define MEMFD_STR	"memfd:"
/*
 * NOTE(review): not referenced in this part of the file; presumably the
 * banner label used for hugetlbfs runs - confirm.
 */
#define MEMFD_HUGE_STR	"memfd-hugetlb:"
/*
 * NOTE(review): not referenced in this part of the file; looks like a banner
 * suffix for shared file-table runs - confirm.
 */
#define SHARED_FT_STR	"(shared file-table)"

/* Initial value of mfd_def_size: the byte size the tests give a new memfd. */
#define MFD_DEF_SIZE 8192
/* Bytes malloc()ed by spawn_thread() for the stack handed to clone(). */
#define STACK_SIZE 65536

/*
 * Local definition of the exec seal bit.
 * NOTE(review): presumably provided for build hosts whose headers lack it -
 * confirm the value against the uapi header.
 */
#define F_SEAL_EXEC	0x0020

/*
 * Seal set test_exec_seal() expects to read back after adding F_SEAL_EXEC to
 * a memfd whose mode was set to 0700.
 */
#define F_WX_SEALS (F_SEAL_SHRINK | \
		    F_SEAL_GROW | \
		    F_SEAL_WRITE | \
		    F_SEAL_FUTURE_WRITE | \
		    F_SEAL_EXEC)

/*
 * Local definition of the memfd_create() flag used by the NOEXEC_SEAL tests.
 * NOTE(review): presumably mirrors the uapi value - confirm.
 */
#define MFD_NOEXEC_SEAL	0x0008U

/*
 * Default is not to test hugetlbfs
 */
/* Size used for every memfd, mapping and resize performed by the helpers. */
static size_t mfd_def_size = MFD_DEF_SIZE;
/* Label printed in front of every test banner. */
static const char *memfd_str = MEMFD_STR;
/* Forward declarations; the definitions are not in this part of the file. */
static int newpid_thread_fn2(void *arg);
static void join_newpid_thread(pid_t pid);
49 
fd2name(int fd,char * buf,size_t bufsize)50 static ssize_t fd2name(int fd, char *buf, size_t bufsize)
51 {
52 	char buf1[PATH_MAX];
53 	int size;
54 	ssize_t nbytes;
55 
56 	size = snprintf(buf1, PATH_MAX, "/proc/self/fd/%d", fd);
57 	if (size < 0) {
58 		printf("snprintf(%d) failed on %m\n", fd);
59 		abort();
60 	}
61 
62 	/*
63 	 * reserver one byte for string termination.
64 	 */
65 	nbytes = readlink(buf1, buf, bufsize-1);
66 	if (nbytes == -1) {
67 		printf("readlink(%s) failed %m\n", buf1);
68 		abort();
69 	}
70 	buf[nbytes] = '\0';
71 	return nbytes;
72 }
73 
/*
 * Create a memfd named @name with @flags and resize it to @sz bytes.
 * Returns the new descriptor; aborts the test if either step fails.
 */
static int mfd_assert_new(const char *name, loff_t sz, unsigned int flags)
{
	int memfd = sys_memfd_create(name, flags);

	if (memfd < 0) {
		printf("memfd_create(\"%s\", %u) failed: %m\n",
		       name, flags);
		abort();
	}

	if (ftruncate(memfd, sz) < 0) {
		printf("ftruncate(%llu) failed: %m\n", (unsigned long long)sz);
		abort();
	}

	return memfd;
}
93 
sysctl_assert_write(const char * val)94 static void sysctl_assert_write(const char *val)
95 {
96 	int fd = open("/proc/sys/vm/memfd_noexec", O_WRONLY | O_CLOEXEC);
97 
98 	if (fd < 0) {
99 		printf("open sysctl failed: %m\n");
100 		abort();
101 	}
102 
103 	if (write(fd, val, strlen(val)) < 0) {
104 		printf("write sysctl %s failed: %m\n", val);
105 		abort();
106 	}
107 }
108 
sysctl_fail_write(const char * val)109 static void sysctl_fail_write(const char *val)
110 {
111 	int fd = open("/proc/sys/vm/memfd_noexec", O_WRONLY | O_CLOEXEC);
112 
113 	if (fd < 0) {
114 		printf("open sysctl failed: %m\n");
115 		abort();
116 	}
117 
118 	if (write(fd, val, strlen(val)) >= 0) {
119 		printf("write sysctl %s succeeded, but failure expected\n",
120 				val);
121 		abort();
122 	}
123 }
124 
sysctl_assert_equal(const char * val)125 static void sysctl_assert_equal(const char *val)
126 {
127 	char *p, buf[128] = {};
128 	int fd = open("/proc/sys/vm/memfd_noexec", O_RDONLY | O_CLOEXEC);
129 
130 	if (fd < 0) {
131 		printf("open sysctl failed: %m\n");
132 		abort();
133 	}
134 
135 	if (read(fd, buf, sizeof(buf)) < 0) {
136 		printf("read sysctl failed: %m\n");
137 		abort();
138 	}
139 
140 	/* Strip trailing whitespace. */
141 	p = buf;
142 	while (!isspace(*p))
143 		p++;
144 	*p = '\0';
145 
146 	if (strcmp(buf, val) != 0) {
147 		printf("unexpected sysctl value: expected %s, got %s\n", val, buf);
148 		abort();
149 	}
150 }
151 
/*
 * Open a second read-write descriptor for @fd_in through its
 * /proc/self/fd entry. Returns the new descriptor; aborts if open() fails.
 */
static int mfd_assert_reopen_fd(int fd_in)
{
	char proc_link[100];
	int reopened;

	sprintf(proc_link, "/proc/self/fd/%d", fd_in);

	reopened = open(proc_link, O_RDWR);
	if (reopened >= 0)
		return reopened;

	printf("re-open of existing fd %d failed\n", fd_in);
	abort();
}
167 
/*
 * Expect memfd creation with @name and @flags to be rejected.
 * If a descriptor is returned anyway it is closed and the test aborts.
 */
static void mfd_fail_new(const char *name, unsigned int flags)
{
	int memfd = sys_memfd_create(name, flags);

	if (memfd < 0)
		return;

	printf("memfd_create(\"%s\", %u) succeeded, but failure expected\n",
	       name, flags);
	close(memfd);
	abort();
}
180 
mfd_assert_get_seals(int fd)181 static unsigned int mfd_assert_get_seals(int fd)
182 {
183 	int r;
184 
185 	r = fcntl(fd, F_GET_SEALS);
186 	if (r < 0) {
187 		printf("GET_SEALS(%d) failed: %m\n", fd);
188 		abort();
189 	}
190 
191 	return (unsigned int)r;
192 }
193 
mfd_assert_has_seals(int fd,unsigned int seals)194 static void mfd_assert_has_seals(int fd, unsigned int seals)
195 {
196 	char buf[PATH_MAX];
197 	int nbytes;
198 	unsigned int s;
199 	fd2name(fd, buf, PATH_MAX);
200 
201 	s = mfd_assert_get_seals(fd);
202 	if (s != seals) {
203 		printf("%u != %u = GET_SEALS(%s)\n", seals, s, buf);
204 		abort();
205 	}
206 }
207 
mfd_assert_add_seals(int fd,unsigned int seals)208 static void mfd_assert_add_seals(int fd, unsigned int seals)
209 {
210 	int r;
211 	unsigned int s;
212 
213 	s = mfd_assert_get_seals(fd);
214 	r = fcntl(fd, F_ADD_SEALS, seals);
215 	if (r < 0) {
216 		printf("ADD_SEALS(%d, %u -> %u) failed: %m\n", fd, s, seals);
217 		abort();
218 	}
219 }
220 
mfd_fail_add_seals(int fd,unsigned int seals)221 static void mfd_fail_add_seals(int fd, unsigned int seals)
222 {
223 	int r;
224 	unsigned int s;
225 
226 	r = fcntl(fd, F_GET_SEALS);
227 	if (r < 0)
228 		s = 0;
229 	else
230 		s = (unsigned int)r;
231 
232 	r = fcntl(fd, F_ADD_SEALS, seals);
233 	if (r >= 0) {
234 		printf("ADD_SEALS(%d, %u -> %u) didn't fail as expected\n",
235 				fd, s, seals);
236 		abort();
237 	}
238 }
239 
/*
 * Abort the test unless fstat() reports that @fd is exactly @size bytes long.
 */
static void mfd_assert_size(int fd, size_t size)
{
	struct stat info;

	if (fstat(fd, &info) < 0) {
		printf("fstat(%d) failed: %m\n", fd);
		abort();
	}

	if (info.st_size != size) {
		printf("wrong file size %lld, but expected %lld\n",
		       (long long)info.st_size, (long long)size);
		abort();
	}
}
255 
/*
 * Duplicate @fd with dup() and return the copy; abort the test on failure.
 */
static int mfd_assert_dup(int fd)
{
	int copy = dup(fd);

	if (copy >= 0)
		return copy;

	printf("dup(%d) failed: %m\n", fd);
	abort();
}
268 
mfd_assert_mmap_shared(int fd)269 static void *mfd_assert_mmap_shared(int fd)
270 {
271 	void *p;
272 
273 	p = mmap(NULL,
274 		 mfd_def_size,
275 		 PROT_READ | PROT_WRITE,
276 		 MAP_SHARED,
277 		 fd,
278 		 0);
279 	if (p == MAP_FAILED) {
280 		printf("mmap() failed: %m\n");
281 		abort();
282 	}
283 
284 	return p;
285 }
286 
mfd_assert_mmap_private(int fd)287 static void *mfd_assert_mmap_private(int fd)
288 {
289 	void *p;
290 
291 	p = mmap(NULL,
292 		 mfd_def_size,
293 		 PROT_READ,
294 		 MAP_PRIVATE,
295 		 fd,
296 		 0);
297 	if (p == MAP_FAILED) {
298 		printf("mmap() failed: %m\n");
299 		abort();
300 	}
301 
302 	return p;
303 }
304 
/*
 * Open /proc/self/fd/<fd> with the given @flags and @mode.
 * Returns the new descriptor; aborts the test if open() fails.
 */
static int mfd_assert_open(int fd, int flags, mode_t mode)
{
	char proc_link[512];
	int opened;

	sprintf(proc_link, "/proc/self/fd/%d", fd);

	opened = open(proc_link, flags, mode);
	if (opened >= 0)
		return opened;

	printf("open(%s) failed: %m\n", proc_link);
	abort();
}
319 
/*
 * Expect opening /proc/self/fd/<fd> with @flags and @mode to fail.
 * Aborts the test if open() returns a descriptor.
 */
static void mfd_fail_open(int fd, int flags, mode_t mode)
{
	char proc_link[512];

	sprintf(proc_link, "/proc/self/fd/%d", fd);

	if (open(proc_link, flags, mode) < 0)
		return;

	printf("open(%s) didn't fail as expected\n", proc_link);
	abort();
}
332 
mfd_assert_read(int fd)333 static void mfd_assert_read(int fd)
334 {
335 	char buf[16];
336 	void *p;
337 	ssize_t l;
338 
339 	l = read(fd, buf, sizeof(buf));
340 	if (l != sizeof(buf)) {
341 		printf("read() failed: %m\n");
342 		abort();
343 	}
344 
345 	/* verify PROT_READ *is* allowed */
346 	p = mmap(NULL,
347 		 mfd_def_size,
348 		 PROT_READ,
349 		 MAP_PRIVATE,
350 		 fd,
351 		 0);
352 	if (p == MAP_FAILED) {
353 		printf("mmap() failed: %m\n");
354 		abort();
355 	}
356 	munmap(p, mfd_def_size);
357 
358 	/* verify MAP_PRIVATE is *always* allowed (even writable) */
359 	p = mmap(NULL,
360 		 mfd_def_size,
361 		 PROT_READ | PROT_WRITE,
362 		 MAP_PRIVATE,
363 		 fd,
364 		 0);
365 	if (p == MAP_FAILED) {
366 		printf("mmap() failed: %m\n");
367 		abort();
368 	}
369 	munmap(p, mfd_def_size);
370 }
371 
372 /* Test that PROT_READ + MAP_SHARED mappings work. */
mfd_assert_read_shared(int fd)373 static void mfd_assert_read_shared(int fd)
374 {
375 	void *p;
376 
377 	/* verify PROT_READ and MAP_SHARED *is* allowed */
378 	p = mmap(NULL,
379 		 mfd_def_size,
380 		 PROT_READ,
381 		 MAP_SHARED,
382 		 fd,
383 		 0);
384 	if (p == MAP_FAILED) {
385 		printf("mmap() failed: %m\n");
386 		abort();
387 	}
388 	munmap(p, mfd_def_size);
389 }
390 
mfd_assert_fork_private_write(int fd)391 static void mfd_assert_fork_private_write(int fd)
392 {
393 	int *p;
394 	pid_t pid;
395 
396 	p = mmap(NULL,
397 		 mfd_def_size,
398 		 PROT_READ | PROT_WRITE,
399 		 MAP_PRIVATE,
400 		 fd,
401 		 0);
402 	if (p == MAP_FAILED) {
403 		printf("mmap() failed: %m\n");
404 		abort();
405 	}
406 
407 	p[0] = 22;
408 
409 	pid = fork();
410 	if (pid == 0) {
411 		p[0] = 33;
412 		exit(0);
413 	} else {
414 		waitpid(pid, NULL, 0);
415 
416 		if (p[0] != 22) {
417 			printf("MAP_PRIVATE copy-on-write failed: %m\n");
418 			abort();
419 		}
420 	}
421 
422 	munmap(p, mfd_def_size);
423 }
424 
mfd_assert_write(int fd)425 static void mfd_assert_write(int fd)
426 {
427 	ssize_t l;
428 	void *p;
429 	int r;
430 
431 	/*
432 	 * huegtlbfs does not support write, but we want to
433 	 * verify everything else here.
434 	 */
435 	if (!hugetlbfs_test) {
436 		/* verify write() succeeds */
437 		l = write(fd, "\0\0\0\0", 4);
438 		if (l != 4) {
439 			printf("write() failed: %m\n");
440 			abort();
441 		}
442 	}
443 
444 	/* verify PROT_READ | PROT_WRITE is allowed */
445 	p = mmap(NULL,
446 		 mfd_def_size,
447 		 PROT_READ | PROT_WRITE,
448 		 MAP_SHARED,
449 		 fd,
450 		 0);
451 	if (p == MAP_FAILED) {
452 		printf("mmap() failed: %m\n");
453 		abort();
454 	}
455 	*(char *)p = 0;
456 	munmap(p, mfd_def_size);
457 
458 	/* verify PROT_WRITE is allowed */
459 	p = mmap(NULL,
460 		 mfd_def_size,
461 		 PROT_WRITE,
462 		 MAP_SHARED,
463 		 fd,
464 		 0);
465 	if (p == MAP_FAILED) {
466 		printf("mmap() failed: %m\n");
467 		abort();
468 	}
469 	*(char *)p = 0;
470 	munmap(p, mfd_def_size);
471 
472 	/* verify PROT_READ with MAP_SHARED is allowed and a following
473 	 * mprotect(PROT_WRITE) allows writing */
474 	p = mmap(NULL,
475 		 mfd_def_size,
476 		 PROT_READ,
477 		 MAP_SHARED,
478 		 fd,
479 		 0);
480 	if (p == MAP_FAILED) {
481 		printf("mmap() failed: %m\n");
482 		abort();
483 	}
484 
485 	r = mprotect(p, mfd_def_size, PROT_READ | PROT_WRITE);
486 	if (r < 0) {
487 		printf("mprotect() failed: %m\n");
488 		abort();
489 	}
490 
491 	*(char *)p = 0;
492 	munmap(p, mfd_def_size);
493 
494 	/* verify PUNCH_HOLE works */
495 	r = fallocate(fd,
496 		      FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
497 		      0,
498 		      mfd_def_size);
499 	if (r < 0) {
500 		printf("fallocate(PUNCH_HOLE) failed: %m\n");
501 		abort();
502 	}
503 }
504 
mfd_fail_write(int fd)505 static void mfd_fail_write(int fd)
506 {
507 	ssize_t l;
508 	void *p;
509 	int r;
510 
511 	/* verify write() fails */
512 	l = write(fd, "data", 4);
513 	if (l != -EPERM) {
514 		printf("expected EPERM on write(), but got %d: %m\n", (int)l);
515 		abort();
516 	}
517 
518 	/* verify PROT_READ | PROT_WRITE is not allowed */
519 	p = mmap(NULL,
520 		 mfd_def_size,
521 		 PROT_READ | PROT_WRITE,
522 		 MAP_SHARED,
523 		 fd,
524 		 0);
525 	if (p != MAP_FAILED) {
526 		printf("mmap() didn't fail as expected\n");
527 		abort();
528 	}
529 
530 	/* verify PROT_WRITE is not allowed */
531 	p = mmap(NULL,
532 		 mfd_def_size,
533 		 PROT_WRITE,
534 		 MAP_SHARED,
535 		 fd,
536 		 0);
537 	if (p != MAP_FAILED) {
538 		printf("mmap() didn't fail as expected\n");
539 		abort();
540 	}
541 
542 	/* Verify PROT_READ with MAP_SHARED with a following mprotect is not
543 	 * allowed. Note that for r/w the kernel already prevents the mmap. */
544 	p = mmap(NULL,
545 		 mfd_def_size,
546 		 PROT_READ,
547 		 MAP_SHARED,
548 		 fd,
549 		 0);
550 	if (p != MAP_FAILED) {
551 		r = mprotect(p, mfd_def_size, PROT_READ | PROT_WRITE);
552 		if (r >= 0) {
553 			printf("mmap()+mprotect() didn't fail as expected\n");
554 			abort();
555 		}
556 		munmap(p, mfd_def_size);
557 	}
558 
559 	/* verify PUNCH_HOLE fails */
560 	r = fallocate(fd,
561 		      FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
562 		      0,
563 		      mfd_def_size);
564 	if (r >= 0) {
565 		printf("fallocate(PUNCH_HOLE) didn't fail as expected\n");
566 		abort();
567 	}
568 }
569 
mfd_assert_shrink(int fd)570 static void mfd_assert_shrink(int fd)
571 {
572 	int r, fd2;
573 
574 	r = ftruncate(fd, mfd_def_size / 2);
575 	if (r < 0) {
576 		printf("ftruncate(SHRINK) failed: %m\n");
577 		abort();
578 	}
579 
580 	mfd_assert_size(fd, mfd_def_size / 2);
581 
582 	fd2 = mfd_assert_open(fd,
583 			      O_RDWR | O_CREAT | O_TRUNC,
584 			      S_IRUSR | S_IWUSR);
585 	close(fd2);
586 
587 	mfd_assert_size(fd, 0);
588 }
589 
mfd_fail_shrink(int fd)590 static void mfd_fail_shrink(int fd)
591 {
592 	int r;
593 
594 	r = ftruncate(fd, mfd_def_size / 2);
595 	if (r >= 0) {
596 		printf("ftruncate(SHRINK) didn't fail as expected\n");
597 		abort();
598 	}
599 
600 	mfd_fail_open(fd,
601 		      O_RDWR | O_CREAT | O_TRUNC,
602 		      S_IRUSR | S_IWUSR);
603 }
604 
mfd_assert_grow(int fd)605 static void mfd_assert_grow(int fd)
606 {
607 	int r;
608 
609 	r = ftruncate(fd, mfd_def_size * 2);
610 	if (r < 0) {
611 		printf("ftruncate(GROW) failed: %m\n");
612 		abort();
613 	}
614 
615 	mfd_assert_size(fd, mfd_def_size * 2);
616 
617 	r = fallocate(fd,
618 		      0,
619 		      0,
620 		      mfd_def_size * 4);
621 	if (r < 0) {
622 		printf("fallocate(ALLOC) failed: %m\n");
623 		abort();
624 	}
625 
626 	mfd_assert_size(fd, mfd_def_size * 4);
627 }
628 
mfd_fail_grow(int fd)629 static void mfd_fail_grow(int fd)
630 {
631 	int r;
632 
633 	r = ftruncate(fd, mfd_def_size * 2);
634 	if (r >= 0) {
635 		printf("ftruncate(GROW) didn't fail as expected\n");
636 		abort();
637 	}
638 
639 	r = fallocate(fd,
640 		      0,
641 		      0,
642 		      mfd_def_size * 4);
643 	if (r >= 0) {
644 		printf("fallocate(ALLOC) didn't fail as expected\n");
645 		abort();
646 	}
647 }
648 
mfd_assert_grow_write(int fd)649 static void mfd_assert_grow_write(int fd)
650 {
651 	static char *buf;
652 	ssize_t l;
653 
654 	/* hugetlbfs does not support write */
655 	if (hugetlbfs_test)
656 		return;
657 
658 	buf = malloc(mfd_def_size * 8);
659 	if (!buf) {
660 		printf("malloc(%zu) failed: %m\n", mfd_def_size * 8);
661 		abort();
662 	}
663 
664 	l = pwrite(fd, buf, mfd_def_size * 8, 0);
665 	if (l != (mfd_def_size * 8)) {
666 		printf("pwrite() failed: %m\n");
667 		abort();
668 	}
669 
670 	mfd_assert_size(fd, mfd_def_size * 8);
671 }
672 
mfd_fail_grow_write(int fd)673 static void mfd_fail_grow_write(int fd)
674 {
675 	static char *buf;
676 	ssize_t l;
677 
678 	/* hugetlbfs does not support write */
679 	if (hugetlbfs_test)
680 		return;
681 
682 	buf = malloc(mfd_def_size * 8);
683 	if (!buf) {
684 		printf("malloc(%zu) failed: %m\n", mfd_def_size * 8);
685 		abort();
686 	}
687 
688 	l = pwrite(fd, buf, mfd_def_size * 8, 0);
689 	if (l == (mfd_def_size * 8)) {
690 		printf("pwrite() didn't fail as expected\n");
691 		abort();
692 	}
693 }
694 
mfd_assert_mode(int fd,int mode)695 static void mfd_assert_mode(int fd, int mode)
696 {
697 	struct stat st;
698 	char buf[PATH_MAX];
699 	int nbytes;
700 
701 	fd2name(fd, buf, PATH_MAX);
702 
703 	if (fstat(fd, &st) < 0) {
704 		printf("fstat(%s) failed: %m\n", buf);
705 		abort();
706 	}
707 
708 	if ((st.st_mode & 07777) != mode) {
709 		printf("fstat(%s) wrong file mode 0%04o, but expected 0%04o\n",
710 		       buf, (int)st.st_mode & 07777, mode);
711 		abort();
712 	}
713 }
714 
mfd_assert_chmod(int fd,int mode)715 static void mfd_assert_chmod(int fd, int mode)
716 {
717 	char buf[PATH_MAX];
718 	int nbytes;
719 
720 	fd2name(fd, buf, PATH_MAX);
721 
722 	if (fchmod(fd, mode) < 0) {
723 		printf("fchmod(%s, 0%04o) failed: %m\n", buf, mode);
724 		abort();
725 	}
726 
727 	mfd_assert_mode(fd, mode);
728 }
729 
mfd_fail_chmod(int fd,int mode)730 static void mfd_fail_chmod(int fd, int mode)
731 {
732 	struct stat st;
733 	char buf[PATH_MAX];
734 	int nbytes;
735 
736 	fd2name(fd, buf, PATH_MAX);
737 
738 	if (fstat(fd, &st) < 0) {
739 		printf("fstat(%s) failed: %m\n", buf);
740 		abort();
741 	}
742 
743 	if (fchmod(fd, mode) == 0) {
744 		printf("fchmod(%s, 0%04o) didn't fail as expected\n",
745 		       buf, mode);
746 		abort();
747 	}
748 
749 	/* verify that file mode bits did not change */
750 	mfd_assert_mode(fd, st.st_mode & 07777);
751 }
752 
idle_thread_fn(void * arg)753 static int idle_thread_fn(void *arg)
754 {
755 	sigset_t set;
756 	int sig;
757 
758 	/* dummy waiter; SIGTERM terminates us anyway */
759 	sigemptyset(&set);
760 	sigaddset(&set, SIGTERM);
761 	sigwait(&set, &sig);
762 
763 	return 0;
764 }
765 
spawn_thread(unsigned int flags,int (* fn)(void *),void * arg)766 static pid_t spawn_thread(unsigned int flags, int (*fn)(void *), void *arg)
767 {
768 	uint8_t *stack;
769 	pid_t pid;
770 
771 	stack = malloc(STACK_SIZE);
772 	if (!stack) {
773 		printf("malloc(STACK_SIZE) failed: %m\n");
774 		abort();
775 	}
776 
777 	pid = clone(fn, stack + STACK_SIZE, SIGCHLD | flags, arg);
778 	if (pid < 0) {
779 		printf("clone() failed: %m\n");
780 		abort();
781 	}
782 
783 	return pid;
784 }
785 
/*
 * Wait for the task @pid and abort the test if waitpid() fails, if the task
 * exited with a non-zero status, or if it was killed by a signal.
 */
static void join_thread(pid_t pid)
{
	int status;

	if (waitpid(pid, &status, 0) < 0) {
		printf("newpid thread: waitpid() failed: %m\n");
		abort();
	}

	if (WIFEXITED(status)) {
		int code = WEXITSTATUS(status);

		if (code != 0) {
			printf("newpid thread: exited with non-zero error code %d\n",
			       code);
			abort();
		}
	}

	if (WIFSIGNALED(status)) {
		printf("newpid thread: killed by signal %d\n",
		       WTERMSIG(status));
		abort();
	}
}
807 
spawn_idle_thread(unsigned int flags)808 static pid_t spawn_idle_thread(unsigned int flags)
809 {
810 	return spawn_thread(flags, idle_thread_fn, NULL);
811 }
812 
/*
 * Stop an idle task: send it SIGTERM and reap it. Neither call's result is
 * checked.
 */
static void join_idle_thread(pid_t pid)
{
	int *no_status = NULL;

	kill(pid, SIGTERM);
	waitpid(pid, no_status, 0);
}
818 
819 /*
820  * Test memfd_create() syscall
821  * Verify syscall-argument validation, including name checks, flag validation
822  * and more.
823  */
test_create(void)824 static void test_create(void)
825 {
826 	char buf[2048];
827 	int fd;
828 
829 	printf("%s CREATE\n", memfd_str);
830 
831 	/* test NULL name */
832 	mfd_fail_new(NULL, 0);
833 
834 	/* test over-long name (not zero-terminated) */
835 	memset(buf, 0xff, sizeof(buf));
836 	mfd_fail_new(buf, 0);
837 
838 	/* test over-long zero-terminated name */
839 	memset(buf, 0xff, sizeof(buf));
840 	buf[sizeof(buf) - 1] = 0;
841 	mfd_fail_new(buf, 0);
842 
843 	/* verify "" is a valid name */
844 	fd = mfd_assert_new("", 0, 0);
845 	close(fd);
846 
847 	/* verify invalid O_* open flags */
848 	mfd_fail_new("", 0x0100);
849 	mfd_fail_new("", ~MFD_CLOEXEC);
850 	mfd_fail_new("", ~MFD_ALLOW_SEALING);
851 	mfd_fail_new("", ~0);
852 	mfd_fail_new("", 0x80000000U);
853 
854 	/* verify EXEC and NOEXEC_SEAL can't both be set */
855 	mfd_fail_new("", MFD_EXEC | MFD_NOEXEC_SEAL);
856 
857 	/* verify MFD_CLOEXEC is allowed */
858 	fd = mfd_assert_new("", 0, MFD_CLOEXEC);
859 	close(fd);
860 
861 	/* verify MFD_ALLOW_SEALING is allowed */
862 	fd = mfd_assert_new("", 0, MFD_ALLOW_SEALING);
863 	close(fd);
864 
865 	/* verify MFD_ALLOW_SEALING | MFD_CLOEXEC is allowed */
866 	fd = mfd_assert_new("", 0, MFD_ALLOW_SEALING | MFD_CLOEXEC);
867 	close(fd);
868 }
869 
870 /*
871  * Test basic sealing
872  * A very basic sealing test to see whether setting/retrieving seals works.
873  */
test_basic(void)874 static void test_basic(void)
875 {
876 	int fd;
877 
878 	printf("%s BASIC\n", memfd_str);
879 
880 	fd = mfd_assert_new("kern_memfd_basic",
881 			    mfd_def_size,
882 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
883 
884 	/* add basic seals */
885 	mfd_assert_has_seals(fd, 0);
886 	mfd_assert_add_seals(fd, F_SEAL_SHRINK |
887 				 F_SEAL_WRITE);
888 	mfd_assert_has_seals(fd, F_SEAL_SHRINK |
889 				 F_SEAL_WRITE);
890 
891 	/* add them again */
892 	mfd_assert_add_seals(fd, F_SEAL_SHRINK |
893 				 F_SEAL_WRITE);
894 	mfd_assert_has_seals(fd, F_SEAL_SHRINK |
895 				 F_SEAL_WRITE);
896 
897 	/* add more seals and seal against sealing */
898 	mfd_assert_add_seals(fd, F_SEAL_GROW | F_SEAL_SEAL);
899 	mfd_assert_has_seals(fd, F_SEAL_SHRINK |
900 				 F_SEAL_GROW |
901 				 F_SEAL_WRITE |
902 				 F_SEAL_SEAL);
903 
904 	/* verify that sealing no longer works */
905 	mfd_fail_add_seals(fd, F_SEAL_GROW);
906 	mfd_fail_add_seals(fd, 0);
907 
908 	close(fd);
909 
910 	/* verify sealing does not work without MFD_ALLOW_SEALING */
911 	fd = mfd_assert_new("kern_memfd_basic",
912 			    mfd_def_size,
913 			    MFD_CLOEXEC);
914 	mfd_assert_has_seals(fd, F_SEAL_SEAL);
915 	mfd_fail_add_seals(fd, F_SEAL_SHRINK |
916 			       F_SEAL_GROW |
917 			       F_SEAL_WRITE);
918 	mfd_assert_has_seals(fd, F_SEAL_SEAL);
919 	close(fd);
920 }
921 
922 /*
923  * Test SEAL_WRITE
924  * Test whether SEAL_WRITE actually prevents modifications.
925  */
test_seal_write(void)926 static void test_seal_write(void)
927 {
928 	int fd;
929 
930 	printf("%s SEAL-WRITE\n", memfd_str);
931 
932 	fd = mfd_assert_new("kern_memfd_seal_write",
933 			    mfd_def_size,
934 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
935 	mfd_assert_has_seals(fd, 0);
936 	mfd_assert_add_seals(fd, F_SEAL_WRITE);
937 	mfd_assert_has_seals(fd, F_SEAL_WRITE);
938 
939 	mfd_assert_read(fd);
940 	mfd_fail_write(fd);
941 	mfd_assert_shrink(fd);
942 	mfd_assert_grow(fd);
943 	mfd_fail_grow_write(fd);
944 
945 	close(fd);
946 }
947 
948 /*
949  * Test SEAL_FUTURE_WRITE
950  * Test whether SEAL_FUTURE_WRITE actually prevents modifications.
951  */
test_seal_future_write(void)952 static void test_seal_future_write(void)
953 {
954 	int fd, fd2;
955 	void *p;
956 
957 	printf("%s SEAL-FUTURE-WRITE\n", memfd_str);
958 
959 	fd = mfd_assert_new("kern_memfd_seal_future_write",
960 			    mfd_def_size,
961 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
962 
963 	p = mfd_assert_mmap_shared(fd);
964 
965 	mfd_assert_has_seals(fd, 0);
966 
967 	mfd_assert_add_seals(fd, F_SEAL_FUTURE_WRITE);
968 	mfd_assert_has_seals(fd, F_SEAL_FUTURE_WRITE);
969 
970 	/* read should pass, writes should fail */
971 	mfd_assert_read(fd);
972 	mfd_assert_read_shared(fd);
973 	mfd_fail_write(fd);
974 
975 	fd2 = mfd_assert_reopen_fd(fd);
976 	/* read should pass, writes should still fail */
977 	mfd_assert_read(fd2);
978 	mfd_assert_read_shared(fd2);
979 	mfd_fail_write(fd2);
980 
981 	mfd_assert_fork_private_write(fd);
982 
983 	munmap(p, mfd_def_size);
984 	close(fd2);
985 	close(fd);
986 }
987 
988 /*
989  * Test SEAL_SHRINK
990  * Test whether SEAL_SHRINK actually prevents shrinking
991  */
test_seal_shrink(void)992 static void test_seal_shrink(void)
993 {
994 	int fd;
995 
996 	printf("%s SEAL-SHRINK\n", memfd_str);
997 
998 	fd = mfd_assert_new("kern_memfd_seal_shrink",
999 			    mfd_def_size,
1000 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1001 	mfd_assert_has_seals(fd, 0);
1002 	mfd_assert_add_seals(fd, F_SEAL_SHRINK);
1003 	mfd_assert_has_seals(fd, F_SEAL_SHRINK);
1004 
1005 	mfd_assert_read(fd);
1006 	mfd_assert_write(fd);
1007 	mfd_fail_shrink(fd);
1008 	mfd_assert_grow(fd);
1009 	mfd_assert_grow_write(fd);
1010 
1011 	close(fd);
1012 }
1013 
1014 /*
1015  * Test SEAL_GROW
1016  * Test whether SEAL_GROW actually prevents growing
1017  */
test_seal_grow(void)1018 static void test_seal_grow(void)
1019 {
1020 	int fd;
1021 
1022 	printf("%s SEAL-GROW\n", memfd_str);
1023 
1024 	fd = mfd_assert_new("kern_memfd_seal_grow",
1025 			    mfd_def_size,
1026 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1027 	mfd_assert_has_seals(fd, 0);
1028 	mfd_assert_add_seals(fd, F_SEAL_GROW);
1029 	mfd_assert_has_seals(fd, F_SEAL_GROW);
1030 
1031 	mfd_assert_read(fd);
1032 	mfd_assert_write(fd);
1033 	mfd_assert_shrink(fd);
1034 	mfd_fail_grow(fd);
1035 	mfd_fail_grow_write(fd);
1036 
1037 	close(fd);
1038 }
1039 
1040 /*
1041  * Test SEAL_SHRINK | SEAL_GROW
1042  * Test whether SEAL_SHRINK | SEAL_GROW actually prevents resizing
1043  */
test_seal_resize(void)1044 static void test_seal_resize(void)
1045 {
1046 	int fd;
1047 
1048 	printf("%s SEAL-RESIZE\n", memfd_str);
1049 
1050 	fd = mfd_assert_new("kern_memfd_seal_resize",
1051 			    mfd_def_size,
1052 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1053 	mfd_assert_has_seals(fd, 0);
1054 	mfd_assert_add_seals(fd, F_SEAL_SHRINK | F_SEAL_GROW);
1055 	mfd_assert_has_seals(fd, F_SEAL_SHRINK | F_SEAL_GROW);
1056 
1057 	mfd_assert_read(fd);
1058 	mfd_assert_write(fd);
1059 	mfd_fail_shrink(fd);
1060 	mfd_fail_grow(fd);
1061 	mfd_fail_grow_write(fd);
1062 
1063 	close(fd);
1064 }
1065 
1066 /*
1067  * Test SEAL_EXEC
1068  * Test fd is created with exec and allow sealing.
1069  * chmod() cannot change x bits after sealing.
1070  */
test_exec_seal(void)1071 static void test_exec_seal(void)
1072 {
1073 	int fd;
1074 
1075 	printf("%s SEAL-EXEC\n", memfd_str);
1076 
1077 	printf("%s	Apply SEAL_EXEC\n", memfd_str);
1078 	fd = mfd_assert_new("kern_memfd_seal_exec",
1079 			    mfd_def_size,
1080 			    MFD_CLOEXEC | MFD_ALLOW_SEALING | MFD_EXEC);
1081 
1082 	mfd_assert_mode(fd, 0777);
1083 	mfd_assert_chmod(fd, 0644);
1084 
1085 	mfd_assert_has_seals(fd, 0);
1086 	mfd_assert_add_seals(fd, F_SEAL_EXEC);
1087 	mfd_assert_has_seals(fd, F_SEAL_EXEC);
1088 
1089 	mfd_assert_chmod(fd, 0600);
1090 	mfd_fail_chmod(fd, 0777);
1091 	mfd_fail_chmod(fd, 0670);
1092 	mfd_fail_chmod(fd, 0605);
1093 	mfd_fail_chmod(fd, 0700);
1094 	mfd_fail_chmod(fd, 0100);
1095 	mfd_assert_chmod(fd, 0666);
1096 	mfd_assert_write(fd);
1097 	close(fd);
1098 
1099 	printf("%s	Apply ALL_SEALS\n", memfd_str);
1100 	fd = mfd_assert_new("kern_memfd_seal_exec",
1101 			    mfd_def_size,
1102 			    MFD_CLOEXEC | MFD_ALLOW_SEALING | MFD_EXEC);
1103 
1104 	mfd_assert_mode(fd, 0777);
1105 	mfd_assert_chmod(fd, 0700);
1106 
1107 	mfd_assert_has_seals(fd, 0);
1108 	mfd_assert_add_seals(fd, F_SEAL_EXEC);
1109 	mfd_assert_has_seals(fd, F_WX_SEALS);
1110 
1111 	mfd_fail_chmod(fd, 0711);
1112 	mfd_fail_chmod(fd, 0600);
1113 	mfd_fail_write(fd);
1114 	close(fd);
1115 }
1116 
1117 /*
1118  * Test EXEC_NO_SEAL
1119  * Test fd is created with exec and not allow sealing.
1120  */
test_exec_no_seal(void)1121 static void test_exec_no_seal(void)
1122 {
1123 	int fd;
1124 
1125 	printf("%s EXEC_NO_SEAL\n", memfd_str);
1126 
1127 	/* Create with EXEC but without ALLOW_SEALING */
1128 	fd = mfd_assert_new("kern_memfd_exec_no_sealing",
1129 			    mfd_def_size,
1130 			    MFD_CLOEXEC | MFD_EXEC);
1131 	mfd_assert_mode(fd, 0777);
1132 	mfd_assert_has_seals(fd, F_SEAL_SEAL);
1133 	mfd_assert_chmod(fd, 0666);
1134 	close(fd);
1135 }
1136 
1137 /*
1138  * Test memfd_create with MFD_NOEXEC flag
1139  */
test_noexec_seal(void)1140 static void test_noexec_seal(void)
1141 {
1142 	int fd;
1143 
1144 	printf("%s NOEXEC_SEAL\n", memfd_str);
1145 
1146 	/* Create with NOEXEC and ALLOW_SEALING */
1147 	fd = mfd_assert_new("kern_memfd_noexec",
1148 			    mfd_def_size,
1149 			    MFD_CLOEXEC | MFD_ALLOW_SEALING | MFD_NOEXEC_SEAL);
1150 	mfd_assert_mode(fd, 0666);
1151 	mfd_assert_has_seals(fd, F_SEAL_EXEC);
1152 	mfd_fail_chmod(fd, 0777);
1153 	close(fd);
1154 
1155 	/* Create with NOEXEC but without ALLOW_SEALING */
1156 	fd = mfd_assert_new("kern_memfd_noexec",
1157 			    mfd_def_size,
1158 			    MFD_CLOEXEC | MFD_NOEXEC_SEAL);
1159 	mfd_assert_mode(fd, 0666);
1160 	mfd_assert_has_seals(fd, F_SEAL_EXEC);
1161 	mfd_fail_chmod(fd, 0777);
1162 	close(fd);
1163 }
1164 
test_sysctl_sysctl0(void)1165 static void test_sysctl_sysctl0(void)
1166 {
1167 	int fd;
1168 
1169 	sysctl_assert_equal("0");
1170 
1171 	fd = mfd_assert_new("kern_memfd_sysctl_0_dfl",
1172 			    mfd_def_size,
1173 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1174 	mfd_assert_mode(fd, 0777);
1175 	mfd_assert_has_seals(fd, 0);
1176 	mfd_assert_chmod(fd, 0644);
1177 	close(fd);
1178 }
1179 
/* Write "0" to the sysctl, then run the checks that belong to that value. */
static void test_sysctl_set_sysctl0(void)
{
	static const char level[] = "0";

	sysctl_assert_write(level);
	test_sysctl_sysctl0();
}
1185 
test_sysctl_sysctl1(void)1186 static void test_sysctl_sysctl1(void)
1187 {
1188 	int fd;
1189 
1190 	sysctl_assert_equal("1");
1191 
1192 	fd = mfd_assert_new("kern_memfd_sysctl_1_dfl",
1193 			    mfd_def_size,
1194 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1195 	mfd_assert_mode(fd, 0666);
1196 	mfd_assert_has_seals(fd, F_SEAL_EXEC);
1197 	mfd_fail_chmod(fd, 0777);
1198 	close(fd);
1199 
1200 	fd = mfd_assert_new("kern_memfd_sysctl_1_exec",
1201 			    mfd_def_size,
1202 			    MFD_CLOEXEC | MFD_EXEC | MFD_ALLOW_SEALING);
1203 	mfd_assert_mode(fd, 0777);
1204 	mfd_assert_has_seals(fd, 0);
1205 	mfd_assert_chmod(fd, 0644);
1206 	close(fd);
1207 
1208 	fd = mfd_assert_new("kern_memfd_sysctl_1_noexec",
1209 			    mfd_def_size,
1210 			    MFD_CLOEXEC | MFD_NOEXEC_SEAL | MFD_ALLOW_SEALING);
1211 	mfd_assert_mode(fd, 0666);
1212 	mfd_assert_has_seals(fd, F_SEAL_EXEC);
1213 	mfd_fail_chmod(fd, 0777);
1214 	close(fd);
1215 }
1216 
/* Write "1" to the sysctl, then run the checks that belong to that value. */
static void test_sysctl_set_sysctl1(void)
{
	static const char level[] = "1";

	sysctl_assert_write(level);
	test_sysctl_sysctl1();
}
1222 
test_sysctl_sysctl2(void)1223 static void test_sysctl_sysctl2(void)
1224 {
1225 	int fd;
1226 
1227 	sysctl_assert_equal("2");
1228 
1229 	fd = mfd_assert_new("kern_memfd_sysctl_2_dfl",
1230 			    mfd_def_size,
1231 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1232 	mfd_assert_mode(fd, 0666);
1233 	mfd_assert_has_seals(fd, F_SEAL_EXEC);
1234 	mfd_fail_chmod(fd, 0777);
1235 	close(fd);
1236 
1237 	mfd_fail_new("kern_memfd_sysctl_2_exec",
1238 		     MFD_CLOEXEC | MFD_EXEC | MFD_ALLOW_SEALING);
1239 
1240 	fd = mfd_assert_new("kern_memfd_sysctl_2_noexec",
1241 			    mfd_def_size,
1242 			    MFD_CLOEXEC | MFD_NOEXEC_SEAL | MFD_ALLOW_SEALING);
1243 	mfd_assert_mode(fd, 0666);
1244 	mfd_assert_has_seals(fd, F_SEAL_EXEC);
1245 	mfd_fail_chmod(fd, 0777);
1246 	close(fd);
1247 }
1248 
/* Write "2" to the sysctl, then run the checks that belong to that value. */
static void test_sysctl_set_sysctl2(void)
{
	static const char level[] = "2";

	sysctl_assert_write(level);
	test_sysctl_sysctl2();
}
1254 
sysctl_simple_child(void * arg)1255 static int sysctl_simple_child(void *arg)
1256 {
1257 	int fd;
1258 	int pid;
1259 
1260 	printf("%s sysctl 0\n", memfd_str);
1261 	test_sysctl_set_sysctl0();
1262 
1263 	printf("%s sysctl 1\n", memfd_str);
1264 	test_sysctl_set_sysctl1();
1265 
1266 	printf("%s sysctl 0\n", memfd_str);
1267 	test_sysctl_set_sysctl0();
1268 
1269 	printf("%s sysctl 2\n", memfd_str);
1270 	test_sysctl_set_sysctl2();
1271 
1272 	printf("%s sysctl 1\n", memfd_str);
1273 	test_sysctl_set_sysctl1();
1274 
1275 	printf("%s sysctl 0\n", memfd_str);
1276 	test_sysctl_set_sysctl0();
1277 
1278 	return 0;
1279 }
1280 
1281 /*
1282  * Test sysctl
1283  * A very basic test to make sure the core sysctl semantics work.
1284  */
test_sysctl_simple(void)1285 static void test_sysctl_simple(void)
1286 {
1287 	int pid = spawn_thread(CLONE_NEWPID, sysctl_simple_child, NULL);
1288 
1289 	join_thread(pid);
1290 }
1291 
/*
 * Task body used with spawn_thread(): treats @arg as a pointer to a
 * void(void) test function, calls it, and returns 0.
 */
static int sysctl_nested(void *arg)
{
	void (*test_fn)(void);

	test_fn = arg;
	test_fn();

	return 0;
}
1299 
/*
 * Same as sysctl_nested(), except that the thread first sends SIGSTOP to
 * itself, so that the test routine in @arg runs only after a SIGCONT.
 */
static int sysctl_nested_wait(void *arg)
{
	const pid_t self = getpid();

	/* Wait for a SIGCONT. */
	kill(self, SIGSTOP);

	return sysctl_nested(arg);
}
1306 
/*
 * Expect a write of "0" to the sysctl under test to fail, then run the
 * checks that apply while it holds "1".
 */
static void test_sysctl_sysctl1_failset(void)
{
	sysctl_fail_write("0");

	test_sysctl_sysctl1();
}
1312 
/*
 * Expect writes of "1" and then "0" to the sysctl under test to fail, and
 * run the checks that apply while it holds "2" after each attempt.
 */
static void test_sysctl_sysctl2_failset(void)
{
	static const char *const rejected[] = { "1", "0" };
	size_t i;

	for (i = 0; i < sizeof(rejected) / sizeof(rejected[0]); i++) {
		sysctl_fail_write(rejected[i]);
		test_sysctl_sysctl2();
	}
}
1321 
sysctl_nested_child(void * arg)1322 static int sysctl_nested_child(void *arg)
1323 {
1324 	int fd;
1325 	int pid;
1326 
1327 	printf("%s nested sysctl 0\n", memfd_str);
1328 	sysctl_assert_write("0");
1329 	/* A further nested pidns works the same. */
1330 	pid = spawn_thread(CLONE_NEWPID, sysctl_simple_child, NULL);
1331 	join_thread(pid);
1332 
1333 	printf("%s nested sysctl 1\n", memfd_str);
1334 	sysctl_assert_write("1");
1335 	/* Child inherits our setting. */
1336 	pid = spawn_thread(CLONE_NEWPID, sysctl_nested, test_sysctl_sysctl1);
1337 	join_thread(pid);
1338 	/* Child cannot raise the setting. */
1339 	pid = spawn_thread(CLONE_NEWPID, sysctl_nested,
1340 			   test_sysctl_sysctl1_failset);
1341 	join_thread(pid);
1342 	/* Child can lower the setting. */
1343 	pid = spawn_thread(CLONE_NEWPID, sysctl_nested,
1344 			   test_sysctl_set_sysctl2);
1345 	join_thread(pid);
1346 	/* Child lowering the setting has no effect on our setting. */
1347 	test_sysctl_sysctl1();
1348 
1349 	printf("%s nested sysctl 2\n", memfd_str);
1350 	sysctl_assert_write("2");
1351 	/* Child inherits our setting. */
1352 	pid = spawn_thread(CLONE_NEWPID, sysctl_nested, test_sysctl_sysctl2);
1353 	join_thread(pid);
1354 	/* Child cannot raise the setting. */
1355 	pid = spawn_thread(CLONE_NEWPID, sysctl_nested,
1356 			   test_sysctl_sysctl2_failset);
1357 	join_thread(pid);
1358 
1359 	/* Verify that the rules are actually inherited after fork. */
1360 	printf("%s nested sysctl 0 -> 1 after fork\n", memfd_str);
1361 	sysctl_assert_write("0");
1362 
1363 	pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait,
1364 			   test_sysctl_sysctl1_failset);
1365 	sysctl_assert_write("1");
1366 	kill(pid, SIGCONT);
1367 	join_thread(pid);
1368 
1369 	printf("%s nested sysctl 0 -> 2 after fork\n", memfd_str);
1370 	sysctl_assert_write("0");
1371 
1372 	pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait,
1373 			   test_sysctl_sysctl2_failset);
1374 	sysctl_assert_write("2");
1375 	kill(pid, SIGCONT);
1376 	join_thread(pid);
1377 
1378 	/*
1379 	 * Verify that the current effective setting is saved on fork, meaning
1380 	 * that the parent lowering the sysctl doesn't affect already-forked
1381 	 * children.
1382 	 */
1383 	printf("%s nested sysctl 2 -> 1 after fork\n", memfd_str);
1384 	sysctl_assert_write("2");
1385 	pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait,
1386 			   test_sysctl_sysctl2);
1387 	sysctl_assert_write("1");
1388 	kill(pid, SIGCONT);
1389 	join_thread(pid);
1390 
1391 	printf("%s nested sysctl 2 -> 0 after fork\n", memfd_str);
1392 	sysctl_assert_write("2");
1393 	pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait,
1394 			   test_sysctl_sysctl2);
1395 	sysctl_assert_write("0");
1396 	kill(pid, SIGCONT);
1397 	join_thread(pid);
1398 
1399 	printf("%s nested sysctl 1 -> 0 after fork\n", memfd_str);
1400 	sysctl_assert_write("1");
1401 	pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait,
1402 			   test_sysctl_sysctl1);
1403 	sysctl_assert_write("0");
1404 	kill(pid, SIGCONT);
1405 	join_thread(pid);
1406 
1407 	return 0;
1408 }
1409 
1410 /*
1411  * Test sysctl with nested pid namespaces
1412  * Make sure that the sysctl nesting semantics work correctly.
1413  */
test_sysctl_nested(void)1414 static void test_sysctl_nested(void)
1415 {
1416 	int pid = spawn_thread(CLONE_NEWPID, sysctl_nested_child, NULL);
1417 
1418 	join_thread(pid);
1419 }
1420 
1421 /*
1422  * Test sharing via dup()
1423  * Test that seals are shared between dupped FDs and they're all equal.
1424  */
test_share_dup(char * banner,char * b_suffix)1425 static void test_share_dup(char *banner, char *b_suffix)
1426 {
1427 	int fd, fd2;
1428 
1429 	printf("%s %s %s\n", memfd_str, banner, b_suffix);
1430 
1431 	fd = mfd_assert_new("kern_memfd_share_dup",
1432 			    mfd_def_size,
1433 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1434 	mfd_assert_has_seals(fd, 0);
1435 
1436 	fd2 = mfd_assert_dup(fd);
1437 	mfd_assert_has_seals(fd2, 0);
1438 
1439 	mfd_assert_add_seals(fd, F_SEAL_WRITE);
1440 	mfd_assert_has_seals(fd, F_SEAL_WRITE);
1441 	mfd_assert_has_seals(fd2, F_SEAL_WRITE);
1442 
1443 	mfd_assert_add_seals(fd2, F_SEAL_SHRINK);
1444 	mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK);
1445 	mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK);
1446 
1447 	mfd_assert_add_seals(fd, F_SEAL_SEAL);
1448 	mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK | F_SEAL_SEAL);
1449 	mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK | F_SEAL_SEAL);
1450 
1451 	mfd_fail_add_seals(fd, F_SEAL_GROW);
1452 	mfd_fail_add_seals(fd2, F_SEAL_GROW);
1453 	mfd_fail_add_seals(fd, F_SEAL_SEAL);
1454 	mfd_fail_add_seals(fd2, F_SEAL_SEAL);
1455 
1456 	close(fd2);
1457 
1458 	mfd_fail_add_seals(fd, F_SEAL_GROW);
1459 	close(fd);
1460 }
1461 
1462 /*
1463  * Test sealing with active mmap()s
1464  * Modifying seals is only allowed if no other mmap() refs exist.
1465  */
test_share_mmap(char * banner,char * b_suffix)1466 static void test_share_mmap(char *banner, char *b_suffix)
1467 {
1468 	int fd;
1469 	void *p;
1470 
1471 	printf("%s %s %s\n", memfd_str,  banner, b_suffix);
1472 
1473 	fd = mfd_assert_new("kern_memfd_share_mmap",
1474 			    mfd_def_size,
1475 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1476 	mfd_assert_has_seals(fd, 0);
1477 
1478 	/* shared/writable ref prevents sealing WRITE, but allows others */
1479 	p = mfd_assert_mmap_shared(fd);
1480 	mfd_fail_add_seals(fd, F_SEAL_WRITE);
1481 	mfd_assert_has_seals(fd, 0);
1482 	mfd_assert_add_seals(fd, F_SEAL_SHRINK);
1483 	mfd_assert_has_seals(fd, F_SEAL_SHRINK);
1484 	munmap(p, mfd_def_size);
1485 
1486 	/* readable ref allows sealing */
1487 	p = mfd_assert_mmap_private(fd);
1488 	mfd_assert_add_seals(fd, F_SEAL_WRITE);
1489 	mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK);
1490 	munmap(p, mfd_def_size);
1491 
1492 	close(fd);
1493 }
1494 
1495 /*
1496  * Test sealing with open(/proc/self/fd/%d)
1497  * Via /proc we can get access to a separate file-context for the same memfd.
1498  * This is *not* like dup(), but like a real separate open(). Make sure the
1499  * semantics are as expected and we correctly check for RDONLY / WRONLY / RDWR.
1500  */
test_share_open(char * banner,char * b_suffix)1501 static void test_share_open(char *banner, char *b_suffix)
1502 {
1503 	int fd, fd2;
1504 
1505 	printf("%s %s %s\n", memfd_str, banner, b_suffix);
1506 
1507 	fd = mfd_assert_new("kern_memfd_share_open",
1508 			    mfd_def_size,
1509 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1510 	mfd_assert_has_seals(fd, 0);
1511 
1512 	fd2 = mfd_assert_open(fd, O_RDWR, 0);
1513 	mfd_assert_add_seals(fd, F_SEAL_WRITE);
1514 	mfd_assert_has_seals(fd, F_SEAL_WRITE);
1515 	mfd_assert_has_seals(fd2, F_SEAL_WRITE);
1516 
1517 	mfd_assert_add_seals(fd2, F_SEAL_SHRINK);
1518 	mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK);
1519 	mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK);
1520 
1521 	close(fd);
1522 	fd = mfd_assert_open(fd2, O_RDONLY, 0);
1523 
1524 	mfd_fail_add_seals(fd, F_SEAL_SEAL);
1525 	mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK);
1526 	mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK);
1527 
1528 	close(fd2);
1529 	fd2 = mfd_assert_open(fd, O_RDWR, 0);
1530 
1531 	mfd_assert_add_seals(fd2, F_SEAL_SEAL);
1532 	mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK | F_SEAL_SEAL);
1533 	mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK | F_SEAL_SEAL);
1534 
1535 	close(fd2);
1536 	close(fd);
1537 }
1538 
1539 /*
1540  * Test sharing via fork()
1541  * Test whether seal-modifications work as expected with forked childs.
1542  */
test_share_fork(char * banner,char * b_suffix)1543 static void test_share_fork(char *banner, char *b_suffix)
1544 {
1545 	int fd;
1546 	pid_t pid;
1547 
1548 	printf("%s %s %s\n", memfd_str, banner, b_suffix);
1549 
1550 	fd = mfd_assert_new("kern_memfd_share_fork",
1551 			    mfd_def_size,
1552 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1553 	mfd_assert_has_seals(fd, 0);
1554 
1555 	pid = spawn_idle_thread(0);
1556 	mfd_assert_add_seals(fd, F_SEAL_SEAL);
1557 	mfd_assert_has_seals(fd, F_SEAL_SEAL);
1558 
1559 	mfd_fail_add_seals(fd, F_SEAL_WRITE);
1560 	mfd_assert_has_seals(fd, F_SEAL_SEAL);
1561 
1562 	join_idle_thread(pid);
1563 
1564 	mfd_fail_add_seals(fd, F_SEAL_WRITE);
1565 	mfd_assert_has_seals(fd, F_SEAL_SEAL);
1566 
1567 	close(fd);
1568 }
1569 
main(int argc,char ** argv)1570 int main(int argc, char **argv)
1571 {
1572 	pid_t pid;
1573 
1574 	if (argc == 2) {
1575 		if (!strcmp(argv[1], "hugetlbfs")) {
1576 			unsigned long hpage_size = default_huge_page_size();
1577 
1578 			if (!hpage_size) {
1579 				printf("Unable to determine huge page size\n");
1580 				abort();
1581 			}
1582 
1583 			hugetlbfs_test = 1;
1584 			memfd_str = MEMFD_HUGE_STR;
1585 			mfd_def_size = hpage_size * 2;
1586 		} else {
1587 			printf("Unknown option: %s\n", argv[1]);
1588 			abort();
1589 		}
1590 	}
1591 
1592 	test_create();
1593 	test_basic();
1594 	test_exec_seal();
1595 	test_exec_no_seal();
1596 	test_noexec_seal();
1597 
1598 	test_seal_write();
1599 	test_seal_future_write();
1600 	test_seal_shrink();
1601 	test_seal_grow();
1602 	test_seal_resize();
1603 
1604 	test_sysctl_simple();
1605 	test_sysctl_nested();
1606 
1607 	test_share_dup("SHARE-DUP", "");
1608 	test_share_mmap("SHARE-MMAP", "");
1609 	test_share_open("SHARE-OPEN", "");
1610 	test_share_fork("SHARE-FORK", "");
1611 
1612 	/* Run test-suite in a multi-threaded environment with a shared
1613 	 * file-table. */
1614 	pid = spawn_idle_thread(CLONE_FILES | CLONE_FS | CLONE_VM);
1615 	test_share_dup("SHARE-DUP", SHARED_FT_STR);
1616 	test_share_mmap("SHARE-MMAP", SHARED_FT_STR);
1617 	test_share_open("SHARE-OPEN", SHARED_FT_STR);
1618 	test_share_fork("SHARE-FORK", SHARED_FT_STR);
1619 	join_idle_thread(pid);
1620 
1621 	printf("memfd: DONE\n");
1622 
1623 	return 0;
1624 }
1625