xref: /openbmc/linux/tools/testing/selftests/memfd/memfd_test.c (revision 55e43d6abd078ed6d219902ce8cb4d68e3c993ba)
1 // SPDX-License-Identifier: GPL-2.0
2 #define _GNU_SOURCE
3 #define __EXPORTED_HEADERS__
4 
5 #include <errno.h>
6 #include <inttypes.h>
7 #include <limits.h>
8 #include <linux/falloc.h>
9 #include <fcntl.h>
10 #include <linux/memfd.h>
11 #include <sched.h>
12 #include <stdbool.h>
13 #include <stdio.h>
14 #include <stdlib.h>
15 #include <signal.h>
16 #include <string.h>
17 #include <sys/mman.h>
18 #include <sys/stat.h>
19 #include <sys/syscall.h>
20 #include <sys/wait.h>
21 #include <unistd.h>
22 #include <ctype.h>
23 
24 #include "common.h"
25 
/* Labels used to tag test output; memfd_str below selects the active one. */
#define MEMFD_STR	"memfd:"
#define MEMFD_HUGE_STR	"memfd-hugetlb:"
#define SHARED_FT_STR	"(shared file-table)"

/* Default size of the memfds under test, and stack size for clone()d children. */
#define MFD_DEF_SIZE 8192
#define STACK_SIZE 65536

/*
 * Local fallback definition (presumably for uapi headers that do not provide
 * it yet). Guarded so that a definition from the system headers wins.
 */
#ifndef F_SEAL_EXEC
#define F_SEAL_EXEC	0x0020
#endif

/* Seal set test_exec_seal() expects after sealing an executable memfd. */
#define F_WX_SEALS (F_SEAL_SHRINK | \
		    F_SEAL_GROW | \
		    F_SEAL_WRITE | \
		    F_SEAL_FUTURE_WRITE | \
		    F_SEAL_EXEC)

/* Local fallback definition, guarded for the same reason as F_SEAL_EXEC. */
#ifndef MFD_NOEXEC_SEAL
#define MFD_NOEXEC_SEAL	0x0008U
#endif

/*
 * Default is not to test hugetlbfs
 */
static size_t mfd_def_size = MFD_DEF_SIZE;
static const char *memfd_str = MEMFD_STR;

/* Forward declarations; the definitions live further down in the file. */
static int newpid_thread_fn2(void *arg);
static void join_newpid_thread(pid_t pid);
50 
fd2name(int fd,char * buf,size_t bufsize)51 static ssize_t fd2name(int fd, char *buf, size_t bufsize)
52 {
53 	char buf1[PATH_MAX];
54 	int size;
55 	ssize_t nbytes;
56 
57 	size = snprintf(buf1, PATH_MAX, "/proc/self/fd/%d", fd);
58 	if (size < 0) {
59 		printf("snprintf(%d) failed on %m\n", fd);
60 		abort();
61 	}
62 
63 	/*
64 	 * reserver one byte for string termination.
65 	 */
66 	nbytes = readlink(buf1, buf, bufsize-1);
67 	if (nbytes == -1) {
68 		printf("readlink(%s) failed %m\n", buf1);
69 		abort();
70 	}
71 	buf[nbytes] = '\0';
72 	return nbytes;
73 }
74 
/*
 * Create a memfd called @name with @flags and resize it to @sz bytes.
 * Aborts if either the creation or the ftruncate() fails.
 * Returns the new file descriptor.
 */
static int mfd_assert_new(const char *name, loff_t sz, unsigned int flags)
{
	int memfd = sys_memfd_create(name, flags);

	if (memfd < 0) {
		printf("memfd_create(\"%s\", %u) failed: %m\n",
		       name, flags);
		abort();
	}

	if (ftruncate(memfd, sz) < 0) {
		printf("ftruncate(%llu) failed: %m\n", (unsigned long long)sz);
		abort();
	}

	return memfd;
}
94 
sysctl_assert_write(const char * val)95 static void sysctl_assert_write(const char *val)
96 {
97 	int fd = open("/proc/sys/vm/memfd_noexec", O_WRONLY | O_CLOEXEC);
98 
99 	if (fd < 0) {
100 		printf("open sysctl failed: %m\n");
101 		abort();
102 	}
103 
104 	if (write(fd, val, strlen(val)) < 0) {
105 		printf("write sysctl %s failed: %m\n", val);
106 		abort();
107 	}
108 }
109 
sysctl_fail_write(const char * val)110 static void sysctl_fail_write(const char *val)
111 {
112 	int fd = open("/proc/sys/vm/memfd_noexec", O_WRONLY | O_CLOEXEC);
113 
114 	if (fd < 0) {
115 		printf("open sysctl failed: %m\n");
116 		abort();
117 	}
118 
119 	if (write(fd, val, strlen(val)) >= 0) {
120 		printf("write sysctl %s succeeded, but failure expected\n",
121 				val);
122 		abort();
123 	}
124 }
125 
sysctl_assert_equal(const char * val)126 static void sysctl_assert_equal(const char *val)
127 {
128 	char *p, buf[128] = {};
129 	int fd = open("/proc/sys/vm/memfd_noexec", O_RDONLY | O_CLOEXEC);
130 
131 	if (fd < 0) {
132 		printf("open sysctl failed: %m\n");
133 		abort();
134 	}
135 
136 	if (read(fd, buf, sizeof(buf)) < 0) {
137 		printf("read sysctl failed: %m\n");
138 		abort();
139 	}
140 
141 	/* Strip trailing whitespace. */
142 	p = buf;
143 	while (!isspace(*p))
144 		p++;
145 	*p = '\0';
146 
147 	if (strcmp(buf, val) != 0) {
148 		printf("unexpected sysctl value: expected %s, got %s\n", val, buf);
149 		abort();
150 	}
151 }
152 
/*
 * Open a second, read-write descriptor for @fd_in by going through its
 * /proc/self/fd entry. Aborts if the open fails; returns the new fd.
 */
static int mfd_assert_reopen_fd(int fd_in)
{
	char proc_path[100];
	int reopened;

	sprintf(proc_path, "/proc/self/fd/%d", fd_in);

	reopened = open(proc_path, O_RDWR);
	if (reopened >= 0)
		return reopened;

	printf("re-open of existing fd %d failed\n", fd_in);
	abort();
}
168 
/*
 * Expect memfd creation with @name and @flags to be rejected.
 * If a descriptor is returned anyway it is closed and the test aborts.
 */
static void mfd_fail_new(const char *name, unsigned int flags)
{
	int fd = sys_memfd_create(name, flags);

	if (fd < 0)
		return;

	printf("memfd_create(\"%s\", %u) succeeded, but failure expected\n",
	       name, flags);
	close(fd);
	abort();
}
181 
mfd_assert_get_seals(int fd)182 static unsigned int mfd_assert_get_seals(int fd)
183 {
184 	int r;
185 
186 	r = fcntl(fd, F_GET_SEALS);
187 	if (r < 0) {
188 		printf("GET_SEALS(%d) failed: %m\n", fd);
189 		abort();
190 	}
191 
192 	return (unsigned int)r;
193 }
194 
mfd_assert_has_seals(int fd,unsigned int seals)195 static void mfd_assert_has_seals(int fd, unsigned int seals)
196 {
197 	char buf[PATH_MAX];
198 	int nbytes;
199 	unsigned int s;
200 	fd2name(fd, buf, PATH_MAX);
201 
202 	s = mfd_assert_get_seals(fd);
203 	if (s != seals) {
204 		printf("%u != %u = GET_SEALS(%s)\n", seals, s, buf);
205 		abort();
206 	}
207 }
208 
mfd_assert_add_seals(int fd,unsigned int seals)209 static void mfd_assert_add_seals(int fd, unsigned int seals)
210 {
211 	int r;
212 	unsigned int s;
213 
214 	s = mfd_assert_get_seals(fd);
215 	r = fcntl(fd, F_ADD_SEALS, seals);
216 	if (r < 0) {
217 		printf("ADD_SEALS(%d, %u -> %u) failed: %m\n", fd, s, seals);
218 		abort();
219 	}
220 }
221 
mfd_fail_add_seals(int fd,unsigned int seals)222 static void mfd_fail_add_seals(int fd, unsigned int seals)
223 {
224 	int r;
225 	unsigned int s;
226 
227 	r = fcntl(fd, F_GET_SEALS);
228 	if (r < 0)
229 		s = 0;
230 	else
231 		s = (unsigned int)r;
232 
233 	r = fcntl(fd, F_ADD_SEALS, seals);
234 	if (r >= 0) {
235 		printf("ADD_SEALS(%d, %u -> %u) didn't fail as expected\n",
236 				fd, s, seals);
237 		abort();
238 	}
239 }
240 
/*
 * Abort unless fstat() reports that @fd is exactly @size bytes long.
 */
static void mfd_assert_size(int fd, size_t size)
{
	struct stat info;

	if (fstat(fd, &info) < 0) {
		printf("fstat(%d) failed: %m\n", fd);
		abort();
	}

	if (info.st_size != size) {
		printf("wrong file size %lld, but expected %lld\n",
		       (long long)info.st_size, (long long)size);
		abort();
	}
}
256 
/*
 * Duplicate @fd with dup(). Aborts on failure; returns the new descriptor.
 */
static int mfd_assert_dup(int fd)
{
	int copy = dup(fd);

	if (copy >= 0)
		return copy;

	printf("dup(%d) failed: %m\n", fd);
	abort();
}
269 
mfd_assert_mmap_shared(int fd)270 static void *mfd_assert_mmap_shared(int fd)
271 {
272 	void *p;
273 
274 	p = mmap(NULL,
275 		 mfd_def_size,
276 		 PROT_READ | PROT_WRITE,
277 		 MAP_SHARED,
278 		 fd,
279 		 0);
280 	if (p == MAP_FAILED) {
281 		printf("mmap() failed: %m\n");
282 		abort();
283 	}
284 
285 	return p;
286 }
287 
mfd_assert_mmap_private(int fd)288 static void *mfd_assert_mmap_private(int fd)
289 {
290 	void *p;
291 
292 	p = mmap(NULL,
293 		 mfd_def_size,
294 		 PROT_READ,
295 		 MAP_PRIVATE,
296 		 fd,
297 		 0);
298 	if (p == MAP_FAILED) {
299 		printf("mmap() failed: %m\n");
300 		abort();
301 	}
302 
303 	return p;
304 }
305 
/*
 * Open /proc/self/fd/<fd> with the given @flags and @mode.
 * Aborts if the open fails; returns the newly opened descriptor.
 */
static int mfd_assert_open(int fd, int flags, mode_t mode)
{
	char proc_path[512];
	int opened;

	sprintf(proc_path, "/proc/self/fd/%d", fd);

	opened = open(proc_path, flags, mode);
	if (opened >= 0)
		return opened;

	printf("open(%s) failed: %m\n", proc_path);
	abort();
}
320 
/*
 * Expect opening /proc/self/fd/<fd> with @flags and @mode to be refused;
 * abort if a descriptor is returned.
 */
static void mfd_fail_open(int fd, int flags, mode_t mode)
{
	char proc_path[512];

	sprintf(proc_path, "/proc/self/fd/%d", fd);

	if (open(proc_path, flags, mode) < 0)
		return;

	printf("open(%s) didn't fail as expected\n", proc_path);
	abort();
}
333 
mfd_assert_read(int fd)334 static void mfd_assert_read(int fd)
335 {
336 	char buf[16];
337 	void *p;
338 	ssize_t l;
339 
340 	l = read(fd, buf, sizeof(buf));
341 	if (l != sizeof(buf)) {
342 		printf("read() failed: %m\n");
343 		abort();
344 	}
345 
346 	/* verify PROT_READ *is* allowed */
347 	p = mmap(NULL,
348 		 mfd_def_size,
349 		 PROT_READ,
350 		 MAP_PRIVATE,
351 		 fd,
352 		 0);
353 	if (p == MAP_FAILED) {
354 		printf("mmap() failed: %m\n");
355 		abort();
356 	}
357 	munmap(p, mfd_def_size);
358 
359 	/* verify MAP_PRIVATE is *always* allowed (even writable) */
360 	p = mmap(NULL,
361 		 mfd_def_size,
362 		 PROT_READ | PROT_WRITE,
363 		 MAP_PRIVATE,
364 		 fd,
365 		 0);
366 	if (p == MAP_FAILED) {
367 		printf("mmap() failed: %m\n");
368 		abort();
369 	}
370 	munmap(p, mfd_def_size);
371 }
372 
373 /* Test that PROT_READ + MAP_SHARED mappings work. */
mfd_assert_read_shared(int fd)374 static void mfd_assert_read_shared(int fd)
375 {
376 	void *p;
377 
378 	/* verify PROT_READ and MAP_SHARED *is* allowed */
379 	p = mmap(NULL,
380 		 mfd_def_size,
381 		 PROT_READ,
382 		 MAP_SHARED,
383 		 fd,
384 		 0);
385 	if (p == MAP_FAILED) {
386 		printf("mmap() failed: %m\n");
387 		abort();
388 	}
389 	munmap(p, mfd_def_size);
390 }
391 
mfd_assert_fork_private_write(int fd)392 static void mfd_assert_fork_private_write(int fd)
393 {
394 	int *p;
395 	pid_t pid;
396 
397 	p = mmap(NULL,
398 		 mfd_def_size,
399 		 PROT_READ | PROT_WRITE,
400 		 MAP_PRIVATE,
401 		 fd,
402 		 0);
403 	if (p == MAP_FAILED) {
404 		printf("mmap() failed: %m\n");
405 		abort();
406 	}
407 
408 	p[0] = 22;
409 
410 	pid = fork();
411 	if (pid == 0) {
412 		p[0] = 33;
413 		exit(0);
414 	} else {
415 		waitpid(pid, NULL, 0);
416 
417 		if (p[0] != 22) {
418 			printf("MAP_PRIVATE copy-on-write failed: %m\n");
419 			abort();
420 		}
421 	}
422 
423 	munmap(p, mfd_def_size);
424 }
425 
mfd_assert_write(int fd)426 static void mfd_assert_write(int fd)
427 {
428 	ssize_t l;
429 	void *p;
430 	int r;
431 
432 	/*
433 	 * huegtlbfs does not support write, but we want to
434 	 * verify everything else here.
435 	 */
436 	if (!hugetlbfs_test) {
437 		/* verify write() succeeds */
438 		l = write(fd, "\0\0\0\0", 4);
439 		if (l != 4) {
440 			printf("write() failed: %m\n");
441 			abort();
442 		}
443 	}
444 
445 	/* verify PROT_READ | PROT_WRITE is allowed */
446 	p = mmap(NULL,
447 		 mfd_def_size,
448 		 PROT_READ | PROT_WRITE,
449 		 MAP_SHARED,
450 		 fd,
451 		 0);
452 	if (p == MAP_FAILED) {
453 		printf("mmap() failed: %m\n");
454 		abort();
455 	}
456 	*(char *)p = 0;
457 	munmap(p, mfd_def_size);
458 
459 	/* verify PROT_WRITE is allowed */
460 	p = mmap(NULL,
461 		 mfd_def_size,
462 		 PROT_WRITE,
463 		 MAP_SHARED,
464 		 fd,
465 		 0);
466 	if (p == MAP_FAILED) {
467 		printf("mmap() failed: %m\n");
468 		abort();
469 	}
470 	*(char *)p = 0;
471 	munmap(p, mfd_def_size);
472 
473 	/* verify PROT_READ with MAP_SHARED is allowed and a following
474 	 * mprotect(PROT_WRITE) allows writing */
475 	p = mmap(NULL,
476 		 mfd_def_size,
477 		 PROT_READ,
478 		 MAP_SHARED,
479 		 fd,
480 		 0);
481 	if (p == MAP_FAILED) {
482 		printf("mmap() failed: %m\n");
483 		abort();
484 	}
485 
486 	r = mprotect(p, mfd_def_size, PROT_READ | PROT_WRITE);
487 	if (r < 0) {
488 		printf("mprotect() failed: %m\n");
489 		abort();
490 	}
491 
492 	*(char *)p = 0;
493 	munmap(p, mfd_def_size);
494 
495 	/* verify PUNCH_HOLE works */
496 	r = fallocate(fd,
497 		      FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
498 		      0,
499 		      mfd_def_size);
500 	if (r < 0) {
501 		printf("fallocate(PUNCH_HOLE) failed: %m\n");
502 		abort();
503 	}
504 }
505 
mfd_fail_write(int fd)506 static void mfd_fail_write(int fd)
507 {
508 	ssize_t l;
509 	void *p;
510 	int r;
511 
512 	/* verify write() fails */
513 	l = write(fd, "data", 4);
514 	if (l != -EPERM) {
515 		printf("expected EPERM on write(), but got %d: %m\n", (int)l);
516 		abort();
517 	}
518 
519 	/* verify PROT_READ | PROT_WRITE is not allowed */
520 	p = mmap(NULL,
521 		 mfd_def_size,
522 		 PROT_READ | PROT_WRITE,
523 		 MAP_SHARED,
524 		 fd,
525 		 0);
526 	if (p != MAP_FAILED) {
527 		printf("mmap() didn't fail as expected\n");
528 		abort();
529 	}
530 
531 	/* verify PROT_WRITE is not allowed */
532 	p = mmap(NULL,
533 		 mfd_def_size,
534 		 PROT_WRITE,
535 		 MAP_SHARED,
536 		 fd,
537 		 0);
538 	if (p != MAP_FAILED) {
539 		printf("mmap() didn't fail as expected\n");
540 		abort();
541 	}
542 
543 	/* Verify PROT_READ with MAP_SHARED with a following mprotect is not
544 	 * allowed. Note that for r/w the kernel already prevents the mmap. */
545 	p = mmap(NULL,
546 		 mfd_def_size,
547 		 PROT_READ,
548 		 MAP_SHARED,
549 		 fd,
550 		 0);
551 	if (p != MAP_FAILED) {
552 		r = mprotect(p, mfd_def_size, PROT_READ | PROT_WRITE);
553 		if (r >= 0) {
554 			printf("mmap()+mprotect() didn't fail as expected\n");
555 			abort();
556 		}
557 		munmap(p, mfd_def_size);
558 	}
559 
560 	/* verify PUNCH_HOLE fails */
561 	r = fallocate(fd,
562 		      FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
563 		      0,
564 		      mfd_def_size);
565 	if (r >= 0) {
566 		printf("fallocate(PUNCH_HOLE) didn't fail as expected\n");
567 		abort();
568 	}
569 }
570 
mfd_assert_shrink(int fd)571 static void mfd_assert_shrink(int fd)
572 {
573 	int r, fd2;
574 
575 	r = ftruncate(fd, mfd_def_size / 2);
576 	if (r < 0) {
577 		printf("ftruncate(SHRINK) failed: %m\n");
578 		abort();
579 	}
580 
581 	mfd_assert_size(fd, mfd_def_size / 2);
582 
583 	fd2 = mfd_assert_open(fd,
584 			      O_RDWR | O_CREAT | O_TRUNC,
585 			      S_IRUSR | S_IWUSR);
586 	close(fd2);
587 
588 	mfd_assert_size(fd, 0);
589 }
590 
mfd_fail_shrink(int fd)591 static void mfd_fail_shrink(int fd)
592 {
593 	int r;
594 
595 	r = ftruncate(fd, mfd_def_size / 2);
596 	if (r >= 0) {
597 		printf("ftruncate(SHRINK) didn't fail as expected\n");
598 		abort();
599 	}
600 
601 	mfd_fail_open(fd,
602 		      O_RDWR | O_CREAT | O_TRUNC,
603 		      S_IRUSR | S_IWUSR);
604 }
605 
mfd_assert_grow(int fd)606 static void mfd_assert_grow(int fd)
607 {
608 	int r;
609 
610 	r = ftruncate(fd, mfd_def_size * 2);
611 	if (r < 0) {
612 		printf("ftruncate(GROW) failed: %m\n");
613 		abort();
614 	}
615 
616 	mfd_assert_size(fd, mfd_def_size * 2);
617 
618 	r = fallocate(fd,
619 		      0,
620 		      0,
621 		      mfd_def_size * 4);
622 	if (r < 0) {
623 		printf("fallocate(ALLOC) failed: %m\n");
624 		abort();
625 	}
626 
627 	mfd_assert_size(fd, mfd_def_size * 4);
628 }
629 
mfd_fail_grow(int fd)630 static void mfd_fail_grow(int fd)
631 {
632 	int r;
633 
634 	r = ftruncate(fd, mfd_def_size * 2);
635 	if (r >= 0) {
636 		printf("ftruncate(GROW) didn't fail as expected\n");
637 		abort();
638 	}
639 
640 	r = fallocate(fd,
641 		      0,
642 		      0,
643 		      mfd_def_size * 4);
644 	if (r >= 0) {
645 		printf("fallocate(ALLOC) didn't fail as expected\n");
646 		abort();
647 	}
648 }
649 
mfd_assert_grow_write(int fd)650 static void mfd_assert_grow_write(int fd)
651 {
652 	static char *buf;
653 	ssize_t l;
654 
655 	/* hugetlbfs does not support write */
656 	if (hugetlbfs_test)
657 		return;
658 
659 	buf = malloc(mfd_def_size * 8);
660 	if (!buf) {
661 		printf("malloc(%zu) failed: %m\n", mfd_def_size * 8);
662 		abort();
663 	}
664 
665 	l = pwrite(fd, buf, mfd_def_size * 8, 0);
666 	if (l != (mfd_def_size * 8)) {
667 		printf("pwrite() failed: %m\n");
668 		abort();
669 	}
670 
671 	mfd_assert_size(fd, mfd_def_size * 8);
672 }
673 
mfd_fail_grow_write(int fd)674 static void mfd_fail_grow_write(int fd)
675 {
676 	static char *buf;
677 	ssize_t l;
678 
679 	/* hugetlbfs does not support write */
680 	if (hugetlbfs_test)
681 		return;
682 
683 	buf = malloc(mfd_def_size * 8);
684 	if (!buf) {
685 		printf("malloc(%zu) failed: %m\n", mfd_def_size * 8);
686 		abort();
687 	}
688 
689 	l = pwrite(fd, buf, mfd_def_size * 8, 0);
690 	if (l == (mfd_def_size * 8)) {
691 		printf("pwrite() didn't fail as expected\n");
692 		abort();
693 	}
694 }
695 
mfd_assert_mode(int fd,int mode)696 static void mfd_assert_mode(int fd, int mode)
697 {
698 	struct stat st;
699 	char buf[PATH_MAX];
700 	int nbytes;
701 
702 	fd2name(fd, buf, PATH_MAX);
703 
704 	if (fstat(fd, &st) < 0) {
705 		printf("fstat(%s) failed: %m\n", buf);
706 		abort();
707 	}
708 
709 	if ((st.st_mode & 07777) != mode) {
710 		printf("fstat(%s) wrong file mode 0%04o, but expected 0%04o\n",
711 		       buf, (int)st.st_mode & 07777, mode);
712 		abort();
713 	}
714 }
715 
mfd_assert_chmod(int fd,int mode)716 static void mfd_assert_chmod(int fd, int mode)
717 {
718 	char buf[PATH_MAX];
719 	int nbytes;
720 
721 	fd2name(fd, buf, PATH_MAX);
722 
723 	if (fchmod(fd, mode) < 0) {
724 		printf("fchmod(%s, 0%04o) failed: %m\n", buf, mode);
725 		abort();
726 	}
727 
728 	mfd_assert_mode(fd, mode);
729 }
730 
mfd_fail_chmod(int fd,int mode)731 static void mfd_fail_chmod(int fd, int mode)
732 {
733 	struct stat st;
734 	char buf[PATH_MAX];
735 	int nbytes;
736 
737 	fd2name(fd, buf, PATH_MAX);
738 
739 	if (fstat(fd, &st) < 0) {
740 		printf("fstat(%s) failed: %m\n", buf);
741 		abort();
742 	}
743 
744 	if (fchmod(fd, mode) == 0) {
745 		printf("fchmod(%s, 0%04o) didn't fail as expected\n",
746 		       buf, mode);
747 		abort();
748 	}
749 
750 	/* verify that file mode bits did not change */
751 	mfd_assert_mode(fd, st.st_mode & 07777);
752 }
753 
idle_thread_fn(void * arg)754 static int idle_thread_fn(void *arg)
755 {
756 	sigset_t set;
757 	int sig;
758 
759 	/* dummy waiter; SIGTERM terminates us anyway */
760 	sigemptyset(&set);
761 	sigaddset(&set, SIGTERM);
762 	sigwait(&set, &sig);
763 
764 	return 0;
765 }
766 
spawn_thread(unsigned int flags,int (* fn)(void *),void * arg)767 static pid_t spawn_thread(unsigned int flags, int (*fn)(void *), void *arg)
768 {
769 	uint8_t *stack;
770 	pid_t pid;
771 
772 	stack = malloc(STACK_SIZE);
773 	if (!stack) {
774 		printf("malloc(STACK_SIZE) failed: %m\n");
775 		abort();
776 	}
777 
778 	pid = clone(fn, stack + STACK_SIZE, SIGCHLD | flags, arg);
779 	if (pid < 0) {
780 		printf("clone() failed: %m\n");
781 		abort();
782 	}
783 
784 	return pid;
785 }
786 
/*
 * Wait for child @pid and abort if waiting fails, if the child exited with
 * a non-zero status, or if it was killed by a signal.
 */
static void join_thread(pid_t pid)
{
	int status;

	if (waitpid(pid, &status, 0) < 0) {
		printf("newpid thread: waitpid() failed: %m\n");
		abort();
	}

	if (WIFEXITED(status)) {
		if (WEXITSTATUS(status) != 0) {
			printf("newpid thread: exited with non-zero error code %d\n",
			       WEXITSTATUS(status));
			abort();
		}
	}

	if (WIFSIGNALED(status)) {
		printf("newpid thread: killed by signal %d\n",
		       WTERMSIG(status));
		abort();
	}
}
808 
spawn_idle_thread(unsigned int flags)809 static pid_t spawn_idle_thread(unsigned int flags)
810 {
811 	return spawn_thread(flags, idle_thread_fn, NULL);
812 }
813 
/*
 * Stop an idle child: send SIGTERM to @pid, then reap it.
 * The results of both calls are ignored.
 */
static void join_idle_thread(pid_t pid)
{
	/* idle_thread_fn() is parked waiting for exactly this signal. */
	kill(pid, SIGTERM);

	/* Collect the child; its exit status is not inspected. */
	waitpid(pid, NULL, 0);
}
819 
820 /*
821  * Test memfd_create() syscall
822  * Verify syscall-argument validation, including name checks, flag validation
823  * and more.
824  */
test_create(void)825 static void test_create(void)
826 {
827 	char buf[2048];
828 	int fd;
829 
830 	printf("%s CREATE\n", memfd_str);
831 
832 	/* test NULL name */
833 	mfd_fail_new(NULL, 0);
834 
835 	/* test over-long name (not zero-terminated) */
836 	memset(buf, 0xff, sizeof(buf));
837 	mfd_fail_new(buf, 0);
838 
839 	/* test over-long zero-terminated name */
840 	memset(buf, 0xff, sizeof(buf));
841 	buf[sizeof(buf) - 1] = 0;
842 	mfd_fail_new(buf, 0);
843 
844 	/* verify "" is a valid name */
845 	fd = mfd_assert_new("", 0, 0);
846 	close(fd);
847 
848 	/* verify invalid O_* open flags */
849 	mfd_fail_new("", 0x0100);
850 	mfd_fail_new("", ~MFD_CLOEXEC);
851 	mfd_fail_new("", ~MFD_ALLOW_SEALING);
852 	mfd_fail_new("", ~0);
853 	mfd_fail_new("", 0x80000000U);
854 
855 	/* verify EXEC and NOEXEC_SEAL can't both be set */
856 	mfd_fail_new("", MFD_EXEC | MFD_NOEXEC_SEAL);
857 
858 	/* verify MFD_CLOEXEC is allowed */
859 	fd = mfd_assert_new("", 0, MFD_CLOEXEC);
860 	close(fd);
861 
862 	/* verify MFD_ALLOW_SEALING is allowed */
863 	fd = mfd_assert_new("", 0, MFD_ALLOW_SEALING);
864 	close(fd);
865 
866 	/* verify MFD_ALLOW_SEALING | MFD_CLOEXEC is allowed */
867 	fd = mfd_assert_new("", 0, MFD_ALLOW_SEALING | MFD_CLOEXEC);
868 	close(fd);
869 }
870 
871 /*
872  * Test basic sealing
873  * A very basic sealing test to see whether setting/retrieving seals works.
874  */
test_basic(void)875 static void test_basic(void)
876 {
877 	int fd;
878 
879 	printf("%s BASIC\n", memfd_str);
880 
881 	fd = mfd_assert_new("kern_memfd_basic",
882 			    mfd_def_size,
883 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
884 
885 	/* add basic seals */
886 	mfd_assert_has_seals(fd, 0);
887 	mfd_assert_add_seals(fd, F_SEAL_SHRINK |
888 				 F_SEAL_WRITE);
889 	mfd_assert_has_seals(fd, F_SEAL_SHRINK |
890 				 F_SEAL_WRITE);
891 
892 	/* add them again */
893 	mfd_assert_add_seals(fd, F_SEAL_SHRINK |
894 				 F_SEAL_WRITE);
895 	mfd_assert_has_seals(fd, F_SEAL_SHRINK |
896 				 F_SEAL_WRITE);
897 
898 	/* add more seals and seal against sealing */
899 	mfd_assert_add_seals(fd, F_SEAL_GROW | F_SEAL_SEAL);
900 	mfd_assert_has_seals(fd, F_SEAL_SHRINK |
901 				 F_SEAL_GROW |
902 				 F_SEAL_WRITE |
903 				 F_SEAL_SEAL);
904 
905 	/* verify that sealing no longer works */
906 	mfd_fail_add_seals(fd, F_SEAL_GROW);
907 	mfd_fail_add_seals(fd, 0);
908 
909 	close(fd);
910 
911 	/* verify sealing does not work without MFD_ALLOW_SEALING */
912 	fd = mfd_assert_new("kern_memfd_basic",
913 			    mfd_def_size,
914 			    MFD_CLOEXEC);
915 	mfd_assert_has_seals(fd, F_SEAL_SEAL);
916 	mfd_fail_add_seals(fd, F_SEAL_SHRINK |
917 			       F_SEAL_GROW |
918 			       F_SEAL_WRITE);
919 	mfd_assert_has_seals(fd, F_SEAL_SEAL);
920 	close(fd);
921 }
922 
923 /*
924  * Test SEAL_WRITE
925  * Test whether SEAL_WRITE actually prevents modifications.
926  */
test_seal_write(void)927 static void test_seal_write(void)
928 {
929 	int fd;
930 
931 	printf("%s SEAL-WRITE\n", memfd_str);
932 
933 	fd = mfd_assert_new("kern_memfd_seal_write",
934 			    mfd_def_size,
935 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
936 	mfd_assert_has_seals(fd, 0);
937 	mfd_assert_add_seals(fd, F_SEAL_WRITE);
938 	mfd_assert_has_seals(fd, F_SEAL_WRITE);
939 
940 	mfd_assert_read(fd);
941 	mfd_fail_write(fd);
942 	mfd_assert_shrink(fd);
943 	mfd_assert_grow(fd);
944 	mfd_fail_grow_write(fd);
945 
946 	close(fd);
947 }
948 
949 /*
950  * Test SEAL_FUTURE_WRITE
951  * Test whether SEAL_FUTURE_WRITE actually prevents modifications.
952  */
test_seal_future_write(void)953 static void test_seal_future_write(void)
954 {
955 	int fd, fd2;
956 	void *p;
957 
958 	printf("%s SEAL-FUTURE-WRITE\n", memfd_str);
959 
960 	fd = mfd_assert_new("kern_memfd_seal_future_write",
961 			    mfd_def_size,
962 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
963 
964 	p = mfd_assert_mmap_shared(fd);
965 
966 	mfd_assert_has_seals(fd, 0);
967 
968 	mfd_assert_add_seals(fd, F_SEAL_FUTURE_WRITE);
969 	mfd_assert_has_seals(fd, F_SEAL_FUTURE_WRITE);
970 
971 	/* read should pass, writes should fail */
972 	mfd_assert_read(fd);
973 	mfd_assert_read_shared(fd);
974 	mfd_fail_write(fd);
975 
976 	fd2 = mfd_assert_reopen_fd(fd);
977 	/* read should pass, writes should still fail */
978 	mfd_assert_read(fd2);
979 	mfd_assert_read_shared(fd2);
980 	mfd_fail_write(fd2);
981 
982 	mfd_assert_fork_private_write(fd);
983 
984 	munmap(p, mfd_def_size);
985 	close(fd2);
986 	close(fd);
987 }
988 
989 /*
990  * Test SEAL_SHRINK
991  * Test whether SEAL_SHRINK actually prevents shrinking
992  */
test_seal_shrink(void)993 static void test_seal_shrink(void)
994 {
995 	int fd;
996 
997 	printf("%s SEAL-SHRINK\n", memfd_str);
998 
999 	fd = mfd_assert_new("kern_memfd_seal_shrink",
1000 			    mfd_def_size,
1001 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1002 	mfd_assert_has_seals(fd, 0);
1003 	mfd_assert_add_seals(fd, F_SEAL_SHRINK);
1004 	mfd_assert_has_seals(fd, F_SEAL_SHRINK);
1005 
1006 	mfd_assert_read(fd);
1007 	mfd_assert_write(fd);
1008 	mfd_fail_shrink(fd);
1009 	mfd_assert_grow(fd);
1010 	mfd_assert_grow_write(fd);
1011 
1012 	close(fd);
1013 }
1014 
1015 /*
1016  * Test SEAL_GROW
1017  * Test whether SEAL_GROW actually prevents growing
1018  */
test_seal_grow(void)1019 static void test_seal_grow(void)
1020 {
1021 	int fd;
1022 
1023 	printf("%s SEAL-GROW\n", memfd_str);
1024 
1025 	fd = mfd_assert_new("kern_memfd_seal_grow",
1026 			    mfd_def_size,
1027 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1028 	mfd_assert_has_seals(fd, 0);
1029 	mfd_assert_add_seals(fd, F_SEAL_GROW);
1030 	mfd_assert_has_seals(fd, F_SEAL_GROW);
1031 
1032 	mfd_assert_read(fd);
1033 	mfd_assert_write(fd);
1034 	mfd_assert_shrink(fd);
1035 	mfd_fail_grow(fd);
1036 	mfd_fail_grow_write(fd);
1037 
1038 	close(fd);
1039 }
1040 
1041 /*
1042  * Test SEAL_SHRINK | SEAL_GROW
1043  * Test whether SEAL_SHRINK | SEAL_GROW actually prevents resizing
1044  */
test_seal_resize(void)1045 static void test_seal_resize(void)
1046 {
1047 	int fd;
1048 
1049 	printf("%s SEAL-RESIZE\n", memfd_str);
1050 
1051 	fd = mfd_assert_new("kern_memfd_seal_resize",
1052 			    mfd_def_size,
1053 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1054 	mfd_assert_has_seals(fd, 0);
1055 	mfd_assert_add_seals(fd, F_SEAL_SHRINK | F_SEAL_GROW);
1056 	mfd_assert_has_seals(fd, F_SEAL_SHRINK | F_SEAL_GROW);
1057 
1058 	mfd_assert_read(fd);
1059 	mfd_assert_write(fd);
1060 	mfd_fail_shrink(fd);
1061 	mfd_fail_grow(fd);
1062 	mfd_fail_grow_write(fd);
1063 
1064 	close(fd);
1065 }
1066 
1067 /*
1068  * Test SEAL_EXEC
1069  * Test fd is created with exec and allow sealing.
1070  * chmod() cannot change x bits after sealing.
1071  */
test_exec_seal(void)1072 static void test_exec_seal(void)
1073 {
1074 	int fd;
1075 
1076 	printf("%s SEAL-EXEC\n", memfd_str);
1077 
1078 	printf("%s	Apply SEAL_EXEC\n", memfd_str);
1079 	fd = mfd_assert_new("kern_memfd_seal_exec",
1080 			    mfd_def_size,
1081 			    MFD_CLOEXEC | MFD_ALLOW_SEALING | MFD_EXEC);
1082 
1083 	mfd_assert_mode(fd, 0777);
1084 	mfd_assert_chmod(fd, 0644);
1085 
1086 	mfd_assert_has_seals(fd, 0);
1087 	mfd_assert_add_seals(fd, F_SEAL_EXEC);
1088 	mfd_assert_has_seals(fd, F_SEAL_EXEC);
1089 
1090 	mfd_assert_chmod(fd, 0600);
1091 	mfd_fail_chmod(fd, 0777);
1092 	mfd_fail_chmod(fd, 0670);
1093 	mfd_fail_chmod(fd, 0605);
1094 	mfd_fail_chmod(fd, 0700);
1095 	mfd_fail_chmod(fd, 0100);
1096 	mfd_assert_chmod(fd, 0666);
1097 	mfd_assert_write(fd);
1098 	close(fd);
1099 
1100 	printf("%s	Apply ALL_SEALS\n", memfd_str);
1101 	fd = mfd_assert_new("kern_memfd_seal_exec",
1102 			    mfd_def_size,
1103 			    MFD_CLOEXEC | MFD_ALLOW_SEALING | MFD_EXEC);
1104 
1105 	mfd_assert_mode(fd, 0777);
1106 	mfd_assert_chmod(fd, 0700);
1107 
1108 	mfd_assert_has_seals(fd, 0);
1109 	mfd_assert_add_seals(fd, F_SEAL_EXEC);
1110 	mfd_assert_has_seals(fd, F_WX_SEALS);
1111 
1112 	mfd_fail_chmod(fd, 0711);
1113 	mfd_fail_chmod(fd, 0600);
1114 	mfd_fail_write(fd);
1115 	close(fd);
1116 }
1117 
1118 /*
1119  * Test EXEC_NO_SEAL
1120  * Test fd is created with exec and not allow sealing.
1121  */
test_exec_no_seal(void)1122 static void test_exec_no_seal(void)
1123 {
1124 	int fd;
1125 
1126 	printf("%s EXEC_NO_SEAL\n", memfd_str);
1127 
1128 	/* Create with EXEC but without ALLOW_SEALING */
1129 	fd = mfd_assert_new("kern_memfd_exec_no_sealing",
1130 			    mfd_def_size,
1131 			    MFD_CLOEXEC | MFD_EXEC);
1132 	mfd_assert_mode(fd, 0777);
1133 	mfd_assert_has_seals(fd, F_SEAL_SEAL);
1134 	mfd_assert_chmod(fd, 0666);
1135 	close(fd);
1136 }
1137 
1138 /*
1139  * Test memfd_create with MFD_NOEXEC flag
1140  */
test_noexec_seal(void)1141 static void test_noexec_seal(void)
1142 {
1143 	int fd;
1144 
1145 	printf("%s NOEXEC_SEAL\n", memfd_str);
1146 
1147 	/* Create with NOEXEC and ALLOW_SEALING */
1148 	fd = mfd_assert_new("kern_memfd_noexec",
1149 			    mfd_def_size,
1150 			    MFD_CLOEXEC | MFD_ALLOW_SEALING | MFD_NOEXEC_SEAL);
1151 	mfd_assert_mode(fd, 0666);
1152 	mfd_assert_has_seals(fd, F_SEAL_EXEC);
1153 	mfd_fail_chmod(fd, 0777);
1154 	close(fd);
1155 
1156 	/* Create with NOEXEC but without ALLOW_SEALING */
1157 	fd = mfd_assert_new("kern_memfd_noexec",
1158 			    mfd_def_size,
1159 			    MFD_CLOEXEC | MFD_NOEXEC_SEAL);
1160 	mfd_assert_mode(fd, 0666);
1161 	mfd_assert_has_seals(fd, F_SEAL_EXEC);
1162 	mfd_fail_chmod(fd, 0777);
1163 	close(fd);
1164 }
1165 
test_sysctl_sysctl0(void)1166 static void test_sysctl_sysctl0(void)
1167 {
1168 	int fd;
1169 
1170 	sysctl_assert_equal("0");
1171 
1172 	fd = mfd_assert_new("kern_memfd_sysctl_0_dfl",
1173 			    mfd_def_size,
1174 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1175 	mfd_assert_mode(fd, 0777);
1176 	mfd_assert_has_seals(fd, 0);
1177 	mfd_assert_chmod(fd, 0644);
1178 	close(fd);
1179 }
1180 
/*
 * Set the memfd_noexec sysctl to "0", then run the level-0 checks.
 */
static void test_sysctl_set_sysctl0(void)
{
	/* The write must succeed before the checks are meaningful. */
	sysctl_assert_write("0");

	test_sysctl_sysctl0();
}
1186 
test_sysctl_sysctl1(void)1187 static void test_sysctl_sysctl1(void)
1188 {
1189 	int fd;
1190 
1191 	sysctl_assert_equal("1");
1192 
1193 	fd = mfd_assert_new("kern_memfd_sysctl_1_dfl",
1194 			    mfd_def_size,
1195 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1196 	mfd_assert_mode(fd, 0666);
1197 	mfd_assert_has_seals(fd, F_SEAL_EXEC);
1198 	mfd_fail_chmod(fd, 0777);
1199 	close(fd);
1200 
1201 	fd = mfd_assert_new("kern_memfd_sysctl_1_exec",
1202 			    mfd_def_size,
1203 			    MFD_CLOEXEC | MFD_EXEC | MFD_ALLOW_SEALING);
1204 	mfd_assert_mode(fd, 0777);
1205 	mfd_assert_has_seals(fd, 0);
1206 	mfd_assert_chmod(fd, 0644);
1207 	close(fd);
1208 
1209 	fd = mfd_assert_new("kern_memfd_sysctl_1_noexec",
1210 			    mfd_def_size,
1211 			    MFD_CLOEXEC | MFD_NOEXEC_SEAL | MFD_ALLOW_SEALING);
1212 	mfd_assert_mode(fd, 0666);
1213 	mfd_assert_has_seals(fd, F_SEAL_EXEC);
1214 	mfd_fail_chmod(fd, 0777);
1215 	close(fd);
1216 }
1217 
/*
 * Set the memfd_noexec sysctl to "1", then run the level-1 checks.
 */
static void test_sysctl_set_sysctl1(void)
{
	/* The write must succeed before the checks are meaningful. */
	sysctl_assert_write("1");

	test_sysctl_sysctl1();
}
1223 
test_sysctl_sysctl2(void)1224 static void test_sysctl_sysctl2(void)
1225 {
1226 	int fd;
1227 
1228 	sysctl_assert_equal("2");
1229 
1230 	fd = mfd_assert_new("kern_memfd_sysctl_2_dfl",
1231 			    mfd_def_size,
1232 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1233 	mfd_assert_mode(fd, 0666);
1234 	mfd_assert_has_seals(fd, F_SEAL_EXEC);
1235 	mfd_fail_chmod(fd, 0777);
1236 	close(fd);
1237 
1238 	mfd_fail_new("kern_memfd_sysctl_2_exec",
1239 		     MFD_CLOEXEC | MFD_EXEC | MFD_ALLOW_SEALING);
1240 
1241 	fd = mfd_assert_new("kern_memfd_sysctl_2_noexec",
1242 			    mfd_def_size,
1243 			    MFD_CLOEXEC | MFD_NOEXEC_SEAL | MFD_ALLOW_SEALING);
1244 	mfd_assert_mode(fd, 0666);
1245 	mfd_assert_has_seals(fd, F_SEAL_EXEC);
1246 	mfd_fail_chmod(fd, 0777);
1247 	close(fd);
1248 }
1249 
/*
 * Set the memfd_noexec sysctl to "2", then run the level-2 checks.
 */
static void test_sysctl_set_sysctl2(void)
{
	/* The write must succeed before the checks are meaningful. */
	sysctl_assert_write("2");

	test_sysctl_sysctl2();
}
1255 
sysctl_simple_child(void * arg)1256 static int sysctl_simple_child(void *arg)
1257 {
1258 	int fd;
1259 	int pid;
1260 
1261 	printf("%s sysctl 0\n", memfd_str);
1262 	test_sysctl_set_sysctl0();
1263 
1264 	printf("%s sysctl 1\n", memfd_str);
1265 	test_sysctl_set_sysctl1();
1266 
1267 	printf("%s sysctl 0\n", memfd_str);
1268 	test_sysctl_set_sysctl0();
1269 
1270 	printf("%s sysctl 2\n", memfd_str);
1271 	test_sysctl_set_sysctl2();
1272 
1273 	printf("%s sysctl 1\n", memfd_str);
1274 	test_sysctl_set_sysctl1();
1275 
1276 	printf("%s sysctl 0\n", memfd_str);
1277 	test_sysctl_set_sysctl0();
1278 
1279 	return 0;
1280 }
1281 
1282 /*
1283  * Test sysctl
1284  * A very basic test to make sure the core sysctl semantics work.
1285  */
test_sysctl_simple(void)1286 static void test_sysctl_simple(void)
1287 {
1288 	int pid = spawn_thread(CLONE_NEWPID, sysctl_simple_child, NULL);
1289 
1290 	join_thread(pid);
1291 }
1292 
/*
 * Thread entry point that treats @arg as a void (*)(void) function, calls
 * it once, and returns 0.
 */
static int sysctl_nested(void *arg)
{
	void (*callback)(void) = arg;

	callback();

	return 0;
}
1300 
/*
 * Same as sysctl_nested(), but first sends SIGSTOP to the calling process so
 * that the callback in @arg only runs once a SIGCONT has been received.
 */
static int sysctl_nested_wait(void *arg)
{
	pid_t self = getpid();

	/* Wait for a SIGCONT. */
	kill(self, SIGSTOP);

	return sysctl_nested(arg);
}
1307 
/*
 * Expect a write of "0" to be rejected (sysctl_fail_write), then run the
 * test_sysctl_sysctl1() checks to confirm the "1" behaviour is still in
 * place.
 */
static void test_sysctl_sysctl1_failset(void)
{
	sysctl_fail_write("0");
	test_sysctl_sysctl1();
}
1313 
/*
 * Expect writes of "1" and then "0" to be rejected (sysctl_fail_write),
 * running the test_sysctl_sysctl2() checks after each attempt to confirm
 * the "2" behaviour is still in place.
 */
static void test_sysctl_sysctl2_failset(void)
{
	/* Attempt "1"; the "2" checks must still pass afterwards. */
	sysctl_fail_write("1");
	test_sysctl_sysctl2();

	/* Attempt "0"; the "2" checks must still pass afterwards. */
	sysctl_fail_write("0");
	test_sysctl_sysctl2();
}
1322 
sysctl_nested_child(void * arg)1323 static int sysctl_nested_child(void *arg)
1324 {
1325 	int fd;
1326 	int pid;
1327 
1328 	printf("%s nested sysctl 0\n", memfd_str);
1329 	sysctl_assert_write("0");
1330 	/* A further nested pidns works the same. */
1331 	pid = spawn_thread(CLONE_NEWPID, sysctl_simple_child, NULL);
1332 	join_thread(pid);
1333 
1334 	printf("%s nested sysctl 1\n", memfd_str);
1335 	sysctl_assert_write("1");
1336 	/* Child inherits our setting. */
1337 	pid = spawn_thread(CLONE_NEWPID, sysctl_nested, test_sysctl_sysctl1);
1338 	join_thread(pid);
1339 	/* Child cannot raise the setting. */
1340 	pid = spawn_thread(CLONE_NEWPID, sysctl_nested,
1341 			   test_sysctl_sysctl1_failset);
1342 	join_thread(pid);
1343 	/* Child can lower the setting. */
1344 	pid = spawn_thread(CLONE_NEWPID, sysctl_nested,
1345 			   test_sysctl_set_sysctl2);
1346 	join_thread(pid);
1347 	/* Child lowering the setting has no effect on our setting. */
1348 	test_sysctl_sysctl1();
1349 
1350 	printf("%s nested sysctl 2\n", memfd_str);
1351 	sysctl_assert_write("2");
1352 	/* Child inherits our setting. */
1353 	pid = spawn_thread(CLONE_NEWPID, sysctl_nested, test_sysctl_sysctl2);
1354 	join_thread(pid);
1355 	/* Child cannot raise the setting. */
1356 	pid = spawn_thread(CLONE_NEWPID, sysctl_nested,
1357 			   test_sysctl_sysctl2_failset);
1358 	join_thread(pid);
1359 
1360 	/* Verify that the rules are actually inherited after fork. */
1361 	printf("%s nested sysctl 0 -> 1 after fork\n", memfd_str);
1362 	sysctl_assert_write("0");
1363 
1364 	pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait,
1365 			   test_sysctl_sysctl1_failset);
1366 	sysctl_assert_write("1");
1367 	kill(pid, SIGCONT);
1368 	join_thread(pid);
1369 
1370 	printf("%s nested sysctl 0 -> 2 after fork\n", memfd_str);
1371 	sysctl_assert_write("0");
1372 
1373 	pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait,
1374 			   test_sysctl_sysctl2_failset);
1375 	sysctl_assert_write("2");
1376 	kill(pid, SIGCONT);
1377 	join_thread(pid);
1378 
1379 	/*
1380 	 * Verify that the current effective setting is saved on fork, meaning
1381 	 * that the parent lowering the sysctl doesn't affect already-forked
1382 	 * children.
1383 	 */
1384 	printf("%s nested sysctl 2 -> 1 after fork\n", memfd_str);
1385 	sysctl_assert_write("2");
1386 	pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait,
1387 			   test_sysctl_sysctl2);
1388 	sysctl_assert_write("1");
1389 	kill(pid, SIGCONT);
1390 	join_thread(pid);
1391 
1392 	printf("%s nested sysctl 2 -> 0 after fork\n", memfd_str);
1393 	sysctl_assert_write("2");
1394 	pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait,
1395 			   test_sysctl_sysctl2);
1396 	sysctl_assert_write("0");
1397 	kill(pid, SIGCONT);
1398 	join_thread(pid);
1399 
1400 	printf("%s nested sysctl 1 -> 0 after fork\n", memfd_str);
1401 	sysctl_assert_write("1");
1402 	pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait,
1403 			   test_sysctl_sysctl1);
1404 	sysctl_assert_write("0");
1405 	kill(pid, SIGCONT);
1406 	join_thread(pid);
1407 
1408 	return 0;
1409 }
1410 
1411 /*
1412  * Test sysctl with nested pid namespaces
1413  * Make sure that the sysctl nesting semantics work correctly.
1414  */
test_sysctl_nested(void)1415 static void test_sysctl_nested(void)
1416 {
1417 	int pid = spawn_thread(CLONE_NEWPID, sysctl_nested_child, NULL);
1418 
1419 	join_thread(pid);
1420 }
1421 
1422 /*
1423  * Test sharing via dup()
1424  * Test that seals are shared between dupped FDs and they're all equal.
1425  */
test_share_dup(char * banner,char * b_suffix)1426 static void test_share_dup(char *banner, char *b_suffix)
1427 {
1428 	int fd, fd2;
1429 
1430 	printf("%s %s %s\n", memfd_str, banner, b_suffix);
1431 
1432 	fd = mfd_assert_new("kern_memfd_share_dup",
1433 			    mfd_def_size,
1434 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1435 	mfd_assert_has_seals(fd, 0);
1436 
1437 	fd2 = mfd_assert_dup(fd);
1438 	mfd_assert_has_seals(fd2, 0);
1439 
1440 	mfd_assert_add_seals(fd, F_SEAL_WRITE);
1441 	mfd_assert_has_seals(fd, F_SEAL_WRITE);
1442 	mfd_assert_has_seals(fd2, F_SEAL_WRITE);
1443 
1444 	mfd_assert_add_seals(fd2, F_SEAL_SHRINK);
1445 	mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK);
1446 	mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK);
1447 
1448 	mfd_assert_add_seals(fd, F_SEAL_SEAL);
1449 	mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK | F_SEAL_SEAL);
1450 	mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK | F_SEAL_SEAL);
1451 
1452 	mfd_fail_add_seals(fd, F_SEAL_GROW);
1453 	mfd_fail_add_seals(fd2, F_SEAL_GROW);
1454 	mfd_fail_add_seals(fd, F_SEAL_SEAL);
1455 	mfd_fail_add_seals(fd2, F_SEAL_SEAL);
1456 
1457 	close(fd2);
1458 
1459 	mfd_fail_add_seals(fd, F_SEAL_GROW);
1460 	close(fd);
1461 }
1462 
1463 /*
1464  * Test sealing with active mmap()s
1465  * Modifying seals is only allowed if no other mmap() refs exist.
1466  */
test_share_mmap(char * banner,char * b_suffix)1467 static void test_share_mmap(char *banner, char *b_suffix)
1468 {
1469 	int fd;
1470 	void *p;
1471 
1472 	printf("%s %s %s\n", memfd_str,  banner, b_suffix);
1473 
1474 	fd = mfd_assert_new("kern_memfd_share_mmap",
1475 			    mfd_def_size,
1476 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1477 	mfd_assert_has_seals(fd, 0);
1478 
1479 	/* shared/writable ref prevents sealing WRITE, but allows others */
1480 	p = mfd_assert_mmap_shared(fd);
1481 	mfd_fail_add_seals(fd, F_SEAL_WRITE);
1482 	mfd_assert_has_seals(fd, 0);
1483 	mfd_assert_add_seals(fd, F_SEAL_SHRINK);
1484 	mfd_assert_has_seals(fd, F_SEAL_SHRINK);
1485 	munmap(p, mfd_def_size);
1486 
1487 	/* readable ref allows sealing */
1488 	p = mfd_assert_mmap_private(fd);
1489 	mfd_assert_add_seals(fd, F_SEAL_WRITE);
1490 	mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK);
1491 	munmap(p, mfd_def_size);
1492 
1493 	close(fd);
1494 }
1495 
1496 /*
1497  * Test sealing with open(/proc/self/fd/%d)
1498  * Via /proc we can get access to a separate file-context for the same memfd.
1499  * This is *not* like dup(), but like a real separate open(). Make sure the
1500  * semantics are as expected and we correctly check for RDONLY / WRONLY / RDWR.
1501  */
test_share_open(char * banner,char * b_suffix)1502 static void test_share_open(char *banner, char *b_suffix)
1503 {
1504 	int fd, fd2;
1505 
1506 	printf("%s %s %s\n", memfd_str, banner, b_suffix);
1507 
1508 	fd = mfd_assert_new("kern_memfd_share_open",
1509 			    mfd_def_size,
1510 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1511 	mfd_assert_has_seals(fd, 0);
1512 
1513 	fd2 = mfd_assert_open(fd, O_RDWR, 0);
1514 	mfd_assert_add_seals(fd, F_SEAL_WRITE);
1515 	mfd_assert_has_seals(fd, F_SEAL_WRITE);
1516 	mfd_assert_has_seals(fd2, F_SEAL_WRITE);
1517 
1518 	mfd_assert_add_seals(fd2, F_SEAL_SHRINK);
1519 	mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK);
1520 	mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK);
1521 
1522 	close(fd);
1523 	fd = mfd_assert_open(fd2, O_RDONLY, 0);
1524 
1525 	mfd_fail_add_seals(fd, F_SEAL_SEAL);
1526 	mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK);
1527 	mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK);
1528 
1529 	close(fd2);
1530 	fd2 = mfd_assert_open(fd, O_RDWR, 0);
1531 
1532 	mfd_assert_add_seals(fd2, F_SEAL_SEAL);
1533 	mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK | F_SEAL_SEAL);
1534 	mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK | F_SEAL_SEAL);
1535 
1536 	close(fd2);
1537 	close(fd);
1538 }
1539 
1540 /*
1541  * Test sharing via fork()
1542  * Test whether seal-modifications work as expected with forked childs.
1543  */
test_share_fork(char * banner,char * b_suffix)1544 static void test_share_fork(char *banner, char *b_suffix)
1545 {
1546 	int fd;
1547 	pid_t pid;
1548 
1549 	printf("%s %s %s\n", memfd_str, banner, b_suffix);
1550 
1551 	fd = mfd_assert_new("kern_memfd_share_fork",
1552 			    mfd_def_size,
1553 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1554 	mfd_assert_has_seals(fd, 0);
1555 
1556 	pid = spawn_idle_thread(0);
1557 	mfd_assert_add_seals(fd, F_SEAL_SEAL);
1558 	mfd_assert_has_seals(fd, F_SEAL_SEAL);
1559 
1560 	mfd_fail_add_seals(fd, F_SEAL_WRITE);
1561 	mfd_assert_has_seals(fd, F_SEAL_SEAL);
1562 
1563 	join_idle_thread(pid);
1564 
1565 	mfd_fail_add_seals(fd, F_SEAL_WRITE);
1566 	mfd_assert_has_seals(fd, F_SEAL_SEAL);
1567 
1568 	close(fd);
1569 }
1570 
/*
 * Report whether "/proc/self/ns/pid" exists, i.e. whether access() with
 * F_OK succeeds on it. Returns true if it does, false otherwise.
 */
static bool pid_ns_supported(void)
{
	if (access("/proc/self/ns/pid", F_OK) != 0)
		return false;

	return true;
}
1575 
main(int argc,char ** argv)1576 int main(int argc, char **argv)
1577 {
1578 	pid_t pid;
1579 
1580 	if (argc == 2) {
1581 		if (!strcmp(argv[1], "hugetlbfs")) {
1582 			unsigned long hpage_size = default_huge_page_size();
1583 
1584 			if (!hpage_size) {
1585 				printf("Unable to determine huge page size\n");
1586 				abort();
1587 			}
1588 
1589 			hugetlbfs_test = 1;
1590 			memfd_str = MEMFD_HUGE_STR;
1591 			mfd_def_size = hpage_size * 2;
1592 		} else {
1593 			printf("Unknown option: %s\n", argv[1]);
1594 			abort();
1595 		}
1596 	}
1597 
1598 	test_create();
1599 	test_basic();
1600 	test_exec_seal();
1601 	test_exec_no_seal();
1602 	test_noexec_seal();
1603 
1604 	test_seal_write();
1605 	test_seal_future_write();
1606 	test_seal_shrink();
1607 	test_seal_grow();
1608 	test_seal_resize();
1609 
1610 	if (pid_ns_supported()) {
1611 		test_sysctl_simple();
1612 		test_sysctl_nested();
1613 	} else {
1614 		printf("PID namespaces are not supported; skipping sysctl tests\n");
1615 	}
1616 
1617 	test_share_dup("SHARE-DUP", "");
1618 	test_share_mmap("SHARE-MMAP", "");
1619 	test_share_open("SHARE-OPEN", "");
1620 	test_share_fork("SHARE-FORK", "");
1621 
1622 	/* Run test-suite in a multi-threaded environment with a shared
1623 	 * file-table. */
1624 	pid = spawn_idle_thread(CLONE_FILES | CLONE_FS | CLONE_VM);
1625 	test_share_dup("SHARE-DUP", SHARED_FT_STR);
1626 	test_share_mmap("SHARE-MMAP", SHARED_FT_STR);
1627 	test_share_open("SHARE-OPEN", SHARED_FT_STR);
1628 	test_share_fork("SHARE-FORK", SHARED_FT_STR);
1629 	join_idle_thread(pid);
1630 
1631 	printf("memfd: DONE\n");
1632 
1633 	return 0;
1634 }
1635