xref: /openbmc/linux/tools/testing/selftests/memfd/fuse_test.c (revision 9a87ffc99ec8eb8d35eed7c4f816d75f5cc9662e)
1b2441318SGreg Kroah-Hartman // SPDX-License-Identifier: GPL-2.0
/*
 * memfd GUP test-case
 * This tests memfd interactions with get_user_pages(). We require the
 * fuse_mnt.c program to provide a fake direct-IO FUSE mount-point for us. This
 * file-system delays _all_ reads by 1s and forces direct-IO. This means any
 * read() on files in that file-system will pin the receive-buffer pages for at
 * least 1s via get_user_pages().
 *
 * We use this trick to race ADD_SEALS against a write on a memfd object. The
 * ADD_SEALS must fail if the memfd pages are still pinned. Note that we use
 * the read() syscall with our memory-mapped memfd object as receive buffer to
 * force the kernel to write into our memfd object.
 */
1587b2d440SDavid Herrmann 
1687b2d440SDavid Herrmann #define _GNU_SOURCE
1787b2d440SDavid Herrmann #define __EXPORTED_HEADERS__
1887b2d440SDavid Herrmann 
1987b2d440SDavid Herrmann #include <errno.h>
2087b2d440SDavid Herrmann #include <inttypes.h>
2187b2d440SDavid Herrmann #include <limits.h>
2287b2d440SDavid Herrmann #include <linux/falloc.h>
231c49e378SMichael Ellerman #include <fcntl.h>
2487b2d440SDavid Herrmann #include <linux/memfd.h>
25*11f75a01SJeff Xu #include <linux/types.h>
2687b2d440SDavid Herrmann #include <sched.h>
2787b2d440SDavid Herrmann #include <stdio.h>
2887b2d440SDavid Herrmann #include <stdlib.h>
2987b2d440SDavid Herrmann #include <signal.h>
3087b2d440SDavid Herrmann #include <string.h>
3187b2d440SDavid Herrmann #include <sys/mman.h>
3287b2d440SDavid Herrmann #include <sys/stat.h>
3387b2d440SDavid Herrmann #include <sys/syscall.h>
3487b2d440SDavid Herrmann #include <sys/wait.h>
3587b2d440SDavid Herrmann #include <unistd.h>
3687b2d440SDavid Herrmann 
3729f34d1dSMarc-André Lureau #include "common.h"
3829f34d1dSMarc-André Lureau 
/* Default size, in bytes, of the memfd object and of its mappings. */
#define MFD_DEF_SIZE 8192
/* Size, in bytes, of the stack buffer handed to the clone()d sealing thread. */
#define STACK_SIZE 65536

/*
 * Size actually used for the memfd, its mappings and the read(); starts at
 * MFD_DEF_SIZE and is raised by main() when the hugetlbfs variant is requested.
 */
static size_t mfd_def_size = MFD_DEF_SIZE;
43c5c63835SMarc-André Lureau 
/*
 * Create a new memfd through sys_memfd_create() and resize it to @sz bytes.
 *
 * @name:  name passed to sys_memfd_create()
 * @sz:    size the new file is truncated to
 * @flags: flags passed to sys_memfd_create()
 *
 * Returns the new file descriptor. Any failure prints a diagnostic and
 * aborts the test.
 */
static int mfd_assert_new(const char *name, loff_t sz, unsigned int flags)
{
	int memfd = sys_memfd_create(name, flags);

	if (memfd < 0) {
		printf("memfd_create(\"%s\", %u) failed: %m\n",
		       name, flags);
		abort();
	}

	if (ftruncate(memfd, sz) < 0) {
		printf("ftruncate(%llu) failed: %m\n", (unsigned long long)sz);
		abort();
	}

	return memfd;
}
6387b2d440SDavid Herrmann 
/*
 * Query the seals currently set on @fd with F_GET_SEALS.
 *
 * Returns the seal bitmask. If the fcntl() fails, a diagnostic is printed
 * and the test aborts.
 */
static __u64 mfd_assert_get_seals(int fd)
{
	long seals = fcntl(fd, F_GET_SEALS);

	if (seals < 0) {
		printf("GET_SEALS(%d) failed: %m\n", fd);
		abort();
	}

	return seals;
}
7687b2d440SDavid Herrmann 
/*
 * Verify that the seals set on @fd are exactly @seals.
 *
 * Prints both the expected and the actual bitmask and aborts the test on any
 * mismatch.
 */
static void mfd_assert_has_seals(int fd, __u64 seals)
{
	const __u64 actual = mfd_assert_get_seals(fd);

	if (actual == seals)
		return;

	printf("%llu != %llu = GET_SEALS(%d)\n",
	       (unsigned long long)seals, (unsigned long long)actual, fd);
	abort();
}
8887b2d440SDavid Herrmann 
/*
 * Add @seals to @fd with F_ADD_SEALS; the operation must succeed.
 *
 * On failure a diagnostic showing the previous and the requested seals is
 * printed and the test aborts.
 */
static void mfd_assert_add_seals(int fd, __u64 seals)
{
	long r;
	__u64 s;

	/* Current seals are fetched only so the diagnostic can show them. */
	s = mfd_assert_get_seals(fd);

	/*
	 * F_ADD_SEALS takes an int argument and fcntl() is variadic, so the
	 * argument has to be passed with that width. Handing over the raw
	 * 64-bit value is a type mismatch and can lose the seal bits on ABIs
	 * where the callee picks up the wrong half.
	 */
	r = fcntl(fd, F_ADD_SEALS, (unsigned int)seals);
	if (r < 0) {
		printf("ADD_SEALS(%d, %llu -> %llu) failed: %m\n",
		       fd, (unsigned long long)s, (unsigned long long)seals);
		abort();
	}
}
10287b2d440SDavid Herrmann 
/*
 * Try to add @seals to @fd while its pages may still be pinned.
 *
 * Both success and failure with EBUSY are accepted. Any other error prints a
 * diagnostic and aborts the test.
 *
 * Returns the fcntl() result: >= 0 if the seals were added, negative if the
 * request was refused with EBUSY.
 */
static int mfd_busy_add_seals(int fd, __u64 seals)
{
	long r;
	__u64 s;

	/*
	 * Snapshot of the current seals for the diagnostic only; a failure
	 * here is tolerated and reported as "no seals".
	 */
	r = fcntl(fd, F_GET_SEALS);
	if (r < 0)
		s = 0;
	else
		s = r;

	/*
	 * F_ADD_SEALS takes an int argument and fcntl() is variadic, so pass
	 * a value of that width instead of the raw 64-bit one.
	 */
	r = fcntl(fd, F_ADD_SEALS, (unsigned int)seals);
	if (r < 0 && errno != EBUSY) {
		printf("ADD_SEALS(%d, %llu -> %llu) didn't fail as expected with EBUSY: %m\n",
		       fd, (unsigned long long)s, (unsigned long long)seals);
		abort();
	}

	return r;
}
12387b2d440SDavid Herrmann 
mfd_assert_mmap_shared(int fd)12487b2d440SDavid Herrmann static void *mfd_assert_mmap_shared(int fd)
12587b2d440SDavid Herrmann {
12687b2d440SDavid Herrmann 	void *p;
12787b2d440SDavid Herrmann 
12887b2d440SDavid Herrmann 	p = mmap(NULL,
129c5c63835SMarc-André Lureau 		 mfd_def_size,
13087b2d440SDavid Herrmann 		 PROT_READ | PROT_WRITE,
13187b2d440SDavid Herrmann 		 MAP_SHARED,
13287b2d440SDavid Herrmann 		 fd,
13387b2d440SDavid Herrmann 		 0);
13487b2d440SDavid Herrmann 	if (p == MAP_FAILED) {
13587b2d440SDavid Herrmann 		printf("mmap() failed: %m\n");
13687b2d440SDavid Herrmann 		abort();
13787b2d440SDavid Herrmann 	}
13887b2d440SDavid Herrmann 
13987b2d440SDavid Herrmann 	return p;
14087b2d440SDavid Herrmann }
14187b2d440SDavid Herrmann 
mfd_assert_mmap_private(int fd)14287b2d440SDavid Herrmann static void *mfd_assert_mmap_private(int fd)
14387b2d440SDavid Herrmann {
14487b2d440SDavid Herrmann 	void *p;
14587b2d440SDavid Herrmann 
14687b2d440SDavid Herrmann 	p = mmap(NULL,
147c5c63835SMarc-André Lureau 		 mfd_def_size,
14887b2d440SDavid Herrmann 		 PROT_READ | PROT_WRITE,
14987b2d440SDavid Herrmann 		 MAP_PRIVATE,
15087b2d440SDavid Herrmann 		 fd,
15187b2d440SDavid Herrmann 		 0);
15287b2d440SDavid Herrmann 	if (p == MAP_FAILED) {
15387b2d440SDavid Herrmann 		printf("mmap() failed: %m\n");
15487b2d440SDavid Herrmann 		abort();
15587b2d440SDavid Herrmann 	}
15687b2d440SDavid Herrmann 
15787b2d440SDavid Herrmann 	return p;
15887b2d440SDavid Herrmann }
15987b2d440SDavid Herrmann 
/*
 * State handed from main() to sealing_thread_fn(): the shared mapping the
 * thread unmaps and the memfd it seals. The thread is created with CLONE_VM,
 * so it sees the values main() stored before spawning it.
 */
static void *global_p;
static int global_mfd = -1;
16287b2d440SDavid Herrmann 
sealing_thread_fn(void * arg)16387b2d440SDavid Herrmann static int sealing_thread_fn(void *arg)
16487b2d440SDavid Herrmann {
16587b2d440SDavid Herrmann 	int sig, r;
16687b2d440SDavid Herrmann 
16787b2d440SDavid Herrmann 	/*
16887b2d440SDavid Herrmann 	 * This thread first waits 200ms so any pending operation in the parent
16987b2d440SDavid Herrmann 	 * is correctly started. After that, it tries to seal @global_mfd as
17087b2d440SDavid Herrmann 	 * SEAL_WRITE. This _must_ fail as the parent thread has a read() into
17187b2d440SDavid Herrmann 	 * that memory mapped object still ongoing.
17287b2d440SDavid Herrmann 	 * We then wait one more second and try sealing again. This time it
17387b2d440SDavid Herrmann 	 * must succeed as there shouldn't be anyone else pinning the pages.
17487b2d440SDavid Herrmann 	 */
17587b2d440SDavid Herrmann 
17687b2d440SDavid Herrmann 	/* wait 200ms for FUSE-request to be active */
17787b2d440SDavid Herrmann 	usleep(200000);
17887b2d440SDavid Herrmann 
17987b2d440SDavid Herrmann 	/* unmount mapping before sealing to avoid i_mmap_writable failures */
180c5c63835SMarc-André Lureau 	munmap(global_p, mfd_def_size);
18187b2d440SDavid Herrmann 
18287b2d440SDavid Herrmann 	/* Try sealing the global file; expect EBUSY or success. Current
18387b2d440SDavid Herrmann 	 * kernels will never succeed, but in the future, kernels might
18487b2d440SDavid Herrmann 	 * implement page-replacements or other fancy ways to avoid racing
18587b2d440SDavid Herrmann 	 * writes. */
18687b2d440SDavid Herrmann 	r = mfd_busy_add_seals(global_mfd, F_SEAL_WRITE);
18787b2d440SDavid Herrmann 	if (r >= 0) {
18887b2d440SDavid Herrmann 		printf("HURRAY! This kernel fixed GUP races!\n");
18987b2d440SDavid Herrmann 	} else {
19087b2d440SDavid Herrmann 		/* wait 1s more so the FUSE-request is done */
19187b2d440SDavid Herrmann 		sleep(1);
19287b2d440SDavid Herrmann 
19387b2d440SDavid Herrmann 		/* try sealing the global file again */
19487b2d440SDavid Herrmann 		mfd_assert_add_seals(global_mfd, F_SEAL_WRITE);
19587b2d440SDavid Herrmann 	}
19687b2d440SDavid Herrmann 
19787b2d440SDavid Herrmann 	return 0;
19887b2d440SDavid Herrmann }
19987b2d440SDavid Herrmann 
spawn_sealing_thread(void)20087b2d440SDavid Herrmann static pid_t spawn_sealing_thread(void)
20187b2d440SDavid Herrmann {
20287b2d440SDavid Herrmann 	uint8_t *stack;
20387b2d440SDavid Herrmann 	pid_t pid;
20487b2d440SDavid Herrmann 
20587b2d440SDavid Herrmann 	stack = malloc(STACK_SIZE);
20687b2d440SDavid Herrmann 	if (!stack) {
20787b2d440SDavid Herrmann 		printf("malloc(STACK_SIZE) failed: %m\n");
20887b2d440SDavid Herrmann 		abort();
20987b2d440SDavid Herrmann 	}
21087b2d440SDavid Herrmann 
21187b2d440SDavid Herrmann 	pid = clone(sealing_thread_fn,
21287b2d440SDavid Herrmann 		    stack + STACK_SIZE,
21387b2d440SDavid Herrmann 		    SIGCHLD | CLONE_FILES | CLONE_FS | CLONE_VM,
21487b2d440SDavid Herrmann 		    NULL);
21587b2d440SDavid Herrmann 	if (pid < 0) {
21687b2d440SDavid Herrmann 		printf("clone() failed: %m\n");
21787b2d440SDavid Herrmann 		abort();
21887b2d440SDavid Herrmann 	}
21987b2d440SDavid Herrmann 
22087b2d440SDavid Herrmann 	return pid;
22187b2d440SDavid Herrmann }
22287b2d440SDavid Herrmann 
/*
 * Wait for the sealing thread @pid to terminate and reap it.
 *
 * The wait is retried when interrupted by a signal. If waitpid() fails, or
 * the child did not exit normally with status 0, a diagnostic is printed and
 * the test aborts instead of continuing with a half-finished run.
 */
static void join_sealing_thread(pid_t pid)
{
	int status = 0;
	pid_t r;

	do {
		r = waitpid(pid, &status, 0);
	} while (r < 0 && errno == EINTR);

	if (r < 0) {
		printf("waitpid(%d) failed: %m\n", (int)pid);
		abort();
	}

	if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
		printf("sealing thread %d terminated abnormally (status 0x%x)\n",
		       (int)pid, (unsigned int)status);
		abort();
	}
}
22787b2d440SDavid Herrmann 
main(int argc,char ** argv)22887b2d440SDavid Herrmann int main(int argc, char **argv)
22987b2d440SDavid Herrmann {
230c5c63835SMarc-André Lureau 	char *zero;
23187b2d440SDavid Herrmann 	int fd, mfd, r;
23287b2d440SDavid Herrmann 	void *p;
23387b2d440SDavid Herrmann 	int was_sealed;
23487b2d440SDavid Herrmann 	pid_t pid;
23587b2d440SDavid Herrmann 
23687b2d440SDavid Herrmann 	if (argc < 2) {
23787b2d440SDavid Herrmann 		printf("error: please pass path to file in fuse_mnt mount-point\n");
23887b2d440SDavid Herrmann 		abort();
23987b2d440SDavid Herrmann 	}
24087b2d440SDavid Herrmann 
241c5c63835SMarc-André Lureau 	if (argc >= 3) {
242c5c63835SMarc-André Lureau 		if (!strcmp(argv[2], "hugetlbfs")) {
243c5c63835SMarc-André Lureau 			unsigned long hpage_size = default_huge_page_size();
244c5c63835SMarc-André Lureau 
245c5c63835SMarc-André Lureau 			if (!hpage_size) {
246c5c63835SMarc-André Lureau 				printf("Unable to determine huge page size\n");
247c5c63835SMarc-André Lureau 				abort();
248c5c63835SMarc-André Lureau 			}
249c5c63835SMarc-André Lureau 
250c5c63835SMarc-André Lureau 			hugetlbfs_test = 1;
251c5c63835SMarc-André Lureau 			mfd_def_size = hpage_size * 2;
252c5c63835SMarc-André Lureau 		} else {
253c5c63835SMarc-André Lureau 			printf("Unknown option: %s\n", argv[2]);
254c5c63835SMarc-André Lureau 			abort();
255c5c63835SMarc-André Lureau 		}
256c5c63835SMarc-André Lureau 	}
257c5c63835SMarc-André Lureau 
258c5c63835SMarc-André Lureau 	zero = calloc(sizeof(*zero), mfd_def_size);
259c5c63835SMarc-André Lureau 
26087b2d440SDavid Herrmann 	/* open FUSE memfd file for GUP testing */
26187b2d440SDavid Herrmann 	printf("opening: %s\n", argv[1]);
26287b2d440SDavid Herrmann 	fd = open(argv[1], O_RDONLY | O_CLOEXEC);
26387b2d440SDavid Herrmann 	if (fd < 0) {
26487b2d440SDavid Herrmann 		printf("cannot open(\"%s\"): %m\n", argv[1]);
26587b2d440SDavid Herrmann 		abort();
26687b2d440SDavid Herrmann 	}
26787b2d440SDavid Herrmann 
26887b2d440SDavid Herrmann 	/* create new memfd-object */
26987b2d440SDavid Herrmann 	mfd = mfd_assert_new("kern_memfd_fuse",
270c5c63835SMarc-André Lureau 			     mfd_def_size,
27187b2d440SDavid Herrmann 			     MFD_CLOEXEC | MFD_ALLOW_SEALING);
27287b2d440SDavid Herrmann 
27387b2d440SDavid Herrmann 	/* mmap memfd-object for writing */
27487b2d440SDavid Herrmann 	p = mfd_assert_mmap_shared(mfd);
27587b2d440SDavid Herrmann 
27687b2d440SDavid Herrmann 	/* pass mfd+mapping to a separate sealing-thread which tries to seal
27787b2d440SDavid Herrmann 	 * the memfd objects with SEAL_WRITE while we write into it */
27887b2d440SDavid Herrmann 	global_mfd = mfd;
27987b2d440SDavid Herrmann 	global_p = p;
28087b2d440SDavid Herrmann 	pid = spawn_sealing_thread();
28187b2d440SDavid Herrmann 
28287b2d440SDavid Herrmann 	/* Use read() on the FUSE file to read into our memory-mapped memfd
28387b2d440SDavid Herrmann 	 * object. This races the other thread which tries to seal the
28487b2d440SDavid Herrmann 	 * memfd-object.
28587b2d440SDavid Herrmann 	 * If @fd is on the memfd-fake-FUSE-FS, the read() is delayed by 1s.
28687b2d440SDavid Herrmann 	 * This guarantees that the receive-buffer is pinned for 1s until the
28787b2d440SDavid Herrmann 	 * data is written into it. The racing ADD_SEALS should thus fail as
28887b2d440SDavid Herrmann 	 * the pages are still pinned. */
289c5c63835SMarc-André Lureau 	r = read(fd, p, mfd_def_size);
29087b2d440SDavid Herrmann 	if (r < 0) {
29187b2d440SDavid Herrmann 		printf("read() failed: %m\n");
29287b2d440SDavid Herrmann 		abort();
29387b2d440SDavid Herrmann 	} else if (!r) {
29487b2d440SDavid Herrmann 		printf("unexpected EOF on read()\n");
29587b2d440SDavid Herrmann 		abort();
29687b2d440SDavid Herrmann 	}
29787b2d440SDavid Herrmann 
29887b2d440SDavid Herrmann 	was_sealed = mfd_assert_get_seals(mfd) & F_SEAL_WRITE;
29987b2d440SDavid Herrmann 
30087b2d440SDavid Herrmann 	/* Wait for sealing-thread to finish and verify that it
30187b2d440SDavid Herrmann 	 * successfully sealed the file after the second try. */
30287b2d440SDavid Herrmann 	join_sealing_thread(pid);
30387b2d440SDavid Herrmann 	mfd_assert_has_seals(mfd, F_SEAL_WRITE);
30487b2d440SDavid Herrmann 
30587b2d440SDavid Herrmann 	/* *IF* the memfd-object was sealed at the time our read() returned,
30687b2d440SDavid Herrmann 	 * then the kernel did a page-replacement or canceled the read() (or
30787b2d440SDavid Herrmann 	 * whatever magic it did..). In that case, the memfd object is still
30887b2d440SDavid Herrmann 	 * all zero.
30987b2d440SDavid Herrmann 	 * In case the memfd-object was *not* sealed, the read() was successfull
31087b2d440SDavid Herrmann 	 * and the memfd object must *not* be all zero.
31187b2d440SDavid Herrmann 	 * Note that in real scenarios, there might be a mixture of both, but
31287b2d440SDavid Herrmann 	 * in this test-cases, we have explicit 200ms delays which should be
31387b2d440SDavid Herrmann 	 * enough to avoid any in-flight writes. */
31487b2d440SDavid Herrmann 
31587b2d440SDavid Herrmann 	p = mfd_assert_mmap_private(mfd);
316c5c63835SMarc-André Lureau 	if (was_sealed && memcmp(p, zero, mfd_def_size)) {
31787b2d440SDavid Herrmann 		printf("memfd sealed during read() but data not discarded\n");
31887b2d440SDavid Herrmann 		abort();
319c5c63835SMarc-André Lureau 	} else if (!was_sealed && !memcmp(p, zero, mfd_def_size)) {
32087b2d440SDavid Herrmann 		printf("memfd sealed after read() but data discarded\n");
32187b2d440SDavid Herrmann 		abort();
32287b2d440SDavid Herrmann 	}
32387b2d440SDavid Herrmann 
32487b2d440SDavid Herrmann 	close(mfd);
32587b2d440SDavid Herrmann 	close(fd);
32687b2d440SDavid Herrmann 
32787b2d440SDavid Herrmann 	printf("fuse: DONE\n");
328c5c63835SMarc-André Lureau 	free(zero);
32987b2d440SDavid Herrmann 
33087b2d440SDavid Herrmann 	return 0;
33187b2d440SDavid Herrmann }
332