xref: /openbmc/linux/mm/memfd.c (revision 2562d67b)
15d752600SMike Kravetz /*
25d752600SMike Kravetz  * memfd_create system call and file sealing support
35d752600SMike Kravetz  *
45d752600SMike Kravetz  * Code was originally included in shmem.c, and broken out to facilitate
55d752600SMike Kravetz  * use by hugetlbfs as well as tmpfs.
65d752600SMike Kravetz  *
75d752600SMike Kravetz  * This file is released under the GPL.
85d752600SMike Kravetz  */
95d752600SMike Kravetz 
105d752600SMike Kravetz #include <linux/fs.h>
115d752600SMike Kravetz #include <linux/vfs.h>
125d752600SMike Kravetz #include <linux/pagemap.h>
135d752600SMike Kravetz #include <linux/file.h>
145d752600SMike Kravetz #include <linux/mm.h>
155d752600SMike Kravetz #include <linux/sched/signal.h>
165d752600SMike Kravetz #include <linux/khugepaged.h>
175d752600SMike Kravetz #include <linux/syscalls.h>
185d752600SMike Kravetz #include <linux/hugetlb.h>
195d752600SMike Kravetz #include <linux/shmem_fs.h>
205d752600SMike Kravetz #include <linux/memfd.h>
21105ff533SJeff Xu #include <linux/pid_namespace.h>
225d752600SMike Kravetz #include <uapi/linux/memfd.h>
235d752600SMike Kravetz 
/*
 * Pinned pages are tracked with a page cache tag.  Adding a brand new tag
 * would expand every xa_node by 8 bytes, so instead reuse one which we
 * firmly believe is never set or cleared on tmpfs or hugetlbfs, because
 * both are memory only filesystems.
 */
#define MEMFD_TAG_PINNED        PAGECACHE_TAG_TOWRITE
#define LAST_SCAN               4       /* about 150ms max */
315d752600SMike Kravetz 
memfd_tag_pins(struct xa_state * xas)32ef3038a5SMatthew Wilcox static void memfd_tag_pins(struct xa_state *xas)
335d752600SMike Kravetz {
345d752600SMike Kravetz 	struct page *page;
35f2b277c4SHugh Dickins 	int latency = 0;
36f2b277c4SHugh Dickins 	int cache_count;
375d752600SMike Kravetz 
385d752600SMike Kravetz 	lru_add_drain();
395d752600SMike Kravetz 
40ef3038a5SMatthew Wilcox 	xas_lock_irq(xas);
41ef3038a5SMatthew Wilcox 	xas_for_each(xas, page, ULONG_MAX) {
42f2b277c4SHugh Dickins 		cache_count = 1;
43f2b277c4SHugh Dickins 		if (!xa_is_value(page) &&
44f2b277c4SHugh Dickins 		    PageTransHuge(page) && !PageHuge(page))
45f2b277c4SHugh Dickins 			cache_count = HPAGE_PMD_NR;
465d752600SMike Kravetz 
47f2b277c4SHugh Dickins 		if (!xa_is_value(page) &&
48f2b277c4SHugh Dickins 		    page_count(page) - total_mapcount(page) != cache_count)
49f2b277c4SHugh Dickins 			xas_set_mark(xas, MEMFD_TAG_PINNED);
50f2b277c4SHugh Dickins 		if (cache_count != 1)
51f2b277c4SHugh Dickins 			xas_set(xas, page->index + cache_count);
52f2b277c4SHugh Dickins 
53f2b277c4SHugh Dickins 		latency += cache_count;
54f2b277c4SHugh Dickins 		if (latency < XA_CHECK_SCHED)
55ef3038a5SMatthew Wilcox 			continue;
56f2b277c4SHugh Dickins 		latency = 0;
57ef3038a5SMatthew Wilcox 
58ef3038a5SMatthew Wilcox 		xas_pause(xas);
59ef3038a5SMatthew Wilcox 		xas_unlock_irq(xas);
60ef3038a5SMatthew Wilcox 		cond_resched();
61ef3038a5SMatthew Wilcox 		xas_lock_irq(xas);
625d752600SMike Kravetz 	}
63ef3038a5SMatthew Wilcox 	xas_unlock_irq(xas);
645d752600SMike Kravetz }
655d752600SMike Kravetz 
665d752600SMike Kravetz /*
675d752600SMike Kravetz  * Setting SEAL_WRITE requires us to verify there's no pending writer. However,
685d752600SMike Kravetz  * via get_user_pages(), drivers might have some pending I/O without any active
695d752600SMike Kravetz  * user-space mappings (eg., direct-IO, AIO). Therefore, we look at all pages
705d752600SMike Kravetz  * and see whether it has an elevated ref-count. If so, we tag them and wait for
715d752600SMike Kravetz  * them to be dropped.
725d752600SMike Kravetz  * The caller must guarantee that no new user will acquire writable references
735d752600SMike Kravetz  * to those pages to avoid races.
745d752600SMike Kravetz  */
memfd_wait_for_pins(struct address_space * mapping)755d752600SMike Kravetz static int memfd_wait_for_pins(struct address_space *mapping)
765d752600SMike Kravetz {
772313216fSMatthew Wilcox 	XA_STATE(xas, &mapping->i_pages, 0);
785d752600SMike Kravetz 	struct page *page;
795d752600SMike Kravetz 	int error, scan;
805d752600SMike Kravetz 
81ef3038a5SMatthew Wilcox 	memfd_tag_pins(&xas);
825d752600SMike Kravetz 
835d752600SMike Kravetz 	error = 0;
845d752600SMike Kravetz 	for (scan = 0; scan <= LAST_SCAN; scan++) {
85f2b277c4SHugh Dickins 		int latency = 0;
86f2b277c4SHugh Dickins 		int cache_count;
872313216fSMatthew Wilcox 
882313216fSMatthew Wilcox 		if (!xas_marked(&xas, MEMFD_TAG_PINNED))
895d752600SMike Kravetz 			break;
905d752600SMike Kravetz 
915d752600SMike Kravetz 		if (!scan)
925d752600SMike Kravetz 			lru_add_drain_all();
935d752600SMike Kravetz 		else if (schedule_timeout_killable((HZ << scan) / 200))
945d752600SMike Kravetz 			scan = LAST_SCAN;
955d752600SMike Kravetz 
962313216fSMatthew Wilcox 		xas_set(&xas, 0);
972313216fSMatthew Wilcox 		xas_lock_irq(&xas);
982313216fSMatthew Wilcox 		xas_for_each_marked(&xas, page, ULONG_MAX, MEMFD_TAG_PINNED) {
992313216fSMatthew Wilcox 			bool clear = true;
100f2b277c4SHugh Dickins 
101f2b277c4SHugh Dickins 			cache_count = 1;
102f2b277c4SHugh Dickins 			if (!xa_is_value(page) &&
103f2b277c4SHugh Dickins 			    PageTransHuge(page) && !PageHuge(page))
104f2b277c4SHugh Dickins 				cache_count = HPAGE_PMD_NR;
105f2b277c4SHugh Dickins 
106f2b277c4SHugh Dickins 			if (!xa_is_value(page) && cache_count !=
107f2b277c4SHugh Dickins 			    page_count(page) - total_mapcount(page)) {
1085d752600SMike Kravetz 				/*
1095d752600SMike Kravetz 				 * On the last scan, we clean up all those tags
1105d752600SMike Kravetz 				 * we inserted; but make a note that we still
1115d752600SMike Kravetz 				 * found pages pinned.
1125d752600SMike Kravetz 				 */
1132313216fSMatthew Wilcox 				if (scan == LAST_SCAN)
1145d752600SMike Kravetz 					error = -EBUSY;
1152313216fSMatthew Wilcox 				else
1162313216fSMatthew Wilcox 					clear = false;
1175d752600SMike Kravetz 			}
1182313216fSMatthew Wilcox 			if (clear)
1192313216fSMatthew Wilcox 				xas_clear_mark(&xas, MEMFD_TAG_PINNED);
120f2b277c4SHugh Dickins 
121f2b277c4SHugh Dickins 			latency += cache_count;
122f2b277c4SHugh Dickins 			if (latency < XA_CHECK_SCHED)
1232313216fSMatthew Wilcox 				continue;
124f2b277c4SHugh Dickins 			latency = 0;
1255d752600SMike Kravetz 
1262313216fSMatthew Wilcox 			xas_pause(&xas);
1272313216fSMatthew Wilcox 			xas_unlock_irq(&xas);
1282313216fSMatthew Wilcox 			cond_resched();
1292313216fSMatthew Wilcox 			xas_lock_irq(&xas);
1305d752600SMike Kravetz 		}
1312313216fSMatthew Wilcox 		xas_unlock_irq(&xas);
1325d752600SMike Kravetz 	}
1335d752600SMike Kravetz 
1345d752600SMike Kravetz 	return error;
1355d752600SMike Kravetz }
1365d752600SMike Kravetz 
memfd_file_seals_ptr(struct file * file)1375d752600SMike Kravetz static unsigned int *memfd_file_seals_ptr(struct file *file)
1385d752600SMike Kravetz {
1395d752600SMike Kravetz 	if (shmem_file(file))
1405d752600SMike Kravetz 		return &SHMEM_I(file_inode(file))->seals;
1415d752600SMike Kravetz 
1425d752600SMike Kravetz #ifdef CONFIG_HUGETLBFS
1435d752600SMike Kravetz 	if (is_file_hugepages(file))
1445d752600SMike Kravetz 		return &HUGETLBFS_I(file_inode(file))->seals;
1455d752600SMike Kravetz #endif
1465d752600SMike Kravetz 
1475d752600SMike Kravetz 	return NULL;
1485d752600SMike Kravetz }
1495d752600SMike Kravetz 
/* Every seal bit that memfd_add_seals() accepts from user space. */
#define F_ALL_SEALS (F_SEAL_SEAL | F_SEAL_EXEC | \
		     F_SEAL_SHRINK | F_SEAL_GROW | \
		     F_SEAL_WRITE | F_SEAL_FUTURE_WRITE)
1565d752600SMike Kravetz 
memfd_add_seals(struct file * file,unsigned int seals)1575d752600SMike Kravetz static int memfd_add_seals(struct file *file, unsigned int seals)
1585d752600SMike Kravetz {
1595d752600SMike Kravetz 	struct inode *inode = file_inode(file);
1605d752600SMike Kravetz 	unsigned int *file_seals;
1615d752600SMike Kravetz 	int error;
1625d752600SMike Kravetz 
1635d752600SMike Kravetz 	/*
1645d752600SMike Kravetz 	 * SEALING
1655d752600SMike Kravetz 	 * Sealing allows multiple parties to share a tmpfs or hugetlbfs file
1665d752600SMike Kravetz 	 * but restrict access to a specific subset of file operations. Seals
1675d752600SMike Kravetz 	 * can only be added, but never removed. This way, mutually untrusted
1685d752600SMike Kravetz 	 * parties can share common memory regions with a well-defined policy.
1695d752600SMike Kravetz 	 * A malicious peer can thus never perform unwanted operations on a
1705d752600SMike Kravetz 	 * shared object.
1715d752600SMike Kravetz 	 *
1725d752600SMike Kravetz 	 * Seals are only supported on special tmpfs or hugetlbfs files and
1735d752600SMike Kravetz 	 * always affect the whole underlying inode. Once a seal is set, it
1745d752600SMike Kravetz 	 * may prevent some kinds of access to the file. Currently, the
1755d752600SMike Kravetz 	 * following seals are defined:
1765d752600SMike Kravetz 	 *   SEAL_SEAL: Prevent further seals from being set on this file
1775d752600SMike Kravetz 	 *   SEAL_SHRINK: Prevent the file from shrinking
1785d752600SMike Kravetz 	 *   SEAL_GROW: Prevent the file from growing
1795d752600SMike Kravetz 	 *   SEAL_WRITE: Prevent write access to the file
1806fd73538SDaniel Verkamp 	 *   SEAL_EXEC: Prevent modification of the exec bits in the file mode
1815d752600SMike Kravetz 	 *
1825d752600SMike Kravetz 	 * As we don't require any trust relationship between two parties, we
1835d752600SMike Kravetz 	 * must prevent seals from being removed. Therefore, sealing a file
1845d752600SMike Kravetz 	 * only adds a given set of seals to the file, it never touches
1855d752600SMike Kravetz 	 * existing seals. Furthermore, the "setting seals"-operation can be
1865d752600SMike Kravetz 	 * sealed itself, which basically prevents any further seal from being
1875d752600SMike Kravetz 	 * added.
1885d752600SMike Kravetz 	 *
1895d752600SMike Kravetz 	 * Semantics of sealing are only defined on volatile files. Only
1905d752600SMike Kravetz 	 * anonymous tmpfs and hugetlbfs files support sealing. More
1915d752600SMike Kravetz 	 * importantly, seals are never written to disk. Therefore, there's
1925d752600SMike Kravetz 	 * no plan to support it on other file types.
1935d752600SMike Kravetz 	 */
1945d752600SMike Kravetz 
1955d752600SMike Kravetz 	if (!(file->f_mode & FMODE_WRITE))
1965d752600SMike Kravetz 		return -EPERM;
1975d752600SMike Kravetz 	if (seals & ~(unsigned int)F_ALL_SEALS)
1985d752600SMike Kravetz 		return -EINVAL;
1995d752600SMike Kravetz 
2005d752600SMike Kravetz 	inode_lock(inode);
2015d752600SMike Kravetz 
2025d752600SMike Kravetz 	file_seals = memfd_file_seals_ptr(file);
2035d752600SMike Kravetz 	if (!file_seals) {
2045d752600SMike Kravetz 		error = -EINVAL;
2055d752600SMike Kravetz 		goto unlock;
2065d752600SMike Kravetz 	}
2075d752600SMike Kravetz 
2085d752600SMike Kravetz 	if (*file_seals & F_SEAL_SEAL) {
2095d752600SMike Kravetz 		error = -EPERM;
2105d752600SMike Kravetz 		goto unlock;
2115d752600SMike Kravetz 	}
2125d752600SMike Kravetz 
2135d752600SMike Kravetz 	if ((seals & F_SEAL_WRITE) && !(*file_seals & F_SEAL_WRITE)) {
2145d752600SMike Kravetz 		error = mapping_deny_writable(file->f_mapping);
2155d752600SMike Kravetz 		if (error)
2165d752600SMike Kravetz 			goto unlock;
2175d752600SMike Kravetz 
2185d752600SMike Kravetz 		error = memfd_wait_for_pins(file->f_mapping);
2195d752600SMike Kravetz 		if (error) {
2205d752600SMike Kravetz 			mapping_allow_writable(file->f_mapping);
2215d752600SMike Kravetz 			goto unlock;
2225d752600SMike Kravetz 		}
2235d752600SMike Kravetz 	}
2245d752600SMike Kravetz 
225c4f75bc8SJeff Xu 	/*
226c4f75bc8SJeff Xu 	 * SEAL_EXEC implys SEAL_WRITE, making W^X from the start.
227c4f75bc8SJeff Xu 	 */
228c4f75bc8SJeff Xu 	if (seals & F_SEAL_EXEC && inode->i_mode & 0111)
229c4f75bc8SJeff Xu 		seals |= F_SEAL_SHRINK|F_SEAL_GROW|F_SEAL_WRITE|F_SEAL_FUTURE_WRITE;
230c4f75bc8SJeff Xu 
2315d752600SMike Kravetz 	*file_seals |= seals;
2325d752600SMike Kravetz 	error = 0;
2335d752600SMike Kravetz 
2345d752600SMike Kravetz unlock:
2355d752600SMike Kravetz 	inode_unlock(inode);
2365d752600SMike Kravetz 	return error;
2375d752600SMike Kravetz }
2385d752600SMike Kravetz 
memfd_get_seals(struct file * file)2395d752600SMike Kravetz static int memfd_get_seals(struct file *file)
2405d752600SMike Kravetz {
2415d752600SMike Kravetz 	unsigned int *seals = memfd_file_seals_ptr(file);
2425d752600SMike Kravetz 
2435d752600SMike Kravetz 	return seals ? *seals : -EINVAL;
2445d752600SMike Kravetz }
2455d752600SMike Kravetz 
memfd_fcntl(struct file * file,unsigned int cmd,unsigned int arg)246f7b8f70bSLuca Vizzarro long memfd_fcntl(struct file *file, unsigned int cmd, unsigned int arg)
2475d752600SMike Kravetz {
2485d752600SMike Kravetz 	long error;
2495d752600SMike Kravetz 
2505d752600SMike Kravetz 	switch (cmd) {
2515d752600SMike Kravetz 	case F_ADD_SEALS:
2525d752600SMike Kravetz 		error = memfd_add_seals(file, arg);
2535d752600SMike Kravetz 		break;
2545d752600SMike Kravetz 	case F_GET_SEALS:
2555d752600SMike Kravetz 		error = memfd_get_seals(file);
2565d752600SMike Kravetz 		break;
2575d752600SMike Kravetz 	default:
2585d752600SMike Kravetz 		error = -EINVAL;
2595d752600SMike Kravetz 		break;
2605d752600SMike Kravetz 	}
2615d752600SMike Kravetz 
2625d752600SMike Kravetz 	return error;
2635d752600SMike Kravetz }
2645d752600SMike Kravetz 
/* Every memfd name is prefixed; the user part must fit in what remains. */
#define MFD_NAME_PREFIX "memfd:"
#define MFD_NAME_PREFIX_LEN (sizeof(MFD_NAME_PREFIX) - 1)
#define MFD_NAME_MAX_LEN (NAME_MAX - MFD_NAME_PREFIX_LEN)

/* Flag bits accepted by memfd_create(), not counting huge page size bits. */
#define MFD_ALL_FLAGS (MFD_CLOEXEC | \
		       MFD_ALLOW_SEALING | \
		       MFD_HUGETLB | \
		       MFD_NOEXEC_SEAL | \
		       MFD_EXEC)
2705d752600SMike Kravetz 
/*
 * Apply the memfd_noexec scope of the caller's active pid namespace to
 * @flags.
 *
 * When the caller passed neither MFD_EXEC nor MFD_NOEXEC_SEAL, one of the
 * two is chosen here according to the scope and stored back into @flags.
 * When the scope is at least MEMFD_NOEXEC_SCOPE_NOEXEC_ENFORCED and
 * MFD_NOEXEC_SEAL is still not set, the request is logged (rate limited)
 * and refused.
 *
 * Returns 0 if creation may proceed, -EACCES if it is refused.  Without
 * CONFIG_SYSCTL this leaves @flags untouched and always returns 0.
 */
static int check_sysctl_memfd_noexec(unsigned int *flags)
{
#ifdef CONFIG_SYSCTL
	const unsigned int exec_choice = MFD_EXEC | MFD_NOEXEC_SEAL;
	int scope = pidns_memfd_noexec_scope(task_active_pid_ns(current));

	if ((*flags & exec_choice) == 0)
		*flags |= (scope >= MEMFD_NOEXEC_SCOPE_NOEXEC_SEAL) ?
			  MFD_NOEXEC_SEAL : MFD_EXEC;

	if (scope >= MEMFD_NOEXEC_SCOPE_NOEXEC_ENFORCED &&
	    !(*flags & MFD_NOEXEC_SEAL)) {
		pr_err_ratelimited(
			"%s[%d]: memfd_create() requires MFD_NOEXEC_SEAL with vm.memfd_noexec=%d\n",
			current->comm, task_pid_nr(current), scope);
		return -EACCES;
	}
#endif
	return 0;
}
29372de2591SJeff Xu 
/*
 * memfd_create() - create an anonymous shmem or hugetlbfs file and return a
 * new file descriptor for it.
 *
 * @uname: user-space name; it is stored with the "memfd:" prefix and, with
 *         its terminating zero, may be at most MFD_NAME_MAX_LEN + 1 bytes.
 * @flags: MFD_* flags; with MFD_HUGETLB the huge page size encoding bits are
 *         accepted as well.
 *
 * Returns the new file descriptor, or a negative errno: -EINVAL for unknown
 * or conflicting flags or an over-long name, -EFAULT if the name cannot be
 * read or changed while it was being copied, -ENOMEM if the name buffer
 * cannot be allocated, or whatever check_sysctl_memfd_noexec(),
 * get_unused_fd_flags() or the file setup helpers report.
 */
SYSCALL_DEFINE2(memfd_create,
		const char __user *, uname,
		unsigned int, flags)
{
	const unsigned int exec_choice = MFD_EXEC | MFD_NOEXEC_SEAL;
	unsigned int allowed = MFD_ALL_FLAGS;
	unsigned int *file_seals;
	struct file *file;
	int fd, error;
	char *name;
	long len;

	/* Allow huge page size encoding in flags, but only for hugetlb. */
	if (flags & MFD_HUGETLB)
		allowed |= MFD_HUGE_MASK << MFD_HUGE_SHIFT;
	if (flags & ~allowed)
		return -EINVAL;

	/* Invalid if both EXEC and NOEXEC_SEAL are set. */
	if ((flags & exec_choice) == exec_choice)
		return -EINVAL;

	if ((flags & exec_choice) == 0) {
		pr_warn_once(
			"%s[%d]: memfd_create() called without MFD_EXEC or MFD_NOEXEC_SEAL set\n",
			current->comm, task_pid_nr(current));
	}

	/* May pick MFD_EXEC or MFD_NOEXEC_SEAL for us, or refuse outright. */
	error = check_sysctl_memfd_noexec(&flags);
	if (error < 0)
		return error;

	/* length includes terminating zero */
	len = strnlen_user(uname, MFD_NAME_MAX_LEN + 1);
	if (len <= 0)
		return -EFAULT;
	if (len > MFD_NAME_MAX_LEN + 1)
		return -EINVAL;

	name = kmalloc(len + MFD_NAME_PREFIX_LEN, GFP_KERNEL);
	if (!name)
		return -ENOMEM;

	/* Build "memfd:<uname>" in the freshly allocated buffer. */
	strcpy(name, MFD_NAME_PREFIX);
	if (copy_from_user(&name[MFD_NAME_PREFIX_LEN], uname, len)) {
		error = -EFAULT;
		goto err_name;
	}

	/* terminating-zero may have changed after strnlen_user() returned */
	if (name[len + MFD_NAME_PREFIX_LEN - 1]) {
		error = -EFAULT;
		goto err_name;
	}

	fd = get_unused_fd_flags((flags & MFD_CLOEXEC) ? O_CLOEXEC : 0);
	if (fd < 0) {
		error = fd;
		goto err_name;
	}

	if (flags & MFD_HUGETLB)
		file = hugetlb_file_setup(name, 0, VM_NORESERVE,
					  HUGETLB_ANONHUGE_INODE,
					  (flags >> MFD_HUGE_SHIFT) &
					  MFD_HUGE_MASK);
	else
		file = shmem_file_setup(name, 0, VM_NORESERVE);
	if (IS_ERR(file)) {
		error = PTR_ERR(file);
		goto err_fd;
	}
	file->f_mode |= FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE;
	file->f_flags |= O_LARGEFILE;

	if (flags & MFD_NOEXEC_SEAL) {
		/* Drop all exec bits, then seal them and permit further sealing. */
		file_inode(file)->i_mode &= ~0111;

		file_seals = memfd_file_seals_ptr(file);
		if (file_seals) {
			*file_seals &= ~F_SEAL_SEAL;
			*file_seals |= F_SEAL_EXEC;
		}
	} else if (flags & MFD_ALLOW_SEALING) {
		/* MFD_EXEC and MFD_ALLOW_SEALING are set */
		file_seals = memfd_file_seals_ptr(file);
		if (file_seals)
			*file_seals &= ~F_SEAL_SEAL;
	}

	fd_install(fd, file);
	kfree(name);
	return fd;

err_fd:
	put_unused_fd(fd);
err_name:
	kfree(name);
	return error;
}
397