xref: /openbmc/linux/fs/namespace.c (revision 0f8821da)
159bd9dedSThomas Gleixner // SPDX-License-Identifier: GPL-2.0-only
21da177e4SLinus Torvalds /*
31da177e4SLinus Torvalds  *  linux/fs/namespace.c
41da177e4SLinus Torvalds  *
51da177e4SLinus Torvalds  * (C) Copyright Al Viro 2000, 2001
61da177e4SLinus Torvalds  *
71da177e4SLinus Torvalds  * Based on code from fs/super.c, copyright Linus Torvalds and others.
81da177e4SLinus Torvalds  * Heavily rewritten.
91da177e4SLinus Torvalds  */
101da177e4SLinus Torvalds 
111da177e4SLinus Torvalds #include <linux/syscalls.h>
12d10577a8SAl Viro #include <linux/export.h>
1316f7e0feSRandy Dunlap #include <linux/capability.h>
146b3286edSKirill Korotaev #include <linux/mnt_namespace.h>
15771b1371SEric W. Biederman #include <linux/user_namespace.h>
161da177e4SLinus Torvalds #include <linux/namei.h>
171da177e4SLinus Torvalds #include <linux/security.h>
185b825c3aSIngo Molnar #include <linux/cred.h>
1973cd49ecSMiklos Szeredi #include <linux/idr.h>
2057f150a5SRob Landley #include <linux/init.h>		/* init_rootfs */
21d10577a8SAl Viro #include <linux/fs_struct.h>	/* get_fs_root et.al. */
22d10577a8SAl Viro #include <linux/fsnotify.h>	/* fsnotify_vfsmount_delete */
23a07b2000SAl Viro #include <linux/file.h>
24d10577a8SAl Viro #include <linux/uaccess.h>
250bb80f24SDavid Howells #include <linux/proc_ns.h>
2620b4fb48SLinus Torvalds #include <linux/magic.h>
2757c8a661SMike Rapoport #include <linux/memblock.h>
289caccd41SChristian Brauner #include <linux/proc_fs.h>
299ea459e1SAl Viro #include <linux/task_work.h>
309164bb4aSIngo Molnar #include <linux/sched/task.h>
31e262e32dSDavid Howells #include <uapi/linux/mount.h>
329bc61ab1SDavid Howells #include <linux/fs_context.h>
33037f11b4SAl Viro #include <linux/shmem_fs.h>
349164bb4aSIngo Molnar 
3507b20889SRam Pai #include "pnode.h"
36948730b0SAdrian Bunk #include "internal.h"
371da177e4SLinus Torvalds 
38d2921684SEric W. Biederman /* Maximum number of mounts in a mount namespace */
39d2921684SEric W. Biederman unsigned int sysctl_mount_max __read_mostly = 100000;
40d2921684SEric W. Biederman 
410818bf27SAl Viro static unsigned int m_hash_mask __read_mostly;
420818bf27SAl Viro static unsigned int m_hash_shift __read_mostly;
430818bf27SAl Viro static unsigned int mp_hash_mask __read_mostly;
440818bf27SAl Viro static unsigned int mp_hash_shift __read_mostly;
450818bf27SAl Viro 
460818bf27SAl Viro static __initdata unsigned long mhash_entries;
470818bf27SAl Viro static int __init set_mhash_entries(char *str)
480818bf27SAl Viro {
490818bf27SAl Viro 	if (!str)
500818bf27SAl Viro 		return 0;
510818bf27SAl Viro 	mhash_entries = simple_strtoul(str, &str, 0);
520818bf27SAl Viro 	return 1;
530818bf27SAl Viro }
540818bf27SAl Viro __setup("mhash_entries=", set_mhash_entries);
550818bf27SAl Viro 
560818bf27SAl Viro static __initdata unsigned long mphash_entries;
570818bf27SAl Viro static int __init set_mphash_entries(char *str)
580818bf27SAl Viro {
590818bf27SAl Viro 	if (!str)
600818bf27SAl Viro 		return 0;
610818bf27SAl Viro 	mphash_entries = simple_strtoul(str, &str, 0);
620818bf27SAl Viro 	return 1;
630818bf27SAl Viro }
640818bf27SAl Viro __setup("mphash_entries=", set_mphash_entries);
6513f14b4dSEric Dumazet 
66c7999c36SAl Viro static u64 event;
6773cd49ecSMiklos Szeredi static DEFINE_IDA(mnt_id_ida);
68719f5d7fSMiklos Szeredi static DEFINE_IDA(mnt_group_ida);
695addc5ddSAl Viro 
7038129a13SAl Viro static struct hlist_head *mount_hashtable __read_mostly;
710818bf27SAl Viro static struct hlist_head *mountpoint_hashtable __read_mostly;
72e18b890bSChristoph Lameter static struct kmem_cache *mnt_cache __read_mostly;
7359aa0da8SAl Viro static DECLARE_RWSEM(namespace_sem);
744edbe133SAl Viro static HLIST_HEAD(unmounted);	/* protected by namespace_sem */
754edbe133SAl Viro static LIST_HEAD(ex_mountpoints); /* protected by namespace_sem */
761da177e4SLinus Torvalds 
772a186721SChristian Brauner struct mount_kattr {
782a186721SChristian Brauner 	unsigned int attr_set;
792a186721SChristian Brauner 	unsigned int attr_clr;
802a186721SChristian Brauner 	unsigned int propagation;
812a186721SChristian Brauner 	unsigned int lookup_flags;
822a186721SChristian Brauner 	bool recurse;
839caccd41SChristian Brauner 	struct user_namespace *mnt_userns;
842a186721SChristian Brauner };
852a186721SChristian Brauner 
/* Kobject representing /sys/fs. */
struct kobject *fs_kobj;
EXPORT_SYMBOL_GPL(fs_kobj);
89f87fd4c2SMiklos Szeredi 
9099b7db7bSNick Piggin /*
9199b7db7bSNick Piggin  * vfsmount lock may be taken for read to prevent changes to the
9299b7db7bSNick Piggin  * vfsmount hash, ie. during mountpoint lookups or walking back
9399b7db7bSNick Piggin  * up the tree.
9499b7db7bSNick Piggin  *
9599b7db7bSNick Piggin  * It should be taken for write in all cases where the vfsmount
9699b7db7bSNick Piggin  * tree or hash is modified or when a vfsmount structure is modified.
9799b7db7bSNick Piggin  */
9848a066e7SAl Viro __cacheline_aligned_in_smp DEFINE_SEQLOCK(mount_lock);
9999b7db7bSNick Piggin 
100d033cb67SChristian Brauner static inline void lock_mount_hash(void)
101d033cb67SChristian Brauner {
102d033cb67SChristian Brauner 	write_seqlock(&mount_lock);
103d033cb67SChristian Brauner }
104d033cb67SChristian Brauner 
105d033cb67SChristian Brauner static inline void unlock_mount_hash(void)
106d033cb67SChristian Brauner {
107d033cb67SChristian Brauner 	write_sequnlock(&mount_lock);
108d033cb67SChristian Brauner }
109d033cb67SChristian Brauner 
11038129a13SAl Viro static inline struct hlist_head *m_hash(struct vfsmount *mnt, struct dentry *dentry)
1111da177e4SLinus Torvalds {
1121da177e4SLinus Torvalds 	unsigned long tmp = ((unsigned long)mnt / L1_CACHE_BYTES);
1131da177e4SLinus Torvalds 	tmp += ((unsigned long)dentry / L1_CACHE_BYTES);
1140818bf27SAl Viro 	tmp = tmp + (tmp >> m_hash_shift);
1150818bf27SAl Viro 	return &mount_hashtable[tmp & m_hash_mask];
1160818bf27SAl Viro }
1170818bf27SAl Viro 
1180818bf27SAl Viro static inline struct hlist_head *mp_hash(struct dentry *dentry)
1190818bf27SAl Viro {
1200818bf27SAl Viro 	unsigned long tmp = ((unsigned long)dentry / L1_CACHE_BYTES);
1210818bf27SAl Viro 	tmp = tmp + (tmp >> mp_hash_shift);
1220818bf27SAl Viro 	return &mountpoint_hashtable[tmp & mp_hash_mask];
1231da177e4SLinus Torvalds }
1241da177e4SLinus Torvalds 
125b105e270SAl Viro static int mnt_alloc_id(struct mount *mnt)
12673cd49ecSMiklos Szeredi {
127169b480eSMatthew Wilcox 	int res = ida_alloc(&mnt_id_ida, GFP_KERNEL);
12873cd49ecSMiklos Szeredi 
129169b480eSMatthew Wilcox 	if (res < 0)
13073cd49ecSMiklos Szeredi 		return res;
131169b480eSMatthew Wilcox 	mnt->mnt_id = res;
132169b480eSMatthew Wilcox 	return 0;
13373cd49ecSMiklos Szeredi }
13473cd49ecSMiklos Szeredi 
135b105e270SAl Viro static void mnt_free_id(struct mount *mnt)
13673cd49ecSMiklos Szeredi {
137169b480eSMatthew Wilcox 	ida_free(&mnt_id_ida, mnt->mnt_id);
13873cd49ecSMiklos Szeredi }
13973cd49ecSMiklos Szeredi 
140719f5d7fSMiklos Szeredi /*
141719f5d7fSMiklos Szeredi  * Allocate a new peer group ID
142719f5d7fSMiklos Szeredi  */
1434b8b21f4SAl Viro static int mnt_alloc_group_id(struct mount *mnt)
144719f5d7fSMiklos Szeredi {
145169b480eSMatthew Wilcox 	int res = ida_alloc_min(&mnt_group_ida, 1, GFP_KERNEL);
146f21f6220SAl Viro 
147169b480eSMatthew Wilcox 	if (res < 0)
148f21f6220SAl Viro 		return res;
149169b480eSMatthew Wilcox 	mnt->mnt_group_id = res;
150169b480eSMatthew Wilcox 	return 0;
151719f5d7fSMiklos Szeredi }
152719f5d7fSMiklos Szeredi 
153719f5d7fSMiklos Szeredi /*
154719f5d7fSMiklos Szeredi  * Release a peer group ID
155719f5d7fSMiklos Szeredi  */
1564b8b21f4SAl Viro void mnt_release_group_id(struct mount *mnt)
157719f5d7fSMiklos Szeredi {
158169b480eSMatthew Wilcox 	ida_free(&mnt_group_ida, mnt->mnt_group_id);
15915169fe7SAl Viro 	mnt->mnt_group_id = 0;
160719f5d7fSMiklos Szeredi }
161719f5d7fSMiklos Szeredi 
162b3e19d92SNick Piggin /*
163b3e19d92SNick Piggin  * vfsmount lock must be held for read
164b3e19d92SNick Piggin  */
16583adc753SAl Viro static inline void mnt_add_count(struct mount *mnt, int n)
166b3e19d92SNick Piggin {
167b3e19d92SNick Piggin #ifdef CONFIG_SMP
16868e8a9feSAl Viro 	this_cpu_add(mnt->mnt_pcp->mnt_count, n);
169b3e19d92SNick Piggin #else
170b3e19d92SNick Piggin 	preempt_disable();
17168e8a9feSAl Viro 	mnt->mnt_count += n;
172b3e19d92SNick Piggin 	preempt_enable();
173b3e19d92SNick Piggin #endif
174b3e19d92SNick Piggin }
175b3e19d92SNick Piggin 
176b3e19d92SNick Piggin /*
177b3e19d92SNick Piggin  * vfsmount lock must be held for write
178b3e19d92SNick Piggin  */
179edf7ddbfSEric Biggers int mnt_get_count(struct mount *mnt)
180b3e19d92SNick Piggin {
181b3e19d92SNick Piggin #ifdef CONFIG_SMP
182edf7ddbfSEric Biggers 	int count = 0;
183b3e19d92SNick Piggin 	int cpu;
184b3e19d92SNick Piggin 
185b3e19d92SNick Piggin 	for_each_possible_cpu(cpu) {
18668e8a9feSAl Viro 		count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_count;
187b3e19d92SNick Piggin 	}
188b3e19d92SNick Piggin 
189b3e19d92SNick Piggin 	return count;
190b3e19d92SNick Piggin #else
19168e8a9feSAl Viro 	return mnt->mnt_count;
192b3e19d92SNick Piggin #endif
193b3e19d92SNick Piggin }
194b3e19d92SNick Piggin 
195b105e270SAl Viro static struct mount *alloc_vfsmnt(const char *name)
1961da177e4SLinus Torvalds {
197c63181e6SAl Viro 	struct mount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL);
198c63181e6SAl Viro 	if (mnt) {
19973cd49ecSMiklos Szeredi 		int err;
20073cd49ecSMiklos Szeredi 
201c63181e6SAl Viro 		err = mnt_alloc_id(mnt);
20288b38782SLi Zefan 		if (err)
20388b38782SLi Zefan 			goto out_free_cache;
20488b38782SLi Zefan 
20588b38782SLi Zefan 		if (name) {
20679f6540bSVasily Averin 			mnt->mnt_devname = kstrdup_const(name,
20779f6540bSVasily Averin 							 GFP_KERNEL_ACCOUNT);
208c63181e6SAl Viro 			if (!mnt->mnt_devname)
20988b38782SLi Zefan 				goto out_free_id;
21073cd49ecSMiklos Szeredi 		}
21173cd49ecSMiklos Szeredi 
212b3e19d92SNick Piggin #ifdef CONFIG_SMP
213c63181e6SAl Viro 		mnt->mnt_pcp = alloc_percpu(struct mnt_pcp);
214c63181e6SAl Viro 		if (!mnt->mnt_pcp)
215b3e19d92SNick Piggin 			goto out_free_devname;
216b3e19d92SNick Piggin 
217c63181e6SAl Viro 		this_cpu_add(mnt->mnt_pcp->mnt_count, 1);
218b3e19d92SNick Piggin #else
219c63181e6SAl Viro 		mnt->mnt_count = 1;
220c63181e6SAl Viro 		mnt->mnt_writers = 0;
221b3e19d92SNick Piggin #endif
222b3e19d92SNick Piggin 
22338129a13SAl Viro 		INIT_HLIST_NODE(&mnt->mnt_hash);
224c63181e6SAl Viro 		INIT_LIST_HEAD(&mnt->mnt_child);
225c63181e6SAl Viro 		INIT_LIST_HEAD(&mnt->mnt_mounts);
226c63181e6SAl Viro 		INIT_LIST_HEAD(&mnt->mnt_list);
227c63181e6SAl Viro 		INIT_LIST_HEAD(&mnt->mnt_expire);
228c63181e6SAl Viro 		INIT_LIST_HEAD(&mnt->mnt_share);
229c63181e6SAl Viro 		INIT_LIST_HEAD(&mnt->mnt_slave_list);
230c63181e6SAl Viro 		INIT_LIST_HEAD(&mnt->mnt_slave);
2310a5eb7c8SEric W. Biederman 		INIT_HLIST_NODE(&mnt->mnt_mp_list);
23299b19d16SEric W. Biederman 		INIT_LIST_HEAD(&mnt->mnt_umounting);
23356cbb429SAl Viro 		INIT_HLIST_HEAD(&mnt->mnt_stuck_children);
234a6435940SChristian Brauner 		mnt->mnt.mnt_userns = &init_user_ns;
2351da177e4SLinus Torvalds 	}
236c63181e6SAl Viro 	return mnt;
23788b38782SLi Zefan 
238d3ef3d73Snpiggin@suse.de #ifdef CONFIG_SMP
239d3ef3d73Snpiggin@suse.de out_free_devname:
240fcc139aeSAndrzej Hajda 	kfree_const(mnt->mnt_devname);
241d3ef3d73Snpiggin@suse.de #endif
24288b38782SLi Zefan out_free_id:
243c63181e6SAl Viro 	mnt_free_id(mnt);
24488b38782SLi Zefan out_free_cache:
245c63181e6SAl Viro 	kmem_cache_free(mnt_cache, mnt);
24688b38782SLi Zefan 	return NULL;
2471da177e4SLinus Torvalds }
2481da177e4SLinus Torvalds 
2498366025eSDave Hansen /*
2508366025eSDave Hansen  * Most r/o checks on a fs are for operations that take
2518366025eSDave Hansen  * discrete amounts of time, like a write() or unlink().
2528366025eSDave Hansen  * We must keep track of when those operations start
2538366025eSDave Hansen  * (for permission checks) and when they end, so that
2548366025eSDave Hansen  * we can determine when writes are able to occur to
2558366025eSDave Hansen  * a filesystem.
2568366025eSDave Hansen  */
2573d733633SDave Hansen /*
2583d733633SDave Hansen  * __mnt_is_readonly: check whether a mount is read-only
2593d733633SDave Hansen  * @mnt: the mount to check for its write status
2603d733633SDave Hansen  *
2613d733633SDave Hansen  * This shouldn't be used directly ouside of the VFS.
2623d733633SDave Hansen  * It does not guarantee that the filesystem will stay
2633d733633SDave Hansen  * r/w, just that it is right *now*.  This can not and
2643d733633SDave Hansen  * should not be used in place of IS_RDONLY(inode).
2653d733633SDave Hansen  * mnt_want/drop_write() will _keep_ the filesystem
2663d733633SDave Hansen  * r/w.
2673d733633SDave Hansen  */
26843f5e655SDavid Howells bool __mnt_is_readonly(struct vfsmount *mnt)
2693d733633SDave Hansen {
27043f5e655SDavid Howells 	return (mnt->mnt_flags & MNT_READONLY) || sb_rdonly(mnt->mnt_sb);
2713d733633SDave Hansen }
2723d733633SDave Hansen EXPORT_SYMBOL_GPL(__mnt_is_readonly);
2733d733633SDave Hansen 
27483adc753SAl Viro static inline void mnt_inc_writers(struct mount *mnt)
2753d733633SDave Hansen {
276d3ef3d73Snpiggin@suse.de #ifdef CONFIG_SMP
27768e8a9feSAl Viro 	this_cpu_inc(mnt->mnt_pcp->mnt_writers);
278d3ef3d73Snpiggin@suse.de #else
27968e8a9feSAl Viro 	mnt->mnt_writers++;
280d3ef3d73Snpiggin@suse.de #endif
2813d733633SDave Hansen }
2823d733633SDave Hansen 
28383adc753SAl Viro static inline void mnt_dec_writers(struct mount *mnt)
2843d733633SDave Hansen {
285d3ef3d73Snpiggin@suse.de #ifdef CONFIG_SMP
28668e8a9feSAl Viro 	this_cpu_dec(mnt->mnt_pcp->mnt_writers);
287d3ef3d73Snpiggin@suse.de #else
28868e8a9feSAl Viro 	mnt->mnt_writers--;
289d3ef3d73Snpiggin@suse.de #endif
290d3ef3d73Snpiggin@suse.de }
291d3ef3d73Snpiggin@suse.de 
29283adc753SAl Viro static unsigned int mnt_get_writers(struct mount *mnt)
293d3ef3d73Snpiggin@suse.de {
294d3ef3d73Snpiggin@suse.de #ifdef CONFIG_SMP
295d3ef3d73Snpiggin@suse.de 	unsigned int count = 0;
2963d733633SDave Hansen 	int cpu;
2973d733633SDave Hansen 
2983d733633SDave Hansen 	for_each_possible_cpu(cpu) {
29968e8a9feSAl Viro 		count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_writers;
3003d733633SDave Hansen 	}
3013d733633SDave Hansen 
302d3ef3d73Snpiggin@suse.de 	return count;
303d3ef3d73Snpiggin@suse.de #else
304d3ef3d73Snpiggin@suse.de 	return mnt->mnt_writers;
305d3ef3d73Snpiggin@suse.de #endif
3063d733633SDave Hansen }
3073d733633SDave Hansen 
3084ed5e82fSMiklos Szeredi static int mnt_is_readonly(struct vfsmount *mnt)
3094ed5e82fSMiklos Szeredi {
3104ed5e82fSMiklos Szeredi 	if (mnt->mnt_sb->s_readonly_remount)
3114ed5e82fSMiklos Szeredi 		return 1;
3124ed5e82fSMiklos Szeredi 	/* Order wrt setting s_flags/s_readonly_remount in do_remount() */
3134ed5e82fSMiklos Szeredi 	smp_rmb();
3144ed5e82fSMiklos Szeredi 	return __mnt_is_readonly(mnt);
3154ed5e82fSMiklos Szeredi }
3164ed5e82fSMiklos Szeredi 
3173d733633SDave Hansen /*
318eb04c282SJan Kara  * Most r/o & frozen checks on a fs are for operations that take discrete
319eb04c282SJan Kara  * amounts of time, like a write() or unlink().  We must keep track of when
320eb04c282SJan Kara  * those operations start (for permission checks) and when they end, so that we
321eb04c282SJan Kara  * can determine when writes are able to occur to a filesystem.
3223d733633SDave Hansen  */
3238366025eSDave Hansen /**
324eb04c282SJan Kara  * __mnt_want_write - get write access to a mount without freeze protection
32583adc753SAl Viro  * @m: the mount on which to take a write
3268366025eSDave Hansen  *
327eb04c282SJan Kara  * This tells the low-level filesystem that a write is about to be performed to
328eb04c282SJan Kara  * it, and makes sure that writes are allowed (mnt it read-write) before
329eb04c282SJan Kara  * returning success. This operation does not protect against filesystem being
330eb04c282SJan Kara  * frozen. When the write operation is finished, __mnt_drop_write() must be
331eb04c282SJan Kara  * called. This is effectively a refcount.
3328366025eSDave Hansen  */
333eb04c282SJan Kara int __mnt_want_write(struct vfsmount *m)
3348366025eSDave Hansen {
33583adc753SAl Viro 	struct mount *mnt = real_mount(m);
3363d733633SDave Hansen 	int ret = 0;
3373d733633SDave Hansen 
338d3ef3d73Snpiggin@suse.de 	preempt_disable();
339c6653a83SNick Piggin 	mnt_inc_writers(mnt);
340d3ef3d73Snpiggin@suse.de 	/*
341c6653a83SNick Piggin 	 * The store to mnt_inc_writers must be visible before we pass
342d3ef3d73Snpiggin@suse.de 	 * MNT_WRITE_HOLD loop below, so that the slowpath can see our
343d3ef3d73Snpiggin@suse.de 	 * incremented count after it has set MNT_WRITE_HOLD.
344d3ef3d73Snpiggin@suse.de 	 */
345d3ef3d73Snpiggin@suse.de 	smp_mb();
346*0f8821daSSebastian Andrzej Siewior 	might_lock(&mount_lock.lock);
347*0f8821daSSebastian Andrzej Siewior 	while (READ_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD) {
348*0f8821daSSebastian Andrzej Siewior 		if (!IS_ENABLED(CONFIG_PREEMPT_RT)) {
349d3ef3d73Snpiggin@suse.de 			cpu_relax();
350*0f8821daSSebastian Andrzej Siewior 		} else {
351*0f8821daSSebastian Andrzej Siewior 			/*
352*0f8821daSSebastian Andrzej Siewior 			 * This prevents priority inversion, if the task
353*0f8821daSSebastian Andrzej Siewior 			 * setting MNT_WRITE_HOLD got preempted on a remote
354*0f8821daSSebastian Andrzej Siewior 			 * CPU, and it prevents life lock if the task setting
355*0f8821daSSebastian Andrzej Siewior 			 * MNT_WRITE_HOLD has a lower priority and is bound to
356*0f8821daSSebastian Andrzej Siewior 			 * the same CPU as the task that is spinning here.
357*0f8821daSSebastian Andrzej Siewior 			 */
358*0f8821daSSebastian Andrzej Siewior 			preempt_enable();
359*0f8821daSSebastian Andrzej Siewior 			lock_mount_hash();
360*0f8821daSSebastian Andrzej Siewior 			unlock_mount_hash();
361*0f8821daSSebastian Andrzej Siewior 			preempt_disable();
362*0f8821daSSebastian Andrzej Siewior 		}
363*0f8821daSSebastian Andrzej Siewior 	}
364d3ef3d73Snpiggin@suse.de 	/*
365d3ef3d73Snpiggin@suse.de 	 * After the slowpath clears MNT_WRITE_HOLD, mnt_is_readonly will
366d3ef3d73Snpiggin@suse.de 	 * be set to match its requirements. So we must not load that until
367d3ef3d73Snpiggin@suse.de 	 * MNT_WRITE_HOLD is cleared.
368d3ef3d73Snpiggin@suse.de 	 */
369d3ef3d73Snpiggin@suse.de 	smp_rmb();
3704ed5e82fSMiklos Szeredi 	if (mnt_is_readonly(m)) {
371c6653a83SNick Piggin 		mnt_dec_writers(mnt);
3723d733633SDave Hansen 		ret = -EROFS;
3733d733633SDave Hansen 	}
374d3ef3d73Snpiggin@suse.de 	preempt_enable();
375eb04c282SJan Kara 
376eb04c282SJan Kara 	return ret;
377eb04c282SJan Kara }
378eb04c282SJan Kara 
379eb04c282SJan Kara /**
380eb04c282SJan Kara  * mnt_want_write - get write access to a mount
381eb04c282SJan Kara  * @m: the mount on which to take a write
382eb04c282SJan Kara  *
383eb04c282SJan Kara  * This tells the low-level filesystem that a write is about to be performed to
384eb04c282SJan Kara  * it, and makes sure that writes are allowed (mount is read-write, filesystem
385eb04c282SJan Kara  * is not frozen) before returning success.  When the write operation is
386eb04c282SJan Kara  * finished, mnt_drop_write() must be called.  This is effectively a refcount.
387eb04c282SJan Kara  */
388eb04c282SJan Kara int mnt_want_write(struct vfsmount *m)
389eb04c282SJan Kara {
390eb04c282SJan Kara 	int ret;
391eb04c282SJan Kara 
392eb04c282SJan Kara 	sb_start_write(m->mnt_sb);
393eb04c282SJan Kara 	ret = __mnt_want_write(m);
394eb04c282SJan Kara 	if (ret)
395eb04c282SJan Kara 		sb_end_write(m->mnt_sb);
3963d733633SDave Hansen 	return ret;
3978366025eSDave Hansen }
3988366025eSDave Hansen EXPORT_SYMBOL_GPL(mnt_want_write);
3998366025eSDave Hansen 
4008366025eSDave Hansen /**
401eb04c282SJan Kara  * __mnt_want_write_file - get write access to a file's mount
402eb04c282SJan Kara  * @file: the file who's mount on which to take a write
403eb04c282SJan Kara  *
40414e43bf4SEric Biggers  * This is like __mnt_want_write, but if the file is already open for writing it
40514e43bf4SEric Biggers  * skips incrementing mnt_writers (since the open file already has a reference)
40614e43bf4SEric Biggers  * and instead only does the check for emergency r/o remounts.  This must be
40714e43bf4SEric Biggers  * paired with __mnt_drop_write_file.
408eb04c282SJan Kara  */
409eb04c282SJan Kara int __mnt_want_write_file(struct file *file)
410eb04c282SJan Kara {
41114e43bf4SEric Biggers 	if (file->f_mode & FMODE_WRITER) {
41214e43bf4SEric Biggers 		/*
41314e43bf4SEric Biggers 		 * Superblock may have become readonly while there are still
41414e43bf4SEric Biggers 		 * writable fd's, e.g. due to a fs error with errors=remount-ro
41514e43bf4SEric Biggers 		 */
41614e43bf4SEric Biggers 		if (__mnt_is_readonly(file->f_path.mnt))
41714e43bf4SEric Biggers 			return -EROFS;
41814e43bf4SEric Biggers 		return 0;
41914e43bf4SEric Biggers 	}
420eb04c282SJan Kara 	return __mnt_want_write(file->f_path.mnt);
421eb04c282SJan Kara }
422eb04c282SJan Kara 
423eb04c282SJan Kara /**
4247c6893e3SMiklos Szeredi  * mnt_want_write_file - get write access to a file's mount
4257c6893e3SMiklos Szeredi  * @file: the file who's mount on which to take a write
4267c6893e3SMiklos Szeredi  *
42714e43bf4SEric Biggers  * This is like mnt_want_write, but if the file is already open for writing it
42814e43bf4SEric Biggers  * skips incrementing mnt_writers (since the open file already has a reference)
42914e43bf4SEric Biggers  * and instead only does the freeze protection and the check for emergency r/o
43014e43bf4SEric Biggers  * remounts.  This must be paired with mnt_drop_write_file.
4317c6893e3SMiklos Szeredi  */
4327c6893e3SMiklos Szeredi int mnt_want_write_file(struct file *file)
4337c6893e3SMiklos Szeredi {
4347c6893e3SMiklos Szeredi 	int ret;
4357c6893e3SMiklos Szeredi 
4367c6893e3SMiklos Szeredi 	sb_start_write(file_inode(file)->i_sb);
4377c6893e3SMiklos Szeredi 	ret = __mnt_want_write_file(file);
4387c6893e3SMiklos Szeredi 	if (ret)
4397c6893e3SMiklos Szeredi 		sb_end_write(file_inode(file)->i_sb);
4407c6893e3SMiklos Szeredi 	return ret;
4417c6893e3SMiklos Szeredi }
44296029c4eSnpiggin@suse.de EXPORT_SYMBOL_GPL(mnt_want_write_file);
44396029c4eSnpiggin@suse.de 
/**
 * __mnt_drop_write - give up write access to a mount
 * @mnt: the mount on which to give up write access
 *
 * Tells the low-level filesystem that we are done performing writes to it
 * by dropping the writer count, with preemption disabled around the
 * decrement.  Must be matched with a __mnt_want_write() call.
 */
void __mnt_drop_write(struct vfsmount *mnt)
{
	struct mount *m = real_mount(mnt);

	preempt_disable();
	mnt_dec_writers(m);
	preempt_enable();
}
458eb04c282SJan Kara 
459eb04c282SJan Kara /**
460eb04c282SJan Kara  * mnt_drop_write - give up write access to a mount
461eb04c282SJan Kara  * @mnt: the mount on which to give up write access
462eb04c282SJan Kara  *
463eb04c282SJan Kara  * Tells the low-level filesystem that we are done performing writes to it and
464eb04c282SJan Kara  * also allows filesystem to be frozen again.  Must be matched with
465eb04c282SJan Kara  * mnt_want_write() call above.
466eb04c282SJan Kara  */
467eb04c282SJan Kara void mnt_drop_write(struct vfsmount *mnt)
468eb04c282SJan Kara {
469eb04c282SJan Kara 	__mnt_drop_write(mnt);
470eb04c282SJan Kara 	sb_end_write(mnt->mnt_sb);
471eb04c282SJan Kara }
4728366025eSDave Hansen EXPORT_SYMBOL_GPL(mnt_drop_write);
4738366025eSDave Hansen 
474eb04c282SJan Kara void __mnt_drop_write_file(struct file *file)
475eb04c282SJan Kara {
47614e43bf4SEric Biggers 	if (!(file->f_mode & FMODE_WRITER))
477eb04c282SJan Kara 		__mnt_drop_write(file->f_path.mnt);
478eb04c282SJan Kara }
479eb04c282SJan Kara 
4807c6893e3SMiklos Szeredi void mnt_drop_write_file(struct file *file)
4817c6893e3SMiklos Szeredi {
482a6795a58SMiklos Szeredi 	__mnt_drop_write_file(file);
4837c6893e3SMiklos Szeredi 	sb_end_write(file_inode(file)->i_sb);
4847c6893e3SMiklos Szeredi }
4852a79f17eSAl Viro EXPORT_SYMBOL(mnt_drop_write_file);
4862a79f17eSAl Viro 
487fbdc2f6cSChristian Brauner static inline int mnt_hold_writers(struct mount *mnt)
4888366025eSDave Hansen {
48983adc753SAl Viro 	mnt->mnt.mnt_flags |= MNT_WRITE_HOLD;
490d3ef3d73Snpiggin@suse.de 	/*
491d3ef3d73Snpiggin@suse.de 	 * After storing MNT_WRITE_HOLD, we'll read the counters. This store
492d3ef3d73Snpiggin@suse.de 	 * should be visible before we do.
493d3ef3d73Snpiggin@suse.de 	 */
494d3ef3d73Snpiggin@suse.de 	smp_mb();
495d3ef3d73Snpiggin@suse.de 
496d3ef3d73Snpiggin@suse.de 	/*
497d3ef3d73Snpiggin@suse.de 	 * With writers on hold, if this value is zero, then there are
498d3ef3d73Snpiggin@suse.de 	 * definitely no active writers (although held writers may subsequently
499d3ef3d73Snpiggin@suse.de 	 * increment the count, they'll have to wait, and decrement it after
500d3ef3d73Snpiggin@suse.de 	 * seeing MNT_READONLY).
501d3ef3d73Snpiggin@suse.de 	 *
502d3ef3d73Snpiggin@suse.de 	 * It is OK to have counter incremented on one CPU and decremented on
503d3ef3d73Snpiggin@suse.de 	 * another: the sum will add up correctly. The danger would be when we
504d3ef3d73Snpiggin@suse.de 	 * sum up each counter, if we read a counter before it is incremented,
505d3ef3d73Snpiggin@suse.de 	 * but then read another CPU's count which it has been subsequently
506d3ef3d73Snpiggin@suse.de 	 * decremented from -- we would see more decrements than we should.
507d3ef3d73Snpiggin@suse.de 	 * MNT_WRITE_HOLD protects against this scenario, because
508d3ef3d73Snpiggin@suse.de 	 * mnt_want_write first increments count, then smp_mb, then spins on
509d3ef3d73Snpiggin@suse.de 	 * MNT_WRITE_HOLD, so it can't be decremented by another CPU while
510d3ef3d73Snpiggin@suse.de 	 * we're counting up here.
511d3ef3d73Snpiggin@suse.de 	 */
512c6653a83SNick Piggin 	if (mnt_get_writers(mnt) > 0)
513fbdc2f6cSChristian Brauner 		return -EBUSY;
514fbdc2f6cSChristian Brauner 
515fbdc2f6cSChristian Brauner 	return 0;
516fbdc2f6cSChristian Brauner }
517fbdc2f6cSChristian Brauner 
518fbdc2f6cSChristian Brauner static inline void mnt_unhold_writers(struct mount *mnt)
519fbdc2f6cSChristian Brauner {
520d3ef3d73Snpiggin@suse.de 	/*
521d3ef3d73Snpiggin@suse.de 	 * MNT_READONLY must become visible before ~MNT_WRITE_HOLD, so writers
522d3ef3d73Snpiggin@suse.de 	 * that become unheld will see MNT_READONLY.
523d3ef3d73Snpiggin@suse.de 	 */
524d3ef3d73Snpiggin@suse.de 	smp_wmb();
52583adc753SAl Viro 	mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD;
526fbdc2f6cSChristian Brauner }
527fbdc2f6cSChristian Brauner 
528fbdc2f6cSChristian Brauner static int mnt_make_readonly(struct mount *mnt)
529fbdc2f6cSChristian Brauner {
530fbdc2f6cSChristian Brauner 	int ret;
531fbdc2f6cSChristian Brauner 
532fbdc2f6cSChristian Brauner 	ret = mnt_hold_writers(mnt);
533fbdc2f6cSChristian Brauner 	if (!ret)
534fbdc2f6cSChristian Brauner 		mnt->mnt.mnt_flags |= MNT_READONLY;
535fbdc2f6cSChristian Brauner 	mnt_unhold_writers(mnt);
5363d733633SDave Hansen 	return ret;
5373d733633SDave Hansen }
5388366025eSDave Hansen 
5394ed5e82fSMiklos Szeredi int sb_prepare_remount_readonly(struct super_block *sb)
5404ed5e82fSMiklos Szeredi {
5414ed5e82fSMiklos Szeredi 	struct mount *mnt;
5424ed5e82fSMiklos Szeredi 	int err = 0;
5434ed5e82fSMiklos Szeredi 
5448e8b8796SMiklos Szeredi 	/* Racy optimization.  Recheck the counter under MNT_WRITE_HOLD */
5458e8b8796SMiklos Szeredi 	if (atomic_long_read(&sb->s_remove_count))
5468e8b8796SMiklos Szeredi 		return -EBUSY;
5478e8b8796SMiklos Szeredi 
548719ea2fbSAl Viro 	lock_mount_hash();
5494ed5e82fSMiklos Szeredi 	list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) {
5504ed5e82fSMiklos Szeredi 		if (!(mnt->mnt.mnt_flags & MNT_READONLY)) {
5514ed5e82fSMiklos Szeredi 			mnt->mnt.mnt_flags |= MNT_WRITE_HOLD;
5524ed5e82fSMiklos Szeredi 			smp_mb();
5534ed5e82fSMiklos Szeredi 			if (mnt_get_writers(mnt) > 0) {
5544ed5e82fSMiklos Szeredi 				err = -EBUSY;
5554ed5e82fSMiklos Szeredi 				break;
5564ed5e82fSMiklos Szeredi 			}
5574ed5e82fSMiklos Szeredi 		}
5584ed5e82fSMiklos Szeredi 	}
5598e8b8796SMiklos Szeredi 	if (!err && atomic_long_read(&sb->s_remove_count))
5608e8b8796SMiklos Szeredi 		err = -EBUSY;
5618e8b8796SMiklos Szeredi 
5624ed5e82fSMiklos Szeredi 	if (!err) {
5634ed5e82fSMiklos Szeredi 		sb->s_readonly_remount = 1;
5644ed5e82fSMiklos Szeredi 		smp_wmb();
5654ed5e82fSMiklos Szeredi 	}
5664ed5e82fSMiklos Szeredi 	list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) {
5674ed5e82fSMiklos Szeredi 		if (mnt->mnt.mnt_flags & MNT_WRITE_HOLD)
5684ed5e82fSMiklos Szeredi 			mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD;
5694ed5e82fSMiklos Szeredi 	}
570719ea2fbSAl Viro 	unlock_mount_hash();
5714ed5e82fSMiklos Szeredi 
5724ed5e82fSMiklos Szeredi 	return err;
5734ed5e82fSMiklos Szeredi }
5744ed5e82fSMiklos Szeredi 
575b105e270SAl Viro static void free_vfsmnt(struct mount *mnt)
5761da177e4SLinus Torvalds {
577a6435940SChristian Brauner 	struct user_namespace *mnt_userns;
578a6435940SChristian Brauner 
579a6435940SChristian Brauner 	mnt_userns = mnt_user_ns(&mnt->mnt);
580a6435940SChristian Brauner 	if (mnt_userns != &init_user_ns)
581a6435940SChristian Brauner 		put_user_ns(mnt_userns);
582fcc139aeSAndrzej Hajda 	kfree_const(mnt->mnt_devname);
583d3ef3d73Snpiggin@suse.de #ifdef CONFIG_SMP
58468e8a9feSAl Viro 	free_percpu(mnt->mnt_pcp);
585d3ef3d73Snpiggin@suse.de #endif
586b105e270SAl Viro 	kmem_cache_free(mnt_cache, mnt);
5871da177e4SLinus Torvalds }
5881da177e4SLinus Torvalds 
5898ffcb32eSDavid Howells static void delayed_free_vfsmnt(struct rcu_head *head)
5908ffcb32eSDavid Howells {
5918ffcb32eSDavid Howells 	free_vfsmnt(container_of(head, struct mount, mnt_rcu));
5928ffcb32eSDavid Howells }
5938ffcb32eSDavid Howells 
59448a066e7SAl Viro /* call under rcu_read_lock */
595294d71ffSAl Viro int __legitimize_mnt(struct vfsmount *bastard, unsigned seq)
59648a066e7SAl Viro {
59748a066e7SAl Viro 	struct mount *mnt;
59848a066e7SAl Viro 	if (read_seqretry(&mount_lock, seq))
599294d71ffSAl Viro 		return 1;
60048a066e7SAl Viro 	if (bastard == NULL)
601294d71ffSAl Viro 		return 0;
60248a066e7SAl Viro 	mnt = real_mount(bastard);
60348a066e7SAl Viro 	mnt_add_count(mnt, 1);
604119e1ef8SAl Viro 	smp_mb();			// see mntput_no_expire()
60548a066e7SAl Viro 	if (likely(!read_seqretry(&mount_lock, seq)))
606294d71ffSAl Viro 		return 0;
60748a066e7SAl Viro 	if (bastard->mnt_flags & MNT_SYNC_UMOUNT) {
60848a066e7SAl Viro 		mnt_add_count(mnt, -1);
609294d71ffSAl Viro 		return 1;
61048a066e7SAl Viro 	}
611119e1ef8SAl Viro 	lock_mount_hash();
612119e1ef8SAl Viro 	if (unlikely(bastard->mnt_flags & MNT_DOOMED)) {
613119e1ef8SAl Viro 		mnt_add_count(mnt, -1);
614119e1ef8SAl Viro 		unlock_mount_hash();
615119e1ef8SAl Viro 		return 1;
616119e1ef8SAl Viro 	}
617119e1ef8SAl Viro 	unlock_mount_hash();
618119e1ef8SAl Viro 	/* caller will mntput() */
619294d71ffSAl Viro 	return -1;
620294d71ffSAl Viro }
621294d71ffSAl Viro 
622294d71ffSAl Viro /* call under rcu_read_lock */
623294d71ffSAl Viro bool legitimize_mnt(struct vfsmount *bastard, unsigned seq)
624294d71ffSAl Viro {
625294d71ffSAl Viro 	int res = __legitimize_mnt(bastard, seq);
626294d71ffSAl Viro 	if (likely(!res))
627294d71ffSAl Viro 		return true;
628294d71ffSAl Viro 	if (unlikely(res < 0)) {
62948a066e7SAl Viro 		rcu_read_unlock();
63048a066e7SAl Viro 		mntput(bastard);
63148a066e7SAl Viro 		rcu_read_lock();
632294d71ffSAl Viro 	}
63348a066e7SAl Viro 	return false;
63448a066e7SAl Viro }
63548a066e7SAl Viro 
6361da177e4SLinus Torvalds /*
637474279dcSAl Viro  * find the first mount at @dentry on vfsmount @mnt.
63848a066e7SAl Viro  * call under rcu_read_lock()
6391da177e4SLinus Torvalds  */
640474279dcSAl Viro struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
6411da177e4SLinus Torvalds {
64238129a13SAl Viro 	struct hlist_head *head = m_hash(mnt, dentry);
643474279dcSAl Viro 	struct mount *p;
6441da177e4SLinus Torvalds 
64538129a13SAl Viro 	hlist_for_each_entry_rcu(p, head, mnt_hash)
646474279dcSAl Viro 		if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry)
647474279dcSAl Viro 			return p;
648474279dcSAl Viro 	return NULL;
6491da177e4SLinus Torvalds }
650474279dcSAl Viro 
651474279dcSAl Viro /*
652f015f126SDavid Howells  * lookup_mnt - Return the first child mount mounted at path
653f015f126SDavid Howells  *
654f015f126SDavid Howells  * "First" means first mounted chronologically.  If you create the
655f015f126SDavid Howells  * following mounts:
656f015f126SDavid Howells  *
657f015f126SDavid Howells  * mount /dev/sda1 /mnt
658f015f126SDavid Howells  * mount /dev/sda2 /mnt
659f015f126SDavid Howells  * mount /dev/sda3 /mnt
660f015f126SDavid Howells  *
661f015f126SDavid Howells  * Then lookup_mnt() on the base /mnt dentry in the root mount will
662f015f126SDavid Howells  * return successively the root dentry and vfsmount of /dev/sda1, then
663f015f126SDavid Howells  * /dev/sda2, then /dev/sda3, then NULL.
664f015f126SDavid Howells  *
665f015f126SDavid Howells  * lookup_mnt takes a reference to the found vfsmount.
666a05964f3SRam Pai  */
667ca71cf71SAl Viro struct vfsmount *lookup_mnt(const struct path *path)
668a05964f3SRam Pai {
669c7105365SAl Viro 	struct mount *child_mnt;
67048a066e7SAl Viro 	struct vfsmount *m;
67148a066e7SAl Viro 	unsigned seq;
67299b7db7bSNick Piggin 
67348a066e7SAl Viro 	rcu_read_lock();
67448a066e7SAl Viro 	do {
67548a066e7SAl Viro 		seq = read_seqbegin(&mount_lock);
676474279dcSAl Viro 		child_mnt = __lookup_mnt(path->mnt, path->dentry);
67748a066e7SAl Viro 		m = child_mnt ? &child_mnt->mnt : NULL;
67848a066e7SAl Viro 	} while (!legitimize_mnt(m, seq));
67948a066e7SAl Viro 	rcu_read_unlock();
68048a066e7SAl Viro 	return m;
681a05964f3SRam Pai }
682a05964f3SRam Pai 
6839f6c61f9SMiklos Szeredi static inline void lock_ns_list(struct mnt_namespace *ns)
6849f6c61f9SMiklos Szeredi {
6859f6c61f9SMiklos Szeredi 	spin_lock(&ns->ns_lock);
6869f6c61f9SMiklos Szeredi }
6879f6c61f9SMiklos Szeredi 
6889f6c61f9SMiklos Szeredi static inline void unlock_ns_list(struct mnt_namespace *ns)
6899f6c61f9SMiklos Szeredi {
6909f6c61f9SMiklos Szeredi 	spin_unlock(&ns->ns_lock);
6919f6c61f9SMiklos Szeredi }
6929f6c61f9SMiklos Szeredi 
6939f6c61f9SMiklos Szeredi static inline bool mnt_is_cursor(struct mount *mnt)
6949f6c61f9SMiklos Szeredi {
6959f6c61f9SMiklos Szeredi 	return mnt->mnt.mnt_flags & MNT_CURSOR;
6969f6c61f9SMiklos Szeredi }
6979f6c61f9SMiklos Szeredi 
6987af1364fSEric W. Biederman /*
6997af1364fSEric W. Biederman  * __is_local_mountpoint - Test to see if dentry is a mountpoint in the
7007af1364fSEric W. Biederman  *                         current mount namespace.
7017af1364fSEric W. Biederman  *
7027af1364fSEric W. Biederman  * The common case is dentries are not mountpoints at all and that
7037af1364fSEric W. Biederman  * test is handled inline.  For the slow case when we are actually
7047af1364fSEric W. Biederman  * dealing with a mountpoint of some kind, walk through all of the
7057af1364fSEric W. Biederman  * mounts in the current mount namespace and test to see if the dentry
7067af1364fSEric W. Biederman  * is a mountpoint.
7077af1364fSEric W. Biederman  *
7087af1364fSEric W. Biederman  * The mount_hashtable is not usable in the context because we
7097af1364fSEric W. Biederman  * need to identify all mounts that may be in the current mount
7107af1364fSEric W. Biederman  * namespace not just a mount that happens to have some specified
7117af1364fSEric W. Biederman  * parent mount.
7127af1364fSEric W. Biederman  */
7137af1364fSEric W. Biederman bool __is_local_mountpoint(struct dentry *dentry)
7147af1364fSEric W. Biederman {
7157af1364fSEric W. Biederman 	struct mnt_namespace *ns = current->nsproxy->mnt_ns;
7167af1364fSEric W. Biederman 	struct mount *mnt;
7177af1364fSEric W. Biederman 	bool is_covered = false;
7187af1364fSEric W. Biederman 
7197af1364fSEric W. Biederman 	down_read(&namespace_sem);
7209f6c61f9SMiklos Szeredi 	lock_ns_list(ns);
7217af1364fSEric W. Biederman 	list_for_each_entry(mnt, &ns->list, mnt_list) {
7229f6c61f9SMiklos Szeredi 		if (mnt_is_cursor(mnt))
7239f6c61f9SMiklos Szeredi 			continue;
7247af1364fSEric W. Biederman 		is_covered = (mnt->mnt_mountpoint == dentry);
7257af1364fSEric W. Biederman 		if (is_covered)
7267af1364fSEric W. Biederman 			break;
7277af1364fSEric W. Biederman 	}
7289f6c61f9SMiklos Szeredi 	unlock_ns_list(ns);
7297af1364fSEric W. Biederman 	up_read(&namespace_sem);
7305ad05cc8SNikolay Borisov 
7317af1364fSEric W. Biederman 	return is_covered;
7327af1364fSEric W. Biederman }
7337af1364fSEric W. Biederman 
734e2dfa935SEric W. Biederman static struct mountpoint *lookup_mountpoint(struct dentry *dentry)
73584d17192SAl Viro {
7360818bf27SAl Viro 	struct hlist_head *chain = mp_hash(dentry);
73784d17192SAl Viro 	struct mountpoint *mp;
73884d17192SAl Viro 
7390818bf27SAl Viro 	hlist_for_each_entry(mp, chain, m_hash) {
74084d17192SAl Viro 		if (mp->m_dentry == dentry) {
74184d17192SAl Viro 			mp->m_count++;
74284d17192SAl Viro 			return mp;
74384d17192SAl Viro 		}
74484d17192SAl Viro 	}
745e2dfa935SEric W. Biederman 	return NULL;
746e2dfa935SEric W. Biederman }
747e2dfa935SEric W. Biederman 
7483895dbf8SEric W. Biederman static struct mountpoint *get_mountpoint(struct dentry *dentry)
749e2dfa935SEric W. Biederman {
7503895dbf8SEric W. Biederman 	struct mountpoint *mp, *new = NULL;
751e2dfa935SEric W. Biederman 	int ret;
75284d17192SAl Viro 
7533895dbf8SEric W. Biederman 	if (d_mountpoint(dentry)) {
7541e9c75fbSBenjamin Coddington 		/* might be worth a WARN_ON() */
7551e9c75fbSBenjamin Coddington 		if (d_unlinked(dentry))
7561e9c75fbSBenjamin Coddington 			return ERR_PTR(-ENOENT);
7573895dbf8SEric W. Biederman mountpoint:
7583895dbf8SEric W. Biederman 		read_seqlock_excl(&mount_lock);
7593895dbf8SEric W. Biederman 		mp = lookup_mountpoint(dentry);
7603895dbf8SEric W. Biederman 		read_sequnlock_excl(&mount_lock);
7613895dbf8SEric W. Biederman 		if (mp)
7623895dbf8SEric W. Biederman 			goto done;
76384d17192SAl Viro 	}
764eed81007SMiklos Szeredi 
7653895dbf8SEric W. Biederman 	if (!new)
7663895dbf8SEric W. Biederman 		new = kmalloc(sizeof(struct mountpoint), GFP_KERNEL);
7673895dbf8SEric W. Biederman 	if (!new)
7683895dbf8SEric W. Biederman 		return ERR_PTR(-ENOMEM);
7693895dbf8SEric W. Biederman 
7703895dbf8SEric W. Biederman 
7713895dbf8SEric W. Biederman 	/* Exactly one processes may set d_mounted */
7723895dbf8SEric W. Biederman 	ret = d_set_mounted(dentry);
7733895dbf8SEric W. Biederman 
7743895dbf8SEric W. Biederman 	/* Someone else set d_mounted? */
7753895dbf8SEric W. Biederman 	if (ret == -EBUSY)
7763895dbf8SEric W. Biederman 		goto mountpoint;
7773895dbf8SEric W. Biederman 
7783895dbf8SEric W. Biederman 	/* The dentry is not available as a mountpoint? */
7793895dbf8SEric W. Biederman 	mp = ERR_PTR(ret);
7803895dbf8SEric W. Biederman 	if (ret)
7813895dbf8SEric W. Biederman 		goto done;
7823895dbf8SEric W. Biederman 
7833895dbf8SEric W. Biederman 	/* Add the new mountpoint to the hash table */
7843895dbf8SEric W. Biederman 	read_seqlock_excl(&mount_lock);
7854edbe133SAl Viro 	new->m_dentry = dget(dentry);
7863895dbf8SEric W. Biederman 	new->m_count = 1;
7873895dbf8SEric W. Biederman 	hlist_add_head(&new->m_hash, mp_hash(dentry));
7883895dbf8SEric W. Biederman 	INIT_HLIST_HEAD(&new->m_list);
7893895dbf8SEric W. Biederman 	read_sequnlock_excl(&mount_lock);
7903895dbf8SEric W. Biederman 
7913895dbf8SEric W. Biederman 	mp = new;
7923895dbf8SEric W. Biederman 	new = NULL;
7933895dbf8SEric W. Biederman done:
7943895dbf8SEric W. Biederman 	kfree(new);
79584d17192SAl Viro 	return mp;
79684d17192SAl Viro }
79784d17192SAl Viro 
7984edbe133SAl Viro /*
7994edbe133SAl Viro  * vfsmount lock must be held.  Additionally, the caller is responsible
8004edbe133SAl Viro  * for serializing calls for given disposal list.
8014edbe133SAl Viro  */
8024edbe133SAl Viro static void __put_mountpoint(struct mountpoint *mp, struct list_head *list)
80384d17192SAl Viro {
80484d17192SAl Viro 	if (!--mp->m_count) {
80584d17192SAl Viro 		struct dentry *dentry = mp->m_dentry;
8060a5eb7c8SEric W. Biederman 		BUG_ON(!hlist_empty(&mp->m_list));
80784d17192SAl Viro 		spin_lock(&dentry->d_lock);
80884d17192SAl Viro 		dentry->d_flags &= ~DCACHE_MOUNTED;
80984d17192SAl Viro 		spin_unlock(&dentry->d_lock);
8104edbe133SAl Viro 		dput_to_list(dentry, list);
8110818bf27SAl Viro 		hlist_del(&mp->m_hash);
81284d17192SAl Viro 		kfree(mp);
81384d17192SAl Viro 	}
81484d17192SAl Viro }
81584d17192SAl Viro 
8164edbe133SAl Viro /* called with namespace_lock and vfsmount lock */
8174edbe133SAl Viro static void put_mountpoint(struct mountpoint *mp)
8184edbe133SAl Viro {
8194edbe133SAl Viro 	__put_mountpoint(mp, &ex_mountpoints);
8204edbe133SAl Viro }
8214edbe133SAl Viro 
822143c8c91SAl Viro static inline int check_mnt(struct mount *mnt)
8231da177e4SLinus Torvalds {
8246b3286edSKirill Korotaev 	return mnt->mnt_ns == current->nsproxy->mnt_ns;
8251da177e4SLinus Torvalds }
8261da177e4SLinus Torvalds 
82799b7db7bSNick Piggin /*
82899b7db7bSNick Piggin  * vfsmount lock must be held for write
82999b7db7bSNick Piggin  */
8306b3286edSKirill Korotaev static void touch_mnt_namespace(struct mnt_namespace *ns)
8315addc5ddSAl Viro {
8325addc5ddSAl Viro 	if (ns) {
8335addc5ddSAl Viro 		ns->event = ++event;
8345addc5ddSAl Viro 		wake_up_interruptible(&ns->poll);
8355addc5ddSAl Viro 	}
8365addc5ddSAl Viro }
8375addc5ddSAl Viro 
83899b7db7bSNick Piggin /*
83999b7db7bSNick Piggin  * vfsmount lock must be held for write
84099b7db7bSNick Piggin  */
8416b3286edSKirill Korotaev static void __touch_mnt_namespace(struct mnt_namespace *ns)
8425addc5ddSAl Viro {
8435addc5ddSAl Viro 	if (ns && ns->event != event) {
8445addc5ddSAl Viro 		ns->event = event;
8455addc5ddSAl Viro 		wake_up_interruptible(&ns->poll);
8465addc5ddSAl Viro 	}
8475addc5ddSAl Viro }
8485addc5ddSAl Viro 
84999b7db7bSNick Piggin /*
85099b7db7bSNick Piggin  * vfsmount lock must be held for write
85199b7db7bSNick Piggin  */
852e4e59906SAl Viro static struct mountpoint *unhash_mnt(struct mount *mnt)
8531da177e4SLinus Torvalds {
854e4e59906SAl Viro 	struct mountpoint *mp;
8550714a533SAl Viro 	mnt->mnt_parent = mnt;
856a73324daSAl Viro 	mnt->mnt_mountpoint = mnt->mnt.mnt_root;
8576b41d536SAl Viro 	list_del_init(&mnt->mnt_child);
85838129a13SAl Viro 	hlist_del_init_rcu(&mnt->mnt_hash);
8590a5eb7c8SEric W. Biederman 	hlist_del_init(&mnt->mnt_mp_list);
860e4e59906SAl Viro 	mp = mnt->mnt_mp;
86184d17192SAl Viro 	mnt->mnt_mp = NULL;
862e4e59906SAl Viro 	return mp;
8637bdb11deSEric W. Biederman }
8647bdb11deSEric W. Biederman 
8657bdb11deSEric W. Biederman /*
8667bdb11deSEric W. Biederman  * vfsmount lock must be held for write
8677bdb11deSEric W. Biederman  */
8686a46c573SEric W. Biederman static void umount_mnt(struct mount *mnt)
8696a46c573SEric W. Biederman {
870e4e59906SAl Viro 	put_mountpoint(unhash_mnt(mnt));
8716a46c573SEric W. Biederman }
8726a46c573SEric W. Biederman 
8736a46c573SEric W. Biederman /*
8746a46c573SEric W. Biederman  * vfsmount lock must be held for write
8756a46c573SEric W. Biederman  */
87684d17192SAl Viro void mnt_set_mountpoint(struct mount *mnt,
87784d17192SAl Viro 			struct mountpoint *mp,
87844d964d6SAl Viro 			struct mount *child_mnt)
879b90fa9aeSRam Pai {
88084d17192SAl Viro 	mp->m_count++;
8813a2393d7SAl Viro 	mnt_add_count(mnt, 1);	/* essentially, that's mntget */
8824edbe133SAl Viro 	child_mnt->mnt_mountpoint = mp->m_dentry;
8833a2393d7SAl Viro 	child_mnt->mnt_parent = mnt;
88484d17192SAl Viro 	child_mnt->mnt_mp = mp;
8850a5eb7c8SEric W. Biederman 	hlist_add_head(&child_mnt->mnt_mp_list, &mp->m_list);
886b90fa9aeSRam Pai }
887b90fa9aeSRam Pai 
8881064f874SEric W. Biederman static void __attach_mnt(struct mount *mnt, struct mount *parent)
8891064f874SEric W. Biederman {
8901064f874SEric W. Biederman 	hlist_add_head_rcu(&mnt->mnt_hash,
8911064f874SEric W. Biederman 			   m_hash(&parent->mnt, mnt->mnt_mountpoint));
8921064f874SEric W. Biederman 	list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
8931064f874SEric W. Biederman }
8941064f874SEric W. Biederman 
89599b7db7bSNick Piggin /*
89699b7db7bSNick Piggin  * vfsmount lock must be held for write
89799b7db7bSNick Piggin  */
89884d17192SAl Viro static void attach_mnt(struct mount *mnt,
89984d17192SAl Viro 			struct mount *parent,
90084d17192SAl Viro 			struct mountpoint *mp)
9011da177e4SLinus Torvalds {
90284d17192SAl Viro 	mnt_set_mountpoint(parent, mp, mnt);
9031064f874SEric W. Biederman 	__attach_mnt(mnt, parent);
904b90fa9aeSRam Pai }
905b90fa9aeSRam Pai 
9061064f874SEric W. Biederman void mnt_change_mountpoint(struct mount *parent, struct mountpoint *mp, struct mount *mnt)
90712a5b529SAl Viro {
9081064f874SEric W. Biederman 	struct mountpoint *old_mp = mnt->mnt_mp;
9091064f874SEric W. Biederman 	struct mount *old_parent = mnt->mnt_parent;
9101064f874SEric W. Biederman 
9111064f874SEric W. Biederman 	list_del_init(&mnt->mnt_child);
9121064f874SEric W. Biederman 	hlist_del_init(&mnt->mnt_mp_list);
9131064f874SEric W. Biederman 	hlist_del_init_rcu(&mnt->mnt_hash);
9141064f874SEric W. Biederman 
9151064f874SEric W. Biederman 	attach_mnt(mnt, parent, mp);
9161064f874SEric W. Biederman 
9171064f874SEric W. Biederman 	put_mountpoint(old_mp);
9181064f874SEric W. Biederman 	mnt_add_count(old_parent, -1);
91912a5b529SAl Viro }
92012a5b529SAl Viro 
921b90fa9aeSRam Pai /*
92299b7db7bSNick Piggin  * vfsmount lock must be held for write
923b90fa9aeSRam Pai  */
9241064f874SEric W. Biederman static void commit_tree(struct mount *mnt)
925b90fa9aeSRam Pai {
9260714a533SAl Viro 	struct mount *parent = mnt->mnt_parent;
92783adc753SAl Viro 	struct mount *m;
928b90fa9aeSRam Pai 	LIST_HEAD(head);
929143c8c91SAl Viro 	struct mnt_namespace *n = parent->mnt_ns;
930b90fa9aeSRam Pai 
9310714a533SAl Viro 	BUG_ON(parent == mnt);
932b90fa9aeSRam Pai 
9331a4eeaf2SAl Viro 	list_add_tail(&head, &mnt->mnt_list);
934f7a99c5bSAl Viro 	list_for_each_entry(m, &head, mnt_list)
935143c8c91SAl Viro 		m->mnt_ns = n;
936f03c6599SAl Viro 
937b90fa9aeSRam Pai 	list_splice(&head, n->list.prev);
938b90fa9aeSRam Pai 
939d2921684SEric W. Biederman 	n->mounts += n->pending_mounts;
940d2921684SEric W. Biederman 	n->pending_mounts = 0;
941d2921684SEric W. Biederman 
9421064f874SEric W. Biederman 	__attach_mnt(mnt, parent);
9436b3286edSKirill Korotaev 	touch_mnt_namespace(n);
9441da177e4SLinus Torvalds }
9451da177e4SLinus Torvalds 
946909b0a88SAl Viro static struct mount *next_mnt(struct mount *p, struct mount *root)
9471da177e4SLinus Torvalds {
9486b41d536SAl Viro 	struct list_head *next = p->mnt_mounts.next;
9496b41d536SAl Viro 	if (next == &p->mnt_mounts) {
9501da177e4SLinus Torvalds 		while (1) {
951909b0a88SAl Viro 			if (p == root)
9521da177e4SLinus Torvalds 				return NULL;
9536b41d536SAl Viro 			next = p->mnt_child.next;
9546b41d536SAl Viro 			if (next != &p->mnt_parent->mnt_mounts)
9551da177e4SLinus Torvalds 				break;
9560714a533SAl Viro 			p = p->mnt_parent;
9571da177e4SLinus Torvalds 		}
9581da177e4SLinus Torvalds 	}
9596b41d536SAl Viro 	return list_entry(next, struct mount, mnt_child);
9601da177e4SLinus Torvalds }
9611da177e4SLinus Torvalds 
962315fc83eSAl Viro static struct mount *skip_mnt_tree(struct mount *p)
9639676f0c6SRam Pai {
9646b41d536SAl Viro 	struct list_head *prev = p->mnt_mounts.prev;
9656b41d536SAl Viro 	while (prev != &p->mnt_mounts) {
9666b41d536SAl Viro 		p = list_entry(prev, struct mount, mnt_child);
9676b41d536SAl Viro 		prev = p->mnt_mounts.prev;
9689676f0c6SRam Pai 	}
9699676f0c6SRam Pai 	return p;
9709676f0c6SRam Pai }
9719676f0c6SRam Pai 
9728f291889SAl Viro /**
9738f291889SAl Viro  * vfs_create_mount - Create a mount for a configured superblock
9748f291889SAl Viro  * @fc: The configuration context with the superblock attached
9758f291889SAl Viro  *
9768f291889SAl Viro  * Create a mount to an already configured superblock.  If necessary, the
9778f291889SAl Viro  * caller should invoke vfs_get_tree() before calling this.
9788f291889SAl Viro  *
9798f291889SAl Viro  * Note that this does not attach the mount to anything.
9808f291889SAl Viro  */
9818f291889SAl Viro struct vfsmount *vfs_create_mount(struct fs_context *fc)
9829d412a43SAl Viro {
983b105e270SAl Viro 	struct mount *mnt;
9849d412a43SAl Viro 
9858f291889SAl Viro 	if (!fc->root)
9868f291889SAl Viro 		return ERR_PTR(-EINVAL);
9879d412a43SAl Viro 
9888f291889SAl Viro 	mnt = alloc_vfsmnt(fc->source ?: "none");
9899d412a43SAl Viro 	if (!mnt)
9909d412a43SAl Viro 		return ERR_PTR(-ENOMEM);
9919d412a43SAl Viro 
9928f291889SAl Viro 	if (fc->sb_flags & SB_KERNMOUNT)
993b105e270SAl Viro 		mnt->mnt.mnt_flags = MNT_INTERNAL;
9949d412a43SAl Viro 
9958f291889SAl Viro 	atomic_inc(&fc->root->d_sb->s_active);
9968f291889SAl Viro 	mnt->mnt.mnt_sb		= fc->root->d_sb;
9978f291889SAl Viro 	mnt->mnt.mnt_root	= dget(fc->root);
998a73324daSAl Viro 	mnt->mnt_mountpoint	= mnt->mnt.mnt_root;
9990714a533SAl Viro 	mnt->mnt_parent		= mnt;
10008f291889SAl Viro 
1001719ea2fbSAl Viro 	lock_mount_hash();
10028f291889SAl Viro 	list_add_tail(&mnt->mnt_instance, &mnt->mnt.mnt_sb->s_mounts);
1003719ea2fbSAl Viro 	unlock_mount_hash();
1004b105e270SAl Viro 	return &mnt->mnt;
10059d412a43SAl Viro }
10068f291889SAl Viro EXPORT_SYMBOL(vfs_create_mount);
10078f291889SAl Viro 
10088f291889SAl Viro struct vfsmount *fc_mount(struct fs_context *fc)
10098f291889SAl Viro {
10108f291889SAl Viro 	int err = vfs_get_tree(fc);
10118f291889SAl Viro 	if (!err) {
10128f291889SAl Viro 		up_write(&fc->root->d_sb->s_umount);
10138f291889SAl Viro 		return vfs_create_mount(fc);
10148f291889SAl Viro 	}
10158f291889SAl Viro 	return ERR_PTR(err);
10168f291889SAl Viro }
10178f291889SAl Viro EXPORT_SYMBOL(fc_mount);
10188f291889SAl Viro 
10199bc61ab1SDavid Howells struct vfsmount *vfs_kern_mount(struct file_system_type *type,
10209bc61ab1SDavid Howells 				int flags, const char *name,
10219bc61ab1SDavid Howells 				void *data)
10221da177e4SLinus Torvalds {
10239bc61ab1SDavid Howells 	struct fs_context *fc;
10248f291889SAl Viro 	struct vfsmount *mnt;
10259bc61ab1SDavid Howells 	int ret = 0;
10269d412a43SAl Viro 
10279d412a43SAl Viro 	if (!type)
10283e1aeb00SDavid Howells 		return ERR_PTR(-EINVAL);
10299d412a43SAl Viro 
10309bc61ab1SDavid Howells 	fc = fs_context_for_mount(type, flags);
10319bc61ab1SDavid Howells 	if (IS_ERR(fc))
10329bc61ab1SDavid Howells 		return ERR_CAST(fc);
10339bc61ab1SDavid Howells 
10343e1aeb00SDavid Howells 	if (name)
10353e1aeb00SDavid Howells 		ret = vfs_parse_fs_string(fc, "source",
10363e1aeb00SDavid Howells 					  name, strlen(name));
10379bc61ab1SDavid Howells 	if (!ret)
10389bc61ab1SDavid Howells 		ret = parse_monolithic_mount_data(fc, data);
10399bc61ab1SDavid Howells 	if (!ret)
10408f291889SAl Viro 		mnt = fc_mount(fc);
10418f291889SAl Viro 	else
10428f291889SAl Viro 		mnt = ERR_PTR(ret);
10439d412a43SAl Viro 
10449bc61ab1SDavid Howells 	put_fs_context(fc);
10458f291889SAl Viro 	return mnt;
10469d412a43SAl Viro }
10479d412a43SAl Viro EXPORT_SYMBOL_GPL(vfs_kern_mount);
10489d412a43SAl Viro 
104993faccbbSEric W. Biederman struct vfsmount *
105093faccbbSEric W. Biederman vfs_submount(const struct dentry *mountpoint, struct file_system_type *type,
105193faccbbSEric W. Biederman 	     const char *name, void *data)
105293faccbbSEric W. Biederman {
105393faccbbSEric W. Biederman 	/* Until it is worked out how to pass the user namespace
105493faccbbSEric W. Biederman 	 * through from the parent mount to the submount don't support
105593faccbbSEric W. Biederman 	 * unprivileged mounts with submounts.
105693faccbbSEric W. Biederman 	 */
105793faccbbSEric W. Biederman 	if (mountpoint->d_sb->s_user_ns != &init_user_ns)
105893faccbbSEric W. Biederman 		return ERR_PTR(-EPERM);
105993faccbbSEric W. Biederman 
1060e462ec50SDavid Howells 	return vfs_kern_mount(type, SB_SUBMOUNT, name, data);
106193faccbbSEric W. Biederman }
106293faccbbSEric W. Biederman EXPORT_SYMBOL_GPL(vfs_submount);
106393faccbbSEric W. Biederman 
106487129cc0SAl Viro static struct mount *clone_mnt(struct mount *old, struct dentry *root,
106536341f64SRam Pai 					int flag)
10661da177e4SLinus Torvalds {
106787129cc0SAl Viro 	struct super_block *sb = old->mnt.mnt_sb;
1068be34d1a3SDavid Howells 	struct mount *mnt;
1069be34d1a3SDavid Howells 	int err;
10701da177e4SLinus Torvalds 
1071be34d1a3SDavid Howells 	mnt = alloc_vfsmnt(old->mnt_devname);
1072be34d1a3SDavid Howells 	if (!mnt)
1073be34d1a3SDavid Howells 		return ERR_PTR(-ENOMEM);
1074be34d1a3SDavid Howells 
10757a472ef4SEric W. Biederman 	if (flag & (CL_SLAVE | CL_PRIVATE | CL_SHARED_TO_SLAVE))
107615169fe7SAl Viro 		mnt->mnt_group_id = 0; /* not a peer of original */
1077719f5d7fSMiklos Szeredi 	else
107815169fe7SAl Viro 		mnt->mnt_group_id = old->mnt_group_id;
1079719f5d7fSMiklos Szeredi 
108015169fe7SAl Viro 	if ((flag & CL_MAKE_SHARED) && !mnt->mnt_group_id) {
1081be34d1a3SDavid Howells 		err = mnt_alloc_group_id(mnt);
1082719f5d7fSMiklos Szeredi 		if (err)
1083719f5d7fSMiklos Szeredi 			goto out_free;
1084719f5d7fSMiklos Szeredi 	}
1085719f5d7fSMiklos Szeredi 
108616a34adbSAl Viro 	mnt->mnt.mnt_flags = old->mnt.mnt_flags;
108716a34adbSAl Viro 	mnt->mnt.mnt_flags &= ~(MNT_WRITE_HOLD|MNT_MARKED|MNT_INTERNAL);
10885ff9d8a6SEric W. Biederman 
10891da177e4SLinus Torvalds 	atomic_inc(&sb->s_active);
1090a6435940SChristian Brauner 	mnt->mnt.mnt_userns = mnt_user_ns(&old->mnt);
1091a6435940SChristian Brauner 	if (mnt->mnt.mnt_userns != &init_user_ns)
1092a6435940SChristian Brauner 		mnt->mnt.mnt_userns = get_user_ns(mnt->mnt.mnt_userns);
1093b105e270SAl Viro 	mnt->mnt.mnt_sb = sb;
1094b105e270SAl Viro 	mnt->mnt.mnt_root = dget(root);
1095a73324daSAl Viro 	mnt->mnt_mountpoint = mnt->mnt.mnt_root;
10960714a533SAl Viro 	mnt->mnt_parent = mnt;
1097719ea2fbSAl Viro 	lock_mount_hash();
109839f7c4dbSMiklos Szeredi 	list_add_tail(&mnt->mnt_instance, &sb->s_mounts);
1099719ea2fbSAl Viro 	unlock_mount_hash();
1100b90fa9aeSRam Pai 
11017a472ef4SEric W. Biederman 	if ((flag & CL_SLAVE) ||
11027a472ef4SEric W. Biederman 	    ((flag & CL_SHARED_TO_SLAVE) && IS_MNT_SHARED(old))) {
11036776db3dSAl Viro 		list_add(&mnt->mnt_slave, &old->mnt_slave_list);
110432301920SAl Viro 		mnt->mnt_master = old;
1105fc7be130SAl Viro 		CLEAR_MNT_SHARED(mnt);
11068aec0809SAl Viro 	} else if (!(flag & CL_PRIVATE)) {
1107fc7be130SAl Viro 		if ((flag & CL_MAKE_SHARED) || IS_MNT_SHARED(old))
11086776db3dSAl Viro 			list_add(&mnt->mnt_share, &old->mnt_share);
1109d10e8defSAl Viro 		if (IS_MNT_SLAVE(old))
11106776db3dSAl Viro 			list_add(&mnt->mnt_slave, &old->mnt_slave);
1111d10e8defSAl Viro 		mnt->mnt_master = old->mnt_master;
11125235d448SAl Viro 	} else {
11135235d448SAl Viro 		CLEAR_MNT_SHARED(mnt);
11145afe0022SRam Pai 	}
1115b90fa9aeSRam Pai 	if (flag & CL_MAKE_SHARED)
11160f0afb1dSAl Viro 		set_mnt_shared(mnt);
11171da177e4SLinus Torvalds 
11181da177e4SLinus Torvalds 	/* stick the duplicate mount on the same expiry list
11191da177e4SLinus Torvalds 	 * as the original if that was on one */
112036341f64SRam Pai 	if (flag & CL_EXPIRE) {
11216776db3dSAl Viro 		if (!list_empty(&old->mnt_expire))
11226776db3dSAl Viro 			list_add(&mnt->mnt_expire, &old->mnt_expire);
11231da177e4SLinus Torvalds 	}
1124be34d1a3SDavid Howells 
1125cb338d06SAl Viro 	return mnt;
1126719f5d7fSMiklos Szeredi 
1127719f5d7fSMiklos Szeredi  out_free:
11288ffcb32eSDavid Howells 	mnt_free_id(mnt);
1129719f5d7fSMiklos Szeredi 	free_vfsmnt(mnt);
1130be34d1a3SDavid Howells 	return ERR_PTR(err);
11311da177e4SLinus Torvalds }
11321da177e4SLinus Torvalds 
11339ea459e1SAl Viro static void cleanup_mnt(struct mount *mnt)
11349ea459e1SAl Viro {
113556cbb429SAl Viro 	struct hlist_node *p;
113656cbb429SAl Viro 	struct mount *m;
11379ea459e1SAl Viro 	/*
113856cbb429SAl Viro 	 * The warning here probably indicates that somebody messed
113956cbb429SAl Viro 	 * up a mnt_want/drop_write() pair.  If this happens, the
114056cbb429SAl Viro 	 * filesystem was probably unable to make r/w->r/o transitions.
11419ea459e1SAl Viro 	 * The locking used to deal with mnt_count decrement provides barriers,
11429ea459e1SAl Viro 	 * so mnt_get_writers() below is safe.
11439ea459e1SAl Viro 	 */
11449ea459e1SAl Viro 	WARN_ON(mnt_get_writers(mnt));
11459ea459e1SAl Viro 	if (unlikely(mnt->mnt_pins.first))
11469ea459e1SAl Viro 		mnt_pin_kill(mnt);
114756cbb429SAl Viro 	hlist_for_each_entry_safe(m, p, &mnt->mnt_stuck_children, mnt_umount) {
114856cbb429SAl Viro 		hlist_del(&m->mnt_umount);
114956cbb429SAl Viro 		mntput(&m->mnt);
115056cbb429SAl Viro 	}
11519ea459e1SAl Viro 	fsnotify_vfsmount_delete(&mnt->mnt);
11529ea459e1SAl Viro 	dput(mnt->mnt.mnt_root);
11539ea459e1SAl Viro 	deactivate_super(mnt->mnt.mnt_sb);
11549ea459e1SAl Viro 	mnt_free_id(mnt);
11559ea459e1SAl Viro 	call_rcu(&mnt->mnt_rcu, delayed_free_vfsmnt);
11569ea459e1SAl Viro }
11579ea459e1SAl Viro 
11589ea459e1SAl Viro static void __cleanup_mnt(struct rcu_head *head)
11599ea459e1SAl Viro {
11609ea459e1SAl Viro 	cleanup_mnt(container_of(head, struct mount, mnt_rcu));
11619ea459e1SAl Viro }
11629ea459e1SAl Viro 
11639ea459e1SAl Viro static LLIST_HEAD(delayed_mntput_list);
11649ea459e1SAl Viro static void delayed_mntput(struct work_struct *unused)
11659ea459e1SAl Viro {
11669ea459e1SAl Viro 	struct llist_node *node = llist_del_all(&delayed_mntput_list);
116729785735SByungchul Park 	struct mount *m, *t;
11689ea459e1SAl Viro 
116929785735SByungchul Park 	llist_for_each_entry_safe(m, t, node, mnt_llist)
117029785735SByungchul Park 		cleanup_mnt(m);
11719ea459e1SAl Viro }
11729ea459e1SAl Viro static DECLARE_DELAYED_WORK(delayed_mntput_work, delayed_mntput);
11739ea459e1SAl Viro 
1174900148dcSAl Viro static void mntput_no_expire(struct mount *mnt)
11757b7b1aceSAl Viro {
11764edbe133SAl Viro 	LIST_HEAD(list);
1177edf7ddbfSEric Biggers 	int count;
11784edbe133SAl Viro 
117948a066e7SAl Viro 	rcu_read_lock();
11809ea0a46cSAl Viro 	if (likely(READ_ONCE(mnt->mnt_ns))) {
11819ea0a46cSAl Viro 		/*
11829ea0a46cSAl Viro 		 * Since we don't do lock_mount_hash() here,
11839ea0a46cSAl Viro 		 * ->mnt_ns can change under us.  However, if it's
11849ea0a46cSAl Viro 		 * non-NULL, then there's a reference that won't
11859ea0a46cSAl Viro 		 * be dropped until after an RCU delay done after
11869ea0a46cSAl Viro 		 * turning ->mnt_ns NULL.  So if we observe it
11879ea0a46cSAl Viro 		 * non-NULL under rcu_read_lock(), the reference
11889ea0a46cSAl Viro 		 * we are dropping is not the final one.
11899ea0a46cSAl Viro 		 */
1190aa9c0e07SAl Viro 		mnt_add_count(mnt, -1);
119148a066e7SAl Viro 		rcu_read_unlock();
119299b7db7bSNick Piggin 		return;
1193b3e19d92SNick Piggin 	}
1194719ea2fbSAl Viro 	lock_mount_hash();
1195119e1ef8SAl Viro 	/*
1196119e1ef8SAl Viro 	 * make sure that if __legitimize_mnt() has not seen us grab
1197119e1ef8SAl Viro 	 * mount_lock, we'll see their refcount increment here.
1198119e1ef8SAl Viro 	 */
1199119e1ef8SAl Viro 	smp_mb();
12009ea0a46cSAl Viro 	mnt_add_count(mnt, -1);
1201edf7ddbfSEric Biggers 	count = mnt_get_count(mnt);
1202edf7ddbfSEric Biggers 	if (count != 0) {
1203edf7ddbfSEric Biggers 		WARN_ON(count < 0);
120448a066e7SAl Viro 		rcu_read_unlock();
1205719ea2fbSAl Viro 		unlock_mount_hash();
120699b7db7bSNick Piggin 		return;
120799b7db7bSNick Piggin 	}
120848a066e7SAl Viro 	if (unlikely(mnt->mnt.mnt_flags & MNT_DOOMED)) {
120948a066e7SAl Viro 		rcu_read_unlock();
121048a066e7SAl Viro 		unlock_mount_hash();
121148a066e7SAl Viro 		return;
121248a066e7SAl Viro 	}
121348a066e7SAl Viro 	mnt->mnt.mnt_flags |= MNT_DOOMED;
121448a066e7SAl Viro 	rcu_read_unlock();
1215962830dfSAndi Kleen 
121639f7c4dbSMiklos Szeredi 	list_del(&mnt->mnt_instance);
1217ce07d891SEric W. Biederman 
1218ce07d891SEric W. Biederman 	if (unlikely(!list_empty(&mnt->mnt_mounts))) {
1219ce07d891SEric W. Biederman 		struct mount *p, *tmp;
1220ce07d891SEric W. Biederman 		list_for_each_entry_safe(p, tmp, &mnt->mnt_mounts,  mnt_child) {
12214edbe133SAl Viro 			__put_mountpoint(unhash_mnt(p), &list);
122256cbb429SAl Viro 			hlist_add_head(&p->mnt_umount, &mnt->mnt_stuck_children);
1223ce07d891SEric W. Biederman 		}
1224ce07d891SEric W. Biederman 	}
1225719ea2fbSAl Viro 	unlock_mount_hash();
12264edbe133SAl Viro 	shrink_dentry_list(&list);
1227649a795aSAl Viro 
12289ea459e1SAl Viro 	if (likely(!(mnt->mnt.mnt_flags & MNT_INTERNAL))) {
12299ea459e1SAl Viro 		struct task_struct *task = current;
12309ea459e1SAl Viro 		if (likely(!(task->flags & PF_KTHREAD))) {
12319ea459e1SAl Viro 			init_task_work(&mnt->mnt_rcu, __cleanup_mnt);
123291989c70SJens Axboe 			if (!task_work_add(task, &mnt->mnt_rcu, TWA_RESUME))
12339ea459e1SAl Viro 				return;
12349ea459e1SAl Viro 		}
12359ea459e1SAl Viro 		if (llist_add(&mnt->mnt_llist, &delayed_mntput_list))
12369ea459e1SAl Viro 			schedule_delayed_work(&delayed_mntput_work, 1);
12379ea459e1SAl Viro 		return;
12389ea459e1SAl Viro 	}
12399ea459e1SAl Viro 	cleanup_mnt(mnt);
1240b3e19d92SNick Piggin }
1241b3e19d92SNick Piggin 
1242b3e19d92SNick Piggin void mntput(struct vfsmount *mnt)
1243b3e19d92SNick Piggin {
1244b3e19d92SNick Piggin 	if (mnt) {
1245863d684fSAl Viro 		struct mount *m = real_mount(mnt);
1246b3e19d92SNick Piggin 		/* avoid cacheline pingpong, hope gcc doesn't get "smart" */
1247863d684fSAl Viro 		if (unlikely(m->mnt_expiry_mark))
1248863d684fSAl Viro 			m->mnt_expiry_mark = 0;
1249863d684fSAl Viro 		mntput_no_expire(m);
1250b3e19d92SNick Piggin 	}
1251b3e19d92SNick Piggin }
1252b3e19d92SNick Piggin EXPORT_SYMBOL(mntput);
1253b3e19d92SNick Piggin 
1254b3e19d92SNick Piggin struct vfsmount *mntget(struct vfsmount *mnt)
1255b3e19d92SNick Piggin {
1256b3e19d92SNick Piggin 	if (mnt)
125783adc753SAl Viro 		mnt_add_count(real_mount(mnt), 1);
1258b3e19d92SNick Piggin 	return mnt;
1259b3e19d92SNick Piggin }
1260b3e19d92SNick Piggin EXPORT_SYMBOL(mntget);
1261b3e19d92SNick Piggin 
12621f287bc4SRandy Dunlap /**
12631f287bc4SRandy Dunlap  * path_is_mountpoint() - Check if path is a mount in the current namespace.
12641f287bc4SRandy Dunlap  * @path: path to check
1265c6609c0aSIan Kent  *
1266c6609c0aSIan Kent  *  d_mountpoint() can only be used reliably to establish if a dentry is
1267c6609c0aSIan Kent  *  not mounted in any namespace and that common case is handled inline.
1268c6609c0aSIan Kent  *  d_mountpoint() isn't aware of the possibility there may be multiple
1269c6609c0aSIan Kent  *  mounts using a given dentry in a different namespace. This function
1270c6609c0aSIan Kent  *  checks if the passed in path is a mountpoint rather than the dentry
1271c6609c0aSIan Kent  *  alone.
1272c6609c0aSIan Kent  */
1273c6609c0aSIan Kent bool path_is_mountpoint(const struct path *path)
1274c6609c0aSIan Kent {
1275c6609c0aSIan Kent 	unsigned seq;
1276c6609c0aSIan Kent 	bool res;
1277c6609c0aSIan Kent 
1278c6609c0aSIan Kent 	if (!d_mountpoint(path->dentry))
1279c6609c0aSIan Kent 		return false;
1280c6609c0aSIan Kent 
1281c6609c0aSIan Kent 	rcu_read_lock();
1282c6609c0aSIan Kent 	do {
1283c6609c0aSIan Kent 		seq = read_seqbegin(&mount_lock);
1284c6609c0aSIan Kent 		res = __path_is_mountpoint(path);
1285c6609c0aSIan Kent 	} while (read_seqretry(&mount_lock, seq));
1286c6609c0aSIan Kent 	rcu_read_unlock();
1287c6609c0aSIan Kent 
1288c6609c0aSIan Kent 	return res;
1289c6609c0aSIan Kent }
1290c6609c0aSIan Kent EXPORT_SYMBOL(path_is_mountpoint);
1291c6609c0aSIan Kent 
1292ca71cf71SAl Viro struct vfsmount *mnt_clone_internal(const struct path *path)
12937b7b1aceSAl Viro {
12943064c356SAl Viro 	struct mount *p;
12953064c356SAl Viro 	p = clone_mnt(real_mount(path->mnt), path->dentry, CL_PRIVATE);
12963064c356SAl Viro 	if (IS_ERR(p))
12973064c356SAl Viro 		return ERR_CAST(p);
12983064c356SAl Viro 	p->mnt.mnt_flags |= MNT_INTERNAL;
12993064c356SAl Viro 	return &p->mnt;
13007b7b1aceSAl Viro }
13011da177e4SLinus Torvalds 
1302a1a2c409SMiklos Szeredi #ifdef CONFIG_PROC_FS
13039f6c61f9SMiklos Szeredi static struct mount *mnt_list_next(struct mnt_namespace *ns,
13049f6c61f9SMiklos Szeredi 				   struct list_head *p)
13059f6c61f9SMiklos Szeredi {
13069f6c61f9SMiklos Szeredi 	struct mount *mnt, *ret = NULL;
13079f6c61f9SMiklos Szeredi 
13089f6c61f9SMiklos Szeredi 	lock_ns_list(ns);
13099f6c61f9SMiklos Szeredi 	list_for_each_continue(p, &ns->list) {
13109f6c61f9SMiklos Szeredi 		mnt = list_entry(p, typeof(*mnt), mnt_list);
13119f6c61f9SMiklos Szeredi 		if (!mnt_is_cursor(mnt)) {
13129f6c61f9SMiklos Szeredi 			ret = mnt;
13139f6c61f9SMiklos Szeredi 			break;
13149f6c61f9SMiklos Szeredi 		}
13159f6c61f9SMiklos Szeredi 	}
13169f6c61f9SMiklos Szeredi 	unlock_ns_list(ns);
13179f6c61f9SMiklos Szeredi 
13189f6c61f9SMiklos Szeredi 	return ret;
13199f6c61f9SMiklos Szeredi }
13209f6c61f9SMiklos Szeredi 
13210226f492SAl Viro /* iterator; we want it to have access to namespace_sem, thus here... */
13221da177e4SLinus Torvalds static void *m_start(struct seq_file *m, loff_t *pos)
13231da177e4SLinus Torvalds {
1324ede1bf0dSYann Droneaud 	struct proc_mounts *p = m->private;
13259f6c61f9SMiklos Szeredi 	struct list_head *prev;
13261da177e4SLinus Torvalds 
1327390c6843SRam Pai 	down_read(&namespace_sem);
13289f6c61f9SMiklos Szeredi 	if (!*pos) {
13299f6c61f9SMiklos Szeredi 		prev = &p->ns->list;
13309f6c61f9SMiklos Szeredi 	} else {
13319f6c61f9SMiklos Szeredi 		prev = &p->cursor.mnt_list;
13329f6c61f9SMiklos Szeredi 
13339f6c61f9SMiklos Szeredi 		/* Read after we'd reached the end? */
13349f6c61f9SMiklos Szeredi 		if (list_empty(prev))
13359f6c61f9SMiklos Szeredi 			return NULL;
1336c7999c36SAl Viro 	}
1337c7999c36SAl Viro 
13389f6c61f9SMiklos Szeredi 	return mnt_list_next(p->ns, prev);
13391da177e4SLinus Torvalds }
13401da177e4SLinus Torvalds 
13411da177e4SLinus Torvalds static void *m_next(struct seq_file *m, void *v, loff_t *pos)
13421da177e4SLinus Torvalds {
1343ede1bf0dSYann Droneaud 	struct proc_mounts *p = m->private;
13449f6c61f9SMiklos Szeredi 	struct mount *mnt = v;
1345b0765fb8SPavel Emelianov 
13469f6c61f9SMiklos Szeredi 	++*pos;
13479f6c61f9SMiklos Szeredi 	return mnt_list_next(p->ns, &mnt->mnt_list);
13481da177e4SLinus Torvalds }
13491da177e4SLinus Torvalds 
13501da177e4SLinus Torvalds static void m_stop(struct seq_file *m, void *v)
13511da177e4SLinus Torvalds {
13529f6c61f9SMiklos Szeredi 	struct proc_mounts *p = m->private;
13539f6c61f9SMiklos Szeredi 	struct mount *mnt = v;
13549f6c61f9SMiklos Szeredi 
13559f6c61f9SMiklos Szeredi 	lock_ns_list(p->ns);
13569f6c61f9SMiklos Szeredi 	if (mnt)
13579f6c61f9SMiklos Szeredi 		list_move_tail(&p->cursor.mnt_list, &mnt->mnt_list);
13589f6c61f9SMiklos Szeredi 	else
13599f6c61f9SMiklos Szeredi 		list_del_init(&p->cursor.mnt_list);
13609f6c61f9SMiklos Szeredi 	unlock_ns_list(p->ns);
1361390c6843SRam Pai 	up_read(&namespace_sem);
13621da177e4SLinus Torvalds }
13631da177e4SLinus Torvalds 
13640226f492SAl Viro static int m_show(struct seq_file *m, void *v)
13659f5596afSAl Viro {
1366ede1bf0dSYann Droneaud 	struct proc_mounts *p = m->private;
13679f6c61f9SMiklos Szeredi 	struct mount *r = v;
13680226f492SAl Viro 	return p->show(m, &r->mnt);
13691da177e4SLinus Torvalds }
13701da177e4SLinus Torvalds 
1371a1a2c409SMiklos Szeredi const struct seq_operations mounts_op = {
13721da177e4SLinus Torvalds 	.start	= m_start,
13731da177e4SLinus Torvalds 	.next	= m_next,
13741da177e4SLinus Torvalds 	.stop	= m_stop,
13750226f492SAl Viro 	.show	= m_show,
1376b4629fe2SChuck Lever };
13779f6c61f9SMiklos Szeredi 
13789f6c61f9SMiklos Szeredi void mnt_cursor_del(struct mnt_namespace *ns, struct mount *cursor)
13799f6c61f9SMiklos Szeredi {
13809f6c61f9SMiklos Szeredi 	down_read(&namespace_sem);
13819f6c61f9SMiklos Szeredi 	lock_ns_list(ns);
13829f6c61f9SMiklos Szeredi 	list_del(&cursor->mnt_list);
13839f6c61f9SMiklos Szeredi 	unlock_ns_list(ns);
13849f6c61f9SMiklos Szeredi 	up_read(&namespace_sem);
13859f6c61f9SMiklos Szeredi }
1386a1a2c409SMiklos Szeredi #endif  /* CONFIG_PROC_FS */
1387b4629fe2SChuck Lever 
/**
 * may_umount_tree - check if a mount tree is busy
 * @m: root of mount tree
 *
 * This is called to check if a tree of mounts has any
 * open files, pwds, chroots or sub mounts that are
 * busy.
 *
 * Return: 1 when the summed reference count of the tree does not exceed
 * two references per mount, 0 otherwise.
 */
int may_umount_tree(struct vfsmount *m)
{
	struct mount *root = real_mount(m);
	struct mount *cur;
	int refs_seen = 0;
	int refs_allowed = 0;

	BUG_ON(!m);

	/* write lock needed for mnt_get_count */
	lock_mount_hash();
	cur = root;
	while (cur) {
		refs_seen += mnt_get_count(cur);
		refs_allowed += 2;
		cur = next_mnt(cur, root);
	}
	unlock_mount_hash();

	return refs_seen <= refs_allowed;
}

EXPORT_SYMBOL(may_umount_tree);
14191da177e4SLinus Torvalds 
14201da177e4SLinus Torvalds /**
14211da177e4SLinus Torvalds  * may_umount - check if a mount point is busy
14221da177e4SLinus Torvalds  * @mnt: root of mount
14231da177e4SLinus Torvalds  *
14241da177e4SLinus Torvalds  * This is called to check if a mount point has any
14251da177e4SLinus Torvalds  * open files, pwds, chroots or sub mounts. If the
14261da177e4SLinus Torvalds  * mount has sub mounts this will return busy
14271da177e4SLinus Torvalds  * regardless of whether the sub mounts are busy.
14281da177e4SLinus Torvalds  *
14291da177e4SLinus Torvalds  * Doesn't take quota and stuff into account. IOW, in some cases it will
14301da177e4SLinus Torvalds  * give false negatives. The main reason why it's here is that we need
14311da177e4SLinus Torvalds  * a non-destructive way to look for easily umountable filesystems.
14321da177e4SLinus Torvalds  */
14331da177e4SLinus Torvalds int may_umount(struct vfsmount *mnt)
14341da177e4SLinus Torvalds {
1435e3474a8eSIan Kent 	int ret = 1;
14368ad08d8aSAl Viro 	down_read(&namespace_sem);
1437719ea2fbSAl Viro 	lock_mount_hash();
14381ab59738SAl Viro 	if (propagate_mount_busy(real_mount(mnt), 2))
1439e3474a8eSIan Kent 		ret = 0;
1440719ea2fbSAl Viro 	unlock_mount_hash();
14418ad08d8aSAl Viro 	up_read(&namespace_sem);
1442a05964f3SRam Pai 	return ret;
14431da177e4SLinus Torvalds }
14441da177e4SLinus Torvalds 
14451da177e4SLinus Torvalds EXPORT_SYMBOL(may_umount);
14461da177e4SLinus Torvalds 
144797216be0SAl Viro static void namespace_unlock(void)
14481da177e4SLinus Torvalds {
1449a3b3c562SEric W. Biederman 	struct hlist_head head;
145056cbb429SAl Viro 	struct hlist_node *p;
145156cbb429SAl Viro 	struct mount *m;
14524edbe133SAl Viro 	LIST_HEAD(list);
145397216be0SAl Viro 
1454a3b3c562SEric W. Biederman 	hlist_move_list(&unmounted, &head);
14554edbe133SAl Viro 	list_splice_init(&ex_mountpoints, &list);
1456a3b3c562SEric W. Biederman 
145797216be0SAl Viro 	up_write(&namespace_sem);
1458a3b3c562SEric W. Biederman 
14594edbe133SAl Viro 	shrink_dentry_list(&list);
14604edbe133SAl Viro 
1461a3b3c562SEric W. Biederman 	if (likely(hlist_empty(&head)))
146297216be0SAl Viro 		return;
146397216be0SAl Viro 
146422cb7405SNeilBrown 	synchronize_rcu_expedited();
146548a066e7SAl Viro 
146656cbb429SAl Viro 	hlist_for_each_entry_safe(m, p, &head, mnt_umount) {
146756cbb429SAl Viro 		hlist_del(&m->mnt_umount);
146856cbb429SAl Viro 		mntput(&m->mnt);
146956cbb429SAl Viro 	}
147070fbcdf4SRam Pai }
147170fbcdf4SRam Pai 
147297216be0SAl Viro static inline void namespace_lock(void)
1473e3197d83SAl Viro {
147497216be0SAl Viro 	down_write(&namespace_sem);
1475e3197d83SAl Viro }
1476e3197d83SAl Viro 
/* Behaviour modifiers for umount_tree(); may be OR-ed together. */
enum umount_tree_flags {
	/* mounts get MNT_SYNC_UMOUNT and are always disconnected */
	UMOUNT_SYNC		= 1 << 0,
	/* also run propagate_mount_unlock() and propagate_umount() */
	UMOUNT_PROPAGATE	= 1 << 1,
	/* ask for mounts to stay connected to an umounted parent */
	UMOUNT_CONNECTED	= 1 << 2,
};
1482f2d0a123SEric W. Biederman 
1483f2d0a123SEric W. Biederman static bool disconnect_mount(struct mount *mnt, enum umount_tree_flags how)
1484f2d0a123SEric W. Biederman {
1485f2d0a123SEric W. Biederman 	/* Leaving mounts connected is only valid for lazy umounts */
1486f2d0a123SEric W. Biederman 	if (how & UMOUNT_SYNC)
1487f2d0a123SEric W. Biederman 		return true;
1488f2d0a123SEric W. Biederman 
1489f2d0a123SEric W. Biederman 	/* A mount without a parent has nothing to be connected to */
1490f2d0a123SEric W. Biederman 	if (!mnt_has_parent(mnt))
1491f2d0a123SEric W. Biederman 		return true;
1492f2d0a123SEric W. Biederman 
1493f2d0a123SEric W. Biederman 	/* Because the reference counting rules change when mounts are
1494f2d0a123SEric W. Biederman 	 * unmounted and connected, umounted mounts may not be
1495f2d0a123SEric W. Biederman 	 * connected to mounted mounts.
1496f2d0a123SEric W. Biederman 	 */
1497f2d0a123SEric W. Biederman 	if (!(mnt->mnt_parent->mnt.mnt_flags & MNT_UMOUNT))
1498f2d0a123SEric W. Biederman 		return true;
1499f2d0a123SEric W. Biederman 
1500f2d0a123SEric W. Biederman 	/* Has it been requested that the mount remain connected? */
1501f2d0a123SEric W. Biederman 	if (how & UMOUNT_CONNECTED)
1502f2d0a123SEric W. Biederman 		return false;
1503f2d0a123SEric W. Biederman 
1504f2d0a123SEric W. Biederman 	/* Is the mount locked such that it needs to remain connected? */
1505f2d0a123SEric W. Biederman 	if (IS_MNT_LOCKED(mnt))
1506f2d0a123SEric W. Biederman 		return false;
1507f2d0a123SEric W. Biederman 
1508f2d0a123SEric W. Biederman 	/* By default disconnect the mount */
1509f2d0a123SEric W. Biederman 	return true;
1510f2d0a123SEric W. Biederman }
1511f2d0a123SEric W. Biederman 
151299b7db7bSNick Piggin /*
151348a066e7SAl Viro  * mount_lock must be held
151499b7db7bSNick Piggin  * namespace_sem must be held for write
151599b7db7bSNick Piggin  */
1516e819f152SEric W. Biederman static void umount_tree(struct mount *mnt, enum umount_tree_flags how)
151770fbcdf4SRam Pai {
1518c003b26fSEric W. Biederman 	LIST_HEAD(tmp_list);
1519315fc83eSAl Viro 	struct mount *p;
152070fbcdf4SRam Pai 
15215d88457eSEric W. Biederman 	if (how & UMOUNT_PROPAGATE)
15225d88457eSEric W. Biederman 		propagate_mount_unlock(mnt);
15235d88457eSEric W. Biederman 
1524c003b26fSEric W. Biederman 	/* Gather the mounts to umount */
1525590ce4bcSEric W. Biederman 	for (p = mnt; p; p = next_mnt(p, mnt)) {
1526590ce4bcSEric W. Biederman 		p->mnt.mnt_flags |= MNT_UMOUNT;
1527c003b26fSEric W. Biederman 		list_move(&p->mnt_list, &tmp_list);
1528590ce4bcSEric W. Biederman 	}
1529c003b26fSEric W. Biederman 
1530411a938bSEric W. Biederman 	/* Hide the mounts from mnt_mounts */
1531c003b26fSEric W. Biederman 	list_for_each_entry(p, &tmp_list, mnt_list) {
1532c003b26fSEric W. Biederman 		list_del_init(&p->mnt_child);
153338129a13SAl Viro 	}
153470fbcdf4SRam Pai 
1535c003b26fSEric W. Biederman 	/* Add propogated mounts to the tmp_list */
1536e819f152SEric W. Biederman 	if (how & UMOUNT_PROPAGATE)
15377b8a53fdSAl Viro 		propagate_umount(&tmp_list);
1538a05964f3SRam Pai 
1539c003b26fSEric W. Biederman 	while (!list_empty(&tmp_list)) {
1540d2921684SEric W. Biederman 		struct mnt_namespace *ns;
1541ce07d891SEric W. Biederman 		bool disconnect;
1542c003b26fSEric W. Biederman 		p = list_first_entry(&tmp_list, struct mount, mnt_list);
15436776db3dSAl Viro 		list_del_init(&p->mnt_expire);
15441a4eeaf2SAl Viro 		list_del_init(&p->mnt_list);
1545d2921684SEric W. Biederman 		ns = p->mnt_ns;
1546d2921684SEric W. Biederman 		if (ns) {
1547d2921684SEric W. Biederman 			ns->mounts--;
1548d2921684SEric W. Biederman 			__touch_mnt_namespace(ns);
1549d2921684SEric W. Biederman 		}
155063d37a84SAl Viro 		p->mnt_ns = NULL;
1551e819f152SEric W. Biederman 		if (how & UMOUNT_SYNC)
155248a066e7SAl Viro 			p->mnt.mnt_flags |= MNT_SYNC_UMOUNT;
155387b95ce0SAl Viro 
1554f2d0a123SEric W. Biederman 		disconnect = disconnect_mount(p, how);
1555676da58dSAl Viro 		if (mnt_has_parent(p)) {
155681b6b061SAl Viro 			mnt_add_count(p->mnt_parent, -1);
1557ce07d891SEric W. Biederman 			if (!disconnect) {
1558ce07d891SEric W. Biederman 				/* Don't forget about p */
1559ce07d891SEric W. Biederman 				list_add_tail(&p->mnt_child, &p->mnt_parent->mnt_mounts);
1560ce07d891SEric W. Biederman 			} else {
15616a46c573SEric W. Biederman 				umount_mnt(p);
15627c4b93d8SAl Viro 			}
1563ce07d891SEric W. Biederman 		}
15640f0afb1dSAl Viro 		change_mnt_propagation(p, MS_PRIVATE);
156519a1c409SAl Viro 		if (disconnect)
156619a1c409SAl Viro 			hlist_add_head(&p->mnt_umount, &unmounted);
156738129a13SAl Viro 	}
15681da177e4SLinus Torvalds }
15691da177e4SLinus Torvalds 
1570b54b9be7SAl Viro static void shrink_submounts(struct mount *mnt);
1571c35038beSAl Viro 
15728d0347f6SDavid Howells static int do_umount_root(struct super_block *sb)
15738d0347f6SDavid Howells {
15748d0347f6SDavid Howells 	int ret = 0;
15758d0347f6SDavid Howells 
15768d0347f6SDavid Howells 	down_write(&sb->s_umount);
15778d0347f6SDavid Howells 	if (!sb_rdonly(sb)) {
15788d0347f6SDavid Howells 		struct fs_context *fc;
15798d0347f6SDavid Howells 
15808d0347f6SDavid Howells 		fc = fs_context_for_reconfigure(sb->s_root, SB_RDONLY,
15818d0347f6SDavid Howells 						SB_RDONLY);
15828d0347f6SDavid Howells 		if (IS_ERR(fc)) {
15838d0347f6SDavid Howells 			ret = PTR_ERR(fc);
15848d0347f6SDavid Howells 		} else {
15858d0347f6SDavid Howells 			ret = parse_monolithic_mount_data(fc, NULL);
15868d0347f6SDavid Howells 			if (!ret)
15878d0347f6SDavid Howells 				ret = reconfigure_super(fc);
15888d0347f6SDavid Howells 			put_fs_context(fc);
15898d0347f6SDavid Howells 		}
15908d0347f6SDavid Howells 	}
15918d0347f6SDavid Howells 	up_write(&sb->s_umount);
15928d0347f6SDavid Howells 	return ret;
15938d0347f6SDavid Howells }
15948d0347f6SDavid Howells 
15951ab59738SAl Viro static int do_umount(struct mount *mnt, int flags)
15961da177e4SLinus Torvalds {
15971ab59738SAl Viro 	struct super_block *sb = mnt->mnt.mnt_sb;
15981da177e4SLinus Torvalds 	int retval;
15991da177e4SLinus Torvalds 
16001ab59738SAl Viro 	retval = security_sb_umount(&mnt->mnt, flags);
16011da177e4SLinus Torvalds 	if (retval)
16021da177e4SLinus Torvalds 		return retval;
16031da177e4SLinus Torvalds 
16041da177e4SLinus Torvalds 	/*
16051da177e4SLinus Torvalds 	 * Allow userspace to request a mountpoint be expired rather than
16061da177e4SLinus Torvalds 	 * unmounting unconditionally. Unmount only happens if:
16071da177e4SLinus Torvalds 	 *  (1) the mark is already set (the mark is cleared by mntput())
16081da177e4SLinus Torvalds 	 *  (2) the usage count == 1 [parent vfsmount] + 1 [sys_umount]
16091da177e4SLinus Torvalds 	 */
16101da177e4SLinus Torvalds 	if (flags & MNT_EXPIRE) {
16111ab59738SAl Viro 		if (&mnt->mnt == current->fs->root.mnt ||
16121da177e4SLinus Torvalds 		    flags & (MNT_FORCE | MNT_DETACH))
16131da177e4SLinus Torvalds 			return -EINVAL;
16141da177e4SLinus Torvalds 
1615b3e19d92SNick Piggin 		/*
1616b3e19d92SNick Piggin 		 * probably don't strictly need the lock here if we examined
1617b3e19d92SNick Piggin 		 * all race cases, but it's a slowpath.
1618b3e19d92SNick Piggin 		 */
1619719ea2fbSAl Viro 		lock_mount_hash();
162083adc753SAl Viro 		if (mnt_get_count(mnt) != 2) {
1621719ea2fbSAl Viro 			unlock_mount_hash();
16221da177e4SLinus Torvalds 			return -EBUSY;
1623b3e19d92SNick Piggin 		}
1624719ea2fbSAl Viro 		unlock_mount_hash();
16251da177e4SLinus Torvalds 
1626863d684fSAl Viro 		if (!xchg(&mnt->mnt_expiry_mark, 1))
16271da177e4SLinus Torvalds 			return -EAGAIN;
16281da177e4SLinus Torvalds 	}
16291da177e4SLinus Torvalds 
16301da177e4SLinus Torvalds 	/*
16311da177e4SLinus Torvalds 	 * If we may have to abort operations to get out of this
16321da177e4SLinus Torvalds 	 * mount, and they will themselves hold resources we must
16331da177e4SLinus Torvalds 	 * allow the fs to do things. In the Unix tradition of
16341da177e4SLinus Torvalds 	 * 'Gee thats tricky lets do it in userspace' the umount_begin
16351da177e4SLinus Torvalds 	 * might fail to complete on the first run through as other tasks
16361da177e4SLinus Torvalds 	 * must return, and the like. Thats for the mount program to worry
16371da177e4SLinus Torvalds 	 * about for the moment.
16381da177e4SLinus Torvalds 	 */
16391da177e4SLinus Torvalds 
164042faad99SAl Viro 	if (flags & MNT_FORCE && sb->s_op->umount_begin) {
164142faad99SAl Viro 		sb->s_op->umount_begin(sb);
164242faad99SAl Viro 	}
16431da177e4SLinus Torvalds 
16441da177e4SLinus Torvalds 	/*
16451da177e4SLinus Torvalds 	 * No sense to grab the lock for this test, but test itself looks
16461da177e4SLinus Torvalds 	 * somewhat bogus. Suggestions for better replacement?
16471da177e4SLinus Torvalds 	 * Ho-hum... In principle, we might treat that as umount + switch
16481da177e4SLinus Torvalds 	 * to rootfs. GC would eventually take care of the old vfsmount.
16491da177e4SLinus Torvalds 	 * Actually it makes sense, especially if rootfs would contain a
16501da177e4SLinus Torvalds 	 * /reboot - static binary that would close all descriptors and
16511da177e4SLinus Torvalds 	 * call reboot(9). Then init(8) could umount root and exec /reboot.
16521da177e4SLinus Torvalds 	 */
16531ab59738SAl Viro 	if (&mnt->mnt == current->fs->root.mnt && !(flags & MNT_DETACH)) {
16541da177e4SLinus Torvalds 		/*
16551da177e4SLinus Torvalds 		 * Special case for "unmounting" root ...
16561da177e4SLinus Torvalds 		 * we just try to remount it readonly.
16571da177e4SLinus Torvalds 		 */
1658bc6155d1SEric W. Biederman 		if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN))
1659a1480dccSAndy Lutomirski 			return -EPERM;
16608d0347f6SDavid Howells 		return do_umount_root(sb);
16611da177e4SLinus Torvalds 	}
16621da177e4SLinus Torvalds 
166397216be0SAl Viro 	namespace_lock();
1664719ea2fbSAl Viro 	lock_mount_hash();
16651da177e4SLinus Torvalds 
166625d202edSEric W. Biederman 	/* Recheck MNT_LOCKED with the locks held */
166725d202edSEric W. Biederman 	retval = -EINVAL;
166825d202edSEric W. Biederman 	if (mnt->mnt.mnt_flags & MNT_LOCKED)
166925d202edSEric W. Biederman 		goto out;
167025d202edSEric W. Biederman 
167125d202edSEric W. Biederman 	event++;
167248a066e7SAl Viro 	if (flags & MNT_DETACH) {
167348a066e7SAl Viro 		if (!list_empty(&mnt->mnt_list))
1674e819f152SEric W. Biederman 			umount_tree(mnt, UMOUNT_PROPAGATE);
167548a066e7SAl Viro 		retval = 0;
167648a066e7SAl Viro 	} else {
1677b54b9be7SAl Viro 		shrink_submounts(mnt);
16781da177e4SLinus Torvalds 		retval = -EBUSY;
167948a066e7SAl Viro 		if (!propagate_mount_busy(mnt, 2)) {
16801a4eeaf2SAl Viro 			if (!list_empty(&mnt->mnt_list))
1681e819f152SEric W. Biederman 				umount_tree(mnt, UMOUNT_PROPAGATE|UMOUNT_SYNC);
16821da177e4SLinus Torvalds 			retval = 0;
16831da177e4SLinus Torvalds 		}
168448a066e7SAl Viro 	}
168525d202edSEric W. Biederman out:
1686719ea2fbSAl Viro 	unlock_mount_hash();
1687e3197d83SAl Viro 	namespace_unlock();
16881da177e4SLinus Torvalds 	return retval;
16891da177e4SLinus Torvalds }
16901da177e4SLinus Torvalds 
16911da177e4SLinus Torvalds /*
169280b5dce8SEric W. Biederman  * __detach_mounts - lazily unmount all mounts on the specified dentry
169380b5dce8SEric W. Biederman  *
169480b5dce8SEric W. Biederman  * During unlink, rmdir, and d_drop it is possible to loose the path
169580b5dce8SEric W. Biederman  * to an existing mountpoint, and wind up leaking the mount.
169680b5dce8SEric W. Biederman  * detach_mounts allows lazily unmounting those mounts instead of
169780b5dce8SEric W. Biederman  * leaking them.
169880b5dce8SEric W. Biederman  *
169980b5dce8SEric W. Biederman  * The caller may hold dentry->d_inode->i_mutex.
170080b5dce8SEric W. Biederman  */
170180b5dce8SEric W. Biederman void __detach_mounts(struct dentry *dentry)
170280b5dce8SEric W. Biederman {
170380b5dce8SEric W. Biederman 	struct mountpoint *mp;
170480b5dce8SEric W. Biederman 	struct mount *mnt;
170580b5dce8SEric W. Biederman 
170680b5dce8SEric W. Biederman 	namespace_lock();
17073895dbf8SEric W. Biederman 	lock_mount_hash();
170880b5dce8SEric W. Biederman 	mp = lookup_mountpoint(dentry);
1709adc9b5c0SAl Viro 	if (!mp)
171080b5dce8SEric W. Biederman 		goto out_unlock;
171180b5dce8SEric W. Biederman 
1712e06b933eSAndrey Ulanov 	event++;
171380b5dce8SEric W. Biederman 	while (!hlist_empty(&mp->m_list)) {
171480b5dce8SEric W. Biederman 		mnt = hlist_entry(mp->m_list.first, struct mount, mnt_mp_list);
1715ce07d891SEric W. Biederman 		if (mnt->mnt.mnt_flags & MNT_UMOUNT) {
1716fe78fcc8SEric W. Biederman 			umount_mnt(mnt);
171756cbb429SAl Viro 			hlist_add_head(&mnt->mnt_umount, &unmounted);
1718ce07d891SEric W. Biederman 		}
1719e0c9c0afSEric W. Biederman 		else umount_tree(mnt, UMOUNT_CONNECTED);
172080b5dce8SEric W. Biederman 	}
172180b5dce8SEric W. Biederman 	put_mountpoint(mp);
172280b5dce8SEric W. Biederman out_unlock:
17233895dbf8SEric W. Biederman 	unlock_mount_hash();
172480b5dce8SEric W. Biederman 	namespace_unlock();
172580b5dce8SEric W. Biederman }
172680b5dce8SEric W. Biederman 
172780b5dce8SEric W. Biederman /*
17289b40bc90SAl Viro  * Is the caller allowed to modify his namespace?
17299b40bc90SAl Viro  */
17309b40bc90SAl Viro static inline bool may_mount(void)
17319b40bc90SAl Viro {
17329b40bc90SAl Viro 	return ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN);
17339b40bc90SAl Viro }
17349b40bc90SAl Viro 
/*
 * Print a one-time warning that the "mand" mount option is deprecated
 * and ignored.  pr_warn_once() ensures the banner is emitted at most once.
 */
static void warn_mandlock(void)
{
	/* The message used to read "deprecated and and is ignored". */
	pr_warn_once("=======================================================\n"
		     "WARNING: The mand mount option has been deprecated and\n"
		     "         is ignored by this kernel. Remove the mand\n"
		     "         option from the mount to silence this warning.\n"
		     "=======================================================\n");
}
17439e8925b6SJeff Layton 
174425ccd24fSChristoph Hellwig static int can_umount(const struct path *path, int flags)
17451da177e4SLinus Torvalds {
174625ccd24fSChristoph Hellwig 	struct mount *mnt = real_mount(path->mnt);
17471da177e4SLinus Torvalds 
17489b40bc90SAl Viro 	if (!may_mount())
17499b40bc90SAl Viro 		return -EPERM;
175041525f56SChristoph Hellwig 	if (path->dentry != path->mnt->mnt_root)
175125ccd24fSChristoph Hellwig 		return -EINVAL;
1752143c8c91SAl Viro 	if (!check_mnt(mnt))
175325ccd24fSChristoph Hellwig 		return -EINVAL;
175425d202edSEric W. Biederman 	if (mnt->mnt.mnt_flags & MNT_LOCKED) /* Check optimistically */
175525ccd24fSChristoph Hellwig 		return -EINVAL;
1756b2f5d4dcSEric W. Biederman 	if (flags & MNT_FORCE && !capable(CAP_SYS_ADMIN))
175725ccd24fSChristoph Hellwig 		return -EPERM;
175825ccd24fSChristoph Hellwig 	return 0;
175925ccd24fSChristoph Hellwig }
17601da177e4SLinus Torvalds 
1761a0a6df9aSAl Viro // caller is responsible for flags being sane
176225ccd24fSChristoph Hellwig int path_umount(struct path *path, int flags)
176325ccd24fSChristoph Hellwig {
176425ccd24fSChristoph Hellwig 	struct mount *mnt = real_mount(path->mnt);
176525ccd24fSChristoph Hellwig 	int ret;
176625ccd24fSChristoph Hellwig 
176725ccd24fSChristoph Hellwig 	ret = can_umount(path, flags);
176825ccd24fSChristoph Hellwig 	if (!ret)
176925ccd24fSChristoph Hellwig 		ret = do_umount(mnt, flags);
177025ccd24fSChristoph Hellwig 
1771429731b1SJan Blunck 	/* we mustn't call path_put() as that would clear mnt_expiry_mark */
177241525f56SChristoph Hellwig 	dput(path->dentry);
1773900148dcSAl Viro 	mntput_no_expire(mnt);
177425ccd24fSChristoph Hellwig 	return ret;
17751da177e4SLinus Torvalds }
17761da177e4SLinus Torvalds 
177709267defSChristoph Hellwig static int ksys_umount(char __user *name, int flags)
177841525f56SChristoph Hellwig {
177941525f56SChristoph Hellwig 	int lookup_flags = LOOKUP_MOUNTPOINT;
178041525f56SChristoph Hellwig 	struct path path;
178141525f56SChristoph Hellwig 	int ret;
178241525f56SChristoph Hellwig 
1783a0a6df9aSAl Viro 	// basic validity checks done first
1784a0a6df9aSAl Viro 	if (flags & ~(MNT_FORCE | MNT_DETACH | MNT_EXPIRE | UMOUNT_NOFOLLOW))
1785a0a6df9aSAl Viro 		return -EINVAL;
1786a0a6df9aSAl Viro 
178741525f56SChristoph Hellwig 	if (!(flags & UMOUNT_NOFOLLOW))
178841525f56SChristoph Hellwig 		lookup_flags |= LOOKUP_FOLLOW;
178941525f56SChristoph Hellwig 	ret = user_path_at(AT_FDCWD, name, lookup_flags, &path);
179041525f56SChristoph Hellwig 	if (ret)
179141525f56SChristoph Hellwig 		return ret;
179241525f56SChristoph Hellwig 	return path_umount(&path, flags);
179341525f56SChristoph Hellwig }
179441525f56SChristoph Hellwig 
17953a18ef5cSDominik Brodowski SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
17963a18ef5cSDominik Brodowski {
17973a18ef5cSDominik Brodowski 	return ksys_umount(name, flags);
17983a18ef5cSDominik Brodowski }
17993a18ef5cSDominik Brodowski 
#ifdef __ARCH_WANT_SYS_OLDUMOUNT

/*
 *	The 2.0 compatible umount. No flags: it behaves like
 *	umount(name, 0).
 */
SYSCALL_DEFINE1(oldumount, char __user *, name)
{
	return ksys_umount(name, 0);
}

#endif
18111da177e4SLinus Torvalds 
18124ce5d2b1SEric W. Biederman static bool is_mnt_ns_file(struct dentry *dentry)
18138823c079SEric W. Biederman {
18144ce5d2b1SEric W. Biederman 	/* Is this a proxy for a mount namespace? */
1815e149ed2bSAl Viro 	return dentry->d_op == &ns_dentry_operations &&
1816e149ed2bSAl Viro 	       dentry->d_fsdata == &mntns_operations;
18174ce5d2b1SEric W. Biederman }
18184ce5d2b1SEric W. Biederman 
1819213921f9SEric Biggers static struct mnt_namespace *to_mnt_ns(struct ns_common *ns)
182058be2825SAl Viro {
182158be2825SAl Viro 	return container_of(ns, struct mnt_namespace, ns);
182258be2825SAl Viro }
182358be2825SAl Viro 
1824303cc571SChristian Brauner struct ns_common *from_mnt_ns(struct mnt_namespace *mnt)
1825303cc571SChristian Brauner {
1826303cc571SChristian Brauner 	return &mnt->ns;
1827303cc571SChristian Brauner }
1828303cc571SChristian Brauner 
18294ce5d2b1SEric W. Biederman static bool mnt_ns_loop(struct dentry *dentry)
18304ce5d2b1SEric W. Biederman {
18314ce5d2b1SEric W. Biederman 	/* Could bind mounting the mount namespace inode cause a
18324ce5d2b1SEric W. Biederman 	 * mount namespace loop?
18334ce5d2b1SEric W. Biederman 	 */
18344ce5d2b1SEric W. Biederman 	struct mnt_namespace *mnt_ns;
18354ce5d2b1SEric W. Biederman 	if (!is_mnt_ns_file(dentry))
18364ce5d2b1SEric W. Biederman 		return false;
18374ce5d2b1SEric W. Biederman 
1838f77c8014SAl Viro 	mnt_ns = to_mnt_ns(get_proc_ns(dentry->d_inode));
18398823c079SEric W. Biederman 	return current->nsproxy->mnt_ns->seq >= mnt_ns->seq;
18408823c079SEric W. Biederman }
18418823c079SEric W. Biederman 
184287129cc0SAl Viro struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
184336341f64SRam Pai 					int flag)
18441da177e4SLinus Torvalds {
184584d17192SAl Viro 	struct mount *res, *p, *q, *r, *parent;
18461da177e4SLinus Torvalds 
18474ce5d2b1SEric W. Biederman 	if (!(flag & CL_COPY_UNBINDABLE) && IS_MNT_UNBINDABLE(mnt))
18484ce5d2b1SEric W. Biederman 		return ERR_PTR(-EINVAL);
18494ce5d2b1SEric W. Biederman 
18504ce5d2b1SEric W. Biederman 	if (!(flag & CL_COPY_MNT_NS_FILE) && is_mnt_ns_file(dentry))
1851be34d1a3SDavid Howells 		return ERR_PTR(-EINVAL);
18529676f0c6SRam Pai 
185336341f64SRam Pai 	res = q = clone_mnt(mnt, dentry, flag);
1854be34d1a3SDavid Howells 	if (IS_ERR(q))
1855be34d1a3SDavid Howells 		return q;
1856be34d1a3SDavid Howells 
1857a73324daSAl Viro 	q->mnt_mountpoint = mnt->mnt_mountpoint;
18581da177e4SLinus Torvalds 
18591da177e4SLinus Torvalds 	p = mnt;
18606b41d536SAl Viro 	list_for_each_entry(r, &mnt->mnt_mounts, mnt_child) {
1861315fc83eSAl Viro 		struct mount *s;
18627ec02ef1SJan Blunck 		if (!is_subdir(r->mnt_mountpoint, dentry))
18631da177e4SLinus Torvalds 			continue;
18641da177e4SLinus Torvalds 
1865909b0a88SAl Viro 		for (s = r; s; s = next_mnt(s, r)) {
18664ce5d2b1SEric W. Biederman 			if (!(flag & CL_COPY_UNBINDABLE) &&
18674ce5d2b1SEric W. Biederman 			    IS_MNT_UNBINDABLE(s)) {
1868df7342b2SEric W. Biederman 				if (s->mnt.mnt_flags & MNT_LOCKED) {
1869df7342b2SEric W. Biederman 					/* Both unbindable and locked. */
1870df7342b2SEric W. Biederman 					q = ERR_PTR(-EPERM);
1871df7342b2SEric W. Biederman 					goto out;
1872df7342b2SEric W. Biederman 				} else {
18734ce5d2b1SEric W. Biederman 					s = skip_mnt_tree(s);
18744ce5d2b1SEric W. Biederman 					continue;
18754ce5d2b1SEric W. Biederman 				}
1876df7342b2SEric W. Biederman 			}
18774ce5d2b1SEric W. Biederman 			if (!(flag & CL_COPY_MNT_NS_FILE) &&
18784ce5d2b1SEric W. Biederman 			    is_mnt_ns_file(s->mnt.mnt_root)) {
18799676f0c6SRam Pai 				s = skip_mnt_tree(s);
18809676f0c6SRam Pai 				continue;
18819676f0c6SRam Pai 			}
18820714a533SAl Viro 			while (p != s->mnt_parent) {
18830714a533SAl Viro 				p = p->mnt_parent;
18840714a533SAl Viro 				q = q->mnt_parent;
18851da177e4SLinus Torvalds 			}
188687129cc0SAl Viro 			p = s;
188784d17192SAl Viro 			parent = q;
188887129cc0SAl Viro 			q = clone_mnt(p, p->mnt.mnt_root, flag);
1889be34d1a3SDavid Howells 			if (IS_ERR(q))
1890be34d1a3SDavid Howells 				goto out;
1891719ea2fbSAl Viro 			lock_mount_hash();
18921a4eeaf2SAl Viro 			list_add_tail(&q->mnt_list, &res->mnt_list);
18931064f874SEric W. Biederman 			attach_mnt(q, parent, p->mnt_mp);
1894719ea2fbSAl Viro 			unlock_mount_hash();
18951da177e4SLinus Torvalds 		}
18961da177e4SLinus Torvalds 	}
18971da177e4SLinus Torvalds 	return res;
1898be34d1a3SDavid Howells out:
18991da177e4SLinus Torvalds 	if (res) {
1900719ea2fbSAl Viro 		lock_mount_hash();
1901e819f152SEric W. Biederman 		umount_tree(res, UMOUNT_SYNC);
1902719ea2fbSAl Viro 		unlock_mount_hash();
19031da177e4SLinus Torvalds 	}
1904be34d1a3SDavid Howells 	return q;
19051da177e4SLinus Torvalds }
19061da177e4SLinus Torvalds 
1907be34d1a3SDavid Howells /* Caller should check returned pointer for errors */
1908be34d1a3SDavid Howells 
1909ca71cf71SAl Viro struct vfsmount *collect_mounts(const struct path *path)
19108aec0809SAl Viro {
1911cb338d06SAl Viro 	struct mount *tree;
191297216be0SAl Viro 	namespace_lock();
1913cd4a4017SEric W. Biederman 	if (!check_mnt(real_mount(path->mnt)))
1914cd4a4017SEric W. Biederman 		tree = ERR_PTR(-EINVAL);
1915cd4a4017SEric W. Biederman 	else
191687129cc0SAl Viro 		tree = copy_tree(real_mount(path->mnt), path->dentry,
191787129cc0SAl Viro 				 CL_COPY_ALL | CL_PRIVATE);
1918328e6d90SAl Viro 	namespace_unlock();
1919be34d1a3SDavid Howells 	if (IS_ERR(tree))
192052e220d3SDan Carpenter 		return ERR_CAST(tree);
1921be34d1a3SDavid Howells 	return &tree->mnt;
19228aec0809SAl Viro }
19238aec0809SAl Viro 
1924a07b2000SAl Viro static void free_mnt_ns(struct mnt_namespace *);
1925a07b2000SAl Viro static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *, bool);
1926a07b2000SAl Viro 
1927a07b2000SAl Viro void dissolve_on_fput(struct vfsmount *mnt)
1928a07b2000SAl Viro {
1929a07b2000SAl Viro 	struct mnt_namespace *ns;
1930a07b2000SAl Viro 	namespace_lock();
1931a07b2000SAl Viro 	lock_mount_hash();
1932a07b2000SAl Viro 	ns = real_mount(mnt)->mnt_ns;
193344dfd84aSDavid Howells 	if (ns) {
193444dfd84aSDavid Howells 		if (is_anon_ns(ns))
1935a07b2000SAl Viro 			umount_tree(real_mount(mnt), UMOUNT_CONNECTED);
193644dfd84aSDavid Howells 		else
193744dfd84aSDavid Howells 			ns = NULL;
193844dfd84aSDavid Howells 	}
1939a07b2000SAl Viro 	unlock_mount_hash();
1940a07b2000SAl Viro 	namespace_unlock();
194144dfd84aSDavid Howells 	if (ns)
1942a07b2000SAl Viro 		free_mnt_ns(ns);
1943a07b2000SAl Viro }
1944a07b2000SAl Viro 
/*
 * drop_collected_mounts - unmount the tree rooted at @mnt
 * @mnt: root of the tree
 *
 * Calls umount_tree() with no flags while holding the namespace lock and the
 * mount hash lock.  Going by the name this is the release side of
 * collect_mounts(); NOTE(review): confirm against the callers.
 */
void drop_collected_mounts(struct vfsmount *mnt)
{
	struct mount *root = real_mount(mnt);

	namespace_lock();
	lock_mount_hash();
	umount_tree(root, 0);
	unlock_mount_hash();
	namespace_unlock();
}
19538aec0809SAl Viro 
1954427215d8SMiklos Szeredi static bool has_locked_children(struct mount *mnt, struct dentry *dentry)
1955427215d8SMiklos Szeredi {
1956427215d8SMiklos Szeredi 	struct mount *child;
1957427215d8SMiklos Szeredi 
1958427215d8SMiklos Szeredi 	list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
1959427215d8SMiklos Szeredi 		if (!is_subdir(child->mnt_mountpoint, dentry))
1960427215d8SMiklos Szeredi 			continue;
1961427215d8SMiklos Szeredi 
1962427215d8SMiklos Szeredi 		if (child->mnt.mnt_flags & MNT_LOCKED)
1963427215d8SMiklos Szeredi 			return true;
1964427215d8SMiklos Szeredi 	}
1965427215d8SMiklos Szeredi 	return false;
1966427215d8SMiklos Szeredi }
1967427215d8SMiklos Szeredi 
1968c771d683SMiklos Szeredi /**
1969c771d683SMiklos Szeredi  * clone_private_mount - create a private clone of a path
19701f287bc4SRandy Dunlap  * @path: path to clone
1971c771d683SMiklos Szeredi  *
19721f287bc4SRandy Dunlap  * This creates a new vfsmount, which will be the clone of @path.  The new mount
19731f287bc4SRandy Dunlap  * will not be attached anywhere in the namespace and will be private (i.e.
19741f287bc4SRandy Dunlap  * changes to the originating mount won't be propagated into this).
1975c771d683SMiklos Szeredi  *
1976c771d683SMiklos Szeredi  * Release with mntput().
1977c771d683SMiklos Szeredi  */
1978ca71cf71SAl Viro struct vfsmount *clone_private_mount(const struct path *path)
1979c771d683SMiklos Szeredi {
1980c771d683SMiklos Szeredi 	struct mount *old_mnt = real_mount(path->mnt);
1981c771d683SMiklos Szeredi 	struct mount *new_mnt;
1982c771d683SMiklos Szeredi 
1983427215d8SMiklos Szeredi 	down_read(&namespace_sem);
1984c771d683SMiklos Szeredi 	if (IS_MNT_UNBINDABLE(old_mnt))
1985427215d8SMiklos Szeredi 		goto invalid;
1986427215d8SMiklos Szeredi 
1987427215d8SMiklos Szeredi 	if (!check_mnt(old_mnt))
1988427215d8SMiklos Szeredi 		goto invalid;
1989427215d8SMiklos Szeredi 
1990427215d8SMiklos Szeredi 	if (has_locked_children(old_mnt, path->dentry))
1991427215d8SMiklos Szeredi 		goto invalid;
1992c771d683SMiklos Szeredi 
1993c771d683SMiklos Szeredi 	new_mnt = clone_mnt(old_mnt, path->dentry, CL_PRIVATE);
1994427215d8SMiklos Szeredi 	up_read(&namespace_sem);
1995427215d8SMiklos Szeredi 
1996c771d683SMiklos Szeredi 	if (IS_ERR(new_mnt))
1997c771d683SMiklos Szeredi 		return ERR_CAST(new_mnt);
1998c771d683SMiklos Szeredi 
1999df820f8dSMiklos Szeredi 	/* Longterm mount to be removed by kern_unmount*() */
2000df820f8dSMiklos Szeredi 	new_mnt->mnt_ns = MNT_NS_INTERNAL;
2001df820f8dSMiklos Szeredi 
2002c771d683SMiklos Szeredi 	return &new_mnt->mnt;
2003427215d8SMiklos Szeredi 
2004427215d8SMiklos Szeredi invalid:
2005427215d8SMiklos Szeredi 	up_read(&namespace_sem);
2006427215d8SMiklos Szeredi 	return ERR_PTR(-EINVAL);
2007c771d683SMiklos Szeredi }
2008c771d683SMiklos Szeredi EXPORT_SYMBOL_GPL(clone_private_mount);
2009c771d683SMiklos Szeredi 
20101f707137SAl Viro int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg,
20111f707137SAl Viro 		   struct vfsmount *root)
20121f707137SAl Viro {
20131a4eeaf2SAl Viro 	struct mount *mnt;
20141f707137SAl Viro 	int res = f(root, arg);
20151f707137SAl Viro 	if (res)
20161f707137SAl Viro 		return res;
20171a4eeaf2SAl Viro 	list_for_each_entry(mnt, &real_mount(root)->mnt_list, mnt_list) {
20181a4eeaf2SAl Viro 		res = f(&mnt->mnt, arg);
20191f707137SAl Viro 		if (res)
20201f707137SAl Viro 			return res;
20211f707137SAl Viro 	}
20221f707137SAl Viro 	return 0;
20231f707137SAl Viro }
20241f707137SAl Viro 
20253bd045ccSAl Viro static void lock_mnt_tree(struct mount *mnt)
20263bd045ccSAl Viro {
20273bd045ccSAl Viro 	struct mount *p;
20283bd045ccSAl Viro 
20293bd045ccSAl Viro 	for (p = mnt; p; p = next_mnt(p, mnt)) {
20303bd045ccSAl Viro 		int flags = p->mnt.mnt_flags;
20313bd045ccSAl Viro 		/* Don't allow unprivileged users to change mount flags */
20323bd045ccSAl Viro 		flags |= MNT_LOCK_ATIME;
20333bd045ccSAl Viro 
20343bd045ccSAl Viro 		if (flags & MNT_READONLY)
20353bd045ccSAl Viro 			flags |= MNT_LOCK_READONLY;
20363bd045ccSAl Viro 
20373bd045ccSAl Viro 		if (flags & MNT_NODEV)
20383bd045ccSAl Viro 			flags |= MNT_LOCK_NODEV;
20393bd045ccSAl Viro 
20403bd045ccSAl Viro 		if (flags & MNT_NOSUID)
20413bd045ccSAl Viro 			flags |= MNT_LOCK_NOSUID;
20423bd045ccSAl Viro 
20433bd045ccSAl Viro 		if (flags & MNT_NOEXEC)
20443bd045ccSAl Viro 			flags |= MNT_LOCK_NOEXEC;
20453bd045ccSAl Viro 		/* Don't allow unprivileged users to reveal what is under a mount */
20463bd045ccSAl Viro 		if (list_empty(&p->mnt_expire))
20473bd045ccSAl Viro 			flags |= MNT_LOCKED;
20483bd045ccSAl Viro 		p->mnt.mnt_flags = flags;
20493bd045ccSAl Viro 	}
20503bd045ccSAl Viro }
20513bd045ccSAl Viro 
20524b8b21f4SAl Viro static void cleanup_group_ids(struct mount *mnt, struct mount *end)
2053719f5d7fSMiklos Szeredi {
2054315fc83eSAl Viro 	struct mount *p;
2055719f5d7fSMiklos Szeredi 
2056909b0a88SAl Viro 	for (p = mnt; p != end; p = next_mnt(p, mnt)) {
2057fc7be130SAl Viro 		if (p->mnt_group_id && !IS_MNT_SHARED(p))
20584b8b21f4SAl Viro 			mnt_release_group_id(p);
2059719f5d7fSMiklos Szeredi 	}
2060719f5d7fSMiklos Szeredi }
2061719f5d7fSMiklos Szeredi 
20624b8b21f4SAl Viro static int invent_group_ids(struct mount *mnt, bool recurse)
2063719f5d7fSMiklos Szeredi {
2064315fc83eSAl Viro 	struct mount *p;
2065719f5d7fSMiklos Szeredi 
2066909b0a88SAl Viro 	for (p = mnt; p; p = recurse ? next_mnt(p, mnt) : NULL) {
2067fc7be130SAl Viro 		if (!p->mnt_group_id && !IS_MNT_SHARED(p)) {
20684b8b21f4SAl Viro 			int err = mnt_alloc_group_id(p);
2069719f5d7fSMiklos Szeredi 			if (err) {
20704b8b21f4SAl Viro 				cleanup_group_ids(mnt, p);
2071719f5d7fSMiklos Szeredi 				return err;
2072719f5d7fSMiklos Szeredi 			}
2073719f5d7fSMiklos Szeredi 		}
2074719f5d7fSMiklos Szeredi 	}
2075719f5d7fSMiklos Szeredi 
2076719f5d7fSMiklos Szeredi 	return 0;
2077719f5d7fSMiklos Szeredi }
2078719f5d7fSMiklos Szeredi 
2079d2921684SEric W. Biederman int count_mounts(struct mnt_namespace *ns, struct mount *mnt)
2080d2921684SEric W. Biederman {
2081d2921684SEric W. Biederman 	unsigned int max = READ_ONCE(sysctl_mount_max);
2082d2921684SEric W. Biederman 	unsigned int mounts = 0, old, pending, sum;
2083d2921684SEric W. Biederman 	struct mount *p;
2084d2921684SEric W. Biederman 
2085d2921684SEric W. Biederman 	for (p = mnt; p; p = next_mnt(p, mnt))
2086d2921684SEric W. Biederman 		mounts++;
2087d2921684SEric W. Biederman 
2088d2921684SEric W. Biederman 	old = ns->mounts;
2089d2921684SEric W. Biederman 	pending = ns->pending_mounts;
2090d2921684SEric W. Biederman 	sum = old + pending;
2091d2921684SEric W. Biederman 	if ((old > sum) ||
2092d2921684SEric W. Biederman 	    (pending > sum) ||
2093d2921684SEric W. Biederman 	    (max < sum) ||
2094d2921684SEric W. Biederman 	    (mounts > (max - sum)))
2095d2921684SEric W. Biederman 		return -ENOSPC;
2096d2921684SEric W. Biederman 
2097d2921684SEric W. Biederman 	ns->pending_mounts = pending + mounts;
2098d2921684SEric W. Biederman 	return 0;
2099d2921684SEric W. Biederman }
2100d2921684SEric W. Biederman 
2101b90fa9aeSRam Pai /*
2102b90fa9aeSRam Pai  *  @source_mnt : mount tree to be attached
2103b90fa9aeSRam Pai  *  @dest_mnt   : mount that the tree @source_mnt is attached to
210421444403SRam Pai  *  @dest_mp    : mountpoint on @dest_mnt at which @source_mnt is attached
210521444403SRam Pai  *  @moving     : if true, @source_mnt is an existing mount being moved: it is
210621444403SRam Pai  *  		   unhashed from its current location before being attached
2107b90fa9aeSRam Pai  *
2108b90fa9aeSRam Pai  *  NOTE: the table below explains the semantics when a source mount
2109b90fa9aeSRam Pai  *  of a given type is attached to a destination mount of a given type.
21109676f0c6SRam Pai  * ---------------------------------------------------------------------------
2111b90fa9aeSRam Pai  * |         BIND MOUNT OPERATION                                            |
21129676f0c6SRam Pai  * |**************************************************************************
21139676f0c6SRam Pai  * | source-->| shared        |       private  |       slave    | unbindable |
21149676f0c6SRam Pai  * | dest     |               |                |                |            |
21159676f0c6SRam Pai  * |   |      |               |                |                |            |
21169676f0c6SRam Pai  * |   v      |               |                |                |            |
21179676f0c6SRam Pai  * |**************************************************************************
21189676f0c6SRam Pai  * |  shared  | shared (++)   |     shared (+) |     shared(+++)|  invalid   |
21195afe0022SRam Pai  * |          |               |                |                |            |
21209676f0c6SRam Pai  * |non-shared| shared (+)    |      private   |      slave (*) |  invalid   |
21219676f0c6SRam Pai  * ***************************************************************************
2122b90fa9aeSRam Pai  * A bind operation clones the source mount and mounts the clone on the
2123b90fa9aeSRam Pai  * destination mount.
2124b90fa9aeSRam Pai  *
2125b90fa9aeSRam Pai  * (++)  the cloned mount is propagated to all the mounts in the propagation
2126b90fa9aeSRam Pai  * 	 tree of the destination mount and the cloned mount is added to
2127b90fa9aeSRam Pai  * 	 the peer group of the source mount.
2128b90fa9aeSRam Pai  * (+)   the cloned mount is created under the destination mount and is marked
2129b90fa9aeSRam Pai  *       as shared. The cloned mount is added to the peer group of the source
2130b90fa9aeSRam Pai  *       mount.
21315afe0022SRam Pai  * (+++) the mount is propagated to all the mounts in the propagation tree
21325afe0022SRam Pai  *       of the destination mount and the cloned mount is made slave
21335afe0022SRam Pai  *       of the same master as that of the source mount. The cloned mount
21345afe0022SRam Pai  *       is marked as 'shared and slave'.
21355afe0022SRam Pai  * (*)   the cloned mount is made a slave of the same master as that of the
21365afe0022SRam Pai  * 	 source mount.
21375afe0022SRam Pai  *
21389676f0c6SRam Pai  * ---------------------------------------------------------------------------
213921444403SRam Pai  * |         		MOVE MOUNT OPERATION                                 |
21409676f0c6SRam Pai  * |**************************************************************************
21419676f0c6SRam Pai  * | source-->| shared        |       private  |       slave    | unbindable |
21429676f0c6SRam Pai  * | dest     |               |                |                |            |
21439676f0c6SRam Pai  * |   |      |               |                |                |            |
21449676f0c6SRam Pai  * |   v      |               |                |                |            |
21459676f0c6SRam Pai  * |**************************************************************************
21469676f0c6SRam Pai  * |  shared  | shared (+)    |     shared (+) |    shared(+++) |  invalid   |
21475afe0022SRam Pai  * |          |               |                |                |            |
21489676f0c6SRam Pai  * |non-shared| shared (+*)   |      private   |    slave (*)   | unbindable |
21499676f0c6SRam Pai  * ***************************************************************************
21505afe0022SRam Pai  *
21515afe0022SRam Pai  * (+)  the mount is moved to the destination. And is then propagated to
21525afe0022SRam Pai  * 	all the mounts in the propagation tree of the destination mount.
215321444403SRam Pai  * (+*)  the mount is moved to the destination.
21545afe0022SRam Pai  * (+++)  the mount is moved to the destination and is then propagated to
21555afe0022SRam Pai  * 	all the mounts belonging to the destination mount's propagation tree.
21565afe0022SRam Pai  * 	the mount is marked as 'shared and slave'.
21575afe0022SRam Pai  * (*)	the mount continues to be a slave at the new location.
2158b90fa9aeSRam Pai  *
2159b90fa9aeSRam Pai  * if the source mount is a tree, the operations explained above are
2160b90fa9aeSRam Pai  * applied to each mount in the tree.
2161b90fa9aeSRam Pai  * Must be called without spinlocks held, since this function can sleep
2162b90fa9aeSRam Pai  * in allocations.
2163b90fa9aeSRam Pai  */
21640fb54e50SAl Viro static int attach_recursive_mnt(struct mount *source_mnt,
216584d17192SAl Viro 			struct mount *dest_mnt,
216684d17192SAl Viro 			struct mountpoint *dest_mp,
21672763d119SAl Viro 			bool moving)
2168b90fa9aeSRam Pai {
	/* User namespace of the caller's mount namespace; compared further
	 * down with the namespace each propagated copy lands in.
	 */
21693bd045ccSAl Viro 	struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns;
	/* Filled in by propagate_mnt(); entries are linked through mnt_hash. */
217038129a13SAl Viro 	HLIST_HEAD(tree_list);
2171d2921684SEric W. Biederman 	struct mnt_namespace *ns = dest_mnt->mnt_ns;
21721064f874SEric W. Biederman 	struct mountpoint *smp;
2173315fc83eSAl Viro 	struct mount *child, *p;
217438129a13SAl Viro 	struct hlist_node *n;
2175719f5d7fSMiklos Szeredi 	int err;
2176b90fa9aeSRam Pai 
21771064f874SEric W. Biederman 	/* Preallocate a mountpoint in case the new mounts need
21781064f874SEric W. Biederman 	 * to be tucked under other mounts.
21791064f874SEric W. Biederman 	 */
21801064f874SEric W. Biederman 	smp = get_mountpoint(source_mnt->mnt.mnt_root);
21811064f874SEric W. Biederman 	if (IS_ERR(smp))
21821064f874SEric W. Biederman 		return PTR_ERR(smp);
21831064f874SEric W. Biederman 
2184d2921684SEric W. Biederman 	/* Is there space to add these mounts to the mount namespace? */
	/* The check is skipped when moving; no count is reserved then. */
21852763d119SAl Viro 	if (!moving) {
2186d2921684SEric W. Biederman 		err = count_mounts(ns, source_mnt);
2187d2921684SEric W. Biederman 		if (err)
2188d2921684SEric W. Biederman 			goto out;
2189d2921684SEric W. Biederman 	}
2190d2921684SEric W. Biederman 
2191fc7be130SAl Viro 	if (IS_MNT_SHARED(dest_mnt)) {
21920fb54e50SAl Viro 		err = invent_group_ids(source_mnt, true);
2193719f5d7fSMiklos Szeredi 		if (err)
2194719f5d7fSMiklos Szeredi 			goto out;
219584d17192SAl Viro 		err = propagate_mnt(dest_mnt, dest_mp, source_mnt, &tree_list);
		/* The hash lock is taken before looking at err because the
		 * out_cleanup_ids path below expects to run with it held.
		 */
2196f2ebb3a9SAl Viro 		lock_mount_hash();
2197719f5d7fSMiklos Szeredi 		if (err)
2198719f5d7fSMiklos Szeredi 			goto out_cleanup_ids;
		/* Every mount of the source tree is marked shared. */
2199909b0a88SAl Viro 		for (p = source_mnt; p; p = next_mnt(p, source_mnt))
22000f0afb1dSAl Viro 			set_mnt_shared(p);
22010b1b901bSAl Viro 	} else {
22020b1b901bSAl Viro 		lock_mount_hash();
2203b90fa9aeSRam Pai 	}
	/* From here to the success return the mount hash lock is held. */
22042763d119SAl Viro 	if (moving) {
22052763d119SAl Viro 		unhash_mnt(source_mnt);
220684d17192SAl Viro 		attach_mnt(source_mnt, dest_mnt, dest_mp);
2207143c8c91SAl Viro 		touch_mnt_namespace(source_mnt->mnt_ns);
220821444403SRam Pai 	} else {
220944dfd84aSDavid Howells 		if (source_mnt->mnt_ns) {
221044dfd84aSDavid Howells 			/* move from anon - the caller will destroy */
221144dfd84aSDavid Howells 			list_del_init(&source_mnt->mnt_ns->list);
221244dfd84aSDavid Howells 		}
221384d17192SAl Viro 		mnt_set_mountpoint(dest_mnt, dest_mp, source_mnt);
22141064f874SEric W. Biederman 		commit_tree(source_mnt);
221521444403SRam Pai 	}
2216b90fa9aeSRam Pai 
	/* Commit each copy that propagate_mnt() queued on tree_list. */
221738129a13SAl Viro 	hlist_for_each_entry_safe(child, n, &tree_list, mnt_hash) {
22181d6a32acSAl Viro 		struct mount *q;
221938129a13SAl Viro 		hlist_del_init(&child->mnt_hash);
		/* q is whatever __lookup_mnt() already finds at the copy's
		 * parent/mountpoint; if there is one it is re-pointed using
		 * the preallocated smp.
		 */
22201064f874SEric W. Biederman 		q = __lookup_mnt(&child->mnt_parent->mnt,
22211d6a32acSAl Viro 				 child->mnt_mountpoint);
22221064f874SEric W. Biederman 		if (q)
22231064f874SEric W. Biederman 			mnt_change_mountpoint(child, smp, q);
22243bd045ccSAl Viro 		/* Notice when we are propagating across user namespaces */
22253bd045ccSAl Viro 		if (child->mnt_parent->mnt_ns->user_ns != user_ns)
22263bd045ccSAl Viro 			lock_mnt_tree(child);
		/* Whatever lock_mnt_tree() did, the top mount of the copy
		 * itself ends up without MNT_LOCKED.
		 */
2227d728cf79SChristian Brauner 		child->mnt.mnt_flags &= ~MNT_LOCKED;
22281064f874SEric W. Biederman 		commit_tree(child);
2229b90fa9aeSRam Pai 	}
	/* Success path: smp is dropped while the mount hash lock is held. */
22301064f874SEric W. Biederman 	put_mountpoint(smp);
2231719ea2fbSAl Viro 	unlock_mount_hash();
223299b7db7bSNick Piggin 
2233b90fa9aeSRam Pai 	return 0;
2234719f5d7fSMiklos Szeredi 
	/* Entered with the mount hash lock held (propagate_mnt() failed). */
2235719f5d7fSMiklos Szeredi  out_cleanup_ids:
	/* Tear down every copy still queued, clearing the pending count of
	 * the namespace it was headed for.  NOTE(review): the loop relies on
	 * umount_tree() taking the entry off tree_list - confirm.
	 */
2236f2ebb3a9SAl Viro 	while (!hlist_empty(&tree_list)) {
2237f2ebb3a9SAl Viro 		child = hlist_entry(tree_list.first, struct mount, mnt_hash);
2238d2921684SEric W. Biederman 		child->mnt_parent->mnt_ns->pending_mounts = 0;
2239e819f152SEric W. Biederman 		umount_tree(child, UMOUNT_SYNC);
2240f2ebb3a9SAl Viro 	}
2241f2ebb3a9SAl Viro 	unlock_mount_hash();
22420fb54e50SAl Viro 	cleanup_group_ids(source_mnt, NULL);
	/* Entered without the mount hash lock. */
2243719f5d7fSMiklos Szeredi  out:
2244d2921684SEric W. Biederman 	ns->pending_mounts = 0;
22451064f874SEric W. Biederman 
	/* Error path: smp is dropped under mount_lock taken exclusively. */
22461064f874SEric W. Biederman 	read_seqlock_excl(&mount_lock);
22471064f874SEric W. Biederman 	put_mountpoint(smp);
22481064f874SEric W. Biederman 	read_sequnlock_excl(&mount_lock);
22491064f874SEric W. Biederman 
2250719f5d7fSMiklos Szeredi 	return err;
2251b90fa9aeSRam Pai }
2252b90fa9aeSRam Pai 
/*
 * lock_mount - lock the place described by @path for mounting onto it
 * @path: location to mount on; may be rewritten (see below)
 *
 * Takes the inode lock of @path->dentry and then the namespace lock.  If
 * lookup_mnt() finds a mount already sitting on @path, both locks are
 * dropped, @path is switched to the root of that mount and the whole
 * sequence is retried, so on return @path may differ from what was passed.
 *
 * On success the mountpoint obtained from get_mountpoint() is returned and
 * BOTH locks are still held.  On failure an ERR_PTR() is returned with both
 * locks released: -ENOENT when cant_mount() is true for the dentry, or the
 * error reported by get_mountpoint().
 */
225384d17192SAl Viro static struct mountpoint *lock_mount(struct path *path)
2254b12cea91SAl Viro {
2255b12cea91SAl Viro 	struct vfsmount *mnt;
225684d17192SAl Viro 	struct dentry *dentry = path->dentry;
2257b12cea91SAl Viro retry:
22585955102cSAl Viro 	inode_lock(dentry->d_inode);
225984d17192SAl Viro 	if (unlikely(cant_mount(dentry))) {
22605955102cSAl Viro 		inode_unlock(dentry->d_inode);
226184d17192SAl Viro 		return ERR_PTR(-ENOENT);
2262b12cea91SAl Viro 	}
226397216be0SAl Viro 	namespace_lock();
2264b12cea91SAl Viro 	mnt = lookup_mnt(path);
226584d17192SAl Viro 	if (likely(!mnt)) {
		/* Nothing mounted here: this is the place to use. */
22663895dbf8SEric W. Biederman 		struct mountpoint *mp = get_mountpoint(dentry);
226784d17192SAl Viro 		if (IS_ERR(mp)) {
226897216be0SAl Viro 			namespace_unlock();
22695955102cSAl Viro 			inode_unlock(dentry->d_inode);
227084d17192SAl Viro 			return mp;
227184d17192SAl Viro 		}
		/* Success: return with inode lock and namespace lock held. */
227284d17192SAl Viro 		return mp;
227384d17192SAl Viro 	}
	/* Something is mounted on @path: unlock, step onto the root of that
	 * mount and try again.  NOTE(review): the reference returned by
	 * lookup_mnt() appears to be the one stored in path->mnt - confirm.
	 */
227497216be0SAl Viro 	namespace_unlock();
22755955102cSAl Viro 	inode_unlock(path->dentry->d_inode);
2276b12cea91SAl Viro 	path_put(path);
2277b12cea91SAl Viro 	path->mnt = mnt;
227884d17192SAl Viro 	dentry = path->dentry = dget(mnt->mnt_root);
2279b12cea91SAl Viro 	goto retry;
2280b12cea91SAl Viro }
2281b12cea91SAl Viro 
/*
 * unlock_mount - undo a successful lock_mount()
 * @where: mountpoint returned by lock_mount()
 *
 * Drops the mountpoint under mount_lock (taken exclusively), then releases
 * the namespace lock and finally the inode lock of the mountpoint's dentry.
 */
228284d17192SAl Viro static void unlock_mount(struct mountpoint *where)
2283b12cea91SAl Viro {
	/* Fetched up front; presumably @where must not be touched once
	 * put_mountpoint() has run - TODO confirm.
	 */
228484d17192SAl Viro 	struct dentry *dentry = where->m_dentry;
22853895dbf8SEric W. Biederman 
22863895dbf8SEric W. Biederman 	read_seqlock_excl(&mount_lock);
228784d17192SAl Viro 	put_mountpoint(where);
22883895dbf8SEric W. Biederman 	read_sequnlock_excl(&mount_lock);
22893895dbf8SEric W. Biederman 
2290328e6d90SAl Viro 	namespace_unlock();
22915955102cSAl Viro 	inode_unlock(dentry->d_inode);
2292b12cea91SAl Viro }
2293b12cea91SAl Viro 
229484d17192SAl Viro static int graft_tree(struct mount *mnt, struct mount *p, struct mountpoint *mp)
22951da177e4SLinus Torvalds {
2296e462ec50SDavid Howells 	if (mnt->mnt.mnt_sb->s_flags & SB_NOUSER)
22971da177e4SLinus Torvalds 		return -EINVAL;
22981da177e4SLinus Torvalds 
2299e36cb0b8SDavid Howells 	if (d_is_dir(mp->m_dentry) !=
2300e36cb0b8SDavid Howells 	      d_is_dir(mnt->mnt.mnt_root))
23011da177e4SLinus Torvalds 		return -ENOTDIR;
23021da177e4SLinus Torvalds 
23032763d119SAl Viro 	return attach_recursive_mnt(mnt, p, mp, false);
23041da177e4SLinus Torvalds }
23051da177e4SLinus Torvalds 
23061da177e4SLinus Torvalds /*
23077a2e8a8fSValerie Aurora  * Sanity check the flags to change_mnt_propagation.
23087a2e8a8fSValerie Aurora  */
23097a2e8a8fSValerie Aurora 
2310e462ec50SDavid Howells static int flags_to_propagation_type(int ms_flags)
23117a2e8a8fSValerie Aurora {
2312e462ec50SDavid Howells 	int type = ms_flags & ~(MS_REC | MS_SILENT);
23137a2e8a8fSValerie Aurora 
23147a2e8a8fSValerie Aurora 	/* Fail if any non-propagation flags are set */
23157a2e8a8fSValerie Aurora 	if (type & ~(MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
23167a2e8a8fSValerie Aurora 		return 0;
23177a2e8a8fSValerie Aurora 	/* Only one propagation flag should be set */
23187a2e8a8fSValerie Aurora 	if (!is_power_of_2(type))
23197a2e8a8fSValerie Aurora 		return 0;
23207a2e8a8fSValerie Aurora 	return type;
23217a2e8a8fSValerie Aurora }
23227a2e8a8fSValerie Aurora 
23237a2e8a8fSValerie Aurora /*
232407b20889SRam Pai  * recursively change the type of the mountpoint.
232507b20889SRam Pai  */
2326e462ec50SDavid Howells static int do_change_type(struct path *path, int ms_flags)
232707b20889SRam Pai {
2328315fc83eSAl Viro 	struct mount *m;
23294b8b21f4SAl Viro 	struct mount *mnt = real_mount(path->mnt);
2330e462ec50SDavid Howells 	int recurse = ms_flags & MS_REC;
23317a2e8a8fSValerie Aurora 	int type;
2332719f5d7fSMiklos Szeredi 	int err = 0;
233307b20889SRam Pai 
23342d92ab3cSAl Viro 	if (path->dentry != path->mnt->mnt_root)
233507b20889SRam Pai 		return -EINVAL;
233607b20889SRam Pai 
2337e462ec50SDavid Howells 	type = flags_to_propagation_type(ms_flags);
23387a2e8a8fSValerie Aurora 	if (!type)
23397a2e8a8fSValerie Aurora 		return -EINVAL;
23407a2e8a8fSValerie Aurora 
234197216be0SAl Viro 	namespace_lock();
2342719f5d7fSMiklos Szeredi 	if (type == MS_SHARED) {
2343719f5d7fSMiklos Szeredi 		err = invent_group_ids(mnt, recurse);
2344719f5d7fSMiklos Szeredi 		if (err)
2345719f5d7fSMiklos Szeredi 			goto out_unlock;
2346719f5d7fSMiklos Szeredi 	}
2347719f5d7fSMiklos Szeredi 
2348719ea2fbSAl Viro 	lock_mount_hash();
2349909b0a88SAl Viro 	for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL))
23500f0afb1dSAl Viro 		change_mnt_propagation(m, type);
2351719ea2fbSAl Viro 	unlock_mount_hash();
2352719f5d7fSMiklos Szeredi 
2353719f5d7fSMiklos Szeredi  out_unlock:
235497216be0SAl Viro 	namespace_unlock();
2355719f5d7fSMiklos Szeredi 	return err;
235607b20889SRam Pai }
235707b20889SRam Pai 
2358a07b2000SAl Viro static struct mount *__do_loopback(struct path *old_path, int recurse)
2359a07b2000SAl Viro {
2360a07b2000SAl Viro 	struct mount *mnt = ERR_PTR(-EINVAL), *old = real_mount(old_path->mnt);
2361a07b2000SAl Viro 
2362a07b2000SAl Viro 	if (IS_MNT_UNBINDABLE(old))
2363a07b2000SAl Viro 		return mnt;
2364a07b2000SAl Viro 
2365a07b2000SAl Viro 	if (!check_mnt(old) && old_path->dentry->d_op != &ns_dentry_operations)
2366a07b2000SAl Viro 		return mnt;
2367a07b2000SAl Viro 
2368a07b2000SAl Viro 	if (!recurse && has_locked_children(old, old_path->dentry))
2369a07b2000SAl Viro 		return mnt;
2370a07b2000SAl Viro 
2371a07b2000SAl Viro 	if (recurse)
2372a07b2000SAl Viro 		mnt = copy_tree(old, old_path->dentry, CL_COPY_MNT_NS_FILE);
2373a07b2000SAl Viro 	else
2374a07b2000SAl Viro 		mnt = clone_mnt(old, old_path->dentry, 0);
2375a07b2000SAl Viro 
2376a07b2000SAl Viro 	if (!IS_ERR(mnt))
2377a07b2000SAl Viro 		mnt->mnt.mnt_flags &= ~MNT_LOCKED;
2378a07b2000SAl Viro 
2379a07b2000SAl Viro 	return mnt;
2380a07b2000SAl Viro }
2381a07b2000SAl Viro 
238207b20889SRam Pai /*
23831da177e4SLinus Torvalds  * do loopback mount.
 *
 * @path:     where to mount; may be rewritten by lock_mount()
 * @old_name: kernel-space pathname of the source; NULL or "" gives -EINVAL
 * @recurse:  non-zero to bind the whole tree, zero for a single mount
 *
 * Resolves @old_name, builds the copy with __do_loopback() and grafts it
 * onto @path.  Returns 0 on success or a negative errno: the kern_path()
 * error, -EINVAL when mnt_ns_loop() is true for the source or check_mnt()
 * rejects the destination mount, or the error from lock_mount(),
 * __do_loopback() or graft_tree().
23841da177e4SLinus Torvalds  */
2385808d4e3cSAl Viro static int do_loopback(struct path *path, const char *old_name,
23862dafe1c4SEric Sandeen 				int recurse)
23871da177e4SLinus Torvalds {
23882d92ab3cSAl Viro 	struct path old_path;
2389a07b2000SAl Viro 	struct mount *mnt = NULL, *parent;
239084d17192SAl Viro 	struct mountpoint *mp;
239157eccb83SAl Viro 	int err;
23921da177e4SLinus Torvalds 	if (!old_name || !*old_name)
23931da177e4SLinus Torvalds 		return -EINVAL;
2394815d405cSTrond Myklebust 	err = kern_path(old_name, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &old_path);
23951da177e4SLinus Torvalds 	if (err)
23961da177e4SLinus Torvalds 		return err;
23971da177e4SLinus Torvalds 
	/* Default result for the two bare "goto" failure paths below. */
23988823c079SEric W. Biederman 	err = -EINVAL;
23994ce5d2b1SEric W. Biederman 	if (mnt_ns_loop(old_path.dentry))
24008823c079SEric W. Biederman 		goto out;
24018823c079SEric W. Biederman 
240284d17192SAl Viro 	mp = lock_mount(path);
2403a07b2000SAl Viro 	if (IS_ERR(mp)) {
240484d17192SAl Viro 		err = PTR_ERR(mp);
24059676f0c6SRam Pai 		goto out;
2406a07b2000SAl Viro 	}
24079676f0c6SRam Pai 
	/* path->mnt is read only after lock_mount(), which may change it. */
240884d17192SAl Viro 	parent = real_mount(path->mnt);
2409e149ed2bSAl Viro 	if (!check_mnt(parent))
2410e149ed2bSAl Viro 		goto out2;
2411e149ed2bSAl Viro 
2412a07b2000SAl Viro 	mnt = __do_loopback(&old_path, recurse);
2413be34d1a3SDavid Howells 	if (IS_ERR(mnt)) {
2414be34d1a3SDavid Howells 		err = PTR_ERR(mnt);
2415e9c5d8a5SAndrey Vagin 		goto out2;
2416be34d1a3SDavid Howells 	}
2417ccd48bc7SAl Viro 
241884d17192SAl Viro 	err = graft_tree(mnt, parent, mp);
24191da177e4SLinus Torvalds 	if (err) {
		/* Grafting failed: dispose of the copy made above. */
2420719ea2fbSAl Viro 		lock_mount_hash();
2421e819f152SEric W. Biederman 		umount_tree(mnt, UMOUNT_SYNC);
2422719ea2fbSAl Viro 		unlock_mount_hash();
24235b83d2c5SRam Pai 	}
2424b12cea91SAl Viro out2:
242584d17192SAl Viro 	unlock_mount(mp);
2426ccd48bc7SAl Viro out:
24272d92ab3cSAl Viro 	path_put(&old_path);
24281da177e4SLinus Torvalds 	return err;
24291da177e4SLinus Torvalds }
24301da177e4SLinus Torvalds 
/*
 * open_detached_copy - copy the mount (tree) at @path into a fresh namespace
 *                      and open an O_PATH file on the copy
 * @path:      source location; on success path->mnt is replaced by the copy
 * @recursive: true to copy the whole tree, false for a single mount
 *
 * Returns the opened file, or an ERR_PTR() carrying the error from
 * alloc_mnt_ns(), __do_loopback() or dentry_open().  When the copy cannot be
 * made, the new namespace is freed and @path is left untouched.
 */
2431a07b2000SAl Viro static struct file *open_detached_copy(struct path *path, bool recursive)
2432a07b2000SAl Viro {
2433a07b2000SAl Viro 	struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns;
	/* NOTE(review): the "true" argument presumably asks for an anonymous
	 * namespace - confirm against alloc_mnt_ns().
	 */
2434a07b2000SAl Viro 	struct mnt_namespace *ns = alloc_mnt_ns(user_ns, true);
2435a07b2000SAl Viro 	struct mount *mnt, *p;
2436a07b2000SAl Viro 	struct file *file;
2437a07b2000SAl Viro 
2438a07b2000SAl Viro 	if (IS_ERR(ns))
2439a07b2000SAl Viro 		return ERR_CAST(ns);
2440a07b2000SAl Viro 
2441a07b2000SAl Viro 	namespace_lock();
2442a07b2000SAl Viro 	mnt = __do_loopback(path, recursive);
2443a07b2000SAl Viro 	if (IS_ERR(mnt)) {
2444a07b2000SAl Viro 		namespace_unlock();
2445a07b2000SAl Viro 		free_mnt_ns(ns);
2446a07b2000SAl Viro 		return ERR_CAST(mnt);
2447a07b2000SAl Viro 	}
2448a07b2000SAl Viro 
2449a07b2000SAl Viro 	lock_mount_hash();
	/* Put every mount of the copy into the new namespace and count it. */
2450a07b2000SAl Viro 	for (p = mnt; p; p = next_mnt(p, mnt)) {
2451a07b2000SAl Viro 		p->mnt_ns = ns;
2452a07b2000SAl Viro 		ns->mounts++;
2453a07b2000SAl Viro 	}
2454a07b2000SAl Viro 	ns->root = mnt;
2455a07b2000SAl Viro 	list_add_tail(&ns->list, &mnt->mnt_list);
	/* Reference that path->mnt will carry once it is switched below. */
2456a07b2000SAl Viro 	mntget(&mnt->mnt);
2457a07b2000SAl Viro 	unlock_mount_hash();
2458a07b2000SAl Viro 	namespace_unlock();
2459a07b2000SAl Viro 
	/* Swap the mount in @path: drop the original, install the copy. */
2460a07b2000SAl Viro 	mntput(path->mnt);
2461a07b2000SAl Viro 	path->mnt = &mnt->mnt;
2462a07b2000SAl Viro 	file = dentry_open(path, O_PATH, current_cred());
2463a07b2000SAl Viro 	if (IS_ERR(file))
		/* No file will ever own the copy, so dissolve it right away. */
2464a07b2000SAl Viro 		dissolve_on_fput(path->mnt);
2465a07b2000SAl Viro 	else
2466a07b2000SAl Viro 		file->f_mode |= FMODE_NEED_UNMOUNT;
2467a07b2000SAl Viro 	return file;
2468a07b2000SAl Viro }
2469a07b2000SAl Viro 
/*
 * open_tree(2) - open an O_PATH file descriptor on @filename (relative to
 * @dfd), optionally on a detached copy of the mount found there.
 *
 * @flags may combine AT_EMPTY_PATH, AT_NO_AUTOMOUNT, AT_RECURSIVE,
 * AT_SYMLINK_NOFOLLOW, OPEN_TREE_CLONE and OPEN_TREE_CLOEXEC.
 *
 * Returns the new file descriptor, or a negative errno:
 *   -EINVAL  unknown flag bits, or AT_RECURSIVE without OPEN_TREE_CLONE
 *   -EPERM   OPEN_TREE_CLONE requested but may_mount() is false
 *   other    failure to get a descriptor, to look up the path, or to open it
 */
24702658ce09SBen Dooks SYSCALL_DEFINE3(open_tree, int, dfd, const char __user *, filename, unsigned, flags)
2471a07b2000SAl Viro {
2472a07b2000SAl Viro 	struct file *file;
2473a07b2000SAl Viro 	struct path path;
2474a07b2000SAl Viro 	int lookup_flags = LOOKUP_AUTOMOUNT | LOOKUP_FOLLOW;
2475a07b2000SAl Viro 	bool detached = flags & OPEN_TREE_CLONE;
2476a07b2000SAl Viro 	int error;
2477a07b2000SAl Viro 	int fd;
2478a07b2000SAl Viro 
	/* Lets "flags & O_CLOEXEC" below stand in for OPEN_TREE_CLOEXEC. */
2479a07b2000SAl Viro 	BUILD_BUG_ON(OPEN_TREE_CLOEXEC != O_CLOEXEC);
2480a07b2000SAl Viro 
2481a07b2000SAl Viro 	if (flags & ~(AT_EMPTY_PATH | AT_NO_AUTOMOUNT | AT_RECURSIVE |
2482a07b2000SAl Viro 		      AT_SYMLINK_NOFOLLOW | OPEN_TREE_CLONE |
2483a07b2000SAl Viro 		      OPEN_TREE_CLOEXEC))
2484a07b2000SAl Viro 		return -EINVAL;
2485a07b2000SAl Viro 
	/* AT_RECURSIVE is only accepted together with OPEN_TREE_CLONE. */
2486a07b2000SAl Viro 	if ((flags & (AT_RECURSIVE | OPEN_TREE_CLONE)) == AT_RECURSIVE)
2487a07b2000SAl Viro 		return -EINVAL;
2488a07b2000SAl Viro 
	/* Translate the AT_* flags into path lookup flags. */
2489a07b2000SAl Viro 	if (flags & AT_NO_AUTOMOUNT)
2490a07b2000SAl Viro 		lookup_flags &= ~LOOKUP_AUTOMOUNT;
2491a07b2000SAl Viro 	if (flags & AT_SYMLINK_NOFOLLOW)
2492a07b2000SAl Viro 		lookup_flags &= ~LOOKUP_FOLLOW;
2493a07b2000SAl Viro 	if (flags & AT_EMPTY_PATH)
2494a07b2000SAl Viro 		lookup_flags |= LOOKUP_EMPTY;
2495a07b2000SAl Viro 
2496a07b2000SAl Viro 	if (detached && !may_mount())
2497a07b2000SAl Viro 		return -EPERM;
2498a07b2000SAl Viro 
	/* The descriptor is reserved first and only installed on success. */
2499a07b2000SAl Viro 	fd = get_unused_fd_flags(flags & O_CLOEXEC);
2500a07b2000SAl Viro 	if (fd < 0)
2501a07b2000SAl Viro 		return fd;
2502a07b2000SAl Viro 
2503a07b2000SAl Viro 	error = user_path_at(dfd, filename, lookup_flags, &path);
2504a07b2000SAl Viro 	if (unlikely(error)) {
2505a07b2000SAl Viro 		file = ERR_PTR(error);
2506a07b2000SAl Viro 	} else {
2507a07b2000SAl Viro 		if (detached)
2508a07b2000SAl Viro 			file = open_detached_copy(&path, flags & AT_RECURSIVE);
2509a07b2000SAl Viro 		else
2510a07b2000SAl Viro 			file = dentry_open(&path, O_PATH, current_cred());
		/* The lookup reference is dropped whether or not the open
		 * succeeded.
		 */
2511a07b2000SAl Viro 		path_put(&path);
2512a07b2000SAl Viro 	}
2513a07b2000SAl Viro 	if (IS_ERR(file)) {
2514a07b2000SAl Viro 		put_unused_fd(fd);
2515a07b2000SAl Viro 		return PTR_ERR(file);
2516a07b2000SAl Viro 	}
2517a07b2000SAl Viro 	fd_install(fd, file);
2518a07b2000SAl Viro 	return fd;
2519a07b2000SAl Viro }
2520a07b2000SAl Viro 
252143f5e655SDavid Howells /*
252243f5e655SDavid Howells  * Don't allow locked mount flags to be cleared.
252343f5e655SDavid Howells  *
252443f5e655SDavid Howells  * No locks need to be held here while testing the various MNT_LOCK
252543f5e655SDavid Howells  * flags because those flags can never be cleared once they are set.
252643f5e655SDavid Howells  */
252743f5e655SDavid Howells static bool can_change_locked_flags(struct mount *mnt, unsigned int mnt_flags)
25282e4b7fcdSDave Hansen {
252943f5e655SDavid Howells 	unsigned int fl = mnt->mnt.mnt_flags;
25302e4b7fcdSDave Hansen 
253143f5e655SDavid Howells 	if ((fl & MNT_LOCK_READONLY) &&
253243f5e655SDavid Howells 	    !(mnt_flags & MNT_READONLY))
253343f5e655SDavid Howells 		return false;
253443f5e655SDavid Howells 
253543f5e655SDavid Howells 	if ((fl & MNT_LOCK_NODEV) &&
253643f5e655SDavid Howells 	    !(mnt_flags & MNT_NODEV))
253743f5e655SDavid Howells 		return false;
253843f5e655SDavid Howells 
253943f5e655SDavid Howells 	if ((fl & MNT_LOCK_NOSUID) &&
254043f5e655SDavid Howells 	    !(mnt_flags & MNT_NOSUID))
254143f5e655SDavid Howells 		return false;
254243f5e655SDavid Howells 
254343f5e655SDavid Howells 	if ((fl & MNT_LOCK_NOEXEC) &&
254443f5e655SDavid Howells 	    !(mnt_flags & MNT_NOEXEC))
254543f5e655SDavid Howells 		return false;
254643f5e655SDavid Howells 
254743f5e655SDavid Howells 	if ((fl & MNT_LOCK_ATIME) &&
254843f5e655SDavid Howells 	    ((fl & MNT_ATIME_MASK) != (mnt_flags & MNT_ATIME_MASK)))
254943f5e655SDavid Howells 		return false;
255043f5e655SDavid Howells 
255143f5e655SDavid Howells 	return true;
255243f5e655SDavid Howells }
255343f5e655SDavid Howells 
255443f5e655SDavid Howells static int change_mount_ro_state(struct mount *mnt, unsigned int mnt_flags)
255543f5e655SDavid Howells {
255643f5e655SDavid Howells 	bool readonly_request = (mnt_flags & MNT_READONLY);
255743f5e655SDavid Howells 
255843f5e655SDavid Howells 	if (readonly_request == __mnt_is_readonly(&mnt->mnt))
25592e4b7fcdSDave Hansen 		return 0;
25602e4b7fcdSDave Hansen 
25612e4b7fcdSDave Hansen 	if (readonly_request)
256243f5e655SDavid Howells 		return mnt_make_readonly(mnt);
256343f5e655SDavid Howells 
256468847c94SChristian Brauner 	mnt->mnt.mnt_flags &= ~MNT_READONLY;
256568847c94SChristian Brauner 	return 0;
256643f5e655SDavid Howells }
256743f5e655SDavid Howells 
256843f5e655SDavid Howells static void set_mount_attributes(struct mount *mnt, unsigned int mnt_flags)
256943f5e655SDavid Howells {
257043f5e655SDavid Howells 	mnt_flags |= mnt->mnt.mnt_flags & ~MNT_USER_SETTABLE_MASK;
257143f5e655SDavid Howells 	mnt->mnt.mnt_flags = mnt_flags;
257243f5e655SDavid Howells 	touch_mnt_namespace(mnt->mnt_ns);
257343f5e655SDavid Howells }
257443f5e655SDavid Howells 
2575f8b92ba6SDeepa Dinamani static void mnt_warn_timestamp_expiry(struct path *mountpoint, struct vfsmount *mnt)
2576f8b92ba6SDeepa Dinamani {
2577f8b92ba6SDeepa Dinamani 	struct super_block *sb = mnt->mnt_sb;
2578f8b92ba6SDeepa Dinamani 
2579f8b92ba6SDeepa Dinamani 	if (!__mnt_is_readonly(mnt) &&
2580f8b92ba6SDeepa Dinamani 	   (ktime_get_real_seconds() + TIME_UPTIME_SEC_MAX > sb->s_time_max)) {
2581f8b92ba6SDeepa Dinamani 		char *buf = (char *)__get_free_page(GFP_KERNEL);
2582f8b92ba6SDeepa Dinamani 		char *mntpath = buf ? d_path(mountpoint, buf, PAGE_SIZE) : ERR_PTR(-ENOMEM);
2583f8b92ba6SDeepa Dinamani 		struct tm tm;
2584f8b92ba6SDeepa Dinamani 
2585f8b92ba6SDeepa Dinamani 		time64_to_tm(sb->s_time_max, 0, &tm);
2586f8b92ba6SDeepa Dinamani 
25870ecee669SEric Biggers 		pr_warn("%s filesystem being %s at %s supports timestamps until %04ld (0x%llx)\n",
25880ecee669SEric Biggers 			sb->s_type->name,
25890ecee669SEric Biggers 			is_mounted(mnt) ? "remounted" : "mounted",
25900ecee669SEric Biggers 			mntpath,
2591f8b92ba6SDeepa Dinamani 			tm.tm_year+1900, (unsigned long long)sb->s_time_max);
2592f8b92ba6SDeepa Dinamani 
2593f8b92ba6SDeepa Dinamani 		free_page((unsigned long)buf);
2594f8b92ba6SDeepa Dinamani 	}
2595f8b92ba6SDeepa Dinamani }
2596f8b92ba6SDeepa Dinamani 
259743f5e655SDavid Howells /*
259843f5e655SDavid Howells  * Handle reconfiguration of the mountpoint only without alteration of the
259943f5e655SDavid Howells  * superblock it refers to.  This is triggered by specifying MS_REMOUNT|MS_BIND
260043f5e655SDavid Howells  * to mount(2).
260143f5e655SDavid Howells  */
260243f5e655SDavid Howells static int do_reconfigure_mnt(struct path *path, unsigned int mnt_flags)
260343f5e655SDavid Howells {
260443f5e655SDavid Howells 	struct super_block *sb = path->mnt->mnt_sb;
260543f5e655SDavid Howells 	struct mount *mnt = real_mount(path->mnt);
260643f5e655SDavid Howells 	int ret;
260743f5e655SDavid Howells 
260843f5e655SDavid Howells 	if (!check_mnt(mnt))
260943f5e655SDavid Howells 		return -EINVAL;
261043f5e655SDavid Howells 
261143f5e655SDavid Howells 	if (path->dentry != mnt->mnt.mnt_root)
261243f5e655SDavid Howells 		return -EINVAL;
261343f5e655SDavid Howells 
261443f5e655SDavid Howells 	if (!can_change_locked_flags(mnt, mnt_flags))
261543f5e655SDavid Howells 		return -EPERM;
261643f5e655SDavid Howells 
2617e58ace1aSChristian Brauner 	/*
2618e58ace1aSChristian Brauner 	 * We're only checking whether the superblock is read-only not
2619e58ace1aSChristian Brauner 	 * changing it, so only take down_read(&sb->s_umount).
2620e58ace1aSChristian Brauner 	 */
2621e58ace1aSChristian Brauner 	down_read(&sb->s_umount);
262268847c94SChristian Brauner 	lock_mount_hash();
262343f5e655SDavid Howells 	ret = change_mount_ro_state(mnt, mnt_flags);
262443f5e655SDavid Howells 	if (ret == 0)
262543f5e655SDavid Howells 		set_mount_attributes(mnt, mnt_flags);
262668847c94SChristian Brauner 	unlock_mount_hash();
2627e58ace1aSChristian Brauner 	up_read(&sb->s_umount);
2628f8b92ba6SDeepa Dinamani 
2629f8b92ba6SDeepa Dinamani 	mnt_warn_timestamp_expiry(path, &mnt->mnt);
2630f8b92ba6SDeepa Dinamani 
263143f5e655SDavid Howells 	return ret;
26322e4b7fcdSDave Hansen }
26332e4b7fcdSDave Hansen 
26341da177e4SLinus Torvalds /*
26351da177e4SLinus Torvalds  * change filesystem flags. dir should be a physical root of filesystem.
26361da177e4SLinus Torvalds  * If you've mounted a non-root directory somewhere and want to do remount
26371da177e4SLinus Torvalds  * on it - tough luck.
26381da177e4SLinus Torvalds  */
2639e462ec50SDavid Howells static int do_remount(struct path *path, int ms_flags, int sb_flags,
2640e462ec50SDavid Howells 		      int mnt_flags, void *data)
26411da177e4SLinus Torvalds {
26421da177e4SLinus Torvalds 	int err;
26432d92ab3cSAl Viro 	struct super_block *sb = path->mnt->mnt_sb;
2644143c8c91SAl Viro 	struct mount *mnt = real_mount(path->mnt);
26458d0347f6SDavid Howells 	struct fs_context *fc;
26461da177e4SLinus Torvalds 
2647143c8c91SAl Viro 	if (!check_mnt(mnt))
26481da177e4SLinus Torvalds 		return -EINVAL;
26491da177e4SLinus Torvalds 
26502d92ab3cSAl Viro 	if (path->dentry != path->mnt->mnt_root)
26511da177e4SLinus Torvalds 		return -EINVAL;
26521da177e4SLinus Torvalds 
265343f5e655SDavid Howells 	if (!can_change_locked_flags(mnt, mnt_flags))
265407b64558SEric W. Biederman 		return -EPERM;
26559566d674SEric W. Biederman 
26568d0347f6SDavid Howells 	fc = fs_context_for_reconfigure(path->dentry, sb_flags, MS_RMT_MASK);
26578d0347f6SDavid Howells 	if (IS_ERR(fc))
26588d0347f6SDavid Howells 		return PTR_ERR(fc);
2659ff36fe2cSEric Paris 
2660b330966fSMiklos Szeredi 	fc->oldapi = true;
26618d0347f6SDavid Howells 	err = parse_monolithic_mount_data(fc, data);
26628d0347f6SDavid Howells 	if (!err) {
26631da177e4SLinus Torvalds 		down_write(&sb->s_umount);
266457eccb83SAl Viro 		err = -EPERM;
266543f5e655SDavid Howells 		if (ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) {
26668d0347f6SDavid Howells 			err = reconfigure_super(fc);
266768847c94SChristian Brauner 			if (!err) {
266868847c94SChristian Brauner 				lock_mount_hash();
266943f5e655SDavid Howells 				set_mount_attributes(mnt, mnt_flags);
267068847c94SChristian Brauner 				unlock_mount_hash();
267168847c94SChristian Brauner 			}
26720e55a7ccSDan Williams 		}
26736339dab8SAl Viro 		up_write(&sb->s_umount);
26748d0347f6SDavid Howells 	}
2675f8b92ba6SDeepa Dinamani 
2676f8b92ba6SDeepa Dinamani 	mnt_warn_timestamp_expiry(path, &mnt->mnt);
2677f8b92ba6SDeepa Dinamani 
26788d0347f6SDavid Howells 	put_fs_context(fc);
26791da177e4SLinus Torvalds 	return err;
26801da177e4SLinus Torvalds }
26811da177e4SLinus Torvalds 
/*
 * Walk the mount tree rooted at @mnt with next_mnt() and return 1 as soon
 * as an unbindable mount is found, 0 if there is none.
 */
static inline int tree_contains_unbindable(struct mount *mnt)
{
	struct mount *cur = mnt;

	while (cur) {
		if (IS_MNT_UNBINDABLE(cur))
			return 1;
		cur = next_mnt(cur, mnt);
	}
	return 0;
}
26919676f0c6SRam Pai 
269244dfd84aSDavid Howells /*
269344dfd84aSDavid Howells  * Check that there aren't references to earlier/same mount namespaces in the
269444dfd84aSDavid Howells  * specified subtree.  Such references can act as pins for mount namespaces
269544dfd84aSDavid Howells  * that aren't checked by the mount-cycle checking code, thereby allowing
269644dfd84aSDavid Howells  * cycles to be made.
269744dfd84aSDavid Howells  */
269844dfd84aSDavid Howells static bool check_for_nsfs_mounts(struct mount *subtree)
269944dfd84aSDavid Howells {
270044dfd84aSDavid Howells 	struct mount *p;
270144dfd84aSDavid Howells 	bool ret = false;
270244dfd84aSDavid Howells 
270344dfd84aSDavid Howells 	lock_mount_hash();
270444dfd84aSDavid Howells 	for (p = subtree; p; p = next_mnt(p, subtree))
270544dfd84aSDavid Howells 		if (mnt_ns_loop(p->mnt.mnt_root))
270644dfd84aSDavid Howells 			goto out;
270744dfd84aSDavid Howells 
270844dfd84aSDavid Howells 	ret = true;
270944dfd84aSDavid Howells out:
271044dfd84aSDavid Howells 	unlock_mount_hash();
271144dfd84aSDavid Howells 	return ret;
271244dfd84aSDavid Howells }
271344dfd84aSDavid Howells 
27149ffb14efSPavel Tikhomirov static int do_set_group(struct path *from_path, struct path *to_path)
27159ffb14efSPavel Tikhomirov {
27169ffb14efSPavel Tikhomirov 	struct mount *from, *to;
27179ffb14efSPavel Tikhomirov 	int err;
27189ffb14efSPavel Tikhomirov 
27199ffb14efSPavel Tikhomirov 	from = real_mount(from_path->mnt);
27209ffb14efSPavel Tikhomirov 	to = real_mount(to_path->mnt);
27219ffb14efSPavel Tikhomirov 
27229ffb14efSPavel Tikhomirov 	namespace_lock();
27239ffb14efSPavel Tikhomirov 
27249ffb14efSPavel Tikhomirov 	err = -EINVAL;
27259ffb14efSPavel Tikhomirov 	/* To and From must be mounted */
27269ffb14efSPavel Tikhomirov 	if (!is_mounted(&from->mnt))
27279ffb14efSPavel Tikhomirov 		goto out;
27289ffb14efSPavel Tikhomirov 	if (!is_mounted(&to->mnt))
27299ffb14efSPavel Tikhomirov 		goto out;
27309ffb14efSPavel Tikhomirov 
27319ffb14efSPavel Tikhomirov 	err = -EPERM;
27329ffb14efSPavel Tikhomirov 	/* We should be allowed to modify mount namespaces of both mounts */
27339ffb14efSPavel Tikhomirov 	if (!ns_capable(from->mnt_ns->user_ns, CAP_SYS_ADMIN))
27349ffb14efSPavel Tikhomirov 		goto out;
27359ffb14efSPavel Tikhomirov 	if (!ns_capable(to->mnt_ns->user_ns, CAP_SYS_ADMIN))
27369ffb14efSPavel Tikhomirov 		goto out;
27379ffb14efSPavel Tikhomirov 
27389ffb14efSPavel Tikhomirov 	err = -EINVAL;
27399ffb14efSPavel Tikhomirov 	/* To and From paths should be mount roots */
27409ffb14efSPavel Tikhomirov 	if (from_path->dentry != from_path->mnt->mnt_root)
27419ffb14efSPavel Tikhomirov 		goto out;
27429ffb14efSPavel Tikhomirov 	if (to_path->dentry != to_path->mnt->mnt_root)
27439ffb14efSPavel Tikhomirov 		goto out;
27449ffb14efSPavel Tikhomirov 
27459ffb14efSPavel Tikhomirov 	/* Setting sharing groups is only allowed across same superblock */
27469ffb14efSPavel Tikhomirov 	if (from->mnt.mnt_sb != to->mnt.mnt_sb)
27479ffb14efSPavel Tikhomirov 		goto out;
27489ffb14efSPavel Tikhomirov 
27499ffb14efSPavel Tikhomirov 	/* From mount root should be wider than To mount root */
27509ffb14efSPavel Tikhomirov 	if (!is_subdir(to->mnt.mnt_root, from->mnt.mnt_root))
27519ffb14efSPavel Tikhomirov 		goto out;
27529ffb14efSPavel Tikhomirov 
27539ffb14efSPavel Tikhomirov 	/* From mount should not have locked children in place of To's root */
27549ffb14efSPavel Tikhomirov 	if (has_locked_children(from, to->mnt.mnt_root))
27559ffb14efSPavel Tikhomirov 		goto out;
27569ffb14efSPavel Tikhomirov 
27579ffb14efSPavel Tikhomirov 	/* Setting sharing groups is only allowed on private mounts */
27589ffb14efSPavel Tikhomirov 	if (IS_MNT_SHARED(to) || IS_MNT_SLAVE(to))
27599ffb14efSPavel Tikhomirov 		goto out;
27609ffb14efSPavel Tikhomirov 
27619ffb14efSPavel Tikhomirov 	/* From should not be private */
27629ffb14efSPavel Tikhomirov 	if (!IS_MNT_SHARED(from) && !IS_MNT_SLAVE(from))
27639ffb14efSPavel Tikhomirov 		goto out;
27649ffb14efSPavel Tikhomirov 
27659ffb14efSPavel Tikhomirov 	if (IS_MNT_SLAVE(from)) {
27669ffb14efSPavel Tikhomirov 		struct mount *m = from->mnt_master;
27679ffb14efSPavel Tikhomirov 
27689ffb14efSPavel Tikhomirov 		list_add(&to->mnt_slave, &m->mnt_slave_list);
27699ffb14efSPavel Tikhomirov 		to->mnt_master = m;
27709ffb14efSPavel Tikhomirov 	}
27719ffb14efSPavel Tikhomirov 
27729ffb14efSPavel Tikhomirov 	if (IS_MNT_SHARED(from)) {
27739ffb14efSPavel Tikhomirov 		to->mnt_group_id = from->mnt_group_id;
27749ffb14efSPavel Tikhomirov 		list_add(&to->mnt_share, &from->mnt_share);
27759ffb14efSPavel Tikhomirov 		lock_mount_hash();
27769ffb14efSPavel Tikhomirov 		set_mnt_shared(to);
27779ffb14efSPavel Tikhomirov 		unlock_mount_hash();
27789ffb14efSPavel Tikhomirov 	}
27799ffb14efSPavel Tikhomirov 
27809ffb14efSPavel Tikhomirov 	err = 0;
27819ffb14efSPavel Tikhomirov out:
27829ffb14efSPavel Tikhomirov 	namespace_unlock();
27839ffb14efSPavel Tikhomirov 	return err;
27849ffb14efSPavel Tikhomirov }
27859ffb14efSPavel Tikhomirov 
/*
 * Move the mount at @old_path so that it is attached at @new_path.
 *
 * Returns the error from lock_mount() if the destination cannot be locked,
 * -EINVAL if one of the sanity checks fails, -ELOOP if the move would
 * create a loop (nsfs reference or destination inside the moved tree), the
 * error from attach_recursive_mnt(), or 0 on success.
 */
static int do_move_mount(struct path *old_path, struct path *new_path)
{
	struct mnt_namespace *ns;
	struct mount *p;
	struct mount *old;
	struct mount *parent;
	struct mountpoint *mp, *old_mp;
	int err;
	bool attached;

	mp = lock_mount(new_path);
	if (IS_ERR(mp))
		return PTR_ERR(mp);

	/*
	 * Snapshot the pre-move state of the source: parent, old_mp and ns
	 * are needed after attach_recursive_mnt() has re-attached it.
	 */
	old = real_mount(old_path->mnt);
	p = real_mount(new_path->mnt);
	parent = old->mnt_parent;
	attached = mnt_has_parent(old);
	old_mp = old->mnt_mp;
	ns = old->mnt_ns;

	err = -EINVAL;
	/* The mountpoint must be in our namespace. */
	if (!check_mnt(p))
		goto out;

	/* The thing moved must be mounted... */
	if (!is_mounted(&old->mnt))
		goto out;

	/* ... and either ours or the root of anon namespace */
	if (!(attached ? check_mnt(old) : is_anon_ns(ns)))
		goto out;

	/* A locked mount may not be moved. */
	if (old->mnt.mnt_flags & MNT_LOCKED)
		goto out;

	/* Only the root of a mount can be moved. */
	if (old_path->dentry != old_path->mnt->mnt_root)
		goto out;

	/* Source and destination must both be directories or both not. */
	if (d_is_dir(new_path->dentry) !=
	    d_is_dir(old_path->dentry))
		goto out;
	/*
	 * Don't move a mount residing in a shared parent.
	 */
	if (attached && IS_MNT_SHARED(parent))
		goto out;
	/*
	 * Don't move a mount tree containing unbindable mounts to a destination
	 * mount which is shared.
	 */
	if (IS_MNT_SHARED(p) && tree_contains_unbindable(old))
		goto out;
	err = -ELOOP;
	if (!check_for_nsfs_mounts(old))
		goto out;
	/* The destination must not be the moved mount or one of its descendants. */
	for (; mnt_has_parent(p); p = p->mnt_parent)
		if (p == old)
			goto out;

	err = attach_recursive_mnt(old, real_mount(new_path->mnt), mp,
				   attached);
	if (err)
		goto out;

	/*
	 * Once the mount has been moved, it should no longer be expired
	 * automatically.
	 */
	list_del_init(&old->mnt_expire);
	if (attached)
		put_mountpoint(old_mp);
out:
	unlock_mount(mp);
	if (!err) {
		/*
		 * Success: release what belonged to the old location - the
		 * previous parent for an attached mount, otherwise the
		 * namespace the mount was the root of.
		 */
		if (attached)
			mntput_no_expire(parent);
		else
			free_mnt_ns(ns);
	}
	return err;
}
28672db154b3SDavid Howells 
28682db154b3SDavid Howells static int do_move_mount_old(struct path *path, const char *old_name)
28692db154b3SDavid Howells {
28702db154b3SDavid Howells 	struct path old_path;
28712db154b3SDavid Howells 	int err;
28722db154b3SDavid Howells 
28732db154b3SDavid Howells 	if (!old_name || !*old_name)
28742db154b3SDavid Howells 		return -EINVAL;
28752db154b3SDavid Howells 
28762db154b3SDavid Howells 	err = kern_path(old_name, LOOKUP_FOLLOW, &old_path);
28772db154b3SDavid Howells 	if (err)
28782db154b3SDavid Howells 		return err;
28792db154b3SDavid Howells 
28802db154b3SDavid Howells 	err = do_move_mount(&old_path, path);
28812d92ab3cSAl Viro 	path_put(&old_path);
28821da177e4SLinus Torvalds 	return err;
28831da177e4SLinus Torvalds }
28841da177e4SLinus Torvalds 
28859d412a43SAl Viro /*
28869d412a43SAl Viro  * add a mount into a namespace's mount tree
28879d412a43SAl Viro  */
28888f11538eSAl Viro static int do_add_mount(struct mount *newmnt, struct mountpoint *mp,
28898f11538eSAl Viro 			struct path *path, int mnt_flags)
28909d412a43SAl Viro {
28918f11538eSAl Viro 	struct mount *parent = real_mount(path->mnt);
28929d412a43SAl Viro 
2893f2ebb3a9SAl Viro 	mnt_flags &= ~MNT_INTERNAL_FLAGS;
28949d412a43SAl Viro 
289584d17192SAl Viro 	if (unlikely(!check_mnt(parent))) {
2896156cacb1SAl Viro 		/* that's acceptable only for automounts done in private ns */
2897156cacb1SAl Viro 		if (!(mnt_flags & MNT_SHRINKABLE))
28988f11538eSAl Viro 			return -EINVAL;
2899156cacb1SAl Viro 		/* ... and for those we'd better have mountpoint still alive */
290084d17192SAl Viro 		if (!parent->mnt_ns)
29018f11538eSAl Viro 			return -EINVAL;
2902156cacb1SAl Viro 	}
29039d412a43SAl Viro 
29049d412a43SAl Viro 	/* Refuse the same filesystem on the same mount point */
290595bc5f25SAl Viro 	if (path->mnt->mnt_sb == newmnt->mnt.mnt_sb &&
29069d412a43SAl Viro 	    path->mnt->mnt_root == path->dentry)
29078f11538eSAl Viro 		return -EBUSY;
29089d412a43SAl Viro 
2909e36cb0b8SDavid Howells 	if (d_is_symlink(newmnt->mnt.mnt_root))
29108f11538eSAl Viro 		return -EINVAL;
29119d412a43SAl Viro 
291295bc5f25SAl Viro 	newmnt->mnt.mnt_flags = mnt_flags;
29138f11538eSAl Viro 	return graft_tree(newmnt, parent, mp);
29149d412a43SAl Viro }
2915b1e75df4SAl Viro 
2916132e4608SDavid Howells static bool mount_too_revealing(const struct super_block *sb, int *new_mnt_flags);
2917132e4608SDavid Howells 
2918132e4608SDavid Howells /*
2919132e4608SDavid Howells  * Create a new mount using a superblock configuration and request it
2920132e4608SDavid Howells  * be added to the namespace tree.
2921132e4608SDavid Howells  */
2922132e4608SDavid Howells static int do_new_mount_fc(struct fs_context *fc, struct path *mountpoint,
2923132e4608SDavid Howells 			   unsigned int mnt_flags)
2924132e4608SDavid Howells {
2925132e4608SDavid Howells 	struct vfsmount *mnt;
29268f11538eSAl Viro 	struct mountpoint *mp;
2927132e4608SDavid Howells 	struct super_block *sb = fc->root->d_sb;
2928132e4608SDavid Howells 	int error;
2929132e4608SDavid Howells 
2930c9ce29edSAl Viro 	error = security_sb_kern_mount(sb);
2931c9ce29edSAl Viro 	if (!error && mount_too_revealing(sb, &mnt_flags))
2932c9ce29edSAl Viro 		error = -EPERM;
2933c9ce29edSAl Viro 
2934c9ce29edSAl Viro 	if (unlikely(error)) {
2935c9ce29edSAl Viro 		fc_drop_locked(fc);
2936c9ce29edSAl Viro 		return error;
2937132e4608SDavid Howells 	}
2938132e4608SDavid Howells 
2939132e4608SDavid Howells 	up_write(&sb->s_umount);
2940132e4608SDavid Howells 
2941132e4608SDavid Howells 	mnt = vfs_create_mount(fc);
2942132e4608SDavid Howells 	if (IS_ERR(mnt))
2943132e4608SDavid Howells 		return PTR_ERR(mnt);
2944132e4608SDavid Howells 
2945f8b92ba6SDeepa Dinamani 	mnt_warn_timestamp_expiry(mountpoint, mnt);
2946f8b92ba6SDeepa Dinamani 
29478f11538eSAl Viro 	mp = lock_mount(mountpoint);
29488f11538eSAl Viro 	if (IS_ERR(mp)) {
29498f11538eSAl Viro 		mntput(mnt);
29508f11538eSAl Viro 		return PTR_ERR(mp);
29518f11538eSAl Viro 	}
29528f11538eSAl Viro 	error = do_add_mount(real_mount(mnt), mp, mountpoint, mnt_flags);
29538f11538eSAl Viro 	unlock_mount(mp);
29540ecee669SEric Biggers 	if (error < 0)
29550ecee669SEric Biggers 		mntput(mnt);
2956f8b92ba6SDeepa Dinamani 	return error;
2957f8b92ba6SDeepa Dinamani }
2958f8b92ba6SDeepa Dinamani 
29591da177e4SLinus Torvalds /*
29601da177e4SLinus Torvalds  * create a new mount for userspace and request it to be added into the
29611da177e4SLinus Torvalds  * namespace's tree
29621da177e4SLinus Torvalds  */
2963e462ec50SDavid Howells static int do_new_mount(struct path *path, const char *fstype, int sb_flags,
2964808d4e3cSAl Viro 			int mnt_flags, const char *name, void *data)
29651da177e4SLinus Torvalds {
29660c55cfc4SEric W. Biederman 	struct file_system_type *type;
2967a0c9a8b8SAl Viro 	struct fs_context *fc;
2968a0c9a8b8SAl Viro 	const char *subtype = NULL;
2969a0c9a8b8SAl Viro 	int err = 0;
29701da177e4SLinus Torvalds 
29710c55cfc4SEric W. Biederman 	if (!fstype)
29721da177e4SLinus Torvalds 		return -EINVAL;
29731da177e4SLinus Torvalds 
29740c55cfc4SEric W. Biederman 	type = get_fs_type(fstype);
29750c55cfc4SEric W. Biederman 	if (!type)
29760c55cfc4SEric W. Biederman 		return -ENODEV;
29770c55cfc4SEric W. Biederman 
2978a0c9a8b8SAl Viro 	if (type->fs_flags & FS_HAS_SUBTYPE) {
2979a0c9a8b8SAl Viro 		subtype = strchr(fstype, '.');
2980a0c9a8b8SAl Viro 		if (subtype) {
2981a0c9a8b8SAl Viro 			subtype++;
2982a0c9a8b8SAl Viro 			if (!*subtype) {
29830c55cfc4SEric W. Biederman 				put_filesystem(type);
2984a0c9a8b8SAl Viro 				return -EINVAL;
2985a0c9a8b8SAl Viro 			}
2986a0c9a8b8SAl Viro 		}
29878654df4eSEric W. Biederman 	}
29888654df4eSEric W. Biederman 
2989a0c9a8b8SAl Viro 	fc = fs_context_for_mount(type, sb_flags);
2990a0c9a8b8SAl Viro 	put_filesystem(type);
2991a0c9a8b8SAl Viro 	if (IS_ERR(fc))
2992a0c9a8b8SAl Viro 		return PTR_ERR(fc);
2993a0c9a8b8SAl Viro 
29943e1aeb00SDavid Howells 	if (subtype)
29953e1aeb00SDavid Howells 		err = vfs_parse_fs_string(fc, "subtype",
29963e1aeb00SDavid Howells 					  subtype, strlen(subtype));
29973e1aeb00SDavid Howells 	if (!err && name)
29983e1aeb00SDavid Howells 		err = vfs_parse_fs_string(fc, "source", name, strlen(name));
2999a0c9a8b8SAl Viro 	if (!err)
3000a0c9a8b8SAl Viro 		err = parse_monolithic_mount_data(fc, data);
3001c3aabf07SAl Viro 	if (!err && !mount_capable(fc))
3002c3aabf07SAl Viro 		err = -EPERM;
3003a0c9a8b8SAl Viro 	if (!err)
3004a0c9a8b8SAl Viro 		err = vfs_get_tree(fc);
3005132e4608SDavid Howells 	if (!err)
3006132e4608SDavid Howells 		err = do_new_mount_fc(fc, path, mnt_flags);
3007a0c9a8b8SAl Viro 
3008a0c9a8b8SAl Viro 	put_fs_context(fc);
300915f9a3f3SAl Viro 	return err;
30101da177e4SLinus Torvalds }
30111da177e4SLinus Torvalds 
/*
 * Attach the automounted mount @m at @path.
 *
 * Returns 0 if @m is NULL, PTR_ERR(@m) if it is an error pointer, -ELOOP
 * if @m has the same superblock as the mount at @path and its root is
 * @path's own dentry, -ENOENT if cant_mount() rejects the dentry, the
 * error from get_mountpoint() or do_add_mount(), and 0 both on success
 * and when something is already mounted on @path (in which case @m is
 * dropped).
 *
 * On success one reference to @m is dropped; on every discard path two
 * references are dropped.
 */
int finish_automount(struct vfsmount *m, struct path *path)
{
	struct dentry *dentry = path->dentry;
	struct mountpoint *mp;
	struct mount *mnt;
	int err;

	if (!m)
		return 0;
	if (IS_ERR(m))
		return PTR_ERR(m);

	mnt = real_mount(m);
	/* The new mount record should have at least 2 refs to prevent it being
	 * expired before we get a chance to add it
	 */
	BUG_ON(mnt_get_count(mnt) < 2);

	if (m->mnt_sb == path->mnt->mnt_sb &&
	    m->mnt_root == dentry) {
		err = -ELOOP;
		goto discard;
	}

	/*
	 * We don't want to use lock_mount() - in this case finding something
	 * that overmounts our mountpoint means "quietly drop what we've got",
	 * not "try to mount it on top".
	 */
	inode_lock(dentry->d_inode);
	namespace_lock();
	if (unlikely(cant_mount(dentry))) {
		err = -ENOENT;
		goto discard_locked;
	}
	rcu_read_lock();
	if (unlikely(__lookup_mnt(path->mnt, dentry))) {
		/* already overmounted: drop @m and report success */
		rcu_read_unlock();
		err = 0;
		goto discard_locked;
	}
	rcu_read_unlock();
	mp = get_mountpoint(dentry);
	if (IS_ERR(mp)) {
		err = PTR_ERR(mp);
		goto discard_locked;
	}

	err = do_add_mount(mnt, mp, path, path->mnt->mnt_flags | MNT_SHRINKABLE);
	/*
	 * NOTE(review): unlock_mount() presumably also releases the
	 * namespace and inode locks taken by hand above - confirm.
	 */
	unlock_mount(mp);
	if (unlikely(err))
		goto discard;
	mntput(m);
	return 0;

discard_locked:
	namespace_unlock();
	inode_unlock(dentry->d_inode);
discard:
	/* remove m from any expiration list it may be on */
	if (!list_empty(&mnt->mnt_expire)) {
		namespace_lock();
		list_del_init(&mnt->mnt_expire);
		namespace_unlock();
	}
	/* drop both references the caller handed us (see BUG_ON above) */
	mntput(m);
	mntput(m);
	return err;
}
308119a167afSAl Viro 
3082ea5b778aSDavid Howells /**
3083ea5b778aSDavid Howells  * mnt_set_expiry - Put a mount on an expiration list
3084ea5b778aSDavid Howells  * @mnt: The mount to list.
3085ea5b778aSDavid Howells  * @expiry_list: The list to add the mount to.
3086ea5b778aSDavid Howells  */
3087ea5b778aSDavid Howells void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list)
3088ea5b778aSDavid Howells {
308997216be0SAl Viro 	namespace_lock();
3090ea5b778aSDavid Howells 
30916776db3dSAl Viro 	list_add_tail(&real_mount(mnt)->mnt_expire, expiry_list);
3092ea5b778aSDavid Howells 
309397216be0SAl Viro 	namespace_unlock();
3094ea5b778aSDavid Howells }
3095ea5b778aSDavid Howells EXPORT_SYMBOL(mnt_set_expiry);
3096ea5b778aSDavid Howells 
3097ea5b778aSDavid Howells /*
30981da177e4SLinus Torvalds  * process a list of expirable mountpoints with the intent of discarding any
30991da177e4SLinus Torvalds  * mountpoints that aren't in use and haven't been touched since last we came
31001da177e4SLinus Torvalds  * here
31011da177e4SLinus Torvalds  */
31021da177e4SLinus Torvalds void mark_mounts_for_expiry(struct list_head *mounts)
31031da177e4SLinus Torvalds {
3104761d5c38SAl Viro 	struct mount *mnt, *next;
31051da177e4SLinus Torvalds 	LIST_HEAD(graveyard);
31061da177e4SLinus Torvalds 
31071da177e4SLinus Torvalds 	if (list_empty(mounts))
31081da177e4SLinus Torvalds 		return;
31091da177e4SLinus Torvalds 
311097216be0SAl Viro 	namespace_lock();
3111719ea2fbSAl Viro 	lock_mount_hash();
31121da177e4SLinus Torvalds 
31131da177e4SLinus Torvalds 	/* extract from the expiration list every vfsmount that matches the
31141da177e4SLinus Torvalds 	 * following criteria:
31151da177e4SLinus Torvalds 	 * - only referenced by its parent vfsmount
31161da177e4SLinus Torvalds 	 * - still marked for expiry (marked on the last call here; marks are
31171da177e4SLinus Torvalds 	 *   cleared by mntput())
31181da177e4SLinus Torvalds 	 */
31196776db3dSAl Viro 	list_for_each_entry_safe(mnt, next, mounts, mnt_expire) {
3120863d684fSAl Viro 		if (!xchg(&mnt->mnt_expiry_mark, 1) ||
31211ab59738SAl Viro 			propagate_mount_busy(mnt, 1))
31221da177e4SLinus Torvalds 			continue;
31236776db3dSAl Viro 		list_move(&mnt->mnt_expire, &graveyard);
31241da177e4SLinus Torvalds 	}
3125bcc5c7d2SAl Viro 	while (!list_empty(&graveyard)) {
31266776db3dSAl Viro 		mnt = list_first_entry(&graveyard, struct mount, mnt_expire);
3127143c8c91SAl Viro 		touch_mnt_namespace(mnt->mnt_ns);
3128e819f152SEric W. Biederman 		umount_tree(mnt, UMOUNT_PROPAGATE|UMOUNT_SYNC);
3129bcc5c7d2SAl Viro 	}
3130719ea2fbSAl Viro 	unlock_mount_hash();
31313ab6abeeSAl Viro 	namespace_unlock();
31321da177e4SLinus Torvalds }
31331da177e4SLinus Torvalds 
31341da177e4SLinus Torvalds EXPORT_SYMBOL_GPL(mark_mounts_for_expiry);
31351da177e4SLinus Torvalds 
/*
 * Ripoff of 'select_parent()'
 *
 * search the list of submounts for a given mountpoint, and move any
 * shrinkable submounts to the 'graveyard' list.
 *
 * The walk is iterative: it descends into shrinkable children that have
 * children of their own and climbs back up via mnt_parent.  Mounts without
 * MNT_SHRINKABLE are skipped together with everything below them.
 *
 * Returns the number of mounts moved to @graveyard.
 */
static int select_submounts(struct mount *parent, struct list_head *graveyard)
{
	struct mount *this_parent = parent;
	struct list_head *next;
	int found = 0;

repeat:
	/* start at the first child of the current level */
	next = this_parent->mnt_mounts.next;
resume:
	while (next != &this_parent->mnt_mounts) {
		struct list_head *tmp = next;
		struct mount *mnt = list_entry(tmp, struct mount, mnt_child);

		/* advance first, so 'continue' and list moves are safe */
		next = tmp->next;
		if (!(mnt->mnt.mnt_flags & MNT_SHRINKABLE))
			continue;
		/*
		 * Descend a level if the mnt_mounts list is non-empty.
		 */
		if (!list_empty(&mnt->mnt_mounts)) {
			this_parent = mnt;
			goto repeat;
		}

		/* leaf, shrinkable and not busy: queue it for unmounting */
		if (!propagate_mount_busy(mnt, 1)) {
			list_move_tail(&mnt->mnt_expire, graveyard);
			found++;
		}
	}
	/*
	 * All done at this level ... ascend and resume the search
	 */
	if (this_parent != parent) {
		/* continue with the sibling following the level just finished */
		next = this_parent->mnt_child.next;
		this_parent = this_parent->mnt_parent;
		goto resume;
	}
	return found;
}
31815528f911STrond Myklebust 
31825528f911STrond Myklebust /*
31835528f911STrond Myklebust  * process a list of expirable mountpoints with the intent of discarding any
31845528f911STrond Myklebust  * submounts of a specific parent mountpoint
318599b7db7bSNick Piggin  *
318648a066e7SAl Viro  * mount_lock must be held for write
31875528f911STrond Myklebust  */
3188b54b9be7SAl Viro static void shrink_submounts(struct mount *mnt)
31895528f911STrond Myklebust {
31905528f911STrond Myklebust 	LIST_HEAD(graveyard);
3191761d5c38SAl Viro 	struct mount *m;
31925528f911STrond Myklebust 
31935528f911STrond Myklebust 	/* extract submounts of 'mountpoint' from the expiration list */
3194c35038beSAl Viro 	while (select_submounts(mnt, &graveyard)) {
3195bcc5c7d2SAl Viro 		while (!list_empty(&graveyard)) {
3196761d5c38SAl Viro 			m = list_first_entry(&graveyard, struct mount,
31976776db3dSAl Viro 						mnt_expire);
3198143c8c91SAl Viro 			touch_mnt_namespace(m->mnt_ns);
3199e819f152SEric W. Biederman 			umount_tree(m, UMOUNT_PROPAGATE|UMOUNT_SYNC);
3200bcc5c7d2SAl Viro 		}
3201bcc5c7d2SAl Viro 	}
32025528f911STrond Myklebust }
32035528f911STrond Myklebust 
3204028abd92SChristoph Hellwig static void *copy_mount_options(const void __user * data)
32051da177e4SLinus Torvalds {
3206b40ef869SAl Viro 	char *copy;
3207d563d678SCatalin Marinas 	unsigned left, offset;
32081da177e4SLinus Torvalds 
32091da177e4SLinus Torvalds 	if (!data)
3210b40ef869SAl Viro 		return NULL;
32111da177e4SLinus Torvalds 
3212b40ef869SAl Viro 	copy = kmalloc(PAGE_SIZE, GFP_KERNEL);
3213b40ef869SAl Viro 	if (!copy)
3214b40ef869SAl Viro 		return ERR_PTR(-ENOMEM);
32151da177e4SLinus Torvalds 
3216d563d678SCatalin Marinas 	left = copy_from_user(copy, data, PAGE_SIZE);
32171da177e4SLinus Torvalds 
3218d563d678SCatalin Marinas 	/*
3219d563d678SCatalin Marinas 	 * Not all architectures have an exact copy_from_user(). Resort to
3220d563d678SCatalin Marinas 	 * byte at a time.
3221d563d678SCatalin Marinas 	 */
3222d563d678SCatalin Marinas 	offset = PAGE_SIZE - left;
3223d563d678SCatalin Marinas 	while (left) {
3224d563d678SCatalin Marinas 		char c;
3225d563d678SCatalin Marinas 		if (get_user(c, (const char __user *)data + offset))
3226d563d678SCatalin Marinas 			break;
3227d563d678SCatalin Marinas 		copy[offset] = c;
3228d563d678SCatalin Marinas 		left--;
3229d563d678SCatalin Marinas 		offset++;
3230d563d678SCatalin Marinas 	}
3231d563d678SCatalin Marinas 
3232d563d678SCatalin Marinas 	if (left == PAGE_SIZE) {
3233b40ef869SAl Viro 		kfree(copy);
3234b40ef869SAl Viro 		return ERR_PTR(-EFAULT);
32351da177e4SLinus Torvalds 	}
3236d563d678SCatalin Marinas 
3237b40ef869SAl Viro 	return copy;
32381da177e4SLinus Torvalds }
32391da177e4SLinus Torvalds 
3240028abd92SChristoph Hellwig static char *copy_mount_string(const void __user *data)
3241eca6f534SVegard Nossum {
3242fbdb4401SChandan Rajendra 	return data ? strndup_user(data, PATH_MAX) : NULL;
3243eca6f534SVegard Nossum }
3244eca6f534SVegard Nossum 
32451da177e4SLinus Torvalds /*
32461da177e4SLinus Torvalds  * Flags is a 32-bit value that allows up to 31 non-fs dependent flags to
32471da177e4SLinus Torvalds  * be given to the mount() call (ie: read-only, no-dev, no-suid etc).
32481da177e4SLinus Torvalds  *
32491da177e4SLinus Torvalds  * data is a (void *) that can point to any structure up to
32501da177e4SLinus Torvalds  * PAGE_SIZE-1 bytes, which can contain arbitrary fs-dependent
32511da177e4SLinus Torvalds  * information (or be NULL).
32521da177e4SLinus Torvalds  *
32531da177e4SLinus Torvalds  * Pre-0.97 versions of mount() didn't have a flags word.
32541da177e4SLinus Torvalds  * When the flags word was introduced its top half was required
32551da177e4SLinus Torvalds  * to have the magic value 0xC0ED, and this remained so until 2.4.0-test9.
32561da177e4SLinus Torvalds  * Therefore, if this magic number is present, it carries no information
32571da177e4SLinus Torvalds  * and must be discarded.
32581da177e4SLinus Torvalds  */
3259c60166f0SChristoph Hellwig int path_mount(const char *dev_name, struct path *path,
3260808d4e3cSAl Viro 		const char *type_page, unsigned long flags, void *data_page)
32611da177e4SLinus Torvalds {
3262e462ec50SDavid Howells 	unsigned int mnt_flags = 0, sb_flags;
3263a1e6aaa3SChristoph Hellwig 	int ret;
32641da177e4SLinus Torvalds 
32651da177e4SLinus Torvalds 	/* Discard magic */
32661da177e4SLinus Torvalds 	if ((flags & MS_MGC_MSK) == MS_MGC_VAL)
32671da177e4SLinus Torvalds 		flags &= ~MS_MGC_MSK;
32681da177e4SLinus Torvalds 
32691da177e4SLinus Torvalds 	/* Basic sanity checks */
32701da177e4SLinus Torvalds 	if (data_page)
32711da177e4SLinus Torvalds 		((char *)data_page)[PAGE_SIZE - 1] = 0;
32721da177e4SLinus Torvalds 
3273e462ec50SDavid Howells 	if (flags & MS_NOUSER)
3274e462ec50SDavid Howells 		return -EINVAL;
3275e462ec50SDavid Howells 
3276a1e6aaa3SChristoph Hellwig 	ret = security_sb_mount(dev_name, path, type_page, flags, data_page);
3277a1e6aaa3SChristoph Hellwig 	if (ret)
3278a1e6aaa3SChristoph Hellwig 		return ret;
3279a1e6aaa3SChristoph Hellwig 	if (!may_mount())
3280a1e6aaa3SChristoph Hellwig 		return -EPERM;
3281f7e33bdbSJeff Layton 	if (flags & SB_MANDLOCK)
3282f7e33bdbSJeff Layton 		warn_mandlock();
3283a27ab9f2STetsuo Handa 
3284613cbe3dSAndi Kleen 	/* Default to relatime unless overriden */
3285613cbe3dSAndi Kleen 	if (!(flags & MS_NOATIME))
32860a1c01c9SMatthew Garrett 		mnt_flags |= MNT_RELATIME;
32870a1c01c9SMatthew Garrett 
32881da177e4SLinus Torvalds 	/* Separate the per-mountpoint flags */
32891da177e4SLinus Torvalds 	if (flags & MS_NOSUID)
32901da177e4SLinus Torvalds 		mnt_flags |= MNT_NOSUID;
32911da177e4SLinus Torvalds 	if (flags & MS_NODEV)
32921da177e4SLinus Torvalds 		mnt_flags |= MNT_NODEV;
32931da177e4SLinus Torvalds 	if (flags & MS_NOEXEC)
32941da177e4SLinus Torvalds 		mnt_flags |= MNT_NOEXEC;
3295fc33a7bbSChristoph Hellwig 	if (flags & MS_NOATIME)
3296fc33a7bbSChristoph Hellwig 		mnt_flags |= MNT_NOATIME;
3297fc33a7bbSChristoph Hellwig 	if (flags & MS_NODIRATIME)
3298fc33a7bbSChristoph Hellwig 		mnt_flags |= MNT_NODIRATIME;
3299d0adde57SMatthew Garrett 	if (flags & MS_STRICTATIME)
3300d0adde57SMatthew Garrett 		mnt_flags &= ~(MNT_RELATIME | MNT_NOATIME);
3301a9e5b732SDavid Howells 	if (flags & MS_RDONLY)
33022e4b7fcdSDave Hansen 		mnt_flags |= MNT_READONLY;
3303dab741e0SMattias Nissler 	if (flags & MS_NOSYMFOLLOW)
3304dab741e0SMattias Nissler 		mnt_flags |= MNT_NOSYMFOLLOW;
3305fc33a7bbSChristoph Hellwig 
3306ffbc6f0eSEric W. Biederman 	/* The default atime for remount is preservation */
3307ffbc6f0eSEric W. Biederman 	if ((flags & MS_REMOUNT) &&
3308ffbc6f0eSEric W. Biederman 	    ((flags & (MS_NOATIME | MS_NODIRATIME | MS_RELATIME |
3309ffbc6f0eSEric W. Biederman 		       MS_STRICTATIME)) == 0)) {
3310ffbc6f0eSEric W. Biederman 		mnt_flags &= ~MNT_ATIME_MASK;
3311a1e6aaa3SChristoph Hellwig 		mnt_flags |= path->mnt->mnt_flags & MNT_ATIME_MASK;
3312ffbc6f0eSEric W. Biederman 	}
3313ffbc6f0eSEric W. Biederman 
3314e462ec50SDavid Howells 	sb_flags = flags & (SB_RDONLY |
3315e462ec50SDavid Howells 			    SB_SYNCHRONOUS |
3316e462ec50SDavid Howells 			    SB_MANDLOCK |
3317e462ec50SDavid Howells 			    SB_DIRSYNC |
3318e462ec50SDavid Howells 			    SB_SILENT |
3319917086ffSMimi Zohar 			    SB_POSIXACL |
3320d7ee9469SMarkus Trippelsdorf 			    SB_LAZYTIME |
3321917086ffSMimi Zohar 			    SB_I_VERSION);
33221da177e4SLinus Torvalds 
332343f5e655SDavid Howells 	if ((flags & (MS_REMOUNT | MS_BIND)) == (MS_REMOUNT | MS_BIND))
3324a1e6aaa3SChristoph Hellwig 		return do_reconfigure_mnt(path, mnt_flags);
3325a1e6aaa3SChristoph Hellwig 	if (flags & MS_REMOUNT)
3326a1e6aaa3SChristoph Hellwig 		return do_remount(path, flags, sb_flags, mnt_flags, data_page);
3327a1e6aaa3SChristoph Hellwig 	if (flags & MS_BIND)
3328a1e6aaa3SChristoph Hellwig 		return do_loopback(path, dev_name, flags & MS_REC);
3329a1e6aaa3SChristoph Hellwig 	if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
3330a1e6aaa3SChristoph Hellwig 		return do_change_type(path, flags);
3331a1e6aaa3SChristoph Hellwig 	if (flags & MS_MOVE)
3332a1e6aaa3SChristoph Hellwig 		return do_move_mount_old(path, dev_name);
3333a1e6aaa3SChristoph Hellwig 
3334a1e6aaa3SChristoph Hellwig 	return do_new_mount(path, type_page, sb_flags, mnt_flags, dev_name,
33351da177e4SLinus Torvalds 			    data_page);
3336a1e6aaa3SChristoph Hellwig }
3337a1e6aaa3SChristoph Hellwig 
3338a1e6aaa3SChristoph Hellwig long do_mount(const char *dev_name, const char __user *dir_name,
3339a1e6aaa3SChristoph Hellwig 		const char *type_page, unsigned long flags, void *data_page)
3340a1e6aaa3SChristoph Hellwig {
3341a1e6aaa3SChristoph Hellwig 	struct path path;
3342a1e6aaa3SChristoph Hellwig 	int ret;
3343a1e6aaa3SChristoph Hellwig 
3344a1e6aaa3SChristoph Hellwig 	ret = user_path_at(AT_FDCWD, dir_name, LOOKUP_FOLLOW, &path);
3345a1e6aaa3SChristoph Hellwig 	if (ret)
3346a1e6aaa3SChristoph Hellwig 		return ret;
3347a1e6aaa3SChristoph Hellwig 	ret = path_mount(dev_name, &path, type_page, flags, data_page);
33482d92ab3cSAl Viro 	path_put(&path);
3349a1e6aaa3SChristoph Hellwig 	return ret;
33501da177e4SLinus Torvalds }
33511da177e4SLinus Torvalds 
3352537f7ccbSEric W. Biederman static struct ucounts *inc_mnt_namespaces(struct user_namespace *ns)
3353537f7ccbSEric W. Biederman {
3354537f7ccbSEric W. Biederman 	return inc_ucount(ns, current_euid(), UCOUNT_MNT_NAMESPACES);
3355537f7ccbSEric W. Biederman }
3356537f7ccbSEric W. Biederman 
3357537f7ccbSEric W. Biederman static void dec_mnt_namespaces(struct ucounts *ucounts)
3358537f7ccbSEric W. Biederman {
3359537f7ccbSEric W. Biederman 	dec_ucount(ucounts, UCOUNT_MNT_NAMESPACES);
3360537f7ccbSEric W. Biederman }
3361537f7ccbSEric W. Biederman 
3362771b1371SEric W. Biederman static void free_mnt_ns(struct mnt_namespace *ns)
3363771b1371SEric W. Biederman {
336474e83122SAl Viro 	if (!is_anon_ns(ns))
33656344c433SAl Viro 		ns_free_inum(&ns->ns);
3366537f7ccbSEric W. Biederman 	dec_mnt_namespaces(ns->ucounts);
3367771b1371SEric W. Biederman 	put_user_ns(ns->user_ns);
3368771b1371SEric W. Biederman 	kfree(ns);
3369771b1371SEric W. Biederman }
3370771b1371SEric W. Biederman 
33718823c079SEric W. Biederman /*
33728823c079SEric W. Biederman  * Assign a sequence number so we can detect when we attempt to bind
33738823c079SEric W. Biederman  * mount a reference to an older mount namespace into the current
33748823c079SEric W. Biederman  * mount namespace, preventing reference counting loops.  A 64bit
33758823c079SEric W. Biederman  * number incrementing at 10Ghz will take 12,427 years to wrap which
33768823c079SEric W. Biederman  * is effectively never, so we can ignore the possibility.
33778823c079SEric W. Biederman  */
33788823c079SEric W. Biederman static atomic64_t mnt_ns_seq = ATOMIC64_INIT(1);
33798823c079SEric W. Biederman 
338074e83122SAl Viro static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns, bool anon)
3381cf8d2c11STrond Myklebust {
3382cf8d2c11STrond Myklebust 	struct mnt_namespace *new_ns;
3383537f7ccbSEric W. Biederman 	struct ucounts *ucounts;
338498f842e6SEric W. Biederman 	int ret;
3385cf8d2c11STrond Myklebust 
3386537f7ccbSEric W. Biederman 	ucounts = inc_mnt_namespaces(user_ns);
3387537f7ccbSEric W. Biederman 	if (!ucounts)
3388df75e774SEric W. Biederman 		return ERR_PTR(-ENOSPC);
3389537f7ccbSEric W. Biederman 
339030acd0bdSVasily Averin 	new_ns = kzalloc(sizeof(struct mnt_namespace), GFP_KERNEL_ACCOUNT);
3391537f7ccbSEric W. Biederman 	if (!new_ns) {
3392537f7ccbSEric W. Biederman 		dec_mnt_namespaces(ucounts);
3393cf8d2c11STrond Myklebust 		return ERR_PTR(-ENOMEM);
3394537f7ccbSEric W. Biederman 	}
339574e83122SAl Viro 	if (!anon) {
33966344c433SAl Viro 		ret = ns_alloc_inum(&new_ns->ns);
339798f842e6SEric W. Biederman 		if (ret) {
339898f842e6SEric W. Biederman 			kfree(new_ns);
3399537f7ccbSEric W. Biederman 			dec_mnt_namespaces(ucounts);
340098f842e6SEric W. Biederman 			return ERR_PTR(ret);
340198f842e6SEric W. Biederman 		}
340274e83122SAl Viro 	}
340333c42940SAl Viro 	new_ns->ns.ops = &mntns_operations;
340474e83122SAl Viro 	if (!anon)
34058823c079SEric W. Biederman 		new_ns->seq = atomic64_add_return(1, &mnt_ns_seq);
34061a7b8969SKirill Tkhai 	refcount_set(&new_ns->ns.count, 1);
3407cf8d2c11STrond Myklebust 	INIT_LIST_HEAD(&new_ns->list);
3408cf8d2c11STrond Myklebust 	init_waitqueue_head(&new_ns->poll);
34099f6c61f9SMiklos Szeredi 	spin_lock_init(&new_ns->ns_lock);
3410771b1371SEric W. Biederman 	new_ns->user_ns = get_user_ns(user_ns);
3411537f7ccbSEric W. Biederman 	new_ns->ucounts = ucounts;
3412cf8d2c11STrond Myklebust 	return new_ns;
3413cf8d2c11STrond Myklebust }
3414cf8d2c11STrond Myklebust 
34150766f788SEmese Revfy __latent_entropy
34169559f689SAl Viro struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
34179559f689SAl Viro 		struct user_namespace *user_ns, struct fs_struct *new_fs)
34181da177e4SLinus Torvalds {
34196b3286edSKirill Korotaev 	struct mnt_namespace *new_ns;
34207f2da1e7SAl Viro 	struct vfsmount *rootmnt = NULL, *pwdmnt = NULL;
3421315fc83eSAl Viro 	struct mount *p, *q;
34229559f689SAl Viro 	struct mount *old;
3423cb338d06SAl Viro 	struct mount *new;
34247a472ef4SEric W. Biederman 	int copy_flags;
34251da177e4SLinus Torvalds 
34269559f689SAl Viro 	BUG_ON(!ns);
34279559f689SAl Viro 
34289559f689SAl Viro 	if (likely(!(flags & CLONE_NEWNS))) {
34299559f689SAl Viro 		get_mnt_ns(ns);
34309559f689SAl Viro 		return ns;
34319559f689SAl Viro 	}
34329559f689SAl Viro 
34339559f689SAl Viro 	old = ns->root;
34349559f689SAl Viro 
343574e83122SAl Viro 	new_ns = alloc_mnt_ns(user_ns, false);
3436cf8d2c11STrond Myklebust 	if (IS_ERR(new_ns))
3437cf8d2c11STrond Myklebust 		return new_ns;
34381da177e4SLinus Torvalds 
343997216be0SAl Viro 	namespace_lock();
34401da177e4SLinus Torvalds 	/* First pass: copy the tree topology */
34414ce5d2b1SEric W. Biederman 	copy_flags = CL_COPY_UNBINDABLE | CL_EXPIRE;
34429559f689SAl Viro 	if (user_ns != ns->user_ns)
34433bd045ccSAl Viro 		copy_flags |= CL_SHARED_TO_SLAVE;
34447a472ef4SEric W. Biederman 	new = copy_tree(old, old->mnt.mnt_root, copy_flags);
3445be34d1a3SDavid Howells 	if (IS_ERR(new)) {
3446328e6d90SAl Viro 		namespace_unlock();
3447771b1371SEric W. Biederman 		free_mnt_ns(new_ns);
3448be34d1a3SDavid Howells 		return ERR_CAST(new);
34491da177e4SLinus Torvalds 	}
34503bd045ccSAl Viro 	if (user_ns != ns->user_ns) {
34513bd045ccSAl Viro 		lock_mount_hash();
34523bd045ccSAl Viro 		lock_mnt_tree(new);
34533bd045ccSAl Viro 		unlock_mount_hash();
34543bd045ccSAl Viro 	}
3455be08d6d2SAl Viro 	new_ns->root = new;
34561a4eeaf2SAl Viro 	list_add_tail(&new_ns->list, &new->mnt_list);
34571da177e4SLinus Torvalds 
34581da177e4SLinus Torvalds 	/*
34591da177e4SLinus Torvalds 	 * Second pass: switch the tsk->fs->* elements and mark new vfsmounts
34601da177e4SLinus Torvalds 	 * as belonging to new namespace.  We have already acquired a private
34611da177e4SLinus Torvalds 	 * fs_struct, so tsk->fs->lock is not needed.
34621da177e4SLinus Torvalds 	 */
3463909b0a88SAl Viro 	p = old;
3464cb338d06SAl Viro 	q = new;
34651da177e4SLinus Torvalds 	while (p) {
3466143c8c91SAl Viro 		q->mnt_ns = new_ns;
3467d2921684SEric W. Biederman 		new_ns->mounts++;
34689559f689SAl Viro 		if (new_fs) {
34699559f689SAl Viro 			if (&p->mnt == new_fs->root.mnt) {
34709559f689SAl Viro 				new_fs->root.mnt = mntget(&q->mnt);
3471315fc83eSAl Viro 				rootmnt = &p->mnt;
34721da177e4SLinus Torvalds 			}
34739559f689SAl Viro 			if (&p->mnt == new_fs->pwd.mnt) {
34749559f689SAl Viro 				new_fs->pwd.mnt = mntget(&q->mnt);
3475315fc83eSAl Viro 				pwdmnt = &p->mnt;
34761da177e4SLinus Torvalds 			}
34771da177e4SLinus Torvalds 		}
3478909b0a88SAl Viro 		p = next_mnt(p, old);
3479909b0a88SAl Viro 		q = next_mnt(q, new);
34804ce5d2b1SEric W. Biederman 		if (!q)
34814ce5d2b1SEric W. Biederman 			break;
34824ce5d2b1SEric W. Biederman 		while (p->mnt.mnt_root != q->mnt.mnt_root)
34834ce5d2b1SEric W. Biederman 			p = next_mnt(p, old);
34841da177e4SLinus Torvalds 	}
3485328e6d90SAl Viro 	namespace_unlock();
34861da177e4SLinus Torvalds 
34871da177e4SLinus Torvalds 	if (rootmnt)
3488f03c6599SAl Viro 		mntput(rootmnt);
34891da177e4SLinus Torvalds 	if (pwdmnt)
3490f03c6599SAl Viro 		mntput(pwdmnt);
34911da177e4SLinus Torvalds 
3492741a2951SJANAK DESAI 	return new_ns;
3493741a2951SJANAK DESAI }
3494741a2951SJANAK DESAI 
349574e83122SAl Viro struct dentry *mount_subtree(struct vfsmount *m, const char *name)
3496cf8d2c11STrond Myklebust {
34971a4eeaf2SAl Viro 	struct mount *mnt = real_mount(m);
3498ea441d11SAl Viro 	struct mnt_namespace *ns;
3499d31da0f0SAl Viro 	struct super_block *s;
3500ea441d11SAl Viro 	struct path path;
3501ea441d11SAl Viro 	int err;
3502ea441d11SAl Viro 
350374e83122SAl Viro 	ns = alloc_mnt_ns(&init_user_ns, true);
350474e83122SAl Viro 	if (IS_ERR(ns)) {
350574e83122SAl Viro 		mntput(m);
3506ea441d11SAl Viro 		return ERR_CAST(ns);
350774e83122SAl Viro 	}
350874e83122SAl Viro 	mnt->mnt_ns = ns;
350974e83122SAl Viro 	ns->root = mnt;
351074e83122SAl Viro 	ns->mounts++;
351174e83122SAl Viro 	list_add(&mnt->mnt_list, &ns->list);
3512ea441d11SAl Viro 
351374e83122SAl Viro 	err = vfs_path_lookup(m->mnt_root, m,
3514ea441d11SAl Viro 			name, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &path);
3515ea441d11SAl Viro 
3516ea441d11SAl Viro 	put_mnt_ns(ns);
3517ea441d11SAl Viro 
3518ea441d11SAl Viro 	if (err)
3519ea441d11SAl Viro 		return ERR_PTR(err);
3520ea441d11SAl Viro 
3521ea441d11SAl Viro 	/* trade a vfsmount reference for active sb one */
3522d31da0f0SAl Viro 	s = path.mnt->mnt_sb;
3523d31da0f0SAl Viro 	atomic_inc(&s->s_active);
3524ea441d11SAl Viro 	mntput(path.mnt);
3525ea441d11SAl Viro 	/* lock the sucker */
3526d31da0f0SAl Viro 	down_write(&s->s_umount);
3527ea441d11SAl Viro 	/* ... and return the root of (sub)tree on it */
3528ea441d11SAl Viro 	return path.dentry;
3529ea441d11SAl Viro }
3530ea441d11SAl Viro EXPORT_SYMBOL(mount_subtree);
3531ea441d11SAl Viro 
3532cccaa5e3SDominik Brodowski SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
3533cccaa5e3SDominik Brodowski 		char __user *, type, unsigned long, flags, void __user *, data)
35341da177e4SLinus Torvalds {
3535eca6f534SVegard Nossum 	int ret;
3536eca6f534SVegard Nossum 	char *kernel_type;
3537eca6f534SVegard Nossum 	char *kernel_dev;
3538b40ef869SAl Viro 	void *options;
35391da177e4SLinus Torvalds 
3540b8850d1fSTim Gardner 	kernel_type = copy_mount_string(type);
3541b8850d1fSTim Gardner 	ret = PTR_ERR(kernel_type);
3542b8850d1fSTim Gardner 	if (IS_ERR(kernel_type))
3543eca6f534SVegard Nossum 		goto out_type;
35441da177e4SLinus Torvalds 
3545b8850d1fSTim Gardner 	kernel_dev = copy_mount_string(dev_name);
3546b8850d1fSTim Gardner 	ret = PTR_ERR(kernel_dev);
3547b8850d1fSTim Gardner 	if (IS_ERR(kernel_dev))
3548eca6f534SVegard Nossum 		goto out_dev;
35491da177e4SLinus Torvalds 
3550b40ef869SAl Viro 	options = copy_mount_options(data);
3551b40ef869SAl Viro 	ret = PTR_ERR(options);
3552b40ef869SAl Viro 	if (IS_ERR(options))
3553eca6f534SVegard Nossum 		goto out_data;
35541da177e4SLinus Torvalds 
3555b40ef869SAl Viro 	ret = do_mount(kernel_dev, dir_name, kernel_type, flags, options);
3556eca6f534SVegard Nossum 
3557b40ef869SAl Viro 	kfree(options);
3558eca6f534SVegard Nossum out_data:
3559eca6f534SVegard Nossum 	kfree(kernel_dev);
3560eca6f534SVegard Nossum out_dev:
3561eca6f534SVegard Nossum 	kfree(kernel_type);
3562eca6f534SVegard Nossum out_type:
3563eca6f534SVegard Nossum 	return ret;
35641da177e4SLinus Torvalds }
35651da177e4SLinus Torvalds 
35665b490500SChristian Brauner #define FSMOUNT_VALID_FLAGS                                                    \
35675b490500SChristian Brauner 	(MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOSUID | MOUNT_ATTR_NODEV |            \
3568dd8b477fSChristian Brauner 	 MOUNT_ATTR_NOEXEC | MOUNT_ATTR__ATIME | MOUNT_ATTR_NODIRATIME |       \
3569dd8b477fSChristian Brauner 	 MOUNT_ATTR_NOSYMFOLLOW)
35705b490500SChristian Brauner 
35719caccd41SChristian Brauner #define MOUNT_SETATTR_VALID_FLAGS (FSMOUNT_VALID_FLAGS | MOUNT_ATTR_IDMAP)
35722a186721SChristian Brauner 
35732a186721SChristian Brauner #define MOUNT_SETATTR_PROPAGATION_FLAGS \
35742a186721SChristian Brauner 	(MS_UNBINDABLE | MS_PRIVATE | MS_SLAVE | MS_SHARED)
35752a186721SChristian Brauner 
35765b490500SChristian Brauner static unsigned int attr_flags_to_mnt_flags(u64 attr_flags)
35775b490500SChristian Brauner {
35785b490500SChristian Brauner 	unsigned int mnt_flags = 0;
35795b490500SChristian Brauner 
35805b490500SChristian Brauner 	if (attr_flags & MOUNT_ATTR_RDONLY)
35815b490500SChristian Brauner 		mnt_flags |= MNT_READONLY;
35825b490500SChristian Brauner 	if (attr_flags & MOUNT_ATTR_NOSUID)
35835b490500SChristian Brauner 		mnt_flags |= MNT_NOSUID;
35845b490500SChristian Brauner 	if (attr_flags & MOUNT_ATTR_NODEV)
35855b490500SChristian Brauner 		mnt_flags |= MNT_NODEV;
35865b490500SChristian Brauner 	if (attr_flags & MOUNT_ATTR_NOEXEC)
35875b490500SChristian Brauner 		mnt_flags |= MNT_NOEXEC;
35885b490500SChristian Brauner 	if (attr_flags & MOUNT_ATTR_NODIRATIME)
35895b490500SChristian Brauner 		mnt_flags |= MNT_NODIRATIME;
3590dd8b477fSChristian Brauner 	if (attr_flags & MOUNT_ATTR_NOSYMFOLLOW)
3591dd8b477fSChristian Brauner 		mnt_flags |= MNT_NOSYMFOLLOW;
35925b490500SChristian Brauner 
35935b490500SChristian Brauner 	return mnt_flags;
35945b490500SChristian Brauner }
35955b490500SChristian Brauner 
35961da177e4SLinus Torvalds /*
359793766fbdSDavid Howells  * Create a kernel mount representation for a new, prepared superblock
359893766fbdSDavid Howells  * (specified by fs_fd) and attach to an open_tree-like file descriptor.
359993766fbdSDavid Howells  */
360093766fbdSDavid Howells SYSCALL_DEFINE3(fsmount, int, fs_fd, unsigned int, flags,
360193766fbdSDavid Howells 		unsigned int, attr_flags)
360293766fbdSDavid Howells {
360393766fbdSDavid Howells 	struct mnt_namespace *ns;
360493766fbdSDavid Howells 	struct fs_context *fc;
360593766fbdSDavid Howells 	struct file *file;
360693766fbdSDavid Howells 	struct path newmount;
360793766fbdSDavid Howells 	struct mount *mnt;
360893766fbdSDavid Howells 	struct fd f;
360993766fbdSDavid Howells 	unsigned int mnt_flags = 0;
361093766fbdSDavid Howells 	long ret;
361193766fbdSDavid Howells 
361293766fbdSDavid Howells 	if (!may_mount())
361393766fbdSDavid Howells 		return -EPERM;
361493766fbdSDavid Howells 
361593766fbdSDavid Howells 	if ((flags & ~(FSMOUNT_CLOEXEC)) != 0)
361693766fbdSDavid Howells 		return -EINVAL;
361793766fbdSDavid Howells 
36185b490500SChristian Brauner 	if (attr_flags & ~FSMOUNT_VALID_FLAGS)
361993766fbdSDavid Howells 		return -EINVAL;
362093766fbdSDavid Howells 
36215b490500SChristian Brauner 	mnt_flags = attr_flags_to_mnt_flags(attr_flags);
362293766fbdSDavid Howells 
362393766fbdSDavid Howells 	switch (attr_flags & MOUNT_ATTR__ATIME) {
362493766fbdSDavid Howells 	case MOUNT_ATTR_STRICTATIME:
362593766fbdSDavid Howells 		break;
362693766fbdSDavid Howells 	case MOUNT_ATTR_NOATIME:
362793766fbdSDavid Howells 		mnt_flags |= MNT_NOATIME;
362893766fbdSDavid Howells 		break;
362993766fbdSDavid Howells 	case MOUNT_ATTR_RELATIME:
363093766fbdSDavid Howells 		mnt_flags |= MNT_RELATIME;
363193766fbdSDavid Howells 		break;
363293766fbdSDavid Howells 	default:
363393766fbdSDavid Howells 		return -EINVAL;
363493766fbdSDavid Howells 	}
363593766fbdSDavid Howells 
363693766fbdSDavid Howells 	f = fdget(fs_fd);
363793766fbdSDavid Howells 	if (!f.file)
363893766fbdSDavid Howells 		return -EBADF;
363993766fbdSDavid Howells 
364093766fbdSDavid Howells 	ret = -EINVAL;
364193766fbdSDavid Howells 	if (f.file->f_op != &fscontext_fops)
364293766fbdSDavid Howells 		goto err_fsfd;
364393766fbdSDavid Howells 
364493766fbdSDavid Howells 	fc = f.file->private_data;
364593766fbdSDavid Howells 
364693766fbdSDavid Howells 	ret = mutex_lock_interruptible(&fc->uapi_mutex);
364793766fbdSDavid Howells 	if (ret < 0)
364893766fbdSDavid Howells 		goto err_fsfd;
364993766fbdSDavid Howells 
365093766fbdSDavid Howells 	/* There must be a valid superblock or we can't mount it */
365193766fbdSDavid Howells 	ret = -EINVAL;
365293766fbdSDavid Howells 	if (!fc->root)
365393766fbdSDavid Howells 		goto err_unlock;
365493766fbdSDavid Howells 
365593766fbdSDavid Howells 	ret = -EPERM;
365693766fbdSDavid Howells 	if (mount_too_revealing(fc->root->d_sb, &mnt_flags)) {
365793766fbdSDavid Howells 		pr_warn("VFS: Mount too revealing\n");
365893766fbdSDavid Howells 		goto err_unlock;
365993766fbdSDavid Howells 	}
366093766fbdSDavid Howells 
366193766fbdSDavid Howells 	ret = -EBUSY;
366293766fbdSDavid Howells 	if (fc->phase != FS_CONTEXT_AWAITING_MOUNT)
366393766fbdSDavid Howells 		goto err_unlock;
366493766fbdSDavid Howells 
3665f7e33bdbSJeff Layton 	if (fc->sb_flags & SB_MANDLOCK)
3666f7e33bdbSJeff Layton 		warn_mandlock();
366793766fbdSDavid Howells 
366893766fbdSDavid Howells 	newmount.mnt = vfs_create_mount(fc);
366993766fbdSDavid Howells 	if (IS_ERR(newmount.mnt)) {
367093766fbdSDavid Howells 		ret = PTR_ERR(newmount.mnt);
367193766fbdSDavid Howells 		goto err_unlock;
367293766fbdSDavid Howells 	}
367393766fbdSDavid Howells 	newmount.dentry = dget(fc->root);
367493766fbdSDavid Howells 	newmount.mnt->mnt_flags = mnt_flags;
367593766fbdSDavid Howells 
367693766fbdSDavid Howells 	/* We've done the mount bit - now move the file context into more or
367793766fbdSDavid Howells 	 * less the same state as if we'd done an fspick().  We don't want to
367893766fbdSDavid Howells 	 * do any memory allocation or anything like that at this point as we
367993766fbdSDavid Howells 	 * don't want to have to handle any errors incurred.
368093766fbdSDavid Howells 	 */
368193766fbdSDavid Howells 	vfs_clean_context(fc);
368293766fbdSDavid Howells 
368393766fbdSDavid Howells 	ns = alloc_mnt_ns(current->nsproxy->mnt_ns->user_ns, true);
368493766fbdSDavid Howells 	if (IS_ERR(ns)) {
368593766fbdSDavid Howells 		ret = PTR_ERR(ns);
368693766fbdSDavid Howells 		goto err_path;
368793766fbdSDavid Howells 	}
368893766fbdSDavid Howells 	mnt = real_mount(newmount.mnt);
368993766fbdSDavid Howells 	mnt->mnt_ns = ns;
369093766fbdSDavid Howells 	ns->root = mnt;
369193766fbdSDavid Howells 	ns->mounts = 1;
369293766fbdSDavid Howells 	list_add(&mnt->mnt_list, &ns->list);
36931b0b9cc8SEric Biggers 	mntget(newmount.mnt);
369493766fbdSDavid Howells 
369593766fbdSDavid Howells 	/* Attach to an apparent O_PATH fd with a note that we need to unmount
369693766fbdSDavid Howells 	 * it, not just simply put it.
369793766fbdSDavid Howells 	 */
369893766fbdSDavid Howells 	file = dentry_open(&newmount, O_PATH, fc->cred);
369993766fbdSDavid Howells 	if (IS_ERR(file)) {
370093766fbdSDavid Howells 		dissolve_on_fput(newmount.mnt);
370193766fbdSDavid Howells 		ret = PTR_ERR(file);
370293766fbdSDavid Howells 		goto err_path;
370393766fbdSDavid Howells 	}
370493766fbdSDavid Howells 	file->f_mode |= FMODE_NEED_UNMOUNT;
370593766fbdSDavid Howells 
370693766fbdSDavid Howells 	ret = get_unused_fd_flags((flags & FSMOUNT_CLOEXEC) ? O_CLOEXEC : 0);
370793766fbdSDavid Howells 	if (ret >= 0)
370893766fbdSDavid Howells 		fd_install(ret, file);
370993766fbdSDavid Howells 	else
371093766fbdSDavid Howells 		fput(file);
371193766fbdSDavid Howells 
371293766fbdSDavid Howells err_path:
371393766fbdSDavid Howells 	path_put(&newmount);
371493766fbdSDavid Howells err_unlock:
371593766fbdSDavid Howells 	mutex_unlock(&fc->uapi_mutex);
371693766fbdSDavid Howells err_fsfd:
371793766fbdSDavid Howells 	fdput(f);
371893766fbdSDavid Howells 	return ret;
371993766fbdSDavid Howells }
372093766fbdSDavid Howells 
372193766fbdSDavid Howells /*
372293766fbdSDavid Howells  * Move a mount from one place to another.  In combination with
372393766fbdSDavid Howells  * fsopen()/fsmount() this is used to install a new mount and in combination
372493766fbdSDavid Howells  * with open_tree(OPEN_TREE_CLONE [| AT_RECURSIVE]) it can be used to copy
372593766fbdSDavid Howells  * a mount subtree.
37262db154b3SDavid Howells  *
37272db154b3SDavid Howells  * Note the flags value is a combination of MOVE_MOUNT_* flags.
37282db154b3SDavid Howells  */
37292db154b3SDavid Howells SYSCALL_DEFINE5(move_mount,
37302658ce09SBen Dooks 		int, from_dfd, const char __user *, from_pathname,
37312658ce09SBen Dooks 		int, to_dfd, const char __user *, to_pathname,
37322db154b3SDavid Howells 		unsigned int, flags)
37332db154b3SDavid Howells {
37342db154b3SDavid Howells 	struct path from_path, to_path;
37352db154b3SDavid Howells 	unsigned int lflags;
37362db154b3SDavid Howells 	int ret = 0;
37372db154b3SDavid Howells 
37382db154b3SDavid Howells 	if (!may_mount())
37392db154b3SDavid Howells 		return -EPERM;
37402db154b3SDavid Howells 
37412db154b3SDavid Howells 	if (flags & ~MOVE_MOUNT__MASK)
37422db154b3SDavid Howells 		return -EINVAL;
37432db154b3SDavid Howells 
37442db154b3SDavid Howells 	/* If someone gives a pathname, they aren't permitted to move
37452db154b3SDavid Howells 	 * from an fd that requires unmount as we can't get at the flag
37462db154b3SDavid Howells 	 * to clear it afterwards.
37472db154b3SDavid Howells 	 */
37482db154b3SDavid Howells 	lflags = 0;
37492db154b3SDavid Howells 	if (flags & MOVE_MOUNT_F_SYMLINKS)	lflags |= LOOKUP_FOLLOW;
37502db154b3SDavid Howells 	if (flags & MOVE_MOUNT_F_AUTOMOUNTS)	lflags |= LOOKUP_AUTOMOUNT;
37512db154b3SDavid Howells 	if (flags & MOVE_MOUNT_F_EMPTY_PATH)	lflags |= LOOKUP_EMPTY;
37522db154b3SDavid Howells 
37532db154b3SDavid Howells 	ret = user_path_at(from_dfd, from_pathname, lflags, &from_path);
37542db154b3SDavid Howells 	if (ret < 0)
37552db154b3SDavid Howells 		return ret;
37562db154b3SDavid Howells 
37572db154b3SDavid Howells 	lflags = 0;
37582db154b3SDavid Howells 	if (flags & MOVE_MOUNT_T_SYMLINKS)	lflags |= LOOKUP_FOLLOW;
37592db154b3SDavid Howells 	if (flags & MOVE_MOUNT_T_AUTOMOUNTS)	lflags |= LOOKUP_AUTOMOUNT;
37602db154b3SDavid Howells 	if (flags & MOVE_MOUNT_T_EMPTY_PATH)	lflags |= LOOKUP_EMPTY;
37612db154b3SDavid Howells 
37622db154b3SDavid Howells 	ret = user_path_at(to_dfd, to_pathname, lflags, &to_path);
37632db154b3SDavid Howells 	if (ret < 0)
37642db154b3SDavid Howells 		goto out_from;
37652db154b3SDavid Howells 
37662db154b3SDavid Howells 	ret = security_move_mount(&from_path, &to_path);
37672db154b3SDavid Howells 	if (ret < 0)
37682db154b3SDavid Howells 		goto out_to;
37692db154b3SDavid Howells 
37709ffb14efSPavel Tikhomirov 	if (flags & MOVE_MOUNT_SET_GROUP)
37719ffb14efSPavel Tikhomirov 		ret = do_set_group(&from_path, &to_path);
37729ffb14efSPavel Tikhomirov 	else
37732db154b3SDavid Howells 		ret = do_move_mount(&from_path, &to_path);
37742db154b3SDavid Howells 
37752db154b3SDavid Howells out_to:
37762db154b3SDavid Howells 	path_put(&to_path);
37772db154b3SDavid Howells out_from:
37782db154b3SDavid Howells 	path_put(&from_path);
37792db154b3SDavid Howells 	return ret;
37802db154b3SDavid Howells }
37812db154b3SDavid Howells 
37822db154b3SDavid Howells /*
3783afac7cbaSAl Viro  * Return true if path is reachable from root
3784afac7cbaSAl Viro  *
378548a066e7SAl Viro  * namespace_sem or mount_lock is held
3786afac7cbaSAl Viro  */
3787643822b4SAl Viro bool is_path_reachable(struct mount *mnt, struct dentry *dentry,
3788afac7cbaSAl Viro 			 const struct path *root)
3789afac7cbaSAl Viro {
3790643822b4SAl Viro 	while (&mnt->mnt != root->mnt && mnt_has_parent(mnt)) {
3791a73324daSAl Viro 		dentry = mnt->mnt_mountpoint;
37920714a533SAl Viro 		mnt = mnt->mnt_parent;
3793afac7cbaSAl Viro 	}
3794643822b4SAl Viro 	return &mnt->mnt == root->mnt && is_subdir(dentry, root->dentry);
3795afac7cbaSAl Viro }
3796afac7cbaSAl Viro 
3797640eb7e7SMickaël Salaün bool path_is_under(const struct path *path1, const struct path *path2)
3798afac7cbaSAl Viro {
379925ab4c9bSYaowei Bai 	bool res;
380048a066e7SAl Viro 	read_seqlock_excl(&mount_lock);
3801643822b4SAl Viro 	res = is_path_reachable(real_mount(path1->mnt), path1->dentry, path2);
380248a066e7SAl Viro 	read_sequnlock_excl(&mount_lock);
3803afac7cbaSAl Viro 	return res;
3804afac7cbaSAl Viro }
3805afac7cbaSAl Viro EXPORT_SYMBOL(path_is_under);
3806afac7cbaSAl Viro 
3807afac7cbaSAl Viro /*
38081da177e4SLinus Torvalds  * pivot_root Semantics:
38091da177e4SLinus Torvalds  * Moves the root file system of the current process to the directory put_old,
38101da177e4SLinus Torvalds  * makes new_root as the new root file system of the current process, and sets
38111da177e4SLinus Torvalds  * root/cwd of all processes which had them on the current root to new_root.
38121da177e4SLinus Torvalds  *
38131da177e4SLinus Torvalds  * Restrictions:
38141da177e4SLinus Torvalds  * The new_root and put_old must be directories, and  must not be on the
38151da177e4SLinus Torvalds  * same file  system as the current process root. The put_old  must  be
38161da177e4SLinus Torvalds  * underneath new_root,  i.e. adding a non-zero number of /.. to the string
38171da177e4SLinus Torvalds  * pointed to by put_old must yield the same directory as new_root. No other
38181da177e4SLinus Torvalds  * file system may be mounted on put_old. After all, new_root is a mountpoint.
38191da177e4SLinus Torvalds  *
38204a0d11faSNeil Brown  * Also, the current root cannot be on the 'rootfs' (initial ramfs) filesystem.
38210c1bc6b8SMauro Carvalho Chehab  * See Documentation/filesystems/ramfs-rootfs-initramfs.rst for alternatives
38224a0d11faSNeil Brown  * in this situation.
38234a0d11faSNeil Brown  *
38241da177e4SLinus Torvalds  * Notes:
38251da177e4SLinus Torvalds  *  - we don't move root/cwd if they are not at the root (reason: if something
38261da177e4SLinus Torvalds  *    cared enough to change them, it's probably wrong to force them elsewhere)
38271da177e4SLinus Torvalds  *  - it's okay to pick a root that isn't the root of a file system, e.g.
38281da177e4SLinus Torvalds  *    /nfs/my_root where /nfs is the mount point. It must be a mountpoint,
38291da177e4SLinus Torvalds  *    though, so you may need to say mount --bind /nfs/my_root /nfs/my_root
38301da177e4SLinus Torvalds  *    first.
38311da177e4SLinus Torvalds  */
38323480b257SHeiko Carstens SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
38333480b257SHeiko Carstens 		const char __user *, put_old)
38341da177e4SLinus Torvalds {
38352763d119SAl Viro 	struct path new, old, root;
38362763d119SAl Viro 	struct mount *new_mnt, *root_mnt, *old_mnt, *root_parent, *ex_parent;
383784d17192SAl Viro 	struct mountpoint *old_mp, *root_mp;
38381da177e4SLinus Torvalds 	int error;
38391da177e4SLinus Torvalds 
	/*
	 * Overall flow: resolve both user-supplied paths, validate the mount
	 * topology while put_old is locked, then re-attach the current root
	 * mount at put_old and the new_root mount where the old root used to
	 * be.  Returns 0 on success or a negative errno; the out0..out4
	 * labels release, in reverse order, whatever was acquired so far.
	 */
38409b40bc90SAl Viro 	if (!may_mount())
38411da177e4SLinus Torvalds 		return -EPERM;
38421da177e4SLinus Torvalds 
	/* Both paths follow symlinks and must resolve to directories. */
3843ce6595a2SAl Viro 	error = user_path_at(AT_FDCWD, new_root,
3844ce6595a2SAl Viro 			     LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &new);
38451da177e4SLinus Torvalds 	if (error)
38461da177e4SLinus Torvalds 		goto out0;
38471da177e4SLinus Torvalds 
3848ce6595a2SAl Viro 	error = user_path_at(AT_FDCWD, put_old,
3849ce6595a2SAl Viro 			     LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &old);
38501da177e4SLinus Torvalds 	if (error)
38511da177e4SLinus Torvalds 		goto out1;
38521da177e4SLinus Torvalds 
	/* Give the security module a chance to veto the operation. */
38532d8f3038SAl Viro 	error = security_sb_pivotroot(&old, &new);
3854b12cea91SAl Viro 	if (error)
3855b12cea91SAl Viro 		goto out2;
38561da177e4SLinus Torvalds 
	/*
	 * Snapshot the caller's current root (released by path_put() at out3)
	 * and lock put_old; the returned mountpoint is released by
	 * unlock_mount() at out4.
	 */
3857f7ad3c6bSMiklos Szeredi 	get_fs_root(current->fs, &root);
385884d17192SAl Viro 	old_mp = lock_mount(&old);
385984d17192SAl Viro 	error = PTR_ERR(old_mp);
386084d17192SAl Viro 	if (IS_ERR(old_mp))
3861b12cea91SAl Viro 		goto out3;
3862b12cea91SAl Viro 
38631da177e4SLinus Torvalds 	error = -EINVAL;
3864419148daSAl Viro 	new_mnt = real_mount(new.mnt);
3865419148daSAl Viro 	root_mnt = real_mount(root.mnt);
386684d17192SAl Viro 	old_mnt = real_mount(old.mnt);
38672763d119SAl Viro 	ex_parent = new_mnt->mnt_parent;
38682763d119SAl Viro 	root_parent = root_mnt->mnt_parent;
	/*
	 * Refuse if the mount holding put_old, the parent of new_root's
	 * mount, or the parent of the current root mount is shared.
	 */
386984d17192SAl Viro 	if (IS_MNT_SHARED(old_mnt) ||
38702763d119SAl Viro 		IS_MNT_SHARED(ex_parent) ||
38712763d119SAl Viro 		IS_MNT_SHARED(root_parent))
3872b12cea91SAl Viro 		goto out4;
	/*
	 * NOTE(review): check_mnt() presumably verifies that the mount belongs
	 * to the caller's mount namespace - confirm against its definition.
	 */
3873143c8c91SAl Viro 	if (!check_mnt(root_mnt) || !check_mnt(new_mnt))
3874b12cea91SAl Viro 		goto out4;
	/* A locked mount may not be detached from its parent. */
38755ff9d8a6SEric W. Biederman 	if (new_mnt->mnt.mnt_flags & MNT_LOCKED)
38765ff9d8a6SEric W. Biederman 		goto out4;
38771da177e4SLinus Torvalds 	error = -ENOENT;
3878f3da392eSAlexey Dobriyan 	if (d_unlinked(new.dentry))
3879b12cea91SAl Viro 		goto out4;
38801da177e4SLinus Torvalds 	error = -EBUSY;
388184d17192SAl Viro 	if (new_mnt == root_mnt || old_mnt == root_mnt)
3882b12cea91SAl Viro 		goto out4; /* loop, on the same file system  */
38831da177e4SLinus Torvalds 	error = -EINVAL;
38848c3ee42eSAl Viro 	if (root.mnt->mnt_root != root.dentry)
3885b12cea91SAl Viro 		goto out4; /* not a mountpoint */
3886676da58dSAl Viro 	if (!mnt_has_parent(root_mnt))
3887b12cea91SAl Viro 		goto out4; /* not attached */
38882d8f3038SAl Viro 	if (new.mnt->mnt_root != new.dentry)
3889b12cea91SAl Viro 		goto out4; /* not a mountpoint */
3890676da58dSAl Viro 	if (!mnt_has_parent(new_mnt))
3891b12cea91SAl Viro 		goto out4; /* not attached */
38924ac91378SJan Blunck 	/* make sure we can reach put_old from new_root */
389384d17192SAl Viro 	if (!is_path_reachable(old_mnt, old.dentry, &new))
3894b12cea91SAl Viro 		goto out4;
38950d082601SEric W. Biederman 	/* make certain new is below the root */
38960d082601SEric W. Biederman 	if (!is_path_reachable(new_mnt, new.dentry, &root))
38970d082601SEric W. Biederman 		goto out4;
	/* All checks passed: from here on the swap cannot fail. */
3898719ea2fbSAl Viro 	lock_mount_hash();
38992763d119SAl Viro 	umount_mnt(new_mnt);
39002763d119SAl Viro 	root_mp = unhash_mnt(root_mnt);  /* we'll need its mountpoint */
	/* Carry the old root's MNT_LOCKED flag over to the new root. */
39015ff9d8a6SEric W. Biederman 	if (root_mnt->mnt.mnt_flags & MNT_LOCKED) {
39025ff9d8a6SEric W. Biederman 		new_mnt->mnt.mnt_flags |= MNT_LOCKED;
39035ff9d8a6SEric W. Biederman 		root_mnt->mnt.mnt_flags &= ~MNT_LOCKED;
39045ff9d8a6SEric W. Biederman 	}
39054ac91378SJan Blunck 	/* mount old root on put_old */
390684d17192SAl Viro 	attach_mnt(root_mnt, old_mnt, old_mp);
39074ac91378SJan Blunck 	/* mount new_root on / */
39082763d119SAl Viro 	attach_mnt(new_mnt, root_parent, root_mp);
	/*
	 * NOTE(review): presumably balances a reference on root_parent that
	 * the attach_mnt() above took in addition to the one the old root
	 * already held - confirm against attach_mnt()/unhash_mnt().
	 */
39092763d119SAl Viro 	mnt_add_count(root_parent, -1);
39106b3286edSKirill Korotaev 	touch_mnt_namespace(current->nsproxy->mnt_ns);
39114fed655cSEric W. Biederman 	/* A moved mount should not expire automatically */
39124fed655cSEric W. Biederman 	list_del_init(&new_mnt->mnt_expire);
39133895dbf8SEric W. Biederman 	put_mountpoint(root_mp);
3914719ea2fbSAl Viro 	unlock_mount_hash();
	/*
	 * NOTE(review): presumably repoints root/cwd references that still
	 * name the old root at the new one - confirm in chroot_fs_refs().
	 */
39152d8f3038SAl Viro 	chroot_fs_refs(&root, &new);
39161da177e4SLinus Torvalds 	error = 0;
3917b12cea91SAl Viro out4:
391884d17192SAl Viro 	unlock_mount(old_mp);
	/*
	 * Only on success was new_mnt detached from ex_parent, so only then is
	 * a reference on the former parent dropped.
	 */
39192763d119SAl Viro 	if (!error)
39202763d119SAl Viro 		mntput_no_expire(ex_parent);
3921b12cea91SAl Viro out3:
39228c3ee42eSAl Viro 	path_put(&root);
3923b12cea91SAl Viro out2:
39242d8f3038SAl Viro 	path_put(&old);
39251da177e4SLinus Torvalds out1:
39262d8f3038SAl Viro 	path_put(&new);
39271da177e4SLinus Torvalds out0:
39281da177e4SLinus Torvalds 	return error;
39291da177e4SLinus Torvalds }
39301da177e4SLinus Torvalds 
39312a186721SChristian Brauner static unsigned int recalc_flags(struct mount_kattr *kattr, struct mount *mnt)
39322a186721SChristian Brauner {
39332a186721SChristian Brauner 	unsigned int flags = mnt->mnt.mnt_flags;
39342a186721SChristian Brauner 
39352a186721SChristian Brauner 	/*  flags to clear */
39362a186721SChristian Brauner 	flags &= ~kattr->attr_clr;
39372a186721SChristian Brauner 	/* flags to raise */
39382a186721SChristian Brauner 	flags |= kattr->attr_set;
39392a186721SChristian Brauner 
39402a186721SChristian Brauner 	return flags;
39412a186721SChristian Brauner }
39422a186721SChristian Brauner 
39439caccd41SChristian Brauner static int can_idmap_mount(const struct mount_kattr *kattr, struct mount *mnt)
39449caccd41SChristian Brauner {
39459caccd41SChristian Brauner 	struct vfsmount *m = &mnt->mnt;
39469caccd41SChristian Brauner 
39479caccd41SChristian Brauner 	if (!kattr->mnt_userns)
39489caccd41SChristian Brauner 		return 0;
39499caccd41SChristian Brauner 
39509caccd41SChristian Brauner 	/*
39519caccd41SChristian Brauner 	 * Once a mount has been idmapped we don't allow it to change its
39529caccd41SChristian Brauner 	 * mapping. It makes things simpler and callers can just create
39539caccd41SChristian Brauner 	 * another bind-mount they can idmap if they want to.
39549caccd41SChristian Brauner 	 */
39559caccd41SChristian Brauner 	if (mnt_user_ns(m) != &init_user_ns)
39569caccd41SChristian Brauner 		return -EPERM;
39579caccd41SChristian Brauner 
39589caccd41SChristian Brauner 	/* The underlying filesystem doesn't support idmapped mounts yet. */
39599caccd41SChristian Brauner 	if (!(m->mnt_sb->s_type->fs_flags & FS_ALLOW_IDMAP))
39609caccd41SChristian Brauner 		return -EINVAL;
39619caccd41SChristian Brauner 
39622ca4dcc4SChristian Brauner 	/* Don't yet support filesystem mountable in user namespaces. */
39632ca4dcc4SChristian Brauner 	if (m->mnt_sb->s_user_ns != &init_user_ns)
39642ca4dcc4SChristian Brauner 		return -EINVAL;
39652ca4dcc4SChristian Brauner 
39669caccd41SChristian Brauner 	/* We're not controlling the superblock. */
39672ca4dcc4SChristian Brauner 	if (!capable(CAP_SYS_ADMIN))
39689caccd41SChristian Brauner 		return -EPERM;
39699caccd41SChristian Brauner 
39709caccd41SChristian Brauner 	/* Mount has already been visible in the filesystem hierarchy. */
39719caccd41SChristian Brauner 	if (!is_anon_ns(mnt->mnt_ns))
39729caccd41SChristian Brauner 		return -EINVAL;
39739caccd41SChristian Brauner 
39749caccd41SChristian Brauner 	return 0;
39759caccd41SChristian Brauner }
39769caccd41SChristian Brauner 
39772a186721SChristian Brauner static struct mount *mount_setattr_prepare(struct mount_kattr *kattr,
39782a186721SChristian Brauner 					   struct mount *mnt, int *err)
39792a186721SChristian Brauner {
39802a186721SChristian Brauner 	struct mount *m = mnt, *last = NULL;
39812a186721SChristian Brauner 
39822a186721SChristian Brauner 	if (!is_mounted(&m->mnt)) {
39832a186721SChristian Brauner 		*err = -EINVAL;
39842a186721SChristian Brauner 		goto out;
39852a186721SChristian Brauner 	}
39862a186721SChristian Brauner 
39872a186721SChristian Brauner 	if (!(mnt_has_parent(m) ? check_mnt(m) : is_anon_ns(m->mnt_ns))) {
39882a186721SChristian Brauner 		*err = -EINVAL;
39892a186721SChristian Brauner 		goto out;
39902a186721SChristian Brauner 	}
39912a186721SChristian Brauner 
39922a186721SChristian Brauner 	do {
39932a186721SChristian Brauner 		unsigned int flags;
39942a186721SChristian Brauner 
39952a186721SChristian Brauner 		flags = recalc_flags(kattr, m);
39962a186721SChristian Brauner 		if (!can_change_locked_flags(m, flags)) {
39972a186721SChristian Brauner 			*err = -EPERM;
39982a186721SChristian Brauner 			goto out;
39992a186721SChristian Brauner 		}
40002a186721SChristian Brauner 
40019caccd41SChristian Brauner 		*err = can_idmap_mount(kattr, m);
40029caccd41SChristian Brauner 		if (*err)
40039caccd41SChristian Brauner 			goto out;
40049caccd41SChristian Brauner 
40052a186721SChristian Brauner 		last = m;
40062a186721SChristian Brauner 
40072a186721SChristian Brauner 		if ((kattr->attr_set & MNT_READONLY) &&
40082a186721SChristian Brauner 		    !(m->mnt.mnt_flags & MNT_READONLY)) {
40092a186721SChristian Brauner 			*err = mnt_hold_writers(m);
40102a186721SChristian Brauner 			if (*err)
40112a186721SChristian Brauner 				goto out;
40122a186721SChristian Brauner 		}
40132a186721SChristian Brauner 	} while (kattr->recurse && (m = next_mnt(m, mnt)));
40142a186721SChristian Brauner 
40152a186721SChristian Brauner out:
40162a186721SChristian Brauner 	return last;
40172a186721SChristian Brauner }
40182a186721SChristian Brauner 
40199caccd41SChristian Brauner static void do_idmap_mount(const struct mount_kattr *kattr, struct mount *mnt)
40209caccd41SChristian Brauner {
40219caccd41SChristian Brauner 	struct user_namespace *mnt_userns;
40229caccd41SChristian Brauner 
40239caccd41SChristian Brauner 	if (!kattr->mnt_userns)
40249caccd41SChristian Brauner 		return;
40259caccd41SChristian Brauner 
40269caccd41SChristian Brauner 	mnt_userns = get_user_ns(kattr->mnt_userns);
40279caccd41SChristian Brauner 	/* Pairs with smp_load_acquire() in mnt_user_ns(). */
40289caccd41SChristian Brauner 	smp_store_release(&mnt->mnt.mnt_userns, mnt_userns);
40299caccd41SChristian Brauner }
40309caccd41SChristian Brauner 
40312a186721SChristian Brauner static void mount_setattr_commit(struct mount_kattr *kattr,
40322a186721SChristian Brauner 				 struct mount *mnt, struct mount *last,
40332a186721SChristian Brauner 				 int err)
40342a186721SChristian Brauner {
40352a186721SChristian Brauner 	struct mount *m = mnt;
40362a186721SChristian Brauner 
40372a186721SChristian Brauner 	do {
40382a186721SChristian Brauner 		if (!err) {
40392a186721SChristian Brauner 			unsigned int flags;
40402a186721SChristian Brauner 
40419caccd41SChristian Brauner 			do_idmap_mount(kattr, m);
40422a186721SChristian Brauner 			flags = recalc_flags(kattr, m);
40432a186721SChristian Brauner 			WRITE_ONCE(m->mnt.mnt_flags, flags);
40442a186721SChristian Brauner 		}
40452a186721SChristian Brauner 
40462a186721SChristian Brauner 		/*
40472a186721SChristian Brauner 		 * We either set MNT_READONLY above so make it visible
40482a186721SChristian Brauner 		 * before ~MNT_WRITE_HOLD or we failed to recursively
40492a186721SChristian Brauner 		 * apply mount options.
40502a186721SChristian Brauner 		 */
40512a186721SChristian Brauner 		if ((kattr->attr_set & MNT_READONLY) &&
40522a186721SChristian Brauner 		    (m->mnt.mnt_flags & MNT_WRITE_HOLD))
40532a186721SChristian Brauner 			mnt_unhold_writers(m);
40542a186721SChristian Brauner 
40552a186721SChristian Brauner 		if (!err && kattr->propagation)
40562a186721SChristian Brauner 			change_mnt_propagation(m, kattr->propagation);
40572a186721SChristian Brauner 
40582a186721SChristian Brauner 		/*
40592a186721SChristian Brauner 		 * On failure, only cleanup until we found the first mount
40602a186721SChristian Brauner 		 * we failed to handle.
40612a186721SChristian Brauner 		 */
40622a186721SChristian Brauner 		if (err && m == last)
40632a186721SChristian Brauner 			break;
40642a186721SChristian Brauner 	} while (kattr->recurse && (m = next_mnt(m, mnt)));
40652a186721SChristian Brauner 
40662a186721SChristian Brauner 	if (!err)
40672a186721SChristian Brauner 		touch_mnt_namespace(mnt->mnt_ns);
40682a186721SChristian Brauner }
40692a186721SChristian Brauner 
40702a186721SChristian Brauner static int do_mount_setattr(struct path *path, struct mount_kattr *kattr)
40712a186721SChristian Brauner {
40722a186721SChristian Brauner 	struct mount *mnt = real_mount(path->mnt), *last = NULL;
40732a186721SChristian Brauner 	int err = 0;
40742a186721SChristian Brauner 
40752a186721SChristian Brauner 	if (path->dentry != mnt->mnt.mnt_root)
40762a186721SChristian Brauner 		return -EINVAL;
40772a186721SChristian Brauner 
40782a186721SChristian Brauner 	if (kattr->propagation) {
40792a186721SChristian Brauner 		/*
40802a186721SChristian Brauner 		 * Only take namespace_lock() if we're actually changing
40812a186721SChristian Brauner 		 * propagation.
40822a186721SChristian Brauner 		 */
40832a186721SChristian Brauner 		namespace_lock();
40842a186721SChristian Brauner 		if (kattr->propagation == MS_SHARED) {
40852a186721SChristian Brauner 			err = invent_group_ids(mnt, kattr->recurse);
40862a186721SChristian Brauner 			if (err) {
40872a186721SChristian Brauner 				namespace_unlock();
40882a186721SChristian Brauner 				return err;
40892a186721SChristian Brauner 			}
40902a186721SChristian Brauner 		}
40912a186721SChristian Brauner 	}
40922a186721SChristian Brauner 
40932a186721SChristian Brauner 	lock_mount_hash();
40942a186721SChristian Brauner 
40952a186721SChristian Brauner 	/*
40962a186721SChristian Brauner 	 * Get the mount tree in a shape where we can change mount
40972a186721SChristian Brauner 	 * properties without failure.
40982a186721SChristian Brauner 	 */
40992a186721SChristian Brauner 	last = mount_setattr_prepare(kattr, mnt, &err);
41002a186721SChristian Brauner 	if (last) /* Commit all changes or revert to the old state. */
41012a186721SChristian Brauner 		mount_setattr_commit(kattr, mnt, last, err);
41022a186721SChristian Brauner 
41032a186721SChristian Brauner 	unlock_mount_hash();
41042a186721SChristian Brauner 
41052a186721SChristian Brauner 	if (kattr->propagation) {
41062a186721SChristian Brauner 		namespace_unlock();
41072a186721SChristian Brauner 		if (err)
41082a186721SChristian Brauner 			cleanup_group_ids(mnt, NULL);
41092a186721SChristian Brauner 	}
41102a186721SChristian Brauner 
41112a186721SChristian Brauner 	return err;
41122a186721SChristian Brauner }
41132a186721SChristian Brauner 
41149caccd41SChristian Brauner static int build_mount_idmapped(const struct mount_attr *attr, size_t usize,
41159caccd41SChristian Brauner 				struct mount_kattr *kattr, unsigned int flags)
41169caccd41SChristian Brauner {
41179caccd41SChristian Brauner 	int err = 0;
41189caccd41SChristian Brauner 	struct ns_common *ns;
41199caccd41SChristian Brauner 	struct user_namespace *mnt_userns;
41209caccd41SChristian Brauner 	struct file *file;
41219caccd41SChristian Brauner 
41229caccd41SChristian Brauner 	if (!((attr->attr_set | attr->attr_clr) & MOUNT_ATTR_IDMAP))
41239caccd41SChristian Brauner 		return 0;
41249caccd41SChristian Brauner 
41259caccd41SChristian Brauner 	/*
41269caccd41SChristian Brauner 	 * We currently do not support clearing an idmapped mount. If this ever
41279caccd41SChristian Brauner 	 * is a use-case we can revisit this but for now let's keep it simple
41289caccd41SChristian Brauner 	 * and not allow it.
41299caccd41SChristian Brauner 	 */
41309caccd41SChristian Brauner 	if (attr->attr_clr & MOUNT_ATTR_IDMAP)
41319caccd41SChristian Brauner 		return -EINVAL;
41329caccd41SChristian Brauner 
41339caccd41SChristian Brauner 	if (attr->userns_fd > INT_MAX)
41349caccd41SChristian Brauner 		return -EINVAL;
41359caccd41SChristian Brauner 
41369caccd41SChristian Brauner 	file = fget(attr->userns_fd);
41379caccd41SChristian Brauner 	if (!file)
41389caccd41SChristian Brauner 		return -EBADF;
41399caccd41SChristian Brauner 
41409caccd41SChristian Brauner 	if (!proc_ns_file(file)) {
41419caccd41SChristian Brauner 		err = -EINVAL;
41429caccd41SChristian Brauner 		goto out_fput;
41439caccd41SChristian Brauner 	}
41449caccd41SChristian Brauner 
41459caccd41SChristian Brauner 	ns = get_proc_ns(file_inode(file));
41469caccd41SChristian Brauner 	if (ns->ops->type != CLONE_NEWUSER) {
41479caccd41SChristian Brauner 		err = -EINVAL;
41489caccd41SChristian Brauner 		goto out_fput;
41499caccd41SChristian Brauner 	}
41509caccd41SChristian Brauner 
41519caccd41SChristian Brauner 	/*
41529caccd41SChristian Brauner 	 * The init_user_ns is used to indicate that a vfsmount is not idmapped.
41539caccd41SChristian Brauner 	 * This is simpler than just having to treat NULL as unmapped. Users
41549caccd41SChristian Brauner 	 * wanting to idmap a mount to init_user_ns can just use a namespace
41559caccd41SChristian Brauner 	 * with an identity mapping.
41569caccd41SChristian Brauner 	 */
41579caccd41SChristian Brauner 	mnt_userns = container_of(ns, struct user_namespace, ns);
41589caccd41SChristian Brauner 	if (mnt_userns == &init_user_ns) {
41599caccd41SChristian Brauner 		err = -EPERM;
41609caccd41SChristian Brauner 		goto out_fput;
41619caccd41SChristian Brauner 	}
41629caccd41SChristian Brauner 	kattr->mnt_userns = get_user_ns(mnt_userns);
41639caccd41SChristian Brauner 
41649caccd41SChristian Brauner out_fput:
41659caccd41SChristian Brauner 	fput(file);
41669caccd41SChristian Brauner 	return err;
41679caccd41SChristian Brauner }
41689caccd41SChristian Brauner 
41699caccd41SChristian Brauner static int build_mount_kattr(const struct mount_attr *attr, size_t usize,
41702a186721SChristian Brauner 			     struct mount_kattr *kattr, unsigned int flags)
41712a186721SChristian Brauner {
41722a186721SChristian Brauner 	unsigned int lookup_flags = LOOKUP_AUTOMOUNT | LOOKUP_FOLLOW;
41732a186721SChristian Brauner 
41742a186721SChristian Brauner 	if (flags & AT_NO_AUTOMOUNT)
41752a186721SChristian Brauner 		lookup_flags &= ~LOOKUP_AUTOMOUNT;
41762a186721SChristian Brauner 	if (flags & AT_SYMLINK_NOFOLLOW)
41772a186721SChristian Brauner 		lookup_flags &= ~LOOKUP_FOLLOW;
41782a186721SChristian Brauner 	if (flags & AT_EMPTY_PATH)
41792a186721SChristian Brauner 		lookup_flags |= LOOKUP_EMPTY;
41802a186721SChristian Brauner 
41812a186721SChristian Brauner 	*kattr = (struct mount_kattr) {
41822a186721SChristian Brauner 		.lookup_flags	= lookup_flags,
41832a186721SChristian Brauner 		.recurse	= !!(flags & AT_RECURSIVE),
41842a186721SChristian Brauner 	};
41852a186721SChristian Brauner 
41862a186721SChristian Brauner 	if (attr->propagation & ~MOUNT_SETATTR_PROPAGATION_FLAGS)
41872a186721SChristian Brauner 		return -EINVAL;
41882a186721SChristian Brauner 	if (hweight32(attr->propagation & MOUNT_SETATTR_PROPAGATION_FLAGS) > 1)
41892a186721SChristian Brauner 		return -EINVAL;
41902a186721SChristian Brauner 	kattr->propagation = attr->propagation;
41912a186721SChristian Brauner 
41922a186721SChristian Brauner 	if ((attr->attr_set | attr->attr_clr) & ~MOUNT_SETATTR_VALID_FLAGS)
41932a186721SChristian Brauner 		return -EINVAL;
41942a186721SChristian Brauner 
41952a186721SChristian Brauner 	kattr->attr_set = attr_flags_to_mnt_flags(attr->attr_set);
41962a186721SChristian Brauner 	kattr->attr_clr = attr_flags_to_mnt_flags(attr->attr_clr);
41972a186721SChristian Brauner 
41982a186721SChristian Brauner 	/*
41992a186721SChristian Brauner 	 * Since the MOUNT_ATTR_<atime> values are an enum, not a bitmap,
42002a186721SChristian Brauner 	 * users wanting to transition to a different atime setting cannot
42012a186721SChristian Brauner 	 * simply specify the atime setting in @attr_set, but must also
42022a186721SChristian Brauner 	 * specify MOUNT_ATTR__ATIME in the @attr_clr field.
42032a186721SChristian Brauner 	 * So ensure that MOUNT_ATTR__ATIME can't be partially set in
42042a186721SChristian Brauner 	 * @attr_clr and that @attr_set can't have any atime bits set if
42052a186721SChristian Brauner 	 * MOUNT_ATTR__ATIME isn't set in @attr_clr.
42062a186721SChristian Brauner 	 */
42072a186721SChristian Brauner 	if (attr->attr_clr & MOUNT_ATTR__ATIME) {
42082a186721SChristian Brauner 		if ((attr->attr_clr & MOUNT_ATTR__ATIME) != MOUNT_ATTR__ATIME)
42092a186721SChristian Brauner 			return -EINVAL;
42102a186721SChristian Brauner 
42112a186721SChristian Brauner 		/*
42122a186721SChristian Brauner 		 * Clear all previous time settings as they are mutually
42132a186721SChristian Brauner 		 * exclusive.
42142a186721SChristian Brauner 		 */
42152a186721SChristian Brauner 		kattr->attr_clr |= MNT_RELATIME | MNT_NOATIME;
42162a186721SChristian Brauner 		switch (attr->attr_set & MOUNT_ATTR__ATIME) {
42172a186721SChristian Brauner 		case MOUNT_ATTR_RELATIME:
42182a186721SChristian Brauner 			kattr->attr_set |= MNT_RELATIME;
42192a186721SChristian Brauner 			break;
42202a186721SChristian Brauner 		case MOUNT_ATTR_NOATIME:
42212a186721SChristian Brauner 			kattr->attr_set |= MNT_NOATIME;
42222a186721SChristian Brauner 			break;
42232a186721SChristian Brauner 		case MOUNT_ATTR_STRICTATIME:
42242a186721SChristian Brauner 			break;
42252a186721SChristian Brauner 		default:
42262a186721SChristian Brauner 			return -EINVAL;
42272a186721SChristian Brauner 		}
42282a186721SChristian Brauner 	} else {
42292a186721SChristian Brauner 		if (attr->attr_set & MOUNT_ATTR__ATIME)
42302a186721SChristian Brauner 			return -EINVAL;
42312a186721SChristian Brauner 	}
42322a186721SChristian Brauner 
42339caccd41SChristian Brauner 	return build_mount_idmapped(attr, usize, kattr, flags);
42349caccd41SChristian Brauner }
42359caccd41SChristian Brauner 
42369caccd41SChristian Brauner static void finish_mount_kattr(struct mount_kattr *kattr)
42379caccd41SChristian Brauner {
42389caccd41SChristian Brauner 	put_user_ns(kattr->mnt_userns);
42399caccd41SChristian Brauner 	kattr->mnt_userns = NULL;
42402a186721SChristian Brauner }
42412a186721SChristian Brauner 
42422a186721SChristian Brauner SYSCALL_DEFINE5(mount_setattr, int, dfd, const char __user *, path,
42432a186721SChristian Brauner 		unsigned int, flags, struct mount_attr __user *, uattr,
42442a186721SChristian Brauner 		size_t, usize)
42452a186721SChristian Brauner {
42462a186721SChristian Brauner 	int err;
42472a186721SChristian Brauner 	struct path target;
42482a186721SChristian Brauner 	struct mount_attr attr;
42492a186721SChristian Brauner 	struct mount_kattr kattr;
42502a186721SChristian Brauner 
42512a186721SChristian Brauner 	BUILD_BUG_ON(sizeof(struct mount_attr) != MOUNT_ATTR_SIZE_VER0);
42522a186721SChristian Brauner 
42532a186721SChristian Brauner 	if (flags & ~(AT_EMPTY_PATH |
42542a186721SChristian Brauner 		      AT_RECURSIVE |
42552a186721SChristian Brauner 		      AT_SYMLINK_NOFOLLOW |
42562a186721SChristian Brauner 		      AT_NO_AUTOMOUNT))
42572a186721SChristian Brauner 		return -EINVAL;
42582a186721SChristian Brauner 
42592a186721SChristian Brauner 	if (unlikely(usize > PAGE_SIZE))
42602a186721SChristian Brauner 		return -E2BIG;
42612a186721SChristian Brauner 	if (unlikely(usize < MOUNT_ATTR_SIZE_VER0))
42622a186721SChristian Brauner 		return -EINVAL;
42632a186721SChristian Brauner 
42642a186721SChristian Brauner 	if (!may_mount())
42652a186721SChristian Brauner 		return -EPERM;
42662a186721SChristian Brauner 
42672a186721SChristian Brauner 	err = copy_struct_from_user(&attr, sizeof(attr), uattr, usize);
42682a186721SChristian Brauner 	if (err)
42692a186721SChristian Brauner 		return err;
42702a186721SChristian Brauner 
42712a186721SChristian Brauner 	/* Don't bother walking through the mounts if this is a nop. */
42722a186721SChristian Brauner 	if (attr.attr_set == 0 &&
42732a186721SChristian Brauner 	    attr.attr_clr == 0 &&
42742a186721SChristian Brauner 	    attr.propagation == 0)
42752a186721SChristian Brauner 		return 0;
42762a186721SChristian Brauner 
42779caccd41SChristian Brauner 	err = build_mount_kattr(&attr, usize, &kattr, flags);
42782a186721SChristian Brauner 	if (err)
42792a186721SChristian Brauner 		return err;
42802a186721SChristian Brauner 
42812a186721SChristian Brauner 	err = user_path_at(dfd, path, kattr.lookup_flags, &target);
42822a186721SChristian Brauner 	if (err)
42832a186721SChristian Brauner 		return err;
42842a186721SChristian Brauner 
42852a186721SChristian Brauner 	err = do_mount_setattr(&target, &kattr);
42869caccd41SChristian Brauner 	finish_mount_kattr(&kattr);
42872a186721SChristian Brauner 	path_put(&target);
42882a186721SChristian Brauner 	return err;
42892a186721SChristian Brauner }
42902a186721SChristian Brauner 
42911da177e4SLinus Torvalds static void __init init_mount_tree(void)
42921da177e4SLinus Torvalds {
42931da177e4SLinus Torvalds 	struct vfsmount *mnt;
429474e83122SAl Viro 	struct mount *m;
42956b3286edSKirill Korotaev 	struct mnt_namespace *ns;
4296ac748a09SJan Blunck 	struct path root;
42971da177e4SLinus Torvalds 
4298fd3e007fSAl Viro 	mnt = vfs_kern_mount(&rootfs_fs_type, 0, "rootfs", NULL);
42991da177e4SLinus Torvalds 	if (IS_ERR(mnt))
43001da177e4SLinus Torvalds 		panic("Can't create rootfs");
4301b3e19d92SNick Piggin 
430274e83122SAl Viro 	ns = alloc_mnt_ns(&init_user_ns, false);
43033b22edc5STrond Myklebust 	if (IS_ERR(ns))
43041da177e4SLinus Torvalds 		panic("Can't allocate initial namespace");
430574e83122SAl Viro 	m = real_mount(mnt);
430674e83122SAl Viro 	m->mnt_ns = ns;
430774e83122SAl Viro 	ns->root = m;
430874e83122SAl Viro 	ns->mounts = 1;
430974e83122SAl Viro 	list_add(&m->mnt_list, &ns->list);
43106b3286edSKirill Korotaev 	init_task.nsproxy->mnt_ns = ns;
43116b3286edSKirill Korotaev 	get_mnt_ns(ns);
43121da177e4SLinus Torvalds 
4313be08d6d2SAl Viro 	root.mnt = mnt;
4314be08d6d2SAl Viro 	root.dentry = mnt->mnt_root;
4315da362b09SEric W. Biederman 	mnt->mnt_flags |= MNT_LOCKED;
4316ac748a09SJan Blunck 
4317ac748a09SJan Blunck 	set_fs_pwd(current->fs, &root);
4318ac748a09SJan Blunck 	set_fs_root(current->fs, &root);
43191da177e4SLinus Torvalds }
43201da177e4SLinus Torvalds 
432174bf17cfSDenis Cheng void __init mnt_init(void)
43221da177e4SLinus Torvalds {
432315a67dd8SRandy Dunlap 	int err;
43241da177e4SLinus Torvalds 
43257d6fec45SAl Viro 	mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct mount),
432679f6540bSVasily Averin 			0, SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, NULL);
43271da177e4SLinus Torvalds 
43280818bf27SAl Viro 	mount_hashtable = alloc_large_system_hash("Mount-cache",
432938129a13SAl Viro 				sizeof(struct hlist_head),
43300818bf27SAl Viro 				mhash_entries, 19,
43313d375d78SPavel Tatashin 				HASH_ZERO,
43320818bf27SAl Viro 				&m_hash_shift, &m_hash_mask, 0, 0);
43330818bf27SAl Viro 	mountpoint_hashtable = alloc_large_system_hash("Mountpoint-cache",
43340818bf27SAl Viro 				sizeof(struct hlist_head),
43350818bf27SAl Viro 				mphash_entries, 19,
43363d375d78SPavel Tatashin 				HASH_ZERO,
43370818bf27SAl Viro 				&mp_hash_shift, &mp_hash_mask, 0, 0);
43381da177e4SLinus Torvalds 
433984d17192SAl Viro 	if (!mount_hashtable || !mountpoint_hashtable)
43401da177e4SLinus Torvalds 		panic("Failed to allocate mount hash table\n");
43411da177e4SLinus Torvalds 
43424b93dc9bSTejun Heo 	kernfs_init();
43434b93dc9bSTejun Heo 
434415a67dd8SRandy Dunlap 	err = sysfs_init();
434515a67dd8SRandy Dunlap 	if (err)
434615a67dd8SRandy Dunlap 		printk(KERN_WARNING "%s: sysfs_init error: %d\n",
43478e24eea7SHarvey Harrison 			__func__, err);
434800d26666SGreg Kroah-Hartman 	fs_kobj = kobject_create_and_add("fs", NULL);
434900d26666SGreg Kroah-Hartman 	if (!fs_kobj)
43508e24eea7SHarvey Harrison 		printk(KERN_WARNING "%s: kobj create error\n", __func__);
4351037f11b4SAl Viro 	shmem_init();
43521da177e4SLinus Torvalds 	init_rootfs();
43531da177e4SLinus Torvalds 	init_mount_tree();
43541da177e4SLinus Torvalds }
43551da177e4SLinus Torvalds 
4356616511d0STrond Myklebust void put_mnt_ns(struct mnt_namespace *ns)
43571da177e4SLinus Torvalds {
43581a7b8969SKirill Tkhai 	if (!refcount_dec_and_test(&ns->ns.count))
4359616511d0STrond Myklebust 		return;
43607b00ed6fSAl Viro 	drop_collected_mounts(&ns->root->mnt);
4361771b1371SEric W. Biederman 	free_mnt_ns(ns);
43621da177e4SLinus Torvalds }
43639d412a43SAl Viro 
4364d911b458SDavid Howells struct vfsmount *kern_mount(struct file_system_type *type)
43659d412a43SAl Viro {
4366423e0ab0STim Chen 	struct vfsmount *mnt;
4367d911b458SDavid Howells 	mnt = vfs_kern_mount(type, SB_KERNMOUNT, type->name, NULL);
4368423e0ab0STim Chen 	if (!IS_ERR(mnt)) {
4369423e0ab0STim Chen 		/*
4370423e0ab0STim Chen 		 * it is a longterm mount, don't release mnt until
4371423e0ab0STim Chen 		 * we unmount before file sys is unregistered
4372423e0ab0STim Chen 		*/
4373f7a99c5bSAl Viro 		real_mount(mnt)->mnt_ns = MNT_NS_INTERNAL;
4374423e0ab0STim Chen 	}
4375423e0ab0STim Chen 	return mnt;
43769d412a43SAl Viro }
4377d911b458SDavid Howells EXPORT_SYMBOL_GPL(kern_mount);
4378423e0ab0STim Chen 
4379423e0ab0STim Chen void kern_unmount(struct vfsmount *mnt)
4380423e0ab0STim Chen {
4381423e0ab0STim Chen 	/* release long term mount so mount point can be released */
4382423e0ab0STim Chen 	if (!IS_ERR_OR_NULL(mnt)) {
4383f7a99c5bSAl Viro 		real_mount(mnt)->mnt_ns = NULL;
438448a066e7SAl Viro 		synchronize_rcu();	/* yecchhh... */
4385423e0ab0STim Chen 		mntput(mnt);
4386423e0ab0STim Chen 	}
4387423e0ab0STim Chen }
4388423e0ab0STim Chen EXPORT_SYMBOL(kern_unmount);
438902125a82SAl Viro 
4390df820f8dSMiklos Szeredi void kern_unmount_array(struct vfsmount *mnt[], unsigned int num)
4391df820f8dSMiklos Szeredi {
4392df820f8dSMiklos Szeredi 	unsigned int i;
4393df820f8dSMiklos Szeredi 
4394df820f8dSMiklos Szeredi 	for (i = 0; i < num; i++)
4395df820f8dSMiklos Szeredi 		if (mnt[i])
4396df820f8dSMiklos Szeredi 			real_mount(mnt[i])->mnt_ns = NULL;
4397df820f8dSMiklos Szeredi 	synchronize_rcu_expedited();
4398df820f8dSMiklos Szeredi 	for (i = 0; i < num; i++)
4399df820f8dSMiklos Szeredi 		mntput(mnt[i]);
4400df820f8dSMiklos Szeredi }
4401df820f8dSMiklos Szeredi EXPORT_SYMBOL(kern_unmount_array);
4402df820f8dSMiklos Szeredi 
440302125a82SAl Viro bool our_mnt(struct vfsmount *mnt)
440402125a82SAl Viro {
4405143c8c91SAl Viro 	return check_mnt(real_mount(mnt));
440602125a82SAl Viro }
44078823c079SEric W. Biederman 
44083151527eSEric W. Biederman bool current_chrooted(void)
44093151527eSEric W. Biederman {
44103151527eSEric W. Biederman 	/* Does the current process have a non-standard root */
44113151527eSEric W. Biederman 	struct path ns_root;
44123151527eSEric W. Biederman 	struct path fs_root;
44133151527eSEric W. Biederman 	bool chrooted;
44143151527eSEric W. Biederman 
44153151527eSEric W. Biederman 	/* Find the namespace root */
44163151527eSEric W. Biederman 	ns_root.mnt = &current->nsproxy->mnt_ns->root->mnt;
44173151527eSEric W. Biederman 	ns_root.dentry = ns_root.mnt->mnt_root;
44183151527eSEric W. Biederman 	path_get(&ns_root);
44193151527eSEric W. Biederman 	while (d_mountpoint(ns_root.dentry) && follow_down_one(&ns_root))
44203151527eSEric W. Biederman 		;
44213151527eSEric W. Biederman 
44223151527eSEric W. Biederman 	get_fs_root(current->fs, &fs_root);
44233151527eSEric W. Biederman 
44243151527eSEric W. Biederman 	chrooted = !path_equal(&fs_root, &ns_root);
44253151527eSEric W. Biederman 
44263151527eSEric W. Biederman 	path_put(&fs_root);
44273151527eSEric W. Biederman 	path_put(&ns_root);
44283151527eSEric W. Biederman 
44293151527eSEric W. Biederman 	return chrooted;
44303151527eSEric W. Biederman }
44313151527eSEric W. Biederman 
4432132e4608SDavid Howells static bool mnt_already_visible(struct mnt_namespace *ns,
4433132e4608SDavid Howells 				const struct super_block *sb,
44348654df4eSEric W. Biederman 				int *new_mnt_flags)
443587a8ebd6SEric W. Biederman {
44368c6cf9ccSEric W. Biederman 	int new_flags = *new_mnt_flags;
443787a8ebd6SEric W. Biederman 	struct mount *mnt;
4438e51db735SEric W. Biederman 	bool visible = false;
443987a8ebd6SEric W. Biederman 
444044bb4385SAl Viro 	down_read(&namespace_sem);
44419f6c61f9SMiklos Szeredi 	lock_ns_list(ns);
444287a8ebd6SEric W. Biederman 	list_for_each_entry(mnt, &ns->list, mnt_list) {
4443e51db735SEric W. Biederman 		struct mount *child;
444477b1a97dSEric W. Biederman 		int mnt_flags;
444577b1a97dSEric W. Biederman 
44469f6c61f9SMiklos Szeredi 		if (mnt_is_cursor(mnt))
44479f6c61f9SMiklos Szeredi 			continue;
44489f6c61f9SMiklos Szeredi 
4449132e4608SDavid Howells 		if (mnt->mnt.mnt_sb->s_type != sb->s_type)
4450e51db735SEric W. Biederman 			continue;
4451e51db735SEric W. Biederman 
44527e96c1b0SEric W. Biederman 		/* This mount is not fully visible if it's root directory
44537e96c1b0SEric W. Biederman 		 * is not the root directory of the filesystem.
44547e96c1b0SEric W. Biederman 		 */
44557e96c1b0SEric W. Biederman 		if (mnt->mnt.mnt_root != mnt->mnt.mnt_sb->s_root)
44567e96c1b0SEric W. Biederman 			continue;
44577e96c1b0SEric W. Biederman 
4458a1935c17SEric W. Biederman 		/* A local view of the mount flags */
445977b1a97dSEric W. Biederman 		mnt_flags = mnt->mnt.mnt_flags;
446077b1a97dSEric W. Biederman 
4461695e9df0SEric W. Biederman 		/* Don't miss readonly hidden in the superblock flags */
4462bc98a42cSDavid Howells 		if (sb_rdonly(mnt->mnt.mnt_sb))
4463695e9df0SEric W. Biederman 			mnt_flags |= MNT_LOCK_READONLY;
4464695e9df0SEric W. Biederman 
44658c6cf9ccSEric W. Biederman 		/* Verify the mount flags are equal to or more permissive
44668c6cf9ccSEric W. Biederman 		 * than the proposed new mount.
44678c6cf9ccSEric W. Biederman 		 */
446877b1a97dSEric W. Biederman 		if ((mnt_flags & MNT_LOCK_READONLY) &&
44698c6cf9ccSEric W. Biederman 		    !(new_flags & MNT_READONLY))
44708c6cf9ccSEric W. Biederman 			continue;
447177b1a97dSEric W. Biederman 		if ((mnt_flags & MNT_LOCK_ATIME) &&
447277b1a97dSEric W. Biederman 		    ((mnt_flags & MNT_ATIME_MASK) != (new_flags & MNT_ATIME_MASK)))
44738c6cf9ccSEric W. Biederman 			continue;
44748c6cf9ccSEric W. Biederman 
4475ceeb0e5dSEric W. Biederman 		/* This mount is not fully visible if there are any
4476ceeb0e5dSEric W. Biederman 		 * locked child mounts that cover anything except for
4477ceeb0e5dSEric W. Biederman 		 * empty directories.
4478e51db735SEric W. Biederman 		 */
4479e51db735SEric W. Biederman 		list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
4480e51db735SEric W. Biederman 			struct inode *inode = child->mnt_mountpoint->d_inode;
4481ceeb0e5dSEric W. Biederman 			/* Only worry about locked mounts */
4482d71ed6c9SEric W. Biederman 			if (!(child->mnt.mnt_flags & MNT_LOCKED))
4483ceeb0e5dSEric W. Biederman 				continue;
44847236c85eSEric W. Biederman 			/* Is the directory permanetly empty? */
44857236c85eSEric W. Biederman 			if (!is_empty_dir_inode(inode))
4486e51db735SEric W. Biederman 				goto next;
448787a8ebd6SEric W. Biederman 		}
44888c6cf9ccSEric W. Biederman 		/* Preserve the locked attributes */
448977b1a97dSEric W. Biederman 		*new_mnt_flags |= mnt_flags & (MNT_LOCK_READONLY | \
44908c6cf9ccSEric W. Biederman 					       MNT_LOCK_ATIME);
4491e51db735SEric W. Biederman 		visible = true;
4492e51db735SEric W. Biederman 		goto found;
4493e51db735SEric W. Biederman 	next:	;
449487a8ebd6SEric W. Biederman 	}
4495e51db735SEric W. Biederman found:
44969f6c61f9SMiklos Szeredi 	unlock_ns_list(ns);
449744bb4385SAl Viro 	up_read(&namespace_sem);
4498e51db735SEric W. Biederman 	return visible;
449987a8ebd6SEric W. Biederman }
450087a8ebd6SEric W. Biederman 
4501132e4608SDavid Howells static bool mount_too_revealing(const struct super_block *sb, int *new_mnt_flags)
45028654df4eSEric W. Biederman {
4503a1935c17SEric W. Biederman 	const unsigned long required_iflags = SB_I_NOEXEC | SB_I_NODEV;
45048654df4eSEric W. Biederman 	struct mnt_namespace *ns = current->nsproxy->mnt_ns;
45058654df4eSEric W. Biederman 	unsigned long s_iflags;
45068654df4eSEric W. Biederman 
45078654df4eSEric W. Biederman 	if (ns->user_ns == &init_user_ns)
45088654df4eSEric W. Biederman 		return false;
45098654df4eSEric W. Biederman 
45108654df4eSEric W. Biederman 	/* Can this filesystem be too revealing? */
4511132e4608SDavid Howells 	s_iflags = sb->s_iflags;
45128654df4eSEric W. Biederman 	if (!(s_iflags & SB_I_USERNS_VISIBLE))
45138654df4eSEric W. Biederman 		return false;
45148654df4eSEric W. Biederman 
4515a1935c17SEric W. Biederman 	if ((s_iflags & required_iflags) != required_iflags) {
4516a1935c17SEric W. Biederman 		WARN_ONCE(1, "Expected s_iflags to contain 0x%lx\n",
4517a1935c17SEric W. Biederman 			  required_iflags);
4518a1935c17SEric W. Biederman 		return true;
4519a1935c17SEric W. Biederman 	}
4520a1935c17SEric W. Biederman 
4521132e4608SDavid Howells 	return !mnt_already_visible(ns, sb, new_mnt_flags);
45228654df4eSEric W. Biederman }
45238654df4eSEric W. Biederman 
4524380cf5baSAndy Lutomirski bool mnt_may_suid(struct vfsmount *mnt)
4525380cf5baSAndy Lutomirski {
4526380cf5baSAndy Lutomirski 	/*
4527380cf5baSAndy Lutomirski 	 * Foreign mounts (accessed via fchdir or through /proc
4528380cf5baSAndy Lutomirski 	 * symlinks) are always treated as if they are nosuid.  This
4529380cf5baSAndy Lutomirski 	 * prevents namespaces from trusting potentially unsafe
4530380cf5baSAndy Lutomirski 	 * suid/sgid bits, file caps, or security labels that originate
4531380cf5baSAndy Lutomirski 	 * in other namespaces.
4532380cf5baSAndy Lutomirski 	 */
4533380cf5baSAndy Lutomirski 	return !(mnt->mnt_flags & MNT_NOSUID) && check_mnt(real_mount(mnt)) &&
4534380cf5baSAndy Lutomirski 	       current_in_userns(mnt->mnt_sb->s_user_ns);
4535380cf5baSAndy Lutomirski }
4536380cf5baSAndy Lutomirski 
453764964528SAl Viro static struct ns_common *mntns_get(struct task_struct *task)
45388823c079SEric W. Biederman {
453958be2825SAl Viro 	struct ns_common *ns = NULL;
45408823c079SEric W. Biederman 	struct nsproxy *nsproxy;
45418823c079SEric W. Biederman 
4542728dba3aSEric W. Biederman 	task_lock(task);
4543728dba3aSEric W. Biederman 	nsproxy = task->nsproxy;
45448823c079SEric W. Biederman 	if (nsproxy) {
454558be2825SAl Viro 		ns = &nsproxy->mnt_ns->ns;
454658be2825SAl Viro 		get_mnt_ns(to_mnt_ns(ns));
45478823c079SEric W. Biederman 	}
4548728dba3aSEric W. Biederman 	task_unlock(task);
45498823c079SEric W. Biederman 
45508823c079SEric W. Biederman 	return ns;
45518823c079SEric W. Biederman }
45528823c079SEric W. Biederman 
/* Drop a reference on the mount namespace that embeds @ns. */
static void mntns_put(struct ns_common *ns)
{
	struct mnt_namespace *mnt_ns = to_mnt_ns(ns);

	put_mnt_ns(mnt_ns);
}
45578823c079SEric W. Biederman 
4558f2a8d52eSChristian Brauner static int mntns_install(struct nsset *nsset, struct ns_common *ns)
45598823c079SEric W. Biederman {
4560f2a8d52eSChristian Brauner 	struct nsproxy *nsproxy = nsset->nsproxy;
4561f2a8d52eSChristian Brauner 	struct fs_struct *fs = nsset->fs;
45624f757f3cSAl Viro 	struct mnt_namespace *mnt_ns = to_mnt_ns(ns), *old_mnt_ns;
4563f2a8d52eSChristian Brauner 	struct user_namespace *user_ns = nsset->cred->user_ns;
45648823c079SEric W. Biederman 	struct path root;
45654f757f3cSAl Viro 	int err;
45668823c079SEric W. Biederman 
45670c55cfc4SEric W. Biederman 	if (!ns_capable(mnt_ns->user_ns, CAP_SYS_ADMIN) ||
4568f2a8d52eSChristian Brauner 	    !ns_capable(user_ns, CAP_SYS_CHROOT) ||
4569f2a8d52eSChristian Brauner 	    !ns_capable(user_ns, CAP_SYS_ADMIN))
4570ae11e0f1SZhao Hongjiang 		return -EPERM;
45718823c079SEric W. Biederman 
457274e83122SAl Viro 	if (is_anon_ns(mnt_ns))
457374e83122SAl Viro 		return -EINVAL;
457474e83122SAl Viro 
45758823c079SEric W. Biederman 	if (fs->users != 1)
45768823c079SEric W. Biederman 		return -EINVAL;
45778823c079SEric W. Biederman 
45788823c079SEric W. Biederman 	get_mnt_ns(mnt_ns);
45794f757f3cSAl Viro 	old_mnt_ns = nsproxy->mnt_ns;
45808823c079SEric W. Biederman 	nsproxy->mnt_ns = mnt_ns;
45818823c079SEric W. Biederman 
45828823c079SEric W. Biederman 	/* Find the root */
45834f757f3cSAl Viro 	err = vfs_path_lookup(mnt_ns->root->mnt.mnt_root, &mnt_ns->root->mnt,
45844f757f3cSAl Viro 				"/", LOOKUP_DOWN, &root);
45854f757f3cSAl Viro 	if (err) {
45864f757f3cSAl Viro 		/* revert to old namespace */
45874f757f3cSAl Viro 		nsproxy->mnt_ns = old_mnt_ns;
45884f757f3cSAl Viro 		put_mnt_ns(mnt_ns);
45894f757f3cSAl Viro 		return err;
45904f757f3cSAl Viro 	}
45918823c079SEric W. Biederman 
45924068367cSAndrei Vagin 	put_mnt_ns(old_mnt_ns);
45934068367cSAndrei Vagin 
45948823c079SEric W. Biederman 	/* Update the pwd and root */
45958823c079SEric W. Biederman 	set_fs_pwd(fs, &root);
45968823c079SEric W. Biederman 	set_fs_root(fs, &root);
45978823c079SEric W. Biederman 
45988823c079SEric W. Biederman 	path_put(&root);
45998823c079SEric W. Biederman 	return 0;
46008823c079SEric W. Biederman }
46018823c079SEric W. Biederman 
4602bcac25a5SAndrey Vagin static struct user_namespace *mntns_owner(struct ns_common *ns)
4603bcac25a5SAndrey Vagin {
4604bcac25a5SAndrey Vagin 	return to_mnt_ns(ns)->user_ns;
4605bcac25a5SAndrey Vagin }
4606bcac25a5SAndrey Vagin 
46078823c079SEric W. Biederman const struct proc_ns_operations mntns_operations = {
46088823c079SEric W. Biederman 	.name		= "mnt",
46098823c079SEric W. Biederman 	.type		= CLONE_NEWNS,
46108823c079SEric W. Biederman 	.get		= mntns_get,
46118823c079SEric W. Biederman 	.put		= mntns_put,
46128823c079SEric W. Biederman 	.install	= mntns_install,
4613bcac25a5SAndrey Vagin 	.owner		= mntns_owner,
46148823c079SEric W. Biederman };
4615