Lines Matching +full:we +full:- +full:on +full:- +full:ns
1 // SPDX-License-Identifier: GPL-2.0-only
7 * Based on code from fs/super.c, copyright Linus Torvalds and others.
133 mnt->mnt_id = res; in mnt_alloc_id()
139 ida_free(&mnt_id_ida, mnt->mnt_id); in mnt_free_id()
151 mnt->mnt_group_id = res; in mnt_alloc_group_id()
160 ida_free(&mnt_group_ida, mnt->mnt_group_id); in mnt_release_group_id()
161 mnt->mnt_group_id = 0; in mnt_release_group_id()
170 this_cpu_add(mnt->mnt_pcp->mnt_count, n); in mnt_add_count()
173 mnt->mnt_count += n; in mnt_add_count()
188 count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_count; in mnt_get_count()
193 return mnt->mnt_count; in mnt_get_count()
208 mnt->mnt_devname = kstrdup_const(name, in alloc_vfsmnt()
210 if (!mnt->mnt_devname) in alloc_vfsmnt()
215 mnt->mnt_pcp = alloc_percpu(struct mnt_pcp); in alloc_vfsmnt()
216 if (!mnt->mnt_pcp) in alloc_vfsmnt()
219 this_cpu_add(mnt->mnt_pcp->mnt_count, 1); in alloc_vfsmnt()
221 mnt->mnt_count = 1; in alloc_vfsmnt()
222 mnt->mnt_writers = 0; in alloc_vfsmnt()
225 INIT_HLIST_NODE(&mnt->mnt_hash); in alloc_vfsmnt()
226 INIT_LIST_HEAD(&mnt->mnt_child); in alloc_vfsmnt()
227 INIT_LIST_HEAD(&mnt->mnt_mounts); in alloc_vfsmnt()
228 INIT_LIST_HEAD(&mnt->mnt_list); in alloc_vfsmnt()
229 INIT_LIST_HEAD(&mnt->mnt_expire); in alloc_vfsmnt()
230 INIT_LIST_HEAD(&mnt->mnt_share); in alloc_vfsmnt()
231 INIT_LIST_HEAD(&mnt->mnt_slave_list); in alloc_vfsmnt()
232 INIT_LIST_HEAD(&mnt->mnt_slave); in alloc_vfsmnt()
233 INIT_HLIST_NODE(&mnt->mnt_mp_list); in alloc_vfsmnt()
234 INIT_LIST_HEAD(&mnt->mnt_umounting); in alloc_vfsmnt()
235 INIT_HLIST_HEAD(&mnt->mnt_stuck_children); in alloc_vfsmnt()
236 mnt->mnt.mnt_idmap = &nop_mnt_idmap; in alloc_vfsmnt()
242 kfree_const(mnt->mnt_devname); in alloc_vfsmnt()
252 * Most r/o checks on a fs are for operations that take
254 * We must keep track of when those operations start
256 * we can determine when writes are able to occur to
260 * __mnt_is_readonly: check whether a mount is read-only
272 return (mnt->mnt_flags & MNT_READONLY) || sb_rdonly(mnt->mnt_sb); in __mnt_is_readonly()
279 this_cpu_inc(mnt->mnt_pcp->mnt_writers); in mnt_inc_writers()
281 mnt->mnt_writers++; in mnt_inc_writers()
288 this_cpu_dec(mnt->mnt_pcp->mnt_writers); in mnt_dec_writers()
290 mnt->mnt_writers--; in mnt_dec_writers()
301 count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_writers; in mnt_get_writers()
306 return mnt->mnt_writers; in mnt_get_writers()
312 if (READ_ONCE(mnt->mnt_sb->s_readonly_remount)) in mnt_is_readonly()
316 * making sure if we don't see s_readonly_remount set yet, we also will in mnt_is_readonly()
319 * assuring that if we see s_readonly_remount already cleared, we will in mnt_is_readonly()
327 * Most r/o & frozen checks on a fs are for operations that take discrete
328 * amounts of time, like a write() or unlink(). We must keep track of when
329 * those operations start (for permission checks) and when they end, so that we
333 * __mnt_want_write - get write access to a mount without freeze protection
334 * @m: the mount on which to take a write
336 * This tells the low-level filesystem that a write is about to be performed to
337 * it, and makes sure that writes are allowed (mount is read-write) before in __mnt_want_write()
350 * The store to mnt_inc_writers must be visible before we pass in __mnt_want_write()
356 while (READ_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD) { in __mnt_want_write()
362 * setting MNT_WRITE_HOLD got preempted on a remote in __mnt_want_write()
375 * sure that if we see MNT_WRITE_HOLD cleared, we will also see in __mnt_want_write()
377 * mnt_is_readonly() and bail in case we are racing with remount in __mnt_want_write()
378 * read-only. in __mnt_want_write()
383 ret = -EROFS; in __mnt_want_write()
391 * mnt_want_write - get write access to a mount
392 * @m: the mount on which to take a write
394 * This tells the low-level filesystem that a write is about to be performed to
395 * it, and makes sure that writes are allowed (mount is read-write, filesystem
403 sb_start_write(m->mnt_sb); in mnt_want_write()
406 sb_end_write(m->mnt_sb); in mnt_want_write()
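The kdoc above gives the contract; a minimal sketch of the canonical caller pattern (the helper name example_touch is illustrative, not from this file):

	static int example_touch(struct path *path)
	{
		int err;

		/* Take freeze protection plus the per-mount write refcount. */
		err = mnt_want_write(path->mnt);
		if (err)
			return err;		/* e.g. -EROFS on a read-only mount */
		/* ... perform the modifying operation ... */
		mnt_drop_write(path->mnt);	/* must pair with mnt_want_write() */
		return 0;
	}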
412 * __mnt_want_write_file - get write access to a file's mount
413 * @file: the file whose mount to take a write on in __mnt_want_write_file()
422 if (file->f_mode & FMODE_WRITER) { in __mnt_want_write_file()
425 * writable fd's, e.g. due to a fs error with errors=remount-ro in __mnt_want_write_file()
427 if (__mnt_is_readonly(file->f_path.mnt)) in __mnt_want_write_file()
428 return -EROFS; in __mnt_want_write_file()
431 return __mnt_want_write(file->f_path.mnt); in __mnt_want_write_file()
435 * mnt_want_write_file - get write access to a file's mount
436 * @file: the file whose mount to take a write on in mnt_want_write_file()
447 sb_start_write(file_inode(file)->i_sb); in mnt_want_write_file()
450 sb_end_write(file_inode(file)->i_sb); in mnt_want_write_file()
456 * __mnt_drop_write - give up write access to a mount
457 * @mnt: the mount on which to give up write access
459 * Tells the low-level filesystem that we are done
471 * mnt_drop_write - give up write access to a mount
472 * @mnt: the mount on which to give up write access
474 * Tells the low-level filesystem that we are done performing writes to it and
481 sb_end_write(mnt->mnt_sb); in mnt_drop_write()
487 if (!(file->f_mode & FMODE_WRITER)) in __mnt_drop_write_file()
488 __mnt_drop_write(file->f_path.mnt); in __mnt_drop_write_file()
494 sb_end_write(file_inode(file)->i_sb); in mnt_drop_write_file()
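The file-based variants pair the same way; a sketch under the same assumptions (example_write_file is a hypothetical caller):

	static int example_write_file(struct file *file)
	{
		int err;

		err = mnt_want_write_file(file);	/* sb_start_write() on file_inode(file)->i_sb */
		if (err)
			return err;
		/* ... modify the file ... */
		mnt_drop_write_file(file);
		return 0;
	}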
499 * mnt_hold_writers - prevent write access to the given mount
513 * Return: On success 0 is returned.
514 * On error, -EBUSY is returned.
518 mnt->mnt.mnt_flags |= MNT_WRITE_HOLD; in mnt_hold_writers()
520 * After storing MNT_WRITE_HOLD, we'll read the counters. This store in mnt_hold_writers()
521 * should be visible before we do. in mnt_hold_writers()
526 * With writers on hold, if this value is zero, then there are in mnt_hold_writers()
531 * It is OK to have counter incremented on one CPU and decremented on in mnt_hold_writers()
532 * another: the sum will add up correctly. The danger would be when we in mnt_hold_writers()
533 * sum up each counter, if we read a counter before it is incremented, in mnt_hold_writers()
535 * decremented from -- we would see more decrements than we should. in mnt_hold_writers()
537 * mnt_want_write first increments count, then smp_mb, then spins on in mnt_hold_writers()
539 * we're counting up here. in mnt_hold_writers()
542 return -EBUSY; in mnt_hold_writers()
548 * mnt_unhold_writers - stop preventing write access to the given mount
566 mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD; in mnt_unhold_writers()
575 mnt->mnt.mnt_flags |= MNT_READONLY; in mnt_make_readonly()
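mnt_hold_writers() and mnt_unhold_writers() bracket the flag update, as mnt_make_readonly() above shows; a sketch of that pattern, run with the mount hash lock held:

	lock_mount_hash();
	ret = mnt_hold_writers(mnt);	/* sets MNT_WRITE_HOLD; -EBUSY if writers are active */
	if (!ret)
		mnt->mnt.mnt_flags |= MNT_READONLY;
	mnt_unhold_writers(mnt);	/* clears MNT_WRITE_HOLD so spinning writers proceed */
	unlock_mount_hash();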
586 if (atomic_long_read(&sb->s_remove_count)) in sb_prepare_remount_readonly()
587 return -EBUSY; in sb_prepare_remount_readonly()
590 list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) { in sb_prepare_remount_readonly()
591 if (!(mnt->mnt.mnt_flags & MNT_READONLY)) { in sb_prepare_remount_readonly()
597 if (!err && atomic_long_read(&sb->s_remove_count)) in sb_prepare_remount_readonly()
598 err = -EBUSY; in sb_prepare_remount_readonly()
602 list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) { in sb_prepare_remount_readonly()
603 if (mnt->mnt.mnt_flags & MNT_WRITE_HOLD) in sb_prepare_remount_readonly()
604 mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD; in sb_prepare_remount_readonly()
613 mnt_idmap_put(mnt_idmap(&mnt->mnt)); in free_vfsmnt()
614 kfree_const(mnt->mnt_devname); in free_vfsmnt()
616 free_percpu(mnt->mnt_pcp); in free_vfsmnt()
639 if (bastard->mnt_flags & MNT_SYNC_UMOUNT) { in __legitimize_mnt()
640 mnt_add_count(mnt, -1); in __legitimize_mnt()
644 if (unlikely(bastard->mnt_flags & MNT_DOOMED)) { in __legitimize_mnt()
645 mnt_add_count(mnt, -1); in __legitimize_mnt()
651 return -1; in __legitimize_mnt()
669 * __lookup_mnt - find first child mount
682 * mount @mnt will be tucked beneath @n, i.e., @n will be mounted on
683 * @mnt and @mnt mounted on @d. Now both @n and @o are mounted at @mnt
684 * on @dentry.
694 if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry) in __lookup_mnt()
700 * lookup_mnt - Return the first child mount mounted at path
709 * Then lookup_mnt() on the base /mnt dentry in the root mount will
724 child_mnt = __lookup_mnt(path->mnt, path->dentry); in lookup_mnt()
725 m = child_mnt ? &child_mnt->mnt : NULL; in lookup_mnt()
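A sketch of the calling convention: the reference lookup_mnt() takes on success must be dropped with mntput():

	struct vfsmount *child;

	child = lookup_mnt(path);	/* first mount covering path->dentry, or NULL */
	if (child) {
		/* ... inspect the covering mount ... */
		mntput(child);
	}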
731 static inline void lock_ns_list(struct mnt_namespace *ns) in lock_ns_list() argument
733 spin_lock(&ns->ns_lock); in lock_ns_list()
736 static inline void unlock_ns_list(struct mnt_namespace *ns) in unlock_ns_list() argument
738 spin_unlock(&ns->ns_lock); in unlock_ns_list()
743 return mnt->mnt.mnt_flags & MNT_CURSOR; in mnt_is_cursor()
747 * __is_local_mountpoint - Test to see if dentry is a mountpoint in the
751 * test is handled inline. For the slow case when we are actually
756 * The mount_hashtable is not usable in the context because we
763 struct mnt_namespace *ns = current->nsproxy->mnt_ns; in __is_local_mountpoint() local
768 lock_ns_list(ns); in __is_local_mountpoint()
769 list_for_each_entry(mnt, &ns->list, mnt_list) { in __is_local_mountpoint()
772 is_covered = (mnt->mnt_mountpoint == dentry); in __is_local_mountpoint()
776 unlock_ns_list(ns); in __is_local_mountpoint()
788 if (mp->m_dentry == dentry) { in lookup_mountpoint()
789 mp->m_count++; in lookup_mountpoint()
804 return ERR_PTR(-ENOENT); in get_mountpoint()
816 return ERR_PTR(-ENOMEM); in get_mountpoint()
823 if (ret == -EBUSY) in get_mountpoint()
833 new->m_dentry = dget(dentry); in get_mountpoint()
834 new->m_count = 1; in get_mountpoint()
835 hlist_add_head(&new->m_hash, mp_hash(dentry)); in get_mountpoint()
836 INIT_HLIST_HEAD(&new->m_list); in get_mountpoint()
852 if (!--mp->m_count) { in __put_mountpoint()
853 struct dentry *dentry = mp->m_dentry; in __put_mountpoint()
854 BUG_ON(!hlist_empty(&mp->m_list)); in __put_mountpoint()
855 spin_lock(&dentry->d_lock); in __put_mountpoint()
856 dentry->d_flags &= ~DCACHE_MOUNTED; in __put_mountpoint()
857 spin_unlock(&dentry->d_lock); in __put_mountpoint()
859 hlist_del(&mp->m_hash); in __put_mountpoint()
872 return mnt->mnt_ns == current->nsproxy->mnt_ns; in check_mnt()
878 static void touch_mnt_namespace(struct mnt_namespace *ns) in touch_mnt_namespace() argument
880 if (ns) { in touch_mnt_namespace()
881 ns->event = ++event; in touch_mnt_namespace()
882 wake_up_interruptible(&ns->poll); in touch_mnt_namespace()
889 static void __touch_mnt_namespace(struct mnt_namespace *ns) in __touch_mnt_namespace() argument
891 if (ns && ns->event != event) { in __touch_mnt_namespace()
892 ns->event = event; in __touch_mnt_namespace()
893 wake_up_interruptible(&ns->poll); in __touch_mnt_namespace()
903 mnt->mnt_parent = mnt; in unhash_mnt()
904 mnt->mnt_mountpoint = mnt->mnt.mnt_root; in unhash_mnt()
905 list_del_init(&mnt->mnt_child); in unhash_mnt()
906 hlist_del_init_rcu(&mnt->mnt_hash); in unhash_mnt()
907 hlist_del_init(&mnt->mnt_mp_list); in unhash_mnt()
908 mp = mnt->mnt_mp; in unhash_mnt()
909 mnt->mnt_mp = NULL; in unhash_mnt()
928 mp->m_count++; in mnt_set_mountpoint()
930 child_mnt->mnt_mountpoint = mp->m_dentry; in mnt_set_mountpoint()
931 child_mnt->mnt_parent = mnt; in mnt_set_mountpoint()
932 child_mnt->mnt_mp = mp; in mnt_set_mountpoint()
933 hlist_add_head(&child_mnt->mnt_mp_list, &mp->m_list); in mnt_set_mountpoint()
937 * mnt_set_mountpoint_beneath - mount a mount beneath another one
941 * @new_mp: the new mountpoint of @top_mnt on @new_parent
943 * Remove @top_mnt from its current mountpoint @top_mnt->mnt_mp and
944 * parent @top_mnt->mnt_parent and mount it on top of @new_parent at
945 * @new_mp. And mount @new_parent on the old parent and old
955 struct mount *old_top_parent = top_mnt->mnt_parent; in mnt_set_mountpoint_beneath()
956 struct mountpoint *old_top_mp = top_mnt->mnt_mp; in mnt_set_mountpoint_beneath()
965 hlist_add_head_rcu(&mnt->mnt_hash, in __attach_mnt()
966 m_hash(&parent->mnt, mnt->mnt_mountpoint)); in __attach_mnt()
967 list_add_tail(&mnt->mnt_child, &parent->mnt_mounts); in __attach_mnt()
971 * attach_mnt - mount a mount, attach to @mount_hashtable and parent's
976 * @beneath: whether to mount @mnt beneath or on top of @parent
978 * If @beneath is false, mount @mnt at @mp on @parent. Then attach @mnt
982 * mountpoint and mount it on @mp on @parent, and mount @parent on the
984 * @mnt_hashtable and @parent->mnt_parent->mnt_mounts.
986 * Note, when __attach_mnt() is called @mnt->mnt_parent already points
1000 * Note, @mnt->mnt_parent has to be used. If @mnt was mounted in attach_mnt()
1002 * @parent's old parent, not @parent. IOW, @mnt->mnt_parent in attach_mnt()
1005 __attach_mnt(mnt, mnt->mnt_parent); in attach_mnt()
1010 struct mountpoint *old_mp = mnt->mnt_mp; in mnt_change_mountpoint()
1011 struct mount *old_parent = mnt->mnt_parent; in mnt_change_mountpoint()
1013 list_del_init(&mnt->mnt_child); in mnt_change_mountpoint()
1014 hlist_del_init(&mnt->mnt_mp_list); in mnt_change_mountpoint()
1015 hlist_del_init_rcu(&mnt->mnt_hash); in mnt_change_mountpoint()
1020 mnt_add_count(old_parent, -1); in mnt_change_mountpoint()
1028 struct mount *parent = mnt->mnt_parent; in commit_tree()
1031 struct mnt_namespace *n = parent->mnt_ns; in commit_tree()
1035 list_add_tail(&head, &mnt->mnt_list); in commit_tree()
1037 m->mnt_ns = n; in commit_tree()
1039 list_splice(&head, n->list.prev); in commit_tree()
1041 n->mounts += n->pending_mounts; in commit_tree()
1042 n->pending_mounts = 0; in commit_tree()
1050 struct list_head *next = p->mnt_mounts.next; in next_mnt()
1051 if (next == &p->mnt_mounts) { in next_mnt()
1055 next = p->mnt_child.next; in next_mnt()
1056 if (next != &p->mnt_parent->mnt_mounts) in next_mnt()
1058 p = p->mnt_parent; in next_mnt()
1066 struct list_head *prev = p->mnt_mounts.prev; in skip_mnt_tree()
1067 while (prev != &p->mnt_mounts) { in skip_mnt_tree()
1069 prev = p->mnt_mounts.prev; in skip_mnt_tree()
1075 * vfs_create_mount - Create a mount for a configured superblock
1087 if (!fc->root) in vfs_create_mount()
1088 return ERR_PTR(-EINVAL); in vfs_create_mount()
1090 mnt = alloc_vfsmnt(fc->source ?: "none"); in vfs_create_mount()
1092 return ERR_PTR(-ENOMEM); in vfs_create_mount()
1094 if (fc->sb_flags & SB_KERNMOUNT) in vfs_create_mount()
1095 mnt->mnt.mnt_flags = MNT_INTERNAL; in vfs_create_mount()
1097 atomic_inc(&fc->root->d_sb->s_active); in vfs_create_mount()
1098 mnt->mnt.mnt_sb = fc->root->d_sb; in vfs_create_mount()
1099 mnt->mnt.mnt_root = dget(fc->root); in vfs_create_mount()
1100 mnt->mnt_mountpoint = mnt->mnt.mnt_root; in vfs_create_mount()
1101 mnt->mnt_parent = mnt; in vfs_create_mount()
1104 list_add_tail(&mnt->mnt_instance, &mnt->mnt.mnt_sb->s_mounts); in vfs_create_mount()
1106 return &mnt->mnt; in vfs_create_mount()
1114 up_write(&fc->root->d_sb->s_umount); in fc_mount()
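A sketch of how a configured superblock typically becomes a mount through this path, using the surrounding fs_context helpers ('type' stands in for a real struct file_system_type):

	struct fs_context *fc;
	struct vfsmount *mnt;

	fc = fs_context_for_mount(type, SB_KERNMOUNT);
	if (IS_ERR(fc))
		return ERR_CAST(fc);
	mnt = fc_mount(fc);	/* vfs_get_tree() followed by vfs_create_mount() */
	put_fs_context(fc);
	return mnt;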
1130 return ERR_PTR(-EINVAL); in vfs_kern_mount()
1159 if (mountpoint->d_sb->s_user_ns != &init_user_ns) in vfs_submount()
1160 return ERR_PTR(-EPERM); in vfs_submount()
1169 struct super_block *sb = old->mnt.mnt_sb; in clone_mnt()
1173 mnt = alloc_vfsmnt(old->mnt_devname); in clone_mnt()
1175 return ERR_PTR(-ENOMEM); in clone_mnt()
1178 mnt->mnt_group_id = 0; /* not a peer of original */ in clone_mnt()
1180 mnt->mnt_group_id = old->mnt_group_id; in clone_mnt()
1182 if ((flag & CL_MAKE_SHARED) && !mnt->mnt_group_id) { in clone_mnt()
1188 mnt->mnt.mnt_flags = old->mnt.mnt_flags; in clone_mnt()
1189 mnt->mnt.mnt_flags &= ~(MNT_WRITE_HOLD|MNT_MARKED|MNT_INTERNAL); in clone_mnt()
1191 atomic_inc(&sb->s_active); in clone_mnt()
1192 mnt->mnt.mnt_idmap = mnt_idmap_get(mnt_idmap(&old->mnt)); in clone_mnt()
1194 mnt->mnt.mnt_sb = sb; in clone_mnt()
1195 mnt->mnt.mnt_root = dget(root); in clone_mnt()
1196 mnt->mnt_mountpoint = mnt->mnt.mnt_root; in clone_mnt()
1197 mnt->mnt_parent = mnt; in clone_mnt()
1199 list_add_tail(&mnt->mnt_instance, &sb->s_mounts); in clone_mnt()
1204 list_add(&mnt->mnt_slave, &old->mnt_slave_list); in clone_mnt()
1205 mnt->mnt_master = old; in clone_mnt()
1209 list_add(&mnt->mnt_share, &old->mnt_share); in clone_mnt()
1211 list_add(&mnt->mnt_slave, &old->mnt_slave); in clone_mnt()
1212 mnt->mnt_master = old->mnt_master; in clone_mnt()
1219 /* stick the duplicate mount on the same expiry list in clone_mnt()
1220 * as the original if that was on one */ in clone_mnt()
1222 if (!list_empty(&old->mnt_expire)) in clone_mnt()
1223 list_add(&mnt->mnt_expire, &old->mnt_expire); in clone_mnt()
1241 * filesystem was probably unable to make r/w->r/o transitions. in cleanup_mnt()
1246 if (unlikely(mnt->mnt_pins.first)) in cleanup_mnt()
1248 hlist_for_each_entry_safe(m, p, &mnt->mnt_stuck_children, mnt_umount) { in cleanup_mnt()
1249 hlist_del(&m->mnt_umount); in cleanup_mnt()
1250 mntput(&m->mnt); in cleanup_mnt()
1252 fsnotify_vfsmount_delete(&mnt->mnt); in cleanup_mnt()
1253 dput(mnt->mnt.mnt_root); in cleanup_mnt()
1254 deactivate_super(mnt->mnt.mnt_sb); in cleanup_mnt()
1256 call_rcu(&mnt->mnt_rcu, delayed_free_vfsmnt); in cleanup_mnt()
1281 if (likely(READ_ONCE(mnt->mnt_ns))) { in mntput_no_expire()
1283 * Since we don't do lock_mount_hash() here, in mntput_no_expire()
1284 * ->mnt_ns can change under us. However, if it's in mntput_no_expire()
1285 * non-NULL, then there's a reference that won't in mntput_no_expire()
1287 * turning ->mnt_ns NULL. So if we observe it in mntput_no_expire()
1288 * non-NULL under rcu_read_lock(), the reference in mntput_no_expire()
1289 * we are dropping is not the final one. in mntput_no_expire()
1291 mnt_add_count(mnt, -1); in mntput_no_expire()
1298 * mount_lock, we'll see their refcount increment here. in mntput_no_expire()
1301 mnt_add_count(mnt, -1); in mntput_no_expire()
1309 if (unlikely(mnt->mnt.mnt_flags & MNT_DOOMED)) { in mntput_no_expire()
1314 mnt->mnt.mnt_flags |= MNT_DOOMED; in mntput_no_expire()
1317 list_del(&mnt->mnt_instance); in mntput_no_expire()
1319 if (unlikely(!list_empty(&mnt->mnt_mounts))) { in mntput_no_expire()
1321 list_for_each_entry_safe(p, tmp, &mnt->mnt_mounts, mnt_child) { in mntput_no_expire()
1323 hlist_add_head(&p->mnt_umount, &mnt->mnt_stuck_children); in mntput_no_expire()
1329 if (likely(!(mnt->mnt.mnt_flags & MNT_INTERNAL))) { in mntput_no_expire()
1331 if (likely(!(task->flags & PF_KTHREAD))) { in mntput_no_expire()
1332 init_task_work(&mnt->mnt_rcu, __cleanup_mnt); in mntput_no_expire()
1333 if (!task_work_add(task, &mnt->mnt_rcu, TWA_RESUME)) in mntput_no_expire()
1336 if (llist_add(&mnt->mnt_llist, &delayed_mntput_list)) in mntput_no_expire()
1348 if (unlikely(m->mnt_expiry_mark)) in mntput()
1349 m->mnt_expiry_mark = 0; in mntput()
1371 real_mount(mnt)->mnt_ns = NULL; in mnt_make_shortterm()
1375 * path_is_mountpoint() - Check if path is a mount in the current namespace.
1390 if (!d_mountpoint(path->dentry)) in path_is_mountpoint()
1407 p = clone_mnt(real_mount(path->mnt), path->dentry, CL_PRIVATE); in mnt_clone_internal()
1410 p->mnt.mnt_flags |= MNT_INTERNAL; in mnt_clone_internal()
1411 return &p->mnt; in mnt_clone_internal()
1415 static struct mount *mnt_list_next(struct mnt_namespace *ns, in mnt_list_next() argument
1420 lock_ns_list(ns); in mnt_list_next()
1421 list_for_each_continue(p, &ns->list) { in mnt_list_next()
1428 unlock_ns_list(ns); in mnt_list_next()
1433 /* iterator; we want it to have access to namespace_sem, thus here... */
1436 struct proc_mounts *p = m->private; in m_start()
1441 prev = &p->ns->list; in m_start()
1443 prev = &p->cursor.mnt_list; in m_start()
1445 /* Read after we'd reached the end? */ in m_start()
1450 return mnt_list_next(p->ns, prev); in m_start()
1455 struct proc_mounts *p = m->private; in m_next()
1459 return mnt_list_next(p->ns, &mnt->mnt_list); in m_next()
1464 struct proc_mounts *p = m->private; in m_stop()
1467 lock_ns_list(p->ns); in m_stop()
1469 list_move_tail(&p->cursor.mnt_list, &mnt->mnt_list); in m_stop()
1471 list_del_init(&p->cursor.mnt_list); in m_stop()
1472 unlock_ns_list(p->ns); in m_stop()
1478 struct proc_mounts *p = m->private; in m_show()
1480 return p->show(m, &r->mnt); in m_show()
1490 void mnt_cursor_del(struct mnt_namespace *ns, struct mount *cursor) in mnt_cursor_del() argument
1493 lock_ns_list(ns); in mnt_cursor_del()
1494 list_del(&cursor->mnt_list); in mnt_cursor_del()
1495 unlock_ns_list(ns); in mnt_cursor_del()
1501 * may_umount_tree - check if a mount tree is busy
1533 * may_umount - check if a mount point is busy
1542 * give false negatives. The main reason why it's here is that we need
1543 * a non-destructive way to look for easily umountable filesystems.
1579 hlist_del(&m->mnt_umount); in namespace_unlock()
1580 mntput(&m->mnt); in namespace_unlock()
1609 if (!(mnt->mnt_parent->mnt.mnt_flags & MNT_UMOUNT)) in disconnect_mount()
1638 p->mnt.mnt_flags |= MNT_UMOUNT; in umount_tree()
1639 list_move(&p->mnt_list, &tmp_list); in umount_tree()
1644 list_del_init(&p->mnt_child); in umount_tree()
1652 struct mnt_namespace *ns; in umount_tree() local
1655 list_del_init(&p->mnt_expire); in umount_tree()
1656 list_del_init(&p->mnt_list); in umount_tree()
1657 ns = p->mnt_ns; in umount_tree()
1658 if (ns) { in umount_tree()
1659 ns->mounts--; in umount_tree()
1660 __touch_mnt_namespace(ns); in umount_tree()
1662 p->mnt_ns = NULL; in umount_tree()
1664 p->mnt.mnt_flags |= MNT_SYNC_UMOUNT; in umount_tree()
1668 mnt_add_count(p->mnt_parent, -1); in umount_tree()
1671 list_add_tail(&p->mnt_child, &p->mnt_parent->mnt_mounts); in umount_tree()
1678 hlist_add_head(&p->mnt_umount, &unmounted); in umount_tree()
1688 down_write(&sb->s_umount); in do_umount_root()
1692 fc = fs_context_for_reconfigure(sb->s_root, SB_RDONLY, in do_umount_root()
1703 up_write(&sb->s_umount); in do_umount_root()
1709 struct super_block *sb = mnt->mnt.mnt_sb; in do_umount()
1712 retval = security_sb_umount(&mnt->mnt, flags); in do_umount()
1723 if (&mnt->mnt == current->fs->root.mnt || in do_umount()
1725 return -EINVAL; in do_umount()
1728 * probably don't strictly need the lock here if we examined in do_umount()
1734 return -EBUSY; in do_umount()
1738 if (!xchg(&mnt->mnt_expiry_mark, 1)) in do_umount()
1739 return -EAGAIN; in do_umount()
1743 * If we may have to abort operations to get out of this in do_umount()
1744 * mount, and they will themselves hold resources we must in do_umount()
1747 * might fail to complete on the first run through as other tasks in do_umount()
1752 if (flags & MNT_FORCE && sb->s_op->umount_begin) { in do_umount()
1753 sb->s_op->umount_begin(sb); in do_umount()
1759 * Ho-hum... In principle, we might treat that as umount + switch in do_umount()
1762 * /reboot - static binary that would close all descriptors and in do_umount()
1765 if (&mnt->mnt == current->fs->root.mnt && !(flags & MNT_DETACH)) { in do_umount()
1768 * we just try to remount it readonly. in do_umount()
1770 if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) in do_umount()
1771 return -EPERM; in do_umount()
1779 retval = -EINVAL; in do_umount()
1780 if (mnt->mnt.mnt_flags & MNT_LOCKED) in do_umount()
1785 if (!list_empty(&mnt->mnt_list)) in do_umount()
1790 retval = -EBUSY; in do_umount()
1792 if (!list_empty(&mnt->mnt_list)) in do_umount()
1804 * __detach_mounts - lazily unmount all mounts on the specified dentry
1811 * The caller may hold dentry->d_inode->i_mutex.
1825 while (!hlist_empty(&mp->m_list)) { in __detach_mounts()
1826 mnt = hlist_entry(mp->m_list.first, struct mount, mnt_mp_list); in __detach_mounts()
1827 if (mnt->mnt.mnt_flags & MNT_UMOUNT) { in __detach_mounts()
1829 hlist_add_head(&mnt->mnt_umount, &unmounted); in __detach_mounts()
1844 return ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN); in may_mount()
1848 * path_mounted - check whether path is mounted
1857 return path->mnt->mnt_root == path->dentry; in path_mounted()
1871 struct mount *mnt = real_mount(path->mnt); in can_umount()
1874 return -EPERM; in can_umount()
1876 return -EINVAL; in can_umount()
1878 return -EINVAL; in can_umount()
1879 if (mnt->mnt.mnt_flags & MNT_LOCKED) /* Check optimistically */ in can_umount()
1880 return -EINVAL; in can_umount()
1882 return -EPERM; in can_umount()
1889 struct mount *mnt = real_mount(path->mnt); in path_umount()
1896 /* we mustn't call path_put() as that would clear mnt_expiry_mark */ in path_umount()
1897 dput(path->dentry); in path_umount()
1910 return -EINVAL; in ksys_umount()
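ksys_umount() backs umount2(2); a userspace sketch (target path illustrative):

	#include <stdio.h>
	#include <sys/mount.h>

	if (umount2("/mnt/data", MNT_DETACH) == -1)	/* lazy unmount */
		perror("umount2");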
1940 return dentry->d_op == &ns_dentry_operations && in is_mnt_ns_file()
1941 dentry->d_fsdata == &mntns_operations; in is_mnt_ns_file()
1944 static struct mnt_namespace *to_mnt_ns(struct ns_common *ns) in to_mnt_ns() argument
1946 return container_of(ns, struct mnt_namespace, ns); in to_mnt_ns()
1951 return &mnt->ns; in from_mnt_ns()
1963 mnt_ns = to_mnt_ns(get_proc_ns(dentry->d_inode)); in mnt_ns_loop()
1964 return current->nsproxy->mnt_ns->seq >= mnt_ns->seq; in mnt_ns_loop()
1973 return ERR_PTR(-EINVAL); in copy_tree()
1976 return ERR_PTR(-EINVAL); in copy_tree()
1982 q->mnt_mountpoint = mnt->mnt_mountpoint; in copy_tree()
1985 list_for_each_entry(r, &mnt->mnt_mounts, mnt_child) { in copy_tree()
1987 if (!is_subdir(r->mnt_mountpoint, dentry)) in copy_tree()
1993 if (s->mnt.mnt_flags & MNT_LOCKED) { in copy_tree()
1995 q = ERR_PTR(-EPERM); in copy_tree()
2003 is_mnt_ns_file(s->mnt.mnt_root)) { in copy_tree()
2007 while (p != s->mnt_parent) { in copy_tree()
2008 p = p->mnt_parent; in copy_tree()
2009 q = q->mnt_parent; in copy_tree()
2013 q = clone_mnt(p, p->mnt.mnt_root, flag); in copy_tree()
2017 list_add_tail(&q->mnt_list, &res->mnt_list); in copy_tree()
2018 attach_mnt(q, parent, p->mnt_mp, false); in copy_tree()
2038 if (!check_mnt(real_mount(path->mnt))) in collect_mounts()
2039 tree = ERR_PTR(-EINVAL); in collect_mounts()
2041 tree = copy_tree(real_mount(path->mnt), path->dentry, in collect_mounts()
2046 return &tree->mnt; in collect_mounts()
2054 struct mnt_namespace *ns; in dissolve_on_fput() local
2057 ns = real_mount(mnt)->mnt_ns; in dissolve_on_fput()
2058 if (ns) { in dissolve_on_fput()
2059 if (is_anon_ns(ns)) in dissolve_on_fput()
2062 ns = NULL; in dissolve_on_fput()
2066 if (ns) in dissolve_on_fput()
2067 free_mnt_ns(ns); in dissolve_on_fput()
2083 list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) { in has_locked_children()
2084 if (!is_subdir(child->mnt_mountpoint, dentry)) in has_locked_children()
2087 if (child->mnt.mnt_flags & MNT_LOCKED) in has_locked_children()
2094 * clone_private_mount - create a private clone of a path
2105 struct mount *old_mnt = real_mount(path->mnt); in clone_private_mount()
2115 if (has_locked_children(old_mnt, path->dentry)) in clone_private_mount()
2118 new_mnt = clone_mnt(old_mnt, path->dentry, CL_PRIVATE); in clone_private_mount()
2125 new_mnt->mnt_ns = MNT_NS_INTERNAL; in clone_private_mount()
2127 return &new_mnt->mnt; in clone_private_mount()
2131 return ERR_PTR(-EINVAL); in clone_private_mount()
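A sketch of a caller, roughly how stacking filesystems use the private clone (variable names illustrative):

	struct vfsmount *m;

	m = clone_private_mount(&lower_path);	/* CL_PRIVATE clone, attached nowhere */
	if (IS_ERR(m))
		return PTR_ERR(m);
	/* ... use m for internal lookups only ... */
	mntput(m);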
2142 list_for_each_entry(mnt, &real_mount(root)->mnt_list, mnt_list) { in iterate_mounts()
2143 res = f(&mnt->mnt, arg); in iterate_mounts()
2155 int flags = p->mnt.mnt_flags; in lock_mnt_tree()
2171 if (list_empty(&p->mnt_expire)) in lock_mnt_tree()
2173 p->mnt.mnt_flags = flags; in lock_mnt_tree()
2182 if (p->mnt_group_id && !IS_MNT_SHARED(p)) in cleanup_group_ids()
2192 if (!p->mnt_group_id && !IS_MNT_SHARED(p)) { in invent_group_ids()
2204 int count_mounts(struct mnt_namespace *ns, struct mount *mnt) in count_mounts() argument
2210 if (ns->mounts >= max) in count_mounts()
2211 return -ENOSPC; in count_mounts()
2212 max -= ns->mounts; in count_mounts()
2213 if (ns->pending_mounts >= max) in count_mounts()
2214 return -ENOSPC; in count_mounts()
2215 max -= ns->pending_mounts; in count_mounts()
2221 return -ENOSPC; in count_mounts()
2223 ns->pending_mounts += mounts; in count_mounts()
2233 * attach_recursive_mnt - attach a source mount tree
2235 * @top_mnt: mount that @source_mnt will be mounted on or mounted beneath
2241 * ---------------------------------------------------------------------------
2244 * | source-->| shared | private | slave | unbindable |
2251 * |non-shared| shared (+) | private | slave (*) | invalid |
2253 * A bind operation clones the source mount and mounts the clone on the
2269 * ---------------------------------------------------------------------------
2272 * | source-->| shared | private | slave | unbindable |
2279 * |non-shared| shared (+*) | private | slave (*) | unbindable |
2304 struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns; in attach_recursive_mnt()
2306 struct mnt_namespace *ns = top_mnt->mnt_ns; in attach_recursive_mnt() local
2315 * mounted beneath mounts on the same mountpoint. in attach_recursive_mnt()
2317 smp = get_mountpoint(source_mnt->mnt.mnt_root); in attach_recursive_mnt()
2323 err = count_mounts(ns, source_mnt); in attach_recursive_mnt()
2329 dest_mnt = top_mnt->mnt_parent; in attach_recursive_mnt()
2353 touch_mnt_namespace(source_mnt->mnt_ns); in attach_recursive_mnt()
2355 if (source_mnt->mnt_ns) { in attach_recursive_mnt()
2356 /* move from anon - the caller will destroy */ in attach_recursive_mnt()
2357 list_del_init(&source_mnt->mnt_ns->list); in attach_recursive_mnt()
2368 hlist_del_init(&child->mnt_hash); in attach_recursive_mnt()
2369 q = __lookup_mnt(&child->mnt_parent->mnt, in attach_recursive_mnt()
2370 child->mnt_mountpoint); in attach_recursive_mnt()
2373 /* Notice when we are propagating across user namespaces */ in attach_recursive_mnt()
2374 if (child->mnt_parent->mnt_ns->user_ns != user_ns) in attach_recursive_mnt()
2376 child->mnt.mnt_flags &= ~MNT_LOCKED; in attach_recursive_mnt()
2387 child->mnt_parent->mnt_ns->pending_mounts = 0; in attach_recursive_mnt()
2393 ns->pending_mounts = 0; in attach_recursive_mnt()
2403 * do_lock_mount - lock mount and mountpoint
2407 * Follow the mount stack on @path until the top mount @mnt is found. If
2408 * the initial @path->{mnt,dentry} is a mountpoint lookup the first
2409 * mount stacked on top of it. Then simply follow @{mnt,mnt->mnt_root}
2410 * until nothing is stacked on top of it anymore.
2412 * Acquire the inode_lock() on the top mount's ->mnt_root to protect
2416 * If @beneath is requested, acquire inode_lock() on @mnt's mountpoint
2417 * @mp on @mnt->mnt_parent must be acquired. This protects against a
2418 * concurrent unlink of @mp->mnt_dentry from another mount namespace
2420 * removal of @mnt->mnt_root doesn't matter as nothing will be mounted
2421 * on top of it for @beneath.
2429 * If @mnt hasn't been unmounted then @mnt->mnt_mountpoint still points
2430 * to @mnt->mnt_mp->m_dentry. But if @mnt has been unmounted it will
2431 * point to @mnt->mnt_root and @mnt->mnt_mp will be NULL.
2433 * Return: Either the target mountpoint on the top mount or the top
2438 struct vfsmount *mnt = path->mnt; in do_lock_mount()
2440 struct mountpoint *mp = ERR_PTR(-ENOENT); in do_lock_mount()
2448 dentry = dget(m->mnt_mountpoint); in do_lock_mount()
2451 dentry = path->dentry; in do_lock_mount()
2454 inode_lock(dentry->d_inode); in do_lock_mount()
2456 inode_unlock(dentry->d_inode); in do_lock_mount()
2462 if (beneath && (!is_mounted(mnt) || m->mnt_mountpoint != dentry)) { in do_lock_mount()
2464 inode_unlock(dentry->d_inode); in do_lock_mount()
2473 inode_unlock(dentry->d_inode); in do_lock_mount()
2477 path->mnt = mnt; in do_lock_mount()
2478 path->dentry = dget(mnt->mnt_root); in do_lock_mount()
2484 inode_unlock(dentry->d_inode); in do_lock_mount()
2501 struct dentry *dentry = where->m_dentry; in unlock_mount()
2508 inode_unlock(dentry->d_inode); in unlock_mount()
2513 if (mnt->mnt.mnt_sb->s_flags & SB_NOUSER) in graft_tree()
2514 return -EINVAL; in graft_tree()
2516 if (d_is_dir(mp->m_dentry) != in graft_tree()
2517 d_is_dir(mnt->mnt.mnt_root)) in graft_tree()
2518 return -ENOTDIR; in graft_tree()
2531 /* Fail if any non-propagation flags are set */ in flags_to_propagation_type()
2546 struct mount *mnt = real_mount(path->mnt); in do_change_type()
2552 return -EINVAL; in do_change_type()
2556 return -EINVAL; in do_change_type()
2577 struct mount *mnt = ERR_PTR(-EINVAL), *old = real_mount(old_path->mnt); in __do_loopback()
2582 if (!check_mnt(old) && old_path->dentry->d_op != &ns_dentry_operations) in __do_loopback()
2585 if (!recurse && has_locked_children(old, old_path->dentry)) in __do_loopback()
2589 mnt = copy_tree(old, old_path->dentry, CL_COPY_MNT_NS_FILE); in __do_loopback()
2591 mnt = clone_mnt(old, old_path->dentry, 0); in __do_loopback()
2594 mnt->mnt.mnt_flags &= ~MNT_LOCKED; in __do_loopback()
2610 return -EINVAL; in do_loopback()
2615 err = -EINVAL; in do_loopback()
2625 parent = real_mount(path->mnt); in do_loopback()
2650 struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns; in open_detached_copy()
2651 struct mnt_namespace *ns = alloc_mnt_ns(user_ns, true); in open_detached_copy() local
2655 if (IS_ERR(ns)) in open_detached_copy()
2656 return ERR_CAST(ns); in open_detached_copy()
2662 free_mnt_ns(ns); in open_detached_copy()
2668 p->mnt_ns = ns; in open_detached_copy()
2669 ns->mounts++; in open_detached_copy()
2671 ns->root = mnt; in open_detached_copy()
2672 list_add_tail(&ns->list, &mnt->mnt_list); in open_detached_copy()
2673 mntget(&mnt->mnt); in open_detached_copy()
2677 mntput(path->mnt); in open_detached_copy()
2678 path->mnt = &mnt->mnt; in open_detached_copy()
2681 dissolve_on_fput(path->mnt); in open_detached_copy()
2683 file->f_mode |= FMODE_NEED_UNMOUNT; in open_detached_copy()
2701 return -EINVAL; in SYSCALL_DEFINE3()
2704 return -EINVAL; in SYSCALL_DEFINE3()
2714 return -EPERM; in SYSCALL_DEFINE3()
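These fragments belong to open_tree(2). A userspace sketch, assuming recent kernel headers and no glibc wrapper (AT_RECURSIVE may need <linux/fcntl.h> on older libcs):

	#include <fcntl.h>
	#include <unistd.h>
	#include <sys/syscall.h>
	#include <linux/mount.h>	/* OPEN_TREE_CLONE, OPEN_TREE_CLOEXEC */

	int fd = syscall(SYS_open_tree, AT_FDCWD, "/some/dir",
			 OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC | AT_RECURSIVE);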
2746 unsigned int fl = mnt->mnt.mnt_flags; in can_change_locked_flags()
2775 if (readonly_request == __mnt_is_readonly(&mnt->mnt)) in change_mount_ro_state()
2781 mnt->mnt.mnt_flags &= ~MNT_READONLY; in change_mount_ro_state()
2787 mnt_flags |= mnt->mnt.mnt_flags & ~MNT_USER_SETTABLE_MASK; in set_mount_attributes()
2788 mnt->mnt.mnt_flags = mnt_flags; in set_mount_attributes()
2789 touch_mnt_namespace(mnt->mnt_ns); in set_mount_attributes()
2794 struct super_block *sb = mnt->mnt_sb; in mnt_warn_timestamp_expiry()
2797 (!(sb->s_iflags & SB_I_TS_EXPIRY_WARNED)) && in mnt_warn_timestamp_expiry()
2798 (ktime_get_real_seconds() + TIME_UPTIME_SEC_MAX > sb->s_time_max)) { in mnt_warn_timestamp_expiry()
2805 mntpath = ERR_PTR(-ENOMEM); in mnt_warn_timestamp_expiry()
2810 sb->s_type->name, in mnt_warn_timestamp_expiry()
2812 mntpath, &sb->s_time_max, in mnt_warn_timestamp_expiry()
2813 (unsigned long long)sb->s_time_max); in mnt_warn_timestamp_expiry()
2815 sb->s_iflags |= SB_I_TS_EXPIRY_WARNED; in mnt_warn_timestamp_expiry()
2828 struct super_block *sb = path->mnt->mnt_sb; in do_reconfigure_mnt()
2829 struct mount *mnt = real_mount(path->mnt); in do_reconfigure_mnt()
2833 return -EINVAL; in do_reconfigure_mnt()
2836 return -EINVAL; in do_reconfigure_mnt()
2839 return -EPERM; in do_reconfigure_mnt()
2842 * We're only checking whether the superblock is read-only not in do_reconfigure_mnt()
2843 * changing it, so only take down_read(&sb->s_umount). in do_reconfigure_mnt()
2845 down_read(&sb->s_umount); in do_reconfigure_mnt()
2851 up_read(&sb->s_umount); in do_reconfigure_mnt()
2853 mnt_warn_timestamp_expiry(path, &mnt->mnt); in do_reconfigure_mnt()
2860 * If you've mounted a non-root directory somewhere and want to do remount
2861 * on it - tough luck.
2867 struct super_block *sb = path->mnt->mnt_sb; in do_remount()
2868 struct mount *mnt = real_mount(path->mnt); in do_remount()
2872 return -EINVAL; in do_remount()
2875 return -EINVAL; in do_remount()
2878 return -EPERM; in do_remount()
2880 fc = fs_context_for_reconfigure(path->dentry, sb_flags, MS_RMT_MASK); in do_remount()
2888 fc->oldapi = true; in do_remount()
2892 down_write(&sb->s_umount); in do_remount()
2893 err = -EPERM; in do_remount()
2894 if (ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) { in do_remount()
2902 up_write(&sb->s_umount); in do_remount()
2905 mnt_warn_timestamp_expiry(path, &mnt->mnt); in do_remount()
2924 * that aren't checked by the mount-cycle checking code, thereby allowing
2934 if (mnt_ns_loop(p->mnt.mnt_root)) in check_for_nsfs_mounts()
2948 from = real_mount(from_path->mnt); in do_set_group()
2949 to = real_mount(to_path->mnt); in do_set_group()
2953 err = -EINVAL; in do_set_group()
2955 if (!is_mounted(&from->mnt)) in do_set_group()
2957 if (!is_mounted(&to->mnt)) in do_set_group()
2960 err = -EPERM; in do_set_group()
2961 /* We should be allowed to modify mount namespaces of both mounts */ in do_set_group()
2962 if (!ns_capable(from->mnt_ns->user_ns, CAP_SYS_ADMIN)) in do_set_group()
2964 if (!ns_capable(to->mnt_ns->user_ns, CAP_SYS_ADMIN)) in do_set_group()
2967 err = -EINVAL; in do_set_group()
2975 if (from->mnt.mnt_sb != to->mnt.mnt_sb) in do_set_group()
2979 if (!is_subdir(to->mnt.mnt_root, from->mnt.mnt_root)) in do_set_group()
2983 if (has_locked_children(from, to->mnt.mnt_root)) in do_set_group()
2986 /* Setting sharing groups is only allowed on private mounts */ in do_set_group()
2995 struct mount *m = from->mnt_master; in do_set_group()
2997 list_add(&to->mnt_slave, &m->mnt_slave_list); in do_set_group()
2998 to->mnt_master = m; in do_set_group()
3002 to->mnt_group_id = from->mnt_group_id; in do_set_group()
3003 list_add(&to->mnt_share, &from->mnt_share); in do_set_group()
3016 * path_overmounted - check if path is overmounted
3019 * Check if path is overmounted, i.e., if there's a mount on top of
3020 * @path->mnt with @path->dentry as mountpoint.
3028 if (unlikely(__lookup_mnt(path->mnt, path->dentry))) { in path_overmounted()
3037 * can_move_mount_beneath - check that we can mount beneath the top mount
3041 * - Make sure that @to->dentry is actually the root of a mount under
3042 * which we can mount another mount.
3043 * - Make sure that nothing can be mounted beneath the caller's current
3045 * - Make sure that the caller can unmount the topmost mount ensuring
3047 * - Ensure that nothing has been mounted on top of @from before we
3049 * - Prevent mounting beneath a mount if the propagation relationship
3054 * Return: On success 0, and on error a negative error code is returned.
3060 struct mount *mnt_from = real_mount(from->mnt), in can_move_mount_beneath()
3061 *mnt_to = real_mount(to->mnt), in can_move_mount_beneath()
3062 *parent_mnt_to = mnt_to->mnt_parent; in can_move_mount_beneath()
3065 return -EINVAL; in can_move_mount_beneath()
3068 return -EINVAL; in can_move_mount_beneath()
3071 return -EINVAL; in can_move_mount_beneath()
3075 return -EINVAL; in can_move_mount_beneath()
3081 if (&mnt_to->mnt == current->fs->root.mnt) in can_move_mount_beneath()
3082 return -EINVAL; in can_move_mount_beneath()
3083 if (parent_mnt_to == current->nsproxy->mnt_ns->root) in can_move_mount_beneath()
3084 return -EINVAL; in can_move_mount_beneath()
3086 for (struct mount *p = mnt_from; mnt_has_parent(p); p = p->mnt_parent) in can_move_mount_beneath()
3088 return -EINVAL; in can_move_mount_beneath()
3092 * mean mounting @mnt_from on @mnt_to->mnt_parent and then in can_move_mount_beneath()
3093 * propagating a copy @c of @mnt_from on top of @mnt_to. This in can_move_mount_beneath()
3097 return -EINVAL; in can_move_mount_beneath()
3100 * If @mnt_to->mnt_parent propagates to @mnt_from this would in can_move_mount_beneath()
3101 * mean propagating a copy @c of @mnt_from on top of @mnt_from. in can_move_mount_beneath()
3102 * Afterwards @mnt_from would be mounted on top of in can_move_mount_beneath()
3103 * @mnt_to->mnt_parent and @mnt_to would be unmounted from in can_move_mount_beneath()
3104 * @mnt->mnt_parent and remounted on @mnt_from. But since @c is in can_move_mount_beneath()
3105 * already mounted on @mnt_from, @mnt_to would ultimately be in can_move_mount_beneath()
3106 * remounted on top of @c. Afterwards, @mnt_from would be in can_move_mount_beneath()
3112 return -EINVAL; in can_move_mount_beneath()
3120 struct mnt_namespace *ns; in do_move_mount() local
3133 old = real_mount(old_path->mnt); in do_move_mount()
3134 p = real_mount(new_path->mnt); in do_move_mount()
3135 parent = old->mnt_parent; in do_move_mount()
3139 old_mp = old->mnt_mp; in do_move_mount()
3140 ns = old->mnt_ns; in do_move_mount()
3142 err = -EINVAL; in do_move_mount()
3148 if (!is_mounted(&old->mnt)) in do_move_mount()
3152 if (!(attached ? check_mnt(old) : is_anon_ns(ns))) in do_move_mount()
3155 if (old->mnt.mnt_flags & MNT_LOCKED) in do_move_mount()
3161 if (d_is_dir(new_path->dentry) != in do_move_mount()
3162 d_is_dir(old_path->dentry)) in do_move_mount()
3175 err = -EINVAL; in do_move_mount()
3176 p = p->mnt_parent; in do_move_mount()
3186 err = -ELOOP; in do_move_mount()
3189 for (; mnt_has_parent(p); p = p->mnt_parent) in do_move_mount()
3193 err = attach_recursive_mnt(old, real_mount(new_path->mnt), mp, flags); in do_move_mount()
3199 list_del_init(&old->mnt_expire); in do_move_mount()
3208 free_mnt_ns(ns); in do_move_mount()
3219 return -EINVAL; in do_move_mount_old()
3236 struct mount *parent = real_mount(path->mnt); in do_add_mount()
3241 /* that's acceptable only for automounts done in private ns */ in do_add_mount()
3243 return -EINVAL; in do_add_mount()
3244 /* ... and for those we'd better have mountpoint still alive */ in do_add_mount()
3245 if (!parent->mnt_ns) in do_add_mount()
3246 return -EINVAL; in do_add_mount()
3249 /* Refuse the same filesystem on the same mount point */ in do_add_mount()
3250 if (path->mnt->mnt_sb == newmnt->mnt.mnt_sb && path_mounted(path)) in do_add_mount()
3251 return -EBUSY; in do_add_mount()
3253 if (d_is_symlink(newmnt->mnt.mnt_root)) in do_add_mount()
3254 return -EINVAL; in do_add_mount()
3256 newmnt->mnt.mnt_flags = mnt_flags; in do_add_mount()
3271 struct super_block *sb = fc->root->d_sb; in do_new_mount_fc()
3276 error = -EPERM; in do_new_mount_fc()
3283 up_write(&sb->s_umount); in do_new_mount_fc()
3316 return -EINVAL; in do_new_mount()
3320 return -ENODEV; in do_new_mount()
3322 if (type->fs_flags & FS_HAS_SUBTYPE) { in do_new_mount()
3328 return -EINVAL; in do_new_mount()
3342 fc->oldapi = true; in do_new_mount()
3352 err = -EPERM; in do_new_mount()
3364 struct dentry *dentry = path->dentry; in finish_automount()
3376 * expired before we get a chance to add it in finish_automount()
3380 if (m->mnt_sb == path->mnt->mnt_sb && in finish_automount()
3381 m->mnt_root == dentry) { in finish_automount()
3382 err = -ELOOP; in finish_automount()
3387 * we don't want to use lock_mount() - in this case finding something in finish_automount()
3388 * that overmounts our mountpoint means "quietly drop what we've in finish_automount()
3389 * got", not "try to mount it on top". in finish_automount()
3391 inode_lock(dentry->d_inode); in finish_automount()
3394 err = -ENOENT; in finish_automount()
3407 err = do_add_mount(mnt, mp, path, path->mnt->mnt_flags | MNT_SHRINKABLE); in finish_automount()
3416 inode_unlock(dentry->d_inode); in finish_automount()
3418 /* remove m from any expiration list it may be on */ in finish_automount()
3419 if (!list_empty(&mnt->mnt_expire)) { in finish_automount()
3421 list_del_init(&mnt->mnt_expire); in finish_automount()
3430 * mnt_set_expiry - Put a mount on an expiration list
3438 list_add_tail(&real_mount(mnt)->mnt_expire, expiry_list); in mnt_set_expiry()
3446 * mountpoints that aren't in use and haven't been touched since last we came
3462 * - only referenced by its parent vfsmount in mark_mounts_for_expiry()
3463 * - still marked for expiry (marked on the last call here; marks are in mark_mounts_for_expiry()
3467 if (!xchg(&mnt->mnt_expiry_mark, 1) || in mark_mounts_for_expiry()
3470 list_move(&mnt->mnt_expire, &graveyard); in mark_mounts_for_expiry()
3474 touch_mnt_namespace(mnt->mnt_ns); in mark_mounts_for_expiry()
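A sketch of the expiry protocol as automount-style users drive it (example_expiry_list is an illustrative list head owned by the filesystem):

	static LIST_HEAD(example_expiry_list);

	/* After creating an automounted submount: */
	mnt_set_expiry(mnt, &example_expiry_list);

	/* Periodically, e.g. from delayed work: reap mounts unused since the last pass. */
	mark_mounts_for_expiry(&example_expiry_list);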
3496 next = this_parent->mnt_mounts.next; in select_submounts()
3498 while (next != &this_parent->mnt_mounts) { in select_submounts()
3502 next = tmp->next; in select_submounts()
3503 if (!(mnt->mnt.mnt_flags & MNT_SHRINKABLE)) in select_submounts()
3506 * Descend a level if the d_mounts list is non-empty. in select_submounts()
3508 if (!list_empty(&mnt->mnt_mounts)) { in select_submounts()
3514 list_move_tail(&mnt->mnt_expire, graveyard); in select_submounts()
3522 next = this_parent->mnt_child.next; in select_submounts()
3523 this_parent = this_parent->mnt_parent; in select_submounts()
3545 touch_mnt_namespace(m->mnt_ns); in shrink_submounts()
3561 return ERR_PTR(-ENOMEM); in copy_mount_options()
3569 offset = PAGE_SIZE - left; in copy_mount_options()
3575 left--; in copy_mount_options()
3581 return ERR_PTR(-EFAULT); in copy_mount_options()
3593 * Flags is a 32-bit value that allows up to 31 non-fs dependent flags to
3594 * be given to the mount() call (ie: read-only, no-dev, no-suid etc).
3597 * PAGE_SIZE-1 bytes, which can contain arbitrary fs-dependent
3600 * Pre-0.97 versions of mount() didn't have a flags word.
3602 * to have the magic value 0xC0ED, and this remained so until 2.4.0-test9.
3618 ((char *)data_page)[PAGE_SIZE - 1] = 0; in path_mount()
3621 return -EINVAL; in path_mount()
3627 return -EPERM; in path_mount()
3635 /* Separate the per-mountpoint flags */ in path_mount()
3658 mnt_flags |= path->mnt->mnt_flags & MNT_ATIME_MASK; in path_mount()
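path_mount() services the classic mount(2) interface described above; a userspace sketch:

	#include <sys/mount.h>

	mount("proc", "/proc", "proc", MS_NOSUID | MS_NODEV | MS_NOEXEC, NULL);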
3699 static struct ucounts *inc_mnt_namespaces(struct user_namespace *ns) in inc_mnt_namespaces() argument
3701 return inc_ucount(ns, current_euid(), UCOUNT_MNT_NAMESPACES); in inc_mnt_namespaces()
3709 static void free_mnt_ns(struct mnt_namespace *ns) in free_mnt_ns() argument
3711 if (!is_anon_ns(ns)) in free_mnt_ns()
3712 ns_free_inum(&ns->ns); in free_mnt_ns()
3713 dec_mnt_namespaces(ns->ucounts); in free_mnt_ns()
3714 put_user_ns(ns->user_ns); in free_mnt_ns()
3715 kfree(ns); in free_mnt_ns()
3719 * Assign a sequence number so we can detect when we attempt to bind
3723 * is effectively never, so we can ignore the possibility.
3735 return ERR_PTR(-ENOSPC); in alloc_mnt_ns()
3740 return ERR_PTR(-ENOMEM); in alloc_mnt_ns()
3743 ret = ns_alloc_inum(&new_ns->ns); in alloc_mnt_ns()
3750 new_ns->ns.ops = &mntns_operations; in alloc_mnt_ns()
3752 new_ns->seq = atomic64_add_return(1, &mnt_ns_seq); in alloc_mnt_ns()
3753 refcount_set(&new_ns->ns.count, 1); in alloc_mnt_ns()
3754 INIT_LIST_HEAD(&new_ns->list); in alloc_mnt_ns()
3755 init_waitqueue_head(&new_ns->poll); in alloc_mnt_ns()
3756 spin_lock_init(&new_ns->ns_lock); in alloc_mnt_ns()
3757 new_ns->user_ns = get_user_ns(user_ns); in alloc_mnt_ns()
3758 new_ns->ucounts = ucounts; in alloc_mnt_ns()
3763 struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns, in copy_mnt_ns() argument
3773 BUG_ON(!ns); in copy_mnt_ns()
3776 get_mnt_ns(ns); in copy_mnt_ns()
3777 return ns; in copy_mnt_ns()
3780 old = ns->root; in copy_mnt_ns()
3789 if (user_ns != ns->user_ns) in copy_mnt_ns()
3791 new = copy_tree(old, old->mnt.mnt_root, copy_flags); in copy_mnt_ns()
3797 if (user_ns != ns->user_ns) { in copy_mnt_ns()
3802 new_ns->root = new; in copy_mnt_ns()
3803 list_add_tail(&new_ns->list, &new->mnt_list); in copy_mnt_ns()
3806 * Second pass: switch the tsk->fs->* elements and mark new vfsmounts in copy_mnt_ns()
3807 * as belonging to new namespace. We have already acquired a private in copy_mnt_ns()
3808 * fs_struct, so tsk->fs->lock is not needed. in copy_mnt_ns()
3813 q->mnt_ns = new_ns; in copy_mnt_ns()
3814 new_ns->mounts++; in copy_mnt_ns()
3816 if (&p->mnt == new_fs->root.mnt) { in copy_mnt_ns()
3817 new_fs->root.mnt = mntget(&q->mnt); in copy_mnt_ns()
3818 rootmnt = &p->mnt; in copy_mnt_ns()
3820 if (&p->mnt == new_fs->pwd.mnt) { in copy_mnt_ns()
3821 new_fs->pwd.mnt = mntget(&q->mnt); in copy_mnt_ns()
3822 pwdmnt = &p->mnt; in copy_mnt_ns()
3829 // an mntns binding we'd skipped? in copy_mnt_ns()
3830 while (p->mnt.mnt_root != q->mnt.mnt_root) in copy_mnt_ns()
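copy_mnt_ns() is what runs when a task unshares its mount namespace; a userspace sketch of the common sequence (the MS_REC|MS_PRIVATE step is convention to stop propagation back to the parent namespace, not required by the API):

	#define _GNU_SOURCE
	#include <sched.h>
	#include <sys/mount.h>

	unshare(CLONE_NEWNS);				/* needs CAP_SYS_ADMIN */
	mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, NULL);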
3846 struct mnt_namespace *ns; in mount_subtree() local
3851 ns = alloc_mnt_ns(&init_user_ns, true); in mount_subtree()
3852 if (IS_ERR(ns)) { in mount_subtree()
3854 return ERR_CAST(ns); in mount_subtree()
3856 mnt->mnt_ns = ns; in mount_subtree()
3857 ns->root = mnt; in mount_subtree()
3858 ns->mounts++; in mount_subtree()
3859 list_add(&mnt->mnt_list, &ns->list); in mount_subtree()
3861 err = vfs_path_lookup(m->mnt_root, m, in mount_subtree()
3864 put_mnt_ns(ns); in mount_subtree()
3870 s = path.mnt->mnt_sb; in mount_subtree()
3871 atomic_inc(&s->s_active); in mount_subtree()
3874 down_write(&s->s_umount); in mount_subtree()
3875 /* ... and return the root of (sub)tree on it */ in mount_subtree()
3946 * (specified by fs_fd) and attach to an open_tree-like file descriptor.
3951 struct mnt_namespace *ns; in SYSCALL_DEFINE3() local
3961 return -EPERM; in SYSCALL_DEFINE3()
3964 return -EINVAL; in SYSCALL_DEFINE3()
3967 return -EINVAL; in SYSCALL_DEFINE3()
3981 return -EINVAL; in SYSCALL_DEFINE3()
3986 return -EBADF; in SYSCALL_DEFINE3()
3988 ret = -EINVAL; in SYSCALL_DEFINE3()
3989 if (f.file->f_op != &fscontext_fops) in SYSCALL_DEFINE3()
3992 fc = f.file->private_data; in SYSCALL_DEFINE3()
3994 ret = mutex_lock_interruptible(&fc->uapi_mutex); in SYSCALL_DEFINE3()
3998 /* There must be a valid superblock or we can't mount it */ in SYSCALL_DEFINE3()
3999 ret = -EINVAL; in SYSCALL_DEFINE3()
4000 if (!fc->root) in SYSCALL_DEFINE3()
4003 ret = -EPERM; in SYSCALL_DEFINE3()
4004 if (mount_too_revealing(fc->root->d_sb, &mnt_flags)) { in SYSCALL_DEFINE3()
4009 ret = -EBUSY; in SYSCALL_DEFINE3()
4010 if (fc->phase != FS_CONTEXT_AWAITING_MOUNT) in SYSCALL_DEFINE3()
4013 if (fc->sb_flags & SB_MANDLOCK) in SYSCALL_DEFINE3()
4021 newmount.dentry = dget(fc->root); in SYSCALL_DEFINE3()
4022 newmount.mnt->mnt_flags = mnt_flags; in SYSCALL_DEFINE3()
4024 /* We've done the mount bit - now move the file context into more or in SYSCALL_DEFINE3()
4025 * less the same state as if we'd done an fspick(). We don't want to in SYSCALL_DEFINE3()
4026 * do any memory allocation or anything like that at this point as we in SYSCALL_DEFINE3()
4031 ns = alloc_mnt_ns(current->nsproxy->mnt_ns->user_ns, true); in SYSCALL_DEFINE3()
4032 if (IS_ERR(ns)) { in SYSCALL_DEFINE3()
4033 ret = PTR_ERR(ns); in SYSCALL_DEFINE3()
4037 mnt->mnt_ns = ns; in SYSCALL_DEFINE3()
4038 ns->root = mnt; in SYSCALL_DEFINE3()
4039 ns->mounts = 1; in SYSCALL_DEFINE3()
4040 list_add(&mnt->mnt_list, &ns->list); in SYSCALL_DEFINE3()
4043 /* Attach to an apparent O_PATH fd with a note that we need to unmount in SYSCALL_DEFINE3()
4046 file = dentry_open(&newmount, O_PATH, fc->cred); in SYSCALL_DEFINE3()
4052 file->f_mode |= FMODE_NEED_UNMOUNT; in SYSCALL_DEFINE3()
4063 mutex_unlock(&fc->uapi_mutex); in SYSCALL_DEFINE3()
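The syscall above is fsmount(2), the middle step of the new mount API; a userspace sketch of the full flow via raw syscall(2) (glibc wrappers may be absent; filesystem type and target are illustrative, error handling omitted):

	#include <fcntl.h>
	#include <unistd.h>
	#include <sys/syscall.h>
	#include <linux/mount.h>

	int fs_fd = syscall(SYS_fsopen, "tmpfs", FSOPEN_CLOEXEC);
	syscall(SYS_fsconfig, fs_fd, FSCONFIG_CMD_CREATE, NULL, NULL, 0);
	int mnt_fd = syscall(SYS_fsmount, fs_fd, FSMOUNT_CLOEXEC, MOUNT_ATTR_NODEV);
	syscall(SYS_move_mount, mnt_fd, "", AT_FDCWD, "/mnt",
		MOVE_MOUNT_F_EMPTY_PATH);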
4087 return -EPERM; in SYSCALL_DEFINE5()
4090 return -EINVAL; in SYSCALL_DEFINE5()
4094 return -EINVAL; in SYSCALL_DEFINE5()
4097 * from an fd that requires unmount as we can't get at the flag in SYSCALL_DEFINE5()
4143 while (&mnt->mnt != root->mnt && mnt_has_parent(mnt)) { in is_path_reachable()
4144 dentry = mnt->mnt_mountpoint; in is_path_reachable()
4145 mnt = mnt->mnt_parent; in is_path_reachable()
4147 return &mnt->mnt == root->mnt && is_subdir(dentry, root->dentry); in is_path_reachable()
4154 res = is_path_reachable(real_mount(path1->mnt), path1->dentry, path2); in path_is_under()
4164 * root/cwd of all processes which had them on the current root to new_root.
4167 * The new_root and put_old must be directories, and must not be on the
4169 * underneath new_root, i.e. adding a non-zero number of /.. to the string
4171 * file system may be mounted on put_old. After all, new_root is a mountpoint.
4173 * Also, the current root cannot be on the 'rootfs' (initial ramfs) filesystem.
4174 * See Documentation/filesystems/ramfs-rootfs-initramfs.rst for alternatives
4178 * - we don't move root/cwd if they are not at the root (reason: if something
4180 * - it's okay to pick a root that isn't the root of a file system, e.g.
4182 * though, so you may need to say mount --bind /nfs/my_root /nfs/my_root
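A userspace sketch of the sequence those notes imply, as in the pivot_root(2) man page (paths illustrative; put_old must already exist under the new root):

	#include <unistd.h>
	#include <sys/syscall.h>
	#include <sys/mount.h>

	chdir("/new_root");
	syscall(SYS_pivot_root, ".", "put_old");	/* no glibc wrapper */
	chdir("/");
	umount2("put_old", MNT_DETACH);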
4194 return -EPERM; in SYSCALL_DEFINE2()
4210 get_fs_root(current->fs, &root); in SYSCALL_DEFINE2()
4216 error = -EINVAL; in SYSCALL_DEFINE2()
4220 ex_parent = new_mnt->mnt_parent; in SYSCALL_DEFINE2()
4221 root_parent = root_mnt->mnt_parent; in SYSCALL_DEFINE2()
4228 if (new_mnt->mnt.mnt_flags & MNT_LOCKED) in SYSCALL_DEFINE2()
4230 error = -ENOENT; in SYSCALL_DEFINE2()
4233 error = -EBUSY; in SYSCALL_DEFINE2()
4235 goto out4; /* loop, on the same file system */ in SYSCALL_DEFINE2()
4236 error = -EINVAL; in SYSCALL_DEFINE2()
4245 /* make sure we can reach put_old from new_root */ in SYSCALL_DEFINE2()
4253 root_mp = unhash_mnt(root_mnt); /* we'll need its mountpoint */ in SYSCALL_DEFINE2()
4254 if (root_mnt->mnt.mnt_flags & MNT_LOCKED) { in SYSCALL_DEFINE2()
4255 new_mnt->mnt.mnt_flags |= MNT_LOCKED; in SYSCALL_DEFINE2()
4256 root_mnt->mnt.mnt_flags &= ~MNT_LOCKED; in SYSCALL_DEFINE2()
4258 /* mount old root on put_old */ in SYSCALL_DEFINE2()
4260 /* mount new_root on / */ in SYSCALL_DEFINE2()
4262 mnt_add_count(root_parent, -1); in SYSCALL_DEFINE2()
4263 touch_mnt_namespace(current->nsproxy->mnt_ns); in SYSCALL_DEFINE2()
4265 list_del_init(&new_mnt->mnt_expire); in SYSCALL_DEFINE2()
4286 unsigned int flags = mnt->mnt.mnt_flags; in recalc_flags()
4289 flags &= ~kattr->attr_clr; in recalc_flags()
4291 flags |= kattr->attr_set; in recalc_flags()
4298 struct vfsmount *m = &mnt->mnt; in can_idmap_mount()
4299 struct user_namespace *fs_userns = m->mnt_sb->s_user_ns; in can_idmap_mount()
4301 if (!kattr->mnt_idmap) in can_idmap_mount()
4306 * doesn't make sense so block that. We don't allow mushy semantics. in can_idmap_mount()
4308 if (!check_fsmapping(kattr->mnt_idmap, m->mnt_sb)) in can_idmap_mount()
4309 return -EINVAL; in can_idmap_mount()
4312 * Once a mount has been idmapped we don't allow it to change its in can_idmap_mount()
4314 * another bind-mount they can idmap if they want to. in can_idmap_mount()
4317 return -EPERM; in can_idmap_mount()
4320 if (!(m->mnt_sb->s_type->fs_flags & FS_ALLOW_IDMAP)) in can_idmap_mount()
4321 return -EINVAL; in can_idmap_mount()
4323 /* We're not controlling the superblock. */ in can_idmap_mount()
4325 return -EPERM; in can_idmap_mount()
4328 if (!is_anon_ns(mnt->mnt_ns)) in can_idmap_mount()
4329 return -EINVAL; in can_idmap_mount()
4335 * mnt_allow_writers() - check whether the attribute change allows writers
4346 return (!(kattr->attr_set & MNT_READONLY) || in mnt_allow_writers()
4347 (mnt->mnt.mnt_flags & MNT_READONLY)) && in mnt_allow_writers()
4348 !kattr->mnt_idmap; in mnt_allow_writers()
4358 err = -EPERM; in mount_setattr_prepare()
4372 if (!kattr->recurse) in mount_setattr_prepare()
4380 * If we had to call mnt_hold_writers() MNT_WRITE_HOLD will in mount_setattr_prepare()
4385 /* If we had to hold writers unblock them. */ in mount_setattr_prepare()
4386 if (p->mnt.mnt_flags & MNT_WRITE_HOLD) in mount_setattr_prepare()
4390 * We're done once the first mount we changed got in mount_setattr_prepare()
4402 if (!kattr->mnt_idmap) in do_idmap_mount()
4408 * Since we only allow a mount to change the idmapping once and in do_idmap_mount()
4409 * verified this in can_idmap_mount() we know that the mount has in do_idmap_mount()
4413 smp_store_release(&mnt->mnt.mnt_idmap, mnt_idmap_get(kattr->mnt_idmap)); in do_idmap_mount()
4425 WRITE_ONCE(m->mnt.mnt_flags, flags); in mount_setattr_commit()
4427 /* If we had to hold writers unblock them. */ in mount_setattr_commit()
4428 if (m->mnt.mnt_flags & MNT_WRITE_HOLD) in mount_setattr_commit()
4431 if (kattr->propagation) in mount_setattr_commit()
4432 change_mnt_propagation(m, kattr->propagation); in mount_setattr_commit()
4433 if (!kattr->recurse) in mount_setattr_commit()
4436 touch_mnt_namespace(mnt->mnt_ns); in mount_setattr_commit()
4441 struct mount *mnt = real_mount(path->mnt); in do_mount_setattr()
4445 return -EINVAL; in do_mount_setattr()
4447 if (kattr->mnt_userns) { in do_mount_setattr()
4450 mnt_idmap = alloc_mnt_idmap(kattr->mnt_userns); in do_mount_setattr()
4453 kattr->mnt_idmap = mnt_idmap; in do_mount_setattr()
4456 if (kattr->propagation) { in do_mount_setattr()
4458 * Only take namespace_lock() if we're actually changing in do_mount_setattr()
4462 if (kattr->propagation == MS_SHARED) { in do_mount_setattr()
4463 err = invent_group_ids(mnt, kattr->recurse); in do_mount_setattr()
4471 err = -EINVAL; in do_mount_setattr()
4475 if (!is_mounted(&mnt->mnt)) in do_mount_setattr()
4485 * that do change mount properties on the rootfs itself. That obviously in do_mount_setattr()
4486 * neither has a parent nor is it a detached mount so we cannot in do_mount_setattr()
4489 if ((mnt_has_parent(mnt) || !is_anon_ns(mnt->mnt_ns)) && !check_mnt(mnt)) in do_mount_setattr()
4493 * First, we get the mount tree in a shape where we can change mount in do_mount_setattr()
4494 * properties without failure. If we succeeded to do so we commit all in do_mount_setattr()
4495 * changes and if we failed we clean up. in do_mount_setattr()
4504 if (kattr->propagation) { in do_mount_setattr()
4517 struct ns_common *ns; in build_mount_idmapped() local
4521 if (!((attr->attr_set | attr->attr_clr) & MOUNT_ATTR_IDMAP)) in build_mount_idmapped()
4525 * We currently do not support clearing an idmapped mount. If this ever in build_mount_idmapped()
4526 * is a use-case we can revisit this but for now let's keep it simple in build_mount_idmapped()
4529 if (attr->attr_clr & MOUNT_ATTR_IDMAP) in build_mount_idmapped()
4530 return -EINVAL; in build_mount_idmapped()
4532 if (attr->userns_fd > INT_MAX) in build_mount_idmapped()
4533 return -EINVAL; in build_mount_idmapped()
4535 f = fdget(attr->userns_fd); in build_mount_idmapped()
4537 return -EBADF; in build_mount_idmapped()
4540 err = -EINVAL; in build_mount_idmapped()
4544 ns = get_proc_ns(file_inode(f.file)); in build_mount_idmapped()
4545 if (ns->ops->type != CLONE_NEWUSER) { in build_mount_idmapped()
4546 err = -EINVAL; in build_mount_idmapped()
4552 * mount. We use the initial idmapping as an indicator of a mount in build_mount_idmapped()
4558 mnt_userns = container_of(ns, struct user_namespace, ns); in build_mount_idmapped()
4560 err = -EPERM; in build_mount_idmapped()
4564 /* We're not controlling the target namespace. */ in build_mount_idmapped()
4566 err = -EPERM; in build_mount_idmapped()
4570 kattr->mnt_userns = get_user_ns(mnt_userns); in build_mount_idmapped()
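From userspace, userns_fd is simply a file descriptor referring to a user-namespace file; get_proc_ns() above verifies it really is one of type CLONE_NEWUSER. A hedged snippet, with "1234" as an illustrative pid; the kernel takes its own reference on the namespace, so the caller may close the fd once mount_setattr() has returned:

#include <fcntl.h>
#include <linux/mount.h>

static int fill_idmap_attr(struct mount_attr *attr)
{
        int fd = open("/proc/1234/ns/user", O_RDONLY | O_CLOEXEC);

        if (fd < 0)
                return -1;
        attr->attr_set |= MOUNT_ATTR_IDMAP;
        attr->userns_fd = fd;   /* validated by build_mount_idmapped() */
        return 0;
}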
4594 if (attr->propagation & ~MOUNT_SETATTR_PROPAGATION_FLAGS) in build_mount_kattr()
4595 return -EINVAL; in build_mount_kattr()
4596 if (hweight32(attr->propagation & MOUNT_SETATTR_PROPAGATION_FLAGS) > 1) in build_mount_kattr()
4597 return -EINVAL; in build_mount_kattr()
4598 kattr->propagation = attr->propagation; in build_mount_kattr()
4600 if ((attr->attr_set | attr->attr_clr) & ~MOUNT_SETATTR_VALID_FLAGS) in build_mount_kattr()
4601 return -EINVAL; in build_mount_kattr()
4603 kattr->attr_set = attr_flags_to_mnt_flags(attr->attr_set); in build_mount_kattr()
4604 kattr->attr_clr = attr_flags_to_mnt_flags(attr->attr_clr); in build_mount_kattr()
4615 if (attr->attr_clr & MOUNT_ATTR__ATIME) { in build_mount_kattr()
4616 if ((attr->attr_clr & MOUNT_ATTR__ATIME) != MOUNT_ATTR__ATIME) in build_mount_kattr()
4617 return -EINVAL; in build_mount_kattr()
4623 kattr->attr_clr |= MNT_RELATIME | MNT_NOATIME; in build_mount_kattr()
4624 switch (attr->attr_set & MOUNT_ATTR__ATIME) { in build_mount_kattr()
4626 kattr->attr_set |= MNT_RELATIME; in build_mount_kattr()
4629 kattr->attr_set |= MNT_NOATIME; in build_mount_kattr()
4634 return -EINVAL; in build_mount_kattr()
4637 if (attr->attr_set & MOUNT_ATTR__ATIME) in build_mount_kattr()
4638 return -EINVAL; in build_mount_kattr()
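The atime handling above means a new atime mode is only accepted when the whole MOUNT_ATTR__ATIME field is cleared; clearing individual bits is -EINVAL, and since MOUNT_ATTR_RELATIME is 0, clearing the field while setting nothing selects relatime. A short snippet requesting noatime under those rules:

#include <linux/mount.h>

struct mount_attr attr = {
        .attr_clr = MOUNT_ATTR__ATIME,  /* must clear the full field mask */
        .attr_set = MOUNT_ATTR_NOATIME, /* then select exactly one mode */
};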
4646 put_user_ns(kattr->mnt_userns); in finish_mount_kattr()
4647 kattr->mnt_userns = NULL; in finish_mount_kattr()
4649 if (kattr->mnt_idmap) in finish_mount_kattr()
4650 mnt_idmap_put(kattr->mnt_idmap); in finish_mount_kattr()
4668 return -EINVAL; in SYSCALL_DEFINE5()
4671 return -E2BIG; in SYSCALL_DEFINE5()
4673 return -EINVAL; in SYSCALL_DEFINE5()
4676 return -EPERM; in SYSCALL_DEFINE5()
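Glibc may not ship a mount_setattr() wrapper, so callers commonly go through syscall(). A hedged example that recursively remounts a subtree read-only; AT_RECURSIVE is defined here in case the installed headers predate it:

#define _GNU_SOURCE
#include <fcntl.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <linux/mount.h>

#ifndef AT_RECURSIVE
#define AT_RECURSIVE 0x8000     /* from <linux/fcntl.h> */
#endif

static int make_tree_ro(const char *path)
{
        struct mount_attr attr = {
                .attr_set = MOUNT_ATTR_RDONLY,
        };

        /* Change 'path' and everything mounted beneath it. */
        return syscall(SYS_mount_setattr, AT_FDCWD, path, AT_RECURSIVE,
                       &attr, sizeof(attr));
}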
4705 struct mnt_namespace *ns; in init_mount_tree() local
4712 ns = alloc_mnt_ns(&init_user_ns, false); in init_mount_tree()
4713 if (IS_ERR(ns)) in init_mount_tree()
4716 m->mnt_ns = ns; in init_mount_tree()
4717 ns->root = m; in init_mount_tree()
4718 ns->mounts = 1; in init_mount_tree()
4719 list_add(&m->mnt_list, &ns->list); in init_mount_tree()
4720 init_task.nsproxy->mnt_ns = ns; in init_mount_tree()
4721 get_mnt_ns(ns); in init_mount_tree()
4724 root.dentry = mnt->mnt_root; in init_mount_tree()
4725 mnt->mnt_flags |= MNT_LOCKED; in init_mount_tree()
4727 set_fs_pwd(current->fs, &root); in init_mount_tree()
4728 set_fs_root(current->fs, &root); in init_mount_tree()
4738 mount_hashtable = alloc_large_system_hash("Mount-cache", in mnt_init()
4743 mountpoint_hashtable = alloc_large_system_hash("Mountpoint-cache", in mnt_init()
4766 void put_mnt_ns(struct mnt_namespace *ns) in put_mnt_ns() argument
4768 if (!refcount_dec_and_test(&ns->ns.count)) in put_mnt_ns()
4770 drop_collected_mounts(&ns->root->mnt); in put_mnt_ns()
4771 free_mnt_ns(ns); in put_mnt_ns()
4777 mnt = vfs_kern_mount(type, SB_KERNMOUNT, type->name, NULL); in kern_mount()
4781 * we unmount before the filesystem is unregistered in kern_mount()
4783 real_mount(mnt)->mnt_ns = MNT_NS_INTERNAL; in kern_mount()
4819 /* Does the current process have a non-standard root */ in current_chrooted()
4825 ns_root.mnt = &current->nsproxy->mnt_ns->root->mnt; in current_chrooted()
4826 ns_root.dentry = ns_root.mnt->mnt_root; in current_chrooted()
4831 get_fs_root(current->fs, &fs_root); in current_chrooted()
4841 static bool mnt_already_visible(struct mnt_namespace *ns, in mnt_already_visible() argument
4850 lock_ns_list(ns); in mnt_already_visible()
4851 list_for_each_entry(mnt, &ns->list, mnt_list) { in mnt_already_visible()
4858 if (mnt->mnt.mnt_sb->s_type != sb->s_type) in mnt_already_visible()
4864 if (mnt->mnt.mnt_root != mnt->mnt.mnt_sb->s_root) in mnt_already_visible()
4868 mnt_flags = mnt->mnt.mnt_flags; in mnt_already_visible()
4871 if (sb_rdonly(mnt->mnt.mnt_sb)) in mnt_already_visible()
4888 list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) { in mnt_already_visible()
4889 struct inode *inode = child->mnt_mountpoint->d_inode; in mnt_already_visible()
4891 if (!(child->mnt.mnt_flags & MNT_LOCKED)) in mnt_already_visible()
4905 unlock_ns_list(ns); in mnt_already_visible()
4913 struct mnt_namespace *ns = current->nsproxy->mnt_ns; in mount_too_revealing() local
4916 if (ns->user_ns == &init_user_ns) in mount_too_revealing()
4920 s_iflags = sb->s_iflags; in mount_too_revealing()
4930 return !mnt_already_visible(ns, sb, new_mnt_flags); in mount_too_revealing()
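mount_too_revealing() is what makes a fresh proc or sysfs mount in an unprivileged namespace fail unless an existing instance is already fully visible. A hedged demonstration with error handling trimmed; it forks because the new pid namespace only applies to children:

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <sys/mount.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
        pid_t pid;

        /* New user, mount and pid namespaces. */
        if (unshare(CLONE_NEWUSER | CLONE_NEWNS | CLONE_NEWPID) < 0)
                return 1;

        pid = fork();
        if (pid == 0) {
                /*
                 * EPERM here is mount_too_revealing() at work, e.g. when
                 * parts of the original /proc are hidden by over-mounts.
                 */
                if (mount("proc", "/proc", "proc", 0, NULL) < 0)
                        perror("mount(proc)");
                else
                        puts("proc fully visible: mount allowed");
                _exit(0);
        }
        waitpid(pid, NULL, 0);
        return 0;
}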
4942 return !(mnt->mnt_flags & MNT_NOSUID) && check_mnt(real_mount(mnt)) && in mnt_may_suid()
4943 current_in_userns(mnt->mnt_sb->s_user_ns); in mnt_may_suid()
4948 struct ns_common *ns = NULL; in mntns_get() local
4952 nsproxy = task->nsproxy; in mntns_get()
4954 ns = &nsproxy->mnt_ns->ns; in mntns_get()
4955 get_mnt_ns(to_mnt_ns(ns)); in mntns_get()
4959 return ns; in mntns_get()
4962 static void mntns_put(struct ns_common *ns) in mntns_put() argument
4964 put_mnt_ns(to_mnt_ns(ns)); in mntns_put()
4967 static int mntns_install(struct nsset *nsset, struct ns_common *ns) in mntns_install() argument
4969 struct nsproxy *nsproxy = nsset->nsproxy; in mntns_install()
4970 struct fs_struct *fs = nsset->fs; in mntns_install()
4971 struct mnt_namespace *mnt_ns = to_mnt_ns(ns), *old_mnt_ns; in mntns_install()
4972 struct user_namespace *user_ns = nsset->cred->user_ns; in mntns_install()
4976 if (!ns_capable(mnt_ns->user_ns, CAP_SYS_ADMIN) || in mntns_install()
4979 return -EPERM; in mntns_install()
4982 return -EINVAL; in mntns_install()
4984 if (fs->users != 1) in mntns_install()
4985 return -EINVAL; in mntns_install()
4988 old_mnt_ns = nsproxy->mnt_ns; in mntns_install()
4989 nsproxy->mnt_ns = mnt_ns; in mntns_install()
4992 err = vfs_path_lookup(mnt_ns->root->mnt.mnt_root, &mnt_ns->root->mnt, in mntns_install()
4996 nsproxy->mnt_ns = old_mnt_ns; in mntns_install()
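mntns_install() is what runs when a task setns(2)s into a mount namespace: it demands CAP_SYS_ADMIN over the target namespace, CAP_SYS_CHROOT and CAP_SYS_ADMIN in the caller's own, and an unshared fs_struct, then points the task's root and cwd at the namespace root. A minimal caller-side sketch; "1234" is an illustrative pid:

#define _GNU_SOURCE
#include <fcntl.h>
#include <sched.h>
#include <unistd.h>

static int enter_mntns(void)
{
        int fd, ret;

        fd = open("/proc/1234/ns/mnt", O_RDONLY | O_CLOEXEC);
        if (fd < 0)
                return -1;

        /*
         * Fails with EINVAL if our fs_struct is shared (fs->users != 1),
         * e.g. with threads created via CLONE_FS still running.
         */
        ret = setns(fd, CLONE_NEWNS);
        close(fd);
        return ret;
}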
5011 static struct user_namespace *mntns_owner(struct ns_common *ns) in mntns_owner() argument
5013 return to_mnt_ns(ns)->user_ns; in mntns_owner()
5028 .procname = "mount-max",