xref: /openbmc/linux/fs/pnode.c (revision 1b39e7607144337d752f36c2068ed79447462f99)
// SPDX-License-Identifier: GPL-2.0-only
/*
 *  linux/fs/pnode.c
 *
 * (C) Copyright IBM Corporation 2005.
 *	Author : Ram Pai (linuxram@us.ibm.com)
 */
#include <linux/mnt_namespace.h>
#include <linux/mount.h>
#include <linux/fs.h>
#include <linux/nsproxy.h>
#include <uapi/linux/mount.h>
#include "internal.h"
#include "pnode.h"

/* return the next shared peer mount of @p */
static inline struct mount *next_peer(struct mount *p)
{
	return list_entry(p->mnt_share.next, struct mount, mnt_share);
}

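/*
 * Slave-list helpers: @p->mnt_slave_list heads the list of mounts that
 * receive propagation from @p; each of those mounts is linked into that
 * list through its own ->mnt_slave.
 */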
static inline struct mount *first_slave(struct mount *p)
{
	return list_entry(p->mnt_slave_list.next, struct mount, mnt_slave);
}

static inline struct mount *last_slave(struct mount *p)
{
	return list_entry(p->mnt_slave_list.prev, struct mount, mnt_slave);
}

static inline struct mount *next_slave(struct mount *p)
{
	return list_entry(p->mnt_slave.next, struct mount, mnt_slave);
}

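/*
 * Find a peer of @mnt that lives in namespace @ns and whose root is
 * reachable from @root, or NULL if there is none.
 */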
static struct mount *get_peer_under_root(struct mount *mnt,
					 struct mnt_namespace *ns,
					 const struct path *root)
{
	struct mount *m = mnt;

	do {
		/* Check the namespace first for optimization */
		if (m->mnt_ns == ns && is_path_reachable(m, m->mnt.mnt_root, root))
			return m;

		m = next_peer(m);
	} while (m != mnt);

	return NULL;
}

/*
 * Get ID of closest dominating peer group having a representative
 * under the given root.
 *
 * Caller must hold namespace_sem
 */
int get_dominating_id(struct mount *mnt, const struct path *root)
{
	struct mount *m;

	for (m = mnt->mnt_master; m != NULL; m = m->mnt_master) {
		struct mount *d = get_peer_under_root(m, mnt->mnt_ns, root);
		if (d)
			return d->mnt_group_id;
	}

	return 0;
}

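/*
 * Make @mnt a slave: take it out of its peer group (preferring a peer with
 * the same root dentry as the new master) and hand any slaves it already
 * has over to that master.  A mount with neither peers nor a master simply
 * releases its slaves.
 */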
static int do_make_slave(struct mount *mnt)
{
	struct mount *master, *slave_mnt;

	if (list_empty(&mnt->mnt_share)) {
		if (IS_MNT_SHARED(mnt)) {
			mnt_release_group_id(mnt);
			CLEAR_MNT_SHARED(mnt);
		}
		master = mnt->mnt_master;
		if (!master) {
			struct list_head *p = &mnt->mnt_slave_list;
			while (!list_empty(p)) {
				slave_mnt = list_first_entry(p,
						struct mount, mnt_slave);
				list_del_init(&slave_mnt->mnt_slave);
				slave_mnt->mnt_master = NULL;
			}
			return 0;
		}
	} else {
		struct mount *m;
		/*
		 * slave 'mnt' to a peer mount that has the
		 * same root dentry. If none is available then
		 * slave it to anything that is available.
		 */
		for (m = master = next_peer(mnt); m != mnt; m = next_peer(m)) {
			if (m->mnt.mnt_root == mnt->mnt.mnt_root) {
				master = m;
				break;
			}
		}
		list_del_init(&mnt->mnt_share);
		mnt->mnt_group_id = 0;
		CLEAR_MNT_SHARED(mnt);
	}
	list_for_each_entry(slave_mnt, &mnt->mnt_slave_list, mnt_slave)
		slave_mnt->mnt_master = master;
	list_move(&mnt->mnt_slave, &master->mnt_slave_list);
	list_splice(&mnt->mnt_slave_list, master->mnt_slave_list.prev);
	INIT_LIST_HEAD(&mnt->mnt_slave_list);
	mnt->mnt_master = master;
	return 0;
}

/*
 * vfsmount lock must be held for write
 */
void change_mnt_propagation(struct mount *mnt, int type)
{
	if (type == MS_SHARED) {
		set_mnt_shared(mnt);
		return;
	}
	do_make_slave(mnt);
	if (type != MS_SLAVE) {
		list_del_init(&mnt->mnt_slave);
		mnt->mnt_master = NULL;
		if (type == MS_UNBINDABLE)
			mnt->mnt.mnt_flags |= MNT_UNBINDABLE;
		else
			mnt->mnt.mnt_flags &= ~MNT_UNBINDABLE;
	}
}

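/*
 * Illustrative mapping (not part of this file's logic): the @type values
 * handled by change_mnt_propagation() above are the propagation flags a
 * caller passes to mount(2), e.g. via mount(8):
 *
 *	mount --make-shared     DIR	->	MS_SHARED
 *	mount --make-slave      DIR	->	MS_SLAVE
 *	mount --make-private    DIR	->	MS_PRIVATE
 *	mount --make-unbindable DIR	->	MS_UNBINDABLE
 */
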
/*
 * get the next mount in the propagation tree.
 * @m: the mount seen last
 * @origin: the original mount from where the tree walk initiated
 *
 * Note that peer groups form contiguous segments of slave lists.
 * We rely on that in propagate_one() to be able to find out if
 * a mount found while iterating with propagation_next() is
 * a peer of one we'd found earlier.
 */
static struct mount *propagation_next(struct mount *m,
					 struct mount *origin)
{
	/* are there any slaves of this mount? */
	if (!IS_MNT_NEW(m) && !list_empty(&m->mnt_slave_list))
		return first_slave(m);

	while (1) {
		struct mount *master = m->mnt_master;

		if (master == origin->mnt_master) {
			struct mount *next = next_peer(m);
			return (next == origin) ? NULL : next;
		} else if (m->mnt_slave.next != &master->mnt_slave_list)
			return next_slave(m);

		/* back at master */
		m = master;
	}
}

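/*
 * Example walk (illustrative): for a peer group {A, B} where B has two
 * slaves C and D, iterating with propagation_next() from origin A visits
 * B, then C, then D, and finally returns NULL once the walk gets back
 * to A.
 */
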
static struct mount *skip_propagation_subtree(struct mount *m,
						struct mount *origin)
{
	/*
	 * Advance m such that propagation_next will not return
	 * the slaves of m.
	 */
	if (!IS_MNT_NEW(m) && !list_empty(&m->mnt_slave_list))
		m = last_slave(m);

	return m;
}

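/*
 * Return a mount from the next peer group in the propagation tree rooted
 * at @origin, or NULL once every group has been visited.  Unlike
 * propagation_next(), this advances group by group rather than mount by
 * mount.
 */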
static struct mount *next_group(struct mount *m, struct mount *origin)
{
	while (1) {
		while (1) {
			struct mount *next;
			if (!IS_MNT_NEW(m) && !list_empty(&m->mnt_slave_list))
				return first_slave(m);
			next = next_peer(m);
			if (m->mnt_group_id == origin->mnt_group_id) {
				if (next == origin)
					return NULL;
			} else if (m->mnt_slave.next != &next->mnt_slave)
				break;
			m = next;
		}
		/* m is the last peer */
		while (1) {
			struct mount *master = m->mnt_master;
			if (m->mnt_slave.next != &master->mnt_slave_list)
				return next_slave(m);
			m = next_peer(master);
			if (master->mnt_group_id == origin->mnt_group_id)
				break;
			if (master->mnt_slave.next == &m->mnt_slave)
				break;
			m = master;
		}
		if (m == origin)
			return NULL;
	}
}

/* all accesses are serialized by namespace_sem */
static struct mount *last_dest, *first_source, *last_source, *dest_master;
static struct hlist_head *list;

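/* two mounts are peers iff they are in the same (non-zero) peer group */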
static inline bool peers(struct mount *m1, struct mount *m2)
{
	return m1->mnt_group_id == m2->mnt_group_id && m1->mnt_group_id;
}

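/*
 * Propagate the source tree to one more mount: copy it (cloning from the
 * most suitable earlier copy, tracked in last_source) and mount the copy
 * on @dest_mp inside @m.
 */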
static int propagate_one(struct mount *m, struct mountpoint *dest_mp)
{
	struct mount *child;
	int type;
	/* skip ones added by this propagate_mnt() */
	if (IS_MNT_NEW(m))
		return 0;
	/* skip if mountpoint isn't covered by it */
	if (!is_subdir(dest_mp->m_dentry, m->mnt.mnt_root))
		return 0;
	if (peers(m, last_dest)) {
		type = CL_MAKE_SHARED;
	} else {
		struct mount *n, *p;
		bool done;
		for (n = m; ; n = p) {
			p = n->mnt_master;
			if (p == dest_master || IS_MNT_MARKED(p))
				break;
		}
		do {
			struct mount *parent = last_source->mnt_parent;
			if (peers(last_source, first_source))
				break;
			done = parent->mnt_master == p;
			if (done && peers(n, parent))
				break;
			last_source = last_source->mnt_master;
		} while (!done);

		type = CL_SLAVE;
		/* beginning of peer group among the slaves? */
		if (IS_MNT_SHARED(m))
			type |= CL_MAKE_SHARED;
	}

	child = copy_tree(last_source, last_source->mnt.mnt_root, type);
	if (IS_ERR(child))
		return PTR_ERR(child);
	read_seqlock_excl(&mount_lock);
	mnt_set_mountpoint(m, dest_mp, child);
	if (m->mnt_master != dest_master)
		SET_MNT_MARK(m->mnt_master);
	read_sequnlock_excl(&mount_lock);
	last_dest = m;
	last_source = child;
	hlist_add_head(&child->mnt_hash, list);
	return count_mounts(m->mnt_ns, child);
}

/*
 * mount 'source_mnt' under the destination 'dest_mnt' at the mountpoint
 * 'dest_mp', and propagate that mount to all the peer and slave mounts
 * of 'dest_mnt'.
 * Link all the new mounts into a propagation tree headed at
 * source_mnt. Also link all the new mounts using ->mnt_list
 * headed at source_mnt's ->mnt_list
 *
 * @dest_mnt: destination mount.
 * @dest_mp: destination mountpoint.
 * @source_mnt: source mount.
 * @tree_list : list of heads of trees to be attached.
 */
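/*
 * Example (illustrative only): if dest_mnt is shared with peers A and B,
 * and C is a slave of that peer group, mounting source_mnt on dest_mnt
 * makes propagate_mnt() create one copy under A and one under B (peers
 * of source_mnt's new peer group) and one under C (a slave of that
 * group), each on the corresponding mountpoint.
 */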
int propagate_mnt(struct mount *dest_mnt, struct mountpoint *dest_mp,
		    struct mount *source_mnt, struct hlist_head *tree_list)
{
	struct mount *m, *n;
	int ret = 0;

	/*
	 * we don't want to bother passing tons of arguments to
	 * propagate_one(); everything is serialized by namespace_sem,
	 * so globals will do just fine.
	 */
	last_dest = dest_mnt;
	first_source = source_mnt;
	last_source = source_mnt;
	list = tree_list;
	dest_master = dest_mnt->mnt_master;

	/* all peers of dest_mnt, except dest_mnt itself */
	for (n = next_peer(dest_mnt); n != dest_mnt; n = next_peer(n)) {
		ret = propagate_one(n, dest_mp);
		if (ret)
			goto out;
	}

	/* all slave groups */
	for (m = next_group(dest_mnt, dest_mnt); m;
			m = next_group(m, dest_mnt)) {
		/* everything in that slave group */
		n = m;
		do {
			ret = propagate_one(n, dest_mp);
			if (ret)
				goto out;
			n = next_peer(n);
		} while (n != m);
	}
out:
	read_seqlock_excl(&mount_lock);
	hlist_for_each_entry(n, tree_list, mnt_hash) {
		m = n->mnt_parent;
		if (m->mnt_master != dest_mnt->mnt_master)
			CLEAR_MNT_MARK(m->mnt_master);
	}
	read_sequnlock_excl(&mount_lock);
	return ret;
}

static struct mount *find_topper(struct mount *mnt)
{
	/* If there is exactly one mount covering mnt completely return it. */
	struct mount *child;

	if (!list_is_singular(&mnt->mnt_mounts))
		return NULL;

	child = list_first_entry(&mnt->mnt_mounts, struct mount, mnt_child);
	if (child->mnt_mountpoint != mnt->mnt.mnt_root)
		return NULL;

	return child;
}

/*
 * return true if the refcount is greater than count
 */
static inline int do_refcount_check(struct mount *mnt, int count)
{
	return mnt_get_count(mnt) > count;
}

/*
 * check if the mount 'mnt' can be unmounted successfully.
 * @mnt: the mount to be checked for unmount
 * NOTE: unmounting 'mnt' would naturally propagate to all
 * other mounts its parent propagates to.
 * Check if any of these mounts that **do not have submounts**
 * have more references than 'refcnt'. If so return busy.
 *
 * vfsmount lock must be held for write
 */
int propagate_mount_busy(struct mount *mnt, int refcnt)
{
	struct mount *m, *child, *topper;
	struct mount *parent = mnt->mnt_parent;

	if (mnt == parent)
		return do_refcount_check(mnt, refcnt);

	/*
	 * quickly check if the current mount can be unmounted.
	 * If not, we don't have to go checking for all other
	 * mounts
	 */
	if (!list_empty(&mnt->mnt_mounts) || do_refcount_check(mnt, refcnt))
		return 1;

	for (m = propagation_next(parent, parent); m;
	     m = propagation_next(m, parent)) {
		int count = 1;
		child = __lookup_mnt(&m->mnt, mnt->mnt_mountpoint);
		if (!child)
			continue;

		/* Is there exactly one mount on the child that covers
		 * it completely whose reference should be ignored?
		 */
		topper = find_topper(child);
		if (topper)
			count += 1;
		else if (!list_empty(&child->mnt_mounts))
			continue;

		if (do_refcount_check(child, count))
			return 1;
	}
	return 0;
}

/*
 * Clear MNT_LOCKED when it can be shown to be safe.
 *
 * mount_lock lock must be held for write
 */
void propagate_mount_unlock(struct mount *mnt)
{
	struct mount *parent = mnt->mnt_parent;
	struct mount *m, *child;

	BUG_ON(parent == mnt);

	for (m = propagation_next(parent, parent); m;
			m = propagation_next(m, parent)) {
		child = __lookup_mnt(&m->mnt, mnt->mnt_mountpoint);
		if (child)
			child->mnt.mnt_flags &= ~MNT_LOCKED;
	}
}

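/*
 * Commit @mnt to being unmounted: flag it MNT_UMOUNT, detach it from its
 * parent's list of children and queue it on @to_umount.
 */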
static void umount_one(struct mount *mnt, struct list_head *to_umount)
{
	CLEAR_MNT_MARK(mnt);
	mnt->mnt.mnt_flags |= MNT_UMOUNT;
	list_del_init(&mnt->mnt_child);
	list_del_init(&mnt->mnt_umounting);
	list_move_tail(&mnt->mnt_list, to_umount);
}

/*
 * NOTE: unmounting 'mnt' naturally propagates to all other mounts its
 * parent propagates to.
 */
static bool __propagate_umount(struct mount *mnt,
			       struct list_head *to_umount,
			       struct list_head *to_restore)
{
	bool progress = false;
	struct mount *child;

	/*
	 * The state of the parent won't change if this mount is
	 * already unmounted or marked as without children.
	 */
	if (mnt->mnt.mnt_flags & (MNT_UMOUNT | MNT_MARKED))
		goto out;

	/* Verify topper is the only grandchild that has not been
	 * speculatively unmounted.
	 */
	list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
		if (child->mnt_mountpoint == mnt->mnt.mnt_root)
			continue;
		if (!list_empty(&child->mnt_umounting) && IS_MNT_MARKED(child))
			continue;
		/* Found a mounted child */
		goto children;
	}

	/* Mark mounts that can be unmounted if not locked */
	SET_MNT_MARK(mnt);
	progress = true;

	/* If a mount is without children and not locked, umount it. */
	if (!IS_MNT_LOCKED(mnt)) {
		umount_one(mnt, to_umount);
	} else {
children:
		list_move_tail(&mnt->mnt_umounting, to_restore);
	}
out:
	return progress;
}

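/*
 * Deal with the children of every mount already queued on @to_umount: a
 * child that completely covers its parent (a "topper") is preserved and
 * moved to @to_restore, every other child is unmounted as well.
 */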
static void umount_list(struct list_head *to_umount,
			struct list_head *to_restore)
{
	struct mount *mnt, *child, *tmp;
	list_for_each_entry(mnt, to_umount, mnt_list) {
		list_for_each_entry_safe(child, tmp, &mnt->mnt_mounts, mnt_child) {
			/* topper? */
			if (child->mnt_mountpoint == mnt->mnt.mnt_root)
				list_move_tail(&child->mnt_umounting, to_restore);
			else
				umount_one(child, to_umount);
		}
	}
}

static void restore_mounts(struct list_head *to_restore)
{
	/* Restore mounts to a clean working state */
	while (!list_empty(to_restore)) {
		struct mount *mnt, *parent;
		struct mountpoint *mp;

		mnt = list_first_entry(to_restore, struct mount, mnt_umounting);
		CLEAR_MNT_MARK(mnt);
		list_del_init(&mnt->mnt_umounting);

		/* Should this mount be reparented? */
		mp = mnt->mnt_mp;
		parent = mnt->mnt_parent;
		while (parent->mnt.mnt_flags & MNT_UMOUNT) {
			mp = parent->mnt_mp;
			parent = parent->mnt_parent;
		}
		if (parent != mnt->mnt_parent)
			mnt_change_mountpoint(parent, mp, mnt);
	}
}

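/* Drop the ->mnt_umounting bookkeeping left over from the tree walk */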
static void cleanup_umount_visitations(struct list_head *visited)
{
	while (!list_empty(visited)) {
		struct mount *mnt =
			list_first_entry(visited, struct mount, mnt_umounting);
		list_del_init(&mnt->mnt_umounting);
	}
}

/*
 * collect all mounts that receive propagation from the mounts in @list,
 * and return these additional mounts in the same list.
 * @list: the list of mounts to be unmounted.
 *
 * vfsmount lock must be held for write
 */
int propagate_umount(struct list_head *list)
{
	struct mount *mnt;
	LIST_HEAD(to_restore);
	LIST_HEAD(to_umount);
	LIST_HEAD(visited);

	/* Find candidates for unmounting */
	list_for_each_entry_reverse(mnt, list, mnt_list) {
		struct mount *parent = mnt->mnt_parent;
		struct mount *m;

		/*
		 * If this mount has already been visited it is known that its
		 * entire peer group and all of that group's slaves in the
		 * propagation tree for the mountpoint have already been visited
		 * and there is no need to visit them again.
		 */
		if (!list_empty(&mnt->mnt_umounting))
			continue;

		list_add_tail(&mnt->mnt_umounting, &visited);
		for (m = propagation_next(parent, parent); m;
		     m = propagation_next(m, parent)) {
			struct mount *child = __lookup_mnt(&m->mnt,
							   mnt->mnt_mountpoint);
			if (!child)
				continue;

			if (!list_empty(&child->mnt_umounting)) {
				/*
				 * If the child has already been visited it is
				 * known that its entire peer group and all of
				 * that group's slaves in the propagation tree
				 * for the mountpoint have already been visited
				 * and there is no need to visit this subtree again.
				 */
				m = skip_propagation_subtree(m, parent);
				continue;
			} else if (child->mnt.mnt_flags & MNT_UMOUNT) {
				/*
				 * We have come across a partially unmounted
				 * mount in the list that has not been visited
				 * yet. Remember it has been visited and continue
				 * about our merry way.
				 */
				list_add_tail(&child->mnt_umounting, &visited);
				continue;
			}

			/* Check the child and parents while progress is made */
			while (__propagate_umount(child,
						  &to_umount, &to_restore)) {
				/* Is the parent a umount candidate? */
				child = child->mnt_parent;
				if (list_empty(&child->mnt_umounting))
					break;
			}
		}
	}

	umount_list(&to_umount, &to_restore);
	restore_mounts(&to_restore);
	cleanup_umount_visitations(&visited);
	list_splice_tail(&to_umount, list);

	return 0;
}