// SPDX-License-Identifier: GPL-2.0-only
/*
 *  linux/fs/pnode.c
 *
 * (C) Copyright IBM Corporation 2005.
 *	Author : Ram Pai (linuxram@us.ibm.com)
 */
#include <linux/mnt_namespace.h>
#include <linux/mount.h>
#include <linux/fs.h>
#include <linux/nsproxy.h>
#include <uapi/linux/mount.h>
#include "internal.h"
#include "pnode.h"

/* return the next shared peer mount of @p */
static inline struct mount *next_peer(struct mount *p)
{
	return list_entry(p->mnt_share.next, struct mount, mnt_share);
}

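/* return the first slave mount hanging off @p's ->mnt_slave_list */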
static inline struct mount *first_slave(struct mount *p)
{
	return list_entry(p->mnt_slave_list.next, struct mount, mnt_slave);
}

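/* return the last slave mount on @p's ->mnt_slave_list */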
static inline struct mount *last_slave(struct mount *p)
{
	return list_entry(p->mnt_slave_list.prev, struct mount, mnt_slave);
}

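/* return the next mount on the slave list that @p is linked into */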
static inline struct mount *next_slave(struct mount *p)
{
	return list_entry(p->mnt_slave.next, struct mount, mnt_slave);
}

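/*
 * Walk @mnt's peer group and return the first peer that belongs to
 * namespace @ns and whose root is reachable from @root, or NULL if
 * no such peer exists.
 */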
static struct mount *get_peer_under_root(struct mount *mnt,
					 struct mnt_namespace *ns,
					 const struct path *root)
{
	struct mount *m = mnt;

	do {
		/* Check the namespace first for optimization */
		if (m->mnt_ns == ns && is_path_reachable(m, m->mnt.mnt_root, root))
			return m;

		m = next_peer(m);
	} while (m != mnt);

	return NULL;
}

/*
 * Get ID of closest dominating peer group having a representative
 * under the given root.
 *
 * Caller must hold namespace_sem
 */
int get_dominating_id(struct mount *mnt, const struct path *root)
{
	struct mount *m;

	for (m = mnt->mnt_master; m != NULL; m = m->mnt_master) {
		struct mount *d = get_peer_under_root(m, mnt->mnt_ns, root);
		if (d)
			return d->mnt_group_id;
	}

	return 0;
}

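/*
 * Turn @mnt from a shared mount into a slave: take it out of its peer
 * group, pick a new master (preferably a peer with the same root
 * dentry) and hand @mnt's own slaves over to that master.  If @mnt
 * has neither peers nor a master, its slaves simply become masterless.
 */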
static int do_make_slave(struct mount *mnt)
{
	struct mount *master, *slave_mnt;

	if (list_empty(&mnt->mnt_share)) {
		if (IS_MNT_SHARED(mnt)) {
			mnt_release_group_id(mnt);
			CLEAR_MNT_SHARED(mnt);
		}
		master = mnt->mnt_master;
		if (!master) {
			struct list_head *p = &mnt->mnt_slave_list;
			while (!list_empty(p)) {
				slave_mnt = list_first_entry(p,
						struct mount, mnt_slave);
				list_del_init(&slave_mnt->mnt_slave);
				slave_mnt->mnt_master = NULL;
			}
			return 0;
		}
	} else {
		struct mount *m;
		/*
		 * slave 'mnt' to a peer mount that has the
		 * same root dentry. If none is available then
		 * slave it to anything that is available.
		 */
		for (m = master = next_peer(mnt); m != mnt; m = next_peer(m)) {
			if (m->mnt.mnt_root == mnt->mnt.mnt_root) {
				master = m;
				break;
			}
		}
		list_del_init(&mnt->mnt_share);
		mnt->mnt_group_id = 0;
		CLEAR_MNT_SHARED(mnt);
	}
	list_for_each_entry(slave_mnt, &mnt->mnt_slave_list, mnt_slave)
		slave_mnt->mnt_master = master;
	list_move(&mnt->mnt_slave, &master->mnt_slave_list);
	list_splice(&mnt->mnt_slave_list, master->mnt_slave_list.prev);
	INIT_LIST_HEAD(&mnt->mnt_slave_list);
	mnt->mnt_master = master;
	return 0;
}

/*
 * vfsmount lock must be held for write
 */
void change_mnt_propagation(struct mount *mnt, int type)
{
	if (type == MS_SHARED) {
		set_mnt_shared(mnt);
		return;
	}
	do_make_slave(mnt);
	if (type != MS_SLAVE) {
		list_del_init(&mnt->mnt_slave);
		mnt->mnt_master = NULL;
		if (type == MS_UNBINDABLE)
			mnt->mnt.mnt_flags |= MNT_UNBINDABLE;
		else
			mnt->mnt.mnt_flags &= ~MNT_UNBINDABLE;
	}
}

/*
 * get the next mount in the propagation tree.
 * @m: the mount seen last
 * @origin: the original mount from where the tree walk initiated
 *
 * Note that peer groups form contiguous segments of slave lists.
 * We rely on that in get_source() to be able to find out if a
 * vfsmount found while iterating with propagation_next() is
 * a peer of one we'd found earlier.
 */
static struct mount *propagation_next(struct mount *m,
					 struct mount *origin)
{
	/* are there any slaves of this mount? */
	if (!IS_MNT_NEW(m) && !list_empty(&m->mnt_slave_list))
		return first_slave(m);

	while (1) {
		struct mount *master = m->mnt_master;

		if (master == origin->mnt_master) {
			struct mount *next = next_peer(m);
			return (next == origin) ? NULL : next;
		} else if (m->mnt_slave.next != &master->mnt_slave_list)
			return next_slave(m);

		/* back at master */
		m = master;
	}
}

static struct mount *skip_propagation_subtree(struct mount *m,
						struct mount *origin)
{
	/*
	 * Advance m such that propagation_next will not return
	 * the slaves of m.
	 */
	if (!IS_MNT_NEW(m) && !list_empty(&m->mnt_slave_list))
		m = last_slave(m);

	return m;
}

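/*
 * Return one mount from the next peer group in the propagation tree,
 * or NULL once every group receiving propagation from @origin's group
 * has been visited.  Callers are expected to walk the peers of the
 * returned mount themselves (see propagate_mnt()).
 */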
static struct mount *next_group(struct mount *m, struct mount *origin)
{
	while (1) {
		while (1) {
			struct mount *next;
			if (!IS_MNT_NEW(m) && !list_empty(&m->mnt_slave_list))
				return first_slave(m);
			next = next_peer(m);
			if (m->mnt_group_id == origin->mnt_group_id) {
				if (next == origin)
					return NULL;
			} else if (m->mnt_slave.next != &next->mnt_slave)
				break;
			m = next;
		}
		/* m is the last peer */
		while (1) {
			struct mount *master = m->mnt_master;
			if (m->mnt_slave.next != &master->mnt_slave_list)
				return next_slave(m);
			m = next_peer(master);
			if (master->mnt_group_id == origin->mnt_group_id)
				break;
			if (master->mnt_slave.next == &m->mnt_slave)
				break;
			m = master;
		}
		if (m == origin)
			return NULL;
	}
}

/* all accesses are serialized by namespace_sem */
static struct mount *last_dest, *first_source, *last_source, *dest_master;
static struct mountpoint *mp;
static struct hlist_head *list;

static inline bool peers(struct mount *m1, struct mount *m2)
{
	return m1->mnt_group_id == m2->mnt_group_id && m1->mnt_group_id;
}

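/*
 * Create the copy of last_source that is to be mounted on @m at mp.
 * If @m is a peer of last_dest the copy becomes a peer of last_source
 * (CL_MAKE_SHARED); otherwise @m gets a slave copy (CL_SLAVE), with
 * last_source first walked back up its chain of masters to the copy
 * made for the group @m receives propagation from.
 */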
static int propagate_one(struct mount *m)
{
	struct mount *child;
	int type;
	/* skip ones added by this propagate_mnt() */
	if (IS_MNT_NEW(m))
		return 0;
	/* skip if mountpoint isn't covered by it */
	if (!is_subdir(mp->m_dentry, m->mnt.mnt_root))
		return 0;
	if (peers(m, last_dest)) {
		type = CL_MAKE_SHARED;
	} else {
		struct mount *n, *p;
		bool done;
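		/*
		 * Find the first mount in @m's chain of masters that is
		 * either dest_master or already marked; the loop below
		 * then rolls last_source back to the copy that was made
		 * for that master's group.
		 */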
		for (n = m; ; n = p) {
			p = n->mnt_master;
			if (p == dest_master || IS_MNT_MARKED(p))
				break;
		}
		do {
			struct mount *parent = last_source->mnt_parent;
			if (last_source == first_source)
				break;
			done = parent->mnt_master == p;
			if (done && peers(n, parent))
				break;
			last_source = last_source->mnt_master;
		} while (!done);

		type = CL_SLAVE;
		/* beginning of peer group among the slaves? */
		if (IS_MNT_SHARED(m))
			type |= CL_MAKE_SHARED;
	}

	child = copy_tree(last_source, last_source->mnt.mnt_root, type);
	if (IS_ERR(child))
		return PTR_ERR(child);
	mnt_set_mountpoint(m, mp, child);
	last_dest = m;
	last_source = child;
	if (m->mnt_master != dest_master) {
		read_seqlock_excl(&mount_lock);
		SET_MNT_MARK(m->mnt_master);
		read_sequnlock_excl(&mount_lock);
	}
	hlist_add_head(&child->mnt_hash, list);
	return count_mounts(m->mnt_ns, child);
}

/*
 * Mount 'source_mnt' under the destination 'dest_mnt' at the
 * mountpoint 'dest_mp', and propagate that mount to all the
 * peer and slave mounts of 'dest_mnt'.
 * Link all the new mounts into a propagation tree headed at
 * source_mnt. Also link all the new mounts using ->mnt_list
 * headed at source_mnt's ->mnt_list.
 *
 * @dest_mnt: destination mount.
 * @dest_mp: destination mountpoint.
 * @source_mnt: source mount.
 * @tree_list: list of heads of trees to be attached.
 */
int propagate_mnt(struct mount *dest_mnt, struct mountpoint *dest_mp,
		    struct mount *source_mnt, struct hlist_head *tree_list)
{
	struct mount *m, *n;
	int ret = 0;

	/*
	 * we don't want to bother passing tons of arguments to
	 * propagate_one(); everything is serialized by namespace_sem,
	 * so globals will do just fine.
	 */
	last_dest = dest_mnt;
	first_source = source_mnt;
	last_source = source_mnt;
	mp = dest_mp;
	list = tree_list;
	dest_master = dest_mnt->mnt_master;

	/* all peers of dest_mnt, except dest_mnt itself */
	for (n = next_peer(dest_mnt); n != dest_mnt; n = next_peer(n)) {
		ret = propagate_one(n);
		if (ret)
			goto out;
	}

	/* all slave groups */
	for (m = next_group(dest_mnt, dest_mnt); m;
			m = next_group(m, dest_mnt)) {
		/* everything in that slave group */
		n = m;
		do {
			ret = propagate_one(n);
			if (ret)
				goto out;
			n = next_peer(n);
		} while (n != m);
	}
out:
	read_seqlock_excl(&mount_lock);
	hlist_for_each_entry(n, tree_list, mnt_hash) {
		m = n->mnt_parent;
		if (m->mnt_master != dest_mnt->mnt_master)
			CLEAR_MNT_MARK(m->mnt_master);
	}
	read_sequnlock_excl(&mount_lock);
	return ret;
}

static struct mount *find_topper(struct mount *mnt)
{
	/* If there is exactly one mount covering mnt completely, return it. */
	struct mount *child;

	if (!list_is_singular(&mnt->mnt_mounts))
		return NULL;

	child = list_first_entry(&mnt->mnt_mounts, struct mount, mnt_child);
	if (child->mnt_mountpoint != mnt->mnt.mnt_root)
		return NULL;

	return child;
}

/*
 * return true if the refcount is greater than count
 */
static inline int do_refcount_check(struct mount *mnt, int count)
{
	return mnt_get_count(mnt) > count;
}

/*
 * check if the mount 'mnt' can be unmounted successfully.
 * @mnt: the mount to be checked for unmount
 * NOTE: unmounting 'mnt' would naturally propagate to all
 * other mounts its parent propagates to.
 * Check if any of these mounts that **do not have submounts**
 * have more references than 'refcnt'. If so return busy.
 *
 * vfsmount lock must be held for write
 */
int propagate_mount_busy(struct mount *mnt, int refcnt)
{
	struct mount *m, *child, *topper;
	struct mount *parent = mnt->mnt_parent;

	if (mnt == parent)
		return do_refcount_check(mnt, refcnt);

	/*
	 * quickly check if the current mount can be unmounted.
	 * If not, we don't have to go checking for all other
	 * mounts
	 */
	if (!list_empty(&mnt->mnt_mounts) || do_refcount_check(mnt, refcnt))
		return 1;

	for (m = propagation_next(parent, parent); m;
	     m = propagation_next(m, parent)) {
		int count = 1;
		child = __lookup_mnt(&m->mnt, mnt->mnt_mountpoint);
		if (!child)
			continue;

		/* Is there exactly one mount on the child that covers
		 * it completely whose reference should be ignored?
		 */
		topper = find_topper(child);
		if (topper)
			count += 1;
		else if (!list_empty(&child->mnt_mounts))
			continue;

		if (do_refcount_check(child, count))
			return 1;
	}
	return 0;
}

/*
 * Clear MNT_LOCKED when it can be shown to be safe.
 *
 * mount_lock lock must be held for write
 */
void propagate_mount_unlock(struct mount *mnt)
{
	struct mount *parent = mnt->mnt_parent;
	struct mount *m, *child;

	BUG_ON(parent == mnt);

	for (m = propagation_next(parent, parent); m;
			m = propagation_next(m, parent)) {
		child = __lookup_mnt(&m->mnt, mnt->mnt_mountpoint);
		if (child)
			child->mnt.mnt_flags &= ~MNT_LOCKED;
	}
}

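/*
 * Take @mnt out of the tree: clear its mark, flag it MNT_UMOUNT,
 * drop it from its parent's list of children and queue it on
 * @to_umount.
 */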
static void umount_one(struct mount *mnt, struct list_head *to_umount)
{
	CLEAR_MNT_MARK(mnt);
	mnt->mnt.mnt_flags |= MNT_UMOUNT;
	list_del_init(&mnt->mnt_child);
	list_del_init(&mnt->mnt_umounting);
	list_move_tail(&mnt->mnt_list, to_umount);
}

/*
 * NOTE: unmounting 'mnt' naturally propagates to all other mounts its
 * parent propagates to.
 */
static bool __propagate_umount(struct mount *mnt,
			       struct list_head *to_umount,
			       struct list_head *to_restore)
{
	bool progress = false;
	struct mount *child;

	/*
	 * The state of the parent won't change if this mount is
	 * already unmounted or marked as without children.
	 */
	if (mnt->mnt.mnt_flags & (MNT_UMOUNT | MNT_MARKED))
		goto out;

	/* Verify topper is the only grandchild that has not been
	 * speculatively unmounted.
	 */
	list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
		if (child->mnt_mountpoint == mnt->mnt.mnt_root)
			continue;
		if (!list_empty(&child->mnt_umounting) && IS_MNT_MARKED(child))
			continue;
		/* Found a mounted child */
		goto children;
	}

	/* Mark mounts that can be unmounted if not locked */
	SET_MNT_MARK(mnt);
	progress = true;

	/* If a mount is without children and not locked umount it. */
	if (!IS_MNT_LOCKED(mnt)) {
		umount_one(mnt, to_umount);
	} else {
children:
		list_move_tail(&mnt->mnt_umounting, to_restore);
	}
out:
	return progress;
}

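/*
 * For each mount queued on @to_umount, deal with its remaining
 * children: a child that completely covers its parent (a topper) is
 * moved to @to_restore for later reparenting; every other child is
 * unmounted as well.
 */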
static void umount_list(struct list_head *to_umount,
			struct list_head *to_restore)
{
	struct mount *mnt, *child, *tmp;
	list_for_each_entry(mnt, to_umount, mnt_list) {
		list_for_each_entry_safe(child, tmp, &mnt->mnt_mounts, mnt_child) {
			/* topper? */
			if (child->mnt_mountpoint == mnt->mnt.mnt_root)
				list_move_tail(&child->mnt_umounting, to_restore);
			else
				umount_one(child, to_umount);
		}
	}
}

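/*
 * Put the mounts on @to_restore back into shape: clear their marks
 * and, if their parent is going away, reparent them onto the closest
 * ancestor that survives, at the spot where the doomed chain of
 * parents was mounted.
 */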
static void restore_mounts(struct list_head *to_restore)
{
	/* Restore mounts to a clean working state */
	while (!list_empty(to_restore)) {
		struct mount *mnt, *parent;
		struct mountpoint *mp;

		mnt = list_first_entry(to_restore, struct mount, mnt_umounting);
		CLEAR_MNT_MARK(mnt);
		list_del_init(&mnt->mnt_umounting);

		/* Should this mount be reparented? */
		mp = mnt->mnt_mp;
		parent = mnt->mnt_parent;
		while (parent->mnt.mnt_flags & MNT_UMOUNT) {
			mp = parent->mnt_mp;
			parent = parent->mnt_parent;
		}
		if (parent != mnt->mnt_parent)
			mnt_change_mountpoint(parent, mp, mnt);
	}
}

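/* Drop all mounts off the temporary 'visited' bookkeeping list. */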
static void cleanup_umount_visitations(struct list_head *visited)
{
	while (!list_empty(visited)) {
		struct mount *mnt =
			list_first_entry(visited, struct mount, mnt_umounting);
		list_del_init(&mnt->mnt_umounting);
	}
}

/*
 * collect all mounts that receive propagation from the mount in @list,
 * and return these additional mounts in the same list.
 * @list: the list of mounts to be unmounted.
 *
 * vfsmount lock must be held for write
 */
int propagate_umount(struct list_head *list)
{
	struct mount *mnt;
	LIST_HEAD(to_restore);
	LIST_HEAD(to_umount);
	LIST_HEAD(visited);

	/* Find candidates for unmounting */
	list_for_each_entry_reverse(mnt, list, mnt_list) {
		struct mount *parent = mnt->mnt_parent;
		struct mount *m;

		/*
		 * If this mount has already been visited it is known that its
		 * entire peer group and all of their slaves in the propagation
		 * tree for the mountpoint have already been visited and there
		 * is no need to visit them again.
		 */
		if (!list_empty(&mnt->mnt_umounting))
			continue;

		list_add_tail(&mnt->mnt_umounting, &visited);
		for (m = propagation_next(parent, parent); m;
		     m = propagation_next(m, parent)) {
			struct mount *child = __lookup_mnt(&m->mnt,
							   mnt->mnt_mountpoint);
			if (!child)
				continue;

			if (!list_empty(&child->mnt_umounting)) {
				/*
				 * If the child has already been visited it is
				 * known that its entire peer group and all of
				 * their slaves in the propagation tree for the
				 * mountpoint have already been visited and
				 * there is no need to visit this subtree again.
				 */
				m = skip_propagation_subtree(m, parent);
				continue;
			} else if (child->mnt.mnt_flags & MNT_UMOUNT) {
				/*
				 * We have come across a partially unmounted
				 * mount in the list that has not been visited
				 * yet. Remember it has been visited and
				 * continue about our merry way.
				 */
				list_add_tail(&child->mnt_umounting, &visited);
				continue;
			}

			/* Check the child and parents while progress is made */
			while (__propagate_umount(child,
						  &to_umount, &to_restore)) {
				/* Is the parent a umount candidate? */
				child = child->mnt_parent;
				if (list_empty(&child->mnt_umounting))
					break;
			}
		}
	}

	umount_list(&to_umount, &to_restore);
	restore_mounts(&to_restore);
	cleanup_umount_visitations(&visited);
	list_splice_tail(&to_umount, list);

	return 0;
}
604