// SPDX-License-Identifier: GPL-2.0
/*
 *  linux/fs/super.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
 *  super.c contains code to handle: - mount structures
 *                                   - super-block tables
 *                                   - filesystem drivers list
 *                                   - mount system call
 *                                   - umount system call
 *                                   - ustat system call
 *
 * GK 2/5/95  -  Changed to support mounting the root fs via NFS
 *
 *  Added kerneld support: Jacques Gelinas and Bjorn Ekwall
 *  Added change_root: Werner Almesberger & Hans Lermen, Feb '96
 *  Added options to /proc/mounts:
 *    Torbjörn Lindh (torbjorn.lindh@gopta.se), April 14, 1996.
 *  Added devfs support: Richard Gooch <rgooch@atnf.csiro.au>, 13-JAN-1998
 *  Heavily rewritten for 'one fs - one tree' dcache architecture. AV, Mar 2000
 */
231da177e4SLinus Torvalds
24630d9c47SPaul Gortmaker #include <linux/export.h>
251da177e4SLinus Torvalds #include <linux/slab.h>
261da177e4SLinus Torvalds #include <linux/blkdev.h>
271da177e4SLinus Torvalds #include <linux/mount.h>
281da177e4SLinus Torvalds #include <linux/security.h>
291da177e4SLinus Torvalds #include <linux/writeback.h> /* for the emergency remount stuff */
301da177e4SLinus Torvalds #include <linux/idr.h>
31353ab6e9SIngo Molnar #include <linux/mutex.h>
325477d0faSJens Axboe #include <linux/backing-dev.h>
33ceb5bdc2SNick Piggin #include <linux/rculist_bl.h>
3422d94f49SEric Biggers #include <linux/fscrypt.h>
3540401530SAl Viro #include <linux/fsnotify.h>
365accdf82SJan Kara #include <linux/lockdep.h>
376e4eab57SEric W. Biederman #include <linux/user_namespace.h>
389bc61ab1SDavid Howells #include <linux/fs_context.h>
39e262e32dSDavid Howells #include <uapi/linux/mount.h>
406d59e7f5SAl Viro #include "internal.h"
411da177e4SLinus Torvalds
42880b9577SDarrick J. Wong static int thaw_super_locked(struct super_block *sb, enum freeze_holder who);
431da177e4SLinus Torvalds
4415d0f5eaSAl Viro static LIST_HEAD(super_blocks);
4515d0f5eaSAl Viro static DEFINE_SPINLOCK(sb_lock);
461da177e4SLinus Torvalds
475accdf82SJan Kara static char *sb_writers_name[SB_FREEZE_LEVELS] = {
485accdf82SJan Kara "sb_writers",
495accdf82SJan Kara "sb_pagefaults",
505accdf82SJan Kara "sb_internal",
515accdf82SJan Kara };
525accdf82SJan Kara
/*
 * Acquire @sb->s_umount: for writing when @excl is true, for reading
 * otherwise. No superblock flags are inspected here.
 */
static inline void __super_lock(struct super_block *sb, bool excl)
{
	if (!excl) {
		down_read(&sb->s_umount);
		return;
	}
	down_write(&sb->s_umount);
}
600ed33598SChristian Brauner
/*
 * Release @sb->s_umount. @excl must say how the lock was taken: true for
 * a write lock, false for a read lock.
 */
static inline void super_unlock(struct super_block *sb, bool excl)
{
	if (!excl) {
		up_read(&sb->s_umount);
		return;
	}
	up_write(&sb->s_umount);
}
680ed33598SChristian Brauner
__super_lock_excl(struct super_block * sb)695e874914SChristian Brauner static inline void __super_lock_excl(struct super_block *sb)
700ed33598SChristian Brauner {
715e874914SChristian Brauner __super_lock(sb, true);
720ed33598SChristian Brauner }
730ed33598SChristian Brauner
super_unlock_excl(struct super_block * sb)740ed33598SChristian Brauner static inline void super_unlock_excl(struct super_block *sb)
750ed33598SChristian Brauner {
760ed33598SChristian Brauner super_unlock(sb, true);
770ed33598SChristian Brauner }
780ed33598SChristian Brauner
super_unlock_shared(struct super_block * sb)790ed33598SChristian Brauner static inline void super_unlock_shared(struct super_block *sb)
800ed33598SChristian Brauner {
810ed33598SChristian Brauner super_unlock(sb, false);
820ed33598SChristian Brauner }
830ed33598SChristian Brauner
wait_born(struct super_block * sb)845e874914SChristian Brauner static inline bool wait_born(struct super_block *sb)
855e874914SChristian Brauner {
865e874914SChristian Brauner unsigned int flags;
875e874914SChristian Brauner
885e874914SChristian Brauner /*
895e874914SChristian Brauner * Pairs with smp_store_release() in super_wake() and ensures
905e874914SChristian Brauner * that we see SB_BORN or SB_DYING after we're woken.
915e874914SChristian Brauner */
925e874914SChristian Brauner flags = smp_load_acquire(&sb->s_flags);
935e874914SChristian Brauner return flags & (SB_BORN | SB_DYING);
945e874914SChristian Brauner }
955e874914SChristian Brauner
965e874914SChristian Brauner /**
975e874914SChristian Brauner * super_lock - wait for superblock to become ready and lock it
985e874914SChristian Brauner * @sb: superblock to wait for
995e874914SChristian Brauner * @excl: whether exclusive access is required
1005e874914SChristian Brauner *
1015e874914SChristian Brauner * If the superblock has neither passed through vfs_get_tree() or
1025e874914SChristian Brauner * generic_shutdown_super() yet wait for it to happen. Either superblock
1035e874914SChristian Brauner * creation will succeed and SB_BORN is set by vfs_get_tree() or we're
1045e874914SChristian Brauner * woken and we'll see SB_DYING.
1055e874914SChristian Brauner *
1065e874914SChristian Brauner * The caller must have acquired a temporary reference on @sb->s_count.
1075e874914SChristian Brauner *
1085e874914SChristian Brauner * Return: This returns true if SB_BORN was set, false if SB_DYING was
1095e874914SChristian Brauner * set. The function acquires s_umount and returns with it held.
1105e874914SChristian Brauner */
super_lock(struct super_block * sb,bool excl)1115e874914SChristian Brauner static __must_check bool super_lock(struct super_block *sb, bool excl)
1125e874914SChristian Brauner {
1135e874914SChristian Brauner
1145e874914SChristian Brauner lockdep_assert_not_held(&sb->s_umount);
1155e874914SChristian Brauner
1165e874914SChristian Brauner relock:
1175e874914SChristian Brauner __super_lock(sb, excl);
1185e874914SChristian Brauner
1195e874914SChristian Brauner /*
1205e874914SChristian Brauner * Has gone through generic_shutdown_super() in the meantime.
1215e874914SChristian Brauner * @sb->s_root is NULL and @sb->s_active is 0. No one needs to
1225e874914SChristian Brauner * grab a reference to this. Tell them so.
1235e874914SChristian Brauner */
1245e874914SChristian Brauner if (sb->s_flags & SB_DYING)
1255e874914SChristian Brauner return false;
1265e874914SChristian Brauner
1275e874914SChristian Brauner /* Has called ->get_tree() successfully. */
1285e874914SChristian Brauner if (sb->s_flags & SB_BORN)
1295e874914SChristian Brauner return true;
1305e874914SChristian Brauner
1315e874914SChristian Brauner super_unlock(sb, excl);
1325e874914SChristian Brauner
1335e874914SChristian Brauner /* wait until the superblock is ready or dying */
1345e874914SChristian Brauner wait_var_event(&sb->s_flags, wait_born(sb));
1355e874914SChristian Brauner
1365e874914SChristian Brauner /*
1375e874914SChristian Brauner * Neither SB_BORN nor SB_DYING are ever unset so we never loop.
1385e874914SChristian Brauner * Just reacquire @sb->s_umount for the caller.
1395e874914SChristian Brauner */
1405e874914SChristian Brauner goto relock;
1415e874914SChristian Brauner }
1425e874914SChristian Brauner
1435e874914SChristian Brauner /* wait and acquire read-side of @sb->s_umount */
super_lock_shared(struct super_block * sb)1445e874914SChristian Brauner static inline bool super_lock_shared(struct super_block *sb)
1455e874914SChristian Brauner {
1465e874914SChristian Brauner return super_lock(sb, false);
1475e874914SChristian Brauner }
1485e874914SChristian Brauner
1495e874914SChristian Brauner /* wait and acquire write-side of @sb->s_umount */
super_lock_excl(struct super_block * sb)1505e874914SChristian Brauner static inline bool super_lock_excl(struct super_block *sb)
1515e874914SChristian Brauner {
1525e874914SChristian Brauner return super_lock(sb, true);
1535e874914SChristian Brauner }
1545e874914SChristian Brauner
/*
 * super_wake - publish a lifecycle flag and wake waiters on @sb->s_flags
 * @sb: superblock whose state changed
 * @flag: exactly one of SB_BORN, SB_DYING or SB_DEAD
 *
 * ORs @flag into @sb->s_flags and wakes everyone sleeping in
 * wait_var_event() on &sb->s_flags. Passing a flag outside
 * SUPER_WAKE_FLAGS, or more than one flag at once, triggers a one-time
 * warning but the flags are still set.
 */
#define SUPER_WAKE_FLAGS (SB_BORN | SB_DYING | SB_DEAD)
static void super_wake(struct super_block *sb, unsigned int flag)
{
	WARN_ON_ONCE((flag & ~SUPER_WAKE_FLAGS));
	WARN_ON_ONCE(hweight32(flag & SUPER_WAKE_FLAGS) > 1);

	/*
	 * Pairs with smp_load_acquire() in wait_born() and wait_dead() to
	 * make sure all initializations in the superblock are seen by the
	 * user seeing the flag set.
	 */
	smp_store_release(&sb->s_flags, sb->s_flags | flag);
	/*
	 * Pairs with the barrier in prepare_to_wait_event() to make sure
	 * ___wait_var_event() either sees the flag set or
	 * waitqueue_active() check in wake_up_var() sees the waiter.
	 */
	smp_mb();
	wake_up_var(&sb->s_flags);
}
1765e874914SChristian Brauner
177b0d40c92SDave Chinner /*
178b0d40c92SDave Chinner * One thing we have to be careful of with a per-sb shrinker is that we don't
179b0d40c92SDave Chinner * drop the last active reference to the superblock from within the shrinker.
180b0d40c92SDave Chinner * If that happens we could trigger unregistering the shrinker from within the
18147a7c01cSQi Zheng * shrinker path and that leads to deadlock on the shrinker_rwsem. Hence we
182b0d40c92SDave Chinner * take a passive reference to the superblock to avoid this from occurring.
183b0d40c92SDave Chinner */
super_cache_scan(struct shrinker * shrink,struct shrink_control * sc)1840a234c6dSDave Chinner static unsigned long super_cache_scan(struct shrinker *shrink,
1850a234c6dSDave Chinner struct shrink_control *sc)
186b0d40c92SDave Chinner {
187b0d40c92SDave Chinner struct super_block *sb;
1880a234c6dSDave Chinner long fs_objects = 0;
1890a234c6dSDave Chinner long total_objects;
1900a234c6dSDave Chinner long freed = 0;
1910a234c6dSDave Chinner long dentries;
1920a234c6dSDave Chinner long inodes;
193b0d40c92SDave Chinner
194b0d40c92SDave Chinner sb = container_of(shrink, struct super_block, s_shrink);
195b0d40c92SDave Chinner
196b0d40c92SDave Chinner /*
197b0d40c92SDave Chinner * Deadlock avoidance. We may hold various FS locks, and we don't want
198b0d40c92SDave Chinner * to recurse into the FS that called us in clear_inode() and friends..
199b0d40c92SDave Chinner */
2000a234c6dSDave Chinner if (!(sc->gfp_mask & __GFP_FS))
2010a234c6dSDave Chinner return SHRINK_STOP;
202b0d40c92SDave Chinner
203d8ce82efSChristian Brauner if (!super_trylock_shared(sb))
2040a234c6dSDave Chinner return SHRINK_STOP;
205b0d40c92SDave Chinner
206d0407903SAl Viro if (sb->s_op->nr_cached_objects)
2074101b624SVladimir Davydov fs_objects = sb->s_op->nr_cached_objects(sb, sc);
2080e1fdafdSDave Chinner
209503c358cSVladimir Davydov inodes = list_lru_shrink_count(&sb->s_inode_lru, sc);
210503c358cSVladimir Davydov dentries = list_lru_shrink_count(&sb->s_dentry_lru, sc);
211f6041567SDave Chinner total_objects = dentries + inodes + fs_objects + 1;
212475d0db7STetsuo Handa if (!total_objects)
213475d0db7STetsuo Handa total_objects = 1;
2140e1fdafdSDave Chinner
2150e1fdafdSDave Chinner /* proportion the scan between the caches */
216f6041567SDave Chinner dentries = mult_frac(sc->nr_to_scan, dentries, total_objects);
217bc3b14cbSDave Chinner inodes = mult_frac(sc->nr_to_scan, inodes, total_objects);
218503c358cSVladimir Davydov fs_objects = mult_frac(sc->nr_to_scan, fs_objects, total_objects);
2190a234c6dSDave Chinner
2200e1fdafdSDave Chinner /*
2210e1fdafdSDave Chinner * prune the dcache first as the icache is pinned by it, then
2220e1fdafdSDave Chinner * prune the icache, followed by the filesystem specific caches
22349e7e7ffSVladimir Davydov *
22449e7e7ffSVladimir Davydov * Ensure that we always scan at least one object - memcg kmem
22549e7e7ffSVladimir Davydov * accounting uses this to fully empty the caches.
2260e1fdafdSDave Chinner */
22749e7e7ffSVladimir Davydov sc->nr_to_scan = dentries + 1;
228503c358cSVladimir Davydov freed = prune_dcache_sb(sb, sc);
22949e7e7ffSVladimir Davydov sc->nr_to_scan = inodes + 1;
230503c358cSVladimir Davydov freed += prune_icache_sb(sb, sc);
231b0d40c92SDave Chinner
2320a234c6dSDave Chinner if (fs_objects) {
23349e7e7ffSVladimir Davydov sc->nr_to_scan = fs_objects + 1;
2344101b624SVladimir Davydov freed += sb->s_op->free_cached_objects(sb, sc);
2350e1fdafdSDave Chinner }
2360a234c6dSDave Chinner
2370ed33598SChristian Brauner super_unlock_shared(sb);
2380a234c6dSDave Chinner return freed;
239b0d40c92SDave Chinner }
240b0d40c92SDave Chinner
/*
 * Shrinker ->count_objects() callback for a superblock.
 *
 * Returns 0 while the superblock is not yet SB_BORN, SHRINK_EMPTY when
 * nothing is cached, and otherwise the number of cached objects (dentry
 * LRU + inode LRU + whatever ->nr_cached_objects() reports) scaled by
 * vfs_pressure_ratio().
 */
static unsigned long super_cache_count(struct shrinker *shrink,
				       struct shrink_control *sc)
{
	struct super_block *sb;
	long total_objects = 0;

	sb = container_of(shrink, struct super_block, s_shrink);

	/*
	 * We don't call super_trylock_shared() here as it is a scalability
	 * bottleneck, so we're exposed to partial setup state. The shrinker
	 * rwsem does not protect filesystem operations backing
	 * list_lru_shrink_count() or s_op->nr_cached_objects(). Counts can
	 * change between super_cache_count and super_cache_scan, so we really
	 * don't need locks here.
	 *
	 * However, if we are currently mounting the superblock, the underlying
	 * filesystem might be in a state of partial construction and hence it
	 * is dangerous to access it. super_trylock_shared() uses a SB_BORN
	 * check to avoid this situation, so do the same here. The read
	 * barrier below orders the SB_BORN check before the accesses that
	 * follow, as we don't hold locks here.
	 * NOTE(review): the original comment names mount_fs() as the side
	 * providing the matching barrier; that function is not visible in
	 * this part of the file - confirm where SB_BORN is published.
	 */
	if (!(sb->s_flags & SB_BORN))
		return 0;
	smp_rmb();

	if (sb->s_op && sb->s_op->nr_cached_objects)
		total_objects = sb->s_op->nr_cached_objects(sb, sc);

	total_objects += list_lru_shrink_count(&sb->s_dentry_lru, sc);
	total_objects += list_lru_shrink_count(&sb->s_inode_lru, sc);

	/* Tell the shrinker core there is nothing at all to reclaim. */
	if (!total_objects)
		return SHRINK_EMPTY;

	total_objects = vfs_pressure_ratio(total_objects);
	return total_objects;
}
279b0d40c92SDave Chinner
destroy_super_work(struct work_struct * work)280853b39a7SOleg Nesterov static void destroy_super_work(struct work_struct *work)
281853b39a7SOleg Nesterov {
282853b39a7SOleg Nesterov struct super_block *s = container_of(work, struct super_block,
283853b39a7SOleg Nesterov destroy_work);
284853b39a7SOleg Nesterov int i;
285853b39a7SOleg Nesterov
286853b39a7SOleg Nesterov for (i = 0; i < SB_FREEZE_LEVELS; i++)
2878129ed29SOleg Nesterov percpu_free_rwsem(&s->s_writers.rw_sem[i]);
288853b39a7SOleg Nesterov kfree(s);
289853b39a7SOleg Nesterov }
290853b39a7SOleg Nesterov
destroy_super_rcu(struct rcu_head * head)291853b39a7SOleg Nesterov static void destroy_super_rcu(struct rcu_head *head)
292853b39a7SOleg Nesterov {
293853b39a7SOleg Nesterov struct super_block *s = container_of(head, struct super_block, rcu);
294853b39a7SOleg Nesterov INIT_WORK(&s->destroy_work, destroy_super_work);
295853b39a7SOleg Nesterov schedule_work(&s->destroy_work);
296853b39a7SOleg Nesterov }
297853b39a7SOleg Nesterov
2980200894dSAl Viro /* Free a superblock that has never been seen by anyone */
destroy_unused_super(struct super_block * s)2990200894dSAl Viro static void destroy_unused_super(struct super_block *s)
3005accdf82SJan Kara {
3010200894dSAl Viro if (!s)
3020200894dSAl Viro return;
3030ed33598SChristian Brauner super_unlock_excl(s);
3047eb5e882SAl Viro list_lru_destroy(&s->s_dentry_lru);
3057eb5e882SAl Viro list_lru_destroy(&s->s_inode_lru);
3067eb5e882SAl Viro security_sb_free(s);
3076e4eab57SEric W. Biederman put_user_ns(s->s_user_ns);
3087eb5e882SAl Viro kfree(s->s_subtype);
3098e04944fSTetsuo Handa free_prealloced_shrinker(&s->s_shrink);
3100200894dSAl Viro /* no delays needed */
3110200894dSAl Viro destroy_super_work(&s->destroy_work);
3125accdf82SJan Kara }
3135accdf82SJan Kara
/**
 * alloc_super - create new superblock
 * @type: filesystem type superblock should belong to
 * @flags: the mount flags
 * @user_ns: User namespace for the super_block
 *
 * Allocates and initializes a new &struct super_block. alloc_super()
 * returns a pointer to the new superblock or %NULL if allocation had
 * failed.
 *
 * On success the superblock is returned with s_umount held for writing,
 * ->s_count and ->s_active both set to 1, and a reference taken on
 * @user_ns. On failure everything set up so far is torn down through
 * destroy_unused_super().
 */
static struct super_block *alloc_super(struct file_system_type *type, int flags,
				       struct user_namespace *user_ns)
{
	struct super_block *s = kzalloc(sizeof(struct super_block), GFP_USER);
	/* All-zero operations table so that ->s_op is never NULL. */
	static const struct super_operations default_op;
	int i;

	if (!s)
		return NULL;

	INIT_LIST_HEAD(&s->s_mounts);
	s->s_user_ns = get_user_ns(user_ns);
	init_rwsem(&s->s_umount);
	lockdep_set_class(&s->s_umount, &type->s_umount_key);
	/*
	 * sget() can have s_umount recursion.
	 *
	 * When it cannot find a suitable sb, it allocates a new
	 * one (this one), and tries again to find a suitable old
	 * one.
	 *
	 * In case that succeeds, it will acquire the s_umount
	 * lock of the old one. Since these are clearly distinct
	 * locks, and this object isn't exposed yet, there's no
	 * risk of deadlocks.
	 *
	 * Annotate this by putting this lock in a different
	 * subclass.
	 */
	down_write_nested(&s->s_umount, SINGLE_DEPTH_NESTING);

	if (security_sb_alloc(s))
		goto fail;

	/* One percpu rwsem per freeze level, named via sb_writers_name[]. */
	for (i = 0; i < SB_FREEZE_LEVELS; i++) {
		if (__percpu_init_rwsem(&s->s_writers.rw_sem[i],
					sb_writers_name[i],
					&type->s_writers_key[i]))
			goto fail;
	}
	s->s_bdi = &noop_backing_dev_info;
	s->s_flags = flags;
	/* Superblocks outside the initial user namespace get SB_I_NODEV. */
	if (s->s_user_ns != &init_user_ns)
		s->s_iflags |= SB_I_NODEV;
	INIT_HLIST_NODE(&s->s_instances);
	INIT_HLIST_BL_HEAD(&s->s_roots);
	mutex_init(&s->s_sync_lock);
	INIT_LIST_HEAD(&s->s_inodes);
	spin_lock_init(&s->s_inode_list_lock);
	INIT_LIST_HEAD(&s->s_inodes_wb);
	spin_lock_init(&s->s_inode_wblist_lock);

	/* Start with one temporary (s_count) and one active reference. */
	s->s_count = 1;
	atomic_set(&s->s_active, 1);
	mutex_init(&s->s_vfs_rename_mutex);
	lockdep_set_class(&s->s_vfs_rename_mutex, &type->s_vfs_rename_key);
	init_rwsem(&s->s_dquot.dqio_sem);
	s->s_maxbytes = MAX_NON_LFS;
	s->s_op = &default_op;
	/* presumably nanoseconds, i.e. one-second granularity - TODO confirm */
	s->s_time_gran = 1000000000;
	s->s_time_min = TIME64_MIN;
	s->s_time_max = TIME64_MAX;

	/* Per-superblock shrinker; only preallocated here, not registered. */
	s->s_shrink.seeks = DEFAULT_SEEKS;
	s->s_shrink.scan_objects = super_cache_scan;
	s->s_shrink.count_objects = super_cache_count;
	s->s_shrink.batch = 1024;
	s->s_shrink.flags = SHRINKER_NUMA_AWARE | SHRINKER_MEMCG_AWARE;
	if (prealloc_shrinker(&s->s_shrink, "sb-%s", type->name))
		goto fail;
	if (list_lru_init_memcg(&s->s_dentry_lru, &s->s_shrink))
		goto fail;
	if (list_lru_init_memcg(&s->s_inode_lru, &s->s_shrink))
		goto fail;
	return s;

fail:
	/* Drops s_umount and frees whatever was set up above. */
	destroy_unused_super(s);
	return NULL;
}
4031da177e4SLinus Torvalds
4041da177e4SLinus Torvalds /* Superblock refcounting */
4051da177e4SLinus Torvalds
4061da177e4SLinus Torvalds /*
40735cf7ba0SAl Viro * Drop a superblock's refcount. The caller must hold sb_lock.
4081da177e4SLinus Torvalds */
__put_super(struct super_block * s)409c645b930SAl Viro static void __put_super(struct super_block *s)
4101da177e4SLinus Torvalds {
411c645b930SAl Viro if (!--s->s_count) {
412c645b930SAl Viro list_del_init(&s->s_list);
413c645b930SAl Viro WARN_ON(s->s_dentry_lru.node);
414c645b930SAl Viro WARN_ON(s->s_inode_lru.node);
415c645b930SAl Viro WARN_ON(!list_empty(&s->s_mounts));
416c645b930SAl Viro security_sb_free(s);
417c645b930SAl Viro put_user_ns(s->s_user_ns);
418c645b930SAl Viro kfree(s->s_subtype);
419c645b930SAl Viro call_rcu(&s->rcu, destroy_super_rcu);
4201da177e4SLinus Torvalds }
4211da177e4SLinus Torvalds }
4221da177e4SLinus Torvalds
4231da177e4SLinus Torvalds /**
4241da177e4SLinus Torvalds * put_super - drop a temporary reference to superblock
4251da177e4SLinus Torvalds * @sb: superblock in question
4261da177e4SLinus Torvalds *
4271da177e4SLinus Torvalds * Drops a temporary reference, frees superblock if there's no
4281da177e4SLinus Torvalds * references left.
4291da177e4SLinus Torvalds */
put_super(struct super_block * sb)43060b49885SChristoph Hellwig void put_super(struct super_block *sb)
4311da177e4SLinus Torvalds {
4321da177e4SLinus Torvalds spin_lock(&sb_lock);
4331da177e4SLinus Torvalds __put_super(sb);
4341da177e4SLinus Torvalds spin_unlock(&sb_lock);
4351da177e4SLinus Torvalds }
4361da177e4SLinus Torvalds
kill_super_notify(struct super_block * sb)437dc3216b1SChristian Brauner static void kill_super_notify(struct super_block *sb)
438dc3216b1SChristian Brauner {
439dc3216b1SChristian Brauner lockdep_assert_not_held(&sb->s_umount);
440dc3216b1SChristian Brauner
441dc3216b1SChristian Brauner /* already notified earlier */
442dc3216b1SChristian Brauner if (sb->s_flags & SB_DEAD)
443dc3216b1SChristian Brauner return;
444dc3216b1SChristian Brauner
445dc3216b1SChristian Brauner /*
446dc3216b1SChristian Brauner * Remove it from @fs_supers so it isn't found by new
447dc3216b1SChristian Brauner * sget{_fc}() walkers anymore. Any concurrent mounter still
448dc3216b1SChristian Brauner * managing to grab a temporary reference is guaranteed to
449dc3216b1SChristian Brauner * already see SB_DYING and will wait until we notify them about
450dc3216b1SChristian Brauner * SB_DEAD.
451dc3216b1SChristian Brauner */
452dc3216b1SChristian Brauner spin_lock(&sb_lock);
453dc3216b1SChristian Brauner hlist_del_init(&sb->s_instances);
454dc3216b1SChristian Brauner spin_unlock(&sb_lock);
455dc3216b1SChristian Brauner
456dc3216b1SChristian Brauner /*
457dc3216b1SChristian Brauner * Let concurrent mounts know that this thing is really dead.
458dc3216b1SChristian Brauner * We don't need @sb->s_umount here as every concurrent caller
459dc3216b1SChristian Brauner * will see SB_DYING and either discard the superblock or wait
460dc3216b1SChristian Brauner * for SB_DEAD.
461dc3216b1SChristian Brauner */
462dc3216b1SChristian Brauner super_wake(sb, SB_DEAD);
463dc3216b1SChristian Brauner }
4641da177e4SLinus Torvalds
4651da177e4SLinus Torvalds /**
46674dbbdd7SAl Viro * deactivate_locked_super - drop an active reference to superblock
46774dbbdd7SAl Viro * @s: superblock to deactivate
46874dbbdd7SAl Viro *
469bd7ced98SMasanari Iida * Drops an active reference to superblock, converting it into a temporary
4701712ac8fSAl Viro * one if there is no other active references left. In that case we
4711712ac8fSAl Viro * tell fs driver to shut it down and drop the temporary reference we
4721712ac8fSAl Viro * had just acquired.
4731712ac8fSAl Viro *
4741712ac8fSAl Viro * Caller holds exclusive lock on superblock; that lock is released.
47574dbbdd7SAl Viro */
deactivate_locked_super(struct super_block * s)47674dbbdd7SAl Viro void deactivate_locked_super(struct super_block *s)
47774dbbdd7SAl Viro {
47874dbbdd7SAl Viro struct file_system_type *fs = s->s_type;
479b20bd1a5SAl Viro if (atomic_dec_and_test(&s->s_active)) {
480b0d40c92SDave Chinner unregister_shrinker(&s->s_shrink);
48128f2cd4fSDave Chinner fs->kill_sb(s);
482f5e1dd34SGlauber Costa
483dc3216b1SChristian Brauner kill_super_notify(s);
484dc3216b1SChristian Brauner
485c0a5b560SVladimir Davydov /*
486c0a5b560SVladimir Davydov * Since list_lru_destroy() may sleep, we cannot call it from
487c0a5b560SVladimir Davydov * put_super(), where we hold the sb_lock. Therefore we destroy
488c0a5b560SVladimir Davydov * the lru lists right now.
489c0a5b560SVladimir Davydov */
490c0a5b560SVladimir Davydov list_lru_destroy(&s->s_dentry_lru);
491c0a5b560SVladimir Davydov list_lru_destroy(&s->s_inode_lru);
492c0a5b560SVladimir Davydov
49374dbbdd7SAl Viro put_filesystem(fs);
49474dbbdd7SAl Viro put_super(s);
49574dbbdd7SAl Viro } else {
4960ed33598SChristian Brauner super_unlock_excl(s);
49774dbbdd7SAl Viro }
49874dbbdd7SAl Viro }
49974dbbdd7SAl Viro
50074dbbdd7SAl Viro EXPORT_SYMBOL(deactivate_locked_super);
50174dbbdd7SAl Viro
50274dbbdd7SAl Viro /**
5031712ac8fSAl Viro * deactivate_super - drop an active reference to superblock
5041712ac8fSAl Viro * @s: superblock to deactivate
5051712ac8fSAl Viro *
5061712ac8fSAl Viro * Variant of deactivate_locked_super(), except that superblock is *not*
5071712ac8fSAl Viro * locked by caller. If we are going to drop the final active reference,
5081712ac8fSAl Viro * lock will be acquired prior to that.
5091712ac8fSAl Viro */
deactivate_super(struct super_block * s)5101712ac8fSAl Viro void deactivate_super(struct super_block *s)
5111712ac8fSAl Viro {
5121712ac8fSAl Viro if (!atomic_add_unless(&s->s_active, -1, 1)) {
5135e874914SChristian Brauner __super_lock_excl(s);
5141712ac8fSAl Viro deactivate_locked_super(s);
5151712ac8fSAl Viro }
5161712ac8fSAl Viro }
5171712ac8fSAl Viro
5181712ac8fSAl Viro EXPORT_SYMBOL(deactivate_super);
5191712ac8fSAl Viro
5201712ac8fSAl Viro /**
5211da177e4SLinus Torvalds * grab_super - acquire an active reference
5221da177e4SLinus Torvalds * @s: reference we are trying to make active
5231da177e4SLinus Torvalds *
5241da177e4SLinus Torvalds * Tries to acquire an active reference. grab_super() is used when we
5251da177e4SLinus Torvalds * had just found a superblock in super_blocks or fs_type->fs_supers
5261da177e4SLinus Torvalds * and want to turn it into a full-blown active reference. grab_super()
5271da177e4SLinus Torvalds * is called with sb_lock held and drops it. Returns 1 in case of
5281da177e4SLinus Torvalds * success, 0 if we had failed (superblock contents was already dead or
529acfec9a5SAl Viro * dying when grab_super() had been called). Note that this is only
530acfec9a5SAl Viro * called for superblocks not in rundown mode (== ones still on ->fs_supers
531acfec9a5SAl Viro * of their type), so increment of ->s_count is OK here.
5321da177e4SLinus Torvalds */
grab_super(struct super_block * s)5339c4dbee7SJosh Triplett static int grab_super(struct super_block *s) __releases(sb_lock)
5341da177e4SLinus Torvalds {
5355e874914SChristian Brauner bool born;
5365e874914SChristian Brauner
537b20bd1a5SAl Viro s->s_count++;
5381da177e4SLinus Torvalds spin_unlock(&sb_lock);
5395e874914SChristian Brauner born = super_lock_excl(s);
5405e874914SChristian Brauner if (born && atomic_inc_not_zero(&s->s_active)) {
541acfec9a5SAl Viro put_super(s);
542acfec9a5SAl Viro return 1;
543acfec9a5SAl Viro }
5440ed33598SChristian Brauner super_unlock_excl(s);
5451da177e4SLinus Torvalds put_super(s);
5461da177e4SLinus Torvalds return 0;
5471da177e4SLinus Torvalds }
5481da177e4SLinus Torvalds
wait_dead(struct super_block * sb)5492c18a63bSChristian Brauner static inline bool wait_dead(struct super_block *sb)
5502c18a63bSChristian Brauner {
5512c18a63bSChristian Brauner unsigned int flags;
5522c18a63bSChristian Brauner
553cf9a2ae8SDavid Howells /*
5542c18a63bSChristian Brauner * Pairs with memory barrier in super_wake() and ensures
5552c18a63bSChristian Brauner * that we see SB_DEAD after we're woken.
5562c18a63bSChristian Brauner */
5572c18a63bSChristian Brauner flags = smp_load_acquire(&sb->s_flags);
5582c18a63bSChristian Brauner return flags & SB_DEAD;
5592c18a63bSChristian Brauner }
5602c18a63bSChristian Brauner
5612c18a63bSChristian Brauner /**
5622c18a63bSChristian Brauner * grab_super_dead - acquire an active reference to a superblock
5632c18a63bSChristian Brauner * @sb: superblock to acquire
5642c18a63bSChristian Brauner *
5652c18a63bSChristian Brauner * Acquire a temporary reference on a superblock and try to trade it for
5662c18a63bSChristian Brauner * an active reference. This is used in sget{_fc}() to wait for a
5672c18a63bSChristian Brauner * superblock to either become SB_BORN or for it to pass through
5682c18a63bSChristian Brauner * sb->kill() and be marked as SB_DEAD.
5692c18a63bSChristian Brauner *
5702c18a63bSChristian Brauner * Return: This returns true if an active reference could be acquired,
5712c18a63bSChristian Brauner * false if not.
5722c18a63bSChristian Brauner */
grab_super_dead(struct super_block * sb)5732c18a63bSChristian Brauner static bool grab_super_dead(struct super_block *sb)
5742c18a63bSChristian Brauner {
5752c18a63bSChristian Brauner
5762c18a63bSChristian Brauner sb->s_count++;
5772c18a63bSChristian Brauner if (grab_super(sb)) {
5782c18a63bSChristian Brauner put_super(sb);
5792c18a63bSChristian Brauner lockdep_assert_held(&sb->s_umount);
5802c18a63bSChristian Brauner return true;
5812c18a63bSChristian Brauner }
5822c18a63bSChristian Brauner wait_var_event(&sb->s_flags, wait_dead(sb));
5832c18a63bSChristian Brauner lockdep_assert_not_held(&sb->s_umount);
584345a5c4aSChristian Brauner put_super(sb);
5852c18a63bSChristian Brauner return false;
5862c18a63bSChristian Brauner }
5872c18a63bSChristian Brauner
588cf9a2ae8SDavid Howells /*
589d8ce82efSChristian Brauner * super_trylock_shared - try to grab ->s_umount shared
590331cbdeeSWanpeng Li * @sb: reference we are trying to grab
59112ad3ab6SDave Chinner *
592eb6ef3dfSKonstantin Khlebnikov * Try to prevent fs shutdown. This is used in places where we
59312ad3ab6SDave Chinner * cannot take an active reference but we need to ensure that the
594eb6ef3dfSKonstantin Khlebnikov * filesystem is not shut down while we are working on it. It returns
595eb6ef3dfSKonstantin Khlebnikov * false if we cannot acquire s_umount or if we lose the race and
596eb6ef3dfSKonstantin Khlebnikov * filesystem already got into shutdown, and returns true with the s_umount
597eb6ef3dfSKonstantin Khlebnikov * lock held in read mode in case of success. On successful return,
598eb6ef3dfSKonstantin Khlebnikov * the caller must drop the s_umount lock when done.
599eb6ef3dfSKonstantin Khlebnikov *
600eb6ef3dfSKonstantin Khlebnikov * Note that unlike get_super() et.al. this one does *not* bump ->s_count.
601eb6ef3dfSKonstantin Khlebnikov * The reason why it's safe is that we are OK with doing trylock instead
602eb6ef3dfSKonstantin Khlebnikov * of down_read(). There's a couple of places that are OK with that, but
603eb6ef3dfSKonstantin Khlebnikov * it's very much not a general-purpose interface.
60412ad3ab6SDave Chinner */
super_trylock_shared(struct super_block * sb)605d8ce82efSChristian Brauner bool super_trylock_shared(struct super_block *sb)
60612ad3ab6SDave Chinner {
60712ad3ab6SDave Chinner if (down_read_trylock(&sb->s_umount)) {
6085e874914SChristian Brauner if (!(sb->s_flags & SB_DYING) && sb->s_root &&
6095e874914SChristian Brauner (sb->s_flags & SB_BORN))
61012ad3ab6SDave Chinner return true;
6110ed33598SChristian Brauner super_unlock_shared(sb);
61212ad3ab6SDave Chinner }
61312ad3ab6SDave Chinner
61412ad3ab6SDave Chinner return false;
61512ad3ab6SDave Chinner }
61612ad3ab6SDave Chinner
6171da177e4SLinus Torvalds /**
61804b94071SDaniil Lunev * retire_super - prevents superblock from being reused
61904b94071SDaniil Lunev * @sb: superblock to retire
62004b94071SDaniil Lunev *
62104b94071SDaniil Lunev * The function marks superblock to be ignored in superblock test, which
62204b94071SDaniil Lunev * prevents it from being reused for any new mounts. If the superblock has
62304b94071SDaniil Lunev * a private bdi, it also unregisters it, but doesn't reduce the refcount
62404b94071SDaniil Lunev * of the superblock to prevent potential races. The refcount is reduced
62504b94071SDaniil Lunev * by generic_shutdown_super(). The function can not be called
62604b94071SDaniil Lunev * concurrently with generic_shutdown_super(). It is safe to call the
62704b94071SDaniil Lunev * function multiple times, subsequent calls have no effect.
62804b94071SDaniil Lunev *
62904b94071SDaniil Lunev * The marker will affect the re-use only for block-device-based
63004b94071SDaniil Lunev * superblocks. Other superblocks will still get marked if this function
63104b94071SDaniil Lunev * is used, but that will not affect their reusability.
63204b94071SDaniil Lunev */
retire_super(struct super_block * sb)63304b94071SDaniil Lunev void retire_super(struct super_block *sb)
63404b94071SDaniil Lunev {
63504b94071SDaniil Lunev WARN_ON(!sb->s_bdev);
6365e874914SChristian Brauner __super_lock_excl(sb);
63704b94071SDaniil Lunev if (sb->s_iflags & SB_I_PERSB_BDI) {
63804b94071SDaniil Lunev bdi_unregister(sb->s_bdi);
63904b94071SDaniil Lunev sb->s_iflags &= ~SB_I_PERSB_BDI;
64004b94071SDaniil Lunev }
64104b94071SDaniil Lunev sb->s_iflags |= SB_I_RETIRED;
6420ed33598SChristian Brauner super_unlock_excl(sb);
64304b94071SDaniil Lunev }
64404b94071SDaniil Lunev EXPORT_SYMBOL(retire_super);
64504b94071SDaniil Lunev
64604b94071SDaniil Lunev /**
6471da177e4SLinus Torvalds * generic_shutdown_super - common helper for ->kill_sb()
6481da177e4SLinus Torvalds * @sb: superblock to kill
6491da177e4SLinus Torvalds *
6501da177e4SLinus Torvalds * generic_shutdown_super() does all fs-independent work on superblock
6511da177e4SLinus Torvalds * shutdown. Typical ->kill_sb() should pick all fs-specific objects
6521da177e4SLinus Torvalds * that need destruction out of superblock, call generic_shutdown_super()
6531da177e4SLinus Torvalds * and release aforementioned objects. Note: dentries and inodes _are_
6541da177e4SLinus Torvalds * taken care of and do not need specific handling.
655c636ebdbSDavid Howells *
656c636ebdbSDavid Howells * Upon calling this function, the filesystem may no longer alter or
657c636ebdbSDavid Howells * rearrange the set of dentries belonging to this super_block, nor may it
658c636ebdbSDavid Howells * change the attachments of dentries to inodes.
6591da177e4SLinus Torvalds */
generic_shutdown_super(struct super_block * sb)6601da177e4SLinus Torvalds void generic_shutdown_super(struct super_block *sb)
6611da177e4SLinus Torvalds {
662ee9b6d61SJosef 'Jeff' Sipek const struct super_operations *sop = sb->s_op;
6631da177e4SLinus Torvalds
664c636ebdbSDavid Howells if (sb->s_root) {
665c636ebdbSDavid Howells shrink_dcache_for_umount(sb);
66660b0680fSJan Kara sync_filesystem(sb);
667e462ec50SDavid Howells sb->s_flags &= ~SB_ACTIVE;
668efaee192SArjan van de Ven
669a1a0e23eSTejun Heo cgroup_writeback_umount();
67063997e98SAl Viro
671ccb820dcSEric Biggers /* Evict all inodes with zero refcount. */
67263997e98SAl Viro evict_inodes(sb);
673ccb820dcSEric Biggers
674ccb820dcSEric Biggers /*
675ccb820dcSEric Biggers * Clean up and evict any inodes that still have references due
676ccb820dcSEric Biggers * to fsnotify or the security policy.
677ccb820dcSEric Biggers */
6781edc8eb2SEric Sandeen fsnotify_sb_delete(sb);
67983e804f0SMickaël Salaün security_sb_delete(sb);
6801da177e4SLinus Torvalds
681ccb820dcSEric Biggers /*
682ccb820dcSEric Biggers * Now that all potentially-encrypted inodes have been evicted,
683ccb820dcSEric Biggers * the fscrypt keyring can be destroyed.
684ccb820dcSEric Biggers */
685ccb820dcSEric Biggers fscrypt_destroy_keyring(sb);
686ccb820dcSEric Biggers
6877b7a8665SChristoph Hellwig if (sb->s_dio_done_wq) {
6887b7a8665SChristoph Hellwig destroy_workqueue(sb->s_dio_done_wq);
6897b7a8665SChristoph Hellwig sb->s_dio_done_wq = NULL;
6907b7a8665SChristoph Hellwig }
6917b7a8665SChristoph Hellwig
6921da177e4SLinus Torvalds if (sop->put_super)
6931da177e4SLinus Torvalds sop->put_super(sb);
6941da177e4SLinus Torvalds
69547d58691SJann Horn if (CHECK_DATA_CORRUPTION(!list_empty(&sb->s_inodes),
69647d58691SJann Horn "VFS: Busy inodes after unmount of %s (%s)",
69747d58691SJann Horn sb->s_id, sb->s_type->name)) {
69847d58691SJann Horn /*
69947d58691SJann Horn * Adding a proper bailout path here would be hard, but
70047d58691SJann Horn * we can at least make it more likely that a later
70147d58691SJann Horn * iput_final() or such crashes cleanly.
70247d58691SJann Horn */
70347d58691SJann Horn struct inode *inode;
70447d58691SJann Horn
70547d58691SJann Horn spin_lock(&sb->s_inode_list_lock);
70647d58691SJann Horn list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
70747d58691SJann Horn inode->i_op = VFS_PTR_POISON;
70847d58691SJann Horn inode->i_sb = VFS_PTR_POISON;
70947d58691SJann Horn inode->i_mapping = VFS_PTR_POISON;
71047d58691SJann Horn }
71147d58691SJann Horn spin_unlock(&sb->s_inode_list_lock);
7121da177e4SLinus Torvalds }
7131da177e4SLinus Torvalds }
7145e874914SChristian Brauner /*
7155e874914SChristian Brauner * Broadcast to everyone that grabbed a temporary reference to this
7165e874914SChristian Brauner * superblock before we removed it from @fs_supers that the superblock
7175e874914SChristian Brauner * is dying. Every walker of @fs_supers outside of sget{_fc}() will now
7185e874914SChristian Brauner * discard this superblock and treat it as dead.
7192c18a63bSChristian Brauner *
7202c18a63bSChristian Brauner * We leave the superblock on @fs_supers so it can be found by
7212c18a63bSChristian Brauner * sget{_fc}() until we passed sb->kill_sb().
7225e874914SChristian Brauner */
7235e874914SChristian Brauner super_wake(sb, SB_DYING);
7240ed33598SChristian Brauner super_unlock_excl(sb);
725c1844d53SJan Kara if (sb->s_bdi != &noop_backing_dev_info) {
7260b3ea092SChristoph Hellwig if (sb->s_iflags & SB_I_PERSB_BDI)
7270b3ea092SChristoph Hellwig bdi_unregister(sb->s_bdi);
728fca39346SJan Kara bdi_put(sb->s_bdi);
729fca39346SJan Kara sb->s_bdi = &noop_backing_dev_info;
730fca39346SJan Kara }
7311da177e4SLinus Torvalds }
7321da177e4SLinus Torvalds
7331da177e4SLinus Torvalds EXPORT_SYMBOL(generic_shutdown_super);
7341da177e4SLinus Torvalds
mount_capable(struct fs_context * fc)73520284ab7SAl Viro bool mount_capable(struct fs_context *fc)
7360ce0cf12SAl Viro {
73720284ab7SAl Viro if (!(fc->fs_type->fs_flags & FS_USERNS_MOUNT))
7380ce0cf12SAl Viro return capable(CAP_SYS_ADMIN);
7390ce0cf12SAl Viro else
740c2c44ec2SAl Viro return ns_capable(fc->user_ns, CAP_SYS_ADMIN);
7410ce0cf12SAl Viro }
7420ce0cf12SAl Viro
7431da177e4SLinus Torvalds /**
744cb50b348SAl Viro * sget_fc - Find or create a superblock
745cb50b348SAl Viro * @fc: Filesystem context.
746cb50b348SAl Viro * @test: Comparison callback
747cb50b348SAl Viro * @set: Setup callback
748cb50b348SAl Viro *
74922ed7ecdSChristian Brauner * Create a new superblock or find an existing one.
750cb50b348SAl Viro *
75122ed7ecdSChristian Brauner * The @test callback is used to find a matching existing superblock.
75222ed7ecdSChristian Brauner * Whether or not the requested parameters in @fc are taken into account
75322ed7ecdSChristian Brauner * is specific to the @test callback that is used. They may even be
75422ed7ecdSChristian Brauner * completely ignored.
75522ed7ecdSChristian Brauner *
75622ed7ecdSChristian Brauner * If an extant superblock is matched, it will be returned unless:
75722ed7ecdSChristian Brauner *
75822ed7ecdSChristian Brauner * (1) the namespace the filesystem context @fc and the extant
75922ed7ecdSChristian Brauner * superblock's namespace differ
76022ed7ecdSChristian Brauner *
76122ed7ecdSChristian Brauner * (2) the filesystem context @fc has requested that reusing an extant
76222ed7ecdSChristian Brauner * superblock is not allowed
76322ed7ecdSChristian Brauner *
76422ed7ecdSChristian Brauner * In both cases EBUSY will be returned.
765cb50b348SAl Viro *
766cb50b348SAl Viro * If no match is made, a new superblock will be allocated and basic
76722ed7ecdSChristian Brauner * initialisation will be performed (s_type, s_fs_info and s_id will be
76822ed7ecdSChristian Brauner * set and the @set callback will be invoked), the superblock will be
76922ed7ecdSChristian Brauner * published and it will be returned in a partially constructed state
77022ed7ecdSChristian Brauner * with SB_BORN and SB_ACTIVE as yet unset.
77122ed7ecdSChristian Brauner *
77222ed7ecdSChristian Brauner * Return: On success, an extant or newly created superblock is
77322ed7ecdSChristian Brauner * returned. On failure an error pointer is returned.
774cb50b348SAl Viro */
sget_fc(struct fs_context * fc,int (* test)(struct super_block *,struct fs_context *),int (* set)(struct super_block *,struct fs_context *))775cb50b348SAl Viro struct super_block *sget_fc(struct fs_context *fc,
776cb50b348SAl Viro int (*test)(struct super_block *, struct fs_context *),
777cb50b348SAl Viro int (*set)(struct super_block *, struct fs_context *))
778cb50b348SAl Viro {
779cb50b348SAl Viro struct super_block *s = NULL;
780cb50b348SAl Viro struct super_block *old;
781cb50b348SAl Viro struct user_namespace *user_ns = fc->global ? &init_user_ns : fc->user_ns;
782cb50b348SAl Viro int err;
783cb50b348SAl Viro
784acbd66c1SSeth Forshee (DigitalOcean) /*
785acbd66c1SSeth Forshee (DigitalOcean) * Never allow s_user_ns != &init_user_ns when FS_USERNS_MOUNT is
786acbd66c1SSeth Forshee (DigitalOcean) * not set, as the filesystem is likely unprepared to handle it.
787acbd66c1SSeth Forshee (DigitalOcean) * This can happen when fsconfig() is called from init_user_ns with
788acbd66c1SSeth Forshee (DigitalOcean) * an fs_fd opened in another user namespace.
789acbd66c1SSeth Forshee (DigitalOcean) */
790acbd66c1SSeth Forshee (DigitalOcean) if (user_ns != &init_user_ns && !(fc->fs_type->fs_flags & FS_USERNS_MOUNT)) {
791acbd66c1SSeth Forshee (DigitalOcean) errorfc(fc, "VFS: Mounting from non-initial user namespace is not allowed");
792acbd66c1SSeth Forshee (DigitalOcean) return ERR_PTR(-EPERM);
793acbd66c1SSeth Forshee (DigitalOcean) }
794acbd66c1SSeth Forshee (DigitalOcean)
795cb50b348SAl Viro retry:
796cb50b348SAl Viro spin_lock(&sb_lock);
797cb50b348SAl Viro if (test) {
798cb50b348SAl Viro hlist_for_each_entry(old, &fc->fs_type->fs_supers, s_instances) {
799cb50b348SAl Viro if (test(old, fc))
800cb50b348SAl Viro goto share_extant_sb;
801cb50b348SAl Viro }
802cb50b348SAl Viro }
803cb50b348SAl Viro if (!s) {
804cb50b348SAl Viro spin_unlock(&sb_lock);
805cb50b348SAl Viro s = alloc_super(fc->fs_type, fc->sb_flags, user_ns);
806cb50b348SAl Viro if (!s)
807cb50b348SAl Viro return ERR_PTR(-ENOMEM);
808cb50b348SAl Viro goto retry;
809cb50b348SAl Viro }
810cb50b348SAl Viro
811cb50b348SAl Viro s->s_fs_info = fc->s_fs_info;
812cb50b348SAl Viro err = set(s, fc);
813cb50b348SAl Viro if (err) {
814cb50b348SAl Viro s->s_fs_info = NULL;
815cb50b348SAl Viro spin_unlock(&sb_lock);
816cb50b348SAl Viro destroy_unused_super(s);
817cb50b348SAl Viro return ERR_PTR(err);
818cb50b348SAl Viro }
819cb50b348SAl Viro fc->s_fs_info = NULL;
820cb50b348SAl Viro s->s_type = fc->fs_type;
821c80fa7c8SDavid Howells s->s_iflags |= fc->s_iflags;
822c642256bSAzeem Shaikh strscpy(s->s_id, s->s_type->name, sizeof(s->s_id));
8235e874914SChristian Brauner /*
8245e874914SChristian Brauner * Make the superblock visible on @super_blocks and @fs_supers.
8255e874914SChristian Brauner * It's in a nascent state and users should wait on SB_BORN or
8265e874914SChristian Brauner * SB_DYING to be set.
8275e874914SChristian Brauner */
828cb50b348SAl Viro list_add_tail(&s->s_list, &super_blocks);
829cb50b348SAl Viro hlist_add_head(&s->s_instances, &s->s_type->fs_supers);
830cb50b348SAl Viro spin_unlock(&sb_lock);
831cb50b348SAl Viro get_filesystem(s->s_type);
832cb50b348SAl Viro register_shrinker_prepared(&s->s_shrink);
833cb50b348SAl Viro return s;
834cb50b348SAl Viro
835cb50b348SAl Viro share_extant_sb:
83622ed7ecdSChristian Brauner if (user_ns != old->s_user_ns || fc->exclusive) {
837cb50b348SAl Viro spin_unlock(&sb_lock);
838cb50b348SAl Viro destroy_unused_super(s);
83922ed7ecdSChristian Brauner if (fc->exclusive)
84022ed7ecdSChristian Brauner warnfc(fc, "reusing existing filesystem not allowed");
84122ed7ecdSChristian Brauner else
84222ed7ecdSChristian Brauner warnfc(fc, "reusing existing filesystem in another namespace not allowed");
843cb50b348SAl Viro return ERR_PTR(-EBUSY);
844cb50b348SAl Viro }
8452c18a63bSChristian Brauner if (!grab_super_dead(old))
846cb50b348SAl Viro goto retry;
847cb50b348SAl Viro destroy_unused_super(s);
848cb50b348SAl Viro return old;
849cb50b348SAl Viro }
850cb50b348SAl Viro EXPORT_SYMBOL(sget_fc);
851cb50b348SAl Viro
852cb50b348SAl Viro /**
853023d066aSDavid Howells * sget - find or create a superblock
8541da177e4SLinus Torvalds * @type: filesystem type superblock should belong to
8551da177e4SLinus Torvalds * @test: comparison callback
8561da177e4SLinus Torvalds * @set: setup callback
8579249e17fSDavid Howells * @flags: mount flags
8581da177e4SLinus Torvalds * @data: argument to each of them
8591da177e4SLinus Torvalds */
sget(struct file_system_type * type,int (* test)(struct super_block *,void *),int (* set)(struct super_block *,void *),int flags,void * data)860023d066aSDavid Howells struct super_block *sget(struct file_system_type *type,
8611da177e4SLinus Torvalds int (*test)(struct super_block *,void *),
8621da177e4SLinus Torvalds int (*set)(struct super_block *,void *),
863023d066aSDavid Howells int flags,
8641da177e4SLinus Torvalds void *data)
8651da177e4SLinus Torvalds {
866023d066aSDavid Howells struct user_namespace *user_ns = current_user_ns();
8671da177e4SLinus Torvalds struct super_block *s = NULL;
868d4730127SMatthias Kaehlcke struct super_block *old;
8691da177e4SLinus Torvalds int err;
8701da177e4SLinus Torvalds
871023d066aSDavid Howells /* We don't yet pass the user namespace of the parent
872023d066aSDavid Howells * mount through to here so always use &init_user_ns
873023d066aSDavid Howells * until that changes.
874023d066aSDavid Howells */
875023d066aSDavid Howells if (flags & SB_SUBMOUNT)
876023d066aSDavid Howells user_ns = &init_user_ns;
877023d066aSDavid Howells
8781da177e4SLinus Torvalds retry:
8791da177e4SLinus Torvalds spin_lock(&sb_lock);
880d4730127SMatthias Kaehlcke if (test) {
881b67bfe0dSSasha Levin hlist_for_each_entry(old, &type->fs_supers, s_instances) {
8821da177e4SLinus Torvalds if (!test(old, data))
8831da177e4SLinus Torvalds continue;
8846e4eab57SEric W. Biederman if (user_ns != old->s_user_ns) {
8856e4eab57SEric W. Biederman spin_unlock(&sb_lock);
8860200894dSAl Viro destroy_unused_super(s);
8876e4eab57SEric W. Biederman return ERR_PTR(-EBUSY);
8886e4eab57SEric W. Biederman }
8892c18a63bSChristian Brauner if (!grab_super_dead(old))
8901da177e4SLinus Torvalds goto retry;
8910200894dSAl Viro destroy_unused_super(s);
8921da177e4SLinus Torvalds return old;
8931da177e4SLinus Torvalds }
894d4730127SMatthias Kaehlcke }
8951da177e4SLinus Torvalds if (!s) {
8961da177e4SLinus Torvalds spin_unlock(&sb_lock);
897e462ec50SDavid Howells s = alloc_super(type, (flags & ~SB_SUBMOUNT), user_ns);
8981da177e4SLinus Torvalds if (!s)
8991da177e4SLinus Torvalds return ERR_PTR(-ENOMEM);
9001da177e4SLinus Torvalds goto retry;
9011da177e4SLinus Torvalds }
9021da177e4SLinus Torvalds
9031da177e4SLinus Torvalds err = set(s, data);
9041da177e4SLinus Torvalds if (err) {
9051da177e4SLinus Torvalds spin_unlock(&sb_lock);
9060200894dSAl Viro destroy_unused_super(s);
9071da177e4SLinus Torvalds return ERR_PTR(err);
9081da177e4SLinus Torvalds }
9091da177e4SLinus Torvalds s->s_type = type;
910c642256bSAzeem Shaikh strscpy(s->s_id, type->name, sizeof(s->s_id));
9111da177e4SLinus Torvalds list_add_tail(&s->s_list, &super_blocks);
912a5166169SAl Viro hlist_add_head(&s->s_instances, &type->fs_supers);
9131da177e4SLinus Torvalds spin_unlock(&sb_lock);
9141da177e4SLinus Torvalds get_filesystem(type);
9158e04944fSTetsuo Handa register_shrinker_prepared(&s->s_shrink);
9161da177e4SLinus Torvalds return s;
9171da177e4SLinus Torvalds }
9181da177e4SLinus Torvalds EXPORT_SYMBOL(sget);
9191da177e4SLinus Torvalds
/**
 *	drop_super	-	release a superblock held with a shared lock
 *	@sb: superblock to release
 *
 *	Drops the shared superblock lock via super_unlock_shared() and then
 *	puts the reference on @sb with put_super(). Presumably the
 *	counterpart of lookups that return a superblock locked shared with
 *	a reference held.
 */
void drop_super(struct super_block *sb)
{
	super_unlock_shared(sb);
	put_super(sb);
}

EXPORT_SYMBOL(drop_super);
9271da177e4SLinus Torvalds
/**
 *	drop_super_exclusive	-	release a superblock held with an exclusive lock
 *	@sb: superblock to release
 *
 *	Drops the exclusive superblock lock via super_unlock_excl() and then
 *	puts the reference on @sb with put_super(). Exclusive-lock variant
 *	of drop_super().
 */
void drop_super_exclusive(struct super_block *sb)
{
	super_unlock_excl(sb);
	put_super(sb);
}
EXPORT_SYMBOL(drop_super_exclusive);
934ba6379f7SJan Kara
__iterate_supers(void (* f)(struct super_block *))935fa7c1d50SMateusz Guzik static void __iterate_supers(void (*f)(struct super_block *))
936fa7c1d50SMateusz Guzik {
937fa7c1d50SMateusz Guzik struct super_block *sb, *p = NULL;
938fa7c1d50SMateusz Guzik
939fa7c1d50SMateusz Guzik spin_lock(&sb_lock);
940fa7c1d50SMateusz Guzik list_for_each_entry(sb, &super_blocks, s_list) {
9415e874914SChristian Brauner /* Pairs with memory marrier in super_wake(). */
9425e874914SChristian Brauner if (smp_load_acquire(&sb->s_flags) & SB_DYING)
943fa7c1d50SMateusz Guzik continue;
944fa7c1d50SMateusz Guzik sb->s_count++;
945fa7c1d50SMateusz Guzik spin_unlock(&sb_lock);
946fa7c1d50SMateusz Guzik
947fa7c1d50SMateusz Guzik f(sb);
948fa7c1d50SMateusz Guzik
949fa7c1d50SMateusz Guzik spin_lock(&sb_lock);
950fa7c1d50SMateusz Guzik if (p)
951fa7c1d50SMateusz Guzik __put_super(p);
952fa7c1d50SMateusz Guzik p = sb;
953fa7c1d50SMateusz Guzik }
954fa7c1d50SMateusz Guzik if (p)
955fa7c1d50SMateusz Guzik __put_super(p);
956fa7c1d50SMateusz Guzik spin_unlock(&sb_lock);
957fa7c1d50SMateusz Guzik }
958e5004753SChristoph Hellwig /**
95901a05b33SAl Viro * iterate_supers - call function for all active superblocks
96001a05b33SAl Viro * @f: function to call
96101a05b33SAl Viro * @arg: argument to pass to it
96201a05b33SAl Viro *
96301a05b33SAl Viro * Scans the superblock list and calls given function, passing it
96401a05b33SAl Viro * locked superblock and given argument.
96501a05b33SAl Viro */
iterate_supers(void (* f)(struct super_block *,void *),void * arg)96601a05b33SAl Viro void iterate_supers(void (*f)(struct super_block *, void *), void *arg)
96701a05b33SAl Viro {
968dca33252SAl Viro struct super_block *sb, *p = NULL;
96901a05b33SAl Viro
97001a05b33SAl Viro spin_lock(&sb_lock);
971dca33252SAl Viro list_for_each_entry(sb, &super_blocks, s_list) {
9725e874914SChristian Brauner bool born;
9735e874914SChristian Brauner
97401a05b33SAl Viro sb->s_count++;
97501a05b33SAl Viro spin_unlock(&sb_lock);
97601a05b33SAl Viro
9775e874914SChristian Brauner born = super_lock_shared(sb);
9785e874914SChristian Brauner if (born && sb->s_root)
97901a05b33SAl Viro f(sb, arg);
9800ed33598SChristian Brauner super_unlock_shared(sb);
98101a05b33SAl Viro
98201a05b33SAl Viro spin_lock(&sb_lock);
983dca33252SAl Viro if (p)
984dca33252SAl Viro __put_super(p);
985dca33252SAl Viro p = sb;
98601a05b33SAl Viro }
987dca33252SAl Viro if (p)
988dca33252SAl Viro __put_super(p);
98901a05b33SAl Viro spin_unlock(&sb_lock);
99001a05b33SAl Viro }
99101a05b33SAl Viro
99201a05b33SAl Viro /**
99343e15cdbSAl Viro * iterate_supers_type - call function for superblocks of given type
99443e15cdbSAl Viro * @type: fs type
99543e15cdbSAl Viro * @f: function to call
99643e15cdbSAl Viro * @arg: argument to pass to it
99743e15cdbSAl Viro *
99843e15cdbSAl Viro * Scans the superblock list and calls given function, passing it
99943e15cdbSAl Viro * locked superblock and given argument.
100043e15cdbSAl Viro */
iterate_supers_type(struct file_system_type * type,void (* f)(struct super_block *,void *),void * arg)100143e15cdbSAl Viro void iterate_supers_type(struct file_system_type *type,
100243e15cdbSAl Viro void (*f)(struct super_block *, void *), void *arg)
100343e15cdbSAl Viro {
100443e15cdbSAl Viro struct super_block *sb, *p = NULL;
100543e15cdbSAl Viro
100643e15cdbSAl Viro spin_lock(&sb_lock);
1007b67bfe0dSSasha Levin hlist_for_each_entry(sb, &type->fs_supers, s_instances) {
10085e874914SChristian Brauner bool born;
10095e874914SChristian Brauner
101043e15cdbSAl Viro sb->s_count++;
101143e15cdbSAl Viro spin_unlock(&sb_lock);
101243e15cdbSAl Viro
10135e874914SChristian Brauner born = super_lock_shared(sb);
10145e874914SChristian Brauner if (born && sb->s_root)
101543e15cdbSAl Viro f(sb, arg);
10160ed33598SChristian Brauner super_unlock_shared(sb);
101743e15cdbSAl Viro
101843e15cdbSAl Viro spin_lock(&sb_lock);
101943e15cdbSAl Viro if (p)
102043e15cdbSAl Viro __put_super(p);
102143e15cdbSAl Viro p = sb;
102243e15cdbSAl Viro }
102343e15cdbSAl Viro if (p)
102443e15cdbSAl Viro __put_super(p);
102543e15cdbSAl Viro spin_unlock(&sb_lock);
102643e15cdbSAl Viro }
102743e15cdbSAl Viro
102843e15cdbSAl Viro EXPORT_SYMBOL(iterate_supers_type);
102943e15cdbSAl Viro
10304e7b5671SChristoph Hellwig /**
10314504230aSChristoph Hellwig * get_active_super - get an active reference to the superblock of a device
10324504230aSChristoph Hellwig * @bdev: device to get the superblock for
10334504230aSChristoph Hellwig *
10344504230aSChristoph Hellwig * Scans the superblock list and finds the superblock of the file system
10354504230aSChristoph Hellwig * mounted on the device given. Returns the superblock with an active
1036d3f21473SAl Viro * reference or %NULL if none was found.
10374504230aSChristoph Hellwig */
get_active_super(struct block_device * bdev)10384504230aSChristoph Hellwig struct super_block *get_active_super(struct block_device *bdev)
10394504230aSChristoph Hellwig {
10404504230aSChristoph Hellwig struct super_block *sb;
10414504230aSChristoph Hellwig
10424504230aSChristoph Hellwig if (!bdev)
10434504230aSChristoph Hellwig return NULL;
10444504230aSChristoph Hellwig
10454504230aSChristoph Hellwig spin_lock(&sb_lock);
10464504230aSChristoph Hellwig list_for_each_entry(sb, &super_blocks, s_list) {
10471494583dSAl Viro if (sb->s_bdev == bdev) {
1048acfec9a5SAl Viro if (!grab_super(sb))
10495e874914SChristian Brauner return NULL;
10500ed33598SChristian Brauner super_unlock_excl(sb);
1051acfec9a5SAl Viro return sb;
10521494583dSAl Viro }
10534504230aSChristoph Hellwig }
10544504230aSChristoph Hellwig spin_unlock(&sb_lock);
10554504230aSChristoph Hellwig return NULL;
10564504230aSChristoph Hellwig }
10574504230aSChristoph Hellwig
/**
 * user_get_super - look up a live superblock by device number
 * @dev: device number to match against ->s_dev
 * @excl: passed through to super_lock()/super_unlock() to select the
 *        lock mode
 *
 * Only the first superblock on @super_blocks whose ->s_dev equals @dev
 * is considered.
 *
 * Return: the superblock, still locked via super_lock(sb, @excl) and with
 * an ->s_count reference held, or %NULL if there is no match or the match
 * is no longer alive.
 */
struct super_block *user_get_super(dev_t dev, bool excl)
{
	struct super_block *sb;

	spin_lock(&sb_lock);
	list_for_each_entry(sb, &super_blocks, s_list) {
		if (sb->s_dev != dev)
			continue;

		sb->s_count++;
		spin_unlock(&sb_lock);

		/* still alive? */
		if (super_lock(sb, excl) && sb->s_root)
			return sb;
		super_unlock(sb, excl);

		/* nope, got unmounted */
		spin_lock(&sb_lock);
		__put_super(sb);
		break;
	}
	spin_unlock(&sb_lock);
	return NULL;
}
10831da177e4SLinus Torvalds
10841da177e4SLinus Torvalds /**
10858d0347f6SDavid Howells * reconfigure_super - asks filesystem to change superblock parameters
10868d0347f6SDavid Howells * @fc: The superblock and configuration
10871da177e4SLinus Torvalds *
10888d0347f6SDavid Howells * Alters the configuration parameters of a live superblock.
10891da177e4SLinus Torvalds */
reconfigure_super(struct fs_context * fc)10908d0347f6SDavid Howells int reconfigure_super(struct fs_context *fc)
10911da177e4SLinus Torvalds {
10928d0347f6SDavid Howells struct super_block *sb = fc->root->d_sb;
10931da177e4SLinus Torvalds int retval;
10948d0347f6SDavid Howells bool remount_ro = false;
1095c541dce8SJan Kara bool remount_rw = false;
10968d0347f6SDavid Howells bool force = fc->sb_flags & SB_FORCE;
10971da177e4SLinus Torvalds
10988d0347f6SDavid Howells if (fc->sb_flags_mask & ~MS_RMT_MASK)
10998d0347f6SDavid Howells return -EINVAL;
11005accdf82SJan Kara if (sb->s_writers.frozen != SB_UNFROZEN)
11014504230aSChristoph Hellwig return -EBUSY;
11024504230aSChristoph Hellwig
11038d0347f6SDavid Howells retval = security_sb_remount(sb, fc->security);
11048d0347f6SDavid Howells if (retval)
11058d0347f6SDavid Howells return retval;
11068d0347f6SDavid Howells
11078d0347f6SDavid Howells if (fc->sb_flags_mask & SB_RDONLY) {
11089361401eSDavid Howells #ifdef CONFIG_BLOCK
11096f0d9689SChristoph Hellwig if (!(fc->sb_flags & SB_RDONLY) && sb->s_bdev &&
11106f0d9689SChristoph Hellwig bdev_read_only(sb->s_bdev))
11111da177e4SLinus Torvalds return -EACCES;
11129361401eSDavid Howells #endif
1113c541dce8SJan Kara remount_rw = !(fc->sb_flags & SB_RDONLY) && sb_rdonly(sb);
11148d0347f6SDavid Howells remount_ro = (fc->sb_flags & SB_RDONLY) && !sb_rdonly(sb);
11158d0347f6SDavid Howells }
1116d208bbddSNick Piggin
11170aec09d0SAl Viro if (remount_ro) {
1118fdab684dSAl Viro if (!hlist_empty(&sb->s_pins)) {
11190ed33598SChristian Brauner super_unlock_excl(sb);
1120fdab684dSAl Viro group_pin_kill(&sb->s_pins);
11215e874914SChristian Brauner __super_lock_excl(sb);
11220aec09d0SAl Viro if (!sb->s_root)
11230aec09d0SAl Viro return 0;
11240aec09d0SAl Viro if (sb->s_writers.frozen != SB_UNFROZEN)
11250aec09d0SAl Viro return -EBUSY;
11268d0347f6SDavid Howells remount_ro = !sb_rdonly(sb);
11270aec09d0SAl Viro }
11280aec09d0SAl Viro }
11290aec09d0SAl Viro shrink_dcache_sb(sb);
11300aec09d0SAl Viro
11318d0347f6SDavid Howells /* If we are reconfiguring to RDONLY and current sb is read/write,
11328d0347f6SDavid Howells * make sure there are no files open for writing.
11338d0347f6SDavid Howells */
1134d208bbddSNick Piggin if (remount_ro) {
11354ed5e82fSMiklos Szeredi if (force) {
1136d7439fb1SJan Kara sb_start_ro_state_change(sb);
11374ed5e82fSMiklos Szeredi } else {
11384ed5e82fSMiklos Szeredi retval = sb_prepare_remount_readonly(sb);
11394ed5e82fSMiklos Szeredi if (retval)
11404ed5e82fSMiklos Szeredi return retval;
11414ed5e82fSMiklos Szeredi }
1142c541dce8SJan Kara } else if (remount_rw) {
1143c541dce8SJan Kara /*
1144d7439fb1SJan Kara * Protect filesystem's reconfigure code from writes from
1145d7439fb1SJan Kara * userspace until reconfigure finishes.
1146c541dce8SJan Kara */
1147d7439fb1SJan Kara sb_start_ro_state_change(sb);
11481da177e4SLinus Torvalds }
11491da177e4SLinus Torvalds
1150f3a09c92SAl Viro if (fc->ops->reconfigure) {
1151f3a09c92SAl Viro retval = fc->ops->reconfigure(fc);
11522833eb2bSMiklos Szeredi if (retval) {
11532833eb2bSMiklos Szeredi if (!force)
11544ed5e82fSMiklos Szeredi goto cancel_readonly;
11552833eb2bSMiklos Szeredi /* If forced remount, go ahead despite any errors */
11562833eb2bSMiklos Szeredi WARN(1, "forced remount of a %s fs returned %i\n",
11572833eb2bSMiklos Szeredi sb->s_type->name, retval);
11582833eb2bSMiklos Szeredi }
1159f3a09c92SAl Viro }
11608d0347f6SDavid Howells
11618d0347f6SDavid Howells WRITE_ONCE(sb->s_flags, ((sb->s_flags & ~fc->sb_flags_mask) |
11628d0347f6SDavid Howells (fc->sb_flags & fc->sb_flags_mask)));
1163d7439fb1SJan Kara sb_end_ro_state_change(sb);
1164c79d967dSChristoph Hellwig
1165d208bbddSNick Piggin /*
1166d208bbddSNick Piggin * Some filesystems modify their metadata via some other path than the
1167d208bbddSNick Piggin * bdev buffer cache (eg. use a private mapping, or directories in
1168d208bbddSNick Piggin * pagecache, etc). Also file data modifications go via their own
1169d208bbddSNick Piggin * mappings. So If we try to mount readonly then copy the filesystem
1170d208bbddSNick Piggin * from bdev, we could get stale data, so invalidate it to give a best
1171d208bbddSNick Piggin * effort at coherency.
1172d208bbddSNick Piggin */
1173d208bbddSNick Piggin if (remount_ro && sb->s_bdev)
1174d208bbddSNick Piggin invalidate_bdev(sb->s_bdev);
11751da177e4SLinus Torvalds return 0;
11764ed5e82fSMiklos Szeredi
11774ed5e82fSMiklos Szeredi cancel_readonly:
1178d7439fb1SJan Kara sb_end_ro_state_change(sb);
11794ed5e82fSMiklos Szeredi return retval;
11801da177e4SLinus Torvalds }
11811da177e4SLinus Torvalds
do_emergency_remount_callback(struct super_block * sb)1182fa7c1d50SMateusz Guzik static void do_emergency_remount_callback(struct super_block *sb)
11831da177e4SLinus Torvalds {
11845e874914SChristian Brauner bool born = super_lock_excl(sb);
11855e874914SChristian Brauner
11865e874914SChristian Brauner if (born && sb->s_root && sb->s_bdev && !sb_rdonly(sb)) {
11878d0347f6SDavid Howells struct fs_context *fc;
11888d0347f6SDavid Howells
11898d0347f6SDavid Howells fc = fs_context_for_reconfigure(sb->s_root,
11908d0347f6SDavid Howells SB_RDONLY | SB_FORCE, SB_RDONLY);
11918d0347f6SDavid Howells if (!IS_ERR(fc)) {
11928d0347f6SDavid Howells if (parse_monolithic_mount_data(fc, NULL) == 0)
11938d0347f6SDavid Howells (void)reconfigure_super(fc);
11948d0347f6SDavid Howells put_fs_context(fc);
11958d0347f6SDavid Howells }
11961da177e4SLinus Torvalds }
11970ed33598SChristian Brauner super_unlock_excl(sb);
11981da177e4SLinus Torvalds }
1199fa7c1d50SMateusz Guzik
do_emergency_remount(struct work_struct * work)1200fa7c1d50SMateusz Guzik static void do_emergency_remount(struct work_struct *work)
1201fa7c1d50SMateusz Guzik {
1202fa7c1d50SMateusz Guzik __iterate_supers(do_emergency_remount_callback);
1203a2a9537aSJens Axboe kfree(work);
12041da177e4SLinus Torvalds printk("Emergency Remount complete\n");
12051da177e4SLinus Torvalds }
12061da177e4SLinus Torvalds
emergency_remount(void)12071da177e4SLinus Torvalds void emergency_remount(void)
12081da177e4SLinus Torvalds {
1209a2a9537aSJens Axboe struct work_struct *work;
1210a2a9537aSJens Axboe
1211a2a9537aSJens Axboe work = kmalloc(sizeof(*work), GFP_ATOMIC);
1212a2a9537aSJens Axboe if (work) {
1213a2a9537aSJens Axboe INIT_WORK(work, do_emergency_remount);
1214a2a9537aSJens Axboe schedule_work(work);
1215a2a9537aSJens Axboe }
12161da177e4SLinus Torvalds }
12171da177e4SLinus Torvalds
do_thaw_all_callback(struct super_block * sb)121808fdc8a0SMateusz Guzik static void do_thaw_all_callback(struct super_block *sb)
121908fdc8a0SMateusz Guzik {
12205e874914SChristian Brauner bool born = super_lock_excl(sb);
12215e874914SChristian Brauner
12225e874914SChristian Brauner if (born && sb->s_root) {
12234a8b719fSChristoph Hellwig if (IS_ENABLED(CONFIG_BLOCK))
12244a8b719fSChristoph Hellwig while (sb->s_bdev && !thaw_bdev(sb->s_bdev))
12254a8b719fSChristoph Hellwig pr_warn("Emergency Thaw on %pg\n", sb->s_bdev);
1226880b9577SDarrick J. Wong thaw_super_locked(sb, FREEZE_HOLDER_USERSPACE);
122708fdc8a0SMateusz Guzik } else {
12280ed33598SChristian Brauner super_unlock_excl(sb);
122908fdc8a0SMateusz Guzik }
123008fdc8a0SMateusz Guzik }
123108fdc8a0SMateusz Guzik
do_thaw_all(struct work_struct * work)123208fdc8a0SMateusz Guzik static void do_thaw_all(struct work_struct *work)
123308fdc8a0SMateusz Guzik {
123408fdc8a0SMateusz Guzik __iterate_supers(do_thaw_all_callback);
123508fdc8a0SMateusz Guzik kfree(work);
123608fdc8a0SMateusz Guzik printk(KERN_WARNING "Emergency Thaw complete\n");
123708fdc8a0SMateusz Guzik }
123808fdc8a0SMateusz Guzik
123908fdc8a0SMateusz Guzik /**
124008fdc8a0SMateusz Guzik * emergency_thaw_all -- forcibly thaw every frozen filesystem
124108fdc8a0SMateusz Guzik *
124208fdc8a0SMateusz Guzik * Used for emergency unfreeze of all filesystems via SysRq
124308fdc8a0SMateusz Guzik */
emergency_thaw_all(void)124408fdc8a0SMateusz Guzik void emergency_thaw_all(void)
124508fdc8a0SMateusz Guzik {
124608fdc8a0SMateusz Guzik struct work_struct *work;
124708fdc8a0SMateusz Guzik
124808fdc8a0SMateusz Guzik work = kmalloc(sizeof(*work), GFP_ATOMIC);
124908fdc8a0SMateusz Guzik if (work) {
125008fdc8a0SMateusz Guzik INIT_WORK(work, do_thaw_all);
125108fdc8a0SMateusz Guzik schedule_work(work);
125208fdc8a0SMateusz Guzik }
125308fdc8a0SMateusz Guzik }
125408fdc8a0SMateusz Guzik
1255ad76cbc6SAlexey Dobriyan static DEFINE_IDA(unnamed_dev_ida);
12561da177e4SLinus Torvalds
12575a66847eSMatthew Wilcox /**
12585a66847eSMatthew Wilcox * get_anon_bdev - Allocate a block device for filesystems which don't have one.
12595a66847eSMatthew Wilcox * @p: Pointer to a dev_t.
12605a66847eSMatthew Wilcox *
12615a66847eSMatthew Wilcox * Filesystems which don't use real block devices can call this function
12625a66847eSMatthew Wilcox * to allocate a virtual block device.
12635a66847eSMatthew Wilcox *
12645a66847eSMatthew Wilcox * Context: Any context. Frequently called while holding sb_lock.
12655a66847eSMatthew Wilcox * Return: 0 on success, -EMFILE if there are no anonymous bdevs left
12665a66847eSMatthew Wilcox * or -ENOMEM if memory allocation failed.
12675a66847eSMatthew Wilcox */
get_anon_bdev(dev_t * p)12680ee5dc67SAl Viro int get_anon_bdev(dev_t *p)
12691da177e4SLinus Torvalds {
12701da177e4SLinus Torvalds int dev;
12711da177e4SLinus Torvalds
12725a66847eSMatthew Wilcox /*
12735a66847eSMatthew Wilcox * Many userspace utilities consider an FSID of 0 invalid.
12745a66847eSMatthew Wilcox * Always return at least 1 from get_anon_bdev.
12755a66847eSMatthew Wilcox */
12765a66847eSMatthew Wilcox dev = ida_alloc_range(&unnamed_dev_ida, 1, (1 << MINORBITS) - 1,
12775a66847eSMatthew Wilcox GFP_ATOMIC);
12785a66847eSMatthew Wilcox if (dev == -ENOSPC)
12795a66847eSMatthew Wilcox dev = -EMFILE;
12805a66847eSMatthew Wilcox if (dev < 0)
12815a66847eSMatthew Wilcox return dev;
12821da177e4SLinus Torvalds
12835a66847eSMatthew Wilcox *p = MKDEV(0, dev);
12841da177e4SLinus Torvalds return 0;
12851da177e4SLinus Torvalds }
12860ee5dc67SAl Viro EXPORT_SYMBOL(get_anon_bdev);
12870ee5dc67SAl Viro
/*
 * Give the minor number of @dev back to the anonymous device ID allocator.
 * Only the minor part of @dev is used.
 */
void free_anon_bdev(dev_t dev)
{
	unsigned int minor = MINOR(dev);

	ida_free(&unnamed_dev_ida, minor);
}
EXPORT_SYMBOL(free_anon_bdev);
12930ee5dc67SAl Viro
set_anon_super(struct super_block * s,void * data)12940ee5dc67SAl Viro int set_anon_super(struct super_block *s, void *data)
12950ee5dc67SAl Viro {
1296df0ce26cSChristoph Hellwig return get_anon_bdev(&s->s_dev);
12970ee5dc67SAl Viro }
12981da177e4SLinus Torvalds EXPORT_SYMBOL(set_anon_super);
12991da177e4SLinus Torvalds
kill_anon_super(struct super_block * sb)13001da177e4SLinus Torvalds void kill_anon_super(struct super_block *sb)
13011da177e4SLinus Torvalds {
13020ee5dc67SAl Viro dev_t dev = sb->s_dev;
13031da177e4SLinus Torvalds generic_shutdown_super(sb);
1304dc3216b1SChristian Brauner kill_super_notify(sb);
13050ee5dc67SAl Viro free_anon_bdev(dev);
13061da177e4SLinus Torvalds }
13071da177e4SLinus Torvalds EXPORT_SYMBOL(kill_anon_super);
13081da177e4SLinus Torvalds
kill_litter_super(struct super_block * sb)13091da177e4SLinus Torvalds void kill_litter_super(struct super_block *sb)
13101da177e4SLinus Torvalds {
13111da177e4SLinus Torvalds if (sb->s_root)
13121da177e4SLinus Torvalds d_genocide(sb->s_root);
13131da177e4SLinus Torvalds kill_anon_super(sb);
13141da177e4SLinus Torvalds }
13151da177e4SLinus Torvalds EXPORT_SYMBOL(kill_litter_super);
13161da177e4SLinus Torvalds
set_anon_super_fc(struct super_block * sb,struct fs_context * fc)1317cb50b348SAl Viro int set_anon_super_fc(struct super_block *sb, struct fs_context *fc)
1318cb50b348SAl Viro {
1319cb50b348SAl Viro return set_anon_super(sb, NULL);
1320cb50b348SAl Viro }
1321cb50b348SAl Viro EXPORT_SYMBOL(set_anon_super_fc);
1322cb50b348SAl Viro
test_keyed_super(struct super_block * sb,struct fs_context * fc)1323cb50b348SAl Viro static int test_keyed_super(struct super_block *sb, struct fs_context *fc)
1324cb50b348SAl Viro {
1325cb50b348SAl Viro return sb->s_fs_info == fc->s_fs_info;
1326cb50b348SAl Viro }
1327cb50b348SAl Viro
/*
 * Superblock match test for get_tree_single(): unconditionally reports a
 * match, whatever @s and @fc are.
 */
static int test_single_super(struct super_block *s, struct fs_context *fc)
{
	return 1;
}
1332cb50b348SAl Viro
vfs_get_super(struct fs_context * fc,int (* test)(struct super_block *,struct fs_context *),int (* fill_super)(struct super_block * sb,struct fs_context * fc))1333e062abaeSChristian Brauner static int vfs_get_super(struct fs_context *fc,
1334cda2ed05SChristoph Hellwig int (*test)(struct super_block *, struct fs_context *),
1335cb50b348SAl Viro int (*fill_super)(struct super_block *sb,
1336cb50b348SAl Viro struct fs_context *fc))
1337cb50b348SAl Viro {
1338cb50b348SAl Viro struct super_block *sb;
133943ce4c1fSDavid Howells int err;
1340cb50b348SAl Viro
1341cb50b348SAl Viro sb = sget_fc(fc, test, set_anon_super_fc);
1342cb50b348SAl Viro if (IS_ERR(sb))
1343cb50b348SAl Viro return PTR_ERR(sb);
1344cb50b348SAl Viro
1345cb50b348SAl Viro if (!sb->s_root) {
134643ce4c1fSDavid Howells err = fill_super(sb, fc);
134743ce4c1fSDavid Howells if (err)
134843ce4c1fSDavid Howells goto error;
1349cb50b348SAl Viro
1350cb50b348SAl Viro sb->s_flags |= SB_ACTIVE;
1351cb50b348SAl Viro }
1352cb50b348SAl Viro
1353e062abaeSChristian Brauner fc->root = dget(sb->s_root);
1354cb50b348SAl Viro return 0;
135543ce4c1fSDavid Howells
135643ce4c1fSDavid Howells error:
135743ce4c1fSDavid Howells deactivate_locked_super(sb);
135843ce4c1fSDavid Howells return err;
1359cb50b348SAl Viro }
1360cb50b348SAl Viro
get_tree_nodev(struct fs_context * fc,int (* fill_super)(struct super_block * sb,struct fs_context * fc))13612ac295d4SAl Viro int get_tree_nodev(struct fs_context *fc,
13622ac295d4SAl Viro int (*fill_super)(struct super_block *sb,
13632ac295d4SAl Viro struct fs_context *fc))
13642ac295d4SAl Viro {
1365e062abaeSChristian Brauner return vfs_get_super(fc, NULL, fill_super);
13662ac295d4SAl Viro }
13672ac295d4SAl Viro EXPORT_SYMBOL(get_tree_nodev);
13682ac295d4SAl Viro
get_tree_single(struct fs_context * fc,int (* fill_super)(struct super_block * sb,struct fs_context * fc))1369c23a0bbaSAl Viro int get_tree_single(struct fs_context *fc,
1370c23a0bbaSAl Viro int (*fill_super)(struct super_block *sb,
1371c23a0bbaSAl Viro struct fs_context *fc))
1372c23a0bbaSAl Viro {
1373e062abaeSChristian Brauner return vfs_get_super(fc, test_single_super, fill_super);
1374c23a0bbaSAl Viro }
1375c23a0bbaSAl Viro EXPORT_SYMBOL(get_tree_single);
1376c23a0bbaSAl Viro
get_tree_keyed(struct fs_context * fc,int (* fill_super)(struct super_block * sb,struct fs_context * fc),void * key)1377533770ccSAl Viro int get_tree_keyed(struct fs_context *fc,
1378533770ccSAl Viro int (*fill_super)(struct super_block *sb,
1379533770ccSAl Viro struct fs_context *fc),
1380533770ccSAl Viro void *key)
1381533770ccSAl Viro {
1382533770ccSAl Viro fc->s_fs_info = key;
1383e062abaeSChristian Brauner return vfs_get_super(fc, test_keyed_super, fill_super);
1384533770ccSAl Viro }
1385533770ccSAl Viro EXPORT_SYMBOL(get_tree_keyed);
1386533770ccSAl Viro
set_bdev_super(struct super_block * s,void * data)138769881be3SChristian Brauner static int set_bdev_super(struct super_block *s, void *data)
138869881be3SChristian Brauner {
138969881be3SChristian Brauner s->s_dev = *(dev_t *)data;
139069881be3SChristian Brauner return 0;
139169881be3SChristian Brauner }
139269881be3SChristian Brauner
super_s_dev_set(struct super_block * s,struct fs_context * fc)139369881be3SChristian Brauner static int super_s_dev_set(struct super_block *s, struct fs_context *fc)
139469881be3SChristian Brauner {
139569881be3SChristian Brauner return set_bdev_super(s, fc->sget_key);
139669881be3SChristian Brauner }
139769881be3SChristian Brauner
super_s_dev_test(struct super_block * s,struct fs_context * fc)139869881be3SChristian Brauner static int super_s_dev_test(struct super_block *s, struct fs_context *fc)
139969881be3SChristian Brauner {
140069881be3SChristian Brauner return !(s->s_iflags & SB_I_RETIRED) &&
140169881be3SChristian Brauner s->s_dev == *(dev_t *)fc->sget_key;
140269881be3SChristian Brauner }
140369881be3SChristian Brauner
140469881be3SChristian Brauner /**
140569881be3SChristian Brauner * sget_dev - Find or create a superblock by device number
140669881be3SChristian Brauner * @fc: Filesystem context.
140769881be3SChristian Brauner * @dev: device number
140869881be3SChristian Brauner *
140969881be3SChristian Brauner * Find or create a superblock using the provided device number that
141069881be3SChristian Brauner * will be stored in fc->sget_key.
141169881be3SChristian Brauner *
141269881be3SChristian Brauner * If an extant superblock is matched, then that will be returned with
141369881be3SChristian Brauner * an elevated reference count that the caller must transfer or discard.
141469881be3SChristian Brauner *
141569881be3SChristian Brauner * If no match is made, a new superblock will be allocated and basic
141669881be3SChristian Brauner * initialisation will be performed (s_type, s_fs_info, s_id, s_dev will
141769881be3SChristian Brauner * be set). The superblock will be published and it will be returned in
141869881be3SChristian Brauner * a partially constructed state with SB_BORN and SB_ACTIVE as yet
141969881be3SChristian Brauner * unset.
142069881be3SChristian Brauner *
142169881be3SChristian Brauner * Return: an existing or newly created superblock on success, an error
142269881be3SChristian Brauner * pointer on failure.
142369881be3SChristian Brauner */
sget_dev(struct fs_context * fc,dev_t dev)142469881be3SChristian Brauner struct super_block *sget_dev(struct fs_context *fc, dev_t dev)
142569881be3SChristian Brauner {
142669881be3SChristian Brauner fc->sget_key = &dev;
142769881be3SChristian Brauner return sget_fc(fc, super_s_dev_test, super_s_dev_set);
142869881be3SChristian Brauner }
142969881be3SChristian Brauner EXPORT_SYMBOL(sget_dev);
143069881be3SChristian Brauner
14319361401eSDavid Howells #ifdef CONFIG_BLOCK
14329c09a7cfSChristoph Hellwig /*
14339c09a7cfSChristoph Hellwig * Lock a super block that the callers holds a reference to.
14349c09a7cfSChristoph Hellwig *
14359c09a7cfSChristoph Hellwig * The caller needs to ensure that the super_block isn't being freed while
14369c09a7cfSChristoph Hellwig * calling this function, e.g. by holding a lock over the call to this function
14379c09a7cfSChristoph Hellwig * and the place that clears the pointer to the superblock used by this function
14389c09a7cfSChristoph Hellwig * before freeing the superblock.
14399c09a7cfSChristoph Hellwig */
super_lock_shared_active(struct super_block * sb)1440d8ce82efSChristian Brauner static bool super_lock_shared_active(struct super_block *sb)
144187efb390SChristoph Hellwig {
14425e874914SChristian Brauner bool born = super_lock_shared(sb);
144387efb390SChristoph Hellwig
14445e874914SChristian Brauner if (!born || !sb->s_root || !(sb->s_flags & SB_ACTIVE)) {
14450ed33598SChristian Brauner super_unlock_shared(sb);
14469c09a7cfSChristoph Hellwig return false;
14479c09a7cfSChristoph Hellwig }
14489c09a7cfSChristoph Hellwig return true;
144987efb390SChristoph Hellwig }
145087efb390SChristoph Hellwig
/*
 * ->mark_dead() holder operation for block devices held by a filesystem.
 *
 * @bdev:     the block device; its bd_holder is the owning superblock
 * @surprise: when false, the filesystem is synced before anything else
 *
 * After the optional sync, the dentry cache for the superblock is shrunk, its
 * inodes are invalidated and, if the filesystem provides a ->shutdown()
 * super operation, that is called.  Nothing happens if the superblock cannot
 * be locked in an active state.
 */
static void fs_bdev_mark_dead(struct block_device *bdev, bool surprise)
{
	struct super_block *holder_sb = bdev->bd_holder;

	/* bd_holder_lock ensures that the sb isn't freed */
	lockdep_assert_held(&bdev->bd_holder_lock);

	if (super_lock_shared_active(holder_sb)) {
		if (!surprise)
			sync_filesystem(holder_sb);

		shrink_dcache_sb(holder_sb);
		invalidate_inodes(holder_sb);

		if (holder_sb->s_op->shutdown)
			holder_sb->s_op->shutdown(holder_sb);

		super_unlock_shared(holder_sb);
	}
}
147087efb390SChristoph Hellwig
fs_bdev_sync(struct block_device * bdev)14712142b88cSChristoph Hellwig static void fs_bdev_sync(struct block_device *bdev)
14722142b88cSChristoph Hellwig {
14732142b88cSChristoph Hellwig struct super_block *sb = bdev->bd_holder;
14742142b88cSChristoph Hellwig
14752142b88cSChristoph Hellwig lockdep_assert_held(&bdev->bd_holder_lock);
14762142b88cSChristoph Hellwig
1477d8ce82efSChristian Brauner if (!super_lock_shared_active(sb))
14782142b88cSChristoph Hellwig return;
14792142b88cSChristoph Hellwig sync_filesystem(sb);
14800ed33598SChristian Brauner super_unlock_shared(sb);
14812142b88cSChristoph Hellwig }
14822142b88cSChristoph Hellwig
14837ecd0b6fSChristoph Hellwig const struct blk_holder_ops fs_holder_ops = {
1484d8530de5SChristoph Hellwig .mark_dead = fs_bdev_mark_dead,
14852142b88cSChristoph Hellwig .sync = fs_bdev_sync,
148687efb390SChristoph Hellwig };
14877ecd0b6fSChristoph Hellwig EXPORT_SYMBOL_GPL(fs_holder_ops);
1488fe62c3a4SDavid Howells
setup_bdev_super(struct super_block * sb,int sb_flags,struct fs_context * fc)1489cf6da236SChristoph Hellwig int setup_bdev_super(struct super_block *sb, int sb_flags,
1490aca740ceSJan Kara struct fs_context *fc)
1491aca740ceSJan Kara {
1492aca740ceSJan Kara blk_mode_t mode = sb_open_mode(sb_flags);
1493*4365d0d6SJan Kara struct bdev_handle *bdev_handle;
1494aca740ceSJan Kara struct block_device *bdev;
1495aca740ceSJan Kara
1496*4365d0d6SJan Kara bdev_handle = bdev_open_by_dev(sb->s_dev, mode, sb, &fs_holder_ops);
1497*4365d0d6SJan Kara if (IS_ERR(bdev_handle)) {
1498aca740ceSJan Kara if (fc)
1499aca740ceSJan Kara errorf(fc, "%s: Can't open blockdev", fc->source);
1500*4365d0d6SJan Kara return PTR_ERR(bdev_handle);
1501aca740ceSJan Kara }
1502*4365d0d6SJan Kara bdev = bdev_handle->bdev;
1503aca740ceSJan Kara
1504aca740ceSJan Kara /*
1505aca740ceSJan Kara * This really should be in blkdev_get_by_dev, but right now can't due
1506aca740ceSJan Kara * to legacy issues that require us to allow opening a block device node
1507aca740ceSJan Kara * writable from userspace even for a read-only block device.
1508aca740ceSJan Kara */
1509aca740ceSJan Kara if ((mode & BLK_OPEN_WRITE) && bdev_read_only(bdev)) {
1510*4365d0d6SJan Kara bdev_release(bdev_handle);
1511aca740ceSJan Kara return -EACCES;
1512aca740ceSJan Kara }
1513aca740ceSJan Kara
1514aca740ceSJan Kara /*
1515aca740ceSJan Kara * Until SB_BORN flag is set, there can be no active superblock
1516aca740ceSJan Kara * references and thus no filesystem freezing. get_active_super() will
1517aca740ceSJan Kara * just loop waiting for SB_BORN so even freeze_bdev() cannot proceed.
1518aca740ceSJan Kara *
1519aca740ceSJan Kara * It is enough to check bdev was not frozen before we set s_bdev.
1520aca740ceSJan Kara */
1521aca740ceSJan Kara mutex_lock(&bdev->bd_fsfreeze_mutex);
1522aca740ceSJan Kara if (bdev->bd_fsfreeze_count > 0) {
1523aca740ceSJan Kara mutex_unlock(&bdev->bd_fsfreeze_mutex);
1524aca740ceSJan Kara if (fc)
1525aca740ceSJan Kara warnf(fc, "%pg: Can't mount, blockdev is frozen", bdev);
1526*4365d0d6SJan Kara bdev_release(bdev_handle);
1527aca740ceSJan Kara return -EBUSY;
1528aca740ceSJan Kara }
1529aca740ceSJan Kara spin_lock(&sb_lock);
1530*4365d0d6SJan Kara sb->s_bdev_handle = bdev_handle;
1531aca740ceSJan Kara sb->s_bdev = bdev;
1532aca740ceSJan Kara sb->s_bdi = bdi_get(bdev->bd_disk->bdi);
1533aca740ceSJan Kara if (bdev_stable_writes(bdev))
1534aca740ceSJan Kara sb->s_iflags |= SB_I_STABLE_WRITES;
1535aca740ceSJan Kara spin_unlock(&sb_lock);
1536aca740ceSJan Kara mutex_unlock(&bdev->bd_fsfreeze_mutex);
1537aca740ceSJan Kara
1538aca740ceSJan Kara snprintf(sb->s_id, sizeof(sb->s_id), "%pg", bdev);
1539aca740ceSJan Kara shrinker_debugfs_rename(&sb->s_shrink, "sb-%s:%s", sb->s_type->name,
1540aca740ceSJan Kara sb->s_id);
1541aca740ceSJan Kara sb_set_blocksize(sb, block_size(bdev));
1542aca740ceSJan Kara return 0;
1543fe62c3a4SDavid Howells }
1544cf6da236SChristoph Hellwig EXPORT_SYMBOL_GPL(setup_bdev_super);
1545fe62c3a4SDavid Howells
1546fe62c3a4SDavid Howells /**
1547fe62c3a4SDavid Howells * get_tree_bdev - Get a superblock based on a single block device
1548fe62c3a4SDavid Howells * @fc: The filesystem context holding the parameters
1549fe62c3a4SDavid Howells * @fill_super: Helper to initialise a new superblock
1550fe62c3a4SDavid Howells */
get_tree_bdev(struct fs_context * fc,int (* fill_super)(struct super_block *,struct fs_context *))1551fe62c3a4SDavid Howells int get_tree_bdev(struct fs_context *fc,
1552fe62c3a4SDavid Howells int (*fill_super)(struct super_block *,
1553fe62c3a4SDavid Howells struct fs_context *))
1554fe62c3a4SDavid Howells {
1555fe62c3a4SDavid Howells struct super_block *s;
1556fe62c3a4SDavid Howells int error = 0;
1557aca740ceSJan Kara dev_t dev;
1558fe62c3a4SDavid Howells
1559fe62c3a4SDavid Howells if (!fc->source)
1560fe62c3a4SDavid Howells return invalf(fc, "No source specified");
1561fe62c3a4SDavid Howells
1562aca740ceSJan Kara error = lookup_bdev(fc->source, &dev);
1563aca740ceSJan Kara if (error) {
1564aca740ceSJan Kara errorf(fc, "%s: Can't lookup blockdev", fc->source);
1565aca740ceSJan Kara return error;
1566fe62c3a4SDavid Howells }
1567fe62c3a4SDavid Howells
1568fe62c3a4SDavid Howells fc->sb_flags |= SB_NOSEC;
156969881be3SChristian Brauner s = sget_dev(fc, dev);
1570aca740ceSJan Kara if (IS_ERR(s))
1571fe62c3a4SDavid Howells return PTR_ERR(s);
1572fe62c3a4SDavid Howells
1573fe62c3a4SDavid Howells if (s->s_root) {
1574fe62c3a4SDavid Howells /* Don't summarily change the RO/RW state. */
1575fe62c3a4SDavid Howells if ((fc->sb_flags ^ s->s_flags) & SB_RDONLY) {
1576aca740ceSJan Kara warnf(fc, "%pg: Can't mount, would change RO state", s->s_bdev);
1577fe62c3a4SDavid Howells deactivate_locked_super(s);
1578fe62c3a4SDavid Howells return -EBUSY;
1579fe62c3a4SDavid Howells }
1580fe62c3a4SDavid Howells } else {
1581fe62c3a4SDavid Howells /*
1582aca740ceSJan Kara * We drop s_umount here because we need to open the bdev and
1583aca740ceSJan Kara * bdev->open_mutex ranks above s_umount (blkdev_put() ->
1584560e20e4SChristoph Hellwig * bdev_mark_dead()). It is safe because we have active sb
1585aca740ceSJan Kara * reference and SB_BORN is not set yet.
1586fe62c3a4SDavid Howells */
15870ed33598SChristian Brauner super_unlock_excl(s);
1588aca740ceSJan Kara error = setup_bdev_super(s, fc->sb_flags, fc);
15895e874914SChristian Brauner __super_lock_excl(s);
1590aca740ceSJan Kara if (!error)
1591fe62c3a4SDavid Howells error = fill_super(s, fc);
1592fe62c3a4SDavid Howells if (error) {
1593fe62c3a4SDavid Howells deactivate_locked_super(s);
1594fe62c3a4SDavid Howells return error;
1595fe62c3a4SDavid Howells }
1596fe62c3a4SDavid Howells s->s_flags |= SB_ACTIVE;
1597fe62c3a4SDavid Howells }
1598fe62c3a4SDavid Howells
1599fe62c3a4SDavid Howells BUG_ON(fc->root);
1600fe62c3a4SDavid Howells fc->root = dget(s->s_root);
1601fe62c3a4SDavid Howells return 0;
1602fe62c3a4SDavid Howells }
1603fe62c3a4SDavid Howells EXPORT_SYMBOL(get_tree_bdev);
1604fe62c3a4SDavid Howells
test_bdev_super(struct super_block * s,void * data)16051da177e4SLinus Torvalds static int test_bdev_super(struct super_block *s, void *data)
16061da177e4SLinus Torvalds {
1607aca740ceSJan Kara return !(s->s_iflags & SB_I_RETIRED) && s->s_dev == *(dev_t *)data;
16081da177e4SLinus Torvalds }
16091da177e4SLinus Torvalds
mount_bdev(struct file_system_type * fs_type,int flags,const char * dev_name,void * data,int (* fill_super)(struct super_block *,void *,int))1610152a0836SAl Viro struct dentry *mount_bdev(struct file_system_type *fs_type,
16111da177e4SLinus Torvalds int flags, const char *dev_name, void *data,
1612152a0836SAl Viro int (*fill_super)(struct super_block *, void *, int))
16131da177e4SLinus Torvalds {
16141da177e4SLinus Torvalds struct super_block *s;
1615aca740ceSJan Kara int error;
1616aca740ceSJan Kara dev_t dev;
16171da177e4SLinus Torvalds
1618aca740ceSJan Kara error = lookup_bdev(dev_name, &dev);
1619aca740ceSJan Kara if (error)
1620aca740ceSJan Kara return ERR_PTR(error);
16211da177e4SLinus Torvalds
1622aca740ceSJan Kara flags |= SB_NOSEC;
1623aca740ceSJan Kara s = sget(fs_type, test_bdev_super, set_bdev_super, flags, &dev);
16241da177e4SLinus Torvalds if (IS_ERR(s))
1625aca740ceSJan Kara return ERR_CAST(s);
16261da177e4SLinus Torvalds
16271da177e4SLinus Torvalds if (s->s_root) {
1628e462ec50SDavid Howells if ((flags ^ s->s_flags) & SB_RDONLY) {
162974dbbdd7SAl Viro deactivate_locked_super(s);
1630aca740ceSJan Kara return ERR_PTR(-EBUSY);
16311da177e4SLinus Torvalds }
16321da177e4SLinus Torvalds } else {
16331da177e4SLinus Torvalds /*
1634aca740ceSJan Kara * We drop s_umount here because we need to open the bdev and
1635aca740ceSJan Kara * bdev->open_mutex ranks above s_umount (blkdev_put() ->
1636560e20e4SChristoph Hellwig * bdev_mark_dead()). It is safe because we have active sb
1637aca740ceSJan Kara * reference and SB_BORN is not set yet.
16381da177e4SLinus Torvalds */
16390ed33598SChristian Brauner super_unlock_excl(s);
1640aca740ceSJan Kara error = setup_bdev_super(s, flags, NULL);
16415e874914SChristian Brauner __super_lock_excl(s);
1642aca740ceSJan Kara if (!error)
1643e462ec50SDavid Howells error = fill_super(s, data, flags & SB_SILENT ? 1 : 0);
16441da177e4SLinus Torvalds if (error) {
164574dbbdd7SAl Viro deactivate_locked_super(s);
1646aca740ceSJan Kara return ERR_PTR(error);
1647454e2398SDavid Howells }
1648454e2398SDavid Howells
1649e462ec50SDavid Howells s->s_flags |= SB_ACTIVE;
1650fa675765SGreg Kroah-Hartman }
16511da177e4SLinus Torvalds
1652152a0836SAl Viro return dget(s->s_root);
1653152a0836SAl Viro }
1654152a0836SAl Viro EXPORT_SYMBOL(mount_bdev);
1655152a0836SAl Viro
kill_block_super(struct super_block * sb)16561da177e4SLinus Torvalds void kill_block_super(struct super_block *sb)
16571da177e4SLinus Torvalds {
16581da177e4SLinus Torvalds struct block_device *bdev = sb->s_bdev;
16591da177e4SLinus Torvalds
16601da177e4SLinus Torvalds generic_shutdown_super(sb);
1661aca740ceSJan Kara if (bdev) {
16621da177e4SLinus Torvalds sync_blockdev(bdev);
1663*4365d0d6SJan Kara bdev_release(sb->s_bdev_handle);
16641da177e4SLinus Torvalds }
16651da177e4SLinus Torvalds }
16661da177e4SLinus Torvalds
16671da177e4SLinus Torvalds EXPORT_SYMBOL(kill_block_super);
16689361401eSDavid Howells #endif
16691da177e4SLinus Torvalds
mount_nodev(struct file_system_type * fs_type,int flags,void * data,int (* fill_super)(struct super_block *,void *,int))16703c26ff6eSAl Viro struct dentry *mount_nodev(struct file_system_type *fs_type,
16711da177e4SLinus Torvalds int flags, void *data,
16723c26ff6eSAl Viro int (*fill_super)(struct super_block *, void *, int))
16731da177e4SLinus Torvalds {
16741da177e4SLinus Torvalds int error;
16759249e17fSDavid Howells struct super_block *s = sget(fs_type, NULL, set_anon_super, flags, NULL);
16761da177e4SLinus Torvalds
16771da177e4SLinus Torvalds if (IS_ERR(s))
16783c26ff6eSAl Viro return ERR_CAST(s);
16791da177e4SLinus Torvalds
1680e462ec50SDavid Howells error = fill_super(s, data, flags & SB_SILENT ? 1 : 0);
16811da177e4SLinus Torvalds if (error) {
168274dbbdd7SAl Viro deactivate_locked_super(s);
16833c26ff6eSAl Viro return ERR_PTR(error);
16841da177e4SLinus Torvalds }
1685e462ec50SDavid Howells s->s_flags |= SB_ACTIVE;
16863c26ff6eSAl Viro return dget(s->s_root);
16873c26ff6eSAl Viro }
16883c26ff6eSAl Viro EXPORT_SYMBOL(mount_nodev);
16893c26ff6eSAl Viro
reconfigure_single(struct super_block * s,int flags,void * data)1690a6097180SNeilBrown int reconfigure_single(struct super_block *s,
16918d0347f6SDavid Howells int flags, void *data)
16928d0347f6SDavid Howells {
16938d0347f6SDavid Howells struct fs_context *fc;
16948d0347f6SDavid Howells int ret;
16958d0347f6SDavid Howells
16968d0347f6SDavid Howells /* The caller really need to be passing fc down into mount_single(),
16978d0347f6SDavid Howells * then a chunk of this can be removed. [Bollocks -- AV]
16988d0347f6SDavid Howells * Better yet, reconfiguration shouldn't happen, but rather the second
16998d0347f6SDavid Howells * mount should be rejected if the parameters are not compatible.
17008d0347f6SDavid Howells */
17018d0347f6SDavid Howells fc = fs_context_for_reconfigure(s->s_root, flags, MS_RMT_MASK);
17028d0347f6SDavid Howells if (IS_ERR(fc))
17038d0347f6SDavid Howells return PTR_ERR(fc);
17048d0347f6SDavid Howells
17058d0347f6SDavid Howells ret = parse_monolithic_mount_data(fc, data);
17068d0347f6SDavid Howells if (ret < 0)
17078d0347f6SDavid Howells goto out;
17088d0347f6SDavid Howells
17098d0347f6SDavid Howells ret = reconfigure_super(fc);
17108d0347f6SDavid Howells out:
17118d0347f6SDavid Howells put_fs_context(fc);
17128d0347f6SDavid Howells return ret;
17138d0347f6SDavid Howells }
17148d0347f6SDavid Howells
/*
 * sget() match callback for mount_single(): unconditionally reports a match,
 * whatever @s and @p are.
 */
static int compare_single(struct super_block *s, void *p)
{
	return 1;
}
17191da177e4SLinus Torvalds
mount_single(struct file_system_type * fs_type,int flags,void * data,int (* fill_super)(struct super_block *,void *,int))1720fc14f2feSAl Viro struct dentry *mount_single(struct file_system_type *fs_type,
17211da177e4SLinus Torvalds int flags, void *data,
1722fc14f2feSAl Viro int (*fill_super)(struct super_block *, void *, int))
17231da177e4SLinus Torvalds {
17241da177e4SLinus Torvalds struct super_block *s;
17251da177e4SLinus Torvalds int error;
17261da177e4SLinus Torvalds
17279249e17fSDavid Howells s = sget(fs_type, compare_single, set_anon_super, flags, NULL);
17281da177e4SLinus Torvalds if (IS_ERR(s))
1729fc14f2feSAl Viro return ERR_CAST(s);
17301da177e4SLinus Torvalds if (!s->s_root) {
1731e462ec50SDavid Howells error = fill_super(s, data, flags & SB_SILENT ? 1 : 0);
17328d0347f6SDavid Howells if (!error)
1733e462ec50SDavid Howells s->s_flags |= SB_ACTIVE;
17349329d1beSKay Sievers } else {
17358d0347f6SDavid Howells error = reconfigure_single(s, flags, data);
17368d0347f6SDavid Howells }
17378d0347f6SDavid Howells if (unlikely(error)) {
17388d0347f6SDavid Howells deactivate_locked_super(s);
17398d0347f6SDavid Howells return ERR_PTR(error);
17409329d1beSKay Sievers }
1741fc14f2feSAl Viro return dget(s->s_root);
1742fc14f2feSAl Viro }
1743fc14f2feSAl Viro EXPORT_SYMBOL(mount_single);
1744fc14f2feSAl Viro
17459bc61ab1SDavid Howells /**
17469bc61ab1SDavid Howells * vfs_get_tree - Get the mountable root
17479bc61ab1SDavid Howells * @fc: The superblock configuration context.
17489bc61ab1SDavid Howells *
17499bc61ab1SDavid Howells * The filesystem is invoked to get or create a superblock which can then later
17509bc61ab1SDavid Howells * be used for mounting. The filesystem places a pointer to the root to be
17519bc61ab1SDavid Howells * used for mounting in @fc->root.
17529bc61ab1SDavid Howells */
vfs_get_tree(struct fs_context * fc)17539bc61ab1SDavid Howells int vfs_get_tree(struct fs_context *fc)
17541da177e4SLinus Torvalds {
17559d412a43SAl Viro struct super_block *sb;
17569bc61ab1SDavid Howells int error;
17578089352aSAl Viro
1758f3a09c92SAl Viro if (fc->root)
1759f3a09c92SAl Viro return -EBUSY;
1760f3a09c92SAl Viro
1761f3a09c92SAl Viro /* Get the mountable root in fc->root, with a ref on the root and a ref
1762f3a09c92SAl Viro * on the superblock.
1763f3a09c92SAl Viro */
1764f3a09c92SAl Viro error = fc->ops->get_tree(fc);
17659bc61ab1SDavid Howells if (error < 0)
17669bc61ab1SDavid Howells return error;
17671da177e4SLinus Torvalds
1768f3a09c92SAl Viro if (!fc->root) {
1769f3a09c92SAl Viro pr_err("Filesystem %s get_tree() didn't set fc->root\n",
1770f3a09c92SAl Viro fc->fs_type->name);
1771f3a09c92SAl Viro /* We don't know what the locking state of the superblock is -
1772f3a09c92SAl Viro * if there is a superblock.
1773f3a09c92SAl Viro */
1774f3a09c92SAl Viro BUG();
1775f3a09c92SAl Viro }
1776f3a09c92SAl Viro
17779bc61ab1SDavid Howells sb = fc->root->d_sb;
17789d412a43SAl Viro WARN_ON(!sb->s_bdi);
177979f546a6SDave Chinner
178079f546a6SDave Chinner /*
17815e874914SChristian Brauner * super_wake() contains a memory barrier which also care of
17825e874914SChristian Brauner * ordering for super_cache_count(). We place it before setting
17835e874914SChristian Brauner * SB_BORN as the data dependency between the two functions is
17845e874914SChristian Brauner * the superblock structure contents that we just set up, not
17855e874914SChristian Brauner * the SB_BORN flag.
178679f546a6SDave Chinner */
17875e874914SChristian Brauner super_wake(sb, SB_BORN);
1788454e2398SDavid Howells
17899bc61ab1SDavid Howells error = security_sb_set_mnt_opts(sb, fc->security, 0, NULL);
1790c9ce29edSAl Viro if (unlikely(error)) {
1791c9ce29edSAl Viro fc_drop_locked(fc);
1792c9ce29edSAl Viro return error;
1793a10d7c22SAl Viro }
1794a10d7c22SAl Viro
179542cb56aeSJeff Layton /*
179642cb56aeSJeff Layton * filesystems should never set s_maxbytes larger than MAX_LFS_FILESIZE
179742cb56aeSJeff Layton * but s_maxbytes was an unsigned long long for many releases. Throw
179842cb56aeSJeff Layton * this warning for a little while to try and catch filesystems that
17994358b567SJeff Layton * violate this rule.
180042cb56aeSJeff Layton */
18019d412a43SAl Viro WARN((sb->s_maxbytes < 0), "%s set sb->s_maxbytes to "
18029bc61ab1SDavid Howells "negative value (%lld)\n", fc->fs_type->name, sb->s_maxbytes);
180342cb56aeSJeff Layton
18049bc61ab1SDavid Howells return 0;
18051da177e4SLinus Torvalds }
18069bc61ab1SDavid Howells EXPORT_SYMBOL(vfs_get_tree);
18071da177e4SLinus Torvalds
18085accdf82SJan Kara /*
1809fca39346SJan Kara * Setup private BDI for given superblock. It gets automatically cleaned up
1810fca39346SJan Kara * in generic_shutdown_super().
1811fca39346SJan Kara */
super_setup_bdi_name(struct super_block * sb,char * fmt,...)1812fca39346SJan Kara int super_setup_bdi_name(struct super_block *sb, char *fmt, ...)
1813fca39346SJan Kara {
1814fca39346SJan Kara struct backing_dev_info *bdi;
1815fca39346SJan Kara int err;
1816fca39346SJan Kara va_list args;
1817fca39346SJan Kara
1818aef33c2fSChristoph Hellwig bdi = bdi_alloc(NUMA_NO_NODE);
1819fca39346SJan Kara if (!bdi)
1820fca39346SJan Kara return -ENOMEM;
1821fca39346SJan Kara
1822fca39346SJan Kara va_start(args, fmt);
18237c4cc300SJan Kara err = bdi_register_va(bdi, fmt, args);
1824fca39346SJan Kara va_end(args);
1825fca39346SJan Kara if (err) {
1826fca39346SJan Kara bdi_put(bdi);
1827fca39346SJan Kara return err;
1828fca39346SJan Kara }
1829fca39346SJan Kara WARN_ON(sb->s_bdi != &noop_backing_dev_info);
1830fca39346SJan Kara sb->s_bdi = bdi;
18310b3ea092SChristoph Hellwig sb->s_iflags |= SB_I_PERSB_BDI;
1832fca39346SJan Kara
1833fca39346SJan Kara return 0;
1834fca39346SJan Kara }
1835fca39346SJan Kara EXPORT_SYMBOL(super_setup_bdi_name);
1836fca39346SJan Kara
1837fca39346SJan Kara /*
1838fca39346SJan Kara * Setup private BDI for given superblock. I gets automatically cleaned up
1839fca39346SJan Kara * in generic_shutdown_super().
1840fca39346SJan Kara */
super_setup_bdi(struct super_block * sb)1841fca39346SJan Kara int super_setup_bdi(struct super_block *sb)
1842fca39346SJan Kara {
1843fca39346SJan Kara static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);
1844fca39346SJan Kara
1845fca39346SJan Kara return super_setup_bdi_name(sb, "%.28s-%ld", sb->s_type->name,
1846fca39346SJan Kara atomic_long_inc_return(&bdi_seq));
1847fca39346SJan Kara }
1848fca39346SJan Kara EXPORT_SYMBOL(super_setup_bdi);
1849fca39346SJan Kara
18505accdf82SJan Kara /**
18515accdf82SJan Kara * sb_wait_write - wait until all writers to given file system finish
18525accdf82SJan Kara * @sb: the super for which we wait
18535accdf82SJan Kara * @level: type of writers we wait for (normal vs page fault)
18545accdf82SJan Kara *
18555accdf82SJan Kara * This function waits until there are no writers of given type to given file
18568129ed29SOleg Nesterov * system.
18575accdf82SJan Kara */
sb_wait_write(struct super_block * sb,int level)18585accdf82SJan Kara static void sb_wait_write(struct super_block *sb, int level)
18595accdf82SJan Kara {
18608129ed29SOleg Nesterov percpu_down_write(sb->s_writers.rw_sem + level-1);
18618129ed29SOleg Nesterov }
18625accdf82SJan Kara
1863f1a96220SOleg Nesterov /*
1864f1a96220SOleg Nesterov * We are going to return to userspace and forget about these locks, the
1865f1a96220SOleg Nesterov * ownership goes to the caller of thaw_super() which does unlock().
1866f1a96220SOleg Nesterov */
lockdep_sb_freeze_release(struct super_block * sb)1867f1a96220SOleg Nesterov static void lockdep_sb_freeze_release(struct super_block *sb)
1868f1a96220SOleg Nesterov {
1869f1a96220SOleg Nesterov int level;
1870f1a96220SOleg Nesterov
1871f1a96220SOleg Nesterov for (level = SB_FREEZE_LEVELS - 1; level >= 0; level--)
1872f1a96220SOleg Nesterov percpu_rwsem_release(sb->s_writers.rw_sem + level, 0, _THIS_IP_);
1873f1a96220SOleg Nesterov }
1874f1a96220SOleg Nesterov
1875f1a96220SOleg Nesterov /*
1876f1a96220SOleg Nesterov * Tell lockdep we are holding these locks before we call ->unfreeze_fs(sb).
1877f1a96220SOleg Nesterov */
lockdep_sb_freeze_acquire(struct super_block * sb)1878f1a96220SOleg Nesterov static void lockdep_sb_freeze_acquire(struct super_block *sb)
18798129ed29SOleg Nesterov {
18808129ed29SOleg Nesterov int level;
18815accdf82SJan Kara
18828129ed29SOleg Nesterov for (level = 0; level < SB_FREEZE_LEVELS; ++level)
18838129ed29SOleg Nesterov percpu_rwsem_acquire(sb->s_writers.rw_sem + level, 0, _THIS_IP_);
1884f1a96220SOleg Nesterov }
1885f1a96220SOleg Nesterov
sb_freeze_unlock(struct super_block * sb,int level)18862719c716SDarrick J. Wong static void sb_freeze_unlock(struct super_block *sb, int level)
1887f1a96220SOleg Nesterov {
18882719c716SDarrick J. Wong for (level--; level >= 0; level--)
18898129ed29SOleg Nesterov percpu_up_write(sb->s_writers.rw_sem + level);
18905accdf82SJan Kara }
18915accdf82SJan Kara
wait_for_partially_frozen(struct super_block * sb)189259ba4fddSDarrick J. Wong static int wait_for_partially_frozen(struct super_block *sb)
189359ba4fddSDarrick J. Wong {
189459ba4fddSDarrick J. Wong int ret = 0;
189559ba4fddSDarrick J. Wong
189659ba4fddSDarrick J. Wong do {
189759ba4fddSDarrick J. Wong unsigned short old = sb->s_writers.frozen;
189859ba4fddSDarrick J. Wong
189959ba4fddSDarrick J. Wong up_write(&sb->s_umount);
190059ba4fddSDarrick J. Wong ret = wait_var_event_killable(&sb->s_writers.frozen,
190159ba4fddSDarrick J. Wong sb->s_writers.frozen != old);
190259ba4fddSDarrick J. Wong down_write(&sb->s_umount);
190359ba4fddSDarrick J. Wong } while (ret == 0 &&
190459ba4fddSDarrick J. Wong sb->s_writers.frozen != SB_UNFROZEN &&
190559ba4fddSDarrick J. Wong sb->s_writers.frozen != SB_FREEZE_COMPLETE);
190659ba4fddSDarrick J. Wong
190759ba4fddSDarrick J. Wong return ret;
190859ba4fddSDarrick J. Wong }
190959ba4fddSDarrick J. Wong
191018e9e510SJosef Bacik /**
19117000d3c4SRandy Dunlap * freeze_super - lock the filesystem and force it into a consistent state
19127000d3c4SRandy Dunlap * @sb: the super to lock
1913880b9577SDarrick J. Wong * @who: context that wants to freeze
191418e9e510SJosef Bacik *
191518e9e510SJosef Bacik * Syncs the super to make sure the filesystem is consistent and calls the fs's
1916880b9577SDarrick J. Wong * freeze_fs. Subsequent calls to this without first thawing the fs may return
191718e9e510SJosef Bacik * -EBUSY.
19185accdf82SJan Kara *
1919880b9577SDarrick J. Wong * @who should be:
1920880b9577SDarrick J. Wong * * %FREEZE_HOLDER_USERSPACE if userspace wants to freeze the fs;
1921880b9577SDarrick J. Wong * * %FREEZE_HOLDER_KERNEL if the kernel wants to freeze the fs.
1922880b9577SDarrick J. Wong *
1923880b9577SDarrick J. Wong * The @who argument distinguishes between the kernel and userspace trying to
1924880b9577SDarrick J. Wong * freeze the filesystem. Although there cannot be multiple kernel freezes or
1925880b9577SDarrick J. Wong * multiple userspace freezes in effect at any given time, the kernel and
1926880b9577SDarrick J. Wong * userspace can both hold a filesystem frozen. The filesystem remains frozen
1927880b9577SDarrick J. Wong * until there are no kernel or userspace freezes in effect.
1928880b9577SDarrick J. Wong *
19295accdf82SJan Kara * During this function, sb->s_writers.frozen goes through these values:
19305accdf82SJan Kara *
19315accdf82SJan Kara * SB_UNFROZEN: File system is normal, all writes progress as usual.
19325accdf82SJan Kara *
19335accdf82SJan Kara * SB_FREEZE_WRITE: The file system is in the process of being frozen. New
19345accdf82SJan Kara * writes should be blocked, though page faults are still allowed. We wait for
19355accdf82SJan Kara * all writes to complete and then proceed to the next stage.
19365accdf82SJan Kara *
19375accdf82SJan Kara * SB_FREEZE_PAGEFAULT: Freezing continues. Now also page faults are blocked
19385accdf82SJan Kara * but internal fs threads can still modify the filesystem (although they
19395accdf82SJan Kara * should not dirty new pages or inodes), writeback can run etc. After waiting
19405accdf82SJan Kara * for all running page faults we sync the filesystem which will clean all
19415accdf82SJan Kara * dirty pages and inodes (no new dirty pages or inodes can be created when
19425accdf82SJan Kara * sync is running).
19435accdf82SJan Kara *
19445accdf82SJan Kara * SB_FREEZE_FS: The file system is frozen. Now all internal sources of fs
19455accdf82SJan Kara * modification are blocked (e.g. XFS preallocation truncation on inode
19465accdf82SJan Kara * reclaim). This is usually implemented by blocking new transactions for
19475accdf82SJan Kara * filesystems that have them and need this additional guard. After all
19485accdf82SJan Kara * internal writers are finished we call ->freeze_fs() to finish filesystem
19495accdf82SJan Kara * freezing. Then we transition to SB_FREEZE_COMPLETE state. This state is
19505accdf82SJan Kara * mostly auxiliary for filesystems to verify they do not modify frozen fs.
19515accdf82SJan Kara *
19525accdf82SJan Kara * sb->s_writers.frozen is protected by sb->s_umount.
195318e9e510SJosef Bacik */
freeze_super(struct super_block * sb,enum freeze_holder who)1954880b9577SDarrick J. Wong int freeze_super(struct super_block *sb, enum freeze_holder who)
195518e9e510SJosef Bacik {
195618e9e510SJosef Bacik int ret;
195718e9e510SJosef Bacik
195818e9e510SJosef Bacik atomic_inc(&sb->s_active);
1959051178c3SChristian Brauner if (!super_lock_excl(sb))
1960051178c3SChristian Brauner WARN(1, "Dying superblock while freezing!");
1961051178c3SChristian Brauner
196259ba4fddSDarrick J. Wong retry:
1963880b9577SDarrick J. Wong if (sb->s_writers.frozen == SB_FREEZE_COMPLETE) {
1964880b9577SDarrick J. Wong if (sb->s_writers.freeze_holders & who) {
196518e9e510SJosef Bacik deactivate_locked_super(sb);
196618e9e510SJosef Bacik return -EBUSY;
196718e9e510SJosef Bacik }
196818e9e510SJosef Bacik
1969880b9577SDarrick J. Wong WARN_ON(sb->s_writers.freeze_holders == 0);
1970880b9577SDarrick J. Wong
1971880b9577SDarrick J. Wong /*
1972880b9577SDarrick J. Wong * Someone else already holds this type of freeze; share the
1973880b9577SDarrick J. Wong * freeze and assign the active ref to the freeze.
1974880b9577SDarrick J. Wong */
1975880b9577SDarrick J. Wong sb->s_writers.freeze_holders |= who;
19763fb5a656SChristian Brauner super_unlock_excl(sb);
1977880b9577SDarrick J. Wong return 0;
1978880b9577SDarrick J. Wong }
1979880b9577SDarrick J. Wong
198018e9e510SJosef Bacik if (sb->s_writers.frozen != SB_UNFROZEN) {
198159ba4fddSDarrick J. Wong ret = wait_for_partially_frozen(sb);
198259ba4fddSDarrick J. Wong if (ret) {
198318e9e510SJosef Bacik deactivate_locked_super(sb);
198459ba4fddSDarrick J. Wong return ret;
198559ba4fddSDarrick J. Wong }
198659ba4fddSDarrick J. Wong
198759ba4fddSDarrick J. Wong goto retry;
198818e9e510SJosef Bacik }
198918e9e510SJosef Bacik
1990e462ec50SDavid Howells if (!(sb->s_flags & SB_BORN)) {
19910ed33598SChristian Brauner super_unlock_excl(sb);
1992dabe0dc1SAl Viro return 0; /* sic - it's "nothing to do" */
1993dabe0dc1SAl Viro }
1994dabe0dc1SAl Viro
1995bc98a42cSDavid Howells if (sb_rdonly(sb)) {
19965accdf82SJan Kara /* Nothing to do really... */
1997880b9577SDarrick J. Wong sb->s_writers.freeze_holders |= who;
19985accdf82SJan Kara sb->s_writers.frozen = SB_FREEZE_COMPLETE;
199959ba4fddSDarrick J. Wong wake_up_var(&sb->s_writers.frozen);
20000ed33598SChristian Brauner super_unlock_excl(sb);
200118e9e510SJosef Bacik return 0;
200218e9e510SJosef Bacik }
200318e9e510SJosef Bacik
20045accdf82SJan Kara sb->s_writers.frozen = SB_FREEZE_WRITE;
20055accdf82SJan Kara /* Release s_umount to preserve sb_start_write -> s_umount ordering */
20060ed33598SChristian Brauner super_unlock_excl(sb);
20075accdf82SJan Kara sb_wait_write(sb, SB_FREEZE_WRITE);
2008051178c3SChristian Brauner if (!super_lock_excl(sb))
2009051178c3SChristian Brauner WARN(1, "Dying superblock while freezing!");
20105accdf82SJan Kara
20115accdf82SJan Kara /* Now we go and block page faults... */
20125accdf82SJan Kara sb->s_writers.frozen = SB_FREEZE_PAGEFAULT;
20135accdf82SJan Kara sb_wait_write(sb, SB_FREEZE_PAGEFAULT);
20145accdf82SJan Kara
20155accdf82SJan Kara /* All writers are done so after syncing there won't be dirty data */
20162719c716SDarrick J. Wong ret = sync_filesystem(sb);
20172719c716SDarrick J. Wong if (ret) {
20182719c716SDarrick J. Wong sb->s_writers.frozen = SB_UNFROZEN;
20192719c716SDarrick J. Wong sb_freeze_unlock(sb, SB_FREEZE_PAGEFAULT);
202059ba4fddSDarrick J. Wong wake_up_var(&sb->s_writers.frozen);
20212719c716SDarrick J. Wong deactivate_locked_super(sb);
20222719c716SDarrick J. Wong return ret;
20232719c716SDarrick J. Wong }
202418e9e510SJosef Bacik
20255accdf82SJan Kara /* Now wait for internal filesystem counter */
20265accdf82SJan Kara sb->s_writers.frozen = SB_FREEZE_FS;
20275accdf82SJan Kara sb_wait_write(sb, SB_FREEZE_FS);
202818e9e510SJosef Bacik
202918e9e510SJosef Bacik if (sb->s_op->freeze_fs) {
203018e9e510SJosef Bacik ret = sb->s_op->freeze_fs(sb);
203118e9e510SJosef Bacik if (ret) {
203218e9e510SJosef Bacik printk(KERN_ERR
203318e9e510SJosef Bacik "VFS:Filesystem freeze failed\n");
20345accdf82SJan Kara sb->s_writers.frozen = SB_UNFROZEN;
20352719c716SDarrick J. Wong sb_freeze_unlock(sb, SB_FREEZE_FS);
203659ba4fddSDarrick J. Wong wake_up_var(&sb->s_writers.frozen);
203718e9e510SJosef Bacik deactivate_locked_super(sb);
203818e9e510SJosef Bacik return ret;
203918e9e510SJosef Bacik }
204018e9e510SJosef Bacik }
20415accdf82SJan Kara /*
204289f39af1SOleg Nesterov * For debugging purposes so that fs can warn if it sees write activity
204389f39af1SOleg Nesterov * when frozen is set to SB_FREEZE_COMPLETE, and for thaw_super().
20445accdf82SJan Kara */
2045880b9577SDarrick J. Wong sb->s_writers.freeze_holders |= who;
20465accdf82SJan Kara sb->s_writers.frozen = SB_FREEZE_COMPLETE;
204759ba4fddSDarrick J. Wong wake_up_var(&sb->s_writers.frozen);
2048f1a96220SOleg Nesterov lockdep_sb_freeze_release(sb);
20490ed33598SChristian Brauner super_unlock_excl(sb);
205018e9e510SJosef Bacik return 0;
205118e9e510SJosef Bacik }
205218e9e510SJosef Bacik EXPORT_SYMBOL(freeze_super);
205318e9e510SJosef Bacik
2054880b9577SDarrick J. Wong /*
2055880b9577SDarrick J. Wong * Undoes the effect of a freeze_super_locked call. If the filesystem is
2056880b9577SDarrick J. Wong * frozen both by userspace and the kernel, a thaw call from either source
2057880b9577SDarrick J. Wong * removes that state without releasing the other state or unlocking the
2058880b9577SDarrick J. Wong * filesystem.
2059880b9577SDarrick J. Wong */
thaw_super_locked(struct super_block * sb,enum freeze_holder who)2060880b9577SDarrick J. Wong static int thaw_super_locked(struct super_block *sb, enum freeze_holder who)
206118e9e510SJosef Bacik {
206218e9e510SJosef Bacik int error;
206318e9e510SJosef Bacik
2064880b9577SDarrick J. Wong if (sb->s_writers.frozen == SB_FREEZE_COMPLETE) {
2065880b9577SDarrick J. Wong if (!(sb->s_writers.freeze_holders & who)) {
20663fb5a656SChristian Brauner super_unlock_excl(sb);
2067880b9577SDarrick J. Wong return -EINVAL;
2068880b9577SDarrick J. Wong }
2069880b9577SDarrick J. Wong
2070880b9577SDarrick J. Wong /*
2071880b9577SDarrick J. Wong * Freeze is shared with someone else. Release our hold and
2072880b9577SDarrick J. Wong * drop the active ref that freeze_super assigned to the
2073880b9577SDarrick J. Wong * freezer.
2074880b9577SDarrick J. Wong */
2075880b9577SDarrick J. Wong if (sb->s_writers.freeze_holders & ~who) {
2076880b9577SDarrick J. Wong sb->s_writers.freeze_holders &= ~who;
2077880b9577SDarrick J. Wong deactivate_locked_super(sb);
2078880b9577SDarrick J. Wong return 0;
2079880b9577SDarrick J. Wong }
2080880b9577SDarrick J. Wong } else {
20810ed33598SChristian Brauner super_unlock_excl(sb);
208218e9e510SJosef Bacik return -EINVAL;
208318e9e510SJosef Bacik }
208418e9e510SJosef Bacik
2085bc98a42cSDavid Howells if (sb_rdonly(sb)) {
2086880b9577SDarrick J. Wong sb->s_writers.freeze_holders &= ~who;
20878129ed29SOleg Nesterov sb->s_writers.frozen = SB_UNFROZEN;
208859ba4fddSDarrick J. Wong wake_up_var(&sb->s_writers.frozen);
208918e9e510SJosef Bacik goto out;
20908129ed29SOleg Nesterov }
209118e9e510SJosef Bacik
2092f1a96220SOleg Nesterov lockdep_sb_freeze_acquire(sb);
2093f1a96220SOleg Nesterov
209418e9e510SJosef Bacik if (sb->s_op->unfreeze_fs) {
209518e9e510SJosef Bacik error = sb->s_op->unfreeze_fs(sb);
209618e9e510SJosef Bacik if (error) {
20973fb5a656SChristian Brauner printk(KERN_ERR "VFS:Filesystem thaw failed\n");
2098f1a96220SOleg Nesterov lockdep_sb_freeze_release(sb);
20990ed33598SChristian Brauner super_unlock_excl(sb);
210018e9e510SJosef Bacik return error;
210118e9e510SJosef Bacik }
210218e9e510SJosef Bacik }
210318e9e510SJosef Bacik
2104880b9577SDarrick J. Wong sb->s_writers.freeze_holders &= ~who;
21055accdf82SJan Kara sb->s_writers.frozen = SB_UNFROZEN;
210659ba4fddSDarrick J. Wong wake_up_var(&sb->s_writers.frozen);
21072719c716SDarrick J. Wong sb_freeze_unlock(sb, SB_FREEZE_FS);
21088129ed29SOleg Nesterov out:
210918e9e510SJosef Bacik deactivate_locked_super(sb);
211018e9e510SJosef Bacik return 0;
211118e9e510SJosef Bacik }
211208fdc8a0SMateusz Guzik
2113961f3c89SMauro Carvalho Chehab /**
2114961f3c89SMauro Carvalho Chehab * thaw_super -- unlock filesystem
2115961f3c89SMauro Carvalho Chehab * @sb: the super to thaw
2116880b9577SDarrick J. Wong * @who: context that wants to freeze
2117961f3c89SMauro Carvalho Chehab *
2118880b9577SDarrick J. Wong * Unlocks the filesystem and marks it writeable again after freeze_super()
2119880b9577SDarrick J. Wong * if there are no remaining freezes on the filesystem.
2120880b9577SDarrick J. Wong *
2121880b9577SDarrick J. Wong * @who should be:
2122880b9577SDarrick J. Wong * * %FREEZE_HOLDER_USERSPACE if userspace wants to thaw the fs;
2123880b9577SDarrick J. Wong * * %FREEZE_HOLDER_KERNEL if the kernel wants to thaw the fs.
2124961f3c89SMauro Carvalho Chehab */
thaw_super(struct super_block * sb,enum freeze_holder who)2125880b9577SDarrick J. Wong int thaw_super(struct super_block *sb, enum freeze_holder who)
212608fdc8a0SMateusz Guzik {
2127051178c3SChristian Brauner if (!super_lock_excl(sb))
2128051178c3SChristian Brauner WARN(1, "Dying superblock while thawing!");
2129880b9577SDarrick J. Wong return thaw_super_locked(sb, who);
213008fdc8a0SMateusz Guzik }
213118e9e510SJosef Bacik EXPORT_SYMBOL(thaw_super);
2132439bc39bSChristoph Hellwig
2133439bc39bSChristoph Hellwig /*
2134439bc39bSChristoph Hellwig * Create workqueue for deferred direct IO completions. We allocate the
2135439bc39bSChristoph Hellwig * workqueue when it's first needed. This avoids creating workqueue for
2136439bc39bSChristoph Hellwig * filesystems that don't need it and also allows us to create the workqueue
2137439bc39bSChristoph Hellwig * late enough so the we can include s_id in the name of the workqueue.
2138439bc39bSChristoph Hellwig */
sb_init_dio_done_wq(struct super_block * sb)2139439bc39bSChristoph Hellwig int sb_init_dio_done_wq(struct super_block *sb)
2140439bc39bSChristoph Hellwig {
2141439bc39bSChristoph Hellwig struct workqueue_struct *old;
2142439bc39bSChristoph Hellwig struct workqueue_struct *wq = alloc_workqueue("dio/%s",
2143439bc39bSChristoph Hellwig WQ_MEM_RECLAIM, 0,
2144439bc39bSChristoph Hellwig sb->s_id);
2145439bc39bSChristoph Hellwig if (!wq)
2146439bc39bSChristoph Hellwig return -ENOMEM;
2147439bc39bSChristoph Hellwig /*
2148439bc39bSChristoph Hellwig * This has to be atomic as more DIOs can race to create the workqueue
2149439bc39bSChristoph Hellwig */
2150439bc39bSChristoph Hellwig old = cmpxchg(&sb->s_dio_done_wq, NULL, wq);
2151439bc39bSChristoph Hellwig /* Someone created workqueue before us? Free ours... */
2152439bc39bSChristoph Hellwig if (old)
2153439bc39bSChristoph Hellwig destroy_workqueue(wq);
2154439bc39bSChristoph Hellwig return 0;
2155439bc39bSChristoph Hellwig }
2156