xref: /openbmc/linux/fs/ext2/super.c (revision 051d4420)
1  // SPDX-License-Identifier: GPL-2.0-only
2  /*
3   *  linux/fs/ext2/super.c
4   *
5   * Copyright (C) 1992, 1993, 1994, 1995
6   * Remy Card (card@masi.ibp.fr)
7   * Laboratoire MASI - Institut Blaise Pascal
8   * Universite Pierre et Marie Curie (Paris VI)
9   *
10   *  from
11   *
12   *  linux/fs/minix/inode.c
13   *
14   *  Copyright (C) 1991, 1992  Linus Torvalds
15   *
16   *  Big-endian to little-endian byte-swapping/bitmaps by
17   *        David S. Miller (davem@caip.rutgers.edu), 1995
18   */
19  
20  #include <linux/module.h>
21  #include <linux/string.h>
22  #include <linux/fs.h>
23  #include <linux/slab.h>
24  #include <linux/init.h>
25  #include <linux/blkdev.h>
26  #include <linux/parser.h>
27  #include <linux/random.h>
28  #include <linux/buffer_head.h>
29  #include <linux/exportfs.h>
30  #include <linux/vfs.h>
31  #include <linux/seq_file.h>
32  #include <linux/mount.h>
33  #include <linux/log2.h>
34  #include <linux/quotaops.h>
35  #include <linux/uaccess.h>
36  #include <linux/dax.h>
37  #include <linux/iversion.h>
38  #include "ext2.h"
39  #include "xattr.h"
40  #include "acl.h"
41  
42  static void ext2_write_super(struct super_block *sb);
43  static int ext2_remount (struct super_block * sb, int * flags, char * data);
44  static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf);
45  static int ext2_sync_fs(struct super_block *sb, int wait);
46  static int ext2_freeze(struct super_block *sb);
47  static int ext2_unfreeze(struct super_block *sb);
48  
49  void ext2_error(struct super_block *sb, const char *function,
50  		const char *fmt, ...)
51  {
52  	struct va_format vaf;
53  	va_list args;
54  	struct ext2_sb_info *sbi = EXT2_SB(sb);
55  	struct ext2_super_block *es = sbi->s_es;
56  
57  	if (!sb_rdonly(sb)) {
58  		spin_lock(&sbi->s_lock);
59  		sbi->s_mount_state |= EXT2_ERROR_FS;
60  		es->s_state |= cpu_to_le16(EXT2_ERROR_FS);
61  		spin_unlock(&sbi->s_lock);
62  		ext2_sync_super(sb, es, 1);
63  	}
64  
65  	va_start(args, fmt);
66  
67  	vaf.fmt = fmt;
68  	vaf.va = &args;
69  
70  	printk(KERN_CRIT "EXT2-fs (%s): error: %s: %pV\n",
71  	       sb->s_id, function, &vaf);
72  
73  	va_end(args);
74  
75  	if (test_opt(sb, ERRORS_PANIC))
76  		panic("EXT2-fs: panic from previous error\n");
77  	if (!sb_rdonly(sb) && test_opt(sb, ERRORS_RO)) {
78  		ext2_msg(sb, KERN_CRIT,
79  			     "error: remounting filesystem read-only");
80  		sb->s_flags |= SB_RDONLY;
81  	}
82  }
83  
84  void ext2_msg(struct super_block *sb, const char *prefix,
85  		const char *fmt, ...)
86  {
87  	struct va_format vaf;
88  	va_list args;
89  
90  	va_start(args, fmt);
91  
92  	vaf.fmt = fmt;
93  	vaf.va = &args;
94  
95  	printk("%sEXT2-fs (%s): %pV\n", prefix, sb->s_id, &vaf);
96  
97  	va_end(args);
98  }
99  
100  /*
101   * This must be called with sbi->s_lock held.
102   */
103  void ext2_update_dynamic_rev(struct super_block *sb)
104  {
105  	struct ext2_super_block *es = EXT2_SB(sb)->s_es;
106  
107  	if (le32_to_cpu(es->s_rev_level) > EXT2_GOOD_OLD_REV)
108  		return;
109  
110  	ext2_msg(sb, KERN_WARNING,
111  		     "warning: updating to rev %d because of "
112  		     "new feature flag, running e2fsck is recommended",
113  		     EXT2_DYNAMIC_REV);
114  
115  	es->s_first_ino = cpu_to_le32(EXT2_GOOD_OLD_FIRST_INO);
116  	es->s_inode_size = cpu_to_le16(EXT2_GOOD_OLD_INODE_SIZE);
117  	es->s_rev_level = cpu_to_le32(EXT2_DYNAMIC_REV);
118  	/* leave es->s_feature_*compat flags alone */
119  	/* es->s_uuid will be set by e2fsck if empty */
120  
121  	/*
122  	 * The rest of the superblock fields should be zero, and if not it
123  	 * means they are likely already in use, so leave them alone.  We
124  	 * can leave it up to e2fsck to clean up any inconsistencies there.
125  	 */
126  }
127  
#ifdef CONFIG_QUOTA
static int ext2_quota_off(struct super_block *sb, int type);

/* Call ext2_quota_off() for every quota type from 0 up to MAXQUOTAS - 1. */
static void ext2_quota_off_umount(struct super_block *sb)
{
	int type = 0;

	while (type < MAXQUOTAS) {
		ext2_quota_off(sb, type);
		type++;
	}
}
#else
/* Without CONFIG_QUOTA there is nothing to switch off. */
static inline void ext2_quota_off_umount(struct super_block *sb)
{
}
#endif
143  
144  static void ext2_put_super (struct super_block * sb)
145  {
146  	int db_count;
147  	int i;
148  	struct ext2_sb_info *sbi = EXT2_SB(sb);
149  
150  	ext2_quota_off_umount(sb);
151  
152  	ext2_xattr_destroy_cache(sbi->s_ea_block_cache);
153  	sbi->s_ea_block_cache = NULL;
154  
155  	if (!sb_rdonly(sb)) {
156  		struct ext2_super_block *es = sbi->s_es;
157  
158  		spin_lock(&sbi->s_lock);
159  		es->s_state = cpu_to_le16(sbi->s_mount_state);
160  		spin_unlock(&sbi->s_lock);
161  		ext2_sync_super(sb, es, 1);
162  	}
163  	db_count = sbi->s_gdb_count;
164  	for (i = 0; i < db_count; i++)
165  		brelse(sbi->s_group_desc[i]);
166  	kvfree(sbi->s_group_desc);
167  	kfree(sbi->s_debts);
168  	percpu_counter_destroy(&sbi->s_freeblocks_counter);
169  	percpu_counter_destroy(&sbi->s_freeinodes_counter);
170  	percpu_counter_destroy(&sbi->s_dirs_counter);
171  	brelse (sbi->s_sbh);
172  	sb->s_fs_info = NULL;
173  	kfree(sbi->s_blockgroup_lock);
174  	fs_put_dax(sbi->s_daxdev, NULL);
175  	kfree(sbi);
176  }
177  
178  static struct kmem_cache * ext2_inode_cachep;
179  
180  static struct inode *ext2_alloc_inode(struct super_block *sb)
181  {
182  	struct ext2_inode_info *ei;
183  	ei = alloc_inode_sb(sb, ext2_inode_cachep, GFP_KERNEL);
184  	if (!ei)
185  		return NULL;
186  	ei->i_block_alloc_info = NULL;
187  	inode_set_iversion(&ei->vfs_inode, 1);
188  #ifdef CONFIG_QUOTA
189  	memset(&ei->i_dquot, 0, sizeof(ei->i_dquot));
190  #endif
191  
192  	return &ei->vfs_inode;
193  }
194  
195  static void ext2_free_in_core_inode(struct inode *inode)
196  {
197  	kmem_cache_free(ext2_inode_cachep, EXT2_I(inode));
198  }
199  
200  static void init_once(void *foo)
201  {
202  	struct ext2_inode_info *ei = (struct ext2_inode_info *) foo;
203  
204  	rwlock_init(&ei->i_meta_lock);
205  #ifdef CONFIG_EXT2_FS_XATTR
206  	init_rwsem(&ei->xattr_sem);
207  #endif
208  	mutex_init(&ei->truncate_mutex);
209  	inode_init_once(&ei->vfs_inode);
210  }
211  
212  static int __init init_inodecache(void)
213  {
214  	ext2_inode_cachep = kmem_cache_create_usercopy("ext2_inode_cache",
215  				sizeof(struct ext2_inode_info), 0,
216  				(SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|
217  					SLAB_ACCOUNT),
218  				offsetof(struct ext2_inode_info, i_data),
219  				sizeof_field(struct ext2_inode_info, i_data),
220  				init_once);
221  	if (ext2_inode_cachep == NULL)
222  		return -ENOMEM;
223  	return 0;
224  }
225  
226  static void destroy_inodecache(void)
227  {
228  	/*
229  	 * Make sure all delayed rcu free inodes are flushed before we
230  	 * destroy cache.
231  	 */
232  	rcu_barrier();
233  	kmem_cache_destroy(ext2_inode_cachep);
234  }
235  
236  static int ext2_show_options(struct seq_file *seq, struct dentry *root)
237  {
238  	struct super_block *sb = root->d_sb;
239  	struct ext2_sb_info *sbi = EXT2_SB(sb);
240  	struct ext2_super_block *es = sbi->s_es;
241  	unsigned long def_mount_opts;
242  
243  	spin_lock(&sbi->s_lock);
244  	def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
245  
246  	if (sbi->s_sb_block != 1)
247  		seq_printf(seq, ",sb=%lu", sbi->s_sb_block);
248  	if (test_opt(sb, MINIX_DF))
249  		seq_puts(seq, ",minixdf");
250  	if (test_opt(sb, GRPID))
251  		seq_puts(seq, ",grpid");
252  	if (!test_opt(sb, GRPID) && (def_mount_opts & EXT2_DEFM_BSDGROUPS))
253  		seq_puts(seq, ",nogrpid");
254  	if (!uid_eq(sbi->s_resuid, make_kuid(&init_user_ns, EXT2_DEF_RESUID)) ||
255  	    le16_to_cpu(es->s_def_resuid) != EXT2_DEF_RESUID) {
256  		seq_printf(seq, ",resuid=%u",
257  				from_kuid_munged(&init_user_ns, sbi->s_resuid));
258  	}
259  	if (!gid_eq(sbi->s_resgid, make_kgid(&init_user_ns, EXT2_DEF_RESGID)) ||
260  	    le16_to_cpu(es->s_def_resgid) != EXT2_DEF_RESGID) {
261  		seq_printf(seq, ",resgid=%u",
262  				from_kgid_munged(&init_user_ns, sbi->s_resgid));
263  	}
264  	if (test_opt(sb, ERRORS_RO)) {
265  		int def_errors = le16_to_cpu(es->s_errors);
266  
267  		if (def_errors == EXT2_ERRORS_PANIC ||
268  		    def_errors == EXT2_ERRORS_CONTINUE) {
269  			seq_puts(seq, ",errors=remount-ro");
270  		}
271  	}
272  	if (test_opt(sb, ERRORS_CONT))
273  		seq_puts(seq, ",errors=continue");
274  	if (test_opt(sb, ERRORS_PANIC))
275  		seq_puts(seq, ",errors=panic");
276  	if (test_opt(sb, NO_UID32))
277  		seq_puts(seq, ",nouid32");
278  	if (test_opt(sb, DEBUG))
279  		seq_puts(seq, ",debug");
280  	if (test_opt(sb, OLDALLOC))
281  		seq_puts(seq, ",oldalloc");
282  
283  #ifdef CONFIG_EXT2_FS_XATTR
284  	if (test_opt(sb, XATTR_USER))
285  		seq_puts(seq, ",user_xattr");
286  	if (!test_opt(sb, XATTR_USER) &&
287  	    (def_mount_opts & EXT2_DEFM_XATTR_USER)) {
288  		seq_puts(seq, ",nouser_xattr");
289  	}
290  #endif
291  
292  #ifdef CONFIG_EXT2_FS_POSIX_ACL
293  	if (test_opt(sb, POSIX_ACL))
294  		seq_puts(seq, ",acl");
295  	if (!test_opt(sb, POSIX_ACL) && (def_mount_opts & EXT2_DEFM_ACL))
296  		seq_puts(seq, ",noacl");
297  #endif
298  
299  	if (test_opt(sb, USRQUOTA))
300  		seq_puts(seq, ",usrquota");
301  
302  	if (test_opt(sb, GRPQUOTA))
303  		seq_puts(seq, ",grpquota");
304  
305  	if (test_opt(sb, XIP))
306  		seq_puts(seq, ",xip");
307  
308  	if (test_opt(sb, DAX))
309  		seq_puts(seq, ",dax");
310  
311  	if (!test_opt(sb, RESERVATION))
312  		seq_puts(seq, ",noreservation");
313  
314  	spin_unlock(&sbi->s_lock);
315  	return 0;
316  }
317  
#ifdef CONFIG_QUOTA
static ssize_t ext2_quota_read(struct super_block *sb, int type,
			       char *data, size_t len, loff_t off);
static ssize_t ext2_quota_write(struct super_block *sb, int type,
				const char *data, size_t len, loff_t off);
static int ext2_quota_on(struct super_block *sb, int type, int format_id,
			 const struct path *path);

/* Return the i_dquot array of the ext2 in-core inode that embeds @inode. */
static struct dquot **ext2_get_dquots(struct inode *inode)
{
	struct ext2_inode_info *ei = EXT2_I(inode);

	return ei->i_dquot;
}

/*
 * quotactl operations: switching quotas on and off is ext2-specific,
 * everything else uses the dquot_* helpers.
 */
static const struct quotactl_ops ext2_quotactl_ops = {
	/* ext2-specific */
	.quota_on	= ext2_quota_on,
	.quota_off	= ext2_quota_off,

	/* dquot_* helpers */
	.quota_sync	= dquot_quota_sync,
	.get_state	= dquot_get_state,
	.set_info	= dquot_set_dqinfo,
	.get_dqblk	= dquot_get_dqblk,
	.get_nextdqblk	= dquot_get_next_dqblk,
	.set_dqblk	= dquot_set_dqblk,
};
#endif
339  
340  static const struct super_operations ext2_sops = {
341  	.alloc_inode	= ext2_alloc_inode,
342  	.free_inode	= ext2_free_in_core_inode,
343  	.write_inode	= ext2_write_inode,
344  	.evict_inode	= ext2_evict_inode,
345  	.put_super	= ext2_put_super,
346  	.sync_fs	= ext2_sync_fs,
347  	.freeze_fs	= ext2_freeze,
348  	.unfreeze_fs	= ext2_unfreeze,
349  	.statfs		= ext2_statfs,
350  	.remount_fs	= ext2_remount,
351  	.show_options	= ext2_show_options,
352  #ifdef CONFIG_QUOTA
353  	.quota_read	= ext2_quota_read,
354  	.quota_write	= ext2_quota_write,
355  	.get_dquots	= ext2_get_dquots,
356  #endif
357  };
358  
359  static struct inode *ext2_nfs_get_inode(struct super_block *sb,
360  		u64 ino, u32 generation)
361  {
362  	struct inode *inode;
363  
364  	if (ino < EXT2_FIRST_INO(sb) && ino != EXT2_ROOT_INO)
365  		return ERR_PTR(-ESTALE);
366  	if (ino > le32_to_cpu(EXT2_SB(sb)->s_es->s_inodes_count))
367  		return ERR_PTR(-ESTALE);
368  
369  	/*
370  	 * ext2_iget isn't quite right if the inode is currently unallocated!
371  	 * However ext2_iget currently does appropriate checks to handle stale
372  	 * inodes so everything is OK.
373  	 */
374  	inode = ext2_iget(sb, ino);
375  	if (IS_ERR(inode))
376  		return ERR_CAST(inode);
377  	if (generation && inode->i_generation != generation) {
378  		/* we didn't find the right inode.. */
379  		iput(inode);
380  		return ERR_PTR(-ESTALE);
381  	}
382  	return inode;
383  }
384  
385  static struct dentry *ext2_fh_to_dentry(struct super_block *sb, struct fid *fid,
386  		int fh_len, int fh_type)
387  {
388  	return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
389  				    ext2_nfs_get_inode);
390  }
391  
392  static struct dentry *ext2_fh_to_parent(struct super_block *sb, struct fid *fid,
393  		int fh_len, int fh_type)
394  {
395  	return generic_fh_to_parent(sb, fid, fh_len, fh_type,
396  				    ext2_nfs_get_inode);
397  }
398  
399  static const struct export_operations ext2_export_ops = {
400  	.fh_to_dentry = ext2_fh_to_dentry,
401  	.fh_to_parent = ext2_fh_to_parent,
402  	.get_parent = ext2_get_parent,
403  };
404  
405  static unsigned long get_sb_block(void **data)
406  {
407  	unsigned long 	sb_block;
408  	char 		*options = (char *) *data;
409  
410  	if (!options || strncmp(options, "sb=", 3) != 0)
411  		return 1;	/* Default location */
412  	options += 3;
413  	sb_block = simple_strtoul(options, &options, 0);
414  	if (*options && *options != ',') {
415  		printk("EXT2-fs: Invalid sb specification: %s\n",
416  		       (char *) *data);
417  		return 1;
418  	}
419  	if (*options == ',')
420  		options++;
421  	*data = (void *) options;
422  	return sb_block;
423  }
424  
/* Token ids for the mount options recognised by parse_options(). */
enum {
	Opt_bsd_df,
	Opt_minix_df,
	Opt_grpid,
	Opt_nogrpid,
	Opt_resgid,
	Opt_resuid,
	Opt_sb,
	Opt_err_cont,
	Opt_err_panic,
	Opt_err_ro,
	Opt_nouid32,
	Opt_debug,
	Opt_oldalloc,
	Opt_orlov,
	Opt_nobh,
	Opt_user_xattr,
	Opt_nouser_xattr,
	Opt_acl,
	Opt_noacl,
	Opt_xip,
	Opt_dax,
	Opt_ignore,
	Opt_err,
	Opt_quota,
	Opt_usrquota,
	Opt_grpquota,
	Opt_reservation,
	Opt_noreservation
};
433  
434  static const match_table_t tokens = {
435  	{Opt_bsd_df, "bsddf"},
436  	{Opt_minix_df, "minixdf"},
437  	{Opt_grpid, "grpid"},
438  	{Opt_grpid, "bsdgroups"},
439  	{Opt_nogrpid, "nogrpid"},
440  	{Opt_nogrpid, "sysvgroups"},
441  	{Opt_resgid, "resgid=%u"},
442  	{Opt_resuid, "resuid=%u"},
443  	{Opt_sb, "sb=%u"},
444  	{Opt_err_cont, "errors=continue"},
445  	{Opt_err_panic, "errors=panic"},
446  	{Opt_err_ro, "errors=remount-ro"},
447  	{Opt_nouid32, "nouid32"},
448  	{Opt_debug, "debug"},
449  	{Opt_oldalloc, "oldalloc"},
450  	{Opt_orlov, "orlov"},
451  	{Opt_nobh, "nobh"},
452  	{Opt_user_xattr, "user_xattr"},
453  	{Opt_nouser_xattr, "nouser_xattr"},
454  	{Opt_acl, "acl"},
455  	{Opt_noacl, "noacl"},
456  	{Opt_xip, "xip"},
457  	{Opt_dax, "dax"},
458  	{Opt_grpquota, "grpquota"},
459  	{Opt_ignore, "noquota"},
460  	{Opt_quota, "quota"},
461  	{Opt_usrquota, "usrquota"},
462  	{Opt_reservation, "reservation"},
463  	{Opt_noreservation, "noreservation"},
464  	{Opt_err, NULL}
465  };
466  
467  static int parse_options(char *options, struct super_block *sb,
468  			 struct ext2_mount_options *opts)
469  {
470  	char *p;
471  	substring_t args[MAX_OPT_ARGS];
472  	int option;
473  	kuid_t uid;
474  	kgid_t gid;
475  
476  	if (!options)
477  		return 1;
478  
479  	while ((p = strsep (&options, ",")) != NULL) {
480  		int token;
481  		if (!*p)
482  			continue;
483  
484  		token = match_token(p, tokens, args);
485  		switch (token) {
486  		case Opt_bsd_df:
487  			clear_opt (opts->s_mount_opt, MINIX_DF);
488  			break;
489  		case Opt_minix_df:
490  			set_opt (opts->s_mount_opt, MINIX_DF);
491  			break;
492  		case Opt_grpid:
493  			set_opt (opts->s_mount_opt, GRPID);
494  			break;
495  		case Opt_nogrpid:
496  			clear_opt (opts->s_mount_opt, GRPID);
497  			break;
498  		case Opt_resuid:
499  			if (match_int(&args[0], &option))
500  				return 0;
501  			uid = make_kuid(current_user_ns(), option);
502  			if (!uid_valid(uid)) {
503  				ext2_msg(sb, KERN_ERR, "Invalid uid value %d", option);
504  				return 0;
505  
506  			}
507  			opts->s_resuid = uid;
508  			break;
509  		case Opt_resgid:
510  			if (match_int(&args[0], &option))
511  				return 0;
512  			gid = make_kgid(current_user_ns(), option);
513  			if (!gid_valid(gid)) {
514  				ext2_msg(sb, KERN_ERR, "Invalid gid value %d", option);
515  				return 0;
516  			}
517  			opts->s_resgid = gid;
518  			break;
519  		case Opt_sb:
520  			/* handled by get_sb_block() instead of here */
521  			/* *sb_block = match_int(&args[0]); */
522  			break;
523  		case Opt_err_panic:
524  			clear_opt (opts->s_mount_opt, ERRORS_CONT);
525  			clear_opt (opts->s_mount_opt, ERRORS_RO);
526  			set_opt (opts->s_mount_opt, ERRORS_PANIC);
527  			break;
528  		case Opt_err_ro:
529  			clear_opt (opts->s_mount_opt, ERRORS_CONT);
530  			clear_opt (opts->s_mount_opt, ERRORS_PANIC);
531  			set_opt (opts->s_mount_opt, ERRORS_RO);
532  			break;
533  		case Opt_err_cont:
534  			clear_opt (opts->s_mount_opt, ERRORS_RO);
535  			clear_opt (opts->s_mount_opt, ERRORS_PANIC);
536  			set_opt (opts->s_mount_opt, ERRORS_CONT);
537  			break;
538  		case Opt_nouid32:
539  			set_opt (opts->s_mount_opt, NO_UID32);
540  			break;
541  		case Opt_debug:
542  			set_opt (opts->s_mount_opt, DEBUG);
543  			break;
544  		case Opt_oldalloc:
545  			set_opt (opts->s_mount_opt, OLDALLOC);
546  			break;
547  		case Opt_orlov:
548  			clear_opt (opts->s_mount_opt, OLDALLOC);
549  			break;
550  		case Opt_nobh:
551  			ext2_msg(sb, KERN_INFO,
552  				"nobh option not supported");
553  			break;
554  #ifdef CONFIG_EXT2_FS_XATTR
555  		case Opt_user_xattr:
556  			set_opt (opts->s_mount_opt, XATTR_USER);
557  			break;
558  		case Opt_nouser_xattr:
559  			clear_opt (opts->s_mount_opt, XATTR_USER);
560  			break;
561  #else
562  		case Opt_user_xattr:
563  		case Opt_nouser_xattr:
564  			ext2_msg(sb, KERN_INFO, "(no)user_xattr options"
565  				"not supported");
566  			break;
567  #endif
568  #ifdef CONFIG_EXT2_FS_POSIX_ACL
569  		case Opt_acl:
570  			set_opt(opts->s_mount_opt, POSIX_ACL);
571  			break;
572  		case Opt_noacl:
573  			clear_opt(opts->s_mount_opt, POSIX_ACL);
574  			break;
575  #else
576  		case Opt_acl:
577  		case Opt_noacl:
578  			ext2_msg(sb, KERN_INFO,
579  				"(no)acl options not supported");
580  			break;
581  #endif
582  		case Opt_xip:
583  			ext2_msg(sb, KERN_INFO, "use dax instead of xip");
584  			set_opt(opts->s_mount_opt, XIP);
585  			fallthrough;
586  		case Opt_dax:
587  #ifdef CONFIG_FS_DAX
588  			ext2_msg(sb, KERN_WARNING,
589  		"DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
590  			set_opt(opts->s_mount_opt, DAX);
591  #else
592  			ext2_msg(sb, KERN_INFO, "dax option not supported");
593  #endif
594  			break;
595  
596  #if defined(CONFIG_QUOTA)
597  		case Opt_quota:
598  		case Opt_usrquota:
599  			set_opt(opts->s_mount_opt, USRQUOTA);
600  			break;
601  
602  		case Opt_grpquota:
603  			set_opt(opts->s_mount_opt, GRPQUOTA);
604  			break;
605  #else
606  		case Opt_quota:
607  		case Opt_usrquota:
608  		case Opt_grpquota:
609  			ext2_msg(sb, KERN_INFO,
610  				"quota operations not supported");
611  			break;
612  #endif
613  
614  		case Opt_reservation:
615  			set_opt(opts->s_mount_opt, RESERVATION);
616  			ext2_msg(sb, KERN_INFO, "reservations ON");
617  			break;
618  		case Opt_noreservation:
619  			clear_opt(opts->s_mount_opt, RESERVATION);
620  			ext2_msg(sb, KERN_INFO, "reservations OFF");
621  			break;
622  		case Opt_ignore:
623  			break;
624  		default:
625  			return 0;
626  		}
627  	}
628  	return 1;
629  }
630  
631  static int ext2_setup_super (struct super_block * sb,
632  			      struct ext2_super_block * es,
633  			      int read_only)
634  {
635  	int res = 0;
636  	struct ext2_sb_info *sbi = EXT2_SB(sb);
637  
638  	if (le32_to_cpu(es->s_rev_level) > EXT2_MAX_SUPP_REV) {
639  		ext2_msg(sb, KERN_ERR,
640  			"error: revision level too high, "
641  			"forcing read-only mode");
642  		res = SB_RDONLY;
643  	}
644  	if (read_only)
645  		return res;
646  	if (!(sbi->s_mount_state & EXT2_VALID_FS))
647  		ext2_msg(sb, KERN_WARNING,
648  			"warning: mounting unchecked fs, "
649  			"running e2fsck is recommended");
650  	else if ((sbi->s_mount_state & EXT2_ERROR_FS))
651  		ext2_msg(sb, KERN_WARNING,
652  			"warning: mounting fs with errors, "
653  			"running e2fsck is recommended");
654  	else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 &&
655  		 le16_to_cpu(es->s_mnt_count) >=
656  		 (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count))
657  		ext2_msg(sb, KERN_WARNING,
658  			"warning: maximal mount count reached, "
659  			"running e2fsck is recommended");
660  	else if (le32_to_cpu(es->s_checkinterval) &&
661  		(le32_to_cpu(es->s_lastcheck) +
662  			le32_to_cpu(es->s_checkinterval) <=
663  			ktime_get_real_seconds()))
664  		ext2_msg(sb, KERN_WARNING,
665  			"warning: checktime reached, "
666  			"running e2fsck is recommended");
667  	if (!le16_to_cpu(es->s_max_mnt_count))
668  		es->s_max_mnt_count = cpu_to_le16(EXT2_DFL_MAX_MNT_COUNT);
669  	le16_add_cpu(&es->s_mnt_count, 1);
670  	if (test_opt (sb, DEBUG))
671  		ext2_msg(sb, KERN_INFO, "%s, %s, bs=%lu, fs=%lu, gc=%lu, "
672  			"bpg=%lu, ipg=%lu, mo=%04lx]",
673  			EXT2FS_VERSION, EXT2FS_DATE, sb->s_blocksize,
674  			sbi->s_frag_size,
675  			sbi->s_groups_count,
676  			EXT2_BLOCKS_PER_GROUP(sb),
677  			EXT2_INODES_PER_GROUP(sb),
678  			sbi->s_mount_opt);
679  	return res;
680  }
681  
682  static int ext2_check_descriptors(struct super_block *sb)
683  {
684  	int i;
685  	struct ext2_sb_info *sbi = EXT2_SB(sb);
686  
687  	ext2_debug ("Checking group descriptors");
688  
689  	for (i = 0; i < sbi->s_groups_count; i++) {
690  		struct ext2_group_desc *gdp = ext2_get_group_desc(sb, i, NULL);
691  		ext2_fsblk_t first_block = ext2_group_first_block_no(sb, i);
692  		ext2_fsblk_t last_block = ext2_group_last_block_no(sb, i);
693  
694  		if (le32_to_cpu(gdp->bg_block_bitmap) < first_block ||
695  		    le32_to_cpu(gdp->bg_block_bitmap) > last_block)
696  		{
697  			ext2_error (sb, "ext2_check_descriptors",
698  				    "Block bitmap for group %d"
699  				    " not in group (block %lu)!",
700  				    i, (unsigned long) le32_to_cpu(gdp->bg_block_bitmap));
701  			return 0;
702  		}
703  		if (le32_to_cpu(gdp->bg_inode_bitmap) < first_block ||
704  		    le32_to_cpu(gdp->bg_inode_bitmap) > last_block)
705  		{
706  			ext2_error (sb, "ext2_check_descriptors",
707  				    "Inode bitmap for group %d"
708  				    " not in group (block %lu)!",
709  				    i, (unsigned long) le32_to_cpu(gdp->bg_inode_bitmap));
710  			return 0;
711  		}
712  		if (le32_to_cpu(gdp->bg_inode_table) < first_block ||
713  		    le32_to_cpu(gdp->bg_inode_table) + sbi->s_itb_per_group - 1 >
714  		    last_block)
715  		{
716  			ext2_error (sb, "ext2_check_descriptors",
717  				    "Inode table for group %d"
718  				    " not in group (block %lu)!",
719  				    i, (unsigned long) le32_to_cpu(gdp->bg_inode_table));
720  			return 0;
721  		}
722  	}
723  	return 1;
724  }
725  
726  /*
727   * Maximal file size.  There is a direct, and {,double-,triple-}indirect
728   * block limit, and also a limit of (2^32 - 1) 512-byte sectors in i_blocks.
729   * We need to be 1 filesystem block less than the 2^32 sector limit.
730   */
731  static loff_t ext2_max_size(int bits)
732  {
733  	loff_t res = EXT2_NDIR_BLOCKS;
734  	int meta_blocks;
735  	unsigned int upper_limit;
736  	unsigned int ppb = 1 << (bits-2);
737  
738  	/* This is calculated to be the largest file size for a
739  	 * dense, file such that the total number of
740  	 * sectors in the file, including data and all indirect blocks,
741  	 * does not exceed 2^32 -1
742  	 * __u32 i_blocks representing the total number of
743  	 * 512 bytes blocks of the file
744  	 */
745  	upper_limit = (1LL << 32) - 1;
746  
747  	/* total blocks in file system block size */
748  	upper_limit >>= (bits - 9);
749  
750  	/* Compute how many blocks we can address by block tree */
751  	res += 1LL << (bits-2);
752  	res += 1LL << (2*(bits-2));
753  	res += 1LL << (3*(bits-2));
754  	/* Compute how many metadata blocks are needed */
755  	meta_blocks = 1;
756  	meta_blocks += 1 + ppb;
757  	meta_blocks += 1 + ppb + ppb * ppb;
758  	/* Does block tree limit file size? */
759  	if (res + meta_blocks <= upper_limit)
760  		goto check_lfs;
761  
762  	res = upper_limit;
763  	/* How many metadata blocks are needed for addressing upper_limit? */
764  	upper_limit -= EXT2_NDIR_BLOCKS;
765  	/* indirect blocks */
766  	meta_blocks = 1;
767  	upper_limit -= ppb;
768  	/* double indirect blocks */
769  	if (upper_limit < ppb * ppb) {
770  		meta_blocks += 1 + DIV_ROUND_UP(upper_limit, ppb);
771  		res -= meta_blocks;
772  		goto check_lfs;
773  	}
774  	meta_blocks += 1 + ppb;
775  	upper_limit -= ppb * ppb;
776  	/* tripple indirect blocks for the rest */
777  	meta_blocks += 1 + DIV_ROUND_UP(upper_limit, ppb) +
778  		DIV_ROUND_UP(upper_limit, ppb*ppb);
779  	res -= meta_blocks;
780  check_lfs:
781  	res <<= bits;
782  	if (res > MAX_LFS_FILESIZE)
783  		res = MAX_LFS_FILESIZE;
784  
785  	return res;
786  }
787  
788  static unsigned long descriptor_loc(struct super_block *sb,
789  				    unsigned long logic_sb_block,
790  				    int nr)
791  {
792  	struct ext2_sb_info *sbi = EXT2_SB(sb);
793  	unsigned long bg, first_meta_bg;
794  
795  	first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg);
796  
797  	if (!EXT2_HAS_INCOMPAT_FEATURE(sb, EXT2_FEATURE_INCOMPAT_META_BG) ||
798  	    nr < first_meta_bg)
799  		return (logic_sb_block + nr + 1);
800  	bg = sbi->s_desc_per_block * nr;
801  
802  	return ext2_group_first_block_no(sb, bg) + ext2_bg_has_super(sb, bg);
803  }
804  
805  static int ext2_fill_super(struct super_block *sb, void *data, int silent)
806  {
807  	struct buffer_head * bh;
808  	struct ext2_sb_info * sbi;
809  	struct ext2_super_block * es;
810  	struct inode *root;
811  	unsigned long block;
812  	unsigned long sb_block = get_sb_block(&data);
813  	unsigned long logic_sb_block;
814  	unsigned long offset = 0;
815  	unsigned long def_mount_opts;
816  	long ret = -ENOMEM;
817  	int blocksize = BLOCK_SIZE;
818  	int db_count;
819  	int i, j;
820  	__le32 features;
821  	int err;
822  	struct ext2_mount_options opts;
823  
824  	sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
825  	if (!sbi)
826  		return -ENOMEM;
827  
828  	sbi->s_blockgroup_lock =
829  		kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL);
830  	if (!sbi->s_blockgroup_lock) {
831  		kfree(sbi);
832  		return -ENOMEM;
833  	}
834  	sb->s_fs_info = sbi;
835  	sbi->s_sb_block = sb_block;
836  	sbi->s_daxdev = fs_dax_get_by_bdev(sb->s_bdev, &sbi->s_dax_part_off,
837  					   NULL, NULL);
838  
839  	spin_lock_init(&sbi->s_lock);
840  	ret = -EINVAL;
841  
842  	/*
843  	 * See what the current blocksize for the device is, and
844  	 * use that as the blocksize.  Otherwise (or if the blocksize
845  	 * is smaller than the default) use the default.
846  	 * This is important for devices that have a hardware
847  	 * sectorsize that is larger than the default.
848  	 */
849  	blocksize = sb_min_blocksize(sb, BLOCK_SIZE);
850  	if (!blocksize) {
851  		ext2_msg(sb, KERN_ERR, "error: unable to set blocksize");
852  		goto failed_sbi;
853  	}
854  
855  	/*
856  	 * If the superblock doesn't start on a hardware sector boundary,
857  	 * calculate the offset.
858  	 */
859  	if (blocksize != BLOCK_SIZE) {
860  		logic_sb_block = (sb_block*BLOCK_SIZE) / blocksize;
861  		offset = (sb_block*BLOCK_SIZE) % blocksize;
862  	} else {
863  		logic_sb_block = sb_block;
864  	}
865  
866  	if (!(bh = sb_bread(sb, logic_sb_block))) {
867  		ext2_msg(sb, KERN_ERR, "error: unable to read superblock");
868  		goto failed_sbi;
869  	}
870  	/*
871  	 * Note: s_es must be initialized as soon as possible because
872  	 *       some ext2 macro-instructions depend on its value
873  	 */
874  	es = (struct ext2_super_block *) (((char *)bh->b_data) + offset);
875  	sbi->s_es = es;
876  	sb->s_magic = le16_to_cpu(es->s_magic);
877  
878  	if (sb->s_magic != EXT2_SUPER_MAGIC)
879  		goto cantfind_ext2;
880  
881  	opts.s_mount_opt = 0;
882  	/* Set defaults before we parse the mount options */
883  	def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
884  	if (def_mount_opts & EXT2_DEFM_DEBUG)
885  		set_opt(opts.s_mount_opt, DEBUG);
886  	if (def_mount_opts & EXT2_DEFM_BSDGROUPS)
887  		set_opt(opts.s_mount_opt, GRPID);
888  	if (def_mount_opts & EXT2_DEFM_UID16)
889  		set_opt(opts.s_mount_opt, NO_UID32);
890  #ifdef CONFIG_EXT2_FS_XATTR
891  	if (def_mount_opts & EXT2_DEFM_XATTR_USER)
892  		set_opt(opts.s_mount_opt, XATTR_USER);
893  #endif
894  #ifdef CONFIG_EXT2_FS_POSIX_ACL
895  	if (def_mount_opts & EXT2_DEFM_ACL)
896  		set_opt(opts.s_mount_opt, POSIX_ACL);
897  #endif
898  
899  	if (le16_to_cpu(sbi->s_es->s_errors) == EXT2_ERRORS_PANIC)
900  		set_opt(opts.s_mount_opt, ERRORS_PANIC);
901  	else if (le16_to_cpu(sbi->s_es->s_errors) == EXT2_ERRORS_CONTINUE)
902  		set_opt(opts.s_mount_opt, ERRORS_CONT);
903  	else
904  		set_opt(opts.s_mount_opt, ERRORS_RO);
905  
906  	opts.s_resuid = make_kuid(&init_user_ns, le16_to_cpu(es->s_def_resuid));
907  	opts.s_resgid = make_kgid(&init_user_ns, le16_to_cpu(es->s_def_resgid));
908  
909  	set_opt(opts.s_mount_opt, RESERVATION);
910  
911  	if (!parse_options((char *) data, sb, &opts))
912  		goto failed_mount;
913  
914  	sbi->s_mount_opt = opts.s_mount_opt;
915  	sbi->s_resuid = opts.s_resuid;
916  	sbi->s_resgid = opts.s_resgid;
917  
918  	sb->s_flags = (sb->s_flags & ~SB_POSIXACL) |
919  		(test_opt(sb, POSIX_ACL) ? SB_POSIXACL : 0);
920  	sb->s_iflags |= SB_I_CGROUPWB;
921  
922  	if (le32_to_cpu(es->s_rev_level) == EXT2_GOOD_OLD_REV &&
923  	    (EXT2_HAS_COMPAT_FEATURE(sb, ~0U) ||
924  	     EXT2_HAS_RO_COMPAT_FEATURE(sb, ~0U) ||
925  	     EXT2_HAS_INCOMPAT_FEATURE(sb, ~0U)))
926  		ext2_msg(sb, KERN_WARNING,
927  			"warning: feature flags set on rev 0 fs, "
928  			"running e2fsck is recommended");
929  	/*
930  	 * Check feature flags regardless of the revision level, since we
931  	 * previously didn't change the revision level when setting the flags,
932  	 * so there is a chance incompat flags are set on a rev 0 filesystem.
933  	 */
934  	features = EXT2_HAS_INCOMPAT_FEATURE(sb, ~EXT2_FEATURE_INCOMPAT_SUPP);
935  	if (features) {
936  		ext2_msg(sb, KERN_ERR,	"error: couldn't mount because of "
937  		       "unsupported optional features (%x)",
938  			le32_to_cpu(features));
939  		goto failed_mount;
940  	}
941  	if (!sb_rdonly(sb) && (features = EXT2_HAS_RO_COMPAT_FEATURE(sb, ~EXT2_FEATURE_RO_COMPAT_SUPP))){
942  		ext2_msg(sb, KERN_ERR, "error: couldn't mount RDWR because of "
943  		       "unsupported optional features (%x)",
944  		       le32_to_cpu(features));
945  		goto failed_mount;
946  	}
947  
948  	blocksize = BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size);
949  
950  	if (test_opt(sb, DAX)) {
951  		if (!sbi->s_daxdev) {
952  			ext2_msg(sb, KERN_ERR,
953  				"DAX unsupported by block device. Turning off DAX.");
954  			clear_opt(sbi->s_mount_opt, DAX);
955  		} else if (blocksize != PAGE_SIZE) {
956  			ext2_msg(sb, KERN_ERR, "unsupported blocksize for DAX\n");
957  			clear_opt(sbi->s_mount_opt, DAX);
958  		}
959  	}
960  
961  	/* If the blocksize doesn't match, re-read the thing.. */
962  	if (sb->s_blocksize != blocksize) {
963  		brelse(bh);
964  
965  		if (!sb_set_blocksize(sb, blocksize)) {
966  			ext2_msg(sb, KERN_ERR,
967  				"error: bad blocksize %d", blocksize);
968  			goto failed_sbi;
969  		}
970  
971  		logic_sb_block = (sb_block*BLOCK_SIZE) / blocksize;
972  		offset = (sb_block*BLOCK_SIZE) % blocksize;
973  		bh = sb_bread(sb, logic_sb_block);
974  		if(!bh) {
975  			ext2_msg(sb, KERN_ERR, "error: couldn't read"
976  				"superblock on 2nd try");
977  			goto failed_sbi;
978  		}
979  		es = (struct ext2_super_block *) (((char *)bh->b_data) + offset);
980  		sbi->s_es = es;
981  		if (es->s_magic != cpu_to_le16(EXT2_SUPER_MAGIC)) {
982  			ext2_msg(sb, KERN_ERR, "error: magic mismatch");
983  			goto failed_mount;
984  		}
985  	}
986  
987  	sb->s_maxbytes = ext2_max_size(sb->s_blocksize_bits);
988  	sb->s_max_links = EXT2_LINK_MAX;
989  	sb->s_time_min = S32_MIN;
990  	sb->s_time_max = S32_MAX;
991  
992  	if (le32_to_cpu(es->s_rev_level) == EXT2_GOOD_OLD_REV) {
993  		sbi->s_inode_size = EXT2_GOOD_OLD_INODE_SIZE;
994  		sbi->s_first_ino = EXT2_GOOD_OLD_FIRST_INO;
995  	} else {
996  		sbi->s_inode_size = le16_to_cpu(es->s_inode_size);
997  		sbi->s_first_ino = le32_to_cpu(es->s_first_ino);
998  		if ((sbi->s_inode_size < EXT2_GOOD_OLD_INODE_SIZE) ||
999  		    !is_power_of_2(sbi->s_inode_size) ||
1000  		    (sbi->s_inode_size > blocksize)) {
1001  			ext2_msg(sb, KERN_ERR,
1002  				"error: unsupported inode size: %d",
1003  				sbi->s_inode_size);
1004  			goto failed_mount;
1005  		}
1006  	}
1007  
1008  	sbi->s_frag_size = EXT2_MIN_FRAG_SIZE <<
1009  				   le32_to_cpu(es->s_log_frag_size);
1010  	if (sbi->s_frag_size == 0)
1011  		goto cantfind_ext2;
1012  	sbi->s_frags_per_block = sb->s_blocksize / sbi->s_frag_size;
1013  
1014  	sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
1015  	sbi->s_frags_per_group = le32_to_cpu(es->s_frags_per_group);
1016  	sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);
1017  
1018  	sbi->s_inodes_per_block = sb->s_blocksize / EXT2_INODE_SIZE(sb);
1019  	if (sbi->s_inodes_per_block == 0 || sbi->s_inodes_per_group == 0)
1020  		goto cantfind_ext2;
1021  	sbi->s_itb_per_group = sbi->s_inodes_per_group /
1022  					sbi->s_inodes_per_block;
1023  	sbi->s_desc_per_block = sb->s_blocksize /
1024  					sizeof (struct ext2_group_desc);
1025  	sbi->s_sbh = bh;
1026  	sbi->s_mount_state = le16_to_cpu(es->s_state);
1027  	sbi->s_addr_per_block_bits =
1028  		ilog2 (EXT2_ADDR_PER_BLOCK(sb));
1029  	sbi->s_desc_per_block_bits =
1030  		ilog2 (EXT2_DESC_PER_BLOCK(sb));
1031  
1032  	if (sb->s_magic != EXT2_SUPER_MAGIC)
1033  		goto cantfind_ext2;
1034  
1035  	if (sb->s_blocksize != bh->b_size) {
1036  		if (!silent)
1037  			ext2_msg(sb, KERN_ERR, "error: unsupported blocksize");
1038  		goto failed_mount;
1039  	}
1040  
1041  	if (sb->s_blocksize != sbi->s_frag_size) {
1042  		ext2_msg(sb, KERN_ERR,
1043  			"error: fragsize %lu != blocksize %lu"
1044  			"(not supported yet)",
1045  			sbi->s_frag_size, sb->s_blocksize);
1046  		goto failed_mount;
1047  	}
1048  
1049  	if (sbi->s_blocks_per_group > sb->s_blocksize * 8) {
1050  		ext2_msg(sb, KERN_ERR,
1051  			"error: #blocks per group too big: %lu",
1052  			sbi->s_blocks_per_group);
1053  		goto failed_mount;
1054  	}
1055  	/* At least inode table, bitmaps, and sb have to fit in one group */
1056  	if (sbi->s_blocks_per_group <= sbi->s_itb_per_group + 3) {
1057  		ext2_msg(sb, KERN_ERR,
1058  			"error: #blocks per group smaller than metadata size: %lu <= %lu",
1059  			sbi->s_blocks_per_group, sbi->s_inodes_per_group + 3);
1060  		goto failed_mount;
1061  	}
1062  	if (sbi->s_frags_per_group > sb->s_blocksize * 8) {
1063  		ext2_msg(sb, KERN_ERR,
1064  			"error: #fragments per group too big: %lu",
1065  			sbi->s_frags_per_group);
1066  		goto failed_mount;
1067  	}
1068  	if (sbi->s_inodes_per_group < sbi->s_inodes_per_block ||
1069  	    sbi->s_inodes_per_group > sb->s_blocksize * 8) {
1070  		ext2_msg(sb, KERN_ERR,
1071  			"error: invalid #inodes per group: %lu",
1072  			sbi->s_inodes_per_group);
1073  		goto failed_mount;
1074  	}
1075  	if (sb_bdev_nr_blocks(sb) < le32_to_cpu(es->s_blocks_count)) {
1076  		ext2_msg(sb, KERN_ERR,
1077  			 "bad geometry: block count %u exceeds size of device (%u blocks)",
1078  			 le32_to_cpu(es->s_blocks_count),
1079  			 (unsigned)sb_bdev_nr_blocks(sb));
1080  		goto failed_mount;
1081  	}
1082  
1083  	sbi->s_groups_count = ((le32_to_cpu(es->s_blocks_count) -
1084  				le32_to_cpu(es->s_first_data_block) - 1)
1085  					/ EXT2_BLOCKS_PER_GROUP(sb)) + 1;
1086  	if ((u64)sbi->s_groups_count * sbi->s_inodes_per_group !=
1087  	    le32_to_cpu(es->s_inodes_count)) {
1088  		ext2_msg(sb, KERN_ERR, "error: invalid #inodes: %u vs computed %llu",
1089  			 le32_to_cpu(es->s_inodes_count),
1090  			 (u64)sbi->s_groups_count * sbi->s_inodes_per_group);
1091  		goto failed_mount;
1092  	}
1093  	db_count = (sbi->s_groups_count + EXT2_DESC_PER_BLOCK(sb) - 1) /
1094  		   EXT2_DESC_PER_BLOCK(sb);
1095  	sbi->s_group_desc = kvmalloc_array(db_count,
1096  					   sizeof(struct buffer_head *),
1097  					   GFP_KERNEL);
1098  	if (sbi->s_group_desc == NULL) {
1099  		ret = -ENOMEM;
1100  		ext2_msg(sb, KERN_ERR, "error: not enough memory");
1101  		goto failed_mount;
1102  	}
1103  	bgl_lock_init(sbi->s_blockgroup_lock);
1104  	sbi->s_debts = kcalloc(sbi->s_groups_count, sizeof(*sbi->s_debts), GFP_KERNEL);
1105  	if (!sbi->s_debts) {
1106  		ret = -ENOMEM;
1107  		ext2_msg(sb, KERN_ERR, "error: not enough memory");
1108  		goto failed_mount_group_desc;
1109  	}
1110  	for (i = 0; i < db_count; i++) {
1111  		block = descriptor_loc(sb, logic_sb_block, i);
1112  		sbi->s_group_desc[i] = sb_bread(sb, block);
1113  		if (!sbi->s_group_desc[i]) {
1114  			for (j = 0; j < i; j++)
1115  				brelse (sbi->s_group_desc[j]);
1116  			ext2_msg(sb, KERN_ERR,
1117  				"error: unable to read group descriptors");
1118  			goto failed_mount_group_desc;
1119  		}
1120  	}
1121  	if (!ext2_check_descriptors (sb)) {
1122  		ext2_msg(sb, KERN_ERR, "group descriptors corrupted");
1123  		goto failed_mount2;
1124  	}
1125  	sbi->s_gdb_count = db_count;
1126  	get_random_bytes(&sbi->s_next_generation, sizeof(u32));
1127  	spin_lock_init(&sbi->s_next_gen_lock);
1128  
1129  	/* per filesystem reservation list head & lock */
1130  	spin_lock_init(&sbi->s_rsv_window_lock);
1131  	sbi->s_rsv_window_root = RB_ROOT;
1132  	/*
1133  	 * Add a single, static dummy reservation to the start of the
1134  	 * reservation window list --- it gives us a placeholder for
1135  	 * append-at-start-of-list which makes the allocation logic
1136  	 * _much_ simpler.
1137  	 */
1138  	sbi->s_rsv_window_head.rsv_start = EXT2_RESERVE_WINDOW_NOT_ALLOCATED;
1139  	sbi->s_rsv_window_head.rsv_end = EXT2_RESERVE_WINDOW_NOT_ALLOCATED;
1140  	sbi->s_rsv_window_head.rsv_alloc_hit = 0;
1141  	sbi->s_rsv_window_head.rsv_goal_size = 0;
1142  	ext2_rsv_window_add(sb, &sbi->s_rsv_window_head);
1143  
1144  	err = percpu_counter_init(&sbi->s_freeblocks_counter,
1145  				ext2_count_free_blocks(sb), GFP_KERNEL);
1146  	if (!err) {
1147  		err = percpu_counter_init(&sbi->s_freeinodes_counter,
1148  				ext2_count_free_inodes(sb), GFP_KERNEL);
1149  	}
1150  	if (!err) {
1151  		err = percpu_counter_init(&sbi->s_dirs_counter,
1152  				ext2_count_dirs(sb), GFP_KERNEL);
1153  	}
1154  	if (err) {
1155  		ret = err;
1156  		ext2_msg(sb, KERN_ERR, "error: insufficient memory");
1157  		goto failed_mount3;
1158  	}
1159  
1160  #ifdef CONFIG_EXT2_FS_XATTR
1161  	sbi->s_ea_block_cache = ext2_xattr_create_cache();
1162  	if (!sbi->s_ea_block_cache) {
1163  		ret = -ENOMEM;
1164  		ext2_msg(sb, KERN_ERR, "Failed to create ea_block_cache");
1165  		goto failed_mount3;
1166  	}
1167  #endif
1168  	/*
1169  	 * set up enough so that it can read an inode
1170  	 */
1171  	sb->s_op = &ext2_sops;
1172  	sb->s_export_op = &ext2_export_ops;
1173  	sb->s_xattr = ext2_xattr_handlers;
1174  
1175  #ifdef CONFIG_QUOTA
1176  	sb->dq_op = &dquot_operations;
1177  	sb->s_qcop = &ext2_quotactl_ops;
1178  	sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP;
1179  #endif
1180  
1181  	root = ext2_iget(sb, EXT2_ROOT_INO);
1182  	if (IS_ERR(root)) {
1183  		ret = PTR_ERR(root);
1184  		goto failed_mount3;
1185  	}
1186  	if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
1187  		iput(root);
1188  		ext2_msg(sb, KERN_ERR, "error: corrupt root inode, run e2fsck");
1189  		goto failed_mount3;
1190  	}
1191  
1192  	sb->s_root = d_make_root(root);
1193  	if (!sb->s_root) {
1194  		ext2_msg(sb, KERN_ERR, "error: get root inode failed");
1195  		ret = -ENOMEM;
1196  		goto failed_mount3;
1197  	}
1198  	if (EXT2_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL))
1199  		ext2_msg(sb, KERN_WARNING,
1200  			"warning: mounting ext3 filesystem as ext2");
1201  	if (ext2_setup_super (sb, es, sb_rdonly(sb)))
1202  		sb->s_flags |= SB_RDONLY;
1203  	ext2_write_super(sb);
1204  	return 0;
1205  
1206  cantfind_ext2:
1207  	if (!silent)
1208  		ext2_msg(sb, KERN_ERR,
1209  			"error: can't find an ext2 filesystem on dev %s.",
1210  			sb->s_id);
1211  	goto failed_mount;
1212  failed_mount3:
1213  	ext2_xattr_destroy_cache(sbi->s_ea_block_cache);
1214  	percpu_counter_destroy(&sbi->s_freeblocks_counter);
1215  	percpu_counter_destroy(&sbi->s_freeinodes_counter);
1216  	percpu_counter_destroy(&sbi->s_dirs_counter);
1217  failed_mount2:
1218  	for (i = 0; i < db_count; i++)
1219  		brelse(sbi->s_group_desc[i]);
1220  failed_mount_group_desc:
1221  	kvfree(sbi->s_group_desc);
1222  	kfree(sbi->s_debts);
1223  failed_mount:
1224  	brelse(bh);
1225  failed_sbi:
1226  	fs_put_dax(sbi->s_daxdev, NULL);
1227  	sb->s_fs_info = NULL;
1228  	kfree(sbi->s_blockgroup_lock);
1229  	kfree(sbi);
1230  	return ret;
1231  }
1232  
1233  static void ext2_clear_super_error(struct super_block *sb)
1234  {
1235  	struct buffer_head *sbh = EXT2_SB(sb)->s_sbh;
1236  
1237  	if (buffer_write_io_error(sbh)) {
1238  		/*
1239  		 * Oh, dear.  A previous attempt to write the
1240  		 * superblock failed.  This could happen because the
1241  		 * USB device was yanked out.  Or it could happen to
1242  		 * be a transient write error and maybe the block will
1243  		 * be remapped.  Nothing we can do but to retry the
1244  		 * write and hope for the best.
1245  		 */
1246  		ext2_msg(sb, KERN_ERR,
1247  		       "previous I/O error to superblock detected");
1248  		clear_buffer_write_io_error(sbh);
1249  		set_buffer_uptodate(sbh);
1250  	}
1251  }
1252  
1253  void ext2_sync_super(struct super_block *sb, struct ext2_super_block *es,
1254  		     int wait)
1255  {
1256  	ext2_clear_super_error(sb);
1257  	spin_lock(&EXT2_SB(sb)->s_lock);
1258  	es->s_free_blocks_count = cpu_to_le32(ext2_count_free_blocks(sb));
1259  	es->s_free_inodes_count = cpu_to_le32(ext2_count_free_inodes(sb));
1260  	es->s_wtime = cpu_to_le32(ktime_get_real_seconds());
1261  	/* unlock before we do IO */
1262  	spin_unlock(&EXT2_SB(sb)->s_lock);
1263  	mark_buffer_dirty(EXT2_SB(sb)->s_sbh);
1264  	if (wait)
1265  		sync_dirty_buffer(EXT2_SB(sb)->s_sbh);
1266  }
1267  
1268  /*
1269   * In the second extended file system, it is not necessary to
1270   * write the super block since we use a mapping of the
1271   * disk super block in a buffer.
1272   *
1273   * However, this function is still used to set the fs valid
1274   * flags to 0.  We need to set this flag to 0 since the fs
1275   * may have been checked while mounted and e2fsck may have
1276   * set s_state to EXT2_VALID_FS after some corrections.
1277   */
1278  static int ext2_sync_fs(struct super_block *sb, int wait)
1279  {
1280  	struct ext2_sb_info *sbi = EXT2_SB(sb);
1281  	struct ext2_super_block *es = EXT2_SB(sb)->s_es;
1282  
1283  	/*
1284  	 * Write quota structures to quota file, sync_blockdev() will write
1285  	 * them to disk later
1286  	 */
1287  	dquot_writeback_dquots(sb, -1);
1288  
1289  	spin_lock(&sbi->s_lock);
1290  	if (es->s_state & cpu_to_le16(EXT2_VALID_FS)) {
1291  		ext2_debug("setting valid to 0\n");
1292  		es->s_state &= cpu_to_le16(~EXT2_VALID_FS);
1293  	}
1294  	spin_unlock(&sbi->s_lock);
1295  	ext2_sync_super(sb, es, wait);
1296  	return 0;
1297  }
1298  
1299  static int ext2_freeze(struct super_block *sb)
1300  {
1301  	struct ext2_sb_info *sbi = EXT2_SB(sb);
1302  
1303  	/*
1304  	 * Open but unlinked files present? Keep EXT2_VALID_FS flag cleared
1305  	 * because we have unattached inodes and thus filesystem is not fully
1306  	 * consistent.
1307  	 */
1308  	if (atomic_long_read(&sb->s_remove_count)) {
1309  		ext2_sync_fs(sb, 1);
1310  		return 0;
1311  	}
1312  	/* Set EXT2_FS_VALID flag */
1313  	spin_lock(&sbi->s_lock);
1314  	sbi->s_es->s_state = cpu_to_le16(sbi->s_mount_state);
1315  	spin_unlock(&sbi->s_lock);
1316  	ext2_sync_super(sb, sbi->s_es, 1);
1317  
1318  	return 0;
1319  }
1320  
/*
 * Unfreeze callback.  Writing the superblock through ext2_write_super()
 * is enough: on a read-write mount that path clears EXT2_VALID_FS again.
 * Always returns 0.
 */
static int ext2_unfreeze(struct super_block *sb)
{
	ext2_write_super(sb);

	return 0;
}
1328  
/*
 * Synchronously sync the filesystem superblock, unless the superblock
 * is mounted read-only, in which case nothing is written.
 */
static void ext2_write_super(struct super_block *sb)
{
	if (sb_rdonly(sb))
		return;

	ext2_sync_fs(sb, 1);
}
1334  
1335  static int ext2_remount (struct super_block * sb, int * flags, char * data)
1336  {
1337  	struct ext2_sb_info * sbi = EXT2_SB(sb);
1338  	struct ext2_super_block * es;
1339  	struct ext2_mount_options new_opts;
1340  	int err;
1341  
1342  	sync_filesystem(sb);
1343  
1344  	spin_lock(&sbi->s_lock);
1345  	new_opts.s_mount_opt = sbi->s_mount_opt;
1346  	new_opts.s_resuid = sbi->s_resuid;
1347  	new_opts.s_resgid = sbi->s_resgid;
1348  	spin_unlock(&sbi->s_lock);
1349  
1350  	if (!parse_options(data, sb, &new_opts))
1351  		return -EINVAL;
1352  
1353  	spin_lock(&sbi->s_lock);
1354  	es = sbi->s_es;
1355  	if ((sbi->s_mount_opt ^ new_opts.s_mount_opt) & EXT2_MOUNT_DAX) {
1356  		ext2_msg(sb, KERN_WARNING, "warning: refusing change of "
1357  			 "dax flag with busy inodes while remounting");
1358  		new_opts.s_mount_opt ^= EXT2_MOUNT_DAX;
1359  	}
1360  	if ((bool)(*flags & SB_RDONLY) == sb_rdonly(sb))
1361  		goto out_set;
1362  	if (*flags & SB_RDONLY) {
1363  		if (le16_to_cpu(es->s_state) & EXT2_VALID_FS ||
1364  		    !(sbi->s_mount_state & EXT2_VALID_FS))
1365  			goto out_set;
1366  
1367  		/*
1368  		 * OK, we are remounting a valid rw partition rdonly, so set
1369  		 * the rdonly flag and then mark the partition as valid again.
1370  		 */
1371  		es->s_state = cpu_to_le16(sbi->s_mount_state);
1372  		es->s_mtime = cpu_to_le32(ktime_get_real_seconds());
1373  		spin_unlock(&sbi->s_lock);
1374  
1375  		err = dquot_suspend(sb, -1);
1376  		if (err < 0)
1377  			return err;
1378  
1379  		ext2_sync_super(sb, es, 1);
1380  	} else {
1381  		__le32 ret = EXT2_HAS_RO_COMPAT_FEATURE(sb,
1382  					       ~EXT2_FEATURE_RO_COMPAT_SUPP);
1383  		if (ret) {
1384  			spin_unlock(&sbi->s_lock);
1385  			ext2_msg(sb, KERN_WARNING,
1386  				"warning: couldn't remount RDWR because of "
1387  				"unsupported optional features (%x).",
1388  				le32_to_cpu(ret));
1389  			return -EROFS;
1390  		}
1391  		/*
1392  		 * Mounting a RDONLY partition read-write, so reread and
1393  		 * store the current valid flag.  (It may have been changed
1394  		 * by e2fsck since we originally mounted the partition.)
1395  		 */
1396  		sbi->s_mount_state = le16_to_cpu(es->s_state);
1397  		if (!ext2_setup_super (sb, es, 0))
1398  			sb->s_flags &= ~SB_RDONLY;
1399  		spin_unlock(&sbi->s_lock);
1400  
1401  		ext2_write_super(sb);
1402  
1403  		dquot_resume(sb, -1);
1404  	}
1405  
1406  	spin_lock(&sbi->s_lock);
1407  out_set:
1408  	sbi->s_mount_opt = new_opts.s_mount_opt;
1409  	sbi->s_resuid = new_opts.s_resuid;
1410  	sbi->s_resgid = new_opts.s_resgid;
1411  	sb->s_flags = (sb->s_flags & ~SB_POSIXACL) |
1412  		(test_opt(sb, POSIX_ACL) ? SB_POSIXACL : 0);
1413  	spin_unlock(&sbi->s_lock);
1414  
1415  	return 0;
1416  }
1417  
1418  static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf)
1419  {
1420  	struct super_block *sb = dentry->d_sb;
1421  	struct ext2_sb_info *sbi = EXT2_SB(sb);
1422  	struct ext2_super_block *es = sbi->s_es;
1423  
1424  	spin_lock(&sbi->s_lock);
1425  
1426  	if (test_opt (sb, MINIX_DF))
1427  		sbi->s_overhead_last = 0;
1428  	else if (sbi->s_blocks_last != le32_to_cpu(es->s_blocks_count)) {
1429  		unsigned long i, overhead = 0;
1430  		smp_rmb();
1431  
1432  		/*
1433  		 * Compute the overhead (FS structures). This is constant
1434  		 * for a given filesystem unless the number of block groups
1435  		 * changes so we cache the previous value until it does.
1436  		 */
1437  
1438  		/*
1439  		 * All of the blocks before first_data_block are
1440  		 * overhead
1441  		 */
1442  		overhead = le32_to_cpu(es->s_first_data_block);
1443  
1444  		/*
1445  		 * Add the overhead attributed to the superblock and
1446  		 * block group descriptors.  If the sparse superblocks
1447  		 * feature is turned on, then not all groups have this.
1448  		 */
1449  		for (i = 0; i < sbi->s_groups_count; i++)
1450  			overhead += ext2_bg_has_super(sb, i) +
1451  				ext2_bg_num_gdb(sb, i);
1452  
1453  		/*
1454  		 * Every block group has an inode bitmap, a block
1455  		 * bitmap, and an inode table.
1456  		 */
1457  		overhead += (sbi->s_groups_count *
1458  			     (2 + sbi->s_itb_per_group));
1459  		sbi->s_overhead_last = overhead;
1460  		smp_wmb();
1461  		sbi->s_blocks_last = le32_to_cpu(es->s_blocks_count);
1462  	}
1463  
1464  	buf->f_type = EXT2_SUPER_MAGIC;
1465  	buf->f_bsize = sb->s_blocksize;
1466  	buf->f_blocks = le32_to_cpu(es->s_blocks_count) - sbi->s_overhead_last;
1467  	buf->f_bfree = ext2_count_free_blocks(sb);
1468  	es->s_free_blocks_count = cpu_to_le32(buf->f_bfree);
1469  	buf->f_bavail = buf->f_bfree - le32_to_cpu(es->s_r_blocks_count);
1470  	if (buf->f_bfree < le32_to_cpu(es->s_r_blocks_count))
1471  		buf->f_bavail = 0;
1472  	buf->f_files = le32_to_cpu(es->s_inodes_count);
1473  	buf->f_ffree = ext2_count_free_inodes(sb);
1474  	es->s_free_inodes_count = cpu_to_le32(buf->f_ffree);
1475  	buf->f_namelen = EXT2_NAME_LEN;
1476  	buf->f_fsid = uuid_to_fsid(es->s_uuid);
1477  	spin_unlock(&sbi->s_lock);
1478  	return 0;
1479  }
1480  
1481  static struct dentry *ext2_mount(struct file_system_type *fs_type,
1482  	int flags, const char *dev_name, void *data)
1483  {
1484  	return mount_bdev(fs_type, flags, dev_name, data, ext2_fill_super);
1485  }
1486  
1487  #ifdef CONFIG_QUOTA
1488  
1489  /* Read data from quotafile - avoid pagecache and such because we cannot afford
1490   * acquiring the locks... As quota files are never truncated and quota code
1491   * itself serializes the operations (and no one else should touch the files)
1492   * we don't have to be afraid of races */
1493  static ssize_t ext2_quota_read(struct super_block *sb, int type, char *data,
1494  			       size_t len, loff_t off)
1495  {
1496  	struct inode *inode = sb_dqopt(sb)->files[type];
1497  	sector_t blk = off >> EXT2_BLOCK_SIZE_BITS(sb);
1498  	int err = 0;
1499  	int offset = off & (sb->s_blocksize - 1);
1500  	int tocopy;
1501  	size_t toread;
1502  	struct buffer_head tmp_bh;
1503  	struct buffer_head *bh;
1504  	loff_t i_size = i_size_read(inode);
1505  
1506  	if (off > i_size)
1507  		return 0;
1508  	if (off+len > i_size)
1509  		len = i_size-off;
1510  	toread = len;
1511  	while (toread > 0) {
1512  		tocopy = min_t(size_t, sb->s_blocksize - offset, toread);
1513  
1514  		tmp_bh.b_state = 0;
1515  		tmp_bh.b_size = sb->s_blocksize;
1516  		err = ext2_get_block(inode, blk, &tmp_bh, 0);
1517  		if (err < 0)
1518  			return err;
1519  		if (!buffer_mapped(&tmp_bh))	/* A hole? */
1520  			memset(data, 0, tocopy);
1521  		else {
1522  			bh = sb_bread(sb, tmp_bh.b_blocknr);
1523  			if (!bh)
1524  				return -EIO;
1525  			memcpy(data, bh->b_data+offset, tocopy);
1526  			brelse(bh);
1527  		}
1528  		offset = 0;
1529  		toread -= tocopy;
1530  		data += tocopy;
1531  		blk++;
1532  	}
1533  	return len;
1534  }
1535  
1536  /* Write to quotafile */
1537  static ssize_t ext2_quota_write(struct super_block *sb, int type,
1538  				const char *data, size_t len, loff_t off)
1539  {
1540  	struct inode *inode = sb_dqopt(sb)->files[type];
1541  	sector_t blk = off >> EXT2_BLOCK_SIZE_BITS(sb);
1542  	int err = 0;
1543  	int offset = off & (sb->s_blocksize - 1);
1544  	int tocopy;
1545  	size_t towrite = len;
1546  	struct buffer_head tmp_bh;
1547  	struct buffer_head *bh;
1548  
1549  	while (towrite > 0) {
1550  		tocopy = min_t(size_t, sb->s_blocksize - offset, towrite);
1551  
1552  		tmp_bh.b_state = 0;
1553  		tmp_bh.b_size = sb->s_blocksize;
1554  		err = ext2_get_block(inode, blk, &tmp_bh, 1);
1555  		if (err < 0)
1556  			goto out;
1557  		if (offset || tocopy != EXT2_BLOCK_SIZE(sb))
1558  			bh = sb_bread(sb, tmp_bh.b_blocknr);
1559  		else
1560  			bh = sb_getblk(sb, tmp_bh.b_blocknr);
1561  		if (unlikely(!bh)) {
1562  			err = -EIO;
1563  			goto out;
1564  		}
1565  		lock_buffer(bh);
1566  		memcpy(bh->b_data+offset, data, tocopy);
1567  		flush_dcache_page(bh->b_page);
1568  		set_buffer_uptodate(bh);
1569  		mark_buffer_dirty(bh);
1570  		unlock_buffer(bh);
1571  		brelse(bh);
1572  		offset = 0;
1573  		towrite -= tocopy;
1574  		data += tocopy;
1575  		blk++;
1576  	}
1577  out:
1578  	if (len == towrite)
1579  		return err;
1580  	if (inode->i_size < off+len-towrite)
1581  		i_size_write(inode, off+len-towrite);
1582  	inode_inc_iversion(inode);
1583  	inode->i_mtime = inode->i_ctime = current_time(inode);
1584  	mark_inode_dirty(inode);
1585  	return len - towrite;
1586  }
1587  
1588  static int ext2_quota_on(struct super_block *sb, int type, int format_id,
1589  			 const struct path *path)
1590  {
1591  	int err;
1592  	struct inode *inode;
1593  
1594  	err = dquot_quota_on(sb, type, format_id, path);
1595  	if (err)
1596  		return err;
1597  
1598  	inode = d_inode(path->dentry);
1599  	inode_lock(inode);
1600  	EXT2_I(inode)->i_flags |= EXT2_NOATIME_FL | EXT2_IMMUTABLE_FL;
1601  	inode_set_flags(inode, S_NOATIME | S_IMMUTABLE,
1602  			S_NOATIME | S_IMMUTABLE);
1603  	inode_unlock(inode);
1604  	mark_inode_dirty(inode);
1605  
1606  	return 0;
1607  }
1608  
1609  static int ext2_quota_off(struct super_block *sb, int type)
1610  {
1611  	struct inode *inode = sb_dqopt(sb)->files[type];
1612  	int err;
1613  
1614  	if (!inode || !igrab(inode))
1615  		goto out;
1616  
1617  	err = dquot_quota_off(sb, type);
1618  	if (err)
1619  		goto out_put;
1620  
1621  	inode_lock(inode);
1622  	EXT2_I(inode)->i_flags &= ~(EXT2_NOATIME_FL | EXT2_IMMUTABLE_FL);
1623  	inode_set_flags(inode, 0, S_NOATIME | S_IMMUTABLE);
1624  	inode_unlock(inode);
1625  	mark_inode_dirty(inode);
1626  out_put:
1627  	iput(inode);
1628  	return err;
1629  out:
1630  	return dquot_quota_off(sb, type);
1631  }
1632  
1633  #endif
1634  
1635  static struct file_system_type ext2_fs_type = {
1636  	.owner		= THIS_MODULE,
1637  	.name		= "ext2",
1638  	.mount		= ext2_mount,
1639  	.kill_sb	= kill_block_super,
1640  	.fs_flags	= FS_REQUIRES_DEV,
1641  };
1642  MODULE_ALIAS_FS("ext2");
1643  
1644  static int __init init_ext2_fs(void)
1645  {
1646  	int err;
1647  
1648  	err = init_inodecache();
1649  	if (err)
1650  		return err;
1651  	err = register_filesystem(&ext2_fs_type);
1652  	if (err)
1653  		goto out;
1654  	return 0;
1655  out:
1656  	destroy_inodecache();
1657  	return err;
1658  }
1659  
1660  static void __exit exit_ext2_fs(void)
1661  {
1662  	unregister_filesystem(&ext2_fs_type);
1663  	destroy_inodecache();
1664  }
1665  
1666  MODULE_AUTHOR("Remy Card and others");
1667  MODULE_DESCRIPTION("Second Extended Filesystem");
1668  MODULE_LICENSE("GPL");
1669  module_init(init_ext2_fs)
1670  module_exit(exit_ext2_fs)
1671