/*
 * fs/f2fs/super.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/module.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/statfs.h>
#include <linux/buffer_head.h>
#include <linux/backing-dev.h>
#include <linux/kthread.h>
#include <linux/parser.h>
#include <linux/mount.h>
#include <linux/seq_file.h>
#include <linux/proc_fs.h>
#include <linux/random.h>
#include <linux/exportfs.h>
#include <linux/blkdev.h>
#include <linux/quotaops.h>
#include <linux/f2fs_fs.h>
#include <linux/sysfs.h>

#include "f2fs.h"
#include "node.h"
#include "segment.h"
#include "xattr.h"
#include "gc.h"
#include "trace.h"

#define CREATE_TRACE_POINTS
#include <trace/events/f2fs.h>

static struct kmem_cache *f2fs_inode_cachep;

#ifdef CONFIG_F2FS_FAULT_INJECTION

char *fault_name[FAULT_MAX] = {
	[FAULT_KMALLOC]		= "kmalloc",
	[FAULT_PAGE_ALLOC]	= "page alloc",
	[FAULT_ALLOC_NID]	= "alloc nid",
	[FAULT_ORPHAN]		= "orphan",
	[FAULT_BLOCK]		= "no more block",
	[FAULT_DIR_DEPTH]	= "too big dir depth",
	[FAULT_EVICT_INODE]	= "evict_inode fail",
	[FAULT_TRUNCATE]	= "truncate fail",
	[FAULT_IO]		= "IO error",
	[FAULT_CHECKPOINT]	= "checkpoint error",
};

static void f2fs_build_fault_attr(struct f2fs_sb_info *sbi,
						unsigned int rate)
{
	struct f2fs_fault_info *ffi = &sbi->fault_info;

	if (rate) {
		atomic_set(&ffi->inject_ops, 0);
		ffi->inject_rate = rate;
		ffi->inject_type = (1 << FAULT_MAX) - 1;
	} else {
		memset(ffi, 0, sizeof(struct f2fs_fault_info));
	}
}
#endif
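
/*
 * Illustrative note (not part of the code above): the fault pool is armed
 * from the "fault_injection=%u" mount option parsed below, e.g.
 *
 *	mount -t f2fs -o fault_injection=1000 /dev/sdb1 /mnt
 *
 * which, per f2fs_build_fault_attr(), injects roughly one fault per 1000
 * checked operations and enables every FAULT_* type at once.
 */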

/* f2fs-wide shrinker description */
static struct shrinker f2fs_shrinker_info = {
	.scan_objects = f2fs_shrink_scan,
	.count_objects = f2fs_shrink_count,
	.seeks = DEFAULT_SEEKS,
};

enum {
	Opt_gc_background,
	Opt_disable_roll_forward,
	Opt_norecovery,
	Opt_discard,
	Opt_nodiscard,
	Opt_noheap,
	Opt_heap,
	Opt_user_xattr,
	Opt_nouser_xattr,
	Opt_acl,
	Opt_noacl,
	Opt_active_logs,
	Opt_disable_ext_identify,
	Opt_inline_xattr,
	Opt_noinline_xattr,
	Opt_inline_data,
	Opt_inline_dentry,
	Opt_noinline_dentry,
	Opt_flush_merge,
	Opt_noflush_merge,
	Opt_nobarrier,
	Opt_fastboot,
	Opt_extent_cache,
	Opt_noextent_cache,
	Opt_noinline_data,
	Opt_data_flush,
	Opt_mode,
	Opt_io_size_bits,
	Opt_fault_injection,
	Opt_lazytime,
	Opt_nolazytime,
	Opt_usrquota,
	Opt_grpquota,
	Opt_err,
};

static match_table_t f2fs_tokens = {
	{Opt_gc_background, "background_gc=%s"},
	{Opt_disable_roll_forward, "disable_roll_forward"},
	{Opt_norecovery, "norecovery"},
	{Opt_discard, "discard"},
	{Opt_nodiscard, "nodiscard"},
	{Opt_noheap, "no_heap"},
	{Opt_heap, "heap"},
	{Opt_user_xattr, "user_xattr"},
	{Opt_nouser_xattr, "nouser_xattr"},
	{Opt_acl, "acl"},
	{Opt_noacl, "noacl"},
	{Opt_active_logs, "active_logs=%u"},
	{Opt_disable_ext_identify, "disable_ext_identify"},
	{Opt_inline_xattr, "inline_xattr"},
	{Opt_noinline_xattr, "noinline_xattr"},
	{Opt_inline_data, "inline_data"},
	{Opt_inline_dentry, "inline_dentry"},
	{Opt_noinline_dentry, "noinline_dentry"},
	{Opt_flush_merge, "flush_merge"},
	{Opt_noflush_merge, "noflush_merge"},
	{Opt_nobarrier, "nobarrier"},
	{Opt_fastboot, "fastboot"},
	{Opt_extent_cache, "extent_cache"},
	{Opt_noextent_cache, "noextent_cache"},
	{Opt_noinline_data, "noinline_data"},
	{Opt_data_flush, "data_flush"},
	{Opt_mode, "mode=%s"},
	{Opt_io_size_bits, "io_bits=%u"},
	{Opt_fault_injection, "fault_injection=%u"},
	{Opt_lazytime, "lazytime"},
	{Opt_nolazytime, "nolazytime"},
	{Opt_usrquota, "usrquota"},
	{Opt_grpquota, "grpquota"},
	{Opt_err, NULL},
};
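
/*
 * Example (hypothetical) option string this table matches:
 *
 *	"background_gc=on,discard,inline_data,active_logs=6,mode=adaptive"
 *
 * Each comma-separated token is resolved against f2fs_tokens by
 * parse_options() below via match_token().
 */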

void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...)
{
	struct va_format vaf;
	va_list args;

	va_start(args, fmt);
	vaf.fmt = fmt;
	vaf.va = &args;
	printk("%sF2FS-fs (%s): %pV\n", level, sb->s_id, &vaf);
	va_end(args);
}

static void init_once(void *foo)
{
	struct f2fs_inode_info *fi = (struct f2fs_inode_info *) foo;

	inode_init_once(&fi->vfs_inode);
}

static int parse_options(struct super_block *sb, char *options)
{
	struct f2fs_sb_info *sbi = F2FS_SB(sb);
	struct request_queue *q;
	substring_t args[MAX_OPT_ARGS];
	char *p, *name;
	int arg = 0;

	if (!options)
		return 0;

	while ((p = strsep(&options, ",")) != NULL) {
		int token;
		if (!*p)
			continue;
		/*
		 * Initialize args struct so we know whether arg was
		 * found; some options take optional arguments.
		 */
		args[0].to = args[0].from = NULL;
		token = match_token(p, f2fs_tokens, args);

		switch (token) {
		case Opt_gc_background:
			name = match_strdup(&args[0]);

			if (!name)
				return -ENOMEM;
			if (strlen(name) == 2 && !strncmp(name, "on", 2)) {
				set_opt(sbi, BG_GC);
				clear_opt(sbi, FORCE_FG_GC);
			} else if (strlen(name) == 3 && !strncmp(name, "off", 3)) {
				clear_opt(sbi, BG_GC);
				clear_opt(sbi, FORCE_FG_GC);
			} else if (strlen(name) == 4 && !strncmp(name, "sync", 4)) {
				set_opt(sbi, BG_GC);
				set_opt(sbi, FORCE_FG_GC);
			} else {
				kfree(name);
				return -EINVAL;
			}
			kfree(name);
			break;
		case Opt_disable_roll_forward:
			set_opt(sbi, DISABLE_ROLL_FORWARD);
			break;
		case Opt_norecovery:
			/* this option is valid only for a read-only mount */
			set_opt(sbi, DISABLE_ROLL_FORWARD);
			if (!f2fs_readonly(sb))
				return -EINVAL;
			break;
		case Opt_discard:
			q = bdev_get_queue(sb->s_bdev);
			if (blk_queue_discard(q)) {
				set_opt(sbi, DISCARD);
			} else if (!f2fs_sb_mounted_blkzoned(sb)) {
				f2fs_msg(sb, KERN_WARNING,
					"mounting with \"discard\" option, but "
					"the device does not support discard");
			}
			break;
		case Opt_nodiscard:
			if (f2fs_sb_mounted_blkzoned(sb)) {
				f2fs_msg(sb, KERN_WARNING,
					"discard is required for zoned block devices");
				return -EINVAL;
			}
			clear_opt(sbi, DISCARD);
			break;
		case Opt_noheap:
			set_opt(sbi, NOHEAP);
			break;
		case Opt_heap:
			clear_opt(sbi, NOHEAP);
			break;
#ifdef CONFIG_F2FS_FS_XATTR
		case Opt_user_xattr:
			set_opt(sbi, XATTR_USER);
			break;
		case Opt_nouser_xattr:
			clear_opt(sbi, XATTR_USER);
			break;
		case Opt_inline_xattr:
			set_opt(sbi, INLINE_XATTR);
			break;
		case Opt_noinline_xattr:
			clear_opt(sbi, INLINE_XATTR);
			break;
#else
		case Opt_user_xattr:
			f2fs_msg(sb, KERN_INFO,
				"user_xattr options not supported");
			break;
		case Opt_nouser_xattr:
			f2fs_msg(sb, KERN_INFO,
				"nouser_xattr options not supported");
			break;
		case Opt_inline_xattr:
			f2fs_msg(sb, KERN_INFO,
				"inline_xattr options not supported");
			break;
		case Opt_noinline_xattr:
			f2fs_msg(sb, KERN_INFO,
				"noinline_xattr options not supported");
			break;
#endif
#ifdef CONFIG_F2FS_FS_POSIX_ACL
		case Opt_acl:
			set_opt(sbi, POSIX_ACL);
			break;
		case Opt_noacl:
			clear_opt(sbi, POSIX_ACL);
			break;
#else
		case Opt_acl:
			f2fs_msg(sb, KERN_INFO, "acl options not supported");
			break;
		case Opt_noacl:
			f2fs_msg(sb, KERN_INFO, "noacl options not supported");
			break;
#endif
		case Opt_active_logs:
			if (args->from && match_int(args, &arg))
				return -EINVAL;
			if (arg != 2 && arg != 4 && arg != NR_CURSEG_TYPE)
				return -EINVAL;
			sbi->active_logs = arg;
			break;
		case Opt_disable_ext_identify:
			set_opt(sbi, DISABLE_EXT_IDENTIFY);
			break;
		case Opt_inline_data:
			set_opt(sbi, INLINE_DATA);
			break;
		case Opt_inline_dentry:
			set_opt(sbi, INLINE_DENTRY);
			break;
		case Opt_noinline_dentry:
			clear_opt(sbi, INLINE_DENTRY);
			break;
		case Opt_flush_merge:
			set_opt(sbi, FLUSH_MERGE);
			break;
		case Opt_noflush_merge:
			clear_opt(sbi, FLUSH_MERGE);
			break;
		case Opt_nobarrier:
			set_opt(sbi, NOBARRIER);
			break;
		case Opt_fastboot:
			set_opt(sbi, FASTBOOT);
			break;
		case Opt_extent_cache:
			set_opt(sbi, EXTENT_CACHE);
			break;
		case Opt_noextent_cache:
			clear_opt(sbi, EXTENT_CACHE);
			break;
		case Opt_noinline_data:
			clear_opt(sbi, INLINE_DATA);
			break;
		case Opt_data_flush:
			set_opt(sbi, DATA_FLUSH);
			break;
		case Opt_mode:
			name = match_strdup(&args[0]);

			if (!name)
				return -ENOMEM;
			if (strlen(name) == 8 &&
					!strncmp(name, "adaptive", 8)) {
				if (f2fs_sb_mounted_blkzoned(sb)) {
					f2fs_msg(sb, KERN_WARNING,
						 "adaptive mode is not allowed with "
						 "zoned block device feature");
					kfree(name);
					return -EINVAL;
				}
				set_opt_mode(sbi, F2FS_MOUNT_ADAPTIVE);
			} else if (strlen(name) == 3 &&
					!strncmp(name, "lfs", 3)) {
				set_opt_mode(sbi, F2FS_MOUNT_LFS);
			} else {
				kfree(name);
				return -EINVAL;
			}
			kfree(name);
			break;
		case Opt_io_size_bits:
			if (args->from && match_int(args, &arg))
				return -EINVAL;
			if (arg > __ilog2_u32(BIO_MAX_PAGES)) {
				f2fs_msg(sb, KERN_WARNING,
					"Not support %d, larger than %d",
					1 << arg, BIO_MAX_PAGES);
				return -EINVAL;
			}
			sbi->write_io_size_bits = arg;
			break;
		case Opt_fault_injection:
			if (args->from && match_int(args, &arg))
				return -EINVAL;
#ifdef CONFIG_F2FS_FAULT_INJECTION
			f2fs_build_fault_attr(sbi, arg);
			set_opt(sbi, FAULT_INJECTION);
#else
			f2fs_msg(sb, KERN_INFO,
				"FAULT_INJECTION was not selected");
#endif
			break;
		case Opt_lazytime:
			sb->s_flags |= MS_LAZYTIME;
			break;
		case Opt_nolazytime:
			sb->s_flags &= ~MS_LAZYTIME;
			break;
#ifdef CONFIG_QUOTA
		case Opt_usrquota:
			set_opt(sbi, USRQUOTA);
			break;
		case Opt_grpquota:
			set_opt(sbi, GRPQUOTA);
			break;
#else
		case Opt_usrquota:
		case Opt_grpquota:
			f2fs_msg(sb, KERN_INFO,
					"quota operations not supported");
			break;
#endif
		default:
			f2fs_msg(sb, KERN_ERR,
				"Unrecognized mount option \"%s\" or missing value",
				p);
			return -EINVAL;
		}
	}

	if (F2FS_IO_SIZE_BITS(sbi) && !test_opt(sbi, LFS)) {
		f2fs_msg(sb, KERN_ERR,
				"Should set mode=lfs with %uKB-sized IO",
				F2FS_IO_SIZE_KB(sbi));
		return -EINVAL;
	}
	return 0;
}

static struct inode *f2fs_alloc_inode(struct super_block *sb)
{
	struct f2fs_inode_info *fi;

	fi = kmem_cache_alloc(f2fs_inode_cachep, GFP_F2FS_ZERO);
	if (!fi)
		return NULL;

	init_once((void *) fi);

	/* Initialize f2fs-specific inode info */
	fi->vfs_inode.i_version = 1;
	atomic_set(&fi->dirty_pages, 0);
	fi->i_current_depth = 1;
	fi->i_advise = 0;
	init_rwsem(&fi->i_sem);
	INIT_LIST_HEAD(&fi->dirty_list);
	INIT_LIST_HEAD(&fi->gdirty_list);
	INIT_LIST_HEAD(&fi->inmem_pages);
	mutex_init(&fi->inmem_lock);
	init_rwsem(&fi->dio_rwsem[READ]);
	init_rwsem(&fi->dio_rwsem[WRITE]);
	init_rwsem(&fi->i_mmap_sem);

#ifdef CONFIG_QUOTA
	memset(&fi->i_dquot, 0, sizeof(fi->i_dquot));
	fi->i_reserved_quota = 0;
#endif
	/* Will be used by directory only */
	fi->i_dir_level = F2FS_SB(sb)->dir_level;
	return &fi->vfs_inode;
}

static int f2fs_drop_inode(struct inode *inode)
{
	int ret;
	/*
	 * This is to avoid a deadlock condition like below.
	 * writeback_single_inode(inode)
	 *  - f2fs_write_data_page
	 *    - f2fs_gc -> iput -> evict
	 *       - inode_wait_for_writeback(inode)
	 */
	if (!inode_unhashed(inode) && (inode->i_state & I_SYNC)) {
		if (!inode->i_nlink && !is_bad_inode(inode)) {
			/* avoid calling evict_inode simultaneously */
			atomic_inc(&inode->i_count);
			spin_unlock(&inode->i_lock);

			/* any remaining atomic pages should be discarded */
			if (f2fs_is_atomic_file(inode))
				drop_inmem_pages(inode);

			/* keep fi->extent_tree around for writepage */
			f2fs_destroy_extent_node(inode);

			sb_start_intwrite(inode->i_sb);
			f2fs_i_size_write(inode, 0);

			if (F2FS_HAS_BLOCKS(inode))
				f2fs_truncate(inode);

			sb_end_intwrite(inode->i_sb);

			fscrypt_put_encryption_info(inode, NULL);
			spin_lock(&inode->i_lock);
			atomic_dec(&inode->i_count);
		}
		trace_f2fs_drop_inode(inode, 0);
		return 0;
	}
	ret = generic_drop_inode(inode);
	trace_f2fs_drop_inode(inode, ret);
	return ret;
}

int f2fs_inode_dirtied(struct inode *inode, bool sync)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	int ret = 0;

	spin_lock(&sbi->inode_lock[DIRTY_META]);
	if (is_inode_flag_set(inode, FI_DIRTY_INODE)) {
		ret = 1;
	} else {
		set_inode_flag(inode, FI_DIRTY_INODE);
		stat_inc_dirty_inode(sbi, DIRTY_META);
	}
	if (sync && list_empty(&F2FS_I(inode)->gdirty_list)) {
		list_add_tail(&F2FS_I(inode)->gdirty_list,
				&sbi->inode_list[DIRTY_META]);
		inc_page_count(sbi, F2FS_DIRTY_IMETA);
	}
	spin_unlock(&sbi->inode_lock[DIRTY_META]);
	return ret;
}

void f2fs_inode_synced(struct inode *inode)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);

	spin_lock(&sbi->inode_lock[DIRTY_META]);
	if (!is_inode_flag_set(inode, FI_DIRTY_INODE)) {
		spin_unlock(&sbi->inode_lock[DIRTY_META]);
		return;
	}
	if (!list_empty(&F2FS_I(inode)->gdirty_list)) {
		list_del_init(&F2FS_I(inode)->gdirty_list);
		dec_page_count(sbi, F2FS_DIRTY_IMETA);
	}
	clear_inode_flag(inode, FI_DIRTY_INODE);
	clear_inode_flag(inode, FI_AUTO_RECOVER);
	stat_dec_dirty_inode(F2FS_I_SB(inode), DIRTY_META);
	spin_unlock(&sbi->inode_lock[DIRTY_META]);
}

/*
 * f2fs_dirty_inode() is called from __mark_inode_dirty()
 *
 * We should mark the inode dirty here so that it is written back
 * through write_inode().
 */
static void f2fs_dirty_inode(struct inode *inode, int flags)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);

	if (inode->i_ino == F2FS_NODE_INO(sbi) ||
			inode->i_ino == F2FS_META_INO(sbi))
		return;

	if (flags == I_DIRTY_TIME)
		return;

	if (is_inode_flag_set(inode, FI_AUTO_RECOVER))
		clear_inode_flag(inode, FI_AUTO_RECOVER);

	f2fs_inode_dirtied(inode, false);
}

static void f2fs_i_callback(struct rcu_head *head)
{
	struct inode *inode = container_of(head, struct inode, i_rcu);
	kmem_cache_free(f2fs_inode_cachep, F2FS_I(inode));
}

static void f2fs_destroy_inode(struct inode *inode)
{
	call_rcu(&inode->i_rcu, f2fs_i_callback);
}

static void destroy_percpu_info(struct f2fs_sb_info *sbi)
{
	percpu_counter_destroy(&sbi->alloc_valid_block_count);
	percpu_counter_destroy(&sbi->total_valid_inode_count);
}

static void destroy_device_list(struct f2fs_sb_info *sbi)
{
	int i;

	for (i = 0; i < sbi->s_ndevs; i++) {
		blkdev_put(FDEV(i).bdev, FMODE_EXCL);
#ifdef CONFIG_BLK_DEV_ZONED
		kfree(FDEV(i).blkz_type);
#endif
	}
	kfree(sbi->devs);
}

static void f2fs_quota_off_umount(struct super_block *sb);
static void f2fs_put_super(struct super_block *sb)
{
	struct f2fs_sb_info *sbi = F2FS_SB(sb);
	int i;

	f2fs_quota_off_umount(sb);

	/* prevent remaining shrinker jobs */
	mutex_lock(&sbi->umount_mutex);

	/*
	 * We don't need to do a checkpoint when the superblock is clean.
	 * But if the previous checkpoint was not done by umount, we need
	 * to do a clean checkpoint again.
	 */
	if (is_sbi_flag_set(sbi, SBI_IS_DIRTY) ||
			!is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) {
		struct cp_control cpc = {
			.reason = CP_UMOUNT,
		};
		write_checkpoint(sbi, &cpc);
	}

	/* be sure to wait for any on-going discard commands */
	f2fs_wait_discard_bios(sbi);

	if (f2fs_discard_en(sbi) && !sbi->discard_blks) {
		struct cp_control cpc = {
			.reason = CP_UMOUNT | CP_TRIMMED,
		};
		write_checkpoint(sbi, &cpc);
	}

	/* write_checkpoint can update stat information */
	f2fs_destroy_stats(sbi);

	/*
	 * Normally the superblock is clean, so we need to release the ino
	 * entries here. In addition, when an EIO has caused the checkpoint
	 * to be skipped, we need this as well.
	 */
	release_ino_entry(sbi, true);

	f2fs_leave_shrinker(sbi);
	mutex_unlock(&sbi->umount_mutex);

	/* in our cp_error case, we can wait for any writeback page */
	f2fs_flush_merged_writes(sbi);

	iput(sbi->node_inode);
	iput(sbi->meta_inode);

	/* destroy f2fs internal modules */
	destroy_node_manager(sbi);
	destroy_segment_manager(sbi);

	kfree(sbi->ckpt);

	f2fs_exit_sysfs(sbi);

	sb->s_fs_info = NULL;
	if (sbi->s_chksum_driver)
		crypto_free_shash(sbi->s_chksum_driver);
	kfree(sbi->raw_super);

	destroy_device_list(sbi);
	mempool_destroy(sbi->write_io_dummy);
	destroy_percpu_info(sbi);
	for (i = 0; i < NR_PAGE_TYPE; i++)
		kfree(sbi->write_io[i]);
	kfree(sbi);
}

int f2fs_sync_fs(struct super_block *sb, int sync)
{
	struct f2fs_sb_info *sbi = F2FS_SB(sb);
	int err = 0;

	trace_f2fs_sync_fs(sb, sync);

	if (sync) {
		struct cp_control cpc;

		cpc.reason = __get_cp_reason(sbi);

		mutex_lock(&sbi->gc_mutex);
		err = write_checkpoint(sbi, &cpc);
		mutex_unlock(&sbi->gc_mutex);
	}
	f2fs_trace_ios(NULL, 1);

	return err;
}

static int f2fs_freeze(struct super_block *sb)
{
	if (f2fs_readonly(sb))
		return 0;

	/* IO error happened before */
	if (unlikely(f2fs_cp_error(F2FS_SB(sb))))
		return -EIO;

	/* must be clean, since sync_filesystem() was already called */
	if (is_sbi_flag_set(F2FS_SB(sb), SBI_IS_DIRTY))
		return -EINVAL;
	return 0;
}

static int f2fs_unfreeze(struct super_block *sb)
{
	return 0;
}

static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf)
{
	struct super_block *sb = dentry->d_sb;
	struct f2fs_sb_info *sbi = F2FS_SB(sb);
	u64 id = huge_encode_dev(sb->s_bdev->bd_dev);
	block_t total_count, user_block_count, start_count, ovp_count;
	u64 avail_node_count;

	total_count = le64_to_cpu(sbi->raw_super->block_count);
	user_block_count = sbi->user_block_count;
	start_count = le32_to_cpu(sbi->raw_super->segment0_blkaddr);
	ovp_count = SM_I(sbi)->ovp_segments << sbi->log_blocks_per_seg;
	buf->f_type = F2FS_SUPER_MAGIC;
	buf->f_bsize = sbi->blocksize;

	buf->f_blocks = total_count - start_count;
	buf->f_bfree = user_block_count - valid_user_blocks(sbi) + ovp_count;
	buf->f_bavail = user_block_count - valid_user_blocks(sbi) -
						sbi->reserved_blocks;

	avail_node_count = sbi->total_node_count - F2FS_RESERVED_NODE_NUM;

	if (avail_node_count > user_block_count) {
		buf->f_files = user_block_count;
		buf->f_ffree = buf->f_bavail;
	} else {
		buf->f_files = avail_node_count;
		buf->f_ffree = min(avail_node_count - valid_node_count(sbi),
					buf->f_bavail);
	}

	buf->f_namelen = F2FS_NAME_LEN;
	buf->f_fsid.val[0] = (u32)id;
	buf->f_fsid.val[1] = (u32)(id >> 32);

	return 0;
}

static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
{
	struct f2fs_sb_info *sbi = F2FS_SB(root->d_sb);

	if (!f2fs_readonly(sbi->sb) && test_opt(sbi, BG_GC)) {
		if (test_opt(sbi, FORCE_FG_GC))
			seq_printf(seq, ",background_gc=%s", "sync");
		else
			seq_printf(seq, ",background_gc=%s", "on");
	} else {
		seq_printf(seq, ",background_gc=%s", "off");
	}
	if (test_opt(sbi, DISABLE_ROLL_FORWARD))
		seq_puts(seq, ",disable_roll_forward");
	if (test_opt(sbi, DISCARD))
		seq_puts(seq, ",discard");
	if (test_opt(sbi, NOHEAP))
		seq_puts(seq, ",no_heap");
	else
		seq_puts(seq, ",heap");
#ifdef CONFIG_F2FS_FS_XATTR
	if (test_opt(sbi, XATTR_USER))
		seq_puts(seq, ",user_xattr");
	else
		seq_puts(seq, ",nouser_xattr");
	if (test_opt(sbi, INLINE_XATTR))
		seq_puts(seq, ",inline_xattr");
	else
		seq_puts(seq, ",noinline_xattr");
#endif
#ifdef CONFIG_F2FS_FS_POSIX_ACL
	if (test_opt(sbi, POSIX_ACL))
		seq_puts(seq, ",acl");
	else
		seq_puts(seq, ",noacl");
#endif
	if (test_opt(sbi, DISABLE_EXT_IDENTIFY))
		seq_puts(seq, ",disable_ext_identify");
	if (test_opt(sbi, INLINE_DATA))
		seq_puts(seq, ",inline_data");
	else
		seq_puts(seq, ",noinline_data");
	if (test_opt(sbi, INLINE_DENTRY))
		seq_puts(seq, ",inline_dentry");
	else
		seq_puts(seq, ",noinline_dentry");
	if (!f2fs_readonly(sbi->sb) && test_opt(sbi, FLUSH_MERGE))
		seq_puts(seq, ",flush_merge");
	if (test_opt(sbi, NOBARRIER))
		seq_puts(seq, ",nobarrier");
	if (test_opt(sbi, FASTBOOT))
		seq_puts(seq, ",fastboot");
	if (test_opt(sbi, EXTENT_CACHE))
		seq_puts(seq, ",extent_cache");
	else
		seq_puts(seq, ",noextent_cache");
	if (test_opt(sbi, DATA_FLUSH))
		seq_puts(seq, ",data_flush");

	seq_puts(seq, ",mode=");
	if (test_opt(sbi, ADAPTIVE))
		seq_puts(seq, "adaptive");
	else if (test_opt(sbi, LFS))
		seq_puts(seq, "lfs");
	seq_printf(seq, ",active_logs=%u", sbi->active_logs);
	if (F2FS_IO_SIZE_BITS(sbi))
		seq_printf(seq, ",io_size=%uKB", F2FS_IO_SIZE_KB(sbi));
#ifdef CONFIG_F2FS_FAULT_INJECTION
	if (test_opt(sbi, FAULT_INJECTION))
		seq_printf(seq, ",fault_injection=%u",
				sbi->fault_info.inject_rate);
#endif
#ifdef CONFIG_QUOTA
	if (test_opt(sbi, USRQUOTA))
		seq_puts(seq, ",usrquota");
	if (test_opt(sbi, GRPQUOTA))
		seq_puts(seq, ",grpquota");
#endif

	return 0;
}

static void default_options(struct f2fs_sb_info *sbi)
{
	/* init some FS parameters */
	sbi->active_logs = NR_CURSEG_TYPE;

	set_opt(sbi, BG_GC);
	set_opt(sbi, INLINE_XATTR);
	set_opt(sbi, INLINE_DATA);
	set_opt(sbi, INLINE_DENTRY);
	set_opt(sbi, EXTENT_CACHE);
	set_opt(sbi, NOHEAP);
	sbi->sb->s_flags |= MS_LAZYTIME;
	set_opt(sbi, FLUSH_MERGE);
	if (f2fs_sb_mounted_blkzoned(sbi->sb)) {
		set_opt_mode(sbi, F2FS_MOUNT_LFS);
		set_opt(sbi, DISCARD);
	} else {
		set_opt_mode(sbi, F2FS_MOUNT_ADAPTIVE);
	}

#ifdef CONFIG_F2FS_FS_XATTR
	set_opt(sbi, XATTR_USER);
#endif
#ifdef CONFIG_F2FS_FS_POSIX_ACL
	set_opt(sbi, POSIX_ACL);
#endif

#ifdef CONFIG_F2FS_FAULT_INJECTION
	f2fs_build_fault_attr(sbi, 0);
#endif
}
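
/*
 * A sketch of the net effect: with the xattr/ACL config options enabled,
 * the defaults above correspond roughly to mounting with
 *
 *	-o background_gc=on,inline_xattr,inline_data,inline_dentry,
 *	   extent_cache,no_heap,lazytime,flush_merge,user_xattr,acl,
 *	   active_logs=6,mode=adaptive
 *
 * (or mode=lfs,discard on zoned block devices).
 */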

static int f2fs_remount(struct super_block *sb, int *flags, char *data)
{
	struct f2fs_sb_info *sbi = F2FS_SB(sb);
	struct f2fs_mount_info org_mount_opt;
	unsigned long old_sb_flags;
	int err, active_logs;
	bool need_restart_gc = false;
	bool need_stop_gc = false;
	bool no_extent_cache = !test_opt(sbi, EXTENT_CACHE);
#ifdef CONFIG_F2FS_FAULT_INJECTION
	struct f2fs_fault_info ffi = sbi->fault_info;
#endif

	/*
	 * Save the old mount options in case we
	 * need to restore them.
	 */
	org_mount_opt = sbi->mount_opt;
	old_sb_flags = sb->s_flags;
	active_logs = sbi->active_logs;

	/* recover superblocks we couldn't write due to previous RO mount */
	if (!(*flags & MS_RDONLY) && is_sbi_flag_set(sbi, SBI_NEED_SB_WRITE)) {
		err = f2fs_commit_super(sbi, false);
		f2fs_msg(sb, KERN_INFO,
			"Try to recover all the superblocks, ret: %d", err);
		if (!err)
			clear_sbi_flag(sbi, SBI_NEED_SB_WRITE);
	}

	default_options(sbi);

	/* parse mount options */
	err = parse_options(sb, data);
	if (err)
		goto restore_opts;

	/*
	 * Both the previous and the new state of the filesystem are RO,
	 * so skip checking the GC and FLUSH_MERGE conditions.
	 */
	if (f2fs_readonly(sb) && (*flags & MS_RDONLY))
		goto skip;

	if (!f2fs_readonly(sb) && (*flags & MS_RDONLY)) {
		err = dquot_suspend(sb, -1);
		if (err < 0)
			goto restore_opts;
	} else {
		/* dquot_resume needs RW */
		sb->s_flags &= ~MS_RDONLY;
		dquot_resume(sb, -1);
	}

	/* disallow enabling/disabling extent_cache dynamically */
	if (no_extent_cache == !!test_opt(sbi, EXTENT_CACHE)) {
		err = -EINVAL;
		f2fs_msg(sbi->sb, KERN_WARNING,
				"switch extent_cache option is not allowed");
		goto restore_opts;
	}

	/*
	 * We stop the GC thread if the FS is mounted as RO
	 * or if background_gc=off is passed in the mount
	 * option. Also sync the filesystem.
	 */
	if ((*flags & MS_RDONLY) || !test_opt(sbi, BG_GC)) {
		if (sbi->gc_thread) {
			stop_gc_thread(sbi);
			need_restart_gc = true;
		}
	} else if (!sbi->gc_thread) {
		err = start_gc_thread(sbi);
		if (err)
			goto restore_opts;
		need_stop_gc = true;
	}

	if (*flags & MS_RDONLY) {
		writeback_inodes_sb(sb, WB_REASON_SYNC);
		sync_inodes_sb(sb);

		set_sbi_flag(sbi, SBI_IS_DIRTY);
		set_sbi_flag(sbi, SBI_IS_CLOSE);
		f2fs_sync_fs(sb, 1);
		clear_sbi_flag(sbi, SBI_IS_CLOSE);
	}

	/*
	 * We stop the issue_flush thread if the FS is mounted as RO
	 * or if flush_merge is not passed in the mount option.
	 */
	if ((*flags & MS_RDONLY) || !test_opt(sbi, FLUSH_MERGE)) {
		clear_opt(sbi, FLUSH_MERGE);
		destroy_flush_cmd_control(sbi, false);
	} else {
		err = create_flush_cmd_control(sbi);
		if (err)
			goto restore_gc;
	}
skip:
	/* Update the POSIXACL Flag */
	sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
		(test_opt(sbi, POSIX_ACL) ? MS_POSIXACL : 0);

	return 0;
restore_gc:
	if (need_restart_gc) {
		if (start_gc_thread(sbi))
			f2fs_msg(sbi->sb, KERN_WARNING,
				"background gc thread has stopped");
	} else if (need_stop_gc) {
		stop_gc_thread(sbi);
	}
restore_opts:
	sbi->mount_opt = org_mount_opt;
	sbi->active_logs = active_logs;
	sb->s_flags = old_sb_flags;
#ifdef CONFIG_F2FS_FAULT_INJECTION
	sbi->fault_info = ffi;
#endif
	return err;
}

#ifdef CONFIG_QUOTA
/* Read data from quotafile */
static ssize_t f2fs_quota_read(struct super_block *sb, int type, char *data,
			       size_t len, loff_t off)
{
	struct inode *inode = sb_dqopt(sb)->files[type];
	struct address_space *mapping = inode->i_mapping;
	block_t blkidx = F2FS_BYTES_TO_BLK(off);
	int offset = off & (sb->s_blocksize - 1);
	int tocopy;
	size_t toread;
	loff_t i_size = i_size_read(inode);
	struct page *page;
	char *kaddr;

	if (off > i_size)
		return 0;

	if (off + len > i_size)
		len = i_size - off;
	toread = len;
	while (toread > 0) {
		tocopy = min_t(unsigned long, sb->s_blocksize - offset, toread);
repeat:
		page = read_mapping_page(mapping, blkidx, NULL);
		if (IS_ERR(page))
			return PTR_ERR(page);

		lock_page(page);

		if (unlikely(page->mapping != mapping)) {
			f2fs_put_page(page, 1);
			goto repeat;
		}
		if (unlikely(!PageUptodate(page))) {
			f2fs_put_page(page, 1);
			return -EIO;
		}

		kaddr = kmap_atomic(page);
		memcpy(data, kaddr + offset, tocopy);
		kunmap_atomic(kaddr);
		f2fs_put_page(page, 1);

		offset = 0;
		toread -= tocopy;
		data += tocopy;
		blkidx++;
	}
	return len;
}

/* Write to quotafile */
static ssize_t f2fs_quota_write(struct super_block *sb, int type,
				const char *data, size_t len, loff_t off)
{
	struct inode *inode = sb_dqopt(sb)->files[type];
	struct address_space *mapping = inode->i_mapping;
	const struct address_space_operations *a_ops = mapping->a_ops;
	int offset = off & (sb->s_blocksize - 1);
	size_t towrite = len;
	struct page *page;
	char *kaddr;
	int err = 0;
	int tocopy;

	while (towrite > 0) {
		tocopy = min_t(unsigned long, sb->s_blocksize - offset,
								towrite);

		err = a_ops->write_begin(NULL, mapping, off, tocopy, 0,
							&page, NULL);
		if (unlikely(err))
			break;

		kaddr = kmap_atomic(page);
		memcpy(kaddr + offset, data, tocopy);
		kunmap_atomic(kaddr);
		flush_dcache_page(page);

		a_ops->write_end(NULL, mapping, off, tocopy, tocopy,
						page, NULL);
		offset = 0;
		towrite -= tocopy;
		off += tocopy;
		data += tocopy;
		cond_resched();
	}

	if (len == towrite)
		return err;
	inode->i_version++;
	inode->i_mtime = inode->i_ctime = current_time(inode);
	f2fs_mark_inode_dirty_sync(inode, false);
	return len - towrite;
}

static struct dquot **f2fs_get_dquots(struct inode *inode)
{
	return F2FS_I(inode)->i_dquot;
}

static qsize_t *f2fs_get_reserved_space(struct inode *inode)
{
	return &F2FS_I(inode)->i_reserved_quota;
}

static int f2fs_quota_sync(struct super_block *sb, int type)
{
	struct quota_info *dqopt = sb_dqopt(sb);
	int cnt;
	int ret;

	ret = dquot_writeback_dquots(sb, type);
	if (ret)
		return ret;

	/*
	 * Now when everything is written we can discard the pagecache so
	 * that userspace sees the changes.
	 */
	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
		if (type != -1 && cnt != type)
			continue;
		if (!sb_has_quota_active(sb, cnt))
			continue;

		ret = filemap_write_and_wait(dqopt->files[cnt]->i_mapping);
		if (ret)
			return ret;

		inode_lock(dqopt->files[cnt]);
		truncate_inode_pages(&dqopt->files[cnt]->i_data, 0);
		inode_unlock(dqopt->files[cnt]);
	}
	return 0;
}

static int f2fs_quota_on(struct super_block *sb, int type, int format_id,
							const struct path *path)
{
	struct inode *inode;
	int err;

	err = f2fs_quota_sync(sb, -1);
	if (err)
		return err;

	err = dquot_quota_on(sb, type, format_id, path);
	if (err)
		return err;

	inode = d_inode(path->dentry);

	inode_lock(inode);
	F2FS_I(inode)->i_flags |= FS_NOATIME_FL | FS_IMMUTABLE_FL;
	inode_set_flags(inode, S_NOATIME | S_IMMUTABLE,
					S_NOATIME | S_IMMUTABLE);
	inode_unlock(inode);
	f2fs_mark_inode_dirty_sync(inode, false);

	return 0;
}

static int f2fs_quota_off(struct super_block *sb, int type)
{
	struct inode *inode = sb_dqopt(sb)->files[type];
	int err;

	if (!inode || !igrab(inode))
		return dquot_quota_off(sb, type);

	f2fs_quota_sync(sb, -1);

	err = dquot_quota_off(sb, type);
	if (err)
		goto out_put;

	inode_lock(inode);
	F2FS_I(inode)->i_flags &= ~(FS_NOATIME_FL | FS_IMMUTABLE_FL);
	inode_set_flags(inode, 0, S_NOATIME | S_IMMUTABLE);
	inode_unlock(inode);
	f2fs_mark_inode_dirty_sync(inode, false);
out_put:
	iput(inode);
	return err;
}

static void f2fs_quota_off_umount(struct super_block *sb)
{
	int type;

	for (type = 0; type < MAXQUOTAS; type++)
		f2fs_quota_off(sb, type);
}

static const struct dquot_operations f2fs_quota_operations = {
	.get_reserved_space = f2fs_get_reserved_space,
	.write_dquot	= dquot_commit,
	.acquire_dquot	= dquot_acquire,
	.release_dquot	= dquot_release,
	.mark_dirty	= dquot_mark_dquot_dirty,
	.write_info	= dquot_commit_info,
	.alloc_dquot	= dquot_alloc,
	.destroy_dquot	= dquot_destroy,
	.get_next_id	= dquot_get_next_id,
};

static const struct quotactl_ops f2fs_quotactl_ops = {
	.quota_on	= f2fs_quota_on,
	.quota_off	= f2fs_quota_off,
	.quota_sync	= f2fs_quota_sync,
	.get_state	= dquot_get_state,
	.set_info	= dquot_set_dqinfo,
	.get_dqblk	= dquot_get_dqblk,
	.set_dqblk	= dquot_set_dqblk,
	.get_nextdqblk	= dquot_get_next_dqblk,
};
#else
static inline void f2fs_quota_off_umount(struct super_block *sb)
{
}
#endif

static struct super_operations f2fs_sops = {
	.alloc_inode	= f2fs_alloc_inode,
	.drop_inode	= f2fs_drop_inode,
	.destroy_inode	= f2fs_destroy_inode,
	.write_inode	= f2fs_write_inode,
	.dirty_inode	= f2fs_dirty_inode,
	.show_options	= f2fs_show_options,
#ifdef CONFIG_QUOTA
	.quota_read	= f2fs_quota_read,
	.quota_write	= f2fs_quota_write,
	.get_dquots	= f2fs_get_dquots,
#endif
	.evict_inode	= f2fs_evict_inode,
	.put_super	= f2fs_put_super,
	.sync_fs	= f2fs_sync_fs,
	.freeze_fs	= f2fs_freeze,
	.unfreeze_fs	= f2fs_unfreeze,
	.statfs		= f2fs_statfs,
	.remount_fs	= f2fs_remount,
};

#ifdef CONFIG_F2FS_FS_ENCRYPTION
static int f2fs_get_context(struct inode *inode, void *ctx, size_t len)
{
	return f2fs_getxattr(inode, F2FS_XATTR_INDEX_ENCRYPTION,
				F2FS_XATTR_NAME_ENCRYPTION_CONTEXT,
				ctx, len, NULL);
}

static int f2fs_set_context(struct inode *inode, const void *ctx, size_t len,
							void *fs_data)
{
	return f2fs_setxattr(inode, F2FS_XATTR_INDEX_ENCRYPTION,
				F2FS_XATTR_NAME_ENCRYPTION_CONTEXT,
				ctx, len, fs_data, XATTR_CREATE);
}

static unsigned f2fs_max_namelen(struct inode *inode)
{
	return S_ISLNK(inode->i_mode) ?
			inode->i_sb->s_blocksize : F2FS_NAME_LEN;
}

static const struct fscrypt_operations f2fs_cryptops = {
	.key_prefix	= "f2fs:",
	.get_context	= f2fs_get_context,
	.set_context	= f2fs_set_context,
	.is_encrypted	= f2fs_encrypted_inode,
	.empty_dir	= f2fs_empty_dir,
	.max_namelen	= f2fs_max_namelen,
};
#else
static const struct fscrypt_operations f2fs_cryptops = {
	.is_encrypted	= f2fs_encrypted_inode,
};
#endif

static struct inode *f2fs_nfs_get_inode(struct super_block *sb,
		u64 ino, u32 generation)
{
	struct f2fs_sb_info *sbi = F2FS_SB(sb);
	struct inode *inode;

	if (check_nid_range(sbi, ino))
		return ERR_PTR(-ESTALE);

	/*
	 * f2fs_iget isn't quite right if the inode is currently unallocated!
	 * However f2fs_iget currently does appropriate checks to handle stale
	 * inodes so everything is OK.
	 */
	inode = f2fs_iget(sb, ino);
	if (IS_ERR(inode))
		return ERR_CAST(inode);
	if (unlikely(generation && inode->i_generation != generation)) {
		/* we didn't find the right inode.. */
		iput(inode);
		return ERR_PTR(-ESTALE);
	}
	return inode;
}

static struct dentry *f2fs_fh_to_dentry(struct super_block *sb, struct fid *fid,
		int fh_len, int fh_type)
{
	return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
				    f2fs_nfs_get_inode);
}

static struct dentry *f2fs_fh_to_parent(struct super_block *sb, struct fid *fid,
		int fh_len, int fh_type)
{
	return generic_fh_to_parent(sb, fid, fh_len, fh_type,
				    f2fs_nfs_get_inode);
}

static const struct export_operations f2fs_export_ops = {
	.fh_to_dentry = f2fs_fh_to_dentry,
	.fh_to_parent = f2fs_fh_to_parent,
	.get_parent = f2fs_get_parent,
};

static loff_t max_file_blocks(void)
{
	loff_t result = (DEF_ADDRS_PER_INODE - F2FS_INLINE_XATTR_ADDRS);
	loff_t leaf_count = ADDRS_PER_BLOCK;

	/* two direct node blocks */
	result += (leaf_count * 2);

	/* two indirect node blocks */
	leaf_count *= NIDS_PER_BLOCK;
	result += (leaf_count * 2);

	/* one double indirect node block */
	leaf_count *= NIDS_PER_BLOCK;
	result += leaf_count;

	return result;
}
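
/*
 * Back-of-the-envelope check, assuming the usual 4KB-block geometry
 * (DEF_ADDRS_PER_INODE = 923, F2FS_INLINE_XATTR_ADDRS = 50,
 * ADDRS_PER_BLOCK = NIDS_PER_BLOCK = 1018 — mkfs defaults, not derived
 * from this file):
 *
 *	873 + 2*1018 + 2*1018^2 + 1018^3 ~= 1.06e9 blocks,
 *
 * i.e. a per-file size limit of roughly 3.9TiB.
 */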

static int __f2fs_commit_super(struct buffer_head *bh,
			struct f2fs_super_block *super)
{
	lock_buffer(bh);
	if (super)
		memcpy(bh->b_data + F2FS_SUPER_OFFSET, super, sizeof(*super));
	set_buffer_uptodate(bh);
	set_buffer_dirty(bh);
	unlock_buffer(bh);

	/* this is a rare case, so we can always write with FUA */
	return __sync_dirty_buffer(bh, REQ_SYNC | REQ_PREFLUSH | REQ_FUA);
}

static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi,
					struct buffer_head *bh)
{
	struct f2fs_super_block *raw_super = (struct f2fs_super_block *)
					(bh->b_data + F2FS_SUPER_OFFSET);
	struct super_block *sb = sbi->sb;
	u32 segment0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr);
	u32 cp_blkaddr = le32_to_cpu(raw_super->cp_blkaddr);
	u32 sit_blkaddr = le32_to_cpu(raw_super->sit_blkaddr);
	u32 nat_blkaddr = le32_to_cpu(raw_super->nat_blkaddr);
	u32 ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr);
	u32 main_blkaddr = le32_to_cpu(raw_super->main_blkaddr);
	u32 segment_count_ckpt = le32_to_cpu(raw_super->segment_count_ckpt);
	u32 segment_count_sit = le32_to_cpu(raw_super->segment_count_sit);
	u32 segment_count_nat = le32_to_cpu(raw_super->segment_count_nat);
	u32 segment_count_ssa = le32_to_cpu(raw_super->segment_count_ssa);
	u32 segment_count_main = le32_to_cpu(raw_super->segment_count_main);
	u32 segment_count = le32_to_cpu(raw_super->segment_count);
	u32 log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg);
	u64 main_end_blkaddr = main_blkaddr +
				(segment_count_main << log_blocks_per_seg);
	u64 seg_end_blkaddr = segment0_blkaddr +
				(segment_count << log_blocks_per_seg);

	if (segment0_blkaddr != cp_blkaddr) {
		f2fs_msg(sb, KERN_INFO,
			"Mismatch start address, segment0(%u) cp_blkaddr(%u)",
			segment0_blkaddr, cp_blkaddr);
		return true;
	}

	if (cp_blkaddr + (segment_count_ckpt << log_blocks_per_seg) !=
							sit_blkaddr) {
		f2fs_msg(sb, KERN_INFO,
			"Wrong CP boundary, start(%u) end(%u) blocks(%u)",
			cp_blkaddr, sit_blkaddr,
			segment_count_ckpt << log_blocks_per_seg);
		return true;
	}

	if (sit_blkaddr + (segment_count_sit << log_blocks_per_seg) !=
							nat_blkaddr) {
		f2fs_msg(sb, KERN_INFO,
			"Wrong SIT boundary, start(%u) end(%u) blocks(%u)",
			sit_blkaddr, nat_blkaddr,
			segment_count_sit << log_blocks_per_seg);
		return true;
	}

	if (nat_blkaddr + (segment_count_nat << log_blocks_per_seg) !=
							ssa_blkaddr) {
		f2fs_msg(sb, KERN_INFO,
			"Wrong NAT boundary, start(%u) end(%u) blocks(%u)",
			nat_blkaddr, ssa_blkaddr,
			segment_count_nat << log_blocks_per_seg);
		return true;
	}

	if (ssa_blkaddr + (segment_count_ssa << log_blocks_per_seg) !=
							main_blkaddr) {
		f2fs_msg(sb, KERN_INFO,
			"Wrong SSA boundary, start(%u) end(%u) blocks(%u)",
			ssa_blkaddr, main_blkaddr,
			segment_count_ssa << log_blocks_per_seg);
		return true;
	}

	if (main_end_blkaddr > seg_end_blkaddr) {
		f2fs_msg(sb, KERN_INFO,
			"Wrong MAIN_AREA boundary, start(%u) end(%u) block(%u)",
			main_blkaddr,
			segment0_blkaddr +
				(segment_count << log_blocks_per_seg),
			segment_count_main << log_blocks_per_seg);
		return true;
	} else if (main_end_blkaddr < seg_end_blkaddr) {
		int err = 0;
		char *res;

		/* fix in-memory information all the time */
		raw_super->segment_count = cpu_to_le32((main_end_blkaddr -
				segment0_blkaddr) >> log_blocks_per_seg);

		if (f2fs_readonly(sb) || bdev_read_only(sb->s_bdev)) {
			set_sbi_flag(sbi, SBI_NEED_SB_WRITE);
			res = "internally";
		} else {
			err = __f2fs_commit_super(bh, NULL);
			res = err ? "failed" : "done";
		}
		f2fs_msg(sb, KERN_INFO,
			"Fix alignment : %s, start(%u) end(%u) block(%u)",
			res, main_blkaddr,
			segment0_blkaddr +
				(segment_count << log_blocks_per_seg),
			segment_count_main << log_blocks_per_seg);
		if (err)
			return true;
	}
	return false;
}
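
/*
 * The checks above enforce the expected on-disk layout, where each
 * metadata area must end exactly where the next one begins:
 *
 *	+-----------+-----+-----+-----+-----+-----------+
 *	| SB (seg0) | CP  | SIT | NAT | SSA | MAIN ...  |
 *	+-----------+-----+-----+-----+-----+-----------+
 *
 * and MAIN may end before (but never after) the last segment, in which
 * case segment_count is trimmed to match.
 */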

static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
				struct buffer_head *bh)
{
	struct f2fs_super_block *raw_super = (struct f2fs_super_block *)
					(bh->b_data + F2FS_SUPER_OFFSET);
	struct super_block *sb = sbi->sb;
	unsigned int blocksize;

	if (F2FS_SUPER_MAGIC != le32_to_cpu(raw_super->magic)) {
		f2fs_msg(sb, KERN_INFO,
			"Magic Mismatch, valid(0x%x) - read(0x%x)",
			F2FS_SUPER_MAGIC, le32_to_cpu(raw_super->magic));
		return 1;
	}

	/* Currently, support only 4KB page cache size */
	if (F2FS_BLKSIZE != PAGE_SIZE) {
		f2fs_msg(sb, KERN_INFO,
			"Invalid page_cache_size (%lu), supports only 4KB",
			PAGE_SIZE);
		return 1;
	}

	/* Currently, support only 4KB block size */
	blocksize = 1 << le32_to_cpu(raw_super->log_blocksize);
	if (blocksize != F2FS_BLKSIZE) {
		f2fs_msg(sb, KERN_INFO,
			"Invalid blocksize (%u), supports only 4KB",
			blocksize);
		return 1;
	}

	/* check log blocks per segment */
	if (le32_to_cpu(raw_super->log_blocks_per_seg) != 9) {
		f2fs_msg(sb, KERN_INFO,
			"Invalid log blocks per segment (%u)",
			le32_to_cpu(raw_super->log_blocks_per_seg));
		return 1;
	}

	/* Currently, support 512/1024/2048/4096 bytes sector size */
	if (le32_to_cpu(raw_super->log_sectorsize) >
				F2FS_MAX_LOG_SECTOR_SIZE ||
		le32_to_cpu(raw_super->log_sectorsize) <
				F2FS_MIN_LOG_SECTOR_SIZE) {
		f2fs_msg(sb, KERN_INFO, "Invalid log sectorsize (%u)",
			le32_to_cpu(raw_super->log_sectorsize));
		return 1;
	}
	if (le32_to_cpu(raw_super->log_sectors_per_block) +
		le32_to_cpu(raw_super->log_sectorsize) !=
			F2FS_MAX_LOG_SECTOR_SIZE) {
		f2fs_msg(sb, KERN_INFO,
			"Invalid log sectors per block(%u) log sectorsize(%u)",
			le32_to_cpu(raw_super->log_sectors_per_block),
			le32_to_cpu(raw_super->log_sectorsize));
		return 1;
	}

	/* check reserved ino info */
	if (le32_to_cpu(raw_super->node_ino) != 1 ||
		le32_to_cpu(raw_super->meta_ino) != 2 ||
		le32_to_cpu(raw_super->root_ino) != 3) {
		f2fs_msg(sb, KERN_INFO,
			"Invalid Fs Meta Ino: node(%u) meta(%u) root(%u)",
			le32_to_cpu(raw_super->node_ino),
			le32_to_cpu(raw_super->meta_ino),
			le32_to_cpu(raw_super->root_ino));
		return 1;
	}

	if (le32_to_cpu(raw_super->segment_count) > F2FS_MAX_SEGMENT) {
		f2fs_msg(sb, KERN_INFO,
			"Invalid segment count (%u)",
			le32_to_cpu(raw_super->segment_count));
		return 1;
	}

	/* check CP/SIT/NAT/SSA/MAIN_AREA area boundary */
	if (sanity_check_area_boundary(sbi, bh))
		return 1;

	return 0;
}

int sanity_check_ckpt(struct f2fs_sb_info *sbi)
{
	unsigned int total, fsmeta;
	struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
	unsigned int ovp_segments, reserved_segments;
	unsigned int main_segs, blocks_per_seg;
	int i;

	total = le32_to_cpu(raw_super->segment_count);
	fsmeta = le32_to_cpu(raw_super->segment_count_ckpt);
	fsmeta += le32_to_cpu(raw_super->segment_count_sit);
	fsmeta += le32_to_cpu(raw_super->segment_count_nat);
	fsmeta += le32_to_cpu(ckpt->rsvd_segment_count);
	fsmeta += le32_to_cpu(raw_super->segment_count_ssa);

	if (unlikely(fsmeta >= total))
		return 1;

	ovp_segments = le32_to_cpu(ckpt->overprov_segment_count);
	reserved_segments = le32_to_cpu(ckpt->rsvd_segment_count);

	if (unlikely(fsmeta < F2FS_MIN_SEGMENTS ||
			ovp_segments == 0 || reserved_segments == 0)) {
		f2fs_msg(sbi->sb, KERN_ERR,
			"Wrong layout: check mkfs.f2fs version");
		return 1;
	}

	main_segs = le32_to_cpu(raw_super->segment_count_main);
	blocks_per_seg = sbi->blocks_per_seg;

	for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) {
		if (le32_to_cpu(ckpt->cur_node_segno[i]) >= main_segs ||
			le16_to_cpu(ckpt->cur_node_blkoff[i]) >= blocks_per_seg)
			return 1;
	}
	for (i = 0; i < NR_CURSEG_DATA_TYPE; i++) {
		if (le32_to_cpu(ckpt->cur_data_segno[i]) >= main_segs ||
			le16_to_cpu(ckpt->cur_data_blkoff[i]) >= blocks_per_seg)
			return 1;
	}

	if (unlikely(f2fs_cp_error(sbi))) {
		f2fs_msg(sbi->sb, KERN_ERR, "A bug case: need to run fsck");
		return 1;
	}
	return 0;
}

static void init_sb_info(struct f2fs_sb_info *sbi)
{
	struct f2fs_super_block *raw_super = sbi->raw_super;
	int i, j;

	sbi->log_sectors_per_block =
		le32_to_cpu(raw_super->log_sectors_per_block);
	sbi->log_blocksize = le32_to_cpu(raw_super->log_blocksize);
	sbi->blocksize = 1 << sbi->log_blocksize;
	sbi->log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg);
	sbi->blocks_per_seg = 1 << sbi->log_blocks_per_seg;
	sbi->segs_per_sec = le32_to_cpu(raw_super->segs_per_sec);
	sbi->secs_per_zone = le32_to_cpu(raw_super->secs_per_zone);
	sbi->total_sections = le32_to_cpu(raw_super->section_count);
	sbi->total_node_count =
		(le32_to_cpu(raw_super->segment_count_nat) / 2)
			* sbi->blocks_per_seg * NAT_ENTRY_PER_BLOCK;
	sbi->root_ino_num = le32_to_cpu(raw_super->root_ino);
	sbi->node_ino_num = le32_to_cpu(raw_super->node_ino);
	sbi->meta_ino_num = le32_to_cpu(raw_super->meta_ino);
	sbi->cur_victim_sec = NULL_SECNO;
	sbi->max_victim_search = DEF_MAX_VICTIM_SEARCH;

	sbi->dir_level = DEF_DIR_LEVEL;
	sbi->interval_time[CP_TIME] = DEF_CP_INTERVAL;
	sbi->interval_time[REQ_TIME] = DEF_IDLE_INTERVAL;
	clear_sbi_flag(sbi, SBI_NEED_FSCK);

	for (i = 0; i < NR_COUNT_TYPE; i++)
		atomic_set(&sbi->nr_pages[i], 0);

	atomic_set(&sbi->wb_sync_req, 0);

	INIT_LIST_HEAD(&sbi->s_list);
	mutex_init(&sbi->umount_mutex);
	for (i = 0; i < NR_PAGE_TYPE - 1; i++)
		for (j = HOT; j < NR_TEMP_TYPE; j++)
			mutex_init(&sbi->wio_mutex[i][j]);
	spin_lock_init(&sbi->cp_lock);
}

static int init_percpu_info(struct f2fs_sb_info *sbi)
{
	int err;

	err = percpu_counter_init(&sbi->alloc_valid_block_count, 0, GFP_KERNEL);
	if (err)
		return err;

	return percpu_counter_init(&sbi->total_valid_inode_count, 0,
								GFP_KERNEL);
}

#ifdef CONFIG_BLK_DEV_ZONED
static int init_blkz_info(struct f2fs_sb_info *sbi, int devi)
{
	struct block_device *bdev = FDEV(devi).bdev;
	sector_t nr_sectors = bdev->bd_part->nr_sects;
	sector_t sector = 0;
	struct blk_zone *zones;
	unsigned int i, nr_zones;
	unsigned int n = 0;
	int err = -EIO;

	if (!f2fs_sb_mounted_blkzoned(sbi->sb))
		return 0;

	if (sbi->blocks_per_blkz && sbi->blocks_per_blkz !=
				SECTOR_TO_BLOCK(bdev_zone_sectors(bdev)))
		return -EINVAL;
	sbi->blocks_per_blkz = SECTOR_TO_BLOCK(bdev_zone_sectors(bdev));
	if (sbi->log_blocks_per_blkz && sbi->log_blocks_per_blkz !=
				__ilog2_u32(sbi->blocks_per_blkz))
		return -EINVAL;
	sbi->log_blocks_per_blkz = __ilog2_u32(sbi->blocks_per_blkz);
	FDEV(devi).nr_blkz = SECTOR_TO_BLOCK(nr_sectors) >>
					sbi->log_blocks_per_blkz;
	if (nr_sectors & (bdev_zone_sectors(bdev) - 1))
		FDEV(devi).nr_blkz++;

	FDEV(devi).blkz_type = kmalloc(FDEV(devi).nr_blkz, GFP_KERNEL);
	if (!FDEV(devi).blkz_type)
		return -ENOMEM;

#define F2FS_REPORT_NR_ZONES   4096

	zones = kcalloc(F2FS_REPORT_NR_ZONES, sizeof(struct blk_zone),
			GFP_KERNEL);
	if (!zones)
		return -ENOMEM;

	/* Get block zones type */
	while (zones && sector < nr_sectors) {

		nr_zones = F2FS_REPORT_NR_ZONES;
		err = blkdev_report_zones(bdev, sector,
					  zones, &nr_zones,
					  GFP_KERNEL);
		if (err)
			break;
		if (!nr_zones) {
			err = -EIO;
			break;
		}

		for (i = 0; i < nr_zones; i++) {
			FDEV(devi).blkz_type[n] = zones[i].type;
			sector += zones[i].len;
			n++;
		}
	}

	kfree(zones);

	return err;
}
#endif

/*
 * Read the f2fs raw super block.
 * Because we keep two copies of the super block, read both of them
 * to get the first valid one. If either is broken, we pass a
 * recovery flag back to the caller.
 */
static int read_raw_super_block(struct f2fs_sb_info *sbi,
			struct f2fs_super_block **raw_super,
			int *valid_super_block, int *recovery)
{
	struct super_block *sb = sbi->sb;
	int block;
	struct buffer_head *bh;
	struct f2fs_super_block *super;
	int err = 0;

	super = kzalloc(sizeof(struct f2fs_super_block), GFP_KERNEL);
	if (!super)
		return -ENOMEM;

	for (block = 0; block < 2; block++) {
		bh = sb_bread(sb, block);
		if (!bh) {
			f2fs_msg(sb, KERN_ERR, "Unable to read %dth superblock",
				block + 1);
			err = -EIO;
			continue;
		}

		/* sanity checking of raw super */
		if (sanity_check_raw_super(sbi, bh)) {
			f2fs_msg(sb, KERN_ERR,
				"Can't find valid F2FS filesystem in %dth superblock",
				block + 1);
			err = -EINVAL;
			brelse(bh);
			continue;
		}

		if (!*raw_super) {
			memcpy(super, bh->b_data + F2FS_SUPER_OFFSET,
							sizeof(*super));
			*valid_super_block = block;
			*raw_super = super;
		}
		brelse(bh);
	}

	/* failed to read or validate one of the superblocks */
	if (err < 0)
		*recovery = 1;

	/* No valid superblock */
	if (!*raw_super)
		kfree(super);
	else
		err = 0;

	return err;
}

int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover)
{
	struct buffer_head *bh;
	int err;

	if ((recover && f2fs_readonly(sbi->sb)) ||
				bdev_read_only(sbi->sb->s_bdev)) {
		set_sbi_flag(sbi, SBI_NEED_SB_WRITE);
		return -EROFS;
	}

	/* write back-up superblock first */
	bh = sb_getblk(sbi->sb, sbi->valid_super_block ? 0 : 1);
	if (!bh)
		return -EIO;
	err = __f2fs_commit_super(bh, F2FS_RAW_SUPER(sbi));
	brelse(bh);

	/* if we are in recovery path, skip writing valid superblock */
	if (recover || err)
		return err;

	/* write current valid superblock */
	bh = sb_getblk(sbi->sb, sbi->valid_super_block);
	if (!bh)
		return -EIO;
	err = __f2fs_commit_super(bh, F2FS_RAW_SUPER(sbi));
	brelse(bh);
	return err;
}

static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
{
	struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
	unsigned int max_devices = MAX_DEVICES;
	int i;

	/* Initialize single device information */
	if (!RDEV(0).path[0]) {
		if (!bdev_is_zoned(sbi->sb->s_bdev))
			return 0;
		max_devices = 1;
	}

	/*
	 * Initialize multiple devices information, or single
	 * zoned block device information.
	 */
	sbi->devs = kcalloc(max_devices, sizeof(struct f2fs_dev_info),
				GFP_KERNEL);
	if (!sbi->devs)
		return -ENOMEM;

	for (i = 0; i < max_devices; i++) {

		if (i > 0 && !RDEV(i).path[0])
			break;

		if (max_devices == 1) {
			/* Single zoned block device mount */
			FDEV(0).bdev =
				blkdev_get_by_dev(sbi->sb->s_bdev->bd_dev,
					sbi->sb->s_mode, sbi->sb->s_type);
		} else {
			/* Multi-device mount */
			memcpy(FDEV(i).path, RDEV(i).path, MAX_PATH_LEN);
			FDEV(i).total_segments =
				le32_to_cpu(RDEV(i).total_segments);
			if (i == 0) {
				FDEV(i).start_blk = 0;
				FDEV(i).end_blk = FDEV(i).start_blk +
				    (FDEV(i).total_segments <<
				    sbi->log_blocks_per_seg) - 1 +
				    le32_to_cpu(raw_super->segment0_blkaddr);
			} else {
				FDEV(i).start_blk = FDEV(i - 1).end_blk + 1;
				FDEV(i).end_blk = FDEV(i).start_blk +
					(FDEV(i).total_segments <<
					sbi->log_blocks_per_seg) - 1;
			}
			FDEV(i).bdev = blkdev_get_by_path(FDEV(i).path,
					sbi->sb->s_mode, sbi->sb->s_type);
		}
		if (IS_ERR(FDEV(i).bdev))
			return PTR_ERR(FDEV(i).bdev);

		/* to release errored devices */
		sbi->s_ndevs = i + 1;

#ifdef CONFIG_BLK_DEV_ZONED
		if (bdev_zoned_model(FDEV(i).bdev) == BLK_ZONED_HM &&
				!f2fs_sb_mounted_blkzoned(sbi->sb)) {
			f2fs_msg(sbi->sb, KERN_ERR,
				"Zoned block device feature not enabled");
			return -EINVAL;
		}
		if (bdev_zoned_model(FDEV(i).bdev) != BLK_ZONED_NONE) {
			if (init_blkz_info(sbi, i)) {
				f2fs_msg(sbi->sb, KERN_ERR,
					"Failed to initialize F2FS blkzone information");
				return -EINVAL;
			}
			if (max_devices == 1)
				break;
			f2fs_msg(sbi->sb, KERN_INFO,
				"Mount Device [%2d]: %20s, %8u, %8x - %8x (zone: %s)",
				i, FDEV(i).path,
				FDEV(i).total_segments,
				FDEV(i).start_blk, FDEV(i).end_blk,
				bdev_zoned_model(FDEV(i).bdev) == BLK_ZONED_HA ?
				"Host-aware" : "Host-managed");
			continue;
		}
#endif
		f2fs_msg(sbi->sb, KERN_INFO,
			"Mount Device [%2d]: %20s, %8u, %8x - %8x",
				i, FDEV(i).path,
				FDEV(i).total_segments,
				FDEV(i).start_blk, FDEV(i).end_blk);
	}
	f2fs_msg(sbi->sb, KERN_INFO,
			"IO Block Size: %8d KB", F2FS_IO_SIZE_KB(sbi));
	return 0;
}
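
/*
 * Note: for a multi-device mount the per-device [start_blk, end_blk]
 * ranges computed above are contiguous, so the devices form one linear
 * block address space; e.g. (hypothetical sizes)
 *
 *	dev0: 0x0     - 0x1ffff
 *	dev1: 0x20000 - 0x3ffff
 */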
1877 
1878 static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
1879 {
1880 	struct f2fs_sb_info *sbi;
1881 	struct f2fs_super_block *raw_super;
1882 	struct inode *root;
1883 	int err;
1884 	bool retry = true, need_fsck = false;
1885 	char *options = NULL;
1886 	int recovery, i, valid_super_block;
1887 	struct curseg_info *seg_i;
1888 
1889 try_onemore:
1890 	err = -EINVAL;
1891 	raw_super = NULL;
1892 	valid_super_block = -1;
1893 	recovery = 0;
1894 
1895 	/* allocate memory for f2fs-specific super block info */
1896 	sbi = kzalloc(sizeof(struct f2fs_sb_info), GFP_KERNEL);
1897 	if (!sbi)
1898 		return -ENOMEM;
1899 
1900 	sbi->sb = sb;
1901 
1902 	/* Load the checksum driver */
1903 	sbi->s_chksum_driver = crypto_alloc_shash("crc32", 0, 0);
1904 	if (IS_ERR(sbi->s_chksum_driver)) {
1905 		f2fs_msg(sb, KERN_ERR, "Cannot load crc32 driver.");
1906 		err = PTR_ERR(sbi->s_chksum_driver);
1907 		sbi->s_chksum_driver = NULL;
1908 		goto free_sbi;
1909 	}
1910 
1911 	/* set a block size */
1912 	if (unlikely(!sb_set_blocksize(sb, F2FS_BLKSIZE))) {
1913 		f2fs_msg(sb, KERN_ERR, "unable to set blocksize");
1914 		goto free_sbi;
1915 	}
1916 
1917 	err = read_raw_super_block(sbi, &raw_super, &valid_super_block,
1918 								&recovery);
1919 	if (err)
1920 		goto free_sbi;
1921 
1922 	sb->s_fs_info = sbi;
1923 	sbi->raw_super = raw_super;
1924 
1925 	/*
1926 	 * The BLKZONED feature indicates that the drive was formatted with
1927 	 * zone alignment optimization. This is optional for host-aware
1928 	 * devices, but mandatory for host-managed zoned block devices.
1929 	 */
1930 #ifndef CONFIG_BLK_DEV_ZONED
1931 	if (f2fs_sb_mounted_blkzoned(sb)) {
1932 		f2fs_msg(sb, KERN_ERR,
1933 			 "Zoned block device support is not enabled\n");
1934 		err = -EOPNOTSUPP;
1935 		goto free_sb_buf;
1936 	}
1937 #endif
1938 	default_options(sbi);
1939 	/* parse mount options */
1940 	options = kstrdup((const char *)data, GFP_KERNEL);
1941 	if (data && !options) {
1942 		err = -ENOMEM;
1943 		goto free_sb_buf;
1944 	}
1945 
1946 	err = parse_options(sb, options);
1947 	if (err)
1948 		goto free_options;
1949 
1950 	sbi->max_file_blocks = max_file_blocks();
1951 	sb->s_maxbytes = sbi->max_file_blocks <<
1952 				le32_to_cpu(raw_super->log_blocksize);
1953 	sb->s_max_links = F2FS_LINK_MAX;
1954 	get_random_bytes(&sbi->s_next_generation, sizeof(u32));
1955 
1956 #ifdef CONFIG_QUOTA
1957 	sb->dq_op = &f2fs_quota_operations;
1958 	sb->s_qcop = &f2fs_quotactl_ops;
1959 	sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP;
1960 #endif
1961 
1962 	sb->s_op = &f2fs_sops;
1963 	sb->s_cop = &f2fs_cryptops;
1964 	sb->s_xattr = f2fs_xattr_handlers;
1965 	sb->s_export_op = &f2fs_export_ops;
1966 	sb->s_magic = F2FS_SUPER_MAGIC;
1967 	sb->s_time_gran = 1;
1968 	sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
1969 		(test_opt(sbi, POSIX_ACL) ? MS_POSIXACL : 0);
1970 	memcpy(&sb->s_uuid, raw_super->uuid, sizeof(raw_super->uuid));
1971 
1972 	/* init f2fs-specific super block info */
1973 	sbi->valid_super_block = valid_super_block;
1974 	mutex_init(&sbi->gc_mutex);
1975 	mutex_init(&sbi->cp_mutex);
1976 	init_rwsem(&sbi->node_write);
1977 	init_rwsem(&sbi->node_change);
1978 
1979 	/* disallow all the data/node/meta page writes */
1980 	set_sbi_flag(sbi, SBI_POR_DOING);
1981 	spin_lock_init(&sbi->stat_lock);
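	/*
	 * Per-page-type write contexts: META keeps a single bio context,
	 * while DATA and NODE keep one per temperature (HOT/WARM/COLD),
	 * which is why n below is 1 for META and NR_TEMP_TYPE otherwise.
	 */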
1982 
1983 	for (i = 0; i < NR_PAGE_TYPE; i++) {
1984 		int n = (i == META) ? 1 : NR_TEMP_TYPE;
1985 		int j;
1986 
1987 		sbi->write_io[i] = kmalloc(n * sizeof(struct f2fs_bio_info),
1988 								GFP_KERNEL);
1989 		if (!sbi->write_io[i]) {
1990 			err = -ENOMEM;
1991 			goto free_options;
1992 		}
1993 
1994 		for (j = HOT; j < n; j++) {
1995 			init_rwsem(&sbi->write_io[i][j].io_rwsem);
1996 			sbi->write_io[i][j].sbi = sbi;
1997 			sbi->write_io[i][j].bio = NULL;
1998 			spin_lock_init(&sbi->write_io[i][j].io_lock);
1999 			INIT_LIST_HEAD(&sbi->write_io[i][j].io_list);
2000 		}
2001 	}
2002 
2003 	init_rwsem(&sbi->cp_rwsem);
2004 	init_waitqueue_head(&sbi->cp_wait);
2005 	init_sb_info(sbi);
2006 
2007 	err = init_percpu_info(sbi);
2008 	if (err)
2009 		goto free_options;
2010 
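	/*
	 * When the io_size_bits mount option is in effect, write bios are
	 * padded up to the aligned IO size; the mempool below supplies
	 * those padding (dummy) pages.
	 */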
2011 	if (F2FS_IO_SIZE(sbi) > 1) {
2012 		sbi->write_io_dummy =
2013 			mempool_create_page_pool(2 * (F2FS_IO_SIZE(sbi) - 1), 0);
2014 		if (!sbi->write_io_dummy) {
2015 			err = -ENOMEM;
2016 			goto free_options;
2017 		}
2018 	}
2019 
2020 	/* get an inode for meta space */
2021 	sbi->meta_inode = f2fs_iget(sb, F2FS_META_INO(sbi));
2022 	if (IS_ERR(sbi->meta_inode)) {
2023 		f2fs_msg(sb, KERN_ERR, "Failed to read F2FS meta data inode");
2024 		err = PTR_ERR(sbi->meta_inode);
2025 		goto free_io_dummy;
2026 	}
2027 
2028 	err = get_valid_checkpoint(sbi);
2029 	if (err) {
2030 		f2fs_msg(sb, KERN_ERR, "Failed to get valid F2FS checkpoint");
2031 		goto free_meta_inode;
2032 	}
2033 
2034 	/* Initialize device list */
2035 	err = f2fs_scan_devices(sbi);
2036 	if (err) {
2037 		f2fs_msg(sb, KERN_ERR, "Failed to find devices");
2038 		goto free_devices;
2039 	}
2040 
2041 	sbi->total_valid_node_count =
2042 				le32_to_cpu(sbi->ckpt->valid_node_count);
2043 	percpu_counter_set(&sbi->total_valid_inode_count,
2044 				le32_to_cpu(sbi->ckpt->valid_inode_count));
2045 	sbi->user_block_count = le64_to_cpu(sbi->ckpt->user_block_count);
2046 	sbi->total_valid_block_count =
2047 				le64_to_cpu(sbi->ckpt->valid_block_count);
2048 	sbi->last_valid_block_count = sbi->total_valid_block_count;
2049 	sbi->reserved_blocks = 0;
2050 
2051 	for (i = 0; i < NR_INODE_TYPE; i++) {
2052 		INIT_LIST_HEAD(&sbi->inode_list[i]);
2053 		spin_lock_init(&sbi->inode_lock[i]);
2054 	}
2055 
2056 	init_extent_cache_info(sbi);
2057 
2058 	init_ino_entry_info(sbi);
2059 
2060 	/* setup f2fs internal modules */
2061 	err = build_segment_manager(sbi);
2062 	if (err) {
2063 		f2fs_msg(sb, KERN_ERR,
2064 			"Failed to initialize F2FS segment manager");
2065 		goto free_sm;
2066 	}
2067 	err = build_node_manager(sbi);
2068 	if (err) {
2069 		f2fs_msg(sb, KERN_ERR,
2070 			"Failed to initialize F2FS node manager");
2071 		goto free_nm;
2072 	}
2073 
2074 	/* For write statistics */
2075 	if (sb->s_bdev->bd_part)
2076 		sbi->sectors_written_start =
2077 			(u64)part_stat_read(sb->s_bdev->bd_part, sectors[1]);
2078 
2079 	/* Read accumulated write IO statistics, if they exist */
2080 	seg_i = CURSEG_I(sbi, CURSEG_HOT_NODE);
2081 	if (__exist_node_summaries(sbi))
2082 		sbi->kbytes_written =
2083 			le64_to_cpu(seg_i->journal->info.kbytes_written);
2084 
2085 	build_gc_manager(sbi);
2086 
2087 	/* get an inode for node space */
2088 	sbi->node_inode = f2fs_iget(sb, F2FS_NODE_INO(sbi));
2089 	if (IS_ERR(sbi->node_inode)) {
2090 		f2fs_msg(sb, KERN_ERR, "Failed to read node inode");
2091 		err = PTR_ERR(sbi->node_inode);
2092 		goto free_nm;
2093 	}
2094 
2095 	f2fs_join_shrinker(sbi);
2096 
2097 	err = f2fs_build_stats(sbi);
2098 	if (err)
2099 		goto free_nm;
2100 
2101 	/* if there are any orphan nodes, free them */
2102 	err = recover_orphan_inodes(sbi);
2103 	if (err)
2104 		goto free_node_inode;
2105 
2106 	/* read root inode and dentry */
2107 	root = f2fs_iget(sb, F2FS_ROOT_INO(sbi));
2108 	if (IS_ERR(root)) {
2109 		f2fs_msg(sb, KERN_ERR, "Failed to read root inode");
2110 		err = PTR_ERR(root);
2111 		goto free_node_inode;
2112 	}
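	/* a root that is not a directory, or has no blocks/size, is corrupt */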
2113 	if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
2114 		iput(root);
2115 		err = -EINVAL;
2116 		goto free_node_inode;
2117 	}
2118 
2119 	sb->s_root = d_make_root(root); /* allocate root dentry */
2120 	if (!sb->s_root) {
2121 		err = -ENOMEM;
2122 		goto free_root_inode;
2123 	}
2124 
2125 	err = f2fs_init_sysfs(sbi);
2126 	if (err)
2127 		goto free_root_inode;
2128 
2129 	/* recover fsynced data */
2130 	if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) {
2131 		/*
2132 		 * The mount must fail when the device is read-only and the
2133 		 * previous checkpoint was not taken by a clean system shutdown.
2134 		 */
2135 		if (bdev_read_only(sb->s_bdev) &&
2136 				!is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) {
2137 			err = -EROFS;
2138 			goto free_sysfs;
2139 		}
2140 
2141 		if (need_fsck)
2142 			set_sbi_flag(sbi, SBI_NEED_FSCK);
2143 
2144 		if (!retry)
2145 			goto skip_recovery;
2146 
2147 		err = recover_fsync_data(sbi, false);
2148 		if (err < 0) {
2149 			need_fsck = true;
2150 			f2fs_msg(sb, KERN_ERR,
2151 				"Cannot recover all fsync data errno=%d", err);
2152 			goto free_sysfs;
2153 		}
2154 	} else {
2155 		err = recover_fsync_data(sbi, true);
2156 
2157 		if (!f2fs_readonly(sb) && err > 0) {
2158 			err = -EINVAL;
2159 			f2fs_msg(sb, KERN_ERR,
2160 				"Need to recover fsync data");
2161 			goto free_sysfs;
2162 		}
2163 	}
2164 skip_recovery:
2165 	/* recover_fsync_data() cleared this already */
2166 	clear_sbi_flag(sbi, SBI_POR_DOING);
2167 
2168 	/*
2169 	 * If the filesystem is not mounted read-only,
2170 	 * start the background GC thread.
2171 	 */
2172 	if (test_opt(sbi, BG_GC) && !f2fs_readonly(sb)) {
2173 		/* After POR, we can run the background GC thread. */
2174 		err = start_gc_thread(sbi);
2175 		if (err)
2176 			goto free_sysfs;
2177 	}
2178 	kfree(options);
2179 
2180 	/* recover broken superblock */
2181 	if (recovery) {
2182 		err = f2fs_commit_super(sbi, true);
2183 		f2fs_msg(sb, KERN_INFO,
2184 			"Try to recover %dth superblock, ret: %d",
2185 			sbi->valid_super_block ? 1 : 2, err);
2186 	}
2187 
2188 	f2fs_msg(sbi->sb, KERN_NOTICE, "Mounted with checkpoint version = %llx",
2189 				cur_cp_version(F2FS_CKPT(sbi)));
2190 	f2fs_update_time(sbi, CP_TIME);
2191 	f2fs_update_time(sbi, REQ_TIME);
2192 	return 0;
2193 
2194 free_sysfs:
2195 	f2fs_sync_inode_meta(sbi);
2196 	f2fs_exit_sysfs(sbi);
2197 free_root_inode:
2198 	dput(sb->s_root);
2199 	sb->s_root = NULL;
2200 free_node_inode:
2201 	truncate_inode_pages_final(NODE_MAPPING(sbi));
2202 	mutex_lock(&sbi->umount_mutex);
2203 	release_ino_entry(sbi, true);
2204 	f2fs_leave_shrinker(sbi);
2205 	/*
2206 	 * Some dirty meta pages can be left behind when recover_orphan_inodes()
2207 	 * fails with EIO. Then, iput(node_inode) can trigger balance_fs_bg()
2208 	 * followed by write_checkpoint() through f2fs_write_node_pages(), which
2209 	 * falls into an infinite loop in sync_meta_pages().
2210 	 */
2211 	truncate_inode_pages_final(META_MAPPING(sbi));
2212 	iput(sbi->node_inode);
2213 	mutex_unlock(&sbi->umount_mutex);
2214 	f2fs_destroy_stats(sbi);
2215 free_nm:
2216 	destroy_node_manager(sbi);
2217 free_sm:
2218 	destroy_segment_manager(sbi);
2219 free_devices:
2220 	destroy_device_list(sbi);
2221 	kfree(sbi->ckpt);
2222 free_meta_inode:
2223 	make_bad_inode(sbi->meta_inode);
2224 	iput(sbi->meta_inode);
2225 free_io_dummy:
2226 	mempool_destroy(sbi->write_io_dummy);
2227 free_options:
2228 	for (i = 0; i < NR_PAGE_TYPE; i++)
2229 		kfree(sbi->write_io[i]);
2230 	destroy_percpu_info(sbi);
2231 	kfree(options);
2232 free_sb_buf:
2233 	kfree(raw_super);
2234 free_sbi:
2235 	if (sbi->s_chksum_driver)
2236 		crypto_free_shash(sbi->s_chksum_driver);
2237 	kfree(sbi);
2238 
2239 	/* give only one more chance */
2240 	if (retry) {
2241 		retry = false;
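		/* drop dentries instantiated during the failed attempt */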
2242 		shrink_dcache_sb(sb);
2243 		goto try_onemore;
2244 	}
2245 	return err;
2246 }
2247 
2248 static struct dentry *f2fs_mount(struct file_system_type *fs_type, int flags,
2249 			const char *dev_name, void *data)
2250 {
2251 	return mount_bdev(fs_type, flags, dev_name, data, f2fs_fill_super);
2252 }
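/*
 * mount_bdev() opens the backing block device and calls
 * f2fs_fill_super() with the raw option string, e.g. (illustrative):
 *
 *	# mount -t f2fs -o background_gc=on,discard /dev/sdb1 /mnt
 */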
2253 
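/*
 * Stop the background GC and discard threads before generic teardown;
 * sb->s_root is only set once fill_super succeeded, so a half-mounted
 * sb skips straight to kill_block_super().
 */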
2254 static void kill_f2fs_super(struct super_block *sb)
2255 {
2256 	if (sb->s_root) {
2257 		set_sbi_flag(F2FS_SB(sb), SBI_IS_CLOSE);
2258 		stop_gc_thread(F2FS_SB(sb));
2259 		stop_discard_thread(F2FS_SB(sb));
2260 	}
2261 	kill_block_super(sb);
2262 }
2263 
2264 static struct file_system_type f2fs_fs_type = {
2265 	.owner		= THIS_MODULE,
2266 	.name		= "f2fs",
2267 	.mount		= f2fs_mount,
2268 	.kill_sb	= kill_f2fs_super,
2269 	.fs_flags	= FS_REQUIRES_DEV,
2270 };
2271 MODULE_ALIAS_FS("f2fs");
2272 
2273 static int __init init_inodecache(void)
2274 {
2275 	f2fs_inode_cachep = kmem_cache_create("f2fs_inode_cache",
2276 			sizeof(struct f2fs_inode_info), 0,
2277 			SLAB_RECLAIM_ACCOUNT|SLAB_ACCOUNT, NULL);
2278 	if (!f2fs_inode_cachep)
2279 		return -ENOMEM;
2280 	return 0;
2281 }
2282 
2283 static void destroy_inodecache(void)
2284 {
2285 	/*
2286 	 * Make sure all delayed rcu free inodes are flushed before we
2287 	 * destroy cache.
2288 	 */
2289 	rcu_barrier();
2290 	kmem_cache_destroy(f2fs_inode_cachep);
2291 }
2292 
2293 static int __init init_f2fs_fs(void)
2294 {
2295 	int err;
2296 
2297 	f2fs_build_trace_ios();
2298 
2299 	err = init_inodecache();
2300 	if (err)
2301 		goto fail;
2302 	err = create_node_manager_caches();
2303 	if (err)
2304 		goto free_inodecache;
2305 	err = create_segment_manager_caches();
2306 	if (err)
2307 		goto free_node_manager_caches;
2308 	err = create_checkpoint_caches();
2309 	if (err)
2310 		goto free_segment_manager_caches;
2311 	err = create_extent_cache();
2312 	if (err)
2313 		goto free_checkpoint_caches;
2314 	err = f2fs_register_sysfs();
2315 	if (err)
2316 		goto free_extent_cache;
2317 	err = register_shrinker(&f2fs_shrinker_info);
2318 	if (err)
2319 		goto free_sysfs;
2320 	err = register_filesystem(&f2fs_fs_type);
2321 	if (err)
2322 		goto free_shrinker;
2323 	err = f2fs_create_root_stats();
2324 	if (err)
2325 		goto free_filesystem;
2326 	return 0;
2327 
2328 free_filesystem:
2329 	unregister_filesystem(&f2fs_fs_type);
2330 free_shrinker:
2331 	unregister_shrinker(&f2fs_shrinker_info);
2332 free_sysfs:
2333 	f2fs_unregister_sysfs();
2334 free_extent_cache:
2335 	destroy_extent_cache();
2336 free_checkpoint_caches:
2337 	destroy_checkpoint_caches();
2338 free_segment_manager_caches:
2339 	destroy_segment_manager_caches();
2340 free_node_manager_caches:
2341 	destroy_node_manager_caches();
2342 free_inodecache:
2343 	destroy_inodecache();
2344 fail:
2345 	return err;
2346 }
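/*
 * Note the init/teardown symmetry: each failure label above unwinds
 * exactly the steps that succeeded, in reverse registration order, and
 * exit_f2fs_fs() below performs the same teardown for module unload.
 */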
2347 
2348 static void __exit exit_f2fs_fs(void)
2349 {
2350 	f2fs_destroy_root_stats();
2351 	unregister_filesystem(&f2fs_fs_type);
2352 	unregister_shrinker(&f2fs_shrinker_info);
2353 	f2fs_unregister_sysfs();
2354 	destroy_extent_cache();
2355 	destroy_checkpoint_caches();
2356 	destroy_segment_manager_caches();
2357 	destroy_node_manager_caches();
2358 	destroy_inodecache();
2359 	f2fs_destroy_trace_ios();
2360 }
2361 
2362 module_init(init_f2fs_fs)
2363 module_exit(exit_f2fs_fs)
2364 
2365 MODULE_AUTHOR("Samsung Electronics's Praesto Team");
2366 MODULE_DESCRIPTION("Flash Friendly File System");
2367 MODULE_LICENSE("GPL");
2368 
2369