xref: /openbmc/linux/fs/f2fs/segment.c (revision 3c62be17d4f562f43fe1d03b48194399caa35aa5)
1 /*
2  * fs/f2fs/segment.c
3  *
4  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
5  *             http://www.samsung.com/
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License version 2 as
9  * published by the Free Software Foundation.
10  */
11 #include <linux/fs.h>
12 #include <linux/f2fs_fs.h>
13 #include <linux/bio.h>
14 #include <linux/blkdev.h>
15 #include <linux/prefetch.h>
16 #include <linux/kthread.h>
17 #include <linux/swap.h>
18 #include <linux/timer.h>
20 
21 #include "f2fs.h"
22 #include "segment.h"
23 #include "node.h"
24 #include "trace.h"
25 #include <trace/events/f2fs.h>
26 
27 #define __reverse_ffz(x) __reverse_ffs(~(x))
28 
29 static struct kmem_cache *discard_entry_slab;
30 static struct kmem_cache *bio_entry_slab;
31 static struct kmem_cache *sit_entry_set_slab;
32 static struct kmem_cache *inmem_entry_slab;
33 
34 static unsigned long __reverse_ulong(unsigned char *str)
35 {
36 	unsigned long tmp = 0;
37 	int shift = 24, idx = 0;
38 
39 #if BITS_PER_LONG == 64
40 	shift = 56;
41 #endif
42 	while (shift >= 0) {
43 		tmp |= (unsigned long)str[idx++] << shift;
44 		shift -= BITS_PER_BYTE;
45 	}
46 	return tmp;
47 }
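/*
 * Illustrative sketch (hypothetical values, BITS_PER_LONG == 64): the bytes
 * are read MSB-first, so f2fs bit 0, i.e. the MSB of byte 0 as set by
 * f2fs_set_bit, ends up as the MSB of the returned word:
 *
 *	unsigned char str[8] = { 0x80, 0, 0, 0, 0, 0, 0, 0 };
 *	__reverse_ulong(str) == 0x8000000000000000UL
 */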
48 
49 /*
50  * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since
51  * MSB and LSB are reversed in a byte by f2fs_set_bit.
52  */
53 static inline unsigned long __reverse_ffs(unsigned long word)
54 {
55 	int num = 0;
56 
57 #if BITS_PER_LONG == 64
58 	if ((word & 0xffffffff00000000UL) == 0)
59 		num += 32;
60 	else
61 		word >>= 32;
62 #endif
63 	if ((word & 0xffff0000) == 0)
64 		num += 16;
65 	else
66 		word >>= 16;
67 
68 	if ((word & 0xff00) == 0)
69 		num += 8;
70 	else
71 		word >>= 8;
72 
73 	if ((word & 0xf0) == 0)
74 		num += 4;
75 	else
76 		word >>= 4;
77 
78 	if ((word & 0xc) == 0)
79 		num += 2;
80 	else
81 		word >>= 2;
82 
83 	if ((word & 0x2) == 0)
84 		num += 1;
85 	return num;
86 }
87 
88 /*
89  * __find_rev_next(_zero)_bit is copied from lib/find_next_bit.c because
90  * f2fs_set_bit makes MSB and LSB reversed in a byte.
91  * @size must be a multiple of BITS_PER_LONG.
92  * Example:
93  *                             MSB <--> LSB
94  *   f2fs_set_bit(0, bitmap) => 1000 0000
95  *   f2fs_set_bit(7, bitmap) => 0000 0001
96  */
97 static unsigned long __find_rev_next_bit(const unsigned long *addr,
98 			unsigned long size, unsigned long offset)
99 {
100 	const unsigned long *p = addr + BIT_WORD(offset);
101 	unsigned long result = size;
102 	unsigned long tmp;
103 
104 	if (offset >= size)
105 		return size;
106 
107 	size -= (offset & ~(BITS_PER_LONG - 1));
108 	offset %= BITS_PER_LONG;
109 
110 	while (1) {
111 		if (*p == 0)
112 			goto pass;
113 
114 		tmp = __reverse_ulong((unsigned char *)p);
115 
116 		tmp &= ~0UL >> offset;
117 		if (size < BITS_PER_LONG)
118 			tmp &= (~0UL << (BITS_PER_LONG - size));
119 		if (tmp)
120 			goto found;
121 pass:
122 		if (size <= BITS_PER_LONG)
123 			break;
124 		size -= BITS_PER_LONG;
125 		offset = 0;
126 		p++;
127 	}
128 	return result;
129 found:
130 	return result - size + __reverse_ffs(tmp);
131 }
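/*
 * Worked example (a sketch; the byte values are hypothetical): after
 * f2fs_set_bit(0, map) and f2fs_set_bit(9, map) on a zeroed map, byte 0 is
 * 0x80 and byte 1 is 0x40, and
 *
 *	__find_rev_next_bit(map, 64, 1) == 9
 *
 * i.e. the search honors f2fs's reversed in-byte bit order, not the CPU's.
 */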
132 
133 static unsigned long __find_rev_next_zero_bit(const unsigned long *addr,
134 			unsigned long size, unsigned long offset)
135 {
136 	const unsigned long *p = addr + BIT_WORD(offset);
137 	unsigned long result = size;
138 	unsigned long tmp;
139 
140 	if (offset >= size)
141 		return size;
142 
143 	size -= (offset & ~(BITS_PER_LONG - 1));
144 	offset %= BITS_PER_LONG;
145 
146 	while (1) {
147 		if (*p == ~0UL)
148 			goto pass;
149 
150 		tmp = __reverse_ulong((unsigned char *)p);
151 
152 		if (offset)
153 			tmp |= ~0UL << (BITS_PER_LONG - offset);
154 		if (size < BITS_PER_LONG)
155 			tmp |= ~0UL >> size;
156 		if (tmp != ~0UL)
157 			goto found;
158 pass:
159 		if (size <= BITS_PER_LONG)
160 			break;
161 		size -= BITS_PER_LONG;
162 		offset = 0;
163 		p++;
164 	}
165 	return result;
166 found:
167 	return result - size + __reverse_ffz(tmp);
168 }
169 
170 void register_inmem_page(struct inode *inode, struct page *page)
171 {
172 	struct f2fs_inode_info *fi = F2FS_I(inode);
173 	struct inmem_pages *new;
174 
175 	f2fs_trace_pid(page);
176 
177 	set_page_private(page, (unsigned long)ATOMIC_WRITTEN_PAGE);
178 	SetPagePrivate(page);
179 
180 	new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS);
181 
182 	/* add atomic page indices to the list */
183 	new->page = page;
184 	INIT_LIST_HEAD(&new->list);
185 
186 	/* increase reference count with clean state */
187 	mutex_lock(&fi->inmem_lock);
188 	get_page(page);
189 	list_add_tail(&new->list, &fi->inmem_pages);
190 	inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
191 	mutex_unlock(&fi->inmem_lock);
192 
193 	trace_f2fs_register_inmem_page(page, INMEM);
194 }
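/*
 * A rough sketch of the surrounding flow: F2FS_IOC_START_ATOMIC_WRITE marks
 * the file atomic, its dirty pages are parked here via register_inmem_page()
 * from the set_page_dirty path, and they only reach disk when
 * F2FS_IOC_COMMIT_ATOMIC_WRITE ends up in commit_inmem_pages() below;
 * abandoning the file instead goes through drop_inmem_pages().
 */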
195 
196 static int __revoke_inmem_pages(struct inode *inode,
197 				struct list_head *head, bool drop, bool recover)
198 {
199 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
200 	struct inmem_pages *cur, *tmp;
201 	int err = 0;
202 
203 	list_for_each_entry_safe(cur, tmp, head, list) {
204 		struct page *page = cur->page;
205 
206 		if (drop)
207 			trace_f2fs_commit_inmem_page(page, INMEM_DROP);
208 
209 		lock_page(page);
210 
211 		if (recover) {
212 			struct dnode_of_data dn;
213 			struct node_info ni;
214 
215 			trace_f2fs_commit_inmem_page(page, INMEM_REVOKE);
216 
217 			set_new_dnode(&dn, inode, NULL, NULL, 0);
218 			if (get_dnode_of_data(&dn, page->index, LOOKUP_NODE)) {
219 				err = -EAGAIN;
220 				goto next;
221 			}
222 			get_node_info(sbi, dn.nid, &ni);
223 			f2fs_replace_block(sbi, &dn, dn.data_blkaddr,
224 					cur->old_addr, ni.version, true, true);
225 			f2fs_put_dnode(&dn);
226 		}
227 next:
228 		/* we don't need to invalidate this in the successful case */
229 		if (drop || recover)
230 			ClearPageUptodate(page);
231 		set_page_private(page, 0);
232 		ClearPagePrivate(page);
233 		f2fs_put_page(page, 1);
234 
235 		list_del(&cur->list);
236 		kmem_cache_free(inmem_entry_slab, cur);
237 		dec_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
238 	}
239 	return err;
240 }
241 
242 void drop_inmem_pages(struct inode *inode)
243 {
244 	struct f2fs_inode_info *fi = F2FS_I(inode);
245 
246 	clear_inode_flag(inode, FI_ATOMIC_FILE);
247 
248 	mutex_lock(&fi->inmem_lock);
249 	__revoke_inmem_pages(inode, &fi->inmem_pages, true, false);
250 	mutex_unlock(&fi->inmem_lock);
251 }
252 
253 static int __commit_inmem_pages(struct inode *inode,
254 					struct list_head *revoke_list)
255 {
256 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
257 	struct f2fs_inode_info *fi = F2FS_I(inode);
258 	struct inmem_pages *cur, *tmp;
259 	struct f2fs_io_info fio = {
260 		.sbi = sbi,
261 		.type = DATA,
262 		.op = REQ_OP_WRITE,
263 		.op_flags = WRITE_SYNC | REQ_PRIO,
264 		.encrypted_page = NULL,
265 	};
266 	bool submit_bio = false;
267 	int err = 0;
268 
269 	list_for_each_entry_safe(cur, tmp, &fi->inmem_pages, list) {
270 		struct page *page = cur->page;
271 
272 		lock_page(page);
273 		if (page->mapping == inode->i_mapping) {
274 			trace_f2fs_commit_inmem_page(page, INMEM);
275 
276 			set_page_dirty(page);
277 			f2fs_wait_on_page_writeback(page, DATA, true);
278 			if (clear_page_dirty_for_io(page)) {
279 				inode_dec_dirty_pages(inode);
280 				remove_dirty_inode(inode);
281 			}
282 
283 			fio.page = page;
284 			err = do_write_data_page(&fio);
285 			if (err) {
286 				unlock_page(page);
287 				break;
288 			}
289 
290 			/* record old blkaddr for revoking */
291 			cur->old_addr = fio.old_blkaddr;
292 
293 			clear_cold_data(page);
294 			submit_bio = true;
295 		}
296 		unlock_page(page);
297 		list_move_tail(&cur->list, revoke_list);
298 	}
299 
300 	if (submit_bio)
301 		f2fs_submit_merged_bio_cond(sbi, inode, NULL, 0, DATA, WRITE);
302 
303 	if (!err)
304 		__revoke_inmem_pages(inode, revoke_list, false, false);
305 
306 	return err;
307 }
308 
309 int commit_inmem_pages(struct inode *inode)
310 {
311 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
312 	struct f2fs_inode_info *fi = F2FS_I(inode);
313 	struct list_head revoke_list;
314 	int err;
315 
316 	INIT_LIST_HEAD(&revoke_list);
317 	f2fs_balance_fs(sbi, true);
318 	f2fs_lock_op(sbi);
319 
320 	mutex_lock(&fi->inmem_lock);
321 	err = __commit_inmem_pages(inode, &revoke_list);
322 	if (err) {
323 		int ret;
324 		/*
325 		 * Try to revoke all committed pages, but we could still fail
326 		 * due to lack of memory or some other reason; if that happens,
327 		 * -EAGAIN is returned, meaning the transaction has lost its
328 		 * integrity and the caller should use its journal to recover,
329 		 * or rewrite and commit the last transaction. For any other
330 		 * error number, revoking was done by the filesystem itself.
331 		 */
332 		ret = __revoke_inmem_pages(inode, &revoke_list, false, true);
333 		if (ret)
334 			err = ret;
335 
336 		/* drop all uncommitted pages */
337 		__revoke_inmem_pages(inode, &fi->inmem_pages, true, false);
338 	}
339 	mutex_unlock(&fi->inmem_lock);
340 
341 	f2fs_unlock_op(sbi);
342 	return err;
343 }
344 
345 /*
346  * This function balances dirty node and dentry pages.
347  * In addition, it controls garbage collection.
348  */
349 void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
350 {
351 #ifdef CONFIG_F2FS_FAULT_INJECTION
352 	if (time_to_inject(sbi, FAULT_CHECKPOINT))
353 		f2fs_stop_checkpoint(sbi, false);
354 #endif
355 
356 	if (!need)
357 		return;
358 
359 	/* f2fs_balance_fs_bg may have been left pending; kick it here */
360 	if (excess_cached_nats(sbi))
361 		f2fs_balance_fs_bg(sbi);
362 
363 	/*
364 	 * We should do GC, or end up with a checkpoint, if there are too many
365 	 * dirty dir/node pages and not enough free segments.
366 	 */
367 	if (has_not_enough_free_secs(sbi, 0, 0)) {
368 		mutex_lock(&sbi->gc_mutex);
369 		f2fs_gc(sbi, false);
370 	}
371 }
372 
373 void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
374 {
375 	/* try to shrink the extent cache when there is not enough memory */
376 	if (!available_free_memory(sbi, EXTENT_CACHE))
377 		f2fs_shrink_extent_tree(sbi, EXTENT_CACHE_SHRINK_NUMBER);
378 
379 	/* check the # of cached NAT entries */
380 	if (!available_free_memory(sbi, NAT_ENTRIES))
381 		try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK);
382 
383 	if (!available_free_memory(sbi, FREE_NIDS))
384 		try_to_free_nids(sbi, MAX_FREE_NIDS);
385 	else
386 		build_free_nids(sbi, false);
387 
388 	/* checkpoint is the only way to shrink partial cached entries */
389 	if (!available_free_memory(sbi, NAT_ENTRIES) ||
390 			!available_free_memory(sbi, INO_ENTRIES) ||
391 			excess_prefree_segs(sbi) ||
392 			excess_dirty_nats(sbi) ||
393 			(is_idle(sbi) && f2fs_time_over(sbi, CP_TIME))) {
394 		if (test_opt(sbi, DATA_FLUSH)) {
395 			struct blk_plug plug;
396 
397 			blk_start_plug(&plug);
398 			sync_dirty_inodes(sbi, FILE_INODE);
399 			blk_finish_plug(&plug);
400 		}
401 		f2fs_sync_fs(sbi->sb, true);
402 		stat_inc_bg_cp_count(sbi->stat_info);
403 	}
404 }
405 
406 static int __submit_flush_wait(struct block_device *bdev)
407 {
408 	struct bio *bio = f2fs_bio_alloc(0);
409 	int ret;
410 
411 	bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_FLUSH);
412 	bio->bi_bdev = bdev;
413 	ret = submit_bio_wait(bio);
414 	bio_put(bio);
415 	return ret;
416 }
417 
418 static int submit_flush_wait(struct f2fs_sb_info *sbi)
419 {
420 	int ret = __submit_flush_wait(sbi->sb->s_bdev);
421 	int i;
422 
423 	if (sbi->s_ndevs && !ret) {
424 		for (i = 1; i < sbi->s_ndevs; i++) {
425 			ret = __submit_flush_wait(FDEV(i).bdev);
426 			if (ret)
427 				break;
428 		}
429 	}
430 	return ret;
431 }
432 
433 static int issue_flush_thread(void *data)
434 {
435 	struct f2fs_sb_info *sbi = data;
436 	struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info;
437 	wait_queue_head_t *q = &fcc->flush_wait_queue;
438 repeat:
439 	if (kthread_should_stop())
440 		return 0;
441 
442 	if (!llist_empty(&fcc->issue_list)) {
443 		struct flush_cmd *cmd, *next;
444 		int ret;
445 
446 		fcc->dispatch_list = llist_del_all(&fcc->issue_list);
447 		fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list);
448 
449 		ret = submit_flush_wait(sbi);
450 		llist_for_each_entry_safe(cmd, next,
451 					  fcc->dispatch_list, llnode) {
452 			cmd->ret = ret;
453 			complete(&cmd->wait);
454 		}
455 		fcc->dispatch_list = NULL;
456 	}
457 
458 	wait_event_interruptible(*q,
459 		kthread_should_stop() || !llist_empty(&fcc->issue_list));
460 	goto repeat;
461 }
462 
463 int f2fs_issue_flush(struct f2fs_sb_info *sbi)
464 {
465 	struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info;
466 	struct flush_cmd cmd;
467 
468 	trace_f2fs_issue_flush(sbi->sb, test_opt(sbi, NOBARRIER),
469 					test_opt(sbi, FLUSH_MERGE));
470 
471 	if (test_opt(sbi, NOBARRIER))
472 		return 0;
473 
474 	if (!test_opt(sbi, FLUSH_MERGE) || !atomic_read(&fcc->submit_flush)) {
475 		int ret;
476 
477 		atomic_inc(&fcc->submit_flush);
478 		ret = submit_flush_wait(sbi);
479 		atomic_dec(&fcc->submit_flush);
480 		return ret;
481 	}
482 
483 	init_completion(&cmd.wait);
484 
485 	atomic_inc(&fcc->submit_flush);
486 	llist_add(&cmd.llnode, &fcc->issue_list);
487 
488 	if (!fcc->dispatch_list)
489 		wake_up(&fcc->flush_wait_queue);
490 
491 	wait_for_completion(&cmd.wait);
492 	atomic_dec(&fcc->submit_flush);
493 
494 	return cmd.ret;
495 }
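/*
 * Design note (a summary of the code above): with FLUSH_MERGE, concurrent
 * callers push a flush_cmd onto the lock-free issue_list and sleep on a
 * completion; issue_flush_thread() drains the whole list, issues a single
 * cache-flush request for the batch via submit_flush_wait(), and then
 * completes every waiter with the shared return code, collapsing N
 * fsync-triggered flushes into one device flush.
 */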
496 
497 int create_flush_cmd_control(struct f2fs_sb_info *sbi)
498 {
499 	dev_t dev = sbi->sb->s_bdev->bd_dev;
500 	struct flush_cmd_control *fcc;
501 	int err = 0;
502 
503 	fcc = kzalloc(sizeof(struct flush_cmd_control), GFP_KERNEL);
504 	if (!fcc)
505 		return -ENOMEM;
506 	atomic_set(&fcc->submit_flush, 0);
507 	init_waitqueue_head(&fcc->flush_wait_queue);
508 	init_llist_head(&fcc->issue_list);
509 	SM_I(sbi)->cmd_control_info = fcc;
510 	fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
511 				"f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
512 	if (IS_ERR(fcc->f2fs_issue_flush)) {
513 		err = PTR_ERR(fcc->f2fs_issue_flush);
514 		kfree(fcc);
515 		SM_I(sbi)->cmd_control_info = NULL;
516 		return err;
517 	}
518 
519 	return err;
520 }
521 
522 void destroy_flush_cmd_control(struct f2fs_sb_info *sbi)
523 {
524 	struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info;
525 
526 	if (fcc && fcc->f2fs_issue_flush)
527 		kthread_stop(fcc->f2fs_issue_flush);
528 	kfree(fcc);
529 	SM_I(sbi)->cmd_control_info = NULL;
530 }
531 
532 static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
533 		enum dirty_type dirty_type)
534 {
535 	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
536 
537 	/* need not be added */
538 	if (IS_CURSEG(sbi, segno))
539 		return;
540 
541 	if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type]))
542 		dirty_i->nr_dirty[dirty_type]++;
543 
544 	if (dirty_type == DIRTY) {
545 		struct seg_entry *sentry = get_seg_entry(sbi, segno);
546 		enum dirty_type t = sentry->type;
547 
548 		if (unlikely(t >= DIRTY)) {
549 			f2fs_bug_on(sbi, 1);
550 			return;
551 		}
552 		if (!test_and_set_bit(segno, dirty_i->dirty_segmap[t]))
553 			dirty_i->nr_dirty[t]++;
554 	}
555 }
556 
557 static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
558 		enum dirty_type dirty_type)
559 {
560 	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
561 
562 	if (test_and_clear_bit(segno, dirty_i->dirty_segmap[dirty_type]))
563 		dirty_i->nr_dirty[dirty_type]--;
564 
565 	if (dirty_type == DIRTY) {
566 		struct seg_entry *sentry = get_seg_entry(sbi, segno);
567 		enum dirty_type t = sentry->type;
568 
569 		if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t]))
570 			dirty_i->nr_dirty[t]--;
571 
572 		if (get_valid_blocks(sbi, segno, sbi->segs_per_sec) == 0)
573 			clear_bit(GET_SECNO(sbi, segno),
574 						dirty_i->victim_secmap);
575 	}
576 }
577 
578 /*
579  * No error such as -ENOMEM should occur here:
580  * adding a dirty entry into the seglist is not a critical operation.
581  * If a given segment is one of current working segments, it won't be added.
582  */
583 static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
584 {
585 	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
586 	unsigned short valid_blocks;
587 
588 	if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno))
589 		return;
590 
591 	mutex_lock(&dirty_i->seglist_lock);
592 
593 	valid_blocks = get_valid_blocks(sbi, segno, 0);
594 
595 	if (valid_blocks == 0) {
596 		__locate_dirty_segment(sbi, segno, PRE);
597 		__remove_dirty_segment(sbi, segno, DIRTY);
598 	} else if (valid_blocks < sbi->blocks_per_seg) {
599 		__locate_dirty_segment(sbi, segno, DIRTY);
600 	} else {
601 		/* Recovery routine with SSR needs this */
602 		__remove_dirty_segment(sbi, segno, DIRTY);
603 	}
604 
605 	mutex_unlock(&dirty_i->seglist_lock);
606 }
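/*
 * In short: a segment with no valid blocks goes to the PRE list (it can be
 * freed after the next checkpoint), a partially valid one goes to the DIRTY
 * list (a candidate for GC/SSR), and a fully valid segment is tracked by
 * neither.
 */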
607 
608 static struct bio_entry *__add_bio_entry(struct f2fs_sb_info *sbi,
609 							struct bio *bio)
610 {
611 	struct list_head *wait_list = &(SM_I(sbi)->wait_list);
612 	struct bio_entry *be = f2fs_kmem_cache_alloc(bio_entry_slab, GFP_NOFS);
613 
614 	INIT_LIST_HEAD(&be->list);
615 	be->bio = bio;
616 	init_completion(&be->event);
617 	list_add_tail(&be->list, wait_list);
618 
619 	return be;
620 }
621 
622 void f2fs_wait_all_discard_bio(struct f2fs_sb_info *sbi)
623 {
624 	struct list_head *wait_list = &(SM_I(sbi)->wait_list);
625 	struct bio_entry *be, *tmp;
626 
627 	list_for_each_entry_safe(be, tmp, wait_list, list) {
628 		struct bio *bio = be->bio;
629 		int err;
630 
631 		wait_for_completion_io(&be->event);
632 		err = be->error;
633 		if (err == -EOPNOTSUPP)
634 			err = 0;
635 
636 		if (err)
637 			f2fs_msg(sbi->sb, KERN_INFO,
638 				"Issue discard failed, ret: %d", err);
639 
640 		bio_put(bio);
641 		list_del(&be->list);
642 		kmem_cache_free(bio_entry_slab, be);
643 	}
644 }
645 
646 static void f2fs_submit_bio_wait_endio(struct bio *bio)
647 {
648 	struct bio_entry *be = (struct bio_entry *)bio->bi_private;
649 
650 	be->error = bio->bi_error;
651 	complete(&be->event);
652 }
653 
654 /* this function is copied from blkdev_issue_discard in block/blk-lib.c */
655 static int __f2fs_issue_discard_async(struct f2fs_sb_info *sbi,
656 		struct block_device *bdev, block_t blkstart, block_t blklen)
657 {
658 	struct bio *bio = NULL;
659 	int err;
660 
661 	trace_f2fs_issue_discard(sbi->sb, blkstart, blklen);
662 
663 	if (sbi->s_ndevs) {
664 		int devi = f2fs_target_device_index(sbi, blkstart);
665 
666 		blkstart -= FDEV(devi).start_blk;
667 	}
668 	err = __blkdev_issue_discard(bdev,
669 				SECTOR_FROM_BLOCK(blkstart),
670 				SECTOR_FROM_BLOCK(blklen),
671 				GFP_NOFS, 0, &bio);
672 	if (!err && bio) {
673 		struct bio_entry *be = __add_bio_entry(sbi, bio);
674 
675 		bio->bi_private = be;
676 		bio->bi_end_io = f2fs_submit_bio_wait_endio;
677 		bio->bi_opf |= REQ_SYNC;
678 		submit_bio(bio);
679 	}
680 
681 	return err;
682 }
683 
684 #ifdef CONFIG_BLK_DEV_ZONED
685 static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi,
686 		struct block_device *bdev, block_t blkstart, block_t blklen)
687 {
688 	sector_t nr_sects = SECTOR_FROM_BLOCK(blklen);
689 	sector_t sector;
690 	int devi = 0;
691 
692 	if (sbi->s_ndevs) {
693 		devi = f2fs_target_device_index(sbi, blkstart);
694 		blkstart -= FDEV(devi).start_blk;
695 	}
696 	sector = SECTOR_FROM_BLOCK(blkstart);
697 
698 	if (sector % bdev_zone_size(bdev) || nr_sects != bdev_zone_size(bdev)) {
699 		f2fs_msg(sbi->sb, KERN_INFO,
700 			"(%d) %s: Unaligned discard attempted (block %x + %x)",
701 			devi, sbi->s_ndevs ? FDEV(devi).path: "",
702 			blkstart, blklen);
703 		return -EIO;
704 	}
705 
706 	/*
707 	 * We need to know the type of the zone: for conventional zones,
708 	 * use regular discard if the drive supports it. For sequential
709 	 * zones, reset the zone write pointer.
710 	 */
711 	switch (get_blkz_type(sbi, bdev, blkstart)) {
712 
713 	case BLK_ZONE_TYPE_CONVENTIONAL:
714 		if (!blk_queue_discard(bdev_get_queue(bdev)))
715 			return 0;
716 		return __f2fs_issue_discard_async(sbi, bdev, blkstart, blklen);
717 	case BLK_ZONE_TYPE_SEQWRITE_REQ:
718 	case BLK_ZONE_TYPE_SEQWRITE_PREF:
719 		trace_f2fs_issue_reset_zone(sbi->sb, blkstart);
720 		return blkdev_reset_zones(bdev, sector,
721 					  nr_sects, GFP_NOFS);
722 	default:
723 		/* Unknown zone type: broken device ? */
724 		return -EIO;
725 	}
726 }
727 #endif
728 
729 static int __issue_discard_async(struct f2fs_sb_info *sbi,
730 		struct block_device *bdev, block_t blkstart, block_t blklen)
731 {
732 #ifdef CONFIG_BLK_DEV_ZONED
733 	if (f2fs_sb_mounted_blkzoned(sbi->sb) &&
734 				bdev_zoned_model(bdev) != BLK_ZONED_NONE)
735 		return __f2fs_issue_discard_zone(sbi, bdev, blkstart, blklen);
736 #endif
737 	return __f2fs_issue_discard_async(sbi, bdev, blkstart, blklen);
738 }
739 
740 static int f2fs_issue_discard(struct f2fs_sb_info *sbi,
741 				block_t blkstart, block_t blklen)
742 {
743 	sector_t start = blkstart, len = 0;
744 	struct block_device *bdev;
745 	struct seg_entry *se;
746 	unsigned int offset;
747 	block_t i;
748 	int err = 0;
749 
750 	bdev = f2fs_target_device(sbi, blkstart, NULL);
751 
752 	for (i = blkstart; i < blkstart + blklen; i++, len++) {
753 		if (i != start) {
754 			struct block_device *bdev2 =
755 				f2fs_target_device(sbi, i, NULL);
756 
757 			if (bdev2 != bdev) {
758 				err = __issue_discard_async(sbi, bdev,
759 						start, len);
760 				if (err)
761 					return err;
762 				bdev = bdev2;
763 				start = i;
764 				len = 0;
765 			}
766 		}
767 
768 		se = get_seg_entry(sbi, GET_SEGNO(sbi, i));
769 		offset = GET_BLKOFF_FROM_SEG0(sbi, i);
770 
771 		if (!f2fs_test_and_set_bit(offset, se->discard_map))
772 			sbi->discard_blks--;
773 	}
774 
775 	if (len)
776 		err = __issue_discard_async(sbi, bdev, start, len);
777 	return err;
778 }
779 
780 static void __add_discard_entry(struct f2fs_sb_info *sbi,
781 		struct cp_control *cpc, struct seg_entry *se,
782 		unsigned int start, unsigned int end)
783 {
784 	struct list_head *head = &SM_I(sbi)->discard_list;
785 	struct discard_entry *new, *last;
786 
787 	if (!list_empty(head)) {
788 		last = list_last_entry(head, struct discard_entry, list);
789 		if (START_BLOCK(sbi, cpc->trim_start) + start ==
790 						last->blkaddr + last->len) {
791 			last->len += end - start;
792 			goto done;
793 		}
794 	}
795 
796 	new = f2fs_kmem_cache_alloc(discard_entry_slab, GFP_NOFS);
797 	INIT_LIST_HEAD(&new->list);
798 	new->blkaddr = START_BLOCK(sbi, cpc->trim_start) + start;
799 	new->len = end - start;
800 	list_add_tail(&new->list, head);
801 done:
802 	SM_I(sbi)->nr_discards += end - start;
803 }
804 
805 static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc)
806 {
807 	int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
808 	int max_blocks = sbi->blocks_per_seg;
809 	struct seg_entry *se = get_seg_entry(sbi, cpc->trim_start);
810 	unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
811 	unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
812 	unsigned long *discard_map = (unsigned long *)se->discard_map;
813 	unsigned long *dmap = SIT_I(sbi)->tmp_map;
814 	unsigned int start = 0, end = -1;
815 	bool force = (cpc->reason == CP_DISCARD);
816 	int i;
817 
818 	if (se->valid_blocks == max_blocks || !f2fs_discard_en(sbi))
819 		return;
820 
821 	if (!force) {
822 		if (!test_opt(sbi, DISCARD) || !se->valid_blocks ||
823 		    SM_I(sbi)->nr_discards >= SM_I(sbi)->max_discards)
824 			return;
825 	}
826 
827 	/* SIT_VBLOCK_MAP_SIZE should be a multiple of sizeof(unsigned long) */
828 	for (i = 0; i < entries; i++)
829 		dmap[i] = force ? ~ckpt_map[i] & ~discard_map[i] :
830 				(cur_map[i] ^ ckpt_map[i]) & ckpt_map[i];
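	/*
	 * dmap now holds the discard candidates: under CP_DISCARD (force),
	 * blocks invalid in the checkpoint and not yet discarded
	 * (~ckpt_map & ~discard_map); otherwise, blocks that were valid at
	 * the last checkpoint but have been invalidated since
	 * ((cur_map ^ ckpt_map) & ckpt_map).
	 */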
831 
832 	while (force || SM_I(sbi)->nr_discards <= SM_I(sbi)->max_discards) {
833 		start = __find_rev_next_bit(dmap, max_blocks, end + 1);
834 		if (start >= max_blocks)
835 			break;
836 
837 		end = __find_rev_next_zero_bit(dmap, max_blocks, start + 1);
838 		if (force && start && end != max_blocks
839 					&& (end - start) < cpc->trim_minlen)
840 			continue;
841 
842 		__add_discard_entry(sbi, cpc, se, start, end);
843 	}
844 }
845 
846 void release_discard_addrs(struct f2fs_sb_info *sbi)
847 {
848 	struct list_head *head = &(SM_I(sbi)->discard_list);
849 	struct discard_entry *entry, *this;
850 
851 	/* drop caches */
852 	list_for_each_entry_safe(entry, this, head, list) {
853 		list_del(&entry->list);
854 		kmem_cache_free(discard_entry_slab, entry);
855 	}
856 }
857 
858 /*
859  * clear_prefree_segments() should be called after the checkpoint is done.
860  */
861 static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
862 {
863 	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
864 	unsigned int segno;
865 
866 	mutex_lock(&dirty_i->seglist_lock);
867 	for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], MAIN_SEGS(sbi))
868 		__set_test_and_free(sbi, segno);
869 	mutex_unlock(&dirty_i->seglist_lock);
870 }
871 
872 void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc)
873 {
874 	struct list_head *head = &(SM_I(sbi)->discard_list);
875 	struct discard_entry *entry, *this;
876 	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
877 	struct blk_plug plug;
878 	unsigned long *prefree_map = dirty_i->dirty_segmap[PRE];
879 	unsigned int start = 0, end = -1;
880 	unsigned int secno, start_segno;
881 	bool force = (cpc->reason == CP_DISCARD);
882 
883 	blk_start_plug(&plug);
884 
885 	mutex_lock(&dirty_i->seglist_lock);
886 
887 	while (1) {
888 		int i;
889 		start = find_next_bit(prefree_map, MAIN_SEGS(sbi), end + 1);
890 		if (start >= MAIN_SEGS(sbi))
891 			break;
892 		end = find_next_zero_bit(prefree_map, MAIN_SEGS(sbi),
893 								start + 1);
894 
895 		for (i = start; i < end; i++)
896 			clear_bit(i, prefree_map);
897 
898 		dirty_i->nr_dirty[PRE] -= end - start;
899 
900 		if (force || !test_opt(sbi, DISCARD))
901 			continue;
902 
903 		if (!test_opt(sbi, LFS) || sbi->segs_per_sec == 1) {
904 			f2fs_issue_discard(sbi, START_BLOCK(sbi, start),
905 				(end - start) << sbi->log_blocks_per_seg);
906 			continue;
907 		}
908 next:
909 		secno = GET_SECNO(sbi, start);
910 		start_segno = secno * sbi->segs_per_sec;
911 		if (!IS_CURSEC(sbi, secno) &&
912 			!get_valid_blocks(sbi, start, sbi->segs_per_sec))
913 			f2fs_issue_discard(sbi, START_BLOCK(sbi, start_segno),
914 				sbi->segs_per_sec << sbi->log_blocks_per_seg);
915 
916 		start = start_segno + sbi->segs_per_sec;
917 		if (start < end)
918 			goto next;
919 	}
920 	mutex_unlock(&dirty_i->seglist_lock);
921 
922 	/* send small discards */
923 	list_for_each_entry_safe(entry, this, head, list) {
924 		if (force && entry->len < cpc->trim_minlen)
925 			goto skip;
926 		f2fs_issue_discard(sbi, entry->blkaddr, entry->len);
927 		cpc->trimmed += entry->len;
928 skip:
929 		list_del(&entry->list);
930 		SM_I(sbi)->nr_discards -= entry->len;
931 		kmem_cache_free(discard_entry_slab, entry);
932 	}
933 
934 	blk_finish_plug(&plug);
935 }
936 
937 static bool __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno)
938 {
939 	struct sit_info *sit_i = SIT_I(sbi);
940 
941 	if (!__test_and_set_bit(segno, sit_i->dirty_sentries_bitmap)) {
942 		sit_i->dirty_sentries++;
943 		return false;
944 	}
945 
946 	return true;
947 }
948 
949 static void __set_sit_entry_type(struct f2fs_sb_info *sbi, int type,
950 					unsigned int segno, int modified)
951 {
952 	struct seg_entry *se = get_seg_entry(sbi, segno);
953 	se->type = type;
954 	if (modified)
955 		__mark_sit_entry_dirty(sbi, segno);
956 }
957 
958 static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
959 {
960 	struct seg_entry *se;
961 	unsigned int segno, offset;
962 	long int new_vblocks;
963 
964 	segno = GET_SEGNO(sbi, blkaddr);
965 
966 	se = get_seg_entry(sbi, segno);
967 	new_vblocks = se->valid_blocks + del;
968 	offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
969 
970 	f2fs_bug_on(sbi, (new_vblocks >> (sizeof(unsigned short) << 3) ||
971 				(new_vblocks > sbi->blocks_per_seg)));
972 
973 	se->valid_blocks = new_vblocks;
974 	se->mtime = get_mtime(sbi);
975 	SIT_I(sbi)->max_mtime = se->mtime;
976 
977 	/* Update valid block bitmap */
978 	if (del > 0) {
979 		if (f2fs_test_and_set_bit(offset, se->cur_valid_map))
980 			f2fs_bug_on(sbi, 1);
981 		if (f2fs_discard_en(sbi) &&
982 			!f2fs_test_and_set_bit(offset, se->discard_map))
983 			sbi->discard_blks--;
984 	} else {
985 		if (!f2fs_test_and_clear_bit(offset, se->cur_valid_map))
986 			f2fs_bug_on(sbi, 1);
987 		if (f2fs_discard_en(sbi) &&
988 			f2fs_test_and_clear_bit(offset, se->discard_map))
989 			sbi->discard_blks++;
990 	}
991 	if (!f2fs_test_bit(offset, se->ckpt_valid_map))
992 		se->ckpt_valid_blocks += del;
993 
994 	__mark_sit_entry_dirty(sbi, segno);
995 
996 	/* update total number of valid blocks to be written in ckpt area */
997 	SIT_I(sbi)->written_valid_blocks += del;
998 
999 	if (sbi->segs_per_sec > 1)
1000 		get_sec_entry(sbi, segno)->valid_blocks += del;
1001 }
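/*
 * Note on del: callers pass +1 when the block at blkaddr becomes valid and
 * -1 when it is invalidated. cur_valid_map tracks the live state,
 * discard_map feeds the discard-candidate computation in add_discard_addrs(),
 * and ckpt_valid_blocks is adjusted only for blocks that were not valid in
 * the last checkpoint's bitmap.
 */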
1002 
1003 void refresh_sit_entry(struct f2fs_sb_info *sbi, block_t old, block_t new)
1004 {
1005 	update_sit_entry(sbi, new, 1);
1006 	if (GET_SEGNO(sbi, old) != NULL_SEGNO)
1007 		update_sit_entry(sbi, old, -1);
1008 
1009 	locate_dirty_segment(sbi, GET_SEGNO(sbi, old));
1010 	locate_dirty_segment(sbi, GET_SEGNO(sbi, new));
1011 }
1012 
1013 void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
1014 {
1015 	unsigned int segno = GET_SEGNO(sbi, addr);
1016 	struct sit_info *sit_i = SIT_I(sbi);
1017 
1018 	f2fs_bug_on(sbi, addr == NULL_ADDR);
1019 	if (addr == NEW_ADDR)
1020 		return;
1021 
1022 	/* add it into sit main buffer */
1023 	mutex_lock(&sit_i->sentry_lock);
1024 
1025 	update_sit_entry(sbi, addr, -1);
1026 
1027 	/* add it into dirty seglist */
1028 	locate_dirty_segment(sbi, segno);
1029 
1030 	mutex_unlock(&sit_i->sentry_lock);
1031 }
1032 
1033 bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr)
1034 {
1035 	struct sit_info *sit_i = SIT_I(sbi);
1036 	unsigned int segno, offset;
1037 	struct seg_entry *se;
1038 	bool is_cp = false;
1039 
1040 	if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR)
1041 		return true;
1042 
1043 	mutex_lock(&sit_i->sentry_lock);
1044 
1045 	segno = GET_SEGNO(sbi, blkaddr);
1046 	se = get_seg_entry(sbi, segno);
1047 	offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
1048 
1049 	if (f2fs_test_bit(offset, se->ckpt_valid_map))
1050 		is_cp = true;
1051 
1052 	mutex_unlock(&sit_i->sentry_lock);
1053 
1054 	return is_cp;
1055 }
1056 
1057 /*
1058  * This function must be called with curseg_mutex held.
1059  */
1060 static void __add_sum_entry(struct f2fs_sb_info *sbi, int type,
1061 					struct f2fs_summary *sum)
1062 {
1063 	struct curseg_info *curseg = CURSEG_I(sbi, type);
1064 	void *addr = curseg->sum_blk;
1065 	addr += curseg->next_blkoff * sizeof(struct f2fs_summary);
1066 	memcpy(addr, sum, sizeof(struct f2fs_summary));
1067 }
1068 
1069 /*
1070  * Calculate the number of current summary pages for writing
1071  */
1072 int npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra)
1073 {
1074 	int valid_sum_count = 0;
1075 	int i, sum_in_page;
1076 
1077 	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
1078 		if (sbi->ckpt->alloc_type[i] == SSR)
1079 			valid_sum_count += sbi->blocks_per_seg;
1080 		else {
1081 			if (for_ra)
1082 				valid_sum_count += le16_to_cpu(
1083 					F2FS_CKPT(sbi)->cur_data_blkoff[i]);
1084 			else
1085 				valid_sum_count += curseg_blkoff(sbi, i);
1086 		}
1087 	}
1088 
1089 	sum_in_page = (PAGE_SIZE - 2 * SUM_JOURNAL_SIZE -
1090 			SUM_FOOTER_SIZE) / SUMMARY_SIZE;
1091 	if (valid_sum_count <= sum_in_page)
1092 		return 1;
1093 	else if ((valid_sum_count - sum_in_page) <=
1094 		(PAGE_SIZE - SUM_FOOTER_SIZE) / SUMMARY_SIZE)
1095 		return 2;
1096 	return 3;
1097 }
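/*
 * Worked example, assuming the usual 4KB page and the on-disk constants
 * (SUMMARY_SIZE = 7, SUM_FOOTER_SIZE = 5, SUM_JOURNAL_SIZE = 507): the
 * first compacted page holds (4096 - 2*507 - 5) / 7 = 439 entries after
 * the two journals, a follow-up page holds (4096 - 5) / 7 = 584, so the
 * three data logs (at most 3 * 512 entries) always fit in 3 pages.
 */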
1098 
1099 /*
1100  * Caller should put this summary page
1101  */
1102 struct page *get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno)
1103 {
1104 	return get_meta_page(sbi, GET_SUM_BLOCK(sbi, segno));
1105 }
1106 
1107 void update_meta_page(struct f2fs_sb_info *sbi, void *src, block_t blk_addr)
1108 {
1109 	struct page *page = grab_meta_page(sbi, blk_addr);
1110 	void *dst = page_address(page);
1111 
1112 	if (src)
1113 		memcpy(dst, src, PAGE_SIZE);
1114 	else
1115 		memset(dst, 0, PAGE_SIZE);
1116 	set_page_dirty(page);
1117 	f2fs_put_page(page, 1);
1118 }
1119 
1120 static void write_sum_page(struct f2fs_sb_info *sbi,
1121 			struct f2fs_summary_block *sum_blk, block_t blk_addr)
1122 {
1123 	update_meta_page(sbi, (void *)sum_blk, blk_addr);
1124 }
1125 
1126 static void write_current_sum_page(struct f2fs_sb_info *sbi,
1127 						int type, block_t blk_addr)
1128 {
1129 	struct curseg_info *curseg = CURSEG_I(sbi, type);
1130 	struct page *page = grab_meta_page(sbi, blk_addr);
1131 	struct f2fs_summary_block *src = curseg->sum_blk;
1132 	struct f2fs_summary_block *dst;
1133 
1134 	dst = (struct f2fs_summary_block *)page_address(page);
1135 
1136 	mutex_lock(&curseg->curseg_mutex);
1137 
1138 	down_read(&curseg->journal_rwsem);
1139 	memcpy(&dst->journal, curseg->journal, SUM_JOURNAL_SIZE);
1140 	up_read(&curseg->journal_rwsem);
1141 
1142 	memcpy(dst->entries, src->entries, SUM_ENTRY_SIZE);
1143 	memcpy(&dst->footer, &src->footer, SUM_FOOTER_SIZE);
1144 
1145 	mutex_unlock(&curseg->curseg_mutex);
1146 
1147 	set_page_dirty(page);
1148 	f2fs_put_page(page, 1);
1149 }
1150 
1151 static int is_next_segment_free(struct f2fs_sb_info *sbi, int type)
1152 {
1153 	struct curseg_info *curseg = CURSEG_I(sbi, type);
1154 	unsigned int segno = curseg->segno + 1;
1155 	struct free_segmap_info *free_i = FREE_I(sbi);
1156 
1157 	if (segno < MAIN_SEGS(sbi) && segno % sbi->segs_per_sec)
1158 		return !test_bit(segno, free_i->free_segmap);
1159 	return 0;
1160 }
1161 
1162 /*
1163  * Find a new segment in the free segment bitmap, in the right allocation order.
1164  * This function must succeed; otherwise it is a BUG.
1165  */
1166 static void get_new_segment(struct f2fs_sb_info *sbi,
1167 			unsigned int *newseg, bool new_sec, int dir)
1168 {
1169 	struct free_segmap_info *free_i = FREE_I(sbi);
1170 	unsigned int segno, secno, zoneno;
1171 	unsigned int total_zones = MAIN_SECS(sbi) / sbi->secs_per_zone;
1172 	unsigned int hint = *newseg / sbi->segs_per_sec;
1173 	unsigned int old_zoneno = GET_ZONENO_FROM_SEGNO(sbi, *newseg);
1174 	unsigned int left_start = hint;
1175 	bool init = true;
1176 	int go_left = 0;
1177 	int i;
1178 
1179 	spin_lock(&free_i->segmap_lock);
1180 
1181 	if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) {
1182 		segno = find_next_zero_bit(free_i->free_segmap,
1183 				(hint + 1) * sbi->segs_per_sec, *newseg + 1);
1184 		if (segno < (hint + 1) * sbi->segs_per_sec)
1185 			goto got_it;
1186 	}
1187 find_other_zone:
1188 	secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint);
1189 	if (secno >= MAIN_SECS(sbi)) {
1190 		if (dir == ALLOC_RIGHT) {
1191 			secno = find_next_zero_bit(free_i->free_secmap,
1192 							MAIN_SECS(sbi), 0);
1193 			f2fs_bug_on(sbi, secno >= MAIN_SECS(sbi));
1194 		} else {
1195 			go_left = 1;
1196 			left_start = hint - 1;
1197 		}
1198 	}
1199 	if (go_left == 0)
1200 		goto skip_left;
1201 
1202 	while (test_bit(left_start, free_i->free_secmap)) {
1203 		if (left_start > 0) {
1204 			left_start--;
1205 			continue;
1206 		}
1207 		left_start = find_next_zero_bit(free_i->free_secmap,
1208 							MAIN_SECS(sbi), 0);
1209 		f2fs_bug_on(sbi, left_start >= MAIN_SECS(sbi));
1210 		break;
1211 	}
1212 	secno = left_start;
1213 skip_left:
1214 	hint = secno;
1215 	segno = secno * sbi->segs_per_sec;
1216 	zoneno = secno / sbi->secs_per_zone;
1217 
1218 	/* give up on finding another zone */
1219 	if (!init)
1220 		goto got_it;
1221 	if (sbi->secs_per_zone == 1)
1222 		goto got_it;
1223 	if (zoneno == old_zoneno)
1224 		goto got_it;
1225 	if (dir == ALLOC_LEFT) {
1226 		if (!go_left && zoneno + 1 >= total_zones)
1227 			goto got_it;
1228 		if (go_left && zoneno == 0)
1229 			goto got_it;
1230 	}
1231 	for (i = 0; i < NR_CURSEG_TYPE; i++)
1232 		if (CURSEG_I(sbi, i)->zone == zoneno)
1233 			break;
1234 
1235 	if (i < NR_CURSEG_TYPE) {
1236 		/* zone is in use, try another */
1237 		if (go_left)
1238 			hint = zoneno * sbi->secs_per_zone - 1;
1239 		else if (zoneno + 1 >= total_zones)
1240 			hint = 0;
1241 		else
1242 			hint = (zoneno + 1) * sbi->secs_per_zone;
1243 		init = false;
1244 		goto find_other_zone;
1245 	}
1246 got_it:
1247 	/* mark the segment as in use in the free segmap */
1248 	f2fs_bug_on(sbi, test_bit(segno, free_i->free_segmap));
1249 	__set_inuse(sbi, segno);
1250 	*newseg = segno;
1251 	spin_unlock(&free_i->segmap_lock);
1252 }
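/*
 * Allocation policy in brief: try the next free segment in the current
 * section first; otherwise scan free_secmap for a free section (rightward,
 * or leftward for ALLOC_LEFT), preferring a zone that no current log is
 * already writing to, so the active logs stay spread across zones.
 */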
1253 
1254 static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
1255 {
1256 	struct curseg_info *curseg = CURSEG_I(sbi, type);
1257 	struct summary_footer *sum_footer;
1258 
1259 	curseg->segno = curseg->next_segno;
1260 	curseg->zone = GET_ZONENO_FROM_SEGNO(sbi, curseg->segno);
1261 	curseg->next_blkoff = 0;
1262 	curseg->next_segno = NULL_SEGNO;
1263 
1264 	sum_footer = &(curseg->sum_blk->footer);
1265 	memset(sum_footer, 0, sizeof(struct summary_footer));
1266 	if (IS_DATASEG(type))
1267 		SET_SUM_TYPE(sum_footer, SUM_TYPE_DATA);
1268 	if (IS_NODESEG(type))
1269 		SET_SUM_TYPE(sum_footer, SUM_TYPE_NODE);
1270 	__set_sit_entry_type(sbi, type, curseg->segno, modified);
1271 }
1272 
1273 /*
1274  * Allocate a current working segment.
1275  * This function always allocates a free segment in LFS manner.
1276  */
1277 static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
1278 {
1279 	struct curseg_info *curseg = CURSEG_I(sbi, type);
1280 	unsigned int segno = curseg->segno;
1281 	int dir = ALLOC_LEFT;
1282 
1283 	write_sum_page(sbi, curseg->sum_blk,
1284 				GET_SUM_BLOCK(sbi, segno));
1285 	if (type == CURSEG_WARM_DATA || type == CURSEG_COLD_DATA)
1286 		dir = ALLOC_RIGHT;
1287 
1288 	if (test_opt(sbi, NOHEAP))
1289 		dir = ALLOC_RIGHT;
1290 
1291 	get_new_segment(sbi, &segno, new_sec, dir);
1292 	curseg->next_segno = segno;
1293 	reset_curseg(sbi, type, 1);
1294 	curseg->alloc_type = LFS;
1295 }
1296 
1297 static void __next_free_blkoff(struct f2fs_sb_info *sbi,
1298 			struct curseg_info *seg, block_t start)
1299 {
1300 	struct seg_entry *se = get_seg_entry(sbi, seg->segno);
1301 	int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
1302 	unsigned long *target_map = SIT_I(sbi)->tmp_map;
1303 	unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
1304 	unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
1305 	int i, pos;
1306 
1307 	for (i = 0; i < entries; i++)
1308 		target_map[i] = ckpt_map[i] | cur_map[i];
1309 
1310 	pos = __find_rev_next_zero_bit(target_map, sbi->blocks_per_seg, start);
1311 
1312 	seg->next_blkoff = pos;
1313 }
1314 
1315 /*
1316  * If a segment is written in LFS manner, the next block offset is simply
1317  * obtained by increasing the current block offset. However, if a segment is
1318  * written in SSR manner, the next block offset is obtained via __next_free_blkoff().
1319  */
1320 static void __refresh_next_blkoff(struct f2fs_sb_info *sbi,
1321 				struct curseg_info *seg)
1322 {
1323 	if (seg->alloc_type == SSR)
1324 		__next_free_blkoff(sbi, seg, seg->next_blkoff + 1);
1325 	else
1326 		seg->next_blkoff++;
1327 }
1328 
1329 /*
1330  * This function always allocates a used segment (from the dirty seglist) in
1331  * SSR manner, so it has to recover the existing summary information of valid blocks.
1332  */
1333 static void change_curseg(struct f2fs_sb_info *sbi, int type, bool reuse)
1334 {
1335 	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
1336 	struct curseg_info *curseg = CURSEG_I(sbi, type);
1337 	unsigned int new_segno = curseg->next_segno;
1338 	struct f2fs_summary_block *sum_node;
1339 	struct page *sum_page;
1340 
1341 	write_sum_page(sbi, curseg->sum_blk,
1342 				GET_SUM_BLOCK(sbi, curseg->segno));
1343 	__set_test_and_inuse(sbi, new_segno);
1344 
1345 	mutex_lock(&dirty_i->seglist_lock);
1346 	__remove_dirty_segment(sbi, new_segno, PRE);
1347 	__remove_dirty_segment(sbi, new_segno, DIRTY);
1348 	mutex_unlock(&dirty_i->seglist_lock);
1349 
1350 	reset_curseg(sbi, type, 1);
1351 	curseg->alloc_type = SSR;
1352 	__next_free_blkoff(sbi, curseg, 0);
1353 
1354 	if (reuse) {
1355 		sum_page = get_sum_page(sbi, new_segno);
1356 		sum_node = (struct f2fs_summary_block *)page_address(sum_page);
1357 		memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE);
1358 		f2fs_put_page(sum_page, 1);
1359 	}
1360 }
1361 
1362 static int get_ssr_segment(struct f2fs_sb_info *sbi, int type)
1363 {
1364 	struct curseg_info *curseg = CURSEG_I(sbi, type);
1365 	const struct victim_selection *v_ops = DIRTY_I(sbi)->v_ops;
1366 
1367 	if (IS_NODESEG(type) || !has_not_enough_free_secs(sbi, 0, 0))
1368 		return v_ops->get_victim(sbi,
1369 				&(curseg)->next_segno, BG_GC, type, SSR);
1370 
1371 	/* For data segments, let's do SSR more intensively */
1372 	for (; type >= CURSEG_HOT_DATA; type--)
1373 		if (v_ops->get_victim(sbi, &(curseg)->next_segno,
1374 						BG_GC, type, SSR))
1375 			return 1;
1376 	return 0;
1377 }
1378 
1379 /*
1380  * Flush out the current segment and replace it with a new one.
1381  * This function must succeed; otherwise it is a BUG.
1382  */
1383 static void allocate_segment_by_default(struct f2fs_sb_info *sbi,
1384 						int type, bool force)
1385 {
1386 	struct curseg_info *curseg = CURSEG_I(sbi, type);
1387 
1388 	if (force)
1389 		new_curseg(sbi, type, true);
1390 	else if (type == CURSEG_WARM_NODE)
1391 		new_curseg(sbi, type, false);
1392 	else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type))
1393 		new_curseg(sbi, type, false);
1394 	else if (need_SSR(sbi) && get_ssr_segment(sbi, type))
1395 		change_curseg(sbi, type, true);
1396 	else
1397 		new_curseg(sbi, type, false);
1398 
1399 	stat_inc_seg_type(sbi, curseg);
1400 }
1401 
1402 void allocate_new_segments(struct f2fs_sb_info *sbi)
1403 {
1404 	struct curseg_info *curseg;
1405 	unsigned int old_segno;
1406 	int i;
1407 
1408 	if (test_opt(sbi, LFS))
1409 		return;
1410 
1411 	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
1412 		curseg = CURSEG_I(sbi, i);
1413 		old_segno = curseg->segno;
1414 		SIT_I(sbi)->s_ops->allocate_segment(sbi, i, true);
1415 		locate_dirty_segment(sbi, old_segno);
1416 	}
1417 }
1418 
1419 static const struct segment_allocation default_salloc_ops = {
1420 	.allocate_segment = allocate_segment_by_default,
1421 };
1422 
1423 int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
1424 {
1425 	__u64 start = F2FS_BYTES_TO_BLK(range->start);
1426 	__u64 end = start + F2FS_BYTES_TO_BLK(range->len) - 1;
1427 	unsigned int start_segno, end_segno;
1428 	struct cp_control cpc;
1429 	int err = 0;
1430 
1431 	if (start >= MAX_BLKADDR(sbi) || range->len < sbi->blocksize)
1432 		return -EINVAL;
1433 
1434 	cpc.trimmed = 0;
1435 	if (end <= MAIN_BLKADDR(sbi))
1436 		goto out;
1437 
1438 	if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) {
1439 		f2fs_msg(sbi->sb, KERN_WARNING,
1440 			"Found FS corruption, run fsck to fix.");
1441 		goto out;
1442 	}
1443 
1444 	/* start/end segment number in main_area */
1445 	start_segno = (start <= MAIN_BLKADDR(sbi)) ? 0 : GET_SEGNO(sbi, start);
1446 	end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 :
1447 						GET_SEGNO(sbi, end);
1448 	cpc.reason = CP_DISCARD;
1449 	cpc.trim_minlen = max_t(__u64, 1, F2FS_BYTES_TO_BLK(range->minlen));
1450 
1451 	/* do checkpoint to issue discard commands safely */
1452 	for (; start_segno <= end_segno; start_segno = cpc.trim_end + 1) {
1453 		cpc.trim_start = start_segno;
1454 
1455 		if (sbi->discard_blks == 0)
1456 			break;
1457 		else if (sbi->discard_blks < BATCHED_TRIM_BLOCKS(sbi))
1458 			cpc.trim_end = end_segno;
1459 		else
1460 			cpc.trim_end = min_t(unsigned int,
1461 				rounddown(start_segno +
1462 				BATCHED_TRIM_SEGMENTS(sbi),
1463 				sbi->segs_per_sec) - 1, end_segno);
1464 
1465 		mutex_lock(&sbi->gc_mutex);
1466 		err = write_checkpoint(sbi, &cpc);
1467 		mutex_unlock(&sbi->gc_mutex);
1468 		if (err)
1469 			break;
1470 
1471 		schedule();
1472 	}
1473 out:
1474 	range->len = F2FS_BLK_TO_BYTES(cpc.trimmed);
1475 	return err;
1476 }
1477 
1478 static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type)
1479 {
1480 	struct curseg_info *curseg = CURSEG_I(sbi, type);
1481 	if (curseg->next_blkoff < sbi->blocks_per_seg)
1482 		return true;
1483 	return false;
1484 }
1485 
1486 static int __get_segment_type_2(struct page *page, enum page_type p_type)
1487 {
1488 	if (p_type == DATA)
1489 		return CURSEG_HOT_DATA;
1490 	else
1491 		return CURSEG_HOT_NODE;
1492 }
1493 
1494 static int __get_segment_type_4(struct page *page, enum page_type p_type)
1495 {
1496 	if (p_type == DATA) {
1497 		struct inode *inode = page->mapping->host;
1498 
1499 		if (S_ISDIR(inode->i_mode))
1500 			return CURSEG_HOT_DATA;
1501 		else
1502 			return CURSEG_COLD_DATA;
1503 	} else {
1504 		if (IS_DNODE(page) && is_cold_node(page))
1505 			return CURSEG_WARM_NODE;
1506 		else
1507 			return CURSEG_COLD_NODE;
1508 	}
1509 }
1510 
1511 static int __get_segment_type_6(struct page *page, enum page_type p_type)
1512 {
1513 	if (p_type == DATA) {
1514 		struct inode *inode = page->mapping->host;
1515 
1516 		if (S_ISDIR(inode->i_mode))
1517 			return CURSEG_HOT_DATA;
1518 		else if (is_cold_data(page) || file_is_cold(inode))
1519 			return CURSEG_COLD_DATA;
1520 		else
1521 			return CURSEG_WARM_DATA;
1522 	} else {
1523 		if (IS_DNODE(page))
1524 			return is_cold_node(page) ? CURSEG_WARM_NODE :
1525 						CURSEG_HOT_NODE;
1526 		else
1527 			return CURSEG_COLD_NODE;
1528 	}
1529 }
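/*
 * Temperature summary for the 6-log case (derived from the checks above):
 *
 *	DATA: directory -> HOT, cold-flagged data or cold file -> COLD,
 *	      else WARM
 *	NODE: direct node of a cold file -> WARM, other direct nodes -> HOT,
 *	      indirect nodes -> COLD
 */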
1530 
1531 static int __get_segment_type(struct page *page, enum page_type p_type)
1532 {
1533 	switch (F2FS_P_SB(page)->active_logs) {
1534 	case 2:
1535 		return __get_segment_type_2(page, p_type);
1536 	case 4:
1537 		return __get_segment_type_4(page, p_type);
1538 	}
1539 	/* NR_CURSEG_TYPE(6) logs by default */
1540 	f2fs_bug_on(F2FS_P_SB(page),
1541 		F2FS_P_SB(page)->active_logs != NR_CURSEG_TYPE);
1542 	return __get_segment_type_6(page, p_type);
1543 }
1544 
1545 void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
1546 		block_t old_blkaddr, block_t *new_blkaddr,
1547 		struct f2fs_summary *sum, int type)
1548 {
1549 	struct sit_info *sit_i = SIT_I(sbi);
1550 	struct curseg_info *curseg = CURSEG_I(sbi, type);
1551 
1552 	mutex_lock(&curseg->curseg_mutex);
1553 	mutex_lock(&sit_i->sentry_lock);
1554 
1555 	*new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
1556 
1557 	/*
1558 	 * __add_sum_entry must be called with curseg_mutex held,
1559 	 * because this function updates a summary entry in the
1560 	 * current summary block.
1561 	 */
1562 	__add_sum_entry(sbi, type, sum);
1563 
1564 	__refresh_next_blkoff(sbi, curseg);
1565 
1566 	stat_inc_block_count(sbi, curseg);
1567 
1568 	if (!__has_curseg_space(sbi, type))
1569 		sit_i->s_ops->allocate_segment(sbi, type, false);
1570 	/*
1571 	 * SIT information should be updated before segment allocation,
1572 	 * since SSR needs latest valid block information.
1573 	 */
1574 	refresh_sit_entry(sbi, old_blkaddr, *new_blkaddr);
1575 
1576 	mutex_unlock(&sit_i->sentry_lock);
1577 
1578 	if (page && IS_NODESEG(type))
1579 		fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg));
1580 
1581 	mutex_unlock(&curseg->curseg_mutex);
1582 }
1583 
1584 static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
1585 {
1586 	int type = __get_segment_type(fio->page, fio->type);
1587 
1588 	if (fio->type == NODE || fio->type == DATA)
1589 		mutex_lock(&fio->sbi->wio_mutex[fio->type]);
1590 
1591 	allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr,
1592 					&fio->new_blkaddr, sum, type);
1593 
1594 	/* write out the dirty page to the block device */
1595 	f2fs_submit_page_mbio(fio);
1596 
1597 	if (fio->type == NODE || fio->type == DATA)
1598 		mutex_unlock(&fio->sbi->wio_mutex[fio->type]);
1599 }
1600 
1601 void write_meta_page(struct f2fs_sb_info *sbi, struct page *page)
1602 {
1603 	struct f2fs_io_info fio = {
1604 		.sbi = sbi,
1605 		.type = META,
1606 		.op = REQ_OP_WRITE,
1607 		.op_flags = WRITE_SYNC | REQ_META | REQ_PRIO,
1608 		.old_blkaddr = page->index,
1609 		.new_blkaddr = page->index,
1610 		.page = page,
1611 		.encrypted_page = NULL,
1612 	};
1613 
1614 	if (unlikely(page->index >= MAIN_BLKADDR(sbi)))
1615 		fio.op_flags &= ~REQ_META;
1616 
1617 	set_page_writeback(page);
1618 	f2fs_submit_page_mbio(&fio);
1619 }
1620 
1621 void write_node_page(unsigned int nid, struct f2fs_io_info *fio)
1622 {
1623 	struct f2fs_summary sum;
1624 
1625 	set_summary(&sum, nid, 0, 0);
1626 	do_write_page(&sum, fio);
1627 }
1628 
1629 void write_data_page(struct dnode_of_data *dn, struct f2fs_io_info *fio)
1630 {
1631 	struct f2fs_sb_info *sbi = fio->sbi;
1632 	struct f2fs_summary sum;
1633 	struct node_info ni;
1634 
1635 	f2fs_bug_on(sbi, dn->data_blkaddr == NULL_ADDR);
1636 	get_node_info(sbi, dn->nid, &ni);
1637 	set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
1638 	do_write_page(&sum, fio);
1639 	f2fs_update_data_blkaddr(dn, fio->new_blkaddr);
1640 }
1641 
1642 void rewrite_data_page(struct f2fs_io_info *fio)
1643 {
1644 	fio->new_blkaddr = fio->old_blkaddr;
1645 	stat_inc_inplace_blocks(fio->sbi);
1646 	f2fs_submit_page_mbio(fio);
1647 }
1648 
1649 void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
1650 				block_t old_blkaddr, block_t new_blkaddr,
1651 				bool recover_curseg, bool recover_newaddr)
1652 {
1653 	struct sit_info *sit_i = SIT_I(sbi);
1654 	struct curseg_info *curseg;
1655 	unsigned int segno, old_cursegno;
1656 	struct seg_entry *se;
1657 	int type;
1658 	unsigned short old_blkoff;
1659 
1660 	segno = GET_SEGNO(sbi, new_blkaddr);
1661 	se = get_seg_entry(sbi, segno);
1662 	type = se->type;
1663 
1664 	if (!recover_curseg) {
1665 		/* for recovery flow */
1666 		if (se->valid_blocks == 0 && !IS_CURSEG(sbi, segno)) {
1667 			if (old_blkaddr == NULL_ADDR)
1668 				type = CURSEG_COLD_DATA;
1669 			else
1670 				type = CURSEG_WARM_DATA;
1671 		}
1672 	} else {
1673 		if (!IS_CURSEG(sbi, segno))
1674 			type = CURSEG_WARM_DATA;
1675 	}
1676 
1677 	curseg = CURSEG_I(sbi, type);
1678 
1679 	mutex_lock(&curseg->curseg_mutex);
1680 	mutex_lock(&sit_i->sentry_lock);
1681 
1682 	old_cursegno = curseg->segno;
1683 	old_blkoff = curseg->next_blkoff;
1684 
1685 	/* change the current segment */
1686 	if (segno != curseg->segno) {
1687 		curseg->next_segno = segno;
1688 		change_curseg(sbi, type, true);
1689 	}
1690 
1691 	curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr);
1692 	__add_sum_entry(sbi, type, sum);
1693 
1694 	if (!recover_curseg || recover_newaddr)
1695 		update_sit_entry(sbi, new_blkaddr, 1);
1696 	if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
1697 		update_sit_entry(sbi, old_blkaddr, -1);
1698 
1699 	locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
1700 	locate_dirty_segment(sbi, GET_SEGNO(sbi, new_blkaddr));
1701 
1702 	locate_dirty_segment(sbi, old_cursegno);
1703 
1704 	if (recover_curseg) {
1705 		if (old_cursegno != curseg->segno) {
1706 			curseg->next_segno = old_cursegno;
1707 			change_curseg(sbi, type, true);
1708 		}
1709 		curseg->next_blkoff = old_blkoff;
1710 	}
1711 
1712 	mutex_unlock(&sit_i->sentry_lock);
1713 	mutex_unlock(&curseg->curseg_mutex);
1714 }
1715 
1716 void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn,
1717 				block_t old_addr, block_t new_addr,
1718 				unsigned char version, bool recover_curseg,
1719 				bool recover_newaddr)
1720 {
1721 	struct f2fs_summary sum;
1722 
1723 	set_summary(&sum, dn->nid, dn->ofs_in_node, version);
1724 
1725 	__f2fs_replace_block(sbi, &sum, old_addr, new_addr,
1726 					recover_curseg, recover_newaddr);
1727 
1728 	f2fs_update_data_blkaddr(dn, new_addr);
1729 }
1730 
1731 void f2fs_wait_on_page_writeback(struct page *page,
1732 				enum page_type type, bool ordered)
1733 {
1734 	if (PageWriteback(page)) {
1735 		struct f2fs_sb_info *sbi = F2FS_P_SB(page);
1736 
1737 		f2fs_submit_merged_bio_cond(sbi, NULL, page, 0, type, WRITE);
1738 		if (ordered)
1739 			wait_on_page_writeback(page);
1740 		else
1741 			wait_for_stable_page(page);
1742 	}
1743 }
1744 
1745 void f2fs_wait_on_encrypted_page_writeback(struct f2fs_sb_info *sbi,
1746 							block_t blkaddr)
1747 {
1748 	struct page *cpage;
1749 
1750 	if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR)
1751 		return;
1752 
1753 	cpage = find_lock_page(META_MAPPING(sbi), blkaddr);
1754 	if (cpage) {
1755 		f2fs_wait_on_page_writeback(cpage, DATA, true);
1756 		f2fs_put_page(cpage, 1);
1757 	}
1758 }
1759 
1760 static int read_compacted_summaries(struct f2fs_sb_info *sbi)
1761 {
1762 	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
1763 	struct curseg_info *seg_i;
1764 	unsigned char *kaddr;
1765 	struct page *page;
1766 	block_t start;
1767 	int i, j, offset;
1768 
1769 	start = start_sum_block(sbi);
1770 
1771 	page = get_meta_page(sbi, start++);
1772 	kaddr = (unsigned char *)page_address(page);
1773 
1774 	/* Step 1: restore nat cache */
1775 	seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
1776 	memcpy(seg_i->journal, kaddr, SUM_JOURNAL_SIZE);
1777 
1778 	/* Step 2: restore sit cache */
1779 	seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
1780 	memcpy(seg_i->journal, kaddr + SUM_JOURNAL_SIZE, SUM_JOURNAL_SIZE);
1781 	offset = 2 * SUM_JOURNAL_SIZE;
1782 
1783 	/* Step 3: restore summary entries */
1784 	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
1785 		unsigned short blk_off;
1786 		unsigned int segno;
1787 
1788 		seg_i = CURSEG_I(sbi, i);
1789 		segno = le32_to_cpu(ckpt->cur_data_segno[i]);
1790 		blk_off = le16_to_cpu(ckpt->cur_data_blkoff[i]);
1791 		seg_i->next_segno = segno;
1792 		reset_curseg(sbi, i, 0);
1793 		seg_i->alloc_type = ckpt->alloc_type[i];
1794 		seg_i->next_blkoff = blk_off;
1795 
1796 		if (seg_i->alloc_type == SSR)
1797 			blk_off = sbi->blocks_per_seg;
1798 
1799 		for (j = 0; j < blk_off; j++) {
1800 			struct f2fs_summary *s;
1801 			s = (struct f2fs_summary *)(kaddr + offset);
1802 			seg_i->sum_blk->entries[j] = *s;
1803 			offset += SUMMARY_SIZE;
1804 			if (offset + SUMMARY_SIZE <= PAGE_SIZE -
1805 						SUM_FOOTER_SIZE)
1806 				continue;
1807 
1808 			f2fs_put_page(page, 1);
1809 			page = NULL;
1810 
1811 			page = get_meta_page(sbi, start++);
1812 			kaddr = (unsigned char *)page_address(page);
1813 			offset = 0;
1814 		}
1815 	}
1816 	f2fs_put_page(page, 1);
1817 	return 0;
1818 }
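/*
 * Layout being parsed above (compacted summary area, sketched per 4KB
 * block): the first block carries the NAT journal, then the SIT journal,
 * then packed 7-byte summary entries for the three data logs; entries
 * continue into following blocks, always leaving room for the footer.
 */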
1819 
1820 static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
1821 {
1822 	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
1823 	struct f2fs_summary_block *sum;
1824 	struct curseg_info *curseg;
1825 	struct page *new;
1826 	unsigned short blk_off;
1827 	unsigned int segno = 0;
1828 	block_t blk_addr = 0;
1829 
1830 	/* get segment number and block addr */
1831 	if (IS_DATASEG(type)) {
1832 		segno = le32_to_cpu(ckpt->cur_data_segno[type]);
1833 		blk_off = le16_to_cpu(ckpt->cur_data_blkoff[type -
1834 							CURSEG_HOT_DATA]);
1835 		if (__exist_node_summaries(sbi))
1836 			blk_addr = sum_blk_addr(sbi, NR_CURSEG_TYPE, type);
1837 		else
1838 			blk_addr = sum_blk_addr(sbi, NR_CURSEG_DATA_TYPE, type);
1839 	} else {
1840 		segno = le32_to_cpu(ckpt->cur_node_segno[type -
1841 							CURSEG_HOT_NODE]);
1842 		blk_off = le16_to_cpu(ckpt->cur_node_blkoff[type -
1843 							CURSEG_HOT_NODE]);
1844 		if (__exist_node_summaries(sbi))
1845 			blk_addr = sum_blk_addr(sbi, NR_CURSEG_NODE_TYPE,
1846 							type - CURSEG_HOT_NODE);
1847 		else
1848 			blk_addr = GET_SUM_BLOCK(sbi, segno);
1849 	}
1850 
1851 	new = get_meta_page(sbi, blk_addr);
1852 	sum = (struct f2fs_summary_block *)page_address(new);
1853 
1854 	if (IS_NODESEG(type)) {
1855 		if (__exist_node_summaries(sbi)) {
1856 			struct f2fs_summary *ns = &sum->entries[0];
1857 			int i;
1858 			for (i = 0; i < sbi->blocks_per_seg; i++, ns++) {
1859 				ns->version = 0;
1860 				ns->ofs_in_node = 0;
1861 			}
1862 		} else {
1863 			int err;
1864 
1865 			err = restore_node_summary(sbi, segno, sum);
1866 			if (err) {
1867 				f2fs_put_page(new, 1);
1868 				return err;
1869 			}
1870 		}
1871 	}
1872 
1873 	/* set uncompleted segment to curseg */
1874 	curseg = CURSEG_I(sbi, type);
1875 	mutex_lock(&curseg->curseg_mutex);
1876 
1877 	/* update journal info */
1878 	down_write(&curseg->journal_rwsem);
1879 	memcpy(curseg->journal, &sum->journal, SUM_JOURNAL_SIZE);
1880 	up_write(&curseg->journal_rwsem);
1881 
1882 	memcpy(curseg->sum_blk->entries, sum->entries, SUM_ENTRY_SIZE);
1883 	memcpy(&curseg->sum_blk->footer, &sum->footer, SUM_FOOTER_SIZE);
1884 	curseg->next_segno = segno;
1885 	reset_curseg(sbi, type, 0);
1886 	curseg->alloc_type = ckpt->alloc_type[type];
1887 	curseg->next_blkoff = blk_off;
1888 	mutex_unlock(&curseg->curseg_mutex);
1889 	f2fs_put_page(new, 1);
1890 	return 0;
1891 }
1892 
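/*
 * For reference, sum_blk_addr() counts backwards from the end of the
 * checkpoint pack, whose last block is the checkpoint trailer: with node
 * summaries present the pack tail holds NR_CURSEG_TYPE summary blocks
 * (three data, then three node), otherwise only NR_CURSEG_DATA_TYPE.
 * A hedged re-derivation of that arithmetic (illustrative; see segment.h
 * for the real helper):
 */
static inline block_t example_sum_blk_addr(struct f2fs_sb_info *sbi,
						int base, int type)
{
	return __start_cp_addr(sbi) +
		le32_to_cpu(F2FS_CKPT(sbi)->cp_pack_total_block_count) -
		(base + 1) + type;
}
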
1893 static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
1894 {
1895 	int type = CURSEG_HOT_DATA;
1896 	int err;
1897 
1898 	if (is_set_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG)) {
1899 		int npages = npages_for_summary_flush(sbi, true);
1900 
1901 		if (npages >= 2)
1902 			ra_meta_pages(sbi, start_sum_block(sbi), npages,
1903 							META_CP, true);
1904 
1905 		/* restore for compacted data summary */
1906 		if (read_compacted_summaries(sbi))
1907 			return -EINVAL;
1908 		type = CURSEG_HOT_NODE;
1909 	}
1910 
1911 	if (__exist_node_summaries(sbi))
1912 		ra_meta_pages(sbi, sum_blk_addr(sbi, NR_CURSEG_TYPE, type),
1913 					NR_CURSEG_TYPE - type, META_CP, true);
1914 
1915 	for (; type <= CURSEG_COLD_NODE; type++) {
1916 		err = read_normal_summaries(sbi, type);
1917 		if (err)
1918 			return err;
1919 	}
1920 
1921 	return 0;
1922 }
1923 
1924 static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr)
1925 {
1926 	struct page *page;
1927 	unsigned char *kaddr;
1928 	struct f2fs_summary *summary;
1929 	struct curseg_info *seg_i;
1930 	int written_size = 0;
1931 	int i, j;
1932 
1933 	page = grab_meta_page(sbi, blkaddr++);
1934 	kaddr = (unsigned char *)page_address(page);
1935 
1936 	/* Step 1: write nat cache */
1937 	seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
1938 	memcpy(kaddr, seg_i->journal, SUM_JOURNAL_SIZE);
1939 	written_size += SUM_JOURNAL_SIZE;
1940 
1941 	/* Step 2: write sit cache */
1942 	seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
1943 	memcpy(kaddr + written_size, seg_i->journal, SUM_JOURNAL_SIZE);
1944 	written_size += SUM_JOURNAL_SIZE;
1945 
1946 	/* Step 3: write summary entries */
1947 	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
1948 		unsigned short blkoff;
1949 		seg_i = CURSEG_I(sbi, i);
1950 		if (sbi->ckpt->alloc_type[i] == SSR)
1951 			blkoff = sbi->blocks_per_seg;
1952 		else
1953 			blkoff = curseg_blkoff(sbi, i);
1954 
1955 		for (j = 0; j < blkoff; j++) {
1956 			if (!page) {
1957 				page = grab_meta_page(sbi, blkaddr++);
1958 				kaddr = (unsigned char *)page_address(page);
1959 				written_size = 0;
1960 			}
1961 			summary = (struct f2fs_summary *)(kaddr + written_size);
1962 			*summary = seg_i->sum_blk->entries[j];
1963 			written_size += SUMMARY_SIZE;
1964 
1965 			if (written_size + SUMMARY_SIZE <= PAGE_SIZE -
1966 							SUM_FOOTER_SIZE)
1967 				continue;
1968 
1969 			set_page_dirty(page);
1970 			f2fs_put_page(page, 1);
1971 			page = NULL;
1972 		}
1973 	}
1974 	if (page) {
1975 		set_page_dirty(page);
1976 		f2fs_put_page(page, 1);
1977 	}
1978 }
1979 
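/*
 * Hedged sketch: an estimate of how many meta pages the compacted format
 * consumes for a given number of valid summary entries.  The in-tree
 * npages_for_summary_flush() computes this from the cursegs themselves;
 * the helper below is illustrative only:
 */
static inline int example_compact_sum_pages(int nr_entries)
{
	int first = (PAGE_SIZE - SUM_FOOTER_SIZE - 2 * SUM_JOURNAL_SIZE) /
							SUMMARY_SIZE;
	int rest = (PAGE_SIZE - SUM_FOOTER_SIZE) / SUMMARY_SIZE;

	if (nr_entries <= first)
		return 1;
	return 1 + DIV_ROUND_UP(nr_entries - first, rest);
}
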
1980 static void write_normal_summaries(struct f2fs_sb_info *sbi,
1981 					block_t blkaddr, int type)
1982 {
1983 	int i, end;
1984 	if (IS_DATASEG(type))
1985 		end = type + NR_CURSEG_DATA_TYPE;
1986 	else
1987 		end = type + NR_CURSEG_NODE_TYPE;
1988 
1989 	for (i = type; i < end; i++)
1990 		write_current_sum_page(sbi, i, blkaddr + (i - type));
1991 }
1992 
1993 void write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
1994 {
1995 	if (is_set_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG))
1996 		write_compacted_summaries(sbi, start_blk);
1997 	else
1998 		write_normal_summaries(sbi, start_blk, CURSEG_HOT_DATA);
1999 }
2000 
2001 void write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
2002 {
2003 	write_normal_summaries(sbi, start_blk, CURSEG_HOT_NODE);
2004 }
2005 
2006 int lookup_journal_in_cursum(struct f2fs_journal *journal, int type,
2007 					unsigned int val, int alloc)
2008 {
2009 	int i;
2010 
2011 	if (type == NAT_JOURNAL) {
2012 		for (i = 0; i < nats_in_cursum(journal); i++) {
2013 			if (le32_to_cpu(nid_in_journal(journal, i)) == val)
2014 				return i;
2015 		}
2016 		if (alloc && __has_cursum_space(journal, 1, NAT_JOURNAL))
2017 			return update_nats_in_cursum(journal, 1);
2018 	} else if (type == SIT_JOURNAL) {
2019 		for (i = 0; i < sits_in_cursum(journal); i++)
2020 			if (le32_to_cpu(segno_in_journal(journal, i)) == val)
2021 				return i;
2022 		if (alloc && __has_cursum_space(journal, 1, SIT_JOURNAL))
2023 			return update_sits_in_cursum(journal, 1);
2024 	}
2025 	return -1;
2026 }
2027 
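/*
 * Usage sketch (this mirrors the flush path further below): look a
 * segment up in the SIT journal, allocating a slot if there is room; a
 * negative return means the caller must write the entry through the
 * on-disk SIT block instead.  Illustrative helper, not an existing f2fs
 * symbol:
 */
static inline bool example_journal_sit_entry(struct f2fs_journal *journal,
					struct seg_entry *se,
					unsigned int segno)
{
	int offset = lookup_journal_in_cursum(journal, SIT_JOURNAL, segno, 1);

	if (offset < 0)
		return false;	/* fall back to get_next_sit_page() */
	segno_in_journal(journal, offset) = cpu_to_le32(segno);
	seg_info_to_raw_sit(se, &sit_in_journal(journal, offset));
	return true;
}
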
2028 static struct page *get_current_sit_page(struct f2fs_sb_info *sbi,
2029 					unsigned int segno)
2030 {
2031 	return get_meta_page(sbi, current_sit_addr(sbi, segno));
2032 }
2033 
2034 static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,
2035 					unsigned int start)
2036 {
2037 	struct sit_info *sit_i = SIT_I(sbi);
2038 	struct page *src_page, *dst_page;
2039 	pgoff_t src_off, dst_off;
2040 	void *src_addr, *dst_addr;
2041 
2042 	src_off = current_sit_addr(sbi, start);
2043 	dst_off = next_sit_addr(sbi, src_off);
2044 
2045 	/* get current sit block page without lock */
2046 	src_page = get_meta_page(sbi, src_off);
2047 	dst_page = grab_meta_page(sbi, dst_off);
2048 	f2fs_bug_on(sbi, PageDirty(src_page));
2049 
2050 	src_addr = page_address(src_page);
2051 	dst_addr = page_address(dst_page);
2052 	memcpy(dst_addr, src_addr, PAGE_SIZE);
2053 
2054 	set_page_dirty(dst_page);
2055 	f2fs_put_page(src_page, 1);
2056 
2057 	set_to_next_sit(sit_i, start);
2058 
2059 	return dst_page;
2060 }
2061 
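/*
 * The SIT area keeps two copies of every SIT block, sit_blocks apart, so
 * get_next_sit_page() above always writes the inactive copy and the
 * checkpointed one stays intact.  A hedged sketch of the mirroring done
 * by next_sit_addr() (see segment.h for the real helper):
 */
static inline pgoff_t example_next_sit_addr(struct sit_info *sit_i,
						pgoff_t blk_addr)
{
	blk_addr -= sit_i->sit_base_addr;
	if (blk_addr < sit_i->sit_blocks)
		blk_addr += sit_i->sit_blocks;	/* jump to the mirror */
	else
		blk_addr -= sit_i->sit_blocks;	/* jump back */
	return blk_addr + sit_i->sit_base_addr;
}
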
2062 static struct sit_entry_set *grab_sit_entry_set(void)
2063 {
2064 	struct sit_entry_set *ses =
2065 			f2fs_kmem_cache_alloc(sit_entry_set_slab, GFP_NOFS);
2066 
2067 	ses->entry_cnt = 0;
2068 	INIT_LIST_HEAD(&ses->set_list);
2069 	return ses;
2070 }
2071 
2072 static void release_sit_entry_set(struct sit_entry_set *ses)
2073 {
2074 	list_del(&ses->set_list);
2075 	kmem_cache_free(sit_entry_set_slab, ses);
2076 }
2077 
2078 static void adjust_sit_entry_set(struct sit_entry_set *ses,
2079 						struct list_head *head)
2080 {
2081 	struct sit_entry_set *next = ses;
2082 
2083 	if (list_is_last(&ses->set_list, head))
2084 		return;
2085 
2086 	list_for_each_entry_continue(next, head, set_list)
2087 		if (ses->entry_cnt <= next->entry_cnt)
2088 			break;
2089 
2090 	list_move_tail(&ses->set_list, &next->set_list);
2091 }
2092 
2093 static void add_sit_entry(unsigned int segno, struct list_head *head)
2094 {
2095 	struct sit_entry_set *ses;
2096 	unsigned int start_segno = START_SEGNO(segno);
2097 
2098 	list_for_each_entry(ses, head, set_list) {
2099 		if (ses->start_segno == start_segno) {
2100 			ses->entry_cnt++;
2101 			adjust_sit_entry_set(ses, head);
2102 			return;
2103 		}
2104 	}
2105 
2106 	ses = grab_sit_entry_set();
2107 
2108 	ses->start_segno = start_segno;
2109 	ses->entry_cnt++;
2110 	list_add(&ses->set_list, head);
2111 }
2112 
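/*
 * Dirty SIT entries are grouped per on-disk SIT block: START_SEGNO()
 * rounds a segment number down to the first segment covered by the same
 * block, so each sit_entry_set translates into one block update.  An
 * equivalent sketch of the rounding (illustrative helper):
 */
static inline unsigned int example_start_segno(unsigned int segno)
{
	return (segno / SIT_ENTRY_PER_BLOCK) * SIT_ENTRY_PER_BLOCK;
}
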
2113 static void add_sits_in_set(struct f2fs_sb_info *sbi)
2114 {
2115 	struct f2fs_sm_info *sm_info = SM_I(sbi);
2116 	struct list_head *set_list = &sm_info->sit_entry_set;
2117 	unsigned long *bitmap = SIT_I(sbi)->dirty_sentries_bitmap;
2118 	unsigned int segno;
2119 
2120 	for_each_set_bit(segno, bitmap, MAIN_SEGS(sbi))
2121 		add_sit_entry(segno, set_list);
2122 }
2123 
2124 static void remove_sits_in_journal(struct f2fs_sb_info *sbi)
2125 {
2126 	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
2127 	struct f2fs_journal *journal = curseg->journal;
2128 	int i;
2129 
2130 	down_write(&curseg->journal_rwsem);
2131 	for (i = 0; i < sits_in_cursum(journal); i++) {
2132 		unsigned int segno;
2133 		bool dirtied;
2134 
2135 		segno = le32_to_cpu(segno_in_journal(journal, i));
2136 		dirtied = __mark_sit_entry_dirty(sbi, segno);
2137 
2138 		if (!dirtied)
2139 			add_sit_entry(segno, &SM_I(sbi)->sit_entry_set);
2140 	}
2141 	update_sits_in_cursum(journal, -i);
2142 	up_write(&curseg->journal_rwsem);
2143 }
2144 
2145 /*
2146  * CP calls this function, which flushes SIT entries including sit_journal,
2147  * and moves prefree segs to free segs.
2148  */
2149 void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
2150 {
2151 	struct sit_info *sit_i = SIT_I(sbi);
2152 	unsigned long *bitmap = sit_i->dirty_sentries_bitmap;
2153 	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
2154 	struct f2fs_journal *journal = curseg->journal;
2155 	struct sit_entry_set *ses, *tmp;
2156 	struct list_head *head = &SM_I(sbi)->sit_entry_set;
2157 	bool to_journal = true;
2158 	struct seg_entry *se;
2159 
2160 	mutex_lock(&sit_i->sentry_lock);
2161 
2162 	if (!sit_i->dirty_sentries)
2163 		goto out;
2164 
2165 	/*
2166 	 * add and account the sit entries of the dirty bitmap in the
2167 	 * sit entry set temporarily
2168 	 */
2169 	add_sits_in_set(sbi);
2170 
2171 	/*
2172 	 * if there is not enough space in the journal to store the dirty
2173 	 * sit entries, remove all entries from the journal and add and
2174 	 * account them in the sit entry set instead.
2175 	 */
2176 	if (!__has_cursum_space(journal, sit_i->dirty_sentries, SIT_JOURNAL))
2177 		remove_sits_in_journal(sbi);
2178 
2179 	/*
2180 	 * there are two steps to flush sit entries:
2181 	 * #1, flush sit entries to journal in current cold data summary block.
2182 	 * #2, flush sit entries to sit page.
2183 	 */
2184 	list_for_each_entry_safe(ses, tmp, head, set_list) {
2185 		struct page *page = NULL;
2186 		struct f2fs_sit_block *raw_sit = NULL;
2187 		unsigned int start_segno = ses->start_segno;
2188 		unsigned int end = min(start_segno + SIT_ENTRY_PER_BLOCK,
2189 						(unsigned long)MAIN_SEGS(sbi));
2190 		unsigned int segno = start_segno;
2191 
2192 		if (to_journal &&
2193 			!__has_cursum_space(journal, ses->entry_cnt, SIT_JOURNAL))
2194 			to_journal = false;
2195 
2196 		if (to_journal) {
2197 			down_write(&curseg->journal_rwsem);
2198 		} else {
2199 			page = get_next_sit_page(sbi, start_segno);
2200 			raw_sit = page_address(page);
2201 		}
2202 
2203 		/* flush dirty sit entries in region of current sit set */
2204 		for_each_set_bit_from(segno, bitmap, end) {
2205 			int offset, sit_offset;
2206 
2207 			se = get_seg_entry(sbi, segno);
2208 
2209 			/* add discard candidates */
2210 			if (cpc->reason != CP_DISCARD) {
2211 				cpc->trim_start = segno;
2212 				add_discard_addrs(sbi, cpc);
2213 			}
2214 
2215 			if (to_journal) {
2216 				offset = lookup_journal_in_cursum(journal,
2217 							SIT_JOURNAL, segno, 1);
2218 				f2fs_bug_on(sbi, offset < 0);
2219 				segno_in_journal(journal, offset) =
2220 							cpu_to_le32(segno);
2221 				seg_info_to_raw_sit(se,
2222 					&sit_in_journal(journal, offset));
2223 			} else {
2224 				sit_offset = SIT_ENTRY_OFFSET(sit_i, segno);
2225 				seg_info_to_raw_sit(se,
2226 						&raw_sit->entries[sit_offset]);
2227 			}
2228 
2229 			__clear_bit(segno, bitmap);
2230 			sit_i->dirty_sentries--;
2231 			ses->entry_cnt--;
2232 		}
2233 
2234 		if (to_journal)
2235 			up_write(&curseg->journal_rwsem);
2236 		else
2237 			f2fs_put_page(page, 1);
2238 
2239 		f2fs_bug_on(sbi, ses->entry_cnt);
2240 		release_sit_entry_set(ses);
2241 	}
2242 
2243 	f2fs_bug_on(sbi, !list_empty(head));
2244 	f2fs_bug_on(sbi, sit_i->dirty_sentries);
2245 out:
2246 	if (cpc->reason == CP_DISCARD) {
2247 		for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++)
2248 			add_discard_addrs(sbi, cpc);
2249 	}
2250 	mutex_unlock(&sit_i->sentry_lock);
2251 
2252 	set_prefree_as_free_segments(sbi);
2253 }
2254 
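/*
 * Sketch of the per-set decision made in flush_sit_entries() above: sets
 * are visited in ascending entry_cnt order, and once one of them no
 * longer fits in the cold data journal, to_journal stays false and every
 * remaining set goes to SIT pages.  Illustrative predicate:
 */
static inline bool example_flush_to_journal(struct f2fs_journal *journal,
					struct sit_entry_set *ses,
					bool to_journal)
{
	return to_journal &&
		__has_cursum_space(journal, ses->entry_cnt, SIT_JOURNAL);
}
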
2255 static int build_sit_info(struct f2fs_sb_info *sbi)
2256 {
2257 	struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
2258 	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
2259 	struct sit_info *sit_i;
2260 	unsigned int sit_segs, start;
2261 	char *src_bitmap, *dst_bitmap;
2262 	unsigned int bitmap_size;
2263 
2264 	/* allocate memory for SIT information */
2265 	sit_i = kzalloc(sizeof(struct sit_info), GFP_KERNEL);
2266 	if (!sit_i)
2267 		return -ENOMEM;
2268 
2269 	SM_I(sbi)->sit_info = sit_i;
2270 
2271 	sit_i->sentries = f2fs_kvzalloc(MAIN_SEGS(sbi) *
2272 					sizeof(struct seg_entry), GFP_KERNEL);
2273 	if (!sit_i->sentries)
2274 		return -ENOMEM;
2275 
2276 	bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
2277 	sit_i->dirty_sentries_bitmap = f2fs_kvzalloc(bitmap_size, GFP_KERNEL);
2278 	if (!sit_i->dirty_sentries_bitmap)
2279 		return -ENOMEM;
2280 
2281 	for (start = 0; start < MAIN_SEGS(sbi); start++) {
2282 		sit_i->sentries[start].cur_valid_map
2283 			= kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
2284 		sit_i->sentries[start].ckpt_valid_map
2285 			= kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
2286 		if (!sit_i->sentries[start].cur_valid_map ||
2287 				!sit_i->sentries[start].ckpt_valid_map)
2288 			return -ENOMEM;
2289 
2290 		if (f2fs_discard_en(sbi)) {
2291 			sit_i->sentries[start].discard_map
2292 				= kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
2293 			if (!sit_i->sentries[start].discard_map)
2294 				return -ENOMEM;
2295 		}
2296 	}
2297 
2298 	sit_i->tmp_map = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
2299 	if (!sit_i->tmp_map)
2300 		return -ENOMEM;
2301 
2302 	if (sbi->segs_per_sec > 1) {
2303 		sit_i->sec_entries = f2fs_kvzalloc(MAIN_SECS(sbi) *
2304 					sizeof(struct sec_entry), GFP_KERNEL);
2305 		if (!sit_i->sec_entries)
2306 			return -ENOMEM;
2307 	}
2308 
2309 	/* get information related to SIT */
2310 	sit_segs = le32_to_cpu(raw_super->segment_count_sit) >> 1;
2311 
2312 	/* set up the SIT bitmap from the checkpoint pack */
2313 	bitmap_size = __bitmap_size(sbi, SIT_BITMAP);
2314 	src_bitmap = __bitmap_ptr(sbi, SIT_BITMAP);
2315 
2316 	dst_bitmap = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL);
2317 	if (!dst_bitmap)
2318 		return -ENOMEM;
2319 
2320 	/* init SIT information */
2321 	sit_i->s_ops = &default_salloc_ops;
2322 
2323 	sit_i->sit_base_addr = le32_to_cpu(raw_super->sit_blkaddr);
2324 	sit_i->sit_blocks = sit_segs << sbi->log_blocks_per_seg;
2325 	sit_i->written_valid_blocks = le64_to_cpu(ckpt->valid_block_count);
2326 	sit_i->sit_bitmap = dst_bitmap;
2327 	sit_i->bitmap_size = bitmap_size;
2328 	sit_i->dirty_sentries = 0;
2329 	sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK;
2330 	sit_i->elapsed_time = le64_to_cpu(sbi->ckpt->elapsed_time);
2331 	sit_i->mounted_time = CURRENT_TIME_SEC.tv_sec;
2332 	mutex_init(&sit_i->sentry_lock);
2333 	return 0;
2334 }
2335 
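/*
 * Rough sizing sketch for the allocations above (illustrative helper,
 * not an existing f2fs symbol): each main segment costs one seg_entry
 * plus two always-present validity bitmaps, and a third discard bitmap
 * when discard support is enabled.
 */
static inline size_t example_sit_mem_bytes(struct f2fs_sb_info *sbi,
						bool discard)
{
	return (size_t)MAIN_SEGS(sbi) * (sizeof(struct seg_entry) +
			(discard ? 3 : 2) * SIT_VBLOCK_MAP_SIZE);
}
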
2336 static int build_free_segmap(struct f2fs_sb_info *sbi)
2337 {
2338 	struct free_segmap_info *free_i;
2339 	unsigned int bitmap_size, sec_bitmap_size;
2340 
2341 	/* allocate memory for free segmap information */
2342 	free_i = kzalloc(sizeof(struct free_segmap_info), GFP_KERNEL);
2343 	if (!free_i)
2344 		return -ENOMEM;
2345 
2346 	SM_I(sbi)->free_info = free_i;
2347 
2348 	bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
2349 	free_i->free_segmap = f2fs_kvmalloc(bitmap_size, GFP_KERNEL);
2350 	if (!free_i->free_segmap)
2351 		return -ENOMEM;
2352 
2353 	sec_bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
2354 	free_i->free_secmap = f2fs_kvmalloc(sec_bitmap_size, GFP_KERNEL);
2355 	if (!free_i->free_secmap)
2356 		return -ENOMEM;
2357 
2358 	/* set all segments as dirty temporarily */
2359 	memset(free_i->free_segmap, 0xff, bitmap_size);
2360 	memset(free_i->free_secmap, 0xff, sec_bitmap_size);
2361 
2362 	/* init free segmap information */
2363 	free_i->start_segno = GET_SEGNO_FROM_SEG0(sbi, MAIN_BLKADDR(sbi));
2364 	free_i->free_segments = 0;
2365 	free_i->free_sections = 0;
2366 	spin_lock_init(&free_i->segmap_lock);
2367 	return 0;
2368 }
2369 
2370 static int build_curseg(struct f2fs_sb_info *sbi)
2371 {
2372 	struct curseg_info *array;
2373 	int i;
2374 
2375 	array = kcalloc(NR_CURSEG_TYPE, sizeof(*array), GFP_KERNEL);
2376 	if (!array)
2377 		return -ENOMEM;
2378 
2379 	SM_I(sbi)->curseg_array = array;
2380 
2381 	for (i = 0; i < NR_CURSEG_TYPE; i++) {
2382 		mutex_init(&array[i].curseg_mutex);
2383 		array[i].sum_blk = kzalloc(PAGE_SIZE, GFP_KERNEL);
2384 		if (!array[i].sum_blk)
2385 			return -ENOMEM;
2386 		init_rwsem(&array[i].journal_rwsem);
2387 		array[i].journal = kzalloc(sizeof(struct f2fs_journal),
2388 							GFP_KERNEL);
2389 		if (!array[i].journal)
2390 			return -ENOMEM;
2391 		array[i].segno = NULL_SEGNO;
2392 		array[i].next_blkoff = 0;
2393 	}
2394 	return restore_curseg_summaries(sbi);
2395 }
2396 
2397 static void build_sit_entries(struct f2fs_sb_info *sbi)
2398 {
2399 	struct sit_info *sit_i = SIT_I(sbi);
2400 	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
2401 	struct f2fs_journal *journal = curseg->journal;
2402 	struct seg_entry *se;
2403 	struct f2fs_sit_entry sit;
2404 	int sit_blk_cnt = SIT_BLK_CNT(sbi);
2405 	unsigned int i, start, end;
2406 	unsigned int readed, start_blk = 0;
2407 
2408 	do {
2409 		readed = ra_meta_pages(sbi, start_blk, BIO_MAX_PAGES,
2410 							META_SIT, true);
2411 
2412 		start = start_blk * sit_i->sents_per_block;
2413 		end = (start_blk + readed) * sit_i->sents_per_block;
2414 
2415 		for (; start < end && start < MAIN_SEGS(sbi); start++) {
2416 			struct f2fs_sit_block *sit_blk;
2417 			struct page *page;
2418 
2419 			se = &sit_i->sentries[start];
2420 			page = get_current_sit_page(sbi, start);
2421 			sit_blk = (struct f2fs_sit_block *)page_address(page);
2422 			sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)];
2423 			f2fs_put_page(page, 1);
2424 
2425 			check_block_count(sbi, start, &sit);
2426 			seg_info_from_raw_sit(se, &sit);
2427 
2428 			/* build discard map only one time */
2429 			if (f2fs_discard_en(sbi)) {
2430 				memcpy(se->discard_map, se->cur_valid_map,
2431 							SIT_VBLOCK_MAP_SIZE);
2432 				sbi->discard_blks += sbi->blocks_per_seg -
2433 							se->valid_blocks;
2434 			}
2435 
2436 			if (sbi->segs_per_sec > 1)
2437 				get_sec_entry(sbi, start)->valid_blocks +=
2438 							se->valid_blocks;
2439 		}
2440 		start_blk += readed;
2441 	} while (start_blk < sit_blk_cnt);
2442 
2443 	down_read(&curseg->journal_rwsem);
2444 	for (i = 0; i < sits_in_cursum(journal); i++) {
2445 		unsigned int old_valid_blocks;
2446 
2447 		start = le32_to_cpu(segno_in_journal(journal, i));
2448 		se = &sit_i->sentries[start];
2449 		sit = sit_in_journal(journal, i);
2450 
2451 		old_valid_blocks = se->valid_blocks;
2452 
2453 		check_block_count(sbi, start, &sit);
2454 		seg_info_from_raw_sit(se, &sit);
2455 
2456 		if (f2fs_discard_en(sbi)) {
2457 			memcpy(se->discard_map, se->cur_valid_map,
2458 						SIT_VBLOCK_MAP_SIZE);
2459 			sbi->discard_blks += old_valid_blocks -
2460 						se->valid_blocks;
2461 		}
2462 
2463 		if (sbi->segs_per_sec > 1)
2464 			get_sec_entry(sbi, start)->valid_blocks +=
2465 				se->valid_blocks - old_valid_blocks;
2466 	}
2467 	up_read(&curseg->journal_rwsem);
2468 }
2469 
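/*
 * Note on ordering in build_sit_entries() above: the journal is replayed
 * after the on-disk SIT blocks, so a journalled entry (the newer state)
 * overrides whatever the SIT block said.  Illustrative check for whether
 * a segment's latest state lives in the journal (hypothetical helper;
 * the real code simply replays the journal last):
 */
static inline bool example_sit_in_journal(struct f2fs_journal *journal,
						unsigned int segno)
{
	int i;

	for (i = 0; i < sits_in_cursum(journal); i++)
		if (le32_to_cpu(segno_in_journal(journal, i)) == segno)
			return true;
	return false;
}
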
2470 static void init_free_segmap(struct f2fs_sb_info *sbi)
2471 {
2472 	unsigned int start;
2473 	int type;
2474 
2475 	for (start = 0; start < MAIN_SEGS(sbi); start++) {
2476 		struct seg_entry *sentry = get_seg_entry(sbi, start);
2477 		if (!sentry->valid_blocks)
2478 			__set_free(sbi, start);
2479 	}
2480 
2481 	/* mark the current segments as in use */
2482 	for (type = CURSEG_HOT_DATA; type <= CURSEG_COLD_NODE; type++) {
2483 		struct curseg_info *curseg_t = CURSEG_I(sbi, type);
2484 		__set_test_and_inuse(sbi, curseg_t->segno);
2485 	}
2486 }
2487 
2488 static void init_dirty_segmap(struct f2fs_sb_info *sbi)
2489 {
2490 	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
2491 	struct free_segmap_info *free_i = FREE_I(sbi);
2492 	unsigned int segno = 0, offset = 0;
2493 	unsigned short valid_blocks;
2494 
2495 	while (1) {
2496 		/* find dirty segment based on free segmap */
2497 		segno = find_next_inuse(free_i, MAIN_SEGS(sbi), offset);
2498 		if (segno >= MAIN_SEGS(sbi))
2499 			break;
2500 		offset = segno + 1;
2501 		valid_blocks = get_valid_blocks(sbi, segno, 0);
2502 		if (valid_blocks == sbi->blocks_per_seg || !valid_blocks)
2503 			continue;
2504 		if (valid_blocks > sbi->blocks_per_seg) {
2505 			f2fs_bug_on(sbi, 1);
2506 			continue;
2507 		}
2508 		mutex_lock(&dirty_i->seglist_lock);
2509 		__locate_dirty_segment(sbi, segno, DIRTY);
2510 		mutex_unlock(&dirty_i->seglist_lock);
2511 	}
2512 }
2513 
2514 static int init_victim_secmap(struct f2fs_sb_info *sbi)
2515 {
2516 	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
2517 	unsigned int bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
2518 
2519 	dirty_i->victim_secmap = f2fs_kvzalloc(bitmap_size, GFP_KERNEL);
2520 	if (!dirty_i->victim_secmap)
2521 		return -ENOMEM;
2522 	return 0;
2523 }
2524 
2525 static int build_dirty_segmap(struct f2fs_sb_info *sbi)
2526 {
2527 	struct dirty_seglist_info *dirty_i;
2528 	unsigned int bitmap_size, i;
2529 
2530 	/* allocate memory for dirty segments list information */
2531 	dirty_i = kzalloc(sizeof(struct dirty_seglist_info), GFP_KERNEL);
2532 	if (!dirty_i)
2533 		return -ENOMEM;
2534 
2535 	SM_I(sbi)->dirty_info = dirty_i;
2536 	mutex_init(&dirty_i->seglist_lock);
2537 
2538 	bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
2539 
2540 	for (i = 0; i < NR_DIRTY_TYPE; i++) {
2541 		dirty_i->dirty_segmap[i] = f2fs_kvzalloc(bitmap_size, GFP_KERNEL);
2542 		if (!dirty_i->dirty_segmap[i])
2543 			return -ENOMEM;
2544 	}
2545 
2546 	init_dirty_segmap(sbi);
2547 	return init_victim_secmap(sbi);
2548 }
2549 
2550 /*
2551  * Update min, max modified time for cost-benefit GC algorithm
2552  */
2553 static void init_min_max_mtime(struct f2fs_sb_info *sbi)
2554 {
2555 	struct sit_info *sit_i = SIT_I(sbi);
2556 	unsigned int segno;
2557 
2558 	mutex_lock(&sit_i->sentry_lock);
2559 
2560 	sit_i->min_mtime = LLONG_MAX;
2561 
2562 	for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) {
2563 		unsigned int i;
2564 		unsigned long long mtime = 0;
2565 
2566 		for (i = 0; i < sbi->segs_per_sec; i++)
2567 			mtime += get_seg_entry(sbi, segno + i)->mtime;
2568 
2569 		mtime = div_u64(mtime, sbi->segs_per_sec);
2570 
2571 		if (sit_i->min_mtime > mtime)
2572 			sit_i->min_mtime = mtime;
2573 	}
2574 	sit_i->max_mtime = get_mtime(sbi);
2575 	mutex_unlock(&sit_i->sentry_lock);
2576 }
2577 
2578 int build_segment_manager(struct f2fs_sb_info *sbi)
2579 {
2580 	struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
2581 	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
2582 	struct f2fs_sm_info *sm_info;
2583 	int err;
2584 
2585 	sm_info = kzalloc(sizeof(struct f2fs_sm_info), GFP_KERNEL);
2586 	if (!sm_info)
2587 		return -ENOMEM;
2588 
2589 	/* init sm info */
2590 	sbi->sm_info = sm_info;
2591 	sm_info->seg0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr);
2592 	sm_info->main_blkaddr = le32_to_cpu(raw_super->main_blkaddr);
2593 	sm_info->segment_count = le32_to_cpu(raw_super->segment_count);
2594 	sm_info->reserved_segments = le32_to_cpu(ckpt->rsvd_segment_count);
2595 	sm_info->ovp_segments = le32_to_cpu(ckpt->overprov_segment_count);
2596 	sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main);
2597 	sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr);
2598 	sm_info->rec_prefree_segments = sm_info->main_segments *
2599 					DEF_RECLAIM_PREFREE_SEGMENTS / 100;
2600 	if (sm_info->rec_prefree_segments > DEF_MAX_RECLAIM_PREFREE_SEGMENTS)
2601 		sm_info->rec_prefree_segments = DEF_MAX_RECLAIM_PREFREE_SEGMENTS;
2602 
2603 	if (!test_opt(sbi, LFS))
2604 		sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC;
2605 	sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
2606 	sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
2607 
2608 	INIT_LIST_HEAD(&sm_info->discard_list);
2609 	INIT_LIST_HEAD(&sm_info->wait_list);
2610 	sm_info->nr_discards = 0;
2611 	sm_info->max_discards = 0;
2612 
2613 	sm_info->trim_sections = DEF_BATCHED_TRIM_SECTIONS;
2614 
2615 	INIT_LIST_HEAD(&sm_info->sit_entry_set);
2616 
2617 	if (test_opt(sbi, FLUSH_MERGE) && !f2fs_readonly(sbi->sb)) {
2618 		err = create_flush_cmd_control(sbi);
2619 		if (err)
2620 			return err;
2621 	}
2622 
2623 	err = build_sit_info(sbi);
2624 	if (err)
2625 		return err;
2626 	err = build_free_segmap(sbi);
2627 	if (err)
2628 		return err;
2629 	err = build_curseg(sbi);
2630 	if (err)
2631 		return err;
2632 
2633 	/* reinit free segmap based on SIT */
2634 	build_sit_entries(sbi);
2635 
2636 	init_free_segmap(sbi);
2637 	err = build_dirty_segmap(sbi);
2638 	if (err)
2639 		return err;
2640 
2641 	init_min_max_mtime(sbi);
2642 	return 0;
2643 }
2644 
2645 static void discard_dirty_segmap(struct f2fs_sb_info *sbi,
2646 		enum dirty_type dirty_type)
2647 {
2648 	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
2649 
2650 	mutex_lock(&dirty_i->seglist_lock);
2651 	kvfree(dirty_i->dirty_segmap[dirty_type]);
2652 	dirty_i->nr_dirty[dirty_type] = 0;
2653 	mutex_unlock(&dirty_i->seglist_lock);
2654 }
2655 
2656 static void destroy_victim_secmap(struct f2fs_sb_info *sbi)
2657 {
2658 	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
2659 	kvfree(dirty_i->victim_secmap);
2660 }
2661 
2662 static void destroy_dirty_segmap(struct f2fs_sb_info *sbi)
2663 {
2664 	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
2665 	int i;
2666 
2667 	if (!dirty_i)
2668 		return;
2669 
2670 	/* discard pre-free/dirty segments list */
2671 	for (i = 0; i < NR_DIRTY_TYPE; i++)
2672 		discard_dirty_segmap(sbi, i);
2673 
2674 	destroy_victim_secmap(sbi);
2675 	SM_I(sbi)->dirty_info = NULL;
2676 	kfree(dirty_i);
2677 }
2678 
2679 static void destroy_curseg(struct f2fs_sb_info *sbi)
2680 {
2681 	struct curseg_info *array = SM_I(sbi)->curseg_array;
2682 	int i;
2683 
2684 	if (!array)
2685 		return;
2686 	SM_I(sbi)->curseg_array = NULL;
2687 	for (i = 0; i < NR_CURSEG_TYPE; i++) {
2688 		kfree(array[i].sum_blk);
2689 		kfree(array[i].journal);
2690 	}
2691 	kfree(array);
2692 }
2693 
2694 static void destroy_free_segmap(struct f2fs_sb_info *sbi)
2695 {
2696 	struct free_segmap_info *free_i = SM_I(sbi)->free_info;
2697 	if (!free_i)
2698 		return;
2699 	SM_I(sbi)->free_info = NULL;
2700 	kvfree(free_i->free_segmap);
2701 	kvfree(free_i->free_secmap);
2702 	kfree(free_i);
2703 }
2704 
2705 static void destroy_sit_info(struct f2fs_sb_info *sbi)
2706 {
2707 	struct sit_info *sit_i = SIT_I(sbi);
2708 	unsigned int start;
2709 
2710 	if (!sit_i)
2711 		return;
2712 
2713 	if (sit_i->sentries) {
2714 		for (start = 0; start < MAIN_SEGS(sbi); start++) {
2715 			kfree(sit_i->sentries[start].cur_valid_map);
2716 			kfree(sit_i->sentries[start].ckpt_valid_map);
2717 			kfree(sit_i->sentries[start].discard_map);
2718 		}
2719 	}
2720 	kfree(sit_i->tmp_map);
2721 
2722 	kvfree(sit_i->sentries);
2723 	kvfree(sit_i->sec_entries);
2724 	kvfree(sit_i->dirty_sentries_bitmap);
2725 
2726 	SM_I(sbi)->sit_info = NULL;
2727 	kfree(sit_i->sit_bitmap);
2728 	kfree(sit_i);
2729 }
2730 
2731 void destroy_segment_manager(struct f2fs_sb_info *sbi)
2732 {
2733 	struct f2fs_sm_info *sm_info = SM_I(sbi);
2734 
2735 	if (!sm_info)
2736 		return;
2737 	destroy_flush_cmd_control(sbi);
2738 	destroy_dirty_segmap(sbi);
2739 	destroy_curseg(sbi);
2740 	destroy_free_segmap(sbi);
2741 	destroy_sit_info(sbi);
2742 	sbi->sm_info = NULL;
2743 	kfree(sm_info);
2744 }
2745 
2746 int __init create_segment_manager_caches(void)
2747 {
2748 	discard_entry_slab = f2fs_kmem_cache_create("discard_entry",
2749 			sizeof(struct discard_entry));
2750 	if (!discard_entry_slab)
2751 		goto fail;
2752 
2753 	bio_entry_slab = f2fs_kmem_cache_create("bio_entry",
2754 			sizeof(struct bio_entry));
2755 	if (!bio_entry_slab)
2756 		goto destroy_discard_entry;
2757 
2758 	sit_entry_set_slab = f2fs_kmem_cache_create("sit_entry_set",
2759 			sizeof(struct sit_entry_set));
2760 	if (!sit_entry_set_slab)
2761 		goto destroy_bio_entry;
2762 
2763 	inmem_entry_slab = f2fs_kmem_cache_create("inmem_page_entry",
2764 			sizeof(struct inmem_pages));
2765 	if (!inmem_entry_slab)
2766 		goto destroy_sit_entry_set;
2767 	return 0;
2768 
2769 destroy_sit_entry_set:
2770 	kmem_cache_destroy(sit_entry_set_slab);
2771 destroy_bio_entry:
2772 	kmem_cache_destroy(bio_entry_slab);
2773 destroy_discard_entry:
2774 	kmem_cache_destroy(discard_entry_slab);
2775 fail:
2776 	return -ENOMEM;
2777 }
2778 
2779 void destroy_segment_manager_caches(void)
2780 {
2781 	kmem_cache_destroy(sit_entry_set_slab);
2782 	kmem_cache_destroy(bio_entry_slab);
2783 	kmem_cache_destroy(discard_entry_slab);
2784 	kmem_cache_destroy(inmem_entry_slab);
2785 }
2786