xref: /openbmc/linux/fs/f2fs/segment.c (revision ad4d307fce0909a5f70635826f779321ab95b469)
1 /*
2  * fs/f2fs/segment.c
3  *
4  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
5  *             http://www.samsung.com/
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License version 2 as
9  * published by the Free Software Foundation.
10  */
11 #include <linux/fs.h>
12 #include <linux/f2fs_fs.h>
13 #include <linux/bio.h>
14 #include <linux/blkdev.h>
15 #include <linux/prefetch.h>
16 #include <linux/kthread.h>
17 #include <linux/swap.h>
18 #include <linux/timer.h>
19 
20 #include "f2fs.h"
21 #include "segment.h"
22 #include "node.h"
23 #include "trace.h"
24 #include <trace/events/f2fs.h>
25 
26 #define __reverse_ffz(x) __reverse_ffs(~(x))
27 
28 static struct kmem_cache *discard_entry_slab;
29 static struct kmem_cache *discard_cmd_slab;
30 static struct kmem_cache *sit_entry_set_slab;
31 static struct kmem_cache *inmem_entry_slab;
32 
33 static unsigned long __reverse_ulong(unsigned char *str)
34 {
35 	unsigned long tmp = 0;
36 	int shift = 24, idx = 0;
37 
38 #if BITS_PER_LONG == 64
39 	shift = 56;
40 #endif
41 	while (shift >= 0) {
42 		tmp |= (unsigned long)str[idx++] << shift;
43 		shift -= BITS_PER_BYTE;
44 	}
45 	return tmp;
46 }
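/*
 * Illustrative example (64-bit): for str = {0x80, 0x00, 0x00, 0x00,
 * 0x00, 0x00, 0x00, 0x01}, __reverse_ulong() returns
 * 0x8000000000000001UL: byte 0 of the bitmap becomes the most
 * significant byte of the word, matching f2fs's MSB-first bit order.
 */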
47 
48 /*
49  * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since
50  * MSB and LSB are reversed in a byte by f2fs_set_bit.
51  */
52 static inline unsigned long __reverse_ffs(unsigned long word)
53 {
54 	int num = 0;
55 
56 #if BITS_PER_LONG == 64
57 	if ((word & 0xffffffff00000000UL) == 0)
58 		num += 32;
59 	else
60 		word >>= 32;
61 #endif
62 	if ((word & 0xffff0000) == 0)
63 		num += 16;
64 	else
65 		word >>= 16;
66 
67 	if ((word & 0xff00) == 0)
68 		num += 8;
69 	else
70 		word >>= 8;
71 
72 	if ((word & 0xf0) == 0)
73 		num += 4;
74 	else
75 		word >>= 4;
76 
77 	if ((word & 0xc) == 0)
78 		num += 2;
79 	else
80 		word >>= 2;
81 
82 	if ((word & 0x2) == 0)
83 		num += 1;
84 	return num;
85 }
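/*
 * Worked example: f2fs_set_bit(3, bitmap) sets 0x10 in byte 0
 * (MSB-first within the byte). After __reverse_ulong() that byte is the
 * top byte of the word, so __reverse_ffs(0x1000000000000000UL) walks
 * down through the 32/16/8/4/2-bit halves and returns 3, the original
 * bit index.
 */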
86 
87 /*
88  * __find_rev_next(_zero)_bit is copied from lib/find_next_bit.c because
89  * f2fs_set_bit reverses the MSB and LSB within each byte.
90  * @size must be a multiple of the number of bits in an unsigned long.
91  * Example:
92  *                             MSB <--> LSB
93  *   f2fs_set_bit(0, bitmap) => 1000 0000
94  *   f2fs_set_bit(7, bitmap) => 0000 0001
95  */
96 static unsigned long __find_rev_next_bit(const unsigned long *addr,
97 			unsigned long size, unsigned long offset)
98 {
99 	const unsigned long *p = addr + BIT_WORD(offset);
100 	unsigned long result = size;
101 	unsigned long tmp;
102 
103 	if (offset >= size)
104 		return size;
105 
106 	size -= (offset & ~(BITS_PER_LONG - 1));
107 	offset %= BITS_PER_LONG;
108 
109 	while (1) {
110 		if (*p == 0)
111 			goto pass;
112 
113 		tmp = __reverse_ulong((unsigned char *)p);
114 
115 		tmp &= ~0UL >> offset;
116 		if (size < BITS_PER_LONG)
117 			tmp &= (~0UL << (BITS_PER_LONG - size));
118 		if (tmp)
119 			goto found;
120 pass:
121 		if (size <= BITS_PER_LONG)
122 			break;
123 		size -= BITS_PER_LONG;
124 		offset = 0;
125 		p++;
126 	}
127 	return result;
128 found:
129 	return result - size + __reverse_ffs(tmp);
130 }
131 
132 static unsigned long __find_rev_next_zero_bit(const unsigned long *addr,
133 			unsigned long size, unsigned long offset)
134 {
135 	const unsigned long *p = addr + BIT_WORD(offset);
136 	unsigned long result = size;
137 	unsigned long tmp;
138 
139 	if (offset >= size)
140 		return size;
141 
142 	size -= (offset & ~(BITS_PER_LONG - 1));
143 	offset %= BITS_PER_LONG;
144 
145 	while (1) {
146 		if (*p == ~0UL)
147 			goto pass;
148 
149 		tmp = __reverse_ulong((unsigned char *)p);
150 
151 		if (offset)
152 			tmp |= ~0UL << (BITS_PER_LONG - offset);
153 		if (size < BITS_PER_LONG)
154 			tmp |= ~0UL >> size;
155 		if (tmp != ~0UL)
156 			goto found;
157 pass:
158 		if (size <= BITS_PER_LONG)
159 			break;
160 		size -= BITS_PER_LONG;
161 		offset = 0;
162 		p++;
163 	}
164 	return result;
165 found:
166 	return result - size + __reverse_ffz(tmp);
167 }
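/*
 * Usage sketch (illustrative, 64-bit): with a zeroed map after only
 * f2fs_set_bit(5, map), __find_rev_next_bit(map, 64, 0) returns 5 and
 * __find_rev_next_zero_bit(map, 64, 5) returns 6, mirroring
 * find_next_bit()/find_next_zero_bit() for the reversed bit order.
 */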
168 
169 void register_inmem_page(struct inode *inode, struct page *page)
170 {
171 	struct f2fs_inode_info *fi = F2FS_I(inode);
172 	struct inmem_pages *new;
173 
174 	f2fs_trace_pid(page);
175 
176 	set_page_private(page, (unsigned long)ATOMIC_WRITTEN_PAGE);
177 	SetPagePrivate(page);
178 
179 	new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS);
180 
181 	/* add atomic page indices to the list */
182 	new->page = page;
183 	INIT_LIST_HEAD(&new->list);
184 
185 	/* increase reference count with clean state */
186 	mutex_lock(&fi->inmem_lock);
187 	get_page(page);
188 	list_add_tail(&new->list, &fi->inmem_pages);
189 	inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
190 	mutex_unlock(&fi->inmem_lock);
191 
192 	trace_f2fs_register_inmem_page(page, INMEM);
193 }
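/*
 * Typical flow (a sketch of the expected caller behavior, not enforced
 * here): an application issues F2FS_IOC_START_ATOMIC_WRITE, writes its
 * pages (each one registered above), then issues
 * F2FS_IOC_COMMIT_ATOMIC_WRITE, which reaches commit_inmem_pages()
 * below; an abort instead drops the list via drop_inmem_pages().
 */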
194 
195 static int __revoke_inmem_pages(struct inode *inode,
196 				struct list_head *head, bool drop, bool recover)
197 {
198 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
199 	struct inmem_pages *cur, *tmp;
200 	int err = 0;
201 
202 	list_for_each_entry_safe(cur, tmp, head, list) {
203 		struct page *page = cur->page;
204 
205 		if (drop)
206 			trace_f2fs_commit_inmem_page(page, INMEM_DROP);
207 
208 		lock_page(page);
209 
210 		if (recover) {
211 			struct dnode_of_data dn;
212 			struct node_info ni;
213 
214 			trace_f2fs_commit_inmem_page(page, INMEM_REVOKE);
215 
216 			set_new_dnode(&dn, inode, NULL, NULL, 0);
217 			if (get_dnode_of_data(&dn, page->index, LOOKUP_NODE)) {
218 				err = -EAGAIN;
219 				goto next;
220 			}
221 			get_node_info(sbi, dn.nid, &ni);
222 			f2fs_replace_block(sbi, &dn, dn.data_blkaddr,
223 					cur->old_addr, ni.version, true, true);
224 			f2fs_put_dnode(&dn);
225 		}
226 next:
227 		/* we don't need to invalidate this in the successful case */
228 		if (drop || recover)
229 			ClearPageUptodate(page);
230 		set_page_private(page, 0);
231 		ClearPagePrivate(page);
232 		f2fs_put_page(page, 1);
233 
234 		list_del(&cur->list);
235 		kmem_cache_free(inmem_entry_slab, cur);
236 		dec_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
237 	}
238 	return err;
239 }
240 
241 void drop_inmem_pages(struct inode *inode)
242 {
243 	struct f2fs_inode_info *fi = F2FS_I(inode);
244 
245 	mutex_lock(&fi->inmem_lock);
246 	__revoke_inmem_pages(inode, &fi->inmem_pages, true, false);
247 	mutex_unlock(&fi->inmem_lock);
248 
249 	clear_inode_flag(inode, FI_ATOMIC_FILE);
250 	stat_dec_atomic_write(inode);
251 }
252 
253 static int __commit_inmem_pages(struct inode *inode,
254 					struct list_head *revoke_list)
255 {
256 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
257 	struct f2fs_inode_info *fi = F2FS_I(inode);
258 	struct inmem_pages *cur, *tmp;
259 	struct f2fs_io_info fio = {
260 		.sbi = sbi,
261 		.type = DATA,
262 		.op = REQ_OP_WRITE,
263 		.op_flags = REQ_SYNC | REQ_PRIO,
264 		.encrypted_page = NULL,
265 	};
266 	pgoff_t last_idx = ULONG_MAX;
267 	int err = 0;
268 
269 	list_for_each_entry_safe(cur, tmp, &fi->inmem_pages, list) {
270 		struct page *page = cur->page;
271 
272 		lock_page(page);
273 		if (page->mapping == inode->i_mapping) {
274 			trace_f2fs_commit_inmem_page(page, INMEM);
275 
276 			set_page_dirty(page);
277 			f2fs_wait_on_page_writeback(page, DATA, true);
278 			if (clear_page_dirty_for_io(page)) {
279 				inode_dec_dirty_pages(inode);
280 				remove_dirty_inode(inode);
281 			}
282 
283 			fio.page = page;
284 			err = do_write_data_page(&fio);
285 			if (err) {
286 				unlock_page(page);
287 				break;
288 			}
289 
290 			/* record old blkaddr for revoking */
291 			cur->old_addr = fio.old_blkaddr;
292 			last_idx = page->index;
293 		}
294 		unlock_page(page);
295 		list_move_tail(&cur->list, revoke_list);
296 	}
297 
298 	if (last_idx != ULONG_MAX)
299 		f2fs_submit_merged_bio_cond(sbi, inode, 0, last_idx,
300 							DATA, WRITE);
301 
302 	if (!err)
303 		__revoke_inmem_pages(inode, revoke_list, false, false);
304 
305 	return err;
306 }
307 
308 int commit_inmem_pages(struct inode *inode)
309 {
310 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
311 	struct f2fs_inode_info *fi = F2FS_I(inode);
312 	struct list_head revoke_list;
313 	int err;
314 
315 	INIT_LIST_HEAD(&revoke_list);
316 	f2fs_balance_fs(sbi, true);
317 	f2fs_lock_op(sbi);
318 
319 	set_inode_flag(inode, FI_ATOMIC_COMMIT);
320 
321 	mutex_lock(&fi->inmem_lock);
322 	err = __commit_inmem_pages(inode, &revoke_list);
323 	if (err) {
324 		int ret;
325 		/*
326 		 * Try to revoke all committed pages; this could still fail
327 		 * due to lack of memory or for other reasons. If that happens,
328 		 * -EAGAIN is returned, meaning the transaction has lost its
329 		 * integrity and the caller should use a journal to do the
330 		 * recovery, or rewrite and commit the last transaction. For any
331 		 * other error number, revoking was done by the filesystem itself.
332 		 */
333 		ret = __revoke_inmem_pages(inode, &revoke_list, false, true);
334 		if (ret)
335 			err = ret;
336 
337 		/* drop all uncommitted pages */
338 		__revoke_inmem_pages(inode, &fi->inmem_pages, true, false);
339 	}
340 	mutex_unlock(&fi->inmem_lock);
341 
342 	clear_inode_flag(inode, FI_ATOMIC_COMMIT);
343 
344 	f2fs_unlock_op(sbi);
345 	return err;
346 }
347 
348 /*
349  * This function balances dirty node and dentry pages.
350  * In addition, it controls garbage collection.
351  */
352 void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
353 {
354 #ifdef CONFIG_F2FS_FAULT_INJECTION
355 	if (time_to_inject(sbi, FAULT_CHECKPOINT))
356 		f2fs_stop_checkpoint(sbi, false);
357 #endif
358 
359 	if (!need)
360 		return;
361 
362 	/* background balancing may still be pending */
363 	if (excess_cached_nats(sbi))
364 		f2fs_balance_fs_bg(sbi);
365 
366 	/*
367 	 * If there are too many dirty dir/node pages and not enough free
368 	 * segments, we should do GC, which may end with a checkpoint.
369 	 */
370 	if (has_not_enough_free_secs(sbi, 0, 0)) {
371 		mutex_lock(&sbi->gc_mutex);
372 		f2fs_gc(sbi, false, false);
373 	}
374 }
375 
376 void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
377 {
378 	/* try to shrink the extent cache when there is not enough memory */
379 	if (!available_free_memory(sbi, EXTENT_CACHE))
380 		f2fs_shrink_extent_tree(sbi, EXTENT_CACHE_SHRINK_NUMBER);
381 
382 	/* check the # of cached NAT entries */
383 	if (!available_free_memory(sbi, NAT_ENTRIES))
384 		try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK);
385 
386 	if (!available_free_memory(sbi, FREE_NIDS))
387 		try_to_free_nids(sbi, MAX_FREE_NIDS);
388 	else
389 		build_free_nids(sbi, false);
390 
391 	if (!is_idle(sbi))
392 		return;
393 
394 	/* checkpoint is the only way to shrink partial cached entries */
395 	if (!available_free_memory(sbi, NAT_ENTRIES) ||
396 			!available_free_memory(sbi, INO_ENTRIES) ||
397 			excess_prefree_segs(sbi) ||
398 			excess_dirty_nats(sbi) ||
399 			f2fs_time_over(sbi, CP_TIME)) {
400 		if (test_opt(sbi, DATA_FLUSH)) {
401 			struct blk_plug plug;
402 
403 			blk_start_plug(&plug);
404 			sync_dirty_inodes(sbi, FILE_INODE);
405 			blk_finish_plug(&plug);
406 		}
407 		f2fs_sync_fs(sbi->sb, true);
408 		stat_inc_bg_cp_count(sbi->stat_info);
409 	}
410 }
411 
412 static int __submit_flush_wait(struct block_device *bdev)
413 {
414 	struct bio *bio = f2fs_bio_alloc(0);
415 	int ret;
416 
417 	bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
418 	bio->bi_bdev = bdev;
419 	ret = submit_bio_wait(bio);
420 	bio_put(bio);
421 	return ret;
422 }
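/*
 * A zero-payload REQ_PREFLUSH bio asks the device to flush its volatile
 * write cache; submit_bio_wait() sleeps until the flush completes, so
 * the return value reflects the device's cache-flush status.
 */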
423 
424 static int submit_flush_wait(struct f2fs_sb_info *sbi)
425 {
426 	int ret = __submit_flush_wait(sbi->sb->s_bdev);
427 	int i;
428 
429 	if (sbi->s_ndevs && !ret) {
430 		for (i = 1; i < sbi->s_ndevs; i++) {
431 			trace_f2fs_issue_flush(FDEV(i).bdev,
432 					test_opt(sbi, NOBARRIER),
433 					test_opt(sbi, FLUSH_MERGE));
434 			ret = __submit_flush_wait(FDEV(i).bdev);
435 			if (ret)
436 				break;
437 		}
438 	}
439 	return ret;
440 }
441 
442 static int issue_flush_thread(void *data)
443 {
444 	struct f2fs_sb_info *sbi = data;
445 	struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
446 	wait_queue_head_t *q = &fcc->flush_wait_queue;
447 repeat:
448 	if (kthread_should_stop())
449 		return 0;
450 
451 	if (!llist_empty(&fcc->issue_list)) {
452 		struct flush_cmd *cmd, *next;
453 		int ret;
454 
455 		fcc->dispatch_list = llist_del_all(&fcc->issue_list);
456 		fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list);
457 
458 		ret = submit_flush_wait(sbi);
459 		llist_for_each_entry_safe(cmd, next,
460 					  fcc->dispatch_list, llnode) {
461 			cmd->ret = ret;
462 			complete(&cmd->wait);
463 		}
464 		fcc->dispatch_list = NULL;
465 	}
466 
467 	wait_event_interruptible(*q,
468 		kthread_should_stop() || !llist_empty(&fcc->issue_list));
469 	goto repeat;
470 }
471 
472 int f2fs_issue_flush(struct f2fs_sb_info *sbi)
473 {
474 	struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
475 	struct flush_cmd cmd;
476 
477 	if (test_opt(sbi, NOBARRIER))
478 		return 0;
479 
480 	if (!test_opt(sbi, FLUSH_MERGE))
481 		return submit_flush_wait(sbi);
482 
483 	if (!atomic_read(&fcc->submit_flush)) {
484 		int ret;
485 
486 		atomic_inc(&fcc->submit_flush);
487 		ret = submit_flush_wait(sbi);
488 		atomic_dec(&fcc->submit_flush);
489 		return ret;
490 	}
491 
492 	init_completion(&cmd.wait);
493 
494 	atomic_inc(&fcc->submit_flush);
495 	llist_add(&cmd.llnode, &fcc->issue_list);
496 
497 	if (!fcc->dispatch_list)
498 		wake_up(&fcc->flush_wait_queue);
499 
500 	if (fcc->f2fs_issue_flush) {
501 		wait_for_completion(&cmd.wait);
502 		atomic_dec(&fcc->submit_flush);
503 	} else {
504 		llist_del_all(&fcc->issue_list);
505 		atomic_set(&fcc->submit_flush, 0);
506 	}
507 
508 	return cmd.ret;
509 }
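/*
 * Flush-merge sketch: when FLUSH_MERGE is enabled and a flush is already
 * in flight, each caller queues a flush_cmd on fcc->issue_list and
 * sleeps on its completion. issue_flush_thread() drains the llist,
 * issues one preflush per device via submit_flush_wait(), and completes
 * every queued command with the shared result, collapsing N concurrent
 * flushes into one.
 */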
510 
511 int create_flush_cmd_control(struct f2fs_sb_info *sbi)
512 {
513 	dev_t dev = sbi->sb->s_bdev->bd_dev;
514 	struct flush_cmd_control *fcc;
515 	int err = 0;
516 
517 	if (SM_I(sbi)->fcc_info) {
518 		fcc = SM_I(sbi)->fcc_info;
519 		goto init_thread;
520 	}
521 
522 	fcc = kzalloc(sizeof(struct flush_cmd_control), GFP_KERNEL);
523 	if (!fcc)
524 		return -ENOMEM;
525 	atomic_set(&fcc->submit_flush, 0);
526 	init_waitqueue_head(&fcc->flush_wait_queue);
527 	init_llist_head(&fcc->issue_list);
528 	SM_I(sbi)->fcc_info = fcc;
529 init_thread:
530 	fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
531 				"f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
532 	if (IS_ERR(fcc->f2fs_issue_flush)) {
533 		err = PTR_ERR(fcc->f2fs_issue_flush);
534 		kfree(fcc);
535 		SM_I(sbi)->fcc_info = NULL;
536 		return err;
537 	}
538 
539 	return err;
540 }
541 
542 void destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free)
543 {
544 	struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
545 
546 	if (fcc && fcc->f2fs_issue_flush) {
547 		struct task_struct *flush_thread = fcc->f2fs_issue_flush;
548 
549 		fcc->f2fs_issue_flush = NULL;
550 		kthread_stop(flush_thread);
551 	}
552 	if (free) {
553 		kfree(fcc);
554 		SM_I(sbi)->fcc_info = NULL;
555 	}
556 }
557 
558 static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
559 		enum dirty_type dirty_type)
560 {
561 	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
562 
563 	/* need not be added */
564 	if (IS_CURSEG(sbi, segno))
565 		return;
566 
567 	if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type]))
568 		dirty_i->nr_dirty[dirty_type]++;
569 
570 	if (dirty_type == DIRTY) {
571 		struct seg_entry *sentry = get_seg_entry(sbi, segno);
572 		enum dirty_type t = sentry->type;
573 
574 		if (unlikely(t >= DIRTY)) {
575 			f2fs_bug_on(sbi, 1);
576 			return;
577 		}
578 		if (!test_and_set_bit(segno, dirty_i->dirty_segmap[t]))
579 			dirty_i->nr_dirty[t]++;
580 	}
581 }
582 
583 static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
584 		enum dirty_type dirty_type)
585 {
586 	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
587 
588 	if (test_and_clear_bit(segno, dirty_i->dirty_segmap[dirty_type]))
589 		dirty_i->nr_dirty[dirty_type]--;
590 
591 	if (dirty_type == DIRTY) {
592 		struct seg_entry *sentry = get_seg_entry(sbi, segno);
593 		enum dirty_type t = sentry->type;
594 
595 		if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t]))
596 			dirty_i->nr_dirty[t]--;
597 
598 		if (get_valid_blocks(sbi, segno, sbi->segs_per_sec) == 0)
599 			clear_bit(GET_SECNO(sbi, segno),
600 						dirty_i->victim_secmap);
601 	}
602 }
603 
604 /*
605  * Errors such as -ENOMEM should not occur here: adding a dirty entry
606  * to the seglist is not a critical operation.
607  * If a given segment is one of the current working segments, it won't be added.
608  */
609 static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
610 {
611 	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
612 	unsigned short valid_blocks;
613 
614 	if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno))
615 		return;
616 
617 	mutex_lock(&dirty_i->seglist_lock);
618 
619 	valid_blocks = get_valid_blocks(sbi, segno, 0);
620 
621 	if (valid_blocks == 0) {
622 		__locate_dirty_segment(sbi, segno, PRE);
623 		__remove_dirty_segment(sbi, segno, DIRTY);
624 	} else if (valid_blocks < sbi->blocks_per_seg) {
625 		__locate_dirty_segment(sbi, segno, DIRTY);
626 	} else {
627 		/* Recovery routine with SSR needs this */
628 		__remove_dirty_segment(sbi, segno, DIRTY);
629 	}
630 
631 	mutex_unlock(&dirty_i->seglist_lock);
632 }
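/*
 * Classification used above: a segment with no valid blocks goes to the
 * PRE list (freeable after the next checkpoint), a partially valid one
 * to the DIRTY list (a GC/SSR candidate), and a fully valid one is
 * removed from the DIRTY list.
 */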
633 
634 static void __add_discard_cmd(struct f2fs_sb_info *sbi,
635 			struct bio *bio, block_t lstart, block_t len)
636 {
637 	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
638 	struct list_head *cmd_list = &(dcc->discard_cmd_list);
639 	struct discard_cmd *dc;
640 
641 	dc = f2fs_kmem_cache_alloc(discard_cmd_slab, GFP_NOFS);
642 	INIT_LIST_HEAD(&dc->list);
643 	dc->bio = bio;
644 	bio->bi_private = dc;
645 	dc->lstart = lstart;
646 	dc->len = len;
647 	dc->state = D_PREP;
648 	init_completion(&dc->wait);
649 
650 	mutex_lock(&dcc->cmd_lock);
651 	list_add_tail(&dc->list, cmd_list);
652 	mutex_unlock(&dcc->cmd_lock);
653 }
654 
655 static void __remove_discard_cmd(struct f2fs_sb_info *sbi, struct discard_cmd *dc)
656 {
657 	int err = dc->bio->bi_error;
658 
659 	if (dc->state == D_DONE)
660 		atomic_dec(&(SM_I(sbi)->dcc_info->submit_discard));
661 
662 	if (err == -EOPNOTSUPP)
663 		err = 0;
664 
665 	if (err)
666 		f2fs_msg(sbi->sb, KERN_INFO,
667 				"Issue discard failed, ret: %d", err);
668 	bio_put(dc->bio);
669 	list_del(&dc->list);
670 	kmem_cache_free(discard_cmd_slab, dc);
671 }
672 
673 /* This should be called while holding the global mutex &sit_i->sentry_lock */
674 void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr)
675 {
676 	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
677 	struct list_head *wait_list = &(dcc->discard_cmd_list);
678 	struct discard_cmd *dc, *tmp;
679 
680 	mutex_lock(&dcc->cmd_lock);
681 	list_for_each_entry_safe(dc, tmp, wait_list, list) {
682 
683 		if (blkaddr == NULL_ADDR) {
684 			if (dc->state == D_PREP) {
685 				dc->state = D_SUBMIT;
686 				submit_bio(dc->bio);
687 				atomic_inc(&dcc->submit_discard);
688 			}
689 			wait_for_completion_io(&dc->wait);
690 
691 			__remove_discard_cmd(sbi, dc);
692 			continue;
693 		}
694 
695 		if (dc->lstart <= blkaddr && blkaddr < dc->lstart + dc->len) {
696 			if (dc->state == D_SUBMIT)
697 				wait_for_completion_io(&dc->wait);
698 			else
699 				__remove_discard_cmd(sbi, dc);
700 		}
701 	}
702 	mutex_unlock(&dcc->cmd_lock);
703 }
704 
705 static void f2fs_submit_discard_endio(struct bio *bio)
706 {
707 	struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private;
708 
709 	complete(&dc->wait);
710 	dc->state = D_DONE;
711 }
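/*
 * Discard command states, as driven by the code above and below:
 * D_PREP (queued, bio not yet submitted) -> D_SUBMIT (bio in flight) ->
 * D_DONE (endio ran); __remove_discard_cmd() then frees the bio and
 * the command.
 */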
712 
713 static int issue_discard_thread(void *data)
714 {
715 	struct f2fs_sb_info *sbi = data;
716 	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
717 	wait_queue_head_t *q = &dcc->discard_wait_queue;
718 	struct list_head *cmd_list = &dcc->discard_cmd_list;
719 	struct discard_cmd *dc, *tmp;
720 	struct blk_plug plug;
721 	int iter = 0;
722 repeat:
723 	if (kthread_should_stop())
724 		return 0;
725 
726 	blk_start_plug(&plug);
727 
728 	mutex_lock(&dcc->cmd_lock);
729 	list_for_each_entry_safe(dc, tmp, cmd_list, list) {
730 		if (dc->state == D_PREP) {
731 			dc->state = D_SUBMIT;
732 			submit_bio(dc->bio);
733 			atomic_inc(&dcc->submit_discard);
734 			if (iter++ > DISCARD_ISSUE_RATE)
735 				break;
736 		} else if (dc->state == D_DONE) {
737 			__remove_discard_cmd(sbi, dc);
738 		}
739 	}
740 	mutex_unlock(&dcc->cmd_lock);
741 
742 	blk_finish_plug(&plug);
743 
744 	iter = 0;
745 	congestion_wait(BLK_RW_SYNC, HZ/50);
746 
747 	wait_event_interruptible(*q,
748 		kthread_should_stop() || !list_empty(&dcc->discard_cmd_list));
749 	goto repeat;
750 }
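/*
 * Throttling: each pass submits at most about DISCARD_ISSUE_RATE
 * prepared commands, reaps completed ones, then backs off in
 * congestion_wait() so discard traffic does not starve foreground I/O.
 */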
751 
752 
753 /* this function is copied from blkdev_issue_discard() in block/blk-lib.c */
754 static int __f2fs_issue_discard_async(struct f2fs_sb_info *sbi,
755 		struct block_device *bdev, block_t blkstart, block_t blklen)
756 {
757 	struct bio *bio = NULL;
758 	block_t lblkstart = blkstart;
759 	int err;
760 
761 	trace_f2fs_issue_discard(bdev, blkstart, blklen);
762 
763 	if (sbi->s_ndevs) {
764 		int devi = f2fs_target_device_index(sbi, blkstart);
765 
766 		blkstart -= FDEV(devi).start_blk;
767 	}
768 	err = __blkdev_issue_discard(bdev,
769 				SECTOR_FROM_BLOCK(blkstart),
770 				SECTOR_FROM_BLOCK(blklen),
771 				GFP_NOFS, 0, &bio);
772 	if (!err && bio) {
773 		bio->bi_end_io = f2fs_submit_discard_endio;
774 		bio->bi_opf |= REQ_SYNC;
775 
776 		__add_discard_cmd(sbi, bio, lblkstart, blklen);
777 		wake_up(&SM_I(sbi)->dcc_info->discard_wait_queue);
778 	}
779 	return err;
780 }
781 
782 #ifdef CONFIG_BLK_DEV_ZONED
783 static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi,
784 		struct block_device *bdev, block_t blkstart, block_t blklen)
785 {
786 	sector_t nr_sects = SECTOR_FROM_BLOCK(blklen);
787 	sector_t sector;
788 	int devi = 0;
789 
790 	if (sbi->s_ndevs) {
791 		devi = f2fs_target_device_index(sbi, blkstart);
792 		blkstart -= FDEV(devi).start_blk;
793 	}
794 	sector = SECTOR_FROM_BLOCK(blkstart);
795 
796 	if (sector & (bdev_zone_sectors(bdev) - 1) ||
797 	    nr_sects != bdev_zone_sectors(bdev)) {
798 		f2fs_msg(sbi->sb, KERN_INFO,
799 			"(%d) %s: Unaligned discard attempted (block %x + %x)",
800 			devi, sbi->s_ndevs ? FDEV(devi).path: "",
801 			blkstart, blklen);
802 		return -EIO;
803 	}
804 
805 	/*
806 	 * We need to know the type of the zone: for conventional zones,
807 	 * use regular discard if the drive supports it. For sequential
808 	 * zones, reset the zone write pointer.
809 	 */
810 	switch (get_blkz_type(sbi, bdev, blkstart)) {
811 
812 	case BLK_ZONE_TYPE_CONVENTIONAL:
813 		if (!blk_queue_discard(bdev_get_queue(bdev)))
814 			return 0;
815 		return __f2fs_issue_discard_async(sbi, bdev, blkstart, blklen);
816 	case BLK_ZONE_TYPE_SEQWRITE_REQ:
817 	case BLK_ZONE_TYPE_SEQWRITE_PREF:
818 		trace_f2fs_issue_reset_zone(bdev, blkstart);
819 		return blkdev_reset_zones(bdev, sector,
820 					  nr_sects, GFP_NOFS);
821 	default:
822 		/* Unknown zone type: broken device? */
823 		return -EIO;
824 	}
825 }
826 #endif
827 
828 static int __issue_discard_async(struct f2fs_sb_info *sbi,
829 		struct block_device *bdev, block_t blkstart, block_t blklen)
830 {
831 #ifdef CONFIG_BLK_DEV_ZONED
832 	if (f2fs_sb_mounted_blkzoned(sbi->sb) &&
833 				bdev_zoned_model(bdev) != BLK_ZONED_NONE)
834 		return __f2fs_issue_discard_zone(sbi, bdev, blkstart, blklen);
835 #endif
836 	return __f2fs_issue_discard_async(sbi, bdev, blkstart, blklen);
837 }
838 
839 static int f2fs_issue_discard(struct f2fs_sb_info *sbi,
840 				block_t blkstart, block_t blklen)
841 {
842 	sector_t start = blkstart, len = 0;
843 	struct block_device *bdev;
844 	struct seg_entry *se;
845 	unsigned int offset;
846 	block_t i;
847 	int err = 0;
848 
849 	bdev = f2fs_target_device(sbi, blkstart, NULL);
850 
851 	for (i = blkstart; i < blkstart + blklen; i++, len++) {
852 		if (i != start) {
853 			struct block_device *bdev2 =
854 				f2fs_target_device(sbi, i, NULL);
855 
856 			if (bdev2 != bdev) {
857 				err = __issue_discard_async(sbi, bdev,
858 						start, len);
859 				if (err)
860 					return err;
861 				bdev = bdev2;
862 				start = i;
863 				len = 0;
864 			}
865 		}
866 
867 		se = get_seg_entry(sbi, GET_SEGNO(sbi, i));
868 		offset = GET_BLKOFF_FROM_SEG0(sbi, i);
869 
870 		if (!f2fs_test_and_set_bit(offset, se->discard_map))
871 			sbi->discard_blks--;
872 	}
873 
874 	if (len)
875 		err = __issue_discard_async(sbi, bdev, start, len);
876 	return err;
877 }
878 
879 static void __add_discard_entry(struct f2fs_sb_info *sbi,
880 		struct cp_control *cpc, struct seg_entry *se,
881 		unsigned int start, unsigned int end)
882 {
883 	struct list_head *head = &SM_I(sbi)->dcc_info->discard_entry_list;
884 	struct discard_entry *new, *last;
885 
886 	if (!list_empty(head)) {
887 		last = list_last_entry(head, struct discard_entry, list);
888 		if (START_BLOCK(sbi, cpc->trim_start) + start ==
889 				last->blkaddr + last->len &&
890 				last->len < MAX_DISCARD_BLOCKS(sbi)) {
891 			last->len += end - start;
892 			goto done;
893 		}
894 	}
895 
896 	new = f2fs_kmem_cache_alloc(discard_entry_slab, GFP_NOFS);
897 	INIT_LIST_HEAD(&new->list);
898 	new->blkaddr = START_BLOCK(sbi, cpc->trim_start) + start;
899 	new->len = end - start;
900 	list_add_tail(&new->list, head);
901 done:
902 	SM_I(sbi)->dcc_info->nr_discards += end - start;
903 }
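/*
 * Example: if the last queued entry covers blocks [B, B+len) and this
 * range starts exactly at B+len, the two are merged into one extent
 * (capped by MAX_DISCARD_BLOCKS) instead of allocating a second
 * discard_entry.
 */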
904 
905 static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc,
906 							bool check_only)
907 {
908 	int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
909 	int max_blocks = sbi->blocks_per_seg;
910 	struct seg_entry *se = get_seg_entry(sbi, cpc->trim_start);
911 	unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
912 	unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
913 	unsigned long *discard_map = (unsigned long *)se->discard_map;
914 	unsigned long *dmap = SIT_I(sbi)->tmp_map;
915 	unsigned int start = 0, end = -1;
916 	bool force = (cpc->reason == CP_DISCARD);
917 	int i;
918 
919 	if (se->valid_blocks == max_blocks || !f2fs_discard_en(sbi))
920 		return false;
921 
922 	if (!force) {
923 		if (!test_opt(sbi, DISCARD) || !se->valid_blocks ||
924 			SM_I(sbi)->dcc_info->nr_discards >=
925 				SM_I(sbi)->dcc_info->max_discards)
926 			return false;
927 	}
928 
929 	/* SIT_VBLOCK_MAP_SIZE should be a multiple of sizeof(unsigned long) */
930 	for (i = 0; i < entries; i++)
931 		dmap[i] = force ? ~ckpt_map[i] & ~discard_map[i] :
932 				(cur_map[i] ^ ckpt_map[i]) & ckpt_map[i];
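	/*
	 * Non-force case: a dmap bit is set for blocks valid at the last
	 * checkpoint but freed since (safe to discard once the next
	 * checkpoint commits). Force (FITRIM) case: blocks invalid in the
	 * checkpoint and not already discarded.
	 */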
933 
934 	while (force || SM_I(sbi)->dcc_info->nr_discards <=
935 				SM_I(sbi)->dcc_info->max_discards) {
936 		start = __find_rev_next_bit(dmap, max_blocks, end + 1);
937 		if (start >= max_blocks)
938 			break;
939 
940 		end = __find_rev_next_zero_bit(dmap, max_blocks, start + 1);
941 		if (force && start && end != max_blocks
942 					&& (end - start) < cpc->trim_minlen)
943 			continue;
944 
945 		if (check_only)
946 			return true;
947 
948 		__add_discard_entry(sbi, cpc, se, start, end);
949 	}
950 	return false;
951 }
952 
953 void release_discard_addrs(struct f2fs_sb_info *sbi)
954 {
955 	struct list_head *head = &(SM_I(sbi)->dcc_info->discard_entry_list);
956 	struct discard_entry *entry, *this;
957 
958 	/* drop caches */
959 	list_for_each_entry_safe(entry, this, head, list) {
960 		list_del(&entry->list);
961 		kmem_cache_free(discard_entry_slab, entry);
962 	}
963 }
964 
965 /*
966  * Should call clear_prefree_segments after checkpoint is done.
967  */
968 static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
969 {
970 	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
971 	unsigned int segno;
972 
973 	mutex_lock(&dirty_i->seglist_lock);
974 	for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], MAIN_SEGS(sbi))
975 		__set_test_and_free(sbi, segno);
976 	mutex_unlock(&dirty_i->seglist_lock);
977 }
978 
979 void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc)
980 {
981 	struct list_head *head = &(SM_I(sbi)->dcc_info->discard_entry_list);
982 	struct discard_entry *entry, *this;
983 	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
984 	unsigned long *prefree_map = dirty_i->dirty_segmap[PRE];
985 	unsigned int start = 0, end = -1;
986 	unsigned int secno, start_segno;
987 	bool force = (cpc->reason == CP_DISCARD);
988 
989 	mutex_lock(&dirty_i->seglist_lock);
990 
991 	while (1) {
992 		int i;
993 		start = find_next_bit(prefree_map, MAIN_SEGS(sbi), end + 1);
994 		if (start >= MAIN_SEGS(sbi))
995 			break;
996 		end = find_next_zero_bit(prefree_map, MAIN_SEGS(sbi),
997 								start + 1);
998 
999 		for (i = start; i < end; i++)
1000 			clear_bit(i, prefree_map);
1001 
1002 		dirty_i->nr_dirty[PRE] -= end - start;
1003 
1004 		if (!test_opt(sbi, DISCARD))
1005 			continue;
1006 
1007 		if (force && start >= cpc->trim_start &&
1008 					(end - 1) <= cpc->trim_end)
1009 				continue;
1010 
1011 		if (!test_opt(sbi, LFS) || sbi->segs_per_sec == 1) {
1012 			f2fs_issue_discard(sbi, START_BLOCK(sbi, start),
1013 				(end - start) << sbi->log_blocks_per_seg);
1014 			continue;
1015 		}
1016 next:
1017 		secno = GET_SECNO(sbi, start);
1018 		start_segno = secno * sbi->segs_per_sec;
1019 		if (!IS_CURSEC(sbi, secno) &&
1020 			!get_valid_blocks(sbi, start, sbi->segs_per_sec))
1021 			f2fs_issue_discard(sbi, START_BLOCK(sbi, start_segno),
1022 				sbi->segs_per_sec << sbi->log_blocks_per_seg);
1023 
1024 		start = start_segno + sbi->segs_per_sec;
1025 		if (start < end)
1026 			goto next;
1027 	}
1028 	mutex_unlock(&dirty_i->seglist_lock);
1029 
1030 	/* send small discards */
1031 	list_for_each_entry_safe(entry, this, head, list) {
1032 		if (force && entry->len < cpc->trim_minlen)
1033 			goto skip;
1034 		f2fs_issue_discard(sbi, entry->blkaddr, entry->len);
1035 		cpc->trimmed += entry->len;
1036 skip:
1037 		list_del(&entry->list);
1038 		SM_I(sbi)->dcc_info->nr_discards -= entry->len;
1039 		kmem_cache_free(discard_entry_slab, entry);
1040 	}
1041 }
1042 
1043 static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
1044 {
1045 	dev_t dev = sbi->sb->s_bdev->bd_dev;
1046 	struct discard_cmd_control *dcc;
1047 	int err = 0;
1048 
1049 	if (SM_I(sbi)->dcc_info) {
1050 		dcc = SM_I(sbi)->dcc_info;
1051 		goto init_thread;
1052 	}
1053 
1054 	dcc = kzalloc(sizeof(struct discard_cmd_control), GFP_KERNEL);
1055 	if (!dcc)
1056 		return -ENOMEM;
1057 
1058 	INIT_LIST_HEAD(&dcc->discard_entry_list);
1059 	INIT_LIST_HEAD(&dcc->discard_cmd_list);
1060 	mutex_init(&dcc->cmd_lock);
1061 	atomic_set(&dcc->submit_discard, 0);
1062 	dcc->nr_discards = 0;
1063 	dcc->max_discards = 0;
1064 
1065 	init_waitqueue_head(&dcc->discard_wait_queue);
1066 	SM_I(sbi)->dcc_info = dcc;
1067 init_thread:
1068 	dcc->f2fs_issue_discard = kthread_run(issue_discard_thread, sbi,
1069 				"f2fs_discard-%u:%u", MAJOR(dev), MINOR(dev));
1070 	if (IS_ERR(dcc->f2fs_issue_discard)) {
1071 		err = PTR_ERR(dcc->f2fs_issue_discard);
1072 		kfree(dcc);
1073 		SM_I(sbi)->dcc_info = NULL;
1074 		return err;
1075 	}
1076 
1077 	return err;
1078 }
1079 
1080 static void destroy_discard_cmd_control(struct f2fs_sb_info *sbi, bool free)
1081 {
1082 	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1083 
1084 	if (dcc && dcc->f2fs_issue_discard) {
1085 		struct task_struct *discard_thread = dcc->f2fs_issue_discard;
1086 
1087 		dcc->f2fs_issue_discard = NULL;
1088 		kthread_stop(discard_thread);
1089 	}
1090 	if (free) {
1091 		kfree(dcc);
1092 		SM_I(sbi)->dcc_info = NULL;
1093 	}
1094 }
1095 
1096 static bool __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno)
1097 {
1098 	struct sit_info *sit_i = SIT_I(sbi);
1099 
1100 	if (!__test_and_set_bit(segno, sit_i->dirty_sentries_bitmap)) {
1101 		sit_i->dirty_sentries++;
1102 		return false;
1103 	}
1104 
1105 	return true;
1106 }
1107 
1108 static void __set_sit_entry_type(struct f2fs_sb_info *sbi, int type,
1109 					unsigned int segno, int modified)
1110 {
1111 	struct seg_entry *se = get_seg_entry(sbi, segno);
1112 	se->type = type;
1113 	if (modified)
1114 		__mark_sit_entry_dirty(sbi, segno);
1115 }
1116 
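/*
 * update_sit_entry() applies a validity delta to the segment holding
 * @blkaddr: del = 1 marks the block valid (a fresh write), del = -1
 * marks it invalid (e.g. the old copy after a block is relocated), as
 * exercised by refresh_sit_entry() below.
 */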
1117 static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
1118 {
1119 	struct seg_entry *se;
1120 	unsigned int segno, offset;
1121 	long int new_vblocks;
1122 
1123 	segno = GET_SEGNO(sbi, blkaddr);
1124 
1125 	se = get_seg_entry(sbi, segno);
1126 	new_vblocks = se->valid_blocks + del;
1127 	offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
1128 
1129 	f2fs_bug_on(sbi, (new_vblocks >> (sizeof(unsigned short) << 3) ||
1130 				(new_vblocks > sbi->blocks_per_seg)));
1131 
1132 	se->valid_blocks = new_vblocks;
1133 	se->mtime = get_mtime(sbi);
1134 	SIT_I(sbi)->max_mtime = se->mtime;
1135 
1136 	/* Update valid block bitmap */
1137 	if (del > 0) {
1138 		if (f2fs_test_and_set_bit(offset, se->cur_valid_map)) {
1139 #ifdef CONFIG_F2FS_CHECK_FS
1140 			if (f2fs_test_and_set_bit(offset,
1141 						se->cur_valid_map_mir))
1142 				f2fs_bug_on(sbi, 1);
1143 			else
1144 				WARN_ON(1);
1145 #else
1146 			f2fs_bug_on(sbi, 1);
1147 #endif
1148 		}
1149 		if (f2fs_discard_en(sbi) &&
1150 			!f2fs_test_and_set_bit(offset, se->discard_map))
1151 			sbi->discard_blks--;
1152 	} else {
1153 		if (!f2fs_test_and_clear_bit(offset, se->cur_valid_map)) {
1154 #ifdef CONFIG_F2FS_CHECK_FS
1155 			if (!f2fs_test_and_clear_bit(offset,
1156 						se->cur_valid_map_mir))
1157 				f2fs_bug_on(sbi, 1);
1158 			else
1159 				WARN_ON(1);
1160 #else
1161 			f2fs_bug_on(sbi, 1);
1162 #endif
1163 		}
1164 		if (f2fs_discard_en(sbi) &&
1165 			f2fs_test_and_clear_bit(offset, se->discard_map))
1166 			sbi->discard_blks++;
1167 	}
1168 	if (!f2fs_test_bit(offset, se->ckpt_valid_map))
1169 		se->ckpt_valid_blocks += del;
1170 
1171 	__mark_sit_entry_dirty(sbi, segno);
1172 
1173 	/* update total number of valid blocks to be written in ckpt area */
1174 	SIT_I(sbi)->written_valid_blocks += del;
1175 
1176 	if (sbi->segs_per_sec > 1)
1177 		get_sec_entry(sbi, segno)->valid_blocks += del;
1178 }
1179 
1180 void refresh_sit_entry(struct f2fs_sb_info *sbi, block_t old, block_t new)
1181 {
1182 	update_sit_entry(sbi, new, 1);
1183 	if (GET_SEGNO(sbi, old) != NULL_SEGNO)
1184 		update_sit_entry(sbi, old, -1);
1185 
1186 	locate_dirty_segment(sbi, GET_SEGNO(sbi, old));
1187 	locate_dirty_segment(sbi, GET_SEGNO(sbi, new));
1188 }
1189 
1190 void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
1191 {
1192 	unsigned int segno = GET_SEGNO(sbi, addr);
1193 	struct sit_info *sit_i = SIT_I(sbi);
1194 
1195 	f2fs_bug_on(sbi, addr == NULL_ADDR);
1196 	if (addr == NEW_ADDR)
1197 		return;
1198 
1199 	/* add it into sit main buffer */
1200 	mutex_lock(&sit_i->sentry_lock);
1201 
1202 	update_sit_entry(sbi, addr, -1);
1203 
1204 	/* add it into dirty seglist */
1205 	locate_dirty_segment(sbi, segno);
1206 
1207 	mutex_unlock(&sit_i->sentry_lock);
1208 }
1209 
1210 bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr)
1211 {
1212 	struct sit_info *sit_i = SIT_I(sbi);
1213 	unsigned int segno, offset;
1214 	struct seg_entry *se;
1215 	bool is_cp = false;
1216 
1217 	if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR)
1218 		return true;
1219 
1220 	mutex_lock(&sit_i->sentry_lock);
1221 
1222 	segno = GET_SEGNO(sbi, blkaddr);
1223 	se = get_seg_entry(sbi, segno);
1224 	offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
1225 
1226 	if (f2fs_test_bit(offset, se->ckpt_valid_map))
1227 		is_cp = true;
1228 
1229 	mutex_unlock(&sit_i->sentry_lock);
1230 
1231 	return is_cp;
1232 }
1233 
1234 /*
1235  * This function must be called with the curseg_mutex lock held
1236  */
1237 static void __add_sum_entry(struct f2fs_sb_info *sbi, int type,
1238 					struct f2fs_summary *sum)
1239 {
1240 	struct curseg_info *curseg = CURSEG_I(sbi, type);
1241 	void *addr = curseg->sum_blk;
1242 	addr += curseg->next_blkoff * sizeof(struct f2fs_summary);
1243 	memcpy(addr, sum, sizeof(struct f2fs_summary));
1244 }
1245 
1246 /*
1247  * Calculate the number of current summary pages for writing
1248  */
1249 int npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra)
1250 {
1251 	int valid_sum_count = 0;
1252 	int i, sum_in_page;
1253 
1254 	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
1255 		if (sbi->ckpt->alloc_type[i] == SSR)
1256 			valid_sum_count += sbi->blocks_per_seg;
1257 		else {
1258 			if (for_ra)
1259 				valid_sum_count += le16_to_cpu(
1260 					F2FS_CKPT(sbi)->cur_data_blkoff[i]);
1261 			else
1262 				valid_sum_count += curseg_blkoff(sbi, i);
1263 		}
1264 	}
1265 
1266 	sum_in_page = (PAGE_SIZE - 2 * SUM_JOURNAL_SIZE -
1267 			SUM_FOOTER_SIZE) / SUMMARY_SIZE;
1268 	if (valid_sum_count <= sum_in_page)
1269 		return 1;
1270 	else if ((valid_sum_count - sum_in_page) <=
1271 		(PAGE_SIZE - SUM_FOOTER_SIZE) / SUMMARY_SIZE)
1272 		return 2;
1273 	return 3;
1274 }
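/*
 * Worked numbers (assuming the usual on-disk constants: 4KB blocks,
 * SUMMARY_SIZE = 7, SUM_FOOTER_SIZE = 5, SUM_JOURNAL_SIZE = 507):
 * sum_in_page = (4096 - 2 * 507 - 5) / 7 = 439, so up to 439 summaries
 * fit in the first compacted page alongside the two journals, and the
 * result is always 1, 2 or 3 pages for the three data logs.
 */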
1275 
1276 /*
1277  * Caller should put this summary page
1278  */
1279 struct page *get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno)
1280 {
1281 	return get_meta_page(sbi, GET_SUM_BLOCK(sbi, segno));
1282 }
1283 
1284 void update_meta_page(struct f2fs_sb_info *sbi, void *src, block_t blk_addr)
1285 {
1286 	struct page *page = grab_meta_page(sbi, blk_addr);
1287 	void *dst = page_address(page);
1288 
1289 	if (src)
1290 		memcpy(dst, src, PAGE_SIZE);
1291 	else
1292 		memset(dst, 0, PAGE_SIZE);
1293 	set_page_dirty(page);
1294 	f2fs_put_page(page, 1);
1295 }
1296 
1297 static void write_sum_page(struct f2fs_sb_info *sbi,
1298 			struct f2fs_summary_block *sum_blk, block_t blk_addr)
1299 {
1300 	update_meta_page(sbi, (void *)sum_blk, blk_addr);
1301 }
1302 
1303 static void write_current_sum_page(struct f2fs_sb_info *sbi,
1304 						int type, block_t blk_addr)
1305 {
1306 	struct curseg_info *curseg = CURSEG_I(sbi, type);
1307 	struct page *page = grab_meta_page(sbi, blk_addr);
1308 	struct f2fs_summary_block *src = curseg->sum_blk;
1309 	struct f2fs_summary_block *dst;
1310 
1311 	dst = (struct f2fs_summary_block *)page_address(page);
1312 
1313 	mutex_lock(&curseg->curseg_mutex);
1314 
1315 	down_read(&curseg->journal_rwsem);
1316 	memcpy(&dst->journal, curseg->journal, SUM_JOURNAL_SIZE);
1317 	up_read(&curseg->journal_rwsem);
1318 
1319 	memcpy(dst->entries, src->entries, SUM_ENTRY_SIZE);
1320 	memcpy(&dst->footer, &src->footer, SUM_FOOTER_SIZE);
1321 
1322 	mutex_unlock(&curseg->curseg_mutex);
1323 
1324 	set_page_dirty(page);
1325 	f2fs_put_page(page, 1);
1326 }
1327 
1328 /*
1329  * Find a new segment in the free segment bitmap, in the right order.
1330  * This function must succeed; otherwise it is a BUG.
1331  */
1332 static void get_new_segment(struct f2fs_sb_info *sbi,
1333 			unsigned int *newseg, bool new_sec, int dir)
1334 {
1335 	struct free_segmap_info *free_i = FREE_I(sbi);
1336 	unsigned int segno, secno, zoneno;
1337 	unsigned int total_zones = MAIN_SECS(sbi) / sbi->secs_per_zone;
1338 	unsigned int hint = *newseg / sbi->segs_per_sec;
1339 	unsigned int old_zoneno = GET_ZONENO_FROM_SEGNO(sbi, *newseg);
1340 	unsigned int left_start = hint;
1341 	bool init = true;
1342 	int go_left = 0;
1343 	int i;
1344 
1345 	spin_lock(&free_i->segmap_lock);
1346 
1347 	if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) {
1348 		segno = find_next_zero_bit(free_i->free_segmap,
1349 				(hint + 1) * sbi->segs_per_sec, *newseg + 1);
1350 		if (segno < (hint + 1) * sbi->segs_per_sec)
1351 			goto got_it;
1352 	}
1353 find_other_zone:
1354 	secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint);
1355 	if (secno >= MAIN_SECS(sbi)) {
1356 		if (dir == ALLOC_RIGHT) {
1357 			secno = find_next_zero_bit(free_i->free_secmap,
1358 							MAIN_SECS(sbi), 0);
1359 			f2fs_bug_on(sbi, secno >= MAIN_SECS(sbi));
1360 		} else {
1361 			go_left = 1;
1362 			left_start = hint - 1;
1363 		}
1364 	}
1365 	if (go_left == 0)
1366 		goto skip_left;
1367 
1368 	while (test_bit(left_start, free_i->free_secmap)) {
1369 		if (left_start > 0) {
1370 			left_start--;
1371 			continue;
1372 		}
1373 		left_start = find_next_zero_bit(free_i->free_secmap,
1374 							MAIN_SECS(sbi), 0);
1375 		f2fs_bug_on(sbi, left_start >= MAIN_SECS(sbi));
1376 		break;
1377 	}
1378 	secno = left_start;
1379 skip_left:
1380 	hint = secno;
1381 	segno = secno * sbi->segs_per_sec;
1382 	zoneno = secno / sbi->secs_per_zone;
1383 
1384 	/* give up on finding another zone */
1385 	if (!init)
1386 		goto got_it;
1387 	if (sbi->secs_per_zone == 1)
1388 		goto got_it;
1389 	if (zoneno == old_zoneno)
1390 		goto got_it;
1391 	if (dir == ALLOC_LEFT) {
1392 		if (!go_left && zoneno + 1 >= total_zones)
1393 			goto got_it;
1394 		if (go_left && zoneno == 0)
1395 			goto got_it;
1396 	}
1397 	for (i = 0; i < NR_CURSEG_TYPE; i++)
1398 		if (CURSEG_I(sbi, i)->zone == zoneno)
1399 			break;
1400 
1401 	if (i < NR_CURSEG_TYPE) {
1402 		/* zone is in use, try another */
1403 		if (go_left)
1404 			hint = zoneno * sbi->secs_per_zone - 1;
1405 		else if (zoneno + 1 >= total_zones)
1406 			hint = 0;
1407 		else
1408 			hint = (zoneno + 1) * sbi->secs_per_zone;
1409 		init = false;
1410 		goto find_other_zone;
1411 	}
1412 got_it:
1413 	/* mark the chosen segment as in-use in the free segmap */
1414 	f2fs_bug_on(sbi, test_bit(segno, free_i->free_segmap));
1415 	__set_inuse(sbi, segno);
1416 	*newseg = segno;
1417 	spin_unlock(&free_i->segmap_lock);
1418 }
1419 
1420 static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
1421 {
1422 	struct curseg_info *curseg = CURSEG_I(sbi, type);
1423 	struct summary_footer *sum_footer;
1424 
1425 	curseg->segno = curseg->next_segno;
1426 	curseg->zone = GET_ZONENO_FROM_SEGNO(sbi, curseg->segno);
1427 	curseg->next_blkoff = 0;
1428 	curseg->next_segno = NULL_SEGNO;
1429 
1430 	sum_footer = &(curseg->sum_blk->footer);
1431 	memset(sum_footer, 0, sizeof(struct summary_footer));
1432 	if (IS_DATASEG(type))
1433 		SET_SUM_TYPE(sum_footer, SUM_TYPE_DATA);
1434 	if (IS_NODESEG(type))
1435 		SET_SUM_TYPE(sum_footer, SUM_TYPE_NODE);
1436 	__set_sit_entry_type(sbi, type, curseg->segno, modified);
1437 }
1438 
1439 /*
1440  * Allocate a current working segment.
1441  * This function always allocates a free segment in the LFS manner.
1442  */
1443 static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
1444 {
1445 	struct curseg_info *curseg = CURSEG_I(sbi, type);
1446 	unsigned int segno = curseg->segno;
1447 	int dir = ALLOC_LEFT;
1448 
1449 	write_sum_page(sbi, curseg->sum_blk,
1450 				GET_SUM_BLOCK(sbi, segno));
1451 	if (type == CURSEG_WARM_DATA || type == CURSEG_COLD_DATA)
1452 		dir = ALLOC_RIGHT;
1453 
1454 	if (test_opt(sbi, NOHEAP))
1455 		dir = ALLOC_RIGHT;
1456 
1457 	get_new_segment(sbi, &segno, new_sec, dir);
1458 	curseg->next_segno = segno;
1459 	reset_curseg(sbi, type, 1);
1460 	curseg->alloc_type = LFS;
1461 }
1462 
1463 static void __next_free_blkoff(struct f2fs_sb_info *sbi,
1464 			struct curseg_info *seg, block_t start)
1465 {
1466 	struct seg_entry *se = get_seg_entry(sbi, seg->segno);
1467 	int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
1468 	unsigned long *target_map = SIT_I(sbi)->tmp_map;
1469 	unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
1470 	unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
1471 	int i, pos;
1472 
1473 	for (i = 0; i < entries; i++)
1474 		target_map[i] = ckpt_map[i] | cur_map[i];
1475 
1476 	pos = __find_rev_next_zero_bit(target_map, sbi->blocks_per_seg, start);
1477 
1478 	seg->next_blkoff = pos;
1479 }
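/*
 * target_map = ckpt_map | cur_map, so an SSR writer only reuses block
 * slots that are free in both the checkpointed view and the live view;
 * otherwise a crash could leave the checkpoint referencing overwritten
 * blocks.
 */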
1480 
1481 /*
1482  * If a segment is written in the LFS manner, the next block offset is simply
1483  * obtained by increasing the current block offset. However, if a segment is
1484  * written in the SSR manner, the next block offset is obtained by calling __next_free_blkoff()
1485  */
1486 static void __refresh_next_blkoff(struct f2fs_sb_info *sbi,
1487 				struct curseg_info *seg)
1488 {
1489 	if (seg->alloc_type == SSR)
1490 		__next_free_blkoff(sbi, seg, seg->next_blkoff + 1);
1491 	else
1492 		seg->next_blkoff++;
1493 }
1494 
1495 /*
1496  * This function always allocates a used segment (from the dirty seglist) in
1497  * the SSR manner, so it should recover the existing segment information of valid blocks
1498  */
1499 static void change_curseg(struct f2fs_sb_info *sbi, int type, bool reuse)
1500 {
1501 	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
1502 	struct curseg_info *curseg = CURSEG_I(sbi, type);
1503 	unsigned int new_segno = curseg->next_segno;
1504 	struct f2fs_summary_block *sum_node;
1505 	struct page *sum_page;
1506 
1507 	write_sum_page(sbi, curseg->sum_blk,
1508 				GET_SUM_BLOCK(sbi, curseg->segno));
1509 	__set_test_and_inuse(sbi, new_segno);
1510 
1511 	mutex_lock(&dirty_i->seglist_lock);
1512 	__remove_dirty_segment(sbi, new_segno, PRE);
1513 	__remove_dirty_segment(sbi, new_segno, DIRTY);
1514 	mutex_unlock(&dirty_i->seglist_lock);
1515 
1516 	reset_curseg(sbi, type, 1);
1517 	curseg->alloc_type = SSR;
1518 	__next_free_blkoff(sbi, curseg, 0);
1519 
1520 	if (reuse) {
1521 		sum_page = get_sum_page(sbi, new_segno);
1522 		sum_node = (struct f2fs_summary_block *)page_address(sum_page);
1523 		memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE);
1524 		f2fs_put_page(sum_page, 1);
1525 	}
1526 }
1527 
1528 static int get_ssr_segment(struct f2fs_sb_info *sbi, int type)
1529 {
1530 	struct curseg_info *curseg = CURSEG_I(sbi, type);
1531 	const struct victim_selection *v_ops = DIRTY_I(sbi)->v_ops;
1532 	int i, n;
1533 
1534 	/* need_SSR() has already forced us to do this */
1535 	if (v_ops->get_victim(sbi, &(curseg)->next_segno, BG_GC, type, SSR))
1536 		return 1;
1537 
1538 	/* For node segments, let's do SSR more intensively */
1539 	if (IS_NODESEG(type)) {
1540 		i = CURSEG_HOT_NODE;
1541 		n = CURSEG_COLD_NODE;
1542 	} else {
1543 		i = CURSEG_HOT_DATA;
1544 		n = CURSEG_COLD_DATA;
1545 	}
1546 
1547 	for (; i <= n; i++) {
1548 		if (i == type)
1549 			continue;
1550 		if (v_ops->get_victim(sbi, &(curseg)->next_segno,
1551 						BG_GC, i, SSR))
1552 			return 1;
1553 	}
1554 	return 0;
1555 }
1556 
1557 /*
1558  * Flush out the current segment and replace it with a new one.
1559  * This function must succeed; otherwise it is a BUG.
1560  */
1561 static void allocate_segment_by_default(struct f2fs_sb_info *sbi,
1562 						int type, bool force)
1563 {
1564 	if (force)
1565 		new_curseg(sbi, type, true);
1566 	else if (!is_set_ckpt_flags(sbi, CP_CRC_RECOVERY_FLAG) &&
1567 					type == CURSEG_WARM_NODE)
1568 		new_curseg(sbi, type, false);
1569 	else if (need_SSR(sbi) && get_ssr_segment(sbi, type))
1570 		change_curseg(sbi, type, true);
1571 	else
1572 		new_curseg(sbi, type, false);
1573 
1574 	stat_inc_seg_type(sbi, CURSEG_I(sbi, type));
1575 }
1576 
1577 void allocate_new_segments(struct f2fs_sb_info *sbi)
1578 {
1579 	struct curseg_info *curseg;
1580 	unsigned int old_segno;
1581 	int i;
1582 
1583 	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
1584 		curseg = CURSEG_I(sbi, i);
1585 		old_segno = curseg->segno;
1586 		SIT_I(sbi)->s_ops->allocate_segment(sbi, i, true);
1587 		locate_dirty_segment(sbi, old_segno);
1588 	}
1589 }
1590 
1591 static const struct segment_allocation default_salloc_ops = {
1592 	.allocate_segment = allocate_segment_by_default,
1593 };
1594 
1595 bool exist_trim_candidates(struct f2fs_sb_info *sbi, struct cp_control *cpc)
1596 {
1597 	__u64 trim_start = cpc->trim_start;
1598 	bool has_candidate = false;
1599 
1600 	mutex_lock(&SIT_I(sbi)->sentry_lock);
1601 	for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++) {
1602 		if (add_discard_addrs(sbi, cpc, true)) {
1603 			has_candidate = true;
1604 			break;
1605 		}
1606 	}
1607 	mutex_unlock(&SIT_I(sbi)->sentry_lock);
1608 
1609 	cpc->trim_start = trim_start;
1610 	return has_candidate;
1611 }
1612 
1613 int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
1614 {
1615 	__u64 start = F2FS_BYTES_TO_BLK(range->start);
1616 	__u64 end = start + F2FS_BYTES_TO_BLK(range->len) - 1;
1617 	unsigned int start_segno, end_segno;
1618 	struct cp_control cpc;
1619 	int err = 0;
1620 
1621 	if (start >= MAX_BLKADDR(sbi) || range->len < sbi->blocksize)
1622 		return -EINVAL;
1623 
1624 	cpc.trimmed = 0;
1625 	if (end <= MAIN_BLKADDR(sbi))
1626 		goto out;
1627 
1628 	if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) {
1629 		f2fs_msg(sbi->sb, KERN_WARNING,
1630 			"Found FS corruption, run fsck to fix.");
1631 		goto out;
1632 	}
1633 
1634 	/* start/end segment number in main_area */
1635 	start_segno = (start <= MAIN_BLKADDR(sbi)) ? 0 : GET_SEGNO(sbi, start);
1636 	end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 :
1637 						GET_SEGNO(sbi, end);
1638 	cpc.reason = CP_DISCARD;
1639 	cpc.trim_minlen = max_t(__u64, 1, F2FS_BYTES_TO_BLK(range->minlen));
1640 
1641 	/* do checkpoint to issue discard commands safely */
1642 	for (; start_segno <= end_segno; start_segno = cpc.trim_end + 1) {
1643 		cpc.trim_start = start_segno;
1644 
1645 		if (sbi->discard_blks == 0)
1646 			break;
1647 		else if (sbi->discard_blks < BATCHED_TRIM_BLOCKS(sbi))
1648 			cpc.trim_end = end_segno;
1649 		else
1650 			cpc.trim_end = min_t(unsigned int,
1651 				rounddown(start_segno +
1652 				BATCHED_TRIM_SEGMENTS(sbi),
1653 				sbi->segs_per_sec) - 1, end_segno);
1654 
1655 		mutex_lock(&sbi->gc_mutex);
1656 		err = write_checkpoint(sbi, &cpc);
1657 		mutex_unlock(&sbi->gc_mutex);
1658 		if (err)
1659 			break;
1660 
1661 		schedule();
1662 	}
1663 out:
1664 	range->len = F2FS_BLK_TO_BYTES(cpc.trimmed);
1665 	return err;
1666 }
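/*
 * Usage sketch (illustrative userspace side): FITRIM lands here, e.g.
 *
 *	struct fstrim_range r = { .start = 0, .len = ULLONG_MAX, .minlen = 0 };
 *	ioctl(fd, FITRIM, &r);
 *
 * The loop above walks the range in BATCHED_TRIM_SEGMENTS-sized chunks
 * and writes a checkpoint per chunk, so discards are only issued for
 * blocks that are dead in stable, checkpointed metadata.
 */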
1667 
1668 static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type)
1669 {
1670 	struct curseg_info *curseg = CURSEG_I(sbi, type);
1671 	if (curseg->next_blkoff < sbi->blocks_per_seg)
1672 		return true;
1673 	return false;
1674 }
1675 
1676 static int __get_segment_type_2(struct page *page, enum page_type p_type)
1677 {
1678 	if (p_type == DATA)
1679 		return CURSEG_HOT_DATA;
1680 	else
1681 		return CURSEG_HOT_NODE;
1682 }
1683 
1684 static int __get_segment_type_4(struct page *page, enum page_type p_type)
1685 {
1686 	if (p_type == DATA) {
1687 		struct inode *inode = page->mapping->host;
1688 
1689 		if (S_ISDIR(inode->i_mode))
1690 			return CURSEG_HOT_DATA;
1691 		else
1692 			return CURSEG_COLD_DATA;
1693 	} else {
1694 		if (IS_DNODE(page) && is_cold_node(page))
1695 			return CURSEG_WARM_NODE;
1696 		else
1697 			return CURSEG_COLD_NODE;
1698 	}
1699 }
1700 
1701 static int __get_segment_type_6(struct page *page, enum page_type p_type)
1702 {
1703 	if (p_type == DATA) {
1704 		struct inode *inode = page->mapping->host;
1705 
1706 		if (S_ISDIR(inode->i_mode))
1707 			return CURSEG_HOT_DATA;
1708 		else if (is_cold_data(page) || file_is_cold(inode))
1709 			return CURSEG_COLD_DATA;
1710 		else
1711 			return CURSEG_WARM_DATA;
1712 	} else {
1713 		if (IS_DNODE(page))
1714 			return is_cold_node(page) ? CURSEG_WARM_NODE :
1715 						CURSEG_HOT_NODE;
1716 		else
1717 			return CURSEG_COLD_NODE;
1718 	}
1719 }
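/*
 * Summary of the six-log temperature policy above: directory data is
 * hot, cold-flagged data or data of cold files is cold, and other data
 * is warm; direct node blocks of cold files are warm, other direct
 * nodes are hot, and indirect node blocks are cold.
 */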
1720 
1721 static int __get_segment_type(struct page *page, enum page_type p_type)
1722 {
1723 	switch (F2FS_P_SB(page)->active_logs) {
1724 	case 2:
1725 		return __get_segment_type_2(page, p_type);
1726 	case 4:
1727 		return __get_segment_type_4(page, p_type);
1728 	}
1729 	/* NR_CURSEG_TYPE(6) logs by default */
1730 	f2fs_bug_on(F2FS_P_SB(page),
1731 		F2FS_P_SB(page)->active_logs != NR_CURSEG_TYPE);
1732 	return __get_segment_type_6(page, p_type);
1733 }
1734 
1735 void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
1736 		block_t old_blkaddr, block_t *new_blkaddr,
1737 		struct f2fs_summary *sum, int type)
1738 {
1739 	struct sit_info *sit_i = SIT_I(sbi);
1740 	struct curseg_info *curseg = CURSEG_I(sbi, type);
1741 
1742 	mutex_lock(&curseg->curseg_mutex);
1743 	mutex_lock(&sit_i->sentry_lock);
1744 
1745 	*new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
1746 
1747 	f2fs_wait_discard_bio(sbi, *new_blkaddr);
1748 
1749 	/*
1750 	 * __add_sum_entry must be called with the curseg_mutex held
1751 	 * because this function updates a summary entry in the
1752 	 * current summary block.
1753 	 */
1754 	__add_sum_entry(sbi, type, sum);
1755 
1756 	__refresh_next_blkoff(sbi, curseg);
1757 
1758 	stat_inc_block_count(sbi, curseg);
1759 
1760 	/*
1761 	 * SIT information should be updated before segment allocation,
1762 	 * since SSR needs the latest valid block information.
1763 	 */
1764 	refresh_sit_entry(sbi, old_blkaddr, *new_blkaddr);
1765 
1766 	if (!__has_curseg_space(sbi, type))
1767 		sit_i->s_ops->allocate_segment(sbi, type, false);
1768 
1769 	mutex_unlock(&sit_i->sentry_lock);
1770 
1771 	if (page && IS_NODESEG(type))
1772 		fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg));
1773 
1774 	mutex_unlock(&curseg->curseg_mutex);
1775 }
1776 
1777 static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
1778 {
1779 	int type = __get_segment_type(fio->page, fio->type);
1780 	int err;
1781 
1782 	if (fio->type == NODE || fio->type == DATA)
1783 		mutex_lock(&fio->sbi->wio_mutex[fio->type]);
1784 reallocate:
1785 	allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr,
1786 					&fio->new_blkaddr, sum, type);
1787 
1788 	/* write out the dirty page to the block device */
1789 	err = f2fs_submit_page_mbio(fio);
1790 	if (err == -EAGAIN) {
1791 		fio->old_blkaddr = fio->new_blkaddr;
1792 		goto reallocate;
1793 	}
1794 
1795 	if (fio->type == NODE || fio->type == DATA)
1796 		mutex_unlock(&fio->sbi->wio_mutex[fio->type]);
1797 }
1798 
1799 void write_meta_page(struct f2fs_sb_info *sbi, struct page *page)
1800 {
1801 	struct f2fs_io_info fio = {
1802 		.sbi = sbi,
1803 		.type = META,
1804 		.op = REQ_OP_WRITE,
1805 		.op_flags = REQ_SYNC | REQ_META | REQ_PRIO,
1806 		.old_blkaddr = page->index,
1807 		.new_blkaddr = page->index,
1808 		.page = page,
1809 		.encrypted_page = NULL,
1810 	};
1811 
1812 	if (unlikely(page->index >= MAIN_BLKADDR(sbi)))
1813 		fio.op_flags &= ~REQ_META;
1814 
1815 	set_page_writeback(page);
1816 	f2fs_submit_page_mbio(&fio);
1817 }
1818 
1819 void write_node_page(unsigned int nid, struct f2fs_io_info *fio)
1820 {
1821 	struct f2fs_summary sum;
1822 
1823 	set_summary(&sum, nid, 0, 0);
1824 	do_write_page(&sum, fio);
1825 }
1826 
1827 void write_data_page(struct dnode_of_data *dn, struct f2fs_io_info *fio)
1828 {
1829 	struct f2fs_sb_info *sbi = fio->sbi;
1830 	struct f2fs_summary sum;
1831 	struct node_info ni;
1832 
1833 	f2fs_bug_on(sbi, dn->data_blkaddr == NULL_ADDR);
1834 	get_node_info(sbi, dn->nid, &ni);
1835 	set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
1836 	do_write_page(&sum, fio);
1837 	f2fs_update_data_blkaddr(dn, fio->new_blkaddr);
1838 }
1839 
1840 void rewrite_data_page(struct f2fs_io_info *fio)
1841 {
1842 	fio->new_blkaddr = fio->old_blkaddr;
1843 	stat_inc_inplace_blocks(fio->sbi);
1844 	f2fs_submit_page_mbio(fio);
1845 }
1846 
1847 void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
1848 				block_t old_blkaddr, block_t new_blkaddr,
1849 				bool recover_curseg, bool recover_newaddr)
1850 {
1851 	struct sit_info *sit_i = SIT_I(sbi);
1852 	struct curseg_info *curseg;
1853 	unsigned int segno, old_cursegno;
1854 	struct seg_entry *se;
1855 	int type;
1856 	unsigned short old_blkoff;
1857 
1858 	segno = GET_SEGNO(sbi, new_blkaddr);
1859 	se = get_seg_entry(sbi, segno);
1860 	type = se->type;
1861 
1862 	if (!recover_curseg) {
1863 		/* for recovery flow */
1864 		if (se->valid_blocks == 0 && !IS_CURSEG(sbi, segno)) {
1865 			if (old_blkaddr == NULL_ADDR)
1866 				type = CURSEG_COLD_DATA;
1867 			else
1868 				type = CURSEG_WARM_DATA;
1869 		}
1870 	} else {
1871 		if (!IS_CURSEG(sbi, segno))
1872 			type = CURSEG_WARM_DATA;
1873 	}
1874 
1875 	curseg = CURSEG_I(sbi, type);
1876 
1877 	mutex_lock(&curseg->curseg_mutex);
1878 	mutex_lock(&sit_i->sentry_lock);
1879 
1880 	old_cursegno = curseg->segno;
1881 	old_blkoff = curseg->next_blkoff;
1882 
1883 	/* change the current segment */
1884 	if (segno != curseg->segno) {
1885 		curseg->next_segno = segno;
1886 		change_curseg(sbi, type, true);
1887 	}
1888 
1889 	curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr);
1890 	__add_sum_entry(sbi, type, sum);
1891 
1892 	if (!recover_curseg || recover_newaddr)
1893 		update_sit_entry(sbi, new_blkaddr, 1);
1894 	if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
1895 		update_sit_entry(sbi, old_blkaddr, -1);
1896 
1897 	locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
1898 	locate_dirty_segment(sbi, GET_SEGNO(sbi, new_blkaddr));
1899 
1900 	locate_dirty_segment(sbi, old_cursegno);
1901 
1902 	if (recover_curseg) {
1903 		if (old_cursegno != curseg->segno) {
1904 			curseg->next_segno = old_cursegno;
1905 			change_curseg(sbi, type, true);
1906 		}
1907 		curseg->next_blkoff = old_blkoff;
1908 	}
1909 
1910 	mutex_unlock(&sit_i->sentry_lock);
1911 	mutex_unlock(&curseg->curseg_mutex);
1912 }
1913 
1914 void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn,
1915 				block_t old_addr, block_t new_addr,
1916 				unsigned char version, bool recover_curseg,
1917 				bool recover_newaddr)
1918 {
1919 	struct f2fs_summary sum;
1920 
1921 	set_summary(&sum, dn->nid, dn->ofs_in_node, version);
1922 
1923 	__f2fs_replace_block(sbi, &sum, old_addr, new_addr,
1924 					recover_curseg, recover_newaddr);
1925 
1926 	f2fs_update_data_blkaddr(dn, new_addr);
1927 }
1928 
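/*
 * Wait for in-flight writeback of @page: first submit any merged bio the
 * page may still sit in, then either wait for writeback to complete
 * (@ordered) or only for the page to become stable for rewrite.
 */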
1929 void f2fs_wait_on_page_writeback(struct page *page,
1930 				enum page_type type, bool ordered)
1931 {
1932 	if (PageWriteback(page)) {
1933 		struct f2fs_sb_info *sbi = F2FS_P_SB(page);
1934 
1935 		f2fs_submit_merged_bio_cond(sbi, page->mapping->host,
1936 						0, page->index, type, WRITE);
1937 		if (ordered)
1938 			wait_on_page_writeback(page);
1939 		else
1940 			wait_for_stable_page(page);
1941 	}
1942 }
1943 
1944 void f2fs_wait_on_encrypted_page_writeback(struct f2fs_sb_info *sbi,
1945 							block_t blkaddr)
1946 {
1947 	struct page *cpage;
1948 
1949 	if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR)
1950 		return;
1951 
1952 	cpage = find_lock_page(META_MAPPING(sbi), blkaddr);
1953 	if (cpage) {
1954 		f2fs_wait_on_page_writeback(cpage, DATA, true);
1955 		f2fs_put_page(cpage, 1);
1956 	}
1957 }
1958 
1959 static int read_compacted_summaries(struct f2fs_sb_info *sbi)
1960 {
1961 	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
1962 	struct curseg_info *seg_i;
1963 	unsigned char *kaddr;
1964 	struct page *page;
1965 	block_t start;
1966 	int i, j, offset;
1967 
1968 	start = start_sum_block(sbi);
1969 
1970 	page = get_meta_page(sbi, start++);
1971 	kaddr = (unsigned char *)page_address(page);
1972 
1973 	/* Step 1: restore nat cache */
1974 	seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
1975 	memcpy(seg_i->journal, kaddr, SUM_JOURNAL_SIZE);
1976 
1977 	/* Step 2: restore sit cache */
1978 	seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
1979 	memcpy(seg_i->journal, kaddr + SUM_JOURNAL_SIZE, SUM_JOURNAL_SIZE);
1980 	offset = 2 * SUM_JOURNAL_SIZE;
1981 
1982 	/* Step 3: restore summary entries */
1983 	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
1984 		unsigned short blk_off;
1985 		unsigned int segno;
1986 
1987 		seg_i = CURSEG_I(sbi, i);
1988 		segno = le32_to_cpu(ckpt->cur_data_segno[i]);
1989 		blk_off = le16_to_cpu(ckpt->cur_data_blkoff[i]);
1990 		seg_i->next_segno = segno;
1991 		reset_curseg(sbi, i, 0);
1992 		seg_i->alloc_type = ckpt->alloc_type[i];
1993 		seg_i->next_blkoff = blk_off;
1994 
1995 		if (seg_i->alloc_type == SSR)
1996 			blk_off = sbi->blocks_per_seg;
1997 
1998 		for (j = 0; j < blk_off; j++) {
1999 			struct f2fs_summary *s;
2000 			s = (struct f2fs_summary *)(kaddr + offset);
2001 			seg_i->sum_blk->entries[j] = *s;
2002 			offset += SUMMARY_SIZE;
2003 			if (offset + SUMMARY_SIZE <= PAGE_SIZE -
2004 						SUM_FOOTER_SIZE)
2005 				continue;
2006 
2007 			f2fs_put_page(page, 1);
2008 			page = NULL;
2009 
2010 			page = get_meta_page(sbi, start++);
2011 			kaddr = (unsigned char *)page_address(page);
2012 			offset = 0;
2013 		}
2014 	}
2015 	f2fs_put_page(page, 1);
2016 	return 0;
2017 }
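/*
 * The reads above imply the following layout for the compacted summary
 * area (an illustrative sketch; the sizes are the kernel's constants):
 *
 *	+---------------------------+ <- start_sum_block()
 *	| NAT journal               | SUM_JOURNAL_SIZE bytes
 *	+---------------------------+
 *	| SIT journal               | SUM_JOURNAL_SIZE bytes
 *	+---------------------------+
 *	| summaries for HOT_DATA,   | SUMMARY_SIZE bytes each, packed
 *	| WARM_DATA and COLD_DATA   | across pages; the trailing
 *	+---------------------------+ SUM_FOOTER_SIZE bytes of every
 *	                              page stay unused
 */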
2018 
2019 static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
2020 {
2021 	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
2022 	struct f2fs_summary_block *sum;
2023 	struct curseg_info *curseg;
2024 	struct page *new;
2025 	unsigned short blk_off;
2026 	unsigned int segno = 0;
2027 	block_t blk_addr = 0;
2028 
2029 	/* get segment number and block addr */
2030 	if (IS_DATASEG(type)) {
2031 		segno = le32_to_cpu(ckpt->cur_data_segno[type]);
2032 		blk_off = le16_to_cpu(ckpt->cur_data_blkoff[type -
2033 							CURSEG_HOT_DATA]);
2034 		if (__exist_node_summaries(sbi))
2035 			blk_addr = sum_blk_addr(sbi, NR_CURSEG_TYPE, type);
2036 		else
2037 			blk_addr = sum_blk_addr(sbi, NR_CURSEG_DATA_TYPE, type);
2038 	} else {
2039 		segno = le32_to_cpu(ckpt->cur_node_segno[type -
2040 							CURSEG_HOT_NODE]);
2041 		blk_off = le16_to_cpu(ckpt->cur_node_blkoff[type -
2042 							CURSEG_HOT_NODE]);
2043 		if (__exist_node_summaries(sbi))
2044 			blk_addr = sum_blk_addr(sbi, NR_CURSEG_NODE_TYPE,
2045 							type - CURSEG_HOT_NODE);
2046 		else
2047 			blk_addr = GET_SUM_BLOCK(sbi, segno);
2048 	}
2049 
2050 	new = get_meta_page(sbi, blk_addr);
2051 	sum = (struct f2fs_summary_block *)page_address(new);
2052 
2053 	if (IS_NODESEG(type)) {
2054 		if (__exist_node_summaries(sbi)) {
2055 			struct f2fs_summary *ns = &sum->entries[0];
2056 			int i;
2057 			for (i = 0; i < sbi->blocks_per_seg; i++, ns++) {
2058 				ns->version = 0;
2059 				ns->ofs_in_node = 0;
2060 			}
2061 		} else {
2062 			int err;
2063 
2064 			err = restore_node_summary(sbi, segno, sum);
2065 			if (err) {
2066 				f2fs_put_page(new, 1);
2067 				return err;
2068 			}
2069 		}
2070 	}
2071 
2072 	/* restore the uncompleted segment as the current segment */
2073 	curseg = CURSEG_I(sbi, type);
2074 	mutex_lock(&curseg->curseg_mutex);
2075 
2076 	/* update journal info */
2077 	down_write(&curseg->journal_rwsem);
2078 	memcpy(curseg->journal, &sum->journal, SUM_JOURNAL_SIZE);
2079 	up_write(&curseg->journal_rwsem);
2080 
2081 	memcpy(curseg->sum_blk->entries, sum->entries, SUM_ENTRY_SIZE);
2082 	memcpy(&curseg->sum_blk->footer, &sum->footer, SUM_FOOTER_SIZE);
2083 	curseg->next_segno = segno;
2084 	reset_curseg(sbi, type, 0);
2085 	curseg->alloc_type = ckpt->alloc_type[type];
2086 	curseg->next_blkoff = blk_off;
2087 	mutex_unlock(&curseg->curseg_mutex);
2088 	f2fs_put_page(new, 1);
2089 	return 0;
2090 }
2091 
2092 static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
2093 {
2094 	int type = CURSEG_HOT_DATA;
2095 	int err;
2096 
2097 	if (is_set_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG)) {
2098 		int npages = npages_for_summary_flush(sbi, true);
2099 
2100 		if (npages >= 2)
2101 			ra_meta_pages(sbi, start_sum_block(sbi), npages,
2102 							META_CP, true);
2103 
2104 		/* restore the compacted data summaries */
2105 		if (read_compacted_summaries(sbi))
2106 			return -EINVAL;
2107 		type = CURSEG_HOT_NODE;
2108 	}
2109 
2110 	if (__exist_node_summaries(sbi))
2111 		ra_meta_pages(sbi, sum_blk_addr(sbi, NR_CURSEG_TYPE, type),
2112 					NR_CURSEG_TYPE - type, META_CP, true);
2113 
2114 	for (; type <= CURSEG_COLD_NODE; type++) {
2115 		err = read_normal_summaries(sbi, type);
2116 		if (err)
2117 			return err;
2118 	}
2119 
2120 	return 0;
2121 }
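/*
 * Note: the compacted format covers only the three data cursegs; node
 * cursegs are always restored from normal summary blocks, which is why
 * @type skips ahead to CURSEG_HOT_NODE once the compacted block has been
 * consumed.
 */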
2122 
2123 static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr)
2124 {
2125 	struct page *page;
2126 	unsigned char *kaddr;
2127 	struct f2fs_summary *summary;
2128 	struct curseg_info *seg_i;
2129 	int written_size = 0;
2130 	int i, j;
2131 
2132 	page = grab_meta_page(sbi, blkaddr++);
2133 	kaddr = (unsigned char *)page_address(page);
2134 
2135 	/* Step 1: write nat cache */
2136 	seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
2137 	memcpy(kaddr, seg_i->journal, SUM_JOURNAL_SIZE);
2138 	written_size += SUM_JOURNAL_SIZE;
2139 
2140 	/* Step 2: write sit cache */
2141 	seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
2142 	memcpy(kaddr + written_size, seg_i->journal, SUM_JOURNAL_SIZE);
2143 	written_size += SUM_JOURNAL_SIZE;
2144 
2145 	/* Step 3: write summary entries */
2146 	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
2147 		unsigned short blkoff;
2148 		seg_i = CURSEG_I(sbi, i);
2149 		if (sbi->ckpt->alloc_type[i] == SSR)
2150 			blkoff = sbi->blocks_per_seg;
2151 		else
2152 			blkoff = curseg_blkoff(sbi, i);
2153 
2154 		for (j = 0; j < blkoff; j++) {
2155 			if (!page) {
2156 				page = grab_meta_page(sbi, blkaddr++);
2157 				kaddr = (unsigned char *)page_address(page);
2158 				written_size = 0;
2159 			}
2160 			summary = (struct f2fs_summary *)(kaddr + written_size);
2161 			*summary = seg_i->sum_blk->entries[j];
2162 			written_size += SUMMARY_SIZE;
2163 
2164 			if (written_size + SUMMARY_SIZE <= PAGE_SIZE -
2165 							SUM_FOOTER_SIZE)
2166 				continue;
2167 
2168 			set_page_dirty(page);
2169 			f2fs_put_page(page, 1);
2170 			page = NULL;
2171 		}
2172 	}
2173 	if (page) {
2174 		set_page_dirty(page);
2175 		f2fs_put_page(page, 1);
2176 	}
2177 }
2178 
2179 static void write_normal_summaries(struct f2fs_sb_info *sbi,
2180 					block_t blkaddr, int type)
2181 {
2182 	int i, end;
2183 	if (IS_DATASEG(type))
2184 		end = type + NR_CURSEG_DATA_TYPE;
2185 	else
2186 		end = type + NR_CURSEG_NODE_TYPE;
2187 
2188 	for (i = type; i < end; i++)
2189 		write_current_sum_page(sbi, i, blkaddr + (i - type));
2190 }
2191 
2192 void write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
2193 {
2194 	if (is_set_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG))
2195 		write_compacted_summaries(sbi, start_blk);
2196 	else
2197 		write_normal_summaries(sbi, start_blk, CURSEG_HOT_DATA);
2198 }
2199 
2200 void write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
2201 {
2202 	write_normal_summaries(sbi, start_blk, CURSEG_HOT_NODE);
2203 }
2204 
2205 int lookup_journal_in_cursum(struct f2fs_journal *journal, int type,
2206 					unsigned int val, int alloc)
2207 {
2208 	int i;
2209 
2210 	if (type == NAT_JOURNAL) {
2211 		for (i = 0; i < nats_in_cursum(journal); i++) {
2212 			if (le32_to_cpu(nid_in_journal(journal, i)) == val)
2213 				return i;
2214 		}
2215 		if (alloc && __has_cursum_space(journal, 1, NAT_JOURNAL))
2216 			return update_nats_in_cursum(journal, 1);
2217 	} else if (type == SIT_JOURNAL) {
2218 		for (i = 0; i < sits_in_cursum(journal); i++)
2219 			if (le32_to_cpu(segno_in_journal(journal, i)) == val)
2220 				return i;
2221 		if (alloc && __has_cursum_space(journal, 1, SIT_JOURNAL))
2222 			return update_sits_in_cursum(journal, 1);
2223 	}
2224 	return -1;
2225 }
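/*
 * Usage sketch (mirroring the SIT flush below): look up a SIT journal slot
 * for @segno, allocating one when there is room; a negative return means
 * the journal is full and the caller must write the SIT page instead.
 *
 *	offset = lookup_journal_in_cursum(journal, SIT_JOURNAL, segno, 1);
 *	if (offset >= 0) {
 *		segno_in_journal(journal, offset) = cpu_to_le32(segno);
 *		seg_info_to_raw_sit(se, &sit_in_journal(journal, offset));
 *	}
 */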
2226 
2227 static struct page *get_current_sit_page(struct f2fs_sb_info *sbi,
2228 					unsigned int segno)
2229 {
2230 	return get_meta_page(sbi, current_sit_addr(sbi, segno));
2231 }
2232 
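/*
 * SIT blocks are double-buffered on disk: copy the live block to its
 * alternate location, dirty the copy, and flip the bitmap bit so the
 * alternate becomes the live block from the next checkpoint on.
 */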
2233 static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,
2234 					unsigned int start)
2235 {
2236 	struct sit_info *sit_i = SIT_I(sbi);
2237 	struct page *src_page, *dst_page;
2238 	pgoff_t src_off, dst_off;
2239 	void *src_addr, *dst_addr;
2240 
2241 	src_off = current_sit_addr(sbi, start);
2242 	dst_off = next_sit_addr(sbi, src_off);
2243 
2244 	/* get current sit block page without lock */
2245 	src_page = get_meta_page(sbi, src_off);
2246 	dst_page = grab_meta_page(sbi, dst_off);
2247 	f2fs_bug_on(sbi, PageDirty(src_page));
2248 
2249 	src_addr = page_address(src_page);
2250 	dst_addr = page_address(dst_page);
2251 	memcpy(dst_addr, src_addr, PAGE_SIZE);
2252 
2253 	set_page_dirty(dst_page);
2254 	f2fs_put_page(src_page, 1);
2255 
2256 	set_to_next_sit(sit_i, start);
2257 
2258 	return dst_page;
2259 }
2260 
2261 static struct sit_entry_set *grab_sit_entry_set(void)
2262 {
2263 	struct sit_entry_set *ses =
2264 			f2fs_kmem_cache_alloc(sit_entry_set_slab, GFP_NOFS);
2265 
2266 	ses->entry_cnt = 0;
2267 	INIT_LIST_HEAD(&ses->set_list);
2268 	return ses;
2269 }
2270 
2271 static void release_sit_entry_set(struct sit_entry_set *ses)
2272 {
2273 	list_del(&ses->set_list);
2274 	kmem_cache_free(sit_entry_set_slab, ses);
2275 }
2276 
2277 static void adjust_sit_entry_set(struct sit_entry_set *ses,
2278 						struct list_head *head)
2279 {
2280 	struct sit_entry_set *next = ses;
2281 
2282 	if (list_is_last(&ses->set_list, head))
2283 		return;
2284 
2285 	list_for_each_entry_continue(next, head, set_list)
2286 		if (ses->entry_cnt <= next->entry_cnt)
2287 			break;
2288 
2289 	list_move_tail(&ses->set_list, &next->set_list);
2290 }
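/*
 * The set list is kept sorted by ascending entry_cnt: flush_sit_entries()
 * walks it in order, so the smallest sets get first claim on the journal
 * space, and once one set no longer fits, no later (larger) one will
 * either.
 */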
2291 
2292 static void add_sit_entry(unsigned int segno, struct list_head *head)
2293 {
2294 	struct sit_entry_set *ses;
2295 	unsigned int start_segno = START_SEGNO(segno);
2296 
2297 	list_for_each_entry(ses, head, set_list) {
2298 		if (ses->start_segno == start_segno) {
2299 			ses->entry_cnt++;
2300 			adjust_sit_entry_set(ses, head);
2301 			return;
2302 		}
2303 	}
2304 
2305 	ses = grab_sit_entry_set();
2306 
2307 	ses->start_segno = start_segno;
2308 	ses->entry_cnt++;
2309 	list_add(&ses->set_list, head);
2310 }
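/*
 * Example (assuming 4KB pages, so SIT_ENTRY_PER_BLOCK == 55): dirty segnos
 * 3, 17 and 54 all share START_SEGNO() == 0 and are counted in a single
 * set, while segno 55 opens a new set for the next SIT block.
 */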
2311 
2312 static void add_sits_in_set(struct f2fs_sb_info *sbi)
2313 {
2314 	struct f2fs_sm_info *sm_info = SM_I(sbi);
2315 	struct list_head *set_list = &sm_info->sit_entry_set;
2316 	unsigned long *bitmap = SIT_I(sbi)->dirty_sentries_bitmap;
2317 	unsigned int segno;
2318 
2319 	for_each_set_bit(segno, bitmap, MAIN_SEGS(sbi))
2320 		add_sit_entry(segno, set_list);
2321 }
2322 
2323 static void remove_sits_in_journal(struct f2fs_sb_info *sbi)
2324 {
2325 	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
2326 	struct f2fs_journal *journal = curseg->journal;
2327 	int i;
2328 
2329 	down_write(&curseg->journal_rwsem);
2330 	for (i = 0; i < sits_in_cursum(journal); i++) {
2331 		unsigned int segno;
2332 		bool dirtied;
2333 
2334 		segno = le32_to_cpu(segno_in_journal(journal, i));
2335 		dirtied = __mark_sit_entry_dirty(sbi, segno);
2336 
2337 		if (!dirtied)
2338 			add_sit_entry(segno, &SM_I(sbi)->sit_entry_set);
2339 	}
2340 	update_sits_in_cursum(journal, -i);
2341 	up_write(&curseg->journal_rwsem);
2342 }
2343 
2344 /*
2345  * Called from the checkpoint path: flushes dirty SIT entries (including
2346  * those cached in the sit journal) and moves prefree segments to free.
2347  */
2348 void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
2349 {
2350 	struct sit_info *sit_i = SIT_I(sbi);
2351 	unsigned long *bitmap = sit_i->dirty_sentries_bitmap;
2352 	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
2353 	struct f2fs_journal *journal = curseg->journal;
2354 	struct sit_entry_set *ses, *tmp;
2355 	struct list_head *head = &SM_I(sbi)->sit_entry_set;
2356 	bool to_journal = true;
2357 	struct seg_entry *se;
2358 
2359 	mutex_lock(&sit_i->sentry_lock);
2360 
2361 	if (!sit_i->dirty_sentries)
2362 		goto out;
2363 
2364 	/*
2365 	 * temporarily add and account the sit entries marked in the dirty
2366 	 * bitmap in the sit entry set
2367 	 */
2368 	add_sits_in_set(sbi);
2369 
2370 	/*
2371 	 * if there is not enough space in the journal to store all dirty
2372 	 * sit entries, remove every entry from the journal and add and
2373 	 * account them in the sit entry set instead.
2374 	 */
2375 	if (!__has_cursum_space(journal, sit_i->dirty_sentries, SIT_JOURNAL))
2376 		remove_sits_in_journal(sbi);
2377 
2378 	/*
2379 	 * there are two steps to flush sit entries:
2380 	 * #1, flush sit entries to journal in current cold data summary block.
2381 	 * #2, flush sit entries to sit page.
2382 	 */
2383 	list_for_each_entry_safe(ses, tmp, head, set_list) {
2384 		struct page *page = NULL;
2385 		struct f2fs_sit_block *raw_sit = NULL;
2386 		unsigned int start_segno = ses->start_segno;
2387 		unsigned int end = min(start_segno + SIT_ENTRY_PER_BLOCK,
2388 						(unsigned long)MAIN_SEGS(sbi));
2389 		unsigned int segno = start_segno;
2390 
2391 		if (to_journal &&
2392 			!__has_cursum_space(journal, ses->entry_cnt, SIT_JOURNAL))
2393 			to_journal = false;
2394 
2395 		if (to_journal) {
2396 			down_write(&curseg->journal_rwsem);
2397 		} else {
2398 			page = get_next_sit_page(sbi, start_segno);
2399 			raw_sit = page_address(page);
2400 		}
2401 
2402 		/* flush dirty sit entries in the region of the current sit set */
2403 		for_each_set_bit_from(segno, bitmap, end) {
2404 			int offset, sit_offset;
2405 
2406 			se = get_seg_entry(sbi, segno);
2407 
2408 			/* add discard candidates */
2409 			if (cpc->reason != CP_DISCARD) {
2410 				cpc->trim_start = segno;
2411 				add_discard_addrs(sbi, cpc, false);
2412 			}
2413 
2414 			if (to_journal) {
2415 				offset = lookup_journal_in_cursum(journal,
2416 							SIT_JOURNAL, segno, 1);
2417 				f2fs_bug_on(sbi, offset < 0);
2418 				segno_in_journal(journal, offset) =
2419 							cpu_to_le32(segno);
2420 				seg_info_to_raw_sit(se,
2421 					&sit_in_journal(journal, offset));
2422 			} else {
2423 				sit_offset = SIT_ENTRY_OFFSET(sit_i, segno);
2424 				seg_info_to_raw_sit(se,
2425 						&raw_sit->entries[sit_offset]);
2426 			}
2427 
2428 			__clear_bit(segno, bitmap);
2429 			sit_i->dirty_sentries--;
2430 			ses->entry_cnt--;
2431 		}
2432 
2433 		if (to_journal)
2434 			up_write(&curseg->journal_rwsem);
2435 		else
2436 			f2fs_put_page(page, 1);
2437 
2438 		f2fs_bug_on(sbi, ses->entry_cnt);
2439 		release_sit_entry_set(ses);
2440 	}
2441 
2442 	f2fs_bug_on(sbi, !list_empty(head));
2443 	f2fs_bug_on(sbi, sit_i->dirty_sentries);
2444 out:
2445 	if (cpc->reason == CP_DISCARD) {
2446 		__u64 trim_start = cpc->trim_start;
2447 
2448 		for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++)
2449 			add_discard_addrs(sbi, cpc, false);
2450 
2451 		cpc->trim_start = trim_start;
2452 	}
2453 	mutex_unlock(&sit_i->sentry_lock);
2454 
2455 	set_prefree_as_free_segments(sbi);
2456 }
2457 
2458 static int build_sit_info(struct f2fs_sb_info *sbi)
2459 {
2460 	struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
2461 	struct sit_info *sit_i;
2462 	unsigned int sit_segs, start;
2463 	char *src_bitmap;
2464 	unsigned int bitmap_size;
2465 
2466 	/* allocate memory for SIT information */
2467 	sit_i = kzalloc(sizeof(struct sit_info), GFP_KERNEL);
2468 	if (!sit_i)
2469 		return -ENOMEM;
2470 
2471 	SM_I(sbi)->sit_info = sit_i;
2472 
2473 	sit_i->sentries = f2fs_kvzalloc(MAIN_SEGS(sbi) *
2474 					sizeof(struct seg_entry), GFP_KERNEL);
2475 	if (!sit_i->sentries)
2476 		return -ENOMEM;
2477 
2478 	bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
2479 	sit_i->dirty_sentries_bitmap = f2fs_kvzalloc(bitmap_size, GFP_KERNEL);
2480 	if (!sit_i->dirty_sentries_bitmap)
2481 		return -ENOMEM;
2482 
2483 	for (start = 0; start < MAIN_SEGS(sbi); start++) {
2484 		sit_i->sentries[start].cur_valid_map
2485 			= kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
2486 		sit_i->sentries[start].ckpt_valid_map
2487 			= kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
2488 		if (!sit_i->sentries[start].cur_valid_map ||
2489 				!sit_i->sentries[start].ckpt_valid_map)
2490 			return -ENOMEM;
2491 
2492 #ifdef CONFIG_F2FS_CHECK_FS
2493 		sit_i->sentries[start].cur_valid_map_mir
2494 			= kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
2495 		if (!sit_i->sentries[start].cur_valid_map_mir)
2496 			return -ENOMEM;
2497 #endif
2498 
2499 		if (f2fs_discard_en(sbi)) {
2500 			sit_i->sentries[start].discard_map
2501 				= kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
2502 			if (!sit_i->sentries[start].discard_map)
2503 				return -ENOMEM;
2504 		}
2505 	}
2506 
2507 	sit_i->tmp_map = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
2508 	if (!sit_i->tmp_map)
2509 		return -ENOMEM;
2510 
2511 	if (sbi->segs_per_sec > 1) {
2512 		sit_i->sec_entries = f2fs_kvzalloc(MAIN_SECS(sbi) *
2513 					sizeof(struct sec_entry), GFP_KERNEL);
2514 		if (!sit_i->sec_entries)
2515 			return -ENOMEM;
2516 	}
2517 
2518 	/* get information related to SIT */
2519 	sit_segs = le32_to_cpu(raw_super->segment_count_sit) >> 1;
2520 
2521 	/* set up the SIT bitmap from the checkpoint pack */
2522 	bitmap_size = __bitmap_size(sbi, SIT_BITMAP);
2523 	src_bitmap = __bitmap_ptr(sbi, SIT_BITMAP);
2524 
2525 	sit_i->sit_bitmap = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL);
2526 	if (!sit_i->sit_bitmap)
2527 		return -ENOMEM;
2528 
2529 #ifdef CONFIG_F2FS_CHECK_FS
2530 	sit_i->sit_bitmap_mir = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL);
2531 	if (!sit_i->sit_bitmap_mir)
2532 		return -ENOMEM;
2533 #endif
2534 
2535 	/* init SIT information */
2536 	sit_i->s_ops = &default_salloc_ops;
2537 
2538 	sit_i->sit_base_addr = le32_to_cpu(raw_super->sit_blkaddr);
2539 	sit_i->sit_blocks = sit_segs << sbi->log_blocks_per_seg;
2540 	sit_i->written_valid_blocks = 0;
2541 	sit_i->bitmap_size = bitmap_size;
2542 	sit_i->dirty_sentries = 0;
2543 	sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK;
2544 	sit_i->elapsed_time = le64_to_cpu(sbi->ckpt->elapsed_time);
2545 	sit_i->mounted_time = CURRENT_TIME_SEC.tv_sec;
2546 	mutex_init(&sit_i->sentry_lock);
2547 	return 0;
2548 }
2549 
2550 static int build_free_segmap(struct f2fs_sb_info *sbi)
2551 {
2552 	struct free_segmap_info *free_i;
2553 	unsigned int bitmap_size, sec_bitmap_size;
2554 
2555 	/* allocate memory for free segmap information */
2556 	free_i = kzalloc(sizeof(struct free_segmap_info), GFP_KERNEL);
2557 	if (!free_i)
2558 		return -ENOMEM;
2559 
2560 	SM_I(sbi)->free_info = free_i;
2561 
2562 	bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
2563 	free_i->free_segmap = f2fs_kvmalloc(bitmap_size, GFP_KERNEL);
2564 	if (!free_i->free_segmap)
2565 		return -ENOMEM;
2566 
2567 	sec_bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
2568 	free_i->free_secmap = f2fs_kvmalloc(sec_bitmap_size, GFP_KERNEL);
2569 	if (!free_i->free_secmap)
2570 		return -ENOMEM;
2571 
2572 	/* set all segments as dirty temporarily */
2573 	memset(free_i->free_segmap, 0xff, bitmap_size);
2574 	memset(free_i->free_secmap, 0xff, sec_bitmap_size);
2575 
2576 	/* init free segmap information */
2577 	free_i->start_segno = GET_SEGNO_FROM_SEG0(sbi, MAIN_BLKADDR(sbi));
2578 	free_i->free_segments = 0;
2579 	free_i->free_sections = 0;
2580 	spin_lock_init(&free_i->segmap_lock);
2581 	return 0;
2582 }
2583 
2584 static int build_curseg(struct f2fs_sb_info *sbi)
2585 {
2586 	struct curseg_info *array;
2587 	int i;
2588 
2589 	array = kcalloc(NR_CURSEG_TYPE, sizeof(*array), GFP_KERNEL);
2590 	if (!array)
2591 		return -ENOMEM;
2592 
2593 	SM_I(sbi)->curseg_array = array;
2594 
2595 	for (i = 0; i < NR_CURSEG_TYPE; i++) {
2596 		mutex_init(&array[i].curseg_mutex);
2597 		array[i].sum_blk = kzalloc(PAGE_SIZE, GFP_KERNEL);
2598 		if (!array[i].sum_blk)
2599 			return -ENOMEM;
2600 		init_rwsem(&array[i].journal_rwsem);
2601 		array[i].journal = kzalloc(sizeof(struct f2fs_journal),
2602 							GFP_KERNEL);
2603 		if (!array[i].journal)
2604 			return -ENOMEM;
2605 		array[i].segno = NULL_SEGNO;
2606 		array[i].next_blkoff = 0;
2607 	}
2608 	return restore_curseg_summaries(sbi);
2609 }
2610 
2611 static void build_sit_entries(struct f2fs_sb_info *sbi)
2612 {
2613 	struct sit_info *sit_i = SIT_I(sbi);
2614 	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
2615 	struct f2fs_journal *journal = curseg->journal;
2616 	struct seg_entry *se;
2617 	struct f2fs_sit_entry sit;
2618 	int sit_blk_cnt = SIT_BLK_CNT(sbi);
2619 	unsigned int i, start, end;
2620 	unsigned int readed, start_blk = 0;
2621 
2622 	do {
2623 		readed = ra_meta_pages(sbi, start_blk, BIO_MAX_PAGES,
2624 							META_SIT, true);
2625 
2626 		start = start_blk * sit_i->sents_per_block;
2627 		end = (start_blk + readed) * sit_i->sents_per_block;
2628 
2629 		for (; start < end && start < MAIN_SEGS(sbi); start++) {
2630 			struct f2fs_sit_block *sit_blk;
2631 			struct page *page;
2632 
2633 			se = &sit_i->sentries[start];
2634 			page = get_current_sit_page(sbi, start);
2635 			sit_blk = (struct f2fs_sit_block *)page_address(page);
2636 			sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)];
2637 			f2fs_put_page(page, 1);
2638 
2639 			check_block_count(sbi, start, &sit);
2640 			seg_info_from_raw_sit(se, &sit);
2641 
2642 			/* build the discard map only once */
2643 			if (f2fs_discard_en(sbi)) {
2644 				memcpy(se->discard_map, se->cur_valid_map,
2645 							SIT_VBLOCK_MAP_SIZE);
2646 				sbi->discard_blks += sbi->blocks_per_seg -
2647 							se->valid_blocks;
2648 			}
2649 
2650 			if (sbi->segs_per_sec > 1)
2651 				get_sec_entry(sbi, start)->valid_blocks +=
2652 							se->valid_blocks;
2653 		}
2654 		start_blk += readed;
2655 	} while (start_blk < sit_blk_cnt);
2656 
2657 	down_read(&curseg->journal_rwsem);
2658 	for (i = 0; i < sits_in_cursum(journal); i++) {
2659 		unsigned int old_valid_blocks;
2660 
2661 		start = le32_to_cpu(segno_in_journal(journal, i));
2662 		se = &sit_i->sentries[start];
2663 		sit = sit_in_journal(journal, i);
2664 
2665 		old_valid_blocks = se->valid_blocks;
2666 
2667 		check_block_count(sbi, start, &sit);
2668 		seg_info_from_raw_sit(se, &sit);
2669 
2670 		if (f2fs_discard_en(sbi)) {
2671 			memcpy(se->discard_map, se->cur_valid_map,
2672 						SIT_VBLOCK_MAP_SIZE);
2673 			sbi->discard_blks += old_valid_blocks -
2674 						se->valid_blocks;
2675 		}
2676 
2677 		if (sbi->segs_per_sec > 1)
2678 			get_sec_entry(sbi, start)->valid_blocks +=
2679 				se->valid_blocks - old_valid_blocks;
2680 	}
2681 	up_read(&curseg->journal_rwsem);
2682 }
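/*
 * Note: the journal pass runs after the on-disk SIT pass on purpose.
 * Entries still cached in the cold data summary journal are newer than the
 * SIT blocks, so they overwrite what was just read, and the discard map
 * and section counts are re-derived from the fresher data.
 */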
2683 
2684 static void init_free_segmap(struct f2fs_sb_info *sbi)
2685 {
2686 	unsigned int start;
2687 	int type;
2688 
2689 	for (start = 0; start < MAIN_SEGS(sbi); start++) {
2690 		struct seg_entry *sentry = get_seg_entry(sbi, start);
2691 		if (!sentry->valid_blocks)
2692 			__set_free(sbi, start);
2693 		else
2694 			SIT_I(sbi)->written_valid_blocks +=
2695 						sentry->valid_blocks;
2696 	}
2697 
2698 	/* mark the current segments as in use */
2699 	for (type = CURSEG_HOT_DATA; type <= CURSEG_COLD_NODE; type++) {
2700 		struct curseg_info *curseg_t = CURSEG_I(sbi, type);
2701 		__set_test_and_inuse(sbi, curseg_t->segno);
2702 	}
2703 }
2704 
2705 static void init_dirty_segmap(struct f2fs_sb_info *sbi)
2706 {
2707 	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
2708 	struct free_segmap_info *free_i = FREE_I(sbi);
2709 	unsigned int segno = 0, offset = 0;
2710 	unsigned short valid_blocks;
2711 
2712 	while (1) {
2713 		/* find dirty segment based on free segmap */
2714 		segno = find_next_inuse(free_i, MAIN_SEGS(sbi), offset);
2715 		if (segno >= MAIN_SEGS(sbi))
2716 			break;
2717 		offset = segno + 1;
2718 		valid_blocks = get_valid_blocks(sbi, segno, 0);
2719 		if (valid_blocks == sbi->blocks_per_seg || !valid_blocks)
2720 			continue;
2721 		if (valid_blocks > sbi->blocks_per_seg) {
2722 			f2fs_bug_on(sbi, 1);
2723 			continue;
2724 		}
2725 		mutex_lock(&dirty_i->seglist_lock);
2726 		__locate_dirty_segment(sbi, segno, DIRTY);
2727 		mutex_unlock(&dirty_i->seglist_lock);
2728 	}
2729 }
2730 
2731 static int init_victim_secmap(struct f2fs_sb_info *sbi)
2732 {
2733 	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
2734 	unsigned int bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
2735 
2736 	dirty_i->victim_secmap = f2fs_kvzalloc(bitmap_size, GFP_KERNEL);
2737 	if (!dirty_i->victim_secmap)
2738 		return -ENOMEM;
2739 	return 0;
2740 }
2741 
2742 static int build_dirty_segmap(struct f2fs_sb_info *sbi)
2743 {
2744 	struct dirty_seglist_info *dirty_i;
2745 	unsigned int bitmap_size, i;
2746 
2747 	/* allocate memory for dirty segments list information */
2748 	dirty_i = kzalloc(sizeof(struct dirty_seglist_info), GFP_KERNEL);
2749 	if (!dirty_i)
2750 		return -ENOMEM;
2751 
2752 	SM_I(sbi)->dirty_info = dirty_i;
2753 	mutex_init(&dirty_i->seglist_lock);
2754 
2755 	bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
2756 
2757 	for (i = 0; i < NR_DIRTY_TYPE; i++) {
2758 		dirty_i->dirty_segmap[i] = f2fs_kvzalloc(bitmap_size, GFP_KERNEL);
2759 		if (!dirty_i->dirty_segmap[i])
2760 			return -ENOMEM;
2761 	}
2762 
2763 	init_dirty_segmap(sbi);
2764 	return init_victim_secmap(sbi);
2765 }
2766 
2767 /*
2768  * Update the min/max modified times used by the cost-benefit GC algorithm
2769  */
2770 static void init_min_max_mtime(struct f2fs_sb_info *sbi)
2771 {
2772 	struct sit_info *sit_i = SIT_I(sbi);
2773 	unsigned int segno;
2774 
2775 	mutex_lock(&sit_i->sentry_lock);
2776 
2777 	sit_i->min_mtime = LLONG_MAX;
2778 
2779 	for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) {
2780 		unsigned int i;
2781 		unsigned long long mtime = 0;
2782 
2783 		for (i = 0; i < sbi->segs_per_sec; i++)
2784 			mtime += get_seg_entry(sbi, segno + i)->mtime;
2785 
2786 		mtime = div_u64(mtime, sbi->segs_per_sec);
2787 
2788 		if (sit_i->min_mtime > mtime)
2789 			sit_i->min_mtime = mtime;
2790 	}
2791 	sit_i->max_mtime = get_mtime(sbi);
2792 	mutex_unlock(&sit_i->sentry_lock);
2793 }
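/*
 * These bounds feed victim selection in gc.c: the cost-benefit policy
 * normalizes a section's age between min_mtime and max_mtime and prefers
 * old, sparsely valid victims, scoring roughly (1 - u) * age / (1 + u) for
 * utilization u (a sketch of the heuristic, not a literal copy of
 * get_cb_cost()).
 */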
2794 
2795 int build_segment_manager(struct f2fs_sb_info *sbi)
2796 {
2797 	struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
2798 	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
2799 	struct f2fs_sm_info *sm_info;
2800 	int err;
2801 
2802 	sm_info = kzalloc(sizeof(struct f2fs_sm_info), GFP_KERNEL);
2803 	if (!sm_info)
2804 		return -ENOMEM;
2805 
2806 	/* init sm info */
2807 	sbi->sm_info = sm_info;
2808 	sm_info->seg0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr);
2809 	sm_info->main_blkaddr = le32_to_cpu(raw_super->main_blkaddr);
2810 	sm_info->segment_count = le32_to_cpu(raw_super->segment_count);
2811 	sm_info->reserved_segments = le32_to_cpu(ckpt->rsvd_segment_count);
2812 	sm_info->ovp_segments = le32_to_cpu(ckpt->overprov_segment_count);
2813 	sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main);
2814 	sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr);
2815 	sm_info->rec_prefree_segments = sm_info->main_segments *
2816 					DEF_RECLAIM_PREFREE_SEGMENTS / 100;
2817 	if (sm_info->rec_prefree_segments > DEF_MAX_RECLAIM_PREFREE_SEGMENTS)
2818 		sm_info->rec_prefree_segments = DEF_MAX_RECLAIM_PREFREE_SEGMENTS;
2819 
2820 	if (!test_opt(sbi, LFS))
2821 		sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC;
2822 	sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
2823 	sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
2824 
2825 	sm_info->trim_sections = DEF_BATCHED_TRIM_SECTIONS;
2826 
2827 	INIT_LIST_HEAD(&sm_info->sit_entry_set);
2828 
2829 	if (test_opt(sbi, FLUSH_MERGE) && !f2fs_readonly(sbi->sb)) {
2830 		err = create_flush_cmd_control(sbi);
2831 		if (err)
2832 			return err;
2833 	}
2834 
2835 	err = create_discard_cmd_control(sbi);
2836 	if (err)
2837 		return err;
2838 
2839 	err = build_sit_info(sbi);
2840 	if (err)
2841 		return err;
2842 	err = build_free_segmap(sbi);
2843 	if (err)
2844 		return err;
2845 	err = build_curseg(sbi);
2846 	if (err)
2847 		return err;
2848 
2849 	/* reinit free segmap based on SIT */
2850 	build_sit_entries(sbi);
2851 
2852 	init_free_segmap(sbi);
2853 	err = build_dirty_segmap(sbi);
2854 	if (err)
2855 		return err;
2856 
2857 	init_min_max_mtime(sbi);
2858 	return 0;
2859 }
2860 
2861 static void discard_dirty_segmap(struct f2fs_sb_info *sbi,
2862 		enum dirty_type dirty_type)
2863 {
2864 	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
2865 
2866 	mutex_lock(&dirty_i->seglist_lock);
2867 	kvfree(dirty_i->dirty_segmap[dirty_type]);
2868 	dirty_i->nr_dirty[dirty_type] = 0;
2869 	mutex_unlock(&dirty_i->seglist_lock);
2870 }
2871 
2872 static void destroy_victim_secmap(struct f2fs_sb_info *sbi)
2873 {
2874 	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
2875 	kvfree(dirty_i->victim_secmap);
2876 }
2877 
2878 static void destroy_dirty_segmap(struct f2fs_sb_info *sbi)
2879 {
2880 	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
2881 	int i;
2882 
2883 	if (!dirty_i)
2884 		return;
2885 
2886 	/* discard pre-free/dirty segments list */
2887 	for (i = 0; i < NR_DIRTY_TYPE; i++)
2888 		discard_dirty_segmap(sbi, i);
2889 
2890 	destroy_victim_secmap(sbi);
2891 	SM_I(sbi)->dirty_info = NULL;
2892 	kfree(dirty_i);
2893 }
2894 
2895 static void destroy_curseg(struct f2fs_sb_info *sbi)
2896 {
2897 	struct curseg_info *array = SM_I(sbi)->curseg_array;
2898 	int i;
2899 
2900 	if (!array)
2901 		return;
2902 	SM_I(sbi)->curseg_array = NULL;
2903 	for (i = 0; i < NR_CURSEG_TYPE; i++) {
2904 		kfree(array[i].sum_blk);
2905 		kfree(array[i].journal);
2906 	}
2907 	kfree(array);
2908 }
2909 
2910 static void destroy_free_segmap(struct f2fs_sb_info *sbi)
2911 {
2912 	struct free_segmap_info *free_i = SM_I(sbi)->free_info;
2913 	if (!free_i)
2914 		return;
2915 	SM_I(sbi)->free_info = NULL;
2916 	kvfree(free_i->free_segmap);
2917 	kvfree(free_i->free_secmap);
2918 	kfree(free_i);
2919 }
2920 
2921 static void destroy_sit_info(struct f2fs_sb_info *sbi)
2922 {
2923 	struct sit_info *sit_i = SIT_I(sbi);
2924 	unsigned int start;
2925 
2926 	if (!sit_i)
2927 		return;
2928 
2929 	if (sit_i->sentries) {
2930 		for (start = 0; start < MAIN_SEGS(sbi); start++) {
2931 			kfree(sit_i->sentries[start].cur_valid_map);
2932 #ifdef CONFIG_F2FS_CHECK_FS
2933 			kfree(sit_i->sentries[start].cur_valid_map_mir);
2934 #endif
2935 			kfree(sit_i->sentries[start].ckpt_valid_map);
2936 			kfree(sit_i->sentries[start].discard_map);
2937 		}
2938 	}
2939 	kfree(sit_i->tmp_map);
2940 
2941 	kvfree(sit_i->sentries);
2942 	kvfree(sit_i->sec_entries);
2943 	kvfree(sit_i->dirty_sentries_bitmap);
2944 
2945 	SM_I(sbi)->sit_info = NULL;
2946 	kfree(sit_i->sit_bitmap);
2947 #ifdef CONFIG_F2FS_CHECK_FS
2948 	kfree(sit_i->sit_bitmap_mir);
2949 #endif
2950 	kfree(sit_i);
2951 }
2952 
2953 void destroy_segment_manager(struct f2fs_sb_info *sbi)
2954 {
2955 	struct f2fs_sm_info *sm_info = SM_I(sbi);
2956 
2957 	if (!sm_info)
2958 		return;
2959 	destroy_flush_cmd_control(sbi, true);
2960 	destroy_discard_cmd_control(sbi, true);
2961 	destroy_dirty_segmap(sbi);
2962 	destroy_curseg(sbi);
2963 	destroy_free_segmap(sbi);
2964 	destroy_sit_info(sbi);
2965 	sbi->sm_info = NULL;
2966 	kfree(sm_info);
2967 }
2968 
2969 int __init create_segment_manager_caches(void)
2970 {
2971 	discard_entry_slab = f2fs_kmem_cache_create("discard_entry",
2972 			sizeof(struct discard_entry));
2973 	if (!discard_entry_slab)
2974 		goto fail;
2975 
2976 	discard_cmd_slab = f2fs_kmem_cache_create("discard_cmd",
2977 			sizeof(struct discard_cmd));
2978 	if (!discard_cmd_slab)
2979 		goto destroy_discard_entry;
2980 
2981 	sit_entry_set_slab = f2fs_kmem_cache_create("sit_entry_set",
2982 			sizeof(struct sit_entry_set));
2983 	if (!sit_entry_set_slab)
2984 		goto destroy_discard_cmd;
2985 
2986 	inmem_entry_slab = f2fs_kmem_cache_create("inmem_page_entry",
2987 			sizeof(struct inmem_pages));
2988 	if (!inmem_entry_slab)
2989 		goto destroy_sit_entry_set;
2990 	return 0;
2991 
2992 destroy_sit_entry_set:
2993 	kmem_cache_destroy(sit_entry_set_slab);
2994 destroy_discard_cmd:
2995 	kmem_cache_destroy(discard_cmd_slab);
2996 destroy_discard_entry:
2997 	kmem_cache_destroy(discard_entry_slab);
2998 fail:
2999 	return -ENOMEM;
3000 }
3001 
3002 void destroy_segment_manager_caches(void)
3003 {
3004 	kmem_cache_destroy(sit_entry_set_slab);
3005 	kmem_cache_destroy(discard_cmd_slab);
3006 	kmem_cache_destroy(discard_entry_slab);
3007 	kmem_cache_destroy(inmem_entry_slab);
3008 }
3009