xref: /openbmc/linux/fs/f2fs/segment.c (revision 9be32d72becca41d7d9b010d7d9be1d39489414f)
1 /*
2  * fs/f2fs/segment.c
3  *
4  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
5  *             http://www.samsung.com/
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License version 2 as
9  * published by the Free Software Foundation.
10  */
11 #include <linux/fs.h>
12 #include <linux/f2fs_fs.h>
13 #include <linux/bio.h>
14 #include <linux/blkdev.h>
15 #include <linux/prefetch.h>
16 #include <linux/kthread.h>
17 #include <linux/vmalloc.h>
18 #include <linux/swap.h>
19 
20 #include "f2fs.h"
21 #include "segment.h"
22 #include "node.h"
23 #include <trace/events/f2fs.h>
24 
25 #define __reverse_ffz(x) __reverse_ffs(~(x))
26 
27 static struct kmem_cache *discard_entry_slab;
28 static struct kmem_cache *sit_entry_set_slab;
29 static struct kmem_cache *inmem_entry_slab;
30 
31 /*
32  * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since
33  * f2fs_set_bit stores bits MSB-first within each byte.
34  */
35 static inline unsigned long __reverse_ffs(unsigned long word)
36 {
37 	int num = 0;
38 
39 #if BITS_PER_LONG == 64
40 	if ((word & 0xffffffff) == 0) {
41 		num += 32;
42 		word >>= 32;
43 	}
44 #endif
45 	if ((word & 0xffff) == 0) {
46 		num += 16;
47 		word >>= 16;
48 	}
49 	if ((word & 0xff) == 0) {
50 		num += 8;
51 		word >>= 8;
52 	}
53 	if ((word & 0xf0) == 0)
54 		num += 4;
55 	else
56 		word >>= 4;
57 	if ((word & 0xc) == 0)
58 		num += 2;
59 	else
60 		word >>= 2;
61 	if ((word & 0x2) == 0)
62 		num += 1;
63 	return num;
64 }
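
/*
 * Worked examples: f2fs_set_bit() stores bit i of a byte as
 * 0x80 >> (i & 7), so __reverse_ffs() returns the position of the
 * highest conventional bit in the lowest non-zero byte:
 *
 *   __reverse_ffs(0x01)   == 7    (0x01 == 0x80 >> 7 in byte 0)
 *   __reverse_ffs(0x10)   == 3    (0x10 == 0x80 >> 3 in byte 0)
 *   __reverse_ffs(0x8000) == 8    (0x80 in byte 1 -> 8 + 0)
 */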
65 
66 /*
67  * __find_rev_next(_zero)_bit is copied from lib/find_next_bit.c because
68  * f2fs_set_bit stores bits MSB-first within each byte.
69  * Example:
70  *                             LSB <--> MSB
71  *   f2fs_set_bit(0, bitmap) => 0000 0001
72  *   f2fs_set_bit(7, bitmap) => 1000 0000
73  */
74 static unsigned long __find_rev_next_bit(const unsigned long *addr,
75 			unsigned long size, unsigned long offset)
76 {
77 	const unsigned long *p = addr + BIT_WORD(offset);
78 	unsigned long result = offset & ~(BITS_PER_LONG - 1);
79 	unsigned long tmp;
80 	unsigned long mask, submask;
81 	unsigned long quot, rest;
82 
83 	if (offset >= size)
84 		return size;
85 
86 	size -= result;
87 	offset %= BITS_PER_LONG;
88 	if (!offset)
89 		goto aligned;
90 
91 	tmp = *(p++);
92 	quot = (offset >> 3) << 3;
93 	rest = offset & 0x7;
94 	mask = (quot + 8 < BITS_PER_LONG) ? ~0UL << (quot + 8) : 0UL;
95 	submask = (unsigned char)(0xff << rest) >> rest;
96 	submask <<= quot;
97 	mask |= submask;	/* keep whole bytes above the offset byte */
98 	tmp &= mask;
99 	if (size < BITS_PER_LONG)
100 		goto found_first;
101 	if (tmp)
102 		goto found_middle;
103 
104 	size -= BITS_PER_LONG;
105 	result += BITS_PER_LONG;
106 aligned:
107 	while (size & ~(BITS_PER_LONG-1)) {
108 		tmp = *(p++);
109 		if (tmp)
110 			goto found_middle;
111 		result += BITS_PER_LONG;
112 		size -= BITS_PER_LONG;
113 	}
114 	if (!size)
115 		return result;
116 	tmp = *p;
117 found_first:
118 	tmp &= (~0UL >> (BITS_PER_LONG - size));
119 	if (tmp == 0UL)		/* Are any bits set? */
120 		return result + size;   /* Nope. */
121 found_middle:
122 	return result + __reverse_ffs(tmp);
123 }
124 
125 static unsigned long __find_rev_next_zero_bit(const unsigned long *addr,
126 			unsigned long size, unsigned long offset)
127 {
128 	const unsigned long *p = addr + BIT_WORD(offset);
129 	unsigned long result = offset & ~(BITS_PER_LONG - 1);
130 	unsigned long tmp;
131 	unsigned long mask, submask;
132 	unsigned long quot, rest;
133 
134 	if (offset >= size)
135 		return size;
136 
137 	size -= result;
138 	offset %= BITS_PER_LONG;
139 	if (!offset)
140 		goto aligned;
141 
142 	tmp = *(p++);
143 	quot = (offset >> 3) << 3;
144 	rest = offset & 0x7;
145 	mask = ~(~0UL << quot);
146 	submask = (unsigned char)~((unsigned char)(0xff << rest) >> rest);
147 	submask <<= quot;
148 	mask |= submask;	/* disjoint bit ranges; same result as += */
149 	tmp |= mask;
150 	if (size < BITS_PER_LONG)
151 		goto found_first;
152 	if (~tmp)
153 		goto found_middle;
154 
155 	size -= BITS_PER_LONG;
156 	result += BITS_PER_LONG;
157 aligned:
158 	while (size & ~(BITS_PER_LONG - 1)) {
159 		tmp = *(p++);
160 		if (~tmp)
161 			goto found_middle;
162 		result += BITS_PER_LONG;
163 		size -= BITS_PER_LONG;
164 	}
165 	if (!size)
166 		return result;
167 	tmp = *p;
168 
169 found_first:
170 	tmp |= ~0UL << size;
171 	if (tmp == ~0UL)        /* Are any bits zero? */
172 		return result + size;   /* Nope. */
173 found_middle:
174 	return result + __reverse_ffz(tmp);
175 }
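
/*
 * Illustrative sketch (hypothetical helper, compiled out with #if 0):
 * the pair of routines above is typically used together to walk runs of
 * set bits in such a reversed bitmap, as add_discard_addrs() does below.
 */
#if 0
static void __walk_rev_runs(const unsigned long *map, unsigned long nbits)
{
	unsigned long start, end = -1;

	while (1) {
		start = __find_rev_next_bit(map, nbits, end + 1);
		if (start >= nbits)
			break;
		end = __find_rev_next_zero_bit(map, nbits, start + 1);
		/* f2fs-order bits [start, end) form one contiguous run */
	}
}
#endif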
176 
177 void register_inmem_page(struct inode *inode, struct page *page)
178 {
179 	struct f2fs_inode_info *fi = F2FS_I(inode);
180 	struct inmem_pages *new;
181 	int err;
182 
183 	new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS);
184 
185 	/* add atomic page indices to the list */
186 	new->page = page;
187 	INIT_LIST_HEAD(&new->list);
188 retry:
189 	/* increase reference count with clean state */
190 	mutex_lock(&fi->inmem_lock);
191 	err = radix_tree_insert(&fi->inmem_root, page->index, new);
192 	if (err == -EEXIST) {
193 		mutex_unlock(&fi->inmem_lock);
194 		kmem_cache_free(inmem_entry_slab, new);
195 		return;
196 	} else if (err) {
197 		mutex_unlock(&fi->inmem_lock);
198 		goto retry;
199 	}
200 	get_page(page);
201 	list_add_tail(&new->list, &fi->inmem_pages);
202 	mutex_unlock(&fi->inmem_lock);
203 }
204 
205 void invalidate_inmem_page(struct inode *inode, struct page *page)
206 {
207 	struct f2fs_inode_info *fi = F2FS_I(inode);
208 	struct inmem_pages *cur;
209 
210 	mutex_lock(&fi->inmem_lock);
211 	cur = radix_tree_lookup(&fi->inmem_root, page->index);
212 	if (cur) {
213 		radix_tree_delete(&fi->inmem_root, cur->page->index);
214 		f2fs_put_page(cur->page, 0);
215 		list_del(&cur->list);
216 		kmem_cache_free(inmem_entry_slab, cur);
217 	}
218 	mutex_unlock(&fi->inmem_lock);
219 }
220 
221 void commit_inmem_pages(struct inode *inode, bool abort)
222 {
223 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
224 	struct f2fs_inode_info *fi = F2FS_I(inode);
225 	struct inmem_pages *cur, *tmp;
226 	bool submit_bio = false;
227 	struct f2fs_io_info fio = {
228 		.type = DATA,
229 		.rw = WRITE_SYNC,
230 	};
231 
232 	/*
233 	 * abort is true only when f2fs_evict_inode is called, and eviction
234 	 * produces no new data writes, so f2fs_balance_fs is not needed here.
235 	 * Calling it could even deadlock: f2fs_gc inside f2fs_balance_fs may
236 	 * wait forever in f2fs_iget for iget_locked to return this very
237 	 * inode, which only happens once the eviction completes.
238 	 */
239 	if (!abort)
240 		f2fs_balance_fs(sbi);
241 
242 	f2fs_lock_op(sbi);
243 
244 	mutex_lock(&fi->inmem_lock);
245 	list_for_each_entry_safe(cur, tmp, &fi->inmem_pages, list) {
246 		lock_page(cur->page);
247 		if (!abort && cur->page->mapping == inode->i_mapping) {
248 			f2fs_wait_on_page_writeback(cur->page, DATA);
249 			if (clear_page_dirty_for_io(cur->page))
250 				inode_dec_dirty_pages(inode);
251 			do_write_data_page(cur->page, &fio);
252 			submit_bio = true;
253 		}
254 		radix_tree_delete(&fi->inmem_root, cur->page->index);
255 		f2fs_put_page(cur->page, 1);
256 		list_del(&cur->list);
257 		kmem_cache_free(inmem_entry_slab, cur);
258 	}
259 	if (submit_bio)
260 		f2fs_submit_merged_bio(sbi, DATA, WRITE);
261 	mutex_unlock(&fi->inmem_lock);
262 
263 	filemap_fdatawait_range(inode->i_mapping, 0, LLONG_MAX);
264 	f2fs_unlock_op(sbi);
265 }
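
/*
 * Sketch of the atomic-write life cycle served by the three helpers
 * above (simplified; the actual entry points live outside this file):
 *
 *   F2FS_IOC_START_ATOMIC_WRITE
 *     -> dirtied data pages are parked via register_inmem_page()
 *   F2FS_IOC_COMMIT_ATOMIC_WRITE
 *     -> commit_inmem_pages(inode, false) writes them out in one batch
 *   f2fs_evict_inode()
 *     -> commit_inmem_pages(inode, true) drops them unwritten
 */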
266 
267 /*
268  * This function balances dirty node and dentry pages.
269  * In addition, it controls garbage collection.
270  */
271 void f2fs_balance_fs(struct f2fs_sb_info *sbi)
272 {
273 	/*
274 	 * Do foreground GC (which may end with a checkpoint) when too many
275 	 * dirty dir/node pages exist and free segments are running short.
276 	 */
277 	if (has_not_enough_free_secs(sbi, 0)) {
278 		mutex_lock(&sbi->gc_mutex);
279 		f2fs_gc(sbi);
280 	}
281 }
282 
283 void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
284 {
285 	/* check the # of cached NAT entries, prefree segments and ino entries */
286 	if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK) ||
287 			excess_prefree_segs(sbi) ||
288 			available_free_memory(sbi, INO_ENTRIES))
289 		f2fs_sync_fs(sbi->sb, true);
290 }
291 
292 static int issue_flush_thread(void *data)
293 {
294 	struct f2fs_sb_info *sbi = data;
295 	struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info;
296 	wait_queue_head_t *q = &fcc->flush_wait_queue;
297 repeat:
298 	if (kthread_should_stop())
299 		return 0;
300 
301 	if (!llist_empty(&fcc->issue_list)) {
302 		struct bio *bio = bio_alloc(GFP_NOIO, 0);
303 		struct flush_cmd *cmd, *next;
304 		int ret;
305 
306 		fcc->dispatch_list = llist_del_all(&fcc->issue_list);
307 		fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list);
308 
309 		bio->bi_bdev = sbi->sb->s_bdev;
310 		ret = submit_bio_wait(WRITE_FLUSH, bio);
311 
312 		llist_for_each_entry_safe(cmd, next,
313 					  fcc->dispatch_list, llnode) {
314 			cmd->ret = ret;
315 			complete(&cmd->wait);
316 		}
317 		bio_put(bio);
318 		fcc->dispatch_list = NULL;
319 	}
320 
321 	wait_event_interruptible(*q,
322 		kthread_should_stop() || !llist_empty(&fcc->issue_list));
323 	goto repeat;
324 }
325 
326 int f2fs_issue_flush(struct f2fs_sb_info *sbi)
327 {
328 	struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info;
329 	struct flush_cmd cmd;
330 
331 	trace_f2fs_issue_flush(sbi->sb, test_opt(sbi, NOBARRIER),
332 					test_opt(sbi, FLUSH_MERGE));
333 
334 	if (test_opt(sbi, NOBARRIER))
335 		return 0;
336 
337 	if (!test_opt(sbi, FLUSH_MERGE))
338 		return blkdev_issue_flush(sbi->sb->s_bdev, GFP_KERNEL, NULL);
339 
340 	init_completion(&cmd.wait);
341 
342 	llist_add(&cmd.llnode, &fcc->issue_list);
343 
344 	if (!fcc->dispatch_list)
345 		wake_up(&fcc->flush_wait_queue);
346 
347 	wait_for_completion(&cmd.wait);
348 
349 	return cmd.ret;
350 }
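
/*
 * Note on the pattern above: llist_add()/llist_del_all() form a lock-free
 * multi-producer queue, but llist_del_all() returns nodes in LIFO order,
 * which is why issue_flush_thread() calls llist_reverse_order() before
 * completing waiters.  A single WRITE_FLUSH bio then acknowledges every
 * command queued while the previous flush was in flight.
 */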
351 
352 int create_flush_cmd_control(struct f2fs_sb_info *sbi)
353 {
354 	dev_t dev = sbi->sb->s_bdev->bd_dev;
355 	struct flush_cmd_control *fcc;
356 	int err = 0;
357 
358 	fcc = kzalloc(sizeof(struct flush_cmd_control), GFP_KERNEL);
359 	if (!fcc)
360 		return -ENOMEM;
361 	init_waitqueue_head(&fcc->flush_wait_queue);
362 	init_llist_head(&fcc->issue_list);
363 	SM_I(sbi)->cmd_control_info = fcc;
364 	fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
365 				"f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
366 	if (IS_ERR(fcc->f2fs_issue_flush)) {
367 		err = PTR_ERR(fcc->f2fs_issue_flush);
368 		kfree(fcc);
369 		SM_I(sbi)->cmd_control_info = NULL;
370 		return err;
371 	}
372 
373 	return err;
374 }
375 
376 void destroy_flush_cmd_control(struct f2fs_sb_info *sbi)
377 {
378 	struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info;
379 
380 	if (fcc && fcc->f2fs_issue_flush)
381 		kthread_stop(fcc->f2fs_issue_flush);
382 	kfree(fcc);
383 	SM_I(sbi)->cmd_control_info = NULL;
384 }
385 
386 static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
387 		enum dirty_type dirty_type)
388 {
389 	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
390 
391 	/* need not be added */
392 	if (IS_CURSEG(sbi, segno))
393 		return;
394 
395 	if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type]))
396 		dirty_i->nr_dirty[dirty_type]++;
397 
398 	if (dirty_type == DIRTY) {
399 		struct seg_entry *sentry = get_seg_entry(sbi, segno);
400 		enum dirty_type t = sentry->type;
401 
402 		if (unlikely(t >= DIRTY)) {
403 			f2fs_bug_on(sbi, 1);
404 			return;
405 		}
406 		if (!test_and_set_bit(segno, dirty_i->dirty_segmap[t]))
407 			dirty_i->nr_dirty[t]++;
408 	}
409 }
410 
411 static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
412 		enum dirty_type dirty_type)
413 {
414 	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
415 
416 	if (test_and_clear_bit(segno, dirty_i->dirty_segmap[dirty_type]))
417 		dirty_i->nr_dirty[dirty_type]--;
418 
419 	if (dirty_type == DIRTY) {
420 		struct seg_entry *sentry = get_seg_entry(sbi, segno);
421 		enum dirty_type t = sentry->type;
422 
423 		if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t]))
424 			dirty_i->nr_dirty[t]--;
425 
426 		if (get_valid_blocks(sbi, segno, sbi->segs_per_sec) == 0)
427 			clear_bit(GET_SECNO(sbi, segno),
428 						dirty_i->victim_secmap);
429 	}
430 }
431 
432 /*
433  * No error such as -ENOMEM should occur here, since adding a dirty
434  * entry to the seglist is not a critical operation.
435  * If a given segment is one of the current working segments, it won't be added.
436  */
437 static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
438 {
439 	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
440 	unsigned short valid_blocks;
441 
442 	if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno))
443 		return;
444 
445 	mutex_lock(&dirty_i->seglist_lock);
446 
447 	valid_blocks = get_valid_blocks(sbi, segno, 0);
448 
449 	if (valid_blocks == 0) {
450 		__locate_dirty_segment(sbi, segno, PRE);
451 		__remove_dirty_segment(sbi, segno, DIRTY);
452 	} else if (valid_blocks < sbi->blocks_per_seg) {
453 		__locate_dirty_segment(sbi, segno, DIRTY);
454 	} else {
455 		/* Recovery routine with SSR needs this */
456 		__remove_dirty_segment(sbi, segno, DIRTY);
457 	}
458 
459 	mutex_unlock(&dirty_i->seglist_lock);
460 }
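
/*
 * Example of the bookkeeping above: a partially valid CURSEG_WARM_DATA
 * segment is flagged in both dirty_segmap[DIRTY] and
 * dirty_segmap[DIRTY_WARM_DATA]; once its valid block count reaches
 * zero it moves to dirty_segmap[PRE] instead, where the next checkpoint
 * frees it wholesale.
 */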
461 
462 static int f2fs_issue_discard(struct f2fs_sb_info *sbi,
463 				block_t blkstart, block_t blklen)
464 {
465 	sector_t start = SECTOR_FROM_BLOCK(blkstart);
466 	sector_t len = SECTOR_FROM_BLOCK(blklen);
467 	trace_f2fs_issue_discard(sbi->sb, blkstart, blklen);
468 	return blkdev_issue_discard(sbi->sb->s_bdev, start, len, GFP_NOFS, 0);
469 }
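
/*
 * Unit-conversion example for the helper above, assuming the usual
 * 4 KiB blocks and 512-byte sectors (a left shift by 3): discarding one
 * whole 512-block segment starting at block 1000 issues a discard for
 * sectors [8000, 8000 + 4096).
 */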
470 
471 void discard_next_dnode(struct f2fs_sb_info *sbi, block_t blkaddr)
472 {
473 	if (f2fs_issue_discard(sbi, blkaddr, 1)) {
474 		struct page *page = grab_meta_page(sbi, blkaddr);
475 		/* zero-filled page */
476 		set_page_dirty(page);
477 		f2fs_put_page(page, 1);
478 	}
479 }
480 
481 static void __add_discard_entry(struct f2fs_sb_info *sbi,
482 		struct cp_control *cpc, unsigned int start, unsigned int end)
483 {
484 	struct list_head *head = &SM_I(sbi)->discard_list;
485 	struct discard_entry *new, *last;
486 
487 	if (!list_empty(head)) {
488 		last = list_last_entry(head, struct discard_entry, list);
489 		if (START_BLOCK(sbi, cpc->trim_start) + start ==
490 						last->blkaddr + last->len) {
491 			last->len += end - start;
492 			goto done;
493 		}
494 	}
495 
496 	new = f2fs_kmem_cache_alloc(discard_entry_slab, GFP_NOFS);
497 	INIT_LIST_HEAD(&new->list);
498 	new->blkaddr = START_BLOCK(sbi, cpc->trim_start) + start;
499 	new->len = end - start;
500 	list_add_tail(&new->list, head);
501 done:
502 	SM_I(sbi)->nr_discards += end - start;
503 	cpc->trimmed += end - start;
504 }
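
/*
 * Coalescing example for the helper above: if the tail entry already
 * covers blkaddr 5000 with len 8 and the next candidate range starts at
 * block 5008, that entry is simply extended by (end - start) blocks
 * instead of allocating a second discard_entry.
 */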
505 
506 static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc)
507 {
508 	int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
509 	int max_blocks = sbi->blocks_per_seg;
510 	struct seg_entry *se = get_seg_entry(sbi, cpc->trim_start);
511 	unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
512 	unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
513 	unsigned long dmap[entries];
514 	unsigned int start = 0, end = -1;
515 	bool force = (cpc->reason == CP_DISCARD);
516 	int i;
517 
518 	if (!force && !test_opt(sbi, DISCARD))
519 		return;
520 
521 	if (force && !se->valid_blocks) {
522 		struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
523 		/*
524 		 * if this segment is registered in the prefree list, then
525 		 * we should skip adding a discard candidate, and let the
526 		 * checkpoint do that later.
527 		 */
528 		mutex_lock(&dirty_i->seglist_lock);
529 		if (test_bit(cpc->trim_start, dirty_i->dirty_segmap[PRE])) {
530 			mutex_unlock(&dirty_i->seglist_lock);
531 			cpc->trimmed += sbi->blocks_per_seg;
532 			return;
533 		}
534 		mutex_unlock(&dirty_i->seglist_lock);
535 
536 		__add_discard_entry(sbi, cpc, 0, sbi->blocks_per_seg);
537 		return;
538 	}
539 
540 	/* no valid blocks: reclaimed via the prefree list; all valid: nothing to discard */
541 	if (!se->valid_blocks || se->valid_blocks == max_blocks)
542 		return;
543 
544 	/* SIT_VBLOCK_MAP_SIZE should be a multiple of sizeof(unsigned long) */
545 	for (i = 0; i < entries; i++)
546 		dmap[i] = ~(cur_map[i] | ckpt_map[i]);
547 
548 	while (force || SM_I(sbi)->nr_discards <= SM_I(sbi)->max_discards) {
549 		start = __find_rev_next_bit(dmap, max_blocks, end + 1);
550 		if (start >= max_blocks)
551 			break;
552 
553 		end = __find_rev_next_zero_bit(dmap, max_blocks, start + 1);
554 
555 		if (end - start < cpc->trim_minlen)
556 			continue;
557 
558 		__add_discard_entry(sbi, cpc, start, end);
559 	}
560 }
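
/*
 * Per-block truth table for the dmap computation above; a block is a
 * discard candidate only when it is invalid in both views of the segment:
 *
 *   cur_valid_map  ckpt_valid_map  dmap
 *        0               0          1
 *        0               1          0   (still live in the last checkpoint)
 *        1               -          0   (live now)
 */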
561 
562 void release_discard_addrs(struct f2fs_sb_info *sbi)
563 {
564 	struct list_head *head = &(SM_I(sbi)->discard_list);
565 	struct discard_entry *entry, *this;
566 
567 	/* drop caches */
568 	list_for_each_entry_safe(entry, this, head, list) {
569 		list_del(&entry->list);
570 		kmem_cache_free(discard_entry_slab, entry);
571 	}
572 }
573 
574 /*
575  * clear_prefree_segments() should be called after the checkpoint is done.
576  */
577 static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
578 {
579 	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
580 	unsigned int segno;
581 
582 	mutex_lock(&dirty_i->seglist_lock);
583 	for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], MAIN_SEGS(sbi))
584 		__set_test_and_free(sbi, segno);
585 	mutex_unlock(&dirty_i->seglist_lock);
586 }
587 
588 void clear_prefree_segments(struct f2fs_sb_info *sbi)
589 {
590 	struct list_head *head = &(SM_I(sbi)->discard_list);
591 	struct discard_entry *entry, *this;
592 	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
593 	unsigned long *prefree_map = dirty_i->dirty_segmap[PRE];
594 	unsigned int start = 0, end = -1;
595 
596 	mutex_lock(&dirty_i->seglist_lock);
597 
598 	while (1) {
599 		int i;
600 		start = find_next_bit(prefree_map, MAIN_SEGS(sbi), end + 1);
601 		if (start >= MAIN_SEGS(sbi))
602 			break;
603 		end = find_next_zero_bit(prefree_map, MAIN_SEGS(sbi),
604 								start + 1);
605 
606 		for (i = start; i < end; i++)
607 			clear_bit(i, prefree_map);
608 
609 		dirty_i->nr_dirty[PRE] -= end - start;
610 
611 		if (!test_opt(sbi, DISCARD))
612 			continue;
613 
614 		f2fs_issue_discard(sbi, START_BLOCK(sbi, start),
615 				(end - start) << sbi->log_blocks_per_seg);
616 	}
617 	mutex_unlock(&dirty_i->seglist_lock);
618 
619 	/* send small discards */
620 	list_for_each_entry_safe(entry, this, head, list) {
621 		f2fs_issue_discard(sbi, entry->blkaddr, entry->len);
622 		list_del(&entry->list);
623 		SM_I(sbi)->nr_discards -= entry->len;
624 		kmem_cache_free(discard_entry_slab, entry);
625 	}
626 }
627 
628 static bool __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno)
629 {
630 	struct sit_info *sit_i = SIT_I(sbi);
631 
632 	if (!__test_and_set_bit(segno, sit_i->dirty_sentries_bitmap)) {
633 		sit_i->dirty_sentries++;
634 		return false;
635 	}
636 
637 	return true;
638 }
639 
640 static void __set_sit_entry_type(struct f2fs_sb_info *sbi, int type,
641 					unsigned int segno, int modified)
642 {
643 	struct seg_entry *se = get_seg_entry(sbi, segno);
644 	se->type = type;
645 	if (modified)
646 		__mark_sit_entry_dirty(sbi, segno);
647 }
648 
649 static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
650 {
651 	struct seg_entry *se;
652 	unsigned int segno, offset;
653 	long int new_vblocks;
654 
655 	segno = GET_SEGNO(sbi, blkaddr);
656 
657 	se = get_seg_entry(sbi, segno);
658 	new_vblocks = se->valid_blocks + del;
659 	offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
660 
661 	f2fs_bug_on(sbi, (new_vblocks >> (sizeof(unsigned short) << 3) ||
662 				(new_vblocks > sbi->blocks_per_seg)));
663 
664 	se->valid_blocks = new_vblocks;
665 	se->mtime = get_mtime(sbi);
666 	SIT_I(sbi)->max_mtime = se->mtime;
667 
668 	/* Update valid block bitmap */
669 	if (del > 0) {
670 		if (f2fs_test_and_set_bit(offset, se->cur_valid_map))
671 			f2fs_bug_on(sbi, 1);
672 	} else {
673 		if (!f2fs_test_and_clear_bit(offset, se->cur_valid_map))
674 			f2fs_bug_on(sbi, 1);
675 	}
676 	if (!f2fs_test_bit(offset, se->ckpt_valid_map))
677 		se->ckpt_valid_blocks += del;
678 
679 	__mark_sit_entry_dirty(sbi, segno);
680 
681 	/* update total number of valid blocks to be written in ckpt area */
682 	SIT_I(sbi)->written_valid_blocks += del;
683 
684 	if (sbi->segs_per_sec > 1)
685 		get_sec_entry(sbi, segno)->valid_blocks += del;
686 }
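
/*
 * Example of the delta accounting above: writing one new block calls
 * update_sit_entry(sbi, blkaddr, 1), which bumps valid_blocks, sets the
 * block's bit in cur_valid_map and, since the block was not valid at the
 * last checkpoint, bumps ckpt_valid_blocks too.  Invalidating the block
 * it replaced passes -1: valid_blocks drops and the cur_valid_map bit is
 * cleared, but ckpt_valid_blocks drops only if that old block was not
 * part of the last checkpoint either.
 */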
687 
688 void refresh_sit_entry(struct f2fs_sb_info *sbi, block_t old, block_t new)
689 {
690 	update_sit_entry(sbi, new, 1);
691 	if (GET_SEGNO(sbi, old) != NULL_SEGNO)
692 		update_sit_entry(sbi, old, -1);
693 
694 	locate_dirty_segment(sbi, GET_SEGNO(sbi, old));
695 	locate_dirty_segment(sbi, GET_SEGNO(sbi, new));
696 }
697 
698 void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
699 {
700 	unsigned int segno = GET_SEGNO(sbi, addr);
701 	struct sit_info *sit_i = SIT_I(sbi);
702 
703 	f2fs_bug_on(sbi, addr == NULL_ADDR);
704 	if (addr == NEW_ADDR)
705 		return;
706 
707 	/* add it into sit main buffer */
708 	mutex_lock(&sit_i->sentry_lock);
709 
710 	update_sit_entry(sbi, addr, -1);
711 
712 	/* add it into dirty seglist */
713 	locate_dirty_segment(sbi, segno);
714 
715 	mutex_unlock(&sit_i->sentry_lock);
716 }
717 
718 /*
719  * This function must be called with curseg_mutex held.
720  */
721 static void __add_sum_entry(struct f2fs_sb_info *sbi, int type,
722 					struct f2fs_summary *sum)
723 {
724 	struct curseg_info *curseg = CURSEG_I(sbi, type);
725 	void *addr = curseg->sum_blk;
726 	addr += curseg->next_blkoff * sizeof(struct f2fs_summary);
727 	memcpy(addr, sum, sizeof(struct f2fs_summary));
728 }
729 
730 /*
731  * Calculate the number of current summary pages for writing
732  */
733 int npages_for_summary_flush(struct f2fs_sb_info *sbi)
734 {
735 	int valid_sum_count = 0;
736 	int i, sum_in_page;
737 
738 	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
739 		if (sbi->ckpt->alloc_type[i] == SSR)
740 			valid_sum_count += sbi->blocks_per_seg;
741 		else
742 			valid_sum_count += curseg_blkoff(sbi, i);
743 	}
744 
745 	sum_in_page = (PAGE_CACHE_SIZE - 2 * SUM_JOURNAL_SIZE -
746 			SUM_FOOTER_SIZE) / SUMMARY_SIZE;
747 	if (valid_sum_count <= sum_in_page)
748 		return 1;
749 	else if ((valid_sum_count - sum_in_page) <=
750 		(PAGE_CACHE_SIZE - SUM_FOOTER_SIZE) / SUMMARY_SIZE)
751 		return 2;
752 	return 3;
753 }
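
/*
 * Worked example for the sizing above, assuming the usual 4 KiB block
 * constants (SUMMARY_SIZE = 7, SUM_FOOTER_SIZE = 5, SUM_JOURNAL_SIZE =
 * 253): the first compacted page carries both journals plus
 * (4096 - 2 * 253 - 5) / 7 = 512 summary entries, and every following
 * page (4096 - 5) / 7 = 584 more.  Hence up to 512 entries fit in one
 * page, up to 1096 in two, and the worst case of 3 * 512 = 1536 always
 * fits in three.
 */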
754 
755 /*
756  * Caller should put this summary page
757  */
758 struct page *get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno)
759 {
760 	return get_meta_page(sbi, GET_SUM_BLOCK(sbi, segno));
761 }
762 
763 static void write_sum_page(struct f2fs_sb_info *sbi,
764 			struct f2fs_summary_block *sum_blk, block_t blk_addr)
765 {
766 	struct page *page = grab_meta_page(sbi, blk_addr);
767 	void *kaddr = page_address(page);
768 	memcpy(kaddr, sum_blk, PAGE_CACHE_SIZE);
769 	set_page_dirty(page);
770 	f2fs_put_page(page, 1);
771 }
772 
773 static int is_next_segment_free(struct f2fs_sb_info *sbi, int type)
774 {
775 	struct curseg_info *curseg = CURSEG_I(sbi, type);
776 	unsigned int segno = curseg->segno + 1;
777 	struct free_segmap_info *free_i = FREE_I(sbi);
778 
779 	if (segno < MAIN_SEGS(sbi) && segno % sbi->segs_per_sec)
780 		return !test_bit(segno, free_i->free_segmap);
781 	return 0;
782 }
783 
784 /*
785  * Find a new segment in the free segment bitmap, honoring the requested
786  * allocation direction.  This function must succeed, otherwise it is a BUG.
787  */
788 static void get_new_segment(struct f2fs_sb_info *sbi,
789 			unsigned int *newseg, bool new_sec, int dir)
790 {
791 	struct free_segmap_info *free_i = FREE_I(sbi);
792 	unsigned int segno, secno, zoneno;
793 	unsigned int total_zones = MAIN_SECS(sbi) / sbi->secs_per_zone;
794 	unsigned int hint = *newseg / sbi->segs_per_sec;
795 	unsigned int old_zoneno = GET_ZONENO_FROM_SEGNO(sbi, *newseg);
796 	unsigned int left_start = hint;
797 	bool init = true;
798 	int go_left = 0;
799 	int i;
800 
801 	write_lock(&free_i->segmap_lock);
802 
803 	if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) {
804 		segno = find_next_zero_bit(free_i->free_segmap,
805 					MAIN_SEGS(sbi), *newseg + 1);
806 		if (segno - *newseg < sbi->segs_per_sec -
807 					(*newseg % sbi->segs_per_sec))
808 			goto got_it;
809 	}
810 find_other_zone:
811 	secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint);
812 	if (secno >= MAIN_SECS(sbi)) {
813 		if (dir == ALLOC_RIGHT) {
814 			secno = find_next_zero_bit(free_i->free_secmap,
815 							MAIN_SECS(sbi), 0);
816 			f2fs_bug_on(sbi, secno >= MAIN_SECS(sbi));
817 		} else {
818 			go_left = 1;
819 			left_start = hint - 1;
820 		}
821 	}
822 	if (go_left == 0)
823 		goto skip_left;
824 
825 	while (test_bit(left_start, free_i->free_secmap)) {
826 		if (left_start > 0) {
827 			left_start--;
828 			continue;
829 		}
830 		left_start = find_next_zero_bit(free_i->free_secmap,
831 							MAIN_SECS(sbi), 0);
832 		f2fs_bug_on(sbi, left_start >= MAIN_SECS(sbi));
833 		break;
834 	}
835 	secno = left_start;
836 skip_left:
837 	hint = secno;
838 	segno = secno * sbi->segs_per_sec;
839 	zoneno = secno / sbi->secs_per_zone;
840 
841 	/* give up on finding another zone */
842 	if (!init)
843 		goto got_it;
844 	if (sbi->secs_per_zone == 1)
845 		goto got_it;
846 	if (zoneno == old_zoneno)
847 		goto got_it;
848 	if (dir == ALLOC_LEFT) {
849 		if (!go_left && zoneno + 1 >= total_zones)
850 			goto got_it;
851 		if (go_left && zoneno == 0)
852 			goto got_it;
853 	}
854 	for (i = 0; i < NR_CURSEG_TYPE; i++)
855 		if (CURSEG_I(sbi, i)->zone == zoneno)
856 			break;
857 
858 	if (i < NR_CURSEG_TYPE) {
859 		/* zone is in use, try another */
860 		if (go_left)
861 			hint = zoneno * sbi->secs_per_zone - 1;
862 		else if (zoneno + 1 >= total_zones)
863 			hint = 0;
864 		else
865 			hint = (zoneno + 1) * sbi->secs_per_zone;
866 		init = false;
867 		goto find_other_zone;
868 	}
869 got_it:
870 	/* set it as dirty segment in free segmap */
871 	f2fs_bug_on(sbi, test_bit(segno, free_i->free_segmap));
872 	__set_inuse(sbi, segno);
873 	*newseg = segno;
874 	write_unlock(&free_i->segmap_lock);
875 }
876 
877 static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
878 {
879 	struct curseg_info *curseg = CURSEG_I(sbi, type);
880 	struct summary_footer *sum_footer;
881 
882 	curseg->segno = curseg->next_segno;
883 	curseg->zone = GET_ZONENO_FROM_SEGNO(sbi, curseg->segno);
884 	curseg->next_blkoff = 0;
885 	curseg->next_segno = NULL_SEGNO;
886 
887 	sum_footer = &(curseg->sum_blk->footer);
888 	memset(sum_footer, 0, sizeof(struct summary_footer));
889 	if (IS_DATASEG(type))
890 		SET_SUM_TYPE(sum_footer, SUM_TYPE_DATA);
891 	if (IS_NODESEG(type))
892 		SET_SUM_TYPE(sum_footer, SUM_TYPE_NODE);
893 	__set_sit_entry_type(sbi, type, curseg->segno, modified);
894 }
895 
896 /*
897  * Allocate a current working segment.
898  * This function always allocates a free segment in LFS manner.
899  */
900 static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
901 {
902 	struct curseg_info *curseg = CURSEG_I(sbi, type);
903 	unsigned int segno = curseg->segno;
904 	int dir = ALLOC_LEFT;
905 
906 	write_sum_page(sbi, curseg->sum_blk,
907 				GET_SUM_BLOCK(sbi, segno));
908 	if (type == CURSEG_WARM_DATA || type == CURSEG_COLD_DATA)
909 		dir = ALLOC_RIGHT;
910 
911 	if (test_opt(sbi, NOHEAP))
912 		dir = ALLOC_RIGHT;
913 
914 	get_new_segment(sbi, &segno, new_sec, dir);
915 	curseg->next_segno = segno;
916 	reset_curseg(sbi, type, 1);
917 	curseg->alloc_type = LFS;
918 }
919 
920 static void __next_free_blkoff(struct f2fs_sb_info *sbi,
921 			struct curseg_info *seg, block_t start)
922 {
923 	struct seg_entry *se = get_seg_entry(sbi, seg->segno);
924 	int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
925 	unsigned long target_map[entries];
926 	unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
927 	unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
928 	int i, pos;
929 
930 	for (i = 0; i < entries; i++)
931 		target_map[i] = ckpt_map[i] | cur_map[i];
932 
933 	pos = __find_rev_next_zero_bit(target_map, sbi->blocks_per_seg, start);
934 
935 	seg->next_blkoff = pos;
936 }
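
/*
 * Example of the hole search above: target_map ORs the checkpointed and
 * current bitmaps, so __find_rev_next_zero_bit() only returns positions
 * that are free in *both* views, i.e. blocks SSR may overwrite without
 * disturbing the last checkpoint.
 */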
937 
938 /*
939  * If a segment is written in LFS manner, the next block offset is simply
940  * the current block offset plus one.  However, if a segment is written in
941  * SSR manner, the next block offset is obtained by calling __next_free_blkoff.
942  */
943 static void __refresh_next_blkoff(struct f2fs_sb_info *sbi,
944 				struct curseg_info *seg)
945 {
946 	if (seg->alloc_type == SSR)
947 		__next_free_blkoff(sbi, seg, seg->next_blkoff + 1);
948 	else
949 		seg->next_blkoff++;
950 }
951 
952 /*
953  * This function always allocates a used segment (from the dirty seglist) in
954  * SSR manner, so it must recover the segment's existing valid-block information.
955  */
956 static void change_curseg(struct f2fs_sb_info *sbi, int type, bool reuse)
957 {
958 	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
959 	struct curseg_info *curseg = CURSEG_I(sbi, type);
960 	unsigned int new_segno = curseg->next_segno;
961 	struct f2fs_summary_block *sum_node;
962 	struct page *sum_page;
963 
964 	write_sum_page(sbi, curseg->sum_blk,
965 				GET_SUM_BLOCK(sbi, curseg->segno));
966 	__set_test_and_inuse(sbi, new_segno);
967 
968 	mutex_lock(&dirty_i->seglist_lock);
969 	__remove_dirty_segment(sbi, new_segno, PRE);
970 	__remove_dirty_segment(sbi, new_segno, DIRTY);
971 	mutex_unlock(&dirty_i->seglist_lock);
972 
973 	reset_curseg(sbi, type, 1);
974 	curseg->alloc_type = SSR;
975 	__next_free_blkoff(sbi, curseg, 0);
976 
977 	if (reuse) {
978 		sum_page = get_sum_page(sbi, new_segno);
979 		sum_node = (struct f2fs_summary_block *)page_address(sum_page);
980 		memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE);
981 		f2fs_put_page(sum_page, 1);
982 	}
983 }
984 
985 static int get_ssr_segment(struct f2fs_sb_info *sbi, int type)
986 {
987 	struct curseg_info *curseg = CURSEG_I(sbi, type);
988 	const struct victim_selection *v_ops = DIRTY_I(sbi)->v_ops;
989 
990 	if (IS_NODESEG(type) || !has_not_enough_free_secs(sbi, 0))
991 		return v_ops->get_victim(sbi,
992 				&(curseg)->next_segno, BG_GC, type, SSR);
993 
994 	/* For data segments, let's do SSR more intensively */
995 	for (; type >= CURSEG_HOT_DATA; type--)
996 		if (v_ops->get_victim(sbi, &(curseg)->next_segno,
997 						BG_GC, type, SSR))
998 			return 1;
999 	return 0;
1000 }
1001 
1002 /*
1003  * Flush out the current segment and replace it with a new one.
1004  * This function must succeed, otherwise it is a BUG.
1005  */
1006 static void allocate_segment_by_default(struct f2fs_sb_info *sbi,
1007 						int type, bool force)
1008 {
1009 	struct curseg_info *curseg = CURSEG_I(sbi, type);
1010 
1011 	if (force)
1012 		new_curseg(sbi, type, true);
1013 	else if (type == CURSEG_WARM_NODE)
1014 		new_curseg(sbi, type, false);
1015 	else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type))
1016 		new_curseg(sbi, type, false);
1017 	else if (need_SSR(sbi) && get_ssr_segment(sbi, type))
1018 		change_curseg(sbi, type, true);
1019 	else
1020 		new_curseg(sbi, type, false);
1021 
1022 	stat_inc_seg_type(sbi, curseg);
1023 }
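
/*
 * Decision summary for allocate_segment_by_default():
 *
 *   force                               -> fresh segment in a new section
 *   CURSEG_WARM_NODE                    -> fresh segment (LFS)
 *   LFS curseg and next segment free    -> fresh segment (LFS)
 *   need_SSR() and a victim is found    -> reuse a dirty segment (SSR)
 *   otherwise                           -> fresh segment (LFS)
 */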
1024 
1025 void allocate_new_segments(struct f2fs_sb_info *sbi)
1026 {
1027 	struct curseg_info *curseg;
1028 	unsigned int old_curseg;
1029 	int i;
1030 
1031 	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
1032 		curseg = CURSEG_I(sbi, i);
1033 		old_curseg = curseg->segno;
1034 		SIT_I(sbi)->s_ops->allocate_segment(sbi, i, true);
1035 		locate_dirty_segment(sbi, old_curseg);
1036 	}
1037 }
1038 
1039 static const struct segment_allocation default_salloc_ops = {
1040 	.allocate_segment = allocate_segment_by_default,
1041 };
1042 
1043 int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
1044 {
1045 	__u64 start = range->start >> sbi->log_blocksize;
1046 	__u64 end = start + (range->len >> sbi->log_blocksize) - 1;
1047 	unsigned int start_segno, end_segno;
1048 	struct cp_control cpc;
1049 
1050 	if (range->minlen > SEGMENT_SIZE(sbi) || start >= MAX_BLKADDR(sbi) ||
1051 						range->len < sbi->blocksize)
1052 		return -EINVAL;
1053 
1054 	cpc.trimmed = 0;
1055 	if (end <= MAIN_BLKADDR(sbi))
1056 		goto out;
1057 
1058 	/* start/end segment number in main_area */
1059 	start_segno = (start <= MAIN_BLKADDR(sbi)) ? 0 : GET_SEGNO(sbi, start);
1060 	end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 :
1061 						GET_SEGNO(sbi, end);
1062 	cpc.reason = CP_DISCARD;
1063 	cpc.trim_start = start_segno;
1064 	cpc.trim_end = end_segno;
1065 	cpc.trim_minlen = range->minlen >> sbi->log_blocksize;
1066 
1067 	/* do checkpoint to issue discard commands safely */
1068 	mutex_lock(&sbi->gc_mutex);
1069 	write_checkpoint(sbi, &cpc);
1070 	mutex_unlock(&sbi->gc_mutex);
1071 out:
1072 	range->len = cpc.trimmed << sbi->log_blocksize;
1073 	return 0;
1074 }
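
/*
 * Worked example for the conversion above, assuming 4 KiB blocks
 * (log_blocksize == 12): an FITRIM request with start = 1 GiB and
 * len = 512 MiB becomes start block 262144 and length 131072 blocks,
 * which are then clamped to main-area segment numbers before the
 * discard checkpoint runs.
 */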
1075 
1076 static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type)
1077 {
1078 	struct curseg_info *curseg = CURSEG_I(sbi, type);
1079 	if (curseg->next_blkoff < sbi->blocks_per_seg)
1080 		return true;
1081 	return false;
1082 }
1083 
1084 static int __get_segment_type_2(struct page *page, enum page_type p_type)
1085 {
1086 	if (p_type == DATA)
1087 		return CURSEG_HOT_DATA;
1088 	else
1089 		return CURSEG_HOT_NODE;
1090 }
1091 
1092 static int __get_segment_type_4(struct page *page, enum page_type p_type)
1093 {
1094 	if (p_type == DATA) {
1095 		struct inode *inode = page->mapping->host;
1096 
1097 		if (S_ISDIR(inode->i_mode))
1098 			return CURSEG_HOT_DATA;
1099 		else
1100 			return CURSEG_COLD_DATA;
1101 	} else {
1102 		if (IS_DNODE(page) && is_cold_node(page))
1103 			return CURSEG_WARM_NODE;
1104 		else
1105 			return CURSEG_COLD_NODE;
1106 	}
1107 }
1108 
1109 static int __get_segment_type_6(struct page *page, enum page_type p_type)
1110 {
1111 	if (p_type == DATA) {
1112 		struct inode *inode = page->mapping->host;
1113 
1114 		if (S_ISDIR(inode->i_mode))
1115 			return CURSEG_HOT_DATA;
1116 		else if (is_cold_data(page) || file_is_cold(inode))
1117 			return CURSEG_COLD_DATA;
1118 		else
1119 			return CURSEG_WARM_DATA;
1120 	} else {
1121 		if (IS_DNODE(page))
1122 			return is_cold_node(page) ? CURSEG_WARM_NODE :
1123 						CURSEG_HOT_NODE;
1124 		else
1125 			return CURSEG_COLD_NODE;
1126 	}
1127 }
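
/*
 * Temperature mapping with all six logs, as implemented above:
 *
 *   DATA: directory blocks             -> HOT
 *         cold-hinted or cold files    -> COLD
 *         everything else              -> WARM
 *   NODE: direct node of a cold file   -> WARM
 *         other direct nodes           -> HOT
 *         indirect nodes               -> COLD
 */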
1128 
1129 static int __get_segment_type(struct page *page, enum page_type p_type)
1130 {
1131 	switch (F2FS_P_SB(page)->active_logs) {
1132 	case 2:
1133 		return __get_segment_type_2(page, p_type);
1134 	case 4:
1135 		return __get_segment_type_4(page, p_type);
1136 	}
1137 	/* NR_CURSEG_TYPE(6) logs by default */
1138 	f2fs_bug_on(F2FS_P_SB(page),
1139 		F2FS_P_SB(page)->active_logs != NR_CURSEG_TYPE);
1140 	return __get_segment_type_6(page, p_type);
1141 }
1142 
1143 void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
1144 		block_t old_blkaddr, block_t *new_blkaddr,
1145 		struct f2fs_summary *sum, int type)
1146 {
1147 	struct sit_info *sit_i = SIT_I(sbi);
1148 	struct curseg_info *curseg;
1149 
1150 	curseg = CURSEG_I(sbi, type);
1151 
1152 	mutex_lock(&curseg->curseg_mutex);
1153 
1154 	*new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
1155 
1156 	/*
1157 	 * __add_sum_entry must be called with curseg_mutex held
1158 	 * because it updates a summary entry in the
1159 	 * current summary block.
1160 	 */
1161 	__add_sum_entry(sbi, type, sum);
1162 
1163 	mutex_lock(&sit_i->sentry_lock);
1164 	__refresh_next_blkoff(sbi, curseg);
1165 
1166 	stat_inc_block_count(sbi, curseg);
1167 
1168 	if (!__has_curseg_space(sbi, type))
1169 		sit_i->s_ops->allocate_segment(sbi, type, false);
1170 	/*
1171 	 * SIT information should be updated before segment allocation,
1172 	 * since SSR needs latest valid block information.
1173 	 */
1174 	refresh_sit_entry(sbi, old_blkaddr, *new_blkaddr);
1175 
1176 	mutex_unlock(&sit_i->sentry_lock);
1177 
1178 	if (page && IS_NODESEG(type))
1179 		fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg));
1180 
1181 	mutex_unlock(&curseg->curseg_mutex);
1182 }
1183 
1184 static void do_write_page(struct f2fs_sb_info *sbi, struct page *page,
1185 			block_t old_blkaddr, block_t *new_blkaddr,
1186 			struct f2fs_summary *sum, struct f2fs_io_info *fio)
1187 {
1188 	int type = __get_segment_type(page, fio->type);
1189 
1190 	allocate_data_block(sbi, page, old_blkaddr, new_blkaddr, sum, type);
1191 
1192 	/* writeout dirty page into bdev */
1193 	f2fs_submit_page_mbio(sbi, page, *new_blkaddr, fio);
1194 }
1195 
1196 void write_meta_page(struct f2fs_sb_info *sbi, struct page *page)
1197 {
1198 	struct f2fs_io_info fio = {
1199 		.type = META,
1200 		.rw = WRITE_SYNC | REQ_META | REQ_PRIO
1201 	};
1202 
1203 	set_page_writeback(page);
1204 	f2fs_submit_page_mbio(sbi, page, page->index, &fio);
1205 }
1206 
1207 void write_node_page(struct f2fs_sb_info *sbi, struct page *page,
1208 		struct f2fs_io_info *fio,
1209 		unsigned int nid, block_t old_blkaddr, block_t *new_blkaddr)
1210 {
1211 	struct f2fs_summary sum;
1212 	set_summary(&sum, nid, 0, 0);
1213 	do_write_page(sbi, page, old_blkaddr, new_blkaddr, &sum, fio);
1214 }
1215 
1216 void write_data_page(struct page *page, struct dnode_of_data *dn,
1217 		block_t *new_blkaddr, struct f2fs_io_info *fio)
1218 {
1219 	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
1220 	struct f2fs_summary sum;
1221 	struct node_info ni;
1222 
1223 	f2fs_bug_on(sbi, dn->data_blkaddr == NULL_ADDR);
1224 	get_node_info(sbi, dn->nid, &ni);
1225 	set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
1226 
1227 	do_write_page(sbi, page, dn->data_blkaddr, new_blkaddr, &sum, fio);
1228 }
1229 
1230 void rewrite_data_page(struct page *page, block_t old_blkaddr,
1231 					struct f2fs_io_info *fio)
1232 {
1233 	f2fs_submit_page_mbio(F2FS_P_SB(page), page, old_blkaddr, fio);
1234 }
1235 
1236 void recover_data_page(struct f2fs_sb_info *sbi,
1237 			struct page *page, struct f2fs_summary *sum,
1238 			block_t old_blkaddr, block_t new_blkaddr)
1239 {
1240 	struct sit_info *sit_i = SIT_I(sbi);
1241 	struct curseg_info *curseg;
1242 	unsigned int segno, old_cursegno;
1243 	struct seg_entry *se;
1244 	int type;
1245 
1246 	segno = GET_SEGNO(sbi, new_blkaddr);
1247 	se = get_seg_entry(sbi, segno);
1248 	type = se->type;
1249 
1250 	if (se->valid_blocks == 0 && !IS_CURSEG(sbi, segno)) {
1251 		if (old_blkaddr == NULL_ADDR)
1252 			type = CURSEG_COLD_DATA;
1253 		else
1254 			type = CURSEG_WARM_DATA;
1255 	}
1256 	curseg = CURSEG_I(sbi, type);
1257 
1258 	mutex_lock(&curseg->curseg_mutex);
1259 	mutex_lock(&sit_i->sentry_lock);
1260 
1261 	old_cursegno = curseg->segno;
1262 
1263 	/* change the current segment */
1264 	if (segno != curseg->segno) {
1265 		curseg->next_segno = segno;
1266 		change_curseg(sbi, type, true);
1267 	}
1268 
1269 	curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr);
1270 	__add_sum_entry(sbi, type, sum);
1271 
1272 	refresh_sit_entry(sbi, old_blkaddr, new_blkaddr);
1273 	locate_dirty_segment(sbi, old_cursegno);
1274 
1275 	mutex_unlock(&sit_i->sentry_lock);
1276 	mutex_unlock(&curseg->curseg_mutex);
1277 }
1278 
1279 static inline bool is_merged_page(struct f2fs_sb_info *sbi,
1280 					struct page *page, enum page_type type)
1281 {
1282 	enum page_type btype = PAGE_TYPE_OF_BIO(type);
1283 	struct f2fs_bio_info *io = &sbi->write_io[btype];
1284 	struct bio_vec *bvec;
1285 	int i;
1286 
1287 	down_read(&io->io_rwsem);
1288 	if (!io->bio)
1289 		goto out;
1290 
1291 	bio_for_each_segment_all(bvec, io->bio, i) {
1292 		if (page == bvec->bv_page) {
1293 			up_read(&io->io_rwsem);
1294 			return true;
1295 		}
1296 	}
1297 
1298 out:
1299 	up_read(&io->io_rwsem);
1300 	return false;
1301 }
1302 
1303 void f2fs_wait_on_page_writeback(struct page *page,
1304 				enum page_type type)
1305 {
1306 	if (PageWriteback(page)) {
1307 		struct f2fs_sb_info *sbi = F2FS_P_SB(page);
1308 
1309 		if (is_merged_page(sbi, page, type))
1310 			f2fs_submit_merged_bio(sbi, type, WRITE);
1311 		wait_on_page_writeback(page);
1312 	}
1313 }
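
/*
 * Rationale for the helper above: a dirty page may still sit in the
 * per-type merged bio cache without having been submitted to the block
 * layer, in which case its writeback bit would never clear.  Forcing
 * the merged bio out first lets wait_on_page_writeback() make progress.
 */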
1314 
1315 static int read_compacted_summaries(struct f2fs_sb_info *sbi)
1316 {
1317 	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
1318 	struct curseg_info *seg_i;
1319 	unsigned char *kaddr;
1320 	struct page *page;
1321 	block_t start;
1322 	int i, j, offset;
1323 
1324 	start = start_sum_block(sbi);
1325 
1326 	page = get_meta_page(sbi, start++);
1327 	kaddr = (unsigned char *)page_address(page);
1328 
1329 	/* Step 1: restore nat cache */
1330 	seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
1331 	memcpy(&seg_i->sum_blk->n_nats, kaddr, SUM_JOURNAL_SIZE);
1332 
1333 	/* Step 2: restore sit cache */
1334 	seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
1335 	memcpy(&seg_i->sum_blk->n_sits, kaddr + SUM_JOURNAL_SIZE,
1336 						SUM_JOURNAL_SIZE);
1337 	offset = 2 * SUM_JOURNAL_SIZE;
1338 
1339 	/* Step 3: restore summary entries */
1340 	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
1341 		unsigned short blk_off;
1342 		unsigned int segno;
1343 
1344 		seg_i = CURSEG_I(sbi, i);
1345 		segno = le32_to_cpu(ckpt->cur_data_segno[i]);
1346 		blk_off = le16_to_cpu(ckpt->cur_data_blkoff[i]);
1347 		seg_i->next_segno = segno;
1348 		reset_curseg(sbi, i, 0);
1349 		seg_i->alloc_type = ckpt->alloc_type[i];
1350 		seg_i->next_blkoff = blk_off;
1351 
1352 		if (seg_i->alloc_type == SSR)
1353 			blk_off = sbi->blocks_per_seg;
1354 
1355 		for (j = 0; j < blk_off; j++) {
1356 			struct f2fs_summary *s;
1357 			s = (struct f2fs_summary *)(kaddr + offset);
1358 			seg_i->sum_blk->entries[j] = *s;
1359 			offset += SUMMARY_SIZE;
1360 			if (offset + SUMMARY_SIZE <= PAGE_CACHE_SIZE -
1361 						SUM_FOOTER_SIZE)
1362 				continue;
1363 
1364 			f2fs_put_page(page, 1);
1365 			page = NULL;
1366 
1367 			page = get_meta_page(sbi, start++);
1368 			kaddr = (unsigned char *)page_address(page);
1369 			offset = 0;
1370 		}
1371 	}
1372 	f2fs_put_page(page, 1);
1373 	return 0;
1374 }
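
/*
 * On-disk layout parsed by the reader above (first compacted block):
 *
 *   +-------------+-------------+---------------------------------+
 *   | NAT journal | SIT journal | summaries: HOT/WARM/COLD data   |
 *   +-------------+-------------+---------------------------------+
 *
 * Summary entries spill over into one or two further blocks when they
 * do not fit, mirroring the arithmetic in npages_for_summary_flush().
 */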
1375 
1376 static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
1377 {
1378 	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
1379 	struct f2fs_summary_block *sum;
1380 	struct curseg_info *curseg;
1381 	struct page *new;
1382 	unsigned short blk_off;
1383 	unsigned int segno = 0;
1384 	block_t blk_addr = 0;
1385 
1386 	/* get segment number and block addr */
1387 	if (IS_DATASEG(type)) {
1388 		segno = le32_to_cpu(ckpt->cur_data_segno[type]);
1389 		blk_off = le16_to_cpu(ckpt->cur_data_blkoff[type -
1390 							CURSEG_HOT_DATA]);
1391 		if (is_set_ckpt_flags(ckpt, CP_UMOUNT_FLAG))
1392 			blk_addr = sum_blk_addr(sbi, NR_CURSEG_TYPE, type);
1393 		else
1394 			blk_addr = sum_blk_addr(sbi, NR_CURSEG_DATA_TYPE, type);
1395 	} else {
1396 		segno = le32_to_cpu(ckpt->cur_node_segno[type -
1397 							CURSEG_HOT_NODE]);
1398 		blk_off = le16_to_cpu(ckpt->cur_node_blkoff[type -
1399 							CURSEG_HOT_NODE]);
1400 		if (is_set_ckpt_flags(ckpt, CP_UMOUNT_FLAG))
1401 			blk_addr = sum_blk_addr(sbi, NR_CURSEG_NODE_TYPE,
1402 							type - CURSEG_HOT_NODE);
1403 		else
1404 			blk_addr = GET_SUM_BLOCK(sbi, segno);
1405 	}
1406 
1407 	new = get_meta_page(sbi, blk_addr);
1408 	sum = (struct f2fs_summary_block *)page_address(new);
1409 
1410 	if (IS_NODESEG(type)) {
1411 		if (is_set_ckpt_flags(ckpt, CP_UMOUNT_FLAG)) {
1412 			struct f2fs_summary *ns = &sum->entries[0];
1413 			int i;
1414 			for (i = 0; i < sbi->blocks_per_seg; i++, ns++) {
1415 				ns->version = 0;
1416 				ns->ofs_in_node = 0;
1417 			}
1418 		} else {
1419 			int err;
1420 
1421 			err = restore_node_summary(sbi, segno, sum);
1422 			if (err) {
1423 				f2fs_put_page(new, 1);
1424 				return err;
1425 			}
1426 		}
1427 	}
1428 
1429 	/* set uncompleted segment to curseg */
1430 	curseg = CURSEG_I(sbi, type);
1431 	mutex_lock(&curseg->curseg_mutex);
1432 	memcpy(curseg->sum_blk, sum, PAGE_CACHE_SIZE);
1433 	curseg->next_segno = segno;
1434 	reset_curseg(sbi, type, 0);
1435 	curseg->alloc_type = ckpt->alloc_type[type];
1436 	curseg->next_blkoff = blk_off;
1437 	mutex_unlock(&curseg->curseg_mutex);
1438 	f2fs_put_page(new, 1);
1439 	return 0;
1440 }
1441 
1442 static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
1443 {
1444 	int type = CURSEG_HOT_DATA;
1445 	int err;
1446 
1447 	if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_COMPACT_SUM_FLAG)) {
1448 		/* restore for compacted data summary */
1449 		if (read_compacted_summaries(sbi))
1450 			return -EINVAL;
1451 		type = CURSEG_HOT_NODE;
1452 	}
1453 
1454 	for (; type <= CURSEG_COLD_NODE; type++) {
1455 		err = read_normal_summaries(sbi, type);
1456 		if (err)
1457 			return err;
1458 	}
1459 
1460 	return 0;
1461 }
1462 
1463 static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr)
1464 {
1465 	struct page *page;
1466 	unsigned char *kaddr;
1467 	struct f2fs_summary *summary;
1468 	struct curseg_info *seg_i;
1469 	int written_size = 0;
1470 	int i, j;
1471 
1472 	page = grab_meta_page(sbi, blkaddr++);
1473 	kaddr = (unsigned char *)page_address(page);
1474 
1475 	/* Step 1: write nat cache */
1476 	seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
1477 	memcpy(kaddr, &seg_i->sum_blk->n_nats, SUM_JOURNAL_SIZE);
1478 	written_size += SUM_JOURNAL_SIZE;
1479 
1480 	/* Step 2: write sit cache */
1481 	seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
1482 	memcpy(kaddr + written_size, &seg_i->sum_blk->n_sits,
1483 						SUM_JOURNAL_SIZE);
1484 	written_size += SUM_JOURNAL_SIZE;
1485 
1486 	/* Step 3: write summary entries */
1487 	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
1488 		unsigned short blkoff;
1489 		seg_i = CURSEG_I(sbi, i);
1490 		if (sbi->ckpt->alloc_type[i] == SSR)
1491 			blkoff = sbi->blocks_per_seg;
1492 		else
1493 			blkoff = curseg_blkoff(sbi, i);
1494 
1495 		for (j = 0; j < blkoff; j++) {
1496 			if (!page) {
1497 				page = grab_meta_page(sbi, blkaddr++);
1498 				kaddr = (unsigned char *)page_address(page);
1499 				written_size = 0;
1500 			}
1501 			summary = (struct f2fs_summary *)(kaddr + written_size);
1502 			*summary = seg_i->sum_blk->entries[j];
1503 			written_size += SUMMARY_SIZE;
1504 
1505 			if (written_size + SUMMARY_SIZE <= PAGE_CACHE_SIZE -
1506 							SUM_FOOTER_SIZE)
1507 				continue;
1508 
1509 			set_page_dirty(page);
1510 			f2fs_put_page(page, 1);
1511 			page = NULL;
1512 		}
1513 	}
1514 	if (page) {
1515 		set_page_dirty(page);
1516 		f2fs_put_page(page, 1);
1517 	}
1518 }
1519 
1520 static void write_normal_summaries(struct f2fs_sb_info *sbi,
1521 					block_t blkaddr, int type)
1522 {
1523 	int i, end;
1524 	if (IS_DATASEG(type))
1525 		end = type + NR_CURSEG_DATA_TYPE;
1526 	else
1527 		end = type + NR_CURSEG_NODE_TYPE;
1528 
1529 	for (i = type; i < end; i++) {
1530 		struct curseg_info *sum = CURSEG_I(sbi, i);
1531 		mutex_lock(&sum->curseg_mutex);
1532 		write_sum_page(sbi, sum->sum_blk, blkaddr + (i - type));
1533 		mutex_unlock(&sum->curseg_mutex);
1534 	}
1535 }
1536 
1537 void write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
1538 {
1539 	if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_COMPACT_SUM_FLAG))
1540 		write_compacted_summaries(sbi, start_blk);
1541 	else
1542 		write_normal_summaries(sbi, start_blk, CURSEG_HOT_DATA);
1543 }
1544 
1545 void write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
1546 {
1547 	if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG))
1548 		write_normal_summaries(sbi, start_blk, CURSEG_HOT_NODE);
1549 }
1550 
1551 int lookup_journal_in_cursum(struct f2fs_summary_block *sum, int type,
1552 					unsigned int val, int alloc)
1553 {
1554 	int i;
1555 
1556 	if (type == NAT_JOURNAL) {
1557 		for (i = 0; i < nats_in_cursum(sum); i++) {
1558 			if (le32_to_cpu(nid_in_journal(sum, i)) == val)
1559 				return i;
1560 		}
1561 		if (alloc && nats_in_cursum(sum) < NAT_JOURNAL_ENTRIES)
1562 			return update_nats_in_cursum(sum, 1);
1563 	} else if (type == SIT_JOURNAL) {
1564 		for (i = 0; i < sits_in_cursum(sum); i++)
1565 			if (le32_to_cpu(segno_in_journal(sum, i)) == val)
1566 				return i;
1567 		if (alloc && sits_in_cursum(sum) < SIT_JOURNAL_ENTRIES)
1568 			return update_sits_in_cursum(sum, 1);
1569 	}
1570 	return -1;
1571 }
1572 
1573 static struct page *get_current_sit_page(struct f2fs_sb_info *sbi,
1574 					unsigned int segno)
1575 {
1576 	return get_meta_page(sbi, current_sit_addr(sbi, segno));
1577 }
1578 
1579 static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,
1580 					unsigned int start)
1581 {
1582 	struct sit_info *sit_i = SIT_I(sbi);
1583 	struct page *src_page, *dst_page;
1584 	pgoff_t src_off, dst_off;
1585 	void *src_addr, *dst_addr;
1586 
1587 	src_off = current_sit_addr(sbi, start);
1588 	dst_off = next_sit_addr(sbi, src_off);
1589 
1590 	/* get current sit block page without lock */
1591 	src_page = get_meta_page(sbi, src_off);
1592 	dst_page = grab_meta_page(sbi, dst_off);
1593 	f2fs_bug_on(sbi, PageDirty(src_page));
1594 
1595 	src_addr = page_address(src_page);
1596 	dst_addr = page_address(dst_page);
1597 	memcpy(dst_addr, src_addr, PAGE_CACHE_SIZE);
1598 
1599 	set_page_dirty(dst_page);
1600 	f2fs_put_page(src_page, 1);
1601 
1602 	set_to_next_sit(sit_i, start);
1603 
1604 	return dst_page;
1605 }
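
/*
 * Note on the copy above: the SIT area holds two on-disk slots per SIT
 * block (hence the "segment_count_sit >> 1" in build_sit_info()).
 * Updates are always written to the slot *not* referenced by the
 * current bitmap, and set_to_next_sit() flips the bit, so a crash in
 * mid-write leaves the previously valid copy intact.
 */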
1606 
1607 static struct sit_entry_set *grab_sit_entry_set(void)
1608 {
1609 	struct sit_entry_set *ses =
1610 			f2fs_kmem_cache_alloc(sit_entry_set_slab, GFP_ATOMIC);
1611 
1612 	ses->entry_cnt = 0;
1613 	INIT_LIST_HEAD(&ses->set_list);
1614 	return ses;
1615 }
1616 
1617 static void release_sit_entry_set(struct sit_entry_set *ses)
1618 {
1619 	list_del(&ses->set_list);
1620 	kmem_cache_free(sit_entry_set_slab, ses);
1621 }
1622 
1623 static void adjust_sit_entry_set(struct sit_entry_set *ses,
1624 						struct list_head *head)
1625 {
1626 	struct sit_entry_set *next = ses;
1627 
1628 	if (list_is_last(&ses->set_list, head))
1629 		return;
1630 
1631 	list_for_each_entry_continue(next, head, set_list)
1632 		if (ses->entry_cnt <= next->entry_cnt)
1633 			break;
1634 
1635 	list_move_tail(&ses->set_list, &next->set_list);
1636 }
1637 
1638 static void add_sit_entry(unsigned int segno, struct list_head *head)
1639 {
1640 	struct sit_entry_set *ses;
1641 	unsigned int start_segno = START_SEGNO(segno);
1642 
1643 	list_for_each_entry(ses, head, set_list) {
1644 		if (ses->start_segno == start_segno) {
1645 			ses->entry_cnt++;
1646 			adjust_sit_entry_set(ses, head);
1647 			return;
1648 		}
1649 	}
1650 
1651 	ses = grab_sit_entry_set();
1652 
1653 	ses->start_segno = start_segno;
1654 	ses->entry_cnt++;
1655 	list_add(&ses->set_list, head);
1656 }
1657 
1658 static void add_sits_in_set(struct f2fs_sb_info *sbi)
1659 {
1660 	struct f2fs_sm_info *sm_info = SM_I(sbi);
1661 	struct list_head *set_list = &sm_info->sit_entry_set;
1662 	unsigned long *bitmap = SIT_I(sbi)->dirty_sentries_bitmap;
1663 	unsigned int segno;
1664 
1665 	for_each_set_bit(segno, bitmap, MAIN_SEGS(sbi))
1666 		add_sit_entry(segno, set_list);
1667 }
1668 
1669 static void remove_sits_in_journal(struct f2fs_sb_info *sbi)
1670 {
1671 	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
1672 	struct f2fs_summary_block *sum = curseg->sum_blk;
1673 	int i;
1674 
1675 	for (i = sits_in_cursum(sum) - 1; i >= 0; i--) {
1676 		unsigned int segno;
1677 		bool dirtied;
1678 
1679 		segno = le32_to_cpu(segno_in_journal(sum, i));
1680 		dirtied = __mark_sit_entry_dirty(sbi, segno);
1681 
1682 		if (!dirtied)
1683 			add_sit_entry(segno, &SM_I(sbi)->sit_entry_set);
1684 	}
1685 	update_sits_in_cursum(sum, -sits_in_cursum(sum));
1686 }
1687 
1688 /*
1689  * CP calls this function, which flushes SIT entries including sit_journal,
1690  * and moves prefree segs to free segs.
1691  */
1692 void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
1693 {
1694 	struct sit_info *sit_i = SIT_I(sbi);
1695 	unsigned long *bitmap = sit_i->dirty_sentries_bitmap;
1696 	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
1697 	struct f2fs_summary_block *sum = curseg->sum_blk;
1698 	struct sit_entry_set *ses, *tmp;
1699 	struct list_head *head = &SM_I(sbi)->sit_entry_set;
1700 	bool to_journal = true;
1701 	struct seg_entry *se;
1702 
1703 	mutex_lock(&curseg->curseg_mutex);
1704 	mutex_lock(&sit_i->sentry_lock);
1705 
1706 	/*
1707 	 * temporarily add and account the sit entries marked in the dirty
1708 	 * bitmap into the sit entry set
1709 	 */
1710 	add_sits_in_set(sbi);
1711 
1712 	/*
1713 	 * if there is not enough space in the journal to store dirty sit
1714 	 * entries, remove all entries from the journal, then add and account
1715 	 * them in the sit entry set.
1716 	 */
1717 	if (!__has_cursum_space(sum, sit_i->dirty_sentries, SIT_JOURNAL))
1718 		remove_sits_in_journal(sbi);
1719 
1720 	if (!sit_i->dirty_sentries)
1721 		goto out;
1722 
1723 	/*
1724 	 * there are two steps to flush sit entries:
1725 	 * #1, flush sit entries to journal in current cold data summary block.
1726 	 * #2, flush sit entries to sit page.
1727 	 */
1728 	list_for_each_entry_safe(ses, tmp, head, set_list) {
1729 		struct page *page = NULL;
1730 		struct f2fs_sit_block *raw_sit = NULL;
1731 		unsigned int start_segno = ses->start_segno;
1732 		unsigned int end = min(start_segno + SIT_ENTRY_PER_BLOCK,
1733 						(unsigned long)MAIN_SEGS(sbi));
1734 		unsigned int segno = start_segno;
1735 
1736 		if (to_journal &&
1737 			!__has_cursum_space(sum, ses->entry_cnt, SIT_JOURNAL))
1738 			to_journal = false;
1739 
1740 		if (!to_journal) {
1741 			page = get_next_sit_page(sbi, start_segno);
1742 			raw_sit = page_address(page);
1743 		}
1744 
1745 		/* flush dirty sit entries in region of current sit set */
1746 		for_each_set_bit_from(segno, bitmap, end) {
1747 			int offset, sit_offset;
1748 
1749 			se = get_seg_entry(sbi, segno);
1750 
1751 			/* add discard candidates */
1752 			if (SM_I(sbi)->nr_discards < SM_I(sbi)->max_discards) {
1753 				cpc->trim_start = segno;
1754 				add_discard_addrs(sbi, cpc);
1755 			}
1756 
1757 			if (to_journal) {
1758 				offset = lookup_journal_in_cursum(sum,
1759 							SIT_JOURNAL, segno, 1);
1760 				f2fs_bug_on(sbi, offset < 0);
1761 				segno_in_journal(sum, offset) =
1762 							cpu_to_le32(segno);
1763 				seg_info_to_raw_sit(se,
1764 						&sit_in_journal(sum, offset));
1765 			} else {
1766 				sit_offset = SIT_ENTRY_OFFSET(sit_i, segno);
1767 				seg_info_to_raw_sit(se,
1768 						&raw_sit->entries[sit_offset]);
1769 			}
1770 
1771 			__clear_bit(segno, bitmap);
1772 			sit_i->dirty_sentries--;
1773 			ses->entry_cnt--;
1774 		}
1775 
1776 		if (!to_journal)
1777 			f2fs_put_page(page, 1);
1778 
1779 		f2fs_bug_on(sbi, ses->entry_cnt);
1780 		release_sit_entry_set(ses);
1781 	}
1782 
1783 	f2fs_bug_on(sbi, !list_empty(head));
1784 	f2fs_bug_on(sbi, sit_i->dirty_sentries);
1785 out:
1786 	if (cpc->reason == CP_DISCARD) {
1787 		for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++)
1788 			add_discard_addrs(sbi, cpc);
1789 	}
1790 	mutex_unlock(&sit_i->sentry_lock);
1791 	mutex_unlock(&curseg->curseg_mutex);
1792 
1793 	set_prefree_as_free_segments(sbi);
1794 }
1795 
1796 static int build_sit_info(struct f2fs_sb_info *sbi)
1797 {
1798 	struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
1799 	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
1800 	struct sit_info *sit_i;
1801 	unsigned int sit_segs, start;
1802 	char *src_bitmap, *dst_bitmap;
1803 	unsigned int bitmap_size;
1804 
1805 	/* allocate memory for SIT information */
1806 	sit_i = kzalloc(sizeof(struct sit_info), GFP_KERNEL);
1807 	if (!sit_i)
1808 		return -ENOMEM;
1809 
1810 	SM_I(sbi)->sit_info = sit_i;
1811 
1812 	sit_i->sentries = vzalloc(MAIN_SEGS(sbi) * sizeof(struct seg_entry));
1813 	if (!sit_i->sentries)
1814 		return -ENOMEM;
1815 
1816 	bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
1817 	sit_i->dirty_sentries_bitmap = kzalloc(bitmap_size, GFP_KERNEL);
1818 	if (!sit_i->dirty_sentries_bitmap)
1819 		return -ENOMEM;
1820 
1821 	for (start = 0; start < MAIN_SEGS(sbi); start++) {
1822 		sit_i->sentries[start].cur_valid_map
1823 			= kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
1824 		sit_i->sentries[start].ckpt_valid_map
1825 			= kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
1826 		if (!sit_i->sentries[start].cur_valid_map
1827 				|| !sit_i->sentries[start].ckpt_valid_map)
1828 			return -ENOMEM;
1829 	}
1830 
1831 	if (sbi->segs_per_sec > 1) {
1832 		sit_i->sec_entries = vzalloc(MAIN_SECS(sbi) *
1833 					sizeof(struct sec_entry));
1834 		if (!sit_i->sec_entries)
1835 			return -ENOMEM;
1836 	}
1837 
1838 	/* get information related to SIT */
1839 	sit_segs = le32_to_cpu(raw_super->segment_count_sit) >> 1;
1840 
1841 	/* setup SIT bitmap from checkpoint pack */
1842 	bitmap_size = __bitmap_size(sbi, SIT_BITMAP);
1843 	src_bitmap = __bitmap_ptr(sbi, SIT_BITMAP);
1844 
1845 	dst_bitmap = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL);
1846 	if (!dst_bitmap)
1847 		return -ENOMEM;
1848 
1849 	/* init SIT information */
1850 	sit_i->s_ops = &default_salloc_ops;
1851 
1852 	sit_i->sit_base_addr = le32_to_cpu(raw_super->sit_blkaddr);
1853 	sit_i->sit_blocks = sit_segs << sbi->log_blocks_per_seg;
1854 	sit_i->written_valid_blocks = le64_to_cpu(ckpt->valid_block_count);
1855 	sit_i->sit_bitmap = dst_bitmap;
1856 	sit_i->bitmap_size = bitmap_size;
1857 	sit_i->dirty_sentries = 0;
1858 	sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK;
1859 	sit_i->elapsed_time = le64_to_cpu(ckpt->elapsed_time);
1860 	sit_i->mounted_time = CURRENT_TIME_SEC.tv_sec;
1861 	mutex_init(&sit_i->sentry_lock);
1862 	return 0;
1863 }
1864 
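/*
 * Allocate the free segment/section bitmaps.  Every bit starts set, i.e.
 * all segments look in-use until init_free_segmap() clears the bits of
 * segments that the SIT reports as having no valid blocks.
 */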
1865 static int build_free_segmap(struct f2fs_sb_info *sbi)
1866 {
1867 	struct free_segmap_info *free_i;
1868 	unsigned int bitmap_size, sec_bitmap_size;
1869 
1870 	/* allocate memory for free segmap information */
1871 	free_i = kzalloc(sizeof(struct free_segmap_info), GFP_KERNEL);
1872 	if (!free_i)
1873 		return -ENOMEM;
1874 
1875 	SM_I(sbi)->free_info = free_i;
1876 
1877 	bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
1878 	free_i->free_segmap = kmalloc(bitmap_size, GFP_KERNEL);
1879 	if (!free_i->free_segmap)
1880 		return -ENOMEM;
1881 
1882 	sec_bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
1883 	free_i->free_secmap = kmalloc(sec_bitmap_size, GFP_KERNEL);
1884 	if (!free_i->free_secmap)
1885 		return -ENOMEM;
1886 
1887 	/* set all segments as dirty temporarily */
1888 	memset(free_i->free_segmap, 0xff, bitmap_size);
1889 	memset(free_i->free_secmap, 0xff, sec_bitmap_size);
1890 
1891 	/* init free segmap information */
1892 	free_i->start_segno = GET_SEGNO_FROM_SEG0(sbi, MAIN_BLKADDR(sbi));
1893 	free_i->free_segments = 0;
1894 	free_i->free_sections = 0;
1895 	rwlock_init(&free_i->segmap_lock);
1896 	return 0;
1897 }
1898 
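/*
 * Allocate one curseg_info per active log (hot/warm/cold data and node)
 * and restore their summary blocks from the checkpoint.
 */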
1899 static int build_curseg(struct f2fs_sb_info *sbi)
1900 {
1901 	struct curseg_info *array;
1902 	int i;
1903 
1904 	array = kcalloc(NR_CURSEG_TYPE, sizeof(*array), GFP_KERNEL);
1905 	if (!array)
1906 		return -ENOMEM;
1907 
1908 	SM_I(sbi)->curseg_array = array;
1909 
1910 	for (i = 0; i < NR_CURSEG_TYPE; i++) {
1911 		mutex_init(&array[i].curseg_mutex);
1912 		array[i].sum_blk = kzalloc(PAGE_CACHE_SIZE, GFP_KERNEL);
1913 		if (!array[i].sum_blk)
1914 			return -ENOMEM;
1915 		array[i].segno = NULL_SEGNO;
1916 		array[i].next_blkoff = 0;
1917 	}
1918 	return restore_curseg_summaries(sbi);
1919 }
1920 
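/*
 * Load every SIT entry into memory, preferring the copy cached in the
 * SIT journal of the cold data curseg over the on-disk SIT block, since
 * journalled entries are newer.
 */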
1921 static void build_sit_entries(struct f2fs_sb_info *sbi)
1922 {
1923 	struct sit_info *sit_i = SIT_I(sbi);
1924 	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
1925 	struct f2fs_summary_block *sum = curseg->sum_blk;
1926 	int sit_blk_cnt = SIT_BLK_CNT(sbi);
1927 	unsigned int i, start, end;
1928 	unsigned int readed, start_blk = 0;
1929 	int nrpages = MAX_BIO_BLOCKS(sbi);
1930 
1931 	do {
1932 		readed = ra_meta_pages(sbi, start_blk, nrpages, META_SIT);
1933 
1934 		start = start_blk * sit_i->sents_per_block;
1935 		end = (start_blk + readed) * sit_i->sents_per_block;
1936 
1937 		for (; start < end && start < MAIN_SEGS(sbi); start++) {
1938 			struct seg_entry *se = &sit_i->sentries[start];
1939 			struct f2fs_sit_block *sit_blk;
1940 			struct f2fs_sit_entry sit;
1941 			struct page *page;
1942 
1943 			mutex_lock(&curseg->curseg_mutex);
1944 			for (i = 0; i < sits_in_cursum(sum); i++) {
1945 				if (le32_to_cpu(segno_in_journal(sum, i))
1946 								== start) {
1947 					sit = sit_in_journal(sum, i);
1948 					mutex_unlock(&curseg->curseg_mutex);
1949 					goto got_it;
1950 				}
1951 			}
1952 			mutex_unlock(&curseg->curseg_mutex);
1953 
1954 			page = get_current_sit_page(sbi, start);
1955 			sit_blk = (struct f2fs_sit_block *)page_address(page);
1956 			sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)];
1957 			f2fs_put_page(page, 1);
1958 got_it:
1959 			check_block_count(sbi, start, &sit);
1960 			seg_info_from_raw_sit(se, &sit);
1961 			if (sbi->segs_per_sec > 1) {
1962 				struct sec_entry *e = get_sec_entry(sbi, start);
1963 				e->valid_blocks += se->valid_blocks;
1964 			}
1965 		}
1966 		start_blk += readed;
1967 	} while (start_blk < sit_blk_cnt);
1968 }
1969 
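/* Free all empty segments, then mark the active curseg segments as in use. */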
1970 static void init_free_segmap(struct f2fs_sb_info *sbi)
1971 {
1972 	unsigned int start;
1973 	int type;
1974 
1975 	for (start = 0; start < MAIN_SEGS(sbi); start++) {
1976 		struct seg_entry *sentry = get_seg_entry(sbi, start);
1977 		if (!sentry->valid_blocks)
1978 			__set_free(sbi, start);
1979 	}
1980 
1981 	/* mark the current segments as in use */
1982 	for (type = CURSEG_HOT_DATA; type <= CURSEG_COLD_NODE; type++) {
1983 		struct curseg_info *curseg_t = CURSEG_I(sbi, type);
1984 		__set_test_and_inuse(sbi, curseg_t->segno);
1985 	}
1986 }
1987 
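/*
 * Flag every in-use segment that is neither empty nor full as DIRTY,
 * making it a candidate for cleaning/GC.
 */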
1988 static void init_dirty_segmap(struct f2fs_sb_info *sbi)
1989 {
1990 	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
1991 	struct free_segmap_info *free_i = FREE_I(sbi);
1992 	unsigned int segno = 0, offset = 0;
1993 	unsigned short valid_blocks;
1994 
1995 	while (1) {
1996 		/* find dirty segment based on free segmap */
1997 		segno = find_next_inuse(free_i, MAIN_SEGS(sbi), offset);
1998 		if (segno >= MAIN_SEGS(sbi))
1999 			break;
2000 		offset = segno + 1;
2001 		valid_blocks = get_valid_blocks(sbi, segno, 0);
2002 		if (valid_blocks == sbi->blocks_per_seg || !valid_blocks)
2003 			continue;
2004 		if (valid_blocks > sbi->blocks_per_seg) {
2005 			f2fs_bug_on(sbi, 1);
2006 			continue;
2007 		}
2008 		mutex_lock(&dirty_i->seglist_lock);
2009 		__locate_dirty_segment(sbi, segno, DIRTY);
2010 		mutex_unlock(&dirty_i->seglist_lock);
2011 	}
2012 }
2013 
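/* Allocate the bitmap that tracks sections currently chosen as GC victims. */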
2014 static int init_victim_secmap(struct f2fs_sb_info *sbi)
2015 {
2016 	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
2017 	unsigned int bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
2018 
2019 	dirty_i->victim_secmap = kzalloc(bitmap_size, GFP_KERNEL);
2020 	if (!dirty_i->victim_secmap)
2021 		return -ENOMEM;
2022 	return 0;
2023 }
2024 
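/*
 * Allocate the per-type dirty segment bitmaps, seed the DIRTY type from
 * the free segmap, and set up the victim section bitmap.
 */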
2025 static int build_dirty_segmap(struct f2fs_sb_info *sbi)
2026 {
2027 	struct dirty_seglist_info *dirty_i;
2028 	unsigned int bitmap_size, i;
2029 
2030 	/* allocate memory for dirty segments list information */
2031 	dirty_i = kzalloc(sizeof(struct dirty_seglist_info), GFP_KERNEL);
2032 	if (!dirty_i)
2033 		return -ENOMEM;
2034 
2035 	SM_I(sbi)->dirty_info = dirty_i;
2036 	mutex_init(&dirty_i->seglist_lock);
2037 
2038 	bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
2039 
2040 	for (i = 0; i < NR_DIRTY_TYPE; i++) {
2041 		dirty_i->dirty_segmap[i] = kzalloc(bitmap_size, GFP_KERNEL);
2042 		if (!dirty_i->dirty_segmap[i])
2043 			return -ENOMEM;
2044 	}
2045 
2046 	init_dirty_segmap(sbi);
2047 	return init_victim_secmap(sbi);
2048 }
2049 
2050 /*
2051  * Update min, max modified time for cost-benefit GC algorithm
2052  */
2053 static void init_min_max_mtime(struct f2fs_sb_info *sbi)
2054 {
2055 	struct sit_info *sit_i = SIT_I(sbi);
2056 	unsigned int segno;
2057 
2058 	mutex_lock(&sit_i->sentry_lock);
2059 
2060 	sit_i->min_mtime = LLONG_MAX;
2061 
2062 	for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) {
2063 		unsigned int i;
2064 		unsigned long long mtime = 0;
2065 
2066 		for (i = 0; i < sbi->segs_per_sec; i++)
2067 			mtime += get_seg_entry(sbi, segno + i)->mtime;
2068 
2069 		mtime = div_u64(mtime, sbi->segs_per_sec);
2070 
2071 		if (sit_i->min_mtime > mtime)
2072 			sit_i->min_mtime = mtime;
2073 	}
2074 	sit_i->max_mtime = get_mtime(sbi);
2075 	mutex_unlock(&sit_i->sentry_lock);
2076 }
2077 
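/*
 * Mount-time entry point: read the segment layout from the superblock
 * and checkpoint, then build the SIT, free/current/dirty segment
 * structures in dependency order.
 */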
2078 int build_segment_manager(struct f2fs_sb_info *sbi)
2079 {
2080 	struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
2081 	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
2082 	struct f2fs_sm_info *sm_info;
2083 	int err;
2084 
2085 	sm_info = kzalloc(sizeof(struct f2fs_sm_info), GFP_KERNEL);
2086 	if (!sm_info)
2087 		return -ENOMEM;
2088 
2089 	/* init sm info */
2090 	sbi->sm_info = sm_info;
2091 	sm_info->seg0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr);
2092 	sm_info->main_blkaddr = le32_to_cpu(raw_super->main_blkaddr);
2093 	sm_info->segment_count = le32_to_cpu(raw_super->segment_count);
2094 	sm_info->reserved_segments = le32_to_cpu(ckpt->rsvd_segment_count);
2095 	sm_info->ovp_segments = le32_to_cpu(ckpt->overprov_segment_count);
2096 	sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main);
2097 	sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr);
2098 	sm_info->rec_prefree_segments = sm_info->main_segments *
2099 					DEF_RECLAIM_PREFREE_SEGMENTS / 100;
2100 	sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC;
2101 	sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
2102 	sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
2103 
2104 	INIT_LIST_HEAD(&sm_info->discard_list);
2105 	sm_info->nr_discards = 0;
2106 	sm_info->max_discards = 0;
2107 
2108 	INIT_LIST_HEAD(&sm_info->sit_entry_set);
2109 
2110 	if (test_opt(sbi, FLUSH_MERGE) && !f2fs_readonly(sbi->sb)) {
2111 		err = create_flush_cmd_control(sbi);
2112 		if (err)
2113 			return err;
2114 	}
2115 
2116 	err = build_sit_info(sbi);
2117 	if (err)
2118 		return err;
2119 	err = build_free_segmap(sbi);
2120 	if (err)
2121 		return err;
2122 	err = build_curseg(sbi);
2123 	if (err)
2124 		return err;
2125 
2126 	/* reinit free segmap based on SIT */
2127 	build_sit_entries(sbi);
2128 
2129 	init_free_segmap(sbi);
2130 	err = build_dirty_segmap(sbi);
2131 	if (err)
2132 		return err;
2133 
2134 	init_min_max_mtime(sbi);
2135 	return 0;
2136 }
2137 
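/* Release one per-type dirty segment bitmap under the seglist lock. */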
2138 static void discard_dirty_segmap(struct f2fs_sb_info *sbi,
2139 		enum dirty_type dirty_type)
2140 {
2141 	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
2142 
2143 	mutex_lock(&dirty_i->seglist_lock);
2144 	kfree(dirty_i->dirty_segmap[dirty_type]);
2145 	dirty_i->nr_dirty[dirty_type] = 0;
2146 	mutex_unlock(&dirty_i->seglist_lock);
2147 }
2148 
2149 static void destroy_victim_secmap(struct f2fs_sb_info *sbi)
2150 {
2151 	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
2152 	kfree(dirty_i->victim_secmap);
2153 }
2154 
2155 static void destroy_dirty_segmap(struct f2fs_sb_info *sbi)
2156 {
2157 	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
2158 	int i;
2159 
2160 	if (!dirty_i)
2161 		return;
2162 
2163 	/* discard pre-free/dirty segments list */
2164 	for (i = 0; i < NR_DIRTY_TYPE; i++)
2165 		discard_dirty_segmap(sbi, i);
2166 
2167 	destroy_victim_secmap(sbi);
2168 	SM_I(sbi)->dirty_info = NULL;
2169 	kfree(dirty_i);
2170 }
2171 
2172 static void destroy_curseg(struct f2fs_sb_info *sbi)
2173 {
2174 	struct curseg_info *array = SM_I(sbi)->curseg_array;
2175 	int i;
2176 
2177 	if (!array)
2178 		return;
2179 	SM_I(sbi)->curseg_array = NULL;
2180 	for (i = 0; i < NR_CURSEG_TYPE; i++)
2181 		kfree(array[i].sum_blk);
2182 	kfree(array);
2183 }
2184 
2185 static void destroy_free_segmap(struct f2fs_sb_info *sbi)
2186 {
2187 	struct free_segmap_info *free_i = SM_I(sbi)->free_info;
2188 	if (!free_i)
2189 		return;
2190 	SM_I(sbi)->free_info = NULL;
2191 	kfree(free_i->free_segmap);
2192 	kfree(free_i->free_secmap);
2193 	kfree(free_i);
2194 }
2195 
2196 static void destroy_sit_info(struct f2fs_sb_info *sbi)
2197 {
2198 	struct sit_info *sit_i = SIT_I(sbi);
2199 	unsigned int start;
2200 
2201 	if (!sit_i)
2202 		return;
2203 
2204 	if (sit_i->sentries) {
2205 		for (start = 0; start < MAIN_SEGS(sbi); start++) {
2206 			kfree(sit_i->sentries[start].cur_valid_map);
2207 			kfree(sit_i->sentries[start].ckpt_valid_map);
2208 		}
2209 	}
2210 	vfree(sit_i->sentries);
2211 	vfree(sit_i->sec_entries);
2212 	kfree(sit_i->dirty_sentries_bitmap);
2213 
2214 	SM_I(sbi)->sit_info = NULL;
2215 	kfree(sit_i->sit_bitmap);
2216 	kfree(sit_i);
2217 }
2218 
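/* Tear down all the structures built by build_segment_manager(). */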
2219 void destroy_segment_manager(struct f2fs_sb_info *sbi)
2220 {
2221 	struct f2fs_sm_info *sm_info = SM_I(sbi);
2222 
2223 	if (!sm_info)
2224 		return;
2225 	destroy_flush_cmd_control(sbi);
2226 	destroy_dirty_segmap(sbi);
2227 	destroy_curseg(sbi);
2228 	destroy_free_segmap(sbi);
2229 	destroy_sit_info(sbi);
2230 	sbi->sm_info = NULL;
2231 	kfree(sm_info);
2232 }
2233 
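/*
 * Create the slab caches used for discard candidates, SIT entry sets
 * and in-memory page entries (atomic writes).
 */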
2234 int __init create_segment_manager_caches(void)
2235 {
2236 	discard_entry_slab = f2fs_kmem_cache_create("discard_entry",
2237 			sizeof(struct discard_entry));
2238 	if (!discard_entry_slab)
2239 		goto fail;
2240 
2241 	sit_entry_set_slab = f2fs_kmem_cache_create("sit_entry_set",
2242 			sizeof(struct sit_entry_set));
2243 	if (!sit_entry_set_slab)
2244 		goto destroy_discard_entry;
2245 
2246 	inmem_entry_slab = f2fs_kmem_cache_create("inmem_page_entry",
2247 			sizeof(struct inmem_pages));
2248 	if (!inmem_entry_slab)
2249 		goto destroy_sit_entry_set;
2250 	return 0;
2251 
2252 destroy_sit_entry_set:
2253 	kmem_cache_destroy(sit_entry_set_slab);
2254 destroy_discard_entry:
2255 	kmem_cache_destroy(discard_entry_slab);
2256 fail:
2257 	return -ENOMEM;
2258 }
2259 
2260 void destroy_segment_manager_caches(void)
2261 {
2262 	kmem_cache_destroy(sit_entry_set_slab);
2263 	kmem_cache_destroy(discard_entry_slab);
2264 	kmem_cache_destroy(inmem_entry_slab);
2265 }
2266