xref: /openbmc/linux/fs/f2fs/segment.c (revision 0341845efcb4a656707b6d551c3057d6dd27009f)
1 /*
2  * fs/f2fs/segment.c
3  *
4  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
5  *             http://www.samsung.com/
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License version 2 as
9  * published by the Free Software Foundation.
10  */
11 #include <linux/fs.h>
12 #include <linux/f2fs_fs.h>
13 #include <linux/bio.h>
14 #include <linux/blkdev.h>
15 #include <linux/prefetch.h>
16 #include <linux/kthread.h>
17 #include <linux/vmalloc.h>
18 #include <linux/swap.h>
19 
20 #include "f2fs.h"
21 #include "segment.h"
22 #include "node.h"
23 #include <trace/events/f2fs.h>
24 
25 #define __reverse_ffz(x) __reverse_ffs(~(x))
26 
27 static struct kmem_cache *discard_entry_slab;
28 static struct kmem_cache *sit_entry_set_slab;
29 static struct kmem_cache *inmem_entry_slab;
30 
31 /*
32  * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since
33  * MSB and LSB are reversed in a byte by f2fs_set_bit.
34  */
35 static inline unsigned long __reverse_ffs(unsigned long word)
36 {
37 	int num = 0;
38 
39 #if BITS_PER_LONG == 64
40 	if ((word & 0xffffffff) == 0) {
41 		num += 32;
42 		word >>= 32;
43 	}
44 #endif
45 	if ((word & 0xffff) == 0) {
46 		num += 16;
47 		word >>= 16;
48 	}
49 	if ((word & 0xff) == 0) {
50 		num += 8;
51 		word >>= 8;
52 	}
53 	if ((word & 0xf0) == 0)
54 		num += 4;
55 	else
56 		word >>= 4;
57 	if ((word & 0xc) == 0)
58 		num += 2;
59 	else
60 		word >>= 2;
61 	if ((word & 0x2) == 0)
62 		num += 1;
63 	return num;
64 }
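
/*
 * Worked example (illustrative only): f2fs_set_bit(n, map) sets machine
 * bit (7 - n % 8) of byte n / 8, so the numbering is mirrored inside
 * each byte while the bytes themselves keep their normal order.  Hence:
 *
 *	__reverse_ffs(0x80) == 0	low byte 1000 0000, f2fs bit 0
 *	__reverse_ffs(0x02) == 6	low byte 0000 0010, f2fs bit 6
 *	__reverse_ffs(0x01) == 7	low byte 0000 0001, f2fs bit 7
 *
 * i.e. the result is the smallest f2fs-style bit number that is set.
 */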
65 
66 /*
67  * __find_rev_next(_zero)_bit is copied from lib/find_next_bit.c because
68  * f2fs_set_bit makes MSB and LSB reversed in a byte.
69  * Example:
70  *                             LSB <--> MSB
71  *   f2fs_set_bit(0, bitmap) => 0000 0001
72  *   f2fs_set_bit(7, bitmap) => 1000 0000
73  */
74 static unsigned long __find_rev_next_bit(const unsigned long *addr,
75 			unsigned long size, unsigned long offset)
76 {
77 	const unsigned long *p = addr + BIT_WORD(offset);
78 	unsigned long result = offset & ~(BITS_PER_LONG - 1);
79 	unsigned long tmp;
80 	unsigned long mask, submask;
81 	unsigned long quot, rest;
82 
83 	if (offset >= size)
84 		return size;
85 
86 	size -= result;
87 	offset %= BITS_PER_LONG;
88 	if (!offset)
89 		goto aligned;
90 
91 	tmp = *(p++);
92 	quot = (offset >> 3) << 3;
93 	rest = offset & 0x7;
94 	/* keep the tail of the current byte plus every later byte */
95 	submask = (unsigned char)(0xff << rest) >> rest;
96 	mask = submask << quot;
97 	mask |= (quot + 8 < BITS_PER_LONG) ? ~0UL << (quot + 8) : 0UL;
98 	tmp &= mask;
99 	if (size < BITS_PER_LONG)
100 		goto found_first;
101 	if (tmp)
102 		goto found_middle;
103 
104 	size -= BITS_PER_LONG;
105 	result += BITS_PER_LONG;
106 aligned:
107 	while (size & ~(BITS_PER_LONG-1)) {
108 		tmp = *(p++);
109 		if (tmp)
110 			goto found_middle;
111 		result += BITS_PER_LONG;
112 		size -= BITS_PER_LONG;
113 	}
114 	if (!size)
115 		return result;
116 	tmp = *p;
117 found_first:
118 	tmp &= (~0UL >> (BITS_PER_LONG - size));
119 	if (tmp == 0UL)		/* Are any bits set? */
120 		return result + size;   /* Nope. */
121 found_middle:
122 	return result + __reverse_ffs(tmp);
123 }
124 
125 static unsigned long __find_rev_next_zero_bit(const unsigned long *addr,
126 			unsigned long size, unsigned long offset)
127 {
128 	const unsigned long *p = addr + BIT_WORD(offset);
129 	unsigned long result = offset & ~(BITS_PER_LONG - 1);
130 	unsigned long tmp;
131 	unsigned long mask, submask;
132 	unsigned long quot, rest;
133 
134 	if (offset >= size)
135 		return size;
136 
137 	size -= result;
138 	offset %= BITS_PER_LONG;
139 	if (!offset)
140 		goto aligned;
141 
142 	tmp = *(p++);
143 	quot = (offset >> 3) << 3;
144 	rest = offset & 0x7;
145 	mask = ~(~0UL << quot);
146 	submask = (unsigned char)~((unsigned char)(0xff << rest) >> rest);
147 	submask <<= quot;
148 	mask += submask;
149 	tmp |= mask;
150 	if (size < BITS_PER_LONG)
151 		goto found_first;
152 	if (~tmp)
153 		goto found_middle;
154 
155 	size -= BITS_PER_LONG;
156 	result += BITS_PER_LONG;
157 aligned:
158 	while (size & ~(BITS_PER_LONG - 1)) {
159 		tmp = *(p++);
160 		if (~tmp)
161 			goto found_middle;
162 		result += BITS_PER_LONG;
163 		size -= BITS_PER_LONG;
164 	}
165 	if (!size)
166 		return result;
167 	tmp = *p;
168 
169 found_first:
170 	tmp |= ~0UL << size;
171 	if (tmp == ~0UL)        /* Are any bits zero? */
172 		return result + size;   /* Nope. */
173 found_middle:
174 	return result + __reverse_ffz(tmp);
175 }
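
/*
 * Worked example (illustrative only): for offset = 11 we get quot = 8
 * and rest = 3, so __find_rev_next_zero_bit builds its first-word mask
 * as follows:
 *
 *	mask    = ~(~0UL << 8)                              = 0x00ff
 *	submask = ~((unsigned char)(0xff << 3) >> 3) & 0xff = 0xe0
 *	mask   += submask << 8                              = 0xe0ff
 *	tmp    |= 0xe0ff
 *
 * which forces f2fs bits 0..10 of the first word to one, so that only
 * zero bits at or beyond the requested offset can be reported.
 */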
176 
177 void register_inmem_page(struct inode *inode, struct page *page)
178 {
179 	struct f2fs_inode_info *fi = F2FS_I(inode);
180 	struct inmem_pages *new;
181 	int err;
182 retry:
183 	new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS);
184 
185 	/* add atomic page indices to the list */
186 	new->page = page;
187 	INIT_LIST_HEAD(&new->list);
188 
189 	/* increase reference count with clean state */
190 	mutex_lock(&fi->inmem_lock);
191 	err = radix_tree_insert(&fi->inmem_root, page->index, new);
192 	if (err == -EEXIST) {
193 		mutex_unlock(&fi->inmem_lock);
194 		kmem_cache_free(inmem_entry_slab, new);
195 		return;
196 	} else if (err) {
197 		mutex_unlock(&fi->inmem_lock);
198 		kmem_cache_free(inmem_entry_slab, new);
199 		goto retry;
200 	}
201 	get_page(page);
202 	list_add_tail(&new->list, &fi->inmem_pages);
203 	mutex_unlock(&fi->inmem_lock);
204 }
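
/*
 * Call-site sketch (illustrative; f2fs_is_atomic_file() is the helper
 * used elsewhere in f2fs for this check): the data write path diverts
 * dirty pages of an atomic-write file into this in-memory list instead
 * of letting normal writeback pick them up:
 *
 *	if (f2fs_is_atomic_file(inode)) {
 *		register_inmem_page(inode, page);
 *		return;
 *	}
 *
 * Each page stays pinned via get_page() until commit_inmem_pages() or
 * invalidate_inmem_page() releases it.
 */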
205 
206 void invalidate_inmem_page(struct inode *inode, struct page *page)
207 {
208 	struct f2fs_inode_info *fi = F2FS_I(inode);
209 	struct inmem_pages *cur;
210 
211 	mutex_lock(&fi->inmem_lock);
212 	cur = radix_tree_lookup(&fi->inmem_root, page->index);
213 	if (cur) {
214 		radix_tree_delete(&fi->inmem_root, cur->page->index);
215 		f2fs_put_page(cur->page, 0);
216 		list_del(&cur->list);
217 		kmem_cache_free(inmem_entry_slab, cur);
218 	}
219 	mutex_unlock(&fi->inmem_lock);
220 }
221 
222 void commit_inmem_pages(struct inode *inode, bool abort)
223 {
224 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
225 	struct f2fs_inode_info *fi = F2FS_I(inode);
226 	struct inmem_pages *cur, *tmp;
227 	bool submit_bio = false;
228 	struct f2fs_io_info fio = {
229 		.type = DATA,
230 		.rw = WRITE_SYNC,
231 	};
232 
233 	/*
234 	 * The abort flag is true only when f2fs_evict_inode is called.
235 	 * Basically, f2fs_evict_inode doesn't produce any data writes, so
236 	 * we don't need to call f2fs_balance_fs.
237 	 * Otherwise, f2fs_gc in f2fs_balance_fs can wait forever until this
238 	 * inode is freed by iget_locked in f2fs_iget.
239 	 */
240 	if (!abort)
241 		f2fs_balance_fs(sbi);
242 
243 	f2fs_lock_op(sbi);
244 
245 	mutex_lock(&fi->inmem_lock);
246 	list_for_each_entry_safe(cur, tmp, &fi->inmem_pages, list) {
247 		lock_page(cur->page);
248 		if (!abort && cur->page->mapping == inode->i_mapping) {
249 			f2fs_wait_on_page_writeback(cur->page, DATA);
250 			if (clear_page_dirty_for_io(cur->page))
251 				inode_dec_dirty_pages(inode);
252 			do_write_data_page(cur->page, &fio);
253 			submit_bio = true;
254 		}
255 		radix_tree_delete(&fi->inmem_root, cur->page->index);
256 		f2fs_put_page(cur->page, 1);
257 		list_del(&cur->list);
258 		kmem_cache_free(inmem_entry_slab, cur);
259 	}
260 	if (submit_bio)
261 		f2fs_submit_merged_bio(sbi, DATA, WRITE);
262 	mutex_unlock(&fi->inmem_lock);
263 
264 	filemap_fdatawait_range(inode->i_mapping, 0, LLONG_MAX);
265 	f2fs_unlock_op(sbi);
266 }
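
/*
 * The two expected call patterns, per the comment above:
 *
 *	commit_inmem_pages(inode, false);	commit path: write the
 *						registered pages out
 *	commit_inmem_pages(inode, true);	f2fs_evict_inode: just
 *						drop the pages
 */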
267 
268 /*
269  * This function balances dirty node and dentry pages.
270  * In addition, it controls garbage collection.
271  */
272 void f2fs_balance_fs(struct f2fs_sb_info *sbi)
273 {
274 	/*
275 	 * We should do GC or end up with a checkpoint, if there are too many
276 	 * dirty dir/node pages without enough free segments.
277 	 */
278 	if (has_not_enough_free_secs(sbi, 0)) {
279 		mutex_lock(&sbi->gc_mutex);
280 		f2fs_gc(sbi);
281 	}
282 }
283 
284 void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
285 {
286 	/* check the # of cached NAT entries and prefree segments */
287 	if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK) ||
288 			excess_prefree_segs(sbi) ||
289 			available_free_memory(sbi, INO_ENTRIES))
290 		f2fs_sync_fs(sbi->sb, true);
291 }
292 
293 static int issue_flush_thread(void *data)
294 {
295 	struct f2fs_sb_info *sbi = data;
296 	struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info;
297 	wait_queue_head_t *q = &fcc->flush_wait_queue;
298 repeat:
299 	if (kthread_should_stop())
300 		return 0;
301 
302 	if (!llist_empty(&fcc->issue_list)) {
303 		struct bio *bio = bio_alloc(GFP_NOIO, 0);
304 		struct flush_cmd *cmd, *next;
305 		int ret;
306 
307 		fcc->dispatch_list = llist_del_all(&fcc->issue_list);
308 		fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list);
309 
310 		bio->bi_bdev = sbi->sb->s_bdev;
311 		ret = submit_bio_wait(WRITE_FLUSH, bio);
312 
313 		llist_for_each_entry_safe(cmd, next,
314 					  fcc->dispatch_list, llnode) {
315 			cmd->ret = ret;
316 			complete(&cmd->wait);
317 		}
318 		bio_put(bio);
319 		fcc->dispatch_list = NULL;
320 	}
321 
322 	wait_event_interruptible(*q,
323 		kthread_should_stop() || !llist_empty(&fcc->issue_list));
324 	goto repeat;
325 }
326 
327 int f2fs_issue_flush(struct f2fs_sb_info *sbi)
328 {
329 	struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info;
330 	struct flush_cmd cmd;
331 
332 	trace_f2fs_issue_flush(sbi->sb, test_opt(sbi, NOBARRIER),
333 					test_opt(sbi, FLUSH_MERGE));
334 
335 	if (test_opt(sbi, NOBARRIER))
336 		return 0;
337 
338 	if (!test_opt(sbi, FLUSH_MERGE))
339 		return blkdev_issue_flush(sbi->sb->s_bdev, GFP_KERNEL, NULL);
340 
341 	init_completion(&cmd.wait);
342 
343 	llist_add(&cmd.llnode, &fcc->issue_list);
344 
345 	if (!fcc->dispatch_list)
346 		wake_up(&fcc->flush_wait_queue);
347 
348 	wait_for_completion(&cmd.wait);
349 
350 	return cmd.ret;
351 }
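
/*
 * How FLUSH_MERGE batches flushes (walkthrough of the code above): if
 * N threads call f2fs_issue_flush() while the flush thread is busy,
 * each adds one flush_cmd to fcc->issue_list and sleeps on its own
 * completion.  issue_flush_thread() then grabs the whole list with
 * llist_del_all(), issues a single WRITE_FLUSH bio via
 * submit_bio_wait(), and completes every waiter with the shared return
 * code, so N fsyncs cost one device cache flush.
 */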
352 
353 int create_flush_cmd_control(struct f2fs_sb_info *sbi)
354 {
355 	dev_t dev = sbi->sb->s_bdev->bd_dev;
356 	struct flush_cmd_control *fcc;
357 	int err = 0;
358 
359 	fcc = kzalloc(sizeof(struct flush_cmd_control), GFP_KERNEL);
360 	if (!fcc)
361 		return -ENOMEM;
362 	init_waitqueue_head(&fcc->flush_wait_queue);
363 	init_llist_head(&fcc->issue_list);
364 	SM_I(sbi)->cmd_control_info = fcc;
365 	fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
366 				"f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
367 	if (IS_ERR(fcc->f2fs_issue_flush)) {
368 		err = PTR_ERR(fcc->f2fs_issue_flush);
369 		kfree(fcc);
370 		SM_I(sbi)->cmd_control_info = NULL;
371 		return err;
372 	}
373 
374 	return err;
375 }
376 
377 void destroy_flush_cmd_control(struct f2fs_sb_info *sbi)
378 {
379 	struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info;
380 
381 	if (fcc && fcc->f2fs_issue_flush)
382 		kthread_stop(fcc->f2fs_issue_flush);
383 	kfree(fcc);
384 	SM_I(sbi)->cmd_control_info = NULL;
385 }
386 
387 static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
388 		enum dirty_type dirty_type)
389 {
390 	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
391 
392 	/* need not be added */
393 	if (IS_CURSEG(sbi, segno))
394 		return;
395 
396 	if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type]))
397 		dirty_i->nr_dirty[dirty_type]++;
398 
399 	if (dirty_type == DIRTY) {
400 		struct seg_entry *sentry = get_seg_entry(sbi, segno);
401 		enum dirty_type t = sentry->type;
402 
403 		if (unlikely(t >= DIRTY)) {
404 			f2fs_bug_on(sbi, 1);
405 			return;
406 		}
407 		if (!test_and_set_bit(segno, dirty_i->dirty_segmap[t]))
408 			dirty_i->nr_dirty[t]++;
409 	}
410 }
411 
412 static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
413 		enum dirty_type dirty_type)
414 {
415 	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
416 
417 	if (test_and_clear_bit(segno, dirty_i->dirty_segmap[dirty_type]))
418 		dirty_i->nr_dirty[dirty_type]--;
419 
420 	if (dirty_type == DIRTY) {
421 		struct seg_entry *sentry = get_seg_entry(sbi, segno);
422 		enum dirty_type t = sentry->type;
423 
424 		if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t]))
425 			dirty_i->nr_dirty[t]--;
426 
427 		if (get_valid_blocks(sbi, segno, sbi->segs_per_sec) == 0)
428 			clear_bit(GET_SECNO(sbi, segno),
429 						dirty_i->victim_secmap);
430 	}
431 }
432 
433 /*
434  * Errors such as -ENOMEM should not occur here.
435  * Adding a dirty entry into the seglist is not a critical operation.
436  * If a given segment is one of the current working segments, it won't be added.
437  */
438 static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
439 {
440 	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
441 	unsigned short valid_blocks;
442 
443 	if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno))
444 		return;
445 
446 	mutex_lock(&dirty_i->seglist_lock);
447 
448 	valid_blocks = get_valid_blocks(sbi, segno, 0);
449 
450 	if (valid_blocks == 0) {
451 		__locate_dirty_segment(sbi, segno, PRE);
452 		__remove_dirty_segment(sbi, segno, DIRTY);
453 	} else if (valid_blocks < sbi->blocks_per_seg) {
454 		__locate_dirty_segment(sbi, segno, DIRTY);
455 	} else {
456 		/* Recovery routine with SSR needs this */
457 		__remove_dirty_segment(sbi, segno, DIRTY);
458 	}
459 
460 	mutex_unlock(&dirty_i->seglist_lock);
461 }
462 
463 static int f2fs_issue_discard(struct f2fs_sb_info *sbi,
464 				block_t blkstart, block_t blklen)
465 {
466 	sector_t start = SECTOR_FROM_BLOCK(blkstart);
467 	sector_t len = SECTOR_FROM_BLOCK(blklen);
468 	trace_f2fs_issue_discard(sbi->sb, blkstart, blklen);
469 	return blkdev_issue_discard(sbi->sb->s_bdev, start, len, GFP_NOFS, 0);
470 }
471 
472 void discard_next_dnode(struct f2fs_sb_info *sbi, block_t blkaddr)
473 {
474 	if (f2fs_issue_discard(sbi, blkaddr, 1)) {
475 		struct page *page = grab_meta_page(sbi, blkaddr);
476 		/* zero-filled page */
477 		set_page_dirty(page);
478 		f2fs_put_page(page, 1);
479 	}
480 }
481 
482 static void __add_discard_entry(struct f2fs_sb_info *sbi,
483 		struct cp_control *cpc, unsigned int start, unsigned int end)
484 {
485 	struct list_head *head = &SM_I(sbi)->discard_list;
486 	struct discard_entry *new, *last;
487 
488 	if (!list_empty(head)) {
489 		last = list_last_entry(head, struct discard_entry, list);
490 		if (START_BLOCK(sbi, cpc->trim_start) + start ==
491 						last->blkaddr + last->len) {
492 			last->len += end - start;
493 			goto done;
494 		}
495 	}
496 
497 	new = f2fs_kmem_cache_alloc(discard_entry_slab, GFP_NOFS);
498 	INIT_LIST_HEAD(&new->list);
499 	new->blkaddr = START_BLOCK(sbi, cpc->trim_start) + start;
500 	new->len = end - start;
501 	list_add_tail(&new->list, head);
502 done:
503 	SM_I(sbi)->nr_discards += end - start;
504 	cpc->trimmed += end - start;
505 }
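
/*
 * Worked example (illustrative only): two back-to-back calls for the
 * same segment,
 *
 *	__add_discard_entry(sbi, cpc, 0, 8);	new entry, len = 8
 *	__add_discard_entry(sbi, cpc, 8, 20);	adjacent, len grows to 20
 *
 * yield one discard_entry covering blocks 0..19 of the segment,
 * because the second range starts exactly at last->blkaddr + last->len.
 */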
506 
507 static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc)
508 {
509 	int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
510 	int max_blocks = sbi->blocks_per_seg;
511 	struct seg_entry *se = get_seg_entry(sbi, cpc->trim_start);
512 	unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
513 	unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
514 	unsigned long dmap[entries];
515 	unsigned int start = 0, end = -1;
516 	bool force = (cpc->reason == CP_DISCARD);
517 	int i;
518 
519 	if (!force && !test_opt(sbi, DISCARD))
520 		return;
521 
522 	if (force && !se->valid_blocks) {
523 		struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
524 		/*
525 		 * if this segment is registered in the prefree list, then
526 		 * we should skip adding a discard candidate, and let the
527 		 * checkpoint do that later.
528 		 */
529 		mutex_lock(&dirty_i->seglist_lock);
530 		if (test_bit(cpc->trim_start, dirty_i->dirty_segmap[PRE])) {
531 			mutex_unlock(&dirty_i->seglist_lock);
532 			cpc->trimmed += sbi->blocks_per_seg;
533 			return;
534 		}
535 		mutex_unlock(&dirty_i->seglist_lock);
536 
537 		__add_discard_entry(sbi, cpc, 0, sbi->blocks_per_seg);
538 		return;
539 	}
540 
541 	/* empty segments are discarded through the prefree list; full ones have nothing to discard */
542 	if (!se->valid_blocks || se->valid_blocks == max_blocks)
543 		return;
544 
545 	/* SIT_VBLOCK_MAP_SIZE should be a multiple of sizeof(unsigned long) */
546 	for (i = 0; i < entries; i++)
547 		dmap[i] = ~(cur_map[i] | ckpt_map[i]);
548 
549 	while (force || SM_I(sbi)->nr_discards <= SM_I(sbi)->max_discards) {
550 		start = __find_rev_next_bit(dmap, max_blocks, end + 1);
551 		if (start >= max_blocks)
552 			break;
553 
554 		end = __find_rev_next_zero_bit(dmap, max_blocks, start + 1);
555 
556 		if (end - start < cpc->trim_minlen)
557 			continue;
558 
559 		__add_discard_entry(sbi, cpc, start, end);
560 	}
561 }
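
/*
 * Per-bit meaning of the discard map built above (illustrative):
 *
 *	cur_map	ckpt_map	dmap = ~(cur | ckpt)
 *	  0	   0		  1	free now and at the last
 *					checkpoint: safe to discard
 *	  0	   1		  0	still valid in the checkpoint
 *	  1	   -		  0	currently valid
 *
 * Only blocks that no on-disk state can still reference become
 * discard candidates.
 */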
562 
563 void release_discard_addrs(struct f2fs_sb_info *sbi)
564 {
565 	struct list_head *head = &(SM_I(sbi)->discard_list);
566 	struct discard_entry *entry, *this;
567 
568 	/* drop caches */
569 	list_for_each_entry_safe(entry, this, head, list) {
570 		list_del(&entry->list);
571 		kmem_cache_free(discard_entry_slab, entry);
572 	}
573 }
574 
575 /*
576  * Should call clear_prefree_segments after checkpoint is done.
577  */
578 static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
579 {
580 	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
581 	unsigned int segno;
582 
583 	mutex_lock(&dirty_i->seglist_lock);
584 	for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], MAIN_SEGS(sbi))
585 		__set_test_and_free(sbi, segno);
586 	mutex_unlock(&dirty_i->seglist_lock);
587 }
588 
589 void clear_prefree_segments(struct f2fs_sb_info *sbi)
590 {
591 	struct list_head *head = &(SM_I(sbi)->discard_list);
592 	struct discard_entry *entry, *this;
593 	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
594 	unsigned long *prefree_map = dirty_i->dirty_segmap[PRE];
595 	unsigned int start = 0, end = -1;
596 
597 	mutex_lock(&dirty_i->seglist_lock);
598 
599 	while (1) {
600 		int i;
601 		start = find_next_bit(prefree_map, MAIN_SEGS(sbi), end + 1);
602 		if (start >= MAIN_SEGS(sbi))
603 			break;
604 		end = find_next_zero_bit(prefree_map, MAIN_SEGS(sbi),
605 								start + 1);
606 
607 		for (i = start; i < end; i++)
608 			clear_bit(i, prefree_map);
609 
610 		dirty_i->nr_dirty[PRE] -= end - start;
611 
612 		if (!test_opt(sbi, DISCARD))
613 			continue;
614 
615 		f2fs_issue_discard(sbi, START_BLOCK(sbi, start),
616 				(end - start) << sbi->log_blocks_per_seg);
617 	}
618 	mutex_unlock(&dirty_i->seglist_lock);
619 
620 	/* send small discards */
621 	list_for_each_entry_safe(entry, this, head, list) {
622 		f2fs_issue_discard(sbi, entry->blkaddr, entry->len);
623 		list_del(&entry->list);
624 		SM_I(sbi)->nr_discards -= entry->len;
625 		kmem_cache_free(discard_entry_slab, entry);
626 	}
627 }
628 
629 static bool __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno)
630 {
631 	struct sit_info *sit_i = SIT_I(sbi);
632 
633 	if (!__test_and_set_bit(segno, sit_i->dirty_sentries_bitmap)) {
634 		sit_i->dirty_sentries++;
635 		return false;
636 	}
637 
638 	return true;
639 }
640 
641 static void __set_sit_entry_type(struct f2fs_sb_info *sbi, int type,
642 					unsigned int segno, int modified)
643 {
644 	struct seg_entry *se = get_seg_entry(sbi, segno);
645 	se->type = type;
646 	if (modified)
647 		__mark_sit_entry_dirty(sbi, segno);
648 }
649 
650 static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
651 {
652 	struct seg_entry *se;
653 	unsigned int segno, offset;
654 	long int new_vblocks;
655 
656 	segno = GET_SEGNO(sbi, blkaddr);
657 
658 	se = get_seg_entry(sbi, segno);
659 	new_vblocks = se->valid_blocks + del;
660 	offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
661 
662 	f2fs_bug_on(sbi, (new_vblocks >> (sizeof(unsigned short) << 3) ||
663 				(new_vblocks > sbi->blocks_per_seg)));
664 
665 	se->valid_blocks = new_vblocks;
666 	se->mtime = get_mtime(sbi);
667 	SIT_I(sbi)->max_mtime = se->mtime;
668 
669 	/* Update valid block bitmap */
670 	if (del > 0) {
671 		if (f2fs_test_and_set_bit(offset, se->cur_valid_map))
672 			f2fs_bug_on(sbi, 1);
673 	} else {
674 		if (!f2fs_test_and_clear_bit(offset, se->cur_valid_map))
675 			f2fs_bug_on(sbi, 1);
676 	}
677 	if (!f2fs_test_bit(offset, se->ckpt_valid_map))
678 		se->ckpt_valid_blocks += del;
679 
680 	__mark_sit_entry_dirty(sbi, segno);
681 
682 	/* update total number of valid blocks to be written in ckpt area */
683 	SIT_I(sbi)->written_valid_blocks += del;
684 
685 	if (sbi->segs_per_sec > 1)
686 		get_sec_entry(sbi, segno)->valid_blocks += del;
687 }
688 
689 void refresh_sit_entry(struct f2fs_sb_info *sbi, block_t old, block_t new)
690 {
691 	update_sit_entry(sbi, new, 1);
692 	if (GET_SEGNO(sbi, old) != NULL_SEGNO)
693 		update_sit_entry(sbi, old, -1);
694 
695 	locate_dirty_segment(sbi, GET_SEGNO(sbi, old));
696 	locate_dirty_segment(sbi, GET_SEGNO(sbi, new));
697 }
698 
699 void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
700 {
701 	unsigned int segno = GET_SEGNO(sbi, addr);
702 	struct sit_info *sit_i = SIT_I(sbi);
703 
704 	f2fs_bug_on(sbi, addr == NULL_ADDR);
705 	if (addr == NEW_ADDR)
706 		return;
707 
708 	/* add it into sit main buffer */
709 	mutex_lock(&sit_i->sentry_lock);
710 
711 	update_sit_entry(sbi, addr, -1);
712 
713 	/* add it into dirty seglist */
714 	locate_dirty_segment(sbi, segno);
715 
716 	mutex_unlock(&sit_i->sentry_lock);
717 }
718 
719 /*
720  * This function should be called with curseg_mutex held.
721  */
722 static void __add_sum_entry(struct f2fs_sb_info *sbi, int type,
723 					struct f2fs_summary *sum)
724 {
725 	struct curseg_info *curseg = CURSEG_I(sbi, type);
726 	void *addr = curseg->sum_blk;
727 	addr += curseg->next_blkoff * sizeof(struct f2fs_summary);
728 	memcpy(addr, sum, sizeof(struct f2fs_summary));
729 }
730 
731 /*
732  * Calculate the number of current summary pages for writing
733  */
734 int npages_for_summary_flush(struct f2fs_sb_info *sbi)
735 {
736 	int valid_sum_count = 0;
737 	int i, sum_in_page;
738 
739 	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
740 		if (sbi->ckpt->alloc_type[i] == SSR)
741 			valid_sum_count += sbi->blocks_per_seg;
742 		else
743 			valid_sum_count += curseg_blkoff(sbi, i);
744 	}
745 
746 	sum_in_page = (PAGE_CACHE_SIZE - 2 * SUM_JOURNAL_SIZE -
747 			SUM_FOOTER_SIZE) / SUMMARY_SIZE;
748 	if (valid_sum_count <= sum_in_page)
749 		return 1;
750 	else if ((valid_sum_count - sum_in_page) <=
751 		(PAGE_CACHE_SIZE - SUM_FOOTER_SIZE) / SUMMARY_SIZE)
752 		return 2;
753 	return 3;
754 }
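
/*
 * Worked sizing example (assuming the usual 4KB block layout, where
 * SUMMARY_SIZE = 7, SUM_FOOTER_SIZE = 5 and SUM_JOURNAL_SIZE = 507):
 * the first compacted page holds (4096 - 2*507 - 5) / 7 = 439 entries
 * after the NAT/SIT journals, and the second (4096 - 5) / 7 = 584.
 * Three young LFS data logs with blkoffs of, say, 100 + 50 + 10 = 160
 * <= 439 fit in one page, whereas three SSR logs account for
 * 3 * 512 = 1536 entries and therefore need all three pages.
 */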
755 
756 /*
757  * Caller should put this summary page
758  */
759 struct page *get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno)
760 {
761 	return get_meta_page(sbi, GET_SUM_BLOCK(sbi, segno));
762 }
763 
764 static void write_sum_page(struct f2fs_sb_info *sbi,
765 			struct f2fs_summary_block *sum_blk, block_t blk_addr)
766 {
767 	struct page *page = grab_meta_page(sbi, blk_addr);
768 	void *kaddr = page_address(page);
769 	memcpy(kaddr, sum_blk, PAGE_CACHE_SIZE);
770 	set_page_dirty(page);
771 	f2fs_put_page(page, 1);
772 }
773 
774 static int is_next_segment_free(struct f2fs_sb_info *sbi, int type)
775 {
776 	struct curseg_info *curseg = CURSEG_I(sbi, type);
777 	unsigned int segno = curseg->segno + 1;
778 	struct free_segmap_info *free_i = FREE_I(sbi);
779 
780 	if (segno < MAIN_SEGS(sbi) && segno % sbi->segs_per_sec)
781 		return !test_bit(segno, free_i->free_segmap);
782 	return 0;
783 }
784 
785 /*
786  * Find a new segment in the free segment bitmap, in the right order.
787  * This function should always succeed; if it cannot, that is a BUG.
788  */
789 static void get_new_segment(struct f2fs_sb_info *sbi,
790 			unsigned int *newseg, bool new_sec, int dir)
791 {
792 	struct free_segmap_info *free_i = FREE_I(sbi);
793 	unsigned int segno, secno, zoneno;
794 	unsigned int total_zones = MAIN_SECS(sbi) / sbi->secs_per_zone;
795 	unsigned int hint = *newseg / sbi->segs_per_sec;
796 	unsigned int old_zoneno = GET_ZONENO_FROM_SEGNO(sbi, *newseg);
797 	unsigned int left_start = hint;
798 	bool init = true;
799 	int go_left = 0;
800 	int i;
801 
802 	write_lock(&free_i->segmap_lock);
803 
804 	if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) {
805 		segno = find_next_zero_bit(free_i->free_segmap,
806 					MAIN_SEGS(sbi), *newseg + 1);
807 		if (segno - *newseg < sbi->segs_per_sec -
808 					(*newseg % sbi->segs_per_sec))
809 			goto got_it;
810 	}
811 find_other_zone:
812 	secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint);
813 	if (secno >= MAIN_SECS(sbi)) {
814 		if (dir == ALLOC_RIGHT) {
815 			secno = find_next_zero_bit(free_i->free_secmap,
816 							MAIN_SECS(sbi), 0);
817 			f2fs_bug_on(sbi, secno >= MAIN_SECS(sbi));
818 		} else {
819 			go_left = 1;
820 			left_start = hint - 1;
821 		}
822 	}
823 	if (go_left == 0)
824 		goto skip_left;
825 
826 	while (test_bit(left_start, free_i->free_secmap)) {
827 		if (left_start > 0) {
828 			left_start--;
829 			continue;
830 		}
831 		left_start = find_next_zero_bit(free_i->free_secmap,
832 							MAIN_SECS(sbi), 0);
833 		f2fs_bug_on(sbi, left_start >= MAIN_SECS(sbi));
834 		break;
835 	}
836 	secno = left_start;
837 skip_left:
838 	hint = secno;
839 	segno = secno * sbi->segs_per_sec;
840 	zoneno = secno / sbi->secs_per_zone;
841 
842 	/* give up on finding another zone */
843 	if (!init)
844 		goto got_it;
845 	if (sbi->secs_per_zone == 1)
846 		goto got_it;
847 	if (zoneno == old_zoneno)
848 		goto got_it;
849 	if (dir == ALLOC_LEFT) {
850 		if (!go_left && zoneno + 1 >= total_zones)
851 			goto got_it;
852 		if (go_left && zoneno == 0)
853 			goto got_it;
854 	}
855 	for (i = 0; i < NR_CURSEG_TYPE; i++)
856 		if (CURSEG_I(sbi, i)->zone == zoneno)
857 			break;
858 
859 	if (i < NR_CURSEG_TYPE) {
860 		/* zone is in use, try another */
861 		if (go_left)
862 			hint = zoneno * sbi->secs_per_zone - 1;
863 		else if (zoneno + 1 >= total_zones)
864 			hint = 0;
865 		else
866 			hint = (zoneno + 1) * sbi->secs_per_zone;
867 		init = false;
868 		goto find_other_zone;
869 	}
870 got_it:
871 	/* set it as dirty segment in free segmap */
872 	f2fs_bug_on(sbi, test_bit(segno, free_i->free_segmap));
873 	__set_inuse(sbi, segno);
874 	*newseg = segno;
875 	write_unlock(&free_i->segmap_lock);
876 }
877 
878 static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
879 {
880 	struct curseg_info *curseg = CURSEG_I(sbi, type);
881 	struct summary_footer *sum_footer;
882 
883 	curseg->segno = curseg->next_segno;
884 	curseg->zone = GET_ZONENO_FROM_SEGNO(sbi, curseg->segno);
885 	curseg->next_blkoff = 0;
886 	curseg->next_segno = NULL_SEGNO;
887 
888 	sum_footer = &(curseg->sum_blk->footer);
889 	memset(sum_footer, 0, sizeof(struct summary_footer));
890 	if (IS_DATASEG(type))
891 		SET_SUM_TYPE(sum_footer, SUM_TYPE_DATA);
892 	if (IS_NODESEG(type))
893 		SET_SUM_TYPE(sum_footer, SUM_TYPE_NODE);
894 	__set_sit_entry_type(sbi, type, curseg->segno, modified);
895 }
896 
897 /*
898  * Allocate a current working segment.
899  * This function always allocates a free segment in LFS manner.
900  */
901 static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
902 {
903 	struct curseg_info *curseg = CURSEG_I(sbi, type);
904 	unsigned int segno = curseg->segno;
905 	int dir = ALLOC_LEFT;
906 
907 	write_sum_page(sbi, curseg->sum_blk,
908 				GET_SUM_BLOCK(sbi, segno));
909 	if (type == CURSEG_WARM_DATA || type == CURSEG_COLD_DATA)
910 		dir = ALLOC_RIGHT;
911 
912 	if (test_opt(sbi, NOHEAP))
913 		dir = ALLOC_RIGHT;
914 
915 	get_new_segment(sbi, &segno, new_sec, dir);
916 	curseg->next_segno = segno;
917 	reset_curseg(sbi, type, 1);
918 	curseg->alloc_type = LFS;
919 }
920 
921 static void __next_free_blkoff(struct f2fs_sb_info *sbi,
922 			struct curseg_info *seg, block_t start)
923 {
924 	struct seg_entry *se = get_seg_entry(sbi, seg->segno);
925 	int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
926 	unsigned long target_map[entries];
927 	unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
928 	unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
929 	int i, pos;
930 
931 	for (i = 0; i < entries; i++)
932 		target_map[i] = ckpt_map[i] | cur_map[i];
933 
934 	pos = __find_rev_next_zero_bit(target_map, sbi->blocks_per_seg, start);
935 
936 	seg->next_blkoff = pos;
937 }
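
/*
 * Worked example (illustrative only, shown LSB <--> MSB as above):
 *
 *	cur_valid_map	1010 0000	f2fs bits 0 and 2 in use
 *	ckpt_valid_map	0100 0000	f2fs bit 1 still checkpointed
 *	target_map	1110 0000
 *
 * __next_free_blkoff(..., start = 0) then picks pos = 3, the first
 * offset claimed by neither the live nor the checkpointed bitmap, so
 * SSR never overwrites a block the last checkpoint may still need.
 */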
938 
939 /*
940  * If a segment is written in LFS manner, the next block offset is just obtained
941  * by increasing the current block offset. However, if a segment is written in
942  * SSR manner, the next block offset is obtained by calling __next_free_blkoff.
943  */
944 static void __refresh_next_blkoff(struct f2fs_sb_info *sbi,
945 				struct curseg_info *seg)
946 {
947 	if (seg->alloc_type == SSR)
948 		__next_free_blkoff(sbi, seg, seg->next_blkoff + 1);
949 	else
950 		seg->next_blkoff++;
951 }
952 
953 /*
954  * This function always allocates a used segment (from the dirty seglist) in SSR
955  * manner, so it should recover the existing segment information of valid blocks.
956  */
957 static void change_curseg(struct f2fs_sb_info *sbi, int type, bool reuse)
958 {
959 	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
960 	struct curseg_info *curseg = CURSEG_I(sbi, type);
961 	unsigned int new_segno = curseg->next_segno;
962 	struct f2fs_summary_block *sum_node;
963 	struct page *sum_page;
964 
965 	write_sum_page(sbi, curseg->sum_blk,
966 				GET_SUM_BLOCK(sbi, curseg->segno));
967 	__set_test_and_inuse(sbi, new_segno);
968 
969 	mutex_lock(&dirty_i->seglist_lock);
970 	__remove_dirty_segment(sbi, new_segno, PRE);
971 	__remove_dirty_segment(sbi, new_segno, DIRTY);
972 	mutex_unlock(&dirty_i->seglist_lock);
973 
974 	reset_curseg(sbi, type, 1);
975 	curseg->alloc_type = SSR;
976 	__next_free_blkoff(sbi, curseg, 0);
977 
978 	if (reuse) {
979 		sum_page = get_sum_page(sbi, new_segno);
980 		sum_node = (struct f2fs_summary_block *)page_address(sum_page);
981 		memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE);
982 		f2fs_put_page(sum_page, 1);
983 	}
984 }
985 
986 static int get_ssr_segment(struct f2fs_sb_info *sbi, int type)
987 {
988 	struct curseg_info *curseg = CURSEG_I(sbi, type);
989 	const struct victim_selection *v_ops = DIRTY_I(sbi)->v_ops;
990 
991 	if (IS_NODESEG(type) || !has_not_enough_free_secs(sbi, 0))
992 		return v_ops->get_victim(sbi,
993 				&(curseg)->next_segno, BG_GC, type, SSR);
994 
995 	/* For data segments, let's do SSR more intensively */
996 	for (; type >= CURSEG_HOT_DATA; type--)
997 		if (v_ops->get_victim(sbi, &(curseg)->next_segno,
998 						BG_GC, type, SSR))
999 			return 1;
1000 	return 0;
1001 }
1002 
1003 /*
1004  * Flush out the current segment and replace it with a new segment.
1005  * This function should always succeed; if it cannot, that is a BUG.
1006  */
1007 static void allocate_segment_by_default(struct f2fs_sb_info *sbi,
1008 						int type, bool force)
1009 {
1010 	struct curseg_info *curseg = CURSEG_I(sbi, type);
1011 
1012 	if (force)
1013 		new_curseg(sbi, type, true);
1014 	else if (type == CURSEG_WARM_NODE)
1015 		new_curseg(sbi, type, false);
1016 	else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type))
1017 		new_curseg(sbi, type, false);
1018 	else if (need_SSR(sbi) && get_ssr_segment(sbi, type))
1019 		change_curseg(sbi, type, true);
1020 	else
1021 		new_curseg(sbi, type, false);
1022 
1023 	stat_inc_seg_type(sbi, curseg);
1024 }
1025 
1026 void allocate_new_segments(struct f2fs_sb_info *sbi)
1027 {
1028 	struct curseg_info *curseg;
1029 	unsigned int old_curseg;
1030 	int i;
1031 
1032 	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
1033 		curseg = CURSEG_I(sbi, i);
1034 		old_curseg = curseg->segno;
1035 		SIT_I(sbi)->s_ops->allocate_segment(sbi, i, true);
1036 		locate_dirty_segment(sbi, old_curseg);
1037 	}
1038 }
1039 
1040 static const struct segment_allocation default_salloc_ops = {
1041 	.allocate_segment = allocate_segment_by_default,
1042 };
1043 
1044 int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
1045 {
1046 	__u64 start = range->start >> sbi->log_blocksize;
1047 	__u64 end = start + (range->len >> sbi->log_blocksize) - 1;
1048 	unsigned int start_segno, end_segno;
1049 	struct cp_control cpc;
1050 
1051 	if (range->minlen > SEGMENT_SIZE(sbi) || start >= MAX_BLKADDR(sbi) ||
1052 						range->len < sbi->blocksize)
1053 		return -EINVAL;
1054 
1055 	cpc.trimmed = 0;
1056 	if (end <= MAIN_BLKADDR(sbi))
1057 		goto out;
1058 
1059 	/* start/end segment number in main_area */
1060 	start_segno = (start <= MAIN_BLKADDR(sbi)) ? 0 : GET_SEGNO(sbi, start);
1061 	end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 :
1062 						GET_SEGNO(sbi, end);
1063 	cpc.reason = CP_DISCARD;
1064 	cpc.trim_start = start_segno;
1065 	cpc.trim_end = end_segno;
1066 	cpc.trim_minlen = range->minlen >> sbi->log_blocksize;
1067 
1068 	/* do checkpoint to issue discard commands safely */
1069 	mutex_lock(&sbi->gc_mutex);
1070 	write_checkpoint(sbi, &cpc);
1071 	mutex_unlock(&sbi->gc_mutex);
1072 out:
1073 	range->len = cpc.trimmed << sbi->log_blocksize;
1074 	return 0;
1075 }
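
/*
 * Caller sketch (hypothetical values): the FITRIM ioctl passes byte
 * units, which this function converts using log_blocksize:
 *
 *	struct fstrim_range range = {
 *		.start	= 0,
 *		.len	= ULLONG_MAX,		whole filesystem
 *		.minlen	= 1024 * 1024,		skip extents under 1MB
 *	};
 *	err = f2fs_trim_fs(sbi, &range);
 *	range.len now reports the number of bytes trimmed
 */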
1076 
1077 static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type)
1078 {
1079 	struct curseg_info *curseg = CURSEG_I(sbi, type);
1080 	if (curseg->next_blkoff < sbi->blocks_per_seg)
1081 		return true;
1082 	return false;
1083 }
1084 
1085 static int __get_segment_type_2(struct page *page, enum page_type p_type)
1086 {
1087 	if (p_type == DATA)
1088 		return CURSEG_HOT_DATA;
1089 	else
1090 		return CURSEG_HOT_NODE;
1091 }
1092 
1093 static int __get_segment_type_4(struct page *page, enum page_type p_type)
1094 {
1095 	if (p_type == DATA) {
1096 		struct inode *inode = page->mapping->host;
1097 
1098 		if (S_ISDIR(inode->i_mode))
1099 			return CURSEG_HOT_DATA;
1100 		else
1101 			return CURSEG_COLD_DATA;
1102 	} else {
1103 		if (IS_DNODE(page) && is_cold_node(page))
1104 			return CURSEG_WARM_NODE;
1105 		else
1106 			return CURSEG_COLD_NODE;
1107 	}
1108 }
1109 
1110 static int __get_segment_type_6(struct page *page, enum page_type p_type)
1111 {
1112 	if (p_type == DATA) {
1113 		struct inode *inode = page->mapping->host;
1114 
1115 		if (S_ISDIR(inode->i_mode))
1116 			return CURSEG_HOT_DATA;
1117 		else if (is_cold_data(page) || file_is_cold(inode))
1118 			return CURSEG_COLD_DATA;
1119 		else
1120 			return CURSEG_WARM_DATA;
1121 	} else {
1122 		if (IS_DNODE(page))
1123 			return is_cold_node(page) ? CURSEG_WARM_NODE :
1124 						CURSEG_HOT_NODE;
1125 		else
1126 			return CURSEG_COLD_NODE;
1127 	}
1128 }
1129 
1130 static int __get_segment_type(struct page *page, enum page_type p_type)
1131 {
1132 	switch (F2FS_P_SB(page)->active_logs) {
1133 	case 2:
1134 		return __get_segment_type_2(page, p_type);
1135 	case 4:
1136 		return __get_segment_type_4(page, p_type);
1137 	}
1138 	/* NR_CURSEG_TYPE(6) logs by default */
1139 	f2fs_bug_on(F2FS_P_SB(page),
1140 		F2FS_P_SB(page)->active_logs != NR_CURSEG_TYPE);
1141 	return __get_segment_type_6(page, p_type);
1142 }
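
/*
 * Default six-log placement implemented above (active_logs == 6; the
 * node footer's cold bit is set for non-directory node pages):
 *
 *	HOT  DATA	directory entry blocks
 *	WARM DATA	ordinary user data blocks
 *	COLD DATA	cold-marked data and data of cold files
 *	HOT  NODE	direct node blocks of directories
 *	WARM NODE	direct node blocks of regular files
 *	COLD NODE	indirect node blocks
 */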
1143 
1144 void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
1145 		block_t old_blkaddr, block_t *new_blkaddr,
1146 		struct f2fs_summary *sum, int type)
1147 {
1148 	struct sit_info *sit_i = SIT_I(sbi);
1149 	struct curseg_info *curseg;
1150 
1151 	curseg = CURSEG_I(sbi, type);
1152 
1153 	mutex_lock(&curseg->curseg_mutex);
1154 
1155 	*new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
1156 
1157 	/*
1158 	 * __add_sum_entry should be called with curseg_mutex held,
1159 	 * because this function updates a summary entry in the
1160 	 * current summary block.
1161 	 */
1162 	__add_sum_entry(sbi, type, sum);
1163 
1164 	mutex_lock(&sit_i->sentry_lock);
1165 	__refresh_next_blkoff(sbi, curseg);
1166 
1167 	stat_inc_block_count(sbi, curseg);
1168 
1169 	if (!__has_curseg_space(sbi, type))
1170 		sit_i->s_ops->allocate_segment(sbi, type, false);
1171 	/*
1172 	 * SIT information should be updated before segment allocation,
1173 	 * since SSR needs latest valid block information.
1174 	 */
1175 	refresh_sit_entry(sbi, old_blkaddr, *new_blkaddr);
1176 
1177 	mutex_unlock(&sit_i->sentry_lock);
1178 
1179 	if (page && IS_NODESEG(type))
1180 		fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg));
1181 
1182 	mutex_unlock(&curseg->curseg_mutex);
1183 }
1184 
1185 static void do_write_page(struct f2fs_sb_info *sbi, struct page *page,
1186 			block_t old_blkaddr, block_t *new_blkaddr,
1187 			struct f2fs_summary *sum, struct f2fs_io_info *fio)
1188 {
1189 	int type = __get_segment_type(page, fio->type);
1190 
1191 	allocate_data_block(sbi, page, old_blkaddr, new_blkaddr, sum, type);
1192 
1193 	/* writeout dirty page into bdev */
1194 	f2fs_submit_page_mbio(sbi, page, *new_blkaddr, fio);
1195 }
1196 
1197 void write_meta_page(struct f2fs_sb_info *sbi, struct page *page)
1198 {
1199 	struct f2fs_io_info fio = {
1200 		.type = META,
1201 		.rw = WRITE_SYNC | REQ_META | REQ_PRIO
1202 	};
1203 
1204 	set_page_writeback(page);
1205 	f2fs_submit_page_mbio(sbi, page, page->index, &fio);
1206 }
1207 
1208 void write_node_page(struct f2fs_sb_info *sbi, struct page *page,
1209 		struct f2fs_io_info *fio,
1210 		unsigned int nid, block_t old_blkaddr, block_t *new_blkaddr)
1211 {
1212 	struct f2fs_summary sum;
1213 	set_summary(&sum, nid, 0, 0);
1214 	do_write_page(sbi, page, old_blkaddr, new_blkaddr, &sum, fio);
1215 }
1216 
1217 void write_data_page(struct page *page, struct dnode_of_data *dn,
1218 		block_t *new_blkaddr, struct f2fs_io_info *fio)
1219 {
1220 	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
1221 	struct f2fs_summary sum;
1222 	struct node_info ni;
1223 
1224 	f2fs_bug_on(sbi, dn->data_blkaddr == NULL_ADDR);
1225 	get_node_info(sbi, dn->nid, &ni);
1226 	set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
1227 
1228 	do_write_page(sbi, page, dn->data_blkaddr, new_blkaddr, &sum, fio);
1229 }
1230 
1231 void rewrite_data_page(struct page *page, block_t old_blkaddr,
1232 					struct f2fs_io_info *fio)
1233 {
1234 	f2fs_submit_page_mbio(F2FS_P_SB(page), page, old_blkaddr, fio);
1235 }
1236 
1237 void recover_data_page(struct f2fs_sb_info *sbi,
1238 			struct page *page, struct f2fs_summary *sum,
1239 			block_t old_blkaddr, block_t new_blkaddr)
1240 {
1241 	struct sit_info *sit_i = SIT_I(sbi);
1242 	struct curseg_info *curseg;
1243 	unsigned int segno, old_cursegno;
1244 	struct seg_entry *se;
1245 	int type;
1246 
1247 	segno = GET_SEGNO(sbi, new_blkaddr);
1248 	se = get_seg_entry(sbi, segno);
1249 	type = se->type;
1250 
1251 	if (se->valid_blocks == 0 && !IS_CURSEG(sbi, segno)) {
1252 		if (old_blkaddr == NULL_ADDR)
1253 			type = CURSEG_COLD_DATA;
1254 		else
1255 			type = CURSEG_WARM_DATA;
1256 	}
1257 	curseg = CURSEG_I(sbi, type);
1258 
1259 	mutex_lock(&curseg->curseg_mutex);
1260 	mutex_lock(&sit_i->sentry_lock);
1261 
1262 	old_cursegno = curseg->segno;
1263 
1264 	/* change the current segment */
1265 	if (segno != curseg->segno) {
1266 		curseg->next_segno = segno;
1267 		change_curseg(sbi, type, true);
1268 	}
1269 
1270 	curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr);
1271 	__add_sum_entry(sbi, type, sum);
1272 
1273 	refresh_sit_entry(sbi, old_blkaddr, new_blkaddr);
1274 	locate_dirty_segment(sbi, old_cursegno);
1275 
1276 	mutex_unlock(&sit_i->sentry_lock);
1277 	mutex_unlock(&curseg->curseg_mutex);
1278 }
1279 
1280 static inline bool is_merged_page(struct f2fs_sb_info *sbi,
1281 					struct page *page, enum page_type type)
1282 {
1283 	enum page_type btype = PAGE_TYPE_OF_BIO(type);
1284 	struct f2fs_bio_info *io = &sbi->write_io[btype];
1285 	struct bio_vec *bvec;
1286 	int i;
1287 
1288 	down_read(&io->io_rwsem);
1289 	if (!io->bio)
1290 		goto out;
1291 
1292 	bio_for_each_segment_all(bvec, io->bio, i) {
1293 		if (page == bvec->bv_page) {
1294 			up_read(&io->io_rwsem);
1295 			return true;
1296 		}
1297 	}
1298 
1299 out:
1300 	up_read(&io->io_rwsem);
1301 	return false;
1302 }
1303 
1304 void f2fs_wait_on_page_writeback(struct page *page,
1305 				enum page_type type)
1306 {
1307 	if (PageWriteback(page)) {
1308 		struct f2fs_sb_info *sbi = F2FS_P_SB(page);
1309 
1310 		if (is_merged_page(sbi, page, type))
1311 			f2fs_submit_merged_bio(sbi, type, WRITE);
1312 		wait_on_page_writeback(page);
1313 	}
1314 }
1315 
1316 static int read_compacted_summaries(struct f2fs_sb_info *sbi)
1317 {
1318 	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
1319 	struct curseg_info *seg_i;
1320 	unsigned char *kaddr;
1321 	struct page *page;
1322 	block_t start;
1323 	int i, j, offset;
1324 
1325 	start = start_sum_block(sbi);
1326 
1327 	page = get_meta_page(sbi, start++);
1328 	kaddr = (unsigned char *)page_address(page);
1329 
1330 	/* Step 1: restore nat cache */
1331 	seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
1332 	memcpy(&seg_i->sum_blk->n_nats, kaddr, SUM_JOURNAL_SIZE);
1333 
1334 	/* Step 2: restore sit cache */
1335 	seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
1336 	memcpy(&seg_i->sum_blk->n_sits, kaddr + SUM_JOURNAL_SIZE,
1337 						SUM_JOURNAL_SIZE);
1338 	offset = 2 * SUM_JOURNAL_SIZE;
1339 
1340 	/* Step 3: restore summary entries */
1341 	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
1342 		unsigned short blk_off;
1343 		unsigned int segno;
1344 
1345 		seg_i = CURSEG_I(sbi, i);
1346 		segno = le32_to_cpu(ckpt->cur_data_segno[i]);
1347 		blk_off = le16_to_cpu(ckpt->cur_data_blkoff[i]);
1348 		seg_i->next_segno = segno;
1349 		reset_curseg(sbi, i, 0);
1350 		seg_i->alloc_type = ckpt->alloc_type[i];
1351 		seg_i->next_blkoff = blk_off;
1352 
1353 		if (seg_i->alloc_type == SSR)
1354 			blk_off = sbi->blocks_per_seg;
1355 
1356 		for (j = 0; j < blk_off; j++) {
1357 			struct f2fs_summary *s;
1358 			s = (struct f2fs_summary *)(kaddr + offset);
1359 			seg_i->sum_blk->entries[j] = *s;
1360 			offset += SUMMARY_SIZE;
1361 			if (offset + SUMMARY_SIZE <= PAGE_CACHE_SIZE -
1362 						SUM_FOOTER_SIZE)
1363 				continue;
1364 
1365 			f2fs_put_page(page, 1);
1366 			page = NULL;
1367 
1368 			page = get_meta_page(sbi, start++);
1369 			kaddr = (unsigned char *)page_address(page);
1370 			offset = 0;
1371 		}
1372 	}
1373 	f2fs_put_page(page, 1);
1374 	return 0;
1375 }
1376 
1377 static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
1378 {
1379 	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
1380 	struct f2fs_summary_block *sum;
1381 	struct curseg_info *curseg;
1382 	struct page *new;
1383 	unsigned short blk_off;
1384 	unsigned int segno = 0;
1385 	block_t blk_addr = 0;
1386 
1387 	/* get segment number and block addr */
1388 	if (IS_DATASEG(type)) {
1389 		segno = le32_to_cpu(ckpt->cur_data_segno[type]);
1390 		blk_off = le16_to_cpu(ckpt->cur_data_blkoff[type -
1391 							CURSEG_HOT_DATA]);
1392 		if (is_set_ckpt_flags(ckpt, CP_UMOUNT_FLAG))
1393 			blk_addr = sum_blk_addr(sbi, NR_CURSEG_TYPE, type);
1394 		else
1395 			blk_addr = sum_blk_addr(sbi, NR_CURSEG_DATA_TYPE, type);
1396 	} else {
1397 		segno = le32_to_cpu(ckpt->cur_node_segno[type -
1398 							CURSEG_HOT_NODE]);
1399 		blk_off = le16_to_cpu(ckpt->cur_node_blkoff[type -
1400 							CURSEG_HOT_NODE]);
1401 		if (is_set_ckpt_flags(ckpt, CP_UMOUNT_FLAG))
1402 			blk_addr = sum_blk_addr(sbi, NR_CURSEG_NODE_TYPE,
1403 							type - CURSEG_HOT_NODE);
1404 		else
1405 			blk_addr = GET_SUM_BLOCK(sbi, segno);
1406 	}
1407 
1408 	new = get_meta_page(sbi, blk_addr);
1409 	sum = (struct f2fs_summary_block *)page_address(new);
1410 
1411 	if (IS_NODESEG(type)) {
1412 		if (is_set_ckpt_flags(ckpt, CP_UMOUNT_FLAG)) {
1413 			struct f2fs_summary *ns = &sum->entries[0];
1414 			int i;
1415 			for (i = 0; i < sbi->blocks_per_seg; i++, ns++) {
1416 				ns->version = 0;
1417 				ns->ofs_in_node = 0;
1418 			}
1419 		} else {
1420 			int err;
1421 
1422 			err = restore_node_summary(sbi, segno, sum);
1423 			if (err) {
1424 				f2fs_put_page(new, 1);
1425 				return err;
1426 			}
1427 		}
1428 	}
1429 
1430 	/* set uncompleted segment to curseg */
1431 	curseg = CURSEG_I(sbi, type);
1432 	mutex_lock(&curseg->curseg_mutex);
1433 	memcpy(curseg->sum_blk, sum, PAGE_CACHE_SIZE);
1434 	curseg->next_segno = segno;
1435 	reset_curseg(sbi, type, 0);
1436 	curseg->alloc_type = ckpt->alloc_type[type];
1437 	curseg->next_blkoff = blk_off;
1438 	mutex_unlock(&curseg->curseg_mutex);
1439 	f2fs_put_page(new, 1);
1440 	return 0;
1441 }
1442 
1443 static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
1444 {
1445 	int type = CURSEG_HOT_DATA;
1446 	int err;
1447 
1448 	if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_COMPACT_SUM_FLAG)) {
1449 		/* restore for compacted data summary */
1450 		if (read_compacted_summaries(sbi))
1451 			return -EINVAL;
1452 		type = CURSEG_HOT_NODE;
1453 	}
1454 
1455 	for (; type <= CURSEG_COLD_NODE; type++) {
1456 		err = read_normal_summaries(sbi, type);
1457 		if (err)
1458 			return err;
1459 	}
1460 
1461 	return 0;
1462 }
1463 
1464 static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr)
1465 {
1466 	struct page *page;
1467 	unsigned char *kaddr;
1468 	struct f2fs_summary *summary;
1469 	struct curseg_info *seg_i;
1470 	int written_size = 0;
1471 	int i, j;
1472 
1473 	page = grab_meta_page(sbi, blkaddr++);
1474 	kaddr = (unsigned char *)page_address(page);
1475 
1476 	/* Step 1: write nat cache */
1477 	seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
1478 	memcpy(kaddr, &seg_i->sum_blk->n_nats, SUM_JOURNAL_SIZE);
1479 	written_size += SUM_JOURNAL_SIZE;
1480 
1481 	/* Step 2: write sit cache */
1482 	seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
1483 	memcpy(kaddr + written_size, &seg_i->sum_blk->n_sits,
1484 						SUM_JOURNAL_SIZE);
1485 	written_size += SUM_JOURNAL_SIZE;
1486 
1487 	/* Step 3: write summary entries */
1488 	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
1489 		unsigned short blkoff;
1490 		seg_i = CURSEG_I(sbi, i);
1491 		if (sbi->ckpt->alloc_type[i] == SSR)
1492 			blkoff = sbi->blocks_per_seg;
1493 		else
1494 			blkoff = curseg_blkoff(sbi, i);
1495 
1496 		for (j = 0; j < blkoff; j++) {
1497 			if (!page) {
1498 				page = grab_meta_page(sbi, blkaddr++);
1499 				kaddr = (unsigned char *)page_address(page);
1500 				written_size = 0;
1501 			}
1502 			summary = (struct f2fs_summary *)(kaddr + written_size);
1503 			*summary = seg_i->sum_blk->entries[j];
1504 			written_size += SUMMARY_SIZE;
1505 
1506 			if (written_size + SUMMARY_SIZE <= PAGE_CACHE_SIZE -
1507 							SUM_FOOTER_SIZE)
1508 				continue;
1509 
1510 			set_page_dirty(page);
1511 			f2fs_put_page(page, 1);
1512 			page = NULL;
1513 		}
1514 	}
1515 	if (page) {
1516 		set_page_dirty(page);
1517 		f2fs_put_page(page, 1);
1518 	}
1519 }
1520 
1521 static void write_normal_summaries(struct f2fs_sb_info *sbi,
1522 					block_t blkaddr, int type)
1523 {
1524 	int i, end;
1525 	if (IS_DATASEG(type))
1526 		end = type + NR_CURSEG_DATA_TYPE;
1527 	else
1528 		end = type + NR_CURSEG_NODE_TYPE;
1529 
1530 	for (i = type; i < end; i++) {
1531 		struct curseg_info *sum = CURSEG_I(sbi, i);
1532 		mutex_lock(&sum->curseg_mutex);
1533 		write_sum_page(sbi, sum->sum_blk, blkaddr + (i - type));
1534 		mutex_unlock(&sum->curseg_mutex);
1535 	}
1536 }
1537 
1538 void write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
1539 {
1540 	if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_COMPACT_SUM_FLAG))
1541 		write_compacted_summaries(sbi, start_blk);
1542 	else
1543 		write_normal_summaries(sbi, start_blk, CURSEG_HOT_DATA);
1544 }
1545 
1546 void write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
1547 {
1548 	if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG))
1549 		write_normal_summaries(sbi, start_blk, CURSEG_HOT_NODE);
1550 }
1551 
1552 int lookup_journal_in_cursum(struct f2fs_summary_block *sum, int type,
1553 					unsigned int val, int alloc)
1554 {
1555 	int i;
1556 
1557 	if (type == NAT_JOURNAL) {
1558 		for (i = 0; i < nats_in_cursum(sum); i++) {
1559 			if (le32_to_cpu(nid_in_journal(sum, i)) == val)
1560 				return i;
1561 		}
1562 		if (alloc && nats_in_cursum(sum) < NAT_JOURNAL_ENTRIES)
1563 			return update_nats_in_cursum(sum, 1);
1564 	} else if (type == SIT_JOURNAL) {
1565 		for (i = 0; i < sits_in_cursum(sum); i++)
1566 			if (le32_to_cpu(segno_in_journal(sum, i)) == val)
1567 				return i;
1568 		if (alloc && sits_in_cursum(sum) < SIT_JOURNAL_ENTRIES)
1569 			return update_sits_in_cursum(sum, 1);
1570 	}
1571 	return -1;
1572 }
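
/*
 * Usage sketch (illustrative only): find or allocate the journal slot
 * for a NAT entry in the current summary block:
 *
 *	offset = lookup_journal_in_cursum(sum, NAT_JOURNAL, nid, 1);
 *	offset >= 0: reuse the matching slot, or the new one just
 *		     appended by update_nats_in_cursum()
 *	offset <  0: journal full; the caller falls back to writing
 *		     the entry into its NAT block instead
 */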
1573 
1574 static struct page *get_current_sit_page(struct f2fs_sb_info *sbi,
1575 					unsigned int segno)
1576 {
1577 	return get_meta_page(sbi, current_sit_addr(sbi, segno));
1578 }
1579 
1580 static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,
1581 					unsigned int start)
1582 {
1583 	struct sit_info *sit_i = SIT_I(sbi);
1584 	struct page *src_page, *dst_page;
1585 	pgoff_t src_off, dst_off;
1586 	void *src_addr, *dst_addr;
1587 
1588 	src_off = current_sit_addr(sbi, start);
1589 	dst_off = next_sit_addr(sbi, src_off);
1590 
1591 	/* get current sit block page without lock */
1592 	src_page = get_meta_page(sbi, src_off);
1593 	dst_page = grab_meta_page(sbi, dst_off);
1594 	f2fs_bug_on(sbi, PageDirty(src_page));
1595 
1596 	src_addr = page_address(src_page);
1597 	dst_addr = page_address(dst_page);
1598 	memcpy(dst_addr, src_addr, PAGE_CACHE_SIZE);
1599 
1600 	set_page_dirty(dst_page);
1601 	f2fs_put_page(src_page, 1);
1602 
1603 	set_to_next_sit(sit_i, start);
1604 
1605 	return dst_page;
1606 }
1607 
1608 static struct sit_entry_set *grab_sit_entry_set(void)
1609 {
1610 	struct sit_entry_set *ses =
1611 			f2fs_kmem_cache_alloc(sit_entry_set_slab, GFP_ATOMIC);
1612 
1613 	ses->entry_cnt = 0;
1614 	INIT_LIST_HEAD(&ses->set_list);
1615 	return ses;
1616 }
1617 
1618 static void release_sit_entry_set(struct sit_entry_set *ses)
1619 {
1620 	list_del(&ses->set_list);
1621 	kmem_cache_free(sit_entry_set_slab, ses);
1622 }
1623 
1624 static void adjust_sit_entry_set(struct sit_entry_set *ses,
1625 						struct list_head *head)
1626 {
1627 	struct sit_entry_set *next = ses;
1628 
1629 	if (list_is_last(&ses->set_list, head))
1630 		return;
1631 
1632 	list_for_each_entry_continue(next, head, set_list)
1633 		if (ses->entry_cnt <= next->entry_cnt)
1634 			break;
1635 
1636 	list_move_tail(&ses->set_list, &next->set_list);
1637 }
1638 
1639 static void add_sit_entry(unsigned int segno, struct list_head *head)
1640 {
1641 	struct sit_entry_set *ses;
1642 	unsigned int start_segno = START_SEGNO(segno);
1643 
1644 	list_for_each_entry(ses, head, set_list) {
1645 		if (ses->start_segno == start_segno) {
1646 			ses->entry_cnt++;
1647 			adjust_sit_entry_set(ses, head);
1648 			return;
1649 		}
1650 	}
1651 
1652 	ses = grab_sit_entry_set();
1653 
1654 	ses->start_segno = start_segno;
1655 	ses->entry_cnt++;
1656 	list_add(&ses->set_list, head);
1657 }
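
/*
 * The set list is kept sorted by entry_cnt in ascending order:
 * adjust_sit_entry_set() moves a set toward the tail whenever its
 * count grows past that of its neighbours.  flush_sit_entries() walks
 * the list from the head, so the smallest sets are offered to the
 * in-summary SIT journal first and only the large ones fall through
 * to full SIT pages.
 */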
1658 
1659 static void add_sits_in_set(struct f2fs_sb_info *sbi)
1660 {
1661 	struct f2fs_sm_info *sm_info = SM_I(sbi);
1662 	struct list_head *set_list = &sm_info->sit_entry_set;
1663 	unsigned long *bitmap = SIT_I(sbi)->dirty_sentries_bitmap;
1664 	unsigned int segno;
1665 
1666 	for_each_set_bit(segno, bitmap, MAIN_SEGS(sbi))
1667 		add_sit_entry(segno, set_list);
1668 }
1669 
1670 static void remove_sits_in_journal(struct f2fs_sb_info *sbi)
1671 {
1672 	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
1673 	struct f2fs_summary_block *sum = curseg->sum_blk;
1674 	int i;
1675 
1676 	for (i = sits_in_cursum(sum) - 1; i >= 0; i--) {
1677 		unsigned int segno;
1678 		bool dirtied;
1679 
1680 		segno = le32_to_cpu(segno_in_journal(sum, i));
1681 		dirtied = __mark_sit_entry_dirty(sbi, segno);
1682 
1683 		if (!dirtied)
1684 			add_sit_entry(segno, &SM_I(sbi)->sit_entry_set);
1685 	}
1686 	update_sits_in_cursum(sum, -sits_in_cursum(sum));
1687 }
1688 
1689 /*
1690  * CP calls this function, which flushes SIT entries including sit_journal,
1691  * and moves prefree segs to free segs.
1692  */
1693 void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
1694 {
1695 	struct sit_info *sit_i = SIT_I(sbi);
1696 	unsigned long *bitmap = sit_i->dirty_sentries_bitmap;
1697 	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
1698 	struct f2fs_summary_block *sum = curseg->sum_blk;
1699 	struct sit_entry_set *ses, *tmp;
1700 	struct list_head *head = &SM_I(sbi)->sit_entry_set;
1701 	bool to_journal = true;
1702 	struct seg_entry *se;
1703 
1704 	mutex_lock(&curseg->curseg_mutex);
1705 	mutex_lock(&sit_i->sentry_lock);
1706 
1707 	/*
1708 	 * temporarily add and account the sit entries marked in the
1709 	 * dirty bitmap in the sit entry set
1710 	 */
1711 	add_sits_in_set(sbi);
1712 
1713 	/*
1714 	 * if there is not enough space in the journal to store dirty sit
1715 	 * entries, remove all entries from the journal and add and account
1716 	 * them in the sit entry set.
1717 	 */
1718 	if (!__has_cursum_space(sum, sit_i->dirty_sentries, SIT_JOURNAL))
1719 		remove_sits_in_journal(sbi);
1720 
1721 	if (!sit_i->dirty_sentries)
1722 		goto out;
1723 
1724 	/*
1725 	 * there are two steps to flush sit entries:
1726 	 * #1, flush sit entries to journal in current cold data summary block.
1727 	 * #2, flush sit entries to sit page.
1728 	 */
1729 	list_for_each_entry_safe(ses, tmp, head, set_list) {
1730 		struct page *page = NULL;
1731 		struct f2fs_sit_block *raw_sit = NULL;
1732 		unsigned int start_segno = ses->start_segno;
1733 		unsigned int end = min(start_segno + SIT_ENTRY_PER_BLOCK,
1734 						(unsigned long)MAIN_SEGS(sbi));
1735 		unsigned int segno = start_segno;
1736 
1737 		if (to_journal &&
1738 			!__has_cursum_space(sum, ses->entry_cnt, SIT_JOURNAL))
1739 			to_journal = false;
1740 
1741 		if (!to_journal) {
1742 			page = get_next_sit_page(sbi, start_segno);
1743 			raw_sit = page_address(page);
1744 		}
1745 
1746 		/* flush dirty sit entries in region of current sit set */
1747 		for_each_set_bit_from(segno, bitmap, end) {
1748 			int offset, sit_offset;
1749 
1750 			se = get_seg_entry(sbi, segno);
1751 
1752 			/* add discard candidates */
1753 			if (SM_I(sbi)->nr_discards < SM_I(sbi)->max_discards) {
1754 				cpc->trim_start = segno;
1755 				add_discard_addrs(sbi, cpc);
1756 			}
1757 
1758 			if (to_journal) {
1759 				offset = lookup_journal_in_cursum(sum,
1760 							SIT_JOURNAL, segno, 1);
1761 				f2fs_bug_on(sbi, offset < 0);
1762 				segno_in_journal(sum, offset) =
1763 							cpu_to_le32(segno);
1764 				seg_info_to_raw_sit(se,
1765 						&sit_in_journal(sum, offset));
1766 			} else {
1767 				sit_offset = SIT_ENTRY_OFFSET(sit_i, segno);
1768 				seg_info_to_raw_sit(se,
1769 						&raw_sit->entries[sit_offset]);
1770 			}
1771 
1772 			__clear_bit(segno, bitmap);
1773 			sit_i->dirty_sentries--;
1774 			ses->entry_cnt--;
1775 		}
1776 
1777 		if (!to_journal)
1778 			f2fs_put_page(page, 1);
1779 
1780 		f2fs_bug_on(sbi, ses->entry_cnt);
1781 		release_sit_entry_set(ses);
1782 	}
1783 
1784 	f2fs_bug_on(sbi, !list_empty(head));
1785 	f2fs_bug_on(sbi, sit_i->dirty_sentries);
1786 out:
1787 	if (cpc->reason == CP_DISCARD) {
1788 		for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++)
1789 			add_discard_addrs(sbi, cpc);
1790 	}
1791 	mutex_unlock(&sit_i->sentry_lock);
1792 	mutex_unlock(&curseg->curseg_mutex);
1793 
1794 	set_prefree_as_free_segments(sbi);
1795 }
1796 
1797 static int build_sit_info(struct f2fs_sb_info *sbi)
1798 {
1799 	struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
1800 	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
1801 	struct sit_info *sit_i;
1802 	unsigned int sit_segs, start;
1803 	char *src_bitmap, *dst_bitmap;
1804 	unsigned int bitmap_size;
1805 
1806 	/* allocate memory for SIT information */
1807 	sit_i = kzalloc(sizeof(struct sit_info), GFP_KERNEL);
1808 	if (!sit_i)
1809 		return -ENOMEM;
1810 
1811 	SM_I(sbi)->sit_info = sit_i;
1812 
1813 	sit_i->sentries = vzalloc(MAIN_SEGS(sbi) * sizeof(struct seg_entry));
1814 	if (!sit_i->sentries)
1815 		return -ENOMEM;
1816 
1817 	bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
1818 	sit_i->dirty_sentries_bitmap = kzalloc(bitmap_size, GFP_KERNEL);
1819 	if (!sit_i->dirty_sentries_bitmap)
1820 		return -ENOMEM;
1821 
1822 	for (start = 0; start < MAIN_SEGS(sbi); start++) {
1823 		sit_i->sentries[start].cur_valid_map
1824 			= kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
1825 		sit_i->sentries[start].ckpt_valid_map
1826 			= kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
1827 		if (!sit_i->sentries[start].cur_valid_map
1828 				|| !sit_i->sentries[start].ckpt_valid_map)
1829 			return -ENOMEM;
1830 	}
1831 
1832 	if (sbi->segs_per_sec > 1) {
1833 		sit_i->sec_entries = vzalloc(MAIN_SECS(sbi) *
1834 					sizeof(struct sec_entry));
1835 		if (!sit_i->sec_entries)
1836 			return -ENOMEM;
1837 	}
1838 
1839 	/* get information related to SIT */
1840 	sit_segs = le32_to_cpu(raw_super->segment_count_sit) >> 1;
1841 
1842 	/* setup SIT bitmap from checkpoint pack */
1843 	bitmap_size = __bitmap_size(sbi, SIT_BITMAP);
1844 	src_bitmap = __bitmap_ptr(sbi, SIT_BITMAP);
1845 
1846 	dst_bitmap = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL);
1847 	if (!dst_bitmap)
1848 		return -ENOMEM;
1849 
1850 	/* init SIT information */
1851 	sit_i->s_ops = &default_salloc_ops;
1852 
1853 	sit_i->sit_base_addr = le32_to_cpu(raw_super->sit_blkaddr);
1854 	sit_i->sit_blocks = sit_segs << sbi->log_blocks_per_seg;
1855 	sit_i->written_valid_blocks = le64_to_cpu(ckpt->valid_block_count);
1856 	sit_i->sit_bitmap = dst_bitmap;
1857 	sit_i->bitmap_size = bitmap_size;
1858 	sit_i->dirty_sentries = 0;
1859 	sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK;
1860 	sit_i->elapsed_time = le64_to_cpu(ckpt->elapsed_time);
1861 	sit_i->mounted_time = CURRENT_TIME_SEC.tv_sec;
1862 	mutex_init(&sit_i->sentry_lock);
1863 	return 0;
1864 }
1865 
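/*
 * Allocate the free segment and free section bitmaps.  Both start fully
 * set, i.e. every segment looks in-use; init_free_segmap() later clears
 * the bits of segments that hold no valid blocks.
 */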
1866 static int build_free_segmap(struct f2fs_sb_info *sbi)
1867 {
1868 	struct free_segmap_info *free_i;
1869 	unsigned int bitmap_size, sec_bitmap_size;
1870 
1871 	/* allocate memory for free segmap information */
1872 	free_i = kzalloc(sizeof(struct free_segmap_info), GFP_KERNEL);
1873 	if (!free_i)
1874 		return -ENOMEM;
1875 
1876 	SM_I(sbi)->free_info = free_i;
1877 
1878 	bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
1879 	free_i->free_segmap = kmalloc(bitmap_size, GFP_KERNEL);
1880 	if (!free_i->free_segmap)
1881 		return -ENOMEM;
1882 
1883 	sec_bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
1884 	free_i->free_secmap = kmalloc(sec_bitmap_size, GFP_KERNEL);
1885 	if (!free_i->free_secmap)
1886 		return -ENOMEM;
1887 
1888 	/* mark all segments in-use temporarily; init_free_segmap() frees them */
1889 	memset(free_i->free_segmap, 0xff, bitmap_size);
1890 	memset(free_i->free_secmap, 0xff, sec_bitmap_size);
1891 
1892 	/* init free segmap information */
1893 	free_i->start_segno = GET_SEGNO_FROM_SEG0(sbi, MAIN_BLKADDR(sbi));
1894 	free_i->free_segments = 0;
1895 	free_i->free_sections = 0;
1896 	rwlock_init(&free_i->segmap_lock);
1897 	return 0;
1898 }
1899 
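/*
 * Allocate one curseg_info per active log (hot/warm/cold data and node)
 * and restore their summary blocks from the checkpoint.
 */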
1900 static int build_curseg(struct f2fs_sb_info *sbi)
1901 {
1902 	struct curseg_info *array;
1903 	int i;
1904 
1905 	array = kcalloc(NR_CURSEG_TYPE, sizeof(*array), GFP_KERNEL);
1906 	if (!array)
1907 		return -ENOMEM;
1908 
1909 	SM_I(sbi)->curseg_array = array;
1910 
1911 	for (i = 0; i < NR_CURSEG_TYPE; i++) {
1912 		mutex_init(&array[i].curseg_mutex);
1913 		array[i].sum_blk = kzalloc(PAGE_CACHE_SIZE, GFP_KERNEL);
1914 		if (!array[i].sum_blk)
1915 			return -ENOMEM;
1916 		array[i].segno = NULL_SEGNO;
1917 		array[i].next_blkoff = 0;
1918 	}
1919 	return restore_curseg_summaries(sbi);
1920 }
1921 
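/*
 * Load the on-disk SIT entries into the seg_entry array.  SIT blocks are
 * prefetched in MAX_BIO_BLOCKS-sized batches via ra_meta_pages(); a
 * segment that has a newer entry in the SIT journal of the cold data
 * curseg takes the journalled copy instead of the on-disk one.
 */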
1922 static void build_sit_entries(struct f2fs_sb_info *sbi)
1923 {
1924 	struct sit_info *sit_i = SIT_I(sbi);
1925 	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
1926 	struct f2fs_summary_block *sum = curseg->sum_blk;
1927 	int sit_blk_cnt = SIT_BLK_CNT(sbi);
1928 	unsigned int i, start, end;
1929 	unsigned int readed, start_blk = 0;
1930 	int nrpages = MAX_BIO_BLOCKS(sbi);
1931 
1932 	do {
1933 		readed = ra_meta_pages(sbi, start_blk, nrpages, META_SIT);
1934 
1935 		start = start_blk * sit_i->sents_per_block;
1936 		end = (start_blk + readed) * sit_i->sents_per_block;
1937 
1938 		for (; start < end && start < MAIN_SEGS(sbi); start++) {
1939 			struct seg_entry *se = &sit_i->sentries[start];
1940 			struct f2fs_sit_block *sit_blk;
1941 			struct f2fs_sit_entry sit;
1942 			struct page *page;
1943 
1944 			mutex_lock(&curseg->curseg_mutex);
1945 			for (i = 0; i < sits_in_cursum(sum); i++) {
1946 				if (le32_to_cpu(segno_in_journal(sum, i))
1947 								== start) {
1948 					sit = sit_in_journal(sum, i);
1949 					mutex_unlock(&curseg->curseg_mutex);
1950 					goto got_it;
1951 				}
1952 			}
1953 			mutex_unlock(&curseg->curseg_mutex);
1954 
1955 			page = get_current_sit_page(sbi, start);
1956 			sit_blk = (struct f2fs_sit_block *)page_address(page);
1957 			sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)];
1958 			f2fs_put_page(page, 1);
1959 got_it:
1960 			check_block_count(sbi, start, &sit);
1961 			seg_info_from_raw_sit(se, &sit);
1962 			if (sbi->segs_per_sec > 1) {
1963 				struct sec_entry *e = get_sec_entry(sbi, start);
1964 				e->valid_blocks += se->valid_blocks;
1965 			}
1966 		}
1967 		start_blk += readed;
1968 	} while (start_blk < sit_blk_cnt);
1969 }
1970 
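/*
 * Based on the loaded SIT, mark segments with no valid blocks as free,
 * then take the segments currently open for logging back as in-use.
 */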
1971 static void init_free_segmap(struct f2fs_sb_info *sbi)
1972 {
1973 	unsigned int start;
1974 	int type;
1975 
1976 	for (start = 0; start < MAIN_SEGS(sbi); start++) {
1977 		struct seg_entry *sentry = get_seg_entry(sbi, start);
1978 		if (!sentry->valid_blocks)
1979 			__set_free(sbi, start);
1980 	}
1981 
1982 	/* mark the currently open segments as in-use */
1983 	for (type = CURSEG_HOT_DATA; type <= CURSEG_COLD_NODE; type++) {
1984 		struct curseg_info *curseg_t = CURSEG_I(sbi, type);
1985 		__set_test_and_inuse(sbi, curseg_t->segno);
1986 	}
1987 }
1988 
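/*
 * Scan the in-use segments and put every partially valid one (neither
 * empty nor completely full) on the DIRTY list.
 */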
1989 static void init_dirty_segmap(struct f2fs_sb_info *sbi)
1990 {
1991 	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
1992 	struct free_segmap_info *free_i = FREE_I(sbi);
1993 	unsigned int segno = 0, offset = 0;
1994 	unsigned short valid_blocks;
1995 
1996 	while (1) {
1997 		/* find dirty segment based on free segmap */
1998 		segno = find_next_inuse(free_i, MAIN_SEGS(sbi), offset);
1999 		if (segno >= MAIN_SEGS(sbi))
2000 			break;
2001 		offset = segno + 1;
2002 		valid_blocks = get_valid_blocks(sbi, segno, 0);
2003 		if (valid_blocks == sbi->blocks_per_seg || !valid_blocks)
2004 			continue;
2005 		if (valid_blocks > sbi->blocks_per_seg) {
2006 			f2fs_bug_on(sbi, 1);
2007 			continue;
2008 		}
2009 		mutex_lock(&dirty_i->seglist_lock);
2010 		__locate_dirty_segment(sbi, segno, DIRTY);
2011 		mutex_unlock(&dirty_i->seglist_lock);
2012 	}
2013 }
2014 
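/* Allocate the bitmap that tracks sections selected as GC victims. */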
2015 static int init_victim_secmap(struct f2fs_sb_info *sbi)
2016 {
2017 	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
2018 	unsigned int bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
2019 
2020 	dirty_i->victim_secmap = kzalloc(bitmap_size, GFP_KERNEL);
2021 	if (!dirty_i->victim_secmap)
2022 		return -ENOMEM;
2023 	return 0;
2024 }
2025 
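/*
 * Allocate one bitmap per dirty type, populate the DIRTY bitmap from the
 * free segmap and set up the GC victim section map.
 */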
2026 static int build_dirty_segmap(struct f2fs_sb_info *sbi)
2027 {
2028 	struct dirty_seglist_info *dirty_i;
2029 	unsigned int bitmap_size, i;
2030 
2031 	/* allocate memory for the dirty segment list information */
2032 	dirty_i = kzalloc(sizeof(struct dirty_seglist_info), GFP_KERNEL);
2033 	if (!dirty_i)
2034 		return -ENOMEM;
2035 
2036 	SM_I(sbi)->dirty_info = dirty_i;
2037 	mutex_init(&dirty_i->seglist_lock);
2038 
2039 	bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
2040 
2041 	for (i = 0; i < NR_DIRTY_TYPE; i++) {
2042 		dirty_i->dirty_segmap[i] = kzalloc(bitmap_size, GFP_KERNEL);
2043 		if (!dirty_i->dirty_segmap[i])
2044 			return -ENOMEM;
2045 	}
2046 
2047 	init_dirty_segmap(sbi);
2048 	return init_victim_secmap(sbi);
2049 }
2050 
2051 /*
2052  * Update min, max modified time for cost-benefit GC algorithm
2053  */
2054 static void init_min_max_mtime(struct f2fs_sb_info *sbi)
2055 {
2056 	struct sit_info *sit_i = SIT_I(sbi);
2057 	unsigned int segno;
2058 
2059 	mutex_lock(&sit_i->sentry_lock);
2060 
2061 	sit_i->min_mtime = LLONG_MAX;
2062 
2063 	for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) {
2064 		unsigned int i;
2065 		unsigned long long mtime = 0;
2066 
2067 		for (i = 0; i < sbi->segs_per_sec; i++)
2068 			mtime += get_seg_entry(sbi, segno + i)->mtime;
2069 
2070 		mtime = div_u64(mtime, sbi->segs_per_sec);
2071 
2072 		if (sit_i->min_mtime > mtime)
2073 			sit_i->min_mtime = mtime;
2074 	}
2075 	sit_i->max_mtime = get_mtime(sbi);
2076 	mutex_unlock(&sit_i->sentry_lock);
2077 }
2078 
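/*
 * Mount-time entry point: read the layout constants from the superblock
 * and checkpoint, start the flush-merge issuer when that option is
 * enabled on a writable mount, then build the SIT, the free segmap, the
 * current segments and the dirty segmap in that order.
 */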
2079 int build_segment_manager(struct f2fs_sb_info *sbi)
2080 {
2081 	struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
2082 	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
2083 	struct f2fs_sm_info *sm_info;
2084 	int err;
2085 
2086 	sm_info = kzalloc(sizeof(struct f2fs_sm_info), GFP_KERNEL);
2087 	if (!sm_info)
2088 		return -ENOMEM;
2089 
2090 	/* init sm info */
2091 	sbi->sm_info = sm_info;
2092 	sm_info->seg0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr);
2093 	sm_info->main_blkaddr = le32_to_cpu(raw_super->main_blkaddr);
2094 	sm_info->segment_count = le32_to_cpu(raw_super->segment_count);
2095 	sm_info->reserved_segments = le32_to_cpu(ckpt->rsvd_segment_count);
2096 	sm_info->ovp_segments = le32_to_cpu(ckpt->overprov_segment_count);
2097 	sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main);
2098 	sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr);
2099 	sm_info->rec_prefree_segments = sm_info->main_segments *
2100 					DEF_RECLAIM_PREFREE_SEGMENTS / 100;
2101 	sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC;
2102 	sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
2103 	sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
2104 
2105 	INIT_LIST_HEAD(&sm_info->discard_list);
2106 	sm_info->nr_discards = 0;
2107 	sm_info->max_discards = 0;
2108 
2109 	INIT_LIST_HEAD(&sm_info->sit_entry_set);
2110 
2111 	if (test_opt(sbi, FLUSH_MERGE) && !f2fs_readonly(sbi->sb)) {
2112 		err = create_flush_cmd_control(sbi);
2113 		if (err)
2114 			return err;
2115 	}
2116 
2117 	err = build_sit_info(sbi);
2118 	if (err)
2119 		return err;
2120 	err = build_free_segmap(sbi);
2121 	if (err)
2122 		return err;
2123 	err = build_curseg(sbi);
2124 	if (err)
2125 		return err;
2126 
2127 	/* reinit free segmap based on SIT */
2128 	build_sit_entries(sbi);
2129 
2130 	init_free_segmap(sbi);
2131 	err = build_dirty_segmap(sbi);
2132 	if (err)
2133 		return err;
2134 
2135 	init_min_max_mtime(sbi);
2136 	return 0;
2137 }
2138 
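/* Free one dirty-type bitmap and reset its dirty segment count. */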
2139 static void discard_dirty_segmap(struct f2fs_sb_info *sbi,
2140 		enum dirty_type dirty_type)
2141 {
2142 	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
2143 
2144 	mutex_lock(&dirty_i->seglist_lock);
2145 	kfree(dirty_i->dirty_segmap[dirty_type]);
2146 	dirty_i->nr_dirty[dirty_type] = 0;
2147 	mutex_unlock(&dirty_i->seglist_lock);
2148 }
2149 
2150 static void destroy_victim_secmap(struct f2fs_sb_info *sbi)
2151 {
2152 	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
2153 	kfree(dirty_i->victim_secmap);
2154 }
2155 
2156 static void destroy_dirty_segmap(struct f2fs_sb_info *sbi)
2157 {
2158 	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
2159 	int i;
2160 
2161 	if (!dirty_i)
2162 		return;
2163 
2164 	/* discard the pre-free/dirty segment lists */
2165 	for (i = 0; i < NR_DIRTY_TYPE; i++)
2166 		discard_dirty_segmap(sbi, i);
2167 
2168 	destroy_victim_secmap(sbi);
2169 	SM_I(sbi)->dirty_info = NULL;
2170 	kfree(dirty_i);
2171 }
2172 
2173 static void destroy_curseg(struct f2fs_sb_info *sbi)
2174 {
2175 	struct curseg_info *array = SM_I(sbi)->curseg_array;
2176 	int i;
2177 
2178 	if (!array)
2179 		return;
2180 	SM_I(sbi)->curseg_array = NULL;
2181 	for (i = 0; i < NR_CURSEG_TYPE; i++)
2182 		kfree(array[i].sum_blk);
2183 	kfree(array);
2184 }
2185 
2186 static void destroy_free_segmap(struct f2fs_sb_info *sbi)
2187 {
2188 	struct free_segmap_info *free_i = SM_I(sbi)->free_info;
2189 	if (!free_i)
2190 		return;
2191 	SM_I(sbi)->free_info = NULL;
2192 	kfree(free_i->free_segmap);
2193 	kfree(free_i->free_secmap);
2194 	kfree(free_i);
2195 }
2196 
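/*
 * Undo build_sit_info(): free the per-segment validity maps, the sentry
 * arrays, the dirty-sentry bitmap and the copied SIT bitmap.
 */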
2197 static void destroy_sit_info(struct f2fs_sb_info *sbi)
2198 {
2199 	struct sit_info *sit_i = SIT_I(sbi);
2200 	unsigned int start;
2201 
2202 	if (!sit_i)
2203 		return;
2204 
2205 	if (sit_i->sentries) {
2206 		for (start = 0; start < MAIN_SEGS(sbi); start++) {
2207 			kfree(sit_i->sentries[start].cur_valid_map);
2208 			kfree(sit_i->sentries[start].ckpt_valid_map);
2209 		}
2210 	}
2211 	vfree(sit_i->sentries);
2212 	vfree(sit_i->sec_entries);
2213 	kfree(sit_i->dirty_sentries_bitmap);
2214 
2215 	SM_I(sbi)->sit_info = NULL;
2216 	kfree(sit_i->sit_bitmap);
2217 	kfree(sit_i);
2218 }
2219 
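/*
 * Tear down the whole segment manager.  The destructors below all
 * tolerate a structure that was never allocated, so a partially built
 * manager unwinds safely.
 */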
2220 void destroy_segment_manager(struct f2fs_sb_info *sbi)
2221 {
2222 	struct f2fs_sm_info *sm_info = SM_I(sbi);
2223 
2224 	if (!sm_info)
2225 		return;
2226 	destroy_flush_cmd_control(sbi);
2227 	destroy_dirty_segmap(sbi);
2228 	destroy_curseg(sbi);
2229 	destroy_free_segmap(sbi);
2230 	destroy_sit_info(sbi);
2231 	sbi->sm_info = NULL;
2232 	kfree(sm_info);
2233 }
2234 
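/*
 * Create the slab caches shared by all mounted f2fs instances; undo the
 * ones already created if a later allocation fails.
 */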
2235 int __init create_segment_manager_caches(void)
2236 {
2237 	discard_entry_slab = f2fs_kmem_cache_create("discard_entry",
2238 			sizeof(struct discard_entry));
2239 	if (!discard_entry_slab)
2240 		goto fail;
2241 
2242 	sit_entry_set_slab = f2fs_kmem_cache_create("sit_entry_set",
2243 			sizeof(struct sit_entry_set));
2244 	if (!sit_entry_set_slab)
2245 		goto destroy_discard_entry;
2246 
2247 	inmem_entry_slab = f2fs_kmem_cache_create("inmem_page_entry",
2248 			sizeof(struct inmem_pages));
2249 	if (!inmem_entry_slab)
2250 		goto destroy_sit_entry_set;
2251 	return 0;
2252 
2253 destroy_sit_entry_set:
2254 	kmem_cache_destroy(sit_entry_set_slab);
2255 destroy_discard_entry:
2256 	kmem_cache_destroy(discard_entry_slab);
2257 fail:
2258 	return -ENOMEM;
2259 }
2260 
2261 void destroy_segment_manager_caches(void)
2262 {
2263 	kmem_cache_destroy(sit_entry_set_slab);
2264 	kmem_cache_destroy(discard_entry_slab);
2265 	kmem_cache_destroy(inmem_entry_slab);
2266 }
2267