xref: /openbmc/linux/fs/f2fs/checkpoint.c (revision 483eb062)
1 /*
2  * fs/f2fs/checkpoint.c
3  *
4  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
5  *             http://www.samsung.com/
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License version 2 as
9  * published by the Free Software Foundation.
10  */
11 #include <linux/fs.h>
12 #include <linux/bio.h>
13 #include <linux/mpage.h>
14 #include <linux/writeback.h>
15 #include <linux/blkdev.h>
16 #include <linux/f2fs_fs.h>
17 #include <linux/pagevec.h>
18 #include <linux/swap.h>
19 
20 #include "f2fs.h"
21 #include "node.h"
22 #include "segment.h"
23 #include <trace/events/f2fs.h>
24 
25 static struct kmem_cache *orphan_entry_slab;
26 static struct kmem_cache *inode_entry_slab;
27 
28 /*
29  * We guarantee no failure on the returned page.
30  */
31 struct page *grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
32 {
33 	struct address_space *mapping = META_MAPPING(sbi);
34 	struct page *page = NULL;
35 repeat:
36 	page = grab_cache_page(mapping, index);
37 	if (!page) {
38 		cond_resched();
39 		goto repeat;
40 	}
41 
42 	/* We wait on writeback only inside grab_meta_page() */
43 	wait_on_page_writeback(page);
44 	SetPageUptodate(page);
45 	return page;
46 }
47 
48 /*
49  * We guarantee no failure on the returned page.
50  */
51 struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
52 {
53 	struct address_space *mapping = META_MAPPING(sbi);
54 	struct page *page;
55 repeat:
56 	page = grab_cache_page(mapping, index);
57 	if (!page) {
58 		cond_resched();
59 		goto repeat;
60 	}
61 	if (PageUptodate(page))
62 		goto out;
63 
64 	if (f2fs_submit_page_bio(sbi, page, index,
65 				READ_SYNC | REQ_META | REQ_PRIO))
66 		goto repeat;
67 
68 	lock_page(page);
69 	if (unlikely(page->mapping != mapping)) {
70 		f2fs_put_page(page, 1);
71 		goto repeat;
72 	}
73 out:
74 	mark_page_accessed(page);
75 	return page;
76 }
77 
78 static int f2fs_write_meta_page(struct page *page,
79 				struct writeback_control *wbc)
80 {
81 	struct inode *inode = page->mapping->host;
82 	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
83 
84 	/* Should not write any meta pages if any IO error has occurred */
85 	if (unlikely(sbi->por_doing ||
86 			is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ERROR_FLAG)))
87 		goto redirty_out;
88 
89 	if (wbc->for_reclaim)
90 		goto redirty_out;
91 
92 	wait_on_page_writeback(page);
93 
94 	write_meta_page(sbi, page);
95 	dec_page_count(sbi, F2FS_DIRTY_META);
96 	unlock_page(page);
97 	return 0;
98 
99 redirty_out:
100 	dec_page_count(sbi, F2FS_DIRTY_META);
101 	wbc->pages_skipped++;
102 	set_page_dirty(page);
103 	return AOP_WRITEPAGE_ACTIVATE;
104 }
105 
106 static int f2fs_write_meta_pages(struct address_space *mapping,
107 				struct writeback_control *wbc)
108 {
109 	struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
110 	int nrpages = MAX_BIO_BLOCKS(max_hw_blocks(sbi));
111 	long written;
112 
113 	if (wbc->for_kupdate)
114 		return 0;
115 
116 	/* collect a number of dirty meta pages and write them together */
117 	if (get_pages(sbi, F2FS_DIRTY_META) < nrpages)
118 		return 0;
119 
120 	/* if mounting failed, skip writing meta pages */
121 	mutex_lock(&sbi->cp_mutex);
122 	written = sync_meta_pages(sbi, META, nrpages);
123 	mutex_unlock(&sbi->cp_mutex);
124 	wbc->nr_to_write -= written;
125 	return 0;
126 }
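
/*
 * Note (editorial addition, not in the original source): ->writepages for
 * the meta mapping deliberately defers writeback until at least
 * MAX_BIO_BLOCKS(max_hw_blocks(sbi)) dirty meta pages have accumulated
 * (and skips kupdate-style writeback entirely), so that sync_meta_pages()
 * above can write a whole batch together while holding cp_mutex.
 */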
127 
128 long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
129 						long nr_to_write)
130 {
131 	struct address_space *mapping = META_MAPPING(sbi);
132 	pgoff_t index = 0, end = LONG_MAX;
133 	struct pagevec pvec;
134 	long nwritten = 0;
135 	struct writeback_control wbc = {
136 		.for_reclaim = 0,
137 	};
138 
139 	pagevec_init(&pvec, 0);
140 
141 	while (index <= end) {
142 		int i, nr_pages;
143 		nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
144 				PAGECACHE_TAG_DIRTY,
145 				min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
146 		if (unlikely(nr_pages == 0))
147 			break;
148 
149 		for (i = 0; i < nr_pages; i++) {
150 			struct page *page = pvec.pages[i];
151 			lock_page(page);
152 			f2fs_bug_on(page->mapping != mapping);
153 			f2fs_bug_on(!PageDirty(page));
154 			clear_page_dirty_for_io(page);
155 			if (f2fs_write_meta_page(page, &wbc)) {
156 				unlock_page(page);
157 				break;
158 			}
159 			nwritten++;
160 			if (unlikely(nwritten >= nr_to_write))
161 				break;
162 		}
163 		pagevec_release(&pvec);
164 		cond_resched();
165 	}
166 
167 	if (nwritten)
168 		f2fs_submit_merged_bio(sbi, type, WRITE);
169 
170 	return nwritten;
171 }
172 
173 static int f2fs_set_meta_page_dirty(struct page *page)
174 {
175 	struct address_space *mapping = page->mapping;
176 	struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
177 
178 	trace_f2fs_set_page_dirty(page, META);
179 
180 	SetPageUptodate(page);
181 	if (!PageDirty(page)) {
182 		__set_page_dirty_nobuffers(page);
183 		inc_page_count(sbi, F2FS_DIRTY_META);
184 		return 1;
185 	}
186 	return 0;
187 }
188 
189 const struct address_space_operations f2fs_meta_aops = {
190 	.writepage	= f2fs_write_meta_page,
191 	.writepages	= f2fs_write_meta_pages,
192 	.set_page_dirty	= f2fs_set_meta_page_dirty,
193 };
194 
195 int acquire_orphan_inode(struct f2fs_sb_info *sbi)
196 {
197 	int err = 0;
198 
199 	spin_lock(&sbi->orphan_inode_lock);
200 	if (unlikely(sbi->n_orphans >= sbi->max_orphans))
201 		err = -ENOSPC;
202 	else
203 		sbi->n_orphans++;
204 	spin_unlock(&sbi->orphan_inode_lock);
205 
206 	return err;
207 }
208 
209 void release_orphan_inode(struct f2fs_sb_info *sbi)
210 {
211 	spin_lock(&sbi->orphan_inode_lock);
212 	f2fs_bug_on(sbi->n_orphans == 0);
213 	sbi->n_orphans--;
214 	spin_unlock(&sbi->orphan_inode_lock);
215 }
216 
217 void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
218 {
219 	struct list_head *head, *this;
220 	struct orphan_inode_entry *new = NULL, *orphan = NULL;
221 
222 	new = f2fs_kmem_cache_alloc(orphan_entry_slab, GFP_ATOMIC);
223 	new->ino = ino;
224 
225 	spin_lock(&sbi->orphan_inode_lock);
226 	head = &sbi->orphan_inode_list;
227 	list_for_each(this, head) {
228 		orphan = list_entry(this, struct orphan_inode_entry, list);
229 		if (orphan->ino == ino) {
230 			spin_unlock(&sbi->orphan_inode_lock);
231 			kmem_cache_free(orphan_entry_slab, new);
232 			return;
233 		}
234 
235 		if (orphan->ino > ino)
236 			break;
237 		orphan = NULL;
238 	}
239 
240 	/* add the new entry into the list, which is sorted by inode number */
241 	if (orphan)
242 		list_add(&new->list, this->prev);
243 	else
244 		list_add_tail(&new->list, head);
245 	spin_unlock(&sbi->orphan_inode_lock);
246 }
247 
248 void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
249 {
250 	struct list_head *head;
251 	struct orphan_inode_entry *orphan;
252 
253 	spin_lock(&sbi->orphan_inode_lock);
254 	head = &sbi->orphan_inode_list;
255 	list_for_each_entry(orphan, head, list) {
256 		if (orphan->ino == ino) {
257 			list_del(&orphan->list);
258 			kmem_cache_free(orphan_entry_slab, orphan);
259 			f2fs_bug_on(sbi->n_orphans == 0);
260 			sbi->n_orphans--;
261 			break;
262 		}
263 	}
264 	spin_unlock(&sbi->orphan_inode_lock);
265 }
266 
267 static void recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
268 {
269 	struct inode *inode = f2fs_iget(sbi->sb, ino);
270 	f2fs_bug_on(IS_ERR(inode));
271 	clear_nlink(inode);
272 
273 	/* truncate all the data during iput */
274 	iput(inode);
275 }
276 
277 void recover_orphan_inodes(struct f2fs_sb_info *sbi)
278 {
279 	block_t start_blk, orphan_blkaddr, i, j;
280 
281 	if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG))
282 		return;
283 
284 	sbi->por_doing = true;
285 	start_blk = __start_cp_addr(sbi) + 1;
286 	orphan_blkaddr = __start_sum_addr(sbi) - 1;
287 
288 	for (i = 0; i < orphan_blkaddr; i++) {
289 		struct page *page = get_meta_page(sbi, start_blk + i);
290 		struct f2fs_orphan_block *orphan_blk;
291 
292 		orphan_blk = (struct f2fs_orphan_block *)page_address(page);
293 		for (j = 0; j < le32_to_cpu(orphan_blk->entry_count); j++) {
294 			nid_t ino = le32_to_cpu(orphan_blk->ino[j]);
295 			recover_orphan_inode(sbi, ino);
296 		}
297 		f2fs_put_page(page, 1);
298 	}
299 	/* clear Orphan Flag */
300 	clear_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG);
301 	sbi->por_doing = false;
302 	return;
303 }
304 
305 static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
306 {
307 	struct list_head *head;
308 	struct f2fs_orphan_block *orphan_blk = NULL;
309 	unsigned int nentries = 0;
310 	unsigned short index;
311 	unsigned short orphan_blocks = (unsigned short)((sbi->n_orphans +
312 		(F2FS_ORPHANS_PER_BLOCK - 1)) / F2FS_ORPHANS_PER_BLOCK);
313 	struct page *page = NULL;
314 	struct orphan_inode_entry *orphan = NULL;
315 
316 	for (index = 0; index < orphan_blocks; index++)
317 		grab_meta_page(sbi, start_blk + index);
318 
319 	index = 1;
320 	spin_lock(&sbi->orphan_inode_lock);
321 	head = &sbi->orphan_inode_list;
322 
323 	/* loop over each orphan inode entry and write it into the orphan blocks */
324 	list_for_each_entry(orphan, head, list) {
325 		if (!page) {
326 			page = find_get_page(META_MAPPING(sbi), start_blk++);
327 			f2fs_bug_on(!page);
328 			orphan_blk =
329 				(struct f2fs_orphan_block *)page_address(page);
330 			memset(orphan_blk, 0, sizeof(*orphan_blk));
331 			f2fs_put_page(page, 0);
332 		}
333 
334 		orphan_blk->ino[nentries++] = cpu_to_le32(orphan->ino);
335 
336 		if (nentries == F2FS_ORPHANS_PER_BLOCK) {
337 			/*
338 			 * when an orphan block is full (1020 entries), flush
339 			 * the current orphan block and bring another one into
340 			 * memory (see the note after this function)
341 			 */
342 			orphan_blk->blk_addr = cpu_to_le16(index);
343 			orphan_blk->blk_count = cpu_to_le16(orphan_blocks);
344 			orphan_blk->entry_count = cpu_to_le32(nentries);
345 			set_page_dirty(page);
346 			f2fs_put_page(page, 1);
347 			index++;
348 			nentries = 0;
349 			page = NULL;
350 		}
351 	}
352 
353 	if (page) {
354 		orphan_blk->blk_addr = cpu_to_le16(index);
355 		orphan_blk->blk_count = cpu_to_le16(orphan_blocks);
356 		orphan_blk->entry_count = cpu_to_le32(nentries);
357 		set_page_dirty(page);
358 		f2fs_put_page(page, 1);
359 	}
360 
361 	spin_unlock(&sbi->orphan_inode_lock);
362 }
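
/*
 * Note (editorial addition, not in the original source): the 1020 figure
 * mentioned above follows from the on-disk layout of struct
 * f2fs_orphan_block. Assuming a 4 KB block and the 16-byte footer
 * (reserved, blk_addr, blk_count, entry_count, check_sum) defined in
 * f2fs_fs.h of this era, the worked arithmetic is:
 *
 *	F2FS_ORPHANS_PER_BLOCK = (4096 - 16) / sizeof(__le32) = 1020
 *
 * so each orphan block written here records at most 1020 orphan inode
 * numbers before the next block is started.
 */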
363 
364 static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
365 				block_t cp_addr, unsigned long long *version)
366 {
367 	struct page *cp_page_1, *cp_page_2 = NULL;
368 	unsigned long blk_size = sbi->blocksize;
369 	struct f2fs_checkpoint *cp_block;
370 	unsigned long long cur_version = 0, pre_version = 0;
371 	size_t crc_offset;
372 	__u32 crc = 0;
373 
374 	/* Read the 1st cp block in this CP pack */
375 	cp_page_1 = get_meta_page(sbi, cp_addr);
376 
377 	/* get the version number */
378 	cp_block = (struct f2fs_checkpoint *)page_address(cp_page_1);
379 	crc_offset = le32_to_cpu(cp_block->checksum_offset);
380 	if (crc_offset >= blk_size)
381 		goto invalid_cp1;
382 
383 	crc = le32_to_cpu(*((__u32 *)((unsigned char *)cp_block + crc_offset)));
384 	if (!f2fs_crc_valid(crc, cp_block, crc_offset))
385 		goto invalid_cp1;
386 
387 	pre_version = cur_cp_version(cp_block);
388 
389 	/* Read the 2nd cp block in this CP pack */
390 	cp_addr += le32_to_cpu(cp_block->cp_pack_total_block_count) - 1;
391 	cp_page_2 = get_meta_page(sbi, cp_addr);
392 
393 	cp_block = (struct f2fs_checkpoint *)page_address(cp_page_2);
394 	crc_offset = le32_to_cpu(cp_block->checksum_offset);
395 	if (crc_offset >= blk_size)
396 		goto invalid_cp2;
397 
398 	crc = le32_to_cpu(*((__u32 *)((unsigned char *)cp_block + crc_offset)));
399 	if (!f2fs_crc_valid(crc, cp_block, crc_offset))
400 		goto invalid_cp2;
401 
402 	cur_version = cur_cp_version(cp_block);
403 
404 	if (cur_version == pre_version) {
405 		*version = cur_version;
406 		f2fs_put_page(cp_page_2, 1);
407 		return cp_page_1;
408 	}
409 invalid_cp2:
410 	f2fs_put_page(cp_page_2, 1);
411 invalid_cp1:
412 	f2fs_put_page(cp_page_1, 1);
413 	return NULL;
414 }
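
/*
 * Note (editorial addition, not in the original source): validate_checkpoint()
 * accepts a CP pack only when the checkpoint header stored in the pack's
 * first block and the copy stored in its last block both pass the CRC check
 * and carry the same version number. A pack whose write was interrupted
 * (e.g. by a crash) leaves the two copies out of sync and is rejected, so
 * get_valid_checkpoint() below falls back to the other pack.
 */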
415 
416 int get_valid_checkpoint(struct f2fs_sb_info *sbi)
417 {
418 	struct f2fs_checkpoint *cp_block;
419 	struct f2fs_super_block *fsb = sbi->raw_super;
420 	struct page *cp1, *cp2, *cur_page;
421 	unsigned long blk_size = sbi->blocksize;
422 	unsigned long long cp1_version = 0, cp2_version = 0;
423 	unsigned long long cp_start_blk_no;
424 
425 	sbi->ckpt = kzalloc(blk_size, GFP_KERNEL);
426 	if (!sbi->ckpt)
427 		return -ENOMEM;
428 	/*
429 	 * Finding the valid cp block involves reading both
430 	 * sets (cp pack 1 and cp pack 2)
431 	 */
432 	cp_start_blk_no = le32_to_cpu(fsb->cp_blkaddr);
433 	cp1 = validate_checkpoint(sbi, cp_start_blk_no, &cp1_version);
434 
435 	/* The second checkpoint pack should start at the next segment */
436 	cp_start_blk_no += ((unsigned long long)1) <<
437 				le32_to_cpu(fsb->log_blocks_per_seg);
438 	cp2 = validate_checkpoint(sbi, cp_start_blk_no, &cp2_version);
439 
440 	if (cp1 && cp2) {
441 		if (ver_after(cp2_version, cp1_version))
442 			cur_page = cp2;
443 		else
444 			cur_page = cp1;
445 	} else if (cp1) {
446 		cur_page = cp1;
447 	} else if (cp2) {
448 		cur_page = cp2;
449 	} else {
450 		goto fail_no_cp;
451 	}
452 
453 	cp_block = (struct f2fs_checkpoint *)page_address(cur_page);
454 	memcpy(sbi->ckpt, cp_block, blk_size);
455 
456 	f2fs_put_page(cp1, 1);
457 	f2fs_put_page(cp2, 1);
458 	return 0;
459 
460 fail_no_cp:
461 	kfree(sbi->ckpt);
462 	return -EINVAL;
463 }
464 
465 static int __add_dirty_inode(struct inode *inode, struct dir_inode_entry *new)
466 {
467 	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
468 	struct list_head *head = &sbi->dir_inode_list;
469 	struct list_head *this;
470 
471 	list_for_each(this, head) {
472 		struct dir_inode_entry *entry;
473 		entry = list_entry(this, struct dir_inode_entry, list);
474 		if (unlikely(entry->inode == inode))
475 			return -EEXIST;
476 	}
477 	list_add_tail(&new->list, head);
478 	stat_inc_dirty_dir(sbi);
479 	return 0;
480 }
481 
482 void set_dirty_dir_page(struct inode *inode, struct page *page)
483 {
484 	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
485 	struct dir_inode_entry *new;
486 
487 	if (!S_ISDIR(inode->i_mode))
488 		return;
489 
490 	new = f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS);
491 	new->inode = inode;
492 	INIT_LIST_HEAD(&new->list);
493 
494 	spin_lock(&sbi->dir_inode_lock);
495 	if (__add_dirty_inode(inode, new))
496 		kmem_cache_free(inode_entry_slab, new);
497 
498 	inc_page_count(sbi, F2FS_DIRTY_DENTS);
499 	inode_inc_dirty_dents(inode);
500 	SetPagePrivate(page);
501 	spin_unlock(&sbi->dir_inode_lock);
502 }
503 
504 void add_dirty_dir_inode(struct inode *inode)
505 {
506 	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
507 	struct dir_inode_entry *new =
508 			f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS);
509 
510 	new->inode = inode;
511 	INIT_LIST_HEAD(&new->list);
512 
513 	spin_lock(&sbi->dir_inode_lock);
514 	if (__add_dirty_inode(inode, new))
515 		kmem_cache_free(inode_entry_slab, new);
516 	spin_unlock(&sbi->dir_inode_lock);
517 }
518 
519 void remove_dirty_dir_inode(struct inode *inode)
520 {
521 	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
522 
523 	struct list_head *this, *head;
524 
525 	if (!S_ISDIR(inode->i_mode))
526 		return;
527 
528 	spin_lock(&sbi->dir_inode_lock);
529 	if (atomic_read(&F2FS_I(inode)->dirty_dents)) {
530 		spin_unlock(&sbi->dir_inode_lock);
531 		return;
532 	}
533 
534 	head = &sbi->dir_inode_list;
535 	list_for_each(this, head) {
536 		struct dir_inode_entry *entry;
537 		entry = list_entry(this, struct dir_inode_entry, list);
538 		if (entry->inode == inode) {
539 			list_del(&entry->list);
540 			kmem_cache_free(inode_entry_slab, entry);
541 			stat_dec_dirty_dir(sbi);
542 			break;
543 		}
544 	}
545 	spin_unlock(&sbi->dir_inode_lock);
546 
547 	/* Only from the recovery routine */
548 	if (is_inode_flag_set(F2FS_I(inode), FI_DELAY_IPUT)) {
549 		clear_inode_flag(F2FS_I(inode), FI_DELAY_IPUT);
550 		iput(inode);
551 	}
552 }
553 
554 struct inode *check_dirty_dir_inode(struct f2fs_sb_info *sbi, nid_t ino)
555 {
556 
557 	struct list_head *this, *head;
558 	struct inode *inode = NULL;
559 
560 	spin_lock(&sbi->dir_inode_lock);
561 
562 	head = &sbi->dir_inode_list;
563 	list_for_each(this, head) {
564 		struct dir_inode_entry *entry;
565 		entry = list_entry(this, struct dir_inode_entry, list);
566 		if (entry->inode->i_ino == ino) {
567 			inode = entry->inode;
568 			break;
569 		}
570 	}
571 	spin_unlock(&sbi->dir_inode_lock);
572 	return inode;
573 }
574 
575 void sync_dirty_dir_inodes(struct f2fs_sb_info *sbi)
576 {
577 	struct list_head *head;
578 	struct dir_inode_entry *entry;
579 	struct inode *inode;
580 retry:
581 	spin_lock(&sbi->dir_inode_lock);
582 
583 	head = &sbi->dir_inode_list;
584 	if (list_empty(head)) {
585 		spin_unlock(&sbi->dir_inode_lock);
586 		return;
587 	}
588 	entry = list_entry(head->next, struct dir_inode_entry, list);
589 	inode = igrab(entry->inode);
590 	spin_unlock(&sbi->dir_inode_lock);
591 	if (inode) {
592 		filemap_flush(inode->i_mapping);
593 		iput(inode);
594 	} else {
595 		/*
596 		 * We should submit the bio, since there exist several
597 		 * dentry pages under writeback in the freeing inode.
598 		 */
599 		f2fs_submit_merged_bio(sbi, DATA, WRITE);
600 	}
601 	goto retry;
602 }
603 
604 /*
605  * Freeze all the FS-operations for checkpoint.
606  */
607 static void block_operations(struct f2fs_sb_info *sbi)
608 {
609 	struct writeback_control wbc = {
610 		.sync_mode = WB_SYNC_ALL,
611 		.nr_to_write = LONG_MAX,
612 		.for_reclaim = 0,
613 	};
614 	struct blk_plug plug;
615 
616 	blk_start_plug(&plug);
617 
618 retry_flush_dents:
619 	f2fs_lock_all(sbi);
620 	/* write all the dirty dentry pages */
621 	if (get_pages(sbi, F2FS_DIRTY_DENTS)) {
622 		f2fs_unlock_all(sbi);
623 		sync_dirty_dir_inodes(sbi);
624 		goto retry_flush_dents;
625 	}
626 
627 	/*
628 	 * POR: we should ensure that there are no dirty node pages
629 	 * until the nat/sit flush is finished.
630 	 */
631 retry_flush_nodes:
632 	mutex_lock(&sbi->node_write);
633 
634 	if (get_pages(sbi, F2FS_DIRTY_NODES)) {
635 		mutex_unlock(&sbi->node_write);
636 		sync_node_pages(sbi, 0, &wbc);
637 		goto retry_flush_nodes;
638 	}
639 	blk_finish_plug(&plug);
640 }
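
/*
 * Note (editorial addition, not in the original source): block_operations()
 * quiesces the filesystem in two phases: dirty dentry pages are flushed and
 * retried under f2fs_lock_all(), then dirty node pages are flushed and
 * retried under the node_write mutex. Both locks are still held on return
 * and are released in unblock_operations() once do_checkpoint() is done.
 */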
641 
642 static void unblock_operations(struct f2fs_sb_info *sbi)
643 {
644 	mutex_unlock(&sbi->node_write);
645 	f2fs_unlock_all(sbi);
646 }
647 
648 static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi)
649 {
650 	DEFINE_WAIT(wait);
651 
652 	for (;;) {
653 		prepare_to_wait(&sbi->cp_wait, &wait, TASK_UNINTERRUPTIBLE);
654 
655 		if (!get_pages(sbi, F2FS_WRITEBACK))
656 			break;
657 
658 		io_schedule();
659 	}
660 	finish_wait(&sbi->cp_wait, &wait);
661 }
662 
663 static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
664 {
665 	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
666 	nid_t last_nid = 0;
667 	block_t start_blk;
668 	struct page *cp_page;
669 	unsigned int data_sum_blocks, orphan_blocks;
670 	__u32 crc32 = 0;
671 	void *kaddr;
672 	int i;
673 
674 	/* Flush all the NAT/SIT pages */
675 	while (get_pages(sbi, F2FS_DIRTY_META))
676 		sync_meta_pages(sbi, META, LONG_MAX);
677 
678 	next_free_nid(sbi, &last_nid);
679 
680 	/*
681 	 * modify the checkpoint;
682 	 * the version number has already been updated
683 	 */
684 	ckpt->elapsed_time = cpu_to_le64(get_mtime(sbi));
685 	ckpt->valid_block_count = cpu_to_le64(valid_user_blocks(sbi));
686 	ckpt->free_segment_count = cpu_to_le32(free_segments(sbi));
687 	for (i = 0; i < 3; i++) {
688 		ckpt->cur_node_segno[i] =
689 			cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_NODE));
690 		ckpt->cur_node_blkoff[i] =
691 			cpu_to_le16(curseg_blkoff(sbi, i + CURSEG_HOT_NODE));
692 		ckpt->alloc_type[i + CURSEG_HOT_NODE] =
693 				curseg_alloc_type(sbi, i + CURSEG_HOT_NODE);
694 	}
695 	for (i = 0; i < 3; i++) {
696 		ckpt->cur_data_segno[i] =
697 			cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_DATA));
698 		ckpt->cur_data_blkoff[i] =
699 			cpu_to_le16(curseg_blkoff(sbi, i + CURSEG_HOT_DATA));
700 		ckpt->alloc_type[i + CURSEG_HOT_DATA] =
701 				curseg_alloc_type(sbi, i + CURSEG_HOT_DATA);
702 	}
703 
704 	ckpt->valid_node_count = cpu_to_le32(valid_node_count(sbi));
705 	ckpt->valid_inode_count = cpu_to_le32(valid_inode_count(sbi));
706 	ckpt->next_free_nid = cpu_to_le32(last_nid);
707 
708 	/* 2 cp blocks + n data seg summary blocks + orphan inode blocks */
709 	data_sum_blocks = npages_for_summary_flush(sbi);
710 	if (data_sum_blocks < 3)
711 		set_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
712 	else
713 		clear_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
714 
715 	orphan_blocks = (sbi->n_orphans + F2FS_ORPHANS_PER_BLOCK - 1)
716 					/ F2FS_ORPHANS_PER_BLOCK;
717 	ckpt->cp_pack_start_sum = cpu_to_le32(1 + orphan_blocks);
718 
719 	if (is_umount) {
720 		set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
721 		ckpt->cp_pack_total_block_count = cpu_to_le32(2 +
722 			data_sum_blocks + orphan_blocks + NR_CURSEG_NODE_TYPE);
723 	} else {
724 		clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
725 		ckpt->cp_pack_total_block_count = cpu_to_le32(2 +
726 			data_sum_blocks + orphan_blocks);
727 	}
728 
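	/*
	 * Worked example (editorial addition, not in the original source):
	 * with the counts computed above, the CP pack written below is laid
	 * out as
	 *
	 *	[cp header][orphan blocks][data summaries][node summaries][cp header copy]
	 *
	 * where node summaries are present only when is_umount is true. A
	 * regular checkpoint with no orphans and compacted data summaries
	 * (data_sum_blocks == 1) takes 2 + 1 + 0 = 3 blocks; an umount
	 * checkpoint with data_sum_blocks == 3 adds NR_CURSEG_NODE_TYPE (3)
	 * node summary blocks for 2 + 3 + 0 + 3 = 8 blocks in total.
	 */
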
729 	if (sbi->n_orphans)
730 		set_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);
731 	else
732 		clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);
733 
734 	/* update SIT/NAT bitmap */
735 	get_sit_bitmap(sbi, __bitmap_ptr(sbi, SIT_BITMAP));
736 	get_nat_bitmap(sbi, __bitmap_ptr(sbi, NAT_BITMAP));
737 
738 	crc32 = f2fs_crc32(ckpt, le32_to_cpu(ckpt->checksum_offset));
739 	*((__le32 *)((unsigned char *)ckpt +
740 				le32_to_cpu(ckpt->checksum_offset)))
741 				= cpu_to_le32(crc32);
742 
743 	start_blk = __start_cp_addr(sbi);
744 
745 	/* write out checkpoint buffer at block 0 */
746 	cp_page = grab_meta_page(sbi, start_blk++);
747 	kaddr = page_address(cp_page);
748 	memcpy(kaddr, ckpt, (1 << sbi->log_blocksize));
749 	set_page_dirty(cp_page);
750 	f2fs_put_page(cp_page, 1);
751 
752 	if (sbi->n_orphans) {
753 		write_orphan_inodes(sbi, start_blk);
754 		start_blk += orphan_blocks;
755 	}
756 
757 	write_data_summaries(sbi, start_blk);
758 	start_blk += data_sum_blocks;
759 	if (is_umount) {
760 		write_node_summaries(sbi, start_blk);
761 		start_blk += NR_CURSEG_NODE_TYPE;
762 	}
763 
764 	/* write out checkpoint block */
765 	cp_page = grab_meta_page(sbi, start_blk);
766 	kaddr = page_address(cp_page);
767 	memcpy(kaddr, ckpt, (1 << sbi->log_blocksize));
768 	set_page_dirty(cp_page);
769 	f2fs_put_page(cp_page, 1);
770 
771 	/* wait for previously submitted node/meta page writeback */
772 	wait_on_all_pages_writeback(sbi);
773 
774 	filemap_fdatawait_range(NODE_MAPPING(sbi), 0, LONG_MAX);
775 	filemap_fdatawait_range(META_MAPPING(sbi), 0, LONG_MAX);
776 
777 	/* update user_block_counts */
778 	sbi->last_valid_block_count = sbi->total_valid_block_count;
779 	sbi->alloc_valid_block_count = 0;
780 
781 	/* Here, we have only one bio containing the CP pack */
782 	sync_meta_pages(sbi, META_FLUSH, LONG_MAX);
783 
784 	if (unlikely(!is_set_ckpt_flags(ckpt, CP_ERROR_FLAG))) {
785 		clear_prefree_segments(sbi);
786 		F2FS_RESET_SB_DIRT(sbi);
787 	}
788 }
789 
790 /*
791  * We guarantee that this checkpoint procedure should not fail.
792  */
793 void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
794 {
795 	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
796 	unsigned long long ckpt_ver;
797 
798 	trace_f2fs_write_checkpoint(sbi->sb, is_umount, "start block_ops");
799 
800 	mutex_lock(&sbi->cp_mutex);
801 	block_operations(sbi);
802 
803 	trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish block_ops");
804 
805 	f2fs_submit_merged_bio(sbi, DATA, WRITE);
806 	f2fs_submit_merged_bio(sbi, NODE, WRITE);
807 	f2fs_submit_merged_bio(sbi, META, WRITE);
808 
809 	/*
810 	 * update the checkpoint pack index:
811 	 * increase the version number so that
812 	 * SIT entries and seg summaries are written in the correct place
813 	 */
814 	ckpt_ver = cur_cp_version(ckpt);
815 	ckpt->checkpoint_ver = cpu_to_le64(++ckpt_ver);
816 
817 	/* write cached NAT/SIT entries to NAT/SIT area */
818 	flush_nat_entries(sbi);
819 	flush_sit_entries(sbi);
820 
821 	/* unlock all the fs_lock[] in do_checkpoint() */
822 	do_checkpoint(sbi, is_umount);
823 
824 	unblock_operations(sbi);
825 	mutex_unlock(&sbi->cp_mutex);
826 
827 	trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish checkpoint");
828 }
829 
830 void init_orphan_info(struct f2fs_sb_info *sbi)
831 {
832 	spin_lock_init(&sbi->orphan_inode_lock);
833 	INIT_LIST_HEAD(&sbi->orphan_inode_list);
834 	sbi->n_orphans = 0;
835 	/*
836 	 * considering 512 blocks in a segment, 8 blocks are needed for
837 	 * the cp and log segment summaries. The remaining blocks keep
838 	 * orphan entries; with one reserved segment for the cp pack we
839 	 * can have at most 1020 * 504 orphan entries (see the note below)
840 	 */
841 	sbi->max_orphans = (sbi->blocks_per_seg - 2 - NR_CURSEG_TYPE)
842 				* F2FS_ORPHANS_PER_BLOCK;
843 }
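
/*
 * Worked arithmetic (editorial addition, not in the original source):
 * assuming the default 512 blocks per segment and NR_CURSEG_TYPE == 6
 * summary logs (3 data + 3 node) of this era, the computation above gives
 *
 *	max_orphans = (512 - 2 - 6) * F2FS_ORPHANS_PER_BLOCK
 *	            = 504 * 1020 = 514,080 orphan entries,
 *
 * which matches the "1020 * 504" limit mentioned in the comment above.
 */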
844 
845 int __init create_checkpoint_caches(void)
846 {
847 	orphan_entry_slab = f2fs_kmem_cache_create("f2fs_orphan_entry",
848 			sizeof(struct orphan_inode_entry), NULL);
849 	if (!orphan_entry_slab)
850 		return -ENOMEM;
851 	inode_entry_slab = f2fs_kmem_cache_create("f2fs_dirty_dir_entry",
852 			sizeof(struct dir_inode_entry), NULL);
853 	if (!inode_entry_slab) {
854 		kmem_cache_destroy(orphan_entry_slab);
855 		return -ENOMEM;
856 	}
857 	return 0;
858 }
859 
860 void destroy_checkpoint_caches(void)
861 {
862 	kmem_cache_destroy(orphan_entry_slab);
863 	kmem_cache_destroy(inode_entry_slab);
864 }
865