/*
 * fs/f2fs/checkpoint.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/fs.h>
#include <linux/bio.h>
#include <linux/mpage.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/f2fs_fs.h>
#include <linux/pagevec.h>
#include <linux/swap.h>

#include "f2fs.h"
#include "node.h"
#include "segment.h"
#include <trace/events/f2fs.h>

static struct kmem_cache *orphan_entry_slab;
static struct kmem_cache *inode_entry_slab;

/*
 * We guarantee no failure on the returned page.
 */
struct page *grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
{
	struct address_space *mapping = META_MAPPING(sbi);
	struct page *page = NULL;
repeat:
	page = grab_cache_page(mapping, index);
	if (!page) {
		cond_resched();
		goto repeat;
	}
	f2fs_wait_on_page_writeback(page, META);
	SetPageUptodate(page);
	return page;
}

/*
 * We guarantee no failure on the returned page.
 */
struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
{
	struct address_space *mapping = META_MAPPING(sbi);
	struct page *page;
repeat:
	page = grab_cache_page(mapping, index);
	if (!page) {
		cond_resched();
		goto repeat;
	}
	if (PageUptodate(page))
		goto out;

	if (f2fs_submit_page_bio(sbi, page, index,
				READ_SYNC | REQ_META | REQ_PRIO))
		goto repeat;

	lock_page(page);
	if (unlikely(page->mapping != mapping)) {
		f2fs_put_page(page, 1);
		goto repeat;
	}
out:
	return page;
}

static inline int get_max_meta_blks(struct f2fs_sb_info *sbi, int type)
{
	switch (type) {
	case META_NAT:
		return NM_I(sbi)->max_nid / NAT_ENTRY_PER_BLOCK;
	case META_SIT:
		return SIT_BLK_CNT(sbi);
	case META_SSA:
	case META_CP:
		return 0;
	default:
		BUG();
	}
}

/*
 * Readahead CP/NAT/SIT/SSA pages
 */
int ra_meta_pages(struct f2fs_sb_info *sbi, int start, int nrpages, int type)
{
	block_t prev_blk_addr = 0;
	struct page *page;
	int blkno = start;
	int max_blks = get_max_meta_blks(sbi, type);

	struct f2fs_io_info fio = {
		.type = META,
		.rw = READ_SYNC | REQ_META | REQ_PRIO
	};

	for (; nrpages-- > 0; blkno++) {
		block_t blk_addr;

		switch (type) {
		case META_NAT:
			/* get nat block addr */
			if (unlikely(blkno >= max_blks))
				blkno = 0;
			blk_addr = current_nat_addr(sbi,
					blkno * NAT_ENTRY_PER_BLOCK);
			break;
		case META_SIT:
			/* get sit block addr */
			if (unlikely(blkno >= max_blks))
				goto out;
			blk_addr = current_sit_addr(sbi,
					blkno * SIT_ENTRY_PER_BLOCK);
			if (blkno != start && prev_blk_addr + 1 != blk_addr)
				goto out;
			prev_blk_addr = blk_addr;
			break;
		case META_SSA:
		case META_CP:
			/* get ssa/cp block addr */
			blk_addr = blkno;
			break;
		default:
			BUG();
		}

		page = grab_cache_page(META_MAPPING(sbi), blk_addr);
		if (!page)
			continue;
		if (PageUptodate(page)) {
			f2fs_put_page(page, 1);
			continue;
		}

		f2fs_submit_page_mbio(sbi, page, blk_addr, &fio);
		f2fs_put_page(page, 0);
	}
out:
	f2fs_submit_merged_bio(sbi, META, READ);
	return blkno - start;
}

static int f2fs_write_meta_page(struct page *page,
				struct writeback_control *wbc)
{
	struct inode *inode = page->mapping->host;
	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);

	trace_f2fs_writepage(page, META);

	if (unlikely(sbi->por_doing))
		goto redirty_out;
	if (wbc->for_reclaim)
		goto redirty_out;

	/* should not write any meta pages if an IO error has occurred */
	if (unlikely(is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ERROR_FLAG)))
		goto no_write;

	f2fs_wait_on_page_writeback(page, META);
	write_meta_page(sbi, page);
no_write:
	dec_page_count(sbi, F2FS_DIRTY_META);
	unlock_page(page);
	return 0;

redirty_out:
	redirty_page_for_writepage(wbc, page);
	return AOP_WRITEPAGE_ACTIVATE;
}

static int f2fs_write_meta_pages(struct address_space *mapping,
				struct writeback_control *wbc)
{
	struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
	long diff, written;

	trace_f2fs_writepages(mapping->host, wbc, META);

	/* collect a number of dirty meta pages and write together */
	if (wbc->for_kupdate ||
		get_pages(sbi, F2FS_DIRTY_META) < nr_pages_to_skip(sbi, META))
		goto skip_write;

	/* if mounting has failed, skip writing node pages */
	mutex_lock(&sbi->cp_mutex);
	diff = nr_pages_to_write(sbi, META, wbc);
	written = sync_meta_pages(sbi, META, wbc->nr_to_write);
	mutex_unlock(&sbi->cp_mutex);
	wbc->nr_to_write = max((long)0, wbc->nr_to_write - written - diff);
	return 0;

skip_write:
	wbc->pages_skipped += get_pages(sbi, F2FS_DIRTY_META);
	return 0;
}

long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
						long nr_to_write)
{
	struct address_space *mapping = META_MAPPING(sbi);
	pgoff_t index = 0, end = LONG_MAX;
	struct pagevec pvec;
	long nwritten = 0;
	struct writeback_control wbc = {
		.for_reclaim = 0,
	};

	pagevec_init(&pvec, 0);

	while (index <= end) {
		int i, nr_pages;
		nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
				PAGECACHE_TAG_DIRTY,
				min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
		if (unlikely(nr_pages == 0))
			break;

		for (i = 0; i < nr_pages; i++) {
			struct page *page = pvec.pages[i];

			lock_page(page);

			if (unlikely(page->mapping != mapping)) {
continue_unlock:
				unlock_page(page);
				continue;
			}
			if (!PageDirty(page)) {
				/* someone wrote it for us */
				goto continue_unlock;
			}

			if (!clear_page_dirty_for_io(page))
				goto continue_unlock;

			if (f2fs_write_meta_page(page, &wbc)) {
				unlock_page(page);
				break;
			}
			nwritten++;
			if (unlikely(nwritten >= nr_to_write))
				break;
		}
		pagevec_release(&pvec);
		cond_resched();
	}

	if (nwritten)
		f2fs_submit_merged_bio(sbi, type, WRITE);

	return nwritten;
}

static int f2fs_set_meta_page_dirty(struct page *page)
{
	struct address_space *mapping = page->mapping;
	struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);

	trace_f2fs_set_page_dirty(page, META);

	SetPageUptodate(page);
	if (!PageDirty(page)) {
		__set_page_dirty_nobuffers(page);
		inc_page_count(sbi, F2FS_DIRTY_META);
		return 1;
	}
	return 0;
}

const struct address_space_operations f2fs_meta_aops = {
	.writepage = f2fs_write_meta_page,
	.writepages = f2fs_write_meta_pages,
	.set_page_dirty = f2fs_set_meta_page_dirty,
};

int acquire_orphan_inode(struct f2fs_sb_info *sbi)
{
	int err = 0;

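	/* reserve one orphan slot; fails with -ENOSPC once max_orphans is reached */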
	spin_lock(&sbi->orphan_inode_lock);
	if (unlikely(sbi->n_orphans >= sbi->max_orphans))
		err = -ENOSPC;
	else
		sbi->n_orphans++;
	spin_unlock(&sbi->orphan_inode_lock);

	return err;
}

void release_orphan_inode(struct f2fs_sb_info *sbi)
{
	spin_lock(&sbi->orphan_inode_lock);
	f2fs_bug_on(sbi->n_orphans == 0);
	sbi->n_orphans--;
	spin_unlock(&sbi->orphan_inode_lock);
}

void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
{
	struct list_head *head;
	struct orphan_inode_entry *new, *orphan;

	new = f2fs_kmem_cache_alloc(orphan_entry_slab, GFP_ATOMIC);
	new->ino = ino;

	spin_lock(&sbi->orphan_inode_lock);
	head = &sbi->orphan_inode_list;
	list_for_each_entry(orphan, head, list) {
		if (orphan->ino == ino) {
			spin_unlock(&sbi->orphan_inode_lock);
			kmem_cache_free(orphan_entry_slab, new);
			return;
		}

		if (orphan->ino > ino)
			break;
	}

	/* add new orphan entry into list which is sorted by inode number */
	list_add_tail(&new->list, &orphan->list);
	spin_unlock(&sbi->orphan_inode_lock);
}

void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
{
	struct list_head *head;
	struct orphan_inode_entry *orphan;

	spin_lock(&sbi->orphan_inode_lock);
	head = &sbi->orphan_inode_list;
	list_for_each_entry(orphan, head, list) {
		if (orphan->ino == ino) {
			list_del(&orphan->list);
			f2fs_bug_on(sbi->n_orphans == 0);
			sbi->n_orphans--;
			spin_unlock(&sbi->orphan_inode_lock);
			kmem_cache_free(orphan_entry_slab, orphan);
			return;
		}
	}
	spin_unlock(&sbi->orphan_inode_lock);
}

static void recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
{
	struct inode *inode = f2fs_iget(sbi->sb, ino);
	f2fs_bug_on(IS_ERR(inode));
	clear_nlink(inode);

	/* truncate all the data during iput */
	iput(inode);
}

void recover_orphan_inodes(struct f2fs_sb_info *sbi)
{
	block_t start_blk, orphan_blkaddr, i, j;

	if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG))
		return;

	sbi->por_doing = true;

	start_blk = __start_cp_addr(sbi) + 1 +
		le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload);
	orphan_blkaddr = __start_sum_addr(sbi) - 1;

	ra_meta_pages(sbi, start_blk, orphan_blkaddr, META_CP);

	for (i = 0; i < orphan_blkaddr; i++) {
		struct page *page = get_meta_page(sbi, start_blk + i);
		struct f2fs_orphan_block *orphan_blk;

		orphan_blk = (struct f2fs_orphan_block *)page_address(page);
		for (j = 0; j < le32_to_cpu(orphan_blk->entry_count); j++) {
			nid_t ino = le32_to_cpu(orphan_blk->ino[j]);
			recover_orphan_inode(sbi, ino);
		}
		f2fs_put_page(page, 1);
	}
	/* clear Orphan Flag */
	clear_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG);
	sbi->por_doing = false;
	return;
}

static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
{
	struct list_head *head;
	struct f2fs_orphan_block *orphan_blk = NULL;
	unsigned int nentries = 0;
	unsigned short index;
	unsigned short orphan_blocks = (unsigned short)((sbi->n_orphans +
		(F2FS_ORPHANS_PER_BLOCK - 1)) / F2FS_ORPHANS_PER_BLOCK);
	struct page *page = NULL;
	struct orphan_inode_entry *orphan = NULL;

	for (index = 0; index < orphan_blocks; index++)
		grab_meta_page(sbi, start_blk + index);

	index = 1;
	spin_lock(&sbi->orphan_inode_lock);
	head = &sbi->orphan_inode_list;

	/* loop for each orphan inode entry and write them in journal block */
	list_for_each_entry(orphan, head, list) {
		if (!page) {
			page = find_get_page(META_MAPPING(sbi), start_blk++);
			f2fs_bug_on(!page);
			orphan_blk =
				(struct f2fs_orphan_block *)page_address(page);
			memset(orphan_blk, 0, sizeof(*orphan_blk));
			f2fs_put_page(page, 0);
		}

		orphan_blk->ino[nentries++] = cpu_to_le32(orphan->ino);

		if (nentries == F2FS_ORPHANS_PER_BLOCK) {
			/*
			 * the current orphan block is full of 1020 entries,
			 * so we need to flush it and bring another one
			 * into memory
			 */
			orphan_blk->blk_addr = cpu_to_le16(index);
			orphan_blk->blk_count = cpu_to_le16(orphan_blocks);
			orphan_blk->entry_count = cpu_to_le32(nentries);
			set_page_dirty(page);
			f2fs_put_page(page, 1);
			index++;
			nentries = 0;
			page = NULL;
		}
	}

	if (page) {
		orphan_blk->blk_addr = cpu_to_le16(index);
		orphan_blk->blk_count = cpu_to_le16(orphan_blocks);
		orphan_blk->entry_count = cpu_to_le32(nentries);
		set_page_dirty(page);
		f2fs_put_page(page, 1);
	}

	spin_unlock(&sbi->orphan_inode_lock);
}

static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
				block_t cp_addr, unsigned long long *version)
{
	struct page *cp_page_1, *cp_page_2 = NULL;
	unsigned long blk_size = sbi->blocksize;
	struct f2fs_checkpoint *cp_block;
	unsigned long long cur_version = 0, pre_version = 0;
	size_t crc_offset;
	__u32 crc = 0;

	/* Read the 1st cp block in this CP pack */
	cp_page_1 = get_meta_page(sbi, cp_addr);

	/* get the version number */
	cp_block = (struct f2fs_checkpoint *)page_address(cp_page_1);
	crc_offset = le32_to_cpu(cp_block->checksum_offset);
	if (crc_offset >= blk_size)
		goto invalid_cp1;

	crc = le32_to_cpu(*((__u32 *)((unsigned char *)cp_block + crc_offset)));
	if (!f2fs_crc_valid(crc, cp_block, crc_offset))
		goto invalid_cp1;

	pre_version = cur_cp_version(cp_block);

	/* Read the 2nd cp block in this CP pack */
	cp_addr += le32_to_cpu(cp_block->cp_pack_total_block_count) - 1;
	cp_page_2 = get_meta_page(sbi, cp_addr);

	cp_block = (struct f2fs_checkpoint *)page_address(cp_page_2);
	crc_offset = le32_to_cpu(cp_block->checksum_offset);
	if (crc_offset >= blk_size)
		goto invalid_cp2;

	crc = le32_to_cpu(*((__u32 *)((unsigned char *)cp_block + crc_offset)));
	if (!f2fs_crc_valid(crc, cp_block, crc_offset))
		goto invalid_cp2;

	cur_version = cur_cp_version(cp_block);

	if (cur_version == pre_version) {
		*version = cur_version;
		f2fs_put_page(cp_page_2, 1);
		return cp_page_1;
	}
invalid_cp2:
	f2fs_put_page(cp_page_2, 1);
invalid_cp1:
	f2fs_put_page(cp_page_1, 1);
	return NULL;
}

int get_valid_checkpoint(struct f2fs_sb_info *sbi)
{
	struct f2fs_checkpoint *cp_block;
	struct f2fs_super_block *fsb = sbi->raw_super;
	struct page *cp1, *cp2, *cur_page;
	unsigned long blk_size = sbi->blocksize;
	unsigned long long cp1_version = 0, cp2_version = 0;
	unsigned long long cp_start_blk_no;
	unsigned int cp_blks = 1 + le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload);
	block_t cp_blk_no;
	int i;

	sbi->ckpt = kzalloc(cp_blks * blk_size, GFP_KERNEL);
	if (!sbi->ckpt)
		return -ENOMEM;
	/*
	 * Finding a valid cp block involves reading both
	 * sets (cp pack 1 and cp pack 2)
	 */
	cp_start_blk_no = le32_to_cpu(fsb->cp_blkaddr);
	cp1 = validate_checkpoint(sbi, cp_start_blk_no, &cp1_version);

	/* The second checkpoint pack should start at the next segment */
	cp_start_blk_no += ((unsigned long long)1) <<
				le32_to_cpu(fsb->log_blocks_per_seg);
	cp2 = validate_checkpoint(sbi, cp_start_blk_no, &cp2_version);

	if (cp1 && cp2) {
		if (ver_after(cp2_version, cp1_version))
			cur_page = cp2;
		else
			cur_page = cp1;
	} else if (cp1) {
		cur_page = cp1;
	} else if (cp2) {
		cur_page = cp2;
	} else {
		goto fail_no_cp;
	}

	cp_block = (struct f2fs_checkpoint *)page_address(cur_page);
	memcpy(sbi->ckpt, cp_block, blk_size);

	if (cp_blks <= 1)
		goto done;

	cp_blk_no = le32_to_cpu(fsb->cp_blkaddr);
	if (cur_page == cp2)
		cp_blk_no += 1 << le32_to_cpu(fsb->log_blocks_per_seg);

	for (i = 1; i < cp_blks; i++) {
		void *sit_bitmap_ptr;
		unsigned char *ckpt = (unsigned char *)sbi->ckpt;

		cur_page = get_meta_page(sbi, cp_blk_no + i);
		sit_bitmap_ptr = page_address(cur_page);
		memcpy(ckpt + i * blk_size, sit_bitmap_ptr, blk_size);
		f2fs_put_page(cur_page, 1);
	}
done:
	f2fs_put_page(cp1, 1);
	f2fs_put_page(cp2, 1);
	return 0;

fail_no_cp:
	kfree(sbi->ckpt);
	return -EINVAL;
}

static int __add_dirty_inode(struct inode *inode, struct dir_inode_entry *new)
{
	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);

	if (is_inode_flag_set(F2FS_I(inode), FI_DIRTY_DIR))
		return -EEXIST;

	set_inode_flag(F2FS_I(inode), FI_DIRTY_DIR);
	F2FS_I(inode)->dirty_dir = new;
	list_add_tail(&new->list, &sbi->dir_inode_list);
	stat_inc_dirty_dir(sbi);
	return 0;
}

void set_dirty_dir_page(struct inode *inode, struct page *page)
{
	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
	struct dir_inode_entry *new;
	int ret = 0;

	if (!S_ISDIR(inode->i_mode))
		return;

	new = f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS);
	new->inode = inode;
	INIT_LIST_HEAD(&new->list);

	spin_lock(&sbi->dir_inode_lock);
	ret = __add_dirty_inode(inode, new);
	inode_inc_dirty_dents(inode);
	SetPagePrivate(page);
	spin_unlock(&sbi->dir_inode_lock);

	if (ret)
		kmem_cache_free(inode_entry_slab, new);
}

void add_dirty_dir_inode(struct inode *inode)
{
	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
	struct dir_inode_entry *new =
			f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS);
	int ret = 0;

	new->inode = inode;
	INIT_LIST_HEAD(&new->list);

	spin_lock(&sbi->dir_inode_lock);
	ret = __add_dirty_inode(inode, new);
	spin_unlock(&sbi->dir_inode_lock);

	if (ret)
		kmem_cache_free(inode_entry_slab, new);
}

void remove_dirty_dir_inode(struct inode *inode)
{
	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
	struct dir_inode_entry *entry;

	if (!S_ISDIR(inode->i_mode))
		return;

	spin_lock(&sbi->dir_inode_lock);
	if (get_dirty_dents(inode) ||
			!is_inode_flag_set(F2FS_I(inode), FI_DIRTY_DIR)) {
		spin_unlock(&sbi->dir_inode_lock);
		return;
	}

	entry = F2FS_I(inode)->dirty_dir;
	list_del(&entry->list);
	F2FS_I(inode)->dirty_dir = NULL;
	clear_inode_flag(F2FS_I(inode), FI_DIRTY_DIR);
	stat_dec_dirty_dir(sbi);
	spin_unlock(&sbi->dir_inode_lock);
	kmem_cache_free(inode_entry_slab, entry);

	/* Only from the recovery routine */
	if (is_inode_flag_set(F2FS_I(inode), FI_DELAY_IPUT)) {
		clear_inode_flag(F2FS_I(inode), FI_DELAY_IPUT);
		iput(inode);
	}
}

void sync_dirty_dir_inodes(struct f2fs_sb_info *sbi)
{
	struct list_head *head;
	struct dir_inode_entry *entry;
	struct inode *inode;
retry:
	spin_lock(&sbi->dir_inode_lock);

	head = &sbi->dir_inode_list;
	if (list_empty(head)) {
		spin_unlock(&sbi->dir_inode_lock);
		return;
	}
	entry = list_entry(head->next, struct dir_inode_entry, list);
	inode = igrab(entry->inode);
	spin_unlock(&sbi->dir_inode_lock);
	if (inode) {
		filemap_fdatawrite(inode->i_mapping);
		iput(inode);
	} else {
		/*
		 * We should submit the bio, since several dentry pages
		 * in the freeing inode are still under writeback.
		 */
		f2fs_submit_merged_bio(sbi, DATA, WRITE);
	}
	goto retry;
}

/*
 * Freeze all the FS-operations for checkpoint.
 */
static void block_operations(struct f2fs_sb_info *sbi)
{
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_ALL,
		.nr_to_write = LONG_MAX,
		.for_reclaim = 0,
	};
	struct blk_plug plug;

	blk_start_plug(&plug);

retry_flush_dents:
	f2fs_lock_all(sbi);
	/* write all the dirty dentry pages */
	if (get_pages(sbi, F2FS_DIRTY_DENTS)) {
		f2fs_unlock_all(sbi);
		sync_dirty_dir_inodes(sbi);
		goto retry_flush_dents;
	}

	/*
	 * POR: we should ensure that there are no dirty node pages
	 * until finishing nat/sit flush.
	 */
retry_flush_nodes:
	mutex_lock(&sbi->node_write);

	if (get_pages(sbi, F2FS_DIRTY_NODES)) {
		mutex_unlock(&sbi->node_write);
		sync_node_pages(sbi, 0, &wbc);
		goto retry_flush_nodes;
	}
	blk_finish_plug(&plug);
}

static void unblock_operations(struct f2fs_sb_info *sbi)
{
	mutex_unlock(&sbi->node_write);
	f2fs_unlock_all(sbi);
}

static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi)
{
	DEFINE_WAIT(wait);

	for (;;) {
		prepare_to_wait(&sbi->cp_wait, &wait, TASK_UNINTERRUPTIBLE);

		if (!get_pages(sbi, F2FS_WRITEBACK))
			break;

		io_schedule();
	}
	finish_wait(&sbi->cp_wait, &wait);
}

static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
{
	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
	nid_t last_nid = 0;
	block_t start_blk;
	struct page *cp_page;
	unsigned int data_sum_blocks, orphan_blocks;
	__u32 crc32 = 0;
	void *kaddr;
	int i;
	int cp_payload_blks = le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload);

	/*
	 * This avoids conducting wrong roll-forward operations and uses
	 * meta pages, so it should be called prior to sync_meta_pages below.
	 */
	discard_next_dnode(sbi);

	/* Flush all the NAT/SIT pages */
	while (get_pages(sbi, F2FS_DIRTY_META))
		sync_meta_pages(sbi, META, LONG_MAX);

	next_free_nid(sbi, &last_nid);

	/*
	 * modify checkpoint
	 * version number is already updated
	 */
	ckpt->elapsed_time = cpu_to_le64(get_mtime(sbi));
	ckpt->valid_block_count = cpu_to_le64(valid_user_blocks(sbi));
	ckpt->free_segment_count = cpu_to_le32(free_segments(sbi));
	for (i = 0; i < 3; i++) {
		ckpt->cur_node_segno[i] =
			cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_NODE));
		ckpt->cur_node_blkoff[i] =
			cpu_to_le16(curseg_blkoff(sbi, i + CURSEG_HOT_NODE));
		ckpt->alloc_type[i + CURSEG_HOT_NODE] =
				curseg_alloc_type(sbi, i + CURSEG_HOT_NODE);
	}
	for (i = 0; i < 3; i++) {
		ckpt->cur_data_segno[i] =
			cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_DATA));
		ckpt->cur_data_blkoff[i] =
			cpu_to_le16(curseg_blkoff(sbi, i + CURSEG_HOT_DATA));
		ckpt->alloc_type[i + CURSEG_HOT_DATA] =
				curseg_alloc_type(sbi, i + CURSEG_HOT_DATA);
	}

	ckpt->valid_node_count = cpu_to_le32(valid_node_count(sbi));
	ckpt->valid_inode_count = cpu_to_le32(valid_inode_count(sbi));
	ckpt->next_free_nid = cpu_to_le32(last_nid);

	/* 2 cp + n data seg summary + orphan inode blocks */
	data_sum_blocks = npages_for_summary_flush(sbi);
	if (data_sum_blocks < 3)
		set_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
	else
		clear_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);

	orphan_blocks = (sbi->n_orphans + F2FS_ORPHANS_PER_BLOCK - 1)
					/ F2FS_ORPHANS_PER_BLOCK;
	ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks +
			orphan_blocks);

	if (is_umount) {
		set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
		ckpt->cp_pack_total_block_count = cpu_to_le32(2 +
				cp_payload_blks + data_sum_blocks +
				orphan_blocks + NR_CURSEG_NODE_TYPE);
	} else {
		clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
		ckpt->cp_pack_total_block_count = cpu_to_le32(2 +
				cp_payload_blks + data_sum_blocks +
				orphan_blocks);
	}

	if (sbi->n_orphans)
		set_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);
	else
		clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);

	/* update SIT/NAT bitmap */
	get_sit_bitmap(sbi, __bitmap_ptr(sbi, SIT_BITMAP));
	get_nat_bitmap(sbi, __bitmap_ptr(sbi, NAT_BITMAP));

	crc32 = f2fs_crc32(ckpt, le32_to_cpu(ckpt->checksum_offset));
	*((__le32 *)((unsigned char *)ckpt +
				le32_to_cpu(ckpt->checksum_offset)))
				= cpu_to_le32(crc32);

	start_blk = __start_cp_addr(sbi);

	/* write out checkpoint buffer at block 0 */
	cp_page = grab_meta_page(sbi, start_blk++);
	kaddr = page_address(cp_page);
	memcpy(kaddr, ckpt, (1 << sbi->log_blocksize));
	set_page_dirty(cp_page);
	f2fs_put_page(cp_page, 1);

	for (i = 1; i < 1 + cp_payload_blks; i++) {
		cp_page = grab_meta_page(sbi, start_blk++);
		kaddr = page_address(cp_page);
		memcpy(kaddr, (char *)ckpt + i * F2FS_BLKSIZE,
				(1 << sbi->log_blocksize));
		set_page_dirty(cp_page);
		f2fs_put_page(cp_page, 1);
	}

	if (sbi->n_orphans) {
		write_orphan_inodes(sbi, start_blk);
		start_blk += orphan_blocks;
	}

	write_data_summaries(sbi, start_blk);
	start_blk += data_sum_blocks;
	if (is_umount) {
		write_node_summaries(sbi, start_blk);
		start_blk += NR_CURSEG_NODE_TYPE;
	}

	/* write out checkpoint block */
	cp_page = grab_meta_page(sbi, start_blk);
	kaddr = page_address(cp_page);
	memcpy(kaddr, ckpt, (1 << sbi->log_blocksize));
	set_page_dirty(cp_page);
	f2fs_put_page(cp_page, 1);

	/* wait for previously submitted node/meta pages writeback */
	wait_on_all_pages_writeback(sbi);

	filemap_fdatawait_range(NODE_MAPPING(sbi), 0, LONG_MAX);
	filemap_fdatawait_range(META_MAPPING(sbi), 0, LONG_MAX);

	/* update user_block_counts */
	sbi->last_valid_block_count = sbi->total_valid_block_count;
	sbi->alloc_valid_block_count = 0;

	/* Here, we have only one bio carrying the CP pack */
	sync_meta_pages(sbi, META_FLUSH, LONG_MAX);

	if (unlikely(!is_set_ckpt_flags(ckpt, CP_ERROR_FLAG))) {
		clear_prefree_segments(sbi);
		F2FS_RESET_SB_DIRT(sbi);
	}
}

/*
 * We guarantee that this checkpoint procedure will not fail.
 */
void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
{
	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
	unsigned long long ckpt_ver;

	trace_f2fs_write_checkpoint(sbi->sb, is_umount, "start block_ops");

	mutex_lock(&sbi->cp_mutex);
	block_operations(sbi);

	trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish block_ops");

	f2fs_submit_merged_bio(sbi, DATA, WRITE);
	f2fs_submit_merged_bio(sbi, NODE, WRITE);
	f2fs_submit_merged_bio(sbi, META, WRITE);

	/*
	 * update checkpoint pack index
	 * Increase the version number so that
	 * SIT entries and seg summaries are written at the correct place
	 */
	ckpt_ver = cur_cp_version(ckpt);
	ckpt->checkpoint_ver = cpu_to_le64(++ckpt_ver);

	/* write cached NAT/SIT entries to NAT/SIT area */
	flush_nat_entries(sbi);
	flush_sit_entries(sbi);

	/* unlock all the fs_lock[] in do_checkpoint() */
	do_checkpoint(sbi, is_umount);

	unblock_operations(sbi);
	mutex_unlock(&sbi->cp_mutex);

	stat_inc_cp_count(sbi->stat_info);
	trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish checkpoint");
}

void init_orphan_info(struct f2fs_sb_info *sbi)
{
	spin_lock_init(&sbi->orphan_inode_lock);
	INIT_LIST_HEAD(&sbi->orphan_inode_list);
	sbi->n_orphans = 0;
	/*
	 * considering 512 blocks in a segment, 8 blocks are needed for cp
	 * and log segment summaries. Remaining blocks are used to keep
	 * orphan entries; with the limitation of one reserved segment
	 * for the cp pack, we can have at most 1020 * 504 orphan entries.
	 */
	sbi->max_orphans = (sbi->blocks_per_seg - 2 - NR_CURSEG_TYPE)
				* F2FS_ORPHANS_PER_BLOCK;
}

int __init create_checkpoint_caches(void)
{
	orphan_entry_slab = f2fs_kmem_cache_create("f2fs_orphan_entry",
			sizeof(struct orphan_inode_entry));
	if (!orphan_entry_slab)
		return -ENOMEM;
	inode_entry_slab = f2fs_kmem_cache_create("f2fs_dirty_dir_entry",
			sizeof(struct dir_inode_entry));
	if (!inode_entry_slab) {
		kmem_cache_destroy(orphan_entry_slab);
		return -ENOMEM;
	}
	return 0;
}

void destroy_checkpoint_caches(void)
{
	kmem_cache_destroy(orphan_entry_slab);
	kmem_cache_destroy(inode_entry_slab);
}