1 /* 2 * fs/f2fs/checkpoint.c 3 * 4 * Copyright (c) 2012 Samsung Electronics Co., Ltd. 5 * http://www.samsung.com/ 6 * 7 * This program is free software; you can redistribute it and/or modify 8 * it under the terms of the GNU General Public License version 2 as 9 * published by the Free Software Foundation. 10 */ 11 #include <linux/fs.h> 12 #include <linux/bio.h> 13 #include <linux/mpage.h> 14 #include <linux/writeback.h> 15 #include <linux/blkdev.h> 16 #include <linux/f2fs_fs.h> 17 #include <linux/pagevec.h> 18 #include <linux/swap.h> 19 20 #include "f2fs.h" 21 #include "node.h" 22 #include "segment.h" 23 #include "trace.h" 24 #include <trace/events/f2fs.h> 25 26 static struct kmem_cache *ino_entry_slab; 27 struct kmem_cache *inode_entry_slab; 28 29 void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io) 30 { 31 set_ckpt_flags(sbi, CP_ERROR_FLAG); 32 sbi->sb->s_flags |= MS_RDONLY; 33 if (!end_io) 34 f2fs_flush_merged_writes(sbi); 35 } 36 37 /* 38 * We guarantee no failure on the returned page. 39 */ 40 struct page *grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index) 41 { 42 struct address_space *mapping = META_MAPPING(sbi); 43 struct page *page = NULL; 44 repeat: 45 page = f2fs_grab_cache_page(mapping, index, false); 46 if (!page) { 47 cond_resched(); 48 goto repeat; 49 } 50 f2fs_wait_on_page_writeback(page, META, true); 51 if (!PageUptodate(page)) 52 SetPageUptodate(page); 53 return page; 54 } 55 56 /* 57 * We guarantee no failure on the returned page. 58 */ 59 static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index, 60 bool is_meta) 61 { 62 struct address_space *mapping = META_MAPPING(sbi); 63 struct page *page; 64 struct f2fs_io_info fio = { 65 .sbi = sbi, 66 .type = META, 67 .op = REQ_OP_READ, 68 .op_flags = REQ_META | REQ_PRIO, 69 .old_blkaddr = index, 70 .new_blkaddr = index, 71 .encrypted_page = NULL, 72 }; 73 74 if (unlikely(!is_meta)) 75 fio.op_flags &= ~REQ_META; 76 repeat: 77 page = f2fs_grab_cache_page(mapping, index, false); 78 if (!page) { 79 cond_resched(); 80 goto repeat; 81 } 82 if (PageUptodate(page)) 83 goto out; 84 85 fio.page = page; 86 87 if (f2fs_submit_page_bio(&fio)) { 88 f2fs_put_page(page, 1); 89 goto repeat; 90 } 91 92 lock_page(page); 93 if (unlikely(page->mapping != mapping)) { 94 f2fs_put_page(page, 1); 95 goto repeat; 96 } 97 98 /* 99 * if there is any IO error when accessing device, make our filesystem 100 * readonly and make sure do not write checkpoint with non-uptodate 101 * meta page. 102 */ 103 if (unlikely(!PageUptodate(page))) 104 f2fs_stop_checkpoint(sbi, false); 105 out: 106 return page; 107 } 108 109 struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index) 110 { 111 return __get_meta_page(sbi, index, true); 112 } 113 114 /* for POR only */ 115 struct page *get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index) 116 { 117 return __get_meta_page(sbi, index, false); 118 } 119 120 bool is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type) 121 { 122 switch (type) { 123 case META_NAT: 124 break; 125 case META_SIT: 126 if (unlikely(blkaddr >= SIT_BLK_CNT(sbi))) 127 return false; 128 break; 129 case META_SSA: 130 if (unlikely(blkaddr >= MAIN_BLKADDR(sbi) || 131 blkaddr < SM_I(sbi)->ssa_blkaddr)) 132 return false; 133 break; 134 case META_CP: 135 if (unlikely(blkaddr >= SIT_I(sbi)->sit_base_addr || 136 blkaddr < __start_cp_addr(sbi))) 137 return false; 138 break; 139 case META_POR: 140 if (unlikely(blkaddr >= MAX_BLKADDR(sbi) || 141 blkaddr < MAIN_BLKADDR(sbi))) 142 return false; 143 break; 144 default: 145 BUG(); 146 } 147 148 return true; 149 } 150 151 /* 152 * Readahead CP/NAT/SIT/SSA pages 153 */ 154 int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, 155 int type, bool sync) 156 { 157 struct page *page; 158 block_t blkno = start; 159 struct f2fs_io_info fio = { 160 .sbi = sbi, 161 .type = META, 162 .op = REQ_OP_READ, 163 .op_flags = sync ? (REQ_META | REQ_PRIO) : REQ_RAHEAD, 164 .encrypted_page = NULL, 165 .in_list = false, 166 }; 167 struct blk_plug plug; 168 169 if (unlikely(type == META_POR)) 170 fio.op_flags &= ~REQ_META; 171 172 blk_start_plug(&plug); 173 for (; nrpages-- > 0; blkno++) { 174 175 if (!is_valid_blkaddr(sbi, blkno, type)) 176 goto out; 177 178 switch (type) { 179 case META_NAT: 180 if (unlikely(blkno >= 181 NAT_BLOCK_OFFSET(NM_I(sbi)->max_nid))) 182 blkno = 0; 183 /* get nat block addr */ 184 fio.new_blkaddr = current_nat_addr(sbi, 185 blkno * NAT_ENTRY_PER_BLOCK); 186 break; 187 case META_SIT: 188 /* get sit block addr */ 189 fio.new_blkaddr = current_sit_addr(sbi, 190 blkno * SIT_ENTRY_PER_BLOCK); 191 break; 192 case META_SSA: 193 case META_CP: 194 case META_POR: 195 fio.new_blkaddr = blkno; 196 break; 197 default: 198 BUG(); 199 } 200 201 page = f2fs_grab_cache_page(META_MAPPING(sbi), 202 fio.new_blkaddr, false); 203 if (!page) 204 continue; 205 if (PageUptodate(page)) { 206 f2fs_put_page(page, 1); 207 continue; 208 } 209 210 fio.page = page; 211 f2fs_submit_page_bio(&fio); 212 f2fs_put_page(page, 0); 213 } 214 out: 215 blk_finish_plug(&plug); 216 return blkno - start; 217 } 218 219 void ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index) 220 { 221 struct page *page; 222 bool readahead = false; 223 224 page = find_get_page(META_MAPPING(sbi), index); 225 if (!page || !PageUptodate(page)) 226 readahead = true; 227 f2fs_put_page(page, 0); 228 229 if (readahead) 230 ra_meta_pages(sbi, index, BIO_MAX_PAGES, META_POR, true); 231 } 232 233 static int __f2fs_write_meta_page(struct page *page, 234 struct writeback_control *wbc, 235 enum iostat_type io_type) 236 { 237 struct f2fs_sb_info *sbi = F2FS_P_SB(page); 238 239 trace_f2fs_writepage(page, META); 240 241 if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) 242 goto redirty_out; 243 if (wbc->for_reclaim && page->index < GET_SUM_BLOCK(sbi, 0)) 244 goto redirty_out; 245 if (unlikely(f2fs_cp_error(sbi))) 246 goto redirty_out; 247 248 write_meta_page(sbi, page, io_type); 249 dec_page_count(sbi, F2FS_DIRTY_META); 250 251 if (wbc->for_reclaim) 252 f2fs_submit_merged_write_cond(sbi, page->mapping->host, 253 0, page->index, META); 254 255 unlock_page(page); 256 257 if (unlikely(f2fs_cp_error(sbi))) 258 f2fs_submit_merged_write(sbi, META); 259 260 return 0; 261 262 redirty_out: 263 redirty_page_for_writepage(wbc, page); 264 return AOP_WRITEPAGE_ACTIVATE; 265 } 266 267 static int f2fs_write_meta_page(struct page *page, 268 struct writeback_control *wbc) 269 { 270 return __f2fs_write_meta_page(page, wbc, FS_META_IO); 271 } 272 273 static int f2fs_write_meta_pages(struct address_space *mapping, 274 struct writeback_control *wbc) 275 { 276 struct f2fs_sb_info *sbi = F2FS_M_SB(mapping); 277 long diff, written; 278 279 if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) 280 goto skip_write; 281 282 /* collect a number of dirty meta pages and write together */ 283 if (wbc->for_kupdate || 284 get_pages(sbi, F2FS_DIRTY_META) < nr_pages_to_skip(sbi, META)) 285 goto skip_write; 286 287 /* if locked failed, cp will flush dirty pages instead */ 288 if (!mutex_trylock(&sbi->cp_mutex)) 289 goto skip_write; 290 291 trace_f2fs_writepages(mapping->host, wbc, META); 292 diff = nr_pages_to_write(sbi, META, wbc); 293 written = sync_meta_pages(sbi, META, wbc->nr_to_write, FS_META_IO); 294 mutex_unlock(&sbi->cp_mutex); 295 wbc->nr_to_write = max((long)0, wbc->nr_to_write - written - diff); 296 return 0; 297 298 skip_write: 299 wbc->pages_skipped += get_pages(sbi, F2FS_DIRTY_META); 300 trace_f2fs_writepages(mapping->host, wbc, META); 301 return 0; 302 } 303 304 long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type, 305 long nr_to_write, enum iostat_type io_type) 306 { 307 struct address_space *mapping = META_MAPPING(sbi); 308 pgoff_t index = 0, end = ULONG_MAX, prev = ULONG_MAX; 309 struct pagevec pvec; 310 long nwritten = 0; 311 struct writeback_control wbc = { 312 .for_reclaim = 0, 313 }; 314 struct blk_plug plug; 315 316 pagevec_init(&pvec, 0); 317 318 blk_start_plug(&plug); 319 320 while (index <= end) { 321 int i, nr_pages; 322 nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, 323 PAGECACHE_TAG_DIRTY, 324 min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); 325 if (unlikely(nr_pages == 0)) 326 break; 327 328 for (i = 0; i < nr_pages; i++) { 329 struct page *page = pvec.pages[i]; 330 331 if (prev == ULONG_MAX) 332 prev = page->index - 1; 333 if (nr_to_write != LONG_MAX && page->index != prev + 1) { 334 pagevec_release(&pvec); 335 goto stop; 336 } 337 338 lock_page(page); 339 340 if (unlikely(page->mapping != mapping)) { 341 continue_unlock: 342 unlock_page(page); 343 continue; 344 } 345 if (!PageDirty(page)) { 346 /* someone wrote it for us */ 347 goto continue_unlock; 348 } 349 350 f2fs_wait_on_page_writeback(page, META, true); 351 352 BUG_ON(PageWriteback(page)); 353 if (!clear_page_dirty_for_io(page)) 354 goto continue_unlock; 355 356 if (__f2fs_write_meta_page(page, &wbc, io_type)) { 357 unlock_page(page); 358 break; 359 } 360 nwritten++; 361 prev = page->index; 362 if (unlikely(nwritten >= nr_to_write)) 363 break; 364 } 365 pagevec_release(&pvec); 366 cond_resched(); 367 } 368 stop: 369 if (nwritten) 370 f2fs_submit_merged_write(sbi, type); 371 372 blk_finish_plug(&plug); 373 374 return nwritten; 375 } 376 377 static int f2fs_set_meta_page_dirty(struct page *page) 378 { 379 trace_f2fs_set_page_dirty(page, META); 380 381 if (!PageUptodate(page)) 382 SetPageUptodate(page); 383 if (!PageDirty(page)) { 384 f2fs_set_page_dirty_nobuffers(page); 385 inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_META); 386 SetPagePrivate(page); 387 f2fs_trace_pid(page); 388 return 1; 389 } 390 return 0; 391 } 392 393 const struct address_space_operations f2fs_meta_aops = { 394 .writepage = f2fs_write_meta_page, 395 .writepages = f2fs_write_meta_pages, 396 .set_page_dirty = f2fs_set_meta_page_dirty, 397 .invalidatepage = f2fs_invalidate_page, 398 .releasepage = f2fs_release_page, 399 #ifdef CONFIG_MIGRATION 400 .migratepage = f2fs_migrate_page, 401 #endif 402 }; 403 404 static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) 405 { 406 struct inode_management *im = &sbi->im[type]; 407 struct ino_entry *e, *tmp; 408 409 tmp = f2fs_kmem_cache_alloc(ino_entry_slab, GFP_NOFS); 410 retry: 411 radix_tree_preload(GFP_NOFS | __GFP_NOFAIL); 412 413 spin_lock(&im->ino_lock); 414 e = radix_tree_lookup(&im->ino_root, ino); 415 if (!e) { 416 e = tmp; 417 if (radix_tree_insert(&im->ino_root, ino, e)) { 418 spin_unlock(&im->ino_lock); 419 radix_tree_preload_end(); 420 goto retry; 421 } 422 memset(e, 0, sizeof(struct ino_entry)); 423 e->ino = ino; 424 425 list_add_tail(&e->list, &im->ino_list); 426 if (type != ORPHAN_INO) 427 im->ino_num++; 428 } 429 spin_unlock(&im->ino_lock); 430 radix_tree_preload_end(); 431 432 if (e != tmp) 433 kmem_cache_free(ino_entry_slab, tmp); 434 } 435 436 static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) 437 { 438 struct inode_management *im = &sbi->im[type]; 439 struct ino_entry *e; 440 441 spin_lock(&im->ino_lock); 442 e = radix_tree_lookup(&im->ino_root, ino); 443 if (e) { 444 list_del(&e->list); 445 radix_tree_delete(&im->ino_root, ino); 446 im->ino_num--; 447 spin_unlock(&im->ino_lock); 448 kmem_cache_free(ino_entry_slab, e); 449 return; 450 } 451 spin_unlock(&im->ino_lock); 452 } 453 454 void add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) 455 { 456 /* add new dirty ino entry into list */ 457 __add_ino_entry(sbi, ino, type); 458 } 459 460 void remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) 461 { 462 /* remove dirty ino entry from list */ 463 __remove_ino_entry(sbi, ino, type); 464 } 465 466 /* mode should be APPEND_INO or UPDATE_INO */ 467 bool exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode) 468 { 469 struct inode_management *im = &sbi->im[mode]; 470 struct ino_entry *e; 471 472 spin_lock(&im->ino_lock); 473 e = radix_tree_lookup(&im->ino_root, ino); 474 spin_unlock(&im->ino_lock); 475 return e ? true : false; 476 } 477 478 void release_ino_entry(struct f2fs_sb_info *sbi, bool all) 479 { 480 struct ino_entry *e, *tmp; 481 int i; 482 483 for (i = all ? ORPHAN_INO: APPEND_INO; i <= UPDATE_INO; i++) { 484 struct inode_management *im = &sbi->im[i]; 485 486 spin_lock(&im->ino_lock); 487 list_for_each_entry_safe(e, tmp, &im->ino_list, list) { 488 list_del(&e->list); 489 radix_tree_delete(&im->ino_root, e->ino); 490 kmem_cache_free(ino_entry_slab, e); 491 im->ino_num--; 492 } 493 spin_unlock(&im->ino_lock); 494 } 495 } 496 497 int acquire_orphan_inode(struct f2fs_sb_info *sbi) 498 { 499 struct inode_management *im = &sbi->im[ORPHAN_INO]; 500 int err = 0; 501 502 spin_lock(&im->ino_lock); 503 504 #ifdef CONFIG_F2FS_FAULT_INJECTION 505 if (time_to_inject(sbi, FAULT_ORPHAN)) { 506 spin_unlock(&im->ino_lock); 507 f2fs_show_injection_info(FAULT_ORPHAN); 508 return -ENOSPC; 509 } 510 #endif 511 if (unlikely(im->ino_num >= sbi->max_orphans)) 512 err = -ENOSPC; 513 else 514 im->ino_num++; 515 spin_unlock(&im->ino_lock); 516 517 return err; 518 } 519 520 void release_orphan_inode(struct f2fs_sb_info *sbi) 521 { 522 struct inode_management *im = &sbi->im[ORPHAN_INO]; 523 524 spin_lock(&im->ino_lock); 525 f2fs_bug_on(sbi, im->ino_num == 0); 526 im->ino_num--; 527 spin_unlock(&im->ino_lock); 528 } 529 530 void add_orphan_inode(struct inode *inode) 531 { 532 /* add new orphan ino entry into list */ 533 __add_ino_entry(F2FS_I_SB(inode), inode->i_ino, ORPHAN_INO); 534 update_inode_page(inode); 535 } 536 537 void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) 538 { 539 /* remove orphan entry from orphan list */ 540 __remove_ino_entry(sbi, ino, ORPHAN_INO); 541 } 542 543 static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) 544 { 545 struct inode *inode; 546 struct node_info ni; 547 int err = acquire_orphan_inode(sbi); 548 549 if (err) { 550 set_sbi_flag(sbi, SBI_NEED_FSCK); 551 f2fs_msg(sbi->sb, KERN_WARNING, 552 "%s: orphan failed (ino=%x), run fsck to fix.", 553 __func__, ino); 554 return err; 555 } 556 557 __add_ino_entry(sbi, ino, ORPHAN_INO); 558 559 inode = f2fs_iget_retry(sbi->sb, ino); 560 if (IS_ERR(inode)) { 561 /* 562 * there should be a bug that we can't find the entry 563 * to orphan inode. 564 */ 565 f2fs_bug_on(sbi, PTR_ERR(inode) == -ENOENT); 566 return PTR_ERR(inode); 567 } 568 569 clear_nlink(inode); 570 571 /* truncate all the data during iput */ 572 iput(inode); 573 574 get_node_info(sbi, ino, &ni); 575 576 /* ENOMEM was fully retried in f2fs_evict_inode. */ 577 if (ni.blk_addr != NULL_ADDR) { 578 set_sbi_flag(sbi, SBI_NEED_FSCK); 579 f2fs_msg(sbi->sb, KERN_WARNING, 580 "%s: orphan failed (ino=%x) by kernel, retry mount.", 581 __func__, ino); 582 return -EIO; 583 } 584 __remove_ino_entry(sbi, ino, ORPHAN_INO); 585 return 0; 586 } 587 588 int recover_orphan_inodes(struct f2fs_sb_info *sbi) 589 { 590 block_t start_blk, orphan_blocks, i, j; 591 unsigned int s_flags = sbi->sb->s_flags; 592 int err = 0; 593 594 if (!is_set_ckpt_flags(sbi, CP_ORPHAN_PRESENT_FLAG)) 595 return 0; 596 597 if (s_flags & MS_RDONLY) { 598 f2fs_msg(sbi->sb, KERN_INFO, "orphan cleanup on readonly fs"); 599 sbi->sb->s_flags &= ~MS_RDONLY; 600 } 601 602 #ifdef CONFIG_QUOTA 603 /* Needed for iput() to work correctly and not trash data */ 604 sbi->sb->s_flags |= MS_ACTIVE; 605 /* Turn on quotas so that they are updated correctly */ 606 f2fs_enable_quota_files(sbi); 607 #endif 608 609 start_blk = __start_cp_addr(sbi) + 1 + __cp_payload(sbi); 610 orphan_blocks = __start_sum_addr(sbi) - 1 - __cp_payload(sbi); 611 612 ra_meta_pages(sbi, start_blk, orphan_blocks, META_CP, true); 613 614 for (i = 0; i < orphan_blocks; i++) { 615 struct page *page = get_meta_page(sbi, start_blk + i); 616 struct f2fs_orphan_block *orphan_blk; 617 618 orphan_blk = (struct f2fs_orphan_block *)page_address(page); 619 for (j = 0; j < le32_to_cpu(orphan_blk->entry_count); j++) { 620 nid_t ino = le32_to_cpu(orphan_blk->ino[j]); 621 err = recover_orphan_inode(sbi, ino); 622 if (err) { 623 f2fs_put_page(page, 1); 624 goto out; 625 } 626 } 627 f2fs_put_page(page, 1); 628 } 629 /* clear Orphan Flag */ 630 clear_ckpt_flags(sbi, CP_ORPHAN_PRESENT_FLAG); 631 out: 632 #ifdef CONFIG_QUOTA 633 /* Turn quotas off */ 634 f2fs_quota_off_umount(sbi->sb); 635 #endif 636 sbi->sb->s_flags = s_flags; /* Restore MS_RDONLY status */ 637 638 return err; 639 } 640 641 static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk) 642 { 643 struct list_head *head; 644 struct f2fs_orphan_block *orphan_blk = NULL; 645 unsigned int nentries = 0; 646 unsigned short index = 1; 647 unsigned short orphan_blocks; 648 struct page *page = NULL; 649 struct ino_entry *orphan = NULL; 650 struct inode_management *im = &sbi->im[ORPHAN_INO]; 651 652 orphan_blocks = GET_ORPHAN_BLOCKS(im->ino_num); 653 654 /* 655 * we don't need to do spin_lock(&im->ino_lock) here, since all the 656 * orphan inode operations are covered under f2fs_lock_op(). 657 * And, spin_lock should be avoided due to page operations below. 658 */ 659 head = &im->ino_list; 660 661 /* loop for each orphan inode entry and write them in Jornal block */ 662 list_for_each_entry(orphan, head, list) { 663 if (!page) { 664 page = grab_meta_page(sbi, start_blk++); 665 orphan_blk = 666 (struct f2fs_orphan_block *)page_address(page); 667 memset(orphan_blk, 0, sizeof(*orphan_blk)); 668 } 669 670 orphan_blk->ino[nentries++] = cpu_to_le32(orphan->ino); 671 672 if (nentries == F2FS_ORPHANS_PER_BLOCK) { 673 /* 674 * an orphan block is full of 1020 entries, 675 * then we need to flush current orphan blocks 676 * and bring another one in memory 677 */ 678 orphan_blk->blk_addr = cpu_to_le16(index); 679 orphan_blk->blk_count = cpu_to_le16(orphan_blocks); 680 orphan_blk->entry_count = cpu_to_le32(nentries); 681 set_page_dirty(page); 682 f2fs_put_page(page, 1); 683 index++; 684 nentries = 0; 685 page = NULL; 686 } 687 } 688 689 if (page) { 690 orphan_blk->blk_addr = cpu_to_le16(index); 691 orphan_blk->blk_count = cpu_to_le16(orphan_blocks); 692 orphan_blk->entry_count = cpu_to_le32(nentries); 693 set_page_dirty(page); 694 f2fs_put_page(page, 1); 695 } 696 } 697 698 static int get_checkpoint_version(struct f2fs_sb_info *sbi, block_t cp_addr, 699 struct f2fs_checkpoint **cp_block, struct page **cp_page, 700 unsigned long long *version) 701 { 702 unsigned long blk_size = sbi->blocksize; 703 size_t crc_offset = 0; 704 __u32 crc = 0; 705 706 *cp_page = get_meta_page(sbi, cp_addr); 707 *cp_block = (struct f2fs_checkpoint *)page_address(*cp_page); 708 709 crc_offset = le32_to_cpu((*cp_block)->checksum_offset); 710 if (crc_offset > (blk_size - sizeof(__le32))) { 711 f2fs_msg(sbi->sb, KERN_WARNING, 712 "invalid crc_offset: %zu", crc_offset); 713 return -EINVAL; 714 } 715 716 crc = cur_cp_crc(*cp_block); 717 if (!f2fs_crc_valid(sbi, crc, *cp_block, crc_offset)) { 718 f2fs_msg(sbi->sb, KERN_WARNING, "invalid crc value"); 719 return -EINVAL; 720 } 721 722 *version = cur_cp_version(*cp_block); 723 return 0; 724 } 725 726 static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, 727 block_t cp_addr, unsigned long long *version) 728 { 729 struct page *cp_page_1 = NULL, *cp_page_2 = NULL; 730 struct f2fs_checkpoint *cp_block = NULL; 731 unsigned long long cur_version = 0, pre_version = 0; 732 int err; 733 734 err = get_checkpoint_version(sbi, cp_addr, &cp_block, 735 &cp_page_1, version); 736 if (err) 737 goto invalid_cp1; 738 pre_version = *version; 739 740 cp_addr += le32_to_cpu(cp_block->cp_pack_total_block_count) - 1; 741 err = get_checkpoint_version(sbi, cp_addr, &cp_block, 742 &cp_page_2, version); 743 if (err) 744 goto invalid_cp2; 745 cur_version = *version; 746 747 if (cur_version == pre_version) { 748 *version = cur_version; 749 f2fs_put_page(cp_page_2, 1); 750 return cp_page_1; 751 } 752 invalid_cp2: 753 f2fs_put_page(cp_page_2, 1); 754 invalid_cp1: 755 f2fs_put_page(cp_page_1, 1); 756 return NULL; 757 } 758 759 int get_valid_checkpoint(struct f2fs_sb_info *sbi) 760 { 761 struct f2fs_checkpoint *cp_block; 762 struct f2fs_super_block *fsb = sbi->raw_super; 763 struct page *cp1, *cp2, *cur_page; 764 unsigned long blk_size = sbi->blocksize; 765 unsigned long long cp1_version = 0, cp2_version = 0; 766 unsigned long long cp_start_blk_no; 767 unsigned int cp_blks = 1 + __cp_payload(sbi); 768 block_t cp_blk_no; 769 int i; 770 771 sbi->ckpt = kzalloc(cp_blks * blk_size, GFP_KERNEL); 772 if (!sbi->ckpt) 773 return -ENOMEM; 774 /* 775 * Finding out valid cp block involves read both 776 * sets( cp pack1 and cp pack 2) 777 */ 778 cp_start_blk_no = le32_to_cpu(fsb->cp_blkaddr); 779 cp1 = validate_checkpoint(sbi, cp_start_blk_no, &cp1_version); 780 781 /* The second checkpoint pack should start at the next segment */ 782 cp_start_blk_no += ((unsigned long long)1) << 783 le32_to_cpu(fsb->log_blocks_per_seg); 784 cp2 = validate_checkpoint(sbi, cp_start_blk_no, &cp2_version); 785 786 if (cp1 && cp2) { 787 if (ver_after(cp2_version, cp1_version)) 788 cur_page = cp2; 789 else 790 cur_page = cp1; 791 } else if (cp1) { 792 cur_page = cp1; 793 } else if (cp2) { 794 cur_page = cp2; 795 } else { 796 goto fail_no_cp; 797 } 798 799 cp_block = (struct f2fs_checkpoint *)page_address(cur_page); 800 memcpy(sbi->ckpt, cp_block, blk_size); 801 802 /* Sanity checking of checkpoint */ 803 if (sanity_check_ckpt(sbi)) 804 goto free_fail_no_cp; 805 806 if (cur_page == cp1) 807 sbi->cur_cp_pack = 1; 808 else 809 sbi->cur_cp_pack = 2; 810 811 if (cp_blks <= 1) 812 goto done; 813 814 cp_blk_no = le32_to_cpu(fsb->cp_blkaddr); 815 if (cur_page == cp2) 816 cp_blk_no += 1 << le32_to_cpu(fsb->log_blocks_per_seg); 817 818 for (i = 1; i < cp_blks; i++) { 819 void *sit_bitmap_ptr; 820 unsigned char *ckpt = (unsigned char *)sbi->ckpt; 821 822 cur_page = get_meta_page(sbi, cp_blk_no + i); 823 sit_bitmap_ptr = page_address(cur_page); 824 memcpy(ckpt + i * blk_size, sit_bitmap_ptr, blk_size); 825 f2fs_put_page(cur_page, 1); 826 } 827 done: 828 f2fs_put_page(cp1, 1); 829 f2fs_put_page(cp2, 1); 830 return 0; 831 832 free_fail_no_cp: 833 f2fs_put_page(cp1, 1); 834 f2fs_put_page(cp2, 1); 835 fail_no_cp: 836 kfree(sbi->ckpt); 837 return -EINVAL; 838 } 839 840 static void __add_dirty_inode(struct inode *inode, enum inode_type type) 841 { 842 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 843 int flag = (type == DIR_INODE) ? FI_DIRTY_DIR : FI_DIRTY_FILE; 844 845 if (is_inode_flag_set(inode, flag)) 846 return; 847 848 set_inode_flag(inode, flag); 849 if (!f2fs_is_volatile_file(inode)) 850 list_add_tail(&F2FS_I(inode)->dirty_list, 851 &sbi->inode_list[type]); 852 stat_inc_dirty_inode(sbi, type); 853 } 854 855 static void __remove_dirty_inode(struct inode *inode, enum inode_type type) 856 { 857 int flag = (type == DIR_INODE) ? FI_DIRTY_DIR : FI_DIRTY_FILE; 858 859 if (get_dirty_pages(inode) || !is_inode_flag_set(inode, flag)) 860 return; 861 862 list_del_init(&F2FS_I(inode)->dirty_list); 863 clear_inode_flag(inode, flag); 864 stat_dec_dirty_inode(F2FS_I_SB(inode), type); 865 } 866 867 void update_dirty_page(struct inode *inode, struct page *page) 868 { 869 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 870 enum inode_type type = S_ISDIR(inode->i_mode) ? DIR_INODE : FILE_INODE; 871 872 if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode) && 873 !S_ISLNK(inode->i_mode)) 874 return; 875 876 spin_lock(&sbi->inode_lock[type]); 877 if (type != FILE_INODE || test_opt(sbi, DATA_FLUSH)) 878 __add_dirty_inode(inode, type); 879 inode_inc_dirty_pages(inode); 880 spin_unlock(&sbi->inode_lock[type]); 881 882 SetPagePrivate(page); 883 f2fs_trace_pid(page); 884 } 885 886 void remove_dirty_inode(struct inode *inode) 887 { 888 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 889 enum inode_type type = S_ISDIR(inode->i_mode) ? DIR_INODE : FILE_INODE; 890 891 if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode) && 892 !S_ISLNK(inode->i_mode)) 893 return; 894 895 if (type == FILE_INODE && !test_opt(sbi, DATA_FLUSH)) 896 return; 897 898 spin_lock(&sbi->inode_lock[type]); 899 __remove_dirty_inode(inode, type); 900 spin_unlock(&sbi->inode_lock[type]); 901 } 902 903 int sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type) 904 { 905 struct list_head *head; 906 struct inode *inode; 907 struct f2fs_inode_info *fi; 908 bool is_dir = (type == DIR_INODE); 909 unsigned long ino = 0; 910 911 trace_f2fs_sync_dirty_inodes_enter(sbi->sb, is_dir, 912 get_pages(sbi, is_dir ? 913 F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA)); 914 retry: 915 if (unlikely(f2fs_cp_error(sbi))) 916 return -EIO; 917 918 spin_lock(&sbi->inode_lock[type]); 919 920 head = &sbi->inode_list[type]; 921 if (list_empty(head)) { 922 spin_unlock(&sbi->inode_lock[type]); 923 trace_f2fs_sync_dirty_inodes_exit(sbi->sb, is_dir, 924 get_pages(sbi, is_dir ? 925 F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA)); 926 return 0; 927 } 928 fi = list_first_entry(head, struct f2fs_inode_info, dirty_list); 929 inode = igrab(&fi->vfs_inode); 930 spin_unlock(&sbi->inode_lock[type]); 931 if (inode) { 932 unsigned long cur_ino = inode->i_ino; 933 934 if (is_dir) 935 F2FS_I(inode)->cp_task = current; 936 937 filemap_fdatawrite(inode->i_mapping); 938 939 if (is_dir) 940 F2FS_I(inode)->cp_task = NULL; 941 942 iput(inode); 943 /* We need to give cpu to another writers. */ 944 if (ino == cur_ino) { 945 congestion_wait(BLK_RW_ASYNC, HZ/50); 946 cond_resched(); 947 } else { 948 ino = cur_ino; 949 } 950 } else { 951 /* 952 * We should submit bio, since it exists several 953 * wribacking dentry pages in the freeing inode. 954 */ 955 f2fs_submit_merged_write(sbi, DATA); 956 cond_resched(); 957 } 958 goto retry; 959 } 960 961 int f2fs_sync_inode_meta(struct f2fs_sb_info *sbi) 962 { 963 struct list_head *head = &sbi->inode_list[DIRTY_META]; 964 struct inode *inode; 965 struct f2fs_inode_info *fi; 966 s64 total = get_pages(sbi, F2FS_DIRTY_IMETA); 967 968 while (total--) { 969 if (unlikely(f2fs_cp_error(sbi))) 970 return -EIO; 971 972 spin_lock(&sbi->inode_lock[DIRTY_META]); 973 if (list_empty(head)) { 974 spin_unlock(&sbi->inode_lock[DIRTY_META]); 975 return 0; 976 } 977 fi = list_first_entry(head, struct f2fs_inode_info, 978 gdirty_list); 979 inode = igrab(&fi->vfs_inode); 980 spin_unlock(&sbi->inode_lock[DIRTY_META]); 981 if (inode) { 982 sync_inode_metadata(inode, 0); 983 984 /* it's on eviction */ 985 if (is_inode_flag_set(inode, FI_DIRTY_INODE)) 986 update_inode_page(inode); 987 iput(inode); 988 } 989 }; 990 return 0; 991 } 992 993 static void __prepare_cp_block(struct f2fs_sb_info *sbi) 994 { 995 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); 996 struct f2fs_nm_info *nm_i = NM_I(sbi); 997 nid_t last_nid = nm_i->next_scan_nid; 998 999 next_free_nid(sbi, &last_nid); 1000 ckpt->valid_block_count = cpu_to_le64(valid_user_blocks(sbi)); 1001 ckpt->valid_node_count = cpu_to_le32(valid_node_count(sbi)); 1002 ckpt->valid_inode_count = cpu_to_le32(valid_inode_count(sbi)); 1003 ckpt->next_free_nid = cpu_to_le32(last_nid); 1004 } 1005 1006 /* 1007 * Freeze all the FS-operations for checkpoint. 1008 */ 1009 static int block_operations(struct f2fs_sb_info *sbi) 1010 { 1011 struct writeback_control wbc = { 1012 .sync_mode = WB_SYNC_ALL, 1013 .nr_to_write = LONG_MAX, 1014 .for_reclaim = 0, 1015 }; 1016 struct blk_plug plug; 1017 int err = 0; 1018 1019 blk_start_plug(&plug); 1020 1021 retry_flush_dents: 1022 f2fs_lock_all(sbi); 1023 /* write all the dirty dentry pages */ 1024 if (get_pages(sbi, F2FS_DIRTY_DENTS)) { 1025 f2fs_unlock_all(sbi); 1026 err = sync_dirty_inodes(sbi, DIR_INODE); 1027 if (err) 1028 goto out; 1029 cond_resched(); 1030 goto retry_flush_dents; 1031 } 1032 1033 /* 1034 * POR: we should ensure that there are no dirty node pages 1035 * until finishing nat/sit flush. inode->i_blocks can be updated. 1036 */ 1037 down_write(&sbi->node_change); 1038 1039 if (get_pages(sbi, F2FS_DIRTY_IMETA)) { 1040 up_write(&sbi->node_change); 1041 f2fs_unlock_all(sbi); 1042 err = f2fs_sync_inode_meta(sbi); 1043 if (err) 1044 goto out; 1045 cond_resched(); 1046 goto retry_flush_dents; 1047 } 1048 1049 retry_flush_nodes: 1050 down_write(&sbi->node_write); 1051 1052 if (get_pages(sbi, F2FS_DIRTY_NODES)) { 1053 up_write(&sbi->node_write); 1054 err = sync_node_pages(sbi, &wbc, false, FS_CP_NODE_IO); 1055 if (err) { 1056 up_write(&sbi->node_change); 1057 f2fs_unlock_all(sbi); 1058 goto out; 1059 } 1060 cond_resched(); 1061 goto retry_flush_nodes; 1062 } 1063 1064 /* 1065 * sbi->node_change is used only for AIO write_begin path which produces 1066 * dirty node blocks and some checkpoint values by block allocation. 1067 */ 1068 __prepare_cp_block(sbi); 1069 up_write(&sbi->node_change); 1070 out: 1071 blk_finish_plug(&plug); 1072 return err; 1073 } 1074 1075 static void unblock_operations(struct f2fs_sb_info *sbi) 1076 { 1077 up_write(&sbi->node_write); 1078 f2fs_unlock_all(sbi); 1079 } 1080 1081 static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi) 1082 { 1083 DEFINE_WAIT(wait); 1084 1085 for (;;) { 1086 prepare_to_wait(&sbi->cp_wait, &wait, TASK_UNINTERRUPTIBLE); 1087 1088 if (!get_pages(sbi, F2FS_WB_CP_DATA)) 1089 break; 1090 1091 io_schedule_timeout(5*HZ); 1092 } 1093 finish_wait(&sbi->cp_wait, &wait); 1094 } 1095 1096 static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc) 1097 { 1098 unsigned long orphan_num = sbi->im[ORPHAN_INO].ino_num; 1099 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); 1100 unsigned long flags; 1101 1102 spin_lock_irqsave(&sbi->cp_lock, flags); 1103 1104 if ((cpc->reason & CP_UMOUNT) && 1105 le32_to_cpu(ckpt->cp_pack_total_block_count) > 1106 sbi->blocks_per_seg - NM_I(sbi)->nat_bits_blocks) 1107 disable_nat_bits(sbi, false); 1108 1109 if (cpc->reason & CP_TRIMMED) 1110 __set_ckpt_flags(ckpt, CP_TRIMMED_FLAG); 1111 1112 if (cpc->reason & CP_UMOUNT) 1113 __set_ckpt_flags(ckpt, CP_UMOUNT_FLAG); 1114 else 1115 __clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG); 1116 1117 if (cpc->reason & CP_FASTBOOT) 1118 __set_ckpt_flags(ckpt, CP_FASTBOOT_FLAG); 1119 else 1120 __clear_ckpt_flags(ckpt, CP_FASTBOOT_FLAG); 1121 1122 if (orphan_num) 1123 __set_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG); 1124 else 1125 __clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG); 1126 1127 if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) 1128 __set_ckpt_flags(ckpt, CP_FSCK_FLAG); 1129 1130 /* set this flag to activate crc|cp_ver for recovery */ 1131 __set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG); 1132 1133 spin_unlock_irqrestore(&sbi->cp_lock, flags); 1134 } 1135 1136 static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) 1137 { 1138 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); 1139 struct f2fs_nm_info *nm_i = NM_I(sbi); 1140 unsigned long orphan_num = sbi->im[ORPHAN_INO].ino_num, flags; 1141 block_t start_blk; 1142 unsigned int data_sum_blocks, orphan_blocks; 1143 __u32 crc32 = 0; 1144 int i; 1145 int cp_payload_blks = __cp_payload(sbi); 1146 struct super_block *sb = sbi->sb; 1147 struct curseg_info *seg_i = CURSEG_I(sbi, CURSEG_HOT_NODE); 1148 u64 kbytes_written; 1149 1150 /* Flush all the NAT/SIT pages */ 1151 while (get_pages(sbi, F2FS_DIRTY_META)) { 1152 sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO); 1153 if (unlikely(f2fs_cp_error(sbi))) 1154 return -EIO; 1155 } 1156 1157 /* 1158 * modify checkpoint 1159 * version number is already updated 1160 */ 1161 ckpt->elapsed_time = cpu_to_le64(get_mtime(sbi)); 1162 ckpt->free_segment_count = cpu_to_le32(free_segments(sbi)); 1163 for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) { 1164 ckpt->cur_node_segno[i] = 1165 cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_NODE)); 1166 ckpt->cur_node_blkoff[i] = 1167 cpu_to_le16(curseg_blkoff(sbi, i + CURSEG_HOT_NODE)); 1168 ckpt->alloc_type[i + CURSEG_HOT_NODE] = 1169 curseg_alloc_type(sbi, i + CURSEG_HOT_NODE); 1170 } 1171 for (i = 0; i < NR_CURSEG_DATA_TYPE; i++) { 1172 ckpt->cur_data_segno[i] = 1173 cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_DATA)); 1174 ckpt->cur_data_blkoff[i] = 1175 cpu_to_le16(curseg_blkoff(sbi, i + CURSEG_HOT_DATA)); 1176 ckpt->alloc_type[i + CURSEG_HOT_DATA] = 1177 curseg_alloc_type(sbi, i + CURSEG_HOT_DATA); 1178 } 1179 1180 /* 2 cp + n data seg summary + orphan inode blocks */ 1181 data_sum_blocks = npages_for_summary_flush(sbi, false); 1182 spin_lock_irqsave(&sbi->cp_lock, flags); 1183 if (data_sum_blocks < NR_CURSEG_DATA_TYPE) 1184 __set_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG); 1185 else 1186 __clear_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG); 1187 spin_unlock_irqrestore(&sbi->cp_lock, flags); 1188 1189 orphan_blocks = GET_ORPHAN_BLOCKS(orphan_num); 1190 ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks + 1191 orphan_blocks); 1192 1193 if (__remain_node_summaries(cpc->reason)) 1194 ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS+ 1195 cp_payload_blks + data_sum_blocks + 1196 orphan_blocks + NR_CURSEG_NODE_TYPE); 1197 else 1198 ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS + 1199 cp_payload_blks + data_sum_blocks + 1200 orphan_blocks); 1201 1202 /* update ckpt flag for checkpoint */ 1203 update_ckpt_flags(sbi, cpc); 1204 1205 /* update SIT/NAT bitmap */ 1206 get_sit_bitmap(sbi, __bitmap_ptr(sbi, SIT_BITMAP)); 1207 get_nat_bitmap(sbi, __bitmap_ptr(sbi, NAT_BITMAP)); 1208 1209 crc32 = f2fs_crc32(sbi, ckpt, le32_to_cpu(ckpt->checksum_offset)); 1210 *((__le32 *)((unsigned char *)ckpt + 1211 le32_to_cpu(ckpt->checksum_offset))) 1212 = cpu_to_le32(crc32); 1213 1214 start_blk = __start_cp_next_addr(sbi); 1215 1216 /* write nat bits */ 1217 if (enabled_nat_bits(sbi, cpc)) { 1218 __u64 cp_ver = cur_cp_version(ckpt); 1219 block_t blk; 1220 1221 cp_ver |= ((__u64)crc32 << 32); 1222 *(__le64 *)nm_i->nat_bits = cpu_to_le64(cp_ver); 1223 1224 blk = start_blk + sbi->blocks_per_seg - nm_i->nat_bits_blocks; 1225 for (i = 0; i < nm_i->nat_bits_blocks; i++) 1226 update_meta_page(sbi, nm_i->nat_bits + 1227 (i << F2FS_BLKSIZE_BITS), blk + i); 1228 1229 /* Flush all the NAT BITS pages */ 1230 while (get_pages(sbi, F2FS_DIRTY_META)) { 1231 sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO); 1232 if (unlikely(f2fs_cp_error(sbi))) 1233 return -EIO; 1234 } 1235 } 1236 1237 /* need to wait for end_io results */ 1238 wait_on_all_pages_writeback(sbi); 1239 if (unlikely(f2fs_cp_error(sbi))) 1240 return -EIO; 1241 1242 /* write out checkpoint buffer at block 0 */ 1243 update_meta_page(sbi, ckpt, start_blk++); 1244 1245 for (i = 1; i < 1 + cp_payload_blks; i++) 1246 update_meta_page(sbi, (char *)ckpt + i * F2FS_BLKSIZE, 1247 start_blk++); 1248 1249 if (orphan_num) { 1250 write_orphan_inodes(sbi, start_blk); 1251 start_blk += orphan_blocks; 1252 } 1253 1254 write_data_summaries(sbi, start_blk); 1255 start_blk += data_sum_blocks; 1256 1257 /* Record write statistics in the hot node summary */ 1258 kbytes_written = sbi->kbytes_written; 1259 if (sb->s_bdev->bd_part) 1260 kbytes_written += BD_PART_WRITTEN(sbi); 1261 1262 seg_i->journal->info.kbytes_written = cpu_to_le64(kbytes_written); 1263 1264 if (__remain_node_summaries(cpc->reason)) { 1265 write_node_summaries(sbi, start_blk); 1266 start_blk += NR_CURSEG_NODE_TYPE; 1267 } 1268 1269 /* writeout checkpoint block */ 1270 update_meta_page(sbi, ckpt, start_blk); 1271 1272 /* wait for previous submitted node/meta pages writeback */ 1273 wait_on_all_pages_writeback(sbi); 1274 1275 if (unlikely(f2fs_cp_error(sbi))) 1276 return -EIO; 1277 1278 filemap_fdatawait_range(NODE_MAPPING(sbi), 0, LLONG_MAX); 1279 filemap_fdatawait_range(META_MAPPING(sbi), 0, LLONG_MAX); 1280 1281 /* update user_block_counts */ 1282 sbi->last_valid_block_count = sbi->total_valid_block_count; 1283 percpu_counter_set(&sbi->alloc_valid_block_count, 0); 1284 1285 /* Here, we only have one bio having CP pack */ 1286 sync_meta_pages(sbi, META_FLUSH, LONG_MAX, FS_CP_META_IO); 1287 1288 /* wait for previous submitted meta pages writeback */ 1289 wait_on_all_pages_writeback(sbi); 1290 1291 release_ino_entry(sbi, false); 1292 1293 if (unlikely(f2fs_cp_error(sbi))) 1294 return -EIO; 1295 1296 clear_sbi_flag(sbi, SBI_IS_DIRTY); 1297 clear_sbi_flag(sbi, SBI_NEED_CP); 1298 __set_cp_next_pack(sbi); 1299 1300 /* 1301 * redirty superblock if metadata like node page or inode cache is 1302 * updated during writing checkpoint. 1303 */ 1304 if (get_pages(sbi, F2FS_DIRTY_NODES) || 1305 get_pages(sbi, F2FS_DIRTY_IMETA)) 1306 set_sbi_flag(sbi, SBI_IS_DIRTY); 1307 1308 f2fs_bug_on(sbi, get_pages(sbi, F2FS_DIRTY_DENTS)); 1309 1310 return 0; 1311 } 1312 1313 /* 1314 * We guarantee that this checkpoint procedure will not fail. 1315 */ 1316 int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) 1317 { 1318 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); 1319 unsigned long long ckpt_ver; 1320 int err = 0; 1321 1322 mutex_lock(&sbi->cp_mutex); 1323 1324 if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) && 1325 ((cpc->reason & CP_FASTBOOT) || (cpc->reason & CP_SYNC) || 1326 ((cpc->reason & CP_DISCARD) && !sbi->discard_blks))) 1327 goto out; 1328 if (unlikely(f2fs_cp_error(sbi))) { 1329 err = -EIO; 1330 goto out; 1331 } 1332 if (f2fs_readonly(sbi->sb)) { 1333 err = -EROFS; 1334 goto out; 1335 } 1336 1337 trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "start block_ops"); 1338 1339 err = block_operations(sbi); 1340 if (err) 1341 goto out; 1342 1343 trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish block_ops"); 1344 1345 f2fs_flush_merged_writes(sbi); 1346 1347 /* this is the case of multiple fstrims without any changes */ 1348 if (cpc->reason & CP_DISCARD) { 1349 if (!exist_trim_candidates(sbi, cpc)) { 1350 unblock_operations(sbi); 1351 goto out; 1352 } 1353 1354 if (NM_I(sbi)->dirty_nat_cnt == 0 && 1355 SIT_I(sbi)->dirty_sentries == 0 && 1356 prefree_segments(sbi) == 0) { 1357 flush_sit_entries(sbi, cpc); 1358 clear_prefree_segments(sbi, cpc); 1359 unblock_operations(sbi); 1360 goto out; 1361 } 1362 } 1363 1364 /* 1365 * update checkpoint pack index 1366 * Increase the version number so that 1367 * SIT entries and seg summaries are written at correct place 1368 */ 1369 ckpt_ver = cur_cp_version(ckpt); 1370 ckpt->checkpoint_ver = cpu_to_le64(++ckpt_ver); 1371 1372 /* write cached NAT/SIT entries to NAT/SIT area */ 1373 flush_nat_entries(sbi, cpc); 1374 flush_sit_entries(sbi, cpc); 1375 1376 /* unlock all the fs_lock[] in do_checkpoint() */ 1377 err = do_checkpoint(sbi, cpc); 1378 if (err) 1379 release_discard_addrs(sbi); 1380 else 1381 clear_prefree_segments(sbi, cpc); 1382 1383 unblock_operations(sbi); 1384 stat_inc_cp_count(sbi->stat_info); 1385 1386 if (cpc->reason & CP_RECOVERY) 1387 f2fs_msg(sbi->sb, KERN_NOTICE, 1388 "checkpoint: version = %llx", ckpt_ver); 1389 1390 /* do checkpoint periodically */ 1391 f2fs_update_time(sbi, CP_TIME); 1392 trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint"); 1393 out: 1394 mutex_unlock(&sbi->cp_mutex); 1395 return err; 1396 } 1397 1398 void init_ino_entry_info(struct f2fs_sb_info *sbi) 1399 { 1400 int i; 1401 1402 for (i = 0; i < MAX_INO_ENTRY; i++) { 1403 struct inode_management *im = &sbi->im[i]; 1404 1405 INIT_RADIX_TREE(&im->ino_root, GFP_ATOMIC); 1406 spin_lock_init(&im->ino_lock); 1407 INIT_LIST_HEAD(&im->ino_list); 1408 im->ino_num = 0; 1409 } 1410 1411 sbi->max_orphans = (sbi->blocks_per_seg - F2FS_CP_PACKS - 1412 NR_CURSEG_TYPE - __cp_payload(sbi)) * 1413 F2FS_ORPHANS_PER_BLOCK; 1414 } 1415 1416 int __init create_checkpoint_caches(void) 1417 { 1418 ino_entry_slab = f2fs_kmem_cache_create("f2fs_ino_entry", 1419 sizeof(struct ino_entry)); 1420 if (!ino_entry_slab) 1421 return -ENOMEM; 1422 inode_entry_slab = f2fs_kmem_cache_create("f2fs_inode_entry", 1423 sizeof(struct inode_entry)); 1424 if (!inode_entry_slab) { 1425 kmem_cache_destroy(ino_entry_slab); 1426 return -ENOMEM; 1427 } 1428 return 0; 1429 } 1430 1431 void destroy_checkpoint_caches(void) 1432 { 1433 kmem_cache_destroy(ino_entry_slab); 1434 kmem_cache_destroy(inode_entry_slab); 1435 } 1436