// SPDX-License-Identifier: GPL-2.0
/*
 * fs/f2fs/checkpoint.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 */
#include <linux/fs.h>
#include <linux/bio.h>
#include <linux/mpage.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/f2fs_fs.h>
#include <linux/pagevec.h>
#include <linux/swap.h>

#include "f2fs.h"
#include "node.h"
#include "segment.h"
#include "trace.h"
#include <trace/events/f2fs.h>

static struct kmem_cache *ino_entry_slab;
struct kmem_cache *f2fs_inode_entry_slab;

void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io)
{
	f2fs_build_fault_attr(sbi, 0, 0);
	set_ckpt_flags(sbi, CP_ERROR_FLAG);
	if (!end_io)
		f2fs_flush_merged_writes(sbi);
}

/*
 * We guarantee no failure on the returned page.
 */
struct page *f2fs_grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
{
	struct address_space *mapping = META_MAPPING(sbi);
	struct page *page = NULL;
repeat:
	page = f2fs_grab_cache_page(mapping, index, false);
	if (!page) {
		cond_resched();
		goto repeat;
	}
	f2fs_wait_on_page_writeback(page, META, true, true);
	if (!PageUptodate(page))
		SetPageUptodate(page);
	return page;
}

/*
 * We guarantee no failure on the returned page.
 */
static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index,
							bool is_meta)
{
	struct address_space *mapping = META_MAPPING(sbi);
	struct page *page;
	struct f2fs_io_info fio = {
		.sbi = sbi,
		.type = META,
		.op = REQ_OP_READ,
		.op_flags = REQ_META | REQ_PRIO,
		.old_blkaddr = index,
		.new_blkaddr = index,
		.encrypted_page = NULL,
		.is_por = !is_meta,
	};
	int err;

	if (unlikely(!is_meta))
		fio.op_flags &= ~REQ_META;
repeat:
	page = f2fs_grab_cache_page(mapping, index, false);
	if (!page) {
		cond_resched();
		goto repeat;
	}
	if (PageUptodate(page))
		goto out;

	fio.page = page;

	err = f2fs_submit_page_bio(&fio);
	if (err) {
		f2fs_put_page(page, 1);
		return ERR_PTR(err);
	}

	lock_page(page);
	if (unlikely(page->mapping != mapping)) {
		f2fs_put_page(page, 1);
		goto repeat;
	}

	if (unlikely(!PageUptodate(page))) {
		f2fs_put_page(page, 1);
		return ERR_PTR(-EIO);
	}
out:
	return page;
}

struct page *f2fs_get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
{
	return __get_meta_page(sbi, index, true);
}

struct page *f2fs_get_meta_page_nofail(struct f2fs_sb_info *sbi, pgoff_t index)
{
	struct page *page;
	int count = 0;

retry:
	page = __get_meta_page(sbi, index, true);
	if (IS_ERR(page)) {
		if (PTR_ERR(page) == -EIO &&
				++count <= DEFAULT_RETRY_IO_COUNT)
			goto retry;
		f2fs_stop_checkpoint(sbi, false);
	}
	return page;
}

/* for POR only */
struct page *f2fs_get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index)
{
	return __get_meta_page(sbi, index, false);
}
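/*
 * Block address sanity helpers: __is_bitmap_valid() cross-checks a data
 * block address against the current SIT bitmap. For DATA_GENERIC_ENHANCE
 * and DATA_GENERIC_ENHANCE_READ the block is expected to be marked valid,
 * so a cleared bit (for DATA_GENERIC_ENHANCE) means the on-disk metadata
 * is inconsistent and fsck is requested.
 */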

static bool __is_bitmap_valid(struct f2fs_sb_info *sbi, block_t blkaddr,
					int type)
{
	struct seg_entry *se;
	unsigned int segno, offset;
	bool exist;

	if (type != DATA_GENERIC_ENHANCE && type != DATA_GENERIC_ENHANCE_READ)
		return true;

	segno = GET_SEGNO(sbi, blkaddr);
	offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
	se = get_seg_entry(sbi, segno);

	exist = f2fs_test_bit(offset, se->cur_valid_map);
	if (!exist && type == DATA_GENERIC_ENHANCE) {
		f2fs_msg(sbi->sb, KERN_ERR, "Inconsistent error "
			"blkaddr:%u, sit bitmap:%d", blkaddr, exist);
		set_sbi_flag(sbi, SBI_NEED_FSCK);
		WARN_ON(1);
	}
	return exist;
}
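/*
 * Range checks per block address class: SIT, SSA and CP addresses must lie
 * inside their respective metadata areas, META_POR and DATA_GENERIC*
 * addresses inside the main area, and META_GENERIC anywhere between
 * SEG0_BLKADDR and MAIN_BLKADDR. NAT addresses are not range-checked here;
 * DATA_GENERIC_ENHANCE* is additionally verified against the SIT bitmap.
 */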

bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
					block_t blkaddr, int type)
{
	switch (type) {
	case META_NAT:
		break;
	case META_SIT:
		if (unlikely(blkaddr >= SIT_BLK_CNT(sbi)))
			return false;
		break;
	case META_SSA:
		if (unlikely(blkaddr >= MAIN_BLKADDR(sbi) ||
			blkaddr < SM_I(sbi)->ssa_blkaddr))
			return false;
		break;
	case META_CP:
		if (unlikely(blkaddr >= SIT_I(sbi)->sit_base_addr ||
			blkaddr < __start_cp_addr(sbi)))
			return false;
		break;
	case META_POR:
		if (unlikely(blkaddr >= MAX_BLKADDR(sbi) ||
			blkaddr < MAIN_BLKADDR(sbi)))
			return false;
		break;
	case DATA_GENERIC:
	case DATA_GENERIC_ENHANCE:
	case DATA_GENERIC_ENHANCE_READ:
		if (unlikely(blkaddr >= MAX_BLKADDR(sbi) ||
				blkaddr < MAIN_BLKADDR(sbi))) {
			f2fs_msg(sbi->sb, KERN_WARNING,
				"access invalid blkaddr:%u", blkaddr);
			set_sbi_flag(sbi, SBI_NEED_FSCK);
			WARN_ON(1);
			return false;
		} else {
			return __is_bitmap_valid(sbi, blkaddr, type);
		}
		break;
	case META_GENERIC:
		if (unlikely(blkaddr < SEG0_BLKADDR(sbi) ||
			blkaddr >= MAIN_BLKADDR(sbi)))
			return false;
		break;
	default:
		BUG();
	}

	return true;
}

/*
 * Readahead CP/NAT/SIT/SSA pages
 */
int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
							int type, bool sync)
{
	struct page *page;
	block_t blkno = start;
	struct f2fs_io_info fio = {
		.sbi = sbi,
		.type = META,
		.op = REQ_OP_READ,
		.op_flags = sync ? (REQ_META | REQ_PRIO) : REQ_RAHEAD,
		.encrypted_page = NULL,
		.in_list = false,
		.is_por = (type == META_POR),
	};
	struct blk_plug plug;

	if (unlikely(type == META_POR))
		fio.op_flags &= ~REQ_META;

	blk_start_plug(&plug);
	for (; nrpages-- > 0; blkno++) {

		if (!f2fs_is_valid_blkaddr(sbi, blkno, type))
			goto out;

		switch (type) {
		case META_NAT:
			if (unlikely(blkno >=
					NAT_BLOCK_OFFSET(NM_I(sbi)->max_nid)))
				blkno = 0;
			/* get nat block addr */
			fio.new_blkaddr = current_nat_addr(sbi,
					blkno * NAT_ENTRY_PER_BLOCK);
			break;
		case META_SIT:
			/* get sit block addr */
			fio.new_blkaddr = current_sit_addr(sbi,
					blkno * SIT_ENTRY_PER_BLOCK);
			break;
		case META_SSA:
		case META_CP:
		case META_POR:
			fio.new_blkaddr = blkno;
			break;
		default:
			BUG();
		}

		page = f2fs_grab_cache_page(META_MAPPING(sbi),
						fio.new_blkaddr, false);
		if (!page)
			continue;
		if (PageUptodate(page)) {
			f2fs_put_page(page, 1);
			continue;
		}

		fio.page = page;
		f2fs_submit_page_bio(&fio);
		f2fs_put_page(page, 0);
	}
out:
	blk_finish_plug(&plug);
	return blkno - start;
}

void f2fs_ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index)
{
	struct page *page;
	bool readahead = false;

	page = find_get_page(META_MAPPING(sbi), index);
	if (!page || !PageUptodate(page))
		readahead = true;
	f2fs_put_page(page, 0);

	if (readahead)
		f2fs_ra_meta_pages(sbi, index, BIO_MAX_PAGES, META_POR, true);
}

static int __f2fs_write_meta_page(struct page *page,
				struct writeback_control *wbc,
				enum iostat_type io_type)
{
	struct f2fs_sb_info *sbi = F2FS_P_SB(page);

	trace_f2fs_writepage(page, META);

	if (unlikely(f2fs_cp_error(sbi)))
		goto redirty_out;
	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
		goto redirty_out;
	if (wbc->for_reclaim && page->index < GET_SUM_BLOCK(sbi, 0))
		goto redirty_out;

	f2fs_do_write_meta_page(sbi, page, io_type);
	dec_page_count(sbi, F2FS_DIRTY_META);

	if (wbc->for_reclaim)
		f2fs_submit_merged_write_cond(sbi, NULL, page, 0, META);

	unlock_page(page);

	if (unlikely(f2fs_cp_error(sbi)))
		f2fs_submit_merged_write(sbi, META);

	return 0;

redirty_out:
	redirty_page_for_writepage(wbc, page);
	return AOP_WRITEPAGE_ACTIVATE;
}

static int f2fs_write_meta_page(struct page *page,
				struct writeback_control *wbc)
{
	return __f2fs_write_meta_page(page, wbc, FS_META_IO);
}

static int f2fs_write_meta_pages(struct address_space *mapping,
				struct writeback_control *wbc)
{
	struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
	long diff, written;

	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
		goto skip_write;

	/* collect a number of dirty meta pages and write together */
	if (wbc->sync_mode != WB_SYNC_ALL &&
			get_pages(sbi, F2FS_DIRTY_META) <
					nr_pages_to_skip(sbi, META))
		goto skip_write;

	/* if locking fails, cp will flush dirty pages instead */
	if (!mutex_trylock(&sbi->cp_mutex))
		goto skip_write;

	trace_f2fs_writepages(mapping->host, wbc, META);
	diff = nr_pages_to_write(sbi, META, wbc);
	written = f2fs_sync_meta_pages(sbi, META, wbc->nr_to_write, FS_META_IO);
	mutex_unlock(&sbi->cp_mutex);
	wbc->nr_to_write = max((long)0, wbc->nr_to_write - written - diff);
	return 0;

skip_write:
	wbc->pages_skipped += get_pages(sbi, F2FS_DIRTY_META);
	trace_f2fs_writepages(mapping->host, wbc, META);
	return 0;
}

long f2fs_sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
				long nr_to_write, enum iostat_type io_type)
{
	struct address_space *mapping = META_MAPPING(sbi);
	pgoff_t index = 0, prev = ULONG_MAX;
	struct pagevec pvec;
	long nwritten = 0;
	int nr_pages;
	struct writeback_control wbc = {
		.for_reclaim = 0,
	};
	struct blk_plug plug;

	pagevec_init(&pvec);

	blk_start_plug(&plug);

	while ((nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
				PAGECACHE_TAG_DIRTY))) {
		int i;

		for (i = 0; i < nr_pages; i++) {
			struct page *page = pvec.pages[i];

			if (prev == ULONG_MAX)
				prev = page->index - 1;
			if (nr_to_write != LONG_MAX && page->index != prev + 1) {
				pagevec_release(&pvec);
				goto stop;
			}

			lock_page(page);

			if (unlikely(page->mapping != mapping)) {
continue_unlock:
				unlock_page(page);
				continue;
			}
			if (!PageDirty(page)) {
				/* someone wrote it for us */
				goto continue_unlock;
			}

			f2fs_wait_on_page_writeback(page, META, true, true);

			if (!clear_page_dirty_for_io(page))
				goto continue_unlock;

			if (__f2fs_write_meta_page(page, &wbc, io_type)) {
				unlock_page(page);
				break;
			}
			nwritten++;
			prev = page->index;
			if (unlikely(nwritten >= nr_to_write))
				break;
		}
		pagevec_release(&pvec);
		cond_resched();
	}
stop:
	if (nwritten)
		f2fs_submit_merged_write(sbi, type);

	blk_finish_plug(&plug);

	return nwritten;
}

static int f2fs_set_meta_page_dirty(struct page *page)
{
	trace_f2fs_set_page_dirty(page, META);

	if (!PageUptodate(page))
		SetPageUptodate(page);
	if (!PageDirty(page)) {
		__set_page_dirty_nobuffers(page);
		inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_META);
		f2fs_set_page_private(page, 0);
		f2fs_trace_pid(page);
		return 1;
	}
	return 0;
}

const struct address_space_operations f2fs_meta_aops = {
	.writepage	= f2fs_write_meta_page,
	.writepages	= f2fs_write_meta_pages,
	.set_page_dirty	= f2fs_set_meta_page_dirty,
	.invalidatepage	= f2fs_invalidate_page,
	.releasepage	= f2fs_release_page,
#ifdef CONFIG_MIGRATION
	.migratepage	= f2fs_migrate_page,
#endif
};
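/*
 * Per-type ino entry tracking (sbi->im[]): orphan inodes, inodes with
 * appended/updated data to be checked at fsync time, and inodes that
 * dirtied a specific device (FLUSH_INO). Entries are kept in a radix tree
 * plus a list, both protected by im->ino_lock.
 */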

static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino,
						unsigned int devidx, int type)
{
	struct inode_management *im = &sbi->im[type];
	struct ino_entry *e, *tmp;

	tmp = f2fs_kmem_cache_alloc(ino_entry_slab, GFP_NOFS);

	radix_tree_preload(GFP_NOFS | __GFP_NOFAIL);

	spin_lock(&im->ino_lock);
	e = radix_tree_lookup(&im->ino_root, ino);
	if (!e) {
		e = tmp;
		if (unlikely(radix_tree_insert(&im->ino_root, ino, e)))
			f2fs_bug_on(sbi, 1);

		memset(e, 0, sizeof(struct ino_entry));
		e->ino = ino;

		list_add_tail(&e->list, &im->ino_list);
		if (type != ORPHAN_INO)
			im->ino_num++;
	}

	if (type == FLUSH_INO)
		f2fs_set_bit(devidx, (char *)&e->dirty_device);

	spin_unlock(&im->ino_lock);
	radix_tree_preload_end();

	if (e != tmp)
		kmem_cache_free(ino_entry_slab, tmp);
}

static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
{
	struct inode_management *im = &sbi->im[type];
	struct ino_entry *e;

	spin_lock(&im->ino_lock);
	e = radix_tree_lookup(&im->ino_root, ino);
	if (e) {
		list_del(&e->list);
		radix_tree_delete(&im->ino_root, ino);
		im->ino_num--;
		spin_unlock(&im->ino_lock);
		kmem_cache_free(ino_entry_slab, e);
		return;
	}
	spin_unlock(&im->ino_lock);
}

void f2fs_add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
{
	/* add new dirty ino entry into list */
	__add_ino_entry(sbi, ino, 0, type);
}

void f2fs_remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
{
	/* remove dirty ino entry from list */
	__remove_ino_entry(sbi, ino, type);
}

/* mode should be APPEND_INO or UPDATE_INO */
bool f2fs_exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode)
{
	struct inode_management *im = &sbi->im[mode];
	struct ino_entry *e;

	spin_lock(&im->ino_lock);
	e = radix_tree_lookup(&im->ino_root, ino);
	spin_unlock(&im->ino_lock);
	return e ? true : false;
}

void f2fs_release_ino_entry(struct f2fs_sb_info *sbi, bool all)
{
	struct ino_entry *e, *tmp;
	int i;

	for (i = all ? ORPHAN_INO : APPEND_INO; i < MAX_INO_ENTRY; i++) {
		struct inode_management *im = &sbi->im[i];

		spin_lock(&im->ino_lock);
		list_for_each_entry_safe(e, tmp, &im->ino_list, list) {
			list_del(&e->list);
			radix_tree_delete(&im->ino_root, e->ino);
			kmem_cache_free(ino_entry_slab, e);
			im->ino_num--;
		}
		spin_unlock(&im->ino_lock);
	}
}

void f2fs_set_dirty_device(struct f2fs_sb_info *sbi, nid_t ino,
					unsigned int devidx, int type)
{
	__add_ino_entry(sbi, ino, devidx, type);
}

bool f2fs_is_dirty_device(struct f2fs_sb_info *sbi, nid_t ino,
					unsigned int devidx, int type)
{
	struct inode_management *im = &sbi->im[type];
	struct ino_entry *e;
	bool is_dirty = false;

	spin_lock(&im->ino_lock);
	e = radix_tree_lookup(&im->ino_root, ino);
	if (e && f2fs_test_bit(devidx, (char *)&e->dirty_device))
		is_dirty = true;
	spin_unlock(&im->ino_lock);
	return is_dirty;
}

int f2fs_acquire_orphan_inode(struct f2fs_sb_info *sbi)
{
	struct inode_management *im = &sbi->im[ORPHAN_INO];
	int err = 0;

	spin_lock(&im->ino_lock);

	if (time_to_inject(sbi, FAULT_ORPHAN)) {
		spin_unlock(&im->ino_lock);
		f2fs_show_injection_info(FAULT_ORPHAN);
		return -ENOSPC;
	}

	if (unlikely(im->ino_num >= sbi->max_orphans))
		err = -ENOSPC;
	else
		im->ino_num++;
	spin_unlock(&im->ino_lock);

	return err;
}

void f2fs_release_orphan_inode(struct f2fs_sb_info *sbi)
{
	struct inode_management *im = &sbi->im[ORPHAN_INO];

	spin_lock(&im->ino_lock);
	f2fs_bug_on(sbi, im->ino_num == 0);
	im->ino_num--;
	spin_unlock(&im->ino_lock);
}

void f2fs_add_orphan_inode(struct inode *inode)
{
	/* add new orphan ino entry into list */
	__add_ino_entry(F2FS_I_SB(inode), inode->i_ino, 0, ORPHAN_INO);
	f2fs_update_inode_page(inode);
}

void f2fs_remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
{
	/* remove orphan entry from orphan list */
	__remove_ino_entry(sbi, ino, ORPHAN_INO);
}

static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
{
	struct inode *inode;
	struct node_info ni;
	int err;

	inode = f2fs_iget_retry(sbi->sb, ino);
	if (IS_ERR(inode)) {
		/*
		 * it is a bug if we cannot find the inode that an
		 * orphan entry points to (-ENOENT).
		 */
		f2fs_bug_on(sbi, PTR_ERR(inode) == -ENOENT);
		return PTR_ERR(inode);
	}

	err = dquot_initialize(inode);
	if (err) {
		iput(inode);
		goto err_out;
	}

	clear_nlink(inode);

	/* truncate all the data during iput */
	iput(inode);

	err = f2fs_get_node_info(sbi, ino, &ni);
	if (err)
		goto err_out;

	/* ENOMEM was fully retried in f2fs_evict_inode. */
	if (ni.blk_addr != NULL_ADDR) {
		err = -EIO;
		goto err_out;
	}
	return 0;

err_out:
	set_sbi_flag(sbi, SBI_NEED_FSCK);
	f2fs_msg(sbi->sb, KERN_WARNING,
			"%s: orphan failed (ino=%x), run fsck to fix.",
			__func__, ino);
	return err;
}

int f2fs_recover_orphan_inodes(struct f2fs_sb_info *sbi)
{
	block_t start_blk, orphan_blocks, i, j;
	unsigned int s_flags = sbi->sb->s_flags;
	int err = 0;
#ifdef CONFIG_QUOTA
	int quota_enabled;
#endif

	if (!is_set_ckpt_flags(sbi, CP_ORPHAN_PRESENT_FLAG))
		return 0;

	if (bdev_read_only(sbi->sb->s_bdev)) {
		f2fs_msg(sbi->sb, KERN_INFO, "write access "
			"unavailable, skipping orphan cleanup");
		return 0;
	}

	if (s_flags & SB_RDONLY) {
		f2fs_msg(sbi->sb, KERN_INFO, "orphan cleanup on readonly fs");
		sbi->sb->s_flags &= ~SB_RDONLY;
	}

#ifdef CONFIG_QUOTA
	/* Needed for iput() to work correctly and not trash data */
	sbi->sb->s_flags |= SB_ACTIVE;

	/*
	 * Turn on quotas which were not enabled for read-only mounts if
	 * filesystem has quota feature, so that they are updated correctly.
	 */
	quota_enabled = f2fs_enable_quota_files(sbi, s_flags & SB_RDONLY);
#endif

	start_blk = __start_cp_addr(sbi) + 1 + __cp_payload(sbi);
	orphan_blocks = __start_sum_addr(sbi) - 1 - __cp_payload(sbi);

	f2fs_ra_meta_pages(sbi, start_blk, orphan_blocks, META_CP, true);

	for (i = 0; i < orphan_blocks; i++) {
		struct page *page;
		struct f2fs_orphan_block *orphan_blk;

		page = f2fs_get_meta_page(sbi, start_blk + i);
		if (IS_ERR(page)) {
			err = PTR_ERR(page);
			goto out;
		}

		orphan_blk = (struct f2fs_orphan_block *)page_address(page);
		for (j = 0; j < le32_to_cpu(orphan_blk->entry_count); j++) {
			nid_t ino = le32_to_cpu(orphan_blk->ino[j]);
			err = recover_orphan_inode(sbi, ino);
			if (err) {
				f2fs_put_page(page, 1);
				goto out;
			}
		}
		f2fs_put_page(page, 1);
	}
	/* clear Orphan Flag */
	clear_ckpt_flags(sbi, CP_ORPHAN_PRESENT_FLAG);
out:
	set_sbi_flag(sbi, SBI_IS_RECOVERED);

#ifdef CONFIG_QUOTA
	/* Turn quotas off */
	if (quota_enabled)
		f2fs_quota_off_umount(sbi->sb);
#endif
	sbi->sb->s_flags = s_flags; /* Restore SB_RDONLY status */

	return err;
}

static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
{
	struct list_head *head;
	struct f2fs_orphan_block *orphan_blk = NULL;
	unsigned int nentries = 0;
	unsigned short index = 1;
	unsigned short orphan_blocks;
	struct page *page = NULL;
	struct ino_entry *orphan = NULL;
	struct inode_management *im = &sbi->im[ORPHAN_INO];

	orphan_blocks = GET_ORPHAN_BLOCKS(im->ino_num);

	/*
	 * we don't need to do spin_lock(&im->ino_lock) here, since all the
	 * orphan inode operations are covered under f2fs_lock_op().
	 * And, spin_lock should be avoided due to page operations below.
	 */
	head = &im->ino_list;

	/* loop over each orphan inode entry and write it into a journal block */
	list_for_each_entry(orphan, head, list) {
		if (!page) {
			page = f2fs_grab_meta_page(sbi, start_blk++);
			orphan_blk =
				(struct f2fs_orphan_block *)page_address(page);
			memset(orphan_blk, 0, sizeof(*orphan_blk));
		}

		orphan_blk->ino[nentries++] = cpu_to_le32(orphan->ino);

		if (nentries == F2FS_ORPHANS_PER_BLOCK) {
			/*
			 * an orphan block is full of 1020 entries,
			 * then we need to flush current orphan blocks
			 * and bring another one in memory
			 */
			orphan_blk->blk_addr = cpu_to_le16(index);
			orphan_blk->blk_count = cpu_to_le16(orphan_blocks);
			orphan_blk->entry_count = cpu_to_le32(nentries);
			set_page_dirty(page);
			f2fs_put_page(page, 1);
			index++;
			nentries = 0;
			page = NULL;
		}
	}

	if (page) {
		orphan_blk->blk_addr = cpu_to_le16(index);
		orphan_blk->blk_count = cpu_to_le16(orphan_blocks);
		orphan_blk->entry_count = cpu_to_le32(nentries);
		set_page_dirty(page);
		f2fs_put_page(page, 1);
	}
}

static __u32 f2fs_checkpoint_chksum(struct f2fs_sb_info *sbi,
						struct f2fs_checkpoint *ckpt)
{
	unsigned int chksum_ofs = le32_to_cpu(ckpt->checksum_offset);
	__u32 chksum;

	chksum = f2fs_crc32(sbi, ckpt, chksum_ofs);
	if (chksum_ofs < CP_CHKSUM_OFFSET) {
		chksum_ofs += sizeof(chksum);
		chksum = f2fs_chksum(sbi, chksum, (__u8 *)ckpt + chksum_ofs,
						F2FS_BLKSIZE - chksum_ofs);
	}
	return chksum;
}

static int get_checkpoint_version(struct f2fs_sb_info *sbi, block_t cp_addr,
		struct f2fs_checkpoint **cp_block, struct page **cp_page,
		unsigned long long *version)
{
	size_t crc_offset = 0;
	__u32 crc;

	*cp_page = f2fs_get_meta_page(sbi, cp_addr);
	if (IS_ERR(*cp_page))
		return PTR_ERR(*cp_page);

	*cp_block = (struct f2fs_checkpoint *)page_address(*cp_page);

	crc_offset = le32_to_cpu((*cp_block)->checksum_offset);
	if (crc_offset < CP_MIN_CHKSUM_OFFSET ||
			crc_offset > CP_CHKSUM_OFFSET) {
		f2fs_put_page(*cp_page, 1);
		f2fs_msg(sbi->sb, KERN_WARNING,
			"invalid crc_offset: %zu", crc_offset);
		return -EINVAL;
	}

	if (__is_set_ckpt_flags(*cp_block, CP_LARGE_NAT_BITMAP_FLAG)) {
		if (crc_offset != CP_MIN_CHKSUM_OFFSET) {
			f2fs_put_page(*cp_page, 1);
			f2fs_msg(sbi->sb, KERN_WARNING,
				"layout of large_nat_bitmap is deprecated, "
				"run fsck to repair, chksum_offset: %zu",
				crc_offset);
			return -EINVAL;
		}
	}

	crc = f2fs_checkpoint_chksum(sbi, *cp_block);
	if (crc != cur_cp_crc(*cp_block)) {
		f2fs_put_page(*cp_page, 1);
		f2fs_msg(sbi->sb, KERN_WARNING, "invalid crc value");
		return -EINVAL;
	}

	*version = cur_cp_version(*cp_block);
	return 0;
}

static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
				block_t cp_addr, unsigned long long *version)
{
	struct page *cp_page_1 = NULL, *cp_page_2 = NULL;
	struct f2fs_checkpoint *cp_block = NULL;
	unsigned long long cur_version = 0, pre_version = 0;
	int err;

	err = get_checkpoint_version(sbi, cp_addr, &cp_block,
					&cp_page_1, version);
	if (err)
		return NULL;

	if (le32_to_cpu(cp_block->cp_pack_total_block_count) >
					sbi->blocks_per_seg) {
		f2fs_msg(sbi->sb, KERN_WARNING,
			"invalid cp_pack_total_block_count:%u",
			le32_to_cpu(cp_block->cp_pack_total_block_count));
		goto invalid_cp;
	}
	pre_version = *version;

	cp_addr += le32_to_cpu(cp_block->cp_pack_total_block_count) - 1;
	err = get_checkpoint_version(sbi, cp_addr, &cp_block,
					&cp_page_2, version);
	if (err)
		goto invalid_cp;
	cur_version = *version;

	if (cur_version == pre_version) {
		*version = cur_version;
		f2fs_put_page(cp_page_2, 1);
		return cp_page_1;
	}
	f2fs_put_page(cp_page_2, 1);
invalid_cp:
	f2fs_put_page(cp_page_1, 1);
	return NULL;
}

int f2fs_get_valid_checkpoint(struct f2fs_sb_info *sbi)
{
	struct f2fs_checkpoint *cp_block;
	struct f2fs_super_block *fsb = sbi->raw_super;
	struct page *cp1, *cp2, *cur_page;
	unsigned long blk_size = sbi->blocksize;
	unsigned long long cp1_version = 0, cp2_version = 0;
	unsigned long long cp_start_blk_no;
	unsigned int cp_blks = 1 + __cp_payload(sbi);
	block_t cp_blk_no;
	int i;

	sbi->ckpt = f2fs_kzalloc(sbi, array_size(blk_size, cp_blks),
				 GFP_KERNEL);
	if (!sbi->ckpt)
		return -ENOMEM;
	/*
	 * Finding the valid cp block involves reading both
	 * sets (cp pack 1 and cp pack 2)
	 */
	cp_start_blk_no = le32_to_cpu(fsb->cp_blkaddr);
	cp1 = validate_checkpoint(sbi, cp_start_blk_no, &cp1_version);

	/* The second checkpoint pack should start at the next segment */
	cp_start_blk_no += ((unsigned long long)1) <<
				le32_to_cpu(fsb->log_blocks_per_seg);
	cp2 = validate_checkpoint(sbi, cp_start_blk_no, &cp2_version);

	if (cp1 && cp2) {
		if (ver_after(cp2_version, cp1_version))
			cur_page = cp2;
		else
			cur_page = cp1;
	} else if (cp1) {
		cur_page = cp1;
	} else if (cp2) {
		cur_page = cp2;
	} else {
		goto fail_no_cp;
	}

	cp_block = (struct f2fs_checkpoint *)page_address(cur_page);
	memcpy(sbi->ckpt, cp_block, blk_size);

	if (cur_page == cp1)
		sbi->cur_cp_pack = 1;
	else
		sbi->cur_cp_pack = 2;

	/* Sanity checking of checkpoint */
	if (f2fs_sanity_check_ckpt(sbi))
		goto free_fail_no_cp;

	if (cp_blks <= 1)
		goto done;

	cp_blk_no = le32_to_cpu(fsb->cp_blkaddr);
	if (cur_page == cp2)
		cp_blk_no += 1 << le32_to_cpu(fsb->log_blocks_per_seg);

	for (i = 1; i < cp_blks; i++) {
		void *sit_bitmap_ptr;
		unsigned char *ckpt = (unsigned char *)sbi->ckpt;

		cur_page = f2fs_get_meta_page(sbi, cp_blk_no + i);
		if (IS_ERR(cur_page))
			goto free_fail_no_cp;
		sit_bitmap_ptr = page_address(cur_page);
		memcpy(ckpt + i * blk_size, sit_bitmap_ptr, blk_size);
		f2fs_put_page(cur_page, 1);
	}
done:
	f2fs_put_page(cp1, 1);
	f2fs_put_page(cp2, 1);
	return 0;

free_fail_no_cp:
	f2fs_put_page(cp1, 1);
	f2fs_put_page(cp2, 1);
fail_no_cp:
	kvfree(sbi->ckpt);
	return -EINVAL;
}
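/*
 * Dirty inode tracking: directories (and regular files when the data_flush
 * option is set) are linked on per-type lists so that checkpoint can write
 * out their dirty dentry/data pages via f2fs_sync_dirty_inodes().
 */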

static void __add_dirty_inode(struct inode *inode, enum inode_type type)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	int flag = (type == DIR_INODE) ? FI_DIRTY_DIR : FI_DIRTY_FILE;

	if (is_inode_flag_set(inode, flag))
		return;

	set_inode_flag(inode, flag);
	if (!f2fs_is_volatile_file(inode))
		list_add_tail(&F2FS_I(inode)->dirty_list,
						&sbi->inode_list[type]);
	stat_inc_dirty_inode(sbi, type);
}

static void __remove_dirty_inode(struct inode *inode, enum inode_type type)
{
	int flag = (type == DIR_INODE) ? FI_DIRTY_DIR : FI_DIRTY_FILE;

	if (get_dirty_pages(inode) || !is_inode_flag_set(inode, flag))
		return;

	list_del_init(&F2FS_I(inode)->dirty_list);
	clear_inode_flag(inode, flag);
	stat_dec_dirty_inode(F2FS_I_SB(inode), type);
}

void f2fs_update_dirty_page(struct inode *inode, struct page *page)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	enum inode_type type = S_ISDIR(inode->i_mode) ? DIR_INODE : FILE_INODE;

	if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode) &&
			!S_ISLNK(inode->i_mode))
		return;

	spin_lock(&sbi->inode_lock[type]);
	if (type != FILE_INODE || test_opt(sbi, DATA_FLUSH))
		__add_dirty_inode(inode, type);
	inode_inc_dirty_pages(inode);
	spin_unlock(&sbi->inode_lock[type]);

	f2fs_set_page_private(page, 0);
	f2fs_trace_pid(page);
}

void f2fs_remove_dirty_inode(struct inode *inode)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	enum inode_type type = S_ISDIR(inode->i_mode) ? DIR_INODE : FILE_INODE;

	if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode) &&
			!S_ISLNK(inode->i_mode))
		return;

	if (type == FILE_INODE && !test_opt(sbi, DATA_FLUSH))
		return;

	spin_lock(&sbi->inode_lock[type]);
	__remove_dirty_inode(inode, type);
	spin_unlock(&sbi->inode_lock[type]);
}

int f2fs_sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type)
{
	struct list_head *head;
	struct inode *inode;
	struct f2fs_inode_info *fi;
	bool is_dir = (type == DIR_INODE);
	unsigned long ino = 0;

	trace_f2fs_sync_dirty_inodes_enter(sbi->sb, is_dir,
				get_pages(sbi, is_dir ?
				F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA));
retry:
	if (unlikely(f2fs_cp_error(sbi)))
		return -EIO;

	spin_lock(&sbi->inode_lock[type]);

	head = &sbi->inode_list[type];
	if (list_empty(head)) {
		spin_unlock(&sbi->inode_lock[type]);
		trace_f2fs_sync_dirty_inodes_exit(sbi->sb, is_dir,
				get_pages(sbi, is_dir ?
				F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA));
		return 0;
	}
	fi = list_first_entry(head, struct f2fs_inode_info, dirty_list);
	inode = igrab(&fi->vfs_inode);
	spin_unlock(&sbi->inode_lock[type]);
	if (inode) {
		unsigned long cur_ino = inode->i_ino;

		F2FS_I(inode)->cp_task = current;

		filemap_fdatawrite(inode->i_mapping);

		F2FS_I(inode)->cp_task = NULL;

		iput(inode);
		/* We need to give the CPU to other writers. */
		if (ino == cur_ino)
			cond_resched();
		else
			ino = cur_ino;
	} else {
		/*
		 * We should submit bio, since there may be several
		 * dentry pages under writeback in the freeing inode.
		 */
		f2fs_submit_merged_write(sbi, DATA);
		cond_resched();
	}
	goto retry;
}

int f2fs_sync_inode_meta(struct f2fs_sb_info *sbi)
{
	struct list_head *head = &sbi->inode_list[DIRTY_META];
	struct inode *inode;
	struct f2fs_inode_info *fi;
	s64 total = get_pages(sbi, F2FS_DIRTY_IMETA);

	while (total--) {
		if (unlikely(f2fs_cp_error(sbi)))
			return -EIO;

		spin_lock(&sbi->inode_lock[DIRTY_META]);
		if (list_empty(head)) {
			spin_unlock(&sbi->inode_lock[DIRTY_META]);
			return 0;
		}
		fi = list_first_entry(head, struct f2fs_inode_info,
							gdirty_list);
		inode = igrab(&fi->vfs_inode);
		spin_unlock(&sbi->inode_lock[DIRTY_META]);
		if (inode) {
			sync_inode_metadata(inode, 0);

			/* it's on eviction */
			if (is_inode_flag_set(inode, FI_DIRTY_INODE))
				f2fs_update_inode_page(inode);
			iput(inode);
		}
	}
	return 0;
}

static void __prepare_cp_block(struct f2fs_sb_info *sbi)
{
	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	nid_t last_nid = nm_i->next_scan_nid;

	next_free_nid(sbi, &last_nid);
	ckpt->valid_block_count = cpu_to_le64(valid_user_blocks(sbi));
	ckpt->valid_node_count = cpu_to_le32(valid_node_count(sbi));
	ckpt->valid_inode_count = cpu_to_le32(valid_inode_count(sbi));
	ckpt->next_free_nid = cpu_to_le32(last_nid);
}

static bool __need_flush_quota(struct f2fs_sb_info *sbi)
{
	if (!is_journalled_quota(sbi))
		return false;
	if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH))
		return false;
	if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_REPAIR))
		return false;
	if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_FLUSH))
		return true;
	if (get_pages(sbi, F2FS_DIRTY_QDATA))
		return true;
	return false;
}

/*
 * Freeze all the FS-operations for checkpoint.
 */
static int block_operations(struct f2fs_sb_info *sbi)
{
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_ALL,
		.nr_to_write = LONG_MAX,
		.for_reclaim = 0,
	};
	struct blk_plug plug;
	int err = 0, cnt = 0;

	blk_start_plug(&plug);

retry_flush_quotas:
	if (__need_flush_quota(sbi)) {
		int locked;

		if (++cnt > DEFAULT_RETRY_QUOTA_FLUSH_COUNT) {
			set_sbi_flag(sbi, SBI_QUOTA_SKIP_FLUSH);
			f2fs_lock_all(sbi);
			goto retry_flush_dents;
		}
		clear_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH);

		/* only failed during mount/umount/freeze/quotactl */
		locked = down_read_trylock(&sbi->sb->s_umount);
		f2fs_quota_sync(sbi->sb, -1);
		if (locked)
			up_read(&sbi->sb->s_umount);
	}

	f2fs_lock_all(sbi);
	if (__need_flush_quota(sbi)) {
		f2fs_unlock_all(sbi);
		cond_resched();
		goto retry_flush_quotas;
	}

retry_flush_dents:
	/* write all the dirty dentry pages */
	if (get_pages(sbi, F2FS_DIRTY_DENTS)) {
		f2fs_unlock_all(sbi);
		err = f2fs_sync_dirty_inodes(sbi, DIR_INODE);
		if (err)
			goto out;
		cond_resched();
		goto retry_flush_quotas;
	}

	/*
	 * POR: we should ensure that there are no dirty node pages
	 * until finishing nat/sit flush. inode->i_blocks can be updated.
	 */
	down_write(&sbi->node_change);

	if (__need_flush_quota(sbi)) {
		up_write(&sbi->node_change);
		f2fs_unlock_all(sbi);
		goto retry_flush_quotas;
	}

	if (get_pages(sbi, F2FS_DIRTY_IMETA)) {
		up_write(&sbi->node_change);
		f2fs_unlock_all(sbi);
		err = f2fs_sync_inode_meta(sbi);
		if (err)
			goto out;
		cond_resched();
		goto retry_flush_quotas;
	}

retry_flush_nodes:
	down_write(&sbi->node_write);

	if (get_pages(sbi, F2FS_DIRTY_NODES)) {
		up_write(&sbi->node_write);
		atomic_inc(&sbi->wb_sync_req[NODE]);
		err = f2fs_sync_node_pages(sbi, &wbc, false, FS_CP_NODE_IO);
		atomic_dec(&sbi->wb_sync_req[NODE]);
		if (err) {
			up_write(&sbi->node_change);
			f2fs_unlock_all(sbi);
			goto out;
		}
		cond_resched();
		goto retry_flush_nodes;
	}

	/*
	 * sbi->node_change is used only for AIO write_begin path which produces
	 * dirty node blocks and some checkpoint values by block allocation.
	 */
	__prepare_cp_block(sbi);
	up_write(&sbi->node_change);
out:
	blk_finish_plug(&plug);
	return err;
}

static void unblock_operations(struct f2fs_sb_info *sbi)
{
	up_write(&sbi->node_write);
	f2fs_unlock_all(sbi);
}

void f2fs_wait_on_all_pages_writeback(struct f2fs_sb_info *sbi)
{
	DEFINE_WAIT(wait);

	for (;;) {
		prepare_to_wait(&sbi->cp_wait, &wait, TASK_UNINTERRUPTIBLE);

		if (!get_pages(sbi, F2FS_WB_CP_DATA))
			break;

		if (unlikely(f2fs_cp_error(sbi)))
			break;

		io_schedule_timeout(5*HZ);
	}
	finish_wait(&sbi->cp_wait, &wait);
}

static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
	unsigned long orphan_num = sbi->im[ORPHAN_INO].ino_num;
	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
	unsigned long flags;

	spin_lock_irqsave(&sbi->cp_lock, flags);

	if ((cpc->reason & CP_UMOUNT) &&
			le32_to_cpu(ckpt->cp_pack_total_block_count) >
			sbi->blocks_per_seg - NM_I(sbi)->nat_bits_blocks)
		disable_nat_bits(sbi, false);

	if (cpc->reason & CP_TRIMMED)
		__set_ckpt_flags(ckpt, CP_TRIMMED_FLAG);
	else
		__clear_ckpt_flags(ckpt, CP_TRIMMED_FLAG);

	if (cpc->reason & CP_UMOUNT)
		__set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
	else
		__clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG);

	if (cpc->reason & CP_FASTBOOT)
		__set_ckpt_flags(ckpt, CP_FASTBOOT_FLAG);
	else
		__clear_ckpt_flags(ckpt, CP_FASTBOOT_FLAG);

	if (orphan_num)
		__set_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);
	else
		__clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);

	if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
		__set_ckpt_flags(ckpt, CP_FSCK_FLAG);

	if (is_sbi_flag_set(sbi, SBI_CP_DISABLED))
		__set_ckpt_flags(ckpt, CP_DISABLED_FLAG);
	else
		__clear_ckpt_flags(ckpt, CP_DISABLED_FLAG);

	if (is_sbi_flag_set(sbi, SBI_CP_DISABLED_QUICK))
		__set_ckpt_flags(ckpt, CP_DISABLED_QUICK_FLAG);
	else
		__clear_ckpt_flags(ckpt, CP_DISABLED_QUICK_FLAG);

	if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH))
		__set_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG);
	/*
	 * TODO: we count on fsck.f2fs to clear this flag until we figure out
	 * missing cases which clear it incorrectly.
	 */

	if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_REPAIR))
		__set_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG);

	/* set this flag to activate crc|cp_ver for recovery */
	__set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG);
	__clear_ckpt_flags(ckpt, CP_NOCRC_RECOVERY_FLAG);

	spin_unlock_irqrestore(&sbi->cp_lock, flags);
}

static void commit_checkpoint(struct f2fs_sb_info *sbi,
	void *src, block_t blk_addr)
{
	struct writeback_control wbc = {
		.for_reclaim = 0,
	};

	/*
	 * pagevec_lookup_tag and lock_page again will take
	 * some extra time. Therefore, f2fs_update_meta_pages and
	 * f2fs_sync_meta_pages are combined in this function.
	 */
	struct page *page = f2fs_grab_meta_page(sbi, blk_addr);
	int err;

	f2fs_wait_on_page_writeback(page, META, true, true);

	memcpy(page_address(page), src, PAGE_SIZE);

	set_page_dirty(page);
	if (unlikely(!clear_page_dirty_for_io(page)))
		f2fs_bug_on(sbi, 1);

	/* writeout cp pack 2 page */
	err = __f2fs_write_meta_page(page, &wbc, FS_CP_META_IO);
	if (unlikely(err && f2fs_cp_error(sbi))) {
		f2fs_put_page(page, 1);
		return;
	}

	f2fs_bug_on(sbi, err);
	f2fs_put_page(page, 0);

	/* submit checkpoint (with barrier if NOBARRIER is not set) */
	f2fs_submit_merged_write(sbi, META_FLUSH);
}

static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	unsigned long orphan_num = sbi->im[ORPHAN_INO].ino_num, flags;
	block_t start_blk;
	unsigned int data_sum_blocks, orphan_blocks;
	__u32 crc32 = 0;
	int i;
	int cp_payload_blks = __cp_payload(sbi);
	struct super_block *sb = sbi->sb;
	struct curseg_info *seg_i = CURSEG_I(sbi, CURSEG_HOT_NODE);
	u64 kbytes_written;
	int err;

	/* Flush all the NAT/SIT pages */
	f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO);
	f2fs_bug_on(sbi, get_pages(sbi, F2FS_DIRTY_META) &&
					!f2fs_cp_error(sbi));

	/*
	 * modify checkpoint
	 * version number is already updated
	 */
	ckpt->elapsed_time = cpu_to_le64(get_mtime(sbi, true));
	ckpt->free_segment_count = cpu_to_le32(free_segments(sbi));
	for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) {
		ckpt->cur_node_segno[i] =
			cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_NODE));
		ckpt->cur_node_blkoff[i] =
			cpu_to_le16(curseg_blkoff(sbi, i + CURSEG_HOT_NODE));
		ckpt->alloc_type[i + CURSEG_HOT_NODE] =
				curseg_alloc_type(sbi, i + CURSEG_HOT_NODE);
	}
	for (i = 0; i < NR_CURSEG_DATA_TYPE; i++) {
		ckpt->cur_data_segno[i] =
			cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_DATA));
		ckpt->cur_data_blkoff[i] =
			cpu_to_le16(curseg_blkoff(sbi, i + CURSEG_HOT_DATA));
		ckpt->alloc_type[i + CURSEG_HOT_DATA] =
				curseg_alloc_type(sbi, i + CURSEG_HOT_DATA);
	}

	/* 2 cp + n data seg summary + orphan inode blocks */
	data_sum_blocks = f2fs_npages_for_summary_flush(sbi, false);
	spin_lock_irqsave(&sbi->cp_lock, flags);
	if (data_sum_blocks < NR_CURSEG_DATA_TYPE)
		__set_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
	else
		__clear_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
	spin_unlock_irqrestore(&sbi->cp_lock, flags);

	orphan_blocks = GET_ORPHAN_BLOCKS(orphan_num);
	ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks +
			orphan_blocks);

	if (__remain_node_summaries(cpc->reason))
		ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS +
				cp_payload_blks + data_sum_blocks +
				orphan_blocks + NR_CURSEG_NODE_TYPE);
	else
		ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS +
				cp_payload_blks + data_sum_blocks +
				orphan_blocks);

	/* update ckpt flag for checkpoint */
	update_ckpt_flags(sbi, cpc);

	/* update SIT/NAT bitmap */
	get_sit_bitmap(sbi, __bitmap_ptr(sbi, SIT_BITMAP));
	get_nat_bitmap(sbi, __bitmap_ptr(sbi, NAT_BITMAP));

	crc32 = f2fs_checkpoint_chksum(sbi, ckpt);
	*((__le32 *)((unsigned char *)ckpt +
				le32_to_cpu(ckpt->checksum_offset)))
				= cpu_to_le32(crc32);

	start_blk = __start_cp_next_addr(sbi);

	/* write nat bits */
	if (enabled_nat_bits(sbi, cpc)) {
		__u64 cp_ver = cur_cp_version(ckpt);
		block_t blk;

		cp_ver |= ((__u64)crc32 << 32);
		*(__le64 *)nm_i->nat_bits = cpu_to_le64(cp_ver);

		blk = start_blk + sbi->blocks_per_seg - nm_i->nat_bits_blocks;
		for (i = 0; i < nm_i->nat_bits_blocks; i++)
			f2fs_update_meta_page(sbi, nm_i->nat_bits +
					(i << F2FS_BLKSIZE_BITS), blk + i);
	}

	/* write out checkpoint buffer at block 0 */
	f2fs_update_meta_page(sbi, ckpt, start_blk++);

	for (i = 1; i < 1 + cp_payload_blks; i++)
		f2fs_update_meta_page(sbi, (char *)ckpt + i * F2FS_BLKSIZE,
							start_blk++);

	if (orphan_num) {
		write_orphan_inodes(sbi, start_blk);
		start_blk += orphan_blocks;
	}

	f2fs_write_data_summaries(sbi, start_blk);
	start_blk += data_sum_blocks;

	/* Record write statistics in the hot node summary */
	kbytes_written = sbi->kbytes_written;
	if (sb->s_bdev->bd_part)
		kbytes_written += BD_PART_WRITTEN(sbi);

	seg_i->journal->info.kbytes_written = cpu_to_le64(kbytes_written);

	if (__remain_node_summaries(cpc->reason)) {
		f2fs_write_node_summaries(sbi, start_blk);
		start_blk += NR_CURSEG_NODE_TYPE;
	}

	/* update user_block_counts */
	sbi->last_valid_block_count = sbi->total_valid_block_count;
	percpu_counter_set(&sbi->alloc_valid_block_count, 0);

	/* Here, we have one bio having CP pack except cp pack 2 page */
	f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO);
	f2fs_bug_on(sbi, get_pages(sbi, F2FS_DIRTY_META) &&
					!f2fs_cp_error(sbi));

	/* wait for previous submitted meta pages writeback */
	f2fs_wait_on_all_pages_writeback(sbi);

	/* flush all device cache */
	err = f2fs_flush_device_cache(sbi);
	if (err)
		return err;

	/* barrier and flush checkpoint cp pack 2 page if it can */
	commit_checkpoint(sbi, ckpt, start_blk);
	f2fs_wait_on_all_pages_writeback(sbi);

	/*
	 * invalidate intermediate page cache borrowed from meta inode
	 * which are used for migration of encrypted inode's blocks.
	 */
	if (f2fs_sb_has_encrypt(sbi))
		invalidate_mapping_pages(META_MAPPING(sbi),
				MAIN_BLKADDR(sbi), MAX_BLKADDR(sbi) - 1);

	f2fs_release_ino_entry(sbi, false);

	f2fs_reset_fsync_node_info(sbi);

	clear_sbi_flag(sbi, SBI_IS_DIRTY);
	clear_sbi_flag(sbi, SBI_NEED_CP);
	clear_sbi_flag(sbi, SBI_QUOTA_SKIP_FLUSH);

	spin_lock(&sbi->stat_lock);
	sbi->unusable_block_count = 0;
	spin_unlock(&sbi->stat_lock);

	__set_cp_next_pack(sbi);

	/*
	 * redirty superblock if metadata like node page or inode cache is
	 * updated during writing checkpoint.
	 */
	if (get_pages(sbi, F2FS_DIRTY_NODES) ||
			get_pages(sbi, F2FS_DIRTY_IMETA))
		set_sbi_flag(sbi, SBI_IS_DIRTY);

	f2fs_bug_on(sbi, get_pages(sbi, F2FS_DIRTY_DENTS));

	return unlikely(f2fs_cp_error(sbi)) ? -EIO : 0;
}

/*
 * We guarantee that this checkpoint procedure will not fail.
 */
int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
	unsigned long long ckpt_ver;
	int err = 0;

	if (f2fs_readonly(sbi->sb) || f2fs_hw_is_readonly(sbi))
		return -EROFS;

	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
		if (cpc->reason != CP_PAUSE)
			return 0;
		f2fs_msg(sbi->sb, KERN_WARNING,
				"Start checkpoint disabled!");
	}
	mutex_lock(&sbi->cp_mutex);

	if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) &&
		((cpc->reason & CP_FASTBOOT) || (cpc->reason & CP_SYNC) ||
		((cpc->reason & CP_DISCARD) && !sbi->discard_blks)))
		goto out;
	if (unlikely(f2fs_cp_error(sbi))) {
		err = -EIO;
		goto out;
	}

	trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "start block_ops");

	err = block_operations(sbi);
	if (err)
		goto out;

	trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish block_ops");

	f2fs_flush_merged_writes(sbi);

	/* this is the case of multiple fstrims without any changes */
	if (cpc->reason & CP_DISCARD) {
		if (!f2fs_exist_trim_candidates(sbi, cpc)) {
			unblock_operations(sbi);
			goto out;
		}

		if (NM_I(sbi)->dirty_nat_cnt == 0 &&
				SIT_I(sbi)->dirty_sentries == 0 &&
				prefree_segments(sbi) == 0) {
			f2fs_flush_sit_entries(sbi, cpc);
			f2fs_clear_prefree_segments(sbi, cpc);
			unblock_operations(sbi);
			goto out;
		}
	}

	/*
	 * update checkpoint pack index
	 * Increase the version number so that
	 * SIT entries and seg summaries are written at correct place
	 */
	ckpt_ver = cur_cp_version(ckpt);
	ckpt->checkpoint_ver = cpu_to_le64(++ckpt_ver);

	/* write cached NAT/SIT entries to NAT/SIT area */
	err = f2fs_flush_nat_entries(sbi, cpc);
	if (err)
		goto stop;

	f2fs_flush_sit_entries(sbi, cpc);

	/* unlock all the fs_lock[] in do_checkpoint() */
	err = do_checkpoint(sbi, cpc);
	if (err)
		f2fs_release_discard_addrs(sbi);
	else
		f2fs_clear_prefree_segments(sbi, cpc);
stop:
	unblock_operations(sbi);
	stat_inc_cp_count(sbi->stat_info);

	if (cpc->reason & CP_RECOVERY)
		f2fs_msg(sbi->sb, KERN_NOTICE,
			"checkpoint: version = %llx", ckpt_ver);

	/* do checkpoint periodically */
	f2fs_update_time(sbi, CP_TIME);
	trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint");
out:
	mutex_unlock(&sbi->cp_mutex);
	return err;
}

void f2fs_init_ino_entry_info(struct f2fs_sb_info *sbi)
{
	int i;

	for (i = 0; i < MAX_INO_ENTRY; i++) {
		struct inode_management *im = &sbi->im[i];

		INIT_RADIX_TREE(&im->ino_root, GFP_ATOMIC);
		spin_lock_init(&im->ino_lock);
		INIT_LIST_HEAD(&im->ino_list);
		im->ino_num = 0;
	}

	sbi->max_orphans = (sbi->blocks_per_seg - F2FS_CP_PACKS -
			NR_CURSEG_TYPE - __cp_payload(sbi)) *
				F2FS_ORPHANS_PER_BLOCK;
}

int __init f2fs_create_checkpoint_caches(void)
{
	ino_entry_slab = f2fs_kmem_cache_create("f2fs_ino_entry",
			sizeof(struct ino_entry));
	if (!ino_entry_slab)
		return -ENOMEM;
	f2fs_inode_entry_slab = f2fs_kmem_cache_create("f2fs_inode_entry",
			sizeof(struct inode_entry));
	if (!f2fs_inode_entry_slab) {
		kmem_cache_destroy(ino_entry_slab);
		return -ENOMEM;
	}
	return 0;
}

void f2fs_destroy_checkpoint_caches(void)
{
	kmem_cache_destroy(ino_entry_slab);
	kmem_cache_destroy(f2fs_inode_entry_slab);
}