/*
 * fs/f2fs/checkpoint.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/fs.h>
#include <linux/bio.h>
#include <linux/mpage.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/f2fs_fs.h>
#include <linux/pagevec.h>
#include <linux/swap.h>

#include "f2fs.h"
#include "node.h"
#include "segment.h"
#include "trace.h"
#include <trace/events/f2fs.h>

static struct kmem_cache *ino_entry_slab;
struct kmem_cache *inode_entry_slab;

/*
 * We guarantee no failure on the returned page.
 */
struct page *grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
{
	struct address_space *mapping = META_MAPPING(sbi);
	struct page *page = NULL;
repeat:
	page = f2fs_grab_cache_page(mapping, index, false);
	if (!page) {
		cond_resched();
		goto repeat;
	}
	f2fs_wait_on_page_writeback(page, META, true);
	SetPageUptodate(page);
	return page;
}

/*
 * We guarantee no failure on the returned page.
 */
static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index,
							bool is_meta)
{
	struct address_space *mapping = META_MAPPING(sbi);
	struct page *page;
	struct f2fs_io_info fio = {
		.sbi = sbi,
		.type = META,
		.rw = READ_SYNC | REQ_META | REQ_PRIO,
		.old_blkaddr = index,
		.new_blkaddr = index,
		.encrypted_page = NULL,
	};

	if (unlikely(!is_meta))
		fio.rw &= ~REQ_META;
repeat:
	page = f2fs_grab_cache_page(mapping, index, false);
	if (!page) {
		cond_resched();
		goto repeat;
	}
	if (PageUptodate(page))
		goto out;

	fio.page = page;

	if (f2fs_submit_page_bio(&fio)) {
		f2fs_put_page(page, 1);
		goto repeat;
	}

	lock_page(page);
	if (unlikely(page->mapping != mapping)) {
		f2fs_put_page(page, 1);
		goto repeat;
	}

	/*
	 * if there is any IO error when accessing the device, make our
	 * filesystem readonly and make sure not to write a checkpoint
	 * with non-uptodate meta pages.
	 */
	if (unlikely(!PageUptodate(page)))
		f2fs_stop_checkpoint(sbi);
out:
	return page;
}

struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
{
	return __get_meta_page(sbi, index, true);
}

/* for POR only */
struct page *get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index)
{
	return __get_meta_page(sbi, index, false);
}

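/*
 * Check that a meta-area block address of the given type falls within its
 * valid on-disk range.
 */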
bool is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type)
{
	switch (type) {
	case META_NAT:
		break;
	case META_SIT:
		if (unlikely(blkaddr >= SIT_BLK_CNT(sbi)))
			return false;
		break;
	case META_SSA:
		if (unlikely(blkaddr >= MAIN_BLKADDR(sbi) ||
			blkaddr < SM_I(sbi)->ssa_blkaddr))
			return false;
		break;
	case META_CP:
		if (unlikely(blkaddr >= SIT_I(sbi)->sit_base_addr ||
			blkaddr < __start_cp_addr(sbi)))
			return false;
		break;
	case META_POR:
		if (unlikely(blkaddr >= MAX_BLKADDR(sbi) ||
			blkaddr < MAIN_BLKADDR(sbi)))
			return false;
		break;
	default:
		BUG();
	}

	return true;
}

/*
 * Readahead CP/NAT/SIT/SSA pages
 */
int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
							int type, bool sync)
{
	struct page *page;
	block_t blkno = start;
	struct f2fs_io_info fio = {
		.sbi = sbi,
		.type = META,
		.rw = sync ? (READ_SYNC | REQ_META | REQ_PRIO) : READA,
		.encrypted_page = NULL,
	};
	struct blk_plug plug;

	if (unlikely(type == META_POR))
		fio.rw &= ~REQ_META;

	blk_start_plug(&plug);
	for (; nrpages-- > 0; blkno++) {

		if (!is_valid_blkaddr(sbi, blkno, type))
			goto out;

		switch (type) {
		case META_NAT:
			if (unlikely(blkno >=
					NAT_BLOCK_OFFSET(NM_I(sbi)->max_nid)))
				blkno = 0;
			/* get nat block addr */
			fio.new_blkaddr = current_nat_addr(sbi,
					blkno * NAT_ENTRY_PER_BLOCK);
			break;
		case META_SIT:
			/* get sit block addr */
			fio.new_blkaddr = current_sit_addr(sbi,
					blkno * SIT_ENTRY_PER_BLOCK);
			break;
		case META_SSA:
		case META_CP:
		case META_POR:
			fio.new_blkaddr = blkno;
			break;
		default:
			BUG();
		}

		page = f2fs_grab_cache_page(META_MAPPING(sbi),
						fio.new_blkaddr, false);
		if (!page)
			continue;
		if (PageUptodate(page)) {
			f2fs_put_page(page, 1);
			continue;
		}

		fio.page = page;
		fio.old_blkaddr = fio.new_blkaddr;
		f2fs_submit_page_mbio(&fio);
		f2fs_put_page(page, 0);
	}
out:
	f2fs_submit_merged_bio(sbi, META, READ);
	blk_finish_plug(&plug);
	return blkno - start;
}

void ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index)
{
	struct page *page;
	bool readahead = false;

	page = find_get_page(META_MAPPING(sbi), index);
	if (!page || !PageUptodate(page))
		readahead = true;
	f2fs_put_page(page, 0);

	if (readahead)
		ra_meta_pages(sbi, index, MAX_BIO_BLOCKS(sbi), META_POR, true);
}

static int f2fs_write_meta_page(struct page *page,
				struct writeback_control *wbc)
{
	struct f2fs_sb_info *sbi = F2FS_P_SB(page);

	trace_f2fs_writepage(page, META);

	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
		goto redirty_out;
	if (wbc->for_reclaim && page->index < GET_SUM_BLOCK(sbi, 0))
		goto redirty_out;
	if (unlikely(f2fs_cp_error(sbi)))
		goto redirty_out;

	write_meta_page(sbi, page);
	dec_page_count(sbi, F2FS_DIRTY_META);

	if (wbc->for_reclaim)
		f2fs_submit_merged_bio_cond(sbi, NULL, page, 0, META, WRITE);

	unlock_page(page);

	if (unlikely(f2fs_cp_error(sbi)))
		f2fs_submit_merged_bio(sbi, META, WRITE);

	return 0;

redirty_out:
	redirty_page_for_writepage(wbc, page);
	return AOP_WRITEPAGE_ACTIVATE;
}

static int f2fs_write_meta_pages(struct address_space *mapping,
				struct writeback_control *wbc)
{
	struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
	long diff, written;

	/* collect a number of dirty meta pages and write together */
	if (wbc->for_kupdate ||
		get_pages(sbi, F2FS_DIRTY_META) < nr_pages_to_skip(sbi, META))
		goto skip_write;

	trace_f2fs_writepages(mapping->host, wbc, META);

	/* if mounting is failed, skip writing node pages */
	mutex_lock(&sbi->cp_mutex);
	diff = nr_pages_to_write(sbi, META, wbc);
	written = sync_meta_pages(sbi, META, wbc->nr_to_write);
	mutex_unlock(&sbi->cp_mutex);
	wbc->nr_to_write = max((long)0, wbc->nr_to_write - written - diff);
	return 0;

skip_write:
	wbc->pages_skipped += get_pages(sbi, F2FS_DIRTY_META);
	trace_f2fs_writepages(mapping->host, wbc, META);
	return 0;
}

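/*
 * Write back dirty meta pages in page-index order, stopping once
 * @nr_to_write pages have been issued; returns the number of pages written.
 */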
long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
						long nr_to_write)
{
	struct address_space *mapping = META_MAPPING(sbi);
	pgoff_t index = 0, end = ULONG_MAX, prev = ULONG_MAX;
	struct pagevec pvec;
	long nwritten = 0;
	struct writeback_control wbc = {
		.for_reclaim = 0,
	};
	struct blk_plug plug;

	pagevec_init(&pvec, 0);

	blk_start_plug(&plug);

	while (index <= end) {
		int i, nr_pages;
		nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
				PAGECACHE_TAG_DIRTY,
				min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
		if (unlikely(nr_pages == 0))
			break;

		for (i = 0; i < nr_pages; i++) {
			struct page *page = pvec.pages[i];

			if (prev == ULONG_MAX)
				prev = page->index - 1;
			if (nr_to_write != LONG_MAX && page->index != prev + 1) {
				pagevec_release(&pvec);
				goto stop;
			}

			lock_page(page);

			if (unlikely(page->mapping != mapping)) {
continue_unlock:
				unlock_page(page);
				continue;
			}
			if (!PageDirty(page)) {
				/* someone wrote it for us */
				goto continue_unlock;
			}

			f2fs_wait_on_page_writeback(page, META, true);

			BUG_ON(PageWriteback(page));
			if (!clear_page_dirty_for_io(page))
				goto continue_unlock;

			if (mapping->a_ops->writepage(page, &wbc)) {
				unlock_page(page);
				break;
			}
			nwritten++;
			prev = page->index;
			if (unlikely(nwritten >= nr_to_write))
				break;
		}
		pagevec_release(&pvec);
		cond_resched();
	}
stop:
	if (nwritten)
		f2fs_submit_merged_bio(sbi, type, WRITE);

	blk_finish_plug(&plug);

	return nwritten;
}

static int f2fs_set_meta_page_dirty(struct page *page)
{
	trace_f2fs_set_page_dirty(page, META);

	SetPageUptodate(page);
	if (!PageDirty(page)) {
		__set_page_dirty_nobuffers(page);
		inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_META);
		SetPagePrivate(page);
		f2fs_trace_pid(page);
		return 1;
	}
	return 0;
}

const struct address_space_operations f2fs_meta_aops = {
	.writepage	= f2fs_write_meta_page,
	.writepages	= f2fs_write_meta_pages,
	.set_page_dirty	= f2fs_set_meta_page_dirty,
	.invalidatepage	= f2fs_invalidate_page,
	.releasepage	= f2fs_release_page,
};

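/*
 * Add an ino entry of the given type; the entry is preallocated outside
 * ino_lock so the insertion never blocks on memory allocation.
 */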
static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
{
	struct inode_management *im = &sbi->im[type];
	struct ino_entry *e, *tmp;

	tmp = f2fs_kmem_cache_alloc(ino_entry_slab, GFP_NOFS);
retry:
	radix_tree_preload(GFP_NOFS | __GFP_NOFAIL);

	spin_lock(&im->ino_lock);
	e = radix_tree_lookup(&im->ino_root, ino);
	if (!e) {
		e = tmp;
		if (radix_tree_insert(&im->ino_root, ino, e)) {
			spin_unlock(&im->ino_lock);
			radix_tree_preload_end();
			goto retry;
		}
		memset(e, 0, sizeof(struct ino_entry));
		e->ino = ino;

		list_add_tail(&e->list, &im->ino_list);
		if (type != ORPHAN_INO)
			im->ino_num++;
	}
	spin_unlock(&im->ino_lock);
	radix_tree_preload_end();

	if (e != tmp)
		kmem_cache_free(ino_entry_slab, tmp);
}

static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
{
	struct inode_management *im = &sbi->im[type];
	struct ino_entry *e;

	spin_lock(&im->ino_lock);
	e = radix_tree_lookup(&im->ino_root, ino);
	if (e) {
		list_del(&e->list);
		radix_tree_delete(&im->ino_root, ino);
		im->ino_num--;
		spin_unlock(&im->ino_lock);
		kmem_cache_free(ino_entry_slab, e);
		return;
	}
	spin_unlock(&im->ino_lock);
}

void add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
{
	/* add new dirty ino entry into list */
	__add_ino_entry(sbi, ino, type);
}

void remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
{
	/* remove dirty ino entry from list */
	__remove_ino_entry(sbi, ino, type);
}

/* mode should be APPEND_INO or UPDATE_INO */
bool exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode)
{
	struct inode_management *im = &sbi->im[mode];
	struct ino_entry *e;

	spin_lock(&im->ino_lock);
	e = radix_tree_lookup(&im->ino_root, ino);
	spin_unlock(&im->ino_lock);
	return e ? true : false;
}

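/*
 * Release cached ino entries: all types when @all is set, otherwise only
 * the APPEND_INO and UPDATE_INO lists.
 */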
void release_ino_entry(struct f2fs_sb_info *sbi, bool all)
{
	struct ino_entry *e, *tmp;
	int i;

	for (i = all ? ORPHAN_INO : APPEND_INO; i <= UPDATE_INO; i++) {
		struct inode_management *im = &sbi->im[i];

		spin_lock(&im->ino_lock);
		list_for_each_entry_safe(e, tmp, &im->ino_list, list) {
			list_del(&e->list);
			radix_tree_delete(&im->ino_root, e->ino);
			kmem_cache_free(ino_entry_slab, e);
			im->ino_num--;
		}
		spin_unlock(&im->ino_lock);
	}
}

int acquire_orphan_inode(struct f2fs_sb_info *sbi)
{
	struct inode_management *im = &sbi->im[ORPHAN_INO];
	int err = 0;

	spin_lock(&im->ino_lock);

#ifdef CONFIG_F2FS_FAULT_INJECTION
	if (time_to_inject(FAULT_ORPHAN)) {
		spin_unlock(&im->ino_lock);
		return -ENOSPC;
	}
#endif
	if (unlikely(im->ino_num >= sbi->max_orphans))
		err = -ENOSPC;
	else
		im->ino_num++;
	spin_unlock(&im->ino_lock);

	return err;
}

void release_orphan_inode(struct f2fs_sb_info *sbi)
{
	struct inode_management *im = &sbi->im[ORPHAN_INO];

	spin_lock(&im->ino_lock);
	f2fs_bug_on(sbi, im->ino_num == 0);
	im->ino_num--;
	spin_unlock(&im->ino_lock);
}

void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
{
	/* add new orphan ino entry into list */
	__add_ino_entry(sbi, ino, ORPHAN_INO);
}

void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
{
	/* remove orphan entry from orphan list */
	__remove_ino_entry(sbi, ino, ORPHAN_INO);
}

static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
{
	struct inode *inode;

	inode = f2fs_iget(sbi->sb, ino);
	if (IS_ERR(inode)) {
		/*
		 * it is a bug if we cannot find the inode referenced
		 * by this orphan entry.
		 */
		f2fs_bug_on(sbi, PTR_ERR(inode) == -ENOENT);
		return PTR_ERR(inode);
	}

	clear_nlink(inode);

	/* truncate all the data during iput */
	iput(inode);
	return 0;
}

int recover_orphan_inodes(struct f2fs_sb_info *sbi)
{
	block_t start_blk, orphan_blocks, i, j;
	int err;

	if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG))
		return 0;

	start_blk = __start_cp_addr(sbi) + 1 + __cp_payload(sbi);
	orphan_blocks = __start_sum_addr(sbi) - 1 - __cp_payload(sbi);

	ra_meta_pages(sbi, start_blk, orphan_blocks, META_CP, true);

	for (i = 0; i < orphan_blocks; i++) {
		struct page *page = get_meta_page(sbi, start_blk + i);
		struct f2fs_orphan_block *orphan_blk;

		orphan_blk = (struct f2fs_orphan_block *)page_address(page);
		for (j = 0; j < le32_to_cpu(orphan_blk->entry_count); j++) {
			nid_t ino = le32_to_cpu(orphan_blk->ino[j]);
			err = recover_orphan_inode(sbi, ino);
			if (err) {
				f2fs_put_page(page, 1);
				return err;
			}
		}
		f2fs_put_page(page, 1);
	}
	/* clear Orphan Flag */
	clear_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG);
	return 0;
}

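/* write the cached orphan ino list into the orphan blocks at @start_blk */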
static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
{
	struct list_head *head;
	struct f2fs_orphan_block *orphan_blk = NULL;
	unsigned int nentries = 0;
	unsigned short index = 1;
	unsigned short orphan_blocks;
	struct page *page = NULL;
	struct ino_entry *orphan = NULL;
	struct inode_management *im = &sbi->im[ORPHAN_INO];

	orphan_blocks = GET_ORPHAN_BLOCKS(im->ino_num);

	/*
	 * we don't need to do spin_lock(&im->ino_lock) here, since all the
	 * orphan inode operations are covered under f2fs_lock_op().
	 * And, spin_lock should be avoided due to page operations below.
	 */
	head = &im->ino_list;

	/* loop for each orphan inode entry and write them in journal block */
	list_for_each_entry(orphan, head, list) {
		if (!page) {
			page = grab_meta_page(sbi, start_blk++);
			orphan_blk =
				(struct f2fs_orphan_block *)page_address(page);
			memset(orphan_blk, 0, sizeof(*orphan_blk));
		}

		orphan_blk->ino[nentries++] = cpu_to_le32(orphan->ino);

		if (nentries == F2FS_ORPHANS_PER_BLOCK) {
			/*
			 * when an orphan block is full of 1020 entries,
			 * we need to flush the current orphan block
			 * and bring another one in memory
			 */
			orphan_blk->blk_addr = cpu_to_le16(index);
			orphan_blk->blk_count = cpu_to_le16(orphan_blocks);
			orphan_blk->entry_count = cpu_to_le32(nentries);
			set_page_dirty(page);
			f2fs_put_page(page, 1);
			index++;
			nentries = 0;
			page = NULL;
		}
	}

	if (page) {
		orphan_blk->blk_addr = cpu_to_le16(index);
		orphan_blk->blk_count = cpu_to_le16(orphan_blocks);
		orphan_blk->entry_count = cpu_to_le32(nentries);
		set_page_dirty(page);
		f2fs_put_page(page, 1);
	}
}

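/*
 * Read the first and last blocks of a CP pack, verify their checksums and
 * return the first page if both carry the same checkpoint version.
 */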
static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
				block_t cp_addr, unsigned long long *version)
{
	struct page *cp_page_1, *cp_page_2 = NULL;
	unsigned long blk_size = sbi->blocksize;
	struct f2fs_checkpoint *cp_block;
	unsigned long long cur_version = 0, pre_version = 0;
	size_t crc_offset;
	__u32 crc = 0;

	/* Read the 1st cp block in this CP pack */
	cp_page_1 = get_meta_page(sbi, cp_addr);

	/* get the version number */
	cp_block = (struct f2fs_checkpoint *)page_address(cp_page_1);
	crc_offset = le32_to_cpu(cp_block->checksum_offset);
	if (crc_offset >= blk_size)
		goto invalid_cp1;

	crc = le32_to_cpu(*((__le32 *)((unsigned char *)cp_block + crc_offset)));
	if (!f2fs_crc_valid(sbi, crc, cp_block, crc_offset))
		goto invalid_cp1;

	pre_version = cur_cp_version(cp_block);

	/* Read the 2nd cp block in this CP pack */
	cp_addr += le32_to_cpu(cp_block->cp_pack_total_block_count) - 1;
	cp_page_2 = get_meta_page(sbi, cp_addr);

	cp_block = (struct f2fs_checkpoint *)page_address(cp_page_2);
	crc_offset = le32_to_cpu(cp_block->checksum_offset);
	if (crc_offset >= blk_size)
		goto invalid_cp2;

	crc = le32_to_cpu(*((__le32 *)((unsigned char *)cp_block + crc_offset)));
	if (!f2fs_crc_valid(sbi, crc, cp_block, crc_offset))
		goto invalid_cp2;

	cur_version = cur_cp_version(cp_block);

	if (cur_version == pre_version) {
		*version = cur_version;
		f2fs_put_page(cp_page_2, 1);
		return cp_page_1;
	}
invalid_cp2:
	f2fs_put_page(cp_page_2, 1);
invalid_cp1:
	f2fs_put_page(cp_page_1, 1);
	return NULL;
}

int get_valid_checkpoint(struct f2fs_sb_info *sbi)
{
	struct f2fs_checkpoint *cp_block;
	struct f2fs_super_block *fsb = sbi->raw_super;
	struct page *cp1, *cp2, *cur_page;
	unsigned long blk_size = sbi->blocksize;
	unsigned long long cp1_version = 0, cp2_version = 0;
	unsigned long long cp_start_blk_no;
	unsigned int cp_blks = 1 + __cp_payload(sbi);
	block_t cp_blk_no;
	int i;

	sbi->ckpt = kzalloc(cp_blks * blk_size, GFP_KERNEL);
	if (!sbi->ckpt)
		return -ENOMEM;
	/*
	 * Finding out the valid cp block involves reading both
	 * sets (cp pack 1 and cp pack 2)
	 */
	cp_start_blk_no = le32_to_cpu(fsb->cp_blkaddr);
	cp1 = validate_checkpoint(sbi, cp_start_blk_no, &cp1_version);

	/* The second checkpoint pack should start at the next segment */
	cp_start_blk_no += ((unsigned long long)1) <<
				le32_to_cpu(fsb->log_blocks_per_seg);
	cp2 = validate_checkpoint(sbi, cp_start_blk_no, &cp2_version);

	if (cp1 && cp2) {
		if (ver_after(cp2_version, cp1_version))
			cur_page = cp2;
		else
			cur_page = cp1;
	} else if (cp1) {
		cur_page = cp1;
	} else if (cp2) {
		cur_page = cp2;
	} else {
		goto fail_no_cp;
	}

	cp_block = (struct f2fs_checkpoint *)page_address(cur_page);
	memcpy(sbi->ckpt, cp_block, blk_size);

	/* Sanity checking of checkpoint */
	if (sanity_check_ckpt(sbi))
		goto fail_no_cp;

	if (cp_blks <= 1)
		goto done;

	cp_blk_no = le32_to_cpu(fsb->cp_blkaddr);
	if (cur_page == cp2)
		cp_blk_no += 1 << le32_to_cpu(fsb->log_blocks_per_seg);

	for (i = 1; i < cp_blks; i++) {
		void *sit_bitmap_ptr;
		unsigned char *ckpt = (unsigned char *)sbi->ckpt;

		cur_page = get_meta_page(sbi, cp_blk_no + i);
		sit_bitmap_ptr = page_address(cur_page);
		memcpy(ckpt + i * blk_size, sit_bitmap_ptr, blk_size);
		f2fs_put_page(cur_page, 1);
	}
done:
	f2fs_put_page(cp1, 1);
	f2fs_put_page(cp2, 1);
	return 0;

fail_no_cp:
	kfree(sbi->ckpt);
	return -EINVAL;
}

static void __add_dirty_inode(struct inode *inode, enum inode_type type)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct f2fs_inode_info *fi = F2FS_I(inode);
	int flag = (type == DIR_INODE) ? FI_DIRTY_DIR : FI_DIRTY_FILE;

	if (is_inode_flag_set(fi, flag))
		return;

	set_inode_flag(fi, flag);
	list_add_tail(&fi->dirty_list, &sbi->inode_list[type]);
	stat_inc_dirty_inode(sbi, type);
}

static void __remove_dirty_inode(struct inode *inode, enum inode_type type)
{
	struct f2fs_inode_info *fi = F2FS_I(inode);
	int flag = (type == DIR_INODE) ? FI_DIRTY_DIR : FI_DIRTY_FILE;

	if (get_dirty_pages(inode) ||
			!is_inode_flag_set(F2FS_I(inode), flag))
		return;

	list_del_init(&fi->dirty_list);
	clear_inode_flag(fi, flag);
	stat_dec_dirty_inode(F2FS_I_SB(inode), type);
}

void update_dirty_page(struct inode *inode, struct page *page)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	enum inode_type type = S_ISDIR(inode->i_mode) ? DIR_INODE : FILE_INODE;

	if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode) &&
			!S_ISLNK(inode->i_mode))
		return;

	if (type != FILE_INODE || test_opt(sbi, DATA_FLUSH)) {
		spin_lock(&sbi->inode_lock[type]);
		__add_dirty_inode(inode, type);
		spin_unlock(&sbi->inode_lock[type]);
	}

	inode_inc_dirty_pages(inode);
	SetPagePrivate(page);
	f2fs_trace_pid(page);
}

void remove_dirty_inode(struct inode *inode)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	enum inode_type type = S_ISDIR(inode->i_mode) ? DIR_INODE : FILE_INODE;

	if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode) &&
			!S_ISLNK(inode->i_mode))
		return;

	if (type == FILE_INODE && !test_opt(sbi, DATA_FLUSH))
		return;

	spin_lock(&sbi->inode_lock[type]);
	__remove_dirty_inode(inode, type);
	spin_unlock(&sbi->inode_lock[type]);
}

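/* write back the dirty pages of every cached dirty inode of the given type */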
int sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type)
{
	struct list_head *head;
	struct inode *inode;
	struct f2fs_inode_info *fi;
	bool is_dir = (type == DIR_INODE);

	trace_f2fs_sync_dirty_inodes_enter(sbi->sb, is_dir,
				get_pages(sbi, is_dir ?
				F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA));
retry:
	if (unlikely(f2fs_cp_error(sbi)))
		return -EIO;

	spin_lock(&sbi->inode_lock[type]);

	head = &sbi->inode_list[type];
	if (list_empty(head)) {
		spin_unlock(&sbi->inode_lock[type]);
		trace_f2fs_sync_dirty_inodes_exit(sbi->sb, is_dir,
				get_pages(sbi, is_dir ?
				F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA));
		return 0;
	}
	fi = list_entry(head->next, struct f2fs_inode_info, dirty_list);
	inode = igrab(&fi->vfs_inode);
	spin_unlock(&sbi->inode_lock[type]);
	if (inode) {
		filemap_fdatawrite(inode->i_mapping);
		iput(inode);
	} else {
		/*
		 * We should submit the bio, since there exist several
		 * dentry pages under writeback in the freeing inode.
		 */
		f2fs_submit_merged_bio(sbi, DATA, WRITE);
		cond_resched();
	}
	goto retry;
}

/*
 * Freeze all the FS-operations for checkpoint.
 */
static int block_operations(struct f2fs_sb_info *sbi)
{
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_ALL,
		.nr_to_write = LONG_MAX,
		.for_reclaim = 0,
	};
	struct blk_plug plug;
	int err = 0;

	blk_start_plug(&plug);

retry_flush_dents:
	f2fs_lock_all(sbi);
	/* write all the dirty dentry pages */
	if (get_pages(sbi, F2FS_DIRTY_DENTS)) {
		f2fs_unlock_all(sbi);
		err = sync_dirty_inodes(sbi, DIR_INODE);
		if (err)
			goto out;
		goto retry_flush_dents;
	}

	/*
	 * POR: we should ensure that there are no dirty node pages
	 * until finishing nat/sit flush.
	 */
retry_flush_nodes:
	down_write(&sbi->node_write);

	if (get_pages(sbi, F2FS_DIRTY_NODES)) {
		up_write(&sbi->node_write);
		err = sync_node_pages(sbi, &wbc);
		if (err) {
			f2fs_unlock_all(sbi);
			goto out;
		}
		goto retry_flush_nodes;
	}
out:
	blk_finish_plug(&plug);
	return err;
}

static void unblock_operations(struct f2fs_sb_info *sbi)
{
	up_write(&sbi->node_write);
	f2fs_unlock_all(sbi);
}

static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi)
{
	DEFINE_WAIT(wait);

	for (;;) {
		prepare_to_wait(&sbi->cp_wait, &wait, TASK_UNINTERRUPTIBLE);

		if (!atomic_read(&sbi->nr_wb_bios))
			break;

		io_schedule_timeout(5*HZ);
	}
	finish_wait(&sbi->cp_wait, &wait);
}

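/*
 * Write out the whole checkpoint pack: the CP header, payload, orphan blocks,
 * data/node summaries and the trailing CP block, then wait for writeback of
 * all of them to complete.
 */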
static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	unsigned long orphan_num = sbi->im[ORPHAN_INO].ino_num;
	nid_t last_nid = nm_i->next_scan_nid;
	block_t start_blk;
	unsigned int data_sum_blocks, orphan_blocks;
	__u32 crc32 = 0;
	int i;
	int cp_payload_blks = __cp_payload(sbi);
	block_t discard_blk = NEXT_FREE_BLKADDR(sbi, curseg);
	bool invalidate = false;
	struct super_block *sb = sbi->sb;
	struct curseg_info *seg_i = CURSEG_I(sbi, CURSEG_HOT_NODE);
	u64 kbytes_written;

	/*
	 * This avoids conducting wrong roll-forward operations and uses
	 * meta pages, so it should be called prior to sync_meta_pages below.
	 */
	if (discard_next_dnode(sbi, discard_blk))
		invalidate = true;

	/* Flush all the NAT/SIT pages */
	while (get_pages(sbi, F2FS_DIRTY_META)) {
		sync_meta_pages(sbi, META, LONG_MAX);
		if (unlikely(f2fs_cp_error(sbi)))
			return -EIO;
	}

	next_free_nid(sbi, &last_nid);

	/*
	 * modify checkpoint
	 * version number is already updated
	 */
	ckpt->elapsed_time = cpu_to_le64(get_mtime(sbi));
	ckpt->valid_block_count = cpu_to_le64(valid_user_blocks(sbi));
	ckpt->free_segment_count = cpu_to_le32(free_segments(sbi));
	for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) {
		ckpt->cur_node_segno[i] =
			cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_NODE));
		ckpt->cur_node_blkoff[i] =
			cpu_to_le16(curseg_blkoff(sbi, i + CURSEG_HOT_NODE));
		ckpt->alloc_type[i + CURSEG_HOT_NODE] =
				curseg_alloc_type(sbi, i + CURSEG_HOT_NODE);
	}
	for (i = 0; i < NR_CURSEG_DATA_TYPE; i++) {
		ckpt->cur_data_segno[i] =
			cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_DATA));
		ckpt->cur_data_blkoff[i] =
			cpu_to_le16(curseg_blkoff(sbi, i + CURSEG_HOT_DATA));
		ckpt->alloc_type[i + CURSEG_HOT_DATA] =
				curseg_alloc_type(sbi, i + CURSEG_HOT_DATA);
	}

	ckpt->valid_node_count = cpu_to_le32(valid_node_count(sbi));
	ckpt->valid_inode_count = cpu_to_le32(valid_inode_count(sbi));
	ckpt->next_free_nid = cpu_to_le32(last_nid);

	/* 2 cp + n data seg summary + orphan inode blocks */
	data_sum_blocks = npages_for_summary_flush(sbi, false);
	if (data_sum_blocks < NR_CURSEG_DATA_TYPE)
		set_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
	else
		clear_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);

	orphan_blocks = GET_ORPHAN_BLOCKS(orphan_num);
	ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks +
			orphan_blocks);

	if (__remain_node_summaries(cpc->reason))
		ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS +
				cp_payload_blks + data_sum_blocks +
				orphan_blocks + NR_CURSEG_NODE_TYPE);
	else
		ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS +
				cp_payload_blks + data_sum_blocks +
				orphan_blocks);

	if (cpc->reason == CP_UMOUNT)
		set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
	else
		clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG);

	if (cpc->reason == CP_FASTBOOT)
		set_ckpt_flags(ckpt, CP_FASTBOOT_FLAG);
	else
		clear_ckpt_flags(ckpt, CP_FASTBOOT_FLAG);

	if (orphan_num)
		set_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);
	else
		clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);

	if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
		set_ckpt_flags(ckpt, CP_FSCK_FLAG);

	/* update SIT/NAT bitmap */
	get_sit_bitmap(sbi, __bitmap_ptr(sbi, SIT_BITMAP));
	get_nat_bitmap(sbi, __bitmap_ptr(sbi, NAT_BITMAP));

	crc32 = f2fs_crc32(sbi, ckpt, le32_to_cpu(ckpt->checksum_offset));
	*((__le32 *)((unsigned char *)ckpt +
				le32_to_cpu(ckpt->checksum_offset)))
				= cpu_to_le32(crc32);

	start_blk = __start_cp_addr(sbi);

	/* need to wait for end_io results */
	wait_on_all_pages_writeback(sbi);
	if (unlikely(f2fs_cp_error(sbi)))
		return -EIO;

	/* write out checkpoint buffer at block 0 */
	update_meta_page(sbi, ckpt, start_blk++);

	for (i = 1; i < 1 + cp_payload_blks; i++)
		update_meta_page(sbi, (char *)ckpt + i * F2FS_BLKSIZE,
							start_blk++);

	if (orphan_num) {
		write_orphan_inodes(sbi, start_blk);
		start_blk += orphan_blocks;
	}

	write_data_summaries(sbi, start_blk);
	start_blk += data_sum_blocks;

	/* Record write statistics in the hot node summary */
	kbytes_written = sbi->kbytes_written;
	if (sb->s_bdev->bd_part)
		kbytes_written += BD_PART_WRITTEN(sbi);

	seg_i->journal->info.kbytes_written = cpu_to_le64(kbytes_written);

	if (__remain_node_summaries(cpc->reason)) {
		write_node_summaries(sbi, start_blk);
		start_blk += NR_CURSEG_NODE_TYPE;
	}

	/* writeout checkpoint block */
	update_meta_page(sbi, ckpt, start_blk);

	/* wait for previous submitted node/meta pages writeback */
	wait_on_all_pages_writeback(sbi);

	if (unlikely(f2fs_cp_error(sbi)))
		return -EIO;

	filemap_fdatawait_range(NODE_MAPPING(sbi), 0, LLONG_MAX);
	filemap_fdatawait_range(META_MAPPING(sbi), 0, LLONG_MAX);

	/* update user_block_counts */
	sbi->last_valid_block_count = sbi->total_valid_block_count;
	percpu_counter_set(&sbi->alloc_valid_block_count, 0);

	/* Here, we only have one bio having CP pack */
	sync_meta_pages(sbi, META_FLUSH, LONG_MAX);

	/* wait for previous submitted meta pages writeback */
	wait_on_all_pages_writeback(sbi);

	/*
	 * invalidate meta page which is used temporarily for zeroing out
	 * block at the end of warm node chain.
	 */
	if (invalidate)
		invalidate_mapping_pages(META_MAPPING(sbi), discard_blk,
								discard_blk);

	release_ino_entry(sbi, false);

	if (unlikely(f2fs_cp_error(sbi)))
		return -EIO;

	clear_prefree_segments(sbi, cpc);
	clear_sbi_flag(sbi, SBI_IS_DIRTY);

	return 0;
}

/*
 * We guarantee that this checkpoint procedure will not fail.
 */
int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
	unsigned long long ckpt_ver;
	int err = 0;

	mutex_lock(&sbi->cp_mutex);

	if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) &&
		(cpc->reason == CP_FASTBOOT || cpc->reason == CP_SYNC ||
		(cpc->reason == CP_DISCARD && !sbi->discard_blks)))
		goto out;
	if (unlikely(f2fs_cp_error(sbi))) {
		err = -EIO;
		goto out;
	}
	if (f2fs_readonly(sbi->sb)) {
		err = -EROFS;
		goto out;
	}

	trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "start block_ops");

	err = block_operations(sbi);
	if (err)
		goto out;

	trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish block_ops");

	f2fs_flush_merged_bios(sbi);

	/*
	 * update checkpoint pack index
	 * Increase the version number so that
	 * SIT entries and seg summaries are written in the correct place
	 */
	ckpt_ver = cur_cp_version(ckpt);
	ckpt->checkpoint_ver = cpu_to_le64(++ckpt_ver);

	/* write cached NAT/SIT entries to NAT/SIT area */
	flush_nat_entries(sbi);
	flush_sit_entries(sbi, cpc);

	/* unlock all the fs_lock[] in do_checkpoint() */
	err = do_checkpoint(sbi, cpc);

	unblock_operations(sbi);
	stat_inc_cp_count(sbi->stat_info);

	if (cpc->reason == CP_RECOVERY)
		f2fs_msg(sbi->sb, KERN_NOTICE,
			"checkpoint: version = %llx", ckpt_ver);

	/* do checkpoint periodically */
	f2fs_update_time(sbi, CP_TIME);
	trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint");
out:
	mutex_unlock(&sbi->cp_mutex);
	return err;
}

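/* initialize the per-type ino entry trees and the orphan inode limit */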
void init_ino_entry_info(struct f2fs_sb_info *sbi)
{
	int i;

	for (i = 0; i < MAX_INO_ENTRY; i++) {
		struct inode_management *im = &sbi->im[i];

		INIT_RADIX_TREE(&im->ino_root, GFP_ATOMIC);
		spin_lock_init(&im->ino_lock);
		INIT_LIST_HEAD(&im->ino_list);
		im->ino_num = 0;
	}

	sbi->max_orphans = (sbi->blocks_per_seg - F2FS_CP_PACKS -
			NR_CURSEG_TYPE - __cp_payload(sbi)) *
				F2FS_ORPHANS_PER_BLOCK;
}

int __init create_checkpoint_caches(void)
{
	ino_entry_slab = f2fs_kmem_cache_create("f2fs_ino_entry",
			sizeof(struct ino_entry));
	if (!ino_entry_slab)
		return -ENOMEM;
	inode_entry_slab = f2fs_kmem_cache_create("f2fs_inode_entry",
			sizeof(struct inode_entry));
	if (!inode_entry_slab) {
		kmem_cache_destroy(ino_entry_slab);
		return -ENOMEM;
	}
	return 0;
}

void destroy_checkpoint_caches(void)
{
	kmem_cache_destroy(ino_entry_slab);
	kmem_cache_destroy(inode_entry_slab);
}