/*
 * fs/f2fs/checkpoint.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/fs.h>
#include <linux/bio.h>
#include <linux/mpage.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/f2fs_fs.h>
#include <linux/pagevec.h>
#include <linux/swap.h>

#include "f2fs.h"
#include "node.h"
#include "segment.h"
#include "trace.h"
#include <trace/events/f2fs.h>

static struct kmem_cache *ino_entry_slab;
struct kmem_cache *inode_entry_slab;

/*
 * We guarantee no failure on the returned page.
 */
struct page *grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
{
	struct address_space *mapping = META_MAPPING(sbi);
	struct page *page = NULL;
repeat:
	page = grab_cache_page(mapping, index);
	if (!page) {
		cond_resched();
		goto repeat;
	}
	f2fs_wait_on_page_writeback(page, META, true);
	SetPageUptodate(page);
	return page;
}

/*
 * We guarantee no failure on the returned page.
 */
static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index,
							bool is_meta)
{
	struct address_space *mapping = META_MAPPING(sbi);
	struct page *page;
	struct f2fs_io_info fio = {
		.sbi = sbi,
		.type = META,
		.rw = READ_SYNC | REQ_META | REQ_PRIO,
		.old_blkaddr = index,
		.new_blkaddr = index,
		.encrypted_page = NULL,
	};

	if (unlikely(!is_meta))
		fio.rw &= ~REQ_META;
repeat:
	page = grab_cache_page(mapping, index);
	if (!page) {
		cond_resched();
		goto repeat;
	}
	if (PageUptodate(page))
		goto out;

	fio.page = page;

	if (f2fs_submit_page_bio(&fio)) {
		f2fs_put_page(page, 1);
		goto repeat;
	}

	lock_page(page);
	if (unlikely(page->mapping != mapping)) {
		f2fs_put_page(page, 1);
		goto repeat;
	}

	/*
	 * if there is any IO error when accessing the device, make our
	 * filesystem read-only and make sure not to write a checkpoint
	 * with a non-uptodate meta page.
	 */
	if (unlikely(!PageUptodate(page)))
		f2fs_stop_checkpoint(sbi);
out:
	return page;
}

struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
{
	return __get_meta_page(sbi, index, true);
}

/* for POR only */
struct page *get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index)
{
	return __get_meta_page(sbi, index, false);
}

bool is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type)
{
	switch (type) {
	case META_NAT:
		break;
	case META_SIT:
		if (unlikely(blkaddr >= SIT_BLK_CNT(sbi)))
			return false;
		break;
	case META_SSA:
		if (unlikely(blkaddr >= MAIN_BLKADDR(sbi) ||
			blkaddr < SM_I(sbi)->ssa_blkaddr))
			return false;
		break;
	case META_CP:
		if (unlikely(blkaddr >= SIT_I(sbi)->sit_base_addr ||
			blkaddr < __start_cp_addr(sbi)))
			return false;
		break;
	case META_POR:
		if (unlikely(blkaddr >= MAX_BLKADDR(sbi) ||
			blkaddr < MAIN_BLKADDR(sbi)))
			return false;
		break;
	default:
		BUG();
	}

	return true;
}
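/*
 * A sketch of the assumed on-disk layout: the range checks in
 * is_valid_blkaddr() above mirror the usual f2fs metadata ordering,
 *
 *	[ superblock | checkpoint (CP) | SIT | NAT | SSA | main area ]
 *
 * so a META_CP address must fall in [__start_cp_addr, sit_base_addr),
 * META_SSA in [ssa_blkaddr, MAIN_BLKADDR), and META_POR (blocks touched
 * during power-off recovery) inside the main area
 * [MAIN_BLKADDR, MAX_BLKADDR).
 */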
/*
 * Readahead CP/NAT/SIT/SSA pages
 */
int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
							int type, bool sync)
{
	struct page *page;
	block_t blkno = start;
	struct f2fs_io_info fio = {
		.sbi = sbi,
		.type = META,
		.rw = sync ? (READ_SYNC | REQ_META | REQ_PRIO) : READA,
		.encrypted_page = NULL,
	};
	struct blk_plug plug;

	if (unlikely(type == META_POR))
		fio.rw &= ~REQ_META;

	blk_start_plug(&plug);
	for (; nrpages-- > 0; blkno++) {

		if (!is_valid_blkaddr(sbi, blkno, type))
			goto out;

		switch (type) {
		case META_NAT:
			if (unlikely(blkno >=
					NAT_BLOCK_OFFSET(NM_I(sbi)->max_nid)))
				blkno = 0;
			/* get nat block addr */
			fio.new_blkaddr = current_nat_addr(sbi,
					blkno * NAT_ENTRY_PER_BLOCK);
			break;
		case META_SIT:
			/* get sit block addr */
			fio.new_blkaddr = current_sit_addr(sbi,
					blkno * SIT_ENTRY_PER_BLOCK);
			break;
		case META_SSA:
		case META_CP:
		case META_POR:
			fio.new_blkaddr = blkno;
			break;
		default:
			BUG();
		}

		page = grab_cache_page(META_MAPPING(sbi), fio.new_blkaddr);
		if (!page)
			continue;
		if (PageUptodate(page)) {
			f2fs_put_page(page, 1);
			continue;
		}

		fio.page = page;
		fio.old_blkaddr = fio.new_blkaddr;
		f2fs_submit_page_mbio(&fio);
		f2fs_put_page(page, 0);
	}
out:
	f2fs_submit_merged_bio(sbi, META, READ);
	blk_finish_plug(&plug);
	return blkno - start;
}

void ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index)
{
	struct page *page;
	bool readahead = false;

	page = find_get_page(META_MAPPING(sbi), index);
	if (!page || !PageUptodate(page))
		readahead = true;
	f2fs_put_page(page, 0);

	if (readahead)
		ra_meta_pages(sbi, index, MAX_BIO_BLOCKS(sbi), META_POR, true);
}
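/*
 * Usage note: callers readahead a window of meta blocks first and then
 * read them one by one with get_meta_page()/get_tmp_page(); e.g.
 * recover_orphan_inodes() below issues
 *
 *	ra_meta_pages(sbi, start_blk, orphan_blocks, META_CP, true);
 *
 * so that the subsequent get_meta_page() calls mostly hit the page cache.
 */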
static int f2fs_write_meta_page(struct page *page,
				struct writeback_control *wbc)
{
	struct f2fs_sb_info *sbi = F2FS_P_SB(page);

	trace_f2fs_writepage(page, META);

	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
		goto redirty_out;
	if (wbc->for_reclaim && page->index < GET_SUM_BLOCK(sbi, 0))
		goto redirty_out;
	if (unlikely(f2fs_cp_error(sbi)))
		goto redirty_out;

	write_meta_page(sbi, page);
	dec_page_count(sbi, F2FS_DIRTY_META);

	if (wbc->for_reclaim)
		f2fs_submit_merged_bio_cond(sbi, NULL, page, 0, META, WRITE);

	unlock_page(page);

	if (unlikely(f2fs_cp_error(sbi)))
		f2fs_submit_merged_bio(sbi, META, WRITE);

	return 0;

redirty_out:
	redirty_page_for_writepage(wbc, page);
	return AOP_WRITEPAGE_ACTIVATE;
}

static int f2fs_write_meta_pages(struct address_space *mapping,
				struct writeback_control *wbc)
{
	struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
	long diff, written;

	/* collect a number of dirty meta pages and write together */
	if (wbc->for_kupdate ||
		get_pages(sbi, F2FS_DIRTY_META) < nr_pages_to_skip(sbi, META))
		goto skip_write;

	trace_f2fs_writepages(mapping->host, wbc, META);

	/* if mounting has failed, skip writing meta pages */
	mutex_lock(&sbi->cp_mutex);
	diff = nr_pages_to_write(sbi, META, wbc);
	written = sync_meta_pages(sbi, META, wbc->nr_to_write);
	mutex_unlock(&sbi->cp_mutex);
	wbc->nr_to_write = max((long)0, wbc->nr_to_write - written - diff);
	return 0;

skip_write:
	wbc->pages_skipped += get_pages(sbi, F2FS_DIRTY_META);
	trace_f2fs_writepages(mapping->host, wbc, META);
	return 0;
}

long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
						long nr_to_write)
{
	struct address_space *mapping = META_MAPPING(sbi);
	pgoff_t index = 0, end = ULONG_MAX, prev = ULONG_MAX;
	struct pagevec pvec;
	long nwritten = 0;
	struct writeback_control wbc = {
		.for_reclaim = 0,
	};
	struct blk_plug plug;

	pagevec_init(&pvec, 0);

	blk_start_plug(&plug);

	while (index <= end) {
		int i, nr_pages;
		nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
				PAGECACHE_TAG_DIRTY,
				min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
		if (unlikely(nr_pages == 0))
			break;

		for (i = 0; i < nr_pages; i++) {
			struct page *page = pvec.pages[i];

			if (prev == ULONG_MAX)
				prev = page->index - 1;
			if (nr_to_write != LONG_MAX && page->index != prev + 1) {
				pagevec_release(&pvec);
				goto stop;
			}

			lock_page(page);

			if (unlikely(page->mapping != mapping)) {
continue_unlock:
				unlock_page(page);
				continue;
			}
			if (!PageDirty(page)) {
				/* someone wrote it for us */
				goto continue_unlock;
			}

			f2fs_wait_on_page_writeback(page, META, true);

			BUG_ON(PageWriteback(page));
			if (!clear_page_dirty_for_io(page))
				goto continue_unlock;

			if (mapping->a_ops->writepage(page, &wbc)) {
				unlock_page(page);
				break;
			}
			nwritten++;
			prev = page->index;
			if (unlikely(nwritten >= nr_to_write))
				break;
		}
		pagevec_release(&pvec);
		cond_resched();
	}
stop:
	if (nwritten)
		f2fs_submit_merged_bio(sbi, type, WRITE);

	blk_finish_plug(&plug);

	return nwritten;
}

static int f2fs_set_meta_page_dirty(struct page *page)
{
	trace_f2fs_set_page_dirty(page, META);

	SetPageUptodate(page);
	if (!PageDirty(page)) {
		__set_page_dirty_nobuffers(page);
		inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_META);
		SetPagePrivate(page);
		f2fs_trace_pid(page);
		return 1;
	}
	return 0;
}

const struct address_space_operations f2fs_meta_aops = {
	.writepage	= f2fs_write_meta_page,
	.writepages	= f2fs_write_meta_pages,
	.set_page_dirty	= f2fs_set_meta_page_dirty,
	.invalidatepage	= f2fs_invalidate_page,
	.releasepage	= f2fs_release_page,
};
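/*
 * Note on sync_meta_pages() above: when nr_to_write is bounded, the loop
 * bails out at the first non-contiguous dirty index (the prev/page->index
 * check), which appears intended to keep each submitted batch physically
 * contiguous so the merged bio stays one large sequential write.
 */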
static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
{
	struct inode_management *im = &sbi->im[type];
	struct ino_entry *e, *tmp;

	tmp = f2fs_kmem_cache_alloc(ino_entry_slab, GFP_NOFS);
retry:
	radix_tree_preload(GFP_NOFS | __GFP_NOFAIL);

	spin_lock(&im->ino_lock);
	e = radix_tree_lookup(&im->ino_root, ino);
	if (!e) {
		e = tmp;
		if (radix_tree_insert(&im->ino_root, ino, e)) {
			spin_unlock(&im->ino_lock);
			radix_tree_preload_end();
			goto retry;
		}
		memset(e, 0, sizeof(struct ino_entry));
		e->ino = ino;

		list_add_tail(&e->list, &im->ino_list);
		if (type != ORPHAN_INO)
			im->ino_num++;
	}
	spin_unlock(&im->ino_lock);
	radix_tree_preload_end();

	if (e != tmp)
		kmem_cache_free(ino_entry_slab, tmp);
}

static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
{
	struct inode_management *im = &sbi->im[type];
	struct ino_entry *e;

	spin_lock(&im->ino_lock);
	e = radix_tree_lookup(&im->ino_root, ino);
	if (e) {
		list_del(&e->list);
		radix_tree_delete(&im->ino_root, ino);
		im->ino_num--;
		spin_unlock(&im->ino_lock);
		kmem_cache_free(ino_entry_slab, e);
		return;
	}
	spin_unlock(&im->ino_lock);
}

void add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
{
	/* add new dirty ino entry into list */
	__add_ino_entry(sbi, ino, type);
}

void remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
{
	/* remove dirty ino entry from list */
	__remove_ino_entry(sbi, ino, type);
}

/* mode should be APPEND_INO or UPDATE_INO */
bool exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode)
{
	struct inode_management *im = &sbi->im[mode];
	struct ino_entry *e;

	spin_lock(&im->ino_lock);
	e = radix_tree_lookup(&im->ino_root, ino);
	spin_unlock(&im->ino_lock);
	return e ? true : false;
}
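/*
 * Design note: each inode_management instance indexes the same entries
 * twice. The radix tree gives fast lookup by ino for exist_written_data()
 * and __remove_ino_entry(), while ino_list preserves insertion order so
 * write_orphan_inodes() and release_ino_entry() below can walk all
 * entries cheaply at checkpoint time.
 */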
void release_ino_entry(struct f2fs_sb_info *sbi)
{
	struct ino_entry *e, *tmp;
	int i;

	for (i = APPEND_INO; i <= UPDATE_INO; i++) {
		struct inode_management *im = &sbi->im[i];

		spin_lock(&im->ino_lock);
		list_for_each_entry_safe(e, tmp, &im->ino_list, list) {
			list_del(&e->list);
			radix_tree_delete(&im->ino_root, e->ino);
			kmem_cache_free(ino_entry_slab, e);
			im->ino_num--;
		}
		spin_unlock(&im->ino_lock);
	}
}

int acquire_orphan_inode(struct f2fs_sb_info *sbi)
{
	struct inode_management *im = &sbi->im[ORPHAN_INO];
	int err = 0;

	spin_lock(&im->ino_lock);
	if (unlikely(im->ino_num >= sbi->max_orphans))
		err = -ENOSPC;
	else
		im->ino_num++;
	spin_unlock(&im->ino_lock);

	return err;
}

void release_orphan_inode(struct f2fs_sb_info *sbi)
{
	struct inode_management *im = &sbi->im[ORPHAN_INO];

	spin_lock(&im->ino_lock);
	f2fs_bug_on(sbi, im->ino_num == 0);
	im->ino_num--;
	spin_unlock(&im->ino_lock);
}

void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
{
	/* add new orphan ino entry into list */
	__add_ino_entry(sbi, ino, ORPHAN_INO);
}

void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
{
	/* remove orphan entry from orphan list */
	__remove_ino_entry(sbi, ino, ORPHAN_INO);
}

static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
{
	struct inode *inode;

	inode = f2fs_iget(sbi->sb, ino);
	if (IS_ERR(inode)) {
		/*
		 * it would be a bug if we couldn't find the inode for a
		 * recorded orphan entry; -ENOENT here means the on-disk
		 * orphan list is inconsistent.
		 */
		f2fs_bug_on(sbi, PTR_ERR(inode) == -ENOENT);
		return PTR_ERR(inode);
	}

	clear_nlink(inode);

	/* truncate all the data during iput */
	iput(inode);
	return 0;
}

int recover_orphan_inodes(struct f2fs_sb_info *sbi)
{
	block_t start_blk, orphan_blocks, i, j;
	int err;

	if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG))
		return 0;

	start_blk = __start_cp_addr(sbi) + 1 + __cp_payload(sbi);
	orphan_blocks = __start_sum_addr(sbi) - 1 - __cp_payload(sbi);

	ra_meta_pages(sbi, start_blk, orphan_blocks, META_CP, true);

	for (i = 0; i < orphan_blocks; i++) {
		struct page *page = get_meta_page(sbi, start_blk + i);
		struct f2fs_orphan_block *orphan_blk;

		orphan_blk = (struct f2fs_orphan_block *)page_address(page);
		for (j = 0; j < le32_to_cpu(orphan_blk->entry_count); j++) {
			nid_t ino = le32_to_cpu(orphan_blk->ino[j]);
			err = recover_orphan_inode(sbi, ino);
			if (err) {
				f2fs_put_page(page, 1);
				return err;
			}
		}
		f2fs_put_page(page, 1);
	}
	/* clear Orphan Flag */
	clear_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG);
	return 0;
}
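/*
 * Expected caller pattern (a sketch; the actual call sites live in the
 * namei/inode paths, not in this file): reserve a slot first, then
 * publish the ino only once the operation can no longer fail, e.g.
 *
 *	err = acquire_orphan_inode(sbi);
 *	if (err)
 *		return err;	(slot limit: sbi->max_orphans)
 *	...
 *	add_orphan_inode(sbi, inode->i_ino);
 *	(on a failure path, undo with release_orphan_inode(sbi))
 *
 * which keeps im->ino_num consistent with what write_orphan_inodes()
 * below will serialize.
 */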
static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
{
	struct list_head *head;
	struct f2fs_orphan_block *orphan_blk = NULL;
	unsigned int nentries = 0;
	unsigned short index = 1;
	unsigned short orphan_blocks;
	struct page *page = NULL;
	struct ino_entry *orphan = NULL;
	struct inode_management *im = &sbi->im[ORPHAN_INO];

	orphan_blocks = GET_ORPHAN_BLOCKS(im->ino_num);

	/*
	 * we don't need to take spin_lock(&im->ino_lock) here, since all
	 * the orphan inode operations are covered under f2fs_lock_op().
	 * And, the spin_lock should be avoided due to the page operations
	 * below.
	 */
	head = &im->ino_list;

	/* loop over each orphan inode entry and write it into an orphan block */
	list_for_each_entry(orphan, head, list) {
		if (!page) {
			page = grab_meta_page(sbi, start_blk++);
			orphan_blk =
				(struct f2fs_orphan_block *)page_address(page);
			memset(orphan_blk, 0, sizeof(*orphan_blk));
		}

		orphan_blk->ino[nentries++] = cpu_to_le32(orphan->ino);

		if (nentries == F2FS_ORPHANS_PER_BLOCK) {
			/*
			 * once an orphan block is full of 1020 entries,
			 * flush the current orphan block and bring another
			 * one into memory
			 */
			orphan_blk->blk_addr = cpu_to_le16(index);
			orphan_blk->blk_count = cpu_to_le16(orphan_blocks);
			orphan_blk->entry_count = cpu_to_le32(nentries);
			set_page_dirty(page);
			f2fs_put_page(page, 1);
			index++;
			nentries = 0;
			page = NULL;
		}
	}

	if (page) {
		orphan_blk->blk_addr = cpu_to_le16(index);
		orphan_blk->blk_count = cpu_to_le16(orphan_blocks);
		orphan_blk->entry_count = cpu_to_le32(nentries);
		set_page_dirty(page);
		f2fs_put_page(page, 1);
	}
}

static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
				block_t cp_addr, unsigned long long *version)
{
	struct page *cp_page_1, *cp_page_2 = NULL;
	unsigned long blk_size = sbi->blocksize;
	struct f2fs_checkpoint *cp_block;
	unsigned long long cur_version = 0, pre_version = 0;
	size_t crc_offset;
	__u32 crc = 0;

	/* Read the 1st cp block in this CP pack */
	cp_page_1 = get_meta_page(sbi, cp_addr);

	/* get the version number */
	cp_block = (struct f2fs_checkpoint *)page_address(cp_page_1);
	crc_offset = le32_to_cpu(cp_block->checksum_offset);
	if (crc_offset >= blk_size)
		goto invalid_cp1;

	crc = le32_to_cpu(*((__le32 *)((unsigned char *)cp_block + crc_offset)));
	if (!f2fs_crc_valid(sbi, crc, cp_block, crc_offset))
		goto invalid_cp1;

	pre_version = cur_cp_version(cp_block);

	/* Read the 2nd cp block in this CP pack */
	cp_addr += le32_to_cpu(cp_block->cp_pack_total_block_count) - 1;
	cp_page_2 = get_meta_page(sbi, cp_addr);

	cp_block = (struct f2fs_checkpoint *)page_address(cp_page_2);
	crc_offset = le32_to_cpu(cp_block->checksum_offset);
	if (crc_offset >= blk_size)
		goto invalid_cp2;

	crc = le32_to_cpu(*((__le32 *)((unsigned char *)cp_block + crc_offset)));
	if (!f2fs_crc_valid(sbi, crc, cp_block, crc_offset))
		goto invalid_cp2;

	cur_version = cur_cp_version(cp_block);

	if (cur_version == pre_version) {
		*version = cur_version;
		f2fs_put_page(cp_page_2, 1);
		return cp_page_1;
	}
invalid_cp2:
	f2fs_put_page(cp_page_2, 1);
invalid_cp1:
	f2fs_put_page(cp_page_1, 1);
	return NULL;
}
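/*
 * Validity rule implemented above: a CP pack is trusted only if its first
 * and last blocks both carry a valid CRC and the same checkpoint version.
 * A crash in the middle of writing a pack leaves the trailer block stale
 * (old version or bad CRC), so the half-written pack is rejected and
 * mount falls back to the other pack.
 */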
int get_valid_checkpoint(struct f2fs_sb_info *sbi)
{
	struct f2fs_checkpoint *cp_block;
	struct f2fs_super_block *fsb = sbi->raw_super;
	struct page *cp1, *cp2, *cur_page;
	unsigned long blk_size = sbi->blocksize;
	unsigned long long cp1_version = 0, cp2_version = 0;
	unsigned long long cp_start_blk_no;
	unsigned int cp_blks = 1 + __cp_payload(sbi);
	block_t cp_blk_no;
	int i;

	sbi->ckpt = kzalloc(cp_blks * blk_size, GFP_KERNEL);
	if (!sbi->ckpt)
		return -ENOMEM;
	/*
	 * Finding a valid cp block involves reading both
	 * sets (cp pack 1 and cp pack 2)
	 */
	cp_start_blk_no = le32_to_cpu(fsb->cp_blkaddr);
	cp1 = validate_checkpoint(sbi, cp_start_blk_no, &cp1_version);

	/* The second checkpoint pack should start at the next segment */
	cp_start_blk_no += ((unsigned long long)1) <<
				le32_to_cpu(fsb->log_blocks_per_seg);
	cp2 = validate_checkpoint(sbi, cp_start_blk_no, &cp2_version);

	if (cp1 && cp2) {
		if (ver_after(cp2_version, cp1_version))
			cur_page = cp2;
		else
			cur_page = cp1;
	} else if (cp1) {
		cur_page = cp1;
	} else if (cp2) {
		cur_page = cp2;
	} else {
		goto fail_no_cp;
	}

	cp_block = (struct f2fs_checkpoint *)page_address(cur_page);
	memcpy(sbi->ckpt, cp_block, blk_size);

	/* Sanity checking of checkpoint */
	if (sanity_check_ckpt(sbi))
		goto fail_no_cp;

	if (cp_blks <= 1)
		goto done;

	cp_blk_no = le32_to_cpu(fsb->cp_blkaddr);
	if (cur_page == cp2)
		cp_blk_no += 1 << le32_to_cpu(fsb->log_blocks_per_seg);

	for (i = 1; i < cp_blks; i++) {
		void *sit_bitmap_ptr;
		unsigned char *ckpt = (unsigned char *)sbi->ckpt;

		cur_page = get_meta_page(sbi, cp_blk_no + i);
		sit_bitmap_ptr = page_address(cur_page);
		memcpy(ckpt + i * blk_size, sit_bitmap_ptr, blk_size);
		f2fs_put_page(cur_page, 1);
	}
done:
	f2fs_put_page(cp1, 1);
	f2fs_put_page(cp2, 1);
	return 0;

fail_no_cp:
	kfree(sbi->ckpt);
	return -EINVAL;
}

static void __add_dirty_inode(struct inode *inode, enum inode_type type)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct f2fs_inode_info *fi = F2FS_I(inode);
	int flag = (type == DIR_INODE) ? FI_DIRTY_DIR : FI_DIRTY_FILE;

	if (is_inode_flag_set(fi, flag))
		return;

	set_inode_flag(fi, flag);
	list_add_tail(&fi->dirty_list, &sbi->inode_list[type]);
	stat_inc_dirty_inode(sbi, type);
}

static void __remove_dirty_inode(struct inode *inode, enum inode_type type)
{
	struct f2fs_inode_info *fi = F2FS_I(inode);
	int flag = (type == DIR_INODE) ? FI_DIRTY_DIR : FI_DIRTY_FILE;

	if (get_dirty_pages(inode) ||
			!is_inode_flag_set(F2FS_I(inode), flag))
		return;

	list_del_init(&fi->dirty_list);
	clear_inode_flag(fi, flag);
	stat_dec_dirty_inode(F2FS_I_SB(inode), type);
}

void update_dirty_page(struct inode *inode, struct page *page)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	enum inode_type type = S_ISDIR(inode->i_mode) ? DIR_INODE : FILE_INODE;

	if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode) &&
			!S_ISLNK(inode->i_mode))
		return;

	spin_lock(&sbi->inode_lock[type]);
	__add_dirty_inode(inode, type);
	inode_inc_dirty_pages(inode);
	spin_unlock(&sbi->inode_lock[type]);

	SetPagePrivate(page);
	f2fs_trace_pid(page);
}
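/*
 * Note: sbi->inode_list[DIR_INODE] and [FILE_INODE] track inodes that
 * currently own dirty pages, so that checkpoint can flush every dirty
 * dentry page (see sync_dirty_inodes()/block_operations() below) before
 * freezing operations; regular files and symlinks are tracked the same
 * way under FILE_INODE.
 */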
void add_dirty_dir_inode(struct inode *inode)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);

	spin_lock(&sbi->inode_lock[DIR_INODE]);
	__add_dirty_inode(inode, DIR_INODE);
	spin_unlock(&sbi->inode_lock[DIR_INODE]);
}

void remove_dirty_inode(struct inode *inode)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct f2fs_inode_info *fi = F2FS_I(inode);
	enum inode_type type = S_ISDIR(inode->i_mode) ? DIR_INODE : FILE_INODE;

	if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode) &&
			!S_ISLNK(inode->i_mode))
		return;

	spin_lock(&sbi->inode_lock[type]);
	__remove_dirty_inode(inode, type);
	spin_unlock(&sbi->inode_lock[type]);

	/* Only from the recovery routine */
	if (is_inode_flag_set(fi, FI_DELAY_IPUT)) {
		clear_inode_flag(fi, FI_DELAY_IPUT);
		iput(inode);
	}
}

int sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type)
{
	struct list_head *head;
	struct inode *inode;
	struct f2fs_inode_info *fi;
	bool is_dir = (type == DIR_INODE);

	trace_f2fs_sync_dirty_inodes_enter(sbi->sb, is_dir,
				get_pages(sbi, is_dir ?
				F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA));
retry:
	if (unlikely(f2fs_cp_error(sbi)))
		return -EIO;

	spin_lock(&sbi->inode_lock[type]);

	head = &sbi->inode_list[type];
	if (list_empty(head)) {
		spin_unlock(&sbi->inode_lock[type]);
		trace_f2fs_sync_dirty_inodes_exit(sbi->sb, is_dir,
				get_pages(sbi, is_dir ?
				F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA));
		return 0;
	}
	fi = list_entry(head->next, struct f2fs_inode_info, dirty_list);
	inode = igrab(&fi->vfs_inode);
	spin_unlock(&sbi->inode_lock[type]);
	if (inode) {
		filemap_fdatawrite(inode->i_mapping);
		iput(inode);
	} else {
		/*
		 * We should submit the bio, since several dentry pages in
		 * the freeing inode may still be under writeback.
		 */
		f2fs_submit_merged_bio(sbi, DATA, WRITE);
		cond_resched();
	}
	goto retry;
}

/*
 * Freeze all the FS-operations for checkpoint.
 */
static int block_operations(struct f2fs_sb_info *sbi)
{
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_ALL,
		.nr_to_write = LONG_MAX,
		.for_reclaim = 0,
	};
	struct blk_plug plug;
	int err = 0;

	blk_start_plug(&plug);

retry_flush_dents:
	f2fs_lock_all(sbi);
	/* write all the dirty dentry pages */
	if (get_pages(sbi, F2FS_DIRTY_DENTS)) {
		f2fs_unlock_all(sbi);
		err = sync_dirty_inodes(sbi, DIR_INODE);
		if (err)
			goto out;
		goto retry_flush_dents;
	}

	/*
	 * POR: we should ensure that there are no dirty node pages
	 * until finishing nat/sit flush.
	 */
retry_flush_nodes:
	down_write(&sbi->node_write);

	if (get_pages(sbi, F2FS_DIRTY_NODES)) {
		up_write(&sbi->node_write);
		err = sync_node_pages(sbi, 0, &wbc);
		if (err) {
			f2fs_unlock_all(sbi);
			goto out;
		}
		goto retry_flush_nodes;
	}
out:
	blk_finish_plug(&plug);
	return err;
}
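/*
 * Locking order established above: f2fs_lock_all() first, then
 * sbi->node_write. Both retry loops drop their lock before writing pages,
 * because the writeback paths themselves take f2fs_lock_op(); re-checking
 * the dirty counters after re-locking is what makes the freeze eventually
 * converge.
 */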
static void unblock_operations(struct f2fs_sb_info *sbi)
{
	up_write(&sbi->node_write);
	f2fs_unlock_all(sbi);
}

static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi)
{
	DEFINE_WAIT(wait);

	for (;;) {
		prepare_to_wait(&sbi->cp_wait, &wait, TASK_UNINTERRUPTIBLE);

		if (!get_pages(sbi, F2FS_WRITEBACK))
			break;

		io_schedule_timeout(5*HZ);
	}
	finish_wait(&sbi->cp_wait, &wait);
}
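/*
 * On-disk layout of one CP pack as written by do_checkpoint() below
 * (block offsets relative to start_blk):
 *
 *	+0			checkpoint header (ckpt)
 *	+1 .. +cp_payload	extra payload blocks (e.g. a large SIT bitmap)
 *	...			orphan blocks, if any
 *	...			data segment summaries
 *	...			node segment summaries (umount/fastboot only)
 *	last			checkpoint trailer (a copy of ckpt)
 *
 * The trailer copy is what validate_checkpoint() compares against the
 * header to decide whether the pack was written completely.
 */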
static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	unsigned long orphan_num = sbi->im[ORPHAN_INO].ino_num;
	nid_t last_nid = nm_i->next_scan_nid;
	block_t start_blk;
	unsigned int data_sum_blocks, orphan_blocks;
	__u32 crc32 = 0;
	int i;
	int cp_payload_blks = __cp_payload(sbi);
	block_t discard_blk = NEXT_FREE_BLKADDR(sbi, curseg);
	bool invalidate = false;
	struct super_block *sb = sbi->sb;
	struct curseg_info *seg_i = CURSEG_I(sbi, CURSEG_HOT_NODE);
	u64 kbytes_written;

	/*
	 * This avoids conducting wrong roll-forward operations; it also
	 * uses meta pages, so it should be called prior to sync_meta_pages
	 * below.
	 */
	if (discard_next_dnode(sbi, discard_blk))
		invalidate = true;

	/* Flush all the NAT/SIT pages */
	while (get_pages(sbi, F2FS_DIRTY_META)) {
		sync_meta_pages(sbi, META, LONG_MAX);
		if (unlikely(f2fs_cp_error(sbi)))
			return -EIO;
	}

	next_free_nid(sbi, &last_nid);

	/*
	 * modify checkpoint
	 * version number is already updated
	 */
	ckpt->elapsed_time = cpu_to_le64(get_mtime(sbi));
	ckpt->valid_block_count = cpu_to_le64(valid_user_blocks(sbi));
	ckpt->free_segment_count = cpu_to_le32(free_segments(sbi));
	for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) {
		ckpt->cur_node_segno[i] =
			cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_NODE));
		ckpt->cur_node_blkoff[i] =
			cpu_to_le16(curseg_blkoff(sbi, i + CURSEG_HOT_NODE));
		ckpt->alloc_type[i + CURSEG_HOT_NODE] =
				curseg_alloc_type(sbi, i + CURSEG_HOT_NODE);
	}
	for (i = 0; i < NR_CURSEG_DATA_TYPE; i++) {
		ckpt->cur_data_segno[i] =
			cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_DATA));
		ckpt->cur_data_blkoff[i] =
			cpu_to_le16(curseg_blkoff(sbi, i + CURSEG_HOT_DATA));
		ckpt->alloc_type[i + CURSEG_HOT_DATA] =
				curseg_alloc_type(sbi, i + CURSEG_HOT_DATA);
	}

	ckpt->valid_node_count = cpu_to_le32(valid_node_count(sbi));
	ckpt->valid_inode_count = cpu_to_le32(valid_inode_count(sbi));
	ckpt->next_free_nid = cpu_to_le32(last_nid);

	/* 2 cp + n data seg summary + orphan inode blocks */
	data_sum_blocks = npages_for_summary_flush(sbi, false);
	if (data_sum_blocks < NR_CURSEG_DATA_TYPE)
		set_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
	else
		clear_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);

	orphan_blocks = GET_ORPHAN_BLOCKS(orphan_num);
	ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks +
			orphan_blocks);

	if (__remain_node_summaries(cpc->reason))
		ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS +
				cp_payload_blks + data_sum_blocks +
				orphan_blocks + NR_CURSEG_NODE_TYPE);
	else
		ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS +
				cp_payload_blks + data_sum_blocks +
				orphan_blocks);

	if (cpc->reason == CP_UMOUNT)
		set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
	else
		clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG);

	if (cpc->reason == CP_FASTBOOT)
		set_ckpt_flags(ckpt, CP_FASTBOOT_FLAG);
	else
		clear_ckpt_flags(ckpt, CP_FASTBOOT_FLAG);

	if (orphan_num)
		set_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);
	else
		clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);

	if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
		set_ckpt_flags(ckpt, CP_FSCK_FLAG);

	/* update SIT/NAT bitmap */
	get_sit_bitmap(sbi, __bitmap_ptr(sbi, SIT_BITMAP));
	get_nat_bitmap(sbi, __bitmap_ptr(sbi, NAT_BITMAP));

	crc32 = f2fs_crc32(sbi, ckpt, le32_to_cpu(ckpt->checksum_offset));
	*((__le32 *)((unsigned char *)ckpt +
				le32_to_cpu(ckpt->checksum_offset)))
				= cpu_to_le32(crc32);

	start_blk = __start_cp_addr(sbi);

	/* need to wait for end_io results */
	wait_on_all_pages_writeback(sbi);
	if (unlikely(f2fs_cp_error(sbi)))
		return -EIO;

	/* write out checkpoint buffer at block 0 */
	update_meta_page(sbi, ckpt, start_blk++);

	for (i = 1; i < 1 + cp_payload_blks; i++)
		update_meta_page(sbi, (char *)ckpt + i * F2FS_BLKSIZE,
							start_blk++);

	if (orphan_num) {
		write_orphan_inodes(sbi, start_blk);
		start_blk += orphan_blocks;
	}

	write_data_summaries(sbi, start_blk);
	start_blk += data_sum_blocks;

	/* Record write statistics in the hot node summary */
	kbytes_written = sbi->kbytes_written;
	if (sb->s_bdev->bd_part)
		kbytes_written += BD_PART_WRITTEN(sbi);

	seg_i->journal->info.kbytes_written = cpu_to_le64(kbytes_written);

	if (__remain_node_summaries(cpc->reason)) {
		write_node_summaries(sbi, start_blk);
		start_blk += NR_CURSEG_NODE_TYPE;
	}

	/* writeout checkpoint block */
	update_meta_page(sbi, ckpt, start_blk);

	/* wait for previously submitted node/meta pages writeback */
	wait_on_all_pages_writeback(sbi);

	if (unlikely(f2fs_cp_error(sbi)))
		return -EIO;

	filemap_fdatawait_range(NODE_MAPPING(sbi), 0, LLONG_MAX);
	filemap_fdatawait_range(META_MAPPING(sbi), 0, LLONG_MAX);

	/* update user_block_counts */
	sbi->last_valid_block_count = sbi->total_valid_block_count;
	sbi->alloc_valid_block_count = 0;

	/* Here, we only have one bio having CP pack */
	sync_meta_pages(sbi, META_FLUSH, LONG_MAX);

	/* wait for previously submitted meta pages writeback */
	wait_on_all_pages_writeback(sbi);

	/*
	 * invalidate the meta page which is used temporarily for zeroing out
	 * the block at the end of the warm node chain.
	 */
	if (invalidate)
		invalidate_mapping_pages(META_MAPPING(sbi), discard_blk,
								discard_blk);

	release_ino_entry(sbi);

	if (unlikely(f2fs_cp_error(sbi)))
		return -EIO;

	clear_prefree_segments(sbi, cpc);
	clear_sbi_flag(sbi, SBI_IS_DIRTY);

	return 0;
}

/*
 * We guarantee that this checkpoint procedure will not fail.
 */
int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
	unsigned long long ckpt_ver;
	int err = 0;

	mutex_lock(&sbi->cp_mutex);

	if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) &&
		(cpc->reason == CP_FASTBOOT || cpc->reason == CP_SYNC ||
		(cpc->reason == CP_DISCARD && !sbi->discard_blks)))
		goto out;
	if (unlikely(f2fs_cp_error(sbi))) {
		err = -EIO;
		goto out;
	}
	if (f2fs_readonly(sbi->sb)) {
		err = -EROFS;
		goto out;
	}

	trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "start block_ops");

	err = block_operations(sbi);
	if (err)
		goto out;

	trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish block_ops");

	f2fs_flush_merged_bios(sbi);

	/*
	 * update checkpoint pack index
	 * Increase the version number so that
	 * SIT entries and seg summaries are written at the correct place
	 */
	ckpt_ver = cur_cp_version(ckpt);
	ckpt->checkpoint_ver = cpu_to_le64(++ckpt_ver);

	/* write cached NAT/SIT entries to NAT/SIT area */
	flush_nat_entries(sbi);
	flush_sit_entries(sbi, cpc);

	/* unlock all the fs_lock[] in do_checkpoint() */
	err = do_checkpoint(sbi, cpc);

	unblock_operations(sbi);
	stat_inc_cp_count(sbi->stat_info);

	if (cpc->reason == CP_RECOVERY)
		f2fs_msg(sbi->sb, KERN_NOTICE,
			"checkpoint: version = %llx", ckpt_ver);

	/* do checkpoint periodically */
	f2fs_update_time(sbi, CP_TIME);
	trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint");
out:
	mutex_unlock(&sbi->cp_mutex);
	return err;
}

void init_ino_entry_info(struct f2fs_sb_info *sbi)
{
	int i;

	for (i = 0; i < MAX_INO_ENTRY; i++) {
		struct inode_management *im = &sbi->im[i];

		INIT_RADIX_TREE(&im->ino_root, GFP_ATOMIC);
		spin_lock_init(&im->ino_lock);
		INIT_LIST_HEAD(&im->ino_list);
		im->ino_num = 0;
	}

	sbi->max_orphans = (sbi->blocks_per_seg - F2FS_CP_PACKS -
			NR_CURSEG_TYPE - __cp_payload(sbi)) *
				F2FS_ORPHANS_PER_BLOCK;
}
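/*
 * Worked example for max_orphans (values are the common defaults, not
 * guaranteed by this file): with 2MB segments of 4KB blocks,
 * blocks_per_seg = 512, F2FS_CP_PACKS = 2, NR_CURSEG_TYPE = 6 and no
 * cp payload, (512 - 2 - 6 - 0) * F2FS_ORPHANS_PER_BLOCK (= 1020) =
 * 514080 orphan inodes can be recorded in one CP pack.
 */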
int __init create_checkpoint_caches(void)
{
	ino_entry_slab = f2fs_kmem_cache_create("f2fs_ino_entry",
			sizeof(struct ino_entry));
	if (!ino_entry_slab)
		return -ENOMEM;
	inode_entry_slab = f2fs_kmem_cache_create("f2fs_inode_entry",
			sizeof(struct inode_entry));
	if (!inode_entry_slab) {
		kmem_cache_destroy(ino_entry_slab);
		return -ENOMEM;
	}
	return 0;
}

void destroy_checkpoint_caches(void)
{
	kmem_cache_destroy(ino_entry_slab);
	kmem_cache_destroy(inode_entry_slab);
}