1 /* 2 * fs/f2fs/recovery.c 3 * 4 * Copyright (c) 2012 Samsung Electronics Co., Ltd. 5 * http://www.samsung.com/ 6 * 7 * This program is free software; you can redistribute it and/or modify 8 * it under the terms of the GNU General Public License version 2 as 9 * published by the Free Software Foundation. 10 */ 11 #include <linux/fs.h> 12 #include <linux/f2fs_fs.h> 13 #include "f2fs.h" 14 #include "node.h" 15 #include "segment.h" 16 17 /* 18 * Roll forward recovery scenarios. 19 * 20 * [Term] F: fsync_mark, D: dentry_mark 21 * 22 * 1. inode(x) | CP | inode(x) | dnode(F) 23 * -> Update the latest inode(x). 24 * 25 * 2. inode(x) | CP | inode(F) | dnode(F) 26 * -> No problem. 27 * 28 * 3. inode(x) | CP | dnode(F) | inode(x) 29 * -> Recover to the latest dnode(F), and drop the last inode(x) 30 * 31 * 4. inode(x) | CP | dnode(F) | inode(F) 32 * -> No problem. 33 * 34 * 5. CP | inode(x) | dnode(F) 35 * -> The inode(DF) was missing. Should drop this dnode(F). 36 * 37 * 6. CP | inode(DF) | dnode(F) 38 * -> No problem. 39 * 40 * 7. CP | dnode(F) | inode(DF) 41 * -> If f2fs_iget fails, then goto next to find inode(DF). 42 * 43 * 8. CP | dnode(F) | inode(x) 44 * -> If f2fs_iget fails, then goto next to find inode(DF). 45 * But it will fail due to no inode(DF). 46 */ 47 48 static struct kmem_cache *fsync_entry_slab; 49 50 bool f2fs_space_for_roll_forward(struct f2fs_sb_info *sbi) 51 { 52 s64 nalloc = percpu_counter_sum_positive(&sbi->alloc_valid_block_count); 53 54 if (sbi->last_valid_block_count + nalloc > sbi->user_block_count) 55 return false; 56 return true; 57 } 58 59 static struct fsync_inode_entry *get_fsync_inode(struct list_head *head, 60 nid_t ino) 61 { 62 struct fsync_inode_entry *entry; 63 64 list_for_each_entry(entry, head, list) 65 if (entry->inode->i_ino == ino) 66 return entry; 67 68 return NULL; 69 } 70 71 static struct fsync_inode_entry *add_fsync_inode(struct f2fs_sb_info *sbi, 72 struct list_head *head, nid_t ino, bool quota_inode) 73 { 74 struct inode *inode; 75 struct fsync_inode_entry *entry; 76 int err; 77 78 inode = f2fs_iget_retry(sbi->sb, ino); 79 if (IS_ERR(inode)) 80 return ERR_CAST(inode); 81 82 err = dquot_initialize(inode); 83 if (err) 84 goto err_out; 85 86 if (quota_inode) { 87 err = dquot_alloc_inode(inode); 88 if (err) 89 goto err_out; 90 } 91 92 entry = f2fs_kmem_cache_alloc(fsync_entry_slab, GFP_F2FS_ZERO); 93 entry->inode = inode; 94 list_add_tail(&entry->list, head); 95 96 return entry; 97 err_out: 98 iput(inode); 99 return ERR_PTR(err); 100 } 101 102 static void del_fsync_inode(struct fsync_inode_entry *entry) 103 { 104 iput(entry->inode); 105 list_del(&entry->list); 106 kmem_cache_free(fsync_entry_slab, entry); 107 } 108 109 static int recover_dentry(struct inode *inode, struct page *ipage, 110 struct list_head *dir_list) 111 { 112 struct f2fs_inode *raw_inode = F2FS_INODE(ipage); 113 nid_t pino = le32_to_cpu(raw_inode->i_pino); 114 struct f2fs_dir_entry *de; 115 struct fscrypt_name fname; 116 struct page *page; 117 struct inode *dir, *einode; 118 struct fsync_inode_entry *entry; 119 int err = 0; 120 char *name; 121 122 entry = get_fsync_inode(dir_list, pino); 123 if (!entry) { 124 entry = add_fsync_inode(F2FS_I_SB(inode), dir_list, 125 pino, false); 126 if (IS_ERR(entry)) { 127 dir = ERR_CAST(entry); 128 err = PTR_ERR(entry); 129 goto out; 130 } 131 } 132 133 dir = entry->inode; 134 135 memset(&fname, 0, sizeof(struct fscrypt_name)); 136 fname.disk_name.len = le32_to_cpu(raw_inode->i_namelen); 137 fname.disk_name.name = raw_inode->i_name; 138 139 if (unlikely(fname.disk_name.len > F2FS_NAME_LEN)) { 140 WARN_ON(1); 141 err = -ENAMETOOLONG; 142 goto out; 143 } 144 retry: 145 de = __f2fs_find_entry(dir, &fname, &page); 146 if (de && inode->i_ino == le32_to_cpu(de->ino)) 147 goto out_put; 148 149 if (de) { 150 einode = f2fs_iget_retry(inode->i_sb, le32_to_cpu(de->ino)); 151 if (IS_ERR(einode)) { 152 WARN_ON(1); 153 err = PTR_ERR(einode); 154 if (err == -ENOENT) 155 err = -EEXIST; 156 goto out_put; 157 } 158 159 err = dquot_initialize(einode); 160 if (err) { 161 iput(einode); 162 goto out_put; 163 } 164 165 err = f2fs_acquire_orphan_inode(F2FS_I_SB(inode)); 166 if (err) { 167 iput(einode); 168 goto out_put; 169 } 170 f2fs_delete_entry(de, page, dir, einode); 171 iput(einode); 172 goto retry; 173 } else if (IS_ERR(page)) { 174 err = PTR_ERR(page); 175 } else { 176 err = f2fs_add_dentry(dir, &fname, inode, 177 inode->i_ino, inode->i_mode); 178 } 179 if (err == -ENOMEM) 180 goto retry; 181 goto out; 182 183 out_put: 184 f2fs_put_page(page, 0); 185 out: 186 if (file_enc_name(inode)) 187 name = "<encrypted>"; 188 else 189 name = raw_inode->i_name; 190 f2fs_msg(inode->i_sb, KERN_NOTICE, 191 "%s: ino = %x, name = %s, dir = %lx, err = %d", 192 __func__, ino_of_node(ipage), name, 193 IS_ERR(dir) ? 0 : dir->i_ino, err); 194 return err; 195 } 196 197 static void recover_inline_flags(struct inode *inode, struct f2fs_inode *ri) 198 { 199 if (ri->i_inline & F2FS_PIN_FILE) 200 set_inode_flag(inode, FI_PIN_FILE); 201 else 202 clear_inode_flag(inode, FI_PIN_FILE); 203 if (ri->i_inline & F2FS_DATA_EXIST) 204 set_inode_flag(inode, FI_DATA_EXIST); 205 else 206 clear_inode_flag(inode, FI_DATA_EXIST); 207 } 208 209 static void recover_inode(struct inode *inode, struct page *page) 210 { 211 struct f2fs_inode *raw = F2FS_INODE(page); 212 char *name; 213 214 inode->i_mode = le16_to_cpu(raw->i_mode); 215 f2fs_i_size_write(inode, le64_to_cpu(raw->i_size)); 216 inode->i_atime.tv_sec = le64_to_cpu(raw->i_atime); 217 inode->i_ctime.tv_sec = le64_to_cpu(raw->i_ctime); 218 inode->i_mtime.tv_sec = le64_to_cpu(raw->i_mtime); 219 inode->i_atime.tv_nsec = le32_to_cpu(raw->i_atime_nsec); 220 inode->i_ctime.tv_nsec = le32_to_cpu(raw->i_ctime_nsec); 221 inode->i_mtime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec); 222 223 F2FS_I(inode)->i_advise = raw->i_advise; 224 225 recover_inline_flags(inode, raw); 226 227 if (file_enc_name(inode)) 228 name = "<encrypted>"; 229 else 230 name = F2FS_INODE(page)->i_name; 231 232 f2fs_msg(inode->i_sb, KERN_NOTICE, 233 "recover_inode: ino = %x, name = %s, inline = %x", 234 ino_of_node(page), name, raw->i_inline); 235 } 236 237 static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head, 238 bool check_only) 239 { 240 struct curseg_info *curseg; 241 struct page *page = NULL; 242 block_t blkaddr; 243 unsigned int loop_cnt = 0; 244 unsigned int free_blocks = sbi->user_block_count - 245 valid_user_blocks(sbi); 246 int err = 0; 247 248 /* get node pages in the current segment */ 249 curseg = CURSEG_I(sbi, CURSEG_WARM_NODE); 250 blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); 251 252 while (1) { 253 struct fsync_inode_entry *entry; 254 255 if (!f2fs_is_valid_meta_blkaddr(sbi, blkaddr, META_POR)) 256 return 0; 257 258 page = f2fs_get_tmp_page(sbi, blkaddr); 259 260 if (!is_recoverable_dnode(page)) 261 break; 262 263 if (!is_fsync_dnode(page)) 264 goto next; 265 266 entry = get_fsync_inode(head, ino_of_node(page)); 267 if (!entry) { 268 bool quota_inode = false; 269 270 if (!check_only && 271 IS_INODE(page) && is_dent_dnode(page)) { 272 err = f2fs_recover_inode_page(sbi, page); 273 if (err) 274 break; 275 quota_inode = true; 276 } 277 278 /* 279 * CP | dnode(F) | inode(DF) 280 * For this case, we should not give up now. 281 */ 282 entry = add_fsync_inode(sbi, head, ino_of_node(page), 283 quota_inode); 284 if (IS_ERR(entry)) { 285 err = PTR_ERR(entry); 286 if (err == -ENOENT) { 287 err = 0; 288 goto next; 289 } 290 break; 291 } 292 } 293 entry->blkaddr = blkaddr; 294 295 if (IS_INODE(page) && is_dent_dnode(page)) 296 entry->last_dentry = blkaddr; 297 next: 298 /* sanity check in order to detect looped node chain */ 299 if (++loop_cnt >= free_blocks || 300 blkaddr == next_blkaddr_of_node(page)) { 301 f2fs_msg(sbi->sb, KERN_NOTICE, 302 "%s: detect looped node chain, " 303 "blkaddr:%u, next:%u", 304 __func__, blkaddr, next_blkaddr_of_node(page)); 305 err = -EINVAL; 306 break; 307 } 308 309 /* check next segment */ 310 blkaddr = next_blkaddr_of_node(page); 311 f2fs_put_page(page, 1); 312 313 f2fs_ra_meta_pages_cond(sbi, blkaddr); 314 } 315 f2fs_put_page(page, 1); 316 return err; 317 } 318 319 static void destroy_fsync_dnodes(struct list_head *head) 320 { 321 struct fsync_inode_entry *entry, *tmp; 322 323 list_for_each_entry_safe(entry, tmp, head, list) 324 del_fsync_inode(entry); 325 } 326 327 static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi, 328 block_t blkaddr, struct dnode_of_data *dn) 329 { 330 struct seg_entry *sentry; 331 unsigned int segno = GET_SEGNO(sbi, blkaddr); 332 unsigned short blkoff = GET_BLKOFF_FROM_SEG0(sbi, blkaddr); 333 struct f2fs_summary_block *sum_node; 334 struct f2fs_summary sum; 335 struct page *sum_page, *node_page; 336 struct dnode_of_data tdn = *dn; 337 nid_t ino, nid; 338 struct inode *inode; 339 unsigned int offset; 340 block_t bidx; 341 int i; 342 343 sentry = get_seg_entry(sbi, segno); 344 if (!f2fs_test_bit(blkoff, sentry->cur_valid_map)) 345 return 0; 346 347 /* Get the previous summary */ 348 for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { 349 struct curseg_info *curseg = CURSEG_I(sbi, i); 350 if (curseg->segno == segno) { 351 sum = curseg->sum_blk->entries[blkoff]; 352 goto got_it; 353 } 354 } 355 356 sum_page = f2fs_get_sum_page(sbi, segno); 357 sum_node = (struct f2fs_summary_block *)page_address(sum_page); 358 sum = sum_node->entries[blkoff]; 359 f2fs_put_page(sum_page, 1); 360 got_it: 361 /* Use the locked dnode page and inode */ 362 nid = le32_to_cpu(sum.nid); 363 if (dn->inode->i_ino == nid) { 364 tdn.nid = nid; 365 if (!dn->inode_page_locked) 366 lock_page(dn->inode_page); 367 tdn.node_page = dn->inode_page; 368 tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node); 369 goto truncate_out; 370 } else if (dn->nid == nid) { 371 tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node); 372 goto truncate_out; 373 } 374 375 /* Get the node page */ 376 node_page = f2fs_get_node_page(sbi, nid); 377 if (IS_ERR(node_page)) 378 return PTR_ERR(node_page); 379 380 offset = ofs_of_node(node_page); 381 ino = ino_of_node(node_page); 382 f2fs_put_page(node_page, 1); 383 384 if (ino != dn->inode->i_ino) { 385 int ret; 386 387 /* Deallocate previous index in the node page */ 388 inode = f2fs_iget_retry(sbi->sb, ino); 389 if (IS_ERR(inode)) 390 return PTR_ERR(inode); 391 392 ret = dquot_initialize(inode); 393 if (ret) { 394 iput(inode); 395 return ret; 396 } 397 } else { 398 inode = dn->inode; 399 } 400 401 bidx = f2fs_start_bidx_of_node(offset, inode) + 402 le16_to_cpu(sum.ofs_in_node); 403 404 /* 405 * if inode page is locked, unlock temporarily, but its reference 406 * count keeps alive. 407 */ 408 if (ino == dn->inode->i_ino && dn->inode_page_locked) 409 unlock_page(dn->inode_page); 410 411 set_new_dnode(&tdn, inode, NULL, NULL, 0); 412 if (f2fs_get_dnode_of_data(&tdn, bidx, LOOKUP_NODE)) 413 goto out; 414 415 if (tdn.data_blkaddr == blkaddr) 416 f2fs_truncate_data_blocks_range(&tdn, 1); 417 418 f2fs_put_dnode(&tdn); 419 out: 420 if (ino != dn->inode->i_ino) 421 iput(inode); 422 else if (dn->inode_page_locked) 423 lock_page(dn->inode_page); 424 return 0; 425 426 truncate_out: 427 if (datablock_addr(tdn.inode, tdn.node_page, 428 tdn.ofs_in_node) == blkaddr) 429 f2fs_truncate_data_blocks_range(&tdn, 1); 430 if (dn->inode->i_ino == nid && !dn->inode_page_locked) 431 unlock_page(dn->inode_page); 432 return 0; 433 } 434 435 static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, 436 struct page *page) 437 { 438 struct dnode_of_data dn; 439 struct node_info ni; 440 unsigned int start, end; 441 int err = 0, recovered = 0; 442 443 /* step 1: recover xattr */ 444 if (IS_INODE(page)) { 445 f2fs_recover_inline_xattr(inode, page); 446 } else if (f2fs_has_xattr_block(ofs_of_node(page))) { 447 err = f2fs_recover_xattr_data(inode, page); 448 if (!err) 449 recovered++; 450 goto out; 451 } 452 453 /* step 2: recover inline data */ 454 if (f2fs_recover_inline_data(inode, page)) 455 goto out; 456 457 /* step 3: recover data indices */ 458 start = f2fs_start_bidx_of_node(ofs_of_node(page), inode); 459 end = start + ADDRS_PER_PAGE(page, inode); 460 461 set_new_dnode(&dn, inode, NULL, NULL, 0); 462 retry_dn: 463 err = f2fs_get_dnode_of_data(&dn, start, ALLOC_NODE); 464 if (err) { 465 if (err == -ENOMEM) { 466 congestion_wait(BLK_RW_ASYNC, HZ/50); 467 goto retry_dn; 468 } 469 goto out; 470 } 471 472 f2fs_wait_on_page_writeback(dn.node_page, NODE, true); 473 474 f2fs_get_node_info(sbi, dn.nid, &ni); 475 f2fs_bug_on(sbi, ni.ino != ino_of_node(page)); 476 f2fs_bug_on(sbi, ofs_of_node(dn.node_page) != ofs_of_node(page)); 477 478 for (; start < end; start++, dn.ofs_in_node++) { 479 block_t src, dest; 480 481 src = datablock_addr(dn.inode, dn.node_page, dn.ofs_in_node); 482 dest = datablock_addr(dn.inode, page, dn.ofs_in_node); 483 484 /* skip recovering if dest is the same as src */ 485 if (src == dest) 486 continue; 487 488 /* dest is invalid, just invalidate src block */ 489 if (dest == NULL_ADDR) { 490 f2fs_truncate_data_blocks_range(&dn, 1); 491 continue; 492 } 493 494 if (!file_keep_isize(inode) && 495 (i_size_read(inode) <= ((loff_t)start << PAGE_SHIFT))) 496 f2fs_i_size_write(inode, 497 (loff_t)(start + 1) << PAGE_SHIFT); 498 499 /* 500 * dest is reserved block, invalidate src block 501 * and then reserve one new block in dnode page. 502 */ 503 if (dest == NEW_ADDR) { 504 f2fs_truncate_data_blocks_range(&dn, 1); 505 f2fs_reserve_new_block(&dn); 506 continue; 507 } 508 509 /* dest is valid block, try to recover from src to dest */ 510 if (f2fs_is_valid_meta_blkaddr(sbi, dest, META_POR)) { 511 512 if (src == NULL_ADDR) { 513 err = f2fs_reserve_new_block(&dn); 514 #ifdef CONFIG_F2FS_FAULT_INJECTION 515 while (err) 516 err = f2fs_reserve_new_block(&dn); 517 #endif 518 /* We should not get -ENOSPC */ 519 f2fs_bug_on(sbi, err); 520 if (err) 521 goto err; 522 } 523 retry_prev: 524 /* Check the previous node page having this index */ 525 err = check_index_in_prev_nodes(sbi, dest, &dn); 526 if (err) { 527 if (err == -ENOMEM) { 528 congestion_wait(BLK_RW_ASYNC, HZ/50); 529 goto retry_prev; 530 } 531 goto err; 532 } 533 534 /* write dummy data page */ 535 f2fs_replace_block(sbi, &dn, src, dest, 536 ni.version, false, false); 537 recovered++; 538 } 539 } 540 541 copy_node_footer(dn.node_page, page); 542 fill_node_footer(dn.node_page, dn.nid, ni.ino, 543 ofs_of_node(page), false); 544 set_page_dirty(dn.node_page); 545 err: 546 f2fs_put_dnode(&dn); 547 out: 548 f2fs_msg(sbi->sb, KERN_NOTICE, 549 "recover_data: ino = %lx (i_size: %s) recovered = %d, err = %d", 550 inode->i_ino, 551 file_keep_isize(inode) ? "keep" : "recover", 552 recovered, err); 553 return err; 554 } 555 556 static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list, 557 struct list_head *dir_list) 558 { 559 struct curseg_info *curseg; 560 struct page *page = NULL; 561 int err = 0; 562 block_t blkaddr; 563 564 /* get node pages in the current segment */ 565 curseg = CURSEG_I(sbi, CURSEG_WARM_NODE); 566 blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); 567 568 while (1) { 569 struct fsync_inode_entry *entry; 570 571 if (!f2fs_is_valid_meta_blkaddr(sbi, blkaddr, META_POR)) 572 break; 573 574 f2fs_ra_meta_pages_cond(sbi, blkaddr); 575 576 page = f2fs_get_tmp_page(sbi, blkaddr); 577 578 if (!is_recoverable_dnode(page)) { 579 f2fs_put_page(page, 1); 580 break; 581 } 582 583 entry = get_fsync_inode(inode_list, ino_of_node(page)); 584 if (!entry) 585 goto next; 586 /* 587 * inode(x) | CP | inode(x) | dnode(F) 588 * In this case, we can lose the latest inode(x). 589 * So, call recover_inode for the inode update. 590 */ 591 if (IS_INODE(page)) 592 recover_inode(entry->inode, page); 593 if (entry->last_dentry == blkaddr) { 594 err = recover_dentry(entry->inode, page, dir_list); 595 if (err) { 596 f2fs_put_page(page, 1); 597 break; 598 } 599 } 600 err = do_recover_data(sbi, entry->inode, page); 601 if (err) { 602 f2fs_put_page(page, 1); 603 break; 604 } 605 606 if (entry->blkaddr == blkaddr) 607 del_fsync_inode(entry); 608 next: 609 /* check next segment */ 610 blkaddr = next_blkaddr_of_node(page); 611 f2fs_put_page(page, 1); 612 } 613 if (!err) 614 f2fs_allocate_new_segments(sbi); 615 return err; 616 } 617 618 int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only) 619 { 620 struct list_head inode_list; 621 struct list_head dir_list; 622 int err; 623 int ret = 0; 624 unsigned long s_flags = sbi->sb->s_flags; 625 bool need_writecp = false; 626 #ifdef CONFIG_QUOTA 627 int quota_enabled; 628 #endif 629 630 if (s_flags & SB_RDONLY) { 631 f2fs_msg(sbi->sb, KERN_INFO, "orphan cleanup on readonly fs"); 632 sbi->sb->s_flags &= ~SB_RDONLY; 633 } 634 635 #ifdef CONFIG_QUOTA 636 /* Needed for iput() to work correctly and not trash data */ 637 sbi->sb->s_flags |= SB_ACTIVE; 638 /* Turn on quotas so that they are updated correctly */ 639 quota_enabled = f2fs_enable_quota_files(sbi, s_flags & SB_RDONLY); 640 #endif 641 642 fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry", 643 sizeof(struct fsync_inode_entry)); 644 if (!fsync_entry_slab) { 645 err = -ENOMEM; 646 goto out; 647 } 648 649 INIT_LIST_HEAD(&inode_list); 650 INIT_LIST_HEAD(&dir_list); 651 652 /* prevent checkpoint */ 653 mutex_lock(&sbi->cp_mutex); 654 655 /* step #1: find fsynced inode numbers */ 656 err = find_fsync_dnodes(sbi, &inode_list, check_only); 657 if (err || list_empty(&inode_list)) 658 goto skip; 659 660 if (check_only) { 661 ret = 1; 662 goto skip; 663 } 664 665 need_writecp = true; 666 667 /* step #2: recover data */ 668 err = recover_data(sbi, &inode_list, &dir_list); 669 if (!err) 670 f2fs_bug_on(sbi, !list_empty(&inode_list)); 671 skip: 672 destroy_fsync_dnodes(&inode_list); 673 674 /* truncate meta pages to be used by the recovery */ 675 truncate_inode_pages_range(META_MAPPING(sbi), 676 (loff_t)MAIN_BLKADDR(sbi) << PAGE_SHIFT, -1); 677 678 if (err) { 679 truncate_inode_pages_final(NODE_MAPPING(sbi)); 680 truncate_inode_pages_final(META_MAPPING(sbi)); 681 } 682 683 clear_sbi_flag(sbi, SBI_POR_DOING); 684 mutex_unlock(&sbi->cp_mutex); 685 686 /* let's drop all the directory inodes for clean checkpoint */ 687 destroy_fsync_dnodes(&dir_list); 688 689 if (!err && need_writecp) { 690 struct cp_control cpc = { 691 .reason = CP_RECOVERY, 692 }; 693 err = f2fs_write_checkpoint(sbi, &cpc); 694 } 695 696 kmem_cache_destroy(fsync_entry_slab); 697 out: 698 #ifdef CONFIG_QUOTA 699 /* Turn quotas off */ 700 if (quota_enabled) 701 f2fs_quota_off_umount(sbi->sb); 702 #endif 703 sbi->sb->s_flags = s_flags; /* Restore SB_RDONLY status */ 704 705 return ret ? ret: err; 706 } 707