1 // SPDX-License-Identifier: GPL-2.0+ 2 /* 3 * linux/fs/jbd2/recovery.c 4 * 5 * Written by Stephen C. Tweedie <sct@redhat.com>, 1999 6 * 7 * Copyright 1999-2000 Red Hat Software --- All Rights Reserved 8 * 9 * Journal recovery routines for the generic filesystem journaling code; 10 * part of the ext2fs journaling system. 11 */ 12 13 #ifndef __KERNEL__ 14 #include "jfs_user.h" 15 #else 16 #include <linux/time.h> 17 #include <linux/fs.h> 18 #include <linux/jbd2.h> 19 #include <linux/errno.h> 20 #include <linux/crc32.h> 21 #include <linux/blkdev.h> 22 #endif 23 24 /* 25 * Maintain information about the progress of the recovery job, so that 26 * the different passes can carry information between them. 27 */ 28 struct recovery_info 29 { 30 tid_t start_transaction; 31 tid_t end_transaction; 32 unsigned long head_block; 33 34 int nr_replays; 35 int nr_revokes; 36 int nr_revoke_hits; 37 }; 38 39 static int do_one_pass(journal_t *journal, 40 struct recovery_info *info, enum passtype pass); 41 static int scan_revoke_records(journal_t *, struct buffer_head *, 42 tid_t, struct recovery_info *); 43 44 #ifdef __KERNEL__ 45 46 /* Release readahead buffers after use */ 47 static void journal_brelse_array(struct buffer_head *b[], int n) 48 { 49 while (--n >= 0) 50 brelse (b[n]); 51 } 52 53 54 /* 55 * When reading from the journal, we are going through the block device 56 * layer directly and so there is no readahead being done for us. We 57 * need to implement any readahead ourselves if we want it to happen at 58 * all. Recovery is basically one long sequential read, so make sure we 59 * do the IO in reasonably large chunks. 60 * 61 * This is not so critical that we need to be enormously clever about 62 * the readahead size, though. 128K is a purely arbitrary, good-enough 63 * fixed value. 
64 */ 65 66 #define MAXBUF 8 67 static int do_readahead(journal_t *journal, unsigned int start) 68 { 69 int err; 70 unsigned int max, nbufs, next; 71 unsigned long long blocknr; 72 struct buffer_head *bh; 73 74 struct buffer_head * bufs[MAXBUF]; 75 76 /* Do up to 128K of readahead */ 77 max = start + (128 * 1024 / journal->j_blocksize); 78 if (max > journal->j_total_len) 79 max = journal->j_total_len; 80 81 /* Do the readahead itself. We'll submit MAXBUF buffer_heads at 82 * a time to the block device IO layer. */ 83 84 nbufs = 0; 85 86 for (next = start; next < max; next++) { 87 err = jbd2_journal_bmap(journal, next, &blocknr); 88 89 if (err) { 90 printk(KERN_ERR "JBD2: bad block at offset %u\n", 91 next); 92 goto failed; 93 } 94 95 bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); 96 if (!bh) { 97 err = -ENOMEM; 98 goto failed; 99 } 100 101 if (!buffer_uptodate(bh) && !buffer_locked(bh)) { 102 bufs[nbufs++] = bh; 103 if (nbufs == MAXBUF) { 104 bh_readahead_batch(nbufs, bufs, 0); 105 journal_brelse_array(bufs, nbufs); 106 nbufs = 0; 107 } 108 } else 109 brelse(bh); 110 } 111 112 if (nbufs) 113 bh_readahead_batch(nbufs, bufs, 0); 114 err = 0; 115 116 failed: 117 if (nbufs) 118 journal_brelse_array(bufs, nbufs); 119 return err; 120 } 121 122 #endif /* __KERNEL__ */ 123 124 125 /* 126 * Read a block from the journal 127 */ 128 129 static int jread(struct buffer_head **bhp, journal_t *journal, 130 unsigned int offset) 131 { 132 int err; 133 unsigned long long blocknr; 134 struct buffer_head *bh; 135 136 *bhp = NULL; 137 138 if (offset >= journal->j_total_len) { 139 printk(KERN_ERR "JBD2: corrupted journal superblock\n"); 140 return -EFSCORRUPTED; 141 } 142 143 err = jbd2_journal_bmap(journal, offset, &blocknr); 144 145 if (err) { 146 printk(KERN_ERR "JBD2: bad block at offset %u\n", 147 offset); 148 return err; 149 } 150 151 bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); 152 if (!bh) 153 return -ENOMEM; 154 155 if (!buffer_uptodate(bh)) { 
156 /* 157 * If this is a brand new buffer, start readahead. 158 * Otherwise, we assume we are already reading it. 159 */ 160 bool need_readahead = !buffer_req(bh); 161 162 bh_read_nowait(bh, 0); 163 if (need_readahead) 164 do_readahead(journal, offset); 165 wait_on_buffer(bh); 166 } 167 168 if (!buffer_uptodate(bh)) { 169 printk(KERN_ERR "JBD2: Failed to read block at offset %u\n", 170 offset); 171 brelse(bh); 172 return -EIO; 173 } 174 175 *bhp = bh; 176 return 0; 177 } 178 179 static int jbd2_descriptor_block_csum_verify(journal_t *j, void *buf) 180 { 181 struct jbd2_journal_block_tail *tail; 182 __be32 provided; 183 __u32 calculated; 184 185 if (!jbd2_journal_has_csum_v2or3(j)) 186 return 1; 187 188 tail = (struct jbd2_journal_block_tail *)((char *)buf + 189 j->j_blocksize - sizeof(struct jbd2_journal_block_tail)); 190 provided = tail->t_checksum; 191 tail->t_checksum = 0; 192 calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize); 193 tail->t_checksum = provided; 194 195 return provided == cpu_to_be32(calculated); 196 } 197 198 /* 199 * Count the number of in-use tags in a journal descriptor block. 200 */ 201 202 static int count_tags(journal_t *journal, struct buffer_head *bh) 203 { 204 char * tagp; 205 journal_block_tag_t tag; 206 int nr = 0, size = journal->j_blocksize; 207 int tag_bytes = journal_tag_bytes(journal); 208 209 if (jbd2_journal_has_csum_v2or3(journal)) 210 size -= sizeof(struct jbd2_journal_block_tail); 211 212 tagp = &bh->b_data[sizeof(journal_header_t)]; 213 214 while ((tagp - bh->b_data + tag_bytes) <= size) { 215 memcpy(&tag, tagp, sizeof(tag)); 216 217 nr++; 218 tagp += tag_bytes; 219 if (!(tag.t_flags & cpu_to_be16(JBD2_FLAG_SAME_UUID))) 220 tagp += 16; 221 222 if (tag.t_flags & cpu_to_be16(JBD2_FLAG_LAST_TAG)) 223 break; 224 } 225 226 return nr; 227 } 228 229 230 /* Make sure we wrap around the log correctly! 
*/ 231 #define wrap(journal, var) \ 232 do { \ 233 unsigned long _wrap_last = \ 234 jbd2_has_feature_fast_commit(journal) ? \ 235 (journal)->j_fc_last : (journal)->j_last; \ 236 \ 237 if (var >= _wrap_last) \ 238 var -= (_wrap_last - (journal)->j_first); \ 239 } while (0) 240 241 static int fc_do_one_pass(journal_t *journal, 242 struct recovery_info *info, enum passtype pass) 243 { 244 unsigned int expected_commit_id = info->end_transaction; 245 unsigned long next_fc_block; 246 struct buffer_head *bh; 247 int err = 0; 248 249 next_fc_block = journal->j_fc_first; 250 if (!journal->j_fc_replay_callback) 251 return 0; 252 253 while (next_fc_block <= journal->j_fc_last) { 254 jbd2_debug(3, "Fast commit replay: next block %ld\n", 255 next_fc_block); 256 err = jread(&bh, journal, next_fc_block); 257 if (err) { 258 jbd2_debug(3, "Fast commit replay: read error\n"); 259 break; 260 } 261 262 err = journal->j_fc_replay_callback(journal, bh, pass, 263 next_fc_block - journal->j_fc_first, 264 expected_commit_id); 265 brelse(bh); 266 next_fc_block++; 267 if (err < 0 || err == JBD2_FC_REPLAY_STOP) 268 break; 269 err = 0; 270 } 271 272 if (err) 273 jbd2_debug(3, "Fast commit replay failed, err = %d\n", err); 274 275 return err; 276 } 277 278 /** 279 * jbd2_journal_recover - recovers a on-disk journal 280 * @journal: the journal to recover 281 * 282 * The primary function for recovering the log contents when mounting a 283 * journaled device. 284 * 285 * Recovery is done in three passes. In the first pass, we look for the 286 * end of the log. In the second, we assemble the list of revoke 287 * blocks. In the third and final pass, we replay any un-revoked blocks 288 * in the log. 
289 */ 290 int jbd2_journal_recover(journal_t *journal) 291 { 292 int err, err2; 293 journal_superblock_t * sb; 294 295 struct recovery_info info; 296 297 memset(&info, 0, sizeof(info)); 298 sb = journal->j_superblock; 299 300 /* 301 * The journal superblock's s_start field (the current log head) 302 * is always zero if, and only if, the journal was cleanly 303 * unmounted. 304 */ 305 if (!sb->s_start) { 306 jbd2_debug(1, "No recovery required, last transaction %d, head block %u\n", 307 be32_to_cpu(sb->s_sequence), be32_to_cpu(sb->s_head)); 308 journal->j_transaction_sequence = be32_to_cpu(sb->s_sequence) + 1; 309 journal->j_head = be32_to_cpu(sb->s_head); 310 return 0; 311 } 312 313 err = do_one_pass(journal, &info, PASS_SCAN); 314 if (!err) 315 err = do_one_pass(journal, &info, PASS_REVOKE); 316 if (!err) 317 err = do_one_pass(journal, &info, PASS_REPLAY); 318 319 jbd2_debug(1, "JBD2: recovery, exit status %d, " 320 "recovered transactions %u to %u\n", 321 err, info.start_transaction, info.end_transaction); 322 jbd2_debug(1, "JBD2: Replayed %d and revoked %d/%d blocks\n", 323 info.nr_replays, info.nr_revoke_hits, info.nr_revokes); 324 325 /* Restart the log at the next transaction ID, thus invalidating 326 * any existing commit records in the log. 
*/ 327 journal->j_transaction_sequence = ++info.end_transaction; 328 journal->j_head = info.head_block; 329 jbd2_debug(1, "JBD2: last transaction %d, head block %lu\n", 330 journal->j_transaction_sequence, journal->j_head); 331 332 jbd2_journal_clear_revoke(journal); 333 err2 = sync_blockdev(journal->j_fs_dev); 334 if (!err) 335 err = err2; 336 /* Make sure all replayed data is on permanent storage */ 337 if (journal->j_flags & JBD2_BARRIER) { 338 err2 = blkdev_issue_flush(journal->j_fs_dev); 339 if (!err) 340 err = err2; 341 } 342 return err; 343 } 344 345 /** 346 * jbd2_journal_skip_recovery - Start journal and wipe exiting records 347 * @journal: journal to startup 348 * 349 * Locate any valid recovery information from the journal and set up the 350 * journal structures in memory to ignore it (presumably because the 351 * caller has evidence that it is out of date). 352 * This function doesn't appear to be exported.. 353 * 354 * We perform one pass over the journal to allow us to tell the user how 355 * much recovery information is being erased, and to let us initialise 356 * the journal transaction sequence numbers to the next unused ID. 357 */ 358 int jbd2_journal_skip_recovery(journal_t *journal) 359 { 360 int err; 361 362 struct recovery_info info; 363 364 memset (&info, 0, sizeof(info)); 365 366 err = do_one_pass(journal, &info, PASS_SCAN); 367 368 if (err) { 369 printk(KERN_ERR "JBD2: error %d scanning journal\n", err); 370 ++journal->j_transaction_sequence; 371 journal->j_head = journal->j_first; 372 } else { 373 #ifdef CONFIG_JBD2_DEBUG 374 int dropped = info.end_transaction - 375 be32_to_cpu(journal->j_superblock->s_sequence); 376 jbd2_debug(1, 377 "JBD2: ignoring %d transaction%s from the journal.\n", 378 dropped, (dropped == 1) ? 
"" : "s"); 379 #endif 380 journal->j_transaction_sequence = ++info.end_transaction; 381 journal->j_head = info.head_block; 382 } 383 384 journal->j_tail = 0; 385 return err; 386 } 387 388 static inline unsigned long long read_tag_block(journal_t *journal, 389 journal_block_tag_t *tag) 390 { 391 unsigned long long block = be32_to_cpu(tag->t_blocknr); 392 if (jbd2_has_feature_64bit(journal)) 393 block |= (u64)be32_to_cpu(tag->t_blocknr_high) << 32; 394 return block; 395 } 396 397 /* 398 * calc_chksums calculates the checksums for the blocks described in the 399 * descriptor block. 400 */ 401 static int calc_chksums(journal_t *journal, struct buffer_head *bh, 402 unsigned long *next_log_block, __u32 *crc32_sum) 403 { 404 int i, num_blks, err; 405 unsigned long io_block; 406 struct buffer_head *obh; 407 408 num_blks = count_tags(journal, bh); 409 /* Calculate checksum of the descriptor block. */ 410 *crc32_sum = crc32_be(*crc32_sum, (void *)bh->b_data, bh->b_size); 411 412 for (i = 0; i < num_blks; i++) { 413 io_block = (*next_log_block)++; 414 wrap(journal, *next_log_block); 415 err = jread(&obh, journal, io_block); 416 if (err) { 417 printk(KERN_ERR "JBD2: IO error %d recovering block " 418 "%lu in log\n", err, io_block); 419 return 1; 420 } else { 421 *crc32_sum = crc32_be(*crc32_sum, (void *)obh->b_data, 422 obh->b_size); 423 } 424 put_bh(obh); 425 } 426 return 0; 427 } 428 429 static int jbd2_commit_block_csum_verify(journal_t *j, void *buf) 430 { 431 struct commit_header *h; 432 __be32 provided; 433 __u32 calculated; 434 435 if (!jbd2_journal_has_csum_v2or3(j)) 436 return 1; 437 438 h = buf; 439 provided = h->h_chksum[0]; 440 h->h_chksum[0] = 0; 441 calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize); 442 h->h_chksum[0] = provided; 443 444 return provided == cpu_to_be32(calculated); 445 } 446 447 static int jbd2_block_tag_csum_verify(journal_t *j, journal_block_tag_t *tag, 448 journal_block_tag3_t *tag3, 449 void *buf, __u32 sequence) 450 { 451 __u32 
csum32; 452 __be32 seq; 453 454 if (!jbd2_journal_has_csum_v2or3(j)) 455 return 1; 456 457 seq = cpu_to_be32(sequence); 458 csum32 = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&seq, sizeof(seq)); 459 csum32 = jbd2_chksum(j, csum32, buf, j->j_blocksize); 460 461 if (jbd2_has_feature_csum3(j)) 462 return tag3->t_checksum == cpu_to_be32(csum32); 463 else 464 return tag->t_checksum == cpu_to_be16(csum32); 465 } 466 467 static int do_one_pass(journal_t *journal, 468 struct recovery_info *info, enum passtype pass) 469 { 470 unsigned int first_commit_ID, next_commit_ID; 471 unsigned long next_log_block, head_block; 472 int err, success = 0; 473 journal_superblock_t * sb; 474 journal_header_t * tmp; 475 struct buffer_head * bh; 476 unsigned int sequence; 477 int blocktype; 478 int tag_bytes = journal_tag_bytes(journal); 479 __u32 crc32_sum = ~0; /* Transactional Checksums */ 480 int descr_csum_size = 0; 481 int block_error = 0; 482 bool need_check_commit_time = false; 483 __u64 last_trans_commit_time = 0, commit_time; 484 485 /* 486 * First thing is to establish what we expect to find in the log 487 * (in terms of transaction IDs), and where (in terms of log 488 * block offsets): query the superblock. 489 */ 490 491 sb = journal->j_superblock; 492 next_commit_ID = be32_to_cpu(sb->s_sequence); 493 next_log_block = be32_to_cpu(sb->s_start); 494 head_block = next_log_block; 495 496 first_commit_ID = next_commit_ID; 497 if (pass == PASS_SCAN) 498 info->start_transaction = first_commit_ID; 499 500 jbd2_debug(1, "Starting recovery pass %d\n", pass); 501 502 /* 503 * Now we walk through the log, transaction by transaction, 504 * making sure that each transaction has a commit block in the 505 * expected place. Each complete transaction gets replayed back 506 * into the main filesystem. 
507 */ 508 509 while (1) { 510 int flags; 511 char * tagp; 512 journal_block_tag_t tag; 513 struct buffer_head * obh; 514 struct buffer_head * nbh; 515 516 cond_resched(); 517 518 /* If we already know where to stop the log traversal, 519 * check right now that we haven't gone past the end of 520 * the log. */ 521 522 if (pass != PASS_SCAN) 523 if (tid_geq(next_commit_ID, info->end_transaction)) 524 break; 525 526 jbd2_debug(2, "Scanning for sequence ID %u at %lu/%lu\n", 527 next_commit_ID, next_log_block, 528 jbd2_has_feature_fast_commit(journal) ? 529 journal->j_fc_last : journal->j_last); 530 531 /* Skip over each chunk of the transaction looking 532 * either the next descriptor block or the final commit 533 * record. */ 534 535 jbd2_debug(3, "JBD2: checking block %ld\n", next_log_block); 536 err = jread(&bh, journal, next_log_block); 537 if (err) 538 goto failed; 539 540 next_log_block++; 541 wrap(journal, next_log_block); 542 543 /* What kind of buffer is it? 544 * 545 * If it is a descriptor block, check that it has the 546 * expected sequence number. Otherwise, we're all done 547 * here. */ 548 549 tmp = (journal_header_t *)bh->b_data; 550 551 if (tmp->h_magic != cpu_to_be32(JBD2_MAGIC_NUMBER)) { 552 brelse(bh); 553 break; 554 } 555 556 blocktype = be32_to_cpu(tmp->h_blocktype); 557 sequence = be32_to_cpu(tmp->h_sequence); 558 jbd2_debug(3, "Found magic %d, sequence %d\n", 559 blocktype, sequence); 560 561 if (sequence != next_commit_ID) { 562 brelse(bh); 563 break; 564 } 565 566 /* OK, we have a valid descriptor block which matches 567 * all of the sequence number checks. What are we going 568 * to do with it? That depends on the pass... 
*/ 569 570 switch(blocktype) { 571 case JBD2_DESCRIPTOR_BLOCK: 572 /* Verify checksum first */ 573 if (jbd2_journal_has_csum_v2or3(journal)) 574 descr_csum_size = 575 sizeof(struct jbd2_journal_block_tail); 576 if (descr_csum_size > 0 && 577 !jbd2_descriptor_block_csum_verify(journal, 578 bh->b_data)) { 579 /* 580 * PASS_SCAN can see stale blocks due to lazy 581 * journal init. Don't error out on those yet. 582 */ 583 if (pass != PASS_SCAN) { 584 pr_err("JBD2: Invalid checksum recovering block %lu in log\n", 585 next_log_block); 586 err = -EFSBADCRC; 587 brelse(bh); 588 goto failed; 589 } 590 need_check_commit_time = true; 591 jbd2_debug(1, 592 "invalid descriptor block found in %lu\n", 593 next_log_block); 594 } 595 596 /* If it is a valid descriptor block, replay it 597 * in pass REPLAY; if journal_checksums enabled, then 598 * calculate checksums in PASS_SCAN, otherwise, 599 * just skip over the blocks it describes. */ 600 if (pass != PASS_REPLAY) { 601 if (pass == PASS_SCAN && 602 jbd2_has_feature_checksum(journal) && 603 !need_check_commit_time && 604 !info->end_transaction) { 605 if (calc_chksums(journal, bh, 606 &next_log_block, 607 &crc32_sum)) { 608 put_bh(bh); 609 break; 610 } 611 put_bh(bh); 612 continue; 613 } 614 next_log_block += count_tags(journal, bh); 615 wrap(journal, next_log_block); 616 put_bh(bh); 617 continue; 618 } 619 620 /* A descriptor block: we can now write all of 621 * the data blocks. Yay, useful work is finally 622 * getting done here! */ 623 624 tagp = &bh->b_data[sizeof(journal_header_t)]; 625 while ((tagp - bh->b_data + tag_bytes) 626 <= journal->j_blocksize - descr_csum_size) { 627 unsigned long io_block; 628 629 memcpy(&tag, tagp, sizeof(tag)); 630 flags = be16_to_cpu(tag.t_flags); 631 632 io_block = next_log_block++; 633 wrap(journal, next_log_block); 634 err = jread(&obh, journal, io_block); 635 if (err) { 636 /* Recover what we can, but 637 * report failure at the end. 
*/ 638 success = err; 639 printk(KERN_ERR 640 "JBD2: IO error %d recovering " 641 "block %ld in log\n", 642 err, io_block); 643 } else { 644 unsigned long long blocknr; 645 646 J_ASSERT(obh != NULL); 647 blocknr = read_tag_block(journal, 648 &tag); 649 650 /* If the block has been 651 * revoked, then we're all done 652 * here. */ 653 if (jbd2_journal_test_revoke 654 (journal, blocknr, 655 next_commit_ID)) { 656 brelse(obh); 657 ++info->nr_revoke_hits; 658 goto skip_write; 659 } 660 661 /* Look for block corruption */ 662 if (!jbd2_block_tag_csum_verify( 663 journal, &tag, (journal_block_tag3_t *)tagp, 664 obh->b_data, be32_to_cpu(tmp->h_sequence))) { 665 brelse(obh); 666 success = -EFSBADCRC; 667 printk(KERN_ERR "JBD2: Invalid " 668 "checksum recovering " 669 "data block %llu in " 670 "log\n", blocknr); 671 block_error = 1; 672 goto skip_write; 673 } 674 675 /* Find a buffer for the new 676 * data being restored */ 677 nbh = __getblk(journal->j_fs_dev, 678 blocknr, 679 journal->j_blocksize); 680 if (nbh == NULL) { 681 printk(KERN_ERR 682 "JBD2: Out of memory " 683 "during recovery.\n"); 684 err = -ENOMEM; 685 brelse(bh); 686 brelse(obh); 687 goto failed; 688 } 689 690 lock_buffer(nbh); 691 memcpy(nbh->b_data, obh->b_data, 692 journal->j_blocksize); 693 if (flags & JBD2_FLAG_ESCAPE) { 694 *((__be32 *)nbh->b_data) = 695 cpu_to_be32(JBD2_MAGIC_NUMBER); 696 } 697 698 BUFFER_TRACE(nbh, "marking dirty"); 699 set_buffer_uptodate(nbh); 700 mark_buffer_dirty(nbh); 701 BUFFER_TRACE(nbh, "marking uptodate"); 702 ++info->nr_replays; 703 unlock_buffer(nbh); 704 brelse(obh); 705 brelse(nbh); 706 } 707 708 skip_write: 709 tagp += tag_bytes; 710 if (!(flags & JBD2_FLAG_SAME_UUID)) 711 tagp += 16; 712 713 if (flags & JBD2_FLAG_LAST_TAG) 714 break; 715 } 716 717 brelse(bh); 718 continue; 719 720 case JBD2_COMMIT_BLOCK: 721 /* How to differentiate between interrupted commit 722 * and journal corruption ? 
723 * 724 * {nth transaction} 725 * Checksum Verification Failed 726 * | 727 * ____________________ 728 * | | 729 * async_commit sync_commit 730 * | | 731 * | GO TO NEXT "Journal Corruption" 732 * | TRANSACTION 733 * | 734 * {(n+1)th transanction} 735 * | 736 * _______|______________ 737 * | | 738 * Commit block found Commit block not found 739 * | | 740 * "Journal Corruption" | 741 * _____________|_________ 742 * | | 743 * nth trans corrupt OR nth trans 744 * and (n+1)th interrupted interrupted 745 * before commit block 746 * could reach the disk. 747 * (Cannot find the difference in above 748 * mentioned conditions. Hence assume 749 * "Interrupted Commit".) 750 */ 751 commit_time = be64_to_cpu( 752 ((struct commit_header *)bh->b_data)->h_commit_sec); 753 /* 754 * If need_check_commit_time is set, it means we are in 755 * PASS_SCAN and csum verify failed before. If 756 * commit_time is increasing, it's the same journal, 757 * otherwise it is stale journal block, just end this 758 * recovery. 759 */ 760 if (need_check_commit_time) { 761 if (commit_time >= last_trans_commit_time) { 762 pr_err("JBD2: Invalid checksum found in transaction %u\n", 763 next_commit_ID); 764 err = -EFSBADCRC; 765 brelse(bh); 766 goto failed; 767 } 768 ignore_crc_mismatch: 769 /* 770 * It likely does not belong to same journal, 771 * just end this recovery with success. 772 */ 773 jbd2_debug(1, "JBD2: Invalid checksum ignored in transaction %u, likely stale data\n", 774 next_commit_ID); 775 brelse(bh); 776 goto done; 777 } 778 779 /* 780 * Found an expected commit block: if checksums 781 * are present, verify them in PASS_SCAN; else not 782 * much to do other than move on to the next sequence 783 * number. 
784 */ 785 if (pass == PASS_SCAN && 786 jbd2_has_feature_checksum(journal)) { 787 struct commit_header *cbh = 788 (struct commit_header *)bh->b_data; 789 unsigned found_chksum = 790 be32_to_cpu(cbh->h_chksum[0]); 791 792 if (info->end_transaction) { 793 journal->j_failed_commit = 794 info->end_transaction; 795 brelse(bh); 796 break; 797 } 798 799 /* Neither checksum match nor unused? */ 800 if (!((crc32_sum == found_chksum && 801 cbh->h_chksum_type == 802 JBD2_CRC32_CHKSUM && 803 cbh->h_chksum_size == 804 JBD2_CRC32_CHKSUM_SIZE) || 805 (cbh->h_chksum_type == 0 && 806 cbh->h_chksum_size == 0 && 807 found_chksum == 0))) 808 goto chksum_error; 809 810 crc32_sum = ~0; 811 } 812 if (pass == PASS_SCAN && 813 !jbd2_commit_block_csum_verify(journal, 814 bh->b_data)) { 815 chksum_error: 816 if (commit_time < last_trans_commit_time) 817 goto ignore_crc_mismatch; 818 info->end_transaction = next_commit_ID; 819 info->head_block = head_block; 820 821 if (!jbd2_has_feature_async_commit(journal)) { 822 journal->j_failed_commit = 823 next_commit_ID; 824 brelse(bh); 825 break; 826 } 827 } 828 if (pass == PASS_SCAN) { 829 last_trans_commit_time = commit_time; 830 head_block = next_log_block; 831 } 832 brelse(bh); 833 next_commit_ID++; 834 continue; 835 836 case JBD2_REVOKE_BLOCK: 837 /* 838 * Check revoke block crc in pass_scan, if csum verify 839 * failed, check commit block time later. 840 */ 841 if (pass == PASS_SCAN && 842 !jbd2_descriptor_block_csum_verify(journal, 843 bh->b_data)) { 844 jbd2_debug(1, "JBD2: invalid revoke block found in %lu\n", 845 next_log_block); 846 need_check_commit_time = true; 847 } 848 /* If we aren't in the REVOKE pass, then we can 849 * just skip over this block. 
*/ 850 if (pass != PASS_REVOKE) { 851 brelse(bh); 852 continue; 853 } 854 855 err = scan_revoke_records(journal, bh, 856 next_commit_ID, info); 857 brelse(bh); 858 if (err) 859 goto failed; 860 continue; 861 862 default: 863 jbd2_debug(3, "Unrecognised magic %d, end of scan.\n", 864 blocktype); 865 brelse(bh); 866 goto done; 867 } 868 } 869 870 done: 871 /* 872 * We broke out of the log scan loop: either we came to the 873 * known end of the log or we found an unexpected block in the 874 * log. If the latter happened, then we know that the "current" 875 * transaction marks the end of the valid log. 876 */ 877 878 if (pass == PASS_SCAN) { 879 if (!info->end_transaction) 880 info->end_transaction = next_commit_ID; 881 if (!info->head_block) 882 info->head_block = head_block; 883 } else { 884 /* It's really bad news if different passes end up at 885 * different places (but possible due to IO errors). */ 886 if (info->end_transaction != next_commit_ID) { 887 printk(KERN_ERR "JBD2: recovery pass %d ended at " 888 "transaction %u, expected %u\n", 889 pass, next_commit_ID, info->end_transaction); 890 if (!success) 891 success = -EIO; 892 } 893 } 894 895 if (jbd2_has_feature_fast_commit(journal) && pass != PASS_REVOKE) { 896 err = fc_do_one_pass(journal, info, pass); 897 if (err) 898 success = err; 899 } 900 901 if (block_error && success == 0) 902 success = -EIO; 903 return success; 904 905 failed: 906 return err; 907 } 908 909 /* Scan a revoke record, marking all blocks mentioned as revoked. 
*/

/*
 * scan_revoke_records - register every block listed in a revoke block.
 * @journal: journal being recovered
 * @bh: buffer holding the revoke block
 * @sequence: transaction ID the revoke block belongs to
 * @info: recovery state; nr_revokes is bumped per registered record
 *
 * Records follow the revoke header and are 8 bytes wide when the
 * journal has the 64bit feature, 4 bytes otherwise.  The header's
 * r_count gives the offset at which the records end.
 *
 * Returns 0 on success, -EINVAL when r_count exceeds the block size
 * less the checksum tail, or the error from jbd2_journal_set_revoke().
 */
static int scan_revoke_records(journal_t *journal, struct buffer_head *bh,
			       tid_t sequence, struct recovery_info *info)
{
	jbd2_journal_revoke_header_t *header =
		(jbd2_journal_revoke_header_t *)bh->b_data;
	__u32 rcount = be32_to_cpu(header->r_count);
	unsigned csum_size = 0;
	int record_len = 4;
	int offset, end;

	if (jbd2_journal_has_csum_v2or3(journal))
		csum_size = sizeof(struct jbd2_journal_block_tail);

	/* Reject a record area that would run past the usable block. */
	if (rcount > journal->j_blocksize - csum_size)
		return -EINVAL;
	end = rcount;

	if (jbd2_has_feature_64bit(journal))
		record_len = 8;

	for (offset = sizeof(jbd2_journal_revoke_header_t);
	     offset + record_len <= end;
	     offset += record_len) {
		char *rec = bh->b_data + offset;
		unsigned long long blocknr;
		int err;

		if (record_len == 4)
			blocknr = be32_to_cpu(*((__be32 *)rec));
		else
			blocknr = be64_to_cpu(*((__be64 *)rec));

		err = jbd2_journal_set_revoke(journal, blocknr, sequence);
		if (err)
			return err;
		++info->nr_revokes;
	}
	return 0;
}