1 // SPDX-License-Identifier: GPL-2.0+ 2 /* 3 * linux/fs/jbd2/recovery.c 4 * 5 * Written by Stephen C. Tweedie <sct@redhat.com>, 1999 6 * 7 * Copyright 1999-2000 Red Hat Software --- All Rights Reserved 8 * 9 * Journal recovery routines for the generic filesystem journaling code; 10 * part of the ext2fs journaling system. 11 */ 12 13 #ifndef __KERNEL__ 14 #include "jfs_user.h" 15 #else 16 #include <linux/time.h> 17 #include <linux/fs.h> 18 #include <linux/jbd2.h> 19 #include <linux/errno.h> 20 #include <linux/crc32.h> 21 #include <linux/blkdev.h> 22 #endif 23 24 /* 25 * Maintain information about the progress of the recovery job, so that 26 * the different passes can carry information between them. 27 */ 28 struct recovery_info 29 { 30 tid_t start_transaction; 31 tid_t end_transaction; 32 33 int nr_replays; 34 int nr_revokes; 35 int nr_revoke_hits; 36 }; 37 38 static int do_one_pass(journal_t *journal, 39 struct recovery_info *info, enum passtype pass); 40 static int scan_revoke_records(journal_t *, struct buffer_head *, 41 tid_t, struct recovery_info *); 42 43 #ifdef __KERNEL__ 44 45 /* Release readahead buffers after use */ 46 static void journal_brelse_array(struct buffer_head *b[], int n) 47 { 48 while (--n >= 0) 49 brelse (b[n]); 50 } 51 52 53 /* 54 * When reading from the journal, we are going through the block device 55 * layer directly and so there is no readahead being done for us. We 56 * need to implement any readahead ourselves if we want it to happen at 57 * all. Recovery is basically one long sequential read, so make sure we 58 * do the IO in reasonably large chunks. 59 * 60 * This is not so critical that we need to be enormously clever about 61 * the readahead size, though. 128K is a purely arbitrary, good-enough 62 * fixed value. 
63 */ 64 65 #define MAXBUF 8 66 static int do_readahead(journal_t *journal, unsigned int start) 67 { 68 int err; 69 unsigned int max, nbufs, next; 70 unsigned long long blocknr; 71 struct buffer_head *bh; 72 73 struct buffer_head * bufs[MAXBUF]; 74 75 /* Do up to 128K of readahead */ 76 max = start + (128 * 1024 / journal->j_blocksize); 77 if (max > journal->j_total_len) 78 max = journal->j_total_len; 79 80 /* Do the readahead itself. We'll submit MAXBUF buffer_heads at 81 * a time to the block device IO layer. */ 82 83 nbufs = 0; 84 85 for (next = start; next < max; next++) { 86 err = jbd2_journal_bmap(journal, next, &blocknr); 87 88 if (err) { 89 printk(KERN_ERR "JBD2: bad block at offset %u\n", 90 next); 91 goto failed; 92 } 93 94 bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); 95 if (!bh) { 96 err = -ENOMEM; 97 goto failed; 98 } 99 100 if (!buffer_uptodate(bh) && !buffer_locked(bh)) { 101 bufs[nbufs++] = bh; 102 if (nbufs == MAXBUF) { 103 bh_readahead_batch(nbufs, bufs, 0); 104 journal_brelse_array(bufs, nbufs); 105 nbufs = 0; 106 } 107 } else 108 brelse(bh); 109 } 110 111 if (nbufs) 112 bh_readahead_batch(nbufs, bufs, 0); 113 err = 0; 114 115 failed: 116 if (nbufs) 117 journal_brelse_array(bufs, nbufs); 118 return err; 119 } 120 121 #endif /* __KERNEL__ */ 122 123 124 /* 125 * Read a block from the journal 126 */ 127 128 static int jread(struct buffer_head **bhp, journal_t *journal, 129 unsigned int offset) 130 { 131 int err; 132 unsigned long long blocknr; 133 struct buffer_head *bh; 134 135 *bhp = NULL; 136 137 if (offset >= journal->j_total_len) { 138 printk(KERN_ERR "JBD2: corrupted journal superblock\n"); 139 return -EFSCORRUPTED; 140 } 141 142 err = jbd2_journal_bmap(journal, offset, &blocknr); 143 144 if (err) { 145 printk(KERN_ERR "JBD2: bad block at offset %u\n", 146 offset); 147 return err; 148 } 149 150 bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); 151 if (!bh) 152 return -ENOMEM; 153 154 if (!buffer_uptodate(bh)) { 155 
/* 156 * If this is a brand new buffer, start readahead. 157 * Otherwise, we assume we are already reading it. 158 */ 159 bool need_readahead = !buffer_req(bh); 160 161 bh_read_nowait(bh, 0); 162 if (need_readahead) 163 do_readahead(journal, offset); 164 wait_on_buffer(bh); 165 } 166 167 if (!buffer_uptodate(bh)) { 168 printk(KERN_ERR "JBD2: Failed to read block at offset %u\n", 169 offset); 170 brelse(bh); 171 return -EIO; 172 } 173 174 *bhp = bh; 175 return 0; 176 } 177 178 static int jbd2_descriptor_block_csum_verify(journal_t *j, void *buf) 179 { 180 struct jbd2_journal_block_tail *tail; 181 __be32 provided; 182 __u32 calculated; 183 184 if (!jbd2_journal_has_csum_v2or3(j)) 185 return 1; 186 187 tail = (struct jbd2_journal_block_tail *)((char *)buf + 188 j->j_blocksize - sizeof(struct jbd2_journal_block_tail)); 189 provided = tail->t_checksum; 190 tail->t_checksum = 0; 191 calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize); 192 tail->t_checksum = provided; 193 194 return provided == cpu_to_be32(calculated); 195 } 196 197 /* 198 * Count the number of in-use tags in a journal descriptor block. 199 */ 200 201 static int count_tags(journal_t *journal, struct buffer_head *bh) 202 { 203 char * tagp; 204 journal_block_tag_t tag; 205 int nr = 0, size = journal->j_blocksize; 206 int tag_bytes = journal_tag_bytes(journal); 207 208 if (jbd2_journal_has_csum_v2or3(journal)) 209 size -= sizeof(struct jbd2_journal_block_tail); 210 211 tagp = &bh->b_data[sizeof(journal_header_t)]; 212 213 while ((tagp - bh->b_data + tag_bytes) <= size) { 214 memcpy(&tag, tagp, sizeof(tag)); 215 216 nr++; 217 tagp += tag_bytes; 218 if (!(tag.t_flags & cpu_to_be16(JBD2_FLAG_SAME_UUID))) 219 tagp += 16; 220 221 if (tag.t_flags & cpu_to_be16(JBD2_FLAG_LAST_TAG)) 222 break; 223 } 224 225 return nr; 226 } 227 228 229 /* Make sure we wrap around the log correctly! */ 230 #define wrap(journal, var) \ 231 do { \ 232 unsigned long _wrap_last = \ 233 jbd2_has_feature_fast_commit(journal) ? 
\ 234 (journal)->j_fc_last : (journal)->j_last; \ 235 \ 236 if (var >= _wrap_last) \ 237 var -= (_wrap_last - (journal)->j_first); \ 238 } while (0) 239 240 static int fc_do_one_pass(journal_t *journal, 241 struct recovery_info *info, enum passtype pass) 242 { 243 unsigned int expected_commit_id = info->end_transaction; 244 unsigned long next_fc_block; 245 struct buffer_head *bh; 246 int err = 0; 247 248 next_fc_block = journal->j_fc_first; 249 if (!journal->j_fc_replay_callback) 250 return 0; 251 252 while (next_fc_block <= journal->j_fc_last) { 253 jbd2_debug(3, "Fast commit replay: next block %ld\n", 254 next_fc_block); 255 err = jread(&bh, journal, next_fc_block); 256 if (err) { 257 jbd2_debug(3, "Fast commit replay: read error\n"); 258 break; 259 } 260 261 err = journal->j_fc_replay_callback(journal, bh, pass, 262 next_fc_block - journal->j_fc_first, 263 expected_commit_id); 264 next_fc_block++; 265 if (err < 0 || err == JBD2_FC_REPLAY_STOP) 266 break; 267 err = 0; 268 } 269 270 if (err) 271 jbd2_debug(3, "Fast commit replay failed, err = %d\n", err); 272 273 return err; 274 } 275 276 /** 277 * jbd2_journal_recover - recovers a on-disk journal 278 * @journal: the journal to recover 279 * 280 * The primary function for recovering the log contents when mounting a 281 * journaled device. 282 * 283 * Recovery is done in three passes. In the first pass, we look for the 284 * end of the log. In the second, we assemble the list of revoke 285 * blocks. In the third and final pass, we replay any un-revoked blocks 286 * in the log. 287 */ 288 int jbd2_journal_recover(journal_t *journal) 289 { 290 int err, err2; 291 journal_superblock_t * sb; 292 293 struct recovery_info info; 294 295 memset(&info, 0, sizeof(info)); 296 sb = journal->j_superblock; 297 298 /* 299 * The journal superblock's s_start field (the current log head) 300 * is always zero if, and only if, the journal was cleanly 301 * unmounted. 
302 */ 303 304 if (!sb->s_start) { 305 jbd2_debug(1, "No recovery required, last transaction %d\n", 306 be32_to_cpu(sb->s_sequence)); 307 journal->j_transaction_sequence = be32_to_cpu(sb->s_sequence) + 1; 308 return 0; 309 } 310 311 err = do_one_pass(journal, &info, PASS_SCAN); 312 if (!err) 313 err = do_one_pass(journal, &info, PASS_REVOKE); 314 if (!err) 315 err = do_one_pass(journal, &info, PASS_REPLAY); 316 317 jbd2_debug(1, "JBD2: recovery, exit status %d, " 318 "recovered transactions %u to %u\n", 319 err, info.start_transaction, info.end_transaction); 320 jbd2_debug(1, "JBD2: Replayed %d and revoked %d/%d blocks\n", 321 info.nr_replays, info.nr_revoke_hits, info.nr_revokes); 322 323 /* Restart the log at the next transaction ID, thus invalidating 324 * any existing commit records in the log. */ 325 journal->j_transaction_sequence = ++info.end_transaction; 326 327 jbd2_journal_clear_revoke(journal); 328 err2 = sync_blockdev(journal->j_fs_dev); 329 if (!err) 330 err = err2; 331 /* Make sure all replayed data is on permanent storage */ 332 if (journal->j_flags & JBD2_BARRIER) { 333 err2 = blkdev_issue_flush(journal->j_fs_dev); 334 if (!err) 335 err = err2; 336 } 337 return err; 338 } 339 340 /** 341 * jbd2_journal_skip_recovery - Start journal and wipe exiting records 342 * @journal: journal to startup 343 * 344 * Locate any valid recovery information from the journal and set up the 345 * journal structures in memory to ignore it (presumably because the 346 * caller has evidence that it is out of date). 347 * This function doesn't appear to be exported.. 348 * 349 * We perform one pass over the journal to allow us to tell the user how 350 * much recovery information is being erased, and to let us initialise 351 * the journal transaction sequence numbers to the next unused ID. 
352 */ 353 int jbd2_journal_skip_recovery(journal_t *journal) 354 { 355 int err; 356 357 struct recovery_info info; 358 359 memset (&info, 0, sizeof(info)); 360 361 err = do_one_pass(journal, &info, PASS_SCAN); 362 363 if (err) { 364 printk(KERN_ERR "JBD2: error %d scanning journal\n", err); 365 ++journal->j_transaction_sequence; 366 } else { 367 #ifdef CONFIG_JBD2_DEBUG 368 int dropped = info.end_transaction - 369 be32_to_cpu(journal->j_superblock->s_sequence); 370 jbd2_debug(1, 371 "JBD2: ignoring %d transaction%s from the journal.\n", 372 dropped, (dropped == 1) ? "" : "s"); 373 #endif 374 journal->j_transaction_sequence = ++info.end_transaction; 375 } 376 377 journal->j_tail = 0; 378 return err; 379 } 380 381 static inline unsigned long long read_tag_block(journal_t *journal, 382 journal_block_tag_t *tag) 383 { 384 unsigned long long block = be32_to_cpu(tag->t_blocknr); 385 if (jbd2_has_feature_64bit(journal)) 386 block |= (u64)be32_to_cpu(tag->t_blocknr_high) << 32; 387 return block; 388 } 389 390 /* 391 * calc_chksums calculates the checksums for the blocks described in the 392 * descriptor block. 393 */ 394 static int calc_chksums(journal_t *journal, struct buffer_head *bh, 395 unsigned long *next_log_block, __u32 *crc32_sum) 396 { 397 int i, num_blks, err; 398 unsigned long io_block; 399 struct buffer_head *obh; 400 401 num_blks = count_tags(journal, bh); 402 /* Calculate checksum of the descriptor block. 
*/ 403 *crc32_sum = crc32_be(*crc32_sum, (void *)bh->b_data, bh->b_size); 404 405 for (i = 0; i < num_blks; i++) { 406 io_block = (*next_log_block)++; 407 wrap(journal, *next_log_block); 408 err = jread(&obh, journal, io_block); 409 if (err) { 410 printk(KERN_ERR "JBD2: IO error %d recovering block " 411 "%lu in log\n", err, io_block); 412 return 1; 413 } else { 414 *crc32_sum = crc32_be(*crc32_sum, (void *)obh->b_data, 415 obh->b_size); 416 } 417 put_bh(obh); 418 } 419 return 0; 420 } 421 422 static int jbd2_commit_block_csum_verify(journal_t *j, void *buf) 423 { 424 struct commit_header *h; 425 __be32 provided; 426 __u32 calculated; 427 428 if (!jbd2_journal_has_csum_v2or3(j)) 429 return 1; 430 431 h = buf; 432 provided = h->h_chksum[0]; 433 h->h_chksum[0] = 0; 434 calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize); 435 h->h_chksum[0] = provided; 436 437 return provided == cpu_to_be32(calculated); 438 } 439 440 static int jbd2_block_tag_csum_verify(journal_t *j, journal_block_tag_t *tag, 441 journal_block_tag3_t *tag3, 442 void *buf, __u32 sequence) 443 { 444 __u32 csum32; 445 __be32 seq; 446 447 if (!jbd2_journal_has_csum_v2or3(j)) 448 return 1; 449 450 seq = cpu_to_be32(sequence); 451 csum32 = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&seq, sizeof(seq)); 452 csum32 = jbd2_chksum(j, csum32, buf, j->j_blocksize); 453 454 if (jbd2_has_feature_csum3(j)) 455 return tag3->t_checksum == cpu_to_be32(csum32); 456 else 457 return tag->t_checksum == cpu_to_be16(csum32); 458 } 459 460 static int do_one_pass(journal_t *journal, 461 struct recovery_info *info, enum passtype pass) 462 { 463 unsigned int first_commit_ID, next_commit_ID; 464 unsigned long next_log_block; 465 int err, success = 0; 466 journal_superblock_t * sb; 467 journal_header_t * tmp; 468 struct buffer_head * bh; 469 unsigned int sequence; 470 int blocktype; 471 int tag_bytes = journal_tag_bytes(journal); 472 __u32 crc32_sum = ~0; /* Transactional Checksums */ 473 int descr_csum_size = 0; 474 int 
block_error = 0; 475 bool need_check_commit_time = false; 476 __u64 last_trans_commit_time = 0, commit_time; 477 478 /* 479 * First thing is to establish what we expect to find in the log 480 * (in terms of transaction IDs), and where (in terms of log 481 * block offsets): query the superblock. 482 */ 483 484 sb = journal->j_superblock; 485 next_commit_ID = be32_to_cpu(sb->s_sequence); 486 next_log_block = be32_to_cpu(sb->s_start); 487 488 first_commit_ID = next_commit_ID; 489 if (pass == PASS_SCAN) 490 info->start_transaction = first_commit_ID; 491 492 jbd2_debug(1, "Starting recovery pass %d\n", pass); 493 494 /* 495 * Now we walk through the log, transaction by transaction, 496 * making sure that each transaction has a commit block in the 497 * expected place. Each complete transaction gets replayed back 498 * into the main filesystem. 499 */ 500 501 while (1) { 502 int flags; 503 char * tagp; 504 journal_block_tag_t tag; 505 struct buffer_head * obh; 506 struct buffer_head * nbh; 507 508 cond_resched(); 509 510 /* If we already know where to stop the log traversal, 511 * check right now that we haven't gone past the end of 512 * the log. */ 513 514 if (pass != PASS_SCAN) 515 if (tid_geq(next_commit_ID, info->end_transaction)) 516 break; 517 518 jbd2_debug(2, "Scanning for sequence ID %u at %lu/%lu\n", 519 next_commit_ID, next_log_block, 520 jbd2_has_feature_fast_commit(journal) ? 521 journal->j_fc_last : journal->j_last); 522 523 /* Skip over each chunk of the transaction looking 524 * either the next descriptor block or the final commit 525 * record. */ 526 527 jbd2_debug(3, "JBD2: checking block %ld\n", next_log_block); 528 err = jread(&bh, journal, next_log_block); 529 if (err) 530 goto failed; 531 532 next_log_block++; 533 wrap(journal, next_log_block); 534 535 /* What kind of buffer is it? 536 * 537 * If it is a descriptor block, check that it has the 538 * expected sequence number. Otherwise, we're all done 539 * here. 
*/ 540 541 tmp = (journal_header_t *)bh->b_data; 542 543 if (tmp->h_magic != cpu_to_be32(JBD2_MAGIC_NUMBER)) { 544 brelse(bh); 545 break; 546 } 547 548 blocktype = be32_to_cpu(tmp->h_blocktype); 549 sequence = be32_to_cpu(tmp->h_sequence); 550 jbd2_debug(3, "Found magic %d, sequence %d\n", 551 blocktype, sequence); 552 553 if (sequence != next_commit_ID) { 554 brelse(bh); 555 break; 556 } 557 558 /* OK, we have a valid descriptor block which matches 559 * all of the sequence number checks. What are we going 560 * to do with it? That depends on the pass... */ 561 562 switch(blocktype) { 563 case JBD2_DESCRIPTOR_BLOCK: 564 /* Verify checksum first */ 565 if (jbd2_journal_has_csum_v2or3(journal)) 566 descr_csum_size = 567 sizeof(struct jbd2_journal_block_tail); 568 if (descr_csum_size > 0 && 569 !jbd2_descriptor_block_csum_verify(journal, 570 bh->b_data)) { 571 /* 572 * PASS_SCAN can see stale blocks due to lazy 573 * journal init. Don't error out on those yet. 574 */ 575 if (pass != PASS_SCAN) { 576 pr_err("JBD2: Invalid checksum recovering block %lu in log\n", 577 next_log_block); 578 err = -EFSBADCRC; 579 brelse(bh); 580 goto failed; 581 } 582 need_check_commit_time = true; 583 jbd2_debug(1, 584 "invalid descriptor block found in %lu\n", 585 next_log_block); 586 } 587 588 /* If it is a valid descriptor block, replay it 589 * in pass REPLAY; if journal_checksums enabled, then 590 * calculate checksums in PASS_SCAN, otherwise, 591 * just skip over the blocks it describes. */ 592 if (pass != PASS_REPLAY) { 593 if (pass == PASS_SCAN && 594 jbd2_has_feature_checksum(journal) && 595 !need_check_commit_time && 596 !info->end_transaction) { 597 if (calc_chksums(journal, bh, 598 &next_log_block, 599 &crc32_sum)) { 600 put_bh(bh); 601 break; 602 } 603 put_bh(bh); 604 continue; 605 } 606 next_log_block += count_tags(journal, bh); 607 wrap(journal, next_log_block); 608 put_bh(bh); 609 continue; 610 } 611 612 /* A descriptor block: we can now write all of 613 * the data blocks. 
Yay, useful work is finally 614 * getting done here! */ 615 616 tagp = &bh->b_data[sizeof(journal_header_t)]; 617 while ((tagp - bh->b_data + tag_bytes) 618 <= journal->j_blocksize - descr_csum_size) { 619 unsigned long io_block; 620 621 memcpy(&tag, tagp, sizeof(tag)); 622 flags = be16_to_cpu(tag.t_flags); 623 624 io_block = next_log_block++; 625 wrap(journal, next_log_block); 626 err = jread(&obh, journal, io_block); 627 if (err) { 628 /* Recover what we can, but 629 * report failure at the end. */ 630 success = err; 631 printk(KERN_ERR 632 "JBD2: IO error %d recovering " 633 "block %ld in log\n", 634 err, io_block); 635 } else { 636 unsigned long long blocknr; 637 638 J_ASSERT(obh != NULL); 639 blocknr = read_tag_block(journal, 640 &tag); 641 642 /* If the block has been 643 * revoked, then we're all done 644 * here. */ 645 if (jbd2_journal_test_revoke 646 (journal, blocknr, 647 next_commit_ID)) { 648 brelse(obh); 649 ++info->nr_revoke_hits; 650 goto skip_write; 651 } 652 653 /* Look for block corruption */ 654 if (!jbd2_block_tag_csum_verify( 655 journal, &tag, (journal_block_tag3_t *)tagp, 656 obh->b_data, be32_to_cpu(tmp->h_sequence))) { 657 brelse(obh); 658 success = -EFSBADCRC; 659 printk(KERN_ERR "JBD2: Invalid " 660 "checksum recovering " 661 "data block %llu in " 662 "log\n", blocknr); 663 block_error = 1; 664 goto skip_write; 665 } 666 667 /* Find a buffer for the new 668 * data being restored */ 669 nbh = __getblk(journal->j_fs_dev, 670 blocknr, 671 journal->j_blocksize); 672 if (nbh == NULL) { 673 printk(KERN_ERR 674 "JBD2: Out of memory " 675 "during recovery.\n"); 676 err = -ENOMEM; 677 brelse(bh); 678 brelse(obh); 679 goto failed; 680 } 681 682 lock_buffer(nbh); 683 memcpy(nbh->b_data, obh->b_data, 684 journal->j_blocksize); 685 if (flags & JBD2_FLAG_ESCAPE) { 686 *((__be32 *)nbh->b_data) = 687 cpu_to_be32(JBD2_MAGIC_NUMBER); 688 } 689 690 BUFFER_TRACE(nbh, "marking dirty"); 691 set_buffer_uptodate(nbh); 692 mark_buffer_dirty(nbh); 693 
BUFFER_TRACE(nbh, "marking uptodate"); 694 ++info->nr_replays; 695 unlock_buffer(nbh); 696 brelse(obh); 697 brelse(nbh); 698 } 699 700 skip_write: 701 tagp += tag_bytes; 702 if (!(flags & JBD2_FLAG_SAME_UUID)) 703 tagp += 16; 704 705 if (flags & JBD2_FLAG_LAST_TAG) 706 break; 707 } 708 709 brelse(bh); 710 continue; 711 712 case JBD2_COMMIT_BLOCK: 713 /* How to differentiate between interrupted commit 714 * and journal corruption ? 715 * 716 * {nth transaction} 717 * Checksum Verification Failed 718 * | 719 * ____________________ 720 * | | 721 * async_commit sync_commit 722 * | | 723 * | GO TO NEXT "Journal Corruption" 724 * | TRANSACTION 725 * | 726 * {(n+1)th transanction} 727 * | 728 * _______|______________ 729 * | | 730 * Commit block found Commit block not found 731 * | | 732 * "Journal Corruption" | 733 * _____________|_________ 734 * | | 735 * nth trans corrupt OR nth trans 736 * and (n+1)th interrupted interrupted 737 * before commit block 738 * could reach the disk. 739 * (Cannot find the difference in above 740 * mentioned conditions. Hence assume 741 * "Interrupted Commit".) 742 */ 743 commit_time = be64_to_cpu( 744 ((struct commit_header *)bh->b_data)->h_commit_sec); 745 /* 746 * If need_check_commit_time is set, it means we are in 747 * PASS_SCAN and csum verify failed before. If 748 * commit_time is increasing, it's the same journal, 749 * otherwise it is stale journal block, just end this 750 * recovery. 751 */ 752 if (need_check_commit_time) { 753 if (commit_time >= last_trans_commit_time) { 754 pr_err("JBD2: Invalid checksum found in transaction %u\n", 755 next_commit_ID); 756 err = -EFSBADCRC; 757 brelse(bh); 758 goto failed; 759 } 760 ignore_crc_mismatch: 761 /* 762 * It likely does not belong to same journal, 763 * just end this recovery with success. 
764 */ 765 jbd2_debug(1, "JBD2: Invalid checksum ignored in transaction %u, likely stale data\n", 766 next_commit_ID); 767 brelse(bh); 768 goto done; 769 } 770 771 /* 772 * Found an expected commit block: if checksums 773 * are present, verify them in PASS_SCAN; else not 774 * much to do other than move on to the next sequence 775 * number. 776 */ 777 if (pass == PASS_SCAN && 778 jbd2_has_feature_checksum(journal)) { 779 struct commit_header *cbh = 780 (struct commit_header *)bh->b_data; 781 unsigned found_chksum = 782 be32_to_cpu(cbh->h_chksum[0]); 783 784 if (info->end_transaction) { 785 journal->j_failed_commit = 786 info->end_transaction; 787 brelse(bh); 788 break; 789 } 790 791 /* Neither checksum match nor unused? */ 792 if (!((crc32_sum == found_chksum && 793 cbh->h_chksum_type == 794 JBD2_CRC32_CHKSUM && 795 cbh->h_chksum_size == 796 JBD2_CRC32_CHKSUM_SIZE) || 797 (cbh->h_chksum_type == 0 && 798 cbh->h_chksum_size == 0 && 799 found_chksum == 0))) 800 goto chksum_error; 801 802 crc32_sum = ~0; 803 } 804 if (pass == PASS_SCAN && 805 !jbd2_commit_block_csum_verify(journal, 806 bh->b_data)) { 807 chksum_error: 808 if (commit_time < last_trans_commit_time) 809 goto ignore_crc_mismatch; 810 info->end_transaction = next_commit_ID; 811 812 if (!jbd2_has_feature_async_commit(journal)) { 813 journal->j_failed_commit = 814 next_commit_ID; 815 brelse(bh); 816 break; 817 } 818 } 819 if (pass == PASS_SCAN) 820 last_trans_commit_time = commit_time; 821 brelse(bh); 822 next_commit_ID++; 823 continue; 824 825 case JBD2_REVOKE_BLOCK: 826 /* 827 * Check revoke block crc in pass_scan, if csum verify 828 * failed, check commit block time later. 829 */ 830 if (pass == PASS_SCAN && 831 !jbd2_descriptor_block_csum_verify(journal, 832 bh->b_data)) { 833 jbd2_debug(1, "JBD2: invalid revoke block found in %lu\n", 834 next_log_block); 835 need_check_commit_time = true; 836 } 837 /* If we aren't in the REVOKE pass, then we can 838 * just skip over this block. 
*/ 839 if (pass != PASS_REVOKE) { 840 brelse(bh); 841 continue; 842 } 843 844 err = scan_revoke_records(journal, bh, 845 next_commit_ID, info); 846 brelse(bh); 847 if (err) 848 goto failed; 849 continue; 850 851 default: 852 jbd2_debug(3, "Unrecognised magic %d, end of scan.\n", 853 blocktype); 854 brelse(bh); 855 goto done; 856 } 857 } 858 859 done: 860 /* 861 * We broke out of the log scan loop: either we came to the 862 * known end of the log or we found an unexpected block in the 863 * log. If the latter happened, then we know that the "current" 864 * transaction marks the end of the valid log. 865 */ 866 867 if (pass == PASS_SCAN) { 868 if (!info->end_transaction) 869 info->end_transaction = next_commit_ID; 870 } else { 871 /* It's really bad news if different passes end up at 872 * different places (but possible due to IO errors). */ 873 if (info->end_transaction != next_commit_ID) { 874 printk(KERN_ERR "JBD2: recovery pass %d ended at " 875 "transaction %u, expected %u\n", 876 pass, next_commit_ID, info->end_transaction); 877 if (!success) 878 success = -EIO; 879 } 880 } 881 882 if (jbd2_has_feature_fast_commit(journal) && pass != PASS_REVOKE) { 883 err = fc_do_one_pass(journal, info, pass); 884 if (err) 885 success = err; 886 } 887 888 if (block_error && success == 0) 889 success = -EIO; 890 return success; 891 892 failed: 893 return err; 894 } 895 896 /* Scan a revoke record, marking all blocks mentioned as revoked. 
*/

/*
 * scan_revoke_records - register every block listed in one revoke block
 * @journal: journal being recovered
 * @bh: buffer holding the revoke block
 * @sequence: transaction ID the revoke block belongs to
 * @info: recovery state; nr_revokes is bumped once per record
 *
 * The header's r_count is used as the byte offset at which the record
 * area ends.  Records are 8 bytes wide with the 64bit feature and 4 bytes
 * wide otherwise.
 *
 * Returns 0 on success, -EINVAL if r_count exceeds the block size minus
 * the checksum tail, or the error from jbd2_journal_set_revoke().
 */
static int scan_revoke_records(journal_t *journal, struct buffer_head *bh,
			       tid_t sequence, struct recovery_info *info)
{
	jbd2_journal_revoke_header_t *hdr;
	unsigned int tail_size = 0;
	int rec_size = 4;
	int pos, limit;
	__u32 used;

	hdr = (jbd2_journal_revoke_header_t *) bh->b_data;
	used = be32_to_cpu(hdr->r_count);

	/* With v2/v3 checksums the end of the block holds the tail. */
	if (jbd2_journal_has_csum_v2or3(journal))
		tail_size = sizeof(struct jbd2_journal_block_tail);
	if (used > journal->j_blocksize - tail_size)
		return -EINVAL;
	limit = used;

	if (jbd2_has_feature_64bit(journal))
		rec_size = 8;

	for (pos = sizeof(jbd2_journal_revoke_header_t);
	     pos + rec_size <= limit; pos += rec_size) {
		unsigned long long blocknr;
		int ret;

		if (rec_size == 4)
			blocknr = be32_to_cpu(*((__be32 *)(bh->b_data + pos)));
		else
			blocknr = be64_to_cpu(*((__be64 *)(bh->b_data + pos)));

		ret = jbd2_journal_set_revoke(journal, blocknr, sequence);
		if (ret)
			return ret;
		++info->nr_revokes;
	}
	return 0;
}