1 // SPDX-License-Identifier: GPL-2.0+ 2 /* 3 * linux/fs/jbd2/recovery.c 4 * 5 * Written by Stephen C. Tweedie <sct@redhat.com>, 1999 6 * 7 * Copyright 1999-2000 Red Hat Software --- All Rights Reserved 8 * 9 * Journal recovery routines for the generic filesystem journaling code; 10 * part of the ext2fs journaling system. 11 */ 12 13 #ifndef __KERNEL__ 14 #include "jfs_user.h" 15 #else 16 #include <linux/time.h> 17 #include <linux/fs.h> 18 #include <linux/jbd2.h> 19 #include <linux/errno.h> 20 #include <linux/crc32.h> 21 #include <linux/blkdev.h> 22 #endif 23 24 /* 25 * Maintain information about the progress of the recovery job, so that 26 * the different passes can carry information between them. 27 */ 28 struct recovery_info 29 { 30 tid_t start_transaction; 31 tid_t end_transaction; 32 33 int nr_replays; 34 int nr_revokes; 35 int nr_revoke_hits; 36 }; 37 38 static int do_one_pass(journal_t *journal, 39 struct recovery_info *info, enum passtype pass); 40 static int scan_revoke_records(journal_t *, struct buffer_head *, 41 tid_t, struct recovery_info *); 42 43 #ifdef __KERNEL__ 44 45 /* Release readahead buffers after use */ 46 static void journal_brelse_array(struct buffer_head *b[], int n) 47 { 48 while (--n >= 0) 49 brelse (b[n]); 50 } 51 52 53 /* 54 * When reading from the journal, we are going through the block device 55 * layer directly and so there is no readahead being done for us. We 56 * need to implement any readahead ourselves if we want it to happen at 57 * all. Recovery is basically one long sequential read, so make sure we 58 * do the IO in reasonably large chunks. 59 * 60 * This is not so critical that we need to be enormously clever about 61 * the readahead size, though. 128K is a purely arbitrary, good-enough 62 * fixed value. 63 */ 64 65 #define MAXBUF 8 66 static int do_readahead(journal_t *journal, unsigned int start) 67 { 68 int err; 69 unsigned int max, nbufs, next; 70 unsigned long long blocknr; 71 struct buffer_head *bh; 72 73 struct buffer_head * bufs[MAXBUF]; 74 75 /* Do up to 128K of readahead */ 76 max = start + (128 * 1024 / journal->j_blocksize); 77 if (max > journal->j_maxlen) 78 max = journal->j_maxlen; 79 80 /* Do the readahead itself. We'll submit MAXBUF buffer_heads at 81 * a time to the block device IO layer. */ 82 83 nbufs = 0; 84 85 for (next = start; next < max; next++) { 86 err = jbd2_journal_bmap(journal, next, &blocknr); 87 88 if (err) { 89 printk(KERN_ERR "JBD2: bad block at offset %u\n", 90 next); 91 goto failed; 92 } 93 94 bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); 95 if (!bh) { 96 err = -ENOMEM; 97 goto failed; 98 } 99 100 if (!buffer_uptodate(bh) && !buffer_locked(bh)) { 101 bufs[nbufs++] = bh; 102 if (nbufs == MAXBUF) { 103 ll_rw_block(REQ_OP_READ, 0, nbufs, bufs); 104 journal_brelse_array(bufs, nbufs); 105 nbufs = 0; 106 } 107 } else 108 brelse(bh); 109 } 110 111 if (nbufs) 112 ll_rw_block(REQ_OP_READ, 0, nbufs, bufs); 113 err = 0; 114 115 failed: 116 if (nbufs) 117 journal_brelse_array(bufs, nbufs); 118 return err; 119 } 120 121 #endif /* __KERNEL__ */ 122 123 124 /* 125 * Read a block from the journal 126 */ 127 128 static int jread(struct buffer_head **bhp, journal_t *journal, 129 unsigned int offset) 130 { 131 int err; 132 unsigned long long blocknr; 133 struct buffer_head *bh; 134 135 *bhp = NULL; 136 137 if (offset >= journal->j_maxlen) { 138 printk(KERN_ERR "JBD2: corrupted journal superblock\n"); 139 return -EFSCORRUPTED; 140 } 141 142 err = jbd2_journal_bmap(journal, offset, &blocknr); 143 144 if (err) { 145 printk(KERN_ERR "JBD2: bad block at offset %u\n", 146 offset); 147 return err; 148 } 149 150 bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); 151 if (!bh) 152 return -ENOMEM; 153 154 if (!buffer_uptodate(bh)) { 155 /* If this is a brand new buffer, start readahead. 156 Otherwise, we assume we are already reading it. */ 157 if (!buffer_req(bh)) 158 do_readahead(journal, offset); 159 wait_on_buffer(bh); 160 } 161 162 if (!buffer_uptodate(bh)) { 163 printk(KERN_ERR "JBD2: Failed to read block at offset %u\n", 164 offset); 165 brelse(bh); 166 return -EIO; 167 } 168 169 *bhp = bh; 170 return 0; 171 } 172 173 static int jbd2_descriptor_block_csum_verify(journal_t *j, void *buf) 174 { 175 struct jbd2_journal_block_tail *tail; 176 __be32 provided; 177 __u32 calculated; 178 179 if (!jbd2_journal_has_csum_v2or3(j)) 180 return 1; 181 182 tail = (struct jbd2_journal_block_tail *)(buf + j->j_blocksize - 183 sizeof(struct jbd2_journal_block_tail)); 184 provided = tail->t_checksum; 185 tail->t_checksum = 0; 186 calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize); 187 tail->t_checksum = provided; 188 189 return provided == cpu_to_be32(calculated); 190 } 191 192 /* 193 * Count the number of in-use tags in a journal descriptor block. 194 */ 195 196 static int count_tags(journal_t *journal, struct buffer_head *bh) 197 { 198 char * tagp; 199 journal_block_tag_t * tag; 200 int nr = 0, size = journal->j_blocksize; 201 int tag_bytes = journal_tag_bytes(journal); 202 203 if (jbd2_journal_has_csum_v2or3(journal)) 204 size -= sizeof(struct jbd2_journal_block_tail); 205 206 tagp = &bh->b_data[sizeof(journal_header_t)]; 207 208 while ((tagp - bh->b_data + tag_bytes) <= size) { 209 tag = (journal_block_tag_t *) tagp; 210 211 nr++; 212 tagp += tag_bytes; 213 if (!(tag->t_flags & cpu_to_be16(JBD2_FLAG_SAME_UUID))) 214 tagp += 16; 215 216 if (tag->t_flags & cpu_to_be16(JBD2_FLAG_LAST_TAG)) 217 break; 218 } 219 220 return nr; 221 } 222 223 224 /* Make sure we wrap around the log correctly! */ 225 #define wrap(journal, var) \ 226 do { \ 227 unsigned long _wrap_last = \ 228 jbd2_has_feature_fast_commit(journal) ? \ 229 (journal)->j_fc_last : (journal)->j_last; \ 230 \ 231 if (var >= _wrap_last) \ 232 var -= (_wrap_last - (journal)->j_first); \ 233 } while (0) 234 235 static int fc_do_one_pass(journal_t *journal, 236 struct recovery_info *info, enum passtype pass) 237 { 238 unsigned int expected_commit_id = info->end_transaction; 239 unsigned long next_fc_block; 240 struct buffer_head *bh; 241 int err = 0; 242 243 next_fc_block = journal->j_fc_first; 244 if (!journal->j_fc_replay_callback) 245 return 0; 246 247 while (next_fc_block <= journal->j_fc_last) { 248 jbd_debug(3, "Fast commit replay: next block %ld", 249 next_fc_block); 250 err = jread(&bh, journal, next_fc_block); 251 if (err) { 252 jbd_debug(3, "Fast commit replay: read error"); 253 break; 254 } 255 256 jbd_debug(3, "Processing fast commit blk with seq %d"); 257 err = journal->j_fc_replay_callback(journal, bh, pass, 258 next_fc_block - journal->j_fc_first, 259 expected_commit_id); 260 next_fc_block++; 261 if (err < 0 || err == JBD2_FC_REPLAY_STOP) 262 break; 263 err = 0; 264 } 265 266 if (err) 267 jbd_debug(3, "Fast commit replay failed, err = %d\n", err); 268 269 return err; 270 } 271 272 /** 273 * jbd2_journal_recover - recovers a on-disk journal 274 * @journal: the journal to recover 275 * 276 * The primary function for recovering the log contents when mounting a 277 * journaled device. 278 * 279 * Recovery is done in three passes. In the first pass, we look for the 280 * end of the log. In the second, we assemble the list of revoke 281 * blocks. In the third and final pass, we replay any un-revoked blocks 282 * in the log. 283 */ 284 int jbd2_journal_recover(journal_t *journal) 285 { 286 int err, err2; 287 journal_superblock_t * sb; 288 289 struct recovery_info info; 290 291 memset(&info, 0, sizeof(info)); 292 sb = journal->j_superblock; 293 294 /* 295 * The journal superblock's s_start field (the current log head) 296 * is always zero if, and only if, the journal was cleanly 297 * unmounted. 298 */ 299 300 if (!sb->s_start) { 301 jbd_debug(1, "No recovery required, last transaction %d\n", 302 be32_to_cpu(sb->s_sequence)); 303 journal->j_transaction_sequence = be32_to_cpu(sb->s_sequence) + 1; 304 return 0; 305 } 306 307 err = do_one_pass(journal, &info, PASS_SCAN); 308 if (!err) 309 err = do_one_pass(journal, &info, PASS_REVOKE); 310 if (!err) 311 err = do_one_pass(journal, &info, PASS_REPLAY); 312 313 jbd_debug(1, "JBD2: recovery, exit status %d, " 314 "recovered transactions %u to %u\n", 315 err, info.start_transaction, info.end_transaction); 316 jbd_debug(1, "JBD2: Replayed %d and revoked %d/%d blocks\n", 317 info.nr_replays, info.nr_revoke_hits, info.nr_revokes); 318 319 /* Restart the log at the next transaction ID, thus invalidating 320 * any existing commit records in the log. */ 321 journal->j_transaction_sequence = ++info.end_transaction; 322 323 jbd2_journal_clear_revoke(journal); 324 err2 = sync_blockdev(journal->j_fs_dev); 325 if (!err) 326 err = err2; 327 /* Make sure all replayed data is on permanent storage */ 328 if (journal->j_flags & JBD2_BARRIER) { 329 err2 = blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL); 330 if (!err) 331 err = err2; 332 } 333 return err; 334 } 335 336 /** 337 * jbd2_journal_skip_recovery - Start journal and wipe exiting records 338 * @journal: journal to startup 339 * 340 * Locate any valid recovery information from the journal and set up the 341 * journal structures in memory to ignore it (presumably because the 342 * caller has evidence that it is out of date). 343 * This function doesn't appear to be exported.. 344 * 345 * We perform one pass over the journal to allow us to tell the user how 346 * much recovery information is being erased, and to let us initialise 347 * the journal transaction sequence numbers to the next unused ID. 348 */ 349 int jbd2_journal_skip_recovery(journal_t *journal) 350 { 351 int err; 352 353 struct recovery_info info; 354 355 memset (&info, 0, sizeof(info)); 356 357 err = do_one_pass(journal, &info, PASS_SCAN); 358 359 if (err) { 360 printk(KERN_ERR "JBD2: error %d scanning journal\n", err); 361 ++journal->j_transaction_sequence; 362 } else { 363 #ifdef CONFIG_JBD2_DEBUG 364 int dropped = info.end_transaction - 365 be32_to_cpu(journal->j_superblock->s_sequence); 366 jbd_debug(1, 367 "JBD2: ignoring %d transaction%s from the journal.\n", 368 dropped, (dropped == 1) ? "" : "s"); 369 #endif 370 journal->j_transaction_sequence = ++info.end_transaction; 371 } 372 373 journal->j_tail = 0; 374 return err; 375 } 376 377 static inline unsigned long long read_tag_block(journal_t *journal, 378 journal_block_tag_t *tag) 379 { 380 unsigned long long block = be32_to_cpu(tag->t_blocknr); 381 if (jbd2_has_feature_64bit(journal)) 382 block |= (u64)be32_to_cpu(tag->t_blocknr_high) << 32; 383 return block; 384 } 385 386 /* 387 * calc_chksums calculates the checksums for the blocks described in the 388 * descriptor block. 389 */ 390 static int calc_chksums(journal_t *journal, struct buffer_head *bh, 391 unsigned long *next_log_block, __u32 *crc32_sum) 392 { 393 int i, num_blks, err; 394 unsigned long io_block; 395 struct buffer_head *obh; 396 397 num_blks = count_tags(journal, bh); 398 /* Calculate checksum of the descriptor block. */ 399 *crc32_sum = crc32_be(*crc32_sum, (void *)bh->b_data, bh->b_size); 400 401 for (i = 0; i < num_blks; i++) { 402 io_block = (*next_log_block)++; 403 wrap(journal, *next_log_block); 404 err = jread(&obh, journal, io_block); 405 if (err) { 406 printk(KERN_ERR "JBD2: IO error %d recovering block " 407 "%lu in log\n", err, io_block); 408 return 1; 409 } else { 410 *crc32_sum = crc32_be(*crc32_sum, (void *)obh->b_data, 411 obh->b_size); 412 } 413 put_bh(obh); 414 } 415 return 0; 416 } 417 418 static int jbd2_commit_block_csum_verify(journal_t *j, void *buf) 419 { 420 struct commit_header *h; 421 __be32 provided; 422 __u32 calculated; 423 424 if (!jbd2_journal_has_csum_v2or3(j)) 425 return 1; 426 427 h = buf; 428 provided = h->h_chksum[0]; 429 h->h_chksum[0] = 0; 430 calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize); 431 h->h_chksum[0] = provided; 432 433 return provided == cpu_to_be32(calculated); 434 } 435 436 static int jbd2_block_tag_csum_verify(journal_t *j, journal_block_tag_t *tag, 437 void *buf, __u32 sequence) 438 { 439 journal_block_tag3_t *tag3 = (journal_block_tag3_t *)tag; 440 __u32 csum32; 441 __be32 seq; 442 443 if (!jbd2_journal_has_csum_v2or3(j)) 444 return 1; 445 446 seq = cpu_to_be32(sequence); 447 csum32 = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&seq, sizeof(seq)); 448 csum32 = jbd2_chksum(j, csum32, buf, j->j_blocksize); 449 450 if (jbd2_has_feature_csum3(j)) 451 return tag3->t_checksum == cpu_to_be32(csum32); 452 else 453 return tag->t_checksum == cpu_to_be16(csum32); 454 } 455 456 static int do_one_pass(journal_t *journal, 457 struct recovery_info *info, enum passtype pass) 458 { 459 unsigned int first_commit_ID, next_commit_ID; 460 unsigned long next_log_block; 461 int err, success = 0; 462 journal_superblock_t * sb; 463 journal_header_t * tmp; 464 struct buffer_head * bh; 465 unsigned int sequence; 466 int blocktype; 467 int tag_bytes = journal_tag_bytes(journal); 468 __u32 crc32_sum = ~0; /* Transactional Checksums */ 469 int descr_csum_size = 0; 470 int block_error = 0; 471 bool need_check_commit_time = false; 472 __u64 last_trans_commit_time = 0, commit_time; 473 474 /* 475 * First thing is to establish what we expect to find in the log 476 * (in terms of transaction IDs), and where (in terms of log 477 * block offsets): query the superblock. 478 */ 479 480 sb = journal->j_superblock; 481 next_commit_ID = be32_to_cpu(sb->s_sequence); 482 next_log_block = be32_to_cpu(sb->s_start); 483 484 first_commit_ID = next_commit_ID; 485 if (pass == PASS_SCAN) 486 info->start_transaction = first_commit_ID; 487 488 jbd_debug(1, "Starting recovery pass %d\n", pass); 489 490 /* 491 * Now we walk through the log, transaction by transaction, 492 * making sure that each transaction has a commit block in the 493 * expected place. Each complete transaction gets replayed back 494 * into the main filesystem. 495 */ 496 497 while (1) { 498 int flags; 499 char * tagp; 500 journal_block_tag_t * tag; 501 struct buffer_head * obh; 502 struct buffer_head * nbh; 503 504 cond_resched(); 505 506 /* If we already know where to stop the log traversal, 507 * check right now that we haven't gone past the end of 508 * the log. */ 509 510 if (pass != PASS_SCAN) 511 if (tid_geq(next_commit_ID, info->end_transaction)) 512 break; 513 514 jbd_debug(2, "Scanning for sequence ID %u at %lu/%lu\n", 515 next_commit_ID, next_log_block, 516 jbd2_has_feature_fast_commit(journal) ? 517 journal->j_fc_last : journal->j_last); 518 519 /* Skip over each chunk of the transaction looking 520 * either the next descriptor block or the final commit 521 * record. */ 522 523 jbd_debug(3, "JBD2: checking block %ld\n", next_log_block); 524 err = jread(&bh, journal, next_log_block); 525 if (err) 526 goto failed; 527 528 next_log_block++; 529 wrap(journal, next_log_block); 530 531 /* What kind of buffer is it? 532 * 533 * If it is a descriptor block, check that it has the 534 * expected sequence number. Otherwise, we're all done 535 * here. */ 536 537 tmp = (journal_header_t *)bh->b_data; 538 539 if (tmp->h_magic != cpu_to_be32(JBD2_MAGIC_NUMBER)) { 540 brelse(bh); 541 break; 542 } 543 544 blocktype = be32_to_cpu(tmp->h_blocktype); 545 sequence = be32_to_cpu(tmp->h_sequence); 546 jbd_debug(3, "Found magic %d, sequence %d\n", 547 blocktype, sequence); 548 549 if (sequence != next_commit_ID) { 550 brelse(bh); 551 break; 552 } 553 554 /* OK, we have a valid descriptor block which matches 555 * all of the sequence number checks. What are we going 556 * to do with it? That depends on the pass... */ 557 558 switch(blocktype) { 559 case JBD2_DESCRIPTOR_BLOCK: 560 /* Verify checksum first */ 561 if (jbd2_journal_has_csum_v2or3(journal)) 562 descr_csum_size = 563 sizeof(struct jbd2_journal_block_tail); 564 if (descr_csum_size > 0 && 565 !jbd2_descriptor_block_csum_verify(journal, 566 bh->b_data)) { 567 /* 568 * PASS_SCAN can see stale blocks due to lazy 569 * journal init. Don't error out on those yet. 570 */ 571 if (pass != PASS_SCAN) { 572 pr_err("JBD2: Invalid checksum recovering block %lu in log\n", 573 next_log_block); 574 err = -EFSBADCRC; 575 brelse(bh); 576 goto failed; 577 } 578 need_check_commit_time = true; 579 jbd_debug(1, 580 "invalid descriptor block found in %lu\n", 581 next_log_block); 582 } 583 584 /* If it is a valid descriptor block, replay it 585 * in pass REPLAY; if journal_checksums enabled, then 586 * calculate checksums in PASS_SCAN, otherwise, 587 * just skip over the blocks it describes. */ 588 if (pass != PASS_REPLAY) { 589 if (pass == PASS_SCAN && 590 jbd2_has_feature_checksum(journal) && 591 !need_check_commit_time && 592 !info->end_transaction) { 593 if (calc_chksums(journal, bh, 594 &next_log_block, 595 &crc32_sum)) { 596 put_bh(bh); 597 break; 598 } 599 put_bh(bh); 600 continue; 601 } 602 next_log_block += count_tags(journal, bh); 603 wrap(journal, next_log_block); 604 put_bh(bh); 605 continue; 606 } 607 608 /* A descriptor block: we can now write all of 609 * the data blocks. Yay, useful work is finally 610 * getting done here! */ 611 612 tagp = &bh->b_data[sizeof(journal_header_t)]; 613 while ((tagp - bh->b_data + tag_bytes) 614 <= journal->j_blocksize - descr_csum_size) { 615 unsigned long io_block; 616 617 tag = (journal_block_tag_t *) tagp; 618 flags = be16_to_cpu(tag->t_flags); 619 620 io_block = next_log_block++; 621 wrap(journal, next_log_block); 622 err = jread(&obh, journal, io_block); 623 if (err) { 624 /* Recover what we can, but 625 * report failure at the end. */ 626 success = err; 627 printk(KERN_ERR 628 "JBD2: IO error %d recovering " 629 "block %ld in log\n", 630 err, io_block); 631 } else { 632 unsigned long long blocknr; 633 634 J_ASSERT(obh != NULL); 635 blocknr = read_tag_block(journal, 636 tag); 637 638 /* If the block has been 639 * revoked, then we're all done 640 * here. */ 641 if (jbd2_journal_test_revoke 642 (journal, blocknr, 643 next_commit_ID)) { 644 brelse(obh); 645 ++info->nr_revoke_hits; 646 goto skip_write; 647 } 648 649 /* Look for block corruption */ 650 if (!jbd2_block_tag_csum_verify( 651 journal, tag, obh->b_data, 652 be32_to_cpu(tmp->h_sequence))) { 653 brelse(obh); 654 success = -EFSBADCRC; 655 printk(KERN_ERR "JBD2: Invalid " 656 "checksum recovering " 657 "data block %llu in " 658 "log\n", blocknr); 659 block_error = 1; 660 goto skip_write; 661 } 662 663 /* Find a buffer for the new 664 * data being restored */ 665 nbh = __getblk(journal->j_fs_dev, 666 blocknr, 667 journal->j_blocksize); 668 if (nbh == NULL) { 669 printk(KERN_ERR 670 "JBD2: Out of memory " 671 "during recovery.\n"); 672 err = -ENOMEM; 673 brelse(bh); 674 brelse(obh); 675 goto failed; 676 } 677 678 lock_buffer(nbh); 679 memcpy(nbh->b_data, obh->b_data, 680 journal->j_blocksize); 681 if (flags & JBD2_FLAG_ESCAPE) { 682 *((__be32 *)nbh->b_data) = 683 cpu_to_be32(JBD2_MAGIC_NUMBER); 684 } 685 686 BUFFER_TRACE(nbh, "marking dirty"); 687 set_buffer_uptodate(nbh); 688 mark_buffer_dirty(nbh); 689 BUFFER_TRACE(nbh, "marking uptodate"); 690 ++info->nr_replays; 691 /* ll_rw_block(WRITE, 1, &nbh); */ 692 unlock_buffer(nbh); 693 brelse(obh); 694 brelse(nbh); 695 } 696 697 skip_write: 698 tagp += tag_bytes; 699 if (!(flags & JBD2_FLAG_SAME_UUID)) 700 tagp += 16; 701 702 if (flags & JBD2_FLAG_LAST_TAG) 703 break; 704 } 705 706 brelse(bh); 707 continue; 708 709 case JBD2_COMMIT_BLOCK: 710 /* How to differentiate between interrupted commit 711 * and journal corruption ? 712 * 713 * {nth transaction} 714 * Checksum Verification Failed 715 * | 716 * ____________________ 717 * | | 718 * async_commit sync_commit 719 * | | 720 * | GO TO NEXT "Journal Corruption" 721 * | TRANSACTION 722 * | 723 * {(n+1)th transanction} 724 * | 725 * _______|______________ 726 * | | 727 * Commit block found Commit block not found 728 * | | 729 * "Journal Corruption" | 730 * _____________|_________ 731 * | | 732 * nth trans corrupt OR nth trans 733 * and (n+1)th interrupted interrupted 734 * before commit block 735 * could reach the disk. 736 * (Cannot find the difference in above 737 * mentioned conditions. Hence assume 738 * "Interrupted Commit".) 739 */ 740 commit_time = be64_to_cpu( 741 ((struct commit_header *)bh->b_data)->h_commit_sec); 742 /* 743 * If need_check_commit_time is set, it means we are in 744 * PASS_SCAN and csum verify failed before. If 745 * commit_time is increasing, it's the same journal, 746 * otherwise it is stale journal block, just end this 747 * recovery. 748 */ 749 if (need_check_commit_time) { 750 if (commit_time >= last_trans_commit_time) { 751 pr_err("JBD2: Invalid checksum found in transaction %u\n", 752 next_commit_ID); 753 err = -EFSBADCRC; 754 brelse(bh); 755 goto failed; 756 } 757 ignore_crc_mismatch: 758 /* 759 * It likely does not belong to same journal, 760 * just end this recovery with success. 761 */ 762 jbd_debug(1, "JBD2: Invalid checksum ignored in transaction %u, likely stale data\n", 763 next_commit_ID); 764 err = 0; 765 brelse(bh); 766 goto done; 767 } 768 769 /* 770 * Found an expected commit block: if checksums 771 * are present, verify them in PASS_SCAN; else not 772 * much to do other than move on to the next sequence 773 * number. 774 */ 775 if (pass == PASS_SCAN && 776 jbd2_has_feature_checksum(journal)) { 777 struct commit_header *cbh = 778 (struct commit_header *)bh->b_data; 779 unsigned found_chksum = 780 be32_to_cpu(cbh->h_chksum[0]); 781 782 if (info->end_transaction) { 783 journal->j_failed_commit = 784 info->end_transaction; 785 brelse(bh); 786 break; 787 } 788 789 /* Neither checksum match nor unused? */ 790 if (!((crc32_sum == found_chksum && 791 cbh->h_chksum_type == 792 JBD2_CRC32_CHKSUM && 793 cbh->h_chksum_size == 794 JBD2_CRC32_CHKSUM_SIZE) || 795 (cbh->h_chksum_type == 0 && 796 cbh->h_chksum_size == 0 && 797 found_chksum == 0))) 798 goto chksum_error; 799 800 crc32_sum = ~0; 801 } 802 if (pass == PASS_SCAN && 803 !jbd2_commit_block_csum_verify(journal, 804 bh->b_data)) { 805 chksum_error: 806 if (commit_time < last_trans_commit_time) 807 goto ignore_crc_mismatch; 808 info->end_transaction = next_commit_ID; 809 810 if (!jbd2_has_feature_async_commit(journal)) { 811 journal->j_failed_commit = 812 next_commit_ID; 813 brelse(bh); 814 break; 815 } 816 } 817 if (pass == PASS_SCAN) 818 last_trans_commit_time = commit_time; 819 brelse(bh); 820 next_commit_ID++; 821 continue; 822 823 case JBD2_REVOKE_BLOCK: 824 /* 825 * Check revoke block crc in pass_scan, if csum verify 826 * failed, check commit block time later. 827 */ 828 if (pass == PASS_SCAN && 829 !jbd2_descriptor_block_csum_verify(journal, 830 bh->b_data)) { 831 jbd_debug(1, "JBD2: invalid revoke block found in %lu\n", 832 next_log_block); 833 need_check_commit_time = true; 834 } 835 /* If we aren't in the REVOKE pass, then we can 836 * just skip over this block. */ 837 if (pass != PASS_REVOKE) { 838 brelse(bh); 839 continue; 840 } 841 842 err = scan_revoke_records(journal, bh, 843 next_commit_ID, info); 844 brelse(bh); 845 if (err) 846 goto failed; 847 continue; 848 849 default: 850 jbd_debug(3, "Unrecognised magic %d, end of scan.\n", 851 blocktype); 852 brelse(bh); 853 goto done; 854 } 855 } 856 857 done: 858 /* 859 * We broke out of the log scan loop: either we came to the 860 * known end of the log or we found an unexpected block in the 861 * log. If the latter happened, then we know that the "current" 862 * transaction marks the end of the valid log. 863 */ 864 865 if (pass == PASS_SCAN) { 866 if (!info->end_transaction) 867 info->end_transaction = next_commit_ID; 868 } else { 869 /* It's really bad news if different passes end up at 870 * different places (but possible due to IO errors). */ 871 if (info->end_transaction != next_commit_ID) { 872 printk(KERN_ERR "JBD2: recovery pass %d ended at " 873 "transaction %u, expected %u\n", 874 pass, next_commit_ID, info->end_transaction); 875 if (!success) 876 success = -EIO; 877 } 878 } 879 880 if (jbd2_has_feature_fast_commit(journal) && pass != PASS_REVOKE) { 881 err = fc_do_one_pass(journal, info, pass); 882 if (err) 883 success = err; 884 } 885 886 if (block_error && success == 0) 887 success = -EIO; 888 return success; 889 890 failed: 891 return err; 892 } 893 894 /* Scan a revoke record, marking all blocks mentioned as revoked. */ 895 896 static int scan_revoke_records(journal_t *journal, struct buffer_head *bh, 897 tid_t sequence, struct recovery_info *info) 898 { 899 jbd2_journal_revoke_header_t *header; 900 int offset, max; 901 int csum_size = 0; 902 __u32 rcount; 903 int record_len = 4; 904 905 header = (jbd2_journal_revoke_header_t *) bh->b_data; 906 offset = sizeof(jbd2_journal_revoke_header_t); 907 rcount = be32_to_cpu(header->r_count); 908 909 if (jbd2_journal_has_csum_v2or3(journal)) 910 csum_size = sizeof(struct jbd2_journal_block_tail); 911 if (rcount > journal->j_blocksize - csum_size) 912 return -EINVAL; 913 max = rcount; 914 915 if (jbd2_has_feature_64bit(journal)) 916 record_len = 8; 917 918 while (offset + record_len <= max) { 919 unsigned long long blocknr; 920 int err; 921 922 if (record_len == 4) 923 blocknr = be32_to_cpu(* ((__be32 *) (bh->b_data+offset))); 924 else 925 blocknr = be64_to_cpu(* ((__be64 *) (bh->b_data+offset))); 926 offset += record_len; 927 err = jbd2_journal_set_revoke(journal, blocknr, sequence); 928 if (err) 929 return err; 930 ++info->nr_revokes; 931 } 932 return 0; 933 } 934