1 // SPDX-License-Identifier: GPL-2.0+ 2 /* 3 * linux/fs/jbd2/recovery.c 4 * 5 * Written by Stephen C. Tweedie <sct@redhat.com>, 1999 6 * 7 * Copyright 1999-2000 Red Hat Software --- All Rights Reserved 8 * 9 * Journal recovery routines for the generic filesystem journaling code; 10 * part of the ext2fs journaling system. 11 */ 12 13 #ifndef __KERNEL__ 14 #include "jfs_user.h" 15 #else 16 #include <linux/time.h> 17 #include <linux/fs.h> 18 #include <linux/jbd2.h> 19 #include <linux/errno.h> 20 #include <linux/crc32.h> 21 #include <linux/blkdev.h> 22 #endif 23 24 /* 25 * Maintain information about the progress of the recovery job, so that 26 * the different passes can carry information between them. 27 */ 28 struct recovery_info 29 { 30 tid_t start_transaction; 31 tid_t end_transaction; 32 33 int nr_replays; 34 int nr_revokes; 35 int nr_revoke_hits; 36 }; 37 38 static int do_one_pass(journal_t *journal, 39 struct recovery_info *info, enum passtype pass); 40 static int scan_revoke_records(journal_t *, struct buffer_head *, 41 tid_t, struct recovery_info *); 42 43 #ifdef __KERNEL__ 44 45 /* Release readahead buffers after use */ 46 static void journal_brelse_array(struct buffer_head *b[], int n) 47 { 48 while (--n >= 0) 49 brelse (b[n]); 50 } 51 52 53 /* 54 * When reading from the journal, we are going through the block device 55 * layer directly and so there is no readahead being done for us. We 56 * need to implement any readahead ourselves if we want it to happen at 57 * all. Recovery is basically one long sequential read, so make sure we 58 * do the IO in reasonably large chunks. 59 * 60 * This is not so critical that we need to be enormously clever about 61 * the readahead size, though. 128K is a purely arbitrary, good-enough 62 * fixed value. 63 */ 64 65 #define MAXBUF 8 66 static int do_readahead(journal_t *journal, unsigned int start) 67 { 68 int err; 69 unsigned int max, nbufs, next; 70 unsigned long long blocknr; 71 struct buffer_head *bh; 72 73 struct buffer_head * bufs[MAXBUF]; 74 75 /* Do up to 128K of readahead */ 76 max = start + (128 * 1024 / journal->j_blocksize); 77 if (max > journal->j_total_len) 78 max = journal->j_total_len; 79 80 /* Do the readahead itself. We'll submit MAXBUF buffer_heads at 81 * a time to the block device IO layer. */ 82 83 nbufs = 0; 84 85 for (next = start; next < max; next++) { 86 err = jbd2_journal_bmap(journal, next, &blocknr); 87 88 if (err) { 89 printk(KERN_ERR "JBD2: bad block at offset %u\n", 90 next); 91 goto failed; 92 } 93 94 bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); 95 if (!bh) { 96 err = -ENOMEM; 97 goto failed; 98 } 99 100 if (!buffer_uptodate(bh) && !buffer_locked(bh)) { 101 bufs[nbufs++] = bh; 102 if (nbufs == MAXBUF) { 103 ll_rw_block(REQ_OP_READ, nbufs, bufs); 104 journal_brelse_array(bufs, nbufs); 105 nbufs = 0; 106 } 107 } else 108 brelse(bh); 109 } 110 111 if (nbufs) 112 ll_rw_block(REQ_OP_READ, nbufs, bufs); 113 err = 0; 114 115 failed: 116 if (nbufs) 117 journal_brelse_array(bufs, nbufs); 118 return err; 119 } 120 121 #endif /* __KERNEL__ */ 122 123 124 /* 125 * Read a block from the journal 126 */ 127 128 static int jread(struct buffer_head **bhp, journal_t *journal, 129 unsigned int offset) 130 { 131 int err; 132 unsigned long long blocknr; 133 struct buffer_head *bh; 134 135 *bhp = NULL; 136 137 if (offset >= journal->j_total_len) { 138 printk(KERN_ERR "JBD2: corrupted journal superblock\n"); 139 return -EFSCORRUPTED; 140 } 141 142 err = jbd2_journal_bmap(journal, offset, &blocknr); 143 144 if (err) { 145 printk(KERN_ERR "JBD2: bad block at offset %u\n", 146 offset); 147 return err; 148 } 149 150 bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); 151 if (!bh) 152 return -ENOMEM; 153 154 if (!buffer_uptodate(bh)) { 155 /* If this is a brand new buffer, start readahead. 156 Otherwise, we assume we are already reading it. */ 157 if (!buffer_req(bh)) 158 do_readahead(journal, offset); 159 wait_on_buffer(bh); 160 } 161 162 if (!buffer_uptodate(bh)) { 163 printk(KERN_ERR "JBD2: Failed to read block at offset %u\n", 164 offset); 165 brelse(bh); 166 return -EIO; 167 } 168 169 *bhp = bh; 170 return 0; 171 } 172 173 static int jbd2_descriptor_block_csum_verify(journal_t *j, void *buf) 174 { 175 struct jbd2_journal_block_tail *tail; 176 __be32 provided; 177 __u32 calculated; 178 179 if (!jbd2_journal_has_csum_v2or3(j)) 180 return 1; 181 182 tail = (struct jbd2_journal_block_tail *)((char *)buf + 183 j->j_blocksize - sizeof(struct jbd2_journal_block_tail)); 184 provided = tail->t_checksum; 185 tail->t_checksum = 0; 186 calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize); 187 tail->t_checksum = provided; 188 189 return provided == cpu_to_be32(calculated); 190 } 191 192 /* 193 * Count the number of in-use tags in a journal descriptor block. 194 */ 195 196 static int count_tags(journal_t *journal, struct buffer_head *bh) 197 { 198 char * tagp; 199 journal_block_tag_t tag; 200 int nr = 0, size = journal->j_blocksize; 201 int tag_bytes = journal_tag_bytes(journal); 202 203 if (jbd2_journal_has_csum_v2or3(journal)) 204 size -= sizeof(struct jbd2_journal_block_tail); 205 206 tagp = &bh->b_data[sizeof(journal_header_t)]; 207 208 while ((tagp - bh->b_data + tag_bytes) <= size) { 209 memcpy(&tag, tagp, sizeof(tag)); 210 211 nr++; 212 tagp += tag_bytes; 213 if (!(tag.t_flags & cpu_to_be16(JBD2_FLAG_SAME_UUID))) 214 tagp += 16; 215 216 if (tag.t_flags & cpu_to_be16(JBD2_FLAG_LAST_TAG)) 217 break; 218 } 219 220 return nr; 221 } 222 223 224 /* Make sure we wrap around the log correctly! */ 225 #define wrap(journal, var) \ 226 do { \ 227 unsigned long _wrap_last = \ 228 jbd2_has_feature_fast_commit(journal) ? \ 229 (journal)->j_fc_last : (journal)->j_last; \ 230 \ 231 if (var >= _wrap_last) \ 232 var -= (_wrap_last - (journal)->j_first); \ 233 } while (0) 234 235 static int fc_do_one_pass(journal_t *journal, 236 struct recovery_info *info, enum passtype pass) 237 { 238 unsigned int expected_commit_id = info->end_transaction; 239 unsigned long next_fc_block; 240 struct buffer_head *bh; 241 int err = 0; 242 243 next_fc_block = journal->j_fc_first; 244 if (!journal->j_fc_replay_callback) 245 return 0; 246 247 while (next_fc_block <= journal->j_fc_last) { 248 jbd2_debug(3, "Fast commit replay: next block %ld\n", 249 next_fc_block); 250 err = jread(&bh, journal, next_fc_block); 251 if (err) { 252 jbd2_debug(3, "Fast commit replay: read error\n"); 253 break; 254 } 255 256 err = journal->j_fc_replay_callback(journal, bh, pass, 257 next_fc_block - journal->j_fc_first, 258 expected_commit_id); 259 next_fc_block++; 260 if (err < 0 || err == JBD2_FC_REPLAY_STOP) 261 break; 262 err = 0; 263 } 264 265 if (err) 266 jbd2_debug(3, "Fast commit replay failed, err = %d\n", err); 267 268 return err; 269 } 270 271 /** 272 * jbd2_journal_recover - recovers a on-disk journal 273 * @journal: the journal to recover 274 * 275 * The primary function for recovering the log contents when mounting a 276 * journaled device. 277 * 278 * Recovery is done in three passes. In the first pass, we look for the 279 * end of the log. In the second, we assemble the list of revoke 280 * blocks. In the third and final pass, we replay any un-revoked blocks 281 * in the log. 282 */ 283 int jbd2_journal_recover(journal_t *journal) 284 { 285 int err, err2; 286 journal_superblock_t * sb; 287 288 struct recovery_info info; 289 290 memset(&info, 0, sizeof(info)); 291 sb = journal->j_superblock; 292 293 /* 294 * The journal superblock's s_start field (the current log head) 295 * is always zero if, and only if, the journal was cleanly 296 * unmounted. 297 */ 298 299 if (!sb->s_start) { 300 jbd2_debug(1, "No recovery required, last transaction %d\n", 301 be32_to_cpu(sb->s_sequence)); 302 journal->j_transaction_sequence = be32_to_cpu(sb->s_sequence) + 1; 303 return 0; 304 } 305 306 err = do_one_pass(journal, &info, PASS_SCAN); 307 if (!err) 308 err = do_one_pass(journal, &info, PASS_REVOKE); 309 if (!err) 310 err = do_one_pass(journal, &info, PASS_REPLAY); 311 312 jbd2_debug(1, "JBD2: recovery, exit status %d, " 313 "recovered transactions %u to %u\n", 314 err, info.start_transaction, info.end_transaction); 315 jbd2_debug(1, "JBD2: Replayed %d and revoked %d/%d blocks\n", 316 info.nr_replays, info.nr_revoke_hits, info.nr_revokes); 317 318 /* Restart the log at the next transaction ID, thus invalidating 319 * any existing commit records in the log. */ 320 journal->j_transaction_sequence = ++info.end_transaction; 321 322 jbd2_journal_clear_revoke(journal); 323 err2 = sync_blockdev(journal->j_fs_dev); 324 if (!err) 325 err = err2; 326 /* Make sure all replayed data is on permanent storage */ 327 if (journal->j_flags & JBD2_BARRIER) { 328 err2 = blkdev_issue_flush(journal->j_fs_dev); 329 if (!err) 330 err = err2; 331 } 332 return err; 333 } 334 335 /** 336 * jbd2_journal_skip_recovery - Start journal and wipe exiting records 337 * @journal: journal to startup 338 * 339 * Locate any valid recovery information from the journal and set up the 340 * journal structures in memory to ignore it (presumably because the 341 * caller has evidence that it is out of date). 342 * This function doesn't appear to be exported.. 343 * 344 * We perform one pass over the journal to allow us to tell the user how 345 * much recovery information is being erased, and to let us initialise 346 * the journal transaction sequence numbers to the next unused ID. 347 */ 348 int jbd2_journal_skip_recovery(journal_t *journal) 349 { 350 int err; 351 352 struct recovery_info info; 353 354 memset (&info, 0, sizeof(info)); 355 356 err = do_one_pass(journal, &info, PASS_SCAN); 357 358 if (err) { 359 printk(KERN_ERR "JBD2: error %d scanning journal\n", err); 360 ++journal->j_transaction_sequence; 361 } else { 362 #ifdef CONFIG_JBD2_DEBUG 363 int dropped = info.end_transaction - 364 be32_to_cpu(journal->j_superblock->s_sequence); 365 jbd2_debug(1, 366 "JBD2: ignoring %d transaction%s from the journal.\n", 367 dropped, (dropped == 1) ? "" : "s"); 368 #endif 369 journal->j_transaction_sequence = ++info.end_transaction; 370 } 371 372 journal->j_tail = 0; 373 return err; 374 } 375 376 static inline unsigned long long read_tag_block(journal_t *journal, 377 journal_block_tag_t *tag) 378 { 379 unsigned long long block = be32_to_cpu(tag->t_blocknr); 380 if (jbd2_has_feature_64bit(journal)) 381 block |= (u64)be32_to_cpu(tag->t_blocknr_high) << 32; 382 return block; 383 } 384 385 /* 386 * calc_chksums calculates the checksums for the blocks described in the 387 * descriptor block. 388 */ 389 static int calc_chksums(journal_t *journal, struct buffer_head *bh, 390 unsigned long *next_log_block, __u32 *crc32_sum) 391 { 392 int i, num_blks, err; 393 unsigned long io_block; 394 struct buffer_head *obh; 395 396 num_blks = count_tags(journal, bh); 397 /* Calculate checksum of the descriptor block. */ 398 *crc32_sum = crc32_be(*crc32_sum, (void *)bh->b_data, bh->b_size); 399 400 for (i = 0; i < num_blks; i++) { 401 io_block = (*next_log_block)++; 402 wrap(journal, *next_log_block); 403 err = jread(&obh, journal, io_block); 404 if (err) { 405 printk(KERN_ERR "JBD2: IO error %d recovering block " 406 "%lu in log\n", err, io_block); 407 return 1; 408 } else { 409 *crc32_sum = crc32_be(*crc32_sum, (void *)obh->b_data, 410 obh->b_size); 411 } 412 put_bh(obh); 413 } 414 return 0; 415 } 416 417 static int jbd2_commit_block_csum_verify(journal_t *j, void *buf) 418 { 419 struct commit_header *h; 420 __be32 provided; 421 __u32 calculated; 422 423 if (!jbd2_journal_has_csum_v2or3(j)) 424 return 1; 425 426 h = buf; 427 provided = h->h_chksum[0]; 428 h->h_chksum[0] = 0; 429 calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize); 430 h->h_chksum[0] = provided; 431 432 return provided == cpu_to_be32(calculated); 433 } 434 435 static int jbd2_block_tag_csum_verify(journal_t *j, journal_block_tag_t *tag, 436 journal_block_tag3_t *tag3, 437 void *buf, __u32 sequence) 438 { 439 __u32 csum32; 440 __be32 seq; 441 442 if (!jbd2_journal_has_csum_v2or3(j)) 443 return 1; 444 445 seq = cpu_to_be32(sequence); 446 csum32 = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&seq, sizeof(seq)); 447 csum32 = jbd2_chksum(j, csum32, buf, j->j_blocksize); 448 449 if (jbd2_has_feature_csum3(j)) 450 return tag3->t_checksum == cpu_to_be32(csum32); 451 else 452 return tag->t_checksum == cpu_to_be16(csum32); 453 } 454 455 static int do_one_pass(journal_t *journal, 456 struct recovery_info *info, enum passtype pass) 457 { 458 unsigned int first_commit_ID, next_commit_ID; 459 unsigned long next_log_block; 460 int err, success = 0; 461 journal_superblock_t * sb; 462 journal_header_t * tmp; 463 struct buffer_head * bh; 464 unsigned int sequence; 465 int blocktype; 466 int tag_bytes = journal_tag_bytes(journal); 467 __u32 crc32_sum = ~0; /* Transactional Checksums */ 468 int descr_csum_size = 0; 469 int block_error = 0; 470 bool need_check_commit_time = false; 471 __u64 last_trans_commit_time = 0, commit_time; 472 473 /* 474 * First thing is to establish what we expect to find in the log 475 * (in terms of transaction IDs), and where (in terms of log 476 * block offsets): query the superblock. 477 */ 478 479 sb = journal->j_superblock; 480 next_commit_ID = be32_to_cpu(sb->s_sequence); 481 next_log_block = be32_to_cpu(sb->s_start); 482 483 first_commit_ID = next_commit_ID; 484 if (pass == PASS_SCAN) 485 info->start_transaction = first_commit_ID; 486 487 jbd2_debug(1, "Starting recovery pass %d\n", pass); 488 489 /* 490 * Now we walk through the log, transaction by transaction, 491 * making sure that each transaction has a commit block in the 492 * expected place. Each complete transaction gets replayed back 493 * into the main filesystem. 494 */ 495 496 while (1) { 497 int flags; 498 char * tagp; 499 journal_block_tag_t tag; 500 struct buffer_head * obh; 501 struct buffer_head * nbh; 502 503 cond_resched(); 504 505 /* If we already know where to stop the log traversal, 506 * check right now that we haven't gone past the end of 507 * the log. */ 508 509 if (pass != PASS_SCAN) 510 if (tid_geq(next_commit_ID, info->end_transaction)) 511 break; 512 513 jbd2_debug(2, "Scanning for sequence ID %u at %lu/%lu\n", 514 next_commit_ID, next_log_block, 515 jbd2_has_feature_fast_commit(journal) ? 516 journal->j_fc_last : journal->j_last); 517 518 /* Skip over each chunk of the transaction looking 519 * either the next descriptor block or the final commit 520 * record. */ 521 522 jbd2_debug(3, "JBD2: checking block %ld\n", next_log_block); 523 err = jread(&bh, journal, next_log_block); 524 if (err) 525 goto failed; 526 527 next_log_block++; 528 wrap(journal, next_log_block); 529 530 /* What kind of buffer is it? 531 * 532 * If it is a descriptor block, check that it has the 533 * expected sequence number. Otherwise, we're all done 534 * here. */ 535 536 tmp = (journal_header_t *)bh->b_data; 537 538 if (tmp->h_magic != cpu_to_be32(JBD2_MAGIC_NUMBER)) { 539 brelse(bh); 540 break; 541 } 542 543 blocktype = be32_to_cpu(tmp->h_blocktype); 544 sequence = be32_to_cpu(tmp->h_sequence); 545 jbd2_debug(3, "Found magic %d, sequence %d\n", 546 blocktype, sequence); 547 548 if (sequence != next_commit_ID) { 549 brelse(bh); 550 break; 551 } 552 553 /* OK, we have a valid descriptor block which matches 554 * all of the sequence number checks. What are we going 555 * to do with it? That depends on the pass... */ 556 557 switch(blocktype) { 558 case JBD2_DESCRIPTOR_BLOCK: 559 /* Verify checksum first */ 560 if (jbd2_journal_has_csum_v2or3(journal)) 561 descr_csum_size = 562 sizeof(struct jbd2_journal_block_tail); 563 if (descr_csum_size > 0 && 564 !jbd2_descriptor_block_csum_verify(journal, 565 bh->b_data)) { 566 /* 567 * PASS_SCAN can see stale blocks due to lazy 568 * journal init. Don't error out on those yet. 569 */ 570 if (pass != PASS_SCAN) { 571 pr_err("JBD2: Invalid checksum recovering block %lu in log\n", 572 next_log_block); 573 err = -EFSBADCRC; 574 brelse(bh); 575 goto failed; 576 } 577 need_check_commit_time = true; 578 jbd2_debug(1, 579 "invalid descriptor block found in %lu\n", 580 next_log_block); 581 } 582 583 /* If it is a valid descriptor block, replay it 584 * in pass REPLAY; if journal_checksums enabled, then 585 * calculate checksums in PASS_SCAN, otherwise, 586 * just skip over the blocks it describes. */ 587 if (pass != PASS_REPLAY) { 588 if (pass == PASS_SCAN && 589 jbd2_has_feature_checksum(journal) && 590 !need_check_commit_time && 591 !info->end_transaction) { 592 if (calc_chksums(journal, bh, 593 &next_log_block, 594 &crc32_sum)) { 595 put_bh(bh); 596 break; 597 } 598 put_bh(bh); 599 continue; 600 } 601 next_log_block += count_tags(journal, bh); 602 wrap(journal, next_log_block); 603 put_bh(bh); 604 continue; 605 } 606 607 /* A descriptor block: we can now write all of 608 * the data blocks. Yay, useful work is finally 609 * getting done here! */ 610 611 tagp = &bh->b_data[sizeof(journal_header_t)]; 612 while ((tagp - bh->b_data + tag_bytes) 613 <= journal->j_blocksize - descr_csum_size) { 614 unsigned long io_block; 615 616 memcpy(&tag, tagp, sizeof(tag)); 617 flags = be16_to_cpu(tag.t_flags); 618 619 io_block = next_log_block++; 620 wrap(journal, next_log_block); 621 err = jread(&obh, journal, io_block); 622 if (err) { 623 /* Recover what we can, but 624 * report failure at the end. */ 625 success = err; 626 printk(KERN_ERR 627 "JBD2: IO error %d recovering " 628 "block %ld in log\n", 629 err, io_block); 630 } else { 631 unsigned long long blocknr; 632 633 J_ASSERT(obh != NULL); 634 blocknr = read_tag_block(journal, 635 &tag); 636 637 /* If the block has been 638 * revoked, then we're all done 639 * here. */ 640 if (jbd2_journal_test_revoke 641 (journal, blocknr, 642 next_commit_ID)) { 643 brelse(obh); 644 ++info->nr_revoke_hits; 645 goto skip_write; 646 } 647 648 /* Look for block corruption */ 649 if (!jbd2_block_tag_csum_verify( 650 journal, &tag, (journal_block_tag3_t *)tagp, 651 obh->b_data, be32_to_cpu(tmp->h_sequence))) { 652 brelse(obh); 653 success = -EFSBADCRC; 654 printk(KERN_ERR "JBD2: Invalid " 655 "checksum recovering " 656 "data block %llu in " 657 "log\n", blocknr); 658 block_error = 1; 659 goto skip_write; 660 } 661 662 /* Find a buffer for the new 663 * data being restored */ 664 nbh = __getblk(journal->j_fs_dev, 665 blocknr, 666 journal->j_blocksize); 667 if (nbh == NULL) { 668 printk(KERN_ERR 669 "JBD2: Out of memory " 670 "during recovery.\n"); 671 err = -ENOMEM; 672 brelse(bh); 673 brelse(obh); 674 goto failed; 675 } 676 677 lock_buffer(nbh); 678 memcpy(nbh->b_data, obh->b_data, 679 journal->j_blocksize); 680 if (flags & JBD2_FLAG_ESCAPE) { 681 *((__be32 *)nbh->b_data) = 682 cpu_to_be32(JBD2_MAGIC_NUMBER); 683 } 684 685 BUFFER_TRACE(nbh, "marking dirty"); 686 set_buffer_uptodate(nbh); 687 mark_buffer_dirty(nbh); 688 BUFFER_TRACE(nbh, "marking uptodate"); 689 ++info->nr_replays; 690 /* ll_rw_block(WRITE, 1, &nbh); */ 691 unlock_buffer(nbh); 692 brelse(obh); 693 brelse(nbh); 694 } 695 696 skip_write: 697 tagp += tag_bytes; 698 if (!(flags & JBD2_FLAG_SAME_UUID)) 699 tagp += 16; 700 701 if (flags & JBD2_FLAG_LAST_TAG) 702 break; 703 } 704 705 brelse(bh); 706 continue; 707 708 case JBD2_COMMIT_BLOCK: 709 /* How to differentiate between interrupted commit 710 * and journal corruption ? 711 * 712 * {nth transaction} 713 * Checksum Verification Failed 714 * | 715 * ____________________ 716 * | | 717 * async_commit sync_commit 718 * | | 719 * | GO TO NEXT "Journal Corruption" 720 * | TRANSACTION 721 * | 722 * {(n+1)th transanction} 723 * | 724 * _______|______________ 725 * | | 726 * Commit block found Commit block not found 727 * | | 728 * "Journal Corruption" | 729 * _____________|_________ 730 * | | 731 * nth trans corrupt OR nth trans 732 * and (n+1)th interrupted interrupted 733 * before commit block 734 * could reach the disk. 735 * (Cannot find the difference in above 736 * mentioned conditions. Hence assume 737 * "Interrupted Commit".) 738 */ 739 commit_time = be64_to_cpu( 740 ((struct commit_header *)bh->b_data)->h_commit_sec); 741 /* 742 * If need_check_commit_time is set, it means we are in 743 * PASS_SCAN and csum verify failed before. If 744 * commit_time is increasing, it's the same journal, 745 * otherwise it is stale journal block, just end this 746 * recovery. 747 */ 748 if (need_check_commit_time) { 749 if (commit_time >= last_trans_commit_time) { 750 pr_err("JBD2: Invalid checksum found in transaction %u\n", 751 next_commit_ID); 752 err = -EFSBADCRC; 753 brelse(bh); 754 goto failed; 755 } 756 ignore_crc_mismatch: 757 /* 758 * It likely does not belong to same journal, 759 * just end this recovery with success. 760 */ 761 jbd2_debug(1, "JBD2: Invalid checksum ignored in transaction %u, likely stale data\n", 762 next_commit_ID); 763 brelse(bh); 764 goto done; 765 } 766 767 /* 768 * Found an expected commit block: if checksums 769 * are present, verify them in PASS_SCAN; else not 770 * much to do other than move on to the next sequence 771 * number. 772 */ 773 if (pass == PASS_SCAN && 774 jbd2_has_feature_checksum(journal)) { 775 struct commit_header *cbh = 776 (struct commit_header *)bh->b_data; 777 unsigned found_chksum = 778 be32_to_cpu(cbh->h_chksum[0]); 779 780 if (info->end_transaction) { 781 journal->j_failed_commit = 782 info->end_transaction; 783 brelse(bh); 784 break; 785 } 786 787 /* Neither checksum match nor unused? */ 788 if (!((crc32_sum == found_chksum && 789 cbh->h_chksum_type == 790 JBD2_CRC32_CHKSUM && 791 cbh->h_chksum_size == 792 JBD2_CRC32_CHKSUM_SIZE) || 793 (cbh->h_chksum_type == 0 && 794 cbh->h_chksum_size == 0 && 795 found_chksum == 0))) 796 goto chksum_error; 797 798 crc32_sum = ~0; 799 } 800 if (pass == PASS_SCAN && 801 !jbd2_commit_block_csum_verify(journal, 802 bh->b_data)) { 803 chksum_error: 804 if (commit_time < last_trans_commit_time) 805 goto ignore_crc_mismatch; 806 info->end_transaction = next_commit_ID; 807 808 if (!jbd2_has_feature_async_commit(journal)) { 809 journal->j_failed_commit = 810 next_commit_ID; 811 brelse(bh); 812 break; 813 } 814 } 815 if (pass == PASS_SCAN) 816 last_trans_commit_time = commit_time; 817 brelse(bh); 818 next_commit_ID++; 819 continue; 820 821 case JBD2_REVOKE_BLOCK: 822 /* 823 * Check revoke block crc in pass_scan, if csum verify 824 * failed, check commit block time later. 825 */ 826 if (pass == PASS_SCAN && 827 !jbd2_descriptor_block_csum_verify(journal, 828 bh->b_data)) { 829 jbd2_debug(1, "JBD2: invalid revoke block found in %lu\n", 830 next_log_block); 831 need_check_commit_time = true; 832 } 833 /* If we aren't in the REVOKE pass, then we can 834 * just skip over this block. */ 835 if (pass != PASS_REVOKE) { 836 brelse(bh); 837 continue; 838 } 839 840 err = scan_revoke_records(journal, bh, 841 next_commit_ID, info); 842 brelse(bh); 843 if (err) 844 goto failed; 845 continue; 846 847 default: 848 jbd2_debug(3, "Unrecognised magic %d, end of scan.\n", 849 blocktype); 850 brelse(bh); 851 goto done; 852 } 853 } 854 855 done: 856 /* 857 * We broke out of the log scan loop: either we came to the 858 * known end of the log or we found an unexpected block in the 859 * log. If the latter happened, then we know that the "current" 860 * transaction marks the end of the valid log. 861 */ 862 863 if (pass == PASS_SCAN) { 864 if (!info->end_transaction) 865 info->end_transaction = next_commit_ID; 866 } else { 867 /* It's really bad news if different passes end up at 868 * different places (but possible due to IO errors). */ 869 if (info->end_transaction != next_commit_ID) { 870 printk(KERN_ERR "JBD2: recovery pass %d ended at " 871 "transaction %u, expected %u\n", 872 pass, next_commit_ID, info->end_transaction); 873 if (!success) 874 success = -EIO; 875 } 876 } 877 878 if (jbd2_has_feature_fast_commit(journal) && pass != PASS_REVOKE) { 879 err = fc_do_one_pass(journal, info, pass); 880 if (err) 881 success = err; 882 } 883 884 if (block_error && success == 0) 885 success = -EIO; 886 return success; 887 888 failed: 889 return err; 890 } 891 892 /* Scan a revoke record, marking all blocks mentioned as revoked. */ 893 894 static int scan_revoke_records(journal_t *journal, struct buffer_head *bh, 895 tid_t sequence, struct recovery_info *info) 896 { 897 jbd2_journal_revoke_header_t *header; 898 int offset, max; 899 unsigned csum_size = 0; 900 __u32 rcount; 901 int record_len = 4; 902 903 header = (jbd2_journal_revoke_header_t *) bh->b_data; 904 offset = sizeof(jbd2_journal_revoke_header_t); 905 rcount = be32_to_cpu(header->r_count); 906 907 if (jbd2_journal_has_csum_v2or3(journal)) 908 csum_size = sizeof(struct jbd2_journal_block_tail); 909 if (rcount > journal->j_blocksize - csum_size) 910 return -EINVAL; 911 max = rcount; 912 913 if (jbd2_has_feature_64bit(journal)) 914 record_len = 8; 915 916 while (offset + record_len <= max) { 917 unsigned long long blocknr; 918 int err; 919 920 if (record_len == 4) 921 blocknr = be32_to_cpu(* ((__be32 *) (bh->b_data+offset))); 922 else 923 blocknr = be64_to_cpu(* ((__be64 *) (bh->b_data+offset))); 924 offset += record_len; 925 err = jbd2_journal_set_revoke(journal, blocknr, sequence); 926 if (err) 927 return err; 928 ++info->nr_revokes; 929 } 930 return 0; 931 } 932