1 // SPDX-License-Identifier: GPL-2.0+ 2 /* 3 * linux/fs/jbd2/recovery.c 4 * 5 * Written by Stephen C. Tweedie <sct@redhat.com>, 1999 6 * 7 * Copyright 1999-2000 Red Hat Software --- All Rights Reserved 8 * 9 * Journal recovery routines for the generic filesystem journaling code; 10 * part of the ext2fs journaling system. 11 */ 12 13 #ifndef __KERNEL__ 14 #include "jfs_user.h" 15 #else 16 #include <linux/time.h> 17 #include <linux/fs.h> 18 #include <linux/jbd2.h> 19 #include <linux/errno.h> 20 #include <linux/crc32.h> 21 #include <linux/blkdev.h> 22 #endif 23 24 /* 25 * Maintain information about the progress of the recovery job, so that 26 * the different passes can carry information between them. 27 */ 28 struct recovery_info 29 { 30 tid_t start_transaction; 31 tid_t end_transaction; 32 unsigned long head_block; 33 34 int nr_replays; 35 int nr_revokes; 36 int nr_revoke_hits; 37 }; 38 39 static int do_one_pass(journal_t *journal, 40 struct recovery_info *info, enum passtype pass); 41 static int scan_revoke_records(journal_t *, struct buffer_head *, 42 tid_t, struct recovery_info *); 43 44 #ifdef __KERNEL__ 45 46 /* Release readahead buffers after use */ 47 static void journal_brelse_array(struct buffer_head *b[], int n) 48 { 49 while (--n >= 0) 50 brelse (b[n]); 51 } 52 53 54 /* 55 * When reading from the journal, we are going through the block device 56 * layer directly and so there is no readahead being done for us. We 57 * need to implement any readahead ourselves if we want it to happen at 58 * all. Recovery is basically one long sequential read, so make sure we 59 * do the IO in reasonably large chunks. 60 * 61 * This is not so critical that we need to be enormously clever about 62 * the readahead size, though. 128K is a purely arbitrary, good-enough 63 * fixed value. 
64 */ 65 66 #define MAXBUF 8 67 static int do_readahead(journal_t *journal, unsigned int start) 68 { 69 int err; 70 unsigned int max, nbufs, next; 71 unsigned long long blocknr; 72 struct buffer_head *bh; 73 74 struct buffer_head * bufs[MAXBUF]; 75 76 /* Do up to 128K of readahead */ 77 max = start + (128 * 1024 / journal->j_blocksize); 78 if (max > journal->j_total_len) 79 max = journal->j_total_len; 80 81 /* Do the readahead itself. We'll submit MAXBUF buffer_heads at 82 * a time to the block device IO layer. */ 83 84 nbufs = 0; 85 86 for (next = start; next < max; next++) { 87 err = jbd2_journal_bmap(journal, next, &blocknr); 88 89 if (err) { 90 printk(KERN_ERR "JBD2: bad block at offset %u\n", 91 next); 92 goto failed; 93 } 94 95 bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); 96 if (!bh) { 97 err = -ENOMEM; 98 goto failed; 99 } 100 101 if (!buffer_uptodate(bh) && !buffer_locked(bh)) { 102 bufs[nbufs++] = bh; 103 if (nbufs == MAXBUF) { 104 bh_readahead_batch(nbufs, bufs, 0); 105 journal_brelse_array(bufs, nbufs); 106 nbufs = 0; 107 } 108 } else 109 brelse(bh); 110 } 111 112 if (nbufs) 113 bh_readahead_batch(nbufs, bufs, 0); 114 err = 0; 115 116 failed: 117 if (nbufs) 118 journal_brelse_array(bufs, nbufs); 119 return err; 120 } 121 122 #endif /* __KERNEL__ */ 123 124 125 /* 126 * Read a block from the journal 127 */ 128 129 static int jread(struct buffer_head **bhp, journal_t *journal, 130 unsigned int offset) 131 { 132 int err; 133 unsigned long long blocknr; 134 struct buffer_head *bh; 135 136 *bhp = NULL; 137 138 if (offset >= journal->j_total_len) { 139 printk(KERN_ERR "JBD2: corrupted journal superblock\n"); 140 return -EFSCORRUPTED; 141 } 142 143 err = jbd2_journal_bmap(journal, offset, &blocknr); 144 145 if (err) { 146 printk(KERN_ERR "JBD2: bad block at offset %u\n", 147 offset); 148 return err; 149 } 150 151 bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); 152 if (!bh) 153 return -ENOMEM; 154 155 if (!buffer_uptodate(bh)) { 
156 /* 157 * If this is a brand new buffer, start readahead. 158 * Otherwise, we assume we are already reading it. 159 */ 160 bool need_readahead = !buffer_req(bh); 161 162 bh_read_nowait(bh, 0); 163 if (need_readahead) 164 do_readahead(journal, offset); 165 wait_on_buffer(bh); 166 } 167 168 if (!buffer_uptodate(bh)) { 169 printk(KERN_ERR "JBD2: Failed to read block at offset %u\n", 170 offset); 171 brelse(bh); 172 return -EIO; 173 } 174 175 *bhp = bh; 176 return 0; 177 } 178 179 static int jbd2_descriptor_block_csum_verify(journal_t *j, void *buf) 180 { 181 struct jbd2_journal_block_tail *tail; 182 __be32 provided; 183 __u32 calculated; 184 185 if (!jbd2_journal_has_csum_v2or3(j)) 186 return 1; 187 188 tail = (struct jbd2_journal_block_tail *)((char *)buf + 189 j->j_blocksize - sizeof(struct jbd2_journal_block_tail)); 190 provided = tail->t_checksum; 191 tail->t_checksum = 0; 192 calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize); 193 tail->t_checksum = provided; 194 195 return provided == cpu_to_be32(calculated); 196 } 197 198 /* 199 * Count the number of in-use tags in a journal descriptor block. 200 */ 201 202 static int count_tags(journal_t *journal, struct buffer_head *bh) 203 { 204 char * tagp; 205 journal_block_tag_t tag; 206 int nr = 0, size = journal->j_blocksize; 207 int tag_bytes = journal_tag_bytes(journal); 208 209 if (jbd2_journal_has_csum_v2or3(journal)) 210 size -= sizeof(struct jbd2_journal_block_tail); 211 212 tagp = &bh->b_data[sizeof(journal_header_t)]; 213 214 while ((tagp - bh->b_data + tag_bytes) <= size) { 215 memcpy(&tag, tagp, sizeof(tag)); 216 217 nr++; 218 tagp += tag_bytes; 219 if (!(tag.t_flags & cpu_to_be16(JBD2_FLAG_SAME_UUID))) 220 tagp += 16; 221 222 if (tag.t_flags & cpu_to_be16(JBD2_FLAG_LAST_TAG)) 223 break; 224 } 225 226 return nr; 227 } 228 229 230 /* Make sure we wrap around the log correctly! 
*/ 231 #define wrap(journal, var) \ 232 do { \ 233 if (var >= (journal)->j_last) \ 234 var -= ((journal)->j_last - (journal)->j_first); \ 235 } while (0) 236 237 static int fc_do_one_pass(journal_t *journal, 238 struct recovery_info *info, enum passtype pass) 239 { 240 unsigned int expected_commit_id = info->end_transaction; 241 unsigned long next_fc_block; 242 struct buffer_head *bh; 243 int err = 0; 244 245 next_fc_block = journal->j_fc_first; 246 if (!journal->j_fc_replay_callback) 247 return 0; 248 249 while (next_fc_block <= journal->j_fc_last) { 250 jbd2_debug(3, "Fast commit replay: next block %ld\n", 251 next_fc_block); 252 err = jread(&bh, journal, next_fc_block); 253 if (err) { 254 jbd2_debug(3, "Fast commit replay: read error\n"); 255 break; 256 } 257 258 err = journal->j_fc_replay_callback(journal, bh, pass, 259 next_fc_block - journal->j_fc_first, 260 expected_commit_id); 261 brelse(bh); 262 next_fc_block++; 263 if (err < 0 || err == JBD2_FC_REPLAY_STOP) 264 break; 265 err = 0; 266 } 267 268 if (err) 269 jbd2_debug(3, "Fast commit replay failed, err = %d\n", err); 270 271 return err; 272 } 273 274 /** 275 * jbd2_journal_recover - recovers a on-disk journal 276 * @journal: the journal to recover 277 * 278 * The primary function for recovering the log contents when mounting a 279 * journaled device. 280 * 281 * Recovery is done in three passes. In the first pass, we look for the 282 * end of the log. In the second, we assemble the list of revoke 283 * blocks. In the third and final pass, we replay any un-revoked blocks 284 * in the log. 
 */
int jbd2_journal_recover(journal_t *journal)
{
	int			err, err2;
	journal_superblock_t *	sb;

	struct recovery_info	info;
	errseq_t		wb_err;
	struct address_space	*mapping;

	memset(&info, 0, sizeof(info));
	sb = journal->j_superblock;

	/*
	 * The journal superblock's s_start field (the current log head)
	 * is always zero if, and only if, the journal was cleanly
	 * unmounted.
	 */
	if (!sb->s_start) {
		jbd2_debug(1, "No recovery required, last transaction %d, head block %u\n",
			  be32_to_cpu(sb->s_sequence), be32_to_cpu(sb->s_head));
		journal->j_transaction_sequence = be32_to_cpu(sb->s_sequence) + 1;
		journal->j_head = be32_to_cpu(sb->s_head);
		return 0;
	}

	/*
	 * Sample the writeback error state of the filesystem device's
	 * mapping before replaying, so that it can be checked again once
	 * the replayed blocks have been synced below.
	 */
	wb_err = 0;
	mapping = journal->j_fs_dev->bd_inode->i_mapping;
	errseq_check_and_advance(&mapping->wb_err, &wb_err);

	/* Run the three passes in order; stop at the first one that fails. */
	err = do_one_pass(journal, &info, PASS_SCAN);
	if (!err)
		err = do_one_pass(journal, &info, PASS_REVOKE);
	if (!err)
		err = do_one_pass(journal, &info, PASS_REPLAY);

	jbd2_debug(1, "JBD2: recovery, exit status %d, "
		  "recovered transactions %u to %u\n",
		  err, info.start_transaction, info.end_transaction);
	jbd2_debug(1, "JBD2: Replayed %d and revoked %d/%d blocks\n",
		  info.nr_replays, info.nr_revoke_hits, info.nr_revokes);

	/* Restart the log at the next transaction ID, thus invalidating
	 * any existing commit records in the log. */
	journal->j_transaction_sequence = ++info.end_transaction;
	journal->j_head = info.head_block;
	jbd2_debug(1, "JBD2: last transaction %d, head block %lu\n",
		  journal->j_transaction_sequence, journal->j_head);

	/*
	 * The cleanup steps below always run; only the first error seen
	 * (from the passes or from the cleanup) is returned.
	 */
	jbd2_journal_clear_revoke(journal);
	err2 = sync_blockdev(journal->j_fs_dev);
	if (!err)
		err = err2;
	err2 = errseq_check_and_advance(&mapping->wb_err, &wb_err);
	if (!err)
		err = err2;
	/* Make sure all replayed data is on permanent storage */
	if (journal->j_flags & JBD2_BARRIER) {
		err2 = blkdev_issue_flush(journal->j_fs_dev);
		if (!err)
			err = err2;
	}
	return err;
}

/**
 * jbd2_journal_skip_recovery - Start journal and wipe existing records
 * @journal: journal to startup
 *
 * Locate any valid recovery information from the journal and set up the
 * journal structures in memory to ignore it (presumably because the
 * caller has evidence that it is out of date).
 * This function doesn't appear to be exported..
 *
 * We perform one pass over the journal to allow us to tell the user how
 * much recovery information is being erased, and to let us initialise
 * the journal transaction sequence numbers to the next unused ID.
 *
 * Return: the result of the scan pass (0 on success).
 */
int jbd2_journal_skip_recovery(journal_t *journal)
{
	int			err;

	struct recovery_info	info;

	memset (&info, 0, sizeof(info));

	err = do_one_pass(journal, &info, PASS_SCAN);

	if (err) {
		/*
		 * The scan failed, so its results are not used: just move
		 * to the next sequence number and restart the log at its
		 * first block.
		 */
		printk(KERN_ERR "JBD2: error %d scanning journal\n", err);
		++journal->j_transaction_sequence;
		journal->j_head = journal->j_first;
	} else {
#ifdef CONFIG_JBD2_DEBUG
		/* Number of transactions found beyond the superblock's ID. */
		int dropped = info.end_transaction - 
			be32_to_cpu(journal->j_superblock->s_sequence);
		jbd2_debug(1,
			  "JBD2: ignoring %d transaction%s from the journal.\n",
			  dropped, (dropped == 1) ? "" : "s");
#endif
		journal->j_transaction_sequence = ++info.end_transaction;
		journal->j_head = info.head_block;
	}

	journal->j_tail = 0;
	return err;
}

/*
 * Return the filesystem block number stored in @tag.  The high 32 bits
 * are taken from the tag only when the journal has the 64bit feature.
 */
static inline unsigned long long read_tag_block(journal_t *journal,
						journal_block_tag_t *tag)
{
	unsigned long long block = be32_to_cpu(tag->t_blocknr);
	if (jbd2_has_feature_64bit(journal))
		block |= (u64)be32_to_cpu(tag->t_blocknr_high) << 32;
	return block;
}

/*
 * calc_chksums calculates the checksums for the blocks described in the
 * descriptor block.
 *
 * @bh is the descriptor block; *@next_log_block is the log offset of the
 * first block it describes and is advanced (with wrap-around) past every
 * block that is consumed.  The running CRC in *@crc32_sum is updated
 * with the descriptor block itself and then with each described block.
 *
 * Returns 0 on success, 1 if one of the described blocks could not be
 * read.
 */
static int calc_chksums(journal_t *journal, struct buffer_head *bh,
			unsigned long *next_log_block, __u32 *crc32_sum)
{
	int i, num_blks, err;
	unsigned long io_block;
	struct buffer_head *obh;

	num_blks = count_tags(journal, bh);
	/* Calculate checksum of the descriptor block. */
	*crc32_sum = crc32_be(*crc32_sum, (void *)bh->b_data, bh->b_size);

	for (i = 0; i < num_blks; i++) {
		io_block = (*next_log_block)++;
		wrap(journal, *next_log_block);
		err = jread(&obh, journal, io_block);
		if (err) {
			/* jread() left obh NULL, so there is nothing to drop. */
			printk(KERN_ERR "JBD2: IO error %d recovering block "
				"%lu in log\n", err, io_block);
			return 1;
		} else {
			*crc32_sum = crc32_be(*crc32_sum, (void *)obh->b_data,
				     obh->b_size);
		}
		put_bh(obh);
	}
	return 0;
}

/*
 * Verify the v2/v3 checksum of a commit block.  h_chksum[0] is zeroed
 * in place while the checksum over the whole block is computed and is
 * restored afterwards.
 *
 * Returns non-zero if the checksum matches or if the journal does not
 * use v2/v3 checksums, zero on mismatch.
 */
static int jbd2_commit_block_csum_verify(journal_t *j, void *buf)
{
	struct commit_header *h;
	__be32 provided;
	__u32 calculated;

	if (!jbd2_journal_has_csum_v2or3(j))
		return 1;

	h = buf;
	provided = h->h_chksum[0];
	h->h_chksum[0] = 0;
	calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize);
	h->h_chksum[0] = provided;

	return provided == cpu_to_be32(calculated);
}

/*
 * Verify a commit block's checksum against a block that contains only
 * its struct commit_header followed by zeroes.  The header is copied
 * into a zero-filled scratch block and the checksum is computed over
 * that scratch block, leaving @buf untouched.
 *
 * Returns true on a match; false on mismatch or if the scratch block
 * cannot be allocated.
 */
static bool jbd2_commit_block_csum_verify_partial(journal_t *j, void *buf)
{
	struct commit_header *h;
	__be32 provided;
	__u32 calculated;
	void *tmpbuf;

	tmpbuf = kzalloc(j->j_blocksize, GFP_KERNEL);
	if (!tmpbuf)
		return false;

	memcpy(tmpbuf, buf, sizeof(struct commit_header));
	h = tmpbuf;
	provided = h->h_chksum[0];
	h->h_chksum[0] = 0;
	calculated = jbd2_chksum(j, j->j_csum_seed, tmpbuf, j->j_blocksize);
	kfree(tmpbuf);

	return provided == cpu_to_be32(calculated);
}

/*
 * Verify the per-block checksum stored in a descriptor tag.  The
 * checksum covers the big-endian transaction @sequence followed by the
 * data block @buf.  With the csum3 feature the 32-bit value in @tag3 is
 * compared, otherwise the 16-bit value in @tag.
 *
 * Returns non-zero if the checksum matches or if the journal does not
 * use v2/v3 checksums, zero on mismatch.
 */
static int jbd2_block_tag_csum_verify(journal_t *j, journal_block_tag_t *tag,
				      journal_block_tag3_t *tag3,
				      void *buf, __u32 sequence)
{
	__u32 csum32;
	__be32 seq;

	if (!jbd2_journal_has_csum_v2or3(j))
		return 1;

	seq = cpu_to_be32(sequence);
	csum32 = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&seq, sizeof(seq));
	csum32 = jbd2_chksum(j, csum32, buf, j->j_blocksize);

	if (jbd2_has_feature_csum3(j))
		return tag3->t_checksum == cpu_to_be32(csum32);
	else
		return tag->t_checksum == cpu_to_be16(csum32);
}

/*
 * do_one_pass - walk the log once, doing the work of the given pass
 * @journal: journal being recovered
 * @info: state shared between passes; PASS_SCAN fills in
 *	  start_transaction, end_transaction and head_block, the later
 *	  passes stop at end_transaction and update the counters
 * @pass: PASS_SCAN, PASS_REVOKE or PASS_REPLAY
 *
 * Starting from the superblock's s_start and s_sequence, log blocks are
 * read one at a time and dispatched on their block type:
 *  - descriptor blocks are skipped over (optionally checksummed) in the
 *    scan and revoke passes and replayed in the replay pass;
 *  - commit blocks are verified in the scan pass and advance the
 *    expected transaction ID;
 *  - revoke blocks are registered in the revoke pass.
 * The walk ends at the first block without the journal magic, with an
 * unexpected sequence number or with an unknown type.
 *
 * Returns 0 on success, otherwise an error code.  Errors that jump to
 * "failed" are returned immediately; problems met while replaying
 * individual blocks are remembered in "success" and reported after the
 * walk has finished.
 */
static int do_one_pass(journal_t *journal,
			struct recovery_info *info, enum passtype pass)
{
	unsigned int		first_commit_ID, next_commit_ID;
	unsigned long		next_log_block, head_block;
	int			err, success = 0;
	journal_superblock_t *	sb;
	journal_header_t *	tmp;
	struct buffer_head *	bh;
	unsigned int		sequence;
	int			blocktype;
	int			tag_bytes = journal_tag_bytes(journal);
	__u32			crc32_sum = ~0; /* Transactional Checksums */
	int			descr_csum_size = 0;
	int			block_error = 0;
	/* Set in PASS_SCAN once a descriptor/revoke checksum has failed. */
	bool			need_check_commit_time = false;
	/* Commit time of the last accepted transaction, and of this one. */
	__u64			last_trans_commit_time = 0, commit_time;

	/*
	 * First thing is to establish what we expect to find in the log
	 * (in terms of transaction IDs), and where (in terms of log
	 * block offsets): query the superblock.
	 */

	sb = journal->j_superblock;
	next_commit_ID = be32_to_cpu(sb->s_sequence);
	next_log_block = be32_to_cpu(sb->s_start);
	head_block = next_log_block;

	first_commit_ID = next_commit_ID;
	if (pass == PASS_SCAN)
		info->start_transaction = first_commit_ID;

	jbd2_debug(1, "Starting recovery pass %d\n", pass);

	/*
	 * Now we walk through the log, transaction by transaction,
	 * making sure that each transaction has a commit block in the
	 * expected place.  Each complete transaction gets replayed back
	 * into the main filesystem.
	 */

	while (1) {
		int			flags;
		char *			tagp;
		journal_block_tag_t	tag;
		struct buffer_head *	obh;
		struct buffer_head *	nbh;

		cond_resched();

		/* If we already know where to stop the log traversal,
		 * check right now that we haven't gone past the end of
		 * the log. */

		if (pass != PASS_SCAN)
			if (tid_geq(next_commit_ID, info->end_transaction))
				break;

		jbd2_debug(2, "Scanning for sequence ID %u at %lu/%lu\n",
			  next_commit_ID, next_log_block, journal->j_last);

		/* Skip over each chunk of the transaction looking
		 * for either the next descriptor block or the final
		 * commit record. */

		jbd2_debug(3, "JBD2: checking block %ld\n", next_log_block);
		err = jread(&bh, journal, next_log_block);
		if (err)
			goto failed;

		/* From here on next_log_block names the block after bh. */
		next_log_block++;
		wrap(journal, next_log_block);

		/* What kind of buffer is it?
		 *
		 * If it is a descriptor block, check that it has the
		 * expected sequence number.  Otherwise, we're all done
		 * here. */

		tmp = (journal_header_t *)bh->b_data;

		if (tmp->h_magic != cpu_to_be32(JBD2_MAGIC_NUMBER)) {
			brelse(bh);
			break;
		}

		blocktype = be32_to_cpu(tmp->h_blocktype);
		sequence = be32_to_cpu(tmp->h_sequence);
		jbd2_debug(3, "Found magic %d, sequence %d\n",
			  blocktype, sequence);

		if (sequence != next_commit_ID) {
			brelse(bh);
			break;
		}

		/* OK, we have a valid descriptor block which matches
		 * all of the sequence number checks.  What are we going
		 * to do with it?  That depends on the pass... */

		switch(blocktype) {
		case JBD2_DESCRIPTOR_BLOCK:
			/* Verify checksum first */
			if (jbd2_journal_has_csum_v2or3(journal))
				descr_csum_size =
					sizeof(struct jbd2_journal_block_tail);
			if (descr_csum_size > 0 &&
			    !jbd2_descriptor_block_csum_verify(journal,
							       bh->b_data)) {
				/*
				 * PASS_SCAN can see stale blocks due to lazy
				 * journal init. Don't error out on those yet.
				 */
				if (pass != PASS_SCAN) {
					pr_err("JBD2: Invalid checksum recovering block %lu in log\n",
					       next_log_block);
					err = -EFSBADCRC;
					brelse(bh);
					goto failed;
				}
				/* Decide at the commit block, using its time. */
				need_check_commit_time = true;
				jbd2_debug(1,
					"invalid descriptor block found in %lu\n",
					next_log_block);
			}

			/* If it is a valid descriptor block, replay it
			 * in pass REPLAY; if journal_checksums enabled, then
			 * calculate checksums in PASS_SCAN, otherwise,
			 * just skip over the blocks it describes. */
			if (pass != PASS_REPLAY) {
				if (pass == PASS_SCAN &&
				    jbd2_has_feature_checksum(journal) &&
				    !need_check_commit_time &&
				    !info->end_transaction) {
					/* calc_chksums() advances next_log_block. */
					if (calc_chksums(journal, bh,
							&next_log_block,
							&crc32_sum)) {
						put_bh(bh);
						break;
					}
					put_bh(bh);
					continue;
				}
				next_log_block += count_tags(journal, bh);
				wrap(journal, next_log_block);
				put_bh(bh);
				continue;
			}

			/* A descriptor block: we can now write all of
			 * the data blocks.  Yay, useful work is finally
			 * getting done here! */

			tagp = &bh->b_data[sizeof(journal_header_t)];
			while ((tagp - bh->b_data + tag_bytes)
			       <= journal->j_blocksize - descr_csum_size) {
				unsigned long io_block;

				/* flags is read before any goto skip_write. */
				memcpy(&tag, tagp, sizeof(tag));
				flags = be16_to_cpu(tag.t_flags);

				io_block = next_log_block++;
				wrap(journal, next_log_block);
				err = jread(&obh, journal, io_block);
				if (err) {
					/* Recover what we can, but
					 * report failure at the end. */
					success = err;
					printk(KERN_ERR
						"JBD2: IO error %d recovering "
						"block %ld in log\n",
						err, io_block);
				} else {
					unsigned long long blocknr;

					J_ASSERT(obh != NULL);
					blocknr = read_tag_block(journal,
								 &tag);

					/* If the block has been
					 * revoked, then we're all done
					 * here. */
					if (jbd2_journal_test_revoke
					    (journal, blocknr,
					     next_commit_ID)) {
						brelse(obh);
						++info->nr_revoke_hits;
						goto skip_write;
					}

					/* Look for block corruption */
					if (!jbd2_block_tag_csum_verify(
			journal, &tag, (journal_block_tag3_t *)tagp,
			obh->b_data, be32_to_cpu(tmp->h_sequence))) {
						brelse(obh);
						success = -EFSBADCRC;
						printk(KERN_ERR "JBD2: Invalid "
						       "checksum recovering "
						       "data block %llu in "
						       "log\n", blocknr);
						block_error = 1;
						goto skip_write;
					}

					/* Find a buffer for the new
					 * data being restored */
					nbh = __getblk(journal->j_fs_dev,
							blocknr,
							journal->j_blocksize);
					if (nbh == NULL) {
						printk(KERN_ERR
						       "JBD2: Out of memory "
						       "during recovery.\n");
						err = -ENOMEM;
						brelse(bh);
						brelse(obh);
						goto failed;
					}

					lock_buffer(nbh);
					memcpy(nbh->b_data, obh->b_data,
							journal->j_blocksize);
					/*
					 * An escaped block has the journal
					 * magic written back into its
					 * first four bytes.
					 */
					if (flags & JBD2_FLAG_ESCAPE) {
						*((__be32 *)nbh->b_data) =
						cpu_to_be32(JBD2_MAGIC_NUMBER);
					}

					BUFFER_TRACE(nbh, "marking dirty");
					set_buffer_uptodate(nbh);
					mark_buffer_dirty(nbh);
					BUFFER_TRACE(nbh, "marking uptodate");
					++info->nr_replays;
					unlock_buffer(nbh);
					brelse(obh);
					brelse(nbh);
				}

			skip_write:
				/* Step to the next tag (and past its UUID). */
				tagp += tag_bytes;
				if (!(flags & JBD2_FLAG_SAME_UUID))
					tagp += 16;

				if (flags & JBD2_FLAG_LAST_TAG)
					break;
			}

			brelse(bh);
			continue;

		case JBD2_COMMIT_BLOCK:
			/*     How to differentiate between interrupted commit
			 *               and journal corruption ?
			 *
			 * {nth transaction}
			 *        Checksum Verification Failed
			 *                       |
			 *               ____________________
			 *              |                    |
			 *      async_commit             sync_commit
			 *              |                    |
			 *              | GO TO NEXT    "Journal Corruption"
			 *              | TRANSACTION
			 *              |
			 * {(n+1)th transaction}
			 *              |
			 *       _______|______________
			 *      |                     |
			 * Commit block found   Commit block not found
			 *      |                     |
			 * "Journal Corruption"       |
			 *               _____________|_________
			 *              |                       |
			 *      nth trans corrupt       OR   nth trans
			 *      and (n+1)th interrupted     interrupted
			 *      before commit block
			 *      could reach the disk.
			 *      (Cannot find the difference in above
			 *       mentioned conditions. Hence assume
			 *       "Interrupted Commit".)
			 */
			commit_time = be64_to_cpu(
				((struct commit_header *)bh->b_data)->h_commit_sec);
			/*
			 * If need_check_commit_time is set, it means we are in
			 * PASS_SCAN and csum verify failed before. If
			 * commit_time is increasing, it's the same journal,
			 * otherwise it is stale journal block, just end this
			 * recovery.
			 */
			if (need_check_commit_time) {
				if (commit_time >= last_trans_commit_time) {
					pr_err("JBD2: Invalid checksum found in transaction %u\n",
					       next_commit_ID);
					err = -EFSBADCRC;
					brelse(bh);
					goto failed;
				}
			/* Also reached from chksum_error below. */
			ignore_crc_mismatch:
				/*
				 * It likely does not belong to same journal,
				 * just end this recovery with success.
				 */
				jbd2_debug(1, "JBD2: Invalid checksum ignored in transaction %u, likely stale data\n",
					  next_commit_ID);
				brelse(bh);
				goto done;
			}

			/*
			 * Found an expected commit block: if checksums
			 * are present, verify them in PASS_SCAN; else not
			 * much to do other than move on to the next sequence
			 * number.
			 */
			if (pass == PASS_SCAN &&
			    jbd2_has_feature_checksum(journal)) {
				struct commit_header *cbh =
					(struct commit_header *)bh->b_data;
				unsigned found_chksum =
					be32_to_cpu(cbh->h_chksum[0]);

				/*
				 * end_transaction is already set, i.e. an
				 * earlier commit block failed verification
				 * (see chksum_error), yet another commit
				 * block follows: record the failed commit
				 * and stop scanning.
				 */
				if (info->end_transaction) {
					journal->j_failed_commit =
						info->end_transaction;
					brelse(bh);
					break;
				}

				/* Neither checksum match nor unused? */
				if (!((crc32_sum == found_chksum &&
				       cbh->h_chksum_type ==
						JBD2_CRC32_CHKSUM &&
				       cbh->h_chksum_size ==
						JBD2_CRC32_CHKSUM_SIZE) ||
				      (cbh->h_chksum_type == 0 &&
				       cbh->h_chksum_size == 0 &&
				       found_chksum == 0)))
					goto chksum_error;

				/* Start a fresh running CRC for the next one. */
				crc32_sum = ~0;
			}
			if (pass == PASS_SCAN &&
			    !jbd2_commit_block_csum_verify(journal,
							   bh->b_data)) {
				/*
				 * The full-block checksum failed; accept the
				 * block if the checksum matches a block that
				 * holds only the commit header.
				 */
				if (jbd2_commit_block_csum_verify_partial(
								  journal,
								  bh->b_data)) {
					pr_notice("JBD2: Find incomplete commit block in transaction %u block %lu\n",
						  next_commit_ID, next_log_block);
					goto chksum_ok;
				}
			chksum_error:
				/* An older commit time means stale data. */
				if (commit_time < last_trans_commit_time)
					goto ignore_crc_mismatch;
				/* The log ends before this transaction. */
				info->end_transaction = next_commit_ID;
				info->head_block = head_block;

				if (!jbd2_has_feature_async_commit(journal)) {
					journal->j_failed_commit =
						next_commit_ID;
					brelse(bh);
					break;
				}
			}
			if (pass == PASS_SCAN) {
			chksum_ok:
				/* Transaction accepted: move the head past it. */
				last_trans_commit_time = commit_time;
				head_block = next_log_block;
			}
			brelse(bh);
			next_commit_ID++;
			continue;

		case JBD2_REVOKE_BLOCK:
			/*
			 * Check revoke block crc in pass_scan, if csum verify
			 * failed, check commit block time later.
			 */
			if (pass == PASS_SCAN &&
			    !jbd2_descriptor_block_csum_verify(journal,
							       bh->b_data)) {
				jbd2_debug(1, "JBD2: invalid revoke block found in %lu\n",
					  next_log_block);
				need_check_commit_time = true;
			}

			/* If we aren't in the REVOKE pass, then we can
			 * just skip over this block. */
			if (pass != PASS_REVOKE) {
				brelse(bh);
				continue;
			}

			err = scan_revoke_records(journal, bh,
						  next_commit_ID, info);
			brelse(bh);
			if (err)
				goto failed;
			continue;

		default:
			jbd2_debug(3, "Unrecognised magic %d, end of scan.\n",
				  blocktype);
			brelse(bh);
			goto done;
		}
	}

 done:
	/*
	 * We broke out of the log scan loop: either we came to the
	 * known end of the log or we found an unexpected block in the
	 * log.  If the latter happened, then we know that the "current"
	 * transaction marks the end of the valid log.
	 */

	if (pass == PASS_SCAN) {
		/* Keep values already recorded at chksum_error, if any. */
		if (!info->end_transaction)
			info->end_transaction = next_commit_ID;
		if (!info->head_block)
			info->head_block = head_block;
	} else {
		/* It's really bad news if different passes end up at
		 * different places (but possible due to IO errors). */
		if (info->end_transaction != next_commit_ID) {
			printk(KERN_ERR "JBD2: recovery pass %d ended at "
				"transaction %u, expected %u\n",
				pass, next_commit_ID, info->end_transaction);
			if (!success)
				success = -EIO;
		}
	}

	/* The fast-commit area is handled in the scan and replay passes. */
	if (jbd2_has_feature_fast_commit(journal) && pass != PASS_REVOKE) {
		err = fc_do_one_pass(journal, info, pass);
		if (err)
			success = err;
	}

	if (block_error && success == 0)
		success = -EIO;
	return success;

 failed:
	return err;
}

/* Scan a revoke record, marking all blocks mentioned as revoked.
*/

/*
 * scan_revoke_records - register every block listed in a revoke block
 * @journal: journal being recovered
 * @bh: buffer holding the revoke block
 * @sequence: transaction ID the revoke block belongs to
 * @info: recovery state; nr_revokes is incremented per record
 *
 * The header's r_count gives the number of bytes in use in the block,
 * header included.  Records are 8 bytes wide with the 64bit feature and
 * 4 bytes wide otherwise.
 *
 * Returns 0 on success, -EINVAL if r_count exceeds the usable size of
 * the block, or the error returned by jbd2_journal_set_revoke().
 */
static int scan_revoke_records(journal_t *journal, struct buffer_head *bh,
			       tid_t sequence, struct recovery_info *info)
{
	jbd2_journal_revoke_header_t *hdr =
		(jbd2_journal_revoke_header_t *) bh->b_data;
	int pos = sizeof(jbd2_journal_revoke_header_t);
	__u32 used = be32_to_cpu(hdr->r_count);
	unsigned tail_size = 0;
	int entry_size = 4;
	int end;

	/* With v2/v3 checksums the block tail cannot hold records. */
	if (jbd2_journal_has_csum_v2or3(journal))
		tail_size = sizeof(struct jbd2_journal_block_tail);
	if (used > journal->j_blocksize - tail_size)
		return -EINVAL;
	end = used;

	if (jbd2_has_feature_64bit(journal))
		entry_size = 8;

	for (; pos + entry_size <= end; pos += entry_size) {
		unsigned long long blocknr;
		int rc;

		if (entry_size == 4)
			blocknr = be32_to_cpu(*((__be32 *) (bh->b_data + pos)));
		else
			blocknr = be64_to_cpu(*((__be64 *) (bh->b_data + pos)));

		rc = jbd2_journal_set_revoke(journal, blocknr, sequence);
		if (rc)
			return rc;
		++info->nr_revokes;
	}
	return 0;
}