1470decc6SDave Kleikamp /* 258862699SUwe Kleine-König * linux/fs/jbd2/recovery.c 3470decc6SDave Kleikamp * 4470decc6SDave Kleikamp * Written by Stephen C. Tweedie <sct@redhat.com>, 1999 5470decc6SDave Kleikamp * 6470decc6SDave Kleikamp * Copyright 1999-2000 Red Hat Software --- All Rights Reserved 7470decc6SDave Kleikamp * 8470decc6SDave Kleikamp * This file is part of the Linux kernel and is made available under 9470decc6SDave Kleikamp * the terms of the GNU General Public License, version 2, or at your 10470decc6SDave Kleikamp * option, any later version, incorporated herein by reference. 11470decc6SDave Kleikamp * 12470decc6SDave Kleikamp * Journal recovery routines for the generic filesystem journaling code; 13470decc6SDave Kleikamp * part of the ext2fs journaling system. 14470decc6SDave Kleikamp */ 15470decc6SDave Kleikamp 16470decc6SDave Kleikamp #ifndef __KERNEL__ 17470decc6SDave Kleikamp #include "jfs_user.h" 18470decc6SDave Kleikamp #else 19470decc6SDave Kleikamp #include <linux/time.h> 20470decc6SDave Kleikamp #include <linux/fs.h> 21f7f4bccbSMingming Cao #include <linux/jbd2.h> 22470decc6SDave Kleikamp #include <linux/errno.h> 23818d276cSGirish Shilamkar #include <linux/crc32.h> 24470decc6SDave Kleikamp #endif 25470decc6SDave Kleikamp 26470decc6SDave Kleikamp /* 27470decc6SDave Kleikamp * Maintain information about the progress of the recovery job, so that 28470decc6SDave Kleikamp * the different passes can carry information between them. 
29470decc6SDave Kleikamp */ 30470decc6SDave Kleikamp struct recovery_info 31470decc6SDave Kleikamp { 32470decc6SDave Kleikamp tid_t start_transaction; 33470decc6SDave Kleikamp tid_t end_transaction; 34470decc6SDave Kleikamp 35470decc6SDave Kleikamp int nr_replays; 36470decc6SDave Kleikamp int nr_revokes; 37470decc6SDave Kleikamp int nr_revoke_hits; 38470decc6SDave Kleikamp }; 39470decc6SDave Kleikamp 40470decc6SDave Kleikamp enum passtype {PASS_SCAN, PASS_REVOKE, PASS_REPLAY}; 41470decc6SDave Kleikamp static int do_one_pass(journal_t *journal, 42470decc6SDave Kleikamp struct recovery_info *info, enum passtype pass); 43470decc6SDave Kleikamp static int scan_revoke_records(journal_t *, struct buffer_head *, 44470decc6SDave Kleikamp tid_t, struct recovery_info *); 45470decc6SDave Kleikamp 46470decc6SDave Kleikamp #ifdef __KERNEL__ 47470decc6SDave Kleikamp 48470decc6SDave Kleikamp /* Release readahead buffers after use */ 49470decc6SDave Kleikamp static void journal_brelse_array(struct buffer_head *b[], int n) 50470decc6SDave Kleikamp { 51470decc6SDave Kleikamp while (--n >= 0) 52470decc6SDave Kleikamp brelse (b[n]); 53470decc6SDave Kleikamp } 54470decc6SDave Kleikamp 55470decc6SDave Kleikamp 56470decc6SDave Kleikamp /* 57470decc6SDave Kleikamp * When reading from the journal, we are going through the block device 58470decc6SDave Kleikamp * layer directly and so there is no readahead being done for us. We 59470decc6SDave Kleikamp * need to implement any readahead ourselves if we want it to happen at 60470decc6SDave Kleikamp * all. Recovery is basically one long sequential read, so make sure we 61470decc6SDave Kleikamp * do the IO in reasonably large chunks. 62470decc6SDave Kleikamp * 63470decc6SDave Kleikamp * This is not so critical that we need to be enormously clever about 64470decc6SDave Kleikamp * the readahead size, though. 128K is a purely arbitrary, good-enough 65470decc6SDave Kleikamp * fixed value. 
66470decc6SDave Kleikamp */ 67470decc6SDave Kleikamp 68470decc6SDave Kleikamp #define MAXBUF 8 69470decc6SDave Kleikamp static int do_readahead(journal_t *journal, unsigned int start) 70470decc6SDave Kleikamp { 71470decc6SDave Kleikamp int err; 72470decc6SDave Kleikamp unsigned int max, nbufs, next; 7318eba7aaSMingming Cao unsigned long long blocknr; 74470decc6SDave Kleikamp struct buffer_head *bh; 75470decc6SDave Kleikamp 76470decc6SDave Kleikamp struct buffer_head * bufs[MAXBUF]; 77470decc6SDave Kleikamp 78470decc6SDave Kleikamp /* Do up to 128K of readahead */ 79470decc6SDave Kleikamp max = start + (128 * 1024 / journal->j_blocksize); 80470decc6SDave Kleikamp if (max > journal->j_maxlen) 81470decc6SDave Kleikamp max = journal->j_maxlen; 82470decc6SDave Kleikamp 83470decc6SDave Kleikamp /* Do the readahead itself. We'll submit MAXBUF buffer_heads at 84470decc6SDave Kleikamp * a time to the block device IO layer. */ 85470decc6SDave Kleikamp 86470decc6SDave Kleikamp nbufs = 0; 87470decc6SDave Kleikamp 88470decc6SDave Kleikamp for (next = start; next < max; next++) { 89f7f4bccbSMingming Cao err = jbd2_journal_bmap(journal, next, &blocknr); 90470decc6SDave Kleikamp 91470decc6SDave Kleikamp if (err) { 92470decc6SDave Kleikamp printk (KERN_ERR "JBD: bad block at offset %u\n", 93470decc6SDave Kleikamp next); 94470decc6SDave Kleikamp goto failed; 95470decc6SDave Kleikamp } 96470decc6SDave Kleikamp 97470decc6SDave Kleikamp bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); 98470decc6SDave Kleikamp if (!bh) { 99470decc6SDave Kleikamp err = -ENOMEM; 100470decc6SDave Kleikamp goto failed; 101470decc6SDave Kleikamp } 102470decc6SDave Kleikamp 103470decc6SDave Kleikamp if (!buffer_uptodate(bh) && !buffer_locked(bh)) { 104470decc6SDave Kleikamp bufs[nbufs++] = bh; 105470decc6SDave Kleikamp if (nbufs == MAXBUF) { 106470decc6SDave Kleikamp ll_rw_block(READ, nbufs, bufs); 107470decc6SDave Kleikamp journal_brelse_array(bufs, nbufs); 108470decc6SDave Kleikamp nbufs = 0; 
109470decc6SDave Kleikamp } 110470decc6SDave Kleikamp } else 111470decc6SDave Kleikamp brelse(bh); 112470decc6SDave Kleikamp } 113470decc6SDave Kleikamp 114470decc6SDave Kleikamp if (nbufs) 115470decc6SDave Kleikamp ll_rw_block(READ, nbufs, bufs); 116470decc6SDave Kleikamp err = 0; 117470decc6SDave Kleikamp 118470decc6SDave Kleikamp failed: 119470decc6SDave Kleikamp if (nbufs) 120470decc6SDave Kleikamp journal_brelse_array(bufs, nbufs); 121470decc6SDave Kleikamp return err; 122470decc6SDave Kleikamp } 123470decc6SDave Kleikamp 124470decc6SDave Kleikamp #endif /* __KERNEL__ */ 125470decc6SDave Kleikamp 126470decc6SDave Kleikamp 127470decc6SDave Kleikamp /* 128470decc6SDave Kleikamp * Read a block from the journal 129470decc6SDave Kleikamp */ 130470decc6SDave Kleikamp 131470decc6SDave Kleikamp static int jread(struct buffer_head **bhp, journal_t *journal, 132470decc6SDave Kleikamp unsigned int offset) 133470decc6SDave Kleikamp { 134470decc6SDave Kleikamp int err; 13518eba7aaSMingming Cao unsigned long long blocknr; 136470decc6SDave Kleikamp struct buffer_head *bh; 137470decc6SDave Kleikamp 138470decc6SDave Kleikamp *bhp = NULL; 139470decc6SDave Kleikamp 140470decc6SDave Kleikamp if (offset >= journal->j_maxlen) { 141470decc6SDave Kleikamp printk(KERN_ERR "JBD: corrupted journal superblock\n"); 142470decc6SDave Kleikamp return -EIO; 143470decc6SDave Kleikamp } 144470decc6SDave Kleikamp 145f7f4bccbSMingming Cao err = jbd2_journal_bmap(journal, offset, &blocknr); 146470decc6SDave Kleikamp 147470decc6SDave Kleikamp if (err) { 148470decc6SDave Kleikamp printk (KERN_ERR "JBD: bad block at offset %u\n", 149470decc6SDave Kleikamp offset); 150470decc6SDave Kleikamp return err; 151470decc6SDave Kleikamp } 152470decc6SDave Kleikamp 153470decc6SDave Kleikamp bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); 154470decc6SDave Kleikamp if (!bh) 155470decc6SDave Kleikamp return -ENOMEM; 156470decc6SDave Kleikamp 157470decc6SDave Kleikamp if (!buffer_uptodate(bh)) { 
158470decc6SDave Kleikamp /* If this is a brand new buffer, start readahead. 159470decc6SDave Kleikamp Otherwise, we assume we are already reading it. */ 160470decc6SDave Kleikamp if (!buffer_req(bh)) 161470decc6SDave Kleikamp do_readahead(journal, offset); 162470decc6SDave Kleikamp wait_on_buffer(bh); 163470decc6SDave Kleikamp } 164470decc6SDave Kleikamp 165470decc6SDave Kleikamp if (!buffer_uptodate(bh)) { 166470decc6SDave Kleikamp printk (KERN_ERR "JBD: Failed to read block at offset %u\n", 167470decc6SDave Kleikamp offset); 168470decc6SDave Kleikamp brelse(bh); 169470decc6SDave Kleikamp return -EIO; 170470decc6SDave Kleikamp } 171470decc6SDave Kleikamp 172470decc6SDave Kleikamp *bhp = bh; 173470decc6SDave Kleikamp return 0; 174470decc6SDave Kleikamp } 175470decc6SDave Kleikamp 176470decc6SDave Kleikamp 177470decc6SDave Kleikamp /* 178470decc6SDave Kleikamp * Count the number of in-use tags in a journal descriptor block. 179470decc6SDave Kleikamp */ 180470decc6SDave Kleikamp 181b517bea1SZach Brown static int count_tags(journal_t *journal, struct buffer_head *bh) 182470decc6SDave Kleikamp { 183470decc6SDave Kleikamp char * tagp; 184470decc6SDave Kleikamp journal_block_tag_t * tag; 185b517bea1SZach Brown int nr = 0, size = journal->j_blocksize; 186b517bea1SZach Brown int tag_bytes = journal_tag_bytes(journal); 187470decc6SDave Kleikamp 188470decc6SDave Kleikamp tagp = &bh->b_data[sizeof(journal_header_t)]; 189470decc6SDave Kleikamp 190b517bea1SZach Brown while ((tagp - bh->b_data + tag_bytes) <= size) { 191470decc6SDave Kleikamp tag = (journal_block_tag_t *) tagp; 192470decc6SDave Kleikamp 193470decc6SDave Kleikamp nr++; 194b517bea1SZach Brown tagp += tag_bytes; 195f7f4bccbSMingming Cao if (!(tag->t_flags & cpu_to_be32(JBD2_FLAG_SAME_UUID))) 196470decc6SDave Kleikamp tagp += 16; 197470decc6SDave Kleikamp 198f7f4bccbSMingming Cao if (tag->t_flags & cpu_to_be32(JBD2_FLAG_LAST_TAG)) 199470decc6SDave Kleikamp break; 200470decc6SDave Kleikamp } 201470decc6SDave 
Kleikamp 202470decc6SDave Kleikamp return nr; 203470decc6SDave Kleikamp } 204470decc6SDave Kleikamp 205470decc6SDave Kleikamp 206470decc6SDave Kleikamp /* Make sure we wrap around the log correctly! */ 207470decc6SDave Kleikamp #define wrap(journal, var) \ 208470decc6SDave Kleikamp do { \ 209470decc6SDave Kleikamp if (var >= (journal)->j_last) \ 210470decc6SDave Kleikamp var -= ((journal)->j_last - (journal)->j_first); \ 211470decc6SDave Kleikamp } while (0) 212470decc6SDave Kleikamp 213470decc6SDave Kleikamp /** 214f7f4bccbSMingming Cao * jbd2_journal_recover - recovers a on-disk journal 215470decc6SDave Kleikamp * @journal: the journal to recover 216470decc6SDave Kleikamp * 217470decc6SDave Kleikamp * The primary function for recovering the log contents when mounting a 218470decc6SDave Kleikamp * journaled device. 219470decc6SDave Kleikamp * 220470decc6SDave Kleikamp * Recovery is done in three passes. In the first pass, we look for the 221470decc6SDave Kleikamp * end of the log. In the second, we assemble the list of revoke 222470decc6SDave Kleikamp * blocks. In the third and final pass, we replay any un-revoked blocks 223470decc6SDave Kleikamp * in the log. 224470decc6SDave Kleikamp */ 225f7f4bccbSMingming Cao int jbd2_journal_recover(journal_t *journal) 226470decc6SDave Kleikamp { 22744519fafSHidehiro Kawai int err, err2; 228470decc6SDave Kleikamp journal_superblock_t * sb; 229470decc6SDave Kleikamp 230470decc6SDave Kleikamp struct recovery_info info; 231470decc6SDave Kleikamp 232470decc6SDave Kleikamp memset(&info, 0, sizeof(info)); 233470decc6SDave Kleikamp sb = journal->j_superblock; 234470decc6SDave Kleikamp 235470decc6SDave Kleikamp /* 236470decc6SDave Kleikamp * The journal superblock's s_start field (the current log head) 237470decc6SDave Kleikamp * is always zero if, and only if, the journal was cleanly 238470decc6SDave Kleikamp * unmounted. 
239470decc6SDave Kleikamp */ 240470decc6SDave Kleikamp 241470decc6SDave Kleikamp if (!sb->s_start) { 242470decc6SDave Kleikamp jbd_debug(1, "No recovery required, last transaction %d\n", 243470decc6SDave Kleikamp be32_to_cpu(sb->s_sequence)); 244470decc6SDave Kleikamp journal->j_transaction_sequence = be32_to_cpu(sb->s_sequence) + 1; 245470decc6SDave Kleikamp return 0; 246470decc6SDave Kleikamp } 247470decc6SDave Kleikamp 248470decc6SDave Kleikamp err = do_one_pass(journal, &info, PASS_SCAN); 249470decc6SDave Kleikamp if (!err) 250470decc6SDave Kleikamp err = do_one_pass(journal, &info, PASS_REVOKE); 251470decc6SDave Kleikamp if (!err) 252470decc6SDave Kleikamp err = do_one_pass(journal, &info, PASS_REPLAY); 253470decc6SDave Kleikamp 254b38bd33aSMingming Cao jbd_debug(1, "JBD: recovery, exit status %d, " 255470decc6SDave Kleikamp "recovered transactions %u to %u\n", 256470decc6SDave Kleikamp err, info.start_transaction, info.end_transaction); 257b38bd33aSMingming Cao jbd_debug(1, "JBD: Replayed %d and revoked %d/%d blocks\n", 258470decc6SDave Kleikamp info.nr_replays, info.nr_revoke_hits, info.nr_revokes); 259470decc6SDave Kleikamp 260470decc6SDave Kleikamp /* Restart the log at the next transaction ID, thus invalidating 261470decc6SDave Kleikamp * any existing commit records in the log. 
*/ 262470decc6SDave Kleikamp journal->j_transaction_sequence = ++info.end_transaction; 263470decc6SDave Kleikamp 264f7f4bccbSMingming Cao jbd2_journal_clear_revoke(journal); 26544519fafSHidehiro Kawai err2 = sync_blockdev(journal->j_fs_dev); 26644519fafSHidehiro Kawai if (!err) 26744519fafSHidehiro Kawai err = err2; 26844519fafSHidehiro Kawai 269470decc6SDave Kleikamp return err; 270470decc6SDave Kleikamp } 271470decc6SDave Kleikamp 272470decc6SDave Kleikamp /** 273f7f4bccbSMingming Cao * jbd2_journal_skip_recovery - Start journal and wipe exiting records 274470decc6SDave Kleikamp * @journal: journal to startup 275470decc6SDave Kleikamp * 276470decc6SDave Kleikamp * Locate any valid recovery information from the journal and set up the 277470decc6SDave Kleikamp * journal structures in memory to ignore it (presumably because the 278470decc6SDave Kleikamp * caller has evidence that it is out of date). 279470decc6SDave Kleikamp * This function does'nt appear to be exorted.. 280470decc6SDave Kleikamp * 281470decc6SDave Kleikamp * We perform one pass over the journal to allow us to tell the user how 282470decc6SDave Kleikamp * much recovery information is being erased, and to let us initialise 283470decc6SDave Kleikamp * the journal transaction sequence numbers to the next unused ID. 284470decc6SDave Kleikamp */ 285f7f4bccbSMingming Cao int jbd2_journal_skip_recovery(journal_t *journal) 286470decc6SDave Kleikamp { 287470decc6SDave Kleikamp int err; 288470decc6SDave Kleikamp 289470decc6SDave Kleikamp struct recovery_info info; 290470decc6SDave Kleikamp 291470decc6SDave Kleikamp memset (&info, 0, sizeof(info)); 292470decc6SDave Kleikamp 293470decc6SDave Kleikamp err = do_one_pass(journal, &info, PASS_SCAN); 294470decc6SDave Kleikamp 295470decc6SDave Kleikamp if (err) { 296470decc6SDave Kleikamp printk(KERN_ERR "JBD: error %d scanning journal\n", err); 297470decc6SDave Kleikamp ++journal->j_transaction_sequence; 298470decc6SDave Kleikamp } else { 299e23291b9SJose R. 
Santos #ifdef CONFIG_JBD2_DEBUG 3005a0790c2SAndi Kleen int dropped = info.end_transaction - 3015a0790c2SAndi Kleen be32_to_cpu(journal->j_superblock->s_sequence); 302b38bd33aSMingming Cao jbd_debug(1, 303470decc6SDave Kleikamp "JBD: ignoring %d transaction%s from the journal.\n", 304470decc6SDave Kleikamp dropped, (dropped == 1) ? "" : "s"); 305*9a4f6271STheodore Ts'o #endif 306470decc6SDave Kleikamp journal->j_transaction_sequence = ++info.end_transaction; 307470decc6SDave Kleikamp } 308470decc6SDave Kleikamp 309470decc6SDave Kleikamp journal->j_tail = 0; 310470decc6SDave Kleikamp return err; 311470decc6SDave Kleikamp } 312470decc6SDave Kleikamp 31318eba7aaSMingming Cao static inline unsigned long long read_tag_block(int tag_bytes, journal_block_tag_t *tag) 314b517bea1SZach Brown { 31518eba7aaSMingming Cao unsigned long long block = be32_to_cpu(tag->t_blocknr); 316cd02ff0bSMingming Cao if (tag_bytes > JBD2_TAG_SIZE32) 317b517bea1SZach Brown block |= (u64)be32_to_cpu(tag->t_blocknr_high) << 32; 318b517bea1SZach Brown return block; 319b517bea1SZach Brown } 320b517bea1SZach Brown 321818d276cSGirish Shilamkar /* 322818d276cSGirish Shilamkar * calc_chksums calculates the checksums for the blocks described in the 323818d276cSGirish Shilamkar * descriptor block. 324818d276cSGirish Shilamkar */ 325818d276cSGirish Shilamkar static int calc_chksums(journal_t *journal, struct buffer_head *bh, 326818d276cSGirish Shilamkar unsigned long *next_log_block, __u32 *crc32_sum) 327818d276cSGirish Shilamkar { 328818d276cSGirish Shilamkar int i, num_blks, err; 329818d276cSGirish Shilamkar unsigned long io_block; 330818d276cSGirish Shilamkar struct buffer_head *obh; 331818d276cSGirish Shilamkar 332818d276cSGirish Shilamkar num_blks = count_tags(journal, bh); 333818d276cSGirish Shilamkar /* Calculate checksum of the descriptor block. 
*/ 334818d276cSGirish Shilamkar *crc32_sum = crc32_be(*crc32_sum, (void *)bh->b_data, bh->b_size); 335818d276cSGirish Shilamkar 336818d276cSGirish Shilamkar for (i = 0; i < num_blks; i++) { 337818d276cSGirish Shilamkar io_block = (*next_log_block)++; 338818d276cSGirish Shilamkar wrap(journal, *next_log_block); 339818d276cSGirish Shilamkar err = jread(&obh, journal, io_block); 340818d276cSGirish Shilamkar if (err) { 341818d276cSGirish Shilamkar printk(KERN_ERR "JBD: IO error %d recovering block " 342818d276cSGirish Shilamkar "%lu in log\n", err, io_block); 343818d276cSGirish Shilamkar return 1; 344818d276cSGirish Shilamkar } else { 345818d276cSGirish Shilamkar *crc32_sum = crc32_be(*crc32_sum, (void *)obh->b_data, 346818d276cSGirish Shilamkar obh->b_size); 347818d276cSGirish Shilamkar } 3488ea76900STheodore Ts'o put_bh(obh); 349818d276cSGirish Shilamkar } 350818d276cSGirish Shilamkar return 0; 351818d276cSGirish Shilamkar } 352818d276cSGirish Shilamkar 353470decc6SDave Kleikamp static int do_one_pass(journal_t *journal, 354470decc6SDave Kleikamp struct recovery_info *info, enum passtype pass) 355470decc6SDave Kleikamp { 356470decc6SDave Kleikamp unsigned int first_commit_ID, next_commit_ID; 357470decc6SDave Kleikamp unsigned long next_log_block; 358470decc6SDave Kleikamp int err, success = 0; 359470decc6SDave Kleikamp journal_superblock_t * sb; 360470decc6SDave Kleikamp journal_header_t * tmp; 361470decc6SDave Kleikamp struct buffer_head * bh; 362470decc6SDave Kleikamp unsigned int sequence; 363470decc6SDave Kleikamp int blocktype; 364b517bea1SZach Brown int tag_bytes = journal_tag_bytes(journal); 365818d276cSGirish Shilamkar __u32 crc32_sum = ~0; /* Transactional Checksums */ 366470decc6SDave Kleikamp 367470decc6SDave Kleikamp /* 368470decc6SDave Kleikamp * First thing is to establish what we expect to find in the log 369470decc6SDave Kleikamp * (in terms of transaction IDs), and where (in terms of log 370470decc6SDave Kleikamp * block offsets): query the 
superblock. 371470decc6SDave Kleikamp */ 372470decc6SDave Kleikamp 373470decc6SDave Kleikamp sb = journal->j_superblock; 374470decc6SDave Kleikamp next_commit_ID = be32_to_cpu(sb->s_sequence); 375470decc6SDave Kleikamp next_log_block = be32_to_cpu(sb->s_start); 376470decc6SDave Kleikamp 377470decc6SDave Kleikamp first_commit_ID = next_commit_ID; 378470decc6SDave Kleikamp if (pass == PASS_SCAN) 379470decc6SDave Kleikamp info->start_transaction = first_commit_ID; 380470decc6SDave Kleikamp 381470decc6SDave Kleikamp jbd_debug(1, "Starting recovery pass %d\n", pass); 382470decc6SDave Kleikamp 383470decc6SDave Kleikamp /* 384470decc6SDave Kleikamp * Now we walk through the log, transaction by transaction, 385470decc6SDave Kleikamp * making sure that each transaction has a commit block in the 386470decc6SDave Kleikamp * expected place. Each complete transaction gets replayed back 387470decc6SDave Kleikamp * into the main filesystem. 388470decc6SDave Kleikamp */ 389470decc6SDave Kleikamp 390470decc6SDave Kleikamp while (1) { 391470decc6SDave Kleikamp int flags; 392470decc6SDave Kleikamp char * tagp; 393470decc6SDave Kleikamp journal_block_tag_t * tag; 394470decc6SDave Kleikamp struct buffer_head * obh; 395470decc6SDave Kleikamp struct buffer_head * nbh; 396470decc6SDave Kleikamp 397e86e1438SAndi Kleen cond_resched(); 398470decc6SDave Kleikamp 399470decc6SDave Kleikamp /* If we already know where to stop the log traversal, 400470decc6SDave Kleikamp * check right now that we haven't gone past the end of 401470decc6SDave Kleikamp * the log. 
*/ 402470decc6SDave Kleikamp 403470decc6SDave Kleikamp if (pass != PASS_SCAN) 404470decc6SDave Kleikamp if (tid_geq(next_commit_ID, info->end_transaction)) 405470decc6SDave Kleikamp break; 406470decc6SDave Kleikamp 407470decc6SDave Kleikamp jbd_debug(2, "Scanning for sequence ID %u at %lu/%lu\n", 408470decc6SDave Kleikamp next_commit_ID, next_log_block, journal->j_last); 409470decc6SDave Kleikamp 410470decc6SDave Kleikamp /* Skip over each chunk of the transaction looking 411470decc6SDave Kleikamp * either the next descriptor block or the final commit 412470decc6SDave Kleikamp * record. */ 413470decc6SDave Kleikamp 414470decc6SDave Kleikamp jbd_debug(3, "JBD: checking block %ld\n", next_log_block); 415470decc6SDave Kleikamp err = jread(&bh, journal, next_log_block); 416470decc6SDave Kleikamp if (err) 417470decc6SDave Kleikamp goto failed; 418470decc6SDave Kleikamp 419470decc6SDave Kleikamp next_log_block++; 420470decc6SDave Kleikamp wrap(journal, next_log_block); 421470decc6SDave Kleikamp 422470decc6SDave Kleikamp /* What kind of buffer is it? 423470decc6SDave Kleikamp * 424470decc6SDave Kleikamp * If it is a descriptor block, check that it has the 425470decc6SDave Kleikamp * expected sequence number. Otherwise, we're all done 426470decc6SDave Kleikamp * here. 
*/ 427470decc6SDave Kleikamp 428470decc6SDave Kleikamp tmp = (journal_header_t *)bh->b_data; 429470decc6SDave Kleikamp 430f7f4bccbSMingming Cao if (tmp->h_magic != cpu_to_be32(JBD2_MAGIC_NUMBER)) { 431470decc6SDave Kleikamp brelse(bh); 432470decc6SDave Kleikamp break; 433470decc6SDave Kleikamp } 434470decc6SDave Kleikamp 435470decc6SDave Kleikamp blocktype = be32_to_cpu(tmp->h_blocktype); 436470decc6SDave Kleikamp sequence = be32_to_cpu(tmp->h_sequence); 437470decc6SDave Kleikamp jbd_debug(3, "Found magic %d, sequence %d\n", 438470decc6SDave Kleikamp blocktype, sequence); 439470decc6SDave Kleikamp 440470decc6SDave Kleikamp if (sequence != next_commit_ID) { 441470decc6SDave Kleikamp brelse(bh); 442470decc6SDave Kleikamp break; 443470decc6SDave Kleikamp } 444470decc6SDave Kleikamp 445470decc6SDave Kleikamp /* OK, we have a valid descriptor block which matches 446470decc6SDave Kleikamp * all of the sequence number checks. What are we going 447470decc6SDave Kleikamp * to do with it? That depends on the pass... */ 448470decc6SDave Kleikamp 449470decc6SDave Kleikamp switch(blocktype) { 450f7f4bccbSMingming Cao case JBD2_DESCRIPTOR_BLOCK: 451470decc6SDave Kleikamp /* If it is a valid descriptor block, replay it 452818d276cSGirish Shilamkar * in pass REPLAY; if journal_checksums enabled, then 453818d276cSGirish Shilamkar * calculate checksums in PASS_SCAN, otherwise, 454818d276cSGirish Shilamkar * just skip over the blocks it describes. 
*/ 455470decc6SDave Kleikamp if (pass != PASS_REPLAY) { 456818d276cSGirish Shilamkar if (pass == PASS_SCAN && 457818d276cSGirish Shilamkar JBD2_HAS_COMPAT_FEATURE(journal, 458818d276cSGirish Shilamkar JBD2_FEATURE_COMPAT_CHECKSUM) && 459818d276cSGirish Shilamkar !info->end_transaction) { 460818d276cSGirish Shilamkar if (calc_chksums(journal, bh, 461818d276cSGirish Shilamkar &next_log_block, 462818d276cSGirish Shilamkar &crc32_sum)) { 463818d276cSGirish Shilamkar put_bh(bh); 464818d276cSGirish Shilamkar break; 465818d276cSGirish Shilamkar } 466818d276cSGirish Shilamkar put_bh(bh); 467818d276cSGirish Shilamkar continue; 468818d276cSGirish Shilamkar } 469b517bea1SZach Brown next_log_block += count_tags(journal, bh); 470470decc6SDave Kleikamp wrap(journal, next_log_block); 471818d276cSGirish Shilamkar put_bh(bh); 472470decc6SDave Kleikamp continue; 473470decc6SDave Kleikamp } 474470decc6SDave Kleikamp 475470decc6SDave Kleikamp /* A descriptor block: we can now write all of 476470decc6SDave Kleikamp * the data blocks. Yay, useful work is finally 477470decc6SDave Kleikamp * getting done here! */ 478470decc6SDave Kleikamp 479470decc6SDave Kleikamp tagp = &bh->b_data[sizeof(journal_header_t)]; 480b517bea1SZach Brown while ((tagp - bh->b_data + tag_bytes) 481470decc6SDave Kleikamp <= journal->j_blocksize) { 482470decc6SDave Kleikamp unsigned long io_block; 483470decc6SDave Kleikamp 484470decc6SDave Kleikamp tag = (journal_block_tag_t *) tagp; 485470decc6SDave Kleikamp flags = be32_to_cpu(tag->t_flags); 486470decc6SDave Kleikamp 487470decc6SDave Kleikamp io_block = next_log_block++; 488470decc6SDave Kleikamp wrap(journal, next_log_block); 489470decc6SDave Kleikamp err = jread(&obh, journal, io_block); 490470decc6SDave Kleikamp if (err) { 491470decc6SDave Kleikamp /* Recover what we can, but 492470decc6SDave Kleikamp * report failure at the end. 
*/ 493470decc6SDave Kleikamp success = err; 494470decc6SDave Kleikamp printk (KERN_ERR 495470decc6SDave Kleikamp "JBD: IO error %d recovering " 496470decc6SDave Kleikamp "block %ld in log\n", 497470decc6SDave Kleikamp err, io_block); 498470decc6SDave Kleikamp } else { 49918eba7aaSMingming Cao unsigned long long blocknr; 500470decc6SDave Kleikamp 501470decc6SDave Kleikamp J_ASSERT(obh != NULL); 502b517bea1SZach Brown blocknr = read_tag_block(tag_bytes, 503b517bea1SZach Brown tag); 504470decc6SDave Kleikamp 505470decc6SDave Kleikamp /* If the block has been 506470decc6SDave Kleikamp * revoked, then we're all done 507470decc6SDave Kleikamp * here. */ 508f7f4bccbSMingming Cao if (jbd2_journal_test_revoke 509470decc6SDave Kleikamp (journal, blocknr, 510470decc6SDave Kleikamp next_commit_ID)) { 511470decc6SDave Kleikamp brelse(obh); 512470decc6SDave Kleikamp ++info->nr_revoke_hits; 513470decc6SDave Kleikamp goto skip_write; 514470decc6SDave Kleikamp } 515470decc6SDave Kleikamp 516470decc6SDave Kleikamp /* Find a buffer for the new 517470decc6SDave Kleikamp * data being restored */ 518470decc6SDave Kleikamp nbh = __getblk(journal->j_fs_dev, 519470decc6SDave Kleikamp blocknr, 520470decc6SDave Kleikamp journal->j_blocksize); 521470decc6SDave Kleikamp if (nbh == NULL) { 522470decc6SDave Kleikamp printk(KERN_ERR 523470decc6SDave Kleikamp "JBD: Out of memory " 524470decc6SDave Kleikamp "during recovery.\n"); 525470decc6SDave Kleikamp err = -ENOMEM; 526470decc6SDave Kleikamp brelse(bh); 527470decc6SDave Kleikamp brelse(obh); 528470decc6SDave Kleikamp goto failed; 529470decc6SDave Kleikamp } 530470decc6SDave Kleikamp 531470decc6SDave Kleikamp lock_buffer(nbh); 532470decc6SDave Kleikamp memcpy(nbh->b_data, obh->b_data, 533470decc6SDave Kleikamp journal->j_blocksize); 534f7f4bccbSMingming Cao if (flags & JBD2_FLAG_ESCAPE) { 535d0025676SDuane Griffin *((__be32 *)nbh->b_data) = 536f7f4bccbSMingming Cao cpu_to_be32(JBD2_MAGIC_NUMBER); 537470decc6SDave Kleikamp } 538470decc6SDave 
Kleikamp 539470decc6SDave Kleikamp BUFFER_TRACE(nbh, "marking dirty"); 540470decc6SDave Kleikamp set_buffer_uptodate(nbh); 541470decc6SDave Kleikamp mark_buffer_dirty(nbh); 542470decc6SDave Kleikamp BUFFER_TRACE(nbh, "marking uptodate"); 543470decc6SDave Kleikamp ++info->nr_replays; 544470decc6SDave Kleikamp /* ll_rw_block(WRITE, 1, &nbh); */ 545470decc6SDave Kleikamp unlock_buffer(nbh); 546470decc6SDave Kleikamp brelse(obh); 547470decc6SDave Kleikamp brelse(nbh); 548470decc6SDave Kleikamp } 549470decc6SDave Kleikamp 550470decc6SDave Kleikamp skip_write: 551b517bea1SZach Brown tagp += tag_bytes; 552f7f4bccbSMingming Cao if (!(flags & JBD2_FLAG_SAME_UUID)) 553470decc6SDave Kleikamp tagp += 16; 554470decc6SDave Kleikamp 555f7f4bccbSMingming Cao if (flags & JBD2_FLAG_LAST_TAG) 556470decc6SDave Kleikamp break; 557470decc6SDave Kleikamp } 558470decc6SDave Kleikamp 559470decc6SDave Kleikamp brelse(bh); 560470decc6SDave Kleikamp continue; 561470decc6SDave Kleikamp 562f7f4bccbSMingming Cao case JBD2_COMMIT_BLOCK: 563818d276cSGirish Shilamkar /* How to differentiate between interrupted commit 564818d276cSGirish Shilamkar * and journal corruption ? 
565818d276cSGirish Shilamkar * 566818d276cSGirish Shilamkar * {nth transaction} 567818d276cSGirish Shilamkar * Checksum Verification Failed 568818d276cSGirish Shilamkar * | 569818d276cSGirish Shilamkar * ____________________ 570818d276cSGirish Shilamkar * | | 571818d276cSGirish Shilamkar * async_commit sync_commit 572818d276cSGirish Shilamkar * | | 573818d276cSGirish Shilamkar * | GO TO NEXT "Journal Corruption" 574818d276cSGirish Shilamkar * | TRANSACTION 575818d276cSGirish Shilamkar * | 576818d276cSGirish Shilamkar * {(n+1)th transanction} 577818d276cSGirish Shilamkar * | 578818d276cSGirish Shilamkar * _______|______________ 579818d276cSGirish Shilamkar * | | 580818d276cSGirish Shilamkar * Commit block found Commit block not found 581818d276cSGirish Shilamkar * | | 582818d276cSGirish Shilamkar * "Journal Corruption" | 583818d276cSGirish Shilamkar * _____________|_________ 584818d276cSGirish Shilamkar * | | 585818d276cSGirish Shilamkar * nth trans corrupt OR nth trans 586818d276cSGirish Shilamkar * and (n+1)th interrupted interrupted 587818d276cSGirish Shilamkar * before commit block 588818d276cSGirish Shilamkar * could reach the disk. 589818d276cSGirish Shilamkar * (Cannot find the difference in above 590818d276cSGirish Shilamkar * mentioned conditions. Hence assume 591818d276cSGirish Shilamkar * "Interrupted Commit".) 592818d276cSGirish Shilamkar */ 593818d276cSGirish Shilamkar 594818d276cSGirish Shilamkar /* Found an expected commit block: if checksums 595818d276cSGirish Shilamkar * are present verify them in PASS_SCAN; else not 596818d276cSGirish Shilamkar * much to do other than move on to the next sequence 597470decc6SDave Kleikamp * number. 
*/ 598818d276cSGirish Shilamkar if (pass == PASS_SCAN && 599818d276cSGirish Shilamkar JBD2_HAS_COMPAT_FEATURE(journal, 600818d276cSGirish Shilamkar JBD2_FEATURE_COMPAT_CHECKSUM)) { 601818d276cSGirish Shilamkar int chksum_err, chksum_seen; 602818d276cSGirish Shilamkar struct commit_header *cbh = 603818d276cSGirish Shilamkar (struct commit_header *)bh->b_data; 604818d276cSGirish Shilamkar unsigned found_chksum = 605818d276cSGirish Shilamkar be32_to_cpu(cbh->h_chksum[0]); 606818d276cSGirish Shilamkar 607818d276cSGirish Shilamkar chksum_err = chksum_seen = 0; 608818d276cSGirish Shilamkar 609818d276cSGirish Shilamkar if (info->end_transaction) { 610624080edSTheodore Ts'o journal->j_failed_commit = 611624080edSTheodore Ts'o info->end_transaction; 612818d276cSGirish Shilamkar brelse(bh); 613818d276cSGirish Shilamkar break; 614818d276cSGirish Shilamkar } 615818d276cSGirish Shilamkar 616818d276cSGirish Shilamkar if (crc32_sum == found_chksum && 617818d276cSGirish Shilamkar cbh->h_chksum_type == JBD2_CRC32_CHKSUM && 618818d276cSGirish Shilamkar cbh->h_chksum_size == 619818d276cSGirish Shilamkar JBD2_CRC32_CHKSUM_SIZE) 620818d276cSGirish Shilamkar chksum_seen = 1; 621818d276cSGirish Shilamkar else if (!(cbh->h_chksum_type == 0 && 622818d276cSGirish Shilamkar cbh->h_chksum_size == 0 && 623818d276cSGirish Shilamkar found_chksum == 0 && 624818d276cSGirish Shilamkar !chksum_seen)) 625818d276cSGirish Shilamkar /* 626818d276cSGirish Shilamkar * If fs is mounted using an old kernel and then 627818d276cSGirish Shilamkar * kernel with journal_chksum is used then we 628818d276cSGirish Shilamkar * get a situation where the journal flag has 629818d276cSGirish Shilamkar * checksum flag set but checksums are not 630818d276cSGirish Shilamkar * present i.e chksum = 0, in the individual 631818d276cSGirish Shilamkar * commit blocks. 632818d276cSGirish Shilamkar * Hence to avoid checksum failures, in this 633818d276cSGirish Shilamkar * situation, this extra check is added. 
634818d276cSGirish Shilamkar */ 635818d276cSGirish Shilamkar chksum_err = 1; 636818d276cSGirish Shilamkar 637818d276cSGirish Shilamkar if (chksum_err) { 638818d276cSGirish Shilamkar info->end_transaction = next_commit_ID; 639818d276cSGirish Shilamkar 6404d605179SAneesh Kumar K.V if (!JBD2_HAS_INCOMPAT_FEATURE(journal, 641818d276cSGirish Shilamkar JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)){ 642624080edSTheodore Ts'o journal->j_failed_commit = 643624080edSTheodore Ts'o next_commit_ID; 644818d276cSGirish Shilamkar brelse(bh); 645818d276cSGirish Shilamkar break; 646818d276cSGirish Shilamkar } 647818d276cSGirish Shilamkar } 648818d276cSGirish Shilamkar crc32_sum = ~0; 649818d276cSGirish Shilamkar } 650470decc6SDave Kleikamp brelse(bh); 651470decc6SDave Kleikamp next_commit_ID++; 652470decc6SDave Kleikamp continue; 653470decc6SDave Kleikamp 654f7f4bccbSMingming Cao case JBD2_REVOKE_BLOCK: 655470decc6SDave Kleikamp /* If we aren't in the REVOKE pass, then we can 656470decc6SDave Kleikamp * just skip over this block. 
*/ 657470decc6SDave Kleikamp if (pass != PASS_REVOKE) { 658470decc6SDave Kleikamp brelse(bh); 659470decc6SDave Kleikamp continue; 660470decc6SDave Kleikamp } 661470decc6SDave Kleikamp 662470decc6SDave Kleikamp err = scan_revoke_records(journal, bh, 663470decc6SDave Kleikamp next_commit_ID, info); 664470decc6SDave Kleikamp brelse(bh); 665470decc6SDave Kleikamp if (err) 666470decc6SDave Kleikamp goto failed; 667470decc6SDave Kleikamp continue; 668470decc6SDave Kleikamp 669470decc6SDave Kleikamp default: 670470decc6SDave Kleikamp jbd_debug(3, "Unrecognised magic %d, end of scan.\n", 671470decc6SDave Kleikamp blocktype); 672470decc6SDave Kleikamp brelse(bh); 673470decc6SDave Kleikamp goto done; 674470decc6SDave Kleikamp } 675470decc6SDave Kleikamp } 676470decc6SDave Kleikamp 677470decc6SDave Kleikamp done: 678470decc6SDave Kleikamp /* 679470decc6SDave Kleikamp * We broke out of the log scan loop: either we came to the 680470decc6SDave Kleikamp * known end of the log or we found an unexpected block in the 681470decc6SDave Kleikamp * log. If the latter happened, then we know that the "current" 682470decc6SDave Kleikamp * transaction marks the end of the valid log. 683470decc6SDave Kleikamp */ 684470decc6SDave Kleikamp 685818d276cSGirish Shilamkar if (pass == PASS_SCAN) { 686818d276cSGirish Shilamkar if (!info->end_transaction) 687470decc6SDave Kleikamp info->end_transaction = next_commit_ID; 688818d276cSGirish Shilamkar } else { 689470decc6SDave Kleikamp /* It's really bad news if different passes end up at 690470decc6SDave Kleikamp * different places (but possible due to IO errors). 
*/ 691470decc6SDave Kleikamp if (info->end_transaction != next_commit_ID) { 692470decc6SDave Kleikamp printk (KERN_ERR "JBD: recovery pass %d ended at " 693470decc6SDave Kleikamp "transaction %u, expected %u\n", 694470decc6SDave Kleikamp pass, next_commit_ID, info->end_transaction); 695470decc6SDave Kleikamp if (!success) 696470decc6SDave Kleikamp success = -EIO; 697470decc6SDave Kleikamp } 698470decc6SDave Kleikamp } 699470decc6SDave Kleikamp 700470decc6SDave Kleikamp return success; 701470decc6SDave Kleikamp 702470decc6SDave Kleikamp failed: 703470decc6SDave Kleikamp return err; 704470decc6SDave Kleikamp } 705470decc6SDave Kleikamp 706470decc6SDave Kleikamp

/*
 * Scan a revoke record, marking all blocks mentioned as revoked.
 *
 * @journal:  journal being recovered.  Its INCOMPAT_64BIT feature flag
 *            selects 4-byte or 8-byte revoke entries, and its block size
 *            bounds how much of the buffer may be walked.
 * @bh:       buffer holding the revoke block.  b_data begins with a
 *            jbd2_journal_revoke_header_t and is followed by big-endian
 *            block numbers.
 * @sequence: transaction ID handed to jbd2_journal_set_revoke() for every
 *            entry found.
 * @info:     recovery bookkeeping; nr_revokes is incremented once per
 *            entry that was successfully recorded.
 *
 * Returns 0 on success, -EINVAL if the header's r_count is larger than a
 * journal block, or the first non-zero value returned by
 * jbd2_journal_set_revoke().
 */
static int scan_revoke_records(journal_t *journal, struct buffer_head *bh,
			       tid_t sequence, struct recovery_info *info)
{
	jbd2_journal_revoke_header_t *header;
	unsigned int rcount;
	int offset, max;
	int record_len = 4;

	header = (jbd2_journal_revoke_header_t *) bh->b_data;
	offset = sizeof(jbd2_journal_revoke_header_t);
	rcount = be32_to_cpu(header->r_count);

	/*
	 * r_count is read from disk and is used below as the byte offset
	 * at which the entries end.  A corrupted header must not be allowed
	 * to walk us past the end of the block's data, so refuse anything
	 * larger than a journal block.  The comparison is done unsigned so
	 * that values above INT_MAX are rejected as well.
	 */
	if (rcount > (unsigned int) journal->j_blocksize)
		return -EINVAL;
	max = rcount;

	if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT))
		record_len = 8;

	/* Only consume entries that fit entirely below the r_count bound. */
	while (offset + record_len <= max) {
		unsigned long long blocknr;
		int err;

		if (record_len == 4)
			blocknr = be32_to_cpu(*((__be32 *) (bh->b_data + offset)));
		else
			blocknr = be64_to_cpu(*((__be64 *) (bh->b_data + offset)));
		offset += record_len;

		err = jbd2_journal_set_revoke(journal, blocknr, sequence);
		if (err)
			return err;
		++info->nr_revokes;
	}
	return 0;
}