1f5166768STheodore Ts'o // SPDX-License-Identifier: GPL-2.0+
2470decc6SDave Kleikamp /*
358862699SUwe Kleine-König * linux/fs/jbd2/recovery.c
4470decc6SDave Kleikamp *
5470decc6SDave Kleikamp * Written by Stephen C. Tweedie <sct@redhat.com>, 1999
6470decc6SDave Kleikamp *
7470decc6SDave Kleikamp * Copyright 1999-2000 Red Hat Software --- All Rights Reserved
8470decc6SDave Kleikamp *
9470decc6SDave Kleikamp * Journal recovery routines for the generic filesystem journaling code;
10470decc6SDave Kleikamp * part of the ext2fs journaling system.
11470decc6SDave Kleikamp */
12470decc6SDave Kleikamp
13470decc6SDave Kleikamp #ifndef __KERNEL__
14470decc6SDave Kleikamp #include "jfs_user.h"
15470decc6SDave Kleikamp #else
16470decc6SDave Kleikamp #include <linux/time.h>
17470decc6SDave Kleikamp #include <linux/fs.h>
18f7f4bccbSMingming Cao #include <linux/jbd2.h>
19470decc6SDave Kleikamp #include <linux/errno.h>
20818d276cSGirish Shilamkar #include <linux/crc32.h>
2179feb521SJan Kara #include <linux/blkdev.h>
22470decc6SDave Kleikamp #endif
23470decc6SDave Kleikamp
24470decc6SDave Kleikamp /*
25470decc6SDave Kleikamp * Maintain information about the progress of the recovery job, so that
26470decc6SDave Kleikamp * the different passes can carry information between them.
27470decc6SDave Kleikamp */
28470decc6SDave Kleikamp struct recovery_info
29470decc6SDave Kleikamp {
30470decc6SDave Kleikamp tid_t start_transaction;
31470decc6SDave Kleikamp tid_t end_transaction;
32c7fc6055SZhang Yi unsigned long head_block;
33470decc6SDave Kleikamp
34470decc6SDave Kleikamp int nr_replays;
35470decc6SDave Kleikamp int nr_revokes;
36470decc6SDave Kleikamp int nr_revoke_hits;
37470decc6SDave Kleikamp };
38470decc6SDave Kleikamp
39470decc6SDave Kleikamp static int do_one_pass(journal_t *journal,
40470decc6SDave Kleikamp struct recovery_info *info, enum passtype pass);
41470decc6SDave Kleikamp static int scan_revoke_records(journal_t *, struct buffer_head *,
42470decc6SDave Kleikamp tid_t, struct recovery_info *);
43470decc6SDave Kleikamp
44470decc6SDave Kleikamp #ifdef __KERNEL__
45470decc6SDave Kleikamp
46470decc6SDave Kleikamp /* Release readahead buffers after use */
static void journal_brelse_array(struct buffer_head *b[], int n)
{
	int idx;

	/* Drop the references from the last slot down to the first. */
	for (idx = n - 1; idx >= 0; idx--)
		brelse(b[idx]);
}
52470decc6SDave Kleikamp
53470decc6SDave Kleikamp
54470decc6SDave Kleikamp /*
55470decc6SDave Kleikamp * When reading from the journal, we are going through the block device
56470decc6SDave Kleikamp * layer directly and so there is no readahead being done for us. We
57470decc6SDave Kleikamp * need to implement any readahead ourselves if we want it to happen at
58470decc6SDave Kleikamp * all. Recovery is basically one long sequential read, so make sure we
59470decc6SDave Kleikamp * do the IO in reasonably large chunks.
60470decc6SDave Kleikamp *
61470decc6SDave Kleikamp * This is not so critical that we need to be enormously clever about
62470decc6SDave Kleikamp * the readahead size, though. 128K is a purely arbitrary, good-enough
63470decc6SDave Kleikamp * fixed value.
64470decc6SDave Kleikamp */
65470decc6SDave Kleikamp
66470decc6SDave Kleikamp #define MAXBUF 8
static int do_readahead(journal_t *journal, unsigned int start)
{
	struct buffer_head *pending[MAXBUF];
	unsigned int npending = 0;
	unsigned int limit, offset;
	unsigned long long phys;
	int ret;

	/* Cover up to 128K past 'start', clamped to the journal length. */
	limit = start + (128 * 1024 / journal->j_blocksize);
	if (limit > journal->j_total_len)
		limit = journal->j_total_len;

	/*
	 * Walk the window, collecting buffers that still need reading and
	 * handing them to the block layer MAXBUF at a time.
	 */
	for (offset = start; offset < limit; offset++) {
		struct buffer_head *bh;

		ret = jbd2_journal_bmap(journal, offset, &phys);
		if (ret) {
			printk(KERN_ERR "JBD2: bad block at offset %u\n",
			       offset);
			goto out;
		}

		bh = __getblk(journal->j_dev, phys, journal->j_blocksize);
		if (!bh) {
			ret = -ENOMEM;
			goto out;
		}

		/* Nothing to submit for up-to-date or locked buffers. */
		if (buffer_uptodate(bh) || buffer_locked(bh)) {
			brelse(bh);
			continue;
		}

		pending[npending++] = bh;
		if (npending < MAXBUF)
			continue;

		/* Batch is full: submit it and drop our references. */
		bh_readahead_batch(npending, pending, 0);
		journal_brelse_array(pending, npending);
		npending = 0;
	}

	/* Submit whatever is left in a partially filled batch. */
	if (npending)
		bh_readahead_batch(npending, pending, 0);
	ret = 0;

out:
	/* Reached on success and on failure: release any held buffers. */
	if (npending)
		journal_brelse_array(pending, npending);
	return ret;
}
121470decc6SDave Kleikamp
122470decc6SDave Kleikamp #endif /* __KERNEL__ */
123470decc6SDave Kleikamp
124470decc6SDave Kleikamp
125470decc6SDave Kleikamp /*
126470decc6SDave Kleikamp * Read a block from the journal
127470decc6SDave Kleikamp */
128470decc6SDave Kleikamp
static int jread(struct buffer_head **bhp, journal_t *journal,
		 unsigned int offset)
{
	unsigned long long phys;
	struct buffer_head *buf;
	int ret;

	/* The caller sees NULL on every failure path. */
	*bhp = NULL;

	if (offset >= journal->j_total_len) {
		printk(KERN_ERR "JBD2: corrupted journal superblock\n");
		return -EFSCORRUPTED;
	}

	/* Translate the log offset into a block number on j_dev. */
	ret = jbd2_journal_bmap(journal, offset, &phys);
	if (ret) {
		printk(KERN_ERR "JBD2: bad block at offset %u\n",
		       offset);
		return ret;
	}

	buf = __getblk(journal->j_dev, phys, journal->j_blocksize);
	if (!buf)
		return -ENOMEM;

	if (!buffer_uptodate(buf)) {
		/*
		 * A buffer that has never been requested is brand new, so
		 * kick off readahead behind it; otherwise assume a read is
		 * already in flight.
		 */
		bool never_requested = !buffer_req(buf);

		bh_read_nowait(buf, 0);
		if (never_requested)
			do_readahead(journal, offset);
		wait_on_buffer(buf);
	}

	/* Still not up to date after waiting: the read failed. */
	if (!buffer_uptodate(buf)) {
		printk(KERN_ERR "JBD2: Failed to read block at offset %u\n",
		       offset);
		brelse(buf);
		return -EIO;
	}

	*bhp = buf;
	return 0;
}
178470decc6SDave Kleikamp
/*
 * Check the checksum stored in the tail of a descriptor block.
 *
 * Returns 1 when the journal has no v2/v3 checksums, otherwise non-zero
 * only if the stored checksum matches the one computed over the whole
 * block with the checksum field zeroed.  The field is restored before
 * returning, so @buf is left unchanged.
 */
static int jbd2_descriptor_block_csum_verify(journal_t *j, void *buf)
{
	struct jbd2_journal_block_tail *blk_tail;
	__be32 stored;
	__u32 computed;

	if (!jbd2_journal_has_csum_v2or3(j))
		return 1;

	/* The tail structure occupies the last bytes of the block. */
	blk_tail = (struct jbd2_journal_block_tail *)
		((char *)buf + j->j_blocksize -
		 sizeof(struct jbd2_journal_block_tail));

	stored = blk_tail->t_checksum;
	blk_tail->t_checksum = 0;
	computed = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize);
	blk_tail->t_checksum = stored;

	return stored == cpu_to_be32(computed);
}
197470decc6SDave Kleikamp
198470decc6SDave Kleikamp /*
199470decc6SDave Kleikamp * Count the number of in-use tags in a journal descriptor block.
200470decc6SDave Kleikamp */
201470decc6SDave Kleikamp
static int count_tags(journal_t *journal, struct buffer_head *bh)
{
	int limit = journal->j_blocksize;
	int tag_bytes = journal_tag_bytes(journal);
	journal_block_tag_t cur;
	char *cursor;
	int count = 0;

	/* With v2/v3 checksums the block tail is not available for tags. */
	if (jbd2_journal_has_csum_v2or3(journal))
		limit -= sizeof(struct jbd2_journal_block_tail);

	/* Tags start right after the journal header. */
	for (cursor = &bh->b_data[sizeof(journal_header_t)];
	     (cursor - bh->b_data + tag_bytes) <= limit; ) {
		/* Copy the tag out rather than reading it in place. */
		memcpy(&cur, cursor, sizeof(cur));

		count++;
		cursor += tag_bytes;

		/* Without SAME_UUID, 16 extra bytes follow the tag. */
		if (!(cur.t_flags & cpu_to_be16(JBD2_FLAG_SAME_UUID)))
			cursor += 16;

		if (cur.t_flags & cpu_to_be16(JBD2_FLAG_LAST_TAG))
			break;
	}

	return count;
}
228470decc6SDave Kleikamp
229470decc6SDave Kleikamp
230470decc6SDave Kleikamp /* Make sure we wrap around the log correctly! */
/*
 * 'var' must be a modifiable lvalue holding a log block offset.  Once it
 * reaches (journal)->j_last it is pulled back by the size of the log area,
 * j_last - j_first.  Both arguments may be evaluated more than once, so
 * they must be free of side effects.
 */
#define wrap(journal, var)						\
do {									\
	if ((var) >= (journal)->j_last)					\
		(var) -= ((journal)->j_last - (journal)->j_first);	\
} while (0)
236470decc6SDave Kleikamp
/*
 * Run one recovery pass over the fast-commit area.
 *
 * Each block from j_fc_first to j_fc_last (inclusive) is read and handed
 * to the journal's j_fc_replay_callback together with the pass, the
 * block's index relative to j_fc_first and info->end_transaction as the
 * expected commit ID.
 *
 * Returns 0 if there is no callback or every block was processed;
 * otherwise the jread() error, the callback's negative return value, or
 * JBD2_FC_REPLAY_STOP if the callback asked to stop.
 */
static int fc_do_one_pass(journal_t *journal,
			  struct recovery_info *info, enum passtype pass)
{
	unsigned int expected_commit_id = info->end_transaction;
	unsigned long blk;
	struct buffer_head *bh;
	int err = 0;

	if (!journal->j_fc_replay_callback)
		return 0;

	for (blk = journal->j_fc_first; blk <= journal->j_fc_last; blk++) {
		jbd2_debug(3, "Fast commit replay: next block %ld\n",
			  blk);
		err = jread(&bh, journal, blk);
		if (err) {
			jbd2_debug(3, "Fast commit replay: read error\n");
			break;
		}

		err = journal->j_fc_replay_callback(journal, bh, pass,
					blk - journal->j_fc_first,
					expected_commit_id);
		brelse(bh);
		if (err < 0 || err == JBD2_FC_REPLAY_STOP)
			break;
		/* Other positive return values do not stop the walk. */
		err = 0;
	}

	if (err)
		jbd2_debug(3, "Fast commit replay failed, err = %d\n", err);

	return err;
}
2735b849b5fSHarshad Shirwadkar
/**
 * jbd2_journal_recover - recovers an on-disk journal
 * @journal: the journal to recover
 *
 * The primary function for recovering the log contents when mounting a
 * journaled device.
 *
 * Recovery is done in three passes.  In the first pass, we look for the
 * end of the log.  In the second, we assemble the list of revoke
 * blocks.  In the third and final pass, we replay any un-revoked blocks
 * in the log.
 */
int jbd2_journal_recover(journal_t *journal)
{
	/* The passes, in the order they must run. */
	static const enum passtype passes[] = {
		PASS_SCAN, PASS_REVOKE, PASS_REPLAY,
	};
	journal_superblock_t *sb = journal->j_superblock;
	struct address_space *mapping;
	struct recovery_info info;
	errseq_t wb_err = 0;
	int err = 0, err2;
	unsigned int i;

	memset(&info, 0, sizeof(info));

	/*
	 * The journal superblock's s_start field (the current log head)
	 * is always zero if, and only if, the journal was cleanly
	 * unmounted.
	 */
	if (!sb->s_start) {
		jbd2_debug(1, "No recovery required, last transaction %d, head block %u\n",
			  be32_to_cpu(sb->s_sequence), be32_to_cpu(sb->s_head));
		journal->j_transaction_sequence = be32_to_cpu(sb->s_sequence) + 1;
		journal->j_head = be32_to_cpu(sb->s_head);
		return 0;
	}

	/* Sample the writeback error state so new errors show up below. */
	mapping = journal->j_fs_dev->bd_inode->i_mapping;
	errseq_check_and_advance(&mapping->wb_err, &wb_err);

	/* Stop at the first failing pass; later passes are not attempted. */
	for (i = 0; i < sizeof(passes) / sizeof(passes[0]); i++) {
		err = do_one_pass(journal, &info, passes[i]);
		if (err)
			break;
	}

	jbd2_debug(1, "JBD2: recovery, exit status %d, "
		  "recovered transactions %u to %u\n",
		  err, info.start_transaction, info.end_transaction);
	jbd2_debug(1, "JBD2: Replayed %d and revoked %d/%d blocks\n",
		  info.nr_replays, info.nr_revoke_hits, info.nr_revokes);

	/*
	 * Restart the log at the next transaction ID, thus invalidating
	 * any existing commit records in the log.
	 */
	info.end_transaction++;
	journal->j_transaction_sequence = info.end_transaction;
	journal->j_head = info.head_block;
	jbd2_debug(1, "JBD2: last transaction %d, head block %lu\n",
		  journal->j_transaction_sequence, journal->j_head);

	jbd2_journal_clear_revoke(journal);

	/* From here on, keep the first error seen but run every step. */
	err2 = sync_blockdev(journal->j_fs_dev);
	if (err == 0)
		err = err2;

	err2 = errseq_check_and_advance(&mapping->wb_err, &wb_err);
	if (err == 0)
		err = err2;

	/* Make sure all replayed data is on permanent storage */
	if (journal->j_flags & JBD2_BARRIER) {
		err2 = blkdev_issue_flush(journal->j_fs_dev);
		if (err == 0)
			err = err2;
	}

	return err;
}
348470decc6SDave Kleikamp
/**
 * jbd2_journal_skip_recovery - Start journal and wipe existing records
 * @journal: journal to startup
 *
 * Locate any valid recovery information from the journal and set up the
 * journal structures in memory to ignore it (presumably because the
 * caller has evidence that it is out of date).
 * This function doesn't appear to be exported.
 *
 * We perform one pass over the journal to allow us to tell the user how
 * much recovery information is being erased, and to let us initialise
 * the journal transaction sequence numbers to the next unused ID.
 */
int jbd2_journal_skip_recovery(journal_t *journal)
{
	struct recovery_info info;
	int err;

	memset(&info, 0, sizeof(info));
	err = do_one_pass(journal, &info, PASS_SCAN);

	if (!err) {
#ifdef CONFIG_JBD2_DEBUG
		int dropped = info.end_transaction -
			be32_to_cpu(journal->j_superblock->s_sequence);
		jbd2_debug(1,
			  "JBD2: ignoring %d transaction%s from the journal.\n",
			  dropped, (dropped == 1) ? "" : "s");
#endif
		/* Continue after the last transaction the scan found. */
		journal->j_transaction_sequence = ++info.end_transaction;
		journal->j_head = info.head_block;
	} else {
		/* Scan failed: bump the sequence, restart at j_first. */
		printk(KERN_ERR "JBD2: error %d scanning journal\n", err);
		++journal->j_transaction_sequence;
		journal->j_head = journal->j_first;
	}

	journal->j_tail = 0;
	return err;
}
391470decc6SDave Kleikamp
/*
 * Return the block number stored in a descriptor tag: the 32-bit
 * t_blocknr, with t_blocknr_high as the upper 32 bits when the journal
 * has the 64bit feature.
 */
static inline unsigned long long read_tag_block(journal_t *journal,
						journal_block_tag_t *tag)
{
	unsigned long long low = be32_to_cpu(tag->t_blocknr);

	if (!jbd2_has_feature_64bit(journal))
		return low;

	return low | ((u64)be32_to_cpu(tag->t_blocknr_high) << 32);
}
400b517bea1SZach Brown
401818d276cSGirish Shilamkar /*
402818d276cSGirish Shilamkar * calc_chksums calculates the checksums for the blocks described in the
403818d276cSGirish Shilamkar * descriptor block.
404818d276cSGirish Shilamkar */
static int calc_chksums(journal_t *journal, struct buffer_head *bh,
			unsigned long *next_log_block, __u32 *crc32_sum)
{
	int remaining = count_tags(journal, bh);

	/* Calculate checksum of the descriptor block. */
	*crc32_sum = crc32_be(*crc32_sum, (void *)bh->b_data, bh->b_size);

	/*
	 * Fold in one log block per tag, advancing (and wrapping) the
	 * caller's log position as we go.  Returns 1 on a read error and
	 * 0 on success.
	 */
	while (remaining-- > 0) {
		struct buffer_head *data_bh;
		unsigned long blk = (*next_log_block)++;
		int err;

		wrap(journal, *next_log_block);
		err = jread(&data_bh, journal, blk);
		if (err) {
			printk(KERN_ERR "JBD2: IO error %d recovering block "
				"%lu in log\n", err, blk);
			return 1;
		}

		*crc32_sum = crc32_be(*crc32_sum, (void *)data_bh->b_data,
				      data_bh->b_size);
		put_bh(data_bh);
	}

	return 0;
}
432818d276cSGirish Shilamkar
/*
 * Check the checksum stored in h_chksum[0] of a commit block.
 *
 * Returns 1 when the journal has no v2/v3 checksums, otherwise non-zero
 * only if the stored value matches the checksum of the whole block
 * computed with that field zeroed.  The field is restored afterwards.
 */
static int jbd2_commit_block_csum_verify(journal_t *j, void *buf)
{
	struct commit_header *hdr = buf;
	__be32 stored;
	__u32 computed;

	if (!jbd2_journal_has_csum_v2or3(j))
		return 1;

	stored = hdr->h_chksum[0];
	hdr->h_chksum[0] = 0;
	computed = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize);
	hdr->h_chksum[0] = stored;

	return stored == cpu_to_be32(computed);
}
4501f56c589SDarrick J. Wong
jbd2_commit_block_csum_verify_partial(journal_t * j,void * buf)451*e16c4c24SYe Bin static bool jbd2_commit_block_csum_verify_partial(journal_t *j, void *buf)
452*e16c4c24SYe Bin {
453*e16c4c24SYe Bin struct commit_header *h;
454*e16c4c24SYe Bin __be32 provided;
455*e16c4c24SYe Bin __u32 calculated;
456*e16c4c24SYe Bin void *tmpbuf;
457*e16c4c24SYe Bin
458*e16c4c24SYe Bin tmpbuf = kzalloc(j->j_blocksize, GFP_KERNEL);
459*e16c4c24SYe Bin if (!tmpbuf)
460*e16c4c24SYe Bin return false;
461*e16c4c24SYe Bin
462*e16c4c24SYe Bin memcpy(tmpbuf, buf, sizeof(struct commit_header));
463*e16c4c24SYe Bin h = tmpbuf;
464*e16c4c24SYe Bin provided = h->h_chksum[0];
465*e16c4c24SYe Bin h->h_chksum[0] = 0;
466*e16c4c24SYe Bin calculated = jbd2_chksum(j, j->j_csum_seed, tmpbuf, j->j_blocksize);
467*e16c4c24SYe Bin kfree(tmpbuf);
468*e16c4c24SYe Bin
469*e16c4c24SYe Bin return provided == cpu_to_be32(calculated);
470*e16c4c24SYe Bin }
471*e16c4c24SYe Bin
/*
 * Check the per-block checksum recorded in a descriptor tag.
 *
 * The checksum is seeded with j_csum_seed, then covers the big-endian
 * transaction sequence number followed by the data block in @buf.  With
 * the csum3 feature it is compared against @tag3->t_checksum as a
 * 32-bit value; otherwise against @tag->t_checksum after cpu_to_be16().
 *
 * Returns 1 when the journal has no v2/v3 checksums, otherwise non-zero
 * only on a match.
 */
static int jbd2_block_tag_csum_verify(journal_t *j, journal_block_tag_t *tag,
				      journal_block_tag3_t *tag3,
				      void *buf, __u32 sequence)
{
	__be32 seq_be;
	__u32 csum;

	if (!jbd2_journal_has_csum_v2or3(j))
		return 1;

	seq_be = cpu_to_be32(sequence);
	csum = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&seq_be, sizeof(seq_be));
	csum = jbd2_chksum(j, csum, buf, j->j_blocksize);

	if (jbd2_has_feature_csum3(j))
		return tag3->t_checksum == cpu_to_be32(csum);

	return tag->t_checksum == cpu_to_be16(csum);
}
491c3900875SDarrick J. Wong
do_one_pass(journal_t * journal,struct recovery_info * info,enum passtype pass)492470decc6SDave Kleikamp static int do_one_pass(journal_t *journal,
493470decc6SDave Kleikamp struct recovery_info *info, enum passtype pass)
494470decc6SDave Kleikamp {
495470decc6SDave Kleikamp unsigned int first_commit_ID, next_commit_ID;
496c7fc6055SZhang Yi unsigned long next_log_block, head_block;
497470decc6SDave Kleikamp int err, success = 0;
498470decc6SDave Kleikamp journal_superblock_t * sb;
499470decc6SDave Kleikamp journal_header_t * tmp;
500470decc6SDave Kleikamp struct buffer_head * bh;
501470decc6SDave Kleikamp unsigned int sequence;
502470decc6SDave Kleikamp int blocktype;
503b517bea1SZach Brown int tag_bytes = journal_tag_bytes(journal);
504818d276cSGirish Shilamkar __u32 crc32_sum = ~0; /* Transactional Checksums */
5053caa487fSDarrick J. Wong int descr_csum_size = 0;
506022eaa75SDarrick J. Wong int block_error = 0;
507fc750a3bSchangfengnan bool need_check_commit_time = false;
508fc750a3bSchangfengnan __u64 last_trans_commit_time = 0, commit_time;
509470decc6SDave Kleikamp
510470decc6SDave Kleikamp /*
511470decc6SDave Kleikamp * First thing is to establish what we expect to find in the log
512470decc6SDave Kleikamp * (in terms of transaction IDs), and where (in terms of log
513470decc6SDave Kleikamp * block offsets): query the superblock.
514470decc6SDave Kleikamp */
515470decc6SDave Kleikamp
516470decc6SDave Kleikamp sb = journal->j_superblock;
517470decc6SDave Kleikamp next_commit_ID = be32_to_cpu(sb->s_sequence);
518470decc6SDave Kleikamp next_log_block = be32_to_cpu(sb->s_start);
519c7fc6055SZhang Yi head_block = next_log_block;
520470decc6SDave Kleikamp
521470decc6SDave Kleikamp first_commit_ID = next_commit_ID;
522470decc6SDave Kleikamp if (pass == PASS_SCAN)
523470decc6SDave Kleikamp info->start_transaction = first_commit_ID;
524470decc6SDave Kleikamp
525cb3b3bf2SJan Kara jbd2_debug(1, "Starting recovery pass %d\n", pass);
526470decc6SDave Kleikamp
527470decc6SDave Kleikamp /*
528470decc6SDave Kleikamp * Now we walk through the log, transaction by transaction,
529470decc6SDave Kleikamp * making sure that each transaction has a commit block in the
530470decc6SDave Kleikamp * expected place. Each complete transaction gets replayed back
531470decc6SDave Kleikamp * into the main filesystem.
532470decc6SDave Kleikamp */
533470decc6SDave Kleikamp
534470decc6SDave Kleikamp while (1) {
535470decc6SDave Kleikamp int flags;
536470decc6SDave Kleikamp char * tagp;
537a20d1cebSTheodore Ts'o journal_block_tag_t tag;
538470decc6SDave Kleikamp struct buffer_head * obh;
539470decc6SDave Kleikamp struct buffer_head * nbh;
540470decc6SDave Kleikamp
541e86e1438SAndi Kleen cond_resched();
542470decc6SDave Kleikamp
543470decc6SDave Kleikamp /* If we already know where to stop the log traversal,
544470decc6SDave Kleikamp * check right now that we haven't gone past the end of
545470decc6SDave Kleikamp * the log. */
546470decc6SDave Kleikamp
547470decc6SDave Kleikamp if (pass != PASS_SCAN)
548470decc6SDave Kleikamp if (tid_geq(next_commit_ID, info->end_transaction))
549470decc6SDave Kleikamp break;
550470decc6SDave Kleikamp
551cb3b3bf2SJan Kara jbd2_debug(2, "Scanning for sequence ID %u at %lu/%lu\n",
5522dfba3bbSZhang Yi next_commit_ID, next_log_block, journal->j_last);
553470decc6SDave Kleikamp
554470decc6SDave Kleikamp /* Skip over each chunk of the transaction looking
555470decc6SDave Kleikamp * either the next descriptor block or the final commit
556470decc6SDave Kleikamp * record. */
557470decc6SDave Kleikamp
558cb3b3bf2SJan Kara jbd2_debug(3, "JBD2: checking block %ld\n", next_log_block);
559470decc6SDave Kleikamp err = jread(&bh, journal, next_log_block);
560470decc6SDave Kleikamp if (err)
561470decc6SDave Kleikamp goto failed;
562470decc6SDave Kleikamp
563470decc6SDave Kleikamp next_log_block++;
564470decc6SDave Kleikamp wrap(journal, next_log_block);
565470decc6SDave Kleikamp
566470decc6SDave Kleikamp /* What kind of buffer is it?
567470decc6SDave Kleikamp *
568470decc6SDave Kleikamp * If it is a descriptor block, check that it has the
569470decc6SDave Kleikamp * expected sequence number. Otherwise, we're all done
570470decc6SDave Kleikamp * here. */
571470decc6SDave Kleikamp
572470decc6SDave Kleikamp tmp = (journal_header_t *)bh->b_data;
573470decc6SDave Kleikamp
574f7f4bccbSMingming Cao if (tmp->h_magic != cpu_to_be32(JBD2_MAGIC_NUMBER)) {
575470decc6SDave Kleikamp brelse(bh);
576470decc6SDave Kleikamp break;
577470decc6SDave Kleikamp }
578470decc6SDave Kleikamp
579470decc6SDave Kleikamp blocktype = be32_to_cpu(tmp->h_blocktype);
580470decc6SDave Kleikamp sequence = be32_to_cpu(tmp->h_sequence);
581cb3b3bf2SJan Kara jbd2_debug(3, "Found magic %d, sequence %d\n",
582470decc6SDave Kleikamp blocktype, sequence);
583470decc6SDave Kleikamp
584470decc6SDave Kleikamp if (sequence != next_commit_ID) {
585470decc6SDave Kleikamp brelse(bh);
586470decc6SDave Kleikamp break;
587470decc6SDave Kleikamp }
588470decc6SDave Kleikamp
589470decc6SDave Kleikamp /* OK, we have a valid descriptor block which matches
590470decc6SDave Kleikamp * all of the sequence number checks. What are we going
591470decc6SDave Kleikamp * to do with it? That depends on the pass... */
592470decc6SDave Kleikamp
593470decc6SDave Kleikamp switch(blocktype) {
594f7f4bccbSMingming Cao case JBD2_DESCRIPTOR_BLOCK:
5953caa487fSDarrick J. Wong /* Verify checksum first */
596db9ee220SDarrick J. Wong if (jbd2_journal_has_csum_v2or3(journal))
5973caa487fSDarrick J. Wong descr_csum_size =
5983caa487fSDarrick J. Wong sizeof(struct jbd2_journal_block_tail);
5993caa487fSDarrick J. Wong if (descr_csum_size > 0 &&
6001101cd4dSJan Kara !jbd2_descriptor_block_csum_verify(journal,
6013caa487fSDarrick J. Wong bh->b_data)) {
602fc750a3bSchangfengnan /*
603fc750a3bSchangfengnan * PASS_SCAN can see stale blocks due to lazy
604fc750a3bSchangfengnan * journal init. Don't error out on those yet.
605fc750a3bSchangfengnan */
606fc750a3bSchangfengnan if (pass != PASS_SCAN) {
607fc750a3bSchangfengnan pr_err("JBD2: Invalid checksum recovering block %lu in log\n",
608b6924225SDarrick J. Wong next_log_block);
6096a797d27SDarrick J. Wong err = -EFSBADCRC;
610064d8389SDarrick J. Wong brelse(bh);
6113caa487fSDarrick J. Wong goto failed;
6123caa487fSDarrick J. Wong }
613fc750a3bSchangfengnan need_check_commit_time = true;
614cb3b3bf2SJan Kara jbd2_debug(1,
615fc750a3bSchangfengnan "invalid descriptor block found in %lu\n",
616fc750a3bSchangfengnan next_log_block);
617fc750a3bSchangfengnan }
6183caa487fSDarrick J. Wong
619470decc6SDave Kleikamp /* If it is a valid descriptor block, replay it
620818d276cSGirish Shilamkar * in pass REPLAY; if journal_checksums enabled, then
621818d276cSGirish Shilamkar * calculate checksums in PASS_SCAN, otherwise,
622818d276cSGirish Shilamkar * just skip over the blocks it describes. */
623470decc6SDave Kleikamp if (pass != PASS_REPLAY) {
624818d276cSGirish Shilamkar if (pass == PASS_SCAN &&
62556316a0dSDarrick J. Wong jbd2_has_feature_checksum(journal) &&
626fc750a3bSchangfengnan !need_check_commit_time &&
627818d276cSGirish Shilamkar !info->end_transaction) {
628818d276cSGirish Shilamkar if (calc_chksums(journal, bh,
629818d276cSGirish Shilamkar &next_log_block,
630818d276cSGirish Shilamkar &crc32_sum)) {
631818d276cSGirish Shilamkar put_bh(bh);
632818d276cSGirish Shilamkar break;
633818d276cSGirish Shilamkar }
634818d276cSGirish Shilamkar put_bh(bh);
635818d276cSGirish Shilamkar continue;
636818d276cSGirish Shilamkar }
637b517bea1SZach Brown next_log_block += count_tags(journal, bh);
638470decc6SDave Kleikamp wrap(journal, next_log_block);
639818d276cSGirish Shilamkar put_bh(bh);
640470decc6SDave Kleikamp continue;
641470decc6SDave Kleikamp }
642470decc6SDave Kleikamp
643470decc6SDave Kleikamp /* A descriptor block: we can now write all of
644470decc6SDave Kleikamp * the data blocks. Yay, useful work is finally
645470decc6SDave Kleikamp * getting done here! */
646470decc6SDave Kleikamp
647470decc6SDave Kleikamp tagp = &bh->b_data[sizeof(journal_header_t)];
648b517bea1SZach Brown while ((tagp - bh->b_data + tag_bytes)
6493caa487fSDarrick J. Wong <= journal->j_blocksize - descr_csum_size) {
650470decc6SDave Kleikamp unsigned long io_block;
651470decc6SDave Kleikamp
652a20d1cebSTheodore Ts'o memcpy(&tag, tagp, sizeof(tag));
653a20d1cebSTheodore Ts'o flags = be16_to_cpu(tag.t_flags);
654470decc6SDave Kleikamp
655470decc6SDave Kleikamp io_block = next_log_block++;
656470decc6SDave Kleikamp wrap(journal, next_log_block);
657470decc6SDave Kleikamp err = jread(&obh, journal, io_block);
658470decc6SDave Kleikamp if (err) {
659470decc6SDave Kleikamp /* Recover what we can, but
660470decc6SDave Kleikamp * report failure at the end. */
661470decc6SDave Kleikamp success = err;
662470decc6SDave Kleikamp printk(KERN_ERR
663f2a44523SEryu Guan "JBD2: IO error %d recovering "
664470decc6SDave Kleikamp "block %ld in log\n",
665470decc6SDave Kleikamp err, io_block);
666470decc6SDave Kleikamp } else {
66718eba7aaSMingming Cao unsigned long long blocknr;
668470decc6SDave Kleikamp
669470decc6SDave Kleikamp J_ASSERT(obh != NULL);
670db9ee220SDarrick J. Wong blocknr = read_tag_block(journal,
671a20d1cebSTheodore Ts'o &tag);
672470decc6SDave Kleikamp
673470decc6SDave Kleikamp /* If the block has been
674470decc6SDave Kleikamp * revoked, then we're all done
675470decc6SDave Kleikamp * here. */
676f7f4bccbSMingming Cao if (jbd2_journal_test_revoke
677470decc6SDave Kleikamp (journal, blocknr,
678470decc6SDave Kleikamp next_commit_ID)) {
679470decc6SDave Kleikamp brelse(obh);
680470decc6SDave Kleikamp ++info->nr_revoke_hits;
681470decc6SDave Kleikamp goto skip_write;
682470decc6SDave Kleikamp }
683470decc6SDave Kleikamp
684c3900875SDarrick J. Wong /* Look for block corruption */
685c3900875SDarrick J. Wong if (!jbd2_block_tag_csum_verify(
686a20d1cebSTheodore Ts'o journal, &tag, (journal_block_tag3_t *)tagp,
687a20d1cebSTheodore Ts'o obh->b_data, be32_to_cpu(tmp->h_sequence))) {
688c3900875SDarrick J. Wong brelse(obh);
6896a797d27SDarrick J. Wong success = -EFSBADCRC;
690a67c848aSDmitry Monakhov printk(KERN_ERR "JBD2: Invalid "
691c3900875SDarrick J. Wong "checksum recovering "
692ed65b00fSTheodore Ts'o "data block %llu in "
693ed65b00fSTheodore Ts'o "log\n", blocknr);
694022eaa75SDarrick J. Wong block_error = 1;
695022eaa75SDarrick J. Wong goto skip_write;
696c3900875SDarrick J. Wong }
697c3900875SDarrick J. Wong
698470decc6SDave Kleikamp /* Find a buffer for the new
699470decc6SDave Kleikamp * data being restored */
700470decc6SDave Kleikamp nbh = __getblk(journal->j_fs_dev,
701470decc6SDave Kleikamp blocknr,
702470decc6SDave Kleikamp journal->j_blocksize);
703470decc6SDave Kleikamp if (nbh == NULL) {
704470decc6SDave Kleikamp printk(KERN_ERR
705f2a44523SEryu Guan "JBD2: Out of memory "
706470decc6SDave Kleikamp "during recovery.\n");
707470decc6SDave Kleikamp err = -ENOMEM;
708470decc6SDave Kleikamp brelse(bh);
709470decc6SDave Kleikamp brelse(obh);
710470decc6SDave Kleikamp goto failed;
711470decc6SDave Kleikamp }
712470decc6SDave Kleikamp
713470decc6SDave Kleikamp lock_buffer(nbh);
714470decc6SDave Kleikamp memcpy(nbh->b_data, obh->b_data,
715470decc6SDave Kleikamp journal->j_blocksize);
716f7f4bccbSMingming Cao if (flags & JBD2_FLAG_ESCAPE) {
717d0025676SDuane Griffin *((__be32 *)nbh->b_data) =
718f7f4bccbSMingming Cao cpu_to_be32(JBD2_MAGIC_NUMBER);
719470decc6SDave Kleikamp }
720470decc6SDave Kleikamp
721470decc6SDave Kleikamp BUFFER_TRACE(nbh, "marking dirty");
722470decc6SDave Kleikamp set_buffer_uptodate(nbh);
723470decc6SDave Kleikamp mark_buffer_dirty(nbh);
724470decc6SDave Kleikamp BUFFER_TRACE(nbh, "marking uptodate");
725470decc6SDave Kleikamp ++info->nr_replays;
726470decc6SDave Kleikamp unlock_buffer(nbh);
727470decc6SDave Kleikamp brelse(obh);
728470decc6SDave Kleikamp brelse(nbh);
729470decc6SDave Kleikamp }
730470decc6SDave Kleikamp
731470decc6SDave Kleikamp skip_write:
732b517bea1SZach Brown tagp += tag_bytes;
733f7f4bccbSMingming Cao if (!(flags & JBD2_FLAG_SAME_UUID))
734470decc6SDave Kleikamp tagp += 16;
735470decc6SDave Kleikamp
736f7f4bccbSMingming Cao if (flags & JBD2_FLAG_LAST_TAG)
737470decc6SDave Kleikamp break;
738470decc6SDave Kleikamp }
739470decc6SDave Kleikamp
740470decc6SDave Kleikamp brelse(bh);
741470decc6SDave Kleikamp continue;
742470decc6SDave Kleikamp
743f7f4bccbSMingming Cao case JBD2_COMMIT_BLOCK:
744818d276cSGirish Shilamkar /* How to differentiate between interrupted commit
745818d276cSGirish Shilamkar * and journal corruption ?
746818d276cSGirish Shilamkar *
747818d276cSGirish Shilamkar * {nth transaction}
748818d276cSGirish Shilamkar * Checksum Verification Failed
749818d276cSGirish Shilamkar * |
750818d276cSGirish Shilamkar * ____________________
751818d276cSGirish Shilamkar * | |
752818d276cSGirish Shilamkar * async_commit sync_commit
753818d276cSGirish Shilamkar * | |
754818d276cSGirish Shilamkar * | GO TO NEXT "Journal Corruption"
755818d276cSGirish Shilamkar * | TRANSACTION
756818d276cSGirish Shilamkar * |
757818d276cSGirish Shilamkar * {(n+1)th transaction}
758818d276cSGirish Shilamkar * |
759818d276cSGirish Shilamkar * _______|______________
760818d276cSGirish Shilamkar * | |
761818d276cSGirish Shilamkar * Commit block found Commit block not found
762818d276cSGirish Shilamkar * | |
763818d276cSGirish Shilamkar * "Journal Corruption" |
764818d276cSGirish Shilamkar * _____________|_________
765818d276cSGirish Shilamkar * | |
766818d276cSGirish Shilamkar * nth trans corrupt OR nth trans
767818d276cSGirish Shilamkar * and (n+1)th interrupted interrupted
768818d276cSGirish Shilamkar * before commit block
769818d276cSGirish Shilamkar * could reach the disk.
770818d276cSGirish Shilamkar * (Cannot find the difference in above
771818d276cSGirish Shilamkar * mentioned conditions. Hence assume
772818d276cSGirish Shilamkar * "Interrupted Commit".)
773818d276cSGirish Shilamkar */
774fc750a3bSchangfengnan commit_time = be64_to_cpu(
775fc750a3bSchangfengnan ((struct commit_header *)bh->b_data)->h_commit_sec);
776fc750a3bSchangfengnan /*
777fc750a3bSchangfengnan * If need_check_commit_time is set, it means we are in
778fc750a3bSchangfengnan * PASS_SCAN and csum verify failed before. If
779fc750a3bSchangfengnan * commit_time is increasing, it's the same journal,
780fc750a3bSchangfengnan * otherwise it is stale journal block, just end this
781fc750a3bSchangfengnan * recovery.
782fc750a3bSchangfengnan */
783fc750a3bSchangfengnan if (need_check_commit_time) {
784fc750a3bSchangfengnan if (commit_time >= last_trans_commit_time) {
785fc750a3bSchangfengnan pr_err("JBD2: Invalid checksum found in transaction %u\n",
786fc750a3bSchangfengnan next_commit_ID);
787fc750a3bSchangfengnan err = -EFSBADCRC;
788fc750a3bSchangfengnan brelse(bh);
789fc750a3bSchangfengnan goto failed;
790fc750a3bSchangfengnan }
791fc750a3bSchangfengnan ignore_crc_mismatch:
792fc750a3bSchangfengnan /*
793fc750a3bSchangfengnan * It likely does not belong to same journal,
794fc750a3bSchangfengnan * just end this recovery with success.
795fc750a3bSchangfengnan */
796cb3b3bf2SJan Kara jbd2_debug(1, "JBD2: Invalid checksum ignored in transaction %u, likely stale data\n",
797fc750a3bSchangfengnan next_commit_ID);
798fc750a3bSchangfengnan brelse(bh);
799fc750a3bSchangfengnan goto done;
800fc750a3bSchangfengnan }
801818d276cSGirish Shilamkar
802fc750a3bSchangfengnan /*
803fc750a3bSchangfengnan * Found an expected commit block: if checksums
804fc750a3bSchangfengnan * are present, verify them in PASS_SCAN; else not
805818d276cSGirish Shilamkar * much to do other than move on to the next sequence
806fc750a3bSchangfengnan * number.
807fc750a3bSchangfengnan */
808818d276cSGirish Shilamkar if (pass == PASS_SCAN &&
80956316a0dSDarrick J. Wong jbd2_has_feature_checksum(journal)) {
810818d276cSGirish Shilamkar struct commit_header *cbh =
811818d276cSGirish Shilamkar (struct commit_header *)bh->b_data;
812818d276cSGirish Shilamkar unsigned found_chksum =
813818d276cSGirish Shilamkar be32_to_cpu(cbh->h_chksum[0]);
814818d276cSGirish Shilamkar
815818d276cSGirish Shilamkar if (info->end_transaction) {
816624080edSTheodore Ts'o journal->j_failed_commit =
817624080edSTheodore Ts'o info->end_transaction;
818818d276cSGirish Shilamkar brelse(bh);
819818d276cSGirish Shilamkar break;
820818d276cSGirish Shilamkar }
821818d276cSGirish Shilamkar
82200a3fff0SShijie Luo /* Neither checksum match nor unused? */
82300a3fff0SShijie Luo if (!((crc32_sum == found_chksum &&
82400a3fff0SShijie Luo cbh->h_chksum_type ==
82500a3fff0SShijie Luo JBD2_CRC32_CHKSUM &&
826818d276cSGirish Shilamkar cbh->h_chksum_size ==
82700a3fff0SShijie Luo JBD2_CRC32_CHKSUM_SIZE) ||
82800a3fff0SShijie Luo (cbh->h_chksum_type == 0 &&
829818d276cSGirish Shilamkar cbh->h_chksum_size == 0 &&
83000a3fff0SShijie Luo found_chksum == 0)))
83100a3fff0SShijie Luo goto chksum_error;
832818d276cSGirish Shilamkar
833818d276cSGirish Shilamkar crc32_sum = ~0;
834818d276cSGirish Shilamkar }
8351f56c589SDarrick J. Wong if (pass == PASS_SCAN &&
8361f56c589SDarrick J. Wong !jbd2_commit_block_csum_verify(journal,
8371f56c589SDarrick J. Wong bh->b_data)) {
838*e16c4c24SYe Bin if (jbd2_commit_block_csum_verify_partial(
839*e16c4c24SYe Bin journal,
840*e16c4c24SYe Bin bh->b_data)) {
841*e16c4c24SYe Bin pr_notice("JBD2: Find incomplete commit block in transaction %u block %lu\n",
842*e16c4c24SYe Bin next_commit_ID, next_log_block);
843*e16c4c24SYe Bin goto chksum_ok;
844*e16c4c24SYe Bin }
84500a3fff0SShijie Luo chksum_error:
846fc750a3bSchangfengnan if (commit_time < last_trans_commit_time)
847fc750a3bSchangfengnan goto ignore_crc_mismatch;
8481f56c589SDarrick J. Wong info->end_transaction = next_commit_ID;
849c7fc6055SZhang Yi info->head_block = head_block;
8501f56c589SDarrick J. Wong
85156316a0dSDarrick J. Wong if (!jbd2_has_feature_async_commit(journal)) {
8521f56c589SDarrick J. Wong journal->j_failed_commit =
8531f56c589SDarrick J. Wong next_commit_ID;
8541f56c589SDarrick J. Wong brelse(bh);
8551f56c589SDarrick J. Wong break;
8561f56c589SDarrick J. Wong }
8571f56c589SDarrick J. Wong }
858c7fc6055SZhang Yi if (pass == PASS_SCAN) {
859*e16c4c24SYe Bin chksum_ok:
860fc750a3bSchangfengnan last_trans_commit_time = commit_time;
861c7fc6055SZhang Yi head_block = next_log_block;
862c7fc6055SZhang Yi }
863470decc6SDave Kleikamp brelse(bh);
864470decc6SDave Kleikamp next_commit_ID++;
865470decc6SDave Kleikamp continue;
866470decc6SDave Kleikamp
867f7f4bccbSMingming Cao case JBD2_REVOKE_BLOCK:
868fc750a3bSchangfengnan /*
869fc750a3bSchangfengnan * Check revoke block crc in pass_scan, if csum verify
870fc750a3bSchangfengnan * failed, check commit block time later.
871fc750a3bSchangfengnan */
872fc750a3bSchangfengnan if (pass == PASS_SCAN &&
873fc750a3bSchangfengnan !jbd2_descriptor_block_csum_verify(journal,
874fc750a3bSchangfengnan bh->b_data)) {
875cb3b3bf2SJan Kara jbd2_debug(1, "JBD2: invalid revoke block found in %lu\n",
876fc750a3bSchangfengnan next_log_block);
877fc750a3bSchangfengnan need_check_commit_time = true;
878fc750a3bSchangfengnan }
879*e16c4c24SYe Bin
880470decc6SDave Kleikamp /* If we aren't in the REVOKE pass, then we can
881470decc6SDave Kleikamp * just skip over this block. */
882470decc6SDave Kleikamp if (pass != PASS_REVOKE) {
883470decc6SDave Kleikamp brelse(bh);
884470decc6SDave Kleikamp continue;
885470decc6SDave Kleikamp }
886470decc6SDave Kleikamp
887470decc6SDave Kleikamp err = scan_revoke_records(journal, bh,
888470decc6SDave Kleikamp next_commit_ID, info);
889470decc6SDave Kleikamp brelse(bh);
890470decc6SDave Kleikamp if (err)
891470decc6SDave Kleikamp goto failed;
892470decc6SDave Kleikamp continue;
893470decc6SDave Kleikamp
894470decc6SDave Kleikamp default:
895cb3b3bf2SJan Kara jbd2_debug(3, "Unrecognised magic %d, end of scan.\n",
896470decc6SDave Kleikamp blocktype);
897470decc6SDave Kleikamp brelse(bh);
898470decc6SDave Kleikamp goto done;
899470decc6SDave Kleikamp }
900470decc6SDave Kleikamp }
901470decc6SDave Kleikamp
902470decc6SDave Kleikamp done:
903470decc6SDave Kleikamp /*
904470decc6SDave Kleikamp * We broke out of the log scan loop: either we came to the
905470decc6SDave Kleikamp * known end of the log or we found an unexpected block in the
906470decc6SDave Kleikamp * log. If the latter happened, then we know that the "current"
907470decc6SDave Kleikamp * transaction marks the end of the valid log.
908470decc6SDave Kleikamp */
909470decc6SDave Kleikamp
910818d276cSGirish Shilamkar if (pass == PASS_SCAN) {
911818d276cSGirish Shilamkar if (!info->end_transaction)
912470decc6SDave Kleikamp info->end_transaction = next_commit_ID;
913c7fc6055SZhang Yi if (!info->head_block)
914c7fc6055SZhang Yi info->head_block = head_block;
915818d276cSGirish Shilamkar } else {
916470decc6SDave Kleikamp /* It's really bad news if different passes end up at
917470decc6SDave Kleikamp * different places (but possible due to IO errors). */
918470decc6SDave Kleikamp if (info->end_transaction != next_commit_ID) {
919f2a44523SEryu Guan printk(KERN_ERR "JBD2: recovery pass %d ended at "
920470decc6SDave Kleikamp "transaction %u, expected %u\n",
921470decc6SDave Kleikamp pass, next_commit_ID, info->end_transaction);
922470decc6SDave Kleikamp if (!success)
923470decc6SDave Kleikamp success = -EIO;
924470decc6SDave Kleikamp }
925470decc6SDave Kleikamp }
9265b849b5fSHarshad Shirwadkar
9275b849b5fSHarshad Shirwadkar if (jbd2_has_feature_fast_commit(journal) && pass != PASS_REVOKE) {
9285b849b5fSHarshad Shirwadkar err = fc_do_one_pass(journal, info, pass);
9295b849b5fSHarshad Shirwadkar if (err)
9305b849b5fSHarshad Shirwadkar success = err;
9315b849b5fSHarshad Shirwadkar }
9325b849b5fSHarshad Shirwadkar
933022eaa75SDarrick J. Wong if (block_error && success == 0)
934022eaa75SDarrick J. Wong success = -EIO;
935470decc6SDave Kleikamp return success;
936470decc6SDave Kleikamp
937470decc6SDave Kleikamp failed:
938470decc6SDave Kleikamp return err;
939470decc6SDave Kleikamp }
940470decc6SDave Kleikamp
941470decc6SDave Kleikamp /* Scan a revoke record, marking all blocks mentioned as revoked. */
942470decc6SDave Kleikamp
scan_revoke_records(journal_t * journal,struct buffer_head * bh,tid_t sequence,struct recovery_info * info)943470decc6SDave Kleikamp static int scan_revoke_records(journal_t *journal, struct buffer_head *bh,
944470decc6SDave Kleikamp tid_t sequence, struct recovery_info *info)
945470decc6SDave Kleikamp {
946f7f4bccbSMingming Cao jbd2_journal_revoke_header_t *header;
947470decc6SDave Kleikamp int offset, max;
9484009cc7aSTheodore Ts'o unsigned csum_size = 0;
949e531d0bcSDarrick J. Wong __u32 rcount;
950b517bea1SZach Brown int record_len = 4;
951470decc6SDave Kleikamp
952f7f4bccbSMingming Cao header = (jbd2_journal_revoke_header_t *) bh->b_data;
953f7f4bccbSMingming Cao offset = sizeof(jbd2_journal_revoke_header_t);
954e531d0bcSDarrick J. Wong rcount = be32_to_cpu(header->r_count);
955470decc6SDave Kleikamp
956e531d0bcSDarrick J. Wong if (jbd2_journal_has_csum_v2or3(journal))
9571101cd4dSJan Kara csum_size = sizeof(struct jbd2_journal_block_tail);
958e531d0bcSDarrick J. Wong if (rcount > journal->j_blocksize - csum_size)
959e531d0bcSDarrick J. Wong return -EINVAL;
960e531d0bcSDarrick J. Wong max = rcount;
961e531d0bcSDarrick J. Wong
96256316a0dSDarrick J. Wong if (jbd2_has_feature_64bit(journal))
963b517bea1SZach Brown record_len = 8;
964b517bea1SZach Brown
965b517bea1SZach Brown while (offset + record_len <= max) {
96618eba7aaSMingming Cao unsigned long long blocknr;
967470decc6SDave Kleikamp int err;
968470decc6SDave Kleikamp
969b517bea1SZach Brown if (record_len == 4)
970470decc6SDave Kleikamp blocknr = be32_to_cpu(* ((__be32 *) (bh->b_data+offset)));
971b517bea1SZach Brown else
972b517bea1SZach Brown blocknr = be64_to_cpu(* ((__be64 *) (bh->b_data+offset)));
973b517bea1SZach Brown offset += record_len;
974f7f4bccbSMingming Cao err = jbd2_journal_set_revoke(journal, blocknr, sequence);
975470decc6SDave Kleikamp if (err)
976470decc6SDave Kleikamp return err;
977470decc6SDave Kleikamp ++info->nr_revokes;
978470decc6SDave Kleikamp }
979470decc6SDave Kleikamp return 0;
980470decc6SDave Kleikamp }
981