xref: /openbmc/linux/fs/ext4/fast_commit.c (revision b1b7dce3)
16866d7b3SHarshad Shirwadkar // SPDX-License-Identifier: GPL-2.0
26866d7b3SHarshad Shirwadkar 
36866d7b3SHarshad Shirwadkar /*
46866d7b3SHarshad Shirwadkar  * fs/ext4/fast_commit.c
56866d7b3SHarshad Shirwadkar  *
66866d7b3SHarshad Shirwadkar  * Written by Harshad Shirwadkar <harshadshirwadkar@gmail.com>
76866d7b3SHarshad Shirwadkar  *
86866d7b3SHarshad Shirwadkar  * Ext4 fast commits routines.
96866d7b3SHarshad Shirwadkar  */
10aa75f4d3SHarshad Shirwadkar #include "ext4.h"
116866d7b3SHarshad Shirwadkar #include "ext4_jbd2.h"
12aa75f4d3SHarshad Shirwadkar #include "ext4_extents.h"
13aa75f4d3SHarshad Shirwadkar #include "mballoc.h"
14aa75f4d3SHarshad Shirwadkar 
15aa75f4d3SHarshad Shirwadkar /*
16aa75f4d3SHarshad Shirwadkar  * Ext4 Fast Commits
17aa75f4d3SHarshad Shirwadkar  * -----------------
18aa75f4d3SHarshad Shirwadkar  *
19aa75f4d3SHarshad Shirwadkar  * Ext4 fast commits implement fine grained journalling for Ext4.
20aa75f4d3SHarshad Shirwadkar  *
21aa75f4d3SHarshad Shirwadkar  * Fast commits are organized as a log of tag-length-value (TLV) structs. (See
22aa75f4d3SHarshad Shirwadkar  * struct ext4_fc_tl). Each TLV contains some delta that is replayed TLV by
23aa75f4d3SHarshad Shirwadkar  * TLV during the recovery phase. For the scenarios for which we currently
24aa75f4d3SHarshad Shirwadkar  * don't have replay code, fast commit falls back to full commits.
25aa75f4d3SHarshad Shirwadkar  * Fast commits record delta in one of the following three categories.
26aa75f4d3SHarshad Shirwadkar  *
27aa75f4d3SHarshad Shirwadkar  * (A) Directory entry updates:
28aa75f4d3SHarshad Shirwadkar  *
29aa75f4d3SHarshad Shirwadkar  * - EXT4_FC_TAG_UNLINK		- records directory entry unlink
30aa75f4d3SHarshad Shirwadkar  * - EXT4_FC_TAG_LINK		- records directory entry link
31aa75f4d3SHarshad Shirwadkar  * - EXT4_FC_TAG_CREAT		- records inode and directory entry creation
32aa75f4d3SHarshad Shirwadkar  *
33aa75f4d3SHarshad Shirwadkar  * (B) File specific data range updates:
34aa75f4d3SHarshad Shirwadkar  *
35aa75f4d3SHarshad Shirwadkar  * - EXT4_FC_TAG_ADD_RANGE	- records addition of new blocks to an inode
36aa75f4d3SHarshad Shirwadkar  * - EXT4_FC_TAG_DEL_RANGE	- records deletion of blocks from an inode
37aa75f4d3SHarshad Shirwadkar  *
38aa75f4d3SHarshad Shirwadkar  * (C) Inode metadata (mtime / ctime etc):
39aa75f4d3SHarshad Shirwadkar  *
40aa75f4d3SHarshad Shirwadkar  * - EXT4_FC_TAG_INODE		- record the inode that should be replayed
41aa75f4d3SHarshad Shirwadkar  *				  during recovery. Note that iblocks field is
42aa75f4d3SHarshad Shirwadkar  *				  not replayed and instead derived during
43aa75f4d3SHarshad Shirwadkar  *				  replay.
44aa75f4d3SHarshad Shirwadkar  * Commit Operation
45aa75f4d3SHarshad Shirwadkar  * ----------------
46aa75f4d3SHarshad Shirwadkar  * With fast commits, we maintain all the directory entry operations in the
47aa75f4d3SHarshad Shirwadkar  * order in which they are issued in an in-memory queue. This queue is flushed
48aa75f4d3SHarshad Shirwadkar  * to disk during the commit operation. We also maintain a list of inodes
49aa75f4d3SHarshad Shirwadkar  * that need to be committed during a fast commit in another in memory queue of
50aa75f4d3SHarshad Shirwadkar  * inodes. During the commit operation, we commit in the following order:
51aa75f4d3SHarshad Shirwadkar  *
52aa75f4d3SHarshad Shirwadkar  * [1] Lock inodes for any further data updates by setting COMMITTING state
53aa75f4d3SHarshad Shirwadkar  * [2] Submit data buffers of all the inodes
54aa75f4d3SHarshad Shirwadkar  * [3] Wait for [2] to complete
55aa75f4d3SHarshad Shirwadkar  * [4] Commit all the directory entry updates in the fast commit space
56aa75f4d3SHarshad Shirwadkar  * [5] Commit all the changed inode structures
57aa75f4d3SHarshad Shirwadkar  * [6] Write tail tag (this tag ensures the atomicity, please read the following
58aa75f4d3SHarshad Shirwadkar  *     section for more details).
59aa75f4d3SHarshad Shirwadkar  * [7] Wait for [4], [5] and [6] to complete.
60aa75f4d3SHarshad Shirwadkar  *
61aa75f4d3SHarshad Shirwadkar  * All the inode updates must call ext4_fc_start_update() before starting an
62aa75f4d3SHarshad Shirwadkar  * update. If such an ongoing update is present, fast commit waits for it to
63aa75f4d3SHarshad Shirwadkar  * complete. The completion of such an update is marked by
64aa75f4d3SHarshad Shirwadkar  * ext4_fc_stop_update().
65aa75f4d3SHarshad Shirwadkar  *
66aa75f4d3SHarshad Shirwadkar  * Fast Commit Ineligibility
67aa75f4d3SHarshad Shirwadkar  * -------------------------
 * Not all operations are supported by fast commits today (e.g. extended
 * attributes). Fast commit ineligibility is marked by calling one of the
70aa75f4d3SHarshad Shirwadkar  * two following functions:
71aa75f4d3SHarshad Shirwadkar  *
72aa75f4d3SHarshad Shirwadkar  * - ext4_fc_mark_ineligible(): This makes next fast commit operation to fall
73aa75f4d3SHarshad Shirwadkar  *   back to full commit. This is useful in case of transient errors.
74aa75f4d3SHarshad Shirwadkar  *
75aa75f4d3SHarshad Shirwadkar  * - ext4_fc_start_ineligible() and ext4_fc_stop_ineligible() - This makes all
76aa75f4d3SHarshad Shirwadkar  *   the fast commits happening between ext4_fc_start_ineligible() and
77aa75f4d3SHarshad Shirwadkar  *   ext4_fc_stop_ineligible() and one fast commit after the call to
78aa75f4d3SHarshad Shirwadkar  *   ext4_fc_stop_ineligible() to fall back to full commits. It is important to
79aa75f4d3SHarshad Shirwadkar  *   make one more fast commit to fall back to full commit after stop call so
 *   that it is guaranteed that the fast commit ineligible operation contained
81aa75f4d3SHarshad Shirwadkar  *   within ext4_fc_start_ineligible() and ext4_fc_stop_ineligible() is
82aa75f4d3SHarshad Shirwadkar  *   followed by at least 1 full commit.
83aa75f4d3SHarshad Shirwadkar  *
84aa75f4d3SHarshad Shirwadkar  * Atomicity of commits
85aa75f4d3SHarshad Shirwadkar  * --------------------
86a740762fSHarshad Shirwadkar  * In order to guarantee atomicity during the commit operation, fast commit
87aa75f4d3SHarshad Shirwadkar  * uses "EXT4_FC_TAG_TAIL" tag that marks a fast commit as complete. Tail
88aa75f4d3SHarshad Shirwadkar  * tag contains CRC of the contents and TID of the transaction after which
89aa75f4d3SHarshad Shirwadkar  * this fast commit should be applied. Recovery code replays fast commit
90aa75f4d3SHarshad Shirwadkar  * logs only if there's at least 1 valid tail present. For every fast commit
91aa75f4d3SHarshad Shirwadkar  * operation, there is 1 tail. This means, we may end up with multiple tails
92aa75f4d3SHarshad Shirwadkar  * in the fast commit space. Here's an example:
93aa75f4d3SHarshad Shirwadkar  *
94aa75f4d3SHarshad Shirwadkar  * - Create a new file A and remove existing file B
95aa75f4d3SHarshad Shirwadkar  * - fsync()
96aa75f4d3SHarshad Shirwadkar  * - Append contents to file A
97aa75f4d3SHarshad Shirwadkar  * - Truncate file A
98aa75f4d3SHarshad Shirwadkar  * - fsync()
99aa75f4d3SHarshad Shirwadkar  *
100aa75f4d3SHarshad Shirwadkar  * The fast commit space at the end of above operations would look like this:
101aa75f4d3SHarshad Shirwadkar  *      [HEAD] [CREAT A] [UNLINK B] [TAIL] [ADD_RANGE A] [DEL_RANGE A] [TAIL]
102aa75f4d3SHarshad Shirwadkar  *             |<---  Fast Commit 1   --->|<---      Fast Commit 2     ---->|
103aa75f4d3SHarshad Shirwadkar  *
104aa75f4d3SHarshad Shirwadkar  * Replay code should thus check for all the valid tails in the FC area.
105aa75f4d3SHarshad Shirwadkar  *
106*b1b7dce3SHarshad Shirwadkar  * Fast Commit Replay Idempotence
107*b1b7dce3SHarshad Shirwadkar  * ------------------------------
108*b1b7dce3SHarshad Shirwadkar  *
 * Fast commit tags are idempotent in nature provided the recovery code follows
110*b1b7dce3SHarshad Shirwadkar  * certain rules. The guiding principle that the commit path follows while
111*b1b7dce3SHarshad Shirwadkar  * committing is that it stores the result of a particular operation instead of
112*b1b7dce3SHarshad Shirwadkar  * storing the procedure.
113*b1b7dce3SHarshad Shirwadkar  *
114*b1b7dce3SHarshad Shirwadkar  * Let's consider this rename operation: 'mv /a /b'. Let's assume dirent '/a'
115*b1b7dce3SHarshad Shirwadkar  * was associated with inode 10. During fast commit, instead of storing this
116*b1b7dce3SHarshad Shirwadkar  * operation as a procedure "rename a to b", we store the resulting file system
117*b1b7dce3SHarshad Shirwadkar  * state as a "series" of outcomes:
118*b1b7dce3SHarshad Shirwadkar  *
119*b1b7dce3SHarshad Shirwadkar  * - Link dirent b to inode 10
120*b1b7dce3SHarshad Shirwadkar  * - Unlink dirent a
121*b1b7dce3SHarshad Shirwadkar  * - Inode <10> with valid refcount
122*b1b7dce3SHarshad Shirwadkar  *
 * Now when recovery code runs, it needs to "enforce" this state on the file
124*b1b7dce3SHarshad Shirwadkar  * system. This is what guarantees idempotence of fast commit replay.
125*b1b7dce3SHarshad Shirwadkar  *
126*b1b7dce3SHarshad Shirwadkar  * Let's take an example of a procedure that is not idempotent and see how fast
127*b1b7dce3SHarshad Shirwadkar  * commits make it idempotent. Consider following sequence of operations:
128*b1b7dce3SHarshad Shirwadkar  *
129*b1b7dce3SHarshad Shirwadkar  *     rm A;    mv B A;    read A
130*b1b7dce3SHarshad Shirwadkar  *  (x)     (y)        (z)
131*b1b7dce3SHarshad Shirwadkar  *
132*b1b7dce3SHarshad Shirwadkar  * (x), (y) and (z) are the points at which we can crash. If we store this
133*b1b7dce3SHarshad Shirwadkar  * sequence of operations as is then the replay is not idempotent. Let's say
134*b1b7dce3SHarshad Shirwadkar  * while in replay, we crash at (z). During the second replay, file A (which was
135*b1b7dce3SHarshad Shirwadkar  * actually created as a result of "mv B A" operation) would get deleted. Thus,
136*b1b7dce3SHarshad Shirwadkar  * file named A would be absent when we try to read A. So, this sequence of
137*b1b7dce3SHarshad Shirwadkar  * operations is not idempotent. However, as mentioned above, instead of storing
138*b1b7dce3SHarshad Shirwadkar  * the procedure fast commits store the outcome of each procedure. Thus the fast
139*b1b7dce3SHarshad Shirwadkar  * commit log for above procedure would be as follows:
140*b1b7dce3SHarshad Shirwadkar  *
141*b1b7dce3SHarshad Shirwadkar  * (Let's assume dirent A was linked to inode 10 and dirent B was linked to
142*b1b7dce3SHarshad Shirwadkar  * inode 11 before the replay)
143*b1b7dce3SHarshad Shirwadkar  *
144*b1b7dce3SHarshad Shirwadkar  *    [Unlink A]   [Link A to inode 11]   [Unlink B]   [Inode 11]
145*b1b7dce3SHarshad Shirwadkar  * (w)          (x)                    (y)          (z)
146*b1b7dce3SHarshad Shirwadkar  *
147*b1b7dce3SHarshad Shirwadkar  * If we crash at (z), we will have file A linked to inode 11. During the second
148*b1b7dce3SHarshad Shirwadkar  * replay, we will remove file A (inode 11). But we will create it back and make
149*b1b7dce3SHarshad Shirwadkar  * it point to inode 11. We won't find B, so we'll just skip that step. At this
150*b1b7dce3SHarshad Shirwadkar  * point, the refcount for inode 11 is not reliable, but that gets fixed by the
151*b1b7dce3SHarshad Shirwadkar  * replay of last inode 11 tag. Crashes at points (w), (x) and (y) get handled
152*b1b7dce3SHarshad Shirwadkar  * similarly. Thus, by converting a non-idempotent procedure into a series of
153*b1b7dce3SHarshad Shirwadkar  * idempotent outcomes, fast commits ensured idempotence during the replay.
154*b1b7dce3SHarshad Shirwadkar  *
155aa75f4d3SHarshad Shirwadkar  * TODOs
156aa75f4d3SHarshad Shirwadkar  * -----
157*b1b7dce3SHarshad Shirwadkar  *
158*b1b7dce3SHarshad Shirwadkar  * 0) Fast commit replay path hardening: Fast commit replay code should use
159*b1b7dce3SHarshad Shirwadkar  *    journal handles to make sure all the updates it does during the replay
160*b1b7dce3SHarshad Shirwadkar  *    path are atomic. With that if we crash during fast commit replay, after
161*b1b7dce3SHarshad Shirwadkar  *    trying to do recovery again, we will find a file system where fast commit
162*b1b7dce3SHarshad Shirwadkar  *    area is invalid (because new full commit would be found). In order to deal
163*b1b7dce3SHarshad Shirwadkar  *    with that, fast commit replay code should ensure that the "FC_REPLAY"
164*b1b7dce3SHarshad Shirwadkar  *    superblock state is persisted before starting the replay, so that after
165*b1b7dce3SHarshad Shirwadkar  *    the crash, fast commit recovery code can look at that flag and perform
166*b1b7dce3SHarshad Shirwadkar  *    fast commit recovery even if that area is invalidated by later full
167*b1b7dce3SHarshad Shirwadkar  *    commits.
168*b1b7dce3SHarshad Shirwadkar  *
169aa75f4d3SHarshad Shirwadkar  * 1) Make fast commit atomic updates more fine grained. Today, a fast commit
170aa75f4d3SHarshad Shirwadkar  *    eligible update must be protected within ext4_fc_start_update() and
 *    ext4_fc_stop_update(). These routines are called from much higher
 *    level routines. This can be made more fine grained by combining with
173aa75f4d3SHarshad Shirwadkar  *    ext4_journal_start().
174aa75f4d3SHarshad Shirwadkar  *
 * 2) Same as above for ext4_fc_start_ineligible() and ext4_fc_stop_ineligible()
176aa75f4d3SHarshad Shirwadkar  *
177aa75f4d3SHarshad Shirwadkar  * 3) Handle more ineligible cases.
178aa75f4d3SHarshad Shirwadkar  */
179aa75f4d3SHarshad Shirwadkar 
180aa75f4d3SHarshad Shirwadkar #include <trace/events/ext4.h>
181aa75f4d3SHarshad Shirwadkar static struct kmem_cache *ext4_fc_dentry_cachep;
182aa75f4d3SHarshad Shirwadkar 
183aa75f4d3SHarshad Shirwadkar static void ext4_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
184aa75f4d3SHarshad Shirwadkar {
185aa75f4d3SHarshad Shirwadkar 	BUFFER_TRACE(bh, "");
186aa75f4d3SHarshad Shirwadkar 	if (uptodate) {
187aa75f4d3SHarshad Shirwadkar 		ext4_debug("%s: Block %lld up-to-date",
188aa75f4d3SHarshad Shirwadkar 			   __func__, bh->b_blocknr);
189aa75f4d3SHarshad Shirwadkar 		set_buffer_uptodate(bh);
190aa75f4d3SHarshad Shirwadkar 	} else {
191aa75f4d3SHarshad Shirwadkar 		ext4_debug("%s: Block %lld not up-to-date",
192aa75f4d3SHarshad Shirwadkar 			   __func__, bh->b_blocknr);
193aa75f4d3SHarshad Shirwadkar 		clear_buffer_uptodate(bh);
194aa75f4d3SHarshad Shirwadkar 	}
195aa75f4d3SHarshad Shirwadkar 
196aa75f4d3SHarshad Shirwadkar 	unlock_buffer(bh);
197aa75f4d3SHarshad Shirwadkar }
198aa75f4d3SHarshad Shirwadkar 
199aa75f4d3SHarshad Shirwadkar static inline void ext4_fc_reset_inode(struct inode *inode)
200aa75f4d3SHarshad Shirwadkar {
201aa75f4d3SHarshad Shirwadkar 	struct ext4_inode_info *ei = EXT4_I(inode);
202aa75f4d3SHarshad Shirwadkar 
203aa75f4d3SHarshad Shirwadkar 	ei->i_fc_lblk_start = 0;
204aa75f4d3SHarshad Shirwadkar 	ei->i_fc_lblk_len = 0;
205aa75f4d3SHarshad Shirwadkar }
206aa75f4d3SHarshad Shirwadkar 
207aa75f4d3SHarshad Shirwadkar void ext4_fc_init_inode(struct inode *inode)
208aa75f4d3SHarshad Shirwadkar {
209aa75f4d3SHarshad Shirwadkar 	struct ext4_inode_info *ei = EXT4_I(inode);
210aa75f4d3SHarshad Shirwadkar 
211aa75f4d3SHarshad Shirwadkar 	ext4_fc_reset_inode(inode);
212aa75f4d3SHarshad Shirwadkar 	ext4_clear_inode_state(inode, EXT4_STATE_FC_COMMITTING);
213aa75f4d3SHarshad Shirwadkar 	INIT_LIST_HEAD(&ei->i_fc_list);
214aa75f4d3SHarshad Shirwadkar 	init_waitqueue_head(&ei->i_fc_wait);
215aa75f4d3SHarshad Shirwadkar 	atomic_set(&ei->i_fc_updates, 0);
216aa75f4d3SHarshad Shirwadkar }
217aa75f4d3SHarshad Shirwadkar 
218f6634e26SHarshad Shirwadkar /* This function must be called with sbi->s_fc_lock held. */
219f6634e26SHarshad Shirwadkar static void ext4_fc_wait_committing_inode(struct inode *inode)
220fa329e27STheodore Ts'o __releases(&EXT4_SB(inode->i_sb)->s_fc_lock)
221f6634e26SHarshad Shirwadkar {
222f6634e26SHarshad Shirwadkar 	wait_queue_head_t *wq;
223f6634e26SHarshad Shirwadkar 	struct ext4_inode_info *ei = EXT4_I(inode);
224f6634e26SHarshad Shirwadkar 
225f6634e26SHarshad Shirwadkar #if (BITS_PER_LONG < 64)
226f6634e26SHarshad Shirwadkar 	DEFINE_WAIT_BIT(wait, &ei->i_state_flags,
227f6634e26SHarshad Shirwadkar 			EXT4_STATE_FC_COMMITTING);
228f6634e26SHarshad Shirwadkar 	wq = bit_waitqueue(&ei->i_state_flags,
229f6634e26SHarshad Shirwadkar 				EXT4_STATE_FC_COMMITTING);
230f6634e26SHarshad Shirwadkar #else
231f6634e26SHarshad Shirwadkar 	DEFINE_WAIT_BIT(wait, &ei->i_flags,
232f6634e26SHarshad Shirwadkar 			EXT4_STATE_FC_COMMITTING);
233f6634e26SHarshad Shirwadkar 	wq = bit_waitqueue(&ei->i_flags,
234f6634e26SHarshad Shirwadkar 				EXT4_STATE_FC_COMMITTING);
235f6634e26SHarshad Shirwadkar #endif
236f6634e26SHarshad Shirwadkar 	lockdep_assert_held(&EXT4_SB(inode->i_sb)->s_fc_lock);
237f6634e26SHarshad Shirwadkar 	prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
238f6634e26SHarshad Shirwadkar 	spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
239f6634e26SHarshad Shirwadkar 	schedule();
240f6634e26SHarshad Shirwadkar 	finish_wait(wq, &wait.wq_entry);
241f6634e26SHarshad Shirwadkar }
242f6634e26SHarshad Shirwadkar 
243aa75f4d3SHarshad Shirwadkar /*
244aa75f4d3SHarshad Shirwadkar  * Inform Ext4's fast about start of an inode update
245aa75f4d3SHarshad Shirwadkar  *
246aa75f4d3SHarshad Shirwadkar  * This function is called by the high level call VFS callbacks before
247aa75f4d3SHarshad Shirwadkar  * performing any inode update. This function blocks if there's an ongoing
248aa75f4d3SHarshad Shirwadkar  * fast commit on the inode in question.
249aa75f4d3SHarshad Shirwadkar  */
250aa75f4d3SHarshad Shirwadkar void ext4_fc_start_update(struct inode *inode)
251aa75f4d3SHarshad Shirwadkar {
252aa75f4d3SHarshad Shirwadkar 	struct ext4_inode_info *ei = EXT4_I(inode);
253aa75f4d3SHarshad Shirwadkar 
2548016e29fSHarshad Shirwadkar 	if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) ||
2558016e29fSHarshad Shirwadkar 	    (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY))
256aa75f4d3SHarshad Shirwadkar 		return;
257aa75f4d3SHarshad Shirwadkar 
258aa75f4d3SHarshad Shirwadkar restart:
259aa75f4d3SHarshad Shirwadkar 	spin_lock(&EXT4_SB(inode->i_sb)->s_fc_lock);
260aa75f4d3SHarshad Shirwadkar 	if (list_empty(&ei->i_fc_list))
261aa75f4d3SHarshad Shirwadkar 		goto out;
262aa75f4d3SHarshad Shirwadkar 
263aa75f4d3SHarshad Shirwadkar 	if (ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING)) {
264f6634e26SHarshad Shirwadkar 		ext4_fc_wait_committing_inode(inode);
265aa75f4d3SHarshad Shirwadkar 		goto restart;
266aa75f4d3SHarshad Shirwadkar 	}
267aa75f4d3SHarshad Shirwadkar out:
268aa75f4d3SHarshad Shirwadkar 	atomic_inc(&ei->i_fc_updates);
269aa75f4d3SHarshad Shirwadkar 	spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
270aa75f4d3SHarshad Shirwadkar }
271aa75f4d3SHarshad Shirwadkar 
272aa75f4d3SHarshad Shirwadkar /*
273aa75f4d3SHarshad Shirwadkar  * Stop inode update and wake up waiting fast commits if any.
274aa75f4d3SHarshad Shirwadkar  */
275aa75f4d3SHarshad Shirwadkar void ext4_fc_stop_update(struct inode *inode)
276aa75f4d3SHarshad Shirwadkar {
277aa75f4d3SHarshad Shirwadkar 	struct ext4_inode_info *ei = EXT4_I(inode);
278aa75f4d3SHarshad Shirwadkar 
2798016e29fSHarshad Shirwadkar 	if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) ||
2808016e29fSHarshad Shirwadkar 	    (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY))
281aa75f4d3SHarshad Shirwadkar 		return;
282aa75f4d3SHarshad Shirwadkar 
283aa75f4d3SHarshad Shirwadkar 	if (atomic_dec_and_test(&ei->i_fc_updates))
284aa75f4d3SHarshad Shirwadkar 		wake_up_all(&ei->i_fc_wait);
285aa75f4d3SHarshad Shirwadkar }
286aa75f4d3SHarshad Shirwadkar 
287aa75f4d3SHarshad Shirwadkar /*
288aa75f4d3SHarshad Shirwadkar  * Remove inode from fast commit list. If the inode is being committed
289aa75f4d3SHarshad Shirwadkar  * we wait until inode commit is done.
290aa75f4d3SHarshad Shirwadkar  */
291aa75f4d3SHarshad Shirwadkar void ext4_fc_del(struct inode *inode)
292aa75f4d3SHarshad Shirwadkar {
293aa75f4d3SHarshad Shirwadkar 	struct ext4_inode_info *ei = EXT4_I(inode);
294aa75f4d3SHarshad Shirwadkar 
2958016e29fSHarshad Shirwadkar 	if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) ||
2968016e29fSHarshad Shirwadkar 	    (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY))
297aa75f4d3SHarshad Shirwadkar 		return;
298aa75f4d3SHarshad Shirwadkar 
299aa75f4d3SHarshad Shirwadkar restart:
300aa75f4d3SHarshad Shirwadkar 	spin_lock(&EXT4_SB(inode->i_sb)->s_fc_lock);
301aa75f4d3SHarshad Shirwadkar 	if (list_empty(&ei->i_fc_list)) {
302aa75f4d3SHarshad Shirwadkar 		spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
303aa75f4d3SHarshad Shirwadkar 		return;
304aa75f4d3SHarshad Shirwadkar 	}
305aa75f4d3SHarshad Shirwadkar 
306aa75f4d3SHarshad Shirwadkar 	if (ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING)) {
307f6634e26SHarshad Shirwadkar 		ext4_fc_wait_committing_inode(inode);
308aa75f4d3SHarshad Shirwadkar 		goto restart;
309aa75f4d3SHarshad Shirwadkar 	}
310aa75f4d3SHarshad Shirwadkar 	list_del_init(&ei->i_fc_list);
311aa75f4d3SHarshad Shirwadkar 	spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
312aa75f4d3SHarshad Shirwadkar }
313aa75f4d3SHarshad Shirwadkar 
314aa75f4d3SHarshad Shirwadkar /*
315aa75f4d3SHarshad Shirwadkar  * Mark file system as fast commit ineligible. This means that next commit
316aa75f4d3SHarshad Shirwadkar  * operation would result in a full jbd2 commit.
317aa75f4d3SHarshad Shirwadkar  */
318aa75f4d3SHarshad Shirwadkar void ext4_fc_mark_ineligible(struct super_block *sb, int reason)
319aa75f4d3SHarshad Shirwadkar {
320aa75f4d3SHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(sb);
321aa75f4d3SHarshad Shirwadkar 
3228016e29fSHarshad Shirwadkar 	if (!test_opt2(sb, JOURNAL_FAST_COMMIT) ||
3238016e29fSHarshad Shirwadkar 	    (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY))
3248016e29fSHarshad Shirwadkar 		return;
3258016e29fSHarshad Shirwadkar 
3269b5f6c9bSHarshad Shirwadkar 	ext4_set_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
327aa75f4d3SHarshad Shirwadkar 	WARN_ON(reason >= EXT4_FC_REASON_MAX);
328aa75f4d3SHarshad Shirwadkar 	sbi->s_fc_stats.fc_ineligible_reason_count[reason]++;
329aa75f4d3SHarshad Shirwadkar }
330aa75f4d3SHarshad Shirwadkar 
331aa75f4d3SHarshad Shirwadkar /*
332aa75f4d3SHarshad Shirwadkar  * Start a fast commit ineligible update. Any commits that happen while
333aa75f4d3SHarshad Shirwadkar  * such an operation is in progress fall back to full commits.
334aa75f4d3SHarshad Shirwadkar  */
335aa75f4d3SHarshad Shirwadkar void ext4_fc_start_ineligible(struct super_block *sb, int reason)
336aa75f4d3SHarshad Shirwadkar {
337aa75f4d3SHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(sb);
338aa75f4d3SHarshad Shirwadkar 
3398016e29fSHarshad Shirwadkar 	if (!test_opt2(sb, JOURNAL_FAST_COMMIT) ||
3408016e29fSHarshad Shirwadkar 	    (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY))
3418016e29fSHarshad Shirwadkar 		return;
3428016e29fSHarshad Shirwadkar 
343aa75f4d3SHarshad Shirwadkar 	WARN_ON(reason >= EXT4_FC_REASON_MAX);
344aa75f4d3SHarshad Shirwadkar 	sbi->s_fc_stats.fc_ineligible_reason_count[reason]++;
345aa75f4d3SHarshad Shirwadkar 	atomic_inc(&sbi->s_fc_ineligible_updates);
346aa75f4d3SHarshad Shirwadkar }
347aa75f4d3SHarshad Shirwadkar 
348aa75f4d3SHarshad Shirwadkar /*
349ababea77SHarshad Shirwadkar  * Stop a fast commit ineligible update. We set EXT4_MF_FC_INELIGIBLE flag here
350aa75f4d3SHarshad Shirwadkar  * to ensure that after stopping the ineligible update, at least one full
351aa75f4d3SHarshad Shirwadkar  * commit takes place.
352aa75f4d3SHarshad Shirwadkar  */
353aa75f4d3SHarshad Shirwadkar void ext4_fc_stop_ineligible(struct super_block *sb)
354aa75f4d3SHarshad Shirwadkar {
3558016e29fSHarshad Shirwadkar 	if (!test_opt2(sb, JOURNAL_FAST_COMMIT) ||
3568016e29fSHarshad Shirwadkar 	    (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY))
3578016e29fSHarshad Shirwadkar 		return;
3588016e29fSHarshad Shirwadkar 
3599b5f6c9bSHarshad Shirwadkar 	ext4_set_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
360aa75f4d3SHarshad Shirwadkar 	atomic_dec(&EXT4_SB(sb)->s_fc_ineligible_updates);
361aa75f4d3SHarshad Shirwadkar }
362aa75f4d3SHarshad Shirwadkar 
363aa75f4d3SHarshad Shirwadkar static inline int ext4_fc_is_ineligible(struct super_block *sb)
364aa75f4d3SHarshad Shirwadkar {
3659b5f6c9bSHarshad Shirwadkar 	return (ext4_test_mount_flag(sb, EXT4_MF_FC_INELIGIBLE) ||
3669b5f6c9bSHarshad Shirwadkar 		atomic_read(&EXT4_SB(sb)->s_fc_ineligible_updates));
367aa75f4d3SHarshad Shirwadkar }
368aa75f4d3SHarshad Shirwadkar 
369aa75f4d3SHarshad Shirwadkar /*
370aa75f4d3SHarshad Shirwadkar  * Generic fast commit tracking function. If this is the first time this we are
371aa75f4d3SHarshad Shirwadkar  * called after a full commit, we initialize fast commit fields and then call
372aa75f4d3SHarshad Shirwadkar  * __fc_track_fn() with update = 0. If we have already been called after a full
373aa75f4d3SHarshad Shirwadkar  * commit, we pass update = 1. Based on that, the track function can determine
374aa75f4d3SHarshad Shirwadkar  * if it needs to track a field for the first time or if it needs to just
375aa75f4d3SHarshad Shirwadkar  * update the previously tracked value.
376aa75f4d3SHarshad Shirwadkar  *
377aa75f4d3SHarshad Shirwadkar  * If enqueue is set, this function enqueues the inode in fast commit list.
378aa75f4d3SHarshad Shirwadkar  */
379aa75f4d3SHarshad Shirwadkar static int ext4_fc_track_template(
380a80f7fcfSHarshad Shirwadkar 	handle_t *handle, struct inode *inode,
381a80f7fcfSHarshad Shirwadkar 	int (*__fc_track_fn)(struct inode *, void *, bool),
382aa75f4d3SHarshad Shirwadkar 	void *args, int enqueue)
383aa75f4d3SHarshad Shirwadkar {
384aa75f4d3SHarshad Shirwadkar 	bool update = false;
385aa75f4d3SHarshad Shirwadkar 	struct ext4_inode_info *ei = EXT4_I(inode);
386aa75f4d3SHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
387a80f7fcfSHarshad Shirwadkar 	tid_t tid = 0;
388aa75f4d3SHarshad Shirwadkar 	int ret;
389aa75f4d3SHarshad Shirwadkar 
3908016e29fSHarshad Shirwadkar 	if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) ||
3918016e29fSHarshad Shirwadkar 	    (sbi->s_mount_state & EXT4_FC_REPLAY))
392aa75f4d3SHarshad Shirwadkar 		return -EOPNOTSUPP;
393aa75f4d3SHarshad Shirwadkar 
394aa75f4d3SHarshad Shirwadkar 	if (ext4_fc_is_ineligible(inode->i_sb))
395aa75f4d3SHarshad Shirwadkar 		return -EINVAL;
396aa75f4d3SHarshad Shirwadkar 
397a80f7fcfSHarshad Shirwadkar 	tid = handle->h_transaction->t_tid;
398aa75f4d3SHarshad Shirwadkar 	mutex_lock(&ei->i_fc_lock);
399a80f7fcfSHarshad Shirwadkar 	if (tid == ei->i_sync_tid) {
400aa75f4d3SHarshad Shirwadkar 		update = true;
401aa75f4d3SHarshad Shirwadkar 	} else {
402aa75f4d3SHarshad Shirwadkar 		ext4_fc_reset_inode(inode);
403a80f7fcfSHarshad Shirwadkar 		ei->i_sync_tid = tid;
404aa75f4d3SHarshad Shirwadkar 	}
405aa75f4d3SHarshad Shirwadkar 	ret = __fc_track_fn(inode, args, update);
406aa75f4d3SHarshad Shirwadkar 	mutex_unlock(&ei->i_fc_lock);
407aa75f4d3SHarshad Shirwadkar 
408aa75f4d3SHarshad Shirwadkar 	if (!enqueue)
409aa75f4d3SHarshad Shirwadkar 		return ret;
410aa75f4d3SHarshad Shirwadkar 
411aa75f4d3SHarshad Shirwadkar 	spin_lock(&sbi->s_fc_lock);
412aa75f4d3SHarshad Shirwadkar 	if (list_empty(&EXT4_I(inode)->i_fc_list))
413aa75f4d3SHarshad Shirwadkar 		list_add_tail(&EXT4_I(inode)->i_fc_list,
4149b5f6c9bSHarshad Shirwadkar 				(ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_COMMITTING)) ?
415aa75f4d3SHarshad Shirwadkar 				&sbi->s_fc_q[FC_Q_STAGING] :
416aa75f4d3SHarshad Shirwadkar 				&sbi->s_fc_q[FC_Q_MAIN]);
417aa75f4d3SHarshad Shirwadkar 	spin_unlock(&sbi->s_fc_lock);
418aa75f4d3SHarshad Shirwadkar 
419aa75f4d3SHarshad Shirwadkar 	return ret;
420aa75f4d3SHarshad Shirwadkar }
421aa75f4d3SHarshad Shirwadkar 
/* Argument bundle handed to __track_dentry_update() through its void *arg. */
struct __track_dentry_update_args {
	/* Directory entry whose name and parent inode number get recorded. */
	struct dentry *dentry;
	/* Operation tag stored in the queued update (EXT4_FC_TAG_*). */
	int op;
};
426aa75f4d3SHarshad Shirwadkar 
427aa75f4d3SHarshad Shirwadkar /* __track_fn for directory entry updates. Called with ei->i_fc_lock. */
428aa75f4d3SHarshad Shirwadkar static int __track_dentry_update(struct inode *inode, void *arg, bool update)
429aa75f4d3SHarshad Shirwadkar {
430aa75f4d3SHarshad Shirwadkar 	struct ext4_fc_dentry_update *node;
431aa75f4d3SHarshad Shirwadkar 	struct ext4_inode_info *ei = EXT4_I(inode);
432aa75f4d3SHarshad Shirwadkar 	struct __track_dentry_update_args *dentry_update =
433aa75f4d3SHarshad Shirwadkar 		(struct __track_dentry_update_args *)arg;
434aa75f4d3SHarshad Shirwadkar 	struct dentry *dentry = dentry_update->dentry;
435aa75f4d3SHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
436aa75f4d3SHarshad Shirwadkar 
437aa75f4d3SHarshad Shirwadkar 	mutex_unlock(&ei->i_fc_lock);
438aa75f4d3SHarshad Shirwadkar 	node = kmem_cache_alloc(ext4_fc_dentry_cachep, GFP_NOFS);
439aa75f4d3SHarshad Shirwadkar 	if (!node) {
440b21ebf14SHarshad Shirwadkar 		ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM);
441aa75f4d3SHarshad Shirwadkar 		mutex_lock(&ei->i_fc_lock);
442aa75f4d3SHarshad Shirwadkar 		return -ENOMEM;
443aa75f4d3SHarshad Shirwadkar 	}
444aa75f4d3SHarshad Shirwadkar 
445aa75f4d3SHarshad Shirwadkar 	node->fcd_op = dentry_update->op;
446aa75f4d3SHarshad Shirwadkar 	node->fcd_parent = dentry->d_parent->d_inode->i_ino;
447aa75f4d3SHarshad Shirwadkar 	node->fcd_ino = inode->i_ino;
448aa75f4d3SHarshad Shirwadkar 	if (dentry->d_name.len > DNAME_INLINE_LEN) {
449aa75f4d3SHarshad Shirwadkar 		node->fcd_name.name = kmalloc(dentry->d_name.len, GFP_NOFS);
450aa75f4d3SHarshad Shirwadkar 		if (!node->fcd_name.name) {
451aa75f4d3SHarshad Shirwadkar 			kmem_cache_free(ext4_fc_dentry_cachep, node);
452aa75f4d3SHarshad Shirwadkar 			ext4_fc_mark_ineligible(inode->i_sb,
453b21ebf14SHarshad Shirwadkar 				EXT4_FC_REASON_NOMEM);
454aa75f4d3SHarshad Shirwadkar 			mutex_lock(&ei->i_fc_lock);
455aa75f4d3SHarshad Shirwadkar 			return -ENOMEM;
456aa75f4d3SHarshad Shirwadkar 		}
457aa75f4d3SHarshad Shirwadkar 		memcpy((u8 *)node->fcd_name.name, dentry->d_name.name,
458aa75f4d3SHarshad Shirwadkar 			dentry->d_name.len);
459aa75f4d3SHarshad Shirwadkar 	} else {
460aa75f4d3SHarshad Shirwadkar 		memcpy(node->fcd_iname, dentry->d_name.name,
461aa75f4d3SHarshad Shirwadkar 			dentry->d_name.len);
462aa75f4d3SHarshad Shirwadkar 		node->fcd_name.name = node->fcd_iname;
463aa75f4d3SHarshad Shirwadkar 	}
464aa75f4d3SHarshad Shirwadkar 	node->fcd_name.len = dentry->d_name.len;
465aa75f4d3SHarshad Shirwadkar 
466aa75f4d3SHarshad Shirwadkar 	spin_lock(&sbi->s_fc_lock);
4679b5f6c9bSHarshad Shirwadkar 	if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_COMMITTING))
468aa75f4d3SHarshad Shirwadkar 		list_add_tail(&node->fcd_list,
469aa75f4d3SHarshad Shirwadkar 				&sbi->s_fc_dentry_q[FC_Q_STAGING]);
470aa75f4d3SHarshad Shirwadkar 	else
471aa75f4d3SHarshad Shirwadkar 		list_add_tail(&node->fcd_list, &sbi->s_fc_dentry_q[FC_Q_MAIN]);
472aa75f4d3SHarshad Shirwadkar 	spin_unlock(&sbi->s_fc_lock);
473aa75f4d3SHarshad Shirwadkar 	mutex_lock(&ei->i_fc_lock);
474aa75f4d3SHarshad Shirwadkar 
475aa75f4d3SHarshad Shirwadkar 	return 0;
476aa75f4d3SHarshad Shirwadkar }
477aa75f4d3SHarshad Shirwadkar 
478a80f7fcfSHarshad Shirwadkar void __ext4_fc_track_unlink(handle_t *handle,
479a80f7fcfSHarshad Shirwadkar 		struct inode *inode, struct dentry *dentry)
480aa75f4d3SHarshad Shirwadkar {
481aa75f4d3SHarshad Shirwadkar 	struct __track_dentry_update_args args;
482aa75f4d3SHarshad Shirwadkar 	int ret;
483aa75f4d3SHarshad Shirwadkar 
484aa75f4d3SHarshad Shirwadkar 	args.dentry = dentry;
485aa75f4d3SHarshad Shirwadkar 	args.op = EXT4_FC_TAG_UNLINK;
486aa75f4d3SHarshad Shirwadkar 
487a80f7fcfSHarshad Shirwadkar 	ret = ext4_fc_track_template(handle, inode, __track_dentry_update,
488aa75f4d3SHarshad Shirwadkar 					(void *)&args, 0);
489aa75f4d3SHarshad Shirwadkar 	trace_ext4_fc_track_unlink(inode, dentry, ret);
490aa75f4d3SHarshad Shirwadkar }
491aa75f4d3SHarshad Shirwadkar 
492a80f7fcfSHarshad Shirwadkar void ext4_fc_track_unlink(handle_t *handle, struct dentry *dentry)
493a80f7fcfSHarshad Shirwadkar {
494a80f7fcfSHarshad Shirwadkar 	__ext4_fc_track_unlink(handle, d_inode(dentry), dentry);
495a80f7fcfSHarshad Shirwadkar }
496a80f7fcfSHarshad Shirwadkar 
497a80f7fcfSHarshad Shirwadkar void __ext4_fc_track_link(handle_t *handle,
498a80f7fcfSHarshad Shirwadkar 	struct inode *inode, struct dentry *dentry)
499aa75f4d3SHarshad Shirwadkar {
500aa75f4d3SHarshad Shirwadkar 	struct __track_dentry_update_args args;
501aa75f4d3SHarshad Shirwadkar 	int ret;
502aa75f4d3SHarshad Shirwadkar 
503aa75f4d3SHarshad Shirwadkar 	args.dentry = dentry;
504aa75f4d3SHarshad Shirwadkar 	args.op = EXT4_FC_TAG_LINK;
505aa75f4d3SHarshad Shirwadkar 
506a80f7fcfSHarshad Shirwadkar 	ret = ext4_fc_track_template(handle, inode, __track_dentry_update,
507aa75f4d3SHarshad Shirwadkar 					(void *)&args, 0);
508aa75f4d3SHarshad Shirwadkar 	trace_ext4_fc_track_link(inode, dentry, ret);
509aa75f4d3SHarshad Shirwadkar }
510aa75f4d3SHarshad Shirwadkar 
511a80f7fcfSHarshad Shirwadkar void ext4_fc_track_link(handle_t *handle, struct dentry *dentry)
512a80f7fcfSHarshad Shirwadkar {
513a80f7fcfSHarshad Shirwadkar 	__ext4_fc_track_link(handle, d_inode(dentry), dentry);
514a80f7fcfSHarshad Shirwadkar }
515a80f7fcfSHarshad Shirwadkar 
516a80f7fcfSHarshad Shirwadkar void ext4_fc_track_create(handle_t *handle, struct dentry *dentry)
517aa75f4d3SHarshad Shirwadkar {
518aa75f4d3SHarshad Shirwadkar 	struct __track_dentry_update_args args;
519a80f7fcfSHarshad Shirwadkar 	struct inode *inode = d_inode(dentry);
520aa75f4d3SHarshad Shirwadkar 	int ret;
521aa75f4d3SHarshad Shirwadkar 
522aa75f4d3SHarshad Shirwadkar 	args.dentry = dentry;
523aa75f4d3SHarshad Shirwadkar 	args.op = EXT4_FC_TAG_CREAT;
524aa75f4d3SHarshad Shirwadkar 
525a80f7fcfSHarshad Shirwadkar 	ret = ext4_fc_track_template(handle, inode, __track_dentry_update,
526aa75f4d3SHarshad Shirwadkar 					(void *)&args, 0);
527aa75f4d3SHarshad Shirwadkar 	trace_ext4_fc_track_create(inode, dentry, ret);
528aa75f4d3SHarshad Shirwadkar }
529aa75f4d3SHarshad Shirwadkar 
530aa75f4d3SHarshad Shirwadkar /* __track_fn for inode tracking */
531aa75f4d3SHarshad Shirwadkar static int __track_inode(struct inode *inode, void *arg, bool update)
532aa75f4d3SHarshad Shirwadkar {
533aa75f4d3SHarshad Shirwadkar 	if (update)
534aa75f4d3SHarshad Shirwadkar 		return -EEXIST;
535aa75f4d3SHarshad Shirwadkar 
536aa75f4d3SHarshad Shirwadkar 	EXT4_I(inode)->i_fc_lblk_len = 0;
537aa75f4d3SHarshad Shirwadkar 
538aa75f4d3SHarshad Shirwadkar 	return 0;
539aa75f4d3SHarshad Shirwadkar }
540aa75f4d3SHarshad Shirwadkar 
541a80f7fcfSHarshad Shirwadkar void ext4_fc_track_inode(handle_t *handle, struct inode *inode)
542aa75f4d3SHarshad Shirwadkar {
543aa75f4d3SHarshad Shirwadkar 	int ret;
544aa75f4d3SHarshad Shirwadkar 
545aa75f4d3SHarshad Shirwadkar 	if (S_ISDIR(inode->i_mode))
546aa75f4d3SHarshad Shirwadkar 		return;
547aa75f4d3SHarshad Shirwadkar 
548556e0319SHarshad Shirwadkar 	if (ext4_should_journal_data(inode)) {
549556e0319SHarshad Shirwadkar 		ext4_fc_mark_ineligible(inode->i_sb,
550556e0319SHarshad Shirwadkar 					EXT4_FC_REASON_INODE_JOURNAL_DATA);
551556e0319SHarshad Shirwadkar 		return;
552556e0319SHarshad Shirwadkar 	}
553556e0319SHarshad Shirwadkar 
554a80f7fcfSHarshad Shirwadkar 	ret = ext4_fc_track_template(handle, inode, __track_inode, NULL, 1);
555aa75f4d3SHarshad Shirwadkar 	trace_ext4_fc_track_inode(inode, ret);
556aa75f4d3SHarshad Shirwadkar }
557aa75f4d3SHarshad Shirwadkar 
558aa75f4d3SHarshad Shirwadkar struct __track_range_args {
559aa75f4d3SHarshad Shirwadkar 	ext4_lblk_t start, end;
560aa75f4d3SHarshad Shirwadkar };
561aa75f4d3SHarshad Shirwadkar 
562aa75f4d3SHarshad Shirwadkar /* __track_fn for tracking data updates */
563aa75f4d3SHarshad Shirwadkar static int __track_range(struct inode *inode, void *arg, bool update)
564aa75f4d3SHarshad Shirwadkar {
565aa75f4d3SHarshad Shirwadkar 	struct ext4_inode_info *ei = EXT4_I(inode);
566aa75f4d3SHarshad Shirwadkar 	ext4_lblk_t oldstart;
567aa75f4d3SHarshad Shirwadkar 	struct __track_range_args *__arg =
568aa75f4d3SHarshad Shirwadkar 		(struct __track_range_args *)arg;
569aa75f4d3SHarshad Shirwadkar 
570aa75f4d3SHarshad Shirwadkar 	if (inode->i_ino < EXT4_FIRST_INO(inode->i_sb)) {
571aa75f4d3SHarshad Shirwadkar 		ext4_debug("Special inode %ld being modified\n", inode->i_ino);
572aa75f4d3SHarshad Shirwadkar 		return -ECANCELED;
573aa75f4d3SHarshad Shirwadkar 	}
574aa75f4d3SHarshad Shirwadkar 
575aa75f4d3SHarshad Shirwadkar 	oldstart = ei->i_fc_lblk_start;
576aa75f4d3SHarshad Shirwadkar 
577aa75f4d3SHarshad Shirwadkar 	if (update && ei->i_fc_lblk_len > 0) {
578aa75f4d3SHarshad Shirwadkar 		ei->i_fc_lblk_start = min(ei->i_fc_lblk_start, __arg->start);
579aa75f4d3SHarshad Shirwadkar 		ei->i_fc_lblk_len =
580aa75f4d3SHarshad Shirwadkar 			max(oldstart + ei->i_fc_lblk_len - 1, __arg->end) -
581aa75f4d3SHarshad Shirwadkar 				ei->i_fc_lblk_start + 1;
582aa75f4d3SHarshad Shirwadkar 	} else {
583aa75f4d3SHarshad Shirwadkar 		ei->i_fc_lblk_start = __arg->start;
584aa75f4d3SHarshad Shirwadkar 		ei->i_fc_lblk_len = __arg->end - __arg->start + 1;
585aa75f4d3SHarshad Shirwadkar 	}
586aa75f4d3SHarshad Shirwadkar 
587aa75f4d3SHarshad Shirwadkar 	return 0;
588aa75f4d3SHarshad Shirwadkar }
589aa75f4d3SHarshad Shirwadkar 
590a80f7fcfSHarshad Shirwadkar void ext4_fc_track_range(handle_t *handle, struct inode *inode, ext4_lblk_t start,
591aa75f4d3SHarshad Shirwadkar 			 ext4_lblk_t end)
592aa75f4d3SHarshad Shirwadkar {
593aa75f4d3SHarshad Shirwadkar 	struct __track_range_args args;
594aa75f4d3SHarshad Shirwadkar 	int ret;
595aa75f4d3SHarshad Shirwadkar 
596aa75f4d3SHarshad Shirwadkar 	if (S_ISDIR(inode->i_mode))
597aa75f4d3SHarshad Shirwadkar 		return;
598aa75f4d3SHarshad Shirwadkar 
599aa75f4d3SHarshad Shirwadkar 	args.start = start;
600aa75f4d3SHarshad Shirwadkar 	args.end = end;
601aa75f4d3SHarshad Shirwadkar 
602a80f7fcfSHarshad Shirwadkar 	ret = ext4_fc_track_template(handle, inode,  __track_range, &args, 1);
603aa75f4d3SHarshad Shirwadkar 
604aa75f4d3SHarshad Shirwadkar 	trace_ext4_fc_track_range(inode, start, end, ret);
605aa75f4d3SHarshad Shirwadkar }
606aa75f4d3SHarshad Shirwadkar 
607aa75f4d3SHarshad Shirwadkar static void ext4_fc_submit_bh(struct super_block *sb)
608aa75f4d3SHarshad Shirwadkar {
609aa75f4d3SHarshad Shirwadkar 	int write_flags = REQ_SYNC;
610aa75f4d3SHarshad Shirwadkar 	struct buffer_head *bh = EXT4_SB(sb)->s_fc_bh;
611aa75f4d3SHarshad Shirwadkar 
612a740762fSHarshad Shirwadkar 	/* TODO: REQ_FUA | REQ_PREFLUSH is unnecessarily expensive. */
613aa75f4d3SHarshad Shirwadkar 	if (test_opt(sb, BARRIER))
614aa75f4d3SHarshad Shirwadkar 		write_flags |= REQ_FUA | REQ_PREFLUSH;
615aa75f4d3SHarshad Shirwadkar 	lock_buffer(bh);
616764b3fd3SHarshad Shirwadkar 	set_buffer_dirty(bh);
617aa75f4d3SHarshad Shirwadkar 	set_buffer_uptodate(bh);
618aa75f4d3SHarshad Shirwadkar 	bh->b_end_io = ext4_end_buffer_io_sync;
619aa75f4d3SHarshad Shirwadkar 	submit_bh(REQ_OP_WRITE, write_flags, bh);
620aa75f4d3SHarshad Shirwadkar 	EXT4_SB(sb)->s_fc_bh = NULL;
621aa75f4d3SHarshad Shirwadkar }
622aa75f4d3SHarshad Shirwadkar 
623aa75f4d3SHarshad Shirwadkar /* Ext4 commit path routines */
624aa75f4d3SHarshad Shirwadkar 
625aa75f4d3SHarshad Shirwadkar /* memzero and update CRC */
626aa75f4d3SHarshad Shirwadkar static void *ext4_fc_memzero(struct super_block *sb, void *dst, int len,
627aa75f4d3SHarshad Shirwadkar 				u32 *crc)
628aa75f4d3SHarshad Shirwadkar {
629aa75f4d3SHarshad Shirwadkar 	void *ret;
630aa75f4d3SHarshad Shirwadkar 
631aa75f4d3SHarshad Shirwadkar 	ret = memset(dst, 0, len);
632aa75f4d3SHarshad Shirwadkar 	if (crc)
633aa75f4d3SHarshad Shirwadkar 		*crc = ext4_chksum(EXT4_SB(sb), *crc, dst, len);
634aa75f4d3SHarshad Shirwadkar 	return ret;
635aa75f4d3SHarshad Shirwadkar }
636aa75f4d3SHarshad Shirwadkar 
637aa75f4d3SHarshad Shirwadkar /*
638aa75f4d3SHarshad Shirwadkar  * Allocate len bytes on a fast commit buffer.
639aa75f4d3SHarshad Shirwadkar  *
640aa75f4d3SHarshad Shirwadkar  * During the commit time this function is used to manage fast commit
641aa75f4d3SHarshad Shirwadkar  * block space. We don't split a fast commit log onto different
642aa75f4d3SHarshad Shirwadkar  * blocks. So this function makes sure that if there's not enough space
643aa75f4d3SHarshad Shirwadkar  * on the current block, the remaining space in the current block is
644aa75f4d3SHarshad Shirwadkar  * marked as unused by adding EXT4_FC_TAG_PAD tag. In that case,
645aa75f4d3SHarshad Shirwadkar  * new block is from jbd2 and CRC is updated to reflect the padding
646aa75f4d3SHarshad Shirwadkar  * we added.
647aa75f4d3SHarshad Shirwadkar  */
648aa75f4d3SHarshad Shirwadkar static u8 *ext4_fc_reserve_space(struct super_block *sb, int len, u32 *crc)
649aa75f4d3SHarshad Shirwadkar {
650aa75f4d3SHarshad Shirwadkar 	struct ext4_fc_tl *tl;
651aa75f4d3SHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(sb);
652aa75f4d3SHarshad Shirwadkar 	struct buffer_head *bh;
653aa75f4d3SHarshad Shirwadkar 	int bsize = sbi->s_journal->j_blocksize;
654aa75f4d3SHarshad Shirwadkar 	int ret, off = sbi->s_fc_bytes % bsize;
655aa75f4d3SHarshad Shirwadkar 	int pad_len;
656aa75f4d3SHarshad Shirwadkar 
657aa75f4d3SHarshad Shirwadkar 	/*
658aa75f4d3SHarshad Shirwadkar 	 * After allocating len, we should have space at least for a 0 byte
659aa75f4d3SHarshad Shirwadkar 	 * padding.
660aa75f4d3SHarshad Shirwadkar 	 */
661aa75f4d3SHarshad Shirwadkar 	if (len + sizeof(struct ext4_fc_tl) > bsize)
662aa75f4d3SHarshad Shirwadkar 		return NULL;
663aa75f4d3SHarshad Shirwadkar 
664aa75f4d3SHarshad Shirwadkar 	if (bsize - off - 1 > len + sizeof(struct ext4_fc_tl)) {
665aa75f4d3SHarshad Shirwadkar 		/*
666aa75f4d3SHarshad Shirwadkar 		 * Only allocate from current buffer if we have enough space for
667aa75f4d3SHarshad Shirwadkar 		 * this request AND we have space to add a zero byte padding.
668aa75f4d3SHarshad Shirwadkar 		 */
669aa75f4d3SHarshad Shirwadkar 		if (!sbi->s_fc_bh) {
670aa75f4d3SHarshad Shirwadkar 			ret = jbd2_fc_get_buf(EXT4_SB(sb)->s_journal, &bh);
671aa75f4d3SHarshad Shirwadkar 			if (ret)
672aa75f4d3SHarshad Shirwadkar 				return NULL;
673aa75f4d3SHarshad Shirwadkar 			sbi->s_fc_bh = bh;
674aa75f4d3SHarshad Shirwadkar 		}
675aa75f4d3SHarshad Shirwadkar 		sbi->s_fc_bytes += len;
676aa75f4d3SHarshad Shirwadkar 		return sbi->s_fc_bh->b_data + off;
677aa75f4d3SHarshad Shirwadkar 	}
678aa75f4d3SHarshad Shirwadkar 	/* Need to add PAD tag */
679aa75f4d3SHarshad Shirwadkar 	tl = (struct ext4_fc_tl *)(sbi->s_fc_bh->b_data + off);
680aa75f4d3SHarshad Shirwadkar 	tl->fc_tag = cpu_to_le16(EXT4_FC_TAG_PAD);
681aa75f4d3SHarshad Shirwadkar 	pad_len = bsize - off - 1 - sizeof(struct ext4_fc_tl);
682aa75f4d3SHarshad Shirwadkar 	tl->fc_len = cpu_to_le16(pad_len);
683aa75f4d3SHarshad Shirwadkar 	if (crc)
684aa75f4d3SHarshad Shirwadkar 		*crc = ext4_chksum(sbi, *crc, tl, sizeof(*tl));
685aa75f4d3SHarshad Shirwadkar 	if (pad_len > 0)
686aa75f4d3SHarshad Shirwadkar 		ext4_fc_memzero(sb, tl + 1, pad_len, crc);
687aa75f4d3SHarshad Shirwadkar 	ext4_fc_submit_bh(sb);
688aa75f4d3SHarshad Shirwadkar 
689aa75f4d3SHarshad Shirwadkar 	ret = jbd2_fc_get_buf(EXT4_SB(sb)->s_journal, &bh);
690aa75f4d3SHarshad Shirwadkar 	if (ret)
691aa75f4d3SHarshad Shirwadkar 		return NULL;
692aa75f4d3SHarshad Shirwadkar 	sbi->s_fc_bh = bh;
693aa75f4d3SHarshad Shirwadkar 	sbi->s_fc_bytes = (sbi->s_fc_bytes / bsize + 1) * bsize + len;
694aa75f4d3SHarshad Shirwadkar 	return sbi->s_fc_bh->b_data;
695aa75f4d3SHarshad Shirwadkar }
696aa75f4d3SHarshad Shirwadkar 
697aa75f4d3SHarshad Shirwadkar /* memcpy to fc reserved space and update CRC */
698aa75f4d3SHarshad Shirwadkar static void *ext4_fc_memcpy(struct super_block *sb, void *dst, const void *src,
699aa75f4d3SHarshad Shirwadkar 				int len, u32 *crc)
700aa75f4d3SHarshad Shirwadkar {
701aa75f4d3SHarshad Shirwadkar 	if (crc)
702aa75f4d3SHarshad Shirwadkar 		*crc = ext4_chksum(EXT4_SB(sb), *crc, src, len);
703aa75f4d3SHarshad Shirwadkar 	return memcpy(dst, src, len);
704aa75f4d3SHarshad Shirwadkar }
705aa75f4d3SHarshad Shirwadkar 
706aa75f4d3SHarshad Shirwadkar /*
707aa75f4d3SHarshad Shirwadkar  * Complete a fast commit by writing tail tag.
708aa75f4d3SHarshad Shirwadkar  *
709aa75f4d3SHarshad Shirwadkar  * Writing tail tag marks the end of a fast commit. In order to guarantee
710aa75f4d3SHarshad Shirwadkar  * atomicity, after writing tail tag, even if there's space remaining
711aa75f4d3SHarshad Shirwadkar  * in the block, next commit shouldn't use it. That's why tail tag
712aa75f4d3SHarshad Shirwadkar  * has the length as that of the remaining space on the block.
713aa75f4d3SHarshad Shirwadkar  */
714aa75f4d3SHarshad Shirwadkar static int ext4_fc_write_tail(struct super_block *sb, u32 crc)
715aa75f4d3SHarshad Shirwadkar {
716aa75f4d3SHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(sb);
717aa75f4d3SHarshad Shirwadkar 	struct ext4_fc_tl tl;
718aa75f4d3SHarshad Shirwadkar 	struct ext4_fc_tail tail;
719aa75f4d3SHarshad Shirwadkar 	int off, bsize = sbi->s_journal->j_blocksize;
720aa75f4d3SHarshad Shirwadkar 	u8 *dst;
721aa75f4d3SHarshad Shirwadkar 
722aa75f4d3SHarshad Shirwadkar 	/*
723aa75f4d3SHarshad Shirwadkar 	 * ext4_fc_reserve_space takes care of allocating an extra block if
724aa75f4d3SHarshad Shirwadkar 	 * there's no enough space on this block for accommodating this tail.
725aa75f4d3SHarshad Shirwadkar 	 */
726aa75f4d3SHarshad Shirwadkar 	dst = ext4_fc_reserve_space(sb, sizeof(tl) + sizeof(tail), &crc);
727aa75f4d3SHarshad Shirwadkar 	if (!dst)
728aa75f4d3SHarshad Shirwadkar 		return -ENOSPC;
729aa75f4d3SHarshad Shirwadkar 
730aa75f4d3SHarshad Shirwadkar 	off = sbi->s_fc_bytes % bsize;
731aa75f4d3SHarshad Shirwadkar 
732aa75f4d3SHarshad Shirwadkar 	tl.fc_tag = cpu_to_le16(EXT4_FC_TAG_TAIL);
733aa75f4d3SHarshad Shirwadkar 	tl.fc_len = cpu_to_le16(bsize - off - 1 + sizeof(struct ext4_fc_tail));
734aa75f4d3SHarshad Shirwadkar 	sbi->s_fc_bytes = round_up(sbi->s_fc_bytes, bsize);
735aa75f4d3SHarshad Shirwadkar 
736aa75f4d3SHarshad Shirwadkar 	ext4_fc_memcpy(sb, dst, &tl, sizeof(tl), &crc);
737aa75f4d3SHarshad Shirwadkar 	dst += sizeof(tl);
738aa75f4d3SHarshad Shirwadkar 	tail.fc_tid = cpu_to_le32(sbi->s_journal->j_running_transaction->t_tid);
739aa75f4d3SHarshad Shirwadkar 	ext4_fc_memcpy(sb, dst, &tail.fc_tid, sizeof(tail.fc_tid), &crc);
740aa75f4d3SHarshad Shirwadkar 	dst += sizeof(tail.fc_tid);
741aa75f4d3SHarshad Shirwadkar 	tail.fc_crc = cpu_to_le32(crc);
742aa75f4d3SHarshad Shirwadkar 	ext4_fc_memcpy(sb, dst, &tail.fc_crc, sizeof(tail.fc_crc), NULL);
743aa75f4d3SHarshad Shirwadkar 
744aa75f4d3SHarshad Shirwadkar 	ext4_fc_submit_bh(sb);
745aa75f4d3SHarshad Shirwadkar 
746aa75f4d3SHarshad Shirwadkar 	return 0;
747aa75f4d3SHarshad Shirwadkar }
748aa75f4d3SHarshad Shirwadkar 
749aa75f4d3SHarshad Shirwadkar /*
750aa75f4d3SHarshad Shirwadkar  * Adds tag, length, value and updates CRC. Returns true if tlv was added.
751aa75f4d3SHarshad Shirwadkar  * Returns false if there's not enough space.
752aa75f4d3SHarshad Shirwadkar  */
753aa75f4d3SHarshad Shirwadkar static bool ext4_fc_add_tlv(struct super_block *sb, u16 tag, u16 len, u8 *val,
754aa75f4d3SHarshad Shirwadkar 			   u32 *crc)
755aa75f4d3SHarshad Shirwadkar {
756aa75f4d3SHarshad Shirwadkar 	struct ext4_fc_tl tl;
757aa75f4d3SHarshad Shirwadkar 	u8 *dst;
758aa75f4d3SHarshad Shirwadkar 
759aa75f4d3SHarshad Shirwadkar 	dst = ext4_fc_reserve_space(sb, sizeof(tl) + len, crc);
760aa75f4d3SHarshad Shirwadkar 	if (!dst)
761aa75f4d3SHarshad Shirwadkar 		return false;
762aa75f4d3SHarshad Shirwadkar 
763aa75f4d3SHarshad Shirwadkar 	tl.fc_tag = cpu_to_le16(tag);
764aa75f4d3SHarshad Shirwadkar 	tl.fc_len = cpu_to_le16(len);
765aa75f4d3SHarshad Shirwadkar 
766aa75f4d3SHarshad Shirwadkar 	ext4_fc_memcpy(sb, dst, &tl, sizeof(tl), crc);
767aa75f4d3SHarshad Shirwadkar 	ext4_fc_memcpy(sb, dst + sizeof(tl), val, len, crc);
768aa75f4d3SHarshad Shirwadkar 
769aa75f4d3SHarshad Shirwadkar 	return true;
770aa75f4d3SHarshad Shirwadkar }
771aa75f4d3SHarshad Shirwadkar 
772aa75f4d3SHarshad Shirwadkar /* Same as above, but adds dentry tlv. */
773aa75f4d3SHarshad Shirwadkar static  bool ext4_fc_add_dentry_tlv(struct super_block *sb, u16 tag,
774aa75f4d3SHarshad Shirwadkar 					int parent_ino, int ino, int dlen,
775aa75f4d3SHarshad Shirwadkar 					const unsigned char *dname,
776aa75f4d3SHarshad Shirwadkar 					u32 *crc)
777aa75f4d3SHarshad Shirwadkar {
778aa75f4d3SHarshad Shirwadkar 	struct ext4_fc_dentry_info fcd;
779aa75f4d3SHarshad Shirwadkar 	struct ext4_fc_tl tl;
780aa75f4d3SHarshad Shirwadkar 	u8 *dst = ext4_fc_reserve_space(sb, sizeof(tl) + sizeof(fcd) + dlen,
781aa75f4d3SHarshad Shirwadkar 					crc);
782aa75f4d3SHarshad Shirwadkar 
783aa75f4d3SHarshad Shirwadkar 	if (!dst)
784aa75f4d3SHarshad Shirwadkar 		return false;
785aa75f4d3SHarshad Shirwadkar 
786aa75f4d3SHarshad Shirwadkar 	fcd.fc_parent_ino = cpu_to_le32(parent_ino);
787aa75f4d3SHarshad Shirwadkar 	fcd.fc_ino = cpu_to_le32(ino);
788aa75f4d3SHarshad Shirwadkar 	tl.fc_tag = cpu_to_le16(tag);
789aa75f4d3SHarshad Shirwadkar 	tl.fc_len = cpu_to_le16(sizeof(fcd) + dlen);
790aa75f4d3SHarshad Shirwadkar 	ext4_fc_memcpy(sb, dst, &tl, sizeof(tl), crc);
791aa75f4d3SHarshad Shirwadkar 	dst += sizeof(tl);
792aa75f4d3SHarshad Shirwadkar 	ext4_fc_memcpy(sb, dst, &fcd, sizeof(fcd), crc);
793aa75f4d3SHarshad Shirwadkar 	dst += sizeof(fcd);
794aa75f4d3SHarshad Shirwadkar 	ext4_fc_memcpy(sb, dst, dname, dlen, crc);
795aa75f4d3SHarshad Shirwadkar 	dst += dlen;
796aa75f4d3SHarshad Shirwadkar 
797aa75f4d3SHarshad Shirwadkar 	return true;
798aa75f4d3SHarshad Shirwadkar }
799aa75f4d3SHarshad Shirwadkar 
800aa75f4d3SHarshad Shirwadkar /*
801aa75f4d3SHarshad Shirwadkar  * Writes inode in the fast commit space under TLV with tag @tag.
802aa75f4d3SHarshad Shirwadkar  * Returns 0 on success, error on failure.
803aa75f4d3SHarshad Shirwadkar  */
804aa75f4d3SHarshad Shirwadkar static int ext4_fc_write_inode(struct inode *inode, u32 *crc)
805aa75f4d3SHarshad Shirwadkar {
806aa75f4d3SHarshad Shirwadkar 	struct ext4_inode_info *ei = EXT4_I(inode);
807aa75f4d3SHarshad Shirwadkar 	int inode_len = EXT4_GOOD_OLD_INODE_SIZE;
808aa75f4d3SHarshad Shirwadkar 	int ret;
809aa75f4d3SHarshad Shirwadkar 	struct ext4_iloc iloc;
810aa75f4d3SHarshad Shirwadkar 	struct ext4_fc_inode fc_inode;
811aa75f4d3SHarshad Shirwadkar 	struct ext4_fc_tl tl;
812aa75f4d3SHarshad Shirwadkar 	u8 *dst;
813aa75f4d3SHarshad Shirwadkar 
814aa75f4d3SHarshad Shirwadkar 	ret = ext4_get_inode_loc(inode, &iloc);
815aa75f4d3SHarshad Shirwadkar 	if (ret)
816aa75f4d3SHarshad Shirwadkar 		return ret;
817aa75f4d3SHarshad Shirwadkar 
818aa75f4d3SHarshad Shirwadkar 	if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE)
819aa75f4d3SHarshad Shirwadkar 		inode_len += ei->i_extra_isize;
820aa75f4d3SHarshad Shirwadkar 
821aa75f4d3SHarshad Shirwadkar 	fc_inode.fc_ino = cpu_to_le32(inode->i_ino);
822aa75f4d3SHarshad Shirwadkar 	tl.fc_tag = cpu_to_le16(EXT4_FC_TAG_INODE);
823aa75f4d3SHarshad Shirwadkar 	tl.fc_len = cpu_to_le16(inode_len + sizeof(fc_inode.fc_ino));
824aa75f4d3SHarshad Shirwadkar 
825aa75f4d3SHarshad Shirwadkar 	dst = ext4_fc_reserve_space(inode->i_sb,
826aa75f4d3SHarshad Shirwadkar 			sizeof(tl) + inode_len + sizeof(fc_inode.fc_ino), crc);
827aa75f4d3SHarshad Shirwadkar 	if (!dst)
828aa75f4d3SHarshad Shirwadkar 		return -ECANCELED;
829aa75f4d3SHarshad Shirwadkar 
830aa75f4d3SHarshad Shirwadkar 	if (!ext4_fc_memcpy(inode->i_sb, dst, &tl, sizeof(tl), crc))
831aa75f4d3SHarshad Shirwadkar 		return -ECANCELED;
832aa75f4d3SHarshad Shirwadkar 	dst += sizeof(tl);
833aa75f4d3SHarshad Shirwadkar 	if (!ext4_fc_memcpy(inode->i_sb, dst, &fc_inode, sizeof(fc_inode), crc))
834aa75f4d3SHarshad Shirwadkar 		return -ECANCELED;
835aa75f4d3SHarshad Shirwadkar 	dst += sizeof(fc_inode);
836aa75f4d3SHarshad Shirwadkar 	if (!ext4_fc_memcpy(inode->i_sb, dst, (u8 *)ext4_raw_inode(&iloc),
837aa75f4d3SHarshad Shirwadkar 					inode_len, crc))
838aa75f4d3SHarshad Shirwadkar 		return -ECANCELED;
839aa75f4d3SHarshad Shirwadkar 
840aa75f4d3SHarshad Shirwadkar 	return 0;
841aa75f4d3SHarshad Shirwadkar }
842aa75f4d3SHarshad Shirwadkar 
843aa75f4d3SHarshad Shirwadkar /*
844aa75f4d3SHarshad Shirwadkar  * Writes updated data ranges for the inode in question. Updates CRC.
845aa75f4d3SHarshad Shirwadkar  * Returns 0 on success, error otherwise.
846aa75f4d3SHarshad Shirwadkar  */
847aa75f4d3SHarshad Shirwadkar static int ext4_fc_write_inode_data(struct inode *inode, u32 *crc)
848aa75f4d3SHarshad Shirwadkar {
849aa75f4d3SHarshad Shirwadkar 	ext4_lblk_t old_blk_size, cur_lblk_off, new_blk_size;
850aa75f4d3SHarshad Shirwadkar 	struct ext4_inode_info *ei = EXT4_I(inode);
851aa75f4d3SHarshad Shirwadkar 	struct ext4_map_blocks map;
852aa75f4d3SHarshad Shirwadkar 	struct ext4_fc_add_range fc_ext;
853aa75f4d3SHarshad Shirwadkar 	struct ext4_fc_del_range lrange;
854aa75f4d3SHarshad Shirwadkar 	struct ext4_extent *ex;
855aa75f4d3SHarshad Shirwadkar 	int ret;
856aa75f4d3SHarshad Shirwadkar 
857aa75f4d3SHarshad Shirwadkar 	mutex_lock(&ei->i_fc_lock);
858aa75f4d3SHarshad Shirwadkar 	if (ei->i_fc_lblk_len == 0) {
859aa75f4d3SHarshad Shirwadkar 		mutex_unlock(&ei->i_fc_lock);
860aa75f4d3SHarshad Shirwadkar 		return 0;
861aa75f4d3SHarshad Shirwadkar 	}
862aa75f4d3SHarshad Shirwadkar 	old_blk_size = ei->i_fc_lblk_start;
863aa75f4d3SHarshad Shirwadkar 	new_blk_size = ei->i_fc_lblk_start + ei->i_fc_lblk_len - 1;
864aa75f4d3SHarshad Shirwadkar 	ei->i_fc_lblk_len = 0;
865aa75f4d3SHarshad Shirwadkar 	mutex_unlock(&ei->i_fc_lock);
866aa75f4d3SHarshad Shirwadkar 
867aa75f4d3SHarshad Shirwadkar 	cur_lblk_off = old_blk_size;
868aa75f4d3SHarshad Shirwadkar 	jbd_debug(1, "%s: will try writing %d to %d for inode %ld\n",
869aa75f4d3SHarshad Shirwadkar 		  __func__, cur_lblk_off, new_blk_size, inode->i_ino);
870aa75f4d3SHarshad Shirwadkar 
871aa75f4d3SHarshad Shirwadkar 	while (cur_lblk_off <= new_blk_size) {
872aa75f4d3SHarshad Shirwadkar 		map.m_lblk = cur_lblk_off;
873aa75f4d3SHarshad Shirwadkar 		map.m_len = new_blk_size - cur_lblk_off + 1;
874aa75f4d3SHarshad Shirwadkar 		ret = ext4_map_blocks(NULL, inode, &map, 0);
875aa75f4d3SHarshad Shirwadkar 		if (ret < 0)
876aa75f4d3SHarshad Shirwadkar 			return -ECANCELED;
877aa75f4d3SHarshad Shirwadkar 
878aa75f4d3SHarshad Shirwadkar 		if (map.m_len == 0) {
879aa75f4d3SHarshad Shirwadkar 			cur_lblk_off++;
880aa75f4d3SHarshad Shirwadkar 			continue;
881aa75f4d3SHarshad Shirwadkar 		}
882aa75f4d3SHarshad Shirwadkar 
883aa75f4d3SHarshad Shirwadkar 		if (ret == 0) {
884aa75f4d3SHarshad Shirwadkar 			lrange.fc_ino = cpu_to_le32(inode->i_ino);
885aa75f4d3SHarshad Shirwadkar 			lrange.fc_lblk = cpu_to_le32(map.m_lblk);
886aa75f4d3SHarshad Shirwadkar 			lrange.fc_len = cpu_to_le32(map.m_len);
887aa75f4d3SHarshad Shirwadkar 			if (!ext4_fc_add_tlv(inode->i_sb, EXT4_FC_TAG_DEL_RANGE,
888aa75f4d3SHarshad Shirwadkar 					    sizeof(lrange), (u8 *)&lrange, crc))
889aa75f4d3SHarshad Shirwadkar 				return -ENOSPC;
890aa75f4d3SHarshad Shirwadkar 		} else {
891aa75f4d3SHarshad Shirwadkar 			fc_ext.fc_ino = cpu_to_le32(inode->i_ino);
892aa75f4d3SHarshad Shirwadkar 			ex = (struct ext4_extent *)&fc_ext.fc_ex;
893aa75f4d3SHarshad Shirwadkar 			ex->ee_block = cpu_to_le32(map.m_lblk);
894aa75f4d3SHarshad Shirwadkar 			ex->ee_len = cpu_to_le16(map.m_len);
895aa75f4d3SHarshad Shirwadkar 			ext4_ext_store_pblock(ex, map.m_pblk);
896aa75f4d3SHarshad Shirwadkar 			if (map.m_flags & EXT4_MAP_UNWRITTEN)
897aa75f4d3SHarshad Shirwadkar 				ext4_ext_mark_unwritten(ex);
898aa75f4d3SHarshad Shirwadkar 			else
899aa75f4d3SHarshad Shirwadkar 				ext4_ext_mark_initialized(ex);
900aa75f4d3SHarshad Shirwadkar 			if (!ext4_fc_add_tlv(inode->i_sb, EXT4_FC_TAG_ADD_RANGE,
901aa75f4d3SHarshad Shirwadkar 					    sizeof(fc_ext), (u8 *)&fc_ext, crc))
902aa75f4d3SHarshad Shirwadkar 				return -ENOSPC;
903aa75f4d3SHarshad Shirwadkar 		}
904aa75f4d3SHarshad Shirwadkar 
905aa75f4d3SHarshad Shirwadkar 		cur_lblk_off += map.m_len;
906aa75f4d3SHarshad Shirwadkar 	}
907aa75f4d3SHarshad Shirwadkar 
908aa75f4d3SHarshad Shirwadkar 	return 0;
909aa75f4d3SHarshad Shirwadkar }
910aa75f4d3SHarshad Shirwadkar 
911aa75f4d3SHarshad Shirwadkar 
912aa75f4d3SHarshad Shirwadkar /* Submit data for all the fast commit inodes */
913aa75f4d3SHarshad Shirwadkar static int ext4_fc_submit_inode_data_all(journal_t *journal)
914aa75f4d3SHarshad Shirwadkar {
915aa75f4d3SHarshad Shirwadkar 	struct super_block *sb = (struct super_block *)(journal->j_private);
916aa75f4d3SHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(sb);
917aa75f4d3SHarshad Shirwadkar 	struct ext4_inode_info *ei;
918aa75f4d3SHarshad Shirwadkar 	struct list_head *pos;
919aa75f4d3SHarshad Shirwadkar 	int ret = 0;
920aa75f4d3SHarshad Shirwadkar 
921aa75f4d3SHarshad Shirwadkar 	spin_lock(&sbi->s_fc_lock);
9229b5f6c9bSHarshad Shirwadkar 	ext4_set_mount_flag(sb, EXT4_MF_FC_COMMITTING);
923aa75f4d3SHarshad Shirwadkar 	list_for_each(pos, &sbi->s_fc_q[FC_Q_MAIN]) {
924aa75f4d3SHarshad Shirwadkar 		ei = list_entry(pos, struct ext4_inode_info, i_fc_list);
925aa75f4d3SHarshad Shirwadkar 		ext4_set_inode_state(&ei->vfs_inode, EXT4_STATE_FC_COMMITTING);
926aa75f4d3SHarshad Shirwadkar 		while (atomic_read(&ei->i_fc_updates)) {
927aa75f4d3SHarshad Shirwadkar 			DEFINE_WAIT(wait);
928aa75f4d3SHarshad Shirwadkar 
929aa75f4d3SHarshad Shirwadkar 			prepare_to_wait(&ei->i_fc_wait, &wait,
930aa75f4d3SHarshad Shirwadkar 						TASK_UNINTERRUPTIBLE);
931aa75f4d3SHarshad Shirwadkar 			if (atomic_read(&ei->i_fc_updates)) {
932aa75f4d3SHarshad Shirwadkar 				spin_unlock(&sbi->s_fc_lock);
933aa75f4d3SHarshad Shirwadkar 				schedule();
934aa75f4d3SHarshad Shirwadkar 				spin_lock(&sbi->s_fc_lock);
935aa75f4d3SHarshad Shirwadkar 			}
936aa75f4d3SHarshad Shirwadkar 			finish_wait(&ei->i_fc_wait, &wait);
937aa75f4d3SHarshad Shirwadkar 		}
938aa75f4d3SHarshad Shirwadkar 		spin_unlock(&sbi->s_fc_lock);
939aa75f4d3SHarshad Shirwadkar 		ret = jbd2_submit_inode_data(ei->jinode);
940aa75f4d3SHarshad Shirwadkar 		if (ret)
941aa75f4d3SHarshad Shirwadkar 			return ret;
942aa75f4d3SHarshad Shirwadkar 		spin_lock(&sbi->s_fc_lock);
943aa75f4d3SHarshad Shirwadkar 	}
944aa75f4d3SHarshad Shirwadkar 	spin_unlock(&sbi->s_fc_lock);
945aa75f4d3SHarshad Shirwadkar 
946aa75f4d3SHarshad Shirwadkar 	return ret;
947aa75f4d3SHarshad Shirwadkar }
948aa75f4d3SHarshad Shirwadkar 
949aa75f4d3SHarshad Shirwadkar /* Wait for completion of data for all the fast commit inodes */
950aa75f4d3SHarshad Shirwadkar static int ext4_fc_wait_inode_data_all(journal_t *journal)
951aa75f4d3SHarshad Shirwadkar {
952aa75f4d3SHarshad Shirwadkar 	struct super_block *sb = (struct super_block *)(journal->j_private);
953aa75f4d3SHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(sb);
954aa75f4d3SHarshad Shirwadkar 	struct ext4_inode_info *pos, *n;
955aa75f4d3SHarshad Shirwadkar 	int ret = 0;
956aa75f4d3SHarshad Shirwadkar 
957aa75f4d3SHarshad Shirwadkar 	spin_lock(&sbi->s_fc_lock);
958aa75f4d3SHarshad Shirwadkar 	list_for_each_entry_safe(pos, n, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) {
959aa75f4d3SHarshad Shirwadkar 		if (!ext4_test_inode_state(&pos->vfs_inode,
960aa75f4d3SHarshad Shirwadkar 					   EXT4_STATE_FC_COMMITTING))
961aa75f4d3SHarshad Shirwadkar 			continue;
962aa75f4d3SHarshad Shirwadkar 		spin_unlock(&sbi->s_fc_lock);
963aa75f4d3SHarshad Shirwadkar 
964aa75f4d3SHarshad Shirwadkar 		ret = jbd2_wait_inode_data(journal, pos->jinode);
965aa75f4d3SHarshad Shirwadkar 		if (ret)
966aa75f4d3SHarshad Shirwadkar 			return ret;
967aa75f4d3SHarshad Shirwadkar 		spin_lock(&sbi->s_fc_lock);
968aa75f4d3SHarshad Shirwadkar 	}
969aa75f4d3SHarshad Shirwadkar 	spin_unlock(&sbi->s_fc_lock);
970aa75f4d3SHarshad Shirwadkar 
971aa75f4d3SHarshad Shirwadkar 	return 0;
972aa75f4d3SHarshad Shirwadkar }
973aa75f4d3SHarshad Shirwadkar 
974aa75f4d3SHarshad Shirwadkar /* Commit all the directory entry updates */
975aa75f4d3SHarshad Shirwadkar static int ext4_fc_commit_dentry_updates(journal_t *journal, u32 *crc)
976fa329e27STheodore Ts'o __acquires(&sbi->s_fc_lock)
977fa329e27STheodore Ts'o __releases(&sbi->s_fc_lock)
978aa75f4d3SHarshad Shirwadkar {
979aa75f4d3SHarshad Shirwadkar 	struct super_block *sb = (struct super_block *)(journal->j_private);
980aa75f4d3SHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(sb);
981aa75f4d3SHarshad Shirwadkar 	struct ext4_fc_dentry_update *fc_dentry;
982aa75f4d3SHarshad Shirwadkar 	struct inode *inode;
983aa75f4d3SHarshad Shirwadkar 	struct list_head *pos, *n, *fcd_pos, *fcd_n;
984aa75f4d3SHarshad Shirwadkar 	struct ext4_inode_info *ei;
985aa75f4d3SHarshad Shirwadkar 	int ret;
986aa75f4d3SHarshad Shirwadkar 
987aa75f4d3SHarshad Shirwadkar 	if (list_empty(&sbi->s_fc_dentry_q[FC_Q_MAIN]))
988aa75f4d3SHarshad Shirwadkar 		return 0;
989aa75f4d3SHarshad Shirwadkar 	list_for_each_safe(fcd_pos, fcd_n, &sbi->s_fc_dentry_q[FC_Q_MAIN]) {
990aa75f4d3SHarshad Shirwadkar 		fc_dentry = list_entry(fcd_pos, struct ext4_fc_dentry_update,
991aa75f4d3SHarshad Shirwadkar 					fcd_list);
992aa75f4d3SHarshad Shirwadkar 		if (fc_dentry->fcd_op != EXT4_FC_TAG_CREAT) {
993aa75f4d3SHarshad Shirwadkar 			spin_unlock(&sbi->s_fc_lock);
994aa75f4d3SHarshad Shirwadkar 			if (!ext4_fc_add_dentry_tlv(
995aa75f4d3SHarshad Shirwadkar 				sb, fc_dentry->fcd_op,
996aa75f4d3SHarshad Shirwadkar 				fc_dentry->fcd_parent, fc_dentry->fcd_ino,
997aa75f4d3SHarshad Shirwadkar 				fc_dentry->fcd_name.len,
998aa75f4d3SHarshad Shirwadkar 				fc_dentry->fcd_name.name, crc)) {
999aa75f4d3SHarshad Shirwadkar 				ret = -ENOSPC;
1000aa75f4d3SHarshad Shirwadkar 				goto lock_and_exit;
1001aa75f4d3SHarshad Shirwadkar 			}
1002aa75f4d3SHarshad Shirwadkar 			spin_lock(&sbi->s_fc_lock);
1003aa75f4d3SHarshad Shirwadkar 			continue;
1004aa75f4d3SHarshad Shirwadkar 		}
1005aa75f4d3SHarshad Shirwadkar 
1006aa75f4d3SHarshad Shirwadkar 		inode = NULL;
1007aa75f4d3SHarshad Shirwadkar 		list_for_each_safe(pos, n, &sbi->s_fc_q[FC_Q_MAIN]) {
1008aa75f4d3SHarshad Shirwadkar 			ei = list_entry(pos, struct ext4_inode_info, i_fc_list);
1009aa75f4d3SHarshad Shirwadkar 			if (ei->vfs_inode.i_ino == fc_dentry->fcd_ino) {
1010aa75f4d3SHarshad Shirwadkar 				inode = &ei->vfs_inode;
1011aa75f4d3SHarshad Shirwadkar 				break;
1012aa75f4d3SHarshad Shirwadkar 			}
1013aa75f4d3SHarshad Shirwadkar 		}
1014aa75f4d3SHarshad Shirwadkar 		/*
1015aa75f4d3SHarshad Shirwadkar 		 * If we don't find inode in our list, then it was deleted,
1016aa75f4d3SHarshad Shirwadkar 		 * in which case, we don't need to record it's create tag.
1017aa75f4d3SHarshad Shirwadkar 		 */
1018aa75f4d3SHarshad Shirwadkar 		if (!inode)
1019aa75f4d3SHarshad Shirwadkar 			continue;
1020aa75f4d3SHarshad Shirwadkar 		spin_unlock(&sbi->s_fc_lock);
1021aa75f4d3SHarshad Shirwadkar 
1022aa75f4d3SHarshad Shirwadkar 		/*
1023aa75f4d3SHarshad Shirwadkar 		 * We first write the inode and then the create dirent. This
1024aa75f4d3SHarshad Shirwadkar 		 * allows the recovery code to create an unnamed inode first
1025aa75f4d3SHarshad Shirwadkar 		 * and then link it to a directory entry. This allows us
1026aa75f4d3SHarshad Shirwadkar 		 * to use namei.c routines almost as is and simplifies
1027aa75f4d3SHarshad Shirwadkar 		 * the recovery code.
1028aa75f4d3SHarshad Shirwadkar 		 */
1029aa75f4d3SHarshad Shirwadkar 		ret = ext4_fc_write_inode(inode, crc);
1030aa75f4d3SHarshad Shirwadkar 		if (ret)
1031aa75f4d3SHarshad Shirwadkar 			goto lock_and_exit;
1032aa75f4d3SHarshad Shirwadkar 
1033aa75f4d3SHarshad Shirwadkar 		ret = ext4_fc_write_inode_data(inode, crc);
1034aa75f4d3SHarshad Shirwadkar 		if (ret)
1035aa75f4d3SHarshad Shirwadkar 			goto lock_and_exit;
1036aa75f4d3SHarshad Shirwadkar 
1037aa75f4d3SHarshad Shirwadkar 		if (!ext4_fc_add_dentry_tlv(
1038aa75f4d3SHarshad Shirwadkar 			sb, fc_dentry->fcd_op,
1039aa75f4d3SHarshad Shirwadkar 			fc_dentry->fcd_parent, fc_dentry->fcd_ino,
1040aa75f4d3SHarshad Shirwadkar 			fc_dentry->fcd_name.len,
1041aa75f4d3SHarshad Shirwadkar 			fc_dentry->fcd_name.name, crc)) {
1042aa75f4d3SHarshad Shirwadkar 			ret = -ENOSPC;
1043aa75f4d3SHarshad Shirwadkar 			goto lock_and_exit;
1044aa75f4d3SHarshad Shirwadkar 		}
1045aa75f4d3SHarshad Shirwadkar 
1046aa75f4d3SHarshad Shirwadkar 		spin_lock(&sbi->s_fc_lock);
1047aa75f4d3SHarshad Shirwadkar 	}
1048aa75f4d3SHarshad Shirwadkar 	return 0;
1049aa75f4d3SHarshad Shirwadkar lock_and_exit:
1050aa75f4d3SHarshad Shirwadkar 	spin_lock(&sbi->s_fc_lock);
1051aa75f4d3SHarshad Shirwadkar 	return ret;
1052aa75f4d3SHarshad Shirwadkar }
1053aa75f4d3SHarshad Shirwadkar 
1054aa75f4d3SHarshad Shirwadkar static int ext4_fc_perform_commit(journal_t *journal)
1055aa75f4d3SHarshad Shirwadkar {
1056aa75f4d3SHarshad Shirwadkar 	struct super_block *sb = (struct super_block *)(journal->j_private);
1057aa75f4d3SHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(sb);
1058aa75f4d3SHarshad Shirwadkar 	struct ext4_inode_info *iter;
1059aa75f4d3SHarshad Shirwadkar 	struct ext4_fc_head head;
1060aa75f4d3SHarshad Shirwadkar 	struct list_head *pos;
1061aa75f4d3SHarshad Shirwadkar 	struct inode *inode;
1062aa75f4d3SHarshad Shirwadkar 	struct blk_plug plug;
1063aa75f4d3SHarshad Shirwadkar 	int ret = 0;
1064aa75f4d3SHarshad Shirwadkar 	u32 crc = 0;
1065aa75f4d3SHarshad Shirwadkar 
1066aa75f4d3SHarshad Shirwadkar 	ret = ext4_fc_submit_inode_data_all(journal);
1067aa75f4d3SHarshad Shirwadkar 	if (ret)
1068aa75f4d3SHarshad Shirwadkar 		return ret;
1069aa75f4d3SHarshad Shirwadkar 
1070aa75f4d3SHarshad Shirwadkar 	ret = ext4_fc_wait_inode_data_all(journal);
1071aa75f4d3SHarshad Shirwadkar 	if (ret)
1072aa75f4d3SHarshad Shirwadkar 		return ret;
1073aa75f4d3SHarshad Shirwadkar 
1074da0c5d26SHarshad Shirwadkar 	/*
1075da0c5d26SHarshad Shirwadkar 	 * If file system device is different from journal device, issue a cache
1076da0c5d26SHarshad Shirwadkar 	 * flush before we start writing fast commit blocks.
1077da0c5d26SHarshad Shirwadkar 	 */
1078da0c5d26SHarshad Shirwadkar 	if (journal->j_fs_dev != journal->j_dev)
1079da0c5d26SHarshad Shirwadkar 		blkdev_issue_flush(journal->j_fs_dev, GFP_NOFS);
1080da0c5d26SHarshad Shirwadkar 
1081aa75f4d3SHarshad Shirwadkar 	blk_start_plug(&plug);
1082aa75f4d3SHarshad Shirwadkar 	if (sbi->s_fc_bytes == 0) {
1083aa75f4d3SHarshad Shirwadkar 		/*
1084aa75f4d3SHarshad Shirwadkar 		 * Add a head tag only if this is the first fast commit
1085aa75f4d3SHarshad Shirwadkar 		 * in this TID.
1086aa75f4d3SHarshad Shirwadkar 		 */
1087aa75f4d3SHarshad Shirwadkar 		head.fc_features = cpu_to_le32(EXT4_FC_SUPPORTED_FEATURES);
1088aa75f4d3SHarshad Shirwadkar 		head.fc_tid = cpu_to_le32(
1089aa75f4d3SHarshad Shirwadkar 			sbi->s_journal->j_running_transaction->t_tid);
1090aa75f4d3SHarshad Shirwadkar 		if (!ext4_fc_add_tlv(sb, EXT4_FC_TAG_HEAD, sizeof(head),
1091aa75f4d3SHarshad Shirwadkar 			(u8 *)&head, &crc))
1092aa75f4d3SHarshad Shirwadkar 			goto out;
1093aa75f4d3SHarshad Shirwadkar 	}
1094aa75f4d3SHarshad Shirwadkar 
1095aa75f4d3SHarshad Shirwadkar 	spin_lock(&sbi->s_fc_lock);
1096aa75f4d3SHarshad Shirwadkar 	ret = ext4_fc_commit_dentry_updates(journal, &crc);
1097aa75f4d3SHarshad Shirwadkar 	if (ret) {
1098aa75f4d3SHarshad Shirwadkar 		spin_unlock(&sbi->s_fc_lock);
1099aa75f4d3SHarshad Shirwadkar 		goto out;
1100aa75f4d3SHarshad Shirwadkar 	}
1101aa75f4d3SHarshad Shirwadkar 
1102aa75f4d3SHarshad Shirwadkar 	list_for_each(pos, &sbi->s_fc_q[FC_Q_MAIN]) {
1103aa75f4d3SHarshad Shirwadkar 		iter = list_entry(pos, struct ext4_inode_info, i_fc_list);
1104aa75f4d3SHarshad Shirwadkar 		inode = &iter->vfs_inode;
1105aa75f4d3SHarshad Shirwadkar 		if (!ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING))
1106aa75f4d3SHarshad Shirwadkar 			continue;
1107aa75f4d3SHarshad Shirwadkar 
1108aa75f4d3SHarshad Shirwadkar 		spin_unlock(&sbi->s_fc_lock);
1109aa75f4d3SHarshad Shirwadkar 		ret = ext4_fc_write_inode_data(inode, &crc);
1110aa75f4d3SHarshad Shirwadkar 		if (ret)
1111aa75f4d3SHarshad Shirwadkar 			goto out;
1112aa75f4d3SHarshad Shirwadkar 		ret = ext4_fc_write_inode(inode, &crc);
1113aa75f4d3SHarshad Shirwadkar 		if (ret)
1114aa75f4d3SHarshad Shirwadkar 			goto out;
1115aa75f4d3SHarshad Shirwadkar 		spin_lock(&sbi->s_fc_lock);
1116aa75f4d3SHarshad Shirwadkar 	}
1117aa75f4d3SHarshad Shirwadkar 	spin_unlock(&sbi->s_fc_lock);
1118aa75f4d3SHarshad Shirwadkar 
1119aa75f4d3SHarshad Shirwadkar 	ret = ext4_fc_write_tail(sb, crc);
1120aa75f4d3SHarshad Shirwadkar 
1121aa75f4d3SHarshad Shirwadkar out:
1122aa75f4d3SHarshad Shirwadkar 	blk_finish_plug(&plug);
1123aa75f4d3SHarshad Shirwadkar 	return ret;
1124aa75f4d3SHarshad Shirwadkar }
1125aa75f4d3SHarshad Shirwadkar 
1126aa75f4d3SHarshad Shirwadkar /*
1127aa75f4d3SHarshad Shirwadkar  * The main commit entry point. Performs a fast commit for transaction
1128aa75f4d3SHarshad Shirwadkar  * commit_tid if needed. If it's not possible to perform a fast commit
1129aa75f4d3SHarshad Shirwadkar  * due to various reasons, we fall back to full commit. Returns 0
1130aa75f4d3SHarshad Shirwadkar  * on success, error otherwise.
1131aa75f4d3SHarshad Shirwadkar  */
1132aa75f4d3SHarshad Shirwadkar int ext4_fc_commit(journal_t *journal, tid_t commit_tid)
1133aa75f4d3SHarshad Shirwadkar {
1134aa75f4d3SHarshad Shirwadkar 	struct super_block *sb = (struct super_block *)(journal->j_private);
1135aa75f4d3SHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(sb);
1136aa75f4d3SHarshad Shirwadkar 	int nblks = 0, ret, bsize = journal->j_blocksize;
1137aa75f4d3SHarshad Shirwadkar 	int subtid = atomic_read(&sbi->s_fc_subtid);
1138aa75f4d3SHarshad Shirwadkar 	int reason = EXT4_FC_REASON_OK, fc_bufs_before = 0;
1139aa75f4d3SHarshad Shirwadkar 	ktime_t start_time, commit_time;
1140aa75f4d3SHarshad Shirwadkar 
1141aa75f4d3SHarshad Shirwadkar 	trace_ext4_fc_commit_start(sb);
1142aa75f4d3SHarshad Shirwadkar 
1143aa75f4d3SHarshad Shirwadkar 	start_time = ktime_get();
1144aa75f4d3SHarshad Shirwadkar 
1145aa75f4d3SHarshad Shirwadkar 	if (!test_opt2(sb, JOURNAL_FAST_COMMIT) ||
1146aa75f4d3SHarshad Shirwadkar 		(ext4_fc_is_ineligible(sb))) {
1147aa75f4d3SHarshad Shirwadkar 		reason = EXT4_FC_REASON_INELIGIBLE;
1148aa75f4d3SHarshad Shirwadkar 		goto out;
1149aa75f4d3SHarshad Shirwadkar 	}
1150aa75f4d3SHarshad Shirwadkar 
1151aa75f4d3SHarshad Shirwadkar restart_fc:
1152aa75f4d3SHarshad Shirwadkar 	ret = jbd2_fc_begin_commit(journal, commit_tid);
1153aa75f4d3SHarshad Shirwadkar 	if (ret == -EALREADY) {
1154aa75f4d3SHarshad Shirwadkar 		/* There was an ongoing commit, check if we need to restart */
1155aa75f4d3SHarshad Shirwadkar 		if (atomic_read(&sbi->s_fc_subtid) <= subtid &&
1156aa75f4d3SHarshad Shirwadkar 			commit_tid > journal->j_commit_sequence)
1157aa75f4d3SHarshad Shirwadkar 			goto restart_fc;
1158aa75f4d3SHarshad Shirwadkar 		reason = EXT4_FC_REASON_ALREADY_COMMITTED;
1159aa75f4d3SHarshad Shirwadkar 		goto out;
1160aa75f4d3SHarshad Shirwadkar 	} else if (ret) {
1161aa75f4d3SHarshad Shirwadkar 		sbi->s_fc_stats.fc_ineligible_reason_count[EXT4_FC_COMMIT_FAILED]++;
1162aa75f4d3SHarshad Shirwadkar 		reason = EXT4_FC_REASON_FC_START_FAILED;
1163aa75f4d3SHarshad Shirwadkar 		goto out;
1164aa75f4d3SHarshad Shirwadkar 	}
1165aa75f4d3SHarshad Shirwadkar 
1166aa75f4d3SHarshad Shirwadkar 	fc_bufs_before = (sbi->s_fc_bytes + bsize - 1) / bsize;
1167aa75f4d3SHarshad Shirwadkar 	ret = ext4_fc_perform_commit(journal);
1168aa75f4d3SHarshad Shirwadkar 	if (ret < 0) {
1169aa75f4d3SHarshad Shirwadkar 		sbi->s_fc_stats.fc_ineligible_reason_count[EXT4_FC_COMMIT_FAILED]++;
1170aa75f4d3SHarshad Shirwadkar 		reason = EXT4_FC_REASON_FC_FAILED;
1171aa75f4d3SHarshad Shirwadkar 		goto out;
1172aa75f4d3SHarshad Shirwadkar 	}
1173aa75f4d3SHarshad Shirwadkar 	nblks = (sbi->s_fc_bytes + bsize - 1) / bsize - fc_bufs_before;
1174aa75f4d3SHarshad Shirwadkar 	ret = jbd2_fc_wait_bufs(journal, nblks);
1175aa75f4d3SHarshad Shirwadkar 	if (ret < 0) {
1176aa75f4d3SHarshad Shirwadkar 		sbi->s_fc_stats.fc_ineligible_reason_count[EXT4_FC_COMMIT_FAILED]++;
1177aa75f4d3SHarshad Shirwadkar 		reason = EXT4_FC_REASON_FC_FAILED;
1178aa75f4d3SHarshad Shirwadkar 		goto out;
1179aa75f4d3SHarshad Shirwadkar 	}
1180aa75f4d3SHarshad Shirwadkar 	atomic_inc(&sbi->s_fc_subtid);
1181aa75f4d3SHarshad Shirwadkar 	jbd2_fc_end_commit(journal);
1182aa75f4d3SHarshad Shirwadkar out:
1183aa75f4d3SHarshad Shirwadkar 	/* Has any ineligible update happened since we started? */
1184aa75f4d3SHarshad Shirwadkar 	if (reason == EXT4_FC_REASON_OK && ext4_fc_is_ineligible(sb)) {
1185aa75f4d3SHarshad Shirwadkar 		sbi->s_fc_stats.fc_ineligible_reason_count[EXT4_FC_COMMIT_FAILED]++;
1186aa75f4d3SHarshad Shirwadkar 		reason = EXT4_FC_REASON_INELIGIBLE;
1187aa75f4d3SHarshad Shirwadkar 	}
1188aa75f4d3SHarshad Shirwadkar 
1189aa75f4d3SHarshad Shirwadkar 	spin_lock(&sbi->s_fc_lock);
1190aa75f4d3SHarshad Shirwadkar 	if (reason != EXT4_FC_REASON_OK &&
1191aa75f4d3SHarshad Shirwadkar 		reason != EXT4_FC_REASON_ALREADY_COMMITTED) {
1192aa75f4d3SHarshad Shirwadkar 		sbi->s_fc_stats.fc_ineligible_commits++;
1193aa75f4d3SHarshad Shirwadkar 	} else {
1194aa75f4d3SHarshad Shirwadkar 		sbi->s_fc_stats.fc_num_commits++;
1195aa75f4d3SHarshad Shirwadkar 		sbi->s_fc_stats.fc_numblks += nblks;
1196aa75f4d3SHarshad Shirwadkar 	}
1197aa75f4d3SHarshad Shirwadkar 	spin_unlock(&sbi->s_fc_lock);
1198aa75f4d3SHarshad Shirwadkar 	nblks = (reason == EXT4_FC_REASON_OK) ? nblks : 0;
1199aa75f4d3SHarshad Shirwadkar 	trace_ext4_fc_commit_stop(sb, nblks, reason);
1200aa75f4d3SHarshad Shirwadkar 	commit_time = ktime_to_ns(ktime_sub(ktime_get(), start_time));
1201aa75f4d3SHarshad Shirwadkar 	/*
1202aa75f4d3SHarshad Shirwadkar 	 * weight the commit time higher than the average time so we don't
1203aa75f4d3SHarshad Shirwadkar 	 * react too strongly to vast changes in the commit time
1204aa75f4d3SHarshad Shirwadkar 	 */
1205aa75f4d3SHarshad Shirwadkar 	if (likely(sbi->s_fc_avg_commit_time))
1206aa75f4d3SHarshad Shirwadkar 		sbi->s_fc_avg_commit_time = (commit_time +
1207aa75f4d3SHarshad Shirwadkar 				sbi->s_fc_avg_commit_time * 3) / 4;
1208aa75f4d3SHarshad Shirwadkar 	else
1209aa75f4d3SHarshad Shirwadkar 		sbi->s_fc_avg_commit_time = commit_time;
1210aa75f4d3SHarshad Shirwadkar 	jbd_debug(1,
1211aa75f4d3SHarshad Shirwadkar 		"Fast commit ended with blks = %d, reason = %d, subtid - %d",
1212aa75f4d3SHarshad Shirwadkar 		nblks, reason, subtid);
1213aa75f4d3SHarshad Shirwadkar 	if (reason == EXT4_FC_REASON_FC_FAILED)
12140bce577bSHarshad Shirwadkar 		return jbd2_fc_end_commit_fallback(journal);
1215aa75f4d3SHarshad Shirwadkar 	if (reason == EXT4_FC_REASON_FC_START_FAILED ||
1216aa75f4d3SHarshad Shirwadkar 		reason == EXT4_FC_REASON_INELIGIBLE)
1217aa75f4d3SHarshad Shirwadkar 		return jbd2_complete_transaction(journal, commit_tid);
1218aa75f4d3SHarshad Shirwadkar 	return 0;
1219aa75f4d3SHarshad Shirwadkar }
1220aa75f4d3SHarshad Shirwadkar 
1221ff780b91SHarshad Shirwadkar /*
1222ff780b91SHarshad Shirwadkar  * Fast commit cleanup routine. This is called after every fast commit and
1223ff780b91SHarshad Shirwadkar  * full commit. full is true if we are called after a full commit.
1224ff780b91SHarshad Shirwadkar  */
1225ff780b91SHarshad Shirwadkar static void ext4_fc_cleanup(journal_t *journal, int full)
1226ff780b91SHarshad Shirwadkar {
1227aa75f4d3SHarshad Shirwadkar 	struct super_block *sb = journal->j_private;
1228aa75f4d3SHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(sb);
1229aa75f4d3SHarshad Shirwadkar 	struct ext4_inode_info *iter;
1230aa75f4d3SHarshad Shirwadkar 	struct ext4_fc_dentry_update *fc_dentry;
1231aa75f4d3SHarshad Shirwadkar 	struct list_head *pos, *n;
1232aa75f4d3SHarshad Shirwadkar 
1233aa75f4d3SHarshad Shirwadkar 	if (full && sbi->s_fc_bh)
1234aa75f4d3SHarshad Shirwadkar 		sbi->s_fc_bh = NULL;
1235aa75f4d3SHarshad Shirwadkar 
1236aa75f4d3SHarshad Shirwadkar 	jbd2_fc_release_bufs(journal);
1237aa75f4d3SHarshad Shirwadkar 
1238aa75f4d3SHarshad Shirwadkar 	spin_lock(&sbi->s_fc_lock);
1239aa75f4d3SHarshad Shirwadkar 	list_for_each_safe(pos, n, &sbi->s_fc_q[FC_Q_MAIN]) {
1240aa75f4d3SHarshad Shirwadkar 		iter = list_entry(pos, struct ext4_inode_info, i_fc_list);
1241aa75f4d3SHarshad Shirwadkar 		list_del_init(&iter->i_fc_list);
1242aa75f4d3SHarshad Shirwadkar 		ext4_clear_inode_state(&iter->vfs_inode,
1243aa75f4d3SHarshad Shirwadkar 				       EXT4_STATE_FC_COMMITTING);
1244aa75f4d3SHarshad Shirwadkar 		ext4_fc_reset_inode(&iter->vfs_inode);
1245aa75f4d3SHarshad Shirwadkar 		/* Make sure EXT4_STATE_FC_COMMITTING bit is clear */
1246aa75f4d3SHarshad Shirwadkar 		smp_mb();
1247aa75f4d3SHarshad Shirwadkar #if (BITS_PER_LONG < 64)
1248aa75f4d3SHarshad Shirwadkar 		wake_up_bit(&iter->i_state_flags, EXT4_STATE_FC_COMMITTING);
1249aa75f4d3SHarshad Shirwadkar #else
1250aa75f4d3SHarshad Shirwadkar 		wake_up_bit(&iter->i_flags, EXT4_STATE_FC_COMMITTING);
1251aa75f4d3SHarshad Shirwadkar #endif
1252aa75f4d3SHarshad Shirwadkar 	}
1253aa75f4d3SHarshad Shirwadkar 
1254aa75f4d3SHarshad Shirwadkar 	while (!list_empty(&sbi->s_fc_dentry_q[FC_Q_MAIN])) {
1255aa75f4d3SHarshad Shirwadkar 		fc_dentry = list_first_entry(&sbi->s_fc_dentry_q[FC_Q_MAIN],
1256aa75f4d3SHarshad Shirwadkar 					     struct ext4_fc_dentry_update,
1257aa75f4d3SHarshad Shirwadkar 					     fcd_list);
1258aa75f4d3SHarshad Shirwadkar 		list_del_init(&fc_dentry->fcd_list);
1259aa75f4d3SHarshad Shirwadkar 		spin_unlock(&sbi->s_fc_lock);
1260aa75f4d3SHarshad Shirwadkar 
1261aa75f4d3SHarshad Shirwadkar 		if (fc_dentry->fcd_name.name &&
1262aa75f4d3SHarshad Shirwadkar 			fc_dentry->fcd_name.len > DNAME_INLINE_LEN)
1263aa75f4d3SHarshad Shirwadkar 			kfree(fc_dentry->fcd_name.name);
1264aa75f4d3SHarshad Shirwadkar 		kmem_cache_free(ext4_fc_dentry_cachep, fc_dentry);
1265aa75f4d3SHarshad Shirwadkar 		spin_lock(&sbi->s_fc_lock);
1266aa75f4d3SHarshad Shirwadkar 	}
1267aa75f4d3SHarshad Shirwadkar 
1268aa75f4d3SHarshad Shirwadkar 	list_splice_init(&sbi->s_fc_dentry_q[FC_Q_STAGING],
1269aa75f4d3SHarshad Shirwadkar 				&sbi->s_fc_dentry_q[FC_Q_MAIN]);
1270aa75f4d3SHarshad Shirwadkar 	list_splice_init(&sbi->s_fc_q[FC_Q_STAGING],
1271aa75f4d3SHarshad Shirwadkar 				&sbi->s_fc_q[FC_Q_STAGING]);
1272aa75f4d3SHarshad Shirwadkar 
12739b5f6c9bSHarshad Shirwadkar 	ext4_clear_mount_flag(sb, EXT4_MF_FC_COMMITTING);
12749b5f6c9bSHarshad Shirwadkar 	ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
1275aa75f4d3SHarshad Shirwadkar 
1276aa75f4d3SHarshad Shirwadkar 	if (full)
1277aa75f4d3SHarshad Shirwadkar 		sbi->s_fc_bytes = 0;
1278aa75f4d3SHarshad Shirwadkar 	spin_unlock(&sbi->s_fc_lock);
1279aa75f4d3SHarshad Shirwadkar 	trace_ext4_fc_stats(sb);
1280ff780b91SHarshad Shirwadkar }
12816866d7b3SHarshad Shirwadkar 
12828016e29fSHarshad Shirwadkar /* Ext4 Replay Path Routines */
12838016e29fSHarshad Shirwadkar 
12848016e29fSHarshad Shirwadkar /* Get length of a particular tlv */
12858016e29fSHarshad Shirwadkar static inline int ext4_fc_tag_len(struct ext4_fc_tl *tl)
12868016e29fSHarshad Shirwadkar {
12878016e29fSHarshad Shirwadkar 	return le16_to_cpu(tl->fc_len);
12888016e29fSHarshad Shirwadkar }
12898016e29fSHarshad Shirwadkar 
12908016e29fSHarshad Shirwadkar /* Get a pointer to "value" of a tlv */
12918016e29fSHarshad Shirwadkar static inline u8 *ext4_fc_tag_val(struct ext4_fc_tl *tl)
12928016e29fSHarshad Shirwadkar {
12938016e29fSHarshad Shirwadkar 	return (u8 *)tl + sizeof(*tl);
12948016e29fSHarshad Shirwadkar }
12958016e29fSHarshad Shirwadkar 
/*
 * Decoded form of a dentry TLV, shared by the dentry replay routines.
 * Filled in by tl_to_darg(), which leaves inode_len untouched.
 */
struct dentry_info_args {
	int parent_ino;		/* inode number of the parent directory */
	int dname_len;		/* length of dname */
	int ino;		/* inode number the entry refers to */
	int inode_len;
	char *dname;		/* entry name, points into the TLV value */
};
13018016e29fSHarshad Shirwadkar 
13028016e29fSHarshad Shirwadkar static inline void tl_to_darg(struct dentry_info_args *darg,
13038016e29fSHarshad Shirwadkar 				struct  ext4_fc_tl *tl)
13048016e29fSHarshad Shirwadkar {
13058016e29fSHarshad Shirwadkar 	struct ext4_fc_dentry_info *fcd;
13068016e29fSHarshad Shirwadkar 
13078016e29fSHarshad Shirwadkar 	fcd = (struct ext4_fc_dentry_info *)ext4_fc_tag_val(tl);
13088016e29fSHarshad Shirwadkar 
13098016e29fSHarshad Shirwadkar 	darg->parent_ino = le32_to_cpu(fcd->fc_parent_ino);
13108016e29fSHarshad Shirwadkar 	darg->ino = le32_to_cpu(fcd->fc_ino);
13118016e29fSHarshad Shirwadkar 	darg->dname = fcd->fc_dname;
13128016e29fSHarshad Shirwadkar 	darg->dname_len = ext4_fc_tag_len(tl) -
13138016e29fSHarshad Shirwadkar 			sizeof(struct ext4_fc_dentry_info);
13148016e29fSHarshad Shirwadkar }
13158016e29fSHarshad Shirwadkar 
13168016e29fSHarshad Shirwadkar /* Unlink replay function */
13178016e29fSHarshad Shirwadkar static int ext4_fc_replay_unlink(struct super_block *sb, struct ext4_fc_tl *tl)
13188016e29fSHarshad Shirwadkar {
13198016e29fSHarshad Shirwadkar 	struct inode *inode, *old_parent;
13208016e29fSHarshad Shirwadkar 	struct qstr entry;
13218016e29fSHarshad Shirwadkar 	struct dentry_info_args darg;
13228016e29fSHarshad Shirwadkar 	int ret = 0;
13238016e29fSHarshad Shirwadkar 
13248016e29fSHarshad Shirwadkar 	tl_to_darg(&darg, tl);
13258016e29fSHarshad Shirwadkar 
13268016e29fSHarshad Shirwadkar 	trace_ext4_fc_replay(sb, EXT4_FC_TAG_UNLINK, darg.ino,
13278016e29fSHarshad Shirwadkar 			darg.parent_ino, darg.dname_len);
13288016e29fSHarshad Shirwadkar 
13298016e29fSHarshad Shirwadkar 	entry.name = darg.dname;
13308016e29fSHarshad Shirwadkar 	entry.len = darg.dname_len;
13318016e29fSHarshad Shirwadkar 	inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL);
13328016e29fSHarshad Shirwadkar 
13338016e29fSHarshad Shirwadkar 	if (IS_ERR_OR_NULL(inode)) {
13348016e29fSHarshad Shirwadkar 		jbd_debug(1, "Inode %d not found", darg.ino);
13358016e29fSHarshad Shirwadkar 		return 0;
13368016e29fSHarshad Shirwadkar 	}
13378016e29fSHarshad Shirwadkar 
13388016e29fSHarshad Shirwadkar 	old_parent = ext4_iget(sb, darg.parent_ino,
13398016e29fSHarshad Shirwadkar 				EXT4_IGET_NORMAL);
13408016e29fSHarshad Shirwadkar 	if (IS_ERR_OR_NULL(old_parent)) {
13418016e29fSHarshad Shirwadkar 		jbd_debug(1, "Dir with inode  %d not found", darg.parent_ino);
13428016e29fSHarshad Shirwadkar 		iput(inode);
13438016e29fSHarshad Shirwadkar 		return 0;
13448016e29fSHarshad Shirwadkar 	}
13458016e29fSHarshad Shirwadkar 
1346a80f7fcfSHarshad Shirwadkar 	ret = __ext4_unlink(NULL, old_parent, &entry, inode);
13478016e29fSHarshad Shirwadkar 	/* -ENOENT ok coz it might not exist anymore. */
13488016e29fSHarshad Shirwadkar 	if (ret == -ENOENT)
13498016e29fSHarshad Shirwadkar 		ret = 0;
13508016e29fSHarshad Shirwadkar 	iput(old_parent);
13518016e29fSHarshad Shirwadkar 	iput(inode);
13528016e29fSHarshad Shirwadkar 	return ret;
13538016e29fSHarshad Shirwadkar }
13548016e29fSHarshad Shirwadkar 
13558016e29fSHarshad Shirwadkar static int ext4_fc_replay_link_internal(struct super_block *sb,
13568016e29fSHarshad Shirwadkar 				struct dentry_info_args *darg,
13578016e29fSHarshad Shirwadkar 				struct inode *inode)
13588016e29fSHarshad Shirwadkar {
13598016e29fSHarshad Shirwadkar 	struct inode *dir = NULL;
13608016e29fSHarshad Shirwadkar 	struct dentry *dentry_dir = NULL, *dentry_inode = NULL;
13618016e29fSHarshad Shirwadkar 	struct qstr qstr_dname = QSTR_INIT(darg->dname, darg->dname_len);
13628016e29fSHarshad Shirwadkar 	int ret = 0;
13638016e29fSHarshad Shirwadkar 
13648016e29fSHarshad Shirwadkar 	dir = ext4_iget(sb, darg->parent_ino, EXT4_IGET_NORMAL);
13658016e29fSHarshad Shirwadkar 	if (IS_ERR(dir)) {
13668016e29fSHarshad Shirwadkar 		jbd_debug(1, "Dir with inode %d not found.", darg->parent_ino);
13678016e29fSHarshad Shirwadkar 		dir = NULL;
13688016e29fSHarshad Shirwadkar 		goto out;
13698016e29fSHarshad Shirwadkar 	}
13708016e29fSHarshad Shirwadkar 
13718016e29fSHarshad Shirwadkar 	dentry_dir = d_obtain_alias(dir);
13728016e29fSHarshad Shirwadkar 	if (IS_ERR(dentry_dir)) {
13738016e29fSHarshad Shirwadkar 		jbd_debug(1, "Failed to obtain dentry");
13748016e29fSHarshad Shirwadkar 		dentry_dir = NULL;
13758016e29fSHarshad Shirwadkar 		goto out;
13768016e29fSHarshad Shirwadkar 	}
13778016e29fSHarshad Shirwadkar 
13788016e29fSHarshad Shirwadkar 	dentry_inode = d_alloc(dentry_dir, &qstr_dname);
13798016e29fSHarshad Shirwadkar 	if (!dentry_inode) {
13808016e29fSHarshad Shirwadkar 		jbd_debug(1, "Inode dentry not created.");
13818016e29fSHarshad Shirwadkar 		ret = -ENOMEM;
13828016e29fSHarshad Shirwadkar 		goto out;
13838016e29fSHarshad Shirwadkar 	}
13848016e29fSHarshad Shirwadkar 
13858016e29fSHarshad Shirwadkar 	ret = __ext4_link(dir, inode, dentry_inode);
13868016e29fSHarshad Shirwadkar 	/*
13878016e29fSHarshad Shirwadkar 	 * It's possible that link already existed since data blocks
13888016e29fSHarshad Shirwadkar 	 * for the dir in question got persisted before we crashed OR
13898016e29fSHarshad Shirwadkar 	 * we replayed this tag and crashed before the entire replay
13908016e29fSHarshad Shirwadkar 	 * could complete.
13918016e29fSHarshad Shirwadkar 	 */
13928016e29fSHarshad Shirwadkar 	if (ret && ret != -EEXIST) {
13938016e29fSHarshad Shirwadkar 		jbd_debug(1, "Failed to link\n");
13948016e29fSHarshad Shirwadkar 		goto out;
13958016e29fSHarshad Shirwadkar 	}
13968016e29fSHarshad Shirwadkar 
13978016e29fSHarshad Shirwadkar 	ret = 0;
13988016e29fSHarshad Shirwadkar out:
13998016e29fSHarshad Shirwadkar 	if (dentry_dir) {
14008016e29fSHarshad Shirwadkar 		d_drop(dentry_dir);
14018016e29fSHarshad Shirwadkar 		dput(dentry_dir);
14028016e29fSHarshad Shirwadkar 	} else if (dir) {
14038016e29fSHarshad Shirwadkar 		iput(dir);
14048016e29fSHarshad Shirwadkar 	}
14058016e29fSHarshad Shirwadkar 	if (dentry_inode) {
14068016e29fSHarshad Shirwadkar 		d_drop(dentry_inode);
14078016e29fSHarshad Shirwadkar 		dput(dentry_inode);
14088016e29fSHarshad Shirwadkar 	}
14098016e29fSHarshad Shirwadkar 
14108016e29fSHarshad Shirwadkar 	return ret;
14118016e29fSHarshad Shirwadkar }
14128016e29fSHarshad Shirwadkar 
14138016e29fSHarshad Shirwadkar /* Link replay function */
14148016e29fSHarshad Shirwadkar static int ext4_fc_replay_link(struct super_block *sb, struct ext4_fc_tl *tl)
14158016e29fSHarshad Shirwadkar {
14168016e29fSHarshad Shirwadkar 	struct inode *inode;
14178016e29fSHarshad Shirwadkar 	struct dentry_info_args darg;
14188016e29fSHarshad Shirwadkar 	int ret = 0;
14198016e29fSHarshad Shirwadkar 
14208016e29fSHarshad Shirwadkar 	tl_to_darg(&darg, tl);
14218016e29fSHarshad Shirwadkar 	trace_ext4_fc_replay(sb, EXT4_FC_TAG_LINK, darg.ino,
14228016e29fSHarshad Shirwadkar 			darg.parent_ino, darg.dname_len);
14238016e29fSHarshad Shirwadkar 
14248016e29fSHarshad Shirwadkar 	inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL);
14258016e29fSHarshad Shirwadkar 	if (IS_ERR_OR_NULL(inode)) {
14268016e29fSHarshad Shirwadkar 		jbd_debug(1, "Inode not found.");
14278016e29fSHarshad Shirwadkar 		return 0;
14288016e29fSHarshad Shirwadkar 	}
14298016e29fSHarshad Shirwadkar 
14308016e29fSHarshad Shirwadkar 	ret = ext4_fc_replay_link_internal(sb, &darg, inode);
14318016e29fSHarshad Shirwadkar 	iput(inode);
14328016e29fSHarshad Shirwadkar 	return ret;
14338016e29fSHarshad Shirwadkar }
14348016e29fSHarshad Shirwadkar 
14358016e29fSHarshad Shirwadkar /*
14368016e29fSHarshad Shirwadkar  * Record all the modified inodes during replay. We use this later to setup
14378016e29fSHarshad Shirwadkar  * block bitmaps correctly.
14388016e29fSHarshad Shirwadkar  */
14398016e29fSHarshad Shirwadkar static int ext4_fc_record_modified_inode(struct super_block *sb, int ino)
14408016e29fSHarshad Shirwadkar {
14418016e29fSHarshad Shirwadkar 	struct ext4_fc_replay_state *state;
14428016e29fSHarshad Shirwadkar 	int i;
14438016e29fSHarshad Shirwadkar 
14448016e29fSHarshad Shirwadkar 	state = &EXT4_SB(sb)->s_fc_replay_state;
14458016e29fSHarshad Shirwadkar 	for (i = 0; i < state->fc_modified_inodes_used; i++)
14468016e29fSHarshad Shirwadkar 		if (state->fc_modified_inodes[i] == ino)
14478016e29fSHarshad Shirwadkar 			return 0;
14488016e29fSHarshad Shirwadkar 	if (state->fc_modified_inodes_used == state->fc_modified_inodes_size) {
14498016e29fSHarshad Shirwadkar 		state->fc_modified_inodes_size +=
14508016e29fSHarshad Shirwadkar 			EXT4_FC_REPLAY_REALLOC_INCREMENT;
14518016e29fSHarshad Shirwadkar 		state->fc_modified_inodes = krealloc(
14528016e29fSHarshad Shirwadkar 					state->fc_modified_inodes, sizeof(int) *
14538016e29fSHarshad Shirwadkar 					state->fc_modified_inodes_size,
14548016e29fSHarshad Shirwadkar 					GFP_KERNEL);
14558016e29fSHarshad Shirwadkar 		if (!state->fc_modified_inodes)
14568016e29fSHarshad Shirwadkar 			return -ENOMEM;
14578016e29fSHarshad Shirwadkar 	}
14588016e29fSHarshad Shirwadkar 	state->fc_modified_inodes[state->fc_modified_inodes_used++] = ino;
14598016e29fSHarshad Shirwadkar 	return 0;
14608016e29fSHarshad Shirwadkar }
14618016e29fSHarshad Shirwadkar 
14628016e29fSHarshad Shirwadkar /*
14638016e29fSHarshad Shirwadkar  * Inode replay function
14648016e29fSHarshad Shirwadkar  */
14658016e29fSHarshad Shirwadkar static int ext4_fc_replay_inode(struct super_block *sb, struct ext4_fc_tl *tl)
14668016e29fSHarshad Shirwadkar {
14678016e29fSHarshad Shirwadkar 	struct ext4_fc_inode *fc_inode;
14688016e29fSHarshad Shirwadkar 	struct ext4_inode *raw_inode;
14698016e29fSHarshad Shirwadkar 	struct ext4_inode *raw_fc_inode;
14708016e29fSHarshad Shirwadkar 	struct inode *inode = NULL;
14718016e29fSHarshad Shirwadkar 	struct ext4_iloc iloc;
14728016e29fSHarshad Shirwadkar 	int inode_len, ino, ret, tag = le16_to_cpu(tl->fc_tag);
14738016e29fSHarshad Shirwadkar 	struct ext4_extent_header *eh;
14748016e29fSHarshad Shirwadkar 
14758016e29fSHarshad Shirwadkar 	fc_inode = (struct ext4_fc_inode *)ext4_fc_tag_val(tl);
14768016e29fSHarshad Shirwadkar 
14778016e29fSHarshad Shirwadkar 	ino = le32_to_cpu(fc_inode->fc_ino);
14788016e29fSHarshad Shirwadkar 	trace_ext4_fc_replay(sb, tag, ino, 0, 0);
14798016e29fSHarshad Shirwadkar 
14808016e29fSHarshad Shirwadkar 	inode = ext4_iget(sb, ino, EXT4_IGET_NORMAL);
14818016e29fSHarshad Shirwadkar 	if (!IS_ERR_OR_NULL(inode)) {
14828016e29fSHarshad Shirwadkar 		ext4_ext_clear_bb(inode);
14838016e29fSHarshad Shirwadkar 		iput(inode);
14848016e29fSHarshad Shirwadkar 	}
14858016e29fSHarshad Shirwadkar 
14868016e29fSHarshad Shirwadkar 	ext4_fc_record_modified_inode(sb, ino);
14878016e29fSHarshad Shirwadkar 
14888016e29fSHarshad Shirwadkar 	raw_fc_inode = (struct ext4_inode *)fc_inode->fc_raw_inode;
14898016e29fSHarshad Shirwadkar 	ret = ext4_get_fc_inode_loc(sb, ino, &iloc);
14908016e29fSHarshad Shirwadkar 	if (ret)
14918016e29fSHarshad Shirwadkar 		goto out;
14928016e29fSHarshad Shirwadkar 
14938016e29fSHarshad Shirwadkar 	inode_len = ext4_fc_tag_len(tl) - sizeof(struct ext4_fc_inode);
14948016e29fSHarshad Shirwadkar 	raw_inode = ext4_raw_inode(&iloc);
14958016e29fSHarshad Shirwadkar 
14968016e29fSHarshad Shirwadkar 	memcpy(raw_inode, raw_fc_inode, offsetof(struct ext4_inode, i_block));
14978016e29fSHarshad Shirwadkar 	memcpy(&raw_inode->i_generation, &raw_fc_inode->i_generation,
14988016e29fSHarshad Shirwadkar 		inode_len - offsetof(struct ext4_inode, i_generation));
14998016e29fSHarshad Shirwadkar 	if (le32_to_cpu(raw_inode->i_flags) & EXT4_EXTENTS_FL) {
15008016e29fSHarshad Shirwadkar 		eh = (struct ext4_extent_header *)(&raw_inode->i_block[0]);
15018016e29fSHarshad Shirwadkar 		if (eh->eh_magic != EXT4_EXT_MAGIC) {
15028016e29fSHarshad Shirwadkar 			memset(eh, 0, sizeof(*eh));
15038016e29fSHarshad Shirwadkar 			eh->eh_magic = EXT4_EXT_MAGIC;
15048016e29fSHarshad Shirwadkar 			eh->eh_max = cpu_to_le16(
15058016e29fSHarshad Shirwadkar 				(sizeof(raw_inode->i_block) -
15068016e29fSHarshad Shirwadkar 				 sizeof(struct ext4_extent_header))
15078016e29fSHarshad Shirwadkar 				 / sizeof(struct ext4_extent));
15088016e29fSHarshad Shirwadkar 		}
15098016e29fSHarshad Shirwadkar 	} else if (le32_to_cpu(raw_inode->i_flags) & EXT4_INLINE_DATA_FL) {
15108016e29fSHarshad Shirwadkar 		memcpy(raw_inode->i_block, raw_fc_inode->i_block,
15118016e29fSHarshad Shirwadkar 			sizeof(raw_inode->i_block));
15128016e29fSHarshad Shirwadkar 	}
15138016e29fSHarshad Shirwadkar 
15148016e29fSHarshad Shirwadkar 	/* Immediately update the inode on disk. */
15158016e29fSHarshad Shirwadkar 	ret = ext4_handle_dirty_metadata(NULL, NULL, iloc.bh);
15168016e29fSHarshad Shirwadkar 	if (ret)
15178016e29fSHarshad Shirwadkar 		goto out;
15188016e29fSHarshad Shirwadkar 	ret = sync_dirty_buffer(iloc.bh);
15198016e29fSHarshad Shirwadkar 	if (ret)
15208016e29fSHarshad Shirwadkar 		goto out;
15218016e29fSHarshad Shirwadkar 	ret = ext4_mark_inode_used(sb, ino);
15228016e29fSHarshad Shirwadkar 	if (ret)
15238016e29fSHarshad Shirwadkar 		goto out;
15248016e29fSHarshad Shirwadkar 
15258016e29fSHarshad Shirwadkar 	/* Given that we just wrote the inode on disk, this SHOULD succeed. */
15268016e29fSHarshad Shirwadkar 	inode = ext4_iget(sb, ino, EXT4_IGET_NORMAL);
15278016e29fSHarshad Shirwadkar 	if (IS_ERR_OR_NULL(inode)) {
15288016e29fSHarshad Shirwadkar 		jbd_debug(1, "Inode not found.");
15298016e29fSHarshad Shirwadkar 		return -EFSCORRUPTED;
15308016e29fSHarshad Shirwadkar 	}
15318016e29fSHarshad Shirwadkar 
15328016e29fSHarshad Shirwadkar 	/*
15338016e29fSHarshad Shirwadkar 	 * Our allocator could have made different decisions than before
15348016e29fSHarshad Shirwadkar 	 * crashing. This should be fixed but until then, we calculate
15358016e29fSHarshad Shirwadkar 	 * the number of blocks the inode.
15368016e29fSHarshad Shirwadkar 	 */
15378016e29fSHarshad Shirwadkar 	ext4_ext_replay_set_iblocks(inode);
15388016e29fSHarshad Shirwadkar 
15398016e29fSHarshad Shirwadkar 	inode->i_generation = le32_to_cpu(ext4_raw_inode(&iloc)->i_generation);
15408016e29fSHarshad Shirwadkar 	ext4_reset_inode_seed(inode);
15418016e29fSHarshad Shirwadkar 
15428016e29fSHarshad Shirwadkar 	ext4_inode_csum_set(inode, ext4_raw_inode(&iloc), EXT4_I(inode));
15438016e29fSHarshad Shirwadkar 	ret = ext4_handle_dirty_metadata(NULL, NULL, iloc.bh);
15448016e29fSHarshad Shirwadkar 	sync_dirty_buffer(iloc.bh);
15458016e29fSHarshad Shirwadkar 	brelse(iloc.bh);
15468016e29fSHarshad Shirwadkar out:
15478016e29fSHarshad Shirwadkar 	iput(inode);
15488016e29fSHarshad Shirwadkar 	if (!ret)
15498016e29fSHarshad Shirwadkar 		blkdev_issue_flush(sb->s_bdev, GFP_KERNEL);
15508016e29fSHarshad Shirwadkar 
15518016e29fSHarshad Shirwadkar 	return 0;
15528016e29fSHarshad Shirwadkar }
15538016e29fSHarshad Shirwadkar 
15548016e29fSHarshad Shirwadkar /*
15558016e29fSHarshad Shirwadkar  * Dentry create replay function.
15568016e29fSHarshad Shirwadkar  *
15578016e29fSHarshad Shirwadkar  * EXT4_FC_TAG_CREAT is preceded by EXT4_FC_TAG_INODE_FULL. Which means, the
15588016e29fSHarshad Shirwadkar  * inode for which we are trying to create a dentry here, should already have
15598016e29fSHarshad Shirwadkar  * been replayed before we start here.
15608016e29fSHarshad Shirwadkar  */
15618016e29fSHarshad Shirwadkar static int ext4_fc_replay_create(struct super_block *sb, struct ext4_fc_tl *tl)
15628016e29fSHarshad Shirwadkar {
15638016e29fSHarshad Shirwadkar 	int ret = 0;
15648016e29fSHarshad Shirwadkar 	struct inode *inode = NULL;
15658016e29fSHarshad Shirwadkar 	struct inode *dir = NULL;
15668016e29fSHarshad Shirwadkar 	struct dentry_info_args darg;
15678016e29fSHarshad Shirwadkar 
15688016e29fSHarshad Shirwadkar 	tl_to_darg(&darg, tl);
15698016e29fSHarshad Shirwadkar 
15708016e29fSHarshad Shirwadkar 	trace_ext4_fc_replay(sb, EXT4_FC_TAG_CREAT, darg.ino,
15718016e29fSHarshad Shirwadkar 			darg.parent_ino, darg.dname_len);
15728016e29fSHarshad Shirwadkar 
15738016e29fSHarshad Shirwadkar 	/* This takes care of update group descriptor and other metadata */
15748016e29fSHarshad Shirwadkar 	ret = ext4_mark_inode_used(sb, darg.ino);
15758016e29fSHarshad Shirwadkar 	if (ret)
15768016e29fSHarshad Shirwadkar 		goto out;
15778016e29fSHarshad Shirwadkar 
15788016e29fSHarshad Shirwadkar 	inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL);
15798016e29fSHarshad Shirwadkar 	if (IS_ERR_OR_NULL(inode)) {
15808016e29fSHarshad Shirwadkar 		jbd_debug(1, "inode %d not found.", darg.ino);
15818016e29fSHarshad Shirwadkar 		inode = NULL;
15828016e29fSHarshad Shirwadkar 		ret = -EINVAL;
15838016e29fSHarshad Shirwadkar 		goto out;
15848016e29fSHarshad Shirwadkar 	}
15858016e29fSHarshad Shirwadkar 
15868016e29fSHarshad Shirwadkar 	if (S_ISDIR(inode->i_mode)) {
15878016e29fSHarshad Shirwadkar 		/*
15888016e29fSHarshad Shirwadkar 		 * If we are creating a directory, we need to make sure that the
15898016e29fSHarshad Shirwadkar 		 * dot and dot dot dirents are setup properly.
15908016e29fSHarshad Shirwadkar 		 */
15918016e29fSHarshad Shirwadkar 		dir = ext4_iget(sb, darg.parent_ino, EXT4_IGET_NORMAL);
15928016e29fSHarshad Shirwadkar 		if (IS_ERR_OR_NULL(dir)) {
15938016e29fSHarshad Shirwadkar 			jbd_debug(1, "Dir %d not found.", darg.ino);
15948016e29fSHarshad Shirwadkar 			goto out;
15958016e29fSHarshad Shirwadkar 		}
15968016e29fSHarshad Shirwadkar 		ret = ext4_init_new_dir(NULL, dir, inode);
15978016e29fSHarshad Shirwadkar 		iput(dir);
15988016e29fSHarshad Shirwadkar 		if (ret) {
15998016e29fSHarshad Shirwadkar 			ret = 0;
16008016e29fSHarshad Shirwadkar 			goto out;
16018016e29fSHarshad Shirwadkar 		}
16028016e29fSHarshad Shirwadkar 	}
16038016e29fSHarshad Shirwadkar 	ret = ext4_fc_replay_link_internal(sb, &darg, inode);
16048016e29fSHarshad Shirwadkar 	if (ret)
16058016e29fSHarshad Shirwadkar 		goto out;
16068016e29fSHarshad Shirwadkar 	set_nlink(inode, 1);
16078016e29fSHarshad Shirwadkar 	ext4_mark_inode_dirty(NULL, inode);
16088016e29fSHarshad Shirwadkar out:
16098016e29fSHarshad Shirwadkar 	if (inode)
16108016e29fSHarshad Shirwadkar 		iput(inode);
16118016e29fSHarshad Shirwadkar 	return ret;
16128016e29fSHarshad Shirwadkar }
16138016e29fSHarshad Shirwadkar 
16148016e29fSHarshad Shirwadkar /*
16158016e29fSHarshad Shirwadkar  * Record physical disk regions which are in use as per fast commit area. Our
16168016e29fSHarshad Shirwadkar  * simple replay phase allocator excludes these regions from allocation.
16178016e29fSHarshad Shirwadkar  */
16188016e29fSHarshad Shirwadkar static int ext4_fc_record_regions(struct super_block *sb, int ino,
16198016e29fSHarshad Shirwadkar 		ext4_lblk_t lblk, ext4_fsblk_t pblk, int len)
16208016e29fSHarshad Shirwadkar {
16218016e29fSHarshad Shirwadkar 	struct ext4_fc_replay_state *state;
16228016e29fSHarshad Shirwadkar 	struct ext4_fc_alloc_region *region;
16238016e29fSHarshad Shirwadkar 
16248016e29fSHarshad Shirwadkar 	state = &EXT4_SB(sb)->s_fc_replay_state;
16258016e29fSHarshad Shirwadkar 	if (state->fc_regions_used == state->fc_regions_size) {
16268016e29fSHarshad Shirwadkar 		state->fc_regions_size +=
16278016e29fSHarshad Shirwadkar 			EXT4_FC_REPLAY_REALLOC_INCREMENT;
16288016e29fSHarshad Shirwadkar 		state->fc_regions = krealloc(
16298016e29fSHarshad Shirwadkar 					state->fc_regions,
16308016e29fSHarshad Shirwadkar 					state->fc_regions_size *
16318016e29fSHarshad Shirwadkar 					sizeof(struct ext4_fc_alloc_region),
16328016e29fSHarshad Shirwadkar 					GFP_KERNEL);
16338016e29fSHarshad Shirwadkar 		if (!state->fc_regions)
16348016e29fSHarshad Shirwadkar 			return -ENOMEM;
16358016e29fSHarshad Shirwadkar 	}
16368016e29fSHarshad Shirwadkar 	region = &state->fc_regions[state->fc_regions_used++];
16378016e29fSHarshad Shirwadkar 	region->ino = ino;
16388016e29fSHarshad Shirwadkar 	region->lblk = lblk;
16398016e29fSHarshad Shirwadkar 	region->pblk = pblk;
16408016e29fSHarshad Shirwadkar 	region->len = len;
16418016e29fSHarshad Shirwadkar 
16428016e29fSHarshad Shirwadkar 	return 0;
16438016e29fSHarshad Shirwadkar }
16448016e29fSHarshad Shirwadkar 
16458016e29fSHarshad Shirwadkar /* Replay add range tag */
16468016e29fSHarshad Shirwadkar static int ext4_fc_replay_add_range(struct super_block *sb,
16478016e29fSHarshad Shirwadkar 				struct ext4_fc_tl *tl)
16488016e29fSHarshad Shirwadkar {
16498016e29fSHarshad Shirwadkar 	struct ext4_fc_add_range *fc_add_ex;
16508016e29fSHarshad Shirwadkar 	struct ext4_extent newex, *ex;
16518016e29fSHarshad Shirwadkar 	struct inode *inode;
16528016e29fSHarshad Shirwadkar 	ext4_lblk_t start, cur;
16538016e29fSHarshad Shirwadkar 	int remaining, len;
16548016e29fSHarshad Shirwadkar 	ext4_fsblk_t start_pblk;
16558016e29fSHarshad Shirwadkar 	struct ext4_map_blocks map;
16568016e29fSHarshad Shirwadkar 	struct ext4_ext_path *path = NULL;
16578016e29fSHarshad Shirwadkar 	int ret;
16588016e29fSHarshad Shirwadkar 
16598016e29fSHarshad Shirwadkar 	fc_add_ex = (struct ext4_fc_add_range *)ext4_fc_tag_val(tl);
16608016e29fSHarshad Shirwadkar 	ex = (struct ext4_extent *)&fc_add_ex->fc_ex;
16618016e29fSHarshad Shirwadkar 
16628016e29fSHarshad Shirwadkar 	trace_ext4_fc_replay(sb, EXT4_FC_TAG_ADD_RANGE,
16638016e29fSHarshad Shirwadkar 		le32_to_cpu(fc_add_ex->fc_ino), le32_to_cpu(ex->ee_block),
16648016e29fSHarshad Shirwadkar 		ext4_ext_get_actual_len(ex));
16658016e29fSHarshad Shirwadkar 
16668016e29fSHarshad Shirwadkar 	inode = ext4_iget(sb, le32_to_cpu(fc_add_ex->fc_ino),
16678016e29fSHarshad Shirwadkar 				EXT4_IGET_NORMAL);
16688016e29fSHarshad Shirwadkar 	if (IS_ERR_OR_NULL(inode)) {
16698016e29fSHarshad Shirwadkar 		jbd_debug(1, "Inode not found.");
16708016e29fSHarshad Shirwadkar 		return 0;
16718016e29fSHarshad Shirwadkar 	}
16728016e29fSHarshad Shirwadkar 
16738016e29fSHarshad Shirwadkar 	ret = ext4_fc_record_modified_inode(sb, inode->i_ino);
16748016e29fSHarshad Shirwadkar 
16758016e29fSHarshad Shirwadkar 	start = le32_to_cpu(ex->ee_block);
16768016e29fSHarshad Shirwadkar 	start_pblk = ext4_ext_pblock(ex);
16778016e29fSHarshad Shirwadkar 	len = ext4_ext_get_actual_len(ex);
16788016e29fSHarshad Shirwadkar 
16798016e29fSHarshad Shirwadkar 	cur = start;
16808016e29fSHarshad Shirwadkar 	remaining = len;
16818016e29fSHarshad Shirwadkar 	jbd_debug(1, "ADD_RANGE, lblk %d, pblk %lld, len %d, unwritten %d, inode %ld\n",
16828016e29fSHarshad Shirwadkar 		  start, start_pblk, len, ext4_ext_is_unwritten(ex),
16838016e29fSHarshad Shirwadkar 		  inode->i_ino);
16848016e29fSHarshad Shirwadkar 
16858016e29fSHarshad Shirwadkar 	while (remaining > 0) {
16868016e29fSHarshad Shirwadkar 		map.m_lblk = cur;
16878016e29fSHarshad Shirwadkar 		map.m_len = remaining;
16888016e29fSHarshad Shirwadkar 		map.m_pblk = 0;
16898016e29fSHarshad Shirwadkar 		ret = ext4_map_blocks(NULL, inode, &map, 0);
16908016e29fSHarshad Shirwadkar 
16918016e29fSHarshad Shirwadkar 		if (ret < 0) {
16928016e29fSHarshad Shirwadkar 			iput(inode);
16938016e29fSHarshad Shirwadkar 			return 0;
16948016e29fSHarshad Shirwadkar 		}
16958016e29fSHarshad Shirwadkar 
16968016e29fSHarshad Shirwadkar 		if (ret == 0) {
16978016e29fSHarshad Shirwadkar 			/* Range is not mapped */
16988016e29fSHarshad Shirwadkar 			path = ext4_find_extent(inode, cur, NULL, 0);
16998c9be1e5SHarshad Shirwadkar 			if (IS_ERR(path)) {
17008c9be1e5SHarshad Shirwadkar 				iput(inode);
17018c9be1e5SHarshad Shirwadkar 				return 0;
17028c9be1e5SHarshad Shirwadkar 			}
17038016e29fSHarshad Shirwadkar 			memset(&newex, 0, sizeof(newex));
17048016e29fSHarshad Shirwadkar 			newex.ee_block = cpu_to_le32(cur);
17058016e29fSHarshad Shirwadkar 			ext4_ext_store_pblock(
17068016e29fSHarshad Shirwadkar 				&newex, start_pblk + cur - start);
17078016e29fSHarshad Shirwadkar 			newex.ee_len = cpu_to_le16(map.m_len);
17088016e29fSHarshad Shirwadkar 			if (ext4_ext_is_unwritten(ex))
17098016e29fSHarshad Shirwadkar 				ext4_ext_mark_unwritten(&newex);
17108016e29fSHarshad Shirwadkar 			down_write(&EXT4_I(inode)->i_data_sem);
17118016e29fSHarshad Shirwadkar 			ret = ext4_ext_insert_extent(
17128016e29fSHarshad Shirwadkar 				NULL, inode, &path, &newex, 0);
17138016e29fSHarshad Shirwadkar 			up_write((&EXT4_I(inode)->i_data_sem));
17148016e29fSHarshad Shirwadkar 			ext4_ext_drop_refs(path);
17158016e29fSHarshad Shirwadkar 			kfree(path);
17168016e29fSHarshad Shirwadkar 			if (ret) {
17178016e29fSHarshad Shirwadkar 				iput(inode);
17188016e29fSHarshad Shirwadkar 				return 0;
17198016e29fSHarshad Shirwadkar 			}
17208016e29fSHarshad Shirwadkar 			goto next;
17218016e29fSHarshad Shirwadkar 		}
17228016e29fSHarshad Shirwadkar 
17238016e29fSHarshad Shirwadkar 		if (start_pblk + cur - start != map.m_pblk) {
17248016e29fSHarshad Shirwadkar 			/*
17258016e29fSHarshad Shirwadkar 			 * Logical to physical mapping changed. This can happen
17268016e29fSHarshad Shirwadkar 			 * if this range was removed and then reallocated to
17278016e29fSHarshad Shirwadkar 			 * map to new physical blocks during a fast commit.
17288016e29fSHarshad Shirwadkar 			 */
17298016e29fSHarshad Shirwadkar 			ret = ext4_ext_replay_update_ex(inode, cur, map.m_len,
17308016e29fSHarshad Shirwadkar 					ext4_ext_is_unwritten(ex),
17318016e29fSHarshad Shirwadkar 					start_pblk + cur - start);
17328016e29fSHarshad Shirwadkar 			if (ret) {
17338016e29fSHarshad Shirwadkar 				iput(inode);
17348016e29fSHarshad Shirwadkar 				return 0;
17358016e29fSHarshad Shirwadkar 			}
17368016e29fSHarshad Shirwadkar 			/*
17378016e29fSHarshad Shirwadkar 			 * Mark the old blocks as free since they aren't used
17388016e29fSHarshad Shirwadkar 			 * anymore. We maintain an array of all the modified
17398016e29fSHarshad Shirwadkar 			 * inodes. In case these blocks are still used at either
17408016e29fSHarshad Shirwadkar 			 * a different logical range in the same inode or in
17418016e29fSHarshad Shirwadkar 			 * some different inode, we will mark them as allocated
17428016e29fSHarshad Shirwadkar 			 * at the end of the FC replay using our array of
17438016e29fSHarshad Shirwadkar 			 * modified inodes.
17448016e29fSHarshad Shirwadkar 			 */
17458016e29fSHarshad Shirwadkar 			ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, 0);
17468016e29fSHarshad Shirwadkar 			goto next;
17478016e29fSHarshad Shirwadkar 		}
17488016e29fSHarshad Shirwadkar 
17498016e29fSHarshad Shirwadkar 		/* Range is mapped and needs a state change */
17508016e29fSHarshad Shirwadkar 		jbd_debug(1, "Converting from %d to %d %lld",
17518016e29fSHarshad Shirwadkar 				map.m_flags & EXT4_MAP_UNWRITTEN,
17528016e29fSHarshad Shirwadkar 			ext4_ext_is_unwritten(ex), map.m_pblk);
17538016e29fSHarshad Shirwadkar 		ret = ext4_ext_replay_update_ex(inode, cur, map.m_len,
17548016e29fSHarshad Shirwadkar 					ext4_ext_is_unwritten(ex), map.m_pblk);
17558016e29fSHarshad Shirwadkar 		if (ret) {
17568016e29fSHarshad Shirwadkar 			iput(inode);
17578016e29fSHarshad Shirwadkar 			return 0;
17588016e29fSHarshad Shirwadkar 		}
17598016e29fSHarshad Shirwadkar 		/*
17608016e29fSHarshad Shirwadkar 		 * We may have split the extent tree while toggling the state.
17618016e29fSHarshad Shirwadkar 		 * Try to shrink the extent tree now.
17628016e29fSHarshad Shirwadkar 		 */
17638016e29fSHarshad Shirwadkar 		ext4_ext_replay_shrink_inode(inode, start + len);
17648016e29fSHarshad Shirwadkar next:
17658016e29fSHarshad Shirwadkar 		cur += map.m_len;
17668016e29fSHarshad Shirwadkar 		remaining -= map.m_len;
17678016e29fSHarshad Shirwadkar 	}
17688016e29fSHarshad Shirwadkar 	ext4_ext_replay_shrink_inode(inode, i_size_read(inode) >>
17698016e29fSHarshad Shirwadkar 					sb->s_blocksize_bits);
17708016e29fSHarshad Shirwadkar 	iput(inode);
17718016e29fSHarshad Shirwadkar 	return 0;
17728016e29fSHarshad Shirwadkar }
17738016e29fSHarshad Shirwadkar 
17748016e29fSHarshad Shirwadkar /* Replay DEL_RANGE tag */
17758016e29fSHarshad Shirwadkar static int
17768016e29fSHarshad Shirwadkar ext4_fc_replay_del_range(struct super_block *sb, struct ext4_fc_tl *tl)
17778016e29fSHarshad Shirwadkar {
17788016e29fSHarshad Shirwadkar 	struct inode *inode;
17798016e29fSHarshad Shirwadkar 	struct ext4_fc_del_range *lrange;
17808016e29fSHarshad Shirwadkar 	struct ext4_map_blocks map;
17818016e29fSHarshad Shirwadkar 	ext4_lblk_t cur, remaining;
17828016e29fSHarshad Shirwadkar 	int ret;
17838016e29fSHarshad Shirwadkar 
17848016e29fSHarshad Shirwadkar 	lrange = (struct ext4_fc_del_range *)ext4_fc_tag_val(tl);
17858016e29fSHarshad Shirwadkar 	cur = le32_to_cpu(lrange->fc_lblk);
17868016e29fSHarshad Shirwadkar 	remaining = le32_to_cpu(lrange->fc_len);
17878016e29fSHarshad Shirwadkar 
17888016e29fSHarshad Shirwadkar 	trace_ext4_fc_replay(sb, EXT4_FC_TAG_DEL_RANGE,
17898016e29fSHarshad Shirwadkar 		le32_to_cpu(lrange->fc_ino), cur, remaining);
17908016e29fSHarshad Shirwadkar 
17918016e29fSHarshad Shirwadkar 	inode = ext4_iget(sb, le32_to_cpu(lrange->fc_ino), EXT4_IGET_NORMAL);
17928016e29fSHarshad Shirwadkar 	if (IS_ERR_OR_NULL(inode)) {
17938016e29fSHarshad Shirwadkar 		jbd_debug(1, "Inode %d not found", le32_to_cpu(lrange->fc_ino));
17948016e29fSHarshad Shirwadkar 		return 0;
17958016e29fSHarshad Shirwadkar 	}
17968016e29fSHarshad Shirwadkar 
17978016e29fSHarshad Shirwadkar 	ret = ext4_fc_record_modified_inode(sb, inode->i_ino);
17988016e29fSHarshad Shirwadkar 
17998016e29fSHarshad Shirwadkar 	jbd_debug(1, "DEL_RANGE, inode %ld, lblk %d, len %d\n",
18008016e29fSHarshad Shirwadkar 			inode->i_ino, le32_to_cpu(lrange->fc_lblk),
18018016e29fSHarshad Shirwadkar 			le32_to_cpu(lrange->fc_len));
18028016e29fSHarshad Shirwadkar 	while (remaining > 0) {
18038016e29fSHarshad Shirwadkar 		map.m_lblk = cur;
18048016e29fSHarshad Shirwadkar 		map.m_len = remaining;
18058016e29fSHarshad Shirwadkar 
18068016e29fSHarshad Shirwadkar 		ret = ext4_map_blocks(NULL, inode, &map, 0);
18078016e29fSHarshad Shirwadkar 		if (ret < 0) {
18088016e29fSHarshad Shirwadkar 			iput(inode);
18098016e29fSHarshad Shirwadkar 			return 0;
18108016e29fSHarshad Shirwadkar 		}
18118016e29fSHarshad Shirwadkar 		if (ret > 0) {
18128016e29fSHarshad Shirwadkar 			remaining -= ret;
18138016e29fSHarshad Shirwadkar 			cur += ret;
18148016e29fSHarshad Shirwadkar 			ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, 0);
18158016e29fSHarshad Shirwadkar 		} else {
18168016e29fSHarshad Shirwadkar 			remaining -= map.m_len;
18178016e29fSHarshad Shirwadkar 			cur += map.m_len;
18188016e29fSHarshad Shirwadkar 		}
18198016e29fSHarshad Shirwadkar 	}
18208016e29fSHarshad Shirwadkar 
18218016e29fSHarshad Shirwadkar 	ret = ext4_punch_hole(inode,
18228016e29fSHarshad Shirwadkar 		le32_to_cpu(lrange->fc_lblk) << sb->s_blocksize_bits,
18238016e29fSHarshad Shirwadkar 		le32_to_cpu(lrange->fc_len) <<  sb->s_blocksize_bits);
18248016e29fSHarshad Shirwadkar 	if (ret)
18258016e29fSHarshad Shirwadkar 		jbd_debug(1, "ext4_punch_hole returned %d", ret);
18268016e29fSHarshad Shirwadkar 	ext4_ext_replay_shrink_inode(inode,
18278016e29fSHarshad Shirwadkar 		i_size_read(inode) >> sb->s_blocksize_bits);
18288016e29fSHarshad Shirwadkar 	ext4_mark_inode_dirty(NULL, inode);
18298016e29fSHarshad Shirwadkar 	iput(inode);
18308016e29fSHarshad Shirwadkar 
18318016e29fSHarshad Shirwadkar 	return 0;
18328016e29fSHarshad Shirwadkar }
18338016e29fSHarshad Shirwadkar 
18348016e29fSHarshad Shirwadkar static inline const char *tag2str(u16 tag)
18358016e29fSHarshad Shirwadkar {
18368016e29fSHarshad Shirwadkar 	switch (tag) {
18378016e29fSHarshad Shirwadkar 	case EXT4_FC_TAG_LINK:
18388016e29fSHarshad Shirwadkar 		return "TAG_ADD_ENTRY";
18398016e29fSHarshad Shirwadkar 	case EXT4_FC_TAG_UNLINK:
18408016e29fSHarshad Shirwadkar 		return "TAG_DEL_ENTRY";
18418016e29fSHarshad Shirwadkar 	case EXT4_FC_TAG_ADD_RANGE:
18428016e29fSHarshad Shirwadkar 		return "TAG_ADD_RANGE";
18438016e29fSHarshad Shirwadkar 	case EXT4_FC_TAG_CREAT:
18448016e29fSHarshad Shirwadkar 		return "TAG_CREAT_DENTRY";
18458016e29fSHarshad Shirwadkar 	case EXT4_FC_TAG_DEL_RANGE:
18468016e29fSHarshad Shirwadkar 		return "TAG_DEL_RANGE";
18478016e29fSHarshad Shirwadkar 	case EXT4_FC_TAG_INODE:
18488016e29fSHarshad Shirwadkar 		return "TAG_INODE";
18498016e29fSHarshad Shirwadkar 	case EXT4_FC_TAG_PAD:
18508016e29fSHarshad Shirwadkar 		return "TAG_PAD";
18518016e29fSHarshad Shirwadkar 	case EXT4_FC_TAG_TAIL:
18528016e29fSHarshad Shirwadkar 		return "TAG_TAIL";
18538016e29fSHarshad Shirwadkar 	case EXT4_FC_TAG_HEAD:
18548016e29fSHarshad Shirwadkar 		return "TAG_HEAD";
18558016e29fSHarshad Shirwadkar 	default:
18568016e29fSHarshad Shirwadkar 		return "TAG_ERROR";
18578016e29fSHarshad Shirwadkar 	}
18588016e29fSHarshad Shirwadkar }
18598016e29fSHarshad Shirwadkar 
18608016e29fSHarshad Shirwadkar static void ext4_fc_set_bitmaps_and_counters(struct super_block *sb)
18618016e29fSHarshad Shirwadkar {
18628016e29fSHarshad Shirwadkar 	struct ext4_fc_replay_state *state;
18638016e29fSHarshad Shirwadkar 	struct inode *inode;
18648016e29fSHarshad Shirwadkar 	struct ext4_ext_path *path = NULL;
18658016e29fSHarshad Shirwadkar 	struct ext4_map_blocks map;
18668016e29fSHarshad Shirwadkar 	int i, ret, j;
18678016e29fSHarshad Shirwadkar 	ext4_lblk_t cur, end;
18688016e29fSHarshad Shirwadkar 
18698016e29fSHarshad Shirwadkar 	state = &EXT4_SB(sb)->s_fc_replay_state;
18708016e29fSHarshad Shirwadkar 	for (i = 0; i < state->fc_modified_inodes_used; i++) {
18718016e29fSHarshad Shirwadkar 		inode = ext4_iget(sb, state->fc_modified_inodes[i],
18728016e29fSHarshad Shirwadkar 			EXT4_IGET_NORMAL);
18738016e29fSHarshad Shirwadkar 		if (IS_ERR_OR_NULL(inode)) {
18748016e29fSHarshad Shirwadkar 			jbd_debug(1, "Inode %d not found.",
18758016e29fSHarshad Shirwadkar 				state->fc_modified_inodes[i]);
18768016e29fSHarshad Shirwadkar 			continue;
18778016e29fSHarshad Shirwadkar 		}
18788016e29fSHarshad Shirwadkar 		cur = 0;
18798016e29fSHarshad Shirwadkar 		end = EXT_MAX_BLOCKS;
18808016e29fSHarshad Shirwadkar 		while (cur < end) {
18818016e29fSHarshad Shirwadkar 			map.m_lblk = cur;
18828016e29fSHarshad Shirwadkar 			map.m_len = end - cur;
18838016e29fSHarshad Shirwadkar 
18848016e29fSHarshad Shirwadkar 			ret = ext4_map_blocks(NULL, inode, &map, 0);
18858016e29fSHarshad Shirwadkar 			if (ret < 0)
18868016e29fSHarshad Shirwadkar 				break;
18878016e29fSHarshad Shirwadkar 
18888016e29fSHarshad Shirwadkar 			if (ret > 0) {
18898016e29fSHarshad Shirwadkar 				path = ext4_find_extent(inode, map.m_lblk, NULL, 0);
18908016e29fSHarshad Shirwadkar 				if (!IS_ERR_OR_NULL(path)) {
18918016e29fSHarshad Shirwadkar 					for (j = 0; j < path->p_depth; j++)
18928016e29fSHarshad Shirwadkar 						ext4_mb_mark_bb(inode->i_sb,
18938016e29fSHarshad Shirwadkar 							path[j].p_block, 1, 1);
18948016e29fSHarshad Shirwadkar 					ext4_ext_drop_refs(path);
18958016e29fSHarshad Shirwadkar 					kfree(path);
18968016e29fSHarshad Shirwadkar 				}
18978016e29fSHarshad Shirwadkar 				cur += ret;
18988016e29fSHarshad Shirwadkar 				ext4_mb_mark_bb(inode->i_sb, map.m_pblk,
18998016e29fSHarshad Shirwadkar 							map.m_len, 1);
19008016e29fSHarshad Shirwadkar 			} else {
19018016e29fSHarshad Shirwadkar 				cur = cur + (map.m_len ? map.m_len : 1);
19028016e29fSHarshad Shirwadkar 			}
19038016e29fSHarshad Shirwadkar 		}
19048016e29fSHarshad Shirwadkar 		iput(inode);
19058016e29fSHarshad Shirwadkar 	}
19068016e29fSHarshad Shirwadkar }
19078016e29fSHarshad Shirwadkar 
19088016e29fSHarshad Shirwadkar /*
19098016e29fSHarshad Shirwadkar  * Check if block is in excluded regions for block allocation. The simple
19108016e29fSHarshad Shirwadkar  * allocator that runs during replay phase is calls this function to see
19118016e29fSHarshad Shirwadkar  * if it is okay to use a block.
19128016e29fSHarshad Shirwadkar  */
19138016e29fSHarshad Shirwadkar bool ext4_fc_replay_check_excluded(struct super_block *sb, ext4_fsblk_t blk)
19148016e29fSHarshad Shirwadkar {
19158016e29fSHarshad Shirwadkar 	int i;
19168016e29fSHarshad Shirwadkar 	struct ext4_fc_replay_state *state;
19178016e29fSHarshad Shirwadkar 
19188016e29fSHarshad Shirwadkar 	state = &EXT4_SB(sb)->s_fc_replay_state;
19198016e29fSHarshad Shirwadkar 	for (i = 0; i < state->fc_regions_valid; i++) {
19208016e29fSHarshad Shirwadkar 		if (state->fc_regions[i].ino == 0 ||
19218016e29fSHarshad Shirwadkar 			state->fc_regions[i].len == 0)
19228016e29fSHarshad Shirwadkar 			continue;
19238016e29fSHarshad Shirwadkar 		if (blk >= state->fc_regions[i].pblk &&
19248016e29fSHarshad Shirwadkar 		    blk < state->fc_regions[i].pblk + state->fc_regions[i].len)
19258016e29fSHarshad Shirwadkar 			return true;
19268016e29fSHarshad Shirwadkar 	}
19278016e29fSHarshad Shirwadkar 	return false;
19288016e29fSHarshad Shirwadkar }
19298016e29fSHarshad Shirwadkar 
19308016e29fSHarshad Shirwadkar /* Cleanup function called after replay */
19318016e29fSHarshad Shirwadkar void ext4_fc_replay_cleanup(struct super_block *sb)
19328016e29fSHarshad Shirwadkar {
19338016e29fSHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(sb);
19348016e29fSHarshad Shirwadkar 
19358016e29fSHarshad Shirwadkar 	sbi->s_mount_state &= ~EXT4_FC_REPLAY;
19368016e29fSHarshad Shirwadkar 	kfree(sbi->s_fc_replay_state.fc_regions);
19378016e29fSHarshad Shirwadkar 	kfree(sbi->s_fc_replay_state.fc_modified_inodes);
19388016e29fSHarshad Shirwadkar }
19398016e29fSHarshad Shirwadkar 
19408016e29fSHarshad Shirwadkar /*
19418016e29fSHarshad Shirwadkar  * Recovery Scan phase handler
19428016e29fSHarshad Shirwadkar  *
19438016e29fSHarshad Shirwadkar  * This function is called during the scan phase and is responsible
19448016e29fSHarshad Shirwadkar  * for doing following things:
19458016e29fSHarshad Shirwadkar  * - Make sure the fast commit area has valid tags for replay
19468016e29fSHarshad Shirwadkar  * - Count number of tags that need to be replayed by the replay handler
19478016e29fSHarshad Shirwadkar  * - Verify CRC
19488016e29fSHarshad Shirwadkar  * - Create a list of excluded blocks for allocation during replay phase
19498016e29fSHarshad Shirwadkar  *
19508016e29fSHarshad Shirwadkar  * This function returns JBD2_FC_REPLAY_CONTINUE to indicate that SCAN is
19518016e29fSHarshad Shirwadkar  * incomplete and JBD2 should send more blocks. It returns JBD2_FC_REPLAY_STOP
19528016e29fSHarshad Shirwadkar  * to indicate that scan has finished and JBD2 can now start replay phase.
19538016e29fSHarshad Shirwadkar  * It returns a negative error to indicate that there was an error. At the end
19548016e29fSHarshad Shirwadkar  * of a successful scan phase, sbi->s_fc_replay_state.fc_replay_num_tags is set
19558016e29fSHarshad Shirwadkar  * to indicate the number of tags that need to replayed during the replay phase.
19568016e29fSHarshad Shirwadkar  */
19578016e29fSHarshad Shirwadkar static int ext4_fc_replay_scan(journal_t *journal,
19588016e29fSHarshad Shirwadkar 				struct buffer_head *bh, int off,
19598016e29fSHarshad Shirwadkar 				tid_t expected_tid)
19608016e29fSHarshad Shirwadkar {
19618016e29fSHarshad Shirwadkar 	struct super_block *sb = journal->j_private;
19628016e29fSHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(sb);
19638016e29fSHarshad Shirwadkar 	struct ext4_fc_replay_state *state;
19648016e29fSHarshad Shirwadkar 	int ret = JBD2_FC_REPLAY_CONTINUE;
19658016e29fSHarshad Shirwadkar 	struct ext4_fc_add_range *ext;
19668016e29fSHarshad Shirwadkar 	struct ext4_fc_tl *tl;
19678016e29fSHarshad Shirwadkar 	struct ext4_fc_tail *tail;
19688016e29fSHarshad Shirwadkar 	__u8 *start, *end;
19698016e29fSHarshad Shirwadkar 	struct ext4_fc_head *head;
19708016e29fSHarshad Shirwadkar 	struct ext4_extent *ex;
19718016e29fSHarshad Shirwadkar 
19728016e29fSHarshad Shirwadkar 	state = &sbi->s_fc_replay_state;
19738016e29fSHarshad Shirwadkar 
19748016e29fSHarshad Shirwadkar 	start = (u8 *)bh->b_data;
19758016e29fSHarshad Shirwadkar 	end = (__u8 *)bh->b_data + journal->j_blocksize - 1;
19768016e29fSHarshad Shirwadkar 
19778016e29fSHarshad Shirwadkar 	if (state->fc_replay_expected_off == 0) {
19788016e29fSHarshad Shirwadkar 		state->fc_cur_tag = 0;
19798016e29fSHarshad Shirwadkar 		state->fc_replay_num_tags = 0;
19808016e29fSHarshad Shirwadkar 		state->fc_crc = 0;
19818016e29fSHarshad Shirwadkar 		state->fc_regions = NULL;
19828016e29fSHarshad Shirwadkar 		state->fc_regions_valid = state->fc_regions_used =
19838016e29fSHarshad Shirwadkar 			state->fc_regions_size = 0;
19848016e29fSHarshad Shirwadkar 		/* Check if we can stop early */
19858016e29fSHarshad Shirwadkar 		if (le16_to_cpu(((struct ext4_fc_tl *)start)->fc_tag)
19868016e29fSHarshad Shirwadkar 			!= EXT4_FC_TAG_HEAD)
19878016e29fSHarshad Shirwadkar 			return 0;
19888016e29fSHarshad Shirwadkar 	}
19898016e29fSHarshad Shirwadkar 
19908016e29fSHarshad Shirwadkar 	if (off != state->fc_replay_expected_off) {
19918016e29fSHarshad Shirwadkar 		ret = -EFSCORRUPTED;
19928016e29fSHarshad Shirwadkar 		goto out_err;
19938016e29fSHarshad Shirwadkar 	}
19948016e29fSHarshad Shirwadkar 
19958016e29fSHarshad Shirwadkar 	state->fc_replay_expected_off++;
19968016e29fSHarshad Shirwadkar 	fc_for_each_tl(start, end, tl) {
19978016e29fSHarshad Shirwadkar 		jbd_debug(3, "Scan phase, tag:%s, blk %lld\n",
19988016e29fSHarshad Shirwadkar 			  tag2str(le16_to_cpu(tl->fc_tag)), bh->b_blocknr);
19998016e29fSHarshad Shirwadkar 		switch (le16_to_cpu(tl->fc_tag)) {
20008016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_ADD_RANGE:
20018016e29fSHarshad Shirwadkar 			ext = (struct ext4_fc_add_range *)ext4_fc_tag_val(tl);
20028016e29fSHarshad Shirwadkar 			ex = (struct ext4_extent *)&ext->fc_ex;
20038016e29fSHarshad Shirwadkar 			ret = ext4_fc_record_regions(sb,
20048016e29fSHarshad Shirwadkar 				le32_to_cpu(ext->fc_ino),
20058016e29fSHarshad Shirwadkar 				le32_to_cpu(ex->ee_block), ext4_ext_pblock(ex),
20068016e29fSHarshad Shirwadkar 				ext4_ext_get_actual_len(ex));
20078016e29fSHarshad Shirwadkar 			if (ret < 0)
20088016e29fSHarshad Shirwadkar 				break;
20098016e29fSHarshad Shirwadkar 			ret = JBD2_FC_REPLAY_CONTINUE;
20108016e29fSHarshad Shirwadkar 			fallthrough;
20118016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_DEL_RANGE:
20128016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_LINK:
20138016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_UNLINK:
20148016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_CREAT:
20158016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_INODE:
20168016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_PAD:
20178016e29fSHarshad Shirwadkar 			state->fc_cur_tag++;
20188016e29fSHarshad Shirwadkar 			state->fc_crc = ext4_chksum(sbi, state->fc_crc, tl,
20198016e29fSHarshad Shirwadkar 					sizeof(*tl) + ext4_fc_tag_len(tl));
20208016e29fSHarshad Shirwadkar 			break;
20218016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_TAIL:
20228016e29fSHarshad Shirwadkar 			state->fc_cur_tag++;
20238016e29fSHarshad Shirwadkar 			tail = (struct ext4_fc_tail *)ext4_fc_tag_val(tl);
20248016e29fSHarshad Shirwadkar 			state->fc_crc = ext4_chksum(sbi, state->fc_crc, tl,
20258016e29fSHarshad Shirwadkar 						sizeof(*tl) +
20268016e29fSHarshad Shirwadkar 						offsetof(struct ext4_fc_tail,
20278016e29fSHarshad Shirwadkar 						fc_crc));
20288016e29fSHarshad Shirwadkar 			if (le32_to_cpu(tail->fc_tid) == expected_tid &&
20298016e29fSHarshad Shirwadkar 				le32_to_cpu(tail->fc_crc) == state->fc_crc) {
20308016e29fSHarshad Shirwadkar 				state->fc_replay_num_tags = state->fc_cur_tag;
20318016e29fSHarshad Shirwadkar 				state->fc_regions_valid =
20328016e29fSHarshad Shirwadkar 					state->fc_regions_used;
20338016e29fSHarshad Shirwadkar 			} else {
20348016e29fSHarshad Shirwadkar 				ret = state->fc_replay_num_tags ?
20358016e29fSHarshad Shirwadkar 					JBD2_FC_REPLAY_STOP : -EFSBADCRC;
20368016e29fSHarshad Shirwadkar 			}
20378016e29fSHarshad Shirwadkar 			state->fc_crc = 0;
20388016e29fSHarshad Shirwadkar 			break;
20398016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_HEAD:
20408016e29fSHarshad Shirwadkar 			head = (struct ext4_fc_head *)ext4_fc_tag_val(tl);
20418016e29fSHarshad Shirwadkar 			if (le32_to_cpu(head->fc_features) &
20428016e29fSHarshad Shirwadkar 				~EXT4_FC_SUPPORTED_FEATURES) {
20438016e29fSHarshad Shirwadkar 				ret = -EOPNOTSUPP;
20448016e29fSHarshad Shirwadkar 				break;
20458016e29fSHarshad Shirwadkar 			}
20468016e29fSHarshad Shirwadkar 			if (le32_to_cpu(head->fc_tid) != expected_tid) {
20478016e29fSHarshad Shirwadkar 				ret = JBD2_FC_REPLAY_STOP;
20488016e29fSHarshad Shirwadkar 				break;
20498016e29fSHarshad Shirwadkar 			}
20508016e29fSHarshad Shirwadkar 			state->fc_cur_tag++;
20518016e29fSHarshad Shirwadkar 			state->fc_crc = ext4_chksum(sbi, state->fc_crc, tl,
20528016e29fSHarshad Shirwadkar 					sizeof(*tl) + ext4_fc_tag_len(tl));
20538016e29fSHarshad Shirwadkar 			break;
20548016e29fSHarshad Shirwadkar 		default:
20558016e29fSHarshad Shirwadkar 			ret = state->fc_replay_num_tags ?
20568016e29fSHarshad Shirwadkar 				JBD2_FC_REPLAY_STOP : -ECANCELED;
20578016e29fSHarshad Shirwadkar 		}
20588016e29fSHarshad Shirwadkar 		if (ret < 0 || ret == JBD2_FC_REPLAY_STOP)
20598016e29fSHarshad Shirwadkar 			break;
20608016e29fSHarshad Shirwadkar 	}
20618016e29fSHarshad Shirwadkar 
20628016e29fSHarshad Shirwadkar out_err:
20638016e29fSHarshad Shirwadkar 	trace_ext4_fc_replay_scan(sb, ret, off);
20648016e29fSHarshad Shirwadkar 	return ret;
20658016e29fSHarshad Shirwadkar }
20668016e29fSHarshad Shirwadkar 
20675b849b5fSHarshad Shirwadkar /*
20685b849b5fSHarshad Shirwadkar  * Main recovery path entry point.
20698016e29fSHarshad Shirwadkar  * The meaning of return codes is similar as above.
20705b849b5fSHarshad Shirwadkar  */
20715b849b5fSHarshad Shirwadkar static int ext4_fc_replay(journal_t *journal, struct buffer_head *bh,
20725b849b5fSHarshad Shirwadkar 				enum passtype pass, int off, tid_t expected_tid)
20735b849b5fSHarshad Shirwadkar {
20748016e29fSHarshad Shirwadkar 	struct super_block *sb = journal->j_private;
20758016e29fSHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(sb);
20768016e29fSHarshad Shirwadkar 	struct ext4_fc_tl *tl;
20778016e29fSHarshad Shirwadkar 	__u8 *start, *end;
20788016e29fSHarshad Shirwadkar 	int ret = JBD2_FC_REPLAY_CONTINUE;
20798016e29fSHarshad Shirwadkar 	struct ext4_fc_replay_state *state = &sbi->s_fc_replay_state;
20808016e29fSHarshad Shirwadkar 	struct ext4_fc_tail *tail;
20818016e29fSHarshad Shirwadkar 
20828016e29fSHarshad Shirwadkar 	if (pass == PASS_SCAN) {
20838016e29fSHarshad Shirwadkar 		state->fc_current_pass = PASS_SCAN;
20848016e29fSHarshad Shirwadkar 		return ext4_fc_replay_scan(journal, bh, off, expected_tid);
20858016e29fSHarshad Shirwadkar 	}
20868016e29fSHarshad Shirwadkar 
20878016e29fSHarshad Shirwadkar 	if (state->fc_current_pass != pass) {
20888016e29fSHarshad Shirwadkar 		state->fc_current_pass = pass;
20898016e29fSHarshad Shirwadkar 		sbi->s_mount_state |= EXT4_FC_REPLAY;
20908016e29fSHarshad Shirwadkar 	}
20918016e29fSHarshad Shirwadkar 	if (!sbi->s_fc_replay_state.fc_replay_num_tags) {
20928016e29fSHarshad Shirwadkar 		jbd_debug(1, "Replay stops\n");
20938016e29fSHarshad Shirwadkar 		ext4_fc_set_bitmaps_and_counters(sb);
20945b849b5fSHarshad Shirwadkar 		return 0;
20955b849b5fSHarshad Shirwadkar 	}
20965b849b5fSHarshad Shirwadkar 
20978016e29fSHarshad Shirwadkar #ifdef CONFIG_EXT4_DEBUG
20988016e29fSHarshad Shirwadkar 	if (sbi->s_fc_debug_max_replay && off >= sbi->s_fc_debug_max_replay) {
20998016e29fSHarshad Shirwadkar 		pr_warn("Dropping fc block %d because max_replay set\n", off);
21008016e29fSHarshad Shirwadkar 		return JBD2_FC_REPLAY_STOP;
21018016e29fSHarshad Shirwadkar 	}
21028016e29fSHarshad Shirwadkar #endif
21038016e29fSHarshad Shirwadkar 
21048016e29fSHarshad Shirwadkar 	start = (u8 *)bh->b_data;
21058016e29fSHarshad Shirwadkar 	end = (__u8 *)bh->b_data + journal->j_blocksize - 1;
21068016e29fSHarshad Shirwadkar 
21078016e29fSHarshad Shirwadkar 	fc_for_each_tl(start, end, tl) {
21088016e29fSHarshad Shirwadkar 		if (state->fc_replay_num_tags == 0) {
21098016e29fSHarshad Shirwadkar 			ret = JBD2_FC_REPLAY_STOP;
21108016e29fSHarshad Shirwadkar 			ext4_fc_set_bitmaps_and_counters(sb);
21118016e29fSHarshad Shirwadkar 			break;
21128016e29fSHarshad Shirwadkar 		}
21138016e29fSHarshad Shirwadkar 		jbd_debug(3, "Replay phase, tag:%s\n",
21148016e29fSHarshad Shirwadkar 				tag2str(le16_to_cpu(tl->fc_tag)));
21158016e29fSHarshad Shirwadkar 		state->fc_replay_num_tags--;
21168016e29fSHarshad Shirwadkar 		switch (le16_to_cpu(tl->fc_tag)) {
21178016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_LINK:
21188016e29fSHarshad Shirwadkar 			ret = ext4_fc_replay_link(sb, tl);
21198016e29fSHarshad Shirwadkar 			break;
21208016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_UNLINK:
21218016e29fSHarshad Shirwadkar 			ret = ext4_fc_replay_unlink(sb, tl);
21228016e29fSHarshad Shirwadkar 			break;
21238016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_ADD_RANGE:
21248016e29fSHarshad Shirwadkar 			ret = ext4_fc_replay_add_range(sb, tl);
21258016e29fSHarshad Shirwadkar 			break;
21268016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_CREAT:
21278016e29fSHarshad Shirwadkar 			ret = ext4_fc_replay_create(sb, tl);
21288016e29fSHarshad Shirwadkar 			break;
21298016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_DEL_RANGE:
21308016e29fSHarshad Shirwadkar 			ret = ext4_fc_replay_del_range(sb, tl);
21318016e29fSHarshad Shirwadkar 			break;
21328016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_INODE:
21338016e29fSHarshad Shirwadkar 			ret = ext4_fc_replay_inode(sb, tl);
21348016e29fSHarshad Shirwadkar 			break;
21358016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_PAD:
21368016e29fSHarshad Shirwadkar 			trace_ext4_fc_replay(sb, EXT4_FC_TAG_PAD, 0,
21378016e29fSHarshad Shirwadkar 				ext4_fc_tag_len(tl), 0);
21388016e29fSHarshad Shirwadkar 			break;
21398016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_TAIL:
21408016e29fSHarshad Shirwadkar 			trace_ext4_fc_replay(sb, EXT4_FC_TAG_TAIL, 0,
21418016e29fSHarshad Shirwadkar 				ext4_fc_tag_len(tl), 0);
21428016e29fSHarshad Shirwadkar 			tail = (struct ext4_fc_tail *)ext4_fc_tag_val(tl);
21438016e29fSHarshad Shirwadkar 			WARN_ON(le32_to_cpu(tail->fc_tid) != expected_tid);
21448016e29fSHarshad Shirwadkar 			break;
21458016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_HEAD:
21468016e29fSHarshad Shirwadkar 			break;
21478016e29fSHarshad Shirwadkar 		default:
21488016e29fSHarshad Shirwadkar 			trace_ext4_fc_replay(sb, le16_to_cpu(tl->fc_tag), 0,
21498016e29fSHarshad Shirwadkar 				ext4_fc_tag_len(tl), 0);
21508016e29fSHarshad Shirwadkar 			ret = -ECANCELED;
21518016e29fSHarshad Shirwadkar 			break;
21528016e29fSHarshad Shirwadkar 		}
21538016e29fSHarshad Shirwadkar 		if (ret < 0)
21548016e29fSHarshad Shirwadkar 			break;
21558016e29fSHarshad Shirwadkar 		ret = JBD2_FC_REPLAY_CONTINUE;
21568016e29fSHarshad Shirwadkar 	}
21578016e29fSHarshad Shirwadkar 	return ret;
21588016e29fSHarshad Shirwadkar }
21598016e29fSHarshad Shirwadkar 
21606866d7b3SHarshad Shirwadkar void ext4_fc_init(struct super_block *sb, journal_t *journal)
21616866d7b3SHarshad Shirwadkar {
21625b849b5fSHarshad Shirwadkar 	/*
21635b849b5fSHarshad Shirwadkar 	 * We set replay callback even if fast commit disabled because we may
21645b849b5fSHarshad Shirwadkar 	 * could still have fast commit blocks that need to be replayed even if
21655b849b5fSHarshad Shirwadkar 	 * fast commit has now been turned off.
21665b849b5fSHarshad Shirwadkar 	 */
21675b849b5fSHarshad Shirwadkar 	journal->j_fc_replay_callback = ext4_fc_replay;
21686866d7b3SHarshad Shirwadkar 	if (!test_opt2(sb, JOURNAL_FAST_COMMIT))
21696866d7b3SHarshad Shirwadkar 		return;
2170ff780b91SHarshad Shirwadkar 	journal->j_fc_cleanup_callback = ext4_fc_cleanup;
21716866d7b3SHarshad Shirwadkar }
2172aa75f4d3SHarshad Shirwadkar 
/*
 * Human readable descriptions of the reasons a commit was ineligible for
 * fast commit.
 *
 * ext4_fc_info_show() indexes this table with the same index it uses for
 * stats->fc_ineligible_reason_count[], for every value in
 * [0, EXT4_FC_REASON_MAX).  The entries must therefore stay in the same
 * order as, and be as numerous as, the reason codes counted there
 * (NOTE(review): presumably the EXT4_FC_REASON_* enum - confirm when adding
 * a new reason).
 *
 * Both the strings and the pointer slots are const so that the whole table
 * is read-only.
 */
static const char * const fc_ineligible_reasons[] = {
	"Extended attributes changed",
	"Cross rename",
	"Journal flag changed",
	"Insufficient memory",
	"Swap boot",
	"Resize",
	"Dir renamed",
	"Falloc range op",
	"Data journalling",
	"FC Commit Failed"
};
2185ce8c59d1SHarshad Shirwadkar 
2186ce8c59d1SHarshad Shirwadkar int ext4_fc_info_show(struct seq_file *seq, void *v)
2187ce8c59d1SHarshad Shirwadkar {
2188ce8c59d1SHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB((struct super_block *)seq->private);
2189ce8c59d1SHarshad Shirwadkar 	struct ext4_fc_stats *stats = &sbi->s_fc_stats;
2190ce8c59d1SHarshad Shirwadkar 	int i;
2191ce8c59d1SHarshad Shirwadkar 
2192ce8c59d1SHarshad Shirwadkar 	if (v != SEQ_START_TOKEN)
2193ce8c59d1SHarshad Shirwadkar 		return 0;
2194ce8c59d1SHarshad Shirwadkar 
2195ce8c59d1SHarshad Shirwadkar 	seq_printf(seq,
2196ce8c59d1SHarshad Shirwadkar 		"fc stats:\n%ld commits\n%ld ineligible\n%ld numblks\n%lluus avg_commit_time\n",
2197ce8c59d1SHarshad Shirwadkar 		   stats->fc_num_commits, stats->fc_ineligible_commits,
2198ce8c59d1SHarshad Shirwadkar 		   stats->fc_numblks,
2199ce8c59d1SHarshad Shirwadkar 		   div_u64(sbi->s_fc_avg_commit_time, 1000));
2200ce8c59d1SHarshad Shirwadkar 	seq_puts(seq, "Ineligible reasons:\n");
2201ce8c59d1SHarshad Shirwadkar 	for (i = 0; i < EXT4_FC_REASON_MAX; i++)
2202ce8c59d1SHarshad Shirwadkar 		seq_printf(seq, "\"%s\":\t%d\n", fc_ineligible_reasons[i],
2203ce8c59d1SHarshad Shirwadkar 			stats->fc_ineligible_reason_count[i]);
2204ce8c59d1SHarshad Shirwadkar 
2205ce8c59d1SHarshad Shirwadkar 	return 0;
2206ce8c59d1SHarshad Shirwadkar }
2207ce8c59d1SHarshad Shirwadkar 
2208aa75f4d3SHarshad Shirwadkar int __init ext4_fc_init_dentry_cache(void)
2209aa75f4d3SHarshad Shirwadkar {
2210aa75f4d3SHarshad Shirwadkar 	ext4_fc_dentry_cachep = KMEM_CACHE(ext4_fc_dentry_update,
2211aa75f4d3SHarshad Shirwadkar 					   SLAB_RECLAIM_ACCOUNT);
2212aa75f4d3SHarshad Shirwadkar 
2213aa75f4d3SHarshad Shirwadkar 	if (ext4_fc_dentry_cachep == NULL)
2214aa75f4d3SHarshad Shirwadkar 		return -ENOMEM;
2215aa75f4d3SHarshad Shirwadkar 
2216aa75f4d3SHarshad Shirwadkar 	return 0;
2217aa75f4d3SHarshad Shirwadkar }
2218