xref: /openbmc/linux/fs/ext4/fast_commit.c (revision e85c81ba8859a4c839bcd69c5d83b32954133a5b)
16866d7b3SHarshad Shirwadkar // SPDX-License-Identifier: GPL-2.0
26866d7b3SHarshad Shirwadkar 
36866d7b3SHarshad Shirwadkar /*
46866d7b3SHarshad Shirwadkar  * fs/ext4/fast_commit.c
56866d7b3SHarshad Shirwadkar  *
66866d7b3SHarshad Shirwadkar  * Written by Harshad Shirwadkar <harshadshirwadkar@gmail.com>
76866d7b3SHarshad Shirwadkar  *
86866d7b3SHarshad Shirwadkar  * Ext4 fast commits routines.
96866d7b3SHarshad Shirwadkar  */
10aa75f4d3SHarshad Shirwadkar #include "ext4.h"
116866d7b3SHarshad Shirwadkar #include "ext4_jbd2.h"
12aa75f4d3SHarshad Shirwadkar #include "ext4_extents.h"
13aa75f4d3SHarshad Shirwadkar #include "mballoc.h"
14aa75f4d3SHarshad Shirwadkar 
15aa75f4d3SHarshad Shirwadkar /*
16aa75f4d3SHarshad Shirwadkar  * Ext4 Fast Commits
17aa75f4d3SHarshad Shirwadkar  * -----------------
18aa75f4d3SHarshad Shirwadkar  *
19aa75f4d3SHarshad Shirwadkar  * Ext4 fast commits implement fine grained journalling for Ext4.
20aa75f4d3SHarshad Shirwadkar  *
21aa75f4d3SHarshad Shirwadkar  * Fast commits are organized as a log of tag-length-value (TLV) structs. (See
22aa75f4d3SHarshad Shirwadkar  * struct ext4_fc_tl). Each TLV contains some delta that is replayed TLV by
23aa75f4d3SHarshad Shirwadkar  * TLV during the recovery phase. For the scenarios for which we currently
24aa75f4d3SHarshad Shirwadkar  * don't have replay code, fast commit falls back to full commits.
25aa75f4d3SHarshad Shirwadkar  * Fast commits record delta in one of the following three categories.
26aa75f4d3SHarshad Shirwadkar  *
27aa75f4d3SHarshad Shirwadkar  * (A) Directory entry updates:
28aa75f4d3SHarshad Shirwadkar  *
29aa75f4d3SHarshad Shirwadkar  * - EXT4_FC_TAG_UNLINK		- records directory entry unlink
30aa75f4d3SHarshad Shirwadkar  * - EXT4_FC_TAG_LINK		- records directory entry link
31aa75f4d3SHarshad Shirwadkar  * - EXT4_FC_TAG_CREAT		- records inode and directory entry creation
32aa75f4d3SHarshad Shirwadkar  *
33aa75f4d3SHarshad Shirwadkar  * (B) File specific data range updates:
34aa75f4d3SHarshad Shirwadkar  *
35aa75f4d3SHarshad Shirwadkar  * - EXT4_FC_TAG_ADD_RANGE	- records addition of new blocks to an inode
36aa75f4d3SHarshad Shirwadkar  * - EXT4_FC_TAG_DEL_RANGE	- records deletion of blocks from an inode
37aa75f4d3SHarshad Shirwadkar  *
38aa75f4d3SHarshad Shirwadkar  * (C) Inode metadata (mtime / ctime etc):
39aa75f4d3SHarshad Shirwadkar  *
40aa75f4d3SHarshad Shirwadkar  * - EXT4_FC_TAG_INODE		- record the inode that should be replayed
41aa75f4d3SHarshad Shirwadkar  *				  during recovery. Note that iblocks field is
42aa75f4d3SHarshad Shirwadkar  *				  not replayed and instead derived during
43aa75f4d3SHarshad Shirwadkar  *				  replay.
44aa75f4d3SHarshad Shirwadkar  * Commit Operation
45aa75f4d3SHarshad Shirwadkar  * ----------------
46aa75f4d3SHarshad Shirwadkar  * With fast commits, we maintain all the directory entry operations in the
47aa75f4d3SHarshad Shirwadkar  * order in which they are issued in an in-memory queue. This queue is flushed
48aa75f4d3SHarshad Shirwadkar  * to disk during the commit operation. We also maintain a list of inodes
49aa75f4d3SHarshad Shirwadkar  * that need to be committed during a fast commit in another in memory queue of
50aa75f4d3SHarshad Shirwadkar  * inodes. During the commit operation, we commit in the following order:
51aa75f4d3SHarshad Shirwadkar  *
52aa75f4d3SHarshad Shirwadkar  * [1] Lock inodes for any further data updates by setting COMMITTING state
53aa75f4d3SHarshad Shirwadkar  * [2] Submit data buffers of all the inodes
54aa75f4d3SHarshad Shirwadkar  * [3] Wait for [2] to complete
55aa75f4d3SHarshad Shirwadkar  * [4] Commit all the directory entry updates in the fast commit space
56aa75f4d3SHarshad Shirwadkar  * [5] Commit all the changed inode structures
57aa75f4d3SHarshad Shirwadkar  * [6] Write tail tag (this tag ensures the atomicity, please read the following
58aa75f4d3SHarshad Shirwadkar  *     section for more details).
59aa75f4d3SHarshad Shirwadkar  * [7] Wait for [4], [5] and [6] to complete.
60aa75f4d3SHarshad Shirwadkar  *
61aa75f4d3SHarshad Shirwadkar  * All the inode updates must call ext4_fc_start_update() before starting an
62aa75f4d3SHarshad Shirwadkar  * update. If such an ongoing update is present, fast commit waits for it to
63aa75f4d3SHarshad Shirwadkar  * complete. The completion of such an update is marked by
64aa75f4d3SHarshad Shirwadkar  * ext4_fc_stop_update().
65aa75f4d3SHarshad Shirwadkar  *
66aa75f4d3SHarshad Shirwadkar  * Fast Commit Ineligibility
67aa75f4d3SHarshad Shirwadkar  * -------------------------
687bbbe241SHarshad Shirwadkar  *
 * Not all operations are supported by fast commits today (e.g. extended
707bbbe241SHarshad Shirwadkar  * attributes). Fast commit ineligibility is marked by calling
717bbbe241SHarshad Shirwadkar  * ext4_fc_mark_ineligible(): This makes next fast commit operation to fall back
727bbbe241SHarshad Shirwadkar  * to full commit.
73aa75f4d3SHarshad Shirwadkar  *
74aa75f4d3SHarshad Shirwadkar  * Atomicity of commits
75aa75f4d3SHarshad Shirwadkar  * --------------------
76a740762fSHarshad Shirwadkar  * In order to guarantee atomicity during the commit operation, fast commit
77aa75f4d3SHarshad Shirwadkar  * uses "EXT4_FC_TAG_TAIL" tag that marks a fast commit as complete. Tail
78aa75f4d3SHarshad Shirwadkar  * tag contains CRC of the contents and TID of the transaction after which
79aa75f4d3SHarshad Shirwadkar  * this fast commit should be applied. Recovery code replays fast commit
80aa75f4d3SHarshad Shirwadkar  * logs only if there's at least 1 valid tail present. For every fast commit
81aa75f4d3SHarshad Shirwadkar  * operation, there is 1 tail. This means, we may end up with multiple tails
82aa75f4d3SHarshad Shirwadkar  * in the fast commit space. Here's an example:
83aa75f4d3SHarshad Shirwadkar  *
84aa75f4d3SHarshad Shirwadkar  * - Create a new file A and remove existing file B
85aa75f4d3SHarshad Shirwadkar  * - fsync()
86aa75f4d3SHarshad Shirwadkar  * - Append contents to file A
87aa75f4d3SHarshad Shirwadkar  * - Truncate file A
88aa75f4d3SHarshad Shirwadkar  * - fsync()
89aa75f4d3SHarshad Shirwadkar  *
90aa75f4d3SHarshad Shirwadkar  * The fast commit space at the end of above operations would look like this:
91aa75f4d3SHarshad Shirwadkar  *      [HEAD] [CREAT A] [UNLINK B] [TAIL] [ADD_RANGE A] [DEL_RANGE A] [TAIL]
92aa75f4d3SHarshad Shirwadkar  *             |<---  Fast Commit 1   --->|<---      Fast Commit 2     ---->|
93aa75f4d3SHarshad Shirwadkar  *
94aa75f4d3SHarshad Shirwadkar  * Replay code should thus check for all the valid tails in the FC area.
95aa75f4d3SHarshad Shirwadkar  *
96b1b7dce3SHarshad Shirwadkar  * Fast Commit Replay Idempotence
97b1b7dce3SHarshad Shirwadkar  * ------------------------------
98b1b7dce3SHarshad Shirwadkar  *
 * Fast commit tags are idempotent in nature provided the recovery code follows
100b1b7dce3SHarshad Shirwadkar  * certain rules. The guiding principle that the commit path follows while
101b1b7dce3SHarshad Shirwadkar  * committing is that it stores the result of a particular operation instead of
102b1b7dce3SHarshad Shirwadkar  * storing the procedure.
103b1b7dce3SHarshad Shirwadkar  *
104b1b7dce3SHarshad Shirwadkar  * Let's consider this rename operation: 'mv /a /b'. Let's assume dirent '/a'
105b1b7dce3SHarshad Shirwadkar  * was associated with inode 10. During fast commit, instead of storing this
106b1b7dce3SHarshad Shirwadkar  * operation as a procedure "rename a to b", we store the resulting file system
107b1b7dce3SHarshad Shirwadkar  * state as a "series" of outcomes:
108b1b7dce3SHarshad Shirwadkar  *
109b1b7dce3SHarshad Shirwadkar  * - Link dirent b to inode 10
110b1b7dce3SHarshad Shirwadkar  * - Unlink dirent a
111b1b7dce3SHarshad Shirwadkar  * - Inode <10> with valid refcount
112b1b7dce3SHarshad Shirwadkar  *
 * Now when recovery code runs, it needs to "enforce" this state on the file
114b1b7dce3SHarshad Shirwadkar  * system. This is what guarantees idempotence of fast commit replay.
115b1b7dce3SHarshad Shirwadkar  *
116b1b7dce3SHarshad Shirwadkar  * Let's take an example of a procedure that is not idempotent and see how fast
117b1b7dce3SHarshad Shirwadkar  * commits make it idempotent. Consider following sequence of operations:
118b1b7dce3SHarshad Shirwadkar  *
119b1b7dce3SHarshad Shirwadkar  *     rm A;    mv B A;    read A
120b1b7dce3SHarshad Shirwadkar  *  (x)     (y)        (z)
121b1b7dce3SHarshad Shirwadkar  *
122b1b7dce3SHarshad Shirwadkar  * (x), (y) and (z) are the points at which we can crash. If we store this
123b1b7dce3SHarshad Shirwadkar  * sequence of operations as is then the replay is not idempotent. Let's say
124b1b7dce3SHarshad Shirwadkar  * while in replay, we crash at (z). During the second replay, file A (which was
125b1b7dce3SHarshad Shirwadkar  * actually created as a result of "mv B A" operation) would get deleted. Thus,
126b1b7dce3SHarshad Shirwadkar  * file named A would be absent when we try to read A. So, this sequence of
127b1b7dce3SHarshad Shirwadkar  * operations is not idempotent. However, as mentioned above, instead of storing
128b1b7dce3SHarshad Shirwadkar  * the procedure fast commits store the outcome of each procedure. Thus the fast
129b1b7dce3SHarshad Shirwadkar  * commit log for above procedure would be as follows:
130b1b7dce3SHarshad Shirwadkar  *
131b1b7dce3SHarshad Shirwadkar  * (Let's assume dirent A was linked to inode 10 and dirent B was linked to
132b1b7dce3SHarshad Shirwadkar  * inode 11 before the replay)
133b1b7dce3SHarshad Shirwadkar  *
134b1b7dce3SHarshad Shirwadkar  *    [Unlink A]   [Link A to inode 11]   [Unlink B]   [Inode 11]
135b1b7dce3SHarshad Shirwadkar  * (w)          (x)                    (y)          (z)
136b1b7dce3SHarshad Shirwadkar  *
137b1b7dce3SHarshad Shirwadkar  * If we crash at (z), we will have file A linked to inode 11. During the second
138b1b7dce3SHarshad Shirwadkar  * replay, we will remove file A (inode 11). But we will create it back and make
139b1b7dce3SHarshad Shirwadkar  * it point to inode 11. We won't find B, so we'll just skip that step. At this
140b1b7dce3SHarshad Shirwadkar  * point, the refcount for inode 11 is not reliable, but that gets fixed by the
141b1b7dce3SHarshad Shirwadkar  * replay of last inode 11 tag. Crashes at points (w), (x) and (y) get handled
142b1b7dce3SHarshad Shirwadkar  * similarly. Thus, by converting a non-idempotent procedure into a series of
143b1b7dce3SHarshad Shirwadkar  * idempotent outcomes, fast commits ensured idempotence during the replay.
144b1b7dce3SHarshad Shirwadkar  *
145aa75f4d3SHarshad Shirwadkar  * TODOs
146aa75f4d3SHarshad Shirwadkar  * -----
147b1b7dce3SHarshad Shirwadkar  *
148b1b7dce3SHarshad Shirwadkar  * 0) Fast commit replay path hardening: Fast commit replay code should use
149b1b7dce3SHarshad Shirwadkar  *    journal handles to make sure all the updates it does during the replay
150b1b7dce3SHarshad Shirwadkar  *    path are atomic. With that if we crash during fast commit replay, after
151b1b7dce3SHarshad Shirwadkar  *    trying to do recovery again, we will find a file system where fast commit
152b1b7dce3SHarshad Shirwadkar  *    area is invalid (because new full commit would be found). In order to deal
153b1b7dce3SHarshad Shirwadkar  *    with that, fast commit replay code should ensure that the "FC_REPLAY"
154b1b7dce3SHarshad Shirwadkar  *    superblock state is persisted before starting the replay, so that after
155b1b7dce3SHarshad Shirwadkar  *    the crash, fast commit recovery code can look at that flag and perform
156b1b7dce3SHarshad Shirwadkar  *    fast commit recovery even if that area is invalidated by later full
157b1b7dce3SHarshad Shirwadkar  *    commits.
158b1b7dce3SHarshad Shirwadkar  *
159d1199b94SHarshad Shirwadkar  * 1) Fast commit's commit path locks the entire file system during fast
160d1199b94SHarshad Shirwadkar  *    commit. This has significant performance penalty. Instead of that, we
161d1199b94SHarshad Shirwadkar  *    should use ext4_fc_start/stop_update functions to start inode level
162d1199b94SHarshad Shirwadkar  *    updates from ext4_journal_start/stop. Once we do that we can drop file
163d1199b94SHarshad Shirwadkar  *    system locking during commit path.
164aa75f4d3SHarshad Shirwadkar  *
165d1199b94SHarshad Shirwadkar  * 2) Handle more ineligible cases.
166aa75f4d3SHarshad Shirwadkar  */
167aa75f4d3SHarshad Shirwadkar 
168aa75f4d3SHarshad Shirwadkar #include <trace/events/ext4.h>
169aa75f4d3SHarshad Shirwadkar static struct kmem_cache *ext4_fc_dentry_cachep;
170aa75f4d3SHarshad Shirwadkar 
171aa75f4d3SHarshad Shirwadkar static void ext4_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
172aa75f4d3SHarshad Shirwadkar {
173aa75f4d3SHarshad Shirwadkar 	BUFFER_TRACE(bh, "");
174aa75f4d3SHarshad Shirwadkar 	if (uptodate) {
175aa75f4d3SHarshad Shirwadkar 		ext4_debug("%s: Block %lld up-to-date",
176aa75f4d3SHarshad Shirwadkar 			   __func__, bh->b_blocknr);
177aa75f4d3SHarshad Shirwadkar 		set_buffer_uptodate(bh);
178aa75f4d3SHarshad Shirwadkar 	} else {
179aa75f4d3SHarshad Shirwadkar 		ext4_debug("%s: Block %lld not up-to-date",
180aa75f4d3SHarshad Shirwadkar 			   __func__, bh->b_blocknr);
181aa75f4d3SHarshad Shirwadkar 		clear_buffer_uptodate(bh);
182aa75f4d3SHarshad Shirwadkar 	}
183aa75f4d3SHarshad Shirwadkar 
184aa75f4d3SHarshad Shirwadkar 	unlock_buffer(bh);
185aa75f4d3SHarshad Shirwadkar }
186aa75f4d3SHarshad Shirwadkar 
187aa75f4d3SHarshad Shirwadkar static inline void ext4_fc_reset_inode(struct inode *inode)
188aa75f4d3SHarshad Shirwadkar {
189aa75f4d3SHarshad Shirwadkar 	struct ext4_inode_info *ei = EXT4_I(inode);
190aa75f4d3SHarshad Shirwadkar 
191aa75f4d3SHarshad Shirwadkar 	ei->i_fc_lblk_start = 0;
192aa75f4d3SHarshad Shirwadkar 	ei->i_fc_lblk_len = 0;
193aa75f4d3SHarshad Shirwadkar }
194aa75f4d3SHarshad Shirwadkar 
195aa75f4d3SHarshad Shirwadkar void ext4_fc_init_inode(struct inode *inode)
196aa75f4d3SHarshad Shirwadkar {
197aa75f4d3SHarshad Shirwadkar 	struct ext4_inode_info *ei = EXT4_I(inode);
198aa75f4d3SHarshad Shirwadkar 
199aa75f4d3SHarshad Shirwadkar 	ext4_fc_reset_inode(inode);
200aa75f4d3SHarshad Shirwadkar 	ext4_clear_inode_state(inode, EXT4_STATE_FC_COMMITTING);
201aa75f4d3SHarshad Shirwadkar 	INIT_LIST_HEAD(&ei->i_fc_list);
202aa75f4d3SHarshad Shirwadkar 	init_waitqueue_head(&ei->i_fc_wait);
203aa75f4d3SHarshad Shirwadkar 	atomic_set(&ei->i_fc_updates, 0);
204aa75f4d3SHarshad Shirwadkar }
205aa75f4d3SHarshad Shirwadkar 
206f6634e26SHarshad Shirwadkar /* This function must be called with sbi->s_fc_lock held. */
207f6634e26SHarshad Shirwadkar static void ext4_fc_wait_committing_inode(struct inode *inode)
208fa329e27STheodore Ts'o __releases(&EXT4_SB(inode->i_sb)->s_fc_lock)
209f6634e26SHarshad Shirwadkar {
210f6634e26SHarshad Shirwadkar 	wait_queue_head_t *wq;
211f6634e26SHarshad Shirwadkar 	struct ext4_inode_info *ei = EXT4_I(inode);
212f6634e26SHarshad Shirwadkar 
213f6634e26SHarshad Shirwadkar #if (BITS_PER_LONG < 64)
214f6634e26SHarshad Shirwadkar 	DEFINE_WAIT_BIT(wait, &ei->i_state_flags,
215f6634e26SHarshad Shirwadkar 			EXT4_STATE_FC_COMMITTING);
216f6634e26SHarshad Shirwadkar 	wq = bit_waitqueue(&ei->i_state_flags,
217f6634e26SHarshad Shirwadkar 				EXT4_STATE_FC_COMMITTING);
218f6634e26SHarshad Shirwadkar #else
219f6634e26SHarshad Shirwadkar 	DEFINE_WAIT_BIT(wait, &ei->i_flags,
220f6634e26SHarshad Shirwadkar 			EXT4_STATE_FC_COMMITTING);
221f6634e26SHarshad Shirwadkar 	wq = bit_waitqueue(&ei->i_flags,
222f6634e26SHarshad Shirwadkar 				EXT4_STATE_FC_COMMITTING);
223f6634e26SHarshad Shirwadkar #endif
224f6634e26SHarshad Shirwadkar 	lockdep_assert_held(&EXT4_SB(inode->i_sb)->s_fc_lock);
225f6634e26SHarshad Shirwadkar 	prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
226f6634e26SHarshad Shirwadkar 	spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
227f6634e26SHarshad Shirwadkar 	schedule();
228f6634e26SHarshad Shirwadkar 	finish_wait(wq, &wait.wq_entry);
229f6634e26SHarshad Shirwadkar }
230f6634e26SHarshad Shirwadkar 
231aa75f4d3SHarshad Shirwadkar /*
232aa75f4d3SHarshad Shirwadkar  * Inform Ext4's fast about start of an inode update
233aa75f4d3SHarshad Shirwadkar  *
234aa75f4d3SHarshad Shirwadkar  * This function is called by the high level call VFS callbacks before
235aa75f4d3SHarshad Shirwadkar  * performing any inode update. This function blocks if there's an ongoing
236aa75f4d3SHarshad Shirwadkar  * fast commit on the inode in question.
237aa75f4d3SHarshad Shirwadkar  */
238aa75f4d3SHarshad Shirwadkar void ext4_fc_start_update(struct inode *inode)
239aa75f4d3SHarshad Shirwadkar {
240aa75f4d3SHarshad Shirwadkar 	struct ext4_inode_info *ei = EXT4_I(inode);
241aa75f4d3SHarshad Shirwadkar 
2428016e29fSHarshad Shirwadkar 	if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) ||
2438016e29fSHarshad Shirwadkar 	    (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY))
244aa75f4d3SHarshad Shirwadkar 		return;
245aa75f4d3SHarshad Shirwadkar 
246aa75f4d3SHarshad Shirwadkar restart:
247aa75f4d3SHarshad Shirwadkar 	spin_lock(&EXT4_SB(inode->i_sb)->s_fc_lock);
248aa75f4d3SHarshad Shirwadkar 	if (list_empty(&ei->i_fc_list))
249aa75f4d3SHarshad Shirwadkar 		goto out;
250aa75f4d3SHarshad Shirwadkar 
251aa75f4d3SHarshad Shirwadkar 	if (ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING)) {
252f6634e26SHarshad Shirwadkar 		ext4_fc_wait_committing_inode(inode);
253aa75f4d3SHarshad Shirwadkar 		goto restart;
254aa75f4d3SHarshad Shirwadkar 	}
255aa75f4d3SHarshad Shirwadkar out:
256aa75f4d3SHarshad Shirwadkar 	atomic_inc(&ei->i_fc_updates);
257aa75f4d3SHarshad Shirwadkar 	spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
258aa75f4d3SHarshad Shirwadkar }
259aa75f4d3SHarshad Shirwadkar 
260aa75f4d3SHarshad Shirwadkar /*
261aa75f4d3SHarshad Shirwadkar  * Stop inode update and wake up waiting fast commits if any.
262aa75f4d3SHarshad Shirwadkar  */
263aa75f4d3SHarshad Shirwadkar void ext4_fc_stop_update(struct inode *inode)
264aa75f4d3SHarshad Shirwadkar {
265aa75f4d3SHarshad Shirwadkar 	struct ext4_inode_info *ei = EXT4_I(inode);
266aa75f4d3SHarshad Shirwadkar 
2678016e29fSHarshad Shirwadkar 	if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) ||
2688016e29fSHarshad Shirwadkar 	    (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY))
269aa75f4d3SHarshad Shirwadkar 		return;
270aa75f4d3SHarshad Shirwadkar 
271aa75f4d3SHarshad Shirwadkar 	if (atomic_dec_and_test(&ei->i_fc_updates))
272aa75f4d3SHarshad Shirwadkar 		wake_up_all(&ei->i_fc_wait);
273aa75f4d3SHarshad Shirwadkar }
274aa75f4d3SHarshad Shirwadkar 
275aa75f4d3SHarshad Shirwadkar /*
276aa75f4d3SHarshad Shirwadkar  * Remove inode from fast commit list. If the inode is being committed
277aa75f4d3SHarshad Shirwadkar  * we wait until inode commit is done.
278aa75f4d3SHarshad Shirwadkar  */
279aa75f4d3SHarshad Shirwadkar void ext4_fc_del(struct inode *inode)
280aa75f4d3SHarshad Shirwadkar {
281aa75f4d3SHarshad Shirwadkar 	struct ext4_inode_info *ei = EXT4_I(inode);
282aa75f4d3SHarshad Shirwadkar 
2838016e29fSHarshad Shirwadkar 	if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) ||
2848016e29fSHarshad Shirwadkar 	    (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY))
285aa75f4d3SHarshad Shirwadkar 		return;
286aa75f4d3SHarshad Shirwadkar 
287aa75f4d3SHarshad Shirwadkar restart:
288aa75f4d3SHarshad Shirwadkar 	spin_lock(&EXT4_SB(inode->i_sb)->s_fc_lock);
289aa75f4d3SHarshad Shirwadkar 	if (list_empty(&ei->i_fc_list)) {
290aa75f4d3SHarshad Shirwadkar 		spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
291aa75f4d3SHarshad Shirwadkar 		return;
292aa75f4d3SHarshad Shirwadkar 	}
293aa75f4d3SHarshad Shirwadkar 
294aa75f4d3SHarshad Shirwadkar 	if (ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING)) {
295f6634e26SHarshad Shirwadkar 		ext4_fc_wait_committing_inode(inode);
296aa75f4d3SHarshad Shirwadkar 		goto restart;
297aa75f4d3SHarshad Shirwadkar 	}
298aa75f4d3SHarshad Shirwadkar 	list_del_init(&ei->i_fc_list);
299aa75f4d3SHarshad Shirwadkar 	spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
300aa75f4d3SHarshad Shirwadkar }
301aa75f4d3SHarshad Shirwadkar 
302aa75f4d3SHarshad Shirwadkar /*
303*e85c81baSXin Yin  * Mark file system as fast commit ineligible, and record latest
304*e85c81baSXin Yin  * ineligible transaction tid. This means until the recorded
305*e85c81baSXin Yin  * transaction, commit operation would result in a full jbd2 commit.
306aa75f4d3SHarshad Shirwadkar  */
307*e85c81baSXin Yin void ext4_fc_mark_ineligible(struct super_block *sb, int reason, handle_t *handle)
308aa75f4d3SHarshad Shirwadkar {
309aa75f4d3SHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(sb);
310*e85c81baSXin Yin 	tid_t tid;
311aa75f4d3SHarshad Shirwadkar 
3128016e29fSHarshad Shirwadkar 	if (!test_opt2(sb, JOURNAL_FAST_COMMIT) ||
3138016e29fSHarshad Shirwadkar 	    (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY))
3148016e29fSHarshad Shirwadkar 		return;
3158016e29fSHarshad Shirwadkar 
3169b5f6c9bSHarshad Shirwadkar 	ext4_set_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
317*e85c81baSXin Yin 	if (handle && !IS_ERR(handle))
318*e85c81baSXin Yin 		tid = handle->h_transaction->t_tid;
319*e85c81baSXin Yin 	else {
320*e85c81baSXin Yin 		read_lock(&sbi->s_journal->j_state_lock);
321*e85c81baSXin Yin 		tid = sbi->s_journal->j_running_transaction ?
322*e85c81baSXin Yin 				sbi->s_journal->j_running_transaction->t_tid : 0;
323*e85c81baSXin Yin 		read_unlock(&sbi->s_journal->j_state_lock);
324*e85c81baSXin Yin 	}
325*e85c81baSXin Yin 	spin_lock(&sbi->s_fc_lock);
326*e85c81baSXin Yin 	if (sbi->s_fc_ineligible_tid < tid)
327*e85c81baSXin Yin 		sbi->s_fc_ineligible_tid = tid;
328*e85c81baSXin Yin 	spin_unlock(&sbi->s_fc_lock);
329aa75f4d3SHarshad Shirwadkar 	WARN_ON(reason >= EXT4_FC_REASON_MAX);
330aa75f4d3SHarshad Shirwadkar 	sbi->s_fc_stats.fc_ineligible_reason_count[reason]++;
331aa75f4d3SHarshad Shirwadkar }
332aa75f4d3SHarshad Shirwadkar 
333aa75f4d3SHarshad Shirwadkar /*
334aa75f4d3SHarshad Shirwadkar  * Generic fast commit tracking function. If this is the first time this we are
335aa75f4d3SHarshad Shirwadkar  * called after a full commit, we initialize fast commit fields and then call
336aa75f4d3SHarshad Shirwadkar  * __fc_track_fn() with update = 0. If we have already been called after a full
337aa75f4d3SHarshad Shirwadkar  * commit, we pass update = 1. Based on that, the track function can determine
338aa75f4d3SHarshad Shirwadkar  * if it needs to track a field for the first time or if it needs to just
339aa75f4d3SHarshad Shirwadkar  * update the previously tracked value.
340aa75f4d3SHarshad Shirwadkar  *
341aa75f4d3SHarshad Shirwadkar  * If enqueue is set, this function enqueues the inode in fast commit list.
342aa75f4d3SHarshad Shirwadkar  */
343aa75f4d3SHarshad Shirwadkar static int ext4_fc_track_template(
344a80f7fcfSHarshad Shirwadkar 	handle_t *handle, struct inode *inode,
345a80f7fcfSHarshad Shirwadkar 	int (*__fc_track_fn)(struct inode *, void *, bool),
346aa75f4d3SHarshad Shirwadkar 	void *args, int enqueue)
347aa75f4d3SHarshad Shirwadkar {
348aa75f4d3SHarshad Shirwadkar 	bool update = false;
349aa75f4d3SHarshad Shirwadkar 	struct ext4_inode_info *ei = EXT4_I(inode);
350aa75f4d3SHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
351a80f7fcfSHarshad Shirwadkar 	tid_t tid = 0;
352aa75f4d3SHarshad Shirwadkar 	int ret;
353aa75f4d3SHarshad Shirwadkar 
3548016e29fSHarshad Shirwadkar 	if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) ||
3558016e29fSHarshad Shirwadkar 	    (sbi->s_mount_state & EXT4_FC_REPLAY))
356aa75f4d3SHarshad Shirwadkar 		return -EOPNOTSUPP;
357aa75f4d3SHarshad Shirwadkar 
3587bbbe241SHarshad Shirwadkar 	if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE))
359aa75f4d3SHarshad Shirwadkar 		return -EINVAL;
360aa75f4d3SHarshad Shirwadkar 
361a80f7fcfSHarshad Shirwadkar 	tid = handle->h_transaction->t_tid;
362aa75f4d3SHarshad Shirwadkar 	mutex_lock(&ei->i_fc_lock);
363a80f7fcfSHarshad Shirwadkar 	if (tid == ei->i_sync_tid) {
364aa75f4d3SHarshad Shirwadkar 		update = true;
365aa75f4d3SHarshad Shirwadkar 	} else {
366aa75f4d3SHarshad Shirwadkar 		ext4_fc_reset_inode(inode);
367a80f7fcfSHarshad Shirwadkar 		ei->i_sync_tid = tid;
368aa75f4d3SHarshad Shirwadkar 	}
369aa75f4d3SHarshad Shirwadkar 	ret = __fc_track_fn(inode, args, update);
370aa75f4d3SHarshad Shirwadkar 	mutex_unlock(&ei->i_fc_lock);
371aa75f4d3SHarshad Shirwadkar 
372aa75f4d3SHarshad Shirwadkar 	if (!enqueue)
373aa75f4d3SHarshad Shirwadkar 		return ret;
374aa75f4d3SHarshad Shirwadkar 
375aa75f4d3SHarshad Shirwadkar 	spin_lock(&sbi->s_fc_lock);
376aa75f4d3SHarshad Shirwadkar 	if (list_empty(&EXT4_I(inode)->i_fc_list))
377aa75f4d3SHarshad Shirwadkar 		list_add_tail(&EXT4_I(inode)->i_fc_list,
3789b5f6c9bSHarshad Shirwadkar 				(ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_COMMITTING)) ?
379aa75f4d3SHarshad Shirwadkar 				&sbi->s_fc_q[FC_Q_STAGING] :
380aa75f4d3SHarshad Shirwadkar 				&sbi->s_fc_q[FC_Q_MAIN]);
381aa75f4d3SHarshad Shirwadkar 	spin_unlock(&sbi->s_fc_lock);
382aa75f4d3SHarshad Shirwadkar 
383aa75f4d3SHarshad Shirwadkar 	return ret;
384aa75f4d3SHarshad Shirwadkar }
385aa75f4d3SHarshad Shirwadkar 
/* Argument bundle handed to __track_dentry_update() through a void pointer. */
struct __track_dentry_update_args {
	/* Directory entry the operation applies to. */
	struct dentry *dentry;
	/* Tag of the operation (EXT4_FC_TAG_UNLINK / _LINK / _CREAT). */
	int op;
};
390aa75f4d3SHarshad Shirwadkar 
391aa75f4d3SHarshad Shirwadkar /* __track_fn for directory entry updates. Called with ei->i_fc_lock. */
392aa75f4d3SHarshad Shirwadkar static int __track_dentry_update(struct inode *inode, void *arg, bool update)
393aa75f4d3SHarshad Shirwadkar {
394aa75f4d3SHarshad Shirwadkar 	struct ext4_fc_dentry_update *node;
395aa75f4d3SHarshad Shirwadkar 	struct ext4_inode_info *ei = EXT4_I(inode);
396aa75f4d3SHarshad Shirwadkar 	struct __track_dentry_update_args *dentry_update =
397aa75f4d3SHarshad Shirwadkar 		(struct __track_dentry_update_args *)arg;
398aa75f4d3SHarshad Shirwadkar 	struct dentry *dentry = dentry_update->dentry;
399aa75f4d3SHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
400aa75f4d3SHarshad Shirwadkar 
401aa75f4d3SHarshad Shirwadkar 	mutex_unlock(&ei->i_fc_lock);
402aa75f4d3SHarshad Shirwadkar 	node = kmem_cache_alloc(ext4_fc_dentry_cachep, GFP_NOFS);
403aa75f4d3SHarshad Shirwadkar 	if (!node) {
404*e85c81baSXin Yin 		ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM, NULL);
405aa75f4d3SHarshad Shirwadkar 		mutex_lock(&ei->i_fc_lock);
406aa75f4d3SHarshad Shirwadkar 		return -ENOMEM;
407aa75f4d3SHarshad Shirwadkar 	}
408aa75f4d3SHarshad Shirwadkar 
409aa75f4d3SHarshad Shirwadkar 	node->fcd_op = dentry_update->op;
410aa75f4d3SHarshad Shirwadkar 	node->fcd_parent = dentry->d_parent->d_inode->i_ino;
411aa75f4d3SHarshad Shirwadkar 	node->fcd_ino = inode->i_ino;
412aa75f4d3SHarshad Shirwadkar 	if (dentry->d_name.len > DNAME_INLINE_LEN) {
413aa75f4d3SHarshad Shirwadkar 		node->fcd_name.name = kmalloc(dentry->d_name.len, GFP_NOFS);
414aa75f4d3SHarshad Shirwadkar 		if (!node->fcd_name.name) {
415aa75f4d3SHarshad Shirwadkar 			kmem_cache_free(ext4_fc_dentry_cachep, node);
416aa75f4d3SHarshad Shirwadkar 			ext4_fc_mark_ineligible(inode->i_sb,
417*e85c81baSXin Yin 				EXT4_FC_REASON_NOMEM, NULL);
418aa75f4d3SHarshad Shirwadkar 			mutex_lock(&ei->i_fc_lock);
419aa75f4d3SHarshad Shirwadkar 			return -ENOMEM;
420aa75f4d3SHarshad Shirwadkar 		}
421aa75f4d3SHarshad Shirwadkar 		memcpy((u8 *)node->fcd_name.name, dentry->d_name.name,
422aa75f4d3SHarshad Shirwadkar 			dentry->d_name.len);
423aa75f4d3SHarshad Shirwadkar 	} else {
424aa75f4d3SHarshad Shirwadkar 		memcpy(node->fcd_iname, dentry->d_name.name,
425aa75f4d3SHarshad Shirwadkar 			dentry->d_name.len);
426aa75f4d3SHarshad Shirwadkar 		node->fcd_name.name = node->fcd_iname;
427aa75f4d3SHarshad Shirwadkar 	}
428aa75f4d3SHarshad Shirwadkar 	node->fcd_name.len = dentry->d_name.len;
429aa75f4d3SHarshad Shirwadkar 
430aa75f4d3SHarshad Shirwadkar 	spin_lock(&sbi->s_fc_lock);
4319b5f6c9bSHarshad Shirwadkar 	if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_COMMITTING))
432aa75f4d3SHarshad Shirwadkar 		list_add_tail(&node->fcd_list,
433aa75f4d3SHarshad Shirwadkar 				&sbi->s_fc_dentry_q[FC_Q_STAGING]);
434aa75f4d3SHarshad Shirwadkar 	else
435aa75f4d3SHarshad Shirwadkar 		list_add_tail(&node->fcd_list, &sbi->s_fc_dentry_q[FC_Q_MAIN]);
436aa75f4d3SHarshad Shirwadkar 	spin_unlock(&sbi->s_fc_lock);
437aa75f4d3SHarshad Shirwadkar 	mutex_lock(&ei->i_fc_lock);
438aa75f4d3SHarshad Shirwadkar 
439aa75f4d3SHarshad Shirwadkar 	return 0;
440aa75f4d3SHarshad Shirwadkar }
441aa75f4d3SHarshad Shirwadkar 
442a80f7fcfSHarshad Shirwadkar void __ext4_fc_track_unlink(handle_t *handle,
443a80f7fcfSHarshad Shirwadkar 		struct inode *inode, struct dentry *dentry)
444aa75f4d3SHarshad Shirwadkar {
445aa75f4d3SHarshad Shirwadkar 	struct __track_dentry_update_args args;
446aa75f4d3SHarshad Shirwadkar 	int ret;
447aa75f4d3SHarshad Shirwadkar 
448aa75f4d3SHarshad Shirwadkar 	args.dentry = dentry;
449aa75f4d3SHarshad Shirwadkar 	args.op = EXT4_FC_TAG_UNLINK;
450aa75f4d3SHarshad Shirwadkar 
451a80f7fcfSHarshad Shirwadkar 	ret = ext4_fc_track_template(handle, inode, __track_dentry_update,
452aa75f4d3SHarshad Shirwadkar 					(void *)&args, 0);
453aa75f4d3SHarshad Shirwadkar 	trace_ext4_fc_track_unlink(inode, dentry, ret);
454aa75f4d3SHarshad Shirwadkar }
455aa75f4d3SHarshad Shirwadkar 
456a80f7fcfSHarshad Shirwadkar void ext4_fc_track_unlink(handle_t *handle, struct dentry *dentry)
457a80f7fcfSHarshad Shirwadkar {
458a80f7fcfSHarshad Shirwadkar 	__ext4_fc_track_unlink(handle, d_inode(dentry), dentry);
459a80f7fcfSHarshad Shirwadkar }
460a80f7fcfSHarshad Shirwadkar 
461a80f7fcfSHarshad Shirwadkar void __ext4_fc_track_link(handle_t *handle,
462a80f7fcfSHarshad Shirwadkar 	struct inode *inode, struct dentry *dentry)
463aa75f4d3SHarshad Shirwadkar {
464aa75f4d3SHarshad Shirwadkar 	struct __track_dentry_update_args args;
465aa75f4d3SHarshad Shirwadkar 	int ret;
466aa75f4d3SHarshad Shirwadkar 
467aa75f4d3SHarshad Shirwadkar 	args.dentry = dentry;
468aa75f4d3SHarshad Shirwadkar 	args.op = EXT4_FC_TAG_LINK;
469aa75f4d3SHarshad Shirwadkar 
470a80f7fcfSHarshad Shirwadkar 	ret = ext4_fc_track_template(handle, inode, __track_dentry_update,
471aa75f4d3SHarshad Shirwadkar 					(void *)&args, 0);
472aa75f4d3SHarshad Shirwadkar 	trace_ext4_fc_track_link(inode, dentry, ret);
473aa75f4d3SHarshad Shirwadkar }
474aa75f4d3SHarshad Shirwadkar 
475a80f7fcfSHarshad Shirwadkar void ext4_fc_track_link(handle_t *handle, struct dentry *dentry)
476a80f7fcfSHarshad Shirwadkar {
477a80f7fcfSHarshad Shirwadkar 	__ext4_fc_track_link(handle, d_inode(dentry), dentry);
478a80f7fcfSHarshad Shirwadkar }
479a80f7fcfSHarshad Shirwadkar 
4808210bb29SHarshad Shirwadkar void __ext4_fc_track_create(handle_t *handle, struct inode *inode,
4818210bb29SHarshad Shirwadkar 			  struct dentry *dentry)
482aa75f4d3SHarshad Shirwadkar {
483aa75f4d3SHarshad Shirwadkar 	struct __track_dentry_update_args args;
484aa75f4d3SHarshad Shirwadkar 	int ret;
485aa75f4d3SHarshad Shirwadkar 
486aa75f4d3SHarshad Shirwadkar 	args.dentry = dentry;
487aa75f4d3SHarshad Shirwadkar 	args.op = EXT4_FC_TAG_CREAT;
488aa75f4d3SHarshad Shirwadkar 
489a80f7fcfSHarshad Shirwadkar 	ret = ext4_fc_track_template(handle, inode, __track_dentry_update,
490aa75f4d3SHarshad Shirwadkar 					(void *)&args, 0);
491aa75f4d3SHarshad Shirwadkar 	trace_ext4_fc_track_create(inode, dentry, ret);
492aa75f4d3SHarshad Shirwadkar }
493aa75f4d3SHarshad Shirwadkar 
4948210bb29SHarshad Shirwadkar void ext4_fc_track_create(handle_t *handle, struct dentry *dentry)
4958210bb29SHarshad Shirwadkar {
4968210bb29SHarshad Shirwadkar 	__ext4_fc_track_create(handle, d_inode(dentry), dentry);
4978210bb29SHarshad Shirwadkar }
4988210bb29SHarshad Shirwadkar 
499aa75f4d3SHarshad Shirwadkar /* __track_fn for inode tracking */
500aa75f4d3SHarshad Shirwadkar static int __track_inode(struct inode *inode, void *arg, bool update)
501aa75f4d3SHarshad Shirwadkar {
502aa75f4d3SHarshad Shirwadkar 	if (update)
503aa75f4d3SHarshad Shirwadkar 		return -EEXIST;
504aa75f4d3SHarshad Shirwadkar 
505aa75f4d3SHarshad Shirwadkar 	EXT4_I(inode)->i_fc_lblk_len = 0;
506aa75f4d3SHarshad Shirwadkar 
507aa75f4d3SHarshad Shirwadkar 	return 0;
508aa75f4d3SHarshad Shirwadkar }
509aa75f4d3SHarshad Shirwadkar 
510a80f7fcfSHarshad Shirwadkar void ext4_fc_track_inode(handle_t *handle, struct inode *inode)
511aa75f4d3SHarshad Shirwadkar {
512aa75f4d3SHarshad Shirwadkar 	int ret;
513aa75f4d3SHarshad Shirwadkar 
514aa75f4d3SHarshad Shirwadkar 	if (S_ISDIR(inode->i_mode))
515aa75f4d3SHarshad Shirwadkar 		return;
516aa75f4d3SHarshad Shirwadkar 
517556e0319SHarshad Shirwadkar 	if (ext4_should_journal_data(inode)) {
518556e0319SHarshad Shirwadkar 		ext4_fc_mark_ineligible(inode->i_sb,
519*e85c81baSXin Yin 					EXT4_FC_REASON_INODE_JOURNAL_DATA, handle);
520556e0319SHarshad Shirwadkar 		return;
521556e0319SHarshad Shirwadkar 	}
522556e0319SHarshad Shirwadkar 
523a80f7fcfSHarshad Shirwadkar 	ret = ext4_fc_track_template(handle, inode, __track_inode, NULL, 1);
524aa75f4d3SHarshad Shirwadkar 	trace_ext4_fc_track_inode(inode, ret);
525aa75f4d3SHarshad Shirwadkar }
526aa75f4d3SHarshad Shirwadkar 
527aa75f4d3SHarshad Shirwadkar struct __track_range_args {
528aa75f4d3SHarshad Shirwadkar 	ext4_lblk_t start, end;
529aa75f4d3SHarshad Shirwadkar };
530aa75f4d3SHarshad Shirwadkar 
531aa75f4d3SHarshad Shirwadkar /* __track_fn for tracking data updates */
532aa75f4d3SHarshad Shirwadkar static int __track_range(struct inode *inode, void *arg, bool update)
533aa75f4d3SHarshad Shirwadkar {
534aa75f4d3SHarshad Shirwadkar 	struct ext4_inode_info *ei = EXT4_I(inode);
535aa75f4d3SHarshad Shirwadkar 	ext4_lblk_t oldstart;
536aa75f4d3SHarshad Shirwadkar 	struct __track_range_args *__arg =
537aa75f4d3SHarshad Shirwadkar 		(struct __track_range_args *)arg;
538aa75f4d3SHarshad Shirwadkar 
539aa75f4d3SHarshad Shirwadkar 	if (inode->i_ino < EXT4_FIRST_INO(inode->i_sb)) {
540aa75f4d3SHarshad Shirwadkar 		ext4_debug("Special inode %ld being modified\n", inode->i_ino);
541aa75f4d3SHarshad Shirwadkar 		return -ECANCELED;
542aa75f4d3SHarshad Shirwadkar 	}
543aa75f4d3SHarshad Shirwadkar 
544aa75f4d3SHarshad Shirwadkar 	oldstart = ei->i_fc_lblk_start;
545aa75f4d3SHarshad Shirwadkar 
546aa75f4d3SHarshad Shirwadkar 	if (update && ei->i_fc_lblk_len > 0) {
547aa75f4d3SHarshad Shirwadkar 		ei->i_fc_lblk_start = min(ei->i_fc_lblk_start, __arg->start);
548aa75f4d3SHarshad Shirwadkar 		ei->i_fc_lblk_len =
549aa75f4d3SHarshad Shirwadkar 			max(oldstart + ei->i_fc_lblk_len - 1, __arg->end) -
550aa75f4d3SHarshad Shirwadkar 				ei->i_fc_lblk_start + 1;
551aa75f4d3SHarshad Shirwadkar 	} else {
552aa75f4d3SHarshad Shirwadkar 		ei->i_fc_lblk_start = __arg->start;
553aa75f4d3SHarshad Shirwadkar 		ei->i_fc_lblk_len = __arg->end - __arg->start + 1;
554aa75f4d3SHarshad Shirwadkar 	}
555aa75f4d3SHarshad Shirwadkar 
556aa75f4d3SHarshad Shirwadkar 	return 0;
557aa75f4d3SHarshad Shirwadkar }
558aa75f4d3SHarshad Shirwadkar 
559a80f7fcfSHarshad Shirwadkar void ext4_fc_track_range(handle_t *handle, struct inode *inode, ext4_lblk_t start,
560aa75f4d3SHarshad Shirwadkar 			 ext4_lblk_t end)
561aa75f4d3SHarshad Shirwadkar {
562aa75f4d3SHarshad Shirwadkar 	struct __track_range_args args;
563aa75f4d3SHarshad Shirwadkar 	int ret;
564aa75f4d3SHarshad Shirwadkar 
565aa75f4d3SHarshad Shirwadkar 	if (S_ISDIR(inode->i_mode))
566aa75f4d3SHarshad Shirwadkar 		return;
567aa75f4d3SHarshad Shirwadkar 
568aa75f4d3SHarshad Shirwadkar 	args.start = start;
569aa75f4d3SHarshad Shirwadkar 	args.end = end;
570aa75f4d3SHarshad Shirwadkar 
571a80f7fcfSHarshad Shirwadkar 	ret = ext4_fc_track_template(handle, inode,  __track_range, &args, 1);
572aa75f4d3SHarshad Shirwadkar 
573aa75f4d3SHarshad Shirwadkar 	trace_ext4_fc_track_range(inode, start, end, ret);
574aa75f4d3SHarshad Shirwadkar }
575aa75f4d3SHarshad Shirwadkar 
576e9f53353SDaejun Park static void ext4_fc_submit_bh(struct super_block *sb, bool is_tail)
577aa75f4d3SHarshad Shirwadkar {
578aa75f4d3SHarshad Shirwadkar 	int write_flags = REQ_SYNC;
579aa75f4d3SHarshad Shirwadkar 	struct buffer_head *bh = EXT4_SB(sb)->s_fc_bh;
580aa75f4d3SHarshad Shirwadkar 
581e9f53353SDaejun Park 	/* Add REQ_FUA | REQ_PREFLUSH only its tail */
582e9f53353SDaejun Park 	if (test_opt(sb, BARRIER) && is_tail)
583aa75f4d3SHarshad Shirwadkar 		write_flags |= REQ_FUA | REQ_PREFLUSH;
584aa75f4d3SHarshad Shirwadkar 	lock_buffer(bh);
585764b3fd3SHarshad Shirwadkar 	set_buffer_dirty(bh);
586aa75f4d3SHarshad Shirwadkar 	set_buffer_uptodate(bh);
587aa75f4d3SHarshad Shirwadkar 	bh->b_end_io = ext4_end_buffer_io_sync;
588aa75f4d3SHarshad Shirwadkar 	submit_bh(REQ_OP_WRITE, write_flags, bh);
589aa75f4d3SHarshad Shirwadkar 	EXT4_SB(sb)->s_fc_bh = NULL;
590aa75f4d3SHarshad Shirwadkar }
591aa75f4d3SHarshad Shirwadkar 
592aa75f4d3SHarshad Shirwadkar /* Ext4 commit path routines */
593aa75f4d3SHarshad Shirwadkar 
594aa75f4d3SHarshad Shirwadkar /* memzero and update CRC */
595aa75f4d3SHarshad Shirwadkar static void *ext4_fc_memzero(struct super_block *sb, void *dst, int len,
596aa75f4d3SHarshad Shirwadkar 				u32 *crc)
597aa75f4d3SHarshad Shirwadkar {
598aa75f4d3SHarshad Shirwadkar 	void *ret;
599aa75f4d3SHarshad Shirwadkar 
600aa75f4d3SHarshad Shirwadkar 	ret = memset(dst, 0, len);
601aa75f4d3SHarshad Shirwadkar 	if (crc)
602aa75f4d3SHarshad Shirwadkar 		*crc = ext4_chksum(EXT4_SB(sb), *crc, dst, len);
603aa75f4d3SHarshad Shirwadkar 	return ret;
604aa75f4d3SHarshad Shirwadkar }
605aa75f4d3SHarshad Shirwadkar 
606aa75f4d3SHarshad Shirwadkar /*
607aa75f4d3SHarshad Shirwadkar  * Allocate len bytes on a fast commit buffer.
608aa75f4d3SHarshad Shirwadkar  *
609aa75f4d3SHarshad Shirwadkar  * During the commit time this function is used to manage fast commit
610aa75f4d3SHarshad Shirwadkar  * block space. We don't split a fast commit log onto different
611aa75f4d3SHarshad Shirwadkar  * blocks. So this function makes sure that if there's not enough space
612aa75f4d3SHarshad Shirwadkar  * on the current block, the remaining space in the current block is
613aa75f4d3SHarshad Shirwadkar  * marked as unused by adding EXT4_FC_TAG_PAD tag. In that case,
614aa75f4d3SHarshad Shirwadkar  * new block is from jbd2 and CRC is updated to reflect the padding
615aa75f4d3SHarshad Shirwadkar  * we added.
616aa75f4d3SHarshad Shirwadkar  */
617aa75f4d3SHarshad Shirwadkar static u8 *ext4_fc_reserve_space(struct super_block *sb, int len, u32 *crc)
618aa75f4d3SHarshad Shirwadkar {
619aa75f4d3SHarshad Shirwadkar 	struct ext4_fc_tl *tl;
620aa75f4d3SHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(sb);
621aa75f4d3SHarshad Shirwadkar 	struct buffer_head *bh;
622aa75f4d3SHarshad Shirwadkar 	int bsize = sbi->s_journal->j_blocksize;
623aa75f4d3SHarshad Shirwadkar 	int ret, off = sbi->s_fc_bytes % bsize;
624aa75f4d3SHarshad Shirwadkar 	int pad_len;
625aa75f4d3SHarshad Shirwadkar 
626aa75f4d3SHarshad Shirwadkar 	/*
627aa75f4d3SHarshad Shirwadkar 	 * After allocating len, we should have space at least for a 0 byte
628aa75f4d3SHarshad Shirwadkar 	 * padding.
629aa75f4d3SHarshad Shirwadkar 	 */
630aa75f4d3SHarshad Shirwadkar 	if (len + sizeof(struct ext4_fc_tl) > bsize)
631aa75f4d3SHarshad Shirwadkar 		return NULL;
632aa75f4d3SHarshad Shirwadkar 
633aa75f4d3SHarshad Shirwadkar 	if (bsize - off - 1 > len + sizeof(struct ext4_fc_tl)) {
634aa75f4d3SHarshad Shirwadkar 		/*
635aa75f4d3SHarshad Shirwadkar 		 * Only allocate from current buffer if we have enough space for
636aa75f4d3SHarshad Shirwadkar 		 * this request AND we have space to add a zero byte padding.
637aa75f4d3SHarshad Shirwadkar 		 */
638aa75f4d3SHarshad Shirwadkar 		if (!sbi->s_fc_bh) {
639aa75f4d3SHarshad Shirwadkar 			ret = jbd2_fc_get_buf(EXT4_SB(sb)->s_journal, &bh);
640aa75f4d3SHarshad Shirwadkar 			if (ret)
641aa75f4d3SHarshad Shirwadkar 				return NULL;
642aa75f4d3SHarshad Shirwadkar 			sbi->s_fc_bh = bh;
643aa75f4d3SHarshad Shirwadkar 		}
644aa75f4d3SHarshad Shirwadkar 		sbi->s_fc_bytes += len;
645aa75f4d3SHarshad Shirwadkar 		return sbi->s_fc_bh->b_data + off;
646aa75f4d3SHarshad Shirwadkar 	}
647aa75f4d3SHarshad Shirwadkar 	/* Need to add PAD tag */
648aa75f4d3SHarshad Shirwadkar 	tl = (struct ext4_fc_tl *)(sbi->s_fc_bh->b_data + off);
649aa75f4d3SHarshad Shirwadkar 	tl->fc_tag = cpu_to_le16(EXT4_FC_TAG_PAD);
650aa75f4d3SHarshad Shirwadkar 	pad_len = bsize - off - 1 - sizeof(struct ext4_fc_tl);
651aa75f4d3SHarshad Shirwadkar 	tl->fc_len = cpu_to_le16(pad_len);
652aa75f4d3SHarshad Shirwadkar 	if (crc)
653aa75f4d3SHarshad Shirwadkar 		*crc = ext4_chksum(sbi, *crc, tl, sizeof(*tl));
654aa75f4d3SHarshad Shirwadkar 	if (pad_len > 0)
655aa75f4d3SHarshad Shirwadkar 		ext4_fc_memzero(sb, tl + 1, pad_len, crc);
656e9f53353SDaejun Park 	ext4_fc_submit_bh(sb, false);
657aa75f4d3SHarshad Shirwadkar 
658aa75f4d3SHarshad Shirwadkar 	ret = jbd2_fc_get_buf(EXT4_SB(sb)->s_journal, &bh);
659aa75f4d3SHarshad Shirwadkar 	if (ret)
660aa75f4d3SHarshad Shirwadkar 		return NULL;
661aa75f4d3SHarshad Shirwadkar 	sbi->s_fc_bh = bh;
662aa75f4d3SHarshad Shirwadkar 	sbi->s_fc_bytes = (sbi->s_fc_bytes / bsize + 1) * bsize + len;
663aa75f4d3SHarshad Shirwadkar 	return sbi->s_fc_bh->b_data;
664aa75f4d3SHarshad Shirwadkar }
665aa75f4d3SHarshad Shirwadkar 
666aa75f4d3SHarshad Shirwadkar /* memcpy to fc reserved space and update CRC */
667aa75f4d3SHarshad Shirwadkar static void *ext4_fc_memcpy(struct super_block *sb, void *dst, const void *src,
668aa75f4d3SHarshad Shirwadkar 				int len, u32 *crc)
669aa75f4d3SHarshad Shirwadkar {
670aa75f4d3SHarshad Shirwadkar 	if (crc)
671aa75f4d3SHarshad Shirwadkar 		*crc = ext4_chksum(EXT4_SB(sb), *crc, src, len);
672aa75f4d3SHarshad Shirwadkar 	return memcpy(dst, src, len);
673aa75f4d3SHarshad Shirwadkar }
674aa75f4d3SHarshad Shirwadkar 
675aa75f4d3SHarshad Shirwadkar /*
676aa75f4d3SHarshad Shirwadkar  * Complete a fast commit by writing tail tag.
677aa75f4d3SHarshad Shirwadkar  *
678aa75f4d3SHarshad Shirwadkar  * Writing tail tag marks the end of a fast commit. In order to guarantee
679aa75f4d3SHarshad Shirwadkar  * atomicity, after writing tail tag, even if there's space remaining
680aa75f4d3SHarshad Shirwadkar  * in the block, next commit shouldn't use it. That's why tail tag
681aa75f4d3SHarshad Shirwadkar  * has the length as that of the remaining space on the block.
682aa75f4d3SHarshad Shirwadkar  */
683aa75f4d3SHarshad Shirwadkar static int ext4_fc_write_tail(struct super_block *sb, u32 crc)
684aa75f4d3SHarshad Shirwadkar {
685aa75f4d3SHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(sb);
686aa75f4d3SHarshad Shirwadkar 	struct ext4_fc_tl tl;
687aa75f4d3SHarshad Shirwadkar 	struct ext4_fc_tail tail;
688aa75f4d3SHarshad Shirwadkar 	int off, bsize = sbi->s_journal->j_blocksize;
689aa75f4d3SHarshad Shirwadkar 	u8 *dst;
690aa75f4d3SHarshad Shirwadkar 
691aa75f4d3SHarshad Shirwadkar 	/*
692aa75f4d3SHarshad Shirwadkar 	 * ext4_fc_reserve_space takes care of allocating an extra block if
693aa75f4d3SHarshad Shirwadkar 	 * there's no enough space on this block for accommodating this tail.
694aa75f4d3SHarshad Shirwadkar 	 */
695aa75f4d3SHarshad Shirwadkar 	dst = ext4_fc_reserve_space(sb, sizeof(tl) + sizeof(tail), &crc);
696aa75f4d3SHarshad Shirwadkar 	if (!dst)
697aa75f4d3SHarshad Shirwadkar 		return -ENOSPC;
698aa75f4d3SHarshad Shirwadkar 
699aa75f4d3SHarshad Shirwadkar 	off = sbi->s_fc_bytes % bsize;
700aa75f4d3SHarshad Shirwadkar 
701aa75f4d3SHarshad Shirwadkar 	tl.fc_tag = cpu_to_le16(EXT4_FC_TAG_TAIL);
702aa75f4d3SHarshad Shirwadkar 	tl.fc_len = cpu_to_le16(bsize - off - 1 + sizeof(struct ext4_fc_tail));
703aa75f4d3SHarshad Shirwadkar 	sbi->s_fc_bytes = round_up(sbi->s_fc_bytes, bsize);
704aa75f4d3SHarshad Shirwadkar 
705aa75f4d3SHarshad Shirwadkar 	ext4_fc_memcpy(sb, dst, &tl, sizeof(tl), &crc);
706aa75f4d3SHarshad Shirwadkar 	dst += sizeof(tl);
707aa75f4d3SHarshad Shirwadkar 	tail.fc_tid = cpu_to_le32(sbi->s_journal->j_running_transaction->t_tid);
708aa75f4d3SHarshad Shirwadkar 	ext4_fc_memcpy(sb, dst, &tail.fc_tid, sizeof(tail.fc_tid), &crc);
709aa75f4d3SHarshad Shirwadkar 	dst += sizeof(tail.fc_tid);
710aa75f4d3SHarshad Shirwadkar 	tail.fc_crc = cpu_to_le32(crc);
711aa75f4d3SHarshad Shirwadkar 	ext4_fc_memcpy(sb, dst, &tail.fc_crc, sizeof(tail.fc_crc), NULL);
712aa75f4d3SHarshad Shirwadkar 
713e9f53353SDaejun Park 	ext4_fc_submit_bh(sb, true);
714aa75f4d3SHarshad Shirwadkar 
715aa75f4d3SHarshad Shirwadkar 	return 0;
716aa75f4d3SHarshad Shirwadkar }
717aa75f4d3SHarshad Shirwadkar 
718aa75f4d3SHarshad Shirwadkar /*
719aa75f4d3SHarshad Shirwadkar  * Adds tag, length, value and updates CRC. Returns true if tlv was added.
720aa75f4d3SHarshad Shirwadkar  * Returns false if there's not enough space.
721aa75f4d3SHarshad Shirwadkar  */
722aa75f4d3SHarshad Shirwadkar static bool ext4_fc_add_tlv(struct super_block *sb, u16 tag, u16 len, u8 *val,
723aa75f4d3SHarshad Shirwadkar 			   u32 *crc)
724aa75f4d3SHarshad Shirwadkar {
725aa75f4d3SHarshad Shirwadkar 	struct ext4_fc_tl tl;
726aa75f4d3SHarshad Shirwadkar 	u8 *dst;
727aa75f4d3SHarshad Shirwadkar 
728aa75f4d3SHarshad Shirwadkar 	dst = ext4_fc_reserve_space(sb, sizeof(tl) + len, crc);
729aa75f4d3SHarshad Shirwadkar 	if (!dst)
730aa75f4d3SHarshad Shirwadkar 		return false;
731aa75f4d3SHarshad Shirwadkar 
732aa75f4d3SHarshad Shirwadkar 	tl.fc_tag = cpu_to_le16(tag);
733aa75f4d3SHarshad Shirwadkar 	tl.fc_len = cpu_to_le16(len);
734aa75f4d3SHarshad Shirwadkar 
735aa75f4d3SHarshad Shirwadkar 	ext4_fc_memcpy(sb, dst, &tl, sizeof(tl), crc);
736aa75f4d3SHarshad Shirwadkar 	ext4_fc_memcpy(sb, dst + sizeof(tl), val, len, crc);
737aa75f4d3SHarshad Shirwadkar 
738aa75f4d3SHarshad Shirwadkar 	return true;
739aa75f4d3SHarshad Shirwadkar }
740aa75f4d3SHarshad Shirwadkar 
741aa75f4d3SHarshad Shirwadkar /* Same as above, but adds dentry tlv. */
742facec450SGuoqing Jiang static bool ext4_fc_add_dentry_tlv(struct super_block *sb, u32 *crc,
743facec450SGuoqing Jiang 				   struct ext4_fc_dentry_update *fc_dentry)
744aa75f4d3SHarshad Shirwadkar {
745aa75f4d3SHarshad Shirwadkar 	struct ext4_fc_dentry_info fcd;
746aa75f4d3SHarshad Shirwadkar 	struct ext4_fc_tl tl;
747facec450SGuoqing Jiang 	int dlen = fc_dentry->fcd_name.len;
748aa75f4d3SHarshad Shirwadkar 	u8 *dst = ext4_fc_reserve_space(sb, sizeof(tl) + sizeof(fcd) + dlen,
749aa75f4d3SHarshad Shirwadkar 					crc);
750aa75f4d3SHarshad Shirwadkar 
751aa75f4d3SHarshad Shirwadkar 	if (!dst)
752aa75f4d3SHarshad Shirwadkar 		return false;
753aa75f4d3SHarshad Shirwadkar 
754facec450SGuoqing Jiang 	fcd.fc_parent_ino = cpu_to_le32(fc_dentry->fcd_parent);
755facec450SGuoqing Jiang 	fcd.fc_ino = cpu_to_le32(fc_dentry->fcd_ino);
756facec450SGuoqing Jiang 	tl.fc_tag = cpu_to_le16(fc_dentry->fcd_op);
757aa75f4d3SHarshad Shirwadkar 	tl.fc_len = cpu_to_le16(sizeof(fcd) + dlen);
758aa75f4d3SHarshad Shirwadkar 	ext4_fc_memcpy(sb, dst, &tl, sizeof(tl), crc);
759aa75f4d3SHarshad Shirwadkar 	dst += sizeof(tl);
760aa75f4d3SHarshad Shirwadkar 	ext4_fc_memcpy(sb, dst, &fcd, sizeof(fcd), crc);
761aa75f4d3SHarshad Shirwadkar 	dst += sizeof(fcd);
762facec450SGuoqing Jiang 	ext4_fc_memcpy(sb, dst, fc_dentry->fcd_name.name, dlen, crc);
763aa75f4d3SHarshad Shirwadkar 
764aa75f4d3SHarshad Shirwadkar 	return true;
765aa75f4d3SHarshad Shirwadkar }
766aa75f4d3SHarshad Shirwadkar 
767aa75f4d3SHarshad Shirwadkar /*
768aa75f4d3SHarshad Shirwadkar  * Writes inode in the fast commit space under TLV with tag @tag.
769aa75f4d3SHarshad Shirwadkar  * Returns 0 on success, error on failure.
770aa75f4d3SHarshad Shirwadkar  */
771aa75f4d3SHarshad Shirwadkar static int ext4_fc_write_inode(struct inode *inode, u32 *crc)
772aa75f4d3SHarshad Shirwadkar {
773aa75f4d3SHarshad Shirwadkar 	struct ext4_inode_info *ei = EXT4_I(inode);
774aa75f4d3SHarshad Shirwadkar 	int inode_len = EXT4_GOOD_OLD_INODE_SIZE;
775aa75f4d3SHarshad Shirwadkar 	int ret;
776aa75f4d3SHarshad Shirwadkar 	struct ext4_iloc iloc;
777aa75f4d3SHarshad Shirwadkar 	struct ext4_fc_inode fc_inode;
778aa75f4d3SHarshad Shirwadkar 	struct ext4_fc_tl tl;
779aa75f4d3SHarshad Shirwadkar 	u8 *dst;
780aa75f4d3SHarshad Shirwadkar 
781aa75f4d3SHarshad Shirwadkar 	ret = ext4_get_inode_loc(inode, &iloc);
782aa75f4d3SHarshad Shirwadkar 	if (ret)
783aa75f4d3SHarshad Shirwadkar 		return ret;
784aa75f4d3SHarshad Shirwadkar 
7856c31a689SHarshad Shirwadkar 	if (ext4_test_inode_flag(inode, EXT4_INODE_INLINE_DATA))
7866c31a689SHarshad Shirwadkar 		inode_len = EXT4_INODE_SIZE(inode->i_sb);
7876c31a689SHarshad Shirwadkar 	else if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE)
788aa75f4d3SHarshad Shirwadkar 		inode_len += ei->i_extra_isize;
789aa75f4d3SHarshad Shirwadkar 
790aa75f4d3SHarshad Shirwadkar 	fc_inode.fc_ino = cpu_to_le32(inode->i_ino);
791aa75f4d3SHarshad Shirwadkar 	tl.fc_tag = cpu_to_le16(EXT4_FC_TAG_INODE);
792aa75f4d3SHarshad Shirwadkar 	tl.fc_len = cpu_to_le16(inode_len + sizeof(fc_inode.fc_ino));
793aa75f4d3SHarshad Shirwadkar 
794aa75f4d3SHarshad Shirwadkar 	dst = ext4_fc_reserve_space(inode->i_sb,
795aa75f4d3SHarshad Shirwadkar 			sizeof(tl) + inode_len + sizeof(fc_inode.fc_ino), crc);
796aa75f4d3SHarshad Shirwadkar 	if (!dst)
797aa75f4d3SHarshad Shirwadkar 		return -ECANCELED;
798aa75f4d3SHarshad Shirwadkar 
799aa75f4d3SHarshad Shirwadkar 	if (!ext4_fc_memcpy(inode->i_sb, dst, &tl, sizeof(tl), crc))
800aa75f4d3SHarshad Shirwadkar 		return -ECANCELED;
801aa75f4d3SHarshad Shirwadkar 	dst += sizeof(tl);
802aa75f4d3SHarshad Shirwadkar 	if (!ext4_fc_memcpy(inode->i_sb, dst, &fc_inode, sizeof(fc_inode), crc))
803aa75f4d3SHarshad Shirwadkar 		return -ECANCELED;
804aa75f4d3SHarshad Shirwadkar 	dst += sizeof(fc_inode);
805aa75f4d3SHarshad Shirwadkar 	if (!ext4_fc_memcpy(inode->i_sb, dst, (u8 *)ext4_raw_inode(&iloc),
806aa75f4d3SHarshad Shirwadkar 					inode_len, crc))
807aa75f4d3SHarshad Shirwadkar 		return -ECANCELED;
808aa75f4d3SHarshad Shirwadkar 
809aa75f4d3SHarshad Shirwadkar 	return 0;
810aa75f4d3SHarshad Shirwadkar }
811aa75f4d3SHarshad Shirwadkar 
812aa75f4d3SHarshad Shirwadkar /*
813aa75f4d3SHarshad Shirwadkar  * Writes updated data ranges for the inode in question. Updates CRC.
814aa75f4d3SHarshad Shirwadkar  * Returns 0 on success, error otherwise.
815aa75f4d3SHarshad Shirwadkar  */
816aa75f4d3SHarshad Shirwadkar static int ext4_fc_write_inode_data(struct inode *inode, u32 *crc)
817aa75f4d3SHarshad Shirwadkar {
818aa75f4d3SHarshad Shirwadkar 	ext4_lblk_t old_blk_size, cur_lblk_off, new_blk_size;
819aa75f4d3SHarshad Shirwadkar 	struct ext4_inode_info *ei = EXT4_I(inode);
820aa75f4d3SHarshad Shirwadkar 	struct ext4_map_blocks map;
821aa75f4d3SHarshad Shirwadkar 	struct ext4_fc_add_range fc_ext;
822aa75f4d3SHarshad Shirwadkar 	struct ext4_fc_del_range lrange;
823aa75f4d3SHarshad Shirwadkar 	struct ext4_extent *ex;
824aa75f4d3SHarshad Shirwadkar 	int ret;
825aa75f4d3SHarshad Shirwadkar 
826aa75f4d3SHarshad Shirwadkar 	mutex_lock(&ei->i_fc_lock);
827aa75f4d3SHarshad Shirwadkar 	if (ei->i_fc_lblk_len == 0) {
828aa75f4d3SHarshad Shirwadkar 		mutex_unlock(&ei->i_fc_lock);
829aa75f4d3SHarshad Shirwadkar 		return 0;
830aa75f4d3SHarshad Shirwadkar 	}
831aa75f4d3SHarshad Shirwadkar 	old_blk_size = ei->i_fc_lblk_start;
832aa75f4d3SHarshad Shirwadkar 	new_blk_size = ei->i_fc_lblk_start + ei->i_fc_lblk_len - 1;
833aa75f4d3SHarshad Shirwadkar 	ei->i_fc_lblk_len = 0;
834aa75f4d3SHarshad Shirwadkar 	mutex_unlock(&ei->i_fc_lock);
835aa75f4d3SHarshad Shirwadkar 
836aa75f4d3SHarshad Shirwadkar 	cur_lblk_off = old_blk_size;
837aa75f4d3SHarshad Shirwadkar 	jbd_debug(1, "%s: will try writing %d to %d for inode %ld\n",
838aa75f4d3SHarshad Shirwadkar 		  __func__, cur_lblk_off, new_blk_size, inode->i_ino);
839aa75f4d3SHarshad Shirwadkar 
840aa75f4d3SHarshad Shirwadkar 	while (cur_lblk_off <= new_blk_size) {
841aa75f4d3SHarshad Shirwadkar 		map.m_lblk = cur_lblk_off;
842aa75f4d3SHarshad Shirwadkar 		map.m_len = new_blk_size - cur_lblk_off + 1;
843aa75f4d3SHarshad Shirwadkar 		ret = ext4_map_blocks(NULL, inode, &map, 0);
844aa75f4d3SHarshad Shirwadkar 		if (ret < 0)
845aa75f4d3SHarshad Shirwadkar 			return -ECANCELED;
846aa75f4d3SHarshad Shirwadkar 
847aa75f4d3SHarshad Shirwadkar 		if (map.m_len == 0) {
848aa75f4d3SHarshad Shirwadkar 			cur_lblk_off++;
849aa75f4d3SHarshad Shirwadkar 			continue;
850aa75f4d3SHarshad Shirwadkar 		}
851aa75f4d3SHarshad Shirwadkar 
852aa75f4d3SHarshad Shirwadkar 		if (ret == 0) {
853aa75f4d3SHarshad Shirwadkar 			lrange.fc_ino = cpu_to_le32(inode->i_ino);
854aa75f4d3SHarshad Shirwadkar 			lrange.fc_lblk = cpu_to_le32(map.m_lblk);
855aa75f4d3SHarshad Shirwadkar 			lrange.fc_len = cpu_to_le32(map.m_len);
856aa75f4d3SHarshad Shirwadkar 			if (!ext4_fc_add_tlv(inode->i_sb, EXT4_FC_TAG_DEL_RANGE,
857aa75f4d3SHarshad Shirwadkar 					    sizeof(lrange), (u8 *)&lrange, crc))
858aa75f4d3SHarshad Shirwadkar 				return -ENOSPC;
859aa75f4d3SHarshad Shirwadkar 		} else {
860a2c2f082SHou Tao 			unsigned int max = (map.m_flags & EXT4_MAP_UNWRITTEN) ?
861a2c2f082SHou Tao 				EXT_UNWRITTEN_MAX_LEN : EXT_INIT_MAX_LEN;
862a2c2f082SHou Tao 
863a2c2f082SHou Tao 			/* Limit the number of blocks in one extent */
864a2c2f082SHou Tao 			map.m_len = min(max, map.m_len);
865a2c2f082SHou Tao 
866aa75f4d3SHarshad Shirwadkar 			fc_ext.fc_ino = cpu_to_le32(inode->i_ino);
867aa75f4d3SHarshad Shirwadkar 			ex = (struct ext4_extent *)&fc_ext.fc_ex;
868aa75f4d3SHarshad Shirwadkar 			ex->ee_block = cpu_to_le32(map.m_lblk);
869aa75f4d3SHarshad Shirwadkar 			ex->ee_len = cpu_to_le16(map.m_len);
870aa75f4d3SHarshad Shirwadkar 			ext4_ext_store_pblock(ex, map.m_pblk);
871aa75f4d3SHarshad Shirwadkar 			if (map.m_flags & EXT4_MAP_UNWRITTEN)
872aa75f4d3SHarshad Shirwadkar 				ext4_ext_mark_unwritten(ex);
873aa75f4d3SHarshad Shirwadkar 			else
874aa75f4d3SHarshad Shirwadkar 				ext4_ext_mark_initialized(ex);
875aa75f4d3SHarshad Shirwadkar 			if (!ext4_fc_add_tlv(inode->i_sb, EXT4_FC_TAG_ADD_RANGE,
876aa75f4d3SHarshad Shirwadkar 					    sizeof(fc_ext), (u8 *)&fc_ext, crc))
877aa75f4d3SHarshad Shirwadkar 				return -ENOSPC;
878aa75f4d3SHarshad Shirwadkar 		}
879aa75f4d3SHarshad Shirwadkar 
880aa75f4d3SHarshad Shirwadkar 		cur_lblk_off += map.m_len;
881aa75f4d3SHarshad Shirwadkar 	}
882aa75f4d3SHarshad Shirwadkar 
883aa75f4d3SHarshad Shirwadkar 	return 0;
884aa75f4d3SHarshad Shirwadkar }
885aa75f4d3SHarshad Shirwadkar 
886aa75f4d3SHarshad Shirwadkar 
887aa75f4d3SHarshad Shirwadkar /* Submit data for all the fast commit inodes */
888aa75f4d3SHarshad Shirwadkar static int ext4_fc_submit_inode_data_all(journal_t *journal)
889aa75f4d3SHarshad Shirwadkar {
890aa75f4d3SHarshad Shirwadkar 	struct super_block *sb = (struct super_block *)(journal->j_private);
891aa75f4d3SHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(sb);
892aa75f4d3SHarshad Shirwadkar 	struct ext4_inode_info *ei;
893aa75f4d3SHarshad Shirwadkar 	int ret = 0;
894aa75f4d3SHarshad Shirwadkar 
895aa75f4d3SHarshad Shirwadkar 	spin_lock(&sbi->s_fc_lock);
8969b5f6c9bSHarshad Shirwadkar 	ext4_set_mount_flag(sb, EXT4_MF_FC_COMMITTING);
89796e7c02dSDaejun Park 	list_for_each_entry(ei, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) {
898aa75f4d3SHarshad Shirwadkar 		ext4_set_inode_state(&ei->vfs_inode, EXT4_STATE_FC_COMMITTING);
899aa75f4d3SHarshad Shirwadkar 		while (atomic_read(&ei->i_fc_updates)) {
900aa75f4d3SHarshad Shirwadkar 			DEFINE_WAIT(wait);
901aa75f4d3SHarshad Shirwadkar 
902aa75f4d3SHarshad Shirwadkar 			prepare_to_wait(&ei->i_fc_wait, &wait,
903aa75f4d3SHarshad Shirwadkar 						TASK_UNINTERRUPTIBLE);
904aa75f4d3SHarshad Shirwadkar 			if (atomic_read(&ei->i_fc_updates)) {
905aa75f4d3SHarshad Shirwadkar 				spin_unlock(&sbi->s_fc_lock);
906aa75f4d3SHarshad Shirwadkar 				schedule();
907aa75f4d3SHarshad Shirwadkar 				spin_lock(&sbi->s_fc_lock);
908aa75f4d3SHarshad Shirwadkar 			}
909aa75f4d3SHarshad Shirwadkar 			finish_wait(&ei->i_fc_wait, &wait);
910aa75f4d3SHarshad Shirwadkar 		}
911aa75f4d3SHarshad Shirwadkar 		spin_unlock(&sbi->s_fc_lock);
912aa75f4d3SHarshad Shirwadkar 		ret = jbd2_submit_inode_data(ei->jinode);
913aa75f4d3SHarshad Shirwadkar 		if (ret)
914aa75f4d3SHarshad Shirwadkar 			return ret;
915aa75f4d3SHarshad Shirwadkar 		spin_lock(&sbi->s_fc_lock);
916aa75f4d3SHarshad Shirwadkar 	}
917aa75f4d3SHarshad Shirwadkar 	spin_unlock(&sbi->s_fc_lock);
918aa75f4d3SHarshad Shirwadkar 
919aa75f4d3SHarshad Shirwadkar 	return ret;
920aa75f4d3SHarshad Shirwadkar }
921aa75f4d3SHarshad Shirwadkar 
922aa75f4d3SHarshad Shirwadkar /* Wait for completion of data for all the fast commit inodes */
923aa75f4d3SHarshad Shirwadkar static int ext4_fc_wait_inode_data_all(journal_t *journal)
924aa75f4d3SHarshad Shirwadkar {
925aa75f4d3SHarshad Shirwadkar 	struct super_block *sb = (struct super_block *)(journal->j_private);
926aa75f4d3SHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(sb);
927aa75f4d3SHarshad Shirwadkar 	struct ext4_inode_info *pos, *n;
928aa75f4d3SHarshad Shirwadkar 	int ret = 0;
929aa75f4d3SHarshad Shirwadkar 
930aa75f4d3SHarshad Shirwadkar 	spin_lock(&sbi->s_fc_lock);
931aa75f4d3SHarshad Shirwadkar 	list_for_each_entry_safe(pos, n, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) {
932aa75f4d3SHarshad Shirwadkar 		if (!ext4_test_inode_state(&pos->vfs_inode,
933aa75f4d3SHarshad Shirwadkar 					   EXT4_STATE_FC_COMMITTING))
934aa75f4d3SHarshad Shirwadkar 			continue;
935aa75f4d3SHarshad Shirwadkar 		spin_unlock(&sbi->s_fc_lock);
936aa75f4d3SHarshad Shirwadkar 
937aa75f4d3SHarshad Shirwadkar 		ret = jbd2_wait_inode_data(journal, pos->jinode);
938aa75f4d3SHarshad Shirwadkar 		if (ret)
939aa75f4d3SHarshad Shirwadkar 			return ret;
940aa75f4d3SHarshad Shirwadkar 		spin_lock(&sbi->s_fc_lock);
941aa75f4d3SHarshad Shirwadkar 	}
942aa75f4d3SHarshad Shirwadkar 	spin_unlock(&sbi->s_fc_lock);
943aa75f4d3SHarshad Shirwadkar 
944aa75f4d3SHarshad Shirwadkar 	return 0;
945aa75f4d3SHarshad Shirwadkar }
946aa75f4d3SHarshad Shirwadkar 
947aa75f4d3SHarshad Shirwadkar /* Commit all the directory entry updates */
948aa75f4d3SHarshad Shirwadkar static int ext4_fc_commit_dentry_updates(journal_t *journal, u32 *crc)
949fa329e27STheodore Ts'o __acquires(&sbi->s_fc_lock)
950fa329e27STheodore Ts'o __releases(&sbi->s_fc_lock)
951aa75f4d3SHarshad Shirwadkar {
952aa75f4d3SHarshad Shirwadkar 	struct super_block *sb = (struct super_block *)(journal->j_private);
953aa75f4d3SHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(sb);
95496e7c02dSDaejun Park 	struct ext4_fc_dentry_update *fc_dentry, *fc_dentry_n;
955aa75f4d3SHarshad Shirwadkar 	struct inode *inode;
95696e7c02dSDaejun Park 	struct ext4_inode_info *ei, *ei_n;
957aa75f4d3SHarshad Shirwadkar 	int ret;
958aa75f4d3SHarshad Shirwadkar 
959aa75f4d3SHarshad Shirwadkar 	if (list_empty(&sbi->s_fc_dentry_q[FC_Q_MAIN]))
960aa75f4d3SHarshad Shirwadkar 		return 0;
96196e7c02dSDaejun Park 	list_for_each_entry_safe(fc_dentry, fc_dentry_n,
96296e7c02dSDaejun Park 				 &sbi->s_fc_dentry_q[FC_Q_MAIN], fcd_list) {
963aa75f4d3SHarshad Shirwadkar 		if (fc_dentry->fcd_op != EXT4_FC_TAG_CREAT) {
964aa75f4d3SHarshad Shirwadkar 			spin_unlock(&sbi->s_fc_lock);
965facec450SGuoqing Jiang 			if (!ext4_fc_add_dentry_tlv(sb, crc, fc_dentry)) {
966aa75f4d3SHarshad Shirwadkar 				ret = -ENOSPC;
967aa75f4d3SHarshad Shirwadkar 				goto lock_and_exit;
968aa75f4d3SHarshad Shirwadkar 			}
969aa75f4d3SHarshad Shirwadkar 			spin_lock(&sbi->s_fc_lock);
970aa75f4d3SHarshad Shirwadkar 			continue;
971aa75f4d3SHarshad Shirwadkar 		}
972aa75f4d3SHarshad Shirwadkar 
973aa75f4d3SHarshad Shirwadkar 		inode = NULL;
97496e7c02dSDaejun Park 		list_for_each_entry_safe(ei, ei_n, &sbi->s_fc_q[FC_Q_MAIN],
97596e7c02dSDaejun Park 					 i_fc_list) {
976aa75f4d3SHarshad Shirwadkar 			if (ei->vfs_inode.i_ino == fc_dentry->fcd_ino) {
977aa75f4d3SHarshad Shirwadkar 				inode = &ei->vfs_inode;
978aa75f4d3SHarshad Shirwadkar 				break;
979aa75f4d3SHarshad Shirwadkar 			}
980aa75f4d3SHarshad Shirwadkar 		}
981aa75f4d3SHarshad Shirwadkar 		/*
982aa75f4d3SHarshad Shirwadkar 		 * If we don't find inode in our list, then it was deleted,
983aa75f4d3SHarshad Shirwadkar 		 * in which case, we don't need to record it's create tag.
984aa75f4d3SHarshad Shirwadkar 		 */
985aa75f4d3SHarshad Shirwadkar 		if (!inode)
986aa75f4d3SHarshad Shirwadkar 			continue;
987aa75f4d3SHarshad Shirwadkar 		spin_unlock(&sbi->s_fc_lock);
988aa75f4d3SHarshad Shirwadkar 
989aa75f4d3SHarshad Shirwadkar 		/*
990aa75f4d3SHarshad Shirwadkar 		 * We first write the inode and then the create dirent. This
991aa75f4d3SHarshad Shirwadkar 		 * allows the recovery code to create an unnamed inode first
992aa75f4d3SHarshad Shirwadkar 		 * and then link it to a directory entry. This allows us
993aa75f4d3SHarshad Shirwadkar 		 * to use namei.c routines almost as is and simplifies
994aa75f4d3SHarshad Shirwadkar 		 * the recovery code.
995aa75f4d3SHarshad Shirwadkar 		 */
996aa75f4d3SHarshad Shirwadkar 		ret = ext4_fc_write_inode(inode, crc);
997aa75f4d3SHarshad Shirwadkar 		if (ret)
998aa75f4d3SHarshad Shirwadkar 			goto lock_and_exit;
999aa75f4d3SHarshad Shirwadkar 
1000aa75f4d3SHarshad Shirwadkar 		ret = ext4_fc_write_inode_data(inode, crc);
1001aa75f4d3SHarshad Shirwadkar 		if (ret)
1002aa75f4d3SHarshad Shirwadkar 			goto lock_and_exit;
1003aa75f4d3SHarshad Shirwadkar 
1004facec450SGuoqing Jiang 		if (!ext4_fc_add_dentry_tlv(sb, crc, fc_dentry)) {
1005aa75f4d3SHarshad Shirwadkar 			ret = -ENOSPC;
1006aa75f4d3SHarshad Shirwadkar 			goto lock_and_exit;
1007aa75f4d3SHarshad Shirwadkar 		}
1008aa75f4d3SHarshad Shirwadkar 
1009aa75f4d3SHarshad Shirwadkar 		spin_lock(&sbi->s_fc_lock);
1010aa75f4d3SHarshad Shirwadkar 	}
1011aa75f4d3SHarshad Shirwadkar 	return 0;
1012aa75f4d3SHarshad Shirwadkar lock_and_exit:
1013aa75f4d3SHarshad Shirwadkar 	spin_lock(&sbi->s_fc_lock);
1014aa75f4d3SHarshad Shirwadkar 	return ret;
1015aa75f4d3SHarshad Shirwadkar }
1016aa75f4d3SHarshad Shirwadkar 
1017aa75f4d3SHarshad Shirwadkar static int ext4_fc_perform_commit(journal_t *journal)
1018aa75f4d3SHarshad Shirwadkar {
1019aa75f4d3SHarshad Shirwadkar 	struct super_block *sb = (struct super_block *)(journal->j_private);
1020aa75f4d3SHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(sb);
1021aa75f4d3SHarshad Shirwadkar 	struct ext4_inode_info *iter;
1022aa75f4d3SHarshad Shirwadkar 	struct ext4_fc_head head;
1023aa75f4d3SHarshad Shirwadkar 	struct inode *inode;
1024aa75f4d3SHarshad Shirwadkar 	struct blk_plug plug;
1025aa75f4d3SHarshad Shirwadkar 	int ret = 0;
1026aa75f4d3SHarshad Shirwadkar 	u32 crc = 0;
1027aa75f4d3SHarshad Shirwadkar 
1028aa75f4d3SHarshad Shirwadkar 	ret = ext4_fc_submit_inode_data_all(journal);
1029aa75f4d3SHarshad Shirwadkar 	if (ret)
1030aa75f4d3SHarshad Shirwadkar 		return ret;
1031aa75f4d3SHarshad Shirwadkar 
1032aa75f4d3SHarshad Shirwadkar 	ret = ext4_fc_wait_inode_data_all(journal);
1033aa75f4d3SHarshad Shirwadkar 	if (ret)
1034aa75f4d3SHarshad Shirwadkar 		return ret;
1035aa75f4d3SHarshad Shirwadkar 
1036da0c5d26SHarshad Shirwadkar 	/*
1037da0c5d26SHarshad Shirwadkar 	 * If file system device is different from journal device, issue a cache
1038da0c5d26SHarshad Shirwadkar 	 * flush before we start writing fast commit blocks.
1039da0c5d26SHarshad Shirwadkar 	 */
1040da0c5d26SHarshad Shirwadkar 	if (journal->j_fs_dev != journal->j_dev)
1041c6bf3f0eSChristoph Hellwig 		blkdev_issue_flush(journal->j_fs_dev);
1042da0c5d26SHarshad Shirwadkar 
1043aa75f4d3SHarshad Shirwadkar 	blk_start_plug(&plug);
1044aa75f4d3SHarshad Shirwadkar 	if (sbi->s_fc_bytes == 0) {
1045aa75f4d3SHarshad Shirwadkar 		/*
1046aa75f4d3SHarshad Shirwadkar 		 * Add a head tag only if this is the first fast commit
1047aa75f4d3SHarshad Shirwadkar 		 * in this TID.
1048aa75f4d3SHarshad Shirwadkar 		 */
1049aa75f4d3SHarshad Shirwadkar 		head.fc_features = cpu_to_le32(EXT4_FC_SUPPORTED_FEATURES);
1050aa75f4d3SHarshad Shirwadkar 		head.fc_tid = cpu_to_le32(
1051aa75f4d3SHarshad Shirwadkar 			sbi->s_journal->j_running_transaction->t_tid);
1052aa75f4d3SHarshad Shirwadkar 		if (!ext4_fc_add_tlv(sb, EXT4_FC_TAG_HEAD, sizeof(head),
1053e1262cd2SXu Yihang 			(u8 *)&head, &crc)) {
1054e1262cd2SXu Yihang 			ret = -ENOSPC;
1055aa75f4d3SHarshad Shirwadkar 			goto out;
1056aa75f4d3SHarshad Shirwadkar 		}
1057e1262cd2SXu Yihang 	}
1058aa75f4d3SHarshad Shirwadkar 
1059aa75f4d3SHarshad Shirwadkar 	spin_lock(&sbi->s_fc_lock);
1060aa75f4d3SHarshad Shirwadkar 	ret = ext4_fc_commit_dentry_updates(journal, &crc);
1061aa75f4d3SHarshad Shirwadkar 	if (ret) {
1062aa75f4d3SHarshad Shirwadkar 		spin_unlock(&sbi->s_fc_lock);
1063aa75f4d3SHarshad Shirwadkar 		goto out;
1064aa75f4d3SHarshad Shirwadkar 	}
1065aa75f4d3SHarshad Shirwadkar 
106696e7c02dSDaejun Park 	list_for_each_entry(iter, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) {
1067aa75f4d3SHarshad Shirwadkar 		inode = &iter->vfs_inode;
1068aa75f4d3SHarshad Shirwadkar 		if (!ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING))
1069aa75f4d3SHarshad Shirwadkar 			continue;
1070aa75f4d3SHarshad Shirwadkar 
1071aa75f4d3SHarshad Shirwadkar 		spin_unlock(&sbi->s_fc_lock);
1072aa75f4d3SHarshad Shirwadkar 		ret = ext4_fc_write_inode_data(inode, &crc);
1073aa75f4d3SHarshad Shirwadkar 		if (ret)
1074aa75f4d3SHarshad Shirwadkar 			goto out;
1075aa75f4d3SHarshad Shirwadkar 		ret = ext4_fc_write_inode(inode, &crc);
1076aa75f4d3SHarshad Shirwadkar 		if (ret)
1077aa75f4d3SHarshad Shirwadkar 			goto out;
1078aa75f4d3SHarshad Shirwadkar 		spin_lock(&sbi->s_fc_lock);
1079aa75f4d3SHarshad Shirwadkar 	}
1080aa75f4d3SHarshad Shirwadkar 	spin_unlock(&sbi->s_fc_lock);
1081aa75f4d3SHarshad Shirwadkar 
1082aa75f4d3SHarshad Shirwadkar 	ret = ext4_fc_write_tail(sb, crc);
1083aa75f4d3SHarshad Shirwadkar 
1084aa75f4d3SHarshad Shirwadkar out:
1085aa75f4d3SHarshad Shirwadkar 	blk_finish_plug(&plug);
1086aa75f4d3SHarshad Shirwadkar 	return ret;
1087aa75f4d3SHarshad Shirwadkar }
1088aa75f4d3SHarshad Shirwadkar 
10890915e464SHarshad Shirwadkar static void ext4_fc_update_stats(struct super_block *sb, int status,
10900915e464SHarshad Shirwadkar 				 u64 commit_time, int nblks)
10910915e464SHarshad Shirwadkar {
10920915e464SHarshad Shirwadkar 	struct ext4_fc_stats *stats = &EXT4_SB(sb)->s_fc_stats;
10930915e464SHarshad Shirwadkar 
10940915e464SHarshad Shirwadkar 	jbd_debug(1, "Fast commit ended with status = %d", status);
10950915e464SHarshad Shirwadkar 	if (status == EXT4_FC_STATUS_OK) {
10960915e464SHarshad Shirwadkar 		stats->fc_num_commits++;
10970915e464SHarshad Shirwadkar 		stats->fc_numblks += nblks;
10980915e464SHarshad Shirwadkar 		if (likely(stats->s_fc_avg_commit_time))
10990915e464SHarshad Shirwadkar 			stats->s_fc_avg_commit_time =
11000915e464SHarshad Shirwadkar 				(commit_time +
11010915e464SHarshad Shirwadkar 				 stats->s_fc_avg_commit_time * 3) / 4;
11020915e464SHarshad Shirwadkar 		else
11030915e464SHarshad Shirwadkar 			stats->s_fc_avg_commit_time = commit_time;
11040915e464SHarshad Shirwadkar 	} else if (status == EXT4_FC_STATUS_FAILED ||
11050915e464SHarshad Shirwadkar 		   status == EXT4_FC_STATUS_INELIGIBLE) {
11060915e464SHarshad Shirwadkar 		if (status == EXT4_FC_STATUS_FAILED)
11070915e464SHarshad Shirwadkar 			stats->fc_failed_commits++;
11080915e464SHarshad Shirwadkar 		stats->fc_ineligible_commits++;
11090915e464SHarshad Shirwadkar 	} else {
11100915e464SHarshad Shirwadkar 		stats->fc_skipped_commits++;
11110915e464SHarshad Shirwadkar 	}
11120915e464SHarshad Shirwadkar 	trace_ext4_fc_commit_stop(sb, nblks, status);
11130915e464SHarshad Shirwadkar }
11140915e464SHarshad Shirwadkar 
1115aa75f4d3SHarshad Shirwadkar /*
1116aa75f4d3SHarshad Shirwadkar  * The main commit entry point. Performs a fast commit for transaction
1117aa75f4d3SHarshad Shirwadkar  * commit_tid if needed. If it's not possible to perform a fast commit
1118aa75f4d3SHarshad Shirwadkar  * due to various reasons, we fall back to full commit. Returns 0
1119aa75f4d3SHarshad Shirwadkar  * on success, error otherwise.
1120aa75f4d3SHarshad Shirwadkar  */
1121aa75f4d3SHarshad Shirwadkar int ext4_fc_commit(journal_t *journal, tid_t commit_tid)
1122aa75f4d3SHarshad Shirwadkar {
1123aa75f4d3SHarshad Shirwadkar 	struct super_block *sb = (struct super_block *)(journal->j_private);
1124aa75f4d3SHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(sb);
1125aa75f4d3SHarshad Shirwadkar 	int nblks = 0, ret, bsize = journal->j_blocksize;
1126aa75f4d3SHarshad Shirwadkar 	int subtid = atomic_read(&sbi->s_fc_subtid);
11270915e464SHarshad Shirwadkar 	int status = EXT4_FC_STATUS_OK, fc_bufs_before = 0;
1128aa75f4d3SHarshad Shirwadkar 	ktime_t start_time, commit_time;
1129aa75f4d3SHarshad Shirwadkar 
1130aa75f4d3SHarshad Shirwadkar 	trace_ext4_fc_commit_start(sb);
1131aa75f4d3SHarshad Shirwadkar 
1132aa75f4d3SHarshad Shirwadkar 	start_time = ktime_get();
1133aa75f4d3SHarshad Shirwadkar 
11347bbbe241SHarshad Shirwadkar 	if (!test_opt2(sb, JOURNAL_FAST_COMMIT))
11357bbbe241SHarshad Shirwadkar 		return jbd2_complete_transaction(journal, commit_tid);
1136aa75f4d3SHarshad Shirwadkar 
1137aa75f4d3SHarshad Shirwadkar restart_fc:
1138aa75f4d3SHarshad Shirwadkar 	ret = jbd2_fc_begin_commit(journal, commit_tid);
1139aa75f4d3SHarshad Shirwadkar 	if (ret == -EALREADY) {
1140aa75f4d3SHarshad Shirwadkar 		/* There was an ongoing commit, check if we need to restart */
1141aa75f4d3SHarshad Shirwadkar 		if (atomic_read(&sbi->s_fc_subtid) <= subtid &&
1142aa75f4d3SHarshad Shirwadkar 			commit_tid > journal->j_commit_sequence)
1143aa75f4d3SHarshad Shirwadkar 			goto restart_fc;
11440915e464SHarshad Shirwadkar 		ext4_fc_update_stats(sb, EXT4_FC_STATUS_SKIPPED, 0, 0);
11450915e464SHarshad Shirwadkar 		return 0;
1146aa75f4d3SHarshad Shirwadkar 	} else if (ret) {
11470915e464SHarshad Shirwadkar 		/*
11480915e464SHarshad Shirwadkar 		 * Commit couldn't start. Just update stats and perform a
11490915e464SHarshad Shirwadkar 		 * full commit.
11500915e464SHarshad Shirwadkar 		 */
11510915e464SHarshad Shirwadkar 		ext4_fc_update_stats(sb, EXT4_FC_STATUS_FAILED, 0, 0);
11520915e464SHarshad Shirwadkar 		return jbd2_complete_transaction(journal, commit_tid);
1153aa75f4d3SHarshad Shirwadkar 	}
11540915e464SHarshad Shirwadkar 
11557bbbe241SHarshad Shirwadkar 	/*
11567bbbe241SHarshad Shirwadkar 	 * After establishing journal barrier via jbd2_fc_begin_commit(), check
11577bbbe241SHarshad Shirwadkar 	 * if we are fast commit ineligible.
11587bbbe241SHarshad Shirwadkar 	 */
11597bbbe241SHarshad Shirwadkar 	if (ext4_test_mount_flag(sb, EXT4_MF_FC_INELIGIBLE)) {
11600915e464SHarshad Shirwadkar 		status = EXT4_FC_STATUS_INELIGIBLE;
11610915e464SHarshad Shirwadkar 		goto fallback;
11627bbbe241SHarshad Shirwadkar 	}
1163aa75f4d3SHarshad Shirwadkar 
1164aa75f4d3SHarshad Shirwadkar 	fc_bufs_before = (sbi->s_fc_bytes + bsize - 1) / bsize;
1165aa75f4d3SHarshad Shirwadkar 	ret = ext4_fc_perform_commit(journal);
1166aa75f4d3SHarshad Shirwadkar 	if (ret < 0) {
11670915e464SHarshad Shirwadkar 		status = EXT4_FC_STATUS_FAILED;
11680915e464SHarshad Shirwadkar 		goto fallback;
1169aa75f4d3SHarshad Shirwadkar 	}
1170aa75f4d3SHarshad Shirwadkar 	nblks = (sbi->s_fc_bytes + bsize - 1) / bsize - fc_bufs_before;
1171aa75f4d3SHarshad Shirwadkar 	ret = jbd2_fc_wait_bufs(journal, nblks);
1172aa75f4d3SHarshad Shirwadkar 	if (ret < 0) {
11730915e464SHarshad Shirwadkar 		status = EXT4_FC_STATUS_FAILED;
11740915e464SHarshad Shirwadkar 		goto fallback;
1175aa75f4d3SHarshad Shirwadkar 	}
1176aa75f4d3SHarshad Shirwadkar 	atomic_inc(&sbi->s_fc_subtid);
11770915e464SHarshad Shirwadkar 	ret = jbd2_fc_end_commit(journal);
1178aa75f4d3SHarshad Shirwadkar 	/*
11790915e464SHarshad Shirwadkar 	 * weight the commit time higher than the average time so we
11800915e464SHarshad Shirwadkar 	 * don't react too strongly to vast changes in the commit time
1181aa75f4d3SHarshad Shirwadkar 	 */
11820915e464SHarshad Shirwadkar 	commit_time = ktime_to_ns(ktime_sub(ktime_get(), start_time));
11830915e464SHarshad Shirwadkar 	ext4_fc_update_stats(sb, status, commit_time, nblks);
11840915e464SHarshad Shirwadkar 	return ret;
11850915e464SHarshad Shirwadkar 
11860915e464SHarshad Shirwadkar fallback:
11870915e464SHarshad Shirwadkar 	ret = jbd2_fc_end_commit_fallback(journal);
11880915e464SHarshad Shirwadkar 	ext4_fc_update_stats(sb, status, 0, 0);
11890915e464SHarshad Shirwadkar 	return ret;
1190aa75f4d3SHarshad Shirwadkar }
1191aa75f4d3SHarshad Shirwadkar 
1192ff780b91SHarshad Shirwadkar /*
1193ff780b91SHarshad Shirwadkar  * Fast commit cleanup routine. This is called after every fast commit and
1194ff780b91SHarshad Shirwadkar  * full commit. full is true if we are called after a full commit.
1195ff780b91SHarshad Shirwadkar  */
1196*e85c81baSXin Yin static void ext4_fc_cleanup(journal_t *journal, int full, tid_t tid)
1197ff780b91SHarshad Shirwadkar {
1198aa75f4d3SHarshad Shirwadkar 	struct super_block *sb = journal->j_private;
1199aa75f4d3SHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(sb);
120096e7c02dSDaejun Park 	struct ext4_inode_info *iter, *iter_n;
1201aa75f4d3SHarshad Shirwadkar 	struct ext4_fc_dentry_update *fc_dentry;
1202aa75f4d3SHarshad Shirwadkar 
1203aa75f4d3SHarshad Shirwadkar 	if (full && sbi->s_fc_bh)
1204aa75f4d3SHarshad Shirwadkar 		sbi->s_fc_bh = NULL;
1205aa75f4d3SHarshad Shirwadkar 
1206aa75f4d3SHarshad Shirwadkar 	jbd2_fc_release_bufs(journal);
1207aa75f4d3SHarshad Shirwadkar 
1208aa75f4d3SHarshad Shirwadkar 	spin_lock(&sbi->s_fc_lock);
120996e7c02dSDaejun Park 	list_for_each_entry_safe(iter, iter_n, &sbi->s_fc_q[FC_Q_MAIN],
121096e7c02dSDaejun Park 				 i_fc_list) {
1211aa75f4d3SHarshad Shirwadkar 		list_del_init(&iter->i_fc_list);
1212aa75f4d3SHarshad Shirwadkar 		ext4_clear_inode_state(&iter->vfs_inode,
1213aa75f4d3SHarshad Shirwadkar 				       EXT4_STATE_FC_COMMITTING);
1214aa75f4d3SHarshad Shirwadkar 		ext4_fc_reset_inode(&iter->vfs_inode);
1215aa75f4d3SHarshad Shirwadkar 		/* Make sure EXT4_STATE_FC_COMMITTING bit is clear */
1216aa75f4d3SHarshad Shirwadkar 		smp_mb();
1217aa75f4d3SHarshad Shirwadkar #if (BITS_PER_LONG < 64)
1218aa75f4d3SHarshad Shirwadkar 		wake_up_bit(&iter->i_state_flags, EXT4_STATE_FC_COMMITTING);
1219aa75f4d3SHarshad Shirwadkar #else
1220aa75f4d3SHarshad Shirwadkar 		wake_up_bit(&iter->i_flags, EXT4_STATE_FC_COMMITTING);
1221aa75f4d3SHarshad Shirwadkar #endif
1222aa75f4d3SHarshad Shirwadkar 	}
1223aa75f4d3SHarshad Shirwadkar 
1224aa75f4d3SHarshad Shirwadkar 	while (!list_empty(&sbi->s_fc_dentry_q[FC_Q_MAIN])) {
1225aa75f4d3SHarshad Shirwadkar 		fc_dentry = list_first_entry(&sbi->s_fc_dentry_q[FC_Q_MAIN],
1226aa75f4d3SHarshad Shirwadkar 					     struct ext4_fc_dentry_update,
1227aa75f4d3SHarshad Shirwadkar 					     fcd_list);
1228aa75f4d3SHarshad Shirwadkar 		list_del_init(&fc_dentry->fcd_list);
1229aa75f4d3SHarshad Shirwadkar 		spin_unlock(&sbi->s_fc_lock);
1230aa75f4d3SHarshad Shirwadkar 
1231aa75f4d3SHarshad Shirwadkar 		if (fc_dentry->fcd_name.name &&
1232aa75f4d3SHarshad Shirwadkar 			fc_dentry->fcd_name.len > DNAME_INLINE_LEN)
1233aa75f4d3SHarshad Shirwadkar 			kfree(fc_dentry->fcd_name.name);
1234aa75f4d3SHarshad Shirwadkar 		kmem_cache_free(ext4_fc_dentry_cachep, fc_dentry);
1235aa75f4d3SHarshad Shirwadkar 		spin_lock(&sbi->s_fc_lock);
1236aa75f4d3SHarshad Shirwadkar 	}
1237aa75f4d3SHarshad Shirwadkar 
1238aa75f4d3SHarshad Shirwadkar 	list_splice_init(&sbi->s_fc_dentry_q[FC_Q_STAGING],
1239aa75f4d3SHarshad Shirwadkar 				&sbi->s_fc_dentry_q[FC_Q_MAIN]);
1240aa75f4d3SHarshad Shirwadkar 	list_splice_init(&sbi->s_fc_q[FC_Q_STAGING],
124131e203e0SDaejun Park 				&sbi->s_fc_q[FC_Q_MAIN]);
1242aa75f4d3SHarshad Shirwadkar 
12439b5f6c9bSHarshad Shirwadkar 	ext4_clear_mount_flag(sb, EXT4_MF_FC_COMMITTING);
1244*e85c81baSXin Yin 	if (tid >= sbi->s_fc_ineligible_tid) {
1245*e85c81baSXin Yin 		sbi->s_fc_ineligible_tid = 0;
12469b5f6c9bSHarshad Shirwadkar 		ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
1247*e85c81baSXin Yin 	}
1248aa75f4d3SHarshad Shirwadkar 
1249aa75f4d3SHarshad Shirwadkar 	if (full)
1250aa75f4d3SHarshad Shirwadkar 		sbi->s_fc_bytes = 0;
1251aa75f4d3SHarshad Shirwadkar 	spin_unlock(&sbi->s_fc_lock);
1252aa75f4d3SHarshad Shirwadkar 	trace_ext4_fc_stats(sb);
1253ff780b91SHarshad Shirwadkar }
12546866d7b3SHarshad Shirwadkar 
12558016e29fSHarshad Shirwadkar /* Ext4 Replay Path Routines */
12568016e29fSHarshad Shirwadkar 
/*
 * Helper struct for dentry replay routines: the decoded contents of a
 * dentry TLV as filled in by tl_to_darg().
 */
struct dentry_info_args {
	int parent_ino;	/* inode number of the parent directory */
	int dname_len;	/* length of the name that dname points to */
	int ino;	/* inode number the entry refers to */
	int inode_len;	/* not filled in by tl_to_darg() */
	char *dname;	/* entry name; points into the TLV value buffer */
};
12628016e29fSHarshad Shirwadkar 
12638016e29fSHarshad Shirwadkar static inline void tl_to_darg(struct dentry_info_args *darg,
1264a7ba36bcSHarshad Shirwadkar 			      struct  ext4_fc_tl *tl, u8 *val)
12658016e29fSHarshad Shirwadkar {
1266a7ba36bcSHarshad Shirwadkar 	struct ext4_fc_dentry_info fcd;
12678016e29fSHarshad Shirwadkar 
1268a7ba36bcSHarshad Shirwadkar 	memcpy(&fcd, val, sizeof(fcd));
12698016e29fSHarshad Shirwadkar 
1270a7ba36bcSHarshad Shirwadkar 	darg->parent_ino = le32_to_cpu(fcd.fc_parent_ino);
1271a7ba36bcSHarshad Shirwadkar 	darg->ino = le32_to_cpu(fcd.fc_ino);
1272a7ba36bcSHarshad Shirwadkar 	darg->dname = val + offsetof(struct ext4_fc_dentry_info, fc_dname);
1273a7ba36bcSHarshad Shirwadkar 	darg->dname_len = le16_to_cpu(tl->fc_len) -
12748016e29fSHarshad Shirwadkar 		sizeof(struct ext4_fc_dentry_info);
12758016e29fSHarshad Shirwadkar }
12768016e29fSHarshad Shirwadkar 
12778016e29fSHarshad Shirwadkar /* Unlink replay function */
1278a7ba36bcSHarshad Shirwadkar static int ext4_fc_replay_unlink(struct super_block *sb, struct ext4_fc_tl *tl,
1279a7ba36bcSHarshad Shirwadkar 				 u8 *val)
12808016e29fSHarshad Shirwadkar {
12818016e29fSHarshad Shirwadkar 	struct inode *inode, *old_parent;
12828016e29fSHarshad Shirwadkar 	struct qstr entry;
12838016e29fSHarshad Shirwadkar 	struct dentry_info_args darg;
12848016e29fSHarshad Shirwadkar 	int ret = 0;
12858016e29fSHarshad Shirwadkar 
1286a7ba36bcSHarshad Shirwadkar 	tl_to_darg(&darg, tl, val);
12878016e29fSHarshad Shirwadkar 
12888016e29fSHarshad Shirwadkar 	trace_ext4_fc_replay(sb, EXT4_FC_TAG_UNLINK, darg.ino,
12898016e29fSHarshad Shirwadkar 			darg.parent_ino, darg.dname_len);
12908016e29fSHarshad Shirwadkar 
12918016e29fSHarshad Shirwadkar 	entry.name = darg.dname;
12928016e29fSHarshad Shirwadkar 	entry.len = darg.dname_len;
12938016e29fSHarshad Shirwadkar 	inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL);
12948016e29fSHarshad Shirwadkar 
129523dd561aSYi Li 	if (IS_ERR(inode)) {
12968016e29fSHarshad Shirwadkar 		jbd_debug(1, "Inode %d not found", darg.ino);
12978016e29fSHarshad Shirwadkar 		return 0;
12988016e29fSHarshad Shirwadkar 	}
12998016e29fSHarshad Shirwadkar 
13008016e29fSHarshad Shirwadkar 	old_parent = ext4_iget(sb, darg.parent_ino,
13018016e29fSHarshad Shirwadkar 				EXT4_IGET_NORMAL);
130223dd561aSYi Li 	if (IS_ERR(old_parent)) {
13038016e29fSHarshad Shirwadkar 		jbd_debug(1, "Dir with inode  %d not found", darg.parent_ino);
13048016e29fSHarshad Shirwadkar 		iput(inode);
13058016e29fSHarshad Shirwadkar 		return 0;
13068016e29fSHarshad Shirwadkar 	}
13078016e29fSHarshad Shirwadkar 
1308a80f7fcfSHarshad Shirwadkar 	ret = __ext4_unlink(NULL, old_parent, &entry, inode);
13098016e29fSHarshad Shirwadkar 	/* -ENOENT ok coz it might not exist anymore. */
13108016e29fSHarshad Shirwadkar 	if (ret == -ENOENT)
13118016e29fSHarshad Shirwadkar 		ret = 0;
13128016e29fSHarshad Shirwadkar 	iput(old_parent);
13138016e29fSHarshad Shirwadkar 	iput(inode);
13148016e29fSHarshad Shirwadkar 	return ret;
13158016e29fSHarshad Shirwadkar }
13168016e29fSHarshad Shirwadkar 
13178016e29fSHarshad Shirwadkar static int ext4_fc_replay_link_internal(struct super_block *sb,
13188016e29fSHarshad Shirwadkar 				struct dentry_info_args *darg,
13198016e29fSHarshad Shirwadkar 				struct inode *inode)
13208016e29fSHarshad Shirwadkar {
13218016e29fSHarshad Shirwadkar 	struct inode *dir = NULL;
13228016e29fSHarshad Shirwadkar 	struct dentry *dentry_dir = NULL, *dentry_inode = NULL;
13238016e29fSHarshad Shirwadkar 	struct qstr qstr_dname = QSTR_INIT(darg->dname, darg->dname_len);
13248016e29fSHarshad Shirwadkar 	int ret = 0;
13258016e29fSHarshad Shirwadkar 
13268016e29fSHarshad Shirwadkar 	dir = ext4_iget(sb, darg->parent_ino, EXT4_IGET_NORMAL);
13278016e29fSHarshad Shirwadkar 	if (IS_ERR(dir)) {
13288016e29fSHarshad Shirwadkar 		jbd_debug(1, "Dir with inode %d not found.", darg->parent_ino);
13298016e29fSHarshad Shirwadkar 		dir = NULL;
13308016e29fSHarshad Shirwadkar 		goto out;
13318016e29fSHarshad Shirwadkar 	}
13328016e29fSHarshad Shirwadkar 
13338016e29fSHarshad Shirwadkar 	dentry_dir = d_obtain_alias(dir);
13348016e29fSHarshad Shirwadkar 	if (IS_ERR(dentry_dir)) {
13358016e29fSHarshad Shirwadkar 		jbd_debug(1, "Failed to obtain dentry");
13368016e29fSHarshad Shirwadkar 		dentry_dir = NULL;
13378016e29fSHarshad Shirwadkar 		goto out;
13388016e29fSHarshad Shirwadkar 	}
13398016e29fSHarshad Shirwadkar 
13408016e29fSHarshad Shirwadkar 	dentry_inode = d_alloc(dentry_dir, &qstr_dname);
13418016e29fSHarshad Shirwadkar 	if (!dentry_inode) {
13428016e29fSHarshad Shirwadkar 		jbd_debug(1, "Inode dentry not created.");
13438016e29fSHarshad Shirwadkar 		ret = -ENOMEM;
13448016e29fSHarshad Shirwadkar 		goto out;
13458016e29fSHarshad Shirwadkar 	}
13468016e29fSHarshad Shirwadkar 
13478016e29fSHarshad Shirwadkar 	ret = __ext4_link(dir, inode, dentry_inode);
13488016e29fSHarshad Shirwadkar 	/*
13498016e29fSHarshad Shirwadkar 	 * It's possible that link already existed since data blocks
13508016e29fSHarshad Shirwadkar 	 * for the dir in question got persisted before we crashed OR
13518016e29fSHarshad Shirwadkar 	 * we replayed this tag and crashed before the entire replay
13528016e29fSHarshad Shirwadkar 	 * could complete.
13538016e29fSHarshad Shirwadkar 	 */
13548016e29fSHarshad Shirwadkar 	if (ret && ret != -EEXIST) {
13558016e29fSHarshad Shirwadkar 		jbd_debug(1, "Failed to link\n");
13568016e29fSHarshad Shirwadkar 		goto out;
13578016e29fSHarshad Shirwadkar 	}
13588016e29fSHarshad Shirwadkar 
13598016e29fSHarshad Shirwadkar 	ret = 0;
13608016e29fSHarshad Shirwadkar out:
13618016e29fSHarshad Shirwadkar 	if (dentry_dir) {
13628016e29fSHarshad Shirwadkar 		d_drop(dentry_dir);
13638016e29fSHarshad Shirwadkar 		dput(dentry_dir);
13648016e29fSHarshad Shirwadkar 	} else if (dir) {
13658016e29fSHarshad Shirwadkar 		iput(dir);
13668016e29fSHarshad Shirwadkar 	}
13678016e29fSHarshad Shirwadkar 	if (dentry_inode) {
13688016e29fSHarshad Shirwadkar 		d_drop(dentry_inode);
13698016e29fSHarshad Shirwadkar 		dput(dentry_inode);
13708016e29fSHarshad Shirwadkar 	}
13718016e29fSHarshad Shirwadkar 
13728016e29fSHarshad Shirwadkar 	return ret;
13738016e29fSHarshad Shirwadkar }
13748016e29fSHarshad Shirwadkar 
13758016e29fSHarshad Shirwadkar /* Link replay function */
1376a7ba36bcSHarshad Shirwadkar static int ext4_fc_replay_link(struct super_block *sb, struct ext4_fc_tl *tl,
1377a7ba36bcSHarshad Shirwadkar 			       u8 *val)
13788016e29fSHarshad Shirwadkar {
13798016e29fSHarshad Shirwadkar 	struct inode *inode;
13808016e29fSHarshad Shirwadkar 	struct dentry_info_args darg;
13818016e29fSHarshad Shirwadkar 	int ret = 0;
13828016e29fSHarshad Shirwadkar 
1383a7ba36bcSHarshad Shirwadkar 	tl_to_darg(&darg, tl, val);
13848016e29fSHarshad Shirwadkar 	trace_ext4_fc_replay(sb, EXT4_FC_TAG_LINK, darg.ino,
13858016e29fSHarshad Shirwadkar 			darg.parent_ino, darg.dname_len);
13868016e29fSHarshad Shirwadkar 
13878016e29fSHarshad Shirwadkar 	inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL);
138823dd561aSYi Li 	if (IS_ERR(inode)) {
13898016e29fSHarshad Shirwadkar 		jbd_debug(1, "Inode not found.");
13908016e29fSHarshad Shirwadkar 		return 0;
13918016e29fSHarshad Shirwadkar 	}
13928016e29fSHarshad Shirwadkar 
13938016e29fSHarshad Shirwadkar 	ret = ext4_fc_replay_link_internal(sb, &darg, inode);
13948016e29fSHarshad Shirwadkar 	iput(inode);
13958016e29fSHarshad Shirwadkar 	return ret;
13968016e29fSHarshad Shirwadkar }
13978016e29fSHarshad Shirwadkar 
13988016e29fSHarshad Shirwadkar /*
13998016e29fSHarshad Shirwadkar  * Record all the modified inodes during replay. We use this later to setup
14008016e29fSHarshad Shirwadkar  * block bitmaps correctly.
14018016e29fSHarshad Shirwadkar  */
14028016e29fSHarshad Shirwadkar static int ext4_fc_record_modified_inode(struct super_block *sb, int ino)
14038016e29fSHarshad Shirwadkar {
14048016e29fSHarshad Shirwadkar 	struct ext4_fc_replay_state *state;
14058016e29fSHarshad Shirwadkar 	int i;
14068016e29fSHarshad Shirwadkar 
14078016e29fSHarshad Shirwadkar 	state = &EXT4_SB(sb)->s_fc_replay_state;
14088016e29fSHarshad Shirwadkar 	for (i = 0; i < state->fc_modified_inodes_used; i++)
14098016e29fSHarshad Shirwadkar 		if (state->fc_modified_inodes[i] == ino)
14108016e29fSHarshad Shirwadkar 			return 0;
14118016e29fSHarshad Shirwadkar 	if (state->fc_modified_inodes_used == state->fc_modified_inodes_size) {
14128016e29fSHarshad Shirwadkar 		state->fc_modified_inodes_size +=
14138016e29fSHarshad Shirwadkar 			EXT4_FC_REPLAY_REALLOC_INCREMENT;
14148016e29fSHarshad Shirwadkar 		state->fc_modified_inodes = krealloc(
14158016e29fSHarshad Shirwadkar 					state->fc_modified_inodes, sizeof(int) *
14168016e29fSHarshad Shirwadkar 					state->fc_modified_inodes_size,
14178016e29fSHarshad Shirwadkar 					GFP_KERNEL);
14188016e29fSHarshad Shirwadkar 		if (!state->fc_modified_inodes)
14198016e29fSHarshad Shirwadkar 			return -ENOMEM;
14208016e29fSHarshad Shirwadkar 	}
14218016e29fSHarshad Shirwadkar 	state->fc_modified_inodes[state->fc_modified_inodes_used++] = ino;
14228016e29fSHarshad Shirwadkar 	return 0;
14238016e29fSHarshad Shirwadkar }
14248016e29fSHarshad Shirwadkar 
14258016e29fSHarshad Shirwadkar /*
14268016e29fSHarshad Shirwadkar  * Inode replay function
14278016e29fSHarshad Shirwadkar  */
1428a7ba36bcSHarshad Shirwadkar static int ext4_fc_replay_inode(struct super_block *sb, struct ext4_fc_tl *tl,
1429a7ba36bcSHarshad Shirwadkar 				u8 *val)
14308016e29fSHarshad Shirwadkar {
1431a7ba36bcSHarshad Shirwadkar 	struct ext4_fc_inode fc_inode;
14328016e29fSHarshad Shirwadkar 	struct ext4_inode *raw_inode;
14338016e29fSHarshad Shirwadkar 	struct ext4_inode *raw_fc_inode;
14348016e29fSHarshad Shirwadkar 	struct inode *inode = NULL;
14358016e29fSHarshad Shirwadkar 	struct ext4_iloc iloc;
14368016e29fSHarshad Shirwadkar 	int inode_len, ino, ret, tag = le16_to_cpu(tl->fc_tag);
14378016e29fSHarshad Shirwadkar 	struct ext4_extent_header *eh;
14388016e29fSHarshad Shirwadkar 
1439a7ba36bcSHarshad Shirwadkar 	memcpy(&fc_inode, val, sizeof(fc_inode));
14408016e29fSHarshad Shirwadkar 
1441a7ba36bcSHarshad Shirwadkar 	ino = le32_to_cpu(fc_inode.fc_ino);
14428016e29fSHarshad Shirwadkar 	trace_ext4_fc_replay(sb, tag, ino, 0, 0);
14438016e29fSHarshad Shirwadkar 
14448016e29fSHarshad Shirwadkar 	inode = ext4_iget(sb, ino, EXT4_IGET_NORMAL);
144523dd561aSYi Li 	if (!IS_ERR(inode)) {
14468016e29fSHarshad Shirwadkar 		ext4_ext_clear_bb(inode);
14478016e29fSHarshad Shirwadkar 		iput(inode);
14488016e29fSHarshad Shirwadkar 	}
144923dd561aSYi Li 	inode = NULL;
14508016e29fSHarshad Shirwadkar 
14518016e29fSHarshad Shirwadkar 	ext4_fc_record_modified_inode(sb, ino);
14528016e29fSHarshad Shirwadkar 
1453a7ba36bcSHarshad Shirwadkar 	raw_fc_inode = (struct ext4_inode *)
1454a7ba36bcSHarshad Shirwadkar 		(val + offsetof(struct ext4_fc_inode, fc_raw_inode));
14558016e29fSHarshad Shirwadkar 	ret = ext4_get_fc_inode_loc(sb, ino, &iloc);
14568016e29fSHarshad Shirwadkar 	if (ret)
14578016e29fSHarshad Shirwadkar 		goto out;
14588016e29fSHarshad Shirwadkar 
1459a7ba36bcSHarshad Shirwadkar 	inode_len = le16_to_cpu(tl->fc_len) - sizeof(struct ext4_fc_inode);
14608016e29fSHarshad Shirwadkar 	raw_inode = ext4_raw_inode(&iloc);
14618016e29fSHarshad Shirwadkar 
14628016e29fSHarshad Shirwadkar 	memcpy(raw_inode, raw_fc_inode, offsetof(struct ext4_inode, i_block));
14638016e29fSHarshad Shirwadkar 	memcpy(&raw_inode->i_generation, &raw_fc_inode->i_generation,
14648016e29fSHarshad Shirwadkar 		inode_len - offsetof(struct ext4_inode, i_generation));
14658016e29fSHarshad Shirwadkar 	if (le32_to_cpu(raw_inode->i_flags) & EXT4_EXTENTS_FL) {
14668016e29fSHarshad Shirwadkar 		eh = (struct ext4_extent_header *)(&raw_inode->i_block[0]);
14678016e29fSHarshad Shirwadkar 		if (eh->eh_magic != EXT4_EXT_MAGIC) {
14688016e29fSHarshad Shirwadkar 			memset(eh, 0, sizeof(*eh));
14698016e29fSHarshad Shirwadkar 			eh->eh_magic = EXT4_EXT_MAGIC;
14708016e29fSHarshad Shirwadkar 			eh->eh_max = cpu_to_le16(
14718016e29fSHarshad Shirwadkar 				(sizeof(raw_inode->i_block) -
14728016e29fSHarshad Shirwadkar 				 sizeof(struct ext4_extent_header))
14738016e29fSHarshad Shirwadkar 				 / sizeof(struct ext4_extent));
14748016e29fSHarshad Shirwadkar 		}
14758016e29fSHarshad Shirwadkar 	} else if (le32_to_cpu(raw_inode->i_flags) & EXT4_INLINE_DATA_FL) {
14768016e29fSHarshad Shirwadkar 		memcpy(raw_inode->i_block, raw_fc_inode->i_block,
14778016e29fSHarshad Shirwadkar 			sizeof(raw_inode->i_block));
14788016e29fSHarshad Shirwadkar 	}
14798016e29fSHarshad Shirwadkar 
14808016e29fSHarshad Shirwadkar 	/* Immediately update the inode on disk. */
14818016e29fSHarshad Shirwadkar 	ret = ext4_handle_dirty_metadata(NULL, NULL, iloc.bh);
14828016e29fSHarshad Shirwadkar 	if (ret)
14838016e29fSHarshad Shirwadkar 		goto out;
14848016e29fSHarshad Shirwadkar 	ret = sync_dirty_buffer(iloc.bh);
14858016e29fSHarshad Shirwadkar 	if (ret)
14868016e29fSHarshad Shirwadkar 		goto out;
14878016e29fSHarshad Shirwadkar 	ret = ext4_mark_inode_used(sb, ino);
14888016e29fSHarshad Shirwadkar 	if (ret)
14898016e29fSHarshad Shirwadkar 		goto out;
14908016e29fSHarshad Shirwadkar 
14918016e29fSHarshad Shirwadkar 	/* Given that we just wrote the inode on disk, this SHOULD succeed. */
14928016e29fSHarshad Shirwadkar 	inode = ext4_iget(sb, ino, EXT4_IGET_NORMAL);
149323dd561aSYi Li 	if (IS_ERR(inode)) {
14948016e29fSHarshad Shirwadkar 		jbd_debug(1, "Inode not found.");
14958016e29fSHarshad Shirwadkar 		return -EFSCORRUPTED;
14968016e29fSHarshad Shirwadkar 	}
14978016e29fSHarshad Shirwadkar 
14988016e29fSHarshad Shirwadkar 	/*
14998016e29fSHarshad Shirwadkar 	 * Our allocator could have made different decisions than before
15008016e29fSHarshad Shirwadkar 	 * crashing. This should be fixed but until then, we calculate
15018016e29fSHarshad Shirwadkar 	 * the number of blocks the inode.
15028016e29fSHarshad Shirwadkar 	 */
15031ebf2178SHarshad Shirwadkar 	if (!ext4_test_inode_flag(inode, EXT4_INODE_INLINE_DATA))
15048016e29fSHarshad Shirwadkar 		ext4_ext_replay_set_iblocks(inode);
15058016e29fSHarshad Shirwadkar 
15068016e29fSHarshad Shirwadkar 	inode->i_generation = le32_to_cpu(ext4_raw_inode(&iloc)->i_generation);
15078016e29fSHarshad Shirwadkar 	ext4_reset_inode_seed(inode);
15088016e29fSHarshad Shirwadkar 
15098016e29fSHarshad Shirwadkar 	ext4_inode_csum_set(inode, ext4_raw_inode(&iloc), EXT4_I(inode));
15108016e29fSHarshad Shirwadkar 	ret = ext4_handle_dirty_metadata(NULL, NULL, iloc.bh);
15118016e29fSHarshad Shirwadkar 	sync_dirty_buffer(iloc.bh);
15128016e29fSHarshad Shirwadkar 	brelse(iloc.bh);
15138016e29fSHarshad Shirwadkar out:
15148016e29fSHarshad Shirwadkar 	iput(inode);
15158016e29fSHarshad Shirwadkar 	if (!ret)
1516c6bf3f0eSChristoph Hellwig 		blkdev_issue_flush(sb->s_bdev);
15178016e29fSHarshad Shirwadkar 
15188016e29fSHarshad Shirwadkar 	return 0;
15198016e29fSHarshad Shirwadkar }
15208016e29fSHarshad Shirwadkar 
15218016e29fSHarshad Shirwadkar /*
15228016e29fSHarshad Shirwadkar  * Dentry create replay function.
15238016e29fSHarshad Shirwadkar  *
15248016e29fSHarshad Shirwadkar  * EXT4_FC_TAG_CREAT is preceded by EXT4_FC_TAG_INODE_FULL. Which means, the
15258016e29fSHarshad Shirwadkar  * inode for which we are trying to create a dentry here, should already have
15268016e29fSHarshad Shirwadkar  * been replayed before we start here.
15278016e29fSHarshad Shirwadkar  */
1528a7ba36bcSHarshad Shirwadkar static int ext4_fc_replay_create(struct super_block *sb, struct ext4_fc_tl *tl,
1529a7ba36bcSHarshad Shirwadkar 				 u8 *val)
15308016e29fSHarshad Shirwadkar {
15318016e29fSHarshad Shirwadkar 	int ret = 0;
15328016e29fSHarshad Shirwadkar 	struct inode *inode = NULL;
15338016e29fSHarshad Shirwadkar 	struct inode *dir = NULL;
15348016e29fSHarshad Shirwadkar 	struct dentry_info_args darg;
15358016e29fSHarshad Shirwadkar 
1536a7ba36bcSHarshad Shirwadkar 	tl_to_darg(&darg, tl, val);
15378016e29fSHarshad Shirwadkar 
15388016e29fSHarshad Shirwadkar 	trace_ext4_fc_replay(sb, EXT4_FC_TAG_CREAT, darg.ino,
15398016e29fSHarshad Shirwadkar 			darg.parent_ino, darg.dname_len);
15408016e29fSHarshad Shirwadkar 
15418016e29fSHarshad Shirwadkar 	/* This takes care of update group descriptor and other metadata */
15428016e29fSHarshad Shirwadkar 	ret = ext4_mark_inode_used(sb, darg.ino);
15438016e29fSHarshad Shirwadkar 	if (ret)
15448016e29fSHarshad Shirwadkar 		goto out;
15458016e29fSHarshad Shirwadkar 
15468016e29fSHarshad Shirwadkar 	inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL);
154723dd561aSYi Li 	if (IS_ERR(inode)) {
15488016e29fSHarshad Shirwadkar 		jbd_debug(1, "inode %d not found.", darg.ino);
15498016e29fSHarshad Shirwadkar 		inode = NULL;
15508016e29fSHarshad Shirwadkar 		ret = -EINVAL;
15518016e29fSHarshad Shirwadkar 		goto out;
15528016e29fSHarshad Shirwadkar 	}
15538016e29fSHarshad Shirwadkar 
15548016e29fSHarshad Shirwadkar 	if (S_ISDIR(inode->i_mode)) {
15558016e29fSHarshad Shirwadkar 		/*
15568016e29fSHarshad Shirwadkar 		 * If we are creating a directory, we need to make sure that the
15578016e29fSHarshad Shirwadkar 		 * dot and dot dot dirents are setup properly.
15588016e29fSHarshad Shirwadkar 		 */
15598016e29fSHarshad Shirwadkar 		dir = ext4_iget(sb, darg.parent_ino, EXT4_IGET_NORMAL);
156023dd561aSYi Li 		if (IS_ERR(dir)) {
15618016e29fSHarshad Shirwadkar 			jbd_debug(1, "Dir %d not found.", darg.ino);
15628016e29fSHarshad Shirwadkar 			goto out;
15638016e29fSHarshad Shirwadkar 		}
15648016e29fSHarshad Shirwadkar 		ret = ext4_init_new_dir(NULL, dir, inode);
15658016e29fSHarshad Shirwadkar 		iput(dir);
15668016e29fSHarshad Shirwadkar 		if (ret) {
15678016e29fSHarshad Shirwadkar 			ret = 0;
15688016e29fSHarshad Shirwadkar 			goto out;
15698016e29fSHarshad Shirwadkar 		}
15708016e29fSHarshad Shirwadkar 	}
15718016e29fSHarshad Shirwadkar 	ret = ext4_fc_replay_link_internal(sb, &darg, inode);
15728016e29fSHarshad Shirwadkar 	if (ret)
15738016e29fSHarshad Shirwadkar 		goto out;
15748016e29fSHarshad Shirwadkar 	set_nlink(inode, 1);
15758016e29fSHarshad Shirwadkar 	ext4_mark_inode_dirty(NULL, inode);
15768016e29fSHarshad Shirwadkar out:
15778016e29fSHarshad Shirwadkar 	if (inode)
15788016e29fSHarshad Shirwadkar 		iput(inode);
15798016e29fSHarshad Shirwadkar 	return ret;
15808016e29fSHarshad Shirwadkar }
15818016e29fSHarshad Shirwadkar 
15828016e29fSHarshad Shirwadkar /*
1583599ea31dSXin Yin  * Record physical disk regions which are in use as per fast commit area,
1584599ea31dSXin Yin  * and used by inodes during replay phase. Our simple replay phase
1585599ea31dSXin Yin  * allocator excludes these regions from allocation.
15868016e29fSHarshad Shirwadkar  */
1587599ea31dSXin Yin int ext4_fc_record_regions(struct super_block *sb, int ino,
1588599ea31dSXin Yin 		ext4_lblk_t lblk, ext4_fsblk_t pblk, int len, int replay)
15898016e29fSHarshad Shirwadkar {
15908016e29fSHarshad Shirwadkar 	struct ext4_fc_replay_state *state;
15918016e29fSHarshad Shirwadkar 	struct ext4_fc_alloc_region *region;
15928016e29fSHarshad Shirwadkar 
15938016e29fSHarshad Shirwadkar 	state = &EXT4_SB(sb)->s_fc_replay_state;
1594599ea31dSXin Yin 	/*
1595599ea31dSXin Yin 	 * during replay phase, the fc_regions_valid may not same as
1596599ea31dSXin Yin 	 * fc_regions_used, update it when do new additions.
1597599ea31dSXin Yin 	 */
1598599ea31dSXin Yin 	if (replay && state->fc_regions_used != state->fc_regions_valid)
1599599ea31dSXin Yin 		state->fc_regions_used = state->fc_regions_valid;
16008016e29fSHarshad Shirwadkar 	if (state->fc_regions_used == state->fc_regions_size) {
16018016e29fSHarshad Shirwadkar 		state->fc_regions_size +=
16028016e29fSHarshad Shirwadkar 			EXT4_FC_REPLAY_REALLOC_INCREMENT;
16038016e29fSHarshad Shirwadkar 		state->fc_regions = krealloc(
16048016e29fSHarshad Shirwadkar 					state->fc_regions,
16058016e29fSHarshad Shirwadkar 					state->fc_regions_size *
16068016e29fSHarshad Shirwadkar 					sizeof(struct ext4_fc_alloc_region),
16078016e29fSHarshad Shirwadkar 					GFP_KERNEL);
16088016e29fSHarshad Shirwadkar 		if (!state->fc_regions)
16098016e29fSHarshad Shirwadkar 			return -ENOMEM;
16108016e29fSHarshad Shirwadkar 	}
16118016e29fSHarshad Shirwadkar 	region = &state->fc_regions[state->fc_regions_used++];
16128016e29fSHarshad Shirwadkar 	region->ino = ino;
16138016e29fSHarshad Shirwadkar 	region->lblk = lblk;
16148016e29fSHarshad Shirwadkar 	region->pblk = pblk;
16158016e29fSHarshad Shirwadkar 	region->len = len;
16168016e29fSHarshad Shirwadkar 
1617599ea31dSXin Yin 	if (replay)
1618599ea31dSXin Yin 		state->fc_regions_valid++;
1619599ea31dSXin Yin 
16208016e29fSHarshad Shirwadkar 	return 0;
16218016e29fSHarshad Shirwadkar }
16228016e29fSHarshad Shirwadkar 
16238016e29fSHarshad Shirwadkar /* Replay add range tag */
16248016e29fSHarshad Shirwadkar static int ext4_fc_replay_add_range(struct super_block *sb,
1625a7ba36bcSHarshad Shirwadkar 				    struct ext4_fc_tl *tl, u8 *val)
16268016e29fSHarshad Shirwadkar {
1627a7ba36bcSHarshad Shirwadkar 	struct ext4_fc_add_range fc_add_ex;
16288016e29fSHarshad Shirwadkar 	struct ext4_extent newex, *ex;
16298016e29fSHarshad Shirwadkar 	struct inode *inode;
16308016e29fSHarshad Shirwadkar 	ext4_lblk_t start, cur;
16318016e29fSHarshad Shirwadkar 	int remaining, len;
16328016e29fSHarshad Shirwadkar 	ext4_fsblk_t start_pblk;
16338016e29fSHarshad Shirwadkar 	struct ext4_map_blocks map;
16348016e29fSHarshad Shirwadkar 	struct ext4_ext_path *path = NULL;
16358016e29fSHarshad Shirwadkar 	int ret;
16368016e29fSHarshad Shirwadkar 
1637a7ba36bcSHarshad Shirwadkar 	memcpy(&fc_add_ex, val, sizeof(fc_add_ex));
1638a7ba36bcSHarshad Shirwadkar 	ex = (struct ext4_extent *)&fc_add_ex.fc_ex;
16398016e29fSHarshad Shirwadkar 
16408016e29fSHarshad Shirwadkar 	trace_ext4_fc_replay(sb, EXT4_FC_TAG_ADD_RANGE,
1641a7ba36bcSHarshad Shirwadkar 		le32_to_cpu(fc_add_ex.fc_ino), le32_to_cpu(ex->ee_block),
16428016e29fSHarshad Shirwadkar 		ext4_ext_get_actual_len(ex));
16438016e29fSHarshad Shirwadkar 
1644a7ba36bcSHarshad Shirwadkar 	inode = ext4_iget(sb, le32_to_cpu(fc_add_ex.fc_ino), EXT4_IGET_NORMAL);
164523dd561aSYi Li 	if (IS_ERR(inode)) {
16468016e29fSHarshad Shirwadkar 		jbd_debug(1, "Inode not found.");
16478016e29fSHarshad Shirwadkar 		return 0;
16488016e29fSHarshad Shirwadkar 	}
16498016e29fSHarshad Shirwadkar 
16508016e29fSHarshad Shirwadkar 	ret = ext4_fc_record_modified_inode(sb, inode->i_ino);
16518016e29fSHarshad Shirwadkar 
16528016e29fSHarshad Shirwadkar 	start = le32_to_cpu(ex->ee_block);
16538016e29fSHarshad Shirwadkar 	start_pblk = ext4_ext_pblock(ex);
16548016e29fSHarshad Shirwadkar 	len = ext4_ext_get_actual_len(ex);
16558016e29fSHarshad Shirwadkar 
16568016e29fSHarshad Shirwadkar 	cur = start;
16578016e29fSHarshad Shirwadkar 	remaining = len;
16588016e29fSHarshad Shirwadkar 	jbd_debug(1, "ADD_RANGE, lblk %d, pblk %lld, len %d, unwritten %d, inode %ld\n",
16598016e29fSHarshad Shirwadkar 		  start, start_pblk, len, ext4_ext_is_unwritten(ex),
16608016e29fSHarshad Shirwadkar 		  inode->i_ino);
16618016e29fSHarshad Shirwadkar 
16628016e29fSHarshad Shirwadkar 	while (remaining > 0) {
16638016e29fSHarshad Shirwadkar 		map.m_lblk = cur;
16648016e29fSHarshad Shirwadkar 		map.m_len = remaining;
16658016e29fSHarshad Shirwadkar 		map.m_pblk = 0;
16668016e29fSHarshad Shirwadkar 		ret = ext4_map_blocks(NULL, inode, &map, 0);
16678016e29fSHarshad Shirwadkar 
16688016e29fSHarshad Shirwadkar 		if (ret < 0) {
16698016e29fSHarshad Shirwadkar 			iput(inode);
16708016e29fSHarshad Shirwadkar 			return 0;
16718016e29fSHarshad Shirwadkar 		}
16728016e29fSHarshad Shirwadkar 
16738016e29fSHarshad Shirwadkar 		if (ret == 0) {
16748016e29fSHarshad Shirwadkar 			/* Range is not mapped */
16758016e29fSHarshad Shirwadkar 			path = ext4_find_extent(inode, cur, NULL, 0);
16768c9be1e5SHarshad Shirwadkar 			if (IS_ERR(path)) {
16778c9be1e5SHarshad Shirwadkar 				iput(inode);
16788c9be1e5SHarshad Shirwadkar 				return 0;
16798c9be1e5SHarshad Shirwadkar 			}
16808016e29fSHarshad Shirwadkar 			memset(&newex, 0, sizeof(newex));
16818016e29fSHarshad Shirwadkar 			newex.ee_block = cpu_to_le32(cur);
16828016e29fSHarshad Shirwadkar 			ext4_ext_store_pblock(
16838016e29fSHarshad Shirwadkar 				&newex, start_pblk + cur - start);
16848016e29fSHarshad Shirwadkar 			newex.ee_len = cpu_to_le16(map.m_len);
16858016e29fSHarshad Shirwadkar 			if (ext4_ext_is_unwritten(ex))
16868016e29fSHarshad Shirwadkar 				ext4_ext_mark_unwritten(&newex);
16878016e29fSHarshad Shirwadkar 			down_write(&EXT4_I(inode)->i_data_sem);
16888016e29fSHarshad Shirwadkar 			ret = ext4_ext_insert_extent(
16898016e29fSHarshad Shirwadkar 				NULL, inode, &path, &newex, 0);
16908016e29fSHarshad Shirwadkar 			up_write((&EXT4_I(inode)->i_data_sem));
16918016e29fSHarshad Shirwadkar 			ext4_ext_drop_refs(path);
16928016e29fSHarshad Shirwadkar 			kfree(path);
16938016e29fSHarshad Shirwadkar 			if (ret) {
16948016e29fSHarshad Shirwadkar 				iput(inode);
16958016e29fSHarshad Shirwadkar 				return 0;
16968016e29fSHarshad Shirwadkar 			}
16978016e29fSHarshad Shirwadkar 			goto next;
16988016e29fSHarshad Shirwadkar 		}
16998016e29fSHarshad Shirwadkar 
17008016e29fSHarshad Shirwadkar 		if (start_pblk + cur - start != map.m_pblk) {
17018016e29fSHarshad Shirwadkar 			/*
17028016e29fSHarshad Shirwadkar 			 * Logical to physical mapping changed. This can happen
17038016e29fSHarshad Shirwadkar 			 * if this range was removed and then reallocated to
17048016e29fSHarshad Shirwadkar 			 * map to new physical blocks during a fast commit.
17058016e29fSHarshad Shirwadkar 			 */
17068016e29fSHarshad Shirwadkar 			ret = ext4_ext_replay_update_ex(inode, cur, map.m_len,
17078016e29fSHarshad Shirwadkar 					ext4_ext_is_unwritten(ex),
17088016e29fSHarshad Shirwadkar 					start_pblk + cur - start);
17098016e29fSHarshad Shirwadkar 			if (ret) {
17108016e29fSHarshad Shirwadkar 				iput(inode);
17118016e29fSHarshad Shirwadkar 				return 0;
17128016e29fSHarshad Shirwadkar 			}
17138016e29fSHarshad Shirwadkar 			/*
17148016e29fSHarshad Shirwadkar 			 * Mark the old blocks as free since they aren't used
17158016e29fSHarshad Shirwadkar 			 * anymore. We maintain an array of all the modified
17168016e29fSHarshad Shirwadkar 			 * inodes. In case these blocks are still used at either
17178016e29fSHarshad Shirwadkar 			 * a different logical range in the same inode or in
17188016e29fSHarshad Shirwadkar 			 * some different inode, we will mark them as allocated
17198016e29fSHarshad Shirwadkar 			 * at the end of the FC replay using our array of
17208016e29fSHarshad Shirwadkar 			 * modified inodes.
17218016e29fSHarshad Shirwadkar 			 */
17228016e29fSHarshad Shirwadkar 			ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, 0);
17238016e29fSHarshad Shirwadkar 			goto next;
17248016e29fSHarshad Shirwadkar 		}
17258016e29fSHarshad Shirwadkar 
17268016e29fSHarshad Shirwadkar 		/* Range is mapped and needs a state change */
1727fcdf3c34SArnd Bergmann 		jbd_debug(1, "Converting from %ld to %d %lld",
17288016e29fSHarshad Shirwadkar 				map.m_flags & EXT4_MAP_UNWRITTEN,
17298016e29fSHarshad Shirwadkar 			ext4_ext_is_unwritten(ex), map.m_pblk);
17308016e29fSHarshad Shirwadkar 		ret = ext4_ext_replay_update_ex(inode, cur, map.m_len,
17318016e29fSHarshad Shirwadkar 					ext4_ext_is_unwritten(ex), map.m_pblk);
17328016e29fSHarshad Shirwadkar 		if (ret) {
17338016e29fSHarshad Shirwadkar 			iput(inode);
17348016e29fSHarshad Shirwadkar 			return 0;
17358016e29fSHarshad Shirwadkar 		}
17368016e29fSHarshad Shirwadkar 		/*
17378016e29fSHarshad Shirwadkar 		 * We may have split the extent tree while toggling the state.
17388016e29fSHarshad Shirwadkar 		 * Try to shrink the extent tree now.
17398016e29fSHarshad Shirwadkar 		 */
17408016e29fSHarshad Shirwadkar 		ext4_ext_replay_shrink_inode(inode, start + len);
17418016e29fSHarshad Shirwadkar next:
17428016e29fSHarshad Shirwadkar 		cur += map.m_len;
17438016e29fSHarshad Shirwadkar 		remaining -= map.m_len;
17448016e29fSHarshad Shirwadkar 	}
17458016e29fSHarshad Shirwadkar 	ext4_ext_replay_shrink_inode(inode, i_size_read(inode) >>
17468016e29fSHarshad Shirwadkar 					sb->s_blocksize_bits);
17478016e29fSHarshad Shirwadkar 	iput(inode);
17488016e29fSHarshad Shirwadkar 	return 0;
17498016e29fSHarshad Shirwadkar }
17508016e29fSHarshad Shirwadkar 
17518016e29fSHarshad Shirwadkar /* Replay DEL_RANGE tag */
17528016e29fSHarshad Shirwadkar static int
1753a7ba36bcSHarshad Shirwadkar ext4_fc_replay_del_range(struct super_block *sb, struct ext4_fc_tl *tl,
1754a7ba36bcSHarshad Shirwadkar 			 u8 *val)
17558016e29fSHarshad Shirwadkar {
17568016e29fSHarshad Shirwadkar 	struct inode *inode;
1757a7ba36bcSHarshad Shirwadkar 	struct ext4_fc_del_range lrange;
17588016e29fSHarshad Shirwadkar 	struct ext4_map_blocks map;
17598016e29fSHarshad Shirwadkar 	ext4_lblk_t cur, remaining;
17608016e29fSHarshad Shirwadkar 	int ret;
17618016e29fSHarshad Shirwadkar 
1762a7ba36bcSHarshad Shirwadkar 	memcpy(&lrange, val, sizeof(lrange));
1763a7ba36bcSHarshad Shirwadkar 	cur = le32_to_cpu(lrange.fc_lblk);
1764a7ba36bcSHarshad Shirwadkar 	remaining = le32_to_cpu(lrange.fc_len);
17658016e29fSHarshad Shirwadkar 
17668016e29fSHarshad Shirwadkar 	trace_ext4_fc_replay(sb, EXT4_FC_TAG_DEL_RANGE,
1767a7ba36bcSHarshad Shirwadkar 		le32_to_cpu(lrange.fc_ino), cur, remaining);
17688016e29fSHarshad Shirwadkar 
1769a7ba36bcSHarshad Shirwadkar 	inode = ext4_iget(sb, le32_to_cpu(lrange.fc_ino), EXT4_IGET_NORMAL);
177023dd561aSYi Li 	if (IS_ERR(inode)) {
1771a7ba36bcSHarshad Shirwadkar 		jbd_debug(1, "Inode %d not found", le32_to_cpu(lrange.fc_ino));
17728016e29fSHarshad Shirwadkar 		return 0;
17738016e29fSHarshad Shirwadkar 	}
17748016e29fSHarshad Shirwadkar 
17758016e29fSHarshad Shirwadkar 	ret = ext4_fc_record_modified_inode(sb, inode->i_ino);
17768016e29fSHarshad Shirwadkar 
17778016e29fSHarshad Shirwadkar 	jbd_debug(1, "DEL_RANGE, inode %ld, lblk %d, len %d\n",
1778a7ba36bcSHarshad Shirwadkar 			inode->i_ino, le32_to_cpu(lrange.fc_lblk),
1779a7ba36bcSHarshad Shirwadkar 			le32_to_cpu(lrange.fc_len));
17808016e29fSHarshad Shirwadkar 	while (remaining > 0) {
17818016e29fSHarshad Shirwadkar 		map.m_lblk = cur;
17828016e29fSHarshad Shirwadkar 		map.m_len = remaining;
17838016e29fSHarshad Shirwadkar 
17848016e29fSHarshad Shirwadkar 		ret = ext4_map_blocks(NULL, inode, &map, 0);
17858016e29fSHarshad Shirwadkar 		if (ret < 0) {
17868016e29fSHarshad Shirwadkar 			iput(inode);
17878016e29fSHarshad Shirwadkar 			return 0;
17888016e29fSHarshad Shirwadkar 		}
17898016e29fSHarshad Shirwadkar 		if (ret > 0) {
17908016e29fSHarshad Shirwadkar 			remaining -= ret;
17918016e29fSHarshad Shirwadkar 			cur += ret;
17928016e29fSHarshad Shirwadkar 			ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, 0);
17938016e29fSHarshad Shirwadkar 		} else {
17948016e29fSHarshad Shirwadkar 			remaining -= map.m_len;
17958016e29fSHarshad Shirwadkar 			cur += map.m_len;
17968016e29fSHarshad Shirwadkar 		}
17978016e29fSHarshad Shirwadkar 	}
17988016e29fSHarshad Shirwadkar 
17990b5b5a62SXin Yin 	down_write(&EXT4_I(inode)->i_data_sem);
18000b5b5a62SXin Yin 	ret = ext4_ext_remove_space(inode, lrange.fc_lblk,
18010b5b5a62SXin Yin 				lrange.fc_lblk + lrange.fc_len - 1);
18020b5b5a62SXin Yin 	up_write(&EXT4_I(inode)->i_data_sem);
18030b5b5a62SXin Yin 	if (ret) {
18040b5b5a62SXin Yin 		iput(inode);
18050b5b5a62SXin Yin 		return 0;
18060b5b5a62SXin Yin 	}
18078016e29fSHarshad Shirwadkar 	ext4_ext_replay_shrink_inode(inode,
18088016e29fSHarshad Shirwadkar 		i_size_read(inode) >> sb->s_blocksize_bits);
18098016e29fSHarshad Shirwadkar 	ext4_mark_inode_dirty(NULL, inode);
18108016e29fSHarshad Shirwadkar 	iput(inode);
18118016e29fSHarshad Shirwadkar 
18128016e29fSHarshad Shirwadkar 	return 0;
18138016e29fSHarshad Shirwadkar }
18148016e29fSHarshad Shirwadkar 
18158016e29fSHarshad Shirwadkar static void ext4_fc_set_bitmaps_and_counters(struct super_block *sb)
18168016e29fSHarshad Shirwadkar {
18178016e29fSHarshad Shirwadkar 	struct ext4_fc_replay_state *state;
18188016e29fSHarshad Shirwadkar 	struct inode *inode;
18198016e29fSHarshad Shirwadkar 	struct ext4_ext_path *path = NULL;
18208016e29fSHarshad Shirwadkar 	struct ext4_map_blocks map;
18218016e29fSHarshad Shirwadkar 	int i, ret, j;
18228016e29fSHarshad Shirwadkar 	ext4_lblk_t cur, end;
18238016e29fSHarshad Shirwadkar 
18248016e29fSHarshad Shirwadkar 	state = &EXT4_SB(sb)->s_fc_replay_state;
18258016e29fSHarshad Shirwadkar 	for (i = 0; i < state->fc_modified_inodes_used; i++) {
18268016e29fSHarshad Shirwadkar 		inode = ext4_iget(sb, state->fc_modified_inodes[i],
18278016e29fSHarshad Shirwadkar 			EXT4_IGET_NORMAL);
182823dd561aSYi Li 		if (IS_ERR(inode)) {
18298016e29fSHarshad Shirwadkar 			jbd_debug(1, "Inode %d not found.",
18308016e29fSHarshad Shirwadkar 				state->fc_modified_inodes[i]);
18318016e29fSHarshad Shirwadkar 			continue;
18328016e29fSHarshad Shirwadkar 		}
18338016e29fSHarshad Shirwadkar 		cur = 0;
18348016e29fSHarshad Shirwadkar 		end = EXT_MAX_BLOCKS;
18351ebf2178SHarshad Shirwadkar 		if (ext4_test_inode_flag(inode, EXT4_INODE_INLINE_DATA)) {
18361ebf2178SHarshad Shirwadkar 			iput(inode);
18371ebf2178SHarshad Shirwadkar 			continue;
18381ebf2178SHarshad Shirwadkar 		}
18398016e29fSHarshad Shirwadkar 		while (cur < end) {
18408016e29fSHarshad Shirwadkar 			map.m_lblk = cur;
18418016e29fSHarshad Shirwadkar 			map.m_len = end - cur;
18428016e29fSHarshad Shirwadkar 
18438016e29fSHarshad Shirwadkar 			ret = ext4_map_blocks(NULL, inode, &map, 0);
18448016e29fSHarshad Shirwadkar 			if (ret < 0)
18458016e29fSHarshad Shirwadkar 				break;
18468016e29fSHarshad Shirwadkar 
18478016e29fSHarshad Shirwadkar 			if (ret > 0) {
18488016e29fSHarshad Shirwadkar 				path = ext4_find_extent(inode, map.m_lblk, NULL, 0);
184923dd561aSYi Li 				if (!IS_ERR(path)) {
18508016e29fSHarshad Shirwadkar 					for (j = 0; j < path->p_depth; j++)
18518016e29fSHarshad Shirwadkar 						ext4_mb_mark_bb(inode->i_sb,
18528016e29fSHarshad Shirwadkar 							path[j].p_block, 1, 1);
18538016e29fSHarshad Shirwadkar 					ext4_ext_drop_refs(path);
18548016e29fSHarshad Shirwadkar 					kfree(path);
18558016e29fSHarshad Shirwadkar 				}
18568016e29fSHarshad Shirwadkar 				cur += ret;
18578016e29fSHarshad Shirwadkar 				ext4_mb_mark_bb(inode->i_sb, map.m_pblk,
18588016e29fSHarshad Shirwadkar 							map.m_len, 1);
18598016e29fSHarshad Shirwadkar 			} else {
18608016e29fSHarshad Shirwadkar 				cur = cur + (map.m_len ? map.m_len : 1);
18618016e29fSHarshad Shirwadkar 			}
18628016e29fSHarshad Shirwadkar 		}
18638016e29fSHarshad Shirwadkar 		iput(inode);
18648016e29fSHarshad Shirwadkar 	}
18658016e29fSHarshad Shirwadkar }
18668016e29fSHarshad Shirwadkar 
18678016e29fSHarshad Shirwadkar /*
18688016e29fSHarshad Shirwadkar  * Check if block is in excluded regions for block allocation. The simple
18698016e29fSHarshad Shirwadkar  * allocator that runs during replay phase is calls this function to see
18708016e29fSHarshad Shirwadkar  * if it is okay to use a block.
18718016e29fSHarshad Shirwadkar  */
18728016e29fSHarshad Shirwadkar bool ext4_fc_replay_check_excluded(struct super_block *sb, ext4_fsblk_t blk)
18738016e29fSHarshad Shirwadkar {
18748016e29fSHarshad Shirwadkar 	int i;
18758016e29fSHarshad Shirwadkar 	struct ext4_fc_replay_state *state;
18768016e29fSHarshad Shirwadkar 
18778016e29fSHarshad Shirwadkar 	state = &EXT4_SB(sb)->s_fc_replay_state;
18788016e29fSHarshad Shirwadkar 	for (i = 0; i < state->fc_regions_valid; i++) {
18798016e29fSHarshad Shirwadkar 		if (state->fc_regions[i].ino == 0 ||
18808016e29fSHarshad Shirwadkar 			state->fc_regions[i].len == 0)
18818016e29fSHarshad Shirwadkar 			continue;
18828016e29fSHarshad Shirwadkar 		if (blk >= state->fc_regions[i].pblk &&
18838016e29fSHarshad Shirwadkar 		    blk < state->fc_regions[i].pblk + state->fc_regions[i].len)
18848016e29fSHarshad Shirwadkar 			return true;
18858016e29fSHarshad Shirwadkar 	}
18868016e29fSHarshad Shirwadkar 	return false;
18878016e29fSHarshad Shirwadkar }
18888016e29fSHarshad Shirwadkar 
18898016e29fSHarshad Shirwadkar /* Cleanup function called after replay */
18908016e29fSHarshad Shirwadkar void ext4_fc_replay_cleanup(struct super_block *sb)
18918016e29fSHarshad Shirwadkar {
18928016e29fSHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(sb);
18938016e29fSHarshad Shirwadkar 
18948016e29fSHarshad Shirwadkar 	sbi->s_mount_state &= ~EXT4_FC_REPLAY;
18958016e29fSHarshad Shirwadkar 	kfree(sbi->s_fc_replay_state.fc_regions);
18968016e29fSHarshad Shirwadkar 	kfree(sbi->s_fc_replay_state.fc_modified_inodes);
18978016e29fSHarshad Shirwadkar }
18988016e29fSHarshad Shirwadkar 
18998016e29fSHarshad Shirwadkar /*
19008016e29fSHarshad Shirwadkar  * Recovery Scan phase handler
19018016e29fSHarshad Shirwadkar  *
19028016e29fSHarshad Shirwadkar  * This function is called during the scan phase and is responsible
19038016e29fSHarshad Shirwadkar  * for doing following things:
19048016e29fSHarshad Shirwadkar  * - Make sure the fast commit area has valid tags for replay
19058016e29fSHarshad Shirwadkar  * - Count number of tags that need to be replayed by the replay handler
19068016e29fSHarshad Shirwadkar  * - Verify CRC
19078016e29fSHarshad Shirwadkar  * - Create a list of excluded blocks for allocation during replay phase
19088016e29fSHarshad Shirwadkar  *
19098016e29fSHarshad Shirwadkar  * This function returns JBD2_FC_REPLAY_CONTINUE to indicate that SCAN is
19108016e29fSHarshad Shirwadkar  * incomplete and JBD2 should send more blocks. It returns JBD2_FC_REPLAY_STOP
19118016e29fSHarshad Shirwadkar  * to indicate that scan has finished and JBD2 can now start replay phase.
19128016e29fSHarshad Shirwadkar  * It returns a negative error to indicate that there was an error. At the end
19138016e29fSHarshad Shirwadkar  * of a successful scan phase, sbi->s_fc_replay_state.fc_replay_num_tags is set
19148016e29fSHarshad Shirwadkar  * to indicate the number of tags that need to replayed during the replay phase.
19158016e29fSHarshad Shirwadkar  */
19168016e29fSHarshad Shirwadkar static int ext4_fc_replay_scan(journal_t *journal,
19178016e29fSHarshad Shirwadkar 				struct buffer_head *bh, int off,
19188016e29fSHarshad Shirwadkar 				tid_t expected_tid)
19198016e29fSHarshad Shirwadkar {
19208016e29fSHarshad Shirwadkar 	struct super_block *sb = journal->j_private;
19218016e29fSHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(sb);
19228016e29fSHarshad Shirwadkar 	struct ext4_fc_replay_state *state;
19238016e29fSHarshad Shirwadkar 	int ret = JBD2_FC_REPLAY_CONTINUE;
1924a7ba36bcSHarshad Shirwadkar 	struct ext4_fc_add_range ext;
1925a7ba36bcSHarshad Shirwadkar 	struct ext4_fc_tl tl;
1926a7ba36bcSHarshad Shirwadkar 	struct ext4_fc_tail tail;
1927a7ba36bcSHarshad Shirwadkar 	__u8 *start, *end, *cur, *val;
1928a7ba36bcSHarshad Shirwadkar 	struct ext4_fc_head head;
19298016e29fSHarshad Shirwadkar 	struct ext4_extent *ex;
19308016e29fSHarshad Shirwadkar 
19318016e29fSHarshad Shirwadkar 	state = &sbi->s_fc_replay_state;
19328016e29fSHarshad Shirwadkar 
19338016e29fSHarshad Shirwadkar 	start = (u8 *)bh->b_data;
19348016e29fSHarshad Shirwadkar 	end = (__u8 *)bh->b_data + journal->j_blocksize - 1;
19358016e29fSHarshad Shirwadkar 
19368016e29fSHarshad Shirwadkar 	if (state->fc_replay_expected_off == 0) {
19378016e29fSHarshad Shirwadkar 		state->fc_cur_tag = 0;
19388016e29fSHarshad Shirwadkar 		state->fc_replay_num_tags = 0;
19398016e29fSHarshad Shirwadkar 		state->fc_crc = 0;
19408016e29fSHarshad Shirwadkar 		state->fc_regions = NULL;
19418016e29fSHarshad Shirwadkar 		state->fc_regions_valid = state->fc_regions_used =
19428016e29fSHarshad Shirwadkar 			state->fc_regions_size = 0;
19438016e29fSHarshad Shirwadkar 		/* Check if we can stop early */
19448016e29fSHarshad Shirwadkar 		if (le16_to_cpu(((struct ext4_fc_tl *)start)->fc_tag)
19458016e29fSHarshad Shirwadkar 			!= EXT4_FC_TAG_HEAD)
19468016e29fSHarshad Shirwadkar 			return 0;
19478016e29fSHarshad Shirwadkar 	}
19488016e29fSHarshad Shirwadkar 
19498016e29fSHarshad Shirwadkar 	if (off != state->fc_replay_expected_off) {
19508016e29fSHarshad Shirwadkar 		ret = -EFSCORRUPTED;
19518016e29fSHarshad Shirwadkar 		goto out_err;
19528016e29fSHarshad Shirwadkar 	}
19538016e29fSHarshad Shirwadkar 
19548016e29fSHarshad Shirwadkar 	state->fc_replay_expected_off++;
1955a7ba36bcSHarshad Shirwadkar 	for (cur = start; cur < end; cur = cur + sizeof(tl) + le16_to_cpu(tl.fc_len)) {
1956a7ba36bcSHarshad Shirwadkar 		memcpy(&tl, cur, sizeof(tl));
1957a7ba36bcSHarshad Shirwadkar 		val = cur + sizeof(tl);
19588016e29fSHarshad Shirwadkar 		jbd_debug(3, "Scan phase, tag:%s, blk %lld\n",
1959a7ba36bcSHarshad Shirwadkar 			  tag2str(le16_to_cpu(tl.fc_tag)), bh->b_blocknr);
1960a7ba36bcSHarshad Shirwadkar 		switch (le16_to_cpu(tl.fc_tag)) {
19618016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_ADD_RANGE:
1962a7ba36bcSHarshad Shirwadkar 			memcpy(&ext, val, sizeof(ext));
1963a7ba36bcSHarshad Shirwadkar 			ex = (struct ext4_extent *)&ext.fc_ex;
19648016e29fSHarshad Shirwadkar 			ret = ext4_fc_record_regions(sb,
1965a7ba36bcSHarshad Shirwadkar 				le32_to_cpu(ext.fc_ino),
19668016e29fSHarshad Shirwadkar 				le32_to_cpu(ex->ee_block), ext4_ext_pblock(ex),
1967599ea31dSXin Yin 				ext4_ext_get_actual_len(ex), 0);
19688016e29fSHarshad Shirwadkar 			if (ret < 0)
19698016e29fSHarshad Shirwadkar 				break;
19708016e29fSHarshad Shirwadkar 			ret = JBD2_FC_REPLAY_CONTINUE;
19718016e29fSHarshad Shirwadkar 			fallthrough;
19728016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_DEL_RANGE:
19738016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_LINK:
19748016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_UNLINK:
19758016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_CREAT:
19768016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_INODE:
19778016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_PAD:
19788016e29fSHarshad Shirwadkar 			state->fc_cur_tag++;
1979a7ba36bcSHarshad Shirwadkar 			state->fc_crc = ext4_chksum(sbi, state->fc_crc, cur,
1980a7ba36bcSHarshad Shirwadkar 					sizeof(tl) + le16_to_cpu(tl.fc_len));
19818016e29fSHarshad Shirwadkar 			break;
19828016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_TAIL:
19838016e29fSHarshad Shirwadkar 			state->fc_cur_tag++;
1984a7ba36bcSHarshad Shirwadkar 			memcpy(&tail, val, sizeof(tail));
1985a7ba36bcSHarshad Shirwadkar 			state->fc_crc = ext4_chksum(sbi, state->fc_crc, cur,
1986a7ba36bcSHarshad Shirwadkar 						sizeof(tl) +
19878016e29fSHarshad Shirwadkar 						offsetof(struct ext4_fc_tail,
19888016e29fSHarshad Shirwadkar 						fc_crc));
1989a7ba36bcSHarshad Shirwadkar 			if (le32_to_cpu(tail.fc_tid) == expected_tid &&
1990a7ba36bcSHarshad Shirwadkar 				le32_to_cpu(tail.fc_crc) == state->fc_crc) {
19918016e29fSHarshad Shirwadkar 				state->fc_replay_num_tags = state->fc_cur_tag;
19928016e29fSHarshad Shirwadkar 				state->fc_regions_valid =
19938016e29fSHarshad Shirwadkar 					state->fc_regions_used;
19948016e29fSHarshad Shirwadkar 			} else {
19958016e29fSHarshad Shirwadkar 				ret = state->fc_replay_num_tags ?
19968016e29fSHarshad Shirwadkar 					JBD2_FC_REPLAY_STOP : -EFSBADCRC;
19978016e29fSHarshad Shirwadkar 			}
19988016e29fSHarshad Shirwadkar 			state->fc_crc = 0;
19998016e29fSHarshad Shirwadkar 			break;
20008016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_HEAD:
2001a7ba36bcSHarshad Shirwadkar 			memcpy(&head, val, sizeof(head));
2002a7ba36bcSHarshad Shirwadkar 			if (le32_to_cpu(head.fc_features) &
20038016e29fSHarshad Shirwadkar 				~EXT4_FC_SUPPORTED_FEATURES) {
20048016e29fSHarshad Shirwadkar 				ret = -EOPNOTSUPP;
20058016e29fSHarshad Shirwadkar 				break;
20068016e29fSHarshad Shirwadkar 			}
2007a7ba36bcSHarshad Shirwadkar 			if (le32_to_cpu(head.fc_tid) != expected_tid) {
20088016e29fSHarshad Shirwadkar 				ret = JBD2_FC_REPLAY_STOP;
20098016e29fSHarshad Shirwadkar 				break;
20108016e29fSHarshad Shirwadkar 			}
20118016e29fSHarshad Shirwadkar 			state->fc_cur_tag++;
2012a7ba36bcSHarshad Shirwadkar 			state->fc_crc = ext4_chksum(sbi, state->fc_crc, cur,
2013a7ba36bcSHarshad Shirwadkar 					    sizeof(tl) + le16_to_cpu(tl.fc_len));
20148016e29fSHarshad Shirwadkar 			break;
20158016e29fSHarshad Shirwadkar 		default:
20168016e29fSHarshad Shirwadkar 			ret = state->fc_replay_num_tags ?
20178016e29fSHarshad Shirwadkar 				JBD2_FC_REPLAY_STOP : -ECANCELED;
20188016e29fSHarshad Shirwadkar 		}
20198016e29fSHarshad Shirwadkar 		if (ret < 0 || ret == JBD2_FC_REPLAY_STOP)
20208016e29fSHarshad Shirwadkar 			break;
20218016e29fSHarshad Shirwadkar 	}
20228016e29fSHarshad Shirwadkar 
20238016e29fSHarshad Shirwadkar out_err:
20248016e29fSHarshad Shirwadkar 	trace_ext4_fc_replay_scan(sb, ret, off);
20258016e29fSHarshad Shirwadkar 	return ret;
20268016e29fSHarshad Shirwadkar }
20278016e29fSHarshad Shirwadkar 
20285b849b5fSHarshad Shirwadkar /*
20295b849b5fSHarshad Shirwadkar  * Main recovery path entry point.
20308016e29fSHarshad Shirwadkar  * The meaning of return codes is similar as above.
20315b849b5fSHarshad Shirwadkar  */
20325b849b5fSHarshad Shirwadkar static int ext4_fc_replay(journal_t *journal, struct buffer_head *bh,
20335b849b5fSHarshad Shirwadkar 				enum passtype pass, int off, tid_t expected_tid)
20345b849b5fSHarshad Shirwadkar {
20358016e29fSHarshad Shirwadkar 	struct super_block *sb = journal->j_private;
20368016e29fSHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(sb);
2037a7ba36bcSHarshad Shirwadkar 	struct ext4_fc_tl tl;
2038a7ba36bcSHarshad Shirwadkar 	__u8 *start, *end, *cur, *val;
20398016e29fSHarshad Shirwadkar 	int ret = JBD2_FC_REPLAY_CONTINUE;
20408016e29fSHarshad Shirwadkar 	struct ext4_fc_replay_state *state = &sbi->s_fc_replay_state;
2041a7ba36bcSHarshad Shirwadkar 	struct ext4_fc_tail tail;
20428016e29fSHarshad Shirwadkar 
20438016e29fSHarshad Shirwadkar 	if (pass == PASS_SCAN) {
20448016e29fSHarshad Shirwadkar 		state->fc_current_pass = PASS_SCAN;
20458016e29fSHarshad Shirwadkar 		return ext4_fc_replay_scan(journal, bh, off, expected_tid);
20468016e29fSHarshad Shirwadkar 	}
20478016e29fSHarshad Shirwadkar 
20488016e29fSHarshad Shirwadkar 	if (state->fc_current_pass != pass) {
20498016e29fSHarshad Shirwadkar 		state->fc_current_pass = pass;
20508016e29fSHarshad Shirwadkar 		sbi->s_mount_state |= EXT4_FC_REPLAY;
20518016e29fSHarshad Shirwadkar 	}
20528016e29fSHarshad Shirwadkar 	if (!sbi->s_fc_replay_state.fc_replay_num_tags) {
20538016e29fSHarshad Shirwadkar 		jbd_debug(1, "Replay stops\n");
20548016e29fSHarshad Shirwadkar 		ext4_fc_set_bitmaps_and_counters(sb);
20555b849b5fSHarshad Shirwadkar 		return 0;
20565b849b5fSHarshad Shirwadkar 	}
20575b849b5fSHarshad Shirwadkar 
20588016e29fSHarshad Shirwadkar #ifdef CONFIG_EXT4_DEBUG
20598016e29fSHarshad Shirwadkar 	if (sbi->s_fc_debug_max_replay && off >= sbi->s_fc_debug_max_replay) {
20608016e29fSHarshad Shirwadkar 		pr_warn("Dropping fc block %d because max_replay set\n", off);
20618016e29fSHarshad Shirwadkar 		return JBD2_FC_REPLAY_STOP;
20628016e29fSHarshad Shirwadkar 	}
20638016e29fSHarshad Shirwadkar #endif
20648016e29fSHarshad Shirwadkar 
20658016e29fSHarshad Shirwadkar 	start = (u8 *)bh->b_data;
20668016e29fSHarshad Shirwadkar 	end = (__u8 *)bh->b_data + journal->j_blocksize - 1;
20678016e29fSHarshad Shirwadkar 
2068a7ba36bcSHarshad Shirwadkar 	for (cur = start; cur < end; cur = cur + sizeof(tl) + le16_to_cpu(tl.fc_len)) {
2069a7ba36bcSHarshad Shirwadkar 		memcpy(&tl, cur, sizeof(tl));
2070a7ba36bcSHarshad Shirwadkar 		val = cur + sizeof(tl);
2071a7ba36bcSHarshad Shirwadkar 
20728016e29fSHarshad Shirwadkar 		if (state->fc_replay_num_tags == 0) {
20738016e29fSHarshad Shirwadkar 			ret = JBD2_FC_REPLAY_STOP;
20748016e29fSHarshad Shirwadkar 			ext4_fc_set_bitmaps_and_counters(sb);
20758016e29fSHarshad Shirwadkar 			break;
20768016e29fSHarshad Shirwadkar 		}
20778016e29fSHarshad Shirwadkar 		jbd_debug(3, "Replay phase, tag:%s\n",
2078a7ba36bcSHarshad Shirwadkar 				tag2str(le16_to_cpu(tl.fc_tag)));
20798016e29fSHarshad Shirwadkar 		state->fc_replay_num_tags--;
2080a7ba36bcSHarshad Shirwadkar 		switch (le16_to_cpu(tl.fc_tag)) {
20818016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_LINK:
2082a7ba36bcSHarshad Shirwadkar 			ret = ext4_fc_replay_link(sb, &tl, val);
20838016e29fSHarshad Shirwadkar 			break;
20848016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_UNLINK:
2085a7ba36bcSHarshad Shirwadkar 			ret = ext4_fc_replay_unlink(sb, &tl, val);
20868016e29fSHarshad Shirwadkar 			break;
20878016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_ADD_RANGE:
2088a7ba36bcSHarshad Shirwadkar 			ret = ext4_fc_replay_add_range(sb, &tl, val);
20898016e29fSHarshad Shirwadkar 			break;
20908016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_CREAT:
2091a7ba36bcSHarshad Shirwadkar 			ret = ext4_fc_replay_create(sb, &tl, val);
20928016e29fSHarshad Shirwadkar 			break;
20938016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_DEL_RANGE:
2094a7ba36bcSHarshad Shirwadkar 			ret = ext4_fc_replay_del_range(sb, &tl, val);
20958016e29fSHarshad Shirwadkar 			break;
20968016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_INODE:
2097a7ba36bcSHarshad Shirwadkar 			ret = ext4_fc_replay_inode(sb, &tl, val);
20988016e29fSHarshad Shirwadkar 			break;
20998016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_PAD:
21008016e29fSHarshad Shirwadkar 			trace_ext4_fc_replay(sb, EXT4_FC_TAG_PAD, 0,
2101a7ba36bcSHarshad Shirwadkar 					     le16_to_cpu(tl.fc_len), 0);
21028016e29fSHarshad Shirwadkar 			break;
21038016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_TAIL:
21048016e29fSHarshad Shirwadkar 			trace_ext4_fc_replay(sb, EXT4_FC_TAG_TAIL, 0,
2105a7ba36bcSHarshad Shirwadkar 					     le16_to_cpu(tl.fc_len), 0);
2106a7ba36bcSHarshad Shirwadkar 			memcpy(&tail, val, sizeof(tail));
2107a7ba36bcSHarshad Shirwadkar 			WARN_ON(le32_to_cpu(tail.fc_tid) != expected_tid);
21088016e29fSHarshad Shirwadkar 			break;
21098016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_HEAD:
21108016e29fSHarshad Shirwadkar 			break;
21118016e29fSHarshad Shirwadkar 		default:
2112a7ba36bcSHarshad Shirwadkar 			trace_ext4_fc_replay(sb, le16_to_cpu(tl.fc_tag), 0,
2113a7ba36bcSHarshad Shirwadkar 					     le16_to_cpu(tl.fc_len), 0);
21148016e29fSHarshad Shirwadkar 			ret = -ECANCELED;
21158016e29fSHarshad Shirwadkar 			break;
21168016e29fSHarshad Shirwadkar 		}
21178016e29fSHarshad Shirwadkar 		if (ret < 0)
21188016e29fSHarshad Shirwadkar 			break;
21198016e29fSHarshad Shirwadkar 		ret = JBD2_FC_REPLAY_CONTINUE;
21208016e29fSHarshad Shirwadkar 	}
21218016e29fSHarshad Shirwadkar 	return ret;
21228016e29fSHarshad Shirwadkar }
21238016e29fSHarshad Shirwadkar 
21246866d7b3SHarshad Shirwadkar void ext4_fc_init(struct super_block *sb, journal_t *journal)
21256866d7b3SHarshad Shirwadkar {
21265b849b5fSHarshad Shirwadkar 	/*
21275b849b5fSHarshad Shirwadkar 	 * We set replay callback even if fast commit disabled because we may
21285b849b5fSHarshad Shirwadkar 	 * could still have fast commit blocks that need to be replayed even if
21295b849b5fSHarshad Shirwadkar 	 * fast commit has now been turned off.
21305b849b5fSHarshad Shirwadkar 	 */
21315b849b5fSHarshad Shirwadkar 	journal->j_fc_replay_callback = ext4_fc_replay;
21326866d7b3SHarshad Shirwadkar 	if (!test_opt2(sb, JOURNAL_FAST_COMMIT))
21336866d7b3SHarshad Shirwadkar 		return;
2134ff780b91SHarshad Shirwadkar 	journal->j_fc_cleanup_callback = ext4_fc_cleanup;
21356866d7b3SHarshad Shirwadkar }
2136aa75f4d3SHarshad Shirwadkar 
/*
 * Human-readable names for the fast commit ineligibility reasons, indexed by
 * reason code. Both the strings and the pointer table are read-only.
 */
static const char * const fc_ineligible_reasons[] = {
	"Extended attributes changed",
	"Cross rename",
	"Journal flag changed",
	"Insufficient memory",
	"Swap boot",
	"Resize",
	"Dir renamed",
	"Falloc range op",
	"Data journalling",
	"FC Commit Failed"
};
2149ce8c59d1SHarshad Shirwadkar 
2150ce8c59d1SHarshad Shirwadkar int ext4_fc_info_show(struct seq_file *seq, void *v)
2151ce8c59d1SHarshad Shirwadkar {
2152ce8c59d1SHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB((struct super_block *)seq->private);
2153ce8c59d1SHarshad Shirwadkar 	struct ext4_fc_stats *stats = &sbi->s_fc_stats;
2154ce8c59d1SHarshad Shirwadkar 	int i;
2155ce8c59d1SHarshad Shirwadkar 
2156ce8c59d1SHarshad Shirwadkar 	if (v != SEQ_START_TOKEN)
2157ce8c59d1SHarshad Shirwadkar 		return 0;
2158ce8c59d1SHarshad Shirwadkar 
2159ce8c59d1SHarshad Shirwadkar 	seq_printf(seq,
2160ce8c59d1SHarshad Shirwadkar 		"fc stats:\n%ld commits\n%ld ineligible\n%ld numblks\n%lluus avg_commit_time\n",
2161ce8c59d1SHarshad Shirwadkar 		   stats->fc_num_commits, stats->fc_ineligible_commits,
2162ce8c59d1SHarshad Shirwadkar 		   stats->fc_numblks,
21630915e464SHarshad Shirwadkar 		   div_u64(stats->s_fc_avg_commit_time, 1000));
2164ce8c59d1SHarshad Shirwadkar 	seq_puts(seq, "Ineligible reasons:\n");
2165ce8c59d1SHarshad Shirwadkar 	for (i = 0; i < EXT4_FC_REASON_MAX; i++)
2166ce8c59d1SHarshad Shirwadkar 		seq_printf(seq, "\"%s\":\t%d\n", fc_ineligible_reasons[i],
2167ce8c59d1SHarshad Shirwadkar 			stats->fc_ineligible_reason_count[i]);
2168ce8c59d1SHarshad Shirwadkar 
2169ce8c59d1SHarshad Shirwadkar 	return 0;
2170ce8c59d1SHarshad Shirwadkar }
2171ce8c59d1SHarshad Shirwadkar 
2172aa75f4d3SHarshad Shirwadkar int __init ext4_fc_init_dentry_cache(void)
2173aa75f4d3SHarshad Shirwadkar {
2174aa75f4d3SHarshad Shirwadkar 	ext4_fc_dentry_cachep = KMEM_CACHE(ext4_fc_dentry_update,
2175aa75f4d3SHarshad Shirwadkar 					   SLAB_RECLAIM_ACCOUNT);
2176aa75f4d3SHarshad Shirwadkar 
2177aa75f4d3SHarshad Shirwadkar 	if (ext4_fc_dentry_cachep == NULL)
2178aa75f4d3SHarshad Shirwadkar 		return -ENOMEM;
2179aa75f4d3SHarshad Shirwadkar 
2180aa75f4d3SHarshad Shirwadkar 	return 0;
2181aa75f4d3SHarshad Shirwadkar }
2182ab047d51SSebastian Andrzej Siewior 
2183ab047d51SSebastian Andrzej Siewior void ext4_fc_destroy_dentry_cache(void)
2184ab047d51SSebastian Andrzej Siewior {
2185ab047d51SSebastian Andrzej Siewior 	kmem_cache_destroy(ext4_fc_dentry_cachep);
2186ab047d51SSebastian Andrzej Siewior }
2187