xref: /openbmc/linux/fs/ext4/fast_commit.c (revision 0d043351e5baf3857f915367deba2a518b6a0809)
16866d7b3SHarshad Shirwadkar // SPDX-License-Identifier: GPL-2.0
26866d7b3SHarshad Shirwadkar 
36866d7b3SHarshad Shirwadkar /*
46866d7b3SHarshad Shirwadkar  * fs/ext4/fast_commit.c
56866d7b3SHarshad Shirwadkar  *
66866d7b3SHarshad Shirwadkar  * Written by Harshad Shirwadkar <harshadshirwadkar@gmail.com>
76866d7b3SHarshad Shirwadkar  *
86866d7b3SHarshad Shirwadkar  * Ext4 fast commits routines.
96866d7b3SHarshad Shirwadkar  */
10aa75f4d3SHarshad Shirwadkar #include "ext4.h"
116866d7b3SHarshad Shirwadkar #include "ext4_jbd2.h"
12aa75f4d3SHarshad Shirwadkar #include "ext4_extents.h"
13aa75f4d3SHarshad Shirwadkar #include "mballoc.h"
14aa75f4d3SHarshad Shirwadkar 
15aa75f4d3SHarshad Shirwadkar /*
16aa75f4d3SHarshad Shirwadkar  * Ext4 Fast Commits
17aa75f4d3SHarshad Shirwadkar  * -----------------
18aa75f4d3SHarshad Shirwadkar  *
19aa75f4d3SHarshad Shirwadkar  * Ext4 fast commits implement fine grained journalling for Ext4.
20aa75f4d3SHarshad Shirwadkar  *
21aa75f4d3SHarshad Shirwadkar  * Fast commits are organized as a log of tag-length-value (TLV) structs. (See
22aa75f4d3SHarshad Shirwadkar  * struct ext4_fc_tl). Each TLV contains some delta that is replayed TLV by
23aa75f4d3SHarshad Shirwadkar  * TLV during the recovery phase. For the scenarios for which we currently
24aa75f4d3SHarshad Shirwadkar  * don't have replay code, fast commit falls back to full commits.
25aa75f4d3SHarshad Shirwadkar  * Fast commits record delta in one of the following three categories.
26aa75f4d3SHarshad Shirwadkar  *
27aa75f4d3SHarshad Shirwadkar  * (A) Directory entry updates:
28aa75f4d3SHarshad Shirwadkar  *
29aa75f4d3SHarshad Shirwadkar  * - EXT4_FC_TAG_UNLINK		- records directory entry unlink
30aa75f4d3SHarshad Shirwadkar  * - EXT4_FC_TAG_LINK		- records directory entry link
31aa75f4d3SHarshad Shirwadkar  * - EXT4_FC_TAG_CREAT		- records inode and directory entry creation
32aa75f4d3SHarshad Shirwadkar  *
33aa75f4d3SHarshad Shirwadkar  * (B) File specific data range updates:
34aa75f4d3SHarshad Shirwadkar  *
35aa75f4d3SHarshad Shirwadkar  * - EXT4_FC_TAG_ADD_RANGE	- records addition of new blocks to an inode
36aa75f4d3SHarshad Shirwadkar  * - EXT4_FC_TAG_DEL_RANGE	- records deletion of blocks from an inode
37aa75f4d3SHarshad Shirwadkar  *
38aa75f4d3SHarshad Shirwadkar  * (C) Inode metadata (mtime / ctime etc):
39aa75f4d3SHarshad Shirwadkar  *
40aa75f4d3SHarshad Shirwadkar  * - EXT4_FC_TAG_INODE		- record the inode that should be replayed
41aa75f4d3SHarshad Shirwadkar  *				  during recovery. Note that iblocks field is
42aa75f4d3SHarshad Shirwadkar  *				  not replayed and instead derived during
43aa75f4d3SHarshad Shirwadkar  *				  replay.
44aa75f4d3SHarshad Shirwadkar  * Commit Operation
45aa75f4d3SHarshad Shirwadkar  * ----------------
46aa75f4d3SHarshad Shirwadkar  * With fast commits, we maintain all the directory entry operations in the
47aa75f4d3SHarshad Shirwadkar  * order in which they are issued in an in-memory queue. This queue is flushed
48aa75f4d3SHarshad Shirwadkar  * to disk during the commit operation. We also maintain a list of inodes
49aa75f4d3SHarshad Shirwadkar  * that need to be committed during a fast commit in another in memory queue of
50aa75f4d3SHarshad Shirwadkar  * inodes. During the commit operation, we commit in the following order:
51aa75f4d3SHarshad Shirwadkar  *
52aa75f4d3SHarshad Shirwadkar  * [1] Lock inodes for any further data updates by setting COMMITTING state
53aa75f4d3SHarshad Shirwadkar  * [2] Submit data buffers of all the inodes
54aa75f4d3SHarshad Shirwadkar  * [3] Wait for [2] to complete
55aa75f4d3SHarshad Shirwadkar  * [4] Commit all the directory entry updates in the fast commit space
56aa75f4d3SHarshad Shirwadkar  * [5] Commit all the changed inode structures
57aa75f4d3SHarshad Shirwadkar  * [6] Write tail tag (this tag ensures the atomicity, please read the following
58aa75f4d3SHarshad Shirwadkar  *     section for more details).
59aa75f4d3SHarshad Shirwadkar  * [7] Wait for [4], [5] and [6] to complete.
60aa75f4d3SHarshad Shirwadkar  *
61aa75f4d3SHarshad Shirwadkar  * All the inode updates must call ext4_fc_start_update() before starting an
62aa75f4d3SHarshad Shirwadkar  * update. If such an ongoing update is present, fast commit waits for it to
63aa75f4d3SHarshad Shirwadkar  * complete. The completion of such an update is marked by
64aa75f4d3SHarshad Shirwadkar  * ext4_fc_stop_update().
65aa75f4d3SHarshad Shirwadkar  *
66aa75f4d3SHarshad Shirwadkar  * Fast Commit Ineligibility
67aa75f4d3SHarshad Shirwadkar  * -------------------------
687bbbe241SHarshad Shirwadkar  *
 * Not all operations are supported by fast commits today (e.g. extended
707bbbe241SHarshad Shirwadkar  * attributes). Fast commit ineligibility is marked by calling
717bbbe241SHarshad Shirwadkar  * ext4_fc_mark_ineligible(): This makes next fast commit operation to fall back
727bbbe241SHarshad Shirwadkar  * to full commit.
73aa75f4d3SHarshad Shirwadkar  *
74aa75f4d3SHarshad Shirwadkar  * Atomicity of commits
75aa75f4d3SHarshad Shirwadkar  * --------------------
76a740762fSHarshad Shirwadkar  * In order to guarantee atomicity during the commit operation, fast commit
77aa75f4d3SHarshad Shirwadkar  * uses "EXT4_FC_TAG_TAIL" tag that marks a fast commit as complete. Tail
78aa75f4d3SHarshad Shirwadkar  * tag contains CRC of the contents and TID of the transaction after which
79aa75f4d3SHarshad Shirwadkar  * this fast commit should be applied. Recovery code replays fast commit
80aa75f4d3SHarshad Shirwadkar  * logs only if there's at least 1 valid tail present. For every fast commit
81aa75f4d3SHarshad Shirwadkar  * operation, there is 1 tail. This means, we may end up with multiple tails
82aa75f4d3SHarshad Shirwadkar  * in the fast commit space. Here's an example:
83aa75f4d3SHarshad Shirwadkar  *
84aa75f4d3SHarshad Shirwadkar  * - Create a new file A and remove existing file B
85aa75f4d3SHarshad Shirwadkar  * - fsync()
86aa75f4d3SHarshad Shirwadkar  * - Append contents to file A
87aa75f4d3SHarshad Shirwadkar  * - Truncate file A
88aa75f4d3SHarshad Shirwadkar  * - fsync()
89aa75f4d3SHarshad Shirwadkar  *
90aa75f4d3SHarshad Shirwadkar  * The fast commit space at the end of above operations would look like this:
91aa75f4d3SHarshad Shirwadkar  *      [HEAD] [CREAT A] [UNLINK B] [TAIL] [ADD_RANGE A] [DEL_RANGE A] [TAIL]
92aa75f4d3SHarshad Shirwadkar  *             |<---  Fast Commit 1   --->|<---      Fast Commit 2     ---->|
93aa75f4d3SHarshad Shirwadkar  *
94aa75f4d3SHarshad Shirwadkar  * Replay code should thus check for all the valid tails in the FC area.
95aa75f4d3SHarshad Shirwadkar  *
96b1b7dce3SHarshad Shirwadkar  * Fast Commit Replay Idempotence
97b1b7dce3SHarshad Shirwadkar  * ------------------------------
98b1b7dce3SHarshad Shirwadkar  *
 * Fast commit tags are idempotent in nature provided the recovery code follows
100b1b7dce3SHarshad Shirwadkar  * certain rules. The guiding principle that the commit path follows while
101b1b7dce3SHarshad Shirwadkar  * committing is that it stores the result of a particular operation instead of
102b1b7dce3SHarshad Shirwadkar  * storing the procedure.
103b1b7dce3SHarshad Shirwadkar  *
104b1b7dce3SHarshad Shirwadkar  * Let's consider this rename operation: 'mv /a /b'. Let's assume dirent '/a'
105b1b7dce3SHarshad Shirwadkar  * was associated with inode 10. During fast commit, instead of storing this
106b1b7dce3SHarshad Shirwadkar  * operation as a procedure "rename a to b", we store the resulting file system
107b1b7dce3SHarshad Shirwadkar  * state as a "series" of outcomes:
108b1b7dce3SHarshad Shirwadkar  *
109b1b7dce3SHarshad Shirwadkar  * - Link dirent b to inode 10
110b1b7dce3SHarshad Shirwadkar  * - Unlink dirent a
111b1b7dce3SHarshad Shirwadkar  * - Inode <10> with valid refcount
112b1b7dce3SHarshad Shirwadkar  *
 * Now when recovery code runs, it needs to "enforce" this state on the file
114b1b7dce3SHarshad Shirwadkar  * system. This is what guarantees idempotence of fast commit replay.
115b1b7dce3SHarshad Shirwadkar  *
116b1b7dce3SHarshad Shirwadkar  * Let's take an example of a procedure that is not idempotent and see how fast
117b1b7dce3SHarshad Shirwadkar  * commits make it idempotent. Consider following sequence of operations:
118b1b7dce3SHarshad Shirwadkar  *
119b1b7dce3SHarshad Shirwadkar  *     rm A;    mv B A;    read A
120b1b7dce3SHarshad Shirwadkar  *  (x)     (y)        (z)
121b1b7dce3SHarshad Shirwadkar  *
122b1b7dce3SHarshad Shirwadkar  * (x), (y) and (z) are the points at which we can crash. If we store this
123b1b7dce3SHarshad Shirwadkar  * sequence of operations as is then the replay is not idempotent. Let's say
124b1b7dce3SHarshad Shirwadkar  * while in replay, we crash at (z). During the second replay, file A (which was
125b1b7dce3SHarshad Shirwadkar  * actually created as a result of "mv B A" operation) would get deleted. Thus,
126b1b7dce3SHarshad Shirwadkar  * file named A would be absent when we try to read A. So, this sequence of
127b1b7dce3SHarshad Shirwadkar  * operations is not idempotent. However, as mentioned above, instead of storing
128b1b7dce3SHarshad Shirwadkar  * the procedure fast commits store the outcome of each procedure. Thus the fast
129b1b7dce3SHarshad Shirwadkar  * commit log for above procedure would be as follows:
130b1b7dce3SHarshad Shirwadkar  *
131b1b7dce3SHarshad Shirwadkar  * (Let's assume dirent A was linked to inode 10 and dirent B was linked to
132b1b7dce3SHarshad Shirwadkar  * inode 11 before the replay)
133b1b7dce3SHarshad Shirwadkar  *
134b1b7dce3SHarshad Shirwadkar  *    [Unlink A]   [Link A to inode 11]   [Unlink B]   [Inode 11]
135b1b7dce3SHarshad Shirwadkar  * (w)          (x)                    (y)          (z)
136b1b7dce3SHarshad Shirwadkar  *
137b1b7dce3SHarshad Shirwadkar  * If we crash at (z), we will have file A linked to inode 11. During the second
138b1b7dce3SHarshad Shirwadkar  * replay, we will remove file A (inode 11). But we will create it back and make
139b1b7dce3SHarshad Shirwadkar  * it point to inode 11. We won't find B, so we'll just skip that step. At this
140b1b7dce3SHarshad Shirwadkar  * point, the refcount for inode 11 is not reliable, but that gets fixed by the
141b1b7dce3SHarshad Shirwadkar  * replay of last inode 11 tag. Crashes at points (w), (x) and (y) get handled
142b1b7dce3SHarshad Shirwadkar  * similarly. Thus, by converting a non-idempotent procedure into a series of
143b1b7dce3SHarshad Shirwadkar  * idempotent outcomes, fast commits ensured idempotence during the replay.
144b1b7dce3SHarshad Shirwadkar  *
145aa75f4d3SHarshad Shirwadkar  * TODOs
146aa75f4d3SHarshad Shirwadkar  * -----
147b1b7dce3SHarshad Shirwadkar  *
148b1b7dce3SHarshad Shirwadkar  * 0) Fast commit replay path hardening: Fast commit replay code should use
149b1b7dce3SHarshad Shirwadkar  *    journal handles to make sure all the updates it does during the replay
150b1b7dce3SHarshad Shirwadkar  *    path are atomic. With that if we crash during fast commit replay, after
151b1b7dce3SHarshad Shirwadkar  *    trying to do recovery again, we will find a file system where fast commit
152b1b7dce3SHarshad Shirwadkar  *    area is invalid (because new full commit would be found). In order to deal
153b1b7dce3SHarshad Shirwadkar  *    with that, fast commit replay code should ensure that the "FC_REPLAY"
154b1b7dce3SHarshad Shirwadkar  *    superblock state is persisted before starting the replay, so that after
155b1b7dce3SHarshad Shirwadkar  *    the crash, fast commit recovery code can look at that flag and perform
156b1b7dce3SHarshad Shirwadkar  *    fast commit recovery even if that area is invalidated by later full
157b1b7dce3SHarshad Shirwadkar  *    commits.
158b1b7dce3SHarshad Shirwadkar  *
159d1199b94SHarshad Shirwadkar  * 1) Fast commit's commit path locks the entire file system during fast
160d1199b94SHarshad Shirwadkar  *    commit. This has significant performance penalty. Instead of that, we
161d1199b94SHarshad Shirwadkar  *    should use ext4_fc_start/stop_update functions to start inode level
162d1199b94SHarshad Shirwadkar  *    updates from ext4_journal_start/stop. Once we do that we can drop file
163d1199b94SHarshad Shirwadkar  *    system locking during commit path.
164aa75f4d3SHarshad Shirwadkar  *
165d1199b94SHarshad Shirwadkar  * 2) Handle more ineligible cases.
166aa75f4d3SHarshad Shirwadkar  */
167aa75f4d3SHarshad Shirwadkar 
168aa75f4d3SHarshad Shirwadkar #include <trace/events/ext4.h>
169aa75f4d3SHarshad Shirwadkar static struct kmem_cache *ext4_fc_dentry_cachep;
170aa75f4d3SHarshad Shirwadkar 
171aa75f4d3SHarshad Shirwadkar static void ext4_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
172aa75f4d3SHarshad Shirwadkar {
173aa75f4d3SHarshad Shirwadkar 	BUFFER_TRACE(bh, "");
174aa75f4d3SHarshad Shirwadkar 	if (uptodate) {
175aa75f4d3SHarshad Shirwadkar 		ext4_debug("%s: Block %lld up-to-date",
176aa75f4d3SHarshad Shirwadkar 			   __func__, bh->b_blocknr);
177aa75f4d3SHarshad Shirwadkar 		set_buffer_uptodate(bh);
178aa75f4d3SHarshad Shirwadkar 	} else {
179aa75f4d3SHarshad Shirwadkar 		ext4_debug("%s: Block %lld not up-to-date",
180aa75f4d3SHarshad Shirwadkar 			   __func__, bh->b_blocknr);
181aa75f4d3SHarshad Shirwadkar 		clear_buffer_uptodate(bh);
182aa75f4d3SHarshad Shirwadkar 	}
183aa75f4d3SHarshad Shirwadkar 
184aa75f4d3SHarshad Shirwadkar 	unlock_buffer(bh);
185aa75f4d3SHarshad Shirwadkar }
186aa75f4d3SHarshad Shirwadkar 
187aa75f4d3SHarshad Shirwadkar static inline void ext4_fc_reset_inode(struct inode *inode)
188aa75f4d3SHarshad Shirwadkar {
189aa75f4d3SHarshad Shirwadkar 	struct ext4_inode_info *ei = EXT4_I(inode);
190aa75f4d3SHarshad Shirwadkar 
191aa75f4d3SHarshad Shirwadkar 	ei->i_fc_lblk_start = 0;
192aa75f4d3SHarshad Shirwadkar 	ei->i_fc_lblk_len = 0;
193aa75f4d3SHarshad Shirwadkar }
194aa75f4d3SHarshad Shirwadkar 
195aa75f4d3SHarshad Shirwadkar void ext4_fc_init_inode(struct inode *inode)
196aa75f4d3SHarshad Shirwadkar {
197aa75f4d3SHarshad Shirwadkar 	struct ext4_inode_info *ei = EXT4_I(inode);
198aa75f4d3SHarshad Shirwadkar 
199aa75f4d3SHarshad Shirwadkar 	ext4_fc_reset_inode(inode);
200aa75f4d3SHarshad Shirwadkar 	ext4_clear_inode_state(inode, EXT4_STATE_FC_COMMITTING);
201aa75f4d3SHarshad Shirwadkar 	INIT_LIST_HEAD(&ei->i_fc_list);
202b3998b3bSRitesh Harjani 	INIT_LIST_HEAD(&ei->i_fc_dilist);
203aa75f4d3SHarshad Shirwadkar 	init_waitqueue_head(&ei->i_fc_wait);
204aa75f4d3SHarshad Shirwadkar 	atomic_set(&ei->i_fc_updates, 0);
205aa75f4d3SHarshad Shirwadkar }
206aa75f4d3SHarshad Shirwadkar 
207f6634e26SHarshad Shirwadkar /* This function must be called with sbi->s_fc_lock held. */
208f6634e26SHarshad Shirwadkar static void ext4_fc_wait_committing_inode(struct inode *inode)
209fa329e27STheodore Ts'o __releases(&EXT4_SB(inode->i_sb)->s_fc_lock)
210f6634e26SHarshad Shirwadkar {
211f6634e26SHarshad Shirwadkar 	wait_queue_head_t *wq;
212f6634e26SHarshad Shirwadkar 	struct ext4_inode_info *ei = EXT4_I(inode);
213f6634e26SHarshad Shirwadkar 
214f6634e26SHarshad Shirwadkar #if (BITS_PER_LONG < 64)
215f6634e26SHarshad Shirwadkar 	DEFINE_WAIT_BIT(wait, &ei->i_state_flags,
216f6634e26SHarshad Shirwadkar 			EXT4_STATE_FC_COMMITTING);
217f6634e26SHarshad Shirwadkar 	wq = bit_waitqueue(&ei->i_state_flags,
218f6634e26SHarshad Shirwadkar 				EXT4_STATE_FC_COMMITTING);
219f6634e26SHarshad Shirwadkar #else
220f6634e26SHarshad Shirwadkar 	DEFINE_WAIT_BIT(wait, &ei->i_flags,
221f6634e26SHarshad Shirwadkar 			EXT4_STATE_FC_COMMITTING);
222f6634e26SHarshad Shirwadkar 	wq = bit_waitqueue(&ei->i_flags,
223f6634e26SHarshad Shirwadkar 				EXT4_STATE_FC_COMMITTING);
224f6634e26SHarshad Shirwadkar #endif
225f6634e26SHarshad Shirwadkar 	lockdep_assert_held(&EXT4_SB(inode->i_sb)->s_fc_lock);
226f6634e26SHarshad Shirwadkar 	prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
227f6634e26SHarshad Shirwadkar 	spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
228f6634e26SHarshad Shirwadkar 	schedule();
229f6634e26SHarshad Shirwadkar 	finish_wait(wq, &wait.wq_entry);
230f6634e26SHarshad Shirwadkar }
231f6634e26SHarshad Shirwadkar 
232b7b80a35SYe Bin static bool ext4_fc_disabled(struct super_block *sb)
233b7b80a35SYe Bin {
234b7b80a35SYe Bin 	return (!test_opt2(sb, JOURNAL_FAST_COMMIT) ||
235b7b80a35SYe Bin 		(EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY));
236b7b80a35SYe Bin }
237b7b80a35SYe Bin 
238aa75f4d3SHarshad Shirwadkar /*
239aa75f4d3SHarshad Shirwadkar  * Inform Ext4's fast about start of an inode update
240aa75f4d3SHarshad Shirwadkar  *
241aa75f4d3SHarshad Shirwadkar  * This function is called by the high level call VFS callbacks before
242aa75f4d3SHarshad Shirwadkar  * performing any inode update. This function blocks if there's an ongoing
243aa75f4d3SHarshad Shirwadkar  * fast commit on the inode in question.
244aa75f4d3SHarshad Shirwadkar  */
245aa75f4d3SHarshad Shirwadkar void ext4_fc_start_update(struct inode *inode)
246aa75f4d3SHarshad Shirwadkar {
247aa75f4d3SHarshad Shirwadkar 	struct ext4_inode_info *ei = EXT4_I(inode);
248aa75f4d3SHarshad Shirwadkar 
249b7b80a35SYe Bin 	if (ext4_fc_disabled(inode->i_sb))
250aa75f4d3SHarshad Shirwadkar 		return;
251aa75f4d3SHarshad Shirwadkar 
252aa75f4d3SHarshad Shirwadkar restart:
253aa75f4d3SHarshad Shirwadkar 	spin_lock(&EXT4_SB(inode->i_sb)->s_fc_lock);
254aa75f4d3SHarshad Shirwadkar 	if (list_empty(&ei->i_fc_list))
255aa75f4d3SHarshad Shirwadkar 		goto out;
256aa75f4d3SHarshad Shirwadkar 
257aa75f4d3SHarshad Shirwadkar 	if (ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING)) {
258f6634e26SHarshad Shirwadkar 		ext4_fc_wait_committing_inode(inode);
259aa75f4d3SHarshad Shirwadkar 		goto restart;
260aa75f4d3SHarshad Shirwadkar 	}
261aa75f4d3SHarshad Shirwadkar out:
262aa75f4d3SHarshad Shirwadkar 	atomic_inc(&ei->i_fc_updates);
263aa75f4d3SHarshad Shirwadkar 	spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
264aa75f4d3SHarshad Shirwadkar }
265aa75f4d3SHarshad Shirwadkar 
266aa75f4d3SHarshad Shirwadkar /*
267aa75f4d3SHarshad Shirwadkar  * Stop inode update and wake up waiting fast commits if any.
268aa75f4d3SHarshad Shirwadkar  */
269aa75f4d3SHarshad Shirwadkar void ext4_fc_stop_update(struct inode *inode)
270aa75f4d3SHarshad Shirwadkar {
271aa75f4d3SHarshad Shirwadkar 	struct ext4_inode_info *ei = EXT4_I(inode);
272aa75f4d3SHarshad Shirwadkar 
273b7b80a35SYe Bin 	if (ext4_fc_disabled(inode->i_sb))
274aa75f4d3SHarshad Shirwadkar 		return;
275aa75f4d3SHarshad Shirwadkar 
276aa75f4d3SHarshad Shirwadkar 	if (atomic_dec_and_test(&ei->i_fc_updates))
277aa75f4d3SHarshad Shirwadkar 		wake_up_all(&ei->i_fc_wait);
278aa75f4d3SHarshad Shirwadkar }
279aa75f4d3SHarshad Shirwadkar 
280aa75f4d3SHarshad Shirwadkar /*
281aa75f4d3SHarshad Shirwadkar  * Remove inode from fast commit list. If the inode is being committed
282aa75f4d3SHarshad Shirwadkar  * we wait until inode commit is done.
283aa75f4d3SHarshad Shirwadkar  */
284aa75f4d3SHarshad Shirwadkar void ext4_fc_del(struct inode *inode)
285aa75f4d3SHarshad Shirwadkar {
286aa75f4d3SHarshad Shirwadkar 	struct ext4_inode_info *ei = EXT4_I(inode);
287b3998b3bSRitesh Harjani 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
288b3998b3bSRitesh Harjani 	struct ext4_fc_dentry_update *fc_dentry;
289aa75f4d3SHarshad Shirwadkar 
290b7b80a35SYe Bin 	if (ext4_fc_disabled(inode->i_sb))
291aa75f4d3SHarshad Shirwadkar 		return;
292aa75f4d3SHarshad Shirwadkar 
293aa75f4d3SHarshad Shirwadkar restart:
294aa75f4d3SHarshad Shirwadkar 	spin_lock(&EXT4_SB(inode->i_sb)->s_fc_lock);
295b3998b3bSRitesh Harjani 	if (list_empty(&ei->i_fc_list) && list_empty(&ei->i_fc_dilist)) {
296aa75f4d3SHarshad Shirwadkar 		spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
297aa75f4d3SHarshad Shirwadkar 		return;
298aa75f4d3SHarshad Shirwadkar 	}
299aa75f4d3SHarshad Shirwadkar 
300aa75f4d3SHarshad Shirwadkar 	if (ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING)) {
301f6634e26SHarshad Shirwadkar 		ext4_fc_wait_committing_inode(inode);
302aa75f4d3SHarshad Shirwadkar 		goto restart;
303aa75f4d3SHarshad Shirwadkar 	}
304b3998b3bSRitesh Harjani 
305b3998b3bSRitesh Harjani 	if (!list_empty(&ei->i_fc_list))
306aa75f4d3SHarshad Shirwadkar 		list_del_init(&ei->i_fc_list);
307b3998b3bSRitesh Harjani 
308b3998b3bSRitesh Harjani 	/*
309b3998b3bSRitesh Harjani 	 * Since this inode is getting removed, let's also remove all FC
310b3998b3bSRitesh Harjani 	 * dentry create references, since it is not needed to log it anyways.
311b3998b3bSRitesh Harjani 	 */
312b3998b3bSRitesh Harjani 	if (list_empty(&ei->i_fc_dilist)) {
313b3998b3bSRitesh Harjani 		spin_unlock(&sbi->s_fc_lock);
314b3998b3bSRitesh Harjani 		return;
315b3998b3bSRitesh Harjani 	}
316b3998b3bSRitesh Harjani 
317b3998b3bSRitesh Harjani 	fc_dentry = list_first_entry(&ei->i_fc_dilist, struct ext4_fc_dentry_update, fcd_dilist);
318b3998b3bSRitesh Harjani 	WARN_ON(fc_dentry->fcd_op != EXT4_FC_TAG_CREAT);
319b3998b3bSRitesh Harjani 	list_del_init(&fc_dentry->fcd_list);
320b3998b3bSRitesh Harjani 	list_del_init(&fc_dentry->fcd_dilist);
321b3998b3bSRitesh Harjani 
322b3998b3bSRitesh Harjani 	WARN_ON(!list_empty(&ei->i_fc_dilist));
323b3998b3bSRitesh Harjani 	spin_unlock(&sbi->s_fc_lock);
324b3998b3bSRitesh Harjani 
325b3998b3bSRitesh Harjani 	if (fc_dentry->fcd_name.name &&
326b3998b3bSRitesh Harjani 		fc_dentry->fcd_name.len > DNAME_INLINE_LEN)
327b3998b3bSRitesh Harjani 		kfree(fc_dentry->fcd_name.name);
328b3998b3bSRitesh Harjani 	kmem_cache_free(ext4_fc_dentry_cachep, fc_dentry);
329b3998b3bSRitesh Harjani 
330b3998b3bSRitesh Harjani 	return;
331aa75f4d3SHarshad Shirwadkar }
332aa75f4d3SHarshad Shirwadkar 
333aa75f4d3SHarshad Shirwadkar /*
334e85c81baSXin Yin  * Mark file system as fast commit ineligible, and record latest
335e85c81baSXin Yin  * ineligible transaction tid. This means until the recorded
336e85c81baSXin Yin  * transaction, commit operation would result in a full jbd2 commit.
337aa75f4d3SHarshad Shirwadkar  */
338e85c81baSXin Yin void ext4_fc_mark_ineligible(struct super_block *sb, int reason, handle_t *handle)
339aa75f4d3SHarshad Shirwadkar {
340aa75f4d3SHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(sb);
341e85c81baSXin Yin 	tid_t tid;
342aa75f4d3SHarshad Shirwadkar 
343b7b80a35SYe Bin 	if (ext4_fc_disabled(sb))
3448016e29fSHarshad Shirwadkar 		return;
3458016e29fSHarshad Shirwadkar 
3469b5f6c9bSHarshad Shirwadkar 	ext4_set_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
347e85c81baSXin Yin 	if (handle && !IS_ERR(handle))
348e85c81baSXin Yin 		tid = handle->h_transaction->t_tid;
349e85c81baSXin Yin 	else {
350e85c81baSXin Yin 		read_lock(&sbi->s_journal->j_state_lock);
351e85c81baSXin Yin 		tid = sbi->s_journal->j_running_transaction ?
352e85c81baSXin Yin 				sbi->s_journal->j_running_transaction->t_tid : 0;
353e85c81baSXin Yin 		read_unlock(&sbi->s_journal->j_state_lock);
354e85c81baSXin Yin 	}
355e85c81baSXin Yin 	spin_lock(&sbi->s_fc_lock);
356e85c81baSXin Yin 	if (sbi->s_fc_ineligible_tid < tid)
357e85c81baSXin Yin 		sbi->s_fc_ineligible_tid = tid;
358e85c81baSXin Yin 	spin_unlock(&sbi->s_fc_lock);
359aa75f4d3SHarshad Shirwadkar 	WARN_ON(reason >= EXT4_FC_REASON_MAX);
360aa75f4d3SHarshad Shirwadkar 	sbi->s_fc_stats.fc_ineligible_reason_count[reason]++;
361aa75f4d3SHarshad Shirwadkar }
362aa75f4d3SHarshad Shirwadkar 
363aa75f4d3SHarshad Shirwadkar /*
364aa75f4d3SHarshad Shirwadkar  * Generic fast commit tracking function. If this is the first time this we are
365aa75f4d3SHarshad Shirwadkar  * called after a full commit, we initialize fast commit fields and then call
366aa75f4d3SHarshad Shirwadkar  * __fc_track_fn() with update = 0. If we have already been called after a full
367aa75f4d3SHarshad Shirwadkar  * commit, we pass update = 1. Based on that, the track function can determine
368aa75f4d3SHarshad Shirwadkar  * if it needs to track a field for the first time or if it needs to just
369aa75f4d3SHarshad Shirwadkar  * update the previously tracked value.
370aa75f4d3SHarshad Shirwadkar  *
371aa75f4d3SHarshad Shirwadkar  * If enqueue is set, this function enqueues the inode in fast commit list.
372aa75f4d3SHarshad Shirwadkar  */
373aa75f4d3SHarshad Shirwadkar static int ext4_fc_track_template(
374a80f7fcfSHarshad Shirwadkar 	handle_t *handle, struct inode *inode,
375a80f7fcfSHarshad Shirwadkar 	int (*__fc_track_fn)(struct inode *, void *, bool),
376aa75f4d3SHarshad Shirwadkar 	void *args, int enqueue)
377aa75f4d3SHarshad Shirwadkar {
378aa75f4d3SHarshad Shirwadkar 	bool update = false;
379aa75f4d3SHarshad Shirwadkar 	struct ext4_inode_info *ei = EXT4_I(inode);
380aa75f4d3SHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
381a80f7fcfSHarshad Shirwadkar 	tid_t tid = 0;
382aa75f4d3SHarshad Shirwadkar 	int ret;
383aa75f4d3SHarshad Shirwadkar 
384a80f7fcfSHarshad Shirwadkar 	tid = handle->h_transaction->t_tid;
385aa75f4d3SHarshad Shirwadkar 	mutex_lock(&ei->i_fc_lock);
386a80f7fcfSHarshad Shirwadkar 	if (tid == ei->i_sync_tid) {
387aa75f4d3SHarshad Shirwadkar 		update = true;
388aa75f4d3SHarshad Shirwadkar 	} else {
389aa75f4d3SHarshad Shirwadkar 		ext4_fc_reset_inode(inode);
390a80f7fcfSHarshad Shirwadkar 		ei->i_sync_tid = tid;
391aa75f4d3SHarshad Shirwadkar 	}
392aa75f4d3SHarshad Shirwadkar 	ret = __fc_track_fn(inode, args, update);
393aa75f4d3SHarshad Shirwadkar 	mutex_unlock(&ei->i_fc_lock);
394aa75f4d3SHarshad Shirwadkar 
395aa75f4d3SHarshad Shirwadkar 	if (!enqueue)
396aa75f4d3SHarshad Shirwadkar 		return ret;
397aa75f4d3SHarshad Shirwadkar 
398aa75f4d3SHarshad Shirwadkar 	spin_lock(&sbi->s_fc_lock);
399aa75f4d3SHarshad Shirwadkar 	if (list_empty(&EXT4_I(inode)->i_fc_list))
400aa75f4d3SHarshad Shirwadkar 		list_add_tail(&EXT4_I(inode)->i_fc_list,
401bdc8a53aSXin Yin 				(sbi->s_journal->j_flags & JBD2_FULL_COMMIT_ONGOING ||
402bdc8a53aSXin Yin 				 sbi->s_journal->j_flags & JBD2_FAST_COMMIT_ONGOING) ?
403aa75f4d3SHarshad Shirwadkar 				&sbi->s_fc_q[FC_Q_STAGING] :
404aa75f4d3SHarshad Shirwadkar 				&sbi->s_fc_q[FC_Q_MAIN]);
405aa75f4d3SHarshad Shirwadkar 	spin_unlock(&sbi->s_fc_lock);
406aa75f4d3SHarshad Shirwadkar 
407aa75f4d3SHarshad Shirwadkar 	return ret;
408aa75f4d3SHarshad Shirwadkar }
409aa75f4d3SHarshad Shirwadkar 
410aa75f4d3SHarshad Shirwadkar struct __track_dentry_update_args {
411aa75f4d3SHarshad Shirwadkar 	struct dentry *dentry;
412aa75f4d3SHarshad Shirwadkar 	int op;
413aa75f4d3SHarshad Shirwadkar };
414aa75f4d3SHarshad Shirwadkar 
415aa75f4d3SHarshad Shirwadkar /* __track_fn for directory entry updates. Called with ei->i_fc_lock. */
416aa75f4d3SHarshad Shirwadkar static int __track_dentry_update(struct inode *inode, void *arg, bool update)
417aa75f4d3SHarshad Shirwadkar {
418aa75f4d3SHarshad Shirwadkar 	struct ext4_fc_dentry_update *node;
419aa75f4d3SHarshad Shirwadkar 	struct ext4_inode_info *ei = EXT4_I(inode);
420aa75f4d3SHarshad Shirwadkar 	struct __track_dentry_update_args *dentry_update =
421aa75f4d3SHarshad Shirwadkar 		(struct __track_dentry_update_args *)arg;
422aa75f4d3SHarshad Shirwadkar 	struct dentry *dentry = dentry_update->dentry;
423aa75f4d3SHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
424aa75f4d3SHarshad Shirwadkar 
425aa75f4d3SHarshad Shirwadkar 	mutex_unlock(&ei->i_fc_lock);
426aa75f4d3SHarshad Shirwadkar 	node = kmem_cache_alloc(ext4_fc_dentry_cachep, GFP_NOFS);
427aa75f4d3SHarshad Shirwadkar 	if (!node) {
428e85c81baSXin Yin 		ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM, NULL);
429aa75f4d3SHarshad Shirwadkar 		mutex_lock(&ei->i_fc_lock);
430aa75f4d3SHarshad Shirwadkar 		return -ENOMEM;
431aa75f4d3SHarshad Shirwadkar 	}
432aa75f4d3SHarshad Shirwadkar 
433aa75f4d3SHarshad Shirwadkar 	node->fcd_op = dentry_update->op;
434aa75f4d3SHarshad Shirwadkar 	node->fcd_parent = dentry->d_parent->d_inode->i_ino;
435aa75f4d3SHarshad Shirwadkar 	node->fcd_ino = inode->i_ino;
436aa75f4d3SHarshad Shirwadkar 	if (dentry->d_name.len > DNAME_INLINE_LEN) {
437aa75f4d3SHarshad Shirwadkar 		node->fcd_name.name = kmalloc(dentry->d_name.len, GFP_NOFS);
438aa75f4d3SHarshad Shirwadkar 		if (!node->fcd_name.name) {
439aa75f4d3SHarshad Shirwadkar 			kmem_cache_free(ext4_fc_dentry_cachep, node);
440aa75f4d3SHarshad Shirwadkar 			ext4_fc_mark_ineligible(inode->i_sb,
441e85c81baSXin Yin 				EXT4_FC_REASON_NOMEM, NULL);
442aa75f4d3SHarshad Shirwadkar 			mutex_lock(&ei->i_fc_lock);
443aa75f4d3SHarshad Shirwadkar 			return -ENOMEM;
444aa75f4d3SHarshad Shirwadkar 		}
445aa75f4d3SHarshad Shirwadkar 		memcpy((u8 *)node->fcd_name.name, dentry->d_name.name,
446aa75f4d3SHarshad Shirwadkar 			dentry->d_name.len);
447aa75f4d3SHarshad Shirwadkar 	} else {
448aa75f4d3SHarshad Shirwadkar 		memcpy(node->fcd_iname, dentry->d_name.name,
449aa75f4d3SHarshad Shirwadkar 			dentry->d_name.len);
450aa75f4d3SHarshad Shirwadkar 		node->fcd_name.name = node->fcd_iname;
451aa75f4d3SHarshad Shirwadkar 	}
452aa75f4d3SHarshad Shirwadkar 	node->fcd_name.len = dentry->d_name.len;
453b3998b3bSRitesh Harjani 	INIT_LIST_HEAD(&node->fcd_dilist);
454aa75f4d3SHarshad Shirwadkar 	spin_lock(&sbi->s_fc_lock);
455bdc8a53aSXin Yin 	if (sbi->s_journal->j_flags & JBD2_FULL_COMMIT_ONGOING ||
456bdc8a53aSXin Yin 		sbi->s_journal->j_flags & JBD2_FAST_COMMIT_ONGOING)
457aa75f4d3SHarshad Shirwadkar 		list_add_tail(&node->fcd_list,
458aa75f4d3SHarshad Shirwadkar 				&sbi->s_fc_dentry_q[FC_Q_STAGING]);
459aa75f4d3SHarshad Shirwadkar 	else
460aa75f4d3SHarshad Shirwadkar 		list_add_tail(&node->fcd_list, &sbi->s_fc_dentry_q[FC_Q_MAIN]);
461b3998b3bSRitesh Harjani 
462b3998b3bSRitesh Harjani 	/*
463b3998b3bSRitesh Harjani 	 * This helps us keep a track of all fc_dentry updates which is part of
464b3998b3bSRitesh Harjani 	 * this ext4 inode. So in case the inode is getting unlinked, before
465b3998b3bSRitesh Harjani 	 * even we get a chance to fsync, we could remove all fc_dentry
466b3998b3bSRitesh Harjani 	 * references while evicting the inode in ext4_fc_del().
467b3998b3bSRitesh Harjani 	 * Also with this, we don't need to loop over all the inodes in
468b3998b3bSRitesh Harjani 	 * sbi->s_fc_q to get the corresponding inode in
469b3998b3bSRitesh Harjani 	 * ext4_fc_commit_dentry_updates().
470b3998b3bSRitesh Harjani 	 */
471b3998b3bSRitesh Harjani 	if (dentry_update->op == EXT4_FC_TAG_CREAT) {
472b3998b3bSRitesh Harjani 		WARN_ON(!list_empty(&ei->i_fc_dilist));
473b3998b3bSRitesh Harjani 		list_add_tail(&node->fcd_dilist, &ei->i_fc_dilist);
474b3998b3bSRitesh Harjani 	}
475aa75f4d3SHarshad Shirwadkar 	spin_unlock(&sbi->s_fc_lock);
476aa75f4d3SHarshad Shirwadkar 	mutex_lock(&ei->i_fc_lock);
477aa75f4d3SHarshad Shirwadkar 
478aa75f4d3SHarshad Shirwadkar 	return 0;
479aa75f4d3SHarshad Shirwadkar }
480aa75f4d3SHarshad Shirwadkar 
481a80f7fcfSHarshad Shirwadkar void __ext4_fc_track_unlink(handle_t *handle,
482a80f7fcfSHarshad Shirwadkar 		struct inode *inode, struct dentry *dentry)
483aa75f4d3SHarshad Shirwadkar {
484aa75f4d3SHarshad Shirwadkar 	struct __track_dentry_update_args args;
485aa75f4d3SHarshad Shirwadkar 	int ret;
486aa75f4d3SHarshad Shirwadkar 
487aa75f4d3SHarshad Shirwadkar 	args.dentry = dentry;
488aa75f4d3SHarshad Shirwadkar 	args.op = EXT4_FC_TAG_UNLINK;
489aa75f4d3SHarshad Shirwadkar 
490a80f7fcfSHarshad Shirwadkar 	ret = ext4_fc_track_template(handle, inode, __track_dentry_update,
491aa75f4d3SHarshad Shirwadkar 					(void *)&args, 0);
4921d2e2440SRitesh Harjani 	trace_ext4_fc_track_unlink(handle, inode, dentry, ret);
493aa75f4d3SHarshad Shirwadkar }
494aa75f4d3SHarshad Shirwadkar 
495a80f7fcfSHarshad Shirwadkar void ext4_fc_track_unlink(handle_t *handle, struct dentry *dentry)
496a80f7fcfSHarshad Shirwadkar {
49778be0471SRitesh Harjani 	struct inode *inode = d_inode(dentry);
49878be0471SRitesh Harjani 
499b7b80a35SYe Bin 	if (ext4_fc_disabled(inode->i_sb))
50078be0471SRitesh Harjani 		return;
50178be0471SRitesh Harjani 
50278be0471SRitesh Harjani 	if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE))
50378be0471SRitesh Harjani 		return;
50478be0471SRitesh Harjani 
50578be0471SRitesh Harjani 	__ext4_fc_track_unlink(handle, inode, dentry);
506a80f7fcfSHarshad Shirwadkar }
507a80f7fcfSHarshad Shirwadkar 
508a80f7fcfSHarshad Shirwadkar void __ext4_fc_track_link(handle_t *handle,
509a80f7fcfSHarshad Shirwadkar 	struct inode *inode, struct dentry *dentry)
510aa75f4d3SHarshad Shirwadkar {
511aa75f4d3SHarshad Shirwadkar 	struct __track_dentry_update_args args;
512aa75f4d3SHarshad Shirwadkar 	int ret;
513aa75f4d3SHarshad Shirwadkar 
514aa75f4d3SHarshad Shirwadkar 	args.dentry = dentry;
515aa75f4d3SHarshad Shirwadkar 	args.op = EXT4_FC_TAG_LINK;
516aa75f4d3SHarshad Shirwadkar 
517a80f7fcfSHarshad Shirwadkar 	ret = ext4_fc_track_template(handle, inode, __track_dentry_update,
518aa75f4d3SHarshad Shirwadkar 					(void *)&args, 0);
5191d2e2440SRitesh Harjani 	trace_ext4_fc_track_link(handle, inode, dentry, ret);
520aa75f4d3SHarshad Shirwadkar }
521aa75f4d3SHarshad Shirwadkar 
522a80f7fcfSHarshad Shirwadkar void ext4_fc_track_link(handle_t *handle, struct dentry *dentry)
523a80f7fcfSHarshad Shirwadkar {
52478be0471SRitesh Harjani 	struct inode *inode = d_inode(dentry);
52578be0471SRitesh Harjani 
526b7b80a35SYe Bin 	if (ext4_fc_disabled(inode->i_sb))
52778be0471SRitesh Harjani 		return;
52878be0471SRitesh Harjani 
52978be0471SRitesh Harjani 	if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE))
53078be0471SRitesh Harjani 		return;
53178be0471SRitesh Harjani 
53278be0471SRitesh Harjani 	__ext4_fc_track_link(handle, inode, dentry);
533a80f7fcfSHarshad Shirwadkar }
534a80f7fcfSHarshad Shirwadkar 
5358210bb29SHarshad Shirwadkar void __ext4_fc_track_create(handle_t *handle, struct inode *inode,
5368210bb29SHarshad Shirwadkar 			  struct dentry *dentry)
537aa75f4d3SHarshad Shirwadkar {
538aa75f4d3SHarshad Shirwadkar 	struct __track_dentry_update_args args;
539aa75f4d3SHarshad Shirwadkar 	int ret;
540aa75f4d3SHarshad Shirwadkar 
541aa75f4d3SHarshad Shirwadkar 	args.dentry = dentry;
542aa75f4d3SHarshad Shirwadkar 	args.op = EXT4_FC_TAG_CREAT;
543aa75f4d3SHarshad Shirwadkar 
544a80f7fcfSHarshad Shirwadkar 	ret = ext4_fc_track_template(handle, inode, __track_dentry_update,
545aa75f4d3SHarshad Shirwadkar 					(void *)&args, 0);
5461d2e2440SRitesh Harjani 	trace_ext4_fc_track_create(handle, inode, dentry, ret);
547aa75f4d3SHarshad Shirwadkar }
548aa75f4d3SHarshad Shirwadkar 
5498210bb29SHarshad Shirwadkar void ext4_fc_track_create(handle_t *handle, struct dentry *dentry)
5508210bb29SHarshad Shirwadkar {
55178be0471SRitesh Harjani 	struct inode *inode = d_inode(dentry);
55278be0471SRitesh Harjani 
553b7b80a35SYe Bin 	if (ext4_fc_disabled(inode->i_sb))
55478be0471SRitesh Harjani 		return;
55578be0471SRitesh Harjani 
55678be0471SRitesh Harjani 	if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE))
55778be0471SRitesh Harjani 		return;
55878be0471SRitesh Harjani 
55978be0471SRitesh Harjani 	__ext4_fc_track_create(handle, inode, dentry);
5608210bb29SHarshad Shirwadkar }
5618210bb29SHarshad Shirwadkar 
562aa75f4d3SHarshad Shirwadkar /* __track_fn for inode tracking */
563aa75f4d3SHarshad Shirwadkar static int __track_inode(struct inode *inode, void *arg, bool update)
564aa75f4d3SHarshad Shirwadkar {
565aa75f4d3SHarshad Shirwadkar 	if (update)
566aa75f4d3SHarshad Shirwadkar 		return -EEXIST;
567aa75f4d3SHarshad Shirwadkar 
568aa75f4d3SHarshad Shirwadkar 	EXT4_I(inode)->i_fc_lblk_len = 0;
569aa75f4d3SHarshad Shirwadkar 
570aa75f4d3SHarshad Shirwadkar 	return 0;
571aa75f4d3SHarshad Shirwadkar }
572aa75f4d3SHarshad Shirwadkar 
573a80f7fcfSHarshad Shirwadkar void ext4_fc_track_inode(handle_t *handle, struct inode *inode)
574aa75f4d3SHarshad Shirwadkar {
575aa75f4d3SHarshad Shirwadkar 	int ret;
576aa75f4d3SHarshad Shirwadkar 
577aa75f4d3SHarshad Shirwadkar 	if (S_ISDIR(inode->i_mode))
578aa75f4d3SHarshad Shirwadkar 		return;
579aa75f4d3SHarshad Shirwadkar 
580e64e6ca9SYe Bin 	if (ext4_fc_disabled(inode->i_sb))
581e64e6ca9SYe Bin 		return;
582e64e6ca9SYe Bin 
583556e0319SHarshad Shirwadkar 	if (ext4_should_journal_data(inode)) {
584556e0319SHarshad Shirwadkar 		ext4_fc_mark_ineligible(inode->i_sb,
585e85c81baSXin Yin 					EXT4_FC_REASON_INODE_JOURNAL_DATA, handle);
586556e0319SHarshad Shirwadkar 		return;
587556e0319SHarshad Shirwadkar 	}
588556e0319SHarshad Shirwadkar 
58978be0471SRitesh Harjani 	if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE))
59078be0471SRitesh Harjani 		return;
59178be0471SRitesh Harjani 
592a80f7fcfSHarshad Shirwadkar 	ret = ext4_fc_track_template(handle, inode, __track_inode, NULL, 1);
5931d2e2440SRitesh Harjani 	trace_ext4_fc_track_inode(handle, inode, ret);
594aa75f4d3SHarshad Shirwadkar }
595aa75f4d3SHarshad Shirwadkar 
596aa75f4d3SHarshad Shirwadkar struct __track_range_args {
597aa75f4d3SHarshad Shirwadkar 	ext4_lblk_t start, end;
598aa75f4d3SHarshad Shirwadkar };
599aa75f4d3SHarshad Shirwadkar 
600aa75f4d3SHarshad Shirwadkar /* __track_fn for tracking data updates */
601aa75f4d3SHarshad Shirwadkar static int __track_range(struct inode *inode, void *arg, bool update)
602aa75f4d3SHarshad Shirwadkar {
603aa75f4d3SHarshad Shirwadkar 	struct ext4_inode_info *ei = EXT4_I(inode);
604aa75f4d3SHarshad Shirwadkar 	ext4_lblk_t oldstart;
605aa75f4d3SHarshad Shirwadkar 	struct __track_range_args *__arg =
606aa75f4d3SHarshad Shirwadkar 		(struct __track_range_args *)arg;
607aa75f4d3SHarshad Shirwadkar 
608aa75f4d3SHarshad Shirwadkar 	if (inode->i_ino < EXT4_FIRST_INO(inode->i_sb)) {
609aa75f4d3SHarshad Shirwadkar 		ext4_debug("Special inode %ld being modified\n", inode->i_ino);
610aa75f4d3SHarshad Shirwadkar 		return -ECANCELED;
611aa75f4d3SHarshad Shirwadkar 	}
612aa75f4d3SHarshad Shirwadkar 
613aa75f4d3SHarshad Shirwadkar 	oldstart = ei->i_fc_lblk_start;
614aa75f4d3SHarshad Shirwadkar 
615aa75f4d3SHarshad Shirwadkar 	if (update && ei->i_fc_lblk_len > 0) {
616aa75f4d3SHarshad Shirwadkar 		ei->i_fc_lblk_start = min(ei->i_fc_lblk_start, __arg->start);
617aa75f4d3SHarshad Shirwadkar 		ei->i_fc_lblk_len =
618aa75f4d3SHarshad Shirwadkar 			max(oldstart + ei->i_fc_lblk_len - 1, __arg->end) -
619aa75f4d3SHarshad Shirwadkar 				ei->i_fc_lblk_start + 1;
620aa75f4d3SHarshad Shirwadkar 	} else {
621aa75f4d3SHarshad Shirwadkar 		ei->i_fc_lblk_start = __arg->start;
622aa75f4d3SHarshad Shirwadkar 		ei->i_fc_lblk_len = __arg->end - __arg->start + 1;
623aa75f4d3SHarshad Shirwadkar 	}
624aa75f4d3SHarshad Shirwadkar 
625aa75f4d3SHarshad Shirwadkar 	return 0;
626aa75f4d3SHarshad Shirwadkar }
627aa75f4d3SHarshad Shirwadkar 
628a80f7fcfSHarshad Shirwadkar void ext4_fc_track_range(handle_t *handle, struct inode *inode, ext4_lblk_t start,
629aa75f4d3SHarshad Shirwadkar 			 ext4_lblk_t end)
630aa75f4d3SHarshad Shirwadkar {
631aa75f4d3SHarshad Shirwadkar 	struct __track_range_args args;
632aa75f4d3SHarshad Shirwadkar 	int ret;
633aa75f4d3SHarshad Shirwadkar 
634aa75f4d3SHarshad Shirwadkar 	if (S_ISDIR(inode->i_mode))
635aa75f4d3SHarshad Shirwadkar 		return;
636aa75f4d3SHarshad Shirwadkar 
637b7b80a35SYe Bin 	if (ext4_fc_disabled(inode->i_sb))
63878be0471SRitesh Harjani 		return;
63978be0471SRitesh Harjani 
64078be0471SRitesh Harjani 	if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE))
64178be0471SRitesh Harjani 		return;
64278be0471SRitesh Harjani 
643aa75f4d3SHarshad Shirwadkar 	args.start = start;
644aa75f4d3SHarshad Shirwadkar 	args.end = end;
645aa75f4d3SHarshad Shirwadkar 
646a80f7fcfSHarshad Shirwadkar 	ret = ext4_fc_track_template(handle, inode,  __track_range, &args, 1);
647aa75f4d3SHarshad Shirwadkar 
6481d2e2440SRitesh Harjani 	trace_ext4_fc_track_range(handle, inode, start, end, ret);
649aa75f4d3SHarshad Shirwadkar }
650aa75f4d3SHarshad Shirwadkar 
651e9f53353SDaejun Park static void ext4_fc_submit_bh(struct super_block *sb, bool is_tail)
652aa75f4d3SHarshad Shirwadkar {
65367c0f556SBart Van Assche 	blk_opf_t write_flags = REQ_SYNC;
654aa75f4d3SHarshad Shirwadkar 	struct buffer_head *bh = EXT4_SB(sb)->s_fc_bh;
655aa75f4d3SHarshad Shirwadkar 
656e9f53353SDaejun Park 	/* Add REQ_FUA | REQ_PREFLUSH only its tail */
657e9f53353SDaejun Park 	if (test_opt(sb, BARRIER) && is_tail)
658aa75f4d3SHarshad Shirwadkar 		write_flags |= REQ_FUA | REQ_PREFLUSH;
659aa75f4d3SHarshad Shirwadkar 	lock_buffer(bh);
660764b3fd3SHarshad Shirwadkar 	set_buffer_dirty(bh);
661aa75f4d3SHarshad Shirwadkar 	set_buffer_uptodate(bh);
662aa75f4d3SHarshad Shirwadkar 	bh->b_end_io = ext4_end_buffer_io_sync;
6631420c4a5SBart Van Assche 	submit_bh(REQ_OP_WRITE | write_flags, bh);
664aa75f4d3SHarshad Shirwadkar 	EXT4_SB(sb)->s_fc_bh = NULL;
665aa75f4d3SHarshad Shirwadkar }
666aa75f4d3SHarshad Shirwadkar 
667aa75f4d3SHarshad Shirwadkar /* Ext4 commit path routines */
668aa75f4d3SHarshad Shirwadkar 
669aa75f4d3SHarshad Shirwadkar /* memzero and update CRC */
670aa75f4d3SHarshad Shirwadkar static void *ext4_fc_memzero(struct super_block *sb, void *dst, int len,
671aa75f4d3SHarshad Shirwadkar 				u32 *crc)
672aa75f4d3SHarshad Shirwadkar {
673aa75f4d3SHarshad Shirwadkar 	void *ret;
674aa75f4d3SHarshad Shirwadkar 
675aa75f4d3SHarshad Shirwadkar 	ret = memset(dst, 0, len);
676aa75f4d3SHarshad Shirwadkar 	if (crc)
677aa75f4d3SHarshad Shirwadkar 		*crc = ext4_chksum(EXT4_SB(sb), *crc, dst, len);
678aa75f4d3SHarshad Shirwadkar 	return ret;
679aa75f4d3SHarshad Shirwadkar }
680aa75f4d3SHarshad Shirwadkar 
681aa75f4d3SHarshad Shirwadkar /*
682aa75f4d3SHarshad Shirwadkar  * Allocate len bytes on a fast commit buffer.
683aa75f4d3SHarshad Shirwadkar  *
684aa75f4d3SHarshad Shirwadkar  * During the commit time this function is used to manage fast commit
685aa75f4d3SHarshad Shirwadkar  * block space. We don't split a fast commit log onto different
686aa75f4d3SHarshad Shirwadkar  * blocks. So this function makes sure that if there's not enough space
687aa75f4d3SHarshad Shirwadkar  * on the current block, the remaining space in the current block is
688aa75f4d3SHarshad Shirwadkar  * marked as unused by adding EXT4_FC_TAG_PAD tag. In that case,
689aa75f4d3SHarshad Shirwadkar  * new block is from jbd2 and CRC is updated to reflect the padding
690aa75f4d3SHarshad Shirwadkar  * we added.
691aa75f4d3SHarshad Shirwadkar  */
692aa75f4d3SHarshad Shirwadkar static u8 *ext4_fc_reserve_space(struct super_block *sb, int len, u32 *crc)
693aa75f4d3SHarshad Shirwadkar {
694aa75f4d3SHarshad Shirwadkar 	struct ext4_fc_tl *tl;
695aa75f4d3SHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(sb);
696aa75f4d3SHarshad Shirwadkar 	struct buffer_head *bh;
697aa75f4d3SHarshad Shirwadkar 	int bsize = sbi->s_journal->j_blocksize;
698aa75f4d3SHarshad Shirwadkar 	int ret, off = sbi->s_fc_bytes % bsize;
699aa75f4d3SHarshad Shirwadkar 	int pad_len;
700aa75f4d3SHarshad Shirwadkar 
701aa75f4d3SHarshad Shirwadkar 	/*
702aa75f4d3SHarshad Shirwadkar 	 * After allocating len, we should have space at least for a 0 byte
703aa75f4d3SHarshad Shirwadkar 	 * padding.
704aa75f4d3SHarshad Shirwadkar 	 */
705fdc2a3c7SYe Bin 	if (len + EXT4_FC_TAG_BASE_LEN > bsize)
706aa75f4d3SHarshad Shirwadkar 		return NULL;
707aa75f4d3SHarshad Shirwadkar 
708fdc2a3c7SYe Bin 	if (bsize - off - 1 > len + EXT4_FC_TAG_BASE_LEN) {
709aa75f4d3SHarshad Shirwadkar 		/*
710aa75f4d3SHarshad Shirwadkar 		 * Only allocate from current buffer if we have enough space for
711aa75f4d3SHarshad Shirwadkar 		 * this request AND we have space to add a zero byte padding.
712aa75f4d3SHarshad Shirwadkar 		 */
713aa75f4d3SHarshad Shirwadkar 		if (!sbi->s_fc_bh) {
714aa75f4d3SHarshad Shirwadkar 			ret = jbd2_fc_get_buf(EXT4_SB(sb)->s_journal, &bh);
715aa75f4d3SHarshad Shirwadkar 			if (ret)
716aa75f4d3SHarshad Shirwadkar 				return NULL;
717aa75f4d3SHarshad Shirwadkar 			sbi->s_fc_bh = bh;
718aa75f4d3SHarshad Shirwadkar 		}
719aa75f4d3SHarshad Shirwadkar 		sbi->s_fc_bytes += len;
720aa75f4d3SHarshad Shirwadkar 		return sbi->s_fc_bh->b_data + off;
721aa75f4d3SHarshad Shirwadkar 	}
722aa75f4d3SHarshad Shirwadkar 	/* Need to add PAD tag */
723aa75f4d3SHarshad Shirwadkar 	tl = (struct ext4_fc_tl *)(sbi->s_fc_bh->b_data + off);
724aa75f4d3SHarshad Shirwadkar 	tl->fc_tag = cpu_to_le16(EXT4_FC_TAG_PAD);
725fdc2a3c7SYe Bin 	pad_len = bsize - off - 1 - EXT4_FC_TAG_BASE_LEN;
726aa75f4d3SHarshad Shirwadkar 	tl->fc_len = cpu_to_le16(pad_len);
727aa75f4d3SHarshad Shirwadkar 	if (crc)
728fdc2a3c7SYe Bin 		*crc = ext4_chksum(sbi, *crc, tl, EXT4_FC_TAG_BASE_LEN);
729aa75f4d3SHarshad Shirwadkar 	if (pad_len > 0)
730aa75f4d3SHarshad Shirwadkar 		ext4_fc_memzero(sb, tl + 1, pad_len, crc);
731e9f53353SDaejun Park 	ext4_fc_submit_bh(sb, false);
732aa75f4d3SHarshad Shirwadkar 
733aa75f4d3SHarshad Shirwadkar 	ret = jbd2_fc_get_buf(EXT4_SB(sb)->s_journal, &bh);
734aa75f4d3SHarshad Shirwadkar 	if (ret)
735aa75f4d3SHarshad Shirwadkar 		return NULL;
736aa75f4d3SHarshad Shirwadkar 	sbi->s_fc_bh = bh;
737aa75f4d3SHarshad Shirwadkar 	sbi->s_fc_bytes = (sbi->s_fc_bytes / bsize + 1) * bsize + len;
738aa75f4d3SHarshad Shirwadkar 	return sbi->s_fc_bh->b_data;
739aa75f4d3SHarshad Shirwadkar }
740aa75f4d3SHarshad Shirwadkar 
741aa75f4d3SHarshad Shirwadkar /* memcpy to fc reserved space and update CRC */
742aa75f4d3SHarshad Shirwadkar static void *ext4_fc_memcpy(struct super_block *sb, void *dst, const void *src,
743aa75f4d3SHarshad Shirwadkar 				int len, u32 *crc)
744aa75f4d3SHarshad Shirwadkar {
745aa75f4d3SHarshad Shirwadkar 	if (crc)
746aa75f4d3SHarshad Shirwadkar 		*crc = ext4_chksum(EXT4_SB(sb), *crc, src, len);
747aa75f4d3SHarshad Shirwadkar 	return memcpy(dst, src, len);
748aa75f4d3SHarshad Shirwadkar }
749aa75f4d3SHarshad Shirwadkar 
750aa75f4d3SHarshad Shirwadkar /*
751aa75f4d3SHarshad Shirwadkar  * Complete a fast commit by writing tail tag.
752aa75f4d3SHarshad Shirwadkar  *
753aa75f4d3SHarshad Shirwadkar  * Writing tail tag marks the end of a fast commit. In order to guarantee
754aa75f4d3SHarshad Shirwadkar  * atomicity, after writing tail tag, even if there's space remaining
755aa75f4d3SHarshad Shirwadkar  * in the block, next commit shouldn't use it. That's why tail tag
756aa75f4d3SHarshad Shirwadkar  * has the length as that of the remaining space on the block.
757aa75f4d3SHarshad Shirwadkar  */
758aa75f4d3SHarshad Shirwadkar static int ext4_fc_write_tail(struct super_block *sb, u32 crc)
759aa75f4d3SHarshad Shirwadkar {
760aa75f4d3SHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(sb);
761aa75f4d3SHarshad Shirwadkar 	struct ext4_fc_tl tl;
762aa75f4d3SHarshad Shirwadkar 	struct ext4_fc_tail tail;
763aa75f4d3SHarshad Shirwadkar 	int off, bsize = sbi->s_journal->j_blocksize;
764aa75f4d3SHarshad Shirwadkar 	u8 *dst;
765aa75f4d3SHarshad Shirwadkar 
766aa75f4d3SHarshad Shirwadkar 	/*
767aa75f4d3SHarshad Shirwadkar 	 * ext4_fc_reserve_space takes care of allocating an extra block if
768aa75f4d3SHarshad Shirwadkar 	 * there's no enough space on this block for accommodating this tail.
769aa75f4d3SHarshad Shirwadkar 	 */
770fdc2a3c7SYe Bin 	dst = ext4_fc_reserve_space(sb, EXT4_FC_TAG_BASE_LEN + sizeof(tail), &crc);
771aa75f4d3SHarshad Shirwadkar 	if (!dst)
772aa75f4d3SHarshad Shirwadkar 		return -ENOSPC;
773aa75f4d3SHarshad Shirwadkar 
774aa75f4d3SHarshad Shirwadkar 	off = sbi->s_fc_bytes % bsize;
775aa75f4d3SHarshad Shirwadkar 
776aa75f4d3SHarshad Shirwadkar 	tl.fc_tag = cpu_to_le16(EXT4_FC_TAG_TAIL);
777aa75f4d3SHarshad Shirwadkar 	tl.fc_len = cpu_to_le16(bsize - off - 1 + sizeof(struct ext4_fc_tail));
778aa75f4d3SHarshad Shirwadkar 	sbi->s_fc_bytes = round_up(sbi->s_fc_bytes, bsize);
779aa75f4d3SHarshad Shirwadkar 
780fdc2a3c7SYe Bin 	ext4_fc_memcpy(sb, dst, &tl, EXT4_FC_TAG_BASE_LEN, &crc);
781fdc2a3c7SYe Bin 	dst += EXT4_FC_TAG_BASE_LEN;
782aa75f4d3SHarshad Shirwadkar 	tail.fc_tid = cpu_to_le32(sbi->s_journal->j_running_transaction->t_tid);
783aa75f4d3SHarshad Shirwadkar 	ext4_fc_memcpy(sb, dst, &tail.fc_tid, sizeof(tail.fc_tid), &crc);
784aa75f4d3SHarshad Shirwadkar 	dst += sizeof(tail.fc_tid);
785aa75f4d3SHarshad Shirwadkar 	tail.fc_crc = cpu_to_le32(crc);
786aa75f4d3SHarshad Shirwadkar 	ext4_fc_memcpy(sb, dst, &tail.fc_crc, sizeof(tail.fc_crc), NULL);
787aa75f4d3SHarshad Shirwadkar 
788e9f53353SDaejun Park 	ext4_fc_submit_bh(sb, true);
789aa75f4d3SHarshad Shirwadkar 
790aa75f4d3SHarshad Shirwadkar 	return 0;
791aa75f4d3SHarshad Shirwadkar }
792aa75f4d3SHarshad Shirwadkar 
793aa75f4d3SHarshad Shirwadkar /*
794aa75f4d3SHarshad Shirwadkar  * Adds tag, length, value and updates CRC. Returns true if tlv was added.
795aa75f4d3SHarshad Shirwadkar  * Returns false if there's not enough space.
796aa75f4d3SHarshad Shirwadkar  */
797aa75f4d3SHarshad Shirwadkar static bool ext4_fc_add_tlv(struct super_block *sb, u16 tag, u16 len, u8 *val,
798aa75f4d3SHarshad Shirwadkar 			   u32 *crc)
799aa75f4d3SHarshad Shirwadkar {
800aa75f4d3SHarshad Shirwadkar 	struct ext4_fc_tl tl;
801aa75f4d3SHarshad Shirwadkar 	u8 *dst;
802aa75f4d3SHarshad Shirwadkar 
803fdc2a3c7SYe Bin 	dst = ext4_fc_reserve_space(sb, EXT4_FC_TAG_BASE_LEN + len, crc);
804aa75f4d3SHarshad Shirwadkar 	if (!dst)
805aa75f4d3SHarshad Shirwadkar 		return false;
806aa75f4d3SHarshad Shirwadkar 
807aa75f4d3SHarshad Shirwadkar 	tl.fc_tag = cpu_to_le16(tag);
808aa75f4d3SHarshad Shirwadkar 	tl.fc_len = cpu_to_le16(len);
809aa75f4d3SHarshad Shirwadkar 
810fdc2a3c7SYe Bin 	ext4_fc_memcpy(sb, dst, &tl, EXT4_FC_TAG_BASE_LEN, crc);
811fdc2a3c7SYe Bin 	ext4_fc_memcpy(sb, dst + EXT4_FC_TAG_BASE_LEN, val, len, crc);
812aa75f4d3SHarshad Shirwadkar 
813aa75f4d3SHarshad Shirwadkar 	return true;
814aa75f4d3SHarshad Shirwadkar }
815aa75f4d3SHarshad Shirwadkar 
816aa75f4d3SHarshad Shirwadkar /* Same as above, but adds dentry tlv. */
817facec450SGuoqing Jiang static bool ext4_fc_add_dentry_tlv(struct super_block *sb, u32 *crc,
818facec450SGuoqing Jiang 				   struct ext4_fc_dentry_update *fc_dentry)
819aa75f4d3SHarshad Shirwadkar {
820aa75f4d3SHarshad Shirwadkar 	struct ext4_fc_dentry_info fcd;
821aa75f4d3SHarshad Shirwadkar 	struct ext4_fc_tl tl;
822facec450SGuoqing Jiang 	int dlen = fc_dentry->fcd_name.len;
823fdc2a3c7SYe Bin 	u8 *dst = ext4_fc_reserve_space(sb,
824fdc2a3c7SYe Bin 			EXT4_FC_TAG_BASE_LEN + sizeof(fcd) + dlen, crc);
825aa75f4d3SHarshad Shirwadkar 
826aa75f4d3SHarshad Shirwadkar 	if (!dst)
827aa75f4d3SHarshad Shirwadkar 		return false;
828aa75f4d3SHarshad Shirwadkar 
829facec450SGuoqing Jiang 	fcd.fc_parent_ino = cpu_to_le32(fc_dentry->fcd_parent);
830facec450SGuoqing Jiang 	fcd.fc_ino = cpu_to_le32(fc_dentry->fcd_ino);
831facec450SGuoqing Jiang 	tl.fc_tag = cpu_to_le16(fc_dentry->fcd_op);
832aa75f4d3SHarshad Shirwadkar 	tl.fc_len = cpu_to_le16(sizeof(fcd) + dlen);
833fdc2a3c7SYe Bin 	ext4_fc_memcpy(sb, dst, &tl, EXT4_FC_TAG_BASE_LEN, crc);
834fdc2a3c7SYe Bin 	dst += EXT4_FC_TAG_BASE_LEN;
835aa75f4d3SHarshad Shirwadkar 	ext4_fc_memcpy(sb, dst, &fcd, sizeof(fcd), crc);
836aa75f4d3SHarshad Shirwadkar 	dst += sizeof(fcd);
837facec450SGuoqing Jiang 	ext4_fc_memcpy(sb, dst, fc_dentry->fcd_name.name, dlen, crc);
838aa75f4d3SHarshad Shirwadkar 
839aa75f4d3SHarshad Shirwadkar 	return true;
840aa75f4d3SHarshad Shirwadkar }
841aa75f4d3SHarshad Shirwadkar 
842aa75f4d3SHarshad Shirwadkar /*
843aa75f4d3SHarshad Shirwadkar  * Writes inode in the fast commit space under TLV with tag @tag.
844aa75f4d3SHarshad Shirwadkar  * Returns 0 on success, error on failure.
845aa75f4d3SHarshad Shirwadkar  */
846aa75f4d3SHarshad Shirwadkar static int ext4_fc_write_inode(struct inode *inode, u32 *crc)
847aa75f4d3SHarshad Shirwadkar {
848aa75f4d3SHarshad Shirwadkar 	struct ext4_inode_info *ei = EXT4_I(inode);
849aa75f4d3SHarshad Shirwadkar 	int inode_len = EXT4_GOOD_OLD_INODE_SIZE;
850aa75f4d3SHarshad Shirwadkar 	int ret;
851aa75f4d3SHarshad Shirwadkar 	struct ext4_iloc iloc;
852aa75f4d3SHarshad Shirwadkar 	struct ext4_fc_inode fc_inode;
853aa75f4d3SHarshad Shirwadkar 	struct ext4_fc_tl tl;
854aa75f4d3SHarshad Shirwadkar 	u8 *dst;
855aa75f4d3SHarshad Shirwadkar 
856aa75f4d3SHarshad Shirwadkar 	ret = ext4_get_inode_loc(inode, &iloc);
857aa75f4d3SHarshad Shirwadkar 	if (ret)
858aa75f4d3SHarshad Shirwadkar 		return ret;
859aa75f4d3SHarshad Shirwadkar 
8606c31a689SHarshad Shirwadkar 	if (ext4_test_inode_flag(inode, EXT4_INODE_INLINE_DATA))
8616c31a689SHarshad Shirwadkar 		inode_len = EXT4_INODE_SIZE(inode->i_sb);
8626c31a689SHarshad Shirwadkar 	else if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE)
863aa75f4d3SHarshad Shirwadkar 		inode_len += ei->i_extra_isize;
864aa75f4d3SHarshad Shirwadkar 
865aa75f4d3SHarshad Shirwadkar 	fc_inode.fc_ino = cpu_to_le32(inode->i_ino);
866aa75f4d3SHarshad Shirwadkar 	tl.fc_tag = cpu_to_le16(EXT4_FC_TAG_INODE);
867aa75f4d3SHarshad Shirwadkar 	tl.fc_len = cpu_to_le16(inode_len + sizeof(fc_inode.fc_ino));
868aa75f4d3SHarshad Shirwadkar 
869ccbf8eebSYe Bin 	ret = -ECANCELED;
870aa75f4d3SHarshad Shirwadkar 	dst = ext4_fc_reserve_space(inode->i_sb,
871fdc2a3c7SYe Bin 		EXT4_FC_TAG_BASE_LEN + inode_len + sizeof(fc_inode.fc_ino), crc);
872aa75f4d3SHarshad Shirwadkar 	if (!dst)
873ccbf8eebSYe Bin 		goto err;
874aa75f4d3SHarshad Shirwadkar 
875fdc2a3c7SYe Bin 	if (!ext4_fc_memcpy(inode->i_sb, dst, &tl, EXT4_FC_TAG_BASE_LEN, crc))
876ccbf8eebSYe Bin 		goto err;
877fdc2a3c7SYe Bin 	dst += EXT4_FC_TAG_BASE_LEN;
878aa75f4d3SHarshad Shirwadkar 	if (!ext4_fc_memcpy(inode->i_sb, dst, &fc_inode, sizeof(fc_inode), crc))
879ccbf8eebSYe Bin 		goto err;
880aa75f4d3SHarshad Shirwadkar 	dst += sizeof(fc_inode);
881aa75f4d3SHarshad Shirwadkar 	if (!ext4_fc_memcpy(inode->i_sb, dst, (u8 *)ext4_raw_inode(&iloc),
882aa75f4d3SHarshad Shirwadkar 					inode_len, crc))
883ccbf8eebSYe Bin 		goto err;
884ccbf8eebSYe Bin 	ret = 0;
885ccbf8eebSYe Bin err:
886ccbf8eebSYe Bin 	brelse(iloc.bh);
887ccbf8eebSYe Bin 	return ret;
888aa75f4d3SHarshad Shirwadkar }
889aa75f4d3SHarshad Shirwadkar 
890aa75f4d3SHarshad Shirwadkar /*
891aa75f4d3SHarshad Shirwadkar  * Writes updated data ranges for the inode in question. Updates CRC.
892aa75f4d3SHarshad Shirwadkar  * Returns 0 on success, error otherwise.
893aa75f4d3SHarshad Shirwadkar  */
894aa75f4d3SHarshad Shirwadkar static int ext4_fc_write_inode_data(struct inode *inode, u32 *crc)
895aa75f4d3SHarshad Shirwadkar {
896aa75f4d3SHarshad Shirwadkar 	ext4_lblk_t old_blk_size, cur_lblk_off, new_blk_size;
897aa75f4d3SHarshad Shirwadkar 	struct ext4_inode_info *ei = EXT4_I(inode);
898aa75f4d3SHarshad Shirwadkar 	struct ext4_map_blocks map;
899aa75f4d3SHarshad Shirwadkar 	struct ext4_fc_add_range fc_ext;
900aa75f4d3SHarshad Shirwadkar 	struct ext4_fc_del_range lrange;
901aa75f4d3SHarshad Shirwadkar 	struct ext4_extent *ex;
902aa75f4d3SHarshad Shirwadkar 	int ret;
903aa75f4d3SHarshad Shirwadkar 
904aa75f4d3SHarshad Shirwadkar 	mutex_lock(&ei->i_fc_lock);
905aa75f4d3SHarshad Shirwadkar 	if (ei->i_fc_lblk_len == 0) {
906aa75f4d3SHarshad Shirwadkar 		mutex_unlock(&ei->i_fc_lock);
907aa75f4d3SHarshad Shirwadkar 		return 0;
908aa75f4d3SHarshad Shirwadkar 	}
909aa75f4d3SHarshad Shirwadkar 	old_blk_size = ei->i_fc_lblk_start;
910aa75f4d3SHarshad Shirwadkar 	new_blk_size = ei->i_fc_lblk_start + ei->i_fc_lblk_len - 1;
911aa75f4d3SHarshad Shirwadkar 	ei->i_fc_lblk_len = 0;
912aa75f4d3SHarshad Shirwadkar 	mutex_unlock(&ei->i_fc_lock);
913aa75f4d3SHarshad Shirwadkar 
914aa75f4d3SHarshad Shirwadkar 	cur_lblk_off = old_blk_size;
9154978c659SJan Kara 	ext4_debug("will try writing %d to %d for inode %ld\n",
9164978c659SJan Kara 		   cur_lblk_off, new_blk_size, inode->i_ino);
917aa75f4d3SHarshad Shirwadkar 
918aa75f4d3SHarshad Shirwadkar 	while (cur_lblk_off <= new_blk_size) {
919aa75f4d3SHarshad Shirwadkar 		map.m_lblk = cur_lblk_off;
920aa75f4d3SHarshad Shirwadkar 		map.m_len = new_blk_size - cur_lblk_off + 1;
921aa75f4d3SHarshad Shirwadkar 		ret = ext4_map_blocks(NULL, inode, &map, 0);
922aa75f4d3SHarshad Shirwadkar 		if (ret < 0)
923aa75f4d3SHarshad Shirwadkar 			return -ECANCELED;
924aa75f4d3SHarshad Shirwadkar 
925aa75f4d3SHarshad Shirwadkar 		if (map.m_len == 0) {
926aa75f4d3SHarshad Shirwadkar 			cur_lblk_off++;
927aa75f4d3SHarshad Shirwadkar 			continue;
928aa75f4d3SHarshad Shirwadkar 		}
929aa75f4d3SHarshad Shirwadkar 
930aa75f4d3SHarshad Shirwadkar 		if (ret == 0) {
931aa75f4d3SHarshad Shirwadkar 			lrange.fc_ino = cpu_to_le32(inode->i_ino);
932aa75f4d3SHarshad Shirwadkar 			lrange.fc_lblk = cpu_to_le32(map.m_lblk);
933aa75f4d3SHarshad Shirwadkar 			lrange.fc_len = cpu_to_le32(map.m_len);
934aa75f4d3SHarshad Shirwadkar 			if (!ext4_fc_add_tlv(inode->i_sb, EXT4_FC_TAG_DEL_RANGE,
935aa75f4d3SHarshad Shirwadkar 					    sizeof(lrange), (u8 *)&lrange, crc))
936aa75f4d3SHarshad Shirwadkar 				return -ENOSPC;
937aa75f4d3SHarshad Shirwadkar 		} else {
938a2c2f082SHou Tao 			unsigned int max = (map.m_flags & EXT4_MAP_UNWRITTEN) ?
939a2c2f082SHou Tao 				EXT_UNWRITTEN_MAX_LEN : EXT_INIT_MAX_LEN;
940a2c2f082SHou Tao 
941a2c2f082SHou Tao 			/* Limit the number of blocks in one extent */
942a2c2f082SHou Tao 			map.m_len = min(max, map.m_len);
943a2c2f082SHou Tao 
944aa75f4d3SHarshad Shirwadkar 			fc_ext.fc_ino = cpu_to_le32(inode->i_ino);
945aa75f4d3SHarshad Shirwadkar 			ex = (struct ext4_extent *)&fc_ext.fc_ex;
946aa75f4d3SHarshad Shirwadkar 			ex->ee_block = cpu_to_le32(map.m_lblk);
947aa75f4d3SHarshad Shirwadkar 			ex->ee_len = cpu_to_le16(map.m_len);
948aa75f4d3SHarshad Shirwadkar 			ext4_ext_store_pblock(ex, map.m_pblk);
949aa75f4d3SHarshad Shirwadkar 			if (map.m_flags & EXT4_MAP_UNWRITTEN)
950aa75f4d3SHarshad Shirwadkar 				ext4_ext_mark_unwritten(ex);
951aa75f4d3SHarshad Shirwadkar 			else
952aa75f4d3SHarshad Shirwadkar 				ext4_ext_mark_initialized(ex);
953aa75f4d3SHarshad Shirwadkar 			if (!ext4_fc_add_tlv(inode->i_sb, EXT4_FC_TAG_ADD_RANGE,
954aa75f4d3SHarshad Shirwadkar 					    sizeof(fc_ext), (u8 *)&fc_ext, crc))
955aa75f4d3SHarshad Shirwadkar 				return -ENOSPC;
956aa75f4d3SHarshad Shirwadkar 		}
957aa75f4d3SHarshad Shirwadkar 
958aa75f4d3SHarshad Shirwadkar 		cur_lblk_off += map.m_len;
959aa75f4d3SHarshad Shirwadkar 	}
960aa75f4d3SHarshad Shirwadkar 
961aa75f4d3SHarshad Shirwadkar 	return 0;
962aa75f4d3SHarshad Shirwadkar }
963aa75f4d3SHarshad Shirwadkar 
964aa75f4d3SHarshad Shirwadkar 
965aa75f4d3SHarshad Shirwadkar /* Submit data for all the fast commit inodes */
966aa75f4d3SHarshad Shirwadkar static int ext4_fc_submit_inode_data_all(journal_t *journal)
967aa75f4d3SHarshad Shirwadkar {
968c30365b9SYu Zhe 	struct super_block *sb = journal->j_private;
969aa75f4d3SHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(sb);
970aa75f4d3SHarshad Shirwadkar 	struct ext4_inode_info *ei;
971aa75f4d3SHarshad Shirwadkar 	int ret = 0;
972aa75f4d3SHarshad Shirwadkar 
973aa75f4d3SHarshad Shirwadkar 	spin_lock(&sbi->s_fc_lock);
97496e7c02dSDaejun Park 	list_for_each_entry(ei, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) {
975aa75f4d3SHarshad Shirwadkar 		ext4_set_inode_state(&ei->vfs_inode, EXT4_STATE_FC_COMMITTING);
976aa75f4d3SHarshad Shirwadkar 		while (atomic_read(&ei->i_fc_updates)) {
977aa75f4d3SHarshad Shirwadkar 			DEFINE_WAIT(wait);
978aa75f4d3SHarshad Shirwadkar 
979aa75f4d3SHarshad Shirwadkar 			prepare_to_wait(&ei->i_fc_wait, &wait,
980aa75f4d3SHarshad Shirwadkar 						TASK_UNINTERRUPTIBLE);
981aa75f4d3SHarshad Shirwadkar 			if (atomic_read(&ei->i_fc_updates)) {
982aa75f4d3SHarshad Shirwadkar 				spin_unlock(&sbi->s_fc_lock);
983aa75f4d3SHarshad Shirwadkar 				schedule();
984aa75f4d3SHarshad Shirwadkar 				spin_lock(&sbi->s_fc_lock);
985aa75f4d3SHarshad Shirwadkar 			}
986aa75f4d3SHarshad Shirwadkar 			finish_wait(&ei->i_fc_wait, &wait);
987aa75f4d3SHarshad Shirwadkar 		}
988aa75f4d3SHarshad Shirwadkar 		spin_unlock(&sbi->s_fc_lock);
989aa75f4d3SHarshad Shirwadkar 		ret = jbd2_submit_inode_data(ei->jinode);
990aa75f4d3SHarshad Shirwadkar 		if (ret)
991aa75f4d3SHarshad Shirwadkar 			return ret;
992aa75f4d3SHarshad Shirwadkar 		spin_lock(&sbi->s_fc_lock);
993aa75f4d3SHarshad Shirwadkar 	}
994aa75f4d3SHarshad Shirwadkar 	spin_unlock(&sbi->s_fc_lock);
995aa75f4d3SHarshad Shirwadkar 
996aa75f4d3SHarshad Shirwadkar 	return ret;
997aa75f4d3SHarshad Shirwadkar }
998aa75f4d3SHarshad Shirwadkar 
999aa75f4d3SHarshad Shirwadkar /* Wait for completion of data for all the fast commit inodes */
1000aa75f4d3SHarshad Shirwadkar static int ext4_fc_wait_inode_data_all(journal_t *journal)
1001aa75f4d3SHarshad Shirwadkar {
1002c30365b9SYu Zhe 	struct super_block *sb = journal->j_private;
1003aa75f4d3SHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(sb);
1004aa75f4d3SHarshad Shirwadkar 	struct ext4_inode_info *pos, *n;
1005aa75f4d3SHarshad Shirwadkar 	int ret = 0;
1006aa75f4d3SHarshad Shirwadkar 
1007aa75f4d3SHarshad Shirwadkar 	spin_lock(&sbi->s_fc_lock);
1008aa75f4d3SHarshad Shirwadkar 	list_for_each_entry_safe(pos, n, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) {
1009aa75f4d3SHarshad Shirwadkar 		if (!ext4_test_inode_state(&pos->vfs_inode,
1010aa75f4d3SHarshad Shirwadkar 					   EXT4_STATE_FC_COMMITTING))
1011aa75f4d3SHarshad Shirwadkar 			continue;
1012aa75f4d3SHarshad Shirwadkar 		spin_unlock(&sbi->s_fc_lock);
1013aa75f4d3SHarshad Shirwadkar 
1014aa75f4d3SHarshad Shirwadkar 		ret = jbd2_wait_inode_data(journal, pos->jinode);
1015aa75f4d3SHarshad Shirwadkar 		if (ret)
1016aa75f4d3SHarshad Shirwadkar 			return ret;
1017aa75f4d3SHarshad Shirwadkar 		spin_lock(&sbi->s_fc_lock);
1018aa75f4d3SHarshad Shirwadkar 	}
1019aa75f4d3SHarshad Shirwadkar 	spin_unlock(&sbi->s_fc_lock);
1020aa75f4d3SHarshad Shirwadkar 
1021aa75f4d3SHarshad Shirwadkar 	return 0;
1022aa75f4d3SHarshad Shirwadkar }
1023aa75f4d3SHarshad Shirwadkar 
1024aa75f4d3SHarshad Shirwadkar /* Commit all the directory entry updates */
1025aa75f4d3SHarshad Shirwadkar static int ext4_fc_commit_dentry_updates(journal_t *journal, u32 *crc)
1026fa329e27STheodore Ts'o __acquires(&sbi->s_fc_lock)
1027fa329e27STheodore Ts'o __releases(&sbi->s_fc_lock)
1028aa75f4d3SHarshad Shirwadkar {
1029c30365b9SYu Zhe 	struct super_block *sb = journal->j_private;
1030aa75f4d3SHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(sb);
103196e7c02dSDaejun Park 	struct ext4_fc_dentry_update *fc_dentry, *fc_dentry_n;
1032aa75f4d3SHarshad Shirwadkar 	struct inode *inode;
1033b3998b3bSRitesh Harjani 	struct ext4_inode_info *ei;
1034aa75f4d3SHarshad Shirwadkar 	int ret;
1035aa75f4d3SHarshad Shirwadkar 
1036aa75f4d3SHarshad Shirwadkar 	if (list_empty(&sbi->s_fc_dentry_q[FC_Q_MAIN]))
1037aa75f4d3SHarshad Shirwadkar 		return 0;
103896e7c02dSDaejun Park 	list_for_each_entry_safe(fc_dentry, fc_dentry_n,
103996e7c02dSDaejun Park 				 &sbi->s_fc_dentry_q[FC_Q_MAIN], fcd_list) {
1040aa75f4d3SHarshad Shirwadkar 		if (fc_dentry->fcd_op != EXT4_FC_TAG_CREAT) {
1041aa75f4d3SHarshad Shirwadkar 			spin_unlock(&sbi->s_fc_lock);
1042facec450SGuoqing Jiang 			if (!ext4_fc_add_dentry_tlv(sb, crc, fc_dentry)) {
1043aa75f4d3SHarshad Shirwadkar 				ret = -ENOSPC;
1044aa75f4d3SHarshad Shirwadkar 				goto lock_and_exit;
1045aa75f4d3SHarshad Shirwadkar 			}
1046aa75f4d3SHarshad Shirwadkar 			spin_lock(&sbi->s_fc_lock);
1047aa75f4d3SHarshad Shirwadkar 			continue;
1048aa75f4d3SHarshad Shirwadkar 		}
1049aa75f4d3SHarshad Shirwadkar 		/*
1050b3998b3bSRitesh Harjani 		 * With fcd_dilist we need not loop in sbi->s_fc_q to get the
1051b3998b3bSRitesh Harjani 		 * corresponding inode pointer
1052aa75f4d3SHarshad Shirwadkar 		 */
1053b3998b3bSRitesh Harjani 		WARN_ON(list_empty(&fc_dentry->fcd_dilist));
1054b3998b3bSRitesh Harjani 		ei = list_first_entry(&fc_dentry->fcd_dilist,
1055b3998b3bSRitesh Harjani 				struct ext4_inode_info, i_fc_dilist);
1056b3998b3bSRitesh Harjani 		inode = &ei->vfs_inode;
1057b3998b3bSRitesh Harjani 		WARN_ON(inode->i_ino != fc_dentry->fcd_ino);
1058b3998b3bSRitesh Harjani 
1059aa75f4d3SHarshad Shirwadkar 		spin_unlock(&sbi->s_fc_lock);
1060aa75f4d3SHarshad Shirwadkar 
1061aa75f4d3SHarshad Shirwadkar 		/*
1062aa75f4d3SHarshad Shirwadkar 		 * We first write the inode and then the create dirent. This
1063aa75f4d3SHarshad Shirwadkar 		 * allows the recovery code to create an unnamed inode first
1064aa75f4d3SHarshad Shirwadkar 		 * and then link it to a directory entry. This allows us
1065aa75f4d3SHarshad Shirwadkar 		 * to use namei.c routines almost as is and simplifies
1066aa75f4d3SHarshad Shirwadkar 		 * the recovery code.
1067aa75f4d3SHarshad Shirwadkar 		 */
1068aa75f4d3SHarshad Shirwadkar 		ret = ext4_fc_write_inode(inode, crc);
1069aa75f4d3SHarshad Shirwadkar 		if (ret)
1070aa75f4d3SHarshad Shirwadkar 			goto lock_and_exit;
1071aa75f4d3SHarshad Shirwadkar 
1072aa75f4d3SHarshad Shirwadkar 		ret = ext4_fc_write_inode_data(inode, crc);
1073aa75f4d3SHarshad Shirwadkar 		if (ret)
1074aa75f4d3SHarshad Shirwadkar 			goto lock_and_exit;
1075aa75f4d3SHarshad Shirwadkar 
1076facec450SGuoqing Jiang 		if (!ext4_fc_add_dentry_tlv(sb, crc, fc_dentry)) {
1077aa75f4d3SHarshad Shirwadkar 			ret = -ENOSPC;
1078aa75f4d3SHarshad Shirwadkar 			goto lock_and_exit;
1079aa75f4d3SHarshad Shirwadkar 		}
1080aa75f4d3SHarshad Shirwadkar 
1081aa75f4d3SHarshad Shirwadkar 		spin_lock(&sbi->s_fc_lock);
1082aa75f4d3SHarshad Shirwadkar 	}
1083aa75f4d3SHarshad Shirwadkar 	return 0;
1084aa75f4d3SHarshad Shirwadkar lock_and_exit:
1085aa75f4d3SHarshad Shirwadkar 	spin_lock(&sbi->s_fc_lock);
1086aa75f4d3SHarshad Shirwadkar 	return ret;
1087aa75f4d3SHarshad Shirwadkar }
1088aa75f4d3SHarshad Shirwadkar 
1089aa75f4d3SHarshad Shirwadkar static int ext4_fc_perform_commit(journal_t *journal)
1090aa75f4d3SHarshad Shirwadkar {
1091c30365b9SYu Zhe 	struct super_block *sb = journal->j_private;
1092aa75f4d3SHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(sb);
1093aa75f4d3SHarshad Shirwadkar 	struct ext4_inode_info *iter;
1094aa75f4d3SHarshad Shirwadkar 	struct ext4_fc_head head;
1095aa75f4d3SHarshad Shirwadkar 	struct inode *inode;
1096aa75f4d3SHarshad Shirwadkar 	struct blk_plug plug;
1097aa75f4d3SHarshad Shirwadkar 	int ret = 0;
1098aa75f4d3SHarshad Shirwadkar 	u32 crc = 0;
1099aa75f4d3SHarshad Shirwadkar 
1100aa75f4d3SHarshad Shirwadkar 	ret = ext4_fc_submit_inode_data_all(journal);
1101aa75f4d3SHarshad Shirwadkar 	if (ret)
1102aa75f4d3SHarshad Shirwadkar 		return ret;
1103aa75f4d3SHarshad Shirwadkar 
1104aa75f4d3SHarshad Shirwadkar 	ret = ext4_fc_wait_inode_data_all(journal);
1105aa75f4d3SHarshad Shirwadkar 	if (ret)
1106aa75f4d3SHarshad Shirwadkar 		return ret;
1107aa75f4d3SHarshad Shirwadkar 
1108da0c5d26SHarshad Shirwadkar 	/*
1109da0c5d26SHarshad Shirwadkar 	 * If file system device is different from journal device, issue a cache
1110da0c5d26SHarshad Shirwadkar 	 * flush before we start writing fast commit blocks.
1111da0c5d26SHarshad Shirwadkar 	 */
1112da0c5d26SHarshad Shirwadkar 	if (journal->j_fs_dev != journal->j_dev)
1113c6bf3f0eSChristoph Hellwig 		blkdev_issue_flush(journal->j_fs_dev);
1114da0c5d26SHarshad Shirwadkar 
1115aa75f4d3SHarshad Shirwadkar 	blk_start_plug(&plug);
1116aa75f4d3SHarshad Shirwadkar 	if (sbi->s_fc_bytes == 0) {
1117aa75f4d3SHarshad Shirwadkar 		/*
1118aa75f4d3SHarshad Shirwadkar 		 * Add a head tag only if this is the first fast commit
1119aa75f4d3SHarshad Shirwadkar 		 * in this TID.
1120aa75f4d3SHarshad Shirwadkar 		 */
1121aa75f4d3SHarshad Shirwadkar 		head.fc_features = cpu_to_le32(EXT4_FC_SUPPORTED_FEATURES);
1122aa75f4d3SHarshad Shirwadkar 		head.fc_tid = cpu_to_le32(
1123aa75f4d3SHarshad Shirwadkar 			sbi->s_journal->j_running_transaction->t_tid);
1124aa75f4d3SHarshad Shirwadkar 		if (!ext4_fc_add_tlv(sb, EXT4_FC_TAG_HEAD, sizeof(head),
1125e1262cd2SXu Yihang 			(u8 *)&head, &crc)) {
1126e1262cd2SXu Yihang 			ret = -ENOSPC;
1127aa75f4d3SHarshad Shirwadkar 			goto out;
1128aa75f4d3SHarshad Shirwadkar 		}
1129e1262cd2SXu Yihang 	}
1130aa75f4d3SHarshad Shirwadkar 
1131aa75f4d3SHarshad Shirwadkar 	spin_lock(&sbi->s_fc_lock);
1132aa75f4d3SHarshad Shirwadkar 	ret = ext4_fc_commit_dentry_updates(journal, &crc);
1133aa75f4d3SHarshad Shirwadkar 	if (ret) {
1134aa75f4d3SHarshad Shirwadkar 		spin_unlock(&sbi->s_fc_lock);
1135aa75f4d3SHarshad Shirwadkar 		goto out;
1136aa75f4d3SHarshad Shirwadkar 	}
1137aa75f4d3SHarshad Shirwadkar 
113896e7c02dSDaejun Park 	list_for_each_entry(iter, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) {
1139aa75f4d3SHarshad Shirwadkar 		inode = &iter->vfs_inode;
1140aa75f4d3SHarshad Shirwadkar 		if (!ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING))
1141aa75f4d3SHarshad Shirwadkar 			continue;
1142aa75f4d3SHarshad Shirwadkar 
1143aa75f4d3SHarshad Shirwadkar 		spin_unlock(&sbi->s_fc_lock);
1144aa75f4d3SHarshad Shirwadkar 		ret = ext4_fc_write_inode_data(inode, &crc);
1145aa75f4d3SHarshad Shirwadkar 		if (ret)
1146aa75f4d3SHarshad Shirwadkar 			goto out;
1147aa75f4d3SHarshad Shirwadkar 		ret = ext4_fc_write_inode(inode, &crc);
1148aa75f4d3SHarshad Shirwadkar 		if (ret)
1149aa75f4d3SHarshad Shirwadkar 			goto out;
1150aa75f4d3SHarshad Shirwadkar 		spin_lock(&sbi->s_fc_lock);
1151aa75f4d3SHarshad Shirwadkar 	}
1152aa75f4d3SHarshad Shirwadkar 	spin_unlock(&sbi->s_fc_lock);
1153aa75f4d3SHarshad Shirwadkar 
1154aa75f4d3SHarshad Shirwadkar 	ret = ext4_fc_write_tail(sb, crc);
1155aa75f4d3SHarshad Shirwadkar 
1156aa75f4d3SHarshad Shirwadkar out:
1157aa75f4d3SHarshad Shirwadkar 	blk_finish_plug(&plug);
1158aa75f4d3SHarshad Shirwadkar 	return ret;
1159aa75f4d3SHarshad Shirwadkar }
1160aa75f4d3SHarshad Shirwadkar 
11610915e464SHarshad Shirwadkar static void ext4_fc_update_stats(struct super_block *sb, int status,
1162d9bf099cSRitesh Harjani 				 u64 commit_time, int nblks, tid_t commit_tid)
11630915e464SHarshad Shirwadkar {
11640915e464SHarshad Shirwadkar 	struct ext4_fc_stats *stats = &EXT4_SB(sb)->s_fc_stats;
11650915e464SHarshad Shirwadkar 
11664978c659SJan Kara 	ext4_debug("Fast commit ended with status = %d for tid %u",
1167d9bf099cSRitesh Harjani 			status, commit_tid);
11680915e464SHarshad Shirwadkar 	if (status == EXT4_FC_STATUS_OK) {
11690915e464SHarshad Shirwadkar 		stats->fc_num_commits++;
11700915e464SHarshad Shirwadkar 		stats->fc_numblks += nblks;
11710915e464SHarshad Shirwadkar 		if (likely(stats->s_fc_avg_commit_time))
11720915e464SHarshad Shirwadkar 			stats->s_fc_avg_commit_time =
11730915e464SHarshad Shirwadkar 				(commit_time +
11740915e464SHarshad Shirwadkar 				 stats->s_fc_avg_commit_time * 3) / 4;
11750915e464SHarshad Shirwadkar 		else
11760915e464SHarshad Shirwadkar 			stats->s_fc_avg_commit_time = commit_time;
11770915e464SHarshad Shirwadkar 	} else if (status == EXT4_FC_STATUS_FAILED ||
11780915e464SHarshad Shirwadkar 		   status == EXT4_FC_STATUS_INELIGIBLE) {
11790915e464SHarshad Shirwadkar 		if (status == EXT4_FC_STATUS_FAILED)
11800915e464SHarshad Shirwadkar 			stats->fc_failed_commits++;
11810915e464SHarshad Shirwadkar 		stats->fc_ineligible_commits++;
11820915e464SHarshad Shirwadkar 	} else {
11830915e464SHarshad Shirwadkar 		stats->fc_skipped_commits++;
11840915e464SHarshad Shirwadkar 	}
11855641ace5SRitesh Harjani 	trace_ext4_fc_commit_stop(sb, nblks, status, commit_tid);
11860915e464SHarshad Shirwadkar }
11870915e464SHarshad Shirwadkar 
1188aa75f4d3SHarshad Shirwadkar /*
1189aa75f4d3SHarshad Shirwadkar  * The main commit entry point. Performs a fast commit for transaction
1190aa75f4d3SHarshad Shirwadkar  * commit_tid if needed. If it's not possible to perform a fast commit
1191aa75f4d3SHarshad Shirwadkar  * due to various reasons, we fall back to full commit. Returns 0
1192aa75f4d3SHarshad Shirwadkar  * on success, error otherwise.
1193aa75f4d3SHarshad Shirwadkar  */
1194aa75f4d3SHarshad Shirwadkar int ext4_fc_commit(journal_t *journal, tid_t commit_tid)
1195aa75f4d3SHarshad Shirwadkar {
1196c30365b9SYu Zhe 	struct super_block *sb = journal->j_private;
1197aa75f4d3SHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(sb);
1198aa75f4d3SHarshad Shirwadkar 	int nblks = 0, ret, bsize = journal->j_blocksize;
1199aa75f4d3SHarshad Shirwadkar 	int subtid = atomic_read(&sbi->s_fc_subtid);
12000915e464SHarshad Shirwadkar 	int status = EXT4_FC_STATUS_OK, fc_bufs_before = 0;
1201aa75f4d3SHarshad Shirwadkar 	ktime_t start_time, commit_time;
1202aa75f4d3SHarshad Shirwadkar 
12037f142440SRitesh Harjani 	if (!test_opt2(sb, JOURNAL_FAST_COMMIT))
12047f142440SRitesh Harjani 		return jbd2_complete_transaction(journal, commit_tid);
12057f142440SRitesh Harjani 
12065641ace5SRitesh Harjani 	trace_ext4_fc_commit_start(sb, commit_tid);
1207aa75f4d3SHarshad Shirwadkar 
1208aa75f4d3SHarshad Shirwadkar 	start_time = ktime_get();
1209aa75f4d3SHarshad Shirwadkar 
1210aa75f4d3SHarshad Shirwadkar restart_fc:
1211aa75f4d3SHarshad Shirwadkar 	ret = jbd2_fc_begin_commit(journal, commit_tid);
1212aa75f4d3SHarshad Shirwadkar 	if (ret == -EALREADY) {
1213aa75f4d3SHarshad Shirwadkar 		/* There was an ongoing commit, check if we need to restart */
1214aa75f4d3SHarshad Shirwadkar 		if (atomic_read(&sbi->s_fc_subtid) <= subtid &&
1215aa75f4d3SHarshad Shirwadkar 			commit_tid > journal->j_commit_sequence)
1216aa75f4d3SHarshad Shirwadkar 			goto restart_fc;
1217d9bf099cSRitesh Harjani 		ext4_fc_update_stats(sb, EXT4_FC_STATUS_SKIPPED, 0, 0,
1218d9bf099cSRitesh Harjani 				commit_tid);
12190915e464SHarshad Shirwadkar 		return 0;
1220aa75f4d3SHarshad Shirwadkar 	} else if (ret) {
12210915e464SHarshad Shirwadkar 		/*
12220915e464SHarshad Shirwadkar 		 * Commit couldn't start. Just update stats and perform a
12230915e464SHarshad Shirwadkar 		 * full commit.
12240915e464SHarshad Shirwadkar 		 */
1225d9bf099cSRitesh Harjani 		ext4_fc_update_stats(sb, EXT4_FC_STATUS_FAILED, 0, 0,
1226d9bf099cSRitesh Harjani 				commit_tid);
12270915e464SHarshad Shirwadkar 		return jbd2_complete_transaction(journal, commit_tid);
1228aa75f4d3SHarshad Shirwadkar 	}
12290915e464SHarshad Shirwadkar 
12307bbbe241SHarshad Shirwadkar 	/*
12317bbbe241SHarshad Shirwadkar 	 * After establishing journal barrier via jbd2_fc_begin_commit(), check
12327bbbe241SHarshad Shirwadkar 	 * if we are fast commit ineligible.
12337bbbe241SHarshad Shirwadkar 	 */
12347bbbe241SHarshad Shirwadkar 	if (ext4_test_mount_flag(sb, EXT4_MF_FC_INELIGIBLE)) {
12350915e464SHarshad Shirwadkar 		status = EXT4_FC_STATUS_INELIGIBLE;
12360915e464SHarshad Shirwadkar 		goto fallback;
12377bbbe241SHarshad Shirwadkar 	}
1238aa75f4d3SHarshad Shirwadkar 
1239aa75f4d3SHarshad Shirwadkar 	fc_bufs_before = (sbi->s_fc_bytes + bsize - 1) / bsize;
1240aa75f4d3SHarshad Shirwadkar 	ret = ext4_fc_perform_commit(journal);
1241aa75f4d3SHarshad Shirwadkar 	if (ret < 0) {
12420915e464SHarshad Shirwadkar 		status = EXT4_FC_STATUS_FAILED;
12430915e464SHarshad Shirwadkar 		goto fallback;
1244aa75f4d3SHarshad Shirwadkar 	}
1245aa75f4d3SHarshad Shirwadkar 	nblks = (sbi->s_fc_bytes + bsize - 1) / bsize - fc_bufs_before;
1246aa75f4d3SHarshad Shirwadkar 	ret = jbd2_fc_wait_bufs(journal, nblks);
1247aa75f4d3SHarshad Shirwadkar 	if (ret < 0) {
12480915e464SHarshad Shirwadkar 		status = EXT4_FC_STATUS_FAILED;
12490915e464SHarshad Shirwadkar 		goto fallback;
1250aa75f4d3SHarshad Shirwadkar 	}
1251aa75f4d3SHarshad Shirwadkar 	atomic_inc(&sbi->s_fc_subtid);
12520915e464SHarshad Shirwadkar 	ret = jbd2_fc_end_commit(journal);
1253aa75f4d3SHarshad Shirwadkar 	/*
12540915e464SHarshad Shirwadkar 	 * weight the commit time higher than the average time so we
12550915e464SHarshad Shirwadkar 	 * don't react too strongly to vast changes in the commit time
1256aa75f4d3SHarshad Shirwadkar 	 */
12570915e464SHarshad Shirwadkar 	commit_time = ktime_to_ns(ktime_sub(ktime_get(), start_time));
1258d9bf099cSRitesh Harjani 	ext4_fc_update_stats(sb, status, commit_time, nblks, commit_tid);
12590915e464SHarshad Shirwadkar 	return ret;
12600915e464SHarshad Shirwadkar 
12610915e464SHarshad Shirwadkar fallback:
12620915e464SHarshad Shirwadkar 	ret = jbd2_fc_end_commit_fallback(journal);
1263d9bf099cSRitesh Harjani 	ext4_fc_update_stats(sb, status, 0, 0, commit_tid);
12640915e464SHarshad Shirwadkar 	return ret;
1265aa75f4d3SHarshad Shirwadkar }
1266aa75f4d3SHarshad Shirwadkar 
1267ff780b91SHarshad Shirwadkar /*
1268ff780b91SHarshad Shirwadkar  * Fast commit cleanup routine. This is called after every fast commit and
1269ff780b91SHarshad Shirwadkar  * full commit. full is true if we are called after a full commit.
1270ff780b91SHarshad Shirwadkar  */
1271e85c81baSXin Yin static void ext4_fc_cleanup(journal_t *journal, int full, tid_t tid)
1272ff780b91SHarshad Shirwadkar {
1273aa75f4d3SHarshad Shirwadkar 	struct super_block *sb = journal->j_private;
1274aa75f4d3SHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(sb);
127596e7c02dSDaejun Park 	struct ext4_inode_info *iter, *iter_n;
1276aa75f4d3SHarshad Shirwadkar 	struct ext4_fc_dentry_update *fc_dentry;
1277aa75f4d3SHarshad Shirwadkar 
1278aa75f4d3SHarshad Shirwadkar 	if (full && sbi->s_fc_bh)
1279aa75f4d3SHarshad Shirwadkar 		sbi->s_fc_bh = NULL;
1280aa75f4d3SHarshad Shirwadkar 
128108f4c42aSRitesh Harjani 	trace_ext4_fc_cleanup(journal, full, tid);
1282aa75f4d3SHarshad Shirwadkar 	jbd2_fc_release_bufs(journal);
1283aa75f4d3SHarshad Shirwadkar 
1284aa75f4d3SHarshad Shirwadkar 	spin_lock(&sbi->s_fc_lock);
128596e7c02dSDaejun Park 	list_for_each_entry_safe(iter, iter_n, &sbi->s_fc_q[FC_Q_MAIN],
128696e7c02dSDaejun Park 				 i_fc_list) {
1287aa75f4d3SHarshad Shirwadkar 		list_del_init(&iter->i_fc_list);
1288aa75f4d3SHarshad Shirwadkar 		ext4_clear_inode_state(&iter->vfs_inode,
1289aa75f4d3SHarshad Shirwadkar 				       EXT4_STATE_FC_COMMITTING);
1290bdc8a53aSXin Yin 		if (iter->i_sync_tid <= tid)
1291aa75f4d3SHarshad Shirwadkar 			ext4_fc_reset_inode(&iter->vfs_inode);
1292aa75f4d3SHarshad Shirwadkar 		/* Make sure EXT4_STATE_FC_COMMITTING bit is clear */
1293aa75f4d3SHarshad Shirwadkar 		smp_mb();
1294aa75f4d3SHarshad Shirwadkar #if (BITS_PER_LONG < 64)
1295aa75f4d3SHarshad Shirwadkar 		wake_up_bit(&iter->i_state_flags, EXT4_STATE_FC_COMMITTING);
1296aa75f4d3SHarshad Shirwadkar #else
1297aa75f4d3SHarshad Shirwadkar 		wake_up_bit(&iter->i_flags, EXT4_STATE_FC_COMMITTING);
1298aa75f4d3SHarshad Shirwadkar #endif
1299aa75f4d3SHarshad Shirwadkar 	}
1300aa75f4d3SHarshad Shirwadkar 
1301aa75f4d3SHarshad Shirwadkar 	while (!list_empty(&sbi->s_fc_dentry_q[FC_Q_MAIN])) {
1302aa75f4d3SHarshad Shirwadkar 		fc_dentry = list_first_entry(&sbi->s_fc_dentry_q[FC_Q_MAIN],
1303aa75f4d3SHarshad Shirwadkar 					     struct ext4_fc_dentry_update,
1304aa75f4d3SHarshad Shirwadkar 					     fcd_list);
1305aa75f4d3SHarshad Shirwadkar 		list_del_init(&fc_dentry->fcd_list);
1306b3998b3bSRitesh Harjani 		list_del_init(&fc_dentry->fcd_dilist);
1307aa75f4d3SHarshad Shirwadkar 		spin_unlock(&sbi->s_fc_lock);
1308aa75f4d3SHarshad Shirwadkar 
1309aa75f4d3SHarshad Shirwadkar 		if (fc_dentry->fcd_name.name &&
1310aa75f4d3SHarshad Shirwadkar 			fc_dentry->fcd_name.len > DNAME_INLINE_LEN)
1311aa75f4d3SHarshad Shirwadkar 			kfree(fc_dentry->fcd_name.name);
1312aa75f4d3SHarshad Shirwadkar 		kmem_cache_free(ext4_fc_dentry_cachep, fc_dentry);
1313aa75f4d3SHarshad Shirwadkar 		spin_lock(&sbi->s_fc_lock);
1314aa75f4d3SHarshad Shirwadkar 	}
1315aa75f4d3SHarshad Shirwadkar 
1316aa75f4d3SHarshad Shirwadkar 	list_splice_init(&sbi->s_fc_dentry_q[FC_Q_STAGING],
1317aa75f4d3SHarshad Shirwadkar 				&sbi->s_fc_dentry_q[FC_Q_MAIN]);
1318aa75f4d3SHarshad Shirwadkar 	list_splice_init(&sbi->s_fc_q[FC_Q_STAGING],
131931e203e0SDaejun Park 				&sbi->s_fc_q[FC_Q_MAIN]);
1320aa75f4d3SHarshad Shirwadkar 
1321e85c81baSXin Yin 	if (tid >= sbi->s_fc_ineligible_tid) {
1322e85c81baSXin Yin 		sbi->s_fc_ineligible_tid = 0;
13239b5f6c9bSHarshad Shirwadkar 		ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
1324e85c81baSXin Yin 	}
1325aa75f4d3SHarshad Shirwadkar 
1326aa75f4d3SHarshad Shirwadkar 	if (full)
1327aa75f4d3SHarshad Shirwadkar 		sbi->s_fc_bytes = 0;
1328aa75f4d3SHarshad Shirwadkar 	spin_unlock(&sbi->s_fc_lock);
1329aa75f4d3SHarshad Shirwadkar 	trace_ext4_fc_stats(sb);
1330ff780b91SHarshad Shirwadkar }
13316866d7b3SHarshad Shirwadkar 
13328016e29fSHarshad Shirwadkar /* Ext4 Replay Path Routines */
13338016e29fSHarshad Shirwadkar 
13348016e29fSHarshad Shirwadkar /* Helper struct for dentry replay routines */
13358016e29fSHarshad Shirwadkar struct dentry_info_args {
13368016e29fSHarshad Shirwadkar 	int parent_ino, dname_len, ino, inode_len;
13378016e29fSHarshad Shirwadkar 	char *dname;
13388016e29fSHarshad Shirwadkar };
13398016e29fSHarshad Shirwadkar 
13408016e29fSHarshad Shirwadkar static inline void tl_to_darg(struct dentry_info_args *darg,
1341a7ba36bcSHarshad Shirwadkar 			      struct ext4_fc_tl *tl, u8 *val)
13428016e29fSHarshad Shirwadkar {
1343a7ba36bcSHarshad Shirwadkar 	struct ext4_fc_dentry_info fcd;
13448016e29fSHarshad Shirwadkar 
1345a7ba36bcSHarshad Shirwadkar 	memcpy(&fcd, val, sizeof(fcd));
13468016e29fSHarshad Shirwadkar 
1347a7ba36bcSHarshad Shirwadkar 	darg->parent_ino = le32_to_cpu(fcd.fc_parent_ino);
1348a7ba36bcSHarshad Shirwadkar 	darg->ino = le32_to_cpu(fcd.fc_ino);
1349a7ba36bcSHarshad Shirwadkar 	darg->dname = val + offsetof(struct ext4_fc_dentry_info, fc_dname);
1350dcc58274SYe Bin 	darg->dname_len = tl->fc_len - sizeof(struct ext4_fc_dentry_info);
1351dcc58274SYe Bin }
1352dcc58274SYe Bin 
1353dcc58274SYe Bin static inline void ext4_fc_get_tl(struct ext4_fc_tl *tl, u8 *val)
1354dcc58274SYe Bin {
1355dcc58274SYe Bin 	memcpy(tl, val, EXT4_FC_TAG_BASE_LEN);
1356dcc58274SYe Bin 	tl->fc_len = le16_to_cpu(tl->fc_len);
1357dcc58274SYe Bin 	tl->fc_tag = le16_to_cpu(tl->fc_tag);
13588016e29fSHarshad Shirwadkar }
13598016e29fSHarshad Shirwadkar 
13608016e29fSHarshad Shirwadkar /* Unlink replay function */
1361a7ba36bcSHarshad Shirwadkar static int ext4_fc_replay_unlink(struct super_block *sb, struct ext4_fc_tl *tl,
1362a7ba36bcSHarshad Shirwadkar 				 u8 *val)
13638016e29fSHarshad Shirwadkar {
13648016e29fSHarshad Shirwadkar 	struct inode *inode, *old_parent;
13658016e29fSHarshad Shirwadkar 	struct qstr entry;
13668016e29fSHarshad Shirwadkar 	struct dentry_info_args darg;
13678016e29fSHarshad Shirwadkar 	int ret = 0;
13688016e29fSHarshad Shirwadkar 
1369a7ba36bcSHarshad Shirwadkar 	tl_to_darg(&darg, tl, val);
13708016e29fSHarshad Shirwadkar 
13718016e29fSHarshad Shirwadkar 	trace_ext4_fc_replay(sb, EXT4_FC_TAG_UNLINK, darg.ino,
13728016e29fSHarshad Shirwadkar 			darg.parent_ino, darg.dname_len);
13738016e29fSHarshad Shirwadkar 
13748016e29fSHarshad Shirwadkar 	entry.name = darg.dname;
13758016e29fSHarshad Shirwadkar 	entry.len = darg.dname_len;
13768016e29fSHarshad Shirwadkar 	inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL);
13778016e29fSHarshad Shirwadkar 
137823dd561aSYi Li 	if (IS_ERR(inode)) {
13794978c659SJan Kara 		ext4_debug("Inode %d not found", darg.ino);
13808016e29fSHarshad Shirwadkar 		return 0;
13818016e29fSHarshad Shirwadkar 	}
13828016e29fSHarshad Shirwadkar 
13838016e29fSHarshad Shirwadkar 	old_parent = ext4_iget(sb, darg.parent_ino,
13848016e29fSHarshad Shirwadkar 				EXT4_IGET_NORMAL);
138523dd561aSYi Li 	if (IS_ERR(old_parent)) {
13864978c659SJan Kara 		ext4_debug("Dir with inode %d not found", darg.parent_ino);
13878016e29fSHarshad Shirwadkar 		iput(inode);
13888016e29fSHarshad Shirwadkar 		return 0;
13898016e29fSHarshad Shirwadkar 	}
13908016e29fSHarshad Shirwadkar 
1391a80f7fcfSHarshad Shirwadkar 	ret = __ext4_unlink(NULL, old_parent, &entry, inode);
13928016e29fSHarshad Shirwadkar 	/* -ENOENT ok coz it might not exist anymore. */
13938016e29fSHarshad Shirwadkar 	if (ret == -ENOENT)
13948016e29fSHarshad Shirwadkar 		ret = 0;
13958016e29fSHarshad Shirwadkar 	iput(old_parent);
13968016e29fSHarshad Shirwadkar 	iput(inode);
13978016e29fSHarshad Shirwadkar 	return ret;
13988016e29fSHarshad Shirwadkar }
13998016e29fSHarshad Shirwadkar 
14008016e29fSHarshad Shirwadkar static int ext4_fc_replay_link_internal(struct super_block *sb,
14018016e29fSHarshad Shirwadkar 				struct dentry_info_args *darg,
14028016e29fSHarshad Shirwadkar 				struct inode *inode)
14038016e29fSHarshad Shirwadkar {
14048016e29fSHarshad Shirwadkar 	struct inode *dir = NULL;
14058016e29fSHarshad Shirwadkar 	struct dentry *dentry_dir = NULL, *dentry_inode = NULL;
14068016e29fSHarshad Shirwadkar 	struct qstr qstr_dname = QSTR_INIT(darg->dname, darg->dname_len);
14078016e29fSHarshad Shirwadkar 	int ret = 0;
14088016e29fSHarshad Shirwadkar 
14098016e29fSHarshad Shirwadkar 	dir = ext4_iget(sb, darg->parent_ino, EXT4_IGET_NORMAL);
14108016e29fSHarshad Shirwadkar 	if (IS_ERR(dir)) {
14114978c659SJan Kara 		ext4_debug("Dir with inode %d not found.", darg->parent_ino);
14128016e29fSHarshad Shirwadkar 		dir = NULL;
14138016e29fSHarshad Shirwadkar 		goto out;
14148016e29fSHarshad Shirwadkar 	}
14158016e29fSHarshad Shirwadkar 
14168016e29fSHarshad Shirwadkar 	dentry_dir = d_obtain_alias(dir);
14178016e29fSHarshad Shirwadkar 	if (IS_ERR(dentry_dir)) {
14184978c659SJan Kara 		ext4_debug("Failed to obtain dentry");
14198016e29fSHarshad Shirwadkar 		dentry_dir = NULL;
14208016e29fSHarshad Shirwadkar 		goto out;
14218016e29fSHarshad Shirwadkar 	}
14228016e29fSHarshad Shirwadkar 
14238016e29fSHarshad Shirwadkar 	dentry_inode = d_alloc(dentry_dir, &qstr_dname);
14248016e29fSHarshad Shirwadkar 	if (!dentry_inode) {
14254978c659SJan Kara 		ext4_debug("Inode dentry not created.");
14268016e29fSHarshad Shirwadkar 		ret = -ENOMEM;
14278016e29fSHarshad Shirwadkar 		goto out;
14288016e29fSHarshad Shirwadkar 	}
14298016e29fSHarshad Shirwadkar 
14308016e29fSHarshad Shirwadkar 	ret = __ext4_link(dir, inode, dentry_inode);
14318016e29fSHarshad Shirwadkar 	/*
14328016e29fSHarshad Shirwadkar 	 * It's possible that link already existed since data blocks
14338016e29fSHarshad Shirwadkar 	 * for the dir in question got persisted before we crashed OR
14348016e29fSHarshad Shirwadkar 	 * we replayed this tag and crashed before the entire replay
14358016e29fSHarshad Shirwadkar 	 * could complete.
14368016e29fSHarshad Shirwadkar 	 */
14378016e29fSHarshad Shirwadkar 	if (ret && ret != -EEXIST) {
14384978c659SJan Kara 		ext4_debug("Failed to link\n");
14398016e29fSHarshad Shirwadkar 		goto out;
14408016e29fSHarshad Shirwadkar 	}
14418016e29fSHarshad Shirwadkar 
14428016e29fSHarshad Shirwadkar 	ret = 0;
14438016e29fSHarshad Shirwadkar out:
14448016e29fSHarshad Shirwadkar 	if (dentry_dir) {
14458016e29fSHarshad Shirwadkar 		d_drop(dentry_dir);
14468016e29fSHarshad Shirwadkar 		dput(dentry_dir);
14478016e29fSHarshad Shirwadkar 	} else if (dir) {
14488016e29fSHarshad Shirwadkar 		iput(dir);
14498016e29fSHarshad Shirwadkar 	}
14508016e29fSHarshad Shirwadkar 	if (dentry_inode) {
14518016e29fSHarshad Shirwadkar 		d_drop(dentry_inode);
14528016e29fSHarshad Shirwadkar 		dput(dentry_inode);
14538016e29fSHarshad Shirwadkar 	}
14548016e29fSHarshad Shirwadkar 
14558016e29fSHarshad Shirwadkar 	return ret;
14568016e29fSHarshad Shirwadkar }
14578016e29fSHarshad Shirwadkar 
14588016e29fSHarshad Shirwadkar /* Link replay function */
1459a7ba36bcSHarshad Shirwadkar static int ext4_fc_replay_link(struct super_block *sb, struct ext4_fc_tl *tl,
1460a7ba36bcSHarshad Shirwadkar 			       u8 *val)
14618016e29fSHarshad Shirwadkar {
14628016e29fSHarshad Shirwadkar 	struct inode *inode;
14638016e29fSHarshad Shirwadkar 	struct dentry_info_args darg;
14648016e29fSHarshad Shirwadkar 	int ret = 0;
14658016e29fSHarshad Shirwadkar 
1466a7ba36bcSHarshad Shirwadkar 	tl_to_darg(&darg, tl, val);
14678016e29fSHarshad Shirwadkar 	trace_ext4_fc_replay(sb, EXT4_FC_TAG_LINK, darg.ino,
14688016e29fSHarshad Shirwadkar 			darg.parent_ino, darg.dname_len);
14698016e29fSHarshad Shirwadkar 
14708016e29fSHarshad Shirwadkar 	inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL);
147123dd561aSYi Li 	if (IS_ERR(inode)) {
14724978c659SJan Kara 		ext4_debug("Inode not found.");
14738016e29fSHarshad Shirwadkar 		return 0;
14748016e29fSHarshad Shirwadkar 	}
14758016e29fSHarshad Shirwadkar 
14768016e29fSHarshad Shirwadkar 	ret = ext4_fc_replay_link_internal(sb, &darg, inode);
14778016e29fSHarshad Shirwadkar 	iput(inode);
14788016e29fSHarshad Shirwadkar 	return ret;
14798016e29fSHarshad Shirwadkar }
14808016e29fSHarshad Shirwadkar 
14818016e29fSHarshad Shirwadkar /*
14828016e29fSHarshad Shirwadkar  * Record all the modified inodes during replay. We use this later to setup
14838016e29fSHarshad Shirwadkar  * block bitmaps correctly.
14848016e29fSHarshad Shirwadkar  */
14858016e29fSHarshad Shirwadkar static int ext4_fc_record_modified_inode(struct super_block *sb, int ino)
14868016e29fSHarshad Shirwadkar {
14878016e29fSHarshad Shirwadkar 	struct ext4_fc_replay_state *state;
14888016e29fSHarshad Shirwadkar 	int i;
14898016e29fSHarshad Shirwadkar 
14908016e29fSHarshad Shirwadkar 	state = &EXT4_SB(sb)->s_fc_replay_state;
14918016e29fSHarshad Shirwadkar 	for (i = 0; i < state->fc_modified_inodes_used; i++)
14928016e29fSHarshad Shirwadkar 		if (state->fc_modified_inodes[i] == ino)
14938016e29fSHarshad Shirwadkar 			return 0;
14948016e29fSHarshad Shirwadkar 	if (state->fc_modified_inodes_used == state->fc_modified_inodes_size) {
14959305721aSYe Bin 		int *fc_modified_inodes;
14969305721aSYe Bin 
14979305721aSYe Bin 		fc_modified_inodes = krealloc(state->fc_modified_inodes,
1498cdce59a1SRitesh Harjani 				sizeof(int) * (state->fc_modified_inodes_size +
1499cdce59a1SRitesh Harjani 				EXT4_FC_REPLAY_REALLOC_INCREMENT),
15008016e29fSHarshad Shirwadkar 				GFP_KERNEL);
15019305721aSYe Bin 		if (!fc_modified_inodes)
15028016e29fSHarshad Shirwadkar 			return -ENOMEM;
15039305721aSYe Bin 		state->fc_modified_inodes = fc_modified_inodes;
1504cdce59a1SRitesh Harjani 		state->fc_modified_inodes_size +=
1505cdce59a1SRitesh Harjani 			EXT4_FC_REPLAY_REALLOC_INCREMENT;
15068016e29fSHarshad Shirwadkar 	}
15078016e29fSHarshad Shirwadkar 	state->fc_modified_inodes[state->fc_modified_inodes_used++] = ino;
15088016e29fSHarshad Shirwadkar 	return 0;
15098016e29fSHarshad Shirwadkar }
15108016e29fSHarshad Shirwadkar 
15118016e29fSHarshad Shirwadkar /*
15128016e29fSHarshad Shirwadkar  * Inode replay function
15138016e29fSHarshad Shirwadkar  */
1514a7ba36bcSHarshad Shirwadkar static int ext4_fc_replay_inode(struct super_block *sb, struct ext4_fc_tl *tl,
1515a7ba36bcSHarshad Shirwadkar 				u8 *val)
15168016e29fSHarshad Shirwadkar {
1517a7ba36bcSHarshad Shirwadkar 	struct ext4_fc_inode fc_inode;
15188016e29fSHarshad Shirwadkar 	struct ext4_inode *raw_inode;
15198016e29fSHarshad Shirwadkar 	struct ext4_inode *raw_fc_inode;
15208016e29fSHarshad Shirwadkar 	struct inode *inode = NULL;
15218016e29fSHarshad Shirwadkar 	struct ext4_iloc iloc;
1522dcc58274SYe Bin 	int inode_len, ino, ret, tag = tl->fc_tag;
15238016e29fSHarshad Shirwadkar 	struct ext4_extent_header *eh;
1524*0d043351STheodore Ts'o 	size_t off_gen = offsetof(struct ext4_inode, i_generation);
15258016e29fSHarshad Shirwadkar 
1526a7ba36bcSHarshad Shirwadkar 	memcpy(&fc_inode, val, sizeof(fc_inode));
15278016e29fSHarshad Shirwadkar 
1528a7ba36bcSHarshad Shirwadkar 	ino = le32_to_cpu(fc_inode.fc_ino);
15298016e29fSHarshad Shirwadkar 	trace_ext4_fc_replay(sb, tag, ino, 0, 0);
15308016e29fSHarshad Shirwadkar 
15318016e29fSHarshad Shirwadkar 	inode = ext4_iget(sb, ino, EXT4_IGET_NORMAL);
153223dd561aSYi Li 	if (!IS_ERR(inode)) {
15338016e29fSHarshad Shirwadkar 		ext4_ext_clear_bb(inode);
15348016e29fSHarshad Shirwadkar 		iput(inode);
15358016e29fSHarshad Shirwadkar 	}
153623dd561aSYi Li 	inode = NULL;
15378016e29fSHarshad Shirwadkar 
1538cdce59a1SRitesh Harjani 	ret = ext4_fc_record_modified_inode(sb, ino);
1539cdce59a1SRitesh Harjani 	if (ret)
1540cdce59a1SRitesh Harjani 		goto out;
15418016e29fSHarshad Shirwadkar 
1542a7ba36bcSHarshad Shirwadkar 	raw_fc_inode = (struct ext4_inode *)
1543a7ba36bcSHarshad Shirwadkar 		(val + offsetof(struct ext4_fc_inode, fc_raw_inode));
15448016e29fSHarshad Shirwadkar 	ret = ext4_get_fc_inode_loc(sb, ino, &iloc);
15458016e29fSHarshad Shirwadkar 	if (ret)
15468016e29fSHarshad Shirwadkar 		goto out;
15478016e29fSHarshad Shirwadkar 
1548dcc58274SYe Bin 	inode_len = tl->fc_len - sizeof(struct ext4_fc_inode);
15498016e29fSHarshad Shirwadkar 	raw_inode = ext4_raw_inode(&iloc);
15508016e29fSHarshad Shirwadkar 
15518016e29fSHarshad Shirwadkar 	memcpy(raw_inode, raw_fc_inode, offsetof(struct ext4_inode, i_block));
1552*0d043351STheodore Ts'o 	memcpy((u8 *)raw_inode + off_gen, (u8 *)raw_fc_inode + off_gen,
1553*0d043351STheodore Ts'o 	       inode_len - off_gen);
15548016e29fSHarshad Shirwadkar 	if (le32_to_cpu(raw_inode->i_flags) & EXT4_EXTENTS_FL) {
15558016e29fSHarshad Shirwadkar 		eh = (struct ext4_extent_header *)(&raw_inode->i_block[0]);
15568016e29fSHarshad Shirwadkar 		if (eh->eh_magic != EXT4_EXT_MAGIC) {
15578016e29fSHarshad Shirwadkar 			memset(eh, 0, sizeof(*eh));
15588016e29fSHarshad Shirwadkar 			eh->eh_magic = EXT4_EXT_MAGIC;
15598016e29fSHarshad Shirwadkar 			eh->eh_max = cpu_to_le16(
15608016e29fSHarshad Shirwadkar 				(sizeof(raw_inode->i_block) -
15618016e29fSHarshad Shirwadkar 				 sizeof(struct ext4_extent_header))
15628016e29fSHarshad Shirwadkar 				 / sizeof(struct ext4_extent));
15638016e29fSHarshad Shirwadkar 		}
15648016e29fSHarshad Shirwadkar 	} else if (le32_to_cpu(raw_inode->i_flags) & EXT4_INLINE_DATA_FL) {
15658016e29fSHarshad Shirwadkar 		memcpy(raw_inode->i_block, raw_fc_inode->i_block,
15668016e29fSHarshad Shirwadkar 			sizeof(raw_inode->i_block));
15678016e29fSHarshad Shirwadkar 	}
15688016e29fSHarshad Shirwadkar 
15698016e29fSHarshad Shirwadkar 	/* Immediately update the inode on disk. */
15708016e29fSHarshad Shirwadkar 	ret = ext4_handle_dirty_metadata(NULL, NULL, iloc.bh);
15718016e29fSHarshad Shirwadkar 	if (ret)
15728016e29fSHarshad Shirwadkar 		goto out;
15738016e29fSHarshad Shirwadkar 	ret = sync_dirty_buffer(iloc.bh);
15748016e29fSHarshad Shirwadkar 	if (ret)
15758016e29fSHarshad Shirwadkar 		goto out;
15768016e29fSHarshad Shirwadkar 	ret = ext4_mark_inode_used(sb, ino);
15778016e29fSHarshad Shirwadkar 	if (ret)
15788016e29fSHarshad Shirwadkar 		goto out;
15798016e29fSHarshad Shirwadkar 
15808016e29fSHarshad Shirwadkar 	/* Given that we just wrote the inode on disk, this SHOULD succeed. */
15818016e29fSHarshad Shirwadkar 	inode = ext4_iget(sb, ino, EXT4_IGET_NORMAL);
158223dd561aSYi Li 	if (IS_ERR(inode)) {
15834978c659SJan Kara 		ext4_debug("Inode not found.");
15848016e29fSHarshad Shirwadkar 		return -EFSCORRUPTED;
15858016e29fSHarshad Shirwadkar 	}
15868016e29fSHarshad Shirwadkar 
15878016e29fSHarshad Shirwadkar 	/*
15888016e29fSHarshad Shirwadkar 	 * Our allocator could have made different decisions than before
15898016e29fSHarshad Shirwadkar 	 * crashing. This should be fixed but until then, we calculate
15908016e29fSHarshad Shirwadkar 	 * the number of blocks the inode.
15918016e29fSHarshad Shirwadkar 	 */
15921ebf2178SHarshad Shirwadkar 	if (!ext4_test_inode_flag(inode, EXT4_INODE_INLINE_DATA))
15938016e29fSHarshad Shirwadkar 		ext4_ext_replay_set_iblocks(inode);
15948016e29fSHarshad Shirwadkar 
15958016e29fSHarshad Shirwadkar 	inode->i_generation = le32_to_cpu(ext4_raw_inode(&iloc)->i_generation);
15968016e29fSHarshad Shirwadkar 	ext4_reset_inode_seed(inode);
15978016e29fSHarshad Shirwadkar 
15988016e29fSHarshad Shirwadkar 	ext4_inode_csum_set(inode, ext4_raw_inode(&iloc), EXT4_I(inode));
15998016e29fSHarshad Shirwadkar 	ret = ext4_handle_dirty_metadata(NULL, NULL, iloc.bh);
16008016e29fSHarshad Shirwadkar 	sync_dirty_buffer(iloc.bh);
16018016e29fSHarshad Shirwadkar 	brelse(iloc.bh);
16028016e29fSHarshad Shirwadkar out:
16038016e29fSHarshad Shirwadkar 	iput(inode);
16048016e29fSHarshad Shirwadkar 	if (!ret)
1605c6bf3f0eSChristoph Hellwig 		blkdev_issue_flush(sb->s_bdev);
16068016e29fSHarshad Shirwadkar 
16078016e29fSHarshad Shirwadkar 	return 0;
16088016e29fSHarshad Shirwadkar }
16098016e29fSHarshad Shirwadkar 
16108016e29fSHarshad Shirwadkar /*
16118016e29fSHarshad Shirwadkar  * Dentry create replay function.
16128016e29fSHarshad Shirwadkar  *
16138016e29fSHarshad Shirwadkar  * EXT4_FC_TAG_CREAT is preceded by EXT4_FC_TAG_INODE_FULL. Which means, the
16148016e29fSHarshad Shirwadkar  * inode for which we are trying to create a dentry here, should already have
16158016e29fSHarshad Shirwadkar  * been replayed before we start here.
16168016e29fSHarshad Shirwadkar  */
1617a7ba36bcSHarshad Shirwadkar static int ext4_fc_replay_create(struct super_block *sb, struct ext4_fc_tl *tl,
1618a7ba36bcSHarshad Shirwadkar 				 u8 *val)
16198016e29fSHarshad Shirwadkar {
16208016e29fSHarshad Shirwadkar 	int ret = 0;
16218016e29fSHarshad Shirwadkar 	struct inode *inode = NULL;
16228016e29fSHarshad Shirwadkar 	struct inode *dir = NULL;
16238016e29fSHarshad Shirwadkar 	struct dentry_info_args darg;
16248016e29fSHarshad Shirwadkar 
1625a7ba36bcSHarshad Shirwadkar 	tl_to_darg(&darg, tl, val);
16268016e29fSHarshad Shirwadkar 
16278016e29fSHarshad Shirwadkar 	trace_ext4_fc_replay(sb, EXT4_FC_TAG_CREAT, darg.ino,
16288016e29fSHarshad Shirwadkar 			darg.parent_ino, darg.dname_len);
16298016e29fSHarshad Shirwadkar 
16308016e29fSHarshad Shirwadkar 	/* This takes care of update group descriptor and other metadata */
16318016e29fSHarshad Shirwadkar 	ret = ext4_mark_inode_used(sb, darg.ino);
16328016e29fSHarshad Shirwadkar 	if (ret)
16338016e29fSHarshad Shirwadkar 		goto out;
16348016e29fSHarshad Shirwadkar 
16358016e29fSHarshad Shirwadkar 	inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL);
163623dd561aSYi Li 	if (IS_ERR(inode)) {
16374978c659SJan Kara 		ext4_debug("inode %d not found.", darg.ino);
16388016e29fSHarshad Shirwadkar 		inode = NULL;
16398016e29fSHarshad Shirwadkar 		ret = -EINVAL;
16408016e29fSHarshad Shirwadkar 		goto out;
16418016e29fSHarshad Shirwadkar 	}
16428016e29fSHarshad Shirwadkar 
16438016e29fSHarshad Shirwadkar 	if (S_ISDIR(inode->i_mode)) {
16448016e29fSHarshad Shirwadkar 		/*
16458016e29fSHarshad Shirwadkar 		 * If we are creating a directory, we need to make sure that the
16468016e29fSHarshad Shirwadkar 		 * dot and dot dot dirents are setup properly.
16478016e29fSHarshad Shirwadkar 		 */
16488016e29fSHarshad Shirwadkar 		dir = ext4_iget(sb, darg.parent_ino, EXT4_IGET_NORMAL);
164923dd561aSYi Li 		if (IS_ERR(dir)) {
16504978c659SJan Kara 			ext4_debug("Dir %d not found.", darg.ino);
16518016e29fSHarshad Shirwadkar 			goto out;
16528016e29fSHarshad Shirwadkar 		}
16538016e29fSHarshad Shirwadkar 		ret = ext4_init_new_dir(NULL, dir, inode);
16548016e29fSHarshad Shirwadkar 		iput(dir);
16558016e29fSHarshad Shirwadkar 		if (ret) {
16568016e29fSHarshad Shirwadkar 			ret = 0;
16578016e29fSHarshad Shirwadkar 			goto out;
16588016e29fSHarshad Shirwadkar 		}
16598016e29fSHarshad Shirwadkar 	}
16608016e29fSHarshad Shirwadkar 	ret = ext4_fc_replay_link_internal(sb, &darg, inode);
16618016e29fSHarshad Shirwadkar 	if (ret)
16628016e29fSHarshad Shirwadkar 		goto out;
16638016e29fSHarshad Shirwadkar 	set_nlink(inode, 1);
16648016e29fSHarshad Shirwadkar 	ext4_mark_inode_dirty(NULL, inode);
16658016e29fSHarshad Shirwadkar out:
16668016e29fSHarshad Shirwadkar 	iput(inode);
16678016e29fSHarshad Shirwadkar 	return ret;
16688016e29fSHarshad Shirwadkar }
16698016e29fSHarshad Shirwadkar 
16708016e29fSHarshad Shirwadkar /*
1671599ea31dSXin Yin  * Record physical disk regions which are in use as per fast commit area,
1672599ea31dSXin Yin  * and used by inodes during replay phase. Our simple replay phase
1673599ea31dSXin Yin  * allocator excludes these regions from allocation.
16748016e29fSHarshad Shirwadkar  */
1675599ea31dSXin Yin int ext4_fc_record_regions(struct super_block *sb, int ino,
1676599ea31dSXin Yin 		ext4_lblk_t lblk, ext4_fsblk_t pblk, int len, int replay)
16778016e29fSHarshad Shirwadkar {
16788016e29fSHarshad Shirwadkar 	struct ext4_fc_replay_state *state;
16798016e29fSHarshad Shirwadkar 	struct ext4_fc_alloc_region *region;
16808016e29fSHarshad Shirwadkar 
16818016e29fSHarshad Shirwadkar 	state = &EXT4_SB(sb)->s_fc_replay_state;
1682599ea31dSXin Yin 	/*
1683599ea31dSXin Yin 	 * during replay phase, the fc_regions_valid may not same as
1684599ea31dSXin Yin 	 * fc_regions_used, update it when do new additions.
1685599ea31dSXin Yin 	 */
1686599ea31dSXin Yin 	if (replay && state->fc_regions_used != state->fc_regions_valid)
1687599ea31dSXin Yin 		state->fc_regions_used = state->fc_regions_valid;
16888016e29fSHarshad Shirwadkar 	if (state->fc_regions_used == state->fc_regions_size) {
16897069d105SYe Bin 		struct ext4_fc_alloc_region *fc_regions;
16907069d105SYe Bin 
16917069d105SYe Bin 		fc_regions = krealloc(state->fc_regions,
169227cd4978SYe Bin 				      sizeof(struct ext4_fc_alloc_region) *
169327cd4978SYe Bin 				      (state->fc_regions_size +
169427cd4978SYe Bin 				       EXT4_FC_REPLAY_REALLOC_INCREMENT),
16958016e29fSHarshad Shirwadkar 				      GFP_KERNEL);
16967069d105SYe Bin 		if (!fc_regions)
16978016e29fSHarshad Shirwadkar 			return -ENOMEM;
169827cd4978SYe Bin 		state->fc_regions_size +=
169927cd4978SYe Bin 			EXT4_FC_REPLAY_REALLOC_INCREMENT;
17007069d105SYe Bin 		state->fc_regions = fc_regions;
17018016e29fSHarshad Shirwadkar 	}
17028016e29fSHarshad Shirwadkar 	region = &state->fc_regions[state->fc_regions_used++];
17038016e29fSHarshad Shirwadkar 	region->ino = ino;
17048016e29fSHarshad Shirwadkar 	region->lblk = lblk;
17058016e29fSHarshad Shirwadkar 	region->pblk = pblk;
17068016e29fSHarshad Shirwadkar 	region->len = len;
17078016e29fSHarshad Shirwadkar 
1708599ea31dSXin Yin 	if (replay)
1709599ea31dSXin Yin 		state->fc_regions_valid++;
1710599ea31dSXin Yin 
17118016e29fSHarshad Shirwadkar 	return 0;
17128016e29fSHarshad Shirwadkar }
17138016e29fSHarshad Shirwadkar 
17148016e29fSHarshad Shirwadkar /* Replay add range tag */
17158016e29fSHarshad Shirwadkar static int ext4_fc_replay_add_range(struct super_block *sb,
1716a7ba36bcSHarshad Shirwadkar 				    struct ext4_fc_tl *tl, u8 *val)
17178016e29fSHarshad Shirwadkar {
1718a7ba36bcSHarshad Shirwadkar 	struct ext4_fc_add_range fc_add_ex;
17198016e29fSHarshad Shirwadkar 	struct ext4_extent newex, *ex;
17208016e29fSHarshad Shirwadkar 	struct inode *inode;
17218016e29fSHarshad Shirwadkar 	ext4_lblk_t start, cur;
17228016e29fSHarshad Shirwadkar 	int remaining, len;
17238016e29fSHarshad Shirwadkar 	ext4_fsblk_t start_pblk;
17248016e29fSHarshad Shirwadkar 	struct ext4_map_blocks map;
17258016e29fSHarshad Shirwadkar 	struct ext4_ext_path *path = NULL;
17268016e29fSHarshad Shirwadkar 	int ret;
17278016e29fSHarshad Shirwadkar 
1728a7ba36bcSHarshad Shirwadkar 	memcpy(&fc_add_ex, val, sizeof(fc_add_ex));
1729a7ba36bcSHarshad Shirwadkar 	ex = (struct ext4_extent *)&fc_add_ex.fc_ex;
17308016e29fSHarshad Shirwadkar 
17318016e29fSHarshad Shirwadkar 	trace_ext4_fc_replay(sb, EXT4_FC_TAG_ADD_RANGE,
1732a7ba36bcSHarshad Shirwadkar 		le32_to_cpu(fc_add_ex.fc_ino), le32_to_cpu(ex->ee_block),
17338016e29fSHarshad Shirwadkar 		ext4_ext_get_actual_len(ex));
17348016e29fSHarshad Shirwadkar 
1735a7ba36bcSHarshad Shirwadkar 	inode = ext4_iget(sb, le32_to_cpu(fc_add_ex.fc_ino), EXT4_IGET_NORMAL);
173623dd561aSYi Li 	if (IS_ERR(inode)) {
17374978c659SJan Kara 		ext4_debug("Inode not found.");
17388016e29fSHarshad Shirwadkar 		return 0;
17398016e29fSHarshad Shirwadkar 	}
17408016e29fSHarshad Shirwadkar 
17418016e29fSHarshad Shirwadkar 	ret = ext4_fc_record_modified_inode(sb, inode->i_ino);
1742cdce59a1SRitesh Harjani 	if (ret)
1743cdce59a1SRitesh Harjani 		goto out;
17448016e29fSHarshad Shirwadkar 
17458016e29fSHarshad Shirwadkar 	start = le32_to_cpu(ex->ee_block);
17468016e29fSHarshad Shirwadkar 	start_pblk = ext4_ext_pblock(ex);
17478016e29fSHarshad Shirwadkar 	len = ext4_ext_get_actual_len(ex);
17488016e29fSHarshad Shirwadkar 
17498016e29fSHarshad Shirwadkar 	cur = start;
17508016e29fSHarshad Shirwadkar 	remaining = len;
17514978c659SJan Kara 	ext4_debug("ADD_RANGE, lblk %d, pblk %lld, len %d, unwritten %d, inode %ld\n",
17528016e29fSHarshad Shirwadkar 		  start, start_pblk, len, ext4_ext_is_unwritten(ex),
17538016e29fSHarshad Shirwadkar 		  inode->i_ino);
17548016e29fSHarshad Shirwadkar 
17558016e29fSHarshad Shirwadkar 	while (remaining > 0) {
17568016e29fSHarshad Shirwadkar 		map.m_lblk = cur;
17578016e29fSHarshad Shirwadkar 		map.m_len = remaining;
17588016e29fSHarshad Shirwadkar 		map.m_pblk = 0;
17598016e29fSHarshad Shirwadkar 		ret = ext4_map_blocks(NULL, inode, &map, 0);
17608016e29fSHarshad Shirwadkar 
1761cdce59a1SRitesh Harjani 		if (ret < 0)
1762cdce59a1SRitesh Harjani 			goto out;
17638016e29fSHarshad Shirwadkar 
17648016e29fSHarshad Shirwadkar 		if (ret == 0) {
17658016e29fSHarshad Shirwadkar 			/* Range is not mapped */
17668016e29fSHarshad Shirwadkar 			path = ext4_find_extent(inode, cur, NULL, 0);
1767cdce59a1SRitesh Harjani 			if (IS_ERR(path))
1768cdce59a1SRitesh Harjani 				goto out;
17698016e29fSHarshad Shirwadkar 			memset(&newex, 0, sizeof(newex));
17708016e29fSHarshad Shirwadkar 			newex.ee_block = cpu_to_le32(cur);
17718016e29fSHarshad Shirwadkar 			ext4_ext_store_pblock(
17728016e29fSHarshad Shirwadkar 				&newex, start_pblk + cur - start);
17738016e29fSHarshad Shirwadkar 			newex.ee_len = cpu_to_le16(map.m_len);
17748016e29fSHarshad Shirwadkar 			if (ext4_ext_is_unwritten(ex))
17758016e29fSHarshad Shirwadkar 				ext4_ext_mark_unwritten(&newex);
17768016e29fSHarshad Shirwadkar 			down_write(&EXT4_I(inode)->i_data_sem);
17778016e29fSHarshad Shirwadkar 			ret = ext4_ext_insert_extent(
17788016e29fSHarshad Shirwadkar 				NULL, inode, &path, &newex, 0);
17798016e29fSHarshad Shirwadkar 			up_write((&EXT4_I(inode)->i_data_sem));
17807ff5fddaSYe Bin 			ext4_free_ext_path(path);
1781cdce59a1SRitesh Harjani 			if (ret)
1782cdce59a1SRitesh Harjani 				goto out;
17838016e29fSHarshad Shirwadkar 			goto next;
17848016e29fSHarshad Shirwadkar 		}
17858016e29fSHarshad Shirwadkar 
17868016e29fSHarshad Shirwadkar 		if (start_pblk + cur - start != map.m_pblk) {
17878016e29fSHarshad Shirwadkar 			/*
17888016e29fSHarshad Shirwadkar 			 * Logical to physical mapping changed. This can happen
17898016e29fSHarshad Shirwadkar 			 * if this range was removed and then reallocated to
17908016e29fSHarshad Shirwadkar 			 * map to new physical blocks during a fast commit.
17918016e29fSHarshad Shirwadkar 			 */
17928016e29fSHarshad Shirwadkar 			ret = ext4_ext_replay_update_ex(inode, cur, map.m_len,
17938016e29fSHarshad Shirwadkar 					ext4_ext_is_unwritten(ex),
17948016e29fSHarshad Shirwadkar 					start_pblk + cur - start);
1795cdce59a1SRitesh Harjani 			if (ret)
1796cdce59a1SRitesh Harjani 				goto out;
17978016e29fSHarshad Shirwadkar 			/*
17988016e29fSHarshad Shirwadkar 			 * Mark the old blocks as free since they aren't used
17998016e29fSHarshad Shirwadkar 			 * anymore. We maintain an array of all the modified
18008016e29fSHarshad Shirwadkar 			 * inodes. In case these blocks are still used at either
18018016e29fSHarshad Shirwadkar 			 * a different logical range in the same inode or in
18028016e29fSHarshad Shirwadkar 			 * some different inode, we will mark them as allocated
18038016e29fSHarshad Shirwadkar 			 * at the end of the FC replay using our array of
18048016e29fSHarshad Shirwadkar 			 * modified inodes.
18058016e29fSHarshad Shirwadkar 			 */
18068016e29fSHarshad Shirwadkar 			ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, 0);
18078016e29fSHarshad Shirwadkar 			goto next;
18088016e29fSHarshad Shirwadkar 		}
18098016e29fSHarshad Shirwadkar 
18108016e29fSHarshad Shirwadkar 		/* Range is mapped and needs a state change */
18114978c659SJan Kara 		ext4_debug("Converting from %ld to %d %lld",
18128016e29fSHarshad Shirwadkar 				map.m_flags & EXT4_MAP_UNWRITTEN,
18138016e29fSHarshad Shirwadkar 			ext4_ext_is_unwritten(ex), map.m_pblk);
18148016e29fSHarshad Shirwadkar 		ret = ext4_ext_replay_update_ex(inode, cur, map.m_len,
18158016e29fSHarshad Shirwadkar 					ext4_ext_is_unwritten(ex), map.m_pblk);
1816cdce59a1SRitesh Harjani 		if (ret)
1817cdce59a1SRitesh Harjani 			goto out;
18188016e29fSHarshad Shirwadkar 		/*
18198016e29fSHarshad Shirwadkar 		 * We may have split the extent tree while toggling the state.
18208016e29fSHarshad Shirwadkar 		 * Try to shrink the extent tree now.
18218016e29fSHarshad Shirwadkar 		 */
18228016e29fSHarshad Shirwadkar 		ext4_ext_replay_shrink_inode(inode, start + len);
18238016e29fSHarshad Shirwadkar next:
18248016e29fSHarshad Shirwadkar 		cur += map.m_len;
18258016e29fSHarshad Shirwadkar 		remaining -= map.m_len;
18268016e29fSHarshad Shirwadkar 	}
18278016e29fSHarshad Shirwadkar 	ext4_ext_replay_shrink_inode(inode, i_size_read(inode) >>
18288016e29fSHarshad Shirwadkar 					sb->s_blocksize_bits);
1829cdce59a1SRitesh Harjani out:
18308016e29fSHarshad Shirwadkar 	iput(inode);
18318016e29fSHarshad Shirwadkar 	return 0;
18328016e29fSHarshad Shirwadkar }
18338016e29fSHarshad Shirwadkar 
18348016e29fSHarshad Shirwadkar /* Replay DEL_RANGE tag */
18358016e29fSHarshad Shirwadkar static int
1836a7ba36bcSHarshad Shirwadkar ext4_fc_replay_del_range(struct super_block *sb, struct ext4_fc_tl *tl,
1837a7ba36bcSHarshad Shirwadkar 			 u8 *val)
18388016e29fSHarshad Shirwadkar {
18398016e29fSHarshad Shirwadkar 	struct inode *inode;
1840a7ba36bcSHarshad Shirwadkar 	struct ext4_fc_del_range lrange;
18418016e29fSHarshad Shirwadkar 	struct ext4_map_blocks map;
18428016e29fSHarshad Shirwadkar 	ext4_lblk_t cur, remaining;
18438016e29fSHarshad Shirwadkar 	int ret;
18448016e29fSHarshad Shirwadkar 
1845a7ba36bcSHarshad Shirwadkar 	memcpy(&lrange, val, sizeof(lrange));
1846a7ba36bcSHarshad Shirwadkar 	cur = le32_to_cpu(lrange.fc_lblk);
1847a7ba36bcSHarshad Shirwadkar 	remaining = le32_to_cpu(lrange.fc_len);
18488016e29fSHarshad Shirwadkar 
18498016e29fSHarshad Shirwadkar 	trace_ext4_fc_replay(sb, EXT4_FC_TAG_DEL_RANGE,
1850a7ba36bcSHarshad Shirwadkar 		le32_to_cpu(lrange.fc_ino), cur, remaining);
18518016e29fSHarshad Shirwadkar 
1852a7ba36bcSHarshad Shirwadkar 	inode = ext4_iget(sb, le32_to_cpu(lrange.fc_ino), EXT4_IGET_NORMAL);
185323dd561aSYi Li 	if (IS_ERR(inode)) {
18544978c659SJan Kara 		ext4_debug("Inode %d not found", le32_to_cpu(lrange.fc_ino));
18558016e29fSHarshad Shirwadkar 		return 0;
18568016e29fSHarshad Shirwadkar 	}
18578016e29fSHarshad Shirwadkar 
18588016e29fSHarshad Shirwadkar 	ret = ext4_fc_record_modified_inode(sb, inode->i_ino);
1859cdce59a1SRitesh Harjani 	if (ret)
1860cdce59a1SRitesh Harjani 		goto out;
18618016e29fSHarshad Shirwadkar 
18624978c659SJan Kara 	ext4_debug("DEL_RANGE, inode %ld, lblk %d, len %d\n",
1863a7ba36bcSHarshad Shirwadkar 			inode->i_ino, le32_to_cpu(lrange.fc_lblk),
1864a7ba36bcSHarshad Shirwadkar 			le32_to_cpu(lrange.fc_len));
18658016e29fSHarshad Shirwadkar 	while (remaining > 0) {
18668016e29fSHarshad Shirwadkar 		map.m_lblk = cur;
18678016e29fSHarshad Shirwadkar 		map.m_len = remaining;
18688016e29fSHarshad Shirwadkar 
18698016e29fSHarshad Shirwadkar 		ret = ext4_map_blocks(NULL, inode, &map, 0);
1870cdce59a1SRitesh Harjani 		if (ret < 0)
1871cdce59a1SRitesh Harjani 			goto out;
18728016e29fSHarshad Shirwadkar 		if (ret > 0) {
18738016e29fSHarshad Shirwadkar 			remaining -= ret;
18748016e29fSHarshad Shirwadkar 			cur += ret;
18758016e29fSHarshad Shirwadkar 			ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, 0);
18768016e29fSHarshad Shirwadkar 		} else {
18778016e29fSHarshad Shirwadkar 			remaining -= map.m_len;
18788016e29fSHarshad Shirwadkar 			cur += map.m_len;
18798016e29fSHarshad Shirwadkar 		}
18808016e29fSHarshad Shirwadkar 	}
18818016e29fSHarshad Shirwadkar 
18820b5b5a62SXin Yin 	down_write(&EXT4_I(inode)->i_data_sem);
18838fca8a2bSXin Yin 	ret = ext4_ext_remove_space(inode, le32_to_cpu(lrange.fc_lblk),
18848fca8a2bSXin Yin 				le32_to_cpu(lrange.fc_lblk) +
18858fca8a2bSXin Yin 				le32_to_cpu(lrange.fc_len) - 1);
18860b5b5a62SXin Yin 	up_write(&EXT4_I(inode)->i_data_sem);
1887cdce59a1SRitesh Harjani 	if (ret)
1888cdce59a1SRitesh Harjani 		goto out;
18898016e29fSHarshad Shirwadkar 	ext4_ext_replay_shrink_inode(inode,
18908016e29fSHarshad Shirwadkar 		i_size_read(inode) >> sb->s_blocksize_bits);
18918016e29fSHarshad Shirwadkar 	ext4_mark_inode_dirty(NULL, inode);
1892cdce59a1SRitesh Harjani out:
18938016e29fSHarshad Shirwadkar 	iput(inode);
18948016e29fSHarshad Shirwadkar 	return 0;
18958016e29fSHarshad Shirwadkar }
18968016e29fSHarshad Shirwadkar 
18978016e29fSHarshad Shirwadkar static void ext4_fc_set_bitmaps_and_counters(struct super_block *sb)
18988016e29fSHarshad Shirwadkar {
18998016e29fSHarshad Shirwadkar 	struct ext4_fc_replay_state *state;
19008016e29fSHarshad Shirwadkar 	struct inode *inode;
19018016e29fSHarshad Shirwadkar 	struct ext4_ext_path *path = NULL;
19028016e29fSHarshad Shirwadkar 	struct ext4_map_blocks map;
19038016e29fSHarshad Shirwadkar 	int i, ret, j;
19048016e29fSHarshad Shirwadkar 	ext4_lblk_t cur, end;
19058016e29fSHarshad Shirwadkar 
19068016e29fSHarshad Shirwadkar 	state = &EXT4_SB(sb)->s_fc_replay_state;
19078016e29fSHarshad Shirwadkar 	for (i = 0; i < state->fc_modified_inodes_used; i++) {
19088016e29fSHarshad Shirwadkar 		inode = ext4_iget(sb, state->fc_modified_inodes[i],
19098016e29fSHarshad Shirwadkar 			EXT4_IGET_NORMAL);
191023dd561aSYi Li 		if (IS_ERR(inode)) {
19114978c659SJan Kara 			ext4_debug("Inode %d not found.",
19128016e29fSHarshad Shirwadkar 				state->fc_modified_inodes[i]);
19138016e29fSHarshad Shirwadkar 			continue;
19148016e29fSHarshad Shirwadkar 		}
19158016e29fSHarshad Shirwadkar 		cur = 0;
19168016e29fSHarshad Shirwadkar 		end = EXT_MAX_BLOCKS;
19171ebf2178SHarshad Shirwadkar 		if (ext4_test_inode_flag(inode, EXT4_INODE_INLINE_DATA)) {
19181ebf2178SHarshad Shirwadkar 			iput(inode);
19191ebf2178SHarshad Shirwadkar 			continue;
19201ebf2178SHarshad Shirwadkar 		}
19218016e29fSHarshad Shirwadkar 		while (cur < end) {
19228016e29fSHarshad Shirwadkar 			map.m_lblk = cur;
19238016e29fSHarshad Shirwadkar 			map.m_len = end - cur;
19248016e29fSHarshad Shirwadkar 
19258016e29fSHarshad Shirwadkar 			ret = ext4_map_blocks(NULL, inode, &map, 0);
19268016e29fSHarshad Shirwadkar 			if (ret < 0)
19278016e29fSHarshad Shirwadkar 				break;
19288016e29fSHarshad Shirwadkar 
19298016e29fSHarshad Shirwadkar 			if (ret > 0) {
19308016e29fSHarshad Shirwadkar 				path = ext4_find_extent(inode, map.m_lblk, NULL, 0);
193123dd561aSYi Li 				if (!IS_ERR(path)) {
19328016e29fSHarshad Shirwadkar 					for (j = 0; j < path->p_depth; j++)
19338016e29fSHarshad Shirwadkar 						ext4_mb_mark_bb(inode->i_sb,
19348016e29fSHarshad Shirwadkar 							path[j].p_block, 1, 1);
19357ff5fddaSYe Bin 					ext4_free_ext_path(path);
19368016e29fSHarshad Shirwadkar 				}
19378016e29fSHarshad Shirwadkar 				cur += ret;
19388016e29fSHarshad Shirwadkar 				ext4_mb_mark_bb(inode->i_sb, map.m_pblk,
19398016e29fSHarshad Shirwadkar 							map.m_len, 1);
19408016e29fSHarshad Shirwadkar 			} else {
19418016e29fSHarshad Shirwadkar 				cur = cur + (map.m_len ? map.m_len : 1);
19428016e29fSHarshad Shirwadkar 			}
19438016e29fSHarshad Shirwadkar 		}
19448016e29fSHarshad Shirwadkar 		iput(inode);
19458016e29fSHarshad Shirwadkar 	}
19468016e29fSHarshad Shirwadkar }
19478016e29fSHarshad Shirwadkar 
19488016e29fSHarshad Shirwadkar /*
19498016e29fSHarshad Shirwadkar  * Check if block is in excluded regions for block allocation. The simple
19508016e29fSHarshad Shirwadkar  * allocator that runs during replay phase is calls this function to see
19518016e29fSHarshad Shirwadkar  * if it is okay to use a block.
19528016e29fSHarshad Shirwadkar  */
19538016e29fSHarshad Shirwadkar bool ext4_fc_replay_check_excluded(struct super_block *sb, ext4_fsblk_t blk)
19548016e29fSHarshad Shirwadkar {
19558016e29fSHarshad Shirwadkar 	int i;
19568016e29fSHarshad Shirwadkar 	struct ext4_fc_replay_state *state;
19578016e29fSHarshad Shirwadkar 
19588016e29fSHarshad Shirwadkar 	state = &EXT4_SB(sb)->s_fc_replay_state;
19598016e29fSHarshad Shirwadkar 	for (i = 0; i < state->fc_regions_valid; i++) {
19608016e29fSHarshad Shirwadkar 		if (state->fc_regions[i].ino == 0 ||
19618016e29fSHarshad Shirwadkar 			state->fc_regions[i].len == 0)
19628016e29fSHarshad Shirwadkar 			continue;
1963dbaafbadSRitesh Harjani 		if (in_range(blk, state->fc_regions[i].pblk,
1964dbaafbadSRitesh Harjani 					state->fc_regions[i].len))
19658016e29fSHarshad Shirwadkar 			return true;
19668016e29fSHarshad Shirwadkar 	}
19678016e29fSHarshad Shirwadkar 	return false;
19688016e29fSHarshad Shirwadkar }
19698016e29fSHarshad Shirwadkar 
19708016e29fSHarshad Shirwadkar /* Cleanup function called after replay */
19718016e29fSHarshad Shirwadkar void ext4_fc_replay_cleanup(struct super_block *sb)
19728016e29fSHarshad Shirwadkar {
19738016e29fSHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(sb);
19748016e29fSHarshad Shirwadkar 
19758016e29fSHarshad Shirwadkar 	sbi->s_mount_state &= ~EXT4_FC_REPLAY;
19768016e29fSHarshad Shirwadkar 	kfree(sbi->s_fc_replay_state.fc_regions);
19778016e29fSHarshad Shirwadkar 	kfree(sbi->s_fc_replay_state.fc_modified_inodes);
19788016e29fSHarshad Shirwadkar }
19798016e29fSHarshad Shirwadkar 
19801b45cc5cSYe Bin static inline bool ext4_fc_tag_len_isvalid(struct ext4_fc_tl *tl,
19811b45cc5cSYe Bin 					   u8 *val, u8 *end)
19821b45cc5cSYe Bin {
19831b45cc5cSYe Bin 	if (val + tl->fc_len > end)
19841b45cc5cSYe Bin 		return false;
19851b45cc5cSYe Bin 
19861b45cc5cSYe Bin 	/* Here only check ADD_RANGE/TAIL/HEAD which will read data when do
19871b45cc5cSYe Bin 	 * journal rescan before do CRC check. Other tags length check will
19881b45cc5cSYe Bin 	 * rely on CRC check.
19891b45cc5cSYe Bin 	 */
19901b45cc5cSYe Bin 	switch (tl->fc_tag) {
19911b45cc5cSYe Bin 	case EXT4_FC_TAG_ADD_RANGE:
19921b45cc5cSYe Bin 		return (sizeof(struct ext4_fc_add_range) == tl->fc_len);
19931b45cc5cSYe Bin 	case EXT4_FC_TAG_TAIL:
19941b45cc5cSYe Bin 		return (sizeof(struct ext4_fc_tail) <= tl->fc_len);
19951b45cc5cSYe Bin 	case EXT4_FC_TAG_HEAD:
19961b45cc5cSYe Bin 		return (sizeof(struct ext4_fc_head) == tl->fc_len);
19971b45cc5cSYe Bin 	case EXT4_FC_TAG_DEL_RANGE:
19981b45cc5cSYe Bin 	case EXT4_FC_TAG_LINK:
19991b45cc5cSYe Bin 	case EXT4_FC_TAG_UNLINK:
20001b45cc5cSYe Bin 	case EXT4_FC_TAG_CREAT:
20011b45cc5cSYe Bin 	case EXT4_FC_TAG_INODE:
20021b45cc5cSYe Bin 	case EXT4_FC_TAG_PAD:
20031b45cc5cSYe Bin 	default:
20041b45cc5cSYe Bin 		return true;
20051b45cc5cSYe Bin 	}
20061b45cc5cSYe Bin }
20071b45cc5cSYe Bin 
20088016e29fSHarshad Shirwadkar /*
20098016e29fSHarshad Shirwadkar  * Recovery Scan phase handler
20108016e29fSHarshad Shirwadkar  *
20118016e29fSHarshad Shirwadkar  * This function is called during the scan phase and is responsible
20128016e29fSHarshad Shirwadkar  * for doing following things:
20138016e29fSHarshad Shirwadkar  * - Make sure the fast commit area has valid tags for replay
20148016e29fSHarshad Shirwadkar  * - Count number of tags that need to be replayed by the replay handler
20158016e29fSHarshad Shirwadkar  * - Verify CRC
20168016e29fSHarshad Shirwadkar  * - Create a list of excluded blocks for allocation during replay phase
20178016e29fSHarshad Shirwadkar  *
20188016e29fSHarshad Shirwadkar  * This function returns JBD2_FC_REPLAY_CONTINUE to indicate that SCAN is
20198016e29fSHarshad Shirwadkar  * incomplete and JBD2 should send more blocks. It returns JBD2_FC_REPLAY_STOP
20208016e29fSHarshad Shirwadkar  * to indicate that scan has finished and JBD2 can now start replay phase.
20218016e29fSHarshad Shirwadkar  * It returns a negative error to indicate that there was an error. At the end
20228016e29fSHarshad Shirwadkar  * of a successful scan phase, sbi->s_fc_replay_state.fc_replay_num_tags is set
20238016e29fSHarshad Shirwadkar  * to indicate the number of tags that need to replayed during the replay phase.
20248016e29fSHarshad Shirwadkar  */
20258016e29fSHarshad Shirwadkar static int ext4_fc_replay_scan(journal_t *journal,
20268016e29fSHarshad Shirwadkar 				struct buffer_head *bh, int off,
20278016e29fSHarshad Shirwadkar 				tid_t expected_tid)
20288016e29fSHarshad Shirwadkar {
20298016e29fSHarshad Shirwadkar 	struct super_block *sb = journal->j_private;
20308016e29fSHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(sb);
20318016e29fSHarshad Shirwadkar 	struct ext4_fc_replay_state *state;
20328016e29fSHarshad Shirwadkar 	int ret = JBD2_FC_REPLAY_CONTINUE;
2033a7ba36bcSHarshad Shirwadkar 	struct ext4_fc_add_range ext;
2034a7ba36bcSHarshad Shirwadkar 	struct ext4_fc_tl tl;
2035a7ba36bcSHarshad Shirwadkar 	struct ext4_fc_tail tail;
2036a7ba36bcSHarshad Shirwadkar 	__u8 *start, *end, *cur, *val;
2037a7ba36bcSHarshad Shirwadkar 	struct ext4_fc_head head;
20388016e29fSHarshad Shirwadkar 	struct ext4_extent *ex;
20398016e29fSHarshad Shirwadkar 
20408016e29fSHarshad Shirwadkar 	state = &sbi->s_fc_replay_state;
20418016e29fSHarshad Shirwadkar 
20428016e29fSHarshad Shirwadkar 	start = (u8 *)bh->b_data;
20438016e29fSHarshad Shirwadkar 	end = (__u8 *)bh->b_data + journal->j_blocksize - 1;
20448016e29fSHarshad Shirwadkar 
20458016e29fSHarshad Shirwadkar 	if (state->fc_replay_expected_off == 0) {
20468016e29fSHarshad Shirwadkar 		state->fc_cur_tag = 0;
20478016e29fSHarshad Shirwadkar 		state->fc_replay_num_tags = 0;
20488016e29fSHarshad Shirwadkar 		state->fc_crc = 0;
20498016e29fSHarshad Shirwadkar 		state->fc_regions = NULL;
20508016e29fSHarshad Shirwadkar 		state->fc_regions_valid = state->fc_regions_used =
20518016e29fSHarshad Shirwadkar 			state->fc_regions_size = 0;
20528016e29fSHarshad Shirwadkar 		/* Check if we can stop early */
20538016e29fSHarshad Shirwadkar 		if (le16_to_cpu(((struct ext4_fc_tl *)start)->fc_tag)
20548016e29fSHarshad Shirwadkar 			!= EXT4_FC_TAG_HEAD)
20558016e29fSHarshad Shirwadkar 			return 0;
20568016e29fSHarshad Shirwadkar 	}
20578016e29fSHarshad Shirwadkar 
20588016e29fSHarshad Shirwadkar 	if (off != state->fc_replay_expected_off) {
20598016e29fSHarshad Shirwadkar 		ret = -EFSCORRUPTED;
20608016e29fSHarshad Shirwadkar 		goto out_err;
20618016e29fSHarshad Shirwadkar 	}
20628016e29fSHarshad Shirwadkar 
20638016e29fSHarshad Shirwadkar 	state->fc_replay_expected_off++;
20641b45cc5cSYe Bin 	for (cur = start; cur < end - EXT4_FC_TAG_BASE_LEN;
2065dcc58274SYe Bin 	     cur = cur + EXT4_FC_TAG_BASE_LEN + tl.fc_len) {
2066dcc58274SYe Bin 		ext4_fc_get_tl(&tl, cur);
2067fdc2a3c7SYe Bin 		val = cur + EXT4_FC_TAG_BASE_LEN;
20681b45cc5cSYe Bin 		if (!ext4_fc_tag_len_isvalid(&tl, val, end)) {
20691b45cc5cSYe Bin 			ret = state->fc_replay_num_tags ?
20701b45cc5cSYe Bin 				JBD2_FC_REPLAY_STOP : -ECANCELED;
20711b45cc5cSYe Bin 			goto out_err;
20721b45cc5cSYe Bin 		}
20734978c659SJan Kara 		ext4_debug("Scan phase, tag:%s, blk %lld\n",
2074dcc58274SYe Bin 			   tag2str(tl.fc_tag), bh->b_blocknr);
2075dcc58274SYe Bin 		switch (tl.fc_tag) {
20768016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_ADD_RANGE:
2077a7ba36bcSHarshad Shirwadkar 			memcpy(&ext, val, sizeof(ext));
2078a7ba36bcSHarshad Shirwadkar 			ex = (struct ext4_extent *)&ext.fc_ex;
20798016e29fSHarshad Shirwadkar 			ret = ext4_fc_record_regions(sb,
2080a7ba36bcSHarshad Shirwadkar 				le32_to_cpu(ext.fc_ino),
20818016e29fSHarshad Shirwadkar 				le32_to_cpu(ex->ee_block), ext4_ext_pblock(ex),
2082599ea31dSXin Yin 				ext4_ext_get_actual_len(ex), 0);
20838016e29fSHarshad Shirwadkar 			if (ret < 0)
20848016e29fSHarshad Shirwadkar 				break;
20858016e29fSHarshad Shirwadkar 			ret = JBD2_FC_REPLAY_CONTINUE;
20868016e29fSHarshad Shirwadkar 			fallthrough;
20878016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_DEL_RANGE:
20888016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_LINK:
20898016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_UNLINK:
20908016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_CREAT:
20918016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_INODE:
20928016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_PAD:
20938016e29fSHarshad Shirwadkar 			state->fc_cur_tag++;
2094a7ba36bcSHarshad Shirwadkar 			state->fc_crc = ext4_chksum(sbi, state->fc_crc, cur,
2095dcc58274SYe Bin 				EXT4_FC_TAG_BASE_LEN + tl.fc_len);
20968016e29fSHarshad Shirwadkar 			break;
20978016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_TAIL:
20988016e29fSHarshad Shirwadkar 			state->fc_cur_tag++;
2099a7ba36bcSHarshad Shirwadkar 			memcpy(&tail, val, sizeof(tail));
2100a7ba36bcSHarshad Shirwadkar 			state->fc_crc = ext4_chksum(sbi, state->fc_crc, cur,
2101fdc2a3c7SYe Bin 						EXT4_FC_TAG_BASE_LEN +
21028016e29fSHarshad Shirwadkar 						offsetof(struct ext4_fc_tail,
21038016e29fSHarshad Shirwadkar 						fc_crc));
2104a7ba36bcSHarshad Shirwadkar 			if (le32_to_cpu(tail.fc_tid) == expected_tid &&
2105a7ba36bcSHarshad Shirwadkar 				le32_to_cpu(tail.fc_crc) == state->fc_crc) {
21068016e29fSHarshad Shirwadkar 				state->fc_replay_num_tags = state->fc_cur_tag;
21078016e29fSHarshad Shirwadkar 				state->fc_regions_valid =
21088016e29fSHarshad Shirwadkar 					state->fc_regions_used;
21098016e29fSHarshad Shirwadkar 			} else {
21108016e29fSHarshad Shirwadkar 				ret = state->fc_replay_num_tags ?
21118016e29fSHarshad Shirwadkar 					JBD2_FC_REPLAY_STOP : -EFSBADCRC;
21128016e29fSHarshad Shirwadkar 			}
21138016e29fSHarshad Shirwadkar 			state->fc_crc = 0;
21148016e29fSHarshad Shirwadkar 			break;
21158016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_HEAD:
2116a7ba36bcSHarshad Shirwadkar 			memcpy(&head, val, sizeof(head));
2117a7ba36bcSHarshad Shirwadkar 			if (le32_to_cpu(head.fc_features) &
21188016e29fSHarshad Shirwadkar 				~EXT4_FC_SUPPORTED_FEATURES) {
21198016e29fSHarshad Shirwadkar 				ret = -EOPNOTSUPP;
21208016e29fSHarshad Shirwadkar 				break;
21218016e29fSHarshad Shirwadkar 			}
2122a7ba36bcSHarshad Shirwadkar 			if (le32_to_cpu(head.fc_tid) != expected_tid) {
21238016e29fSHarshad Shirwadkar 				ret = JBD2_FC_REPLAY_STOP;
21248016e29fSHarshad Shirwadkar 				break;
21258016e29fSHarshad Shirwadkar 			}
21268016e29fSHarshad Shirwadkar 			state->fc_cur_tag++;
2127a7ba36bcSHarshad Shirwadkar 			state->fc_crc = ext4_chksum(sbi, state->fc_crc, cur,
2128dcc58274SYe Bin 				EXT4_FC_TAG_BASE_LEN + tl.fc_len);
21298016e29fSHarshad Shirwadkar 			break;
21308016e29fSHarshad Shirwadkar 		default:
21318016e29fSHarshad Shirwadkar 			ret = state->fc_replay_num_tags ?
21328016e29fSHarshad Shirwadkar 				JBD2_FC_REPLAY_STOP : -ECANCELED;
21338016e29fSHarshad Shirwadkar 		}
21348016e29fSHarshad Shirwadkar 		if (ret < 0 || ret == JBD2_FC_REPLAY_STOP)
21358016e29fSHarshad Shirwadkar 			break;
21368016e29fSHarshad Shirwadkar 	}
21378016e29fSHarshad Shirwadkar 
21388016e29fSHarshad Shirwadkar out_err:
21398016e29fSHarshad Shirwadkar 	trace_ext4_fc_replay_scan(sb, ret, off);
21408016e29fSHarshad Shirwadkar 	return ret;
21418016e29fSHarshad Shirwadkar }
21428016e29fSHarshad Shirwadkar 
21435b849b5fSHarshad Shirwadkar /*
21445b849b5fSHarshad Shirwadkar  * Main recovery path entry point.
21458016e29fSHarshad Shirwadkar  * The meaning of return codes is similar as above.
21465b849b5fSHarshad Shirwadkar  */
21475b849b5fSHarshad Shirwadkar static int ext4_fc_replay(journal_t *journal, struct buffer_head *bh,
21485b849b5fSHarshad Shirwadkar 				enum passtype pass, int off, tid_t expected_tid)
21495b849b5fSHarshad Shirwadkar {
21508016e29fSHarshad Shirwadkar 	struct super_block *sb = journal->j_private;
21518016e29fSHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(sb);
2152a7ba36bcSHarshad Shirwadkar 	struct ext4_fc_tl tl;
2153a7ba36bcSHarshad Shirwadkar 	__u8 *start, *end, *cur, *val;
21548016e29fSHarshad Shirwadkar 	int ret = JBD2_FC_REPLAY_CONTINUE;
21558016e29fSHarshad Shirwadkar 	struct ext4_fc_replay_state *state = &sbi->s_fc_replay_state;
2156a7ba36bcSHarshad Shirwadkar 	struct ext4_fc_tail tail;
21578016e29fSHarshad Shirwadkar 
21588016e29fSHarshad Shirwadkar 	if (pass == PASS_SCAN) {
21598016e29fSHarshad Shirwadkar 		state->fc_current_pass = PASS_SCAN;
21608016e29fSHarshad Shirwadkar 		return ext4_fc_replay_scan(journal, bh, off, expected_tid);
21618016e29fSHarshad Shirwadkar 	}
21628016e29fSHarshad Shirwadkar 
21638016e29fSHarshad Shirwadkar 	if (state->fc_current_pass != pass) {
21648016e29fSHarshad Shirwadkar 		state->fc_current_pass = pass;
21658016e29fSHarshad Shirwadkar 		sbi->s_mount_state |= EXT4_FC_REPLAY;
21668016e29fSHarshad Shirwadkar 	}
21678016e29fSHarshad Shirwadkar 	if (!sbi->s_fc_replay_state.fc_replay_num_tags) {
21684978c659SJan Kara 		ext4_debug("Replay stops\n");
21698016e29fSHarshad Shirwadkar 		ext4_fc_set_bitmaps_and_counters(sb);
21705b849b5fSHarshad Shirwadkar 		return 0;
21715b849b5fSHarshad Shirwadkar 	}
21725b849b5fSHarshad Shirwadkar 
21738016e29fSHarshad Shirwadkar #ifdef CONFIG_EXT4_DEBUG
21748016e29fSHarshad Shirwadkar 	if (sbi->s_fc_debug_max_replay && off >= sbi->s_fc_debug_max_replay) {
21758016e29fSHarshad Shirwadkar 		pr_warn("Dropping fc block %d because max_replay set\n", off);
21768016e29fSHarshad Shirwadkar 		return JBD2_FC_REPLAY_STOP;
21778016e29fSHarshad Shirwadkar 	}
21788016e29fSHarshad Shirwadkar #endif
21798016e29fSHarshad Shirwadkar 
21808016e29fSHarshad Shirwadkar 	start = (u8 *)bh->b_data;
21818016e29fSHarshad Shirwadkar 	end = (__u8 *)bh->b_data + journal->j_blocksize - 1;
21828016e29fSHarshad Shirwadkar 
21831b45cc5cSYe Bin 	for (cur = start; cur < end - EXT4_FC_TAG_BASE_LEN;
2184dcc58274SYe Bin 	     cur = cur + EXT4_FC_TAG_BASE_LEN + tl.fc_len) {
2185dcc58274SYe Bin 		ext4_fc_get_tl(&tl, cur);
2186fdc2a3c7SYe Bin 		val = cur + EXT4_FC_TAG_BASE_LEN;
2187a7ba36bcSHarshad Shirwadkar 
21888016e29fSHarshad Shirwadkar 		if (state->fc_replay_num_tags == 0) {
21898016e29fSHarshad Shirwadkar 			ret = JBD2_FC_REPLAY_STOP;
21908016e29fSHarshad Shirwadkar 			ext4_fc_set_bitmaps_and_counters(sb);
21918016e29fSHarshad Shirwadkar 			break;
21928016e29fSHarshad Shirwadkar 		}
21931b45cc5cSYe Bin 
2194dcc58274SYe Bin 		ext4_debug("Replay phase, tag:%s\n", tag2str(tl.fc_tag));
21958016e29fSHarshad Shirwadkar 		state->fc_replay_num_tags--;
2196dcc58274SYe Bin 		switch (tl.fc_tag) {
21978016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_LINK:
2198a7ba36bcSHarshad Shirwadkar 			ret = ext4_fc_replay_link(sb, &tl, val);
21998016e29fSHarshad Shirwadkar 			break;
22008016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_UNLINK:
2201a7ba36bcSHarshad Shirwadkar 			ret = ext4_fc_replay_unlink(sb, &tl, val);
22028016e29fSHarshad Shirwadkar 			break;
22038016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_ADD_RANGE:
2204a7ba36bcSHarshad Shirwadkar 			ret = ext4_fc_replay_add_range(sb, &tl, val);
22058016e29fSHarshad Shirwadkar 			break;
22068016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_CREAT:
2207a7ba36bcSHarshad Shirwadkar 			ret = ext4_fc_replay_create(sb, &tl, val);
22088016e29fSHarshad Shirwadkar 			break;
22098016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_DEL_RANGE:
2210a7ba36bcSHarshad Shirwadkar 			ret = ext4_fc_replay_del_range(sb, &tl, val);
22118016e29fSHarshad Shirwadkar 			break;
22128016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_INODE:
2213a7ba36bcSHarshad Shirwadkar 			ret = ext4_fc_replay_inode(sb, &tl, val);
22148016e29fSHarshad Shirwadkar 			break;
22158016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_PAD:
22168016e29fSHarshad Shirwadkar 			trace_ext4_fc_replay(sb, EXT4_FC_TAG_PAD, 0,
2217dcc58274SYe Bin 					     tl.fc_len, 0);
22188016e29fSHarshad Shirwadkar 			break;
22198016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_TAIL:
2220dcc58274SYe Bin 			trace_ext4_fc_replay(sb, EXT4_FC_TAG_TAIL,
2221dcc58274SYe Bin 					     0, tl.fc_len, 0);
2222a7ba36bcSHarshad Shirwadkar 			memcpy(&tail, val, sizeof(tail));
2223a7ba36bcSHarshad Shirwadkar 			WARN_ON(le32_to_cpu(tail.fc_tid) != expected_tid);
22248016e29fSHarshad Shirwadkar 			break;
22258016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_HEAD:
22268016e29fSHarshad Shirwadkar 			break;
22278016e29fSHarshad Shirwadkar 		default:
2228dcc58274SYe Bin 			trace_ext4_fc_replay(sb, tl.fc_tag, 0, tl.fc_len, 0);
22298016e29fSHarshad Shirwadkar 			ret = -ECANCELED;
22308016e29fSHarshad Shirwadkar 			break;
22318016e29fSHarshad Shirwadkar 		}
22328016e29fSHarshad Shirwadkar 		if (ret < 0)
22338016e29fSHarshad Shirwadkar 			break;
22348016e29fSHarshad Shirwadkar 		ret = JBD2_FC_REPLAY_CONTINUE;
22358016e29fSHarshad Shirwadkar 	}
22368016e29fSHarshad Shirwadkar 	return ret;
22378016e29fSHarshad Shirwadkar }
22388016e29fSHarshad Shirwadkar 
22396866d7b3SHarshad Shirwadkar void ext4_fc_init(struct super_block *sb, journal_t *journal)
22406866d7b3SHarshad Shirwadkar {
22415b849b5fSHarshad Shirwadkar 	/*
22425b849b5fSHarshad Shirwadkar 	 * We set replay callback even if fast commit disabled because we may
22435b849b5fSHarshad Shirwadkar 	 * could still have fast commit blocks that need to be replayed even if
22445b849b5fSHarshad Shirwadkar 	 * fast commit has now been turned off.
22455b849b5fSHarshad Shirwadkar 	 */
22465b849b5fSHarshad Shirwadkar 	journal->j_fc_replay_callback = ext4_fc_replay;
22476866d7b3SHarshad Shirwadkar 	if (!test_opt2(sb, JOURNAL_FAST_COMMIT))
22486866d7b3SHarshad Shirwadkar 		return;
2249ff780b91SHarshad Shirwadkar 	journal->j_fc_cleanup_callback = ext4_fc_cleanup;
22506866d7b3SHarshad Shirwadkar }
2251aa75f4d3SHarshad Shirwadkar 
/*
 * Human readable descriptions of the reasons a fast commit can be ineligible.
 *
 * ext4_fc_info_show() indexes this table and the per-reason counters with the
 * same index (0 .. EXT4_FC_REASON_MAX - 1), so the order of the entries is
 * significant and must follow the numbering of the reason codes.
 *
 * Both the strings and the pointer array are const: the table is only ever
 * read.
 */
static const char * const fc_ineligible_reasons[] = {
	"Extended attributes changed",
	"Cross rename",
	"Journal flag changed",
	"Insufficient memory",
	"Swap boot",
	"Resize",
	"Dir renamed",
	"Falloc range op",
	"Data journalling",
	"FC Commit Failed"
};
2264ce8c59d1SHarshad Shirwadkar 
2265ce8c59d1SHarshad Shirwadkar int ext4_fc_info_show(struct seq_file *seq, void *v)
2266ce8c59d1SHarshad Shirwadkar {
2267ce8c59d1SHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB((struct super_block *)seq->private);
2268ce8c59d1SHarshad Shirwadkar 	struct ext4_fc_stats *stats = &sbi->s_fc_stats;
2269ce8c59d1SHarshad Shirwadkar 	int i;
2270ce8c59d1SHarshad Shirwadkar 
2271ce8c59d1SHarshad Shirwadkar 	if (v != SEQ_START_TOKEN)
2272ce8c59d1SHarshad Shirwadkar 		return 0;
2273ce8c59d1SHarshad Shirwadkar 
2274ce8c59d1SHarshad Shirwadkar 	seq_printf(seq,
2275ce8c59d1SHarshad Shirwadkar 		"fc stats:\n%ld commits\n%ld ineligible\n%ld numblks\n%lluus avg_commit_time\n",
2276ce8c59d1SHarshad Shirwadkar 		   stats->fc_num_commits, stats->fc_ineligible_commits,
2277ce8c59d1SHarshad Shirwadkar 		   stats->fc_numblks,
22780915e464SHarshad Shirwadkar 		   div_u64(stats->s_fc_avg_commit_time, 1000));
2279ce8c59d1SHarshad Shirwadkar 	seq_puts(seq, "Ineligible reasons:\n");
2280ce8c59d1SHarshad Shirwadkar 	for (i = 0; i < EXT4_FC_REASON_MAX; i++)
2281ce8c59d1SHarshad Shirwadkar 		seq_printf(seq, "\"%s\":\t%d\n", fc_ineligible_reasons[i],
2282ce8c59d1SHarshad Shirwadkar 			stats->fc_ineligible_reason_count[i]);
2283ce8c59d1SHarshad Shirwadkar 
2284ce8c59d1SHarshad Shirwadkar 	return 0;
2285ce8c59d1SHarshad Shirwadkar }
2286ce8c59d1SHarshad Shirwadkar 
2287aa75f4d3SHarshad Shirwadkar int __init ext4_fc_init_dentry_cache(void)
2288aa75f4d3SHarshad Shirwadkar {
2289aa75f4d3SHarshad Shirwadkar 	ext4_fc_dentry_cachep = KMEM_CACHE(ext4_fc_dentry_update,
2290aa75f4d3SHarshad Shirwadkar 					   SLAB_RECLAIM_ACCOUNT);
2291aa75f4d3SHarshad Shirwadkar 
2292aa75f4d3SHarshad Shirwadkar 	if (ext4_fc_dentry_cachep == NULL)
2293aa75f4d3SHarshad Shirwadkar 		return -ENOMEM;
2294aa75f4d3SHarshad Shirwadkar 
2295aa75f4d3SHarshad Shirwadkar 	return 0;
2296aa75f4d3SHarshad Shirwadkar }
2297ab047d51SSebastian Andrzej Siewior 
2298ab047d51SSebastian Andrzej Siewior void ext4_fc_destroy_dentry_cache(void)
2299ab047d51SSebastian Andrzej Siewior {
2300ab047d51SSebastian Andrzej Siewior 	kmem_cache_destroy(ext4_fc_dentry_cachep);
2301ab047d51SSebastian Andrzej Siewior }
2302