xref: /openbmc/linux/fs/ext4/fast_commit.c (revision 7f142440847480838e0c4b3092f24455cec111a7)
16866d7b3SHarshad Shirwadkar // SPDX-License-Identifier: GPL-2.0
26866d7b3SHarshad Shirwadkar 
36866d7b3SHarshad Shirwadkar /*
46866d7b3SHarshad Shirwadkar  * fs/ext4/fast_commit.c
56866d7b3SHarshad Shirwadkar  *
66866d7b3SHarshad Shirwadkar  * Written by Harshad Shirwadkar <harshadshirwadkar@gmail.com>
76866d7b3SHarshad Shirwadkar  *
86866d7b3SHarshad Shirwadkar  * Ext4 fast commits routines.
96866d7b3SHarshad Shirwadkar  */
10aa75f4d3SHarshad Shirwadkar #include "ext4.h"
116866d7b3SHarshad Shirwadkar #include "ext4_jbd2.h"
12aa75f4d3SHarshad Shirwadkar #include "ext4_extents.h"
13aa75f4d3SHarshad Shirwadkar #include "mballoc.h"
14aa75f4d3SHarshad Shirwadkar 
15aa75f4d3SHarshad Shirwadkar /*
16aa75f4d3SHarshad Shirwadkar  * Ext4 Fast Commits
17aa75f4d3SHarshad Shirwadkar  * -----------------
18aa75f4d3SHarshad Shirwadkar  *
19aa75f4d3SHarshad Shirwadkar  * Ext4 fast commits implement fine grained journalling for Ext4.
20aa75f4d3SHarshad Shirwadkar  *
21aa75f4d3SHarshad Shirwadkar  * Fast commits are organized as a log of tag-length-value (TLV) structs. (See
22aa75f4d3SHarshad Shirwadkar  * struct ext4_fc_tl). Each TLV contains some delta that is replayed TLV by
23aa75f4d3SHarshad Shirwadkar  * TLV during the recovery phase. For the scenarios for which we currently
24aa75f4d3SHarshad Shirwadkar  * don't have replay code, fast commit falls back to full commits.
25aa75f4d3SHarshad Shirwadkar  * Fast commits record delta in one of the following three categories.
26aa75f4d3SHarshad Shirwadkar  *
27aa75f4d3SHarshad Shirwadkar  * (A) Directory entry updates:
28aa75f4d3SHarshad Shirwadkar  *
29aa75f4d3SHarshad Shirwadkar  * - EXT4_FC_TAG_UNLINK		- records directory entry unlink
30aa75f4d3SHarshad Shirwadkar  * - EXT4_FC_TAG_LINK		- records directory entry link
31aa75f4d3SHarshad Shirwadkar  * - EXT4_FC_TAG_CREAT		- records inode and directory entry creation
32aa75f4d3SHarshad Shirwadkar  *
33aa75f4d3SHarshad Shirwadkar  * (B) File specific data range updates:
34aa75f4d3SHarshad Shirwadkar  *
35aa75f4d3SHarshad Shirwadkar  * - EXT4_FC_TAG_ADD_RANGE	- records addition of new blocks to an inode
36aa75f4d3SHarshad Shirwadkar  * - EXT4_FC_TAG_DEL_RANGE	- records deletion of blocks from an inode
37aa75f4d3SHarshad Shirwadkar  *
38aa75f4d3SHarshad Shirwadkar  * (C) Inode metadata (mtime / ctime etc):
39aa75f4d3SHarshad Shirwadkar  *
40aa75f4d3SHarshad Shirwadkar  * - EXT4_FC_TAG_INODE		- record the inode that should be replayed
41aa75f4d3SHarshad Shirwadkar  *				  during recovery. Note that iblocks field is
42aa75f4d3SHarshad Shirwadkar  *				  not replayed and instead derived during
43aa75f4d3SHarshad Shirwadkar  *				  replay.
44aa75f4d3SHarshad Shirwadkar  * Commit Operation
45aa75f4d3SHarshad Shirwadkar  * ----------------
46aa75f4d3SHarshad Shirwadkar  * With fast commits, we maintain all the directory entry operations in the
47aa75f4d3SHarshad Shirwadkar  * order in which they are issued in an in-memory queue. This queue is flushed
48aa75f4d3SHarshad Shirwadkar  * to disk during the commit operation. We also maintain a list of inodes
49aa75f4d3SHarshad Shirwadkar  * that need to be committed during a fast commit in another in memory queue of
50aa75f4d3SHarshad Shirwadkar  * inodes. During the commit operation, we commit in the following order:
51aa75f4d3SHarshad Shirwadkar  *
52aa75f4d3SHarshad Shirwadkar  * [1] Lock inodes for any further data updates by setting COMMITTING state
53aa75f4d3SHarshad Shirwadkar  * [2] Submit data buffers of all the inodes
54aa75f4d3SHarshad Shirwadkar  * [3] Wait for [2] to complete
55aa75f4d3SHarshad Shirwadkar  * [4] Commit all the directory entry updates in the fast commit space
56aa75f4d3SHarshad Shirwadkar  * [5] Commit all the changed inode structures
57aa75f4d3SHarshad Shirwadkar  * [6] Write tail tag (this tag ensures the atomicity, please read the following
58aa75f4d3SHarshad Shirwadkar  *     section for more details).
59aa75f4d3SHarshad Shirwadkar  * [7] Wait for [4], [5] and [6] to complete.
60aa75f4d3SHarshad Shirwadkar  *
61aa75f4d3SHarshad Shirwadkar  * All the inode updates must call ext4_fc_start_update() before starting an
62aa75f4d3SHarshad Shirwadkar  * update. If such an ongoing update is present, fast commit waits for it to
63aa75f4d3SHarshad Shirwadkar  * complete. The completion of such an update is marked by
64aa75f4d3SHarshad Shirwadkar  * ext4_fc_stop_update().
65aa75f4d3SHarshad Shirwadkar  *
66aa75f4d3SHarshad Shirwadkar  * Fast Commit Ineligibility
67aa75f4d3SHarshad Shirwadkar  * -------------------------
687bbbe241SHarshad Shirwadkar  *
 * Not all operations are supported by fast commits today (e.g. extended
707bbbe241SHarshad Shirwadkar  * attributes). Fast commit ineligibility is marked by calling
717bbbe241SHarshad Shirwadkar  * ext4_fc_mark_ineligible(): This makes next fast commit operation to fall back
727bbbe241SHarshad Shirwadkar  * to full commit.
73aa75f4d3SHarshad Shirwadkar  *
74aa75f4d3SHarshad Shirwadkar  * Atomicity of commits
75aa75f4d3SHarshad Shirwadkar  * --------------------
76a740762fSHarshad Shirwadkar  * In order to guarantee atomicity during the commit operation, fast commit
77aa75f4d3SHarshad Shirwadkar  * uses "EXT4_FC_TAG_TAIL" tag that marks a fast commit as complete. Tail
78aa75f4d3SHarshad Shirwadkar  * tag contains CRC of the contents and TID of the transaction after which
79aa75f4d3SHarshad Shirwadkar  * this fast commit should be applied. Recovery code replays fast commit
80aa75f4d3SHarshad Shirwadkar  * logs only if there's at least 1 valid tail present. For every fast commit
81aa75f4d3SHarshad Shirwadkar  * operation, there is 1 tail. This means, we may end up with multiple tails
82aa75f4d3SHarshad Shirwadkar  * in the fast commit space. Here's an example:
83aa75f4d3SHarshad Shirwadkar  *
84aa75f4d3SHarshad Shirwadkar  * - Create a new file A and remove existing file B
85aa75f4d3SHarshad Shirwadkar  * - fsync()
86aa75f4d3SHarshad Shirwadkar  * - Append contents to file A
87aa75f4d3SHarshad Shirwadkar  * - Truncate file A
88aa75f4d3SHarshad Shirwadkar  * - fsync()
89aa75f4d3SHarshad Shirwadkar  *
90aa75f4d3SHarshad Shirwadkar  * The fast commit space at the end of above operations would look like this:
91aa75f4d3SHarshad Shirwadkar  *      [HEAD] [CREAT A] [UNLINK B] [TAIL] [ADD_RANGE A] [DEL_RANGE A] [TAIL]
92aa75f4d3SHarshad Shirwadkar  *             |<---  Fast Commit 1   --->|<---      Fast Commit 2     ---->|
93aa75f4d3SHarshad Shirwadkar  *
94aa75f4d3SHarshad Shirwadkar  * Replay code should thus check for all the valid tails in the FC area.
95aa75f4d3SHarshad Shirwadkar  *
96b1b7dce3SHarshad Shirwadkar  * Fast Commit Replay Idempotence
97b1b7dce3SHarshad Shirwadkar  * ------------------------------
98b1b7dce3SHarshad Shirwadkar  *
 * Fast commit tags are idempotent in nature provided the recovery code follows
100b1b7dce3SHarshad Shirwadkar  * certain rules. The guiding principle that the commit path follows while
101b1b7dce3SHarshad Shirwadkar  * committing is that it stores the result of a particular operation instead of
102b1b7dce3SHarshad Shirwadkar  * storing the procedure.
103b1b7dce3SHarshad Shirwadkar  *
104b1b7dce3SHarshad Shirwadkar  * Let's consider this rename operation: 'mv /a /b'. Let's assume dirent '/a'
105b1b7dce3SHarshad Shirwadkar  * was associated with inode 10. During fast commit, instead of storing this
106b1b7dce3SHarshad Shirwadkar  * operation as a procedure "rename a to b", we store the resulting file system
107b1b7dce3SHarshad Shirwadkar  * state as a "series" of outcomes:
108b1b7dce3SHarshad Shirwadkar  *
109b1b7dce3SHarshad Shirwadkar  * - Link dirent b to inode 10
110b1b7dce3SHarshad Shirwadkar  * - Unlink dirent a
111b1b7dce3SHarshad Shirwadkar  * - Inode <10> with valid refcount
112b1b7dce3SHarshad Shirwadkar  *
 * Now when recovery code runs, it needs to "enforce" this state on the file
114b1b7dce3SHarshad Shirwadkar  * system. This is what guarantees idempotence of fast commit replay.
115b1b7dce3SHarshad Shirwadkar  *
116b1b7dce3SHarshad Shirwadkar  * Let's take an example of a procedure that is not idempotent and see how fast
117b1b7dce3SHarshad Shirwadkar  * commits make it idempotent. Consider following sequence of operations:
118b1b7dce3SHarshad Shirwadkar  *
119b1b7dce3SHarshad Shirwadkar  *     rm A;    mv B A;    read A
120b1b7dce3SHarshad Shirwadkar  *  (x)     (y)        (z)
121b1b7dce3SHarshad Shirwadkar  *
122b1b7dce3SHarshad Shirwadkar  * (x), (y) and (z) are the points at which we can crash. If we store this
123b1b7dce3SHarshad Shirwadkar  * sequence of operations as is then the replay is not idempotent. Let's say
124b1b7dce3SHarshad Shirwadkar  * while in replay, we crash at (z). During the second replay, file A (which was
125b1b7dce3SHarshad Shirwadkar  * actually created as a result of "mv B A" operation) would get deleted. Thus,
126b1b7dce3SHarshad Shirwadkar  * file named A would be absent when we try to read A. So, this sequence of
127b1b7dce3SHarshad Shirwadkar  * operations is not idempotent. However, as mentioned above, instead of storing
128b1b7dce3SHarshad Shirwadkar  * the procedure fast commits store the outcome of each procedure. Thus the fast
129b1b7dce3SHarshad Shirwadkar  * commit log for above procedure would be as follows:
130b1b7dce3SHarshad Shirwadkar  *
131b1b7dce3SHarshad Shirwadkar  * (Let's assume dirent A was linked to inode 10 and dirent B was linked to
132b1b7dce3SHarshad Shirwadkar  * inode 11 before the replay)
133b1b7dce3SHarshad Shirwadkar  *
134b1b7dce3SHarshad Shirwadkar  *    [Unlink A]   [Link A to inode 11]   [Unlink B]   [Inode 11]
135b1b7dce3SHarshad Shirwadkar  * (w)          (x)                    (y)          (z)
136b1b7dce3SHarshad Shirwadkar  *
137b1b7dce3SHarshad Shirwadkar  * If we crash at (z), we will have file A linked to inode 11. During the second
138b1b7dce3SHarshad Shirwadkar  * replay, we will remove file A (inode 11). But we will create it back and make
139b1b7dce3SHarshad Shirwadkar  * it point to inode 11. We won't find B, so we'll just skip that step. At this
140b1b7dce3SHarshad Shirwadkar  * point, the refcount for inode 11 is not reliable, but that gets fixed by the
141b1b7dce3SHarshad Shirwadkar  * replay of last inode 11 tag. Crashes at points (w), (x) and (y) get handled
142b1b7dce3SHarshad Shirwadkar  * similarly. Thus, by converting a non-idempotent procedure into a series of
143b1b7dce3SHarshad Shirwadkar  * idempotent outcomes, fast commits ensured idempotence during the replay.
144b1b7dce3SHarshad Shirwadkar  *
145aa75f4d3SHarshad Shirwadkar  * TODOs
146aa75f4d3SHarshad Shirwadkar  * -----
147b1b7dce3SHarshad Shirwadkar  *
148b1b7dce3SHarshad Shirwadkar  * 0) Fast commit replay path hardening: Fast commit replay code should use
149b1b7dce3SHarshad Shirwadkar  *    journal handles to make sure all the updates it does during the replay
150b1b7dce3SHarshad Shirwadkar  *    path are atomic. With that if we crash during fast commit replay, after
151b1b7dce3SHarshad Shirwadkar  *    trying to do recovery again, we will find a file system where fast commit
152b1b7dce3SHarshad Shirwadkar  *    area is invalid (because new full commit would be found). In order to deal
153b1b7dce3SHarshad Shirwadkar  *    with that, fast commit replay code should ensure that the "FC_REPLAY"
154b1b7dce3SHarshad Shirwadkar  *    superblock state is persisted before starting the replay, so that after
155b1b7dce3SHarshad Shirwadkar  *    the crash, fast commit recovery code can look at that flag and perform
156b1b7dce3SHarshad Shirwadkar  *    fast commit recovery even if that area is invalidated by later full
157b1b7dce3SHarshad Shirwadkar  *    commits.
158b1b7dce3SHarshad Shirwadkar  *
159d1199b94SHarshad Shirwadkar  * 1) Fast commit's commit path locks the entire file system during fast
160d1199b94SHarshad Shirwadkar  *    commit. This has significant performance penalty. Instead of that, we
161d1199b94SHarshad Shirwadkar  *    should use ext4_fc_start/stop_update functions to start inode level
162d1199b94SHarshad Shirwadkar  *    updates from ext4_journal_start/stop. Once we do that we can drop file
163d1199b94SHarshad Shirwadkar  *    system locking during commit path.
164aa75f4d3SHarshad Shirwadkar  *
165d1199b94SHarshad Shirwadkar  * 2) Handle more ineligible cases.
166aa75f4d3SHarshad Shirwadkar  */
167aa75f4d3SHarshad Shirwadkar 
168aa75f4d3SHarshad Shirwadkar #include <trace/events/ext4.h>
/*
 * Cache from which struct ext4_fc_dentry_update nodes are allocated when a
 * dentry update is tracked, and to which they are returned when dropped.
 */
static struct kmem_cache *ext4_fc_dentry_cachep;
170aa75f4d3SHarshad Shirwadkar 
171aa75f4d3SHarshad Shirwadkar static void ext4_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
172aa75f4d3SHarshad Shirwadkar {
173aa75f4d3SHarshad Shirwadkar 	BUFFER_TRACE(bh, "");
174aa75f4d3SHarshad Shirwadkar 	if (uptodate) {
175aa75f4d3SHarshad Shirwadkar 		ext4_debug("%s: Block %lld up-to-date",
176aa75f4d3SHarshad Shirwadkar 			   __func__, bh->b_blocknr);
177aa75f4d3SHarshad Shirwadkar 		set_buffer_uptodate(bh);
178aa75f4d3SHarshad Shirwadkar 	} else {
179aa75f4d3SHarshad Shirwadkar 		ext4_debug("%s: Block %lld not up-to-date",
180aa75f4d3SHarshad Shirwadkar 			   __func__, bh->b_blocknr);
181aa75f4d3SHarshad Shirwadkar 		clear_buffer_uptodate(bh);
182aa75f4d3SHarshad Shirwadkar 	}
183aa75f4d3SHarshad Shirwadkar 
184aa75f4d3SHarshad Shirwadkar 	unlock_buffer(bh);
185aa75f4d3SHarshad Shirwadkar }
186aa75f4d3SHarshad Shirwadkar 
187aa75f4d3SHarshad Shirwadkar static inline void ext4_fc_reset_inode(struct inode *inode)
188aa75f4d3SHarshad Shirwadkar {
189aa75f4d3SHarshad Shirwadkar 	struct ext4_inode_info *ei = EXT4_I(inode);
190aa75f4d3SHarshad Shirwadkar 
191aa75f4d3SHarshad Shirwadkar 	ei->i_fc_lblk_start = 0;
192aa75f4d3SHarshad Shirwadkar 	ei->i_fc_lblk_len = 0;
193aa75f4d3SHarshad Shirwadkar }
194aa75f4d3SHarshad Shirwadkar 
195aa75f4d3SHarshad Shirwadkar void ext4_fc_init_inode(struct inode *inode)
196aa75f4d3SHarshad Shirwadkar {
197aa75f4d3SHarshad Shirwadkar 	struct ext4_inode_info *ei = EXT4_I(inode);
198aa75f4d3SHarshad Shirwadkar 
199aa75f4d3SHarshad Shirwadkar 	ext4_fc_reset_inode(inode);
200aa75f4d3SHarshad Shirwadkar 	ext4_clear_inode_state(inode, EXT4_STATE_FC_COMMITTING);
201aa75f4d3SHarshad Shirwadkar 	INIT_LIST_HEAD(&ei->i_fc_list);
202b3998b3bSRitesh Harjani 	INIT_LIST_HEAD(&ei->i_fc_dilist);
203aa75f4d3SHarshad Shirwadkar 	init_waitqueue_head(&ei->i_fc_wait);
204aa75f4d3SHarshad Shirwadkar 	atomic_set(&ei->i_fc_updates, 0);
205aa75f4d3SHarshad Shirwadkar }
206aa75f4d3SHarshad Shirwadkar 
207f6634e26SHarshad Shirwadkar /* This function must be called with sbi->s_fc_lock held. */
208f6634e26SHarshad Shirwadkar static void ext4_fc_wait_committing_inode(struct inode *inode)
209fa329e27STheodore Ts'o __releases(&EXT4_SB(inode->i_sb)->s_fc_lock)
210f6634e26SHarshad Shirwadkar {
211f6634e26SHarshad Shirwadkar 	wait_queue_head_t *wq;
212f6634e26SHarshad Shirwadkar 	struct ext4_inode_info *ei = EXT4_I(inode);
213f6634e26SHarshad Shirwadkar 
214f6634e26SHarshad Shirwadkar #if (BITS_PER_LONG < 64)
215f6634e26SHarshad Shirwadkar 	DEFINE_WAIT_BIT(wait, &ei->i_state_flags,
216f6634e26SHarshad Shirwadkar 			EXT4_STATE_FC_COMMITTING);
217f6634e26SHarshad Shirwadkar 	wq = bit_waitqueue(&ei->i_state_flags,
218f6634e26SHarshad Shirwadkar 				EXT4_STATE_FC_COMMITTING);
219f6634e26SHarshad Shirwadkar #else
220f6634e26SHarshad Shirwadkar 	DEFINE_WAIT_BIT(wait, &ei->i_flags,
221f6634e26SHarshad Shirwadkar 			EXT4_STATE_FC_COMMITTING);
222f6634e26SHarshad Shirwadkar 	wq = bit_waitqueue(&ei->i_flags,
223f6634e26SHarshad Shirwadkar 				EXT4_STATE_FC_COMMITTING);
224f6634e26SHarshad Shirwadkar #endif
225f6634e26SHarshad Shirwadkar 	lockdep_assert_held(&EXT4_SB(inode->i_sb)->s_fc_lock);
226f6634e26SHarshad Shirwadkar 	prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
227f6634e26SHarshad Shirwadkar 	spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
228f6634e26SHarshad Shirwadkar 	schedule();
229f6634e26SHarshad Shirwadkar 	finish_wait(wq, &wait.wq_entry);
230f6634e26SHarshad Shirwadkar }
231f6634e26SHarshad Shirwadkar 
232aa75f4d3SHarshad Shirwadkar /*
233aa75f4d3SHarshad Shirwadkar  * Inform Ext4's fast about start of an inode update
234aa75f4d3SHarshad Shirwadkar  *
235aa75f4d3SHarshad Shirwadkar  * This function is called by the high level call VFS callbacks before
236aa75f4d3SHarshad Shirwadkar  * performing any inode update. This function blocks if there's an ongoing
237aa75f4d3SHarshad Shirwadkar  * fast commit on the inode in question.
238aa75f4d3SHarshad Shirwadkar  */
239aa75f4d3SHarshad Shirwadkar void ext4_fc_start_update(struct inode *inode)
240aa75f4d3SHarshad Shirwadkar {
241aa75f4d3SHarshad Shirwadkar 	struct ext4_inode_info *ei = EXT4_I(inode);
242aa75f4d3SHarshad Shirwadkar 
2438016e29fSHarshad Shirwadkar 	if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) ||
2448016e29fSHarshad Shirwadkar 	    (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY))
245aa75f4d3SHarshad Shirwadkar 		return;
246aa75f4d3SHarshad Shirwadkar 
247aa75f4d3SHarshad Shirwadkar restart:
248aa75f4d3SHarshad Shirwadkar 	spin_lock(&EXT4_SB(inode->i_sb)->s_fc_lock);
249aa75f4d3SHarshad Shirwadkar 	if (list_empty(&ei->i_fc_list))
250aa75f4d3SHarshad Shirwadkar 		goto out;
251aa75f4d3SHarshad Shirwadkar 
252aa75f4d3SHarshad Shirwadkar 	if (ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING)) {
253f6634e26SHarshad Shirwadkar 		ext4_fc_wait_committing_inode(inode);
254aa75f4d3SHarshad Shirwadkar 		goto restart;
255aa75f4d3SHarshad Shirwadkar 	}
256aa75f4d3SHarshad Shirwadkar out:
257aa75f4d3SHarshad Shirwadkar 	atomic_inc(&ei->i_fc_updates);
258aa75f4d3SHarshad Shirwadkar 	spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
259aa75f4d3SHarshad Shirwadkar }
260aa75f4d3SHarshad Shirwadkar 
261aa75f4d3SHarshad Shirwadkar /*
262aa75f4d3SHarshad Shirwadkar  * Stop inode update and wake up waiting fast commits if any.
263aa75f4d3SHarshad Shirwadkar  */
264aa75f4d3SHarshad Shirwadkar void ext4_fc_stop_update(struct inode *inode)
265aa75f4d3SHarshad Shirwadkar {
266aa75f4d3SHarshad Shirwadkar 	struct ext4_inode_info *ei = EXT4_I(inode);
267aa75f4d3SHarshad Shirwadkar 
2688016e29fSHarshad Shirwadkar 	if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) ||
2698016e29fSHarshad Shirwadkar 	    (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY))
270aa75f4d3SHarshad Shirwadkar 		return;
271aa75f4d3SHarshad Shirwadkar 
272aa75f4d3SHarshad Shirwadkar 	if (atomic_dec_and_test(&ei->i_fc_updates))
273aa75f4d3SHarshad Shirwadkar 		wake_up_all(&ei->i_fc_wait);
274aa75f4d3SHarshad Shirwadkar }
275aa75f4d3SHarshad Shirwadkar 
276aa75f4d3SHarshad Shirwadkar /*
277aa75f4d3SHarshad Shirwadkar  * Remove inode from fast commit list. If the inode is being committed
278aa75f4d3SHarshad Shirwadkar  * we wait until inode commit is done.
279aa75f4d3SHarshad Shirwadkar  */
280aa75f4d3SHarshad Shirwadkar void ext4_fc_del(struct inode *inode)
281aa75f4d3SHarshad Shirwadkar {
282aa75f4d3SHarshad Shirwadkar 	struct ext4_inode_info *ei = EXT4_I(inode);
283b3998b3bSRitesh Harjani 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
284b3998b3bSRitesh Harjani 	struct ext4_fc_dentry_update *fc_dentry;
285aa75f4d3SHarshad Shirwadkar 
2868016e29fSHarshad Shirwadkar 	if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) ||
2878016e29fSHarshad Shirwadkar 	    (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY))
288aa75f4d3SHarshad Shirwadkar 		return;
289aa75f4d3SHarshad Shirwadkar 
290aa75f4d3SHarshad Shirwadkar restart:
291aa75f4d3SHarshad Shirwadkar 	spin_lock(&EXT4_SB(inode->i_sb)->s_fc_lock);
292b3998b3bSRitesh Harjani 	if (list_empty(&ei->i_fc_list) && list_empty(&ei->i_fc_dilist)) {
293aa75f4d3SHarshad Shirwadkar 		spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
294aa75f4d3SHarshad Shirwadkar 		return;
295aa75f4d3SHarshad Shirwadkar 	}
296aa75f4d3SHarshad Shirwadkar 
297aa75f4d3SHarshad Shirwadkar 	if (ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING)) {
298f6634e26SHarshad Shirwadkar 		ext4_fc_wait_committing_inode(inode);
299aa75f4d3SHarshad Shirwadkar 		goto restart;
300aa75f4d3SHarshad Shirwadkar 	}
301b3998b3bSRitesh Harjani 
302b3998b3bSRitesh Harjani 	if (!list_empty(&ei->i_fc_list))
303aa75f4d3SHarshad Shirwadkar 		list_del_init(&ei->i_fc_list);
304b3998b3bSRitesh Harjani 
305b3998b3bSRitesh Harjani 	/*
306b3998b3bSRitesh Harjani 	 * Since this inode is getting removed, let's also remove all FC
307b3998b3bSRitesh Harjani 	 * dentry create references, since it is not needed to log it anyways.
308b3998b3bSRitesh Harjani 	 */
309b3998b3bSRitesh Harjani 	if (list_empty(&ei->i_fc_dilist)) {
310b3998b3bSRitesh Harjani 		spin_unlock(&sbi->s_fc_lock);
311b3998b3bSRitesh Harjani 		return;
312b3998b3bSRitesh Harjani 	}
313b3998b3bSRitesh Harjani 
314b3998b3bSRitesh Harjani 	fc_dentry = list_first_entry(&ei->i_fc_dilist, struct ext4_fc_dentry_update, fcd_dilist);
315b3998b3bSRitesh Harjani 	WARN_ON(fc_dentry->fcd_op != EXT4_FC_TAG_CREAT);
316b3998b3bSRitesh Harjani 	list_del_init(&fc_dentry->fcd_list);
317b3998b3bSRitesh Harjani 	list_del_init(&fc_dentry->fcd_dilist);
318b3998b3bSRitesh Harjani 
319b3998b3bSRitesh Harjani 	WARN_ON(!list_empty(&ei->i_fc_dilist));
320b3998b3bSRitesh Harjani 	spin_unlock(&sbi->s_fc_lock);
321b3998b3bSRitesh Harjani 
322b3998b3bSRitesh Harjani 	if (fc_dentry->fcd_name.name &&
323b3998b3bSRitesh Harjani 		fc_dentry->fcd_name.len > DNAME_INLINE_LEN)
324b3998b3bSRitesh Harjani 		kfree(fc_dentry->fcd_name.name);
325b3998b3bSRitesh Harjani 	kmem_cache_free(ext4_fc_dentry_cachep, fc_dentry);
326b3998b3bSRitesh Harjani 
327b3998b3bSRitesh Harjani 	return;
328aa75f4d3SHarshad Shirwadkar }
329aa75f4d3SHarshad Shirwadkar 
330aa75f4d3SHarshad Shirwadkar /*
331e85c81baSXin Yin  * Mark file system as fast commit ineligible, and record latest
332e85c81baSXin Yin  * ineligible transaction tid. This means until the recorded
333e85c81baSXin Yin  * transaction, commit operation would result in a full jbd2 commit.
334aa75f4d3SHarshad Shirwadkar  */
335e85c81baSXin Yin void ext4_fc_mark_ineligible(struct super_block *sb, int reason, handle_t *handle)
336aa75f4d3SHarshad Shirwadkar {
337aa75f4d3SHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(sb);
338e85c81baSXin Yin 	tid_t tid;
339aa75f4d3SHarshad Shirwadkar 
3408016e29fSHarshad Shirwadkar 	if (!test_opt2(sb, JOURNAL_FAST_COMMIT) ||
3418016e29fSHarshad Shirwadkar 	    (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY))
3428016e29fSHarshad Shirwadkar 		return;
3438016e29fSHarshad Shirwadkar 
3449b5f6c9bSHarshad Shirwadkar 	ext4_set_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
345e85c81baSXin Yin 	if (handle && !IS_ERR(handle))
346e85c81baSXin Yin 		tid = handle->h_transaction->t_tid;
347e85c81baSXin Yin 	else {
348e85c81baSXin Yin 		read_lock(&sbi->s_journal->j_state_lock);
349e85c81baSXin Yin 		tid = sbi->s_journal->j_running_transaction ?
350e85c81baSXin Yin 				sbi->s_journal->j_running_transaction->t_tid : 0;
351e85c81baSXin Yin 		read_unlock(&sbi->s_journal->j_state_lock);
352e85c81baSXin Yin 	}
353e85c81baSXin Yin 	spin_lock(&sbi->s_fc_lock);
354e85c81baSXin Yin 	if (sbi->s_fc_ineligible_tid < tid)
355e85c81baSXin Yin 		sbi->s_fc_ineligible_tid = tid;
356e85c81baSXin Yin 	spin_unlock(&sbi->s_fc_lock);
357aa75f4d3SHarshad Shirwadkar 	WARN_ON(reason >= EXT4_FC_REASON_MAX);
358aa75f4d3SHarshad Shirwadkar 	sbi->s_fc_stats.fc_ineligible_reason_count[reason]++;
359aa75f4d3SHarshad Shirwadkar }
360aa75f4d3SHarshad Shirwadkar 
361aa75f4d3SHarshad Shirwadkar /*
362aa75f4d3SHarshad Shirwadkar  * Generic fast commit tracking function. If this is the first time this we are
363aa75f4d3SHarshad Shirwadkar  * called after a full commit, we initialize fast commit fields and then call
364aa75f4d3SHarshad Shirwadkar  * __fc_track_fn() with update = 0. If we have already been called after a full
365aa75f4d3SHarshad Shirwadkar  * commit, we pass update = 1. Based on that, the track function can determine
366aa75f4d3SHarshad Shirwadkar  * if it needs to track a field for the first time or if it needs to just
367aa75f4d3SHarshad Shirwadkar  * update the previously tracked value.
368aa75f4d3SHarshad Shirwadkar  *
369aa75f4d3SHarshad Shirwadkar  * If enqueue is set, this function enqueues the inode in fast commit list.
370aa75f4d3SHarshad Shirwadkar  */
371aa75f4d3SHarshad Shirwadkar static int ext4_fc_track_template(
372a80f7fcfSHarshad Shirwadkar 	handle_t *handle, struct inode *inode,
373a80f7fcfSHarshad Shirwadkar 	int (*__fc_track_fn)(struct inode *, void *, bool),
374aa75f4d3SHarshad Shirwadkar 	void *args, int enqueue)
375aa75f4d3SHarshad Shirwadkar {
376aa75f4d3SHarshad Shirwadkar 	bool update = false;
377aa75f4d3SHarshad Shirwadkar 	struct ext4_inode_info *ei = EXT4_I(inode);
378aa75f4d3SHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
379a80f7fcfSHarshad Shirwadkar 	tid_t tid = 0;
380aa75f4d3SHarshad Shirwadkar 	int ret;
381aa75f4d3SHarshad Shirwadkar 
3828016e29fSHarshad Shirwadkar 	if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) ||
3838016e29fSHarshad Shirwadkar 	    (sbi->s_mount_state & EXT4_FC_REPLAY))
384aa75f4d3SHarshad Shirwadkar 		return -EOPNOTSUPP;
385aa75f4d3SHarshad Shirwadkar 
3867bbbe241SHarshad Shirwadkar 	if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE))
387aa75f4d3SHarshad Shirwadkar 		return -EINVAL;
388aa75f4d3SHarshad Shirwadkar 
389a80f7fcfSHarshad Shirwadkar 	tid = handle->h_transaction->t_tid;
390aa75f4d3SHarshad Shirwadkar 	mutex_lock(&ei->i_fc_lock);
391a80f7fcfSHarshad Shirwadkar 	if (tid == ei->i_sync_tid) {
392aa75f4d3SHarshad Shirwadkar 		update = true;
393aa75f4d3SHarshad Shirwadkar 	} else {
394aa75f4d3SHarshad Shirwadkar 		ext4_fc_reset_inode(inode);
395a80f7fcfSHarshad Shirwadkar 		ei->i_sync_tid = tid;
396aa75f4d3SHarshad Shirwadkar 	}
397aa75f4d3SHarshad Shirwadkar 	ret = __fc_track_fn(inode, args, update);
398aa75f4d3SHarshad Shirwadkar 	mutex_unlock(&ei->i_fc_lock);
399aa75f4d3SHarshad Shirwadkar 
400aa75f4d3SHarshad Shirwadkar 	if (!enqueue)
401aa75f4d3SHarshad Shirwadkar 		return ret;
402aa75f4d3SHarshad Shirwadkar 
403aa75f4d3SHarshad Shirwadkar 	spin_lock(&sbi->s_fc_lock);
404aa75f4d3SHarshad Shirwadkar 	if (list_empty(&EXT4_I(inode)->i_fc_list))
405aa75f4d3SHarshad Shirwadkar 		list_add_tail(&EXT4_I(inode)->i_fc_list,
406bdc8a53aSXin Yin 				(sbi->s_journal->j_flags & JBD2_FULL_COMMIT_ONGOING ||
407bdc8a53aSXin Yin 				 sbi->s_journal->j_flags & JBD2_FAST_COMMIT_ONGOING) ?
408aa75f4d3SHarshad Shirwadkar 				&sbi->s_fc_q[FC_Q_STAGING] :
409aa75f4d3SHarshad Shirwadkar 				&sbi->s_fc_q[FC_Q_MAIN]);
410aa75f4d3SHarshad Shirwadkar 	spin_unlock(&sbi->s_fc_lock);
411aa75f4d3SHarshad Shirwadkar 
412aa75f4d3SHarshad Shirwadkar 	return ret;
413aa75f4d3SHarshad Shirwadkar }
414aa75f4d3SHarshad Shirwadkar 
/* Argument bundle handed (as a void pointer) to __track_dentry_update(). */
struct __track_dentry_update_args {
	struct dentry *dentry;	/* directory entry the update refers to */
	int op;			/* EXT4_FC_TAG_* operation to record */
};
419aa75f4d3SHarshad Shirwadkar 
420aa75f4d3SHarshad Shirwadkar /* __track_fn for directory entry updates. Called with ei->i_fc_lock. */
421aa75f4d3SHarshad Shirwadkar static int __track_dentry_update(struct inode *inode, void *arg, bool update)
422aa75f4d3SHarshad Shirwadkar {
423aa75f4d3SHarshad Shirwadkar 	struct ext4_fc_dentry_update *node;
424aa75f4d3SHarshad Shirwadkar 	struct ext4_inode_info *ei = EXT4_I(inode);
425aa75f4d3SHarshad Shirwadkar 	struct __track_dentry_update_args *dentry_update =
426aa75f4d3SHarshad Shirwadkar 		(struct __track_dentry_update_args *)arg;
427aa75f4d3SHarshad Shirwadkar 	struct dentry *dentry = dentry_update->dentry;
428aa75f4d3SHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
429aa75f4d3SHarshad Shirwadkar 
430aa75f4d3SHarshad Shirwadkar 	mutex_unlock(&ei->i_fc_lock);
431aa75f4d3SHarshad Shirwadkar 	node = kmem_cache_alloc(ext4_fc_dentry_cachep, GFP_NOFS);
432aa75f4d3SHarshad Shirwadkar 	if (!node) {
433e85c81baSXin Yin 		ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM, NULL);
434aa75f4d3SHarshad Shirwadkar 		mutex_lock(&ei->i_fc_lock);
435aa75f4d3SHarshad Shirwadkar 		return -ENOMEM;
436aa75f4d3SHarshad Shirwadkar 	}
437aa75f4d3SHarshad Shirwadkar 
438aa75f4d3SHarshad Shirwadkar 	node->fcd_op = dentry_update->op;
439aa75f4d3SHarshad Shirwadkar 	node->fcd_parent = dentry->d_parent->d_inode->i_ino;
440aa75f4d3SHarshad Shirwadkar 	node->fcd_ino = inode->i_ino;
441aa75f4d3SHarshad Shirwadkar 	if (dentry->d_name.len > DNAME_INLINE_LEN) {
442aa75f4d3SHarshad Shirwadkar 		node->fcd_name.name = kmalloc(dentry->d_name.len, GFP_NOFS);
443aa75f4d3SHarshad Shirwadkar 		if (!node->fcd_name.name) {
444aa75f4d3SHarshad Shirwadkar 			kmem_cache_free(ext4_fc_dentry_cachep, node);
445aa75f4d3SHarshad Shirwadkar 			ext4_fc_mark_ineligible(inode->i_sb,
446e85c81baSXin Yin 				EXT4_FC_REASON_NOMEM, NULL);
447aa75f4d3SHarshad Shirwadkar 			mutex_lock(&ei->i_fc_lock);
448aa75f4d3SHarshad Shirwadkar 			return -ENOMEM;
449aa75f4d3SHarshad Shirwadkar 		}
450aa75f4d3SHarshad Shirwadkar 		memcpy((u8 *)node->fcd_name.name, dentry->d_name.name,
451aa75f4d3SHarshad Shirwadkar 			dentry->d_name.len);
452aa75f4d3SHarshad Shirwadkar 	} else {
453aa75f4d3SHarshad Shirwadkar 		memcpy(node->fcd_iname, dentry->d_name.name,
454aa75f4d3SHarshad Shirwadkar 			dentry->d_name.len);
455aa75f4d3SHarshad Shirwadkar 		node->fcd_name.name = node->fcd_iname;
456aa75f4d3SHarshad Shirwadkar 	}
457aa75f4d3SHarshad Shirwadkar 	node->fcd_name.len = dentry->d_name.len;
458b3998b3bSRitesh Harjani 	INIT_LIST_HEAD(&node->fcd_dilist);
459aa75f4d3SHarshad Shirwadkar 	spin_lock(&sbi->s_fc_lock);
460bdc8a53aSXin Yin 	if (sbi->s_journal->j_flags & JBD2_FULL_COMMIT_ONGOING ||
461bdc8a53aSXin Yin 		sbi->s_journal->j_flags & JBD2_FAST_COMMIT_ONGOING)
462aa75f4d3SHarshad Shirwadkar 		list_add_tail(&node->fcd_list,
463aa75f4d3SHarshad Shirwadkar 				&sbi->s_fc_dentry_q[FC_Q_STAGING]);
464aa75f4d3SHarshad Shirwadkar 	else
465aa75f4d3SHarshad Shirwadkar 		list_add_tail(&node->fcd_list, &sbi->s_fc_dentry_q[FC_Q_MAIN]);
466b3998b3bSRitesh Harjani 
467b3998b3bSRitesh Harjani 	/*
468b3998b3bSRitesh Harjani 	 * This helps us keep a track of all fc_dentry updates which is part of
469b3998b3bSRitesh Harjani 	 * this ext4 inode. So in case the inode is getting unlinked, before
470b3998b3bSRitesh Harjani 	 * even we get a chance to fsync, we could remove all fc_dentry
471b3998b3bSRitesh Harjani 	 * references while evicting the inode in ext4_fc_del().
472b3998b3bSRitesh Harjani 	 * Also with this, we don't need to loop over all the inodes in
473b3998b3bSRitesh Harjani 	 * sbi->s_fc_q to get the corresponding inode in
474b3998b3bSRitesh Harjani 	 * ext4_fc_commit_dentry_updates().
475b3998b3bSRitesh Harjani 	 */
476b3998b3bSRitesh Harjani 	if (dentry_update->op == EXT4_FC_TAG_CREAT) {
477b3998b3bSRitesh Harjani 		WARN_ON(!list_empty(&ei->i_fc_dilist));
478b3998b3bSRitesh Harjani 		list_add_tail(&node->fcd_dilist, &ei->i_fc_dilist);
479b3998b3bSRitesh Harjani 	}
480aa75f4d3SHarshad Shirwadkar 	spin_unlock(&sbi->s_fc_lock);
481aa75f4d3SHarshad Shirwadkar 	mutex_lock(&ei->i_fc_lock);
482aa75f4d3SHarshad Shirwadkar 
483aa75f4d3SHarshad Shirwadkar 	return 0;
484aa75f4d3SHarshad Shirwadkar }
485aa75f4d3SHarshad Shirwadkar 
486a80f7fcfSHarshad Shirwadkar void __ext4_fc_track_unlink(handle_t *handle,
487a80f7fcfSHarshad Shirwadkar 		struct inode *inode, struct dentry *dentry)
488aa75f4d3SHarshad Shirwadkar {
489aa75f4d3SHarshad Shirwadkar 	struct __track_dentry_update_args args;
490aa75f4d3SHarshad Shirwadkar 	int ret;
491aa75f4d3SHarshad Shirwadkar 
492aa75f4d3SHarshad Shirwadkar 	args.dentry = dentry;
493aa75f4d3SHarshad Shirwadkar 	args.op = EXT4_FC_TAG_UNLINK;
494aa75f4d3SHarshad Shirwadkar 
495a80f7fcfSHarshad Shirwadkar 	ret = ext4_fc_track_template(handle, inode, __track_dentry_update,
496aa75f4d3SHarshad Shirwadkar 					(void *)&args, 0);
497aa75f4d3SHarshad Shirwadkar 	trace_ext4_fc_track_unlink(inode, dentry, ret);
498aa75f4d3SHarshad Shirwadkar }
499aa75f4d3SHarshad Shirwadkar 
500a80f7fcfSHarshad Shirwadkar void ext4_fc_track_unlink(handle_t *handle, struct dentry *dentry)
501a80f7fcfSHarshad Shirwadkar {
502a80f7fcfSHarshad Shirwadkar 	__ext4_fc_track_unlink(handle, d_inode(dentry), dentry);
503a80f7fcfSHarshad Shirwadkar }
504a80f7fcfSHarshad Shirwadkar 
505a80f7fcfSHarshad Shirwadkar void __ext4_fc_track_link(handle_t *handle,
506a80f7fcfSHarshad Shirwadkar 	struct inode *inode, struct dentry *dentry)
507aa75f4d3SHarshad Shirwadkar {
508aa75f4d3SHarshad Shirwadkar 	struct __track_dentry_update_args args;
509aa75f4d3SHarshad Shirwadkar 	int ret;
510aa75f4d3SHarshad Shirwadkar 
511aa75f4d3SHarshad Shirwadkar 	args.dentry = dentry;
512aa75f4d3SHarshad Shirwadkar 	args.op = EXT4_FC_TAG_LINK;
513aa75f4d3SHarshad Shirwadkar 
514a80f7fcfSHarshad Shirwadkar 	ret = ext4_fc_track_template(handle, inode, __track_dentry_update,
515aa75f4d3SHarshad Shirwadkar 					(void *)&args, 0);
516aa75f4d3SHarshad Shirwadkar 	trace_ext4_fc_track_link(inode, dentry, ret);
517aa75f4d3SHarshad Shirwadkar }
518aa75f4d3SHarshad Shirwadkar 
519a80f7fcfSHarshad Shirwadkar void ext4_fc_track_link(handle_t *handle, struct dentry *dentry)
520a80f7fcfSHarshad Shirwadkar {
521a80f7fcfSHarshad Shirwadkar 	__ext4_fc_track_link(handle, d_inode(dentry), dentry);
522a80f7fcfSHarshad Shirwadkar }
523a80f7fcfSHarshad Shirwadkar 
5248210bb29SHarshad Shirwadkar void __ext4_fc_track_create(handle_t *handle, struct inode *inode,
5258210bb29SHarshad Shirwadkar 			  struct dentry *dentry)
526aa75f4d3SHarshad Shirwadkar {
527aa75f4d3SHarshad Shirwadkar 	struct __track_dentry_update_args args;
528aa75f4d3SHarshad Shirwadkar 	int ret;
529aa75f4d3SHarshad Shirwadkar 
530aa75f4d3SHarshad Shirwadkar 	args.dentry = dentry;
531aa75f4d3SHarshad Shirwadkar 	args.op = EXT4_FC_TAG_CREAT;
532aa75f4d3SHarshad Shirwadkar 
533a80f7fcfSHarshad Shirwadkar 	ret = ext4_fc_track_template(handle, inode, __track_dentry_update,
534aa75f4d3SHarshad Shirwadkar 					(void *)&args, 0);
535aa75f4d3SHarshad Shirwadkar 	trace_ext4_fc_track_create(inode, dentry, ret);
536aa75f4d3SHarshad Shirwadkar }
537aa75f4d3SHarshad Shirwadkar 
5388210bb29SHarshad Shirwadkar void ext4_fc_track_create(handle_t *handle, struct dentry *dentry)
5398210bb29SHarshad Shirwadkar {
5408210bb29SHarshad Shirwadkar 	__ext4_fc_track_create(handle, d_inode(dentry), dentry);
5418210bb29SHarshad Shirwadkar }
5428210bb29SHarshad Shirwadkar 
543aa75f4d3SHarshad Shirwadkar /* __track_fn for inode tracking */
544aa75f4d3SHarshad Shirwadkar static int __track_inode(struct inode *inode, void *arg, bool update)
545aa75f4d3SHarshad Shirwadkar {
546aa75f4d3SHarshad Shirwadkar 	if (update)
547aa75f4d3SHarshad Shirwadkar 		return -EEXIST;
548aa75f4d3SHarshad Shirwadkar 
549aa75f4d3SHarshad Shirwadkar 	EXT4_I(inode)->i_fc_lblk_len = 0;
550aa75f4d3SHarshad Shirwadkar 
551aa75f4d3SHarshad Shirwadkar 	return 0;
552aa75f4d3SHarshad Shirwadkar }
553aa75f4d3SHarshad Shirwadkar 
554a80f7fcfSHarshad Shirwadkar void ext4_fc_track_inode(handle_t *handle, struct inode *inode)
555aa75f4d3SHarshad Shirwadkar {
556aa75f4d3SHarshad Shirwadkar 	int ret;
557aa75f4d3SHarshad Shirwadkar 
558aa75f4d3SHarshad Shirwadkar 	if (S_ISDIR(inode->i_mode))
559aa75f4d3SHarshad Shirwadkar 		return;
560aa75f4d3SHarshad Shirwadkar 
561556e0319SHarshad Shirwadkar 	if (ext4_should_journal_data(inode)) {
562556e0319SHarshad Shirwadkar 		ext4_fc_mark_ineligible(inode->i_sb,
563e85c81baSXin Yin 					EXT4_FC_REASON_INODE_JOURNAL_DATA, handle);
564556e0319SHarshad Shirwadkar 		return;
565556e0319SHarshad Shirwadkar 	}
566556e0319SHarshad Shirwadkar 
567a80f7fcfSHarshad Shirwadkar 	ret = ext4_fc_track_template(handle, inode, __track_inode, NULL, 1);
568aa75f4d3SHarshad Shirwadkar 	trace_ext4_fc_track_inode(inode, ret);
569aa75f4d3SHarshad Shirwadkar }
570aa75f4d3SHarshad Shirwadkar 
571aa75f4d3SHarshad Shirwadkar struct __track_range_args {
572aa75f4d3SHarshad Shirwadkar 	ext4_lblk_t start, end;
573aa75f4d3SHarshad Shirwadkar };
574aa75f4d3SHarshad Shirwadkar 
575aa75f4d3SHarshad Shirwadkar /* __track_fn for tracking data updates */
576aa75f4d3SHarshad Shirwadkar static int __track_range(struct inode *inode, void *arg, bool update)
577aa75f4d3SHarshad Shirwadkar {
578aa75f4d3SHarshad Shirwadkar 	struct ext4_inode_info *ei = EXT4_I(inode);
579aa75f4d3SHarshad Shirwadkar 	ext4_lblk_t oldstart;
580aa75f4d3SHarshad Shirwadkar 	struct __track_range_args *__arg =
581aa75f4d3SHarshad Shirwadkar 		(struct __track_range_args *)arg;
582aa75f4d3SHarshad Shirwadkar 
583aa75f4d3SHarshad Shirwadkar 	if (inode->i_ino < EXT4_FIRST_INO(inode->i_sb)) {
584aa75f4d3SHarshad Shirwadkar 		ext4_debug("Special inode %ld being modified\n", inode->i_ino);
585aa75f4d3SHarshad Shirwadkar 		return -ECANCELED;
586aa75f4d3SHarshad Shirwadkar 	}
587aa75f4d3SHarshad Shirwadkar 
588aa75f4d3SHarshad Shirwadkar 	oldstart = ei->i_fc_lblk_start;
589aa75f4d3SHarshad Shirwadkar 
590aa75f4d3SHarshad Shirwadkar 	if (update && ei->i_fc_lblk_len > 0) {
591aa75f4d3SHarshad Shirwadkar 		ei->i_fc_lblk_start = min(ei->i_fc_lblk_start, __arg->start);
592aa75f4d3SHarshad Shirwadkar 		ei->i_fc_lblk_len =
593aa75f4d3SHarshad Shirwadkar 			max(oldstart + ei->i_fc_lblk_len - 1, __arg->end) -
594aa75f4d3SHarshad Shirwadkar 				ei->i_fc_lblk_start + 1;
595aa75f4d3SHarshad Shirwadkar 	} else {
596aa75f4d3SHarshad Shirwadkar 		ei->i_fc_lblk_start = __arg->start;
597aa75f4d3SHarshad Shirwadkar 		ei->i_fc_lblk_len = __arg->end - __arg->start + 1;
598aa75f4d3SHarshad Shirwadkar 	}
599aa75f4d3SHarshad Shirwadkar 
600aa75f4d3SHarshad Shirwadkar 	return 0;
601aa75f4d3SHarshad Shirwadkar }
602aa75f4d3SHarshad Shirwadkar 
603a80f7fcfSHarshad Shirwadkar void ext4_fc_track_range(handle_t *handle, struct inode *inode, ext4_lblk_t start,
604aa75f4d3SHarshad Shirwadkar 			 ext4_lblk_t end)
605aa75f4d3SHarshad Shirwadkar {
606aa75f4d3SHarshad Shirwadkar 	struct __track_range_args args;
607aa75f4d3SHarshad Shirwadkar 	int ret;
608aa75f4d3SHarshad Shirwadkar 
609aa75f4d3SHarshad Shirwadkar 	if (S_ISDIR(inode->i_mode))
610aa75f4d3SHarshad Shirwadkar 		return;
611aa75f4d3SHarshad Shirwadkar 
612aa75f4d3SHarshad Shirwadkar 	args.start = start;
613aa75f4d3SHarshad Shirwadkar 	args.end = end;
614aa75f4d3SHarshad Shirwadkar 
615a80f7fcfSHarshad Shirwadkar 	ret = ext4_fc_track_template(handle, inode,  __track_range, &args, 1);
616aa75f4d3SHarshad Shirwadkar 
617aa75f4d3SHarshad Shirwadkar 	trace_ext4_fc_track_range(inode, start, end, ret);
618aa75f4d3SHarshad Shirwadkar }
619aa75f4d3SHarshad Shirwadkar 
620e9f53353SDaejun Park static void ext4_fc_submit_bh(struct super_block *sb, bool is_tail)
621aa75f4d3SHarshad Shirwadkar {
622aa75f4d3SHarshad Shirwadkar 	int write_flags = REQ_SYNC;
623aa75f4d3SHarshad Shirwadkar 	struct buffer_head *bh = EXT4_SB(sb)->s_fc_bh;
624aa75f4d3SHarshad Shirwadkar 
625e9f53353SDaejun Park 	/* Add REQ_FUA | REQ_PREFLUSH only its tail */
626e9f53353SDaejun Park 	if (test_opt(sb, BARRIER) && is_tail)
627aa75f4d3SHarshad Shirwadkar 		write_flags |= REQ_FUA | REQ_PREFLUSH;
628aa75f4d3SHarshad Shirwadkar 	lock_buffer(bh);
629764b3fd3SHarshad Shirwadkar 	set_buffer_dirty(bh);
630aa75f4d3SHarshad Shirwadkar 	set_buffer_uptodate(bh);
631aa75f4d3SHarshad Shirwadkar 	bh->b_end_io = ext4_end_buffer_io_sync;
632aa75f4d3SHarshad Shirwadkar 	submit_bh(REQ_OP_WRITE, write_flags, bh);
633aa75f4d3SHarshad Shirwadkar 	EXT4_SB(sb)->s_fc_bh = NULL;
634aa75f4d3SHarshad Shirwadkar }
635aa75f4d3SHarshad Shirwadkar 
636aa75f4d3SHarshad Shirwadkar /* Ext4 commit path routines */
637aa75f4d3SHarshad Shirwadkar 
638aa75f4d3SHarshad Shirwadkar /* memzero and update CRC */
639aa75f4d3SHarshad Shirwadkar static void *ext4_fc_memzero(struct super_block *sb, void *dst, int len,
640aa75f4d3SHarshad Shirwadkar 				u32 *crc)
641aa75f4d3SHarshad Shirwadkar {
642aa75f4d3SHarshad Shirwadkar 	void *ret;
643aa75f4d3SHarshad Shirwadkar 
644aa75f4d3SHarshad Shirwadkar 	ret = memset(dst, 0, len);
645aa75f4d3SHarshad Shirwadkar 	if (crc)
646aa75f4d3SHarshad Shirwadkar 		*crc = ext4_chksum(EXT4_SB(sb), *crc, dst, len);
647aa75f4d3SHarshad Shirwadkar 	return ret;
648aa75f4d3SHarshad Shirwadkar }
649aa75f4d3SHarshad Shirwadkar 
650aa75f4d3SHarshad Shirwadkar /*
651aa75f4d3SHarshad Shirwadkar  * Allocate len bytes on a fast commit buffer.
652aa75f4d3SHarshad Shirwadkar  *
653aa75f4d3SHarshad Shirwadkar  * During the commit time this function is used to manage fast commit
654aa75f4d3SHarshad Shirwadkar  * block space. We don't split a fast commit log onto different
655aa75f4d3SHarshad Shirwadkar  * blocks. So this function makes sure that if there's not enough space
656aa75f4d3SHarshad Shirwadkar  * on the current block, the remaining space in the current block is
657aa75f4d3SHarshad Shirwadkar  * marked as unused by adding EXT4_FC_TAG_PAD tag. In that case,
658aa75f4d3SHarshad Shirwadkar  * new block is from jbd2 and CRC is updated to reflect the padding
659aa75f4d3SHarshad Shirwadkar  * we added.
660aa75f4d3SHarshad Shirwadkar  */
661aa75f4d3SHarshad Shirwadkar static u8 *ext4_fc_reserve_space(struct super_block *sb, int len, u32 *crc)
662aa75f4d3SHarshad Shirwadkar {
663aa75f4d3SHarshad Shirwadkar 	struct ext4_fc_tl *tl;
664aa75f4d3SHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(sb);
665aa75f4d3SHarshad Shirwadkar 	struct buffer_head *bh;
666aa75f4d3SHarshad Shirwadkar 	int bsize = sbi->s_journal->j_blocksize;
667aa75f4d3SHarshad Shirwadkar 	int ret, off = sbi->s_fc_bytes % bsize;
668aa75f4d3SHarshad Shirwadkar 	int pad_len;
669aa75f4d3SHarshad Shirwadkar 
670aa75f4d3SHarshad Shirwadkar 	/*
671aa75f4d3SHarshad Shirwadkar 	 * After allocating len, we should have space at least for a 0 byte
672aa75f4d3SHarshad Shirwadkar 	 * padding.
673aa75f4d3SHarshad Shirwadkar 	 */
674aa75f4d3SHarshad Shirwadkar 	if (len + sizeof(struct ext4_fc_tl) > bsize)
675aa75f4d3SHarshad Shirwadkar 		return NULL;
676aa75f4d3SHarshad Shirwadkar 
677aa75f4d3SHarshad Shirwadkar 	if (bsize - off - 1 > len + sizeof(struct ext4_fc_tl)) {
678aa75f4d3SHarshad Shirwadkar 		/*
679aa75f4d3SHarshad Shirwadkar 		 * Only allocate from current buffer if we have enough space for
680aa75f4d3SHarshad Shirwadkar 		 * this request AND we have space to add a zero byte padding.
681aa75f4d3SHarshad Shirwadkar 		 */
682aa75f4d3SHarshad Shirwadkar 		if (!sbi->s_fc_bh) {
683aa75f4d3SHarshad Shirwadkar 			ret = jbd2_fc_get_buf(EXT4_SB(sb)->s_journal, &bh);
684aa75f4d3SHarshad Shirwadkar 			if (ret)
685aa75f4d3SHarshad Shirwadkar 				return NULL;
686aa75f4d3SHarshad Shirwadkar 			sbi->s_fc_bh = bh;
687aa75f4d3SHarshad Shirwadkar 		}
688aa75f4d3SHarshad Shirwadkar 		sbi->s_fc_bytes += len;
689aa75f4d3SHarshad Shirwadkar 		return sbi->s_fc_bh->b_data + off;
690aa75f4d3SHarshad Shirwadkar 	}
691aa75f4d3SHarshad Shirwadkar 	/* Need to add PAD tag */
692aa75f4d3SHarshad Shirwadkar 	tl = (struct ext4_fc_tl *)(sbi->s_fc_bh->b_data + off);
693aa75f4d3SHarshad Shirwadkar 	tl->fc_tag = cpu_to_le16(EXT4_FC_TAG_PAD);
694aa75f4d3SHarshad Shirwadkar 	pad_len = bsize - off - 1 - sizeof(struct ext4_fc_tl);
695aa75f4d3SHarshad Shirwadkar 	tl->fc_len = cpu_to_le16(pad_len);
696aa75f4d3SHarshad Shirwadkar 	if (crc)
697aa75f4d3SHarshad Shirwadkar 		*crc = ext4_chksum(sbi, *crc, tl, sizeof(*tl));
698aa75f4d3SHarshad Shirwadkar 	if (pad_len > 0)
699aa75f4d3SHarshad Shirwadkar 		ext4_fc_memzero(sb, tl + 1, pad_len, crc);
700e9f53353SDaejun Park 	ext4_fc_submit_bh(sb, false);
701aa75f4d3SHarshad Shirwadkar 
702aa75f4d3SHarshad Shirwadkar 	ret = jbd2_fc_get_buf(EXT4_SB(sb)->s_journal, &bh);
703aa75f4d3SHarshad Shirwadkar 	if (ret)
704aa75f4d3SHarshad Shirwadkar 		return NULL;
705aa75f4d3SHarshad Shirwadkar 	sbi->s_fc_bh = bh;
706aa75f4d3SHarshad Shirwadkar 	sbi->s_fc_bytes = (sbi->s_fc_bytes / bsize + 1) * bsize + len;
707aa75f4d3SHarshad Shirwadkar 	return sbi->s_fc_bh->b_data;
708aa75f4d3SHarshad Shirwadkar }
709aa75f4d3SHarshad Shirwadkar 
710aa75f4d3SHarshad Shirwadkar /* memcpy to fc reserved space and update CRC */
711aa75f4d3SHarshad Shirwadkar static void *ext4_fc_memcpy(struct super_block *sb, void *dst, const void *src,
712aa75f4d3SHarshad Shirwadkar 				int len, u32 *crc)
713aa75f4d3SHarshad Shirwadkar {
714aa75f4d3SHarshad Shirwadkar 	if (crc)
715aa75f4d3SHarshad Shirwadkar 		*crc = ext4_chksum(EXT4_SB(sb), *crc, src, len);
716aa75f4d3SHarshad Shirwadkar 	return memcpy(dst, src, len);
717aa75f4d3SHarshad Shirwadkar }
718aa75f4d3SHarshad Shirwadkar 
719aa75f4d3SHarshad Shirwadkar /*
720aa75f4d3SHarshad Shirwadkar  * Complete a fast commit by writing tail tag.
721aa75f4d3SHarshad Shirwadkar  *
722aa75f4d3SHarshad Shirwadkar  * Writing tail tag marks the end of a fast commit. In order to guarantee
723aa75f4d3SHarshad Shirwadkar  * atomicity, after writing tail tag, even if there's space remaining
724aa75f4d3SHarshad Shirwadkar  * in the block, next commit shouldn't use it. That's why tail tag
725aa75f4d3SHarshad Shirwadkar  * has the length as that of the remaining space on the block.
726aa75f4d3SHarshad Shirwadkar  */
727aa75f4d3SHarshad Shirwadkar static int ext4_fc_write_tail(struct super_block *sb, u32 crc)
728aa75f4d3SHarshad Shirwadkar {
729aa75f4d3SHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(sb);
730aa75f4d3SHarshad Shirwadkar 	struct ext4_fc_tl tl;
731aa75f4d3SHarshad Shirwadkar 	struct ext4_fc_tail tail;
732aa75f4d3SHarshad Shirwadkar 	int off, bsize = sbi->s_journal->j_blocksize;
733aa75f4d3SHarshad Shirwadkar 	u8 *dst;
734aa75f4d3SHarshad Shirwadkar 
735aa75f4d3SHarshad Shirwadkar 	/*
736aa75f4d3SHarshad Shirwadkar 	 * ext4_fc_reserve_space takes care of allocating an extra block if
737aa75f4d3SHarshad Shirwadkar 	 * there's no enough space on this block for accommodating this tail.
738aa75f4d3SHarshad Shirwadkar 	 */
739aa75f4d3SHarshad Shirwadkar 	dst = ext4_fc_reserve_space(sb, sizeof(tl) + sizeof(tail), &crc);
740aa75f4d3SHarshad Shirwadkar 	if (!dst)
741aa75f4d3SHarshad Shirwadkar 		return -ENOSPC;
742aa75f4d3SHarshad Shirwadkar 
743aa75f4d3SHarshad Shirwadkar 	off = sbi->s_fc_bytes % bsize;
744aa75f4d3SHarshad Shirwadkar 
745aa75f4d3SHarshad Shirwadkar 	tl.fc_tag = cpu_to_le16(EXT4_FC_TAG_TAIL);
746aa75f4d3SHarshad Shirwadkar 	tl.fc_len = cpu_to_le16(bsize - off - 1 + sizeof(struct ext4_fc_tail));
747aa75f4d3SHarshad Shirwadkar 	sbi->s_fc_bytes = round_up(sbi->s_fc_bytes, bsize);
748aa75f4d3SHarshad Shirwadkar 
749aa75f4d3SHarshad Shirwadkar 	ext4_fc_memcpy(sb, dst, &tl, sizeof(tl), &crc);
750aa75f4d3SHarshad Shirwadkar 	dst += sizeof(tl);
751aa75f4d3SHarshad Shirwadkar 	tail.fc_tid = cpu_to_le32(sbi->s_journal->j_running_transaction->t_tid);
752aa75f4d3SHarshad Shirwadkar 	ext4_fc_memcpy(sb, dst, &tail.fc_tid, sizeof(tail.fc_tid), &crc);
753aa75f4d3SHarshad Shirwadkar 	dst += sizeof(tail.fc_tid);
754aa75f4d3SHarshad Shirwadkar 	tail.fc_crc = cpu_to_le32(crc);
755aa75f4d3SHarshad Shirwadkar 	ext4_fc_memcpy(sb, dst, &tail.fc_crc, sizeof(tail.fc_crc), NULL);
756aa75f4d3SHarshad Shirwadkar 
757e9f53353SDaejun Park 	ext4_fc_submit_bh(sb, true);
758aa75f4d3SHarshad Shirwadkar 
759aa75f4d3SHarshad Shirwadkar 	return 0;
760aa75f4d3SHarshad Shirwadkar }
761aa75f4d3SHarshad Shirwadkar 
762aa75f4d3SHarshad Shirwadkar /*
763aa75f4d3SHarshad Shirwadkar  * Adds tag, length, value and updates CRC. Returns true if tlv was added.
764aa75f4d3SHarshad Shirwadkar  * Returns false if there's not enough space.
765aa75f4d3SHarshad Shirwadkar  */
766aa75f4d3SHarshad Shirwadkar static bool ext4_fc_add_tlv(struct super_block *sb, u16 tag, u16 len, u8 *val,
767aa75f4d3SHarshad Shirwadkar 			   u32 *crc)
768aa75f4d3SHarshad Shirwadkar {
769aa75f4d3SHarshad Shirwadkar 	struct ext4_fc_tl tl;
770aa75f4d3SHarshad Shirwadkar 	u8 *dst;
771aa75f4d3SHarshad Shirwadkar 
772aa75f4d3SHarshad Shirwadkar 	dst = ext4_fc_reserve_space(sb, sizeof(tl) + len, crc);
773aa75f4d3SHarshad Shirwadkar 	if (!dst)
774aa75f4d3SHarshad Shirwadkar 		return false;
775aa75f4d3SHarshad Shirwadkar 
776aa75f4d3SHarshad Shirwadkar 	tl.fc_tag = cpu_to_le16(tag);
777aa75f4d3SHarshad Shirwadkar 	tl.fc_len = cpu_to_le16(len);
778aa75f4d3SHarshad Shirwadkar 
779aa75f4d3SHarshad Shirwadkar 	ext4_fc_memcpy(sb, dst, &tl, sizeof(tl), crc);
780aa75f4d3SHarshad Shirwadkar 	ext4_fc_memcpy(sb, dst + sizeof(tl), val, len, crc);
781aa75f4d3SHarshad Shirwadkar 
782aa75f4d3SHarshad Shirwadkar 	return true;
783aa75f4d3SHarshad Shirwadkar }
784aa75f4d3SHarshad Shirwadkar 
785aa75f4d3SHarshad Shirwadkar /* Same as above, but adds dentry tlv. */
786facec450SGuoqing Jiang static bool ext4_fc_add_dentry_tlv(struct super_block *sb, u32 *crc,
787facec450SGuoqing Jiang 				   struct ext4_fc_dentry_update *fc_dentry)
788aa75f4d3SHarshad Shirwadkar {
789aa75f4d3SHarshad Shirwadkar 	struct ext4_fc_dentry_info fcd;
790aa75f4d3SHarshad Shirwadkar 	struct ext4_fc_tl tl;
791facec450SGuoqing Jiang 	int dlen = fc_dentry->fcd_name.len;
792aa75f4d3SHarshad Shirwadkar 	u8 *dst = ext4_fc_reserve_space(sb, sizeof(tl) + sizeof(fcd) + dlen,
793aa75f4d3SHarshad Shirwadkar 					crc);
794aa75f4d3SHarshad Shirwadkar 
795aa75f4d3SHarshad Shirwadkar 	if (!dst)
796aa75f4d3SHarshad Shirwadkar 		return false;
797aa75f4d3SHarshad Shirwadkar 
798facec450SGuoqing Jiang 	fcd.fc_parent_ino = cpu_to_le32(fc_dentry->fcd_parent);
799facec450SGuoqing Jiang 	fcd.fc_ino = cpu_to_le32(fc_dentry->fcd_ino);
800facec450SGuoqing Jiang 	tl.fc_tag = cpu_to_le16(fc_dentry->fcd_op);
801aa75f4d3SHarshad Shirwadkar 	tl.fc_len = cpu_to_le16(sizeof(fcd) + dlen);
802aa75f4d3SHarshad Shirwadkar 	ext4_fc_memcpy(sb, dst, &tl, sizeof(tl), crc);
803aa75f4d3SHarshad Shirwadkar 	dst += sizeof(tl);
804aa75f4d3SHarshad Shirwadkar 	ext4_fc_memcpy(sb, dst, &fcd, sizeof(fcd), crc);
805aa75f4d3SHarshad Shirwadkar 	dst += sizeof(fcd);
806facec450SGuoqing Jiang 	ext4_fc_memcpy(sb, dst, fc_dentry->fcd_name.name, dlen, crc);
807aa75f4d3SHarshad Shirwadkar 
808aa75f4d3SHarshad Shirwadkar 	return true;
809aa75f4d3SHarshad Shirwadkar }
810aa75f4d3SHarshad Shirwadkar 
811aa75f4d3SHarshad Shirwadkar /*
812aa75f4d3SHarshad Shirwadkar  * Writes inode in the fast commit space under TLV with tag @tag.
813aa75f4d3SHarshad Shirwadkar  * Returns 0 on success, error on failure.
814aa75f4d3SHarshad Shirwadkar  */
815aa75f4d3SHarshad Shirwadkar static int ext4_fc_write_inode(struct inode *inode, u32 *crc)
816aa75f4d3SHarshad Shirwadkar {
817aa75f4d3SHarshad Shirwadkar 	struct ext4_inode_info *ei = EXT4_I(inode);
818aa75f4d3SHarshad Shirwadkar 	int inode_len = EXT4_GOOD_OLD_INODE_SIZE;
819aa75f4d3SHarshad Shirwadkar 	int ret;
820aa75f4d3SHarshad Shirwadkar 	struct ext4_iloc iloc;
821aa75f4d3SHarshad Shirwadkar 	struct ext4_fc_inode fc_inode;
822aa75f4d3SHarshad Shirwadkar 	struct ext4_fc_tl tl;
823aa75f4d3SHarshad Shirwadkar 	u8 *dst;
824aa75f4d3SHarshad Shirwadkar 
825aa75f4d3SHarshad Shirwadkar 	ret = ext4_get_inode_loc(inode, &iloc);
826aa75f4d3SHarshad Shirwadkar 	if (ret)
827aa75f4d3SHarshad Shirwadkar 		return ret;
828aa75f4d3SHarshad Shirwadkar 
8296c31a689SHarshad Shirwadkar 	if (ext4_test_inode_flag(inode, EXT4_INODE_INLINE_DATA))
8306c31a689SHarshad Shirwadkar 		inode_len = EXT4_INODE_SIZE(inode->i_sb);
8316c31a689SHarshad Shirwadkar 	else if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE)
832aa75f4d3SHarshad Shirwadkar 		inode_len += ei->i_extra_isize;
833aa75f4d3SHarshad Shirwadkar 
834aa75f4d3SHarshad Shirwadkar 	fc_inode.fc_ino = cpu_to_le32(inode->i_ino);
835aa75f4d3SHarshad Shirwadkar 	tl.fc_tag = cpu_to_le16(EXT4_FC_TAG_INODE);
836aa75f4d3SHarshad Shirwadkar 	tl.fc_len = cpu_to_le16(inode_len + sizeof(fc_inode.fc_ino));
837aa75f4d3SHarshad Shirwadkar 
838aa75f4d3SHarshad Shirwadkar 	dst = ext4_fc_reserve_space(inode->i_sb,
839aa75f4d3SHarshad Shirwadkar 			sizeof(tl) + inode_len + sizeof(fc_inode.fc_ino), crc);
840aa75f4d3SHarshad Shirwadkar 	if (!dst)
841aa75f4d3SHarshad Shirwadkar 		return -ECANCELED;
842aa75f4d3SHarshad Shirwadkar 
843aa75f4d3SHarshad Shirwadkar 	if (!ext4_fc_memcpy(inode->i_sb, dst, &tl, sizeof(tl), crc))
844aa75f4d3SHarshad Shirwadkar 		return -ECANCELED;
845aa75f4d3SHarshad Shirwadkar 	dst += sizeof(tl);
846aa75f4d3SHarshad Shirwadkar 	if (!ext4_fc_memcpy(inode->i_sb, dst, &fc_inode, sizeof(fc_inode), crc))
847aa75f4d3SHarshad Shirwadkar 		return -ECANCELED;
848aa75f4d3SHarshad Shirwadkar 	dst += sizeof(fc_inode);
849aa75f4d3SHarshad Shirwadkar 	if (!ext4_fc_memcpy(inode->i_sb, dst, (u8 *)ext4_raw_inode(&iloc),
850aa75f4d3SHarshad Shirwadkar 					inode_len, crc))
851aa75f4d3SHarshad Shirwadkar 		return -ECANCELED;
852aa75f4d3SHarshad Shirwadkar 
853aa75f4d3SHarshad Shirwadkar 	return 0;
854aa75f4d3SHarshad Shirwadkar }
855aa75f4d3SHarshad Shirwadkar 
856aa75f4d3SHarshad Shirwadkar /*
857aa75f4d3SHarshad Shirwadkar  * Writes updated data ranges for the inode in question. Updates CRC.
858aa75f4d3SHarshad Shirwadkar  * Returns 0 on success, error otherwise.
859aa75f4d3SHarshad Shirwadkar  */
860aa75f4d3SHarshad Shirwadkar static int ext4_fc_write_inode_data(struct inode *inode, u32 *crc)
861aa75f4d3SHarshad Shirwadkar {
862aa75f4d3SHarshad Shirwadkar 	ext4_lblk_t old_blk_size, cur_lblk_off, new_blk_size;
863aa75f4d3SHarshad Shirwadkar 	struct ext4_inode_info *ei = EXT4_I(inode);
864aa75f4d3SHarshad Shirwadkar 	struct ext4_map_blocks map;
865aa75f4d3SHarshad Shirwadkar 	struct ext4_fc_add_range fc_ext;
866aa75f4d3SHarshad Shirwadkar 	struct ext4_fc_del_range lrange;
867aa75f4d3SHarshad Shirwadkar 	struct ext4_extent *ex;
868aa75f4d3SHarshad Shirwadkar 	int ret;
869aa75f4d3SHarshad Shirwadkar 
870aa75f4d3SHarshad Shirwadkar 	mutex_lock(&ei->i_fc_lock);
871aa75f4d3SHarshad Shirwadkar 	if (ei->i_fc_lblk_len == 0) {
872aa75f4d3SHarshad Shirwadkar 		mutex_unlock(&ei->i_fc_lock);
873aa75f4d3SHarshad Shirwadkar 		return 0;
874aa75f4d3SHarshad Shirwadkar 	}
875aa75f4d3SHarshad Shirwadkar 	old_blk_size = ei->i_fc_lblk_start;
876aa75f4d3SHarshad Shirwadkar 	new_blk_size = ei->i_fc_lblk_start + ei->i_fc_lblk_len - 1;
877aa75f4d3SHarshad Shirwadkar 	ei->i_fc_lblk_len = 0;
878aa75f4d3SHarshad Shirwadkar 	mutex_unlock(&ei->i_fc_lock);
879aa75f4d3SHarshad Shirwadkar 
880aa75f4d3SHarshad Shirwadkar 	cur_lblk_off = old_blk_size;
881aa75f4d3SHarshad Shirwadkar 	jbd_debug(1, "%s: will try writing %d to %d for inode %ld\n",
882aa75f4d3SHarshad Shirwadkar 		  __func__, cur_lblk_off, new_blk_size, inode->i_ino);
883aa75f4d3SHarshad Shirwadkar 
884aa75f4d3SHarshad Shirwadkar 	while (cur_lblk_off <= new_blk_size) {
885aa75f4d3SHarshad Shirwadkar 		map.m_lblk = cur_lblk_off;
886aa75f4d3SHarshad Shirwadkar 		map.m_len = new_blk_size - cur_lblk_off + 1;
887aa75f4d3SHarshad Shirwadkar 		ret = ext4_map_blocks(NULL, inode, &map, 0);
888aa75f4d3SHarshad Shirwadkar 		if (ret < 0)
889aa75f4d3SHarshad Shirwadkar 			return -ECANCELED;
890aa75f4d3SHarshad Shirwadkar 
891aa75f4d3SHarshad Shirwadkar 		if (map.m_len == 0) {
892aa75f4d3SHarshad Shirwadkar 			cur_lblk_off++;
893aa75f4d3SHarshad Shirwadkar 			continue;
894aa75f4d3SHarshad Shirwadkar 		}
895aa75f4d3SHarshad Shirwadkar 
896aa75f4d3SHarshad Shirwadkar 		if (ret == 0) {
897aa75f4d3SHarshad Shirwadkar 			lrange.fc_ino = cpu_to_le32(inode->i_ino);
898aa75f4d3SHarshad Shirwadkar 			lrange.fc_lblk = cpu_to_le32(map.m_lblk);
899aa75f4d3SHarshad Shirwadkar 			lrange.fc_len = cpu_to_le32(map.m_len);
900aa75f4d3SHarshad Shirwadkar 			if (!ext4_fc_add_tlv(inode->i_sb, EXT4_FC_TAG_DEL_RANGE,
901aa75f4d3SHarshad Shirwadkar 					    sizeof(lrange), (u8 *)&lrange, crc))
902aa75f4d3SHarshad Shirwadkar 				return -ENOSPC;
903aa75f4d3SHarshad Shirwadkar 		} else {
904a2c2f082SHou Tao 			unsigned int max = (map.m_flags & EXT4_MAP_UNWRITTEN) ?
905a2c2f082SHou Tao 				EXT_UNWRITTEN_MAX_LEN : EXT_INIT_MAX_LEN;
906a2c2f082SHou Tao 
907a2c2f082SHou Tao 			/* Limit the number of blocks in one extent */
908a2c2f082SHou Tao 			map.m_len = min(max, map.m_len);
909a2c2f082SHou Tao 
910aa75f4d3SHarshad Shirwadkar 			fc_ext.fc_ino = cpu_to_le32(inode->i_ino);
911aa75f4d3SHarshad Shirwadkar 			ex = (struct ext4_extent *)&fc_ext.fc_ex;
912aa75f4d3SHarshad Shirwadkar 			ex->ee_block = cpu_to_le32(map.m_lblk);
913aa75f4d3SHarshad Shirwadkar 			ex->ee_len = cpu_to_le16(map.m_len);
914aa75f4d3SHarshad Shirwadkar 			ext4_ext_store_pblock(ex, map.m_pblk);
915aa75f4d3SHarshad Shirwadkar 			if (map.m_flags & EXT4_MAP_UNWRITTEN)
916aa75f4d3SHarshad Shirwadkar 				ext4_ext_mark_unwritten(ex);
917aa75f4d3SHarshad Shirwadkar 			else
918aa75f4d3SHarshad Shirwadkar 				ext4_ext_mark_initialized(ex);
919aa75f4d3SHarshad Shirwadkar 			if (!ext4_fc_add_tlv(inode->i_sb, EXT4_FC_TAG_ADD_RANGE,
920aa75f4d3SHarshad Shirwadkar 					    sizeof(fc_ext), (u8 *)&fc_ext, crc))
921aa75f4d3SHarshad Shirwadkar 				return -ENOSPC;
922aa75f4d3SHarshad Shirwadkar 		}
923aa75f4d3SHarshad Shirwadkar 
924aa75f4d3SHarshad Shirwadkar 		cur_lblk_off += map.m_len;
925aa75f4d3SHarshad Shirwadkar 	}
926aa75f4d3SHarshad Shirwadkar 
927aa75f4d3SHarshad Shirwadkar 	return 0;
928aa75f4d3SHarshad Shirwadkar }
929aa75f4d3SHarshad Shirwadkar 
930aa75f4d3SHarshad Shirwadkar 
931aa75f4d3SHarshad Shirwadkar /* Submit data for all the fast commit inodes */
932aa75f4d3SHarshad Shirwadkar static int ext4_fc_submit_inode_data_all(journal_t *journal)
933aa75f4d3SHarshad Shirwadkar {
934aa75f4d3SHarshad Shirwadkar 	struct super_block *sb = (struct super_block *)(journal->j_private);
935aa75f4d3SHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(sb);
936aa75f4d3SHarshad Shirwadkar 	struct ext4_inode_info *ei;
937aa75f4d3SHarshad Shirwadkar 	int ret = 0;
938aa75f4d3SHarshad Shirwadkar 
939aa75f4d3SHarshad Shirwadkar 	spin_lock(&sbi->s_fc_lock);
94096e7c02dSDaejun Park 	list_for_each_entry(ei, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) {
941aa75f4d3SHarshad Shirwadkar 		ext4_set_inode_state(&ei->vfs_inode, EXT4_STATE_FC_COMMITTING);
942aa75f4d3SHarshad Shirwadkar 		while (atomic_read(&ei->i_fc_updates)) {
943aa75f4d3SHarshad Shirwadkar 			DEFINE_WAIT(wait);
944aa75f4d3SHarshad Shirwadkar 
945aa75f4d3SHarshad Shirwadkar 			prepare_to_wait(&ei->i_fc_wait, &wait,
946aa75f4d3SHarshad Shirwadkar 						TASK_UNINTERRUPTIBLE);
947aa75f4d3SHarshad Shirwadkar 			if (atomic_read(&ei->i_fc_updates)) {
948aa75f4d3SHarshad Shirwadkar 				spin_unlock(&sbi->s_fc_lock);
949aa75f4d3SHarshad Shirwadkar 				schedule();
950aa75f4d3SHarshad Shirwadkar 				spin_lock(&sbi->s_fc_lock);
951aa75f4d3SHarshad Shirwadkar 			}
952aa75f4d3SHarshad Shirwadkar 			finish_wait(&ei->i_fc_wait, &wait);
953aa75f4d3SHarshad Shirwadkar 		}
954aa75f4d3SHarshad Shirwadkar 		spin_unlock(&sbi->s_fc_lock);
955aa75f4d3SHarshad Shirwadkar 		ret = jbd2_submit_inode_data(ei->jinode);
956aa75f4d3SHarshad Shirwadkar 		if (ret)
957aa75f4d3SHarshad Shirwadkar 			return ret;
958aa75f4d3SHarshad Shirwadkar 		spin_lock(&sbi->s_fc_lock);
959aa75f4d3SHarshad Shirwadkar 	}
960aa75f4d3SHarshad Shirwadkar 	spin_unlock(&sbi->s_fc_lock);
961aa75f4d3SHarshad Shirwadkar 
962aa75f4d3SHarshad Shirwadkar 	return ret;
963aa75f4d3SHarshad Shirwadkar }
964aa75f4d3SHarshad Shirwadkar 
965aa75f4d3SHarshad Shirwadkar /* Wait for completion of data for all the fast commit inodes */
966aa75f4d3SHarshad Shirwadkar static int ext4_fc_wait_inode_data_all(journal_t *journal)
967aa75f4d3SHarshad Shirwadkar {
968aa75f4d3SHarshad Shirwadkar 	struct super_block *sb = (struct super_block *)(journal->j_private);
969aa75f4d3SHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(sb);
970aa75f4d3SHarshad Shirwadkar 	struct ext4_inode_info *pos, *n;
971aa75f4d3SHarshad Shirwadkar 	int ret = 0;
972aa75f4d3SHarshad Shirwadkar 
973aa75f4d3SHarshad Shirwadkar 	spin_lock(&sbi->s_fc_lock);
974aa75f4d3SHarshad Shirwadkar 	list_for_each_entry_safe(pos, n, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) {
975aa75f4d3SHarshad Shirwadkar 		if (!ext4_test_inode_state(&pos->vfs_inode,
976aa75f4d3SHarshad Shirwadkar 					   EXT4_STATE_FC_COMMITTING))
977aa75f4d3SHarshad Shirwadkar 			continue;
978aa75f4d3SHarshad Shirwadkar 		spin_unlock(&sbi->s_fc_lock);
979aa75f4d3SHarshad Shirwadkar 
980aa75f4d3SHarshad Shirwadkar 		ret = jbd2_wait_inode_data(journal, pos->jinode);
981aa75f4d3SHarshad Shirwadkar 		if (ret)
982aa75f4d3SHarshad Shirwadkar 			return ret;
983aa75f4d3SHarshad Shirwadkar 		spin_lock(&sbi->s_fc_lock);
984aa75f4d3SHarshad Shirwadkar 	}
985aa75f4d3SHarshad Shirwadkar 	spin_unlock(&sbi->s_fc_lock);
986aa75f4d3SHarshad Shirwadkar 
987aa75f4d3SHarshad Shirwadkar 	return 0;
988aa75f4d3SHarshad Shirwadkar }
989aa75f4d3SHarshad Shirwadkar 
990aa75f4d3SHarshad Shirwadkar /* Commit all the directory entry updates */
991aa75f4d3SHarshad Shirwadkar static int ext4_fc_commit_dentry_updates(journal_t *journal, u32 *crc)
992fa329e27STheodore Ts'o __acquires(&sbi->s_fc_lock)
993fa329e27STheodore Ts'o __releases(&sbi->s_fc_lock)
994aa75f4d3SHarshad Shirwadkar {
995aa75f4d3SHarshad Shirwadkar 	struct super_block *sb = (struct super_block *)(journal->j_private);
996aa75f4d3SHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(sb);
99796e7c02dSDaejun Park 	struct ext4_fc_dentry_update *fc_dentry, *fc_dentry_n;
998aa75f4d3SHarshad Shirwadkar 	struct inode *inode;
999b3998b3bSRitesh Harjani 	struct ext4_inode_info *ei;
1000aa75f4d3SHarshad Shirwadkar 	int ret;
1001aa75f4d3SHarshad Shirwadkar 
1002aa75f4d3SHarshad Shirwadkar 	if (list_empty(&sbi->s_fc_dentry_q[FC_Q_MAIN]))
1003aa75f4d3SHarshad Shirwadkar 		return 0;
100496e7c02dSDaejun Park 	list_for_each_entry_safe(fc_dentry, fc_dentry_n,
100596e7c02dSDaejun Park 				 &sbi->s_fc_dentry_q[FC_Q_MAIN], fcd_list) {
1006aa75f4d3SHarshad Shirwadkar 		if (fc_dentry->fcd_op != EXT4_FC_TAG_CREAT) {
1007aa75f4d3SHarshad Shirwadkar 			spin_unlock(&sbi->s_fc_lock);
1008facec450SGuoqing Jiang 			if (!ext4_fc_add_dentry_tlv(sb, crc, fc_dentry)) {
1009aa75f4d3SHarshad Shirwadkar 				ret = -ENOSPC;
1010aa75f4d3SHarshad Shirwadkar 				goto lock_and_exit;
1011aa75f4d3SHarshad Shirwadkar 			}
1012aa75f4d3SHarshad Shirwadkar 			spin_lock(&sbi->s_fc_lock);
1013aa75f4d3SHarshad Shirwadkar 			continue;
1014aa75f4d3SHarshad Shirwadkar 		}
1015aa75f4d3SHarshad Shirwadkar 		/*
1016b3998b3bSRitesh Harjani 		 * With fcd_dilist we need not loop in sbi->s_fc_q to get the
1017b3998b3bSRitesh Harjani 		 * corresponding inode pointer
1018aa75f4d3SHarshad Shirwadkar 		 */
1019b3998b3bSRitesh Harjani 		WARN_ON(list_empty(&fc_dentry->fcd_dilist));
1020b3998b3bSRitesh Harjani 		ei = list_first_entry(&fc_dentry->fcd_dilist,
1021b3998b3bSRitesh Harjani 				struct ext4_inode_info, i_fc_dilist);
1022b3998b3bSRitesh Harjani 		inode = &ei->vfs_inode;
1023b3998b3bSRitesh Harjani 		WARN_ON(inode->i_ino != fc_dentry->fcd_ino);
1024b3998b3bSRitesh Harjani 
1025aa75f4d3SHarshad Shirwadkar 		spin_unlock(&sbi->s_fc_lock);
1026aa75f4d3SHarshad Shirwadkar 
1027aa75f4d3SHarshad Shirwadkar 		/*
1028aa75f4d3SHarshad Shirwadkar 		 * We first write the inode and then the create dirent. This
1029aa75f4d3SHarshad Shirwadkar 		 * allows the recovery code to create an unnamed inode first
1030aa75f4d3SHarshad Shirwadkar 		 * and then link it to a directory entry. This allows us
1031aa75f4d3SHarshad Shirwadkar 		 * to use namei.c routines almost as is and simplifies
1032aa75f4d3SHarshad Shirwadkar 		 * the recovery code.
1033aa75f4d3SHarshad Shirwadkar 		 */
1034aa75f4d3SHarshad Shirwadkar 		ret = ext4_fc_write_inode(inode, crc);
1035aa75f4d3SHarshad Shirwadkar 		if (ret)
1036aa75f4d3SHarshad Shirwadkar 			goto lock_and_exit;
1037aa75f4d3SHarshad Shirwadkar 
1038aa75f4d3SHarshad Shirwadkar 		ret = ext4_fc_write_inode_data(inode, crc);
1039aa75f4d3SHarshad Shirwadkar 		if (ret)
1040aa75f4d3SHarshad Shirwadkar 			goto lock_and_exit;
1041aa75f4d3SHarshad Shirwadkar 
1042facec450SGuoqing Jiang 		if (!ext4_fc_add_dentry_tlv(sb, crc, fc_dentry)) {
1043aa75f4d3SHarshad Shirwadkar 			ret = -ENOSPC;
1044aa75f4d3SHarshad Shirwadkar 			goto lock_and_exit;
1045aa75f4d3SHarshad Shirwadkar 		}
1046aa75f4d3SHarshad Shirwadkar 
1047aa75f4d3SHarshad Shirwadkar 		spin_lock(&sbi->s_fc_lock);
1048aa75f4d3SHarshad Shirwadkar 	}
1049aa75f4d3SHarshad Shirwadkar 	return 0;
1050aa75f4d3SHarshad Shirwadkar lock_and_exit:
1051aa75f4d3SHarshad Shirwadkar 	spin_lock(&sbi->s_fc_lock);
1052aa75f4d3SHarshad Shirwadkar 	return ret;
1053aa75f4d3SHarshad Shirwadkar }
1054aa75f4d3SHarshad Shirwadkar 
1055aa75f4d3SHarshad Shirwadkar static int ext4_fc_perform_commit(journal_t *journal)
1056aa75f4d3SHarshad Shirwadkar {
1057aa75f4d3SHarshad Shirwadkar 	struct super_block *sb = (struct super_block *)(journal->j_private);
1058aa75f4d3SHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(sb);
1059aa75f4d3SHarshad Shirwadkar 	struct ext4_inode_info *iter;
1060aa75f4d3SHarshad Shirwadkar 	struct ext4_fc_head head;
1061aa75f4d3SHarshad Shirwadkar 	struct inode *inode;
1062aa75f4d3SHarshad Shirwadkar 	struct blk_plug plug;
1063aa75f4d3SHarshad Shirwadkar 	int ret = 0;
1064aa75f4d3SHarshad Shirwadkar 	u32 crc = 0;
1065aa75f4d3SHarshad Shirwadkar 
1066aa75f4d3SHarshad Shirwadkar 	ret = ext4_fc_submit_inode_data_all(journal);
1067aa75f4d3SHarshad Shirwadkar 	if (ret)
1068aa75f4d3SHarshad Shirwadkar 		return ret;
1069aa75f4d3SHarshad Shirwadkar 
1070aa75f4d3SHarshad Shirwadkar 	ret = ext4_fc_wait_inode_data_all(journal);
1071aa75f4d3SHarshad Shirwadkar 	if (ret)
1072aa75f4d3SHarshad Shirwadkar 		return ret;
1073aa75f4d3SHarshad Shirwadkar 
1074da0c5d26SHarshad Shirwadkar 	/*
1075da0c5d26SHarshad Shirwadkar 	 * If file system device is different from journal device, issue a cache
1076da0c5d26SHarshad Shirwadkar 	 * flush before we start writing fast commit blocks.
1077da0c5d26SHarshad Shirwadkar 	 */
1078da0c5d26SHarshad Shirwadkar 	if (journal->j_fs_dev != journal->j_dev)
1079c6bf3f0eSChristoph Hellwig 		blkdev_issue_flush(journal->j_fs_dev);
1080da0c5d26SHarshad Shirwadkar 
1081aa75f4d3SHarshad Shirwadkar 	blk_start_plug(&plug);
1082aa75f4d3SHarshad Shirwadkar 	if (sbi->s_fc_bytes == 0) {
1083aa75f4d3SHarshad Shirwadkar 		/*
1084aa75f4d3SHarshad Shirwadkar 		 * Add a head tag only if this is the first fast commit
1085aa75f4d3SHarshad Shirwadkar 		 * in this TID.
1086aa75f4d3SHarshad Shirwadkar 		 */
1087aa75f4d3SHarshad Shirwadkar 		head.fc_features = cpu_to_le32(EXT4_FC_SUPPORTED_FEATURES);
1088aa75f4d3SHarshad Shirwadkar 		head.fc_tid = cpu_to_le32(
1089aa75f4d3SHarshad Shirwadkar 			sbi->s_journal->j_running_transaction->t_tid);
1090aa75f4d3SHarshad Shirwadkar 		if (!ext4_fc_add_tlv(sb, EXT4_FC_TAG_HEAD, sizeof(head),
1091e1262cd2SXu Yihang 			(u8 *)&head, &crc)) {
1092e1262cd2SXu Yihang 			ret = -ENOSPC;
1093aa75f4d3SHarshad Shirwadkar 			goto out;
1094aa75f4d3SHarshad Shirwadkar 		}
1095e1262cd2SXu Yihang 	}
1096aa75f4d3SHarshad Shirwadkar 
1097aa75f4d3SHarshad Shirwadkar 	spin_lock(&sbi->s_fc_lock);
1098aa75f4d3SHarshad Shirwadkar 	ret = ext4_fc_commit_dentry_updates(journal, &crc);
1099aa75f4d3SHarshad Shirwadkar 	if (ret) {
1100aa75f4d3SHarshad Shirwadkar 		spin_unlock(&sbi->s_fc_lock);
1101aa75f4d3SHarshad Shirwadkar 		goto out;
1102aa75f4d3SHarshad Shirwadkar 	}
1103aa75f4d3SHarshad Shirwadkar 
110496e7c02dSDaejun Park 	list_for_each_entry(iter, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) {
1105aa75f4d3SHarshad Shirwadkar 		inode = &iter->vfs_inode;
1106aa75f4d3SHarshad Shirwadkar 		if (!ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING))
1107aa75f4d3SHarshad Shirwadkar 			continue;
1108aa75f4d3SHarshad Shirwadkar 
1109aa75f4d3SHarshad Shirwadkar 		spin_unlock(&sbi->s_fc_lock);
1110aa75f4d3SHarshad Shirwadkar 		ret = ext4_fc_write_inode_data(inode, &crc);
1111aa75f4d3SHarshad Shirwadkar 		if (ret)
1112aa75f4d3SHarshad Shirwadkar 			goto out;
1113aa75f4d3SHarshad Shirwadkar 		ret = ext4_fc_write_inode(inode, &crc);
1114aa75f4d3SHarshad Shirwadkar 		if (ret)
1115aa75f4d3SHarshad Shirwadkar 			goto out;
1116aa75f4d3SHarshad Shirwadkar 		spin_lock(&sbi->s_fc_lock);
1117aa75f4d3SHarshad Shirwadkar 	}
1118aa75f4d3SHarshad Shirwadkar 	spin_unlock(&sbi->s_fc_lock);
1119aa75f4d3SHarshad Shirwadkar 
1120aa75f4d3SHarshad Shirwadkar 	ret = ext4_fc_write_tail(sb, crc);
1121aa75f4d3SHarshad Shirwadkar 
1122aa75f4d3SHarshad Shirwadkar out:
1123aa75f4d3SHarshad Shirwadkar 	blk_finish_plug(&plug);
1124aa75f4d3SHarshad Shirwadkar 	return ret;
1125aa75f4d3SHarshad Shirwadkar }
1126aa75f4d3SHarshad Shirwadkar 
11270915e464SHarshad Shirwadkar static void ext4_fc_update_stats(struct super_block *sb, int status,
11280915e464SHarshad Shirwadkar 				 u64 commit_time, int nblks)
11290915e464SHarshad Shirwadkar {
11300915e464SHarshad Shirwadkar 	struct ext4_fc_stats *stats = &EXT4_SB(sb)->s_fc_stats;
11310915e464SHarshad Shirwadkar 
11320915e464SHarshad Shirwadkar 	jbd_debug(1, "Fast commit ended with status = %d", status);
11330915e464SHarshad Shirwadkar 	if (status == EXT4_FC_STATUS_OK) {
11340915e464SHarshad Shirwadkar 		stats->fc_num_commits++;
11350915e464SHarshad Shirwadkar 		stats->fc_numblks += nblks;
11360915e464SHarshad Shirwadkar 		if (likely(stats->s_fc_avg_commit_time))
11370915e464SHarshad Shirwadkar 			stats->s_fc_avg_commit_time =
11380915e464SHarshad Shirwadkar 				(commit_time +
11390915e464SHarshad Shirwadkar 				 stats->s_fc_avg_commit_time * 3) / 4;
11400915e464SHarshad Shirwadkar 		else
11410915e464SHarshad Shirwadkar 			stats->s_fc_avg_commit_time = commit_time;
11420915e464SHarshad Shirwadkar 	} else if (status == EXT4_FC_STATUS_FAILED ||
11430915e464SHarshad Shirwadkar 		   status == EXT4_FC_STATUS_INELIGIBLE) {
11440915e464SHarshad Shirwadkar 		if (status == EXT4_FC_STATUS_FAILED)
11450915e464SHarshad Shirwadkar 			stats->fc_failed_commits++;
11460915e464SHarshad Shirwadkar 		stats->fc_ineligible_commits++;
11470915e464SHarshad Shirwadkar 	} else {
11480915e464SHarshad Shirwadkar 		stats->fc_skipped_commits++;
11490915e464SHarshad Shirwadkar 	}
11500915e464SHarshad Shirwadkar 	trace_ext4_fc_commit_stop(sb, nblks, status);
11510915e464SHarshad Shirwadkar }
11520915e464SHarshad Shirwadkar 
1153aa75f4d3SHarshad Shirwadkar /*
1154aa75f4d3SHarshad Shirwadkar  * The main commit entry point. Performs a fast commit for transaction
1155aa75f4d3SHarshad Shirwadkar  * commit_tid if needed. If it's not possible to perform a fast commit
1156aa75f4d3SHarshad Shirwadkar  * due to various reasons, we fall back to full commit. Returns 0
1157aa75f4d3SHarshad Shirwadkar  * on success, error otherwise.
1158aa75f4d3SHarshad Shirwadkar  */
1159aa75f4d3SHarshad Shirwadkar int ext4_fc_commit(journal_t *journal, tid_t commit_tid)
1160aa75f4d3SHarshad Shirwadkar {
1161aa75f4d3SHarshad Shirwadkar 	struct super_block *sb = (struct super_block *)(journal->j_private);
1162aa75f4d3SHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(sb);
1163aa75f4d3SHarshad Shirwadkar 	int nblks = 0, ret, bsize = journal->j_blocksize;
1164aa75f4d3SHarshad Shirwadkar 	int subtid = atomic_read(&sbi->s_fc_subtid);
11650915e464SHarshad Shirwadkar 	int status = EXT4_FC_STATUS_OK, fc_bufs_before = 0;
1166aa75f4d3SHarshad Shirwadkar 	ktime_t start_time, commit_time;
1167aa75f4d3SHarshad Shirwadkar 
1168*7f142440SRitesh Harjani 	if (!test_opt2(sb, JOURNAL_FAST_COMMIT))
1169*7f142440SRitesh Harjani 		return jbd2_complete_transaction(journal, commit_tid);
1170*7f142440SRitesh Harjani 
1171aa75f4d3SHarshad Shirwadkar 	trace_ext4_fc_commit_start(sb);
1172aa75f4d3SHarshad Shirwadkar 
1173aa75f4d3SHarshad Shirwadkar 	start_time = ktime_get();
1174aa75f4d3SHarshad Shirwadkar 
1175aa75f4d3SHarshad Shirwadkar restart_fc:
1176aa75f4d3SHarshad Shirwadkar 	ret = jbd2_fc_begin_commit(journal, commit_tid);
1177aa75f4d3SHarshad Shirwadkar 	if (ret == -EALREADY) {
1178aa75f4d3SHarshad Shirwadkar 		/* There was an ongoing commit, check if we need to restart */
1179aa75f4d3SHarshad Shirwadkar 		if (atomic_read(&sbi->s_fc_subtid) <= subtid &&
1180aa75f4d3SHarshad Shirwadkar 			commit_tid > journal->j_commit_sequence)
1181aa75f4d3SHarshad Shirwadkar 			goto restart_fc;
11820915e464SHarshad Shirwadkar 		ext4_fc_update_stats(sb, EXT4_FC_STATUS_SKIPPED, 0, 0);
11830915e464SHarshad Shirwadkar 		return 0;
1184aa75f4d3SHarshad Shirwadkar 	} else if (ret) {
11850915e464SHarshad Shirwadkar 		/*
11860915e464SHarshad Shirwadkar 		 * Commit couldn't start. Just update stats and perform a
11870915e464SHarshad Shirwadkar 		 * full commit.
11880915e464SHarshad Shirwadkar 		 */
11890915e464SHarshad Shirwadkar 		ext4_fc_update_stats(sb, EXT4_FC_STATUS_FAILED, 0, 0);
11900915e464SHarshad Shirwadkar 		return jbd2_complete_transaction(journal, commit_tid);
1191aa75f4d3SHarshad Shirwadkar 	}
11920915e464SHarshad Shirwadkar 
11937bbbe241SHarshad Shirwadkar 	/*
11947bbbe241SHarshad Shirwadkar 	 * After establishing journal barrier via jbd2_fc_begin_commit(), check
11957bbbe241SHarshad Shirwadkar 	 * if we are fast commit ineligible.
11967bbbe241SHarshad Shirwadkar 	 */
11977bbbe241SHarshad Shirwadkar 	if (ext4_test_mount_flag(sb, EXT4_MF_FC_INELIGIBLE)) {
11980915e464SHarshad Shirwadkar 		status = EXT4_FC_STATUS_INELIGIBLE;
11990915e464SHarshad Shirwadkar 		goto fallback;
12007bbbe241SHarshad Shirwadkar 	}
1201aa75f4d3SHarshad Shirwadkar 
1202aa75f4d3SHarshad Shirwadkar 	fc_bufs_before = (sbi->s_fc_bytes + bsize - 1) / bsize;
1203aa75f4d3SHarshad Shirwadkar 	ret = ext4_fc_perform_commit(journal);
1204aa75f4d3SHarshad Shirwadkar 	if (ret < 0) {
12050915e464SHarshad Shirwadkar 		status = EXT4_FC_STATUS_FAILED;
12060915e464SHarshad Shirwadkar 		goto fallback;
1207aa75f4d3SHarshad Shirwadkar 	}
1208aa75f4d3SHarshad Shirwadkar 	nblks = (sbi->s_fc_bytes + bsize - 1) / bsize - fc_bufs_before;
1209aa75f4d3SHarshad Shirwadkar 	ret = jbd2_fc_wait_bufs(journal, nblks);
1210aa75f4d3SHarshad Shirwadkar 	if (ret < 0) {
12110915e464SHarshad Shirwadkar 		status = EXT4_FC_STATUS_FAILED;
12120915e464SHarshad Shirwadkar 		goto fallback;
1213aa75f4d3SHarshad Shirwadkar 	}
1214aa75f4d3SHarshad Shirwadkar 	atomic_inc(&sbi->s_fc_subtid);
12150915e464SHarshad Shirwadkar 	ret = jbd2_fc_end_commit(journal);
1216aa75f4d3SHarshad Shirwadkar 	/*
12170915e464SHarshad Shirwadkar 	 * weight the commit time higher than the average time so we
12180915e464SHarshad Shirwadkar 	 * don't react too strongly to vast changes in the commit time
1219aa75f4d3SHarshad Shirwadkar 	 */
12200915e464SHarshad Shirwadkar 	commit_time = ktime_to_ns(ktime_sub(ktime_get(), start_time));
12210915e464SHarshad Shirwadkar 	ext4_fc_update_stats(sb, status, commit_time, nblks);
12220915e464SHarshad Shirwadkar 	return ret;
12230915e464SHarshad Shirwadkar 
12240915e464SHarshad Shirwadkar fallback:
12250915e464SHarshad Shirwadkar 	ret = jbd2_fc_end_commit_fallback(journal);
12260915e464SHarshad Shirwadkar 	ext4_fc_update_stats(sb, status, 0, 0);
12270915e464SHarshad Shirwadkar 	return ret;
1228aa75f4d3SHarshad Shirwadkar }
1229aa75f4d3SHarshad Shirwadkar 
1230ff780b91SHarshad Shirwadkar /*
1231ff780b91SHarshad Shirwadkar  * Fast commit cleanup routine. This is called after every fast commit and
1232ff780b91SHarshad Shirwadkar  * full commit. full is true if we are called after a full commit.
1233ff780b91SHarshad Shirwadkar  */
1234e85c81baSXin Yin static void ext4_fc_cleanup(journal_t *journal, int full, tid_t tid)
1235ff780b91SHarshad Shirwadkar {
1236aa75f4d3SHarshad Shirwadkar 	struct super_block *sb = journal->j_private;
1237aa75f4d3SHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(sb);
123896e7c02dSDaejun Park 	struct ext4_inode_info *iter, *iter_n;
1239aa75f4d3SHarshad Shirwadkar 	struct ext4_fc_dentry_update *fc_dentry;
1240aa75f4d3SHarshad Shirwadkar 
1241aa75f4d3SHarshad Shirwadkar 	if (full && sbi->s_fc_bh)
1242aa75f4d3SHarshad Shirwadkar 		sbi->s_fc_bh = NULL;
1243aa75f4d3SHarshad Shirwadkar 
1244aa75f4d3SHarshad Shirwadkar 	jbd2_fc_release_bufs(journal);
1245aa75f4d3SHarshad Shirwadkar 
1246aa75f4d3SHarshad Shirwadkar 	spin_lock(&sbi->s_fc_lock);
124796e7c02dSDaejun Park 	list_for_each_entry_safe(iter, iter_n, &sbi->s_fc_q[FC_Q_MAIN],
124896e7c02dSDaejun Park 				 i_fc_list) {
1249aa75f4d3SHarshad Shirwadkar 		list_del_init(&iter->i_fc_list);
1250aa75f4d3SHarshad Shirwadkar 		ext4_clear_inode_state(&iter->vfs_inode,
1251aa75f4d3SHarshad Shirwadkar 				       EXT4_STATE_FC_COMMITTING);
1252bdc8a53aSXin Yin 		if (iter->i_sync_tid <= tid)
1253aa75f4d3SHarshad Shirwadkar 			ext4_fc_reset_inode(&iter->vfs_inode);
1254aa75f4d3SHarshad Shirwadkar 		/* Make sure EXT4_STATE_FC_COMMITTING bit is clear */
1255aa75f4d3SHarshad Shirwadkar 		smp_mb();
1256aa75f4d3SHarshad Shirwadkar #if (BITS_PER_LONG < 64)
1257aa75f4d3SHarshad Shirwadkar 		wake_up_bit(&iter->i_state_flags, EXT4_STATE_FC_COMMITTING);
1258aa75f4d3SHarshad Shirwadkar #else
1259aa75f4d3SHarshad Shirwadkar 		wake_up_bit(&iter->i_flags, EXT4_STATE_FC_COMMITTING);
1260aa75f4d3SHarshad Shirwadkar #endif
1261aa75f4d3SHarshad Shirwadkar 	}
1262aa75f4d3SHarshad Shirwadkar 
1263aa75f4d3SHarshad Shirwadkar 	while (!list_empty(&sbi->s_fc_dentry_q[FC_Q_MAIN])) {
1264aa75f4d3SHarshad Shirwadkar 		fc_dentry = list_first_entry(&sbi->s_fc_dentry_q[FC_Q_MAIN],
1265aa75f4d3SHarshad Shirwadkar 					     struct ext4_fc_dentry_update,
1266aa75f4d3SHarshad Shirwadkar 					     fcd_list);
1267aa75f4d3SHarshad Shirwadkar 		list_del_init(&fc_dentry->fcd_list);
1268b3998b3bSRitesh Harjani 		list_del_init(&fc_dentry->fcd_dilist);
1269aa75f4d3SHarshad Shirwadkar 		spin_unlock(&sbi->s_fc_lock);
1270aa75f4d3SHarshad Shirwadkar 
1271aa75f4d3SHarshad Shirwadkar 		if (fc_dentry->fcd_name.name &&
1272aa75f4d3SHarshad Shirwadkar 			fc_dentry->fcd_name.len > DNAME_INLINE_LEN)
1273aa75f4d3SHarshad Shirwadkar 			kfree(fc_dentry->fcd_name.name);
1274aa75f4d3SHarshad Shirwadkar 		kmem_cache_free(ext4_fc_dentry_cachep, fc_dentry);
1275aa75f4d3SHarshad Shirwadkar 		spin_lock(&sbi->s_fc_lock);
1276aa75f4d3SHarshad Shirwadkar 	}
1277aa75f4d3SHarshad Shirwadkar 
1278aa75f4d3SHarshad Shirwadkar 	list_splice_init(&sbi->s_fc_dentry_q[FC_Q_STAGING],
1279aa75f4d3SHarshad Shirwadkar 				&sbi->s_fc_dentry_q[FC_Q_MAIN]);
1280aa75f4d3SHarshad Shirwadkar 	list_splice_init(&sbi->s_fc_q[FC_Q_STAGING],
128131e203e0SDaejun Park 				&sbi->s_fc_q[FC_Q_MAIN]);
1282aa75f4d3SHarshad Shirwadkar 
1283e85c81baSXin Yin 	if (tid >= sbi->s_fc_ineligible_tid) {
1284e85c81baSXin Yin 		sbi->s_fc_ineligible_tid = 0;
12859b5f6c9bSHarshad Shirwadkar 		ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
1286e85c81baSXin Yin 	}
1287aa75f4d3SHarshad Shirwadkar 
1288aa75f4d3SHarshad Shirwadkar 	if (full)
1289aa75f4d3SHarshad Shirwadkar 		sbi->s_fc_bytes = 0;
1290aa75f4d3SHarshad Shirwadkar 	spin_unlock(&sbi->s_fc_lock);
1291aa75f4d3SHarshad Shirwadkar 	trace_ext4_fc_stats(sb);
1292ff780b91SHarshad Shirwadkar }
12936866d7b3SHarshad Shirwadkar 
12948016e29fSHarshad Shirwadkar /* Ext4 Replay Path Routines */
12958016e29fSHarshad Shirwadkar 
/*
 * Decoded form of a dentry TLV, shared by the link/unlink/create replay
 * helpers. Filled in by tl_to_darg().
 */
struct dentry_info_args {
	int parent_ino;		/* inode number of the parent directory */
	int dname_len;		/* length of the name pointed to by dname */
	int ino;		/* inode number the directory entry refers to */
	int inode_len;
	char *dname;		/* entry name; points into the TLV value */
};
13018016e29fSHarshad Shirwadkar 
13028016e29fSHarshad Shirwadkar static inline void tl_to_darg(struct dentry_info_args *darg,
1303a7ba36bcSHarshad Shirwadkar 			      struct  ext4_fc_tl *tl, u8 *val)
13048016e29fSHarshad Shirwadkar {
1305a7ba36bcSHarshad Shirwadkar 	struct ext4_fc_dentry_info fcd;
13068016e29fSHarshad Shirwadkar 
1307a7ba36bcSHarshad Shirwadkar 	memcpy(&fcd, val, sizeof(fcd));
13088016e29fSHarshad Shirwadkar 
1309a7ba36bcSHarshad Shirwadkar 	darg->parent_ino = le32_to_cpu(fcd.fc_parent_ino);
1310a7ba36bcSHarshad Shirwadkar 	darg->ino = le32_to_cpu(fcd.fc_ino);
1311a7ba36bcSHarshad Shirwadkar 	darg->dname = val + offsetof(struct ext4_fc_dentry_info, fc_dname);
1312a7ba36bcSHarshad Shirwadkar 	darg->dname_len = le16_to_cpu(tl->fc_len) -
13138016e29fSHarshad Shirwadkar 		sizeof(struct ext4_fc_dentry_info);
13148016e29fSHarshad Shirwadkar }
13158016e29fSHarshad Shirwadkar 
13168016e29fSHarshad Shirwadkar /* Unlink replay function */
1317a7ba36bcSHarshad Shirwadkar static int ext4_fc_replay_unlink(struct super_block *sb, struct ext4_fc_tl *tl,
1318a7ba36bcSHarshad Shirwadkar 				 u8 *val)
13198016e29fSHarshad Shirwadkar {
13208016e29fSHarshad Shirwadkar 	struct inode *inode, *old_parent;
13218016e29fSHarshad Shirwadkar 	struct qstr entry;
13228016e29fSHarshad Shirwadkar 	struct dentry_info_args darg;
13238016e29fSHarshad Shirwadkar 	int ret = 0;
13248016e29fSHarshad Shirwadkar 
1325a7ba36bcSHarshad Shirwadkar 	tl_to_darg(&darg, tl, val);
13268016e29fSHarshad Shirwadkar 
13278016e29fSHarshad Shirwadkar 	trace_ext4_fc_replay(sb, EXT4_FC_TAG_UNLINK, darg.ino,
13288016e29fSHarshad Shirwadkar 			darg.parent_ino, darg.dname_len);
13298016e29fSHarshad Shirwadkar 
13308016e29fSHarshad Shirwadkar 	entry.name = darg.dname;
13318016e29fSHarshad Shirwadkar 	entry.len = darg.dname_len;
13328016e29fSHarshad Shirwadkar 	inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL);
13338016e29fSHarshad Shirwadkar 
133423dd561aSYi Li 	if (IS_ERR(inode)) {
13358016e29fSHarshad Shirwadkar 		jbd_debug(1, "Inode %d not found", darg.ino);
13368016e29fSHarshad Shirwadkar 		return 0;
13378016e29fSHarshad Shirwadkar 	}
13388016e29fSHarshad Shirwadkar 
13398016e29fSHarshad Shirwadkar 	old_parent = ext4_iget(sb, darg.parent_ino,
13408016e29fSHarshad Shirwadkar 				EXT4_IGET_NORMAL);
134123dd561aSYi Li 	if (IS_ERR(old_parent)) {
13428016e29fSHarshad Shirwadkar 		jbd_debug(1, "Dir with inode  %d not found", darg.parent_ino);
13438016e29fSHarshad Shirwadkar 		iput(inode);
13448016e29fSHarshad Shirwadkar 		return 0;
13458016e29fSHarshad Shirwadkar 	}
13468016e29fSHarshad Shirwadkar 
1347a80f7fcfSHarshad Shirwadkar 	ret = __ext4_unlink(NULL, old_parent, &entry, inode);
13488016e29fSHarshad Shirwadkar 	/* -ENOENT ok coz it might not exist anymore. */
13498016e29fSHarshad Shirwadkar 	if (ret == -ENOENT)
13508016e29fSHarshad Shirwadkar 		ret = 0;
13518016e29fSHarshad Shirwadkar 	iput(old_parent);
13528016e29fSHarshad Shirwadkar 	iput(inode);
13538016e29fSHarshad Shirwadkar 	return ret;
13548016e29fSHarshad Shirwadkar }
13558016e29fSHarshad Shirwadkar 
13568016e29fSHarshad Shirwadkar static int ext4_fc_replay_link_internal(struct super_block *sb,
13578016e29fSHarshad Shirwadkar 				struct dentry_info_args *darg,
13588016e29fSHarshad Shirwadkar 				struct inode *inode)
13598016e29fSHarshad Shirwadkar {
13608016e29fSHarshad Shirwadkar 	struct inode *dir = NULL;
13618016e29fSHarshad Shirwadkar 	struct dentry *dentry_dir = NULL, *dentry_inode = NULL;
13628016e29fSHarshad Shirwadkar 	struct qstr qstr_dname = QSTR_INIT(darg->dname, darg->dname_len);
13638016e29fSHarshad Shirwadkar 	int ret = 0;
13648016e29fSHarshad Shirwadkar 
13658016e29fSHarshad Shirwadkar 	dir = ext4_iget(sb, darg->parent_ino, EXT4_IGET_NORMAL);
13668016e29fSHarshad Shirwadkar 	if (IS_ERR(dir)) {
13678016e29fSHarshad Shirwadkar 		jbd_debug(1, "Dir with inode %d not found.", darg->parent_ino);
13688016e29fSHarshad Shirwadkar 		dir = NULL;
13698016e29fSHarshad Shirwadkar 		goto out;
13708016e29fSHarshad Shirwadkar 	}
13718016e29fSHarshad Shirwadkar 
13728016e29fSHarshad Shirwadkar 	dentry_dir = d_obtain_alias(dir);
13738016e29fSHarshad Shirwadkar 	if (IS_ERR(dentry_dir)) {
13748016e29fSHarshad Shirwadkar 		jbd_debug(1, "Failed to obtain dentry");
13758016e29fSHarshad Shirwadkar 		dentry_dir = NULL;
13768016e29fSHarshad Shirwadkar 		goto out;
13778016e29fSHarshad Shirwadkar 	}
13788016e29fSHarshad Shirwadkar 
13798016e29fSHarshad Shirwadkar 	dentry_inode = d_alloc(dentry_dir, &qstr_dname);
13808016e29fSHarshad Shirwadkar 	if (!dentry_inode) {
13818016e29fSHarshad Shirwadkar 		jbd_debug(1, "Inode dentry not created.");
13828016e29fSHarshad Shirwadkar 		ret = -ENOMEM;
13838016e29fSHarshad Shirwadkar 		goto out;
13848016e29fSHarshad Shirwadkar 	}
13858016e29fSHarshad Shirwadkar 
13868016e29fSHarshad Shirwadkar 	ret = __ext4_link(dir, inode, dentry_inode);
13878016e29fSHarshad Shirwadkar 	/*
13888016e29fSHarshad Shirwadkar 	 * It's possible that link already existed since data blocks
13898016e29fSHarshad Shirwadkar 	 * for the dir in question got persisted before we crashed OR
13908016e29fSHarshad Shirwadkar 	 * we replayed this tag and crashed before the entire replay
13918016e29fSHarshad Shirwadkar 	 * could complete.
13928016e29fSHarshad Shirwadkar 	 */
13938016e29fSHarshad Shirwadkar 	if (ret && ret != -EEXIST) {
13948016e29fSHarshad Shirwadkar 		jbd_debug(1, "Failed to link\n");
13958016e29fSHarshad Shirwadkar 		goto out;
13968016e29fSHarshad Shirwadkar 	}
13978016e29fSHarshad Shirwadkar 
13988016e29fSHarshad Shirwadkar 	ret = 0;
13998016e29fSHarshad Shirwadkar out:
14008016e29fSHarshad Shirwadkar 	if (dentry_dir) {
14018016e29fSHarshad Shirwadkar 		d_drop(dentry_dir);
14028016e29fSHarshad Shirwadkar 		dput(dentry_dir);
14038016e29fSHarshad Shirwadkar 	} else if (dir) {
14048016e29fSHarshad Shirwadkar 		iput(dir);
14058016e29fSHarshad Shirwadkar 	}
14068016e29fSHarshad Shirwadkar 	if (dentry_inode) {
14078016e29fSHarshad Shirwadkar 		d_drop(dentry_inode);
14088016e29fSHarshad Shirwadkar 		dput(dentry_inode);
14098016e29fSHarshad Shirwadkar 	}
14108016e29fSHarshad Shirwadkar 
14118016e29fSHarshad Shirwadkar 	return ret;
14128016e29fSHarshad Shirwadkar }
14138016e29fSHarshad Shirwadkar 
14148016e29fSHarshad Shirwadkar /* Link replay function */
1415a7ba36bcSHarshad Shirwadkar static int ext4_fc_replay_link(struct super_block *sb, struct ext4_fc_tl *tl,
1416a7ba36bcSHarshad Shirwadkar 			       u8 *val)
14178016e29fSHarshad Shirwadkar {
14188016e29fSHarshad Shirwadkar 	struct inode *inode;
14198016e29fSHarshad Shirwadkar 	struct dentry_info_args darg;
14208016e29fSHarshad Shirwadkar 	int ret = 0;
14218016e29fSHarshad Shirwadkar 
1422a7ba36bcSHarshad Shirwadkar 	tl_to_darg(&darg, tl, val);
14238016e29fSHarshad Shirwadkar 	trace_ext4_fc_replay(sb, EXT4_FC_TAG_LINK, darg.ino,
14248016e29fSHarshad Shirwadkar 			darg.parent_ino, darg.dname_len);
14258016e29fSHarshad Shirwadkar 
14268016e29fSHarshad Shirwadkar 	inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL);
142723dd561aSYi Li 	if (IS_ERR(inode)) {
14288016e29fSHarshad Shirwadkar 		jbd_debug(1, "Inode not found.");
14298016e29fSHarshad Shirwadkar 		return 0;
14308016e29fSHarshad Shirwadkar 	}
14318016e29fSHarshad Shirwadkar 
14328016e29fSHarshad Shirwadkar 	ret = ext4_fc_replay_link_internal(sb, &darg, inode);
14338016e29fSHarshad Shirwadkar 	iput(inode);
14348016e29fSHarshad Shirwadkar 	return ret;
14358016e29fSHarshad Shirwadkar }
14368016e29fSHarshad Shirwadkar 
14378016e29fSHarshad Shirwadkar /*
14388016e29fSHarshad Shirwadkar  * Record all the modified inodes during replay. We use this later to setup
14398016e29fSHarshad Shirwadkar  * block bitmaps correctly.
14408016e29fSHarshad Shirwadkar  */
14418016e29fSHarshad Shirwadkar static int ext4_fc_record_modified_inode(struct super_block *sb, int ino)
14428016e29fSHarshad Shirwadkar {
14438016e29fSHarshad Shirwadkar 	struct ext4_fc_replay_state *state;
14448016e29fSHarshad Shirwadkar 	int i;
14458016e29fSHarshad Shirwadkar 
14468016e29fSHarshad Shirwadkar 	state = &EXT4_SB(sb)->s_fc_replay_state;
14478016e29fSHarshad Shirwadkar 	for (i = 0; i < state->fc_modified_inodes_used; i++)
14488016e29fSHarshad Shirwadkar 		if (state->fc_modified_inodes[i] == ino)
14498016e29fSHarshad Shirwadkar 			return 0;
14508016e29fSHarshad Shirwadkar 	if (state->fc_modified_inodes_used == state->fc_modified_inodes_size) {
14518016e29fSHarshad Shirwadkar 		state->fc_modified_inodes = krealloc(
1452cdce59a1SRitesh Harjani 				state->fc_modified_inodes,
1453cdce59a1SRitesh Harjani 				sizeof(int) * (state->fc_modified_inodes_size +
1454cdce59a1SRitesh Harjani 				EXT4_FC_REPLAY_REALLOC_INCREMENT),
14558016e29fSHarshad Shirwadkar 				GFP_KERNEL);
14568016e29fSHarshad Shirwadkar 		if (!state->fc_modified_inodes)
14578016e29fSHarshad Shirwadkar 			return -ENOMEM;
1458cdce59a1SRitesh Harjani 		state->fc_modified_inodes_size +=
1459cdce59a1SRitesh Harjani 			EXT4_FC_REPLAY_REALLOC_INCREMENT;
14608016e29fSHarshad Shirwadkar 	}
14618016e29fSHarshad Shirwadkar 	state->fc_modified_inodes[state->fc_modified_inodes_used++] = ino;
14628016e29fSHarshad Shirwadkar 	return 0;
14638016e29fSHarshad Shirwadkar }
14648016e29fSHarshad Shirwadkar 
14658016e29fSHarshad Shirwadkar /*
14668016e29fSHarshad Shirwadkar  * Inode replay function
14678016e29fSHarshad Shirwadkar  */
1468a7ba36bcSHarshad Shirwadkar static int ext4_fc_replay_inode(struct super_block *sb, struct ext4_fc_tl *tl,
1469a7ba36bcSHarshad Shirwadkar 				u8 *val)
14708016e29fSHarshad Shirwadkar {
1471a7ba36bcSHarshad Shirwadkar 	struct ext4_fc_inode fc_inode;
14728016e29fSHarshad Shirwadkar 	struct ext4_inode *raw_inode;
14738016e29fSHarshad Shirwadkar 	struct ext4_inode *raw_fc_inode;
14748016e29fSHarshad Shirwadkar 	struct inode *inode = NULL;
14758016e29fSHarshad Shirwadkar 	struct ext4_iloc iloc;
14768016e29fSHarshad Shirwadkar 	int inode_len, ino, ret, tag = le16_to_cpu(tl->fc_tag);
14778016e29fSHarshad Shirwadkar 	struct ext4_extent_header *eh;
14788016e29fSHarshad Shirwadkar 
1479a7ba36bcSHarshad Shirwadkar 	memcpy(&fc_inode, val, sizeof(fc_inode));
14808016e29fSHarshad Shirwadkar 
1481a7ba36bcSHarshad Shirwadkar 	ino = le32_to_cpu(fc_inode.fc_ino);
14828016e29fSHarshad Shirwadkar 	trace_ext4_fc_replay(sb, tag, ino, 0, 0);
14838016e29fSHarshad Shirwadkar 
14848016e29fSHarshad Shirwadkar 	inode = ext4_iget(sb, ino, EXT4_IGET_NORMAL);
148523dd561aSYi Li 	if (!IS_ERR(inode)) {
14868016e29fSHarshad Shirwadkar 		ext4_ext_clear_bb(inode);
14878016e29fSHarshad Shirwadkar 		iput(inode);
14888016e29fSHarshad Shirwadkar 	}
148923dd561aSYi Li 	inode = NULL;
14908016e29fSHarshad Shirwadkar 
1491cdce59a1SRitesh Harjani 	ret = ext4_fc_record_modified_inode(sb, ino);
1492cdce59a1SRitesh Harjani 	if (ret)
1493cdce59a1SRitesh Harjani 		goto out;
14948016e29fSHarshad Shirwadkar 
1495a7ba36bcSHarshad Shirwadkar 	raw_fc_inode = (struct ext4_inode *)
1496a7ba36bcSHarshad Shirwadkar 		(val + offsetof(struct ext4_fc_inode, fc_raw_inode));
14978016e29fSHarshad Shirwadkar 	ret = ext4_get_fc_inode_loc(sb, ino, &iloc);
14988016e29fSHarshad Shirwadkar 	if (ret)
14998016e29fSHarshad Shirwadkar 		goto out;
15008016e29fSHarshad Shirwadkar 
1501a7ba36bcSHarshad Shirwadkar 	inode_len = le16_to_cpu(tl->fc_len) - sizeof(struct ext4_fc_inode);
15028016e29fSHarshad Shirwadkar 	raw_inode = ext4_raw_inode(&iloc);
15038016e29fSHarshad Shirwadkar 
15048016e29fSHarshad Shirwadkar 	memcpy(raw_inode, raw_fc_inode, offsetof(struct ext4_inode, i_block));
15058016e29fSHarshad Shirwadkar 	memcpy(&raw_inode->i_generation, &raw_fc_inode->i_generation,
15068016e29fSHarshad Shirwadkar 		inode_len - offsetof(struct ext4_inode, i_generation));
15078016e29fSHarshad Shirwadkar 	if (le32_to_cpu(raw_inode->i_flags) & EXT4_EXTENTS_FL) {
15088016e29fSHarshad Shirwadkar 		eh = (struct ext4_extent_header *)(&raw_inode->i_block[0]);
15098016e29fSHarshad Shirwadkar 		if (eh->eh_magic != EXT4_EXT_MAGIC) {
15108016e29fSHarshad Shirwadkar 			memset(eh, 0, sizeof(*eh));
15118016e29fSHarshad Shirwadkar 			eh->eh_magic = EXT4_EXT_MAGIC;
15128016e29fSHarshad Shirwadkar 			eh->eh_max = cpu_to_le16(
15138016e29fSHarshad Shirwadkar 				(sizeof(raw_inode->i_block) -
15148016e29fSHarshad Shirwadkar 				 sizeof(struct ext4_extent_header))
15158016e29fSHarshad Shirwadkar 				 / sizeof(struct ext4_extent));
15168016e29fSHarshad Shirwadkar 		}
15178016e29fSHarshad Shirwadkar 	} else if (le32_to_cpu(raw_inode->i_flags) & EXT4_INLINE_DATA_FL) {
15188016e29fSHarshad Shirwadkar 		memcpy(raw_inode->i_block, raw_fc_inode->i_block,
15198016e29fSHarshad Shirwadkar 			sizeof(raw_inode->i_block));
15208016e29fSHarshad Shirwadkar 	}
15218016e29fSHarshad Shirwadkar 
15228016e29fSHarshad Shirwadkar 	/* Immediately update the inode on disk. */
15238016e29fSHarshad Shirwadkar 	ret = ext4_handle_dirty_metadata(NULL, NULL, iloc.bh);
15248016e29fSHarshad Shirwadkar 	if (ret)
15258016e29fSHarshad Shirwadkar 		goto out;
15268016e29fSHarshad Shirwadkar 	ret = sync_dirty_buffer(iloc.bh);
15278016e29fSHarshad Shirwadkar 	if (ret)
15288016e29fSHarshad Shirwadkar 		goto out;
15298016e29fSHarshad Shirwadkar 	ret = ext4_mark_inode_used(sb, ino);
15308016e29fSHarshad Shirwadkar 	if (ret)
15318016e29fSHarshad Shirwadkar 		goto out;
15328016e29fSHarshad Shirwadkar 
15338016e29fSHarshad Shirwadkar 	/* Given that we just wrote the inode on disk, this SHOULD succeed. */
15348016e29fSHarshad Shirwadkar 	inode = ext4_iget(sb, ino, EXT4_IGET_NORMAL);
153523dd561aSYi Li 	if (IS_ERR(inode)) {
15368016e29fSHarshad Shirwadkar 		jbd_debug(1, "Inode not found.");
15378016e29fSHarshad Shirwadkar 		return -EFSCORRUPTED;
15388016e29fSHarshad Shirwadkar 	}
15398016e29fSHarshad Shirwadkar 
15408016e29fSHarshad Shirwadkar 	/*
15418016e29fSHarshad Shirwadkar 	 * Our allocator could have made different decisions than before
15428016e29fSHarshad Shirwadkar 	 * crashing. This should be fixed but until then, we calculate
15438016e29fSHarshad Shirwadkar 	 * the number of blocks the inode.
15448016e29fSHarshad Shirwadkar 	 */
15451ebf2178SHarshad Shirwadkar 	if (!ext4_test_inode_flag(inode, EXT4_INODE_INLINE_DATA))
15468016e29fSHarshad Shirwadkar 		ext4_ext_replay_set_iblocks(inode);
15478016e29fSHarshad Shirwadkar 
15488016e29fSHarshad Shirwadkar 	inode->i_generation = le32_to_cpu(ext4_raw_inode(&iloc)->i_generation);
15498016e29fSHarshad Shirwadkar 	ext4_reset_inode_seed(inode);
15508016e29fSHarshad Shirwadkar 
15518016e29fSHarshad Shirwadkar 	ext4_inode_csum_set(inode, ext4_raw_inode(&iloc), EXT4_I(inode));
15528016e29fSHarshad Shirwadkar 	ret = ext4_handle_dirty_metadata(NULL, NULL, iloc.bh);
15538016e29fSHarshad Shirwadkar 	sync_dirty_buffer(iloc.bh);
15548016e29fSHarshad Shirwadkar 	brelse(iloc.bh);
15558016e29fSHarshad Shirwadkar out:
15568016e29fSHarshad Shirwadkar 	iput(inode);
15578016e29fSHarshad Shirwadkar 	if (!ret)
1558c6bf3f0eSChristoph Hellwig 		blkdev_issue_flush(sb->s_bdev);
15598016e29fSHarshad Shirwadkar 
15608016e29fSHarshad Shirwadkar 	return 0;
15618016e29fSHarshad Shirwadkar }
15628016e29fSHarshad Shirwadkar 
15638016e29fSHarshad Shirwadkar /*
15648016e29fSHarshad Shirwadkar  * Dentry create replay function.
15658016e29fSHarshad Shirwadkar  *
15668016e29fSHarshad Shirwadkar  * EXT4_FC_TAG_CREAT is preceded by EXT4_FC_TAG_INODE_FULL. Which means, the
15678016e29fSHarshad Shirwadkar  * inode for which we are trying to create a dentry here, should already have
15688016e29fSHarshad Shirwadkar  * been replayed before we start here.
15698016e29fSHarshad Shirwadkar  */
1570a7ba36bcSHarshad Shirwadkar static int ext4_fc_replay_create(struct super_block *sb, struct ext4_fc_tl *tl,
1571a7ba36bcSHarshad Shirwadkar 				 u8 *val)
15728016e29fSHarshad Shirwadkar {
15738016e29fSHarshad Shirwadkar 	int ret = 0;
15748016e29fSHarshad Shirwadkar 	struct inode *inode = NULL;
15758016e29fSHarshad Shirwadkar 	struct inode *dir = NULL;
15768016e29fSHarshad Shirwadkar 	struct dentry_info_args darg;
15778016e29fSHarshad Shirwadkar 
1578a7ba36bcSHarshad Shirwadkar 	tl_to_darg(&darg, tl, val);
15798016e29fSHarshad Shirwadkar 
15808016e29fSHarshad Shirwadkar 	trace_ext4_fc_replay(sb, EXT4_FC_TAG_CREAT, darg.ino,
15818016e29fSHarshad Shirwadkar 			darg.parent_ino, darg.dname_len);
15828016e29fSHarshad Shirwadkar 
15838016e29fSHarshad Shirwadkar 	/* This takes care of update group descriptor and other metadata */
15848016e29fSHarshad Shirwadkar 	ret = ext4_mark_inode_used(sb, darg.ino);
15858016e29fSHarshad Shirwadkar 	if (ret)
15868016e29fSHarshad Shirwadkar 		goto out;
15878016e29fSHarshad Shirwadkar 
15888016e29fSHarshad Shirwadkar 	inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL);
158923dd561aSYi Li 	if (IS_ERR(inode)) {
15908016e29fSHarshad Shirwadkar 		jbd_debug(1, "inode %d not found.", darg.ino);
15918016e29fSHarshad Shirwadkar 		inode = NULL;
15928016e29fSHarshad Shirwadkar 		ret = -EINVAL;
15938016e29fSHarshad Shirwadkar 		goto out;
15948016e29fSHarshad Shirwadkar 	}
15958016e29fSHarshad Shirwadkar 
15968016e29fSHarshad Shirwadkar 	if (S_ISDIR(inode->i_mode)) {
15978016e29fSHarshad Shirwadkar 		/*
15988016e29fSHarshad Shirwadkar 		 * If we are creating a directory, we need to make sure that the
15998016e29fSHarshad Shirwadkar 		 * dot and dot dot dirents are setup properly.
16008016e29fSHarshad Shirwadkar 		 */
16018016e29fSHarshad Shirwadkar 		dir = ext4_iget(sb, darg.parent_ino, EXT4_IGET_NORMAL);
160223dd561aSYi Li 		if (IS_ERR(dir)) {
16038016e29fSHarshad Shirwadkar 			jbd_debug(1, "Dir %d not found.", darg.ino);
16048016e29fSHarshad Shirwadkar 			goto out;
16058016e29fSHarshad Shirwadkar 		}
16068016e29fSHarshad Shirwadkar 		ret = ext4_init_new_dir(NULL, dir, inode);
16078016e29fSHarshad Shirwadkar 		iput(dir);
16088016e29fSHarshad Shirwadkar 		if (ret) {
16098016e29fSHarshad Shirwadkar 			ret = 0;
16108016e29fSHarshad Shirwadkar 			goto out;
16118016e29fSHarshad Shirwadkar 		}
16128016e29fSHarshad Shirwadkar 	}
16138016e29fSHarshad Shirwadkar 	ret = ext4_fc_replay_link_internal(sb, &darg, inode);
16148016e29fSHarshad Shirwadkar 	if (ret)
16158016e29fSHarshad Shirwadkar 		goto out;
16168016e29fSHarshad Shirwadkar 	set_nlink(inode, 1);
16178016e29fSHarshad Shirwadkar 	ext4_mark_inode_dirty(NULL, inode);
16188016e29fSHarshad Shirwadkar out:
16198016e29fSHarshad Shirwadkar 	if (inode)
16208016e29fSHarshad Shirwadkar 		iput(inode);
16218016e29fSHarshad Shirwadkar 	return ret;
16228016e29fSHarshad Shirwadkar }
16238016e29fSHarshad Shirwadkar 
16248016e29fSHarshad Shirwadkar /*
1625599ea31dSXin Yin  * Record physical disk regions which are in use as per fast commit area,
1626599ea31dSXin Yin  * and used by inodes during replay phase. Our simple replay phase
1627599ea31dSXin Yin  * allocator excludes these regions from allocation.
16288016e29fSHarshad Shirwadkar  */
1629599ea31dSXin Yin int ext4_fc_record_regions(struct super_block *sb, int ino,
1630599ea31dSXin Yin 		ext4_lblk_t lblk, ext4_fsblk_t pblk, int len, int replay)
16318016e29fSHarshad Shirwadkar {
16328016e29fSHarshad Shirwadkar 	struct ext4_fc_replay_state *state;
16338016e29fSHarshad Shirwadkar 	struct ext4_fc_alloc_region *region;
16348016e29fSHarshad Shirwadkar 
16358016e29fSHarshad Shirwadkar 	state = &EXT4_SB(sb)->s_fc_replay_state;
1636599ea31dSXin Yin 	/*
1637599ea31dSXin Yin 	 * during replay phase, the fc_regions_valid may not same as
1638599ea31dSXin Yin 	 * fc_regions_used, update it when do new additions.
1639599ea31dSXin Yin 	 */
1640599ea31dSXin Yin 	if (replay && state->fc_regions_used != state->fc_regions_valid)
1641599ea31dSXin Yin 		state->fc_regions_used = state->fc_regions_valid;
16428016e29fSHarshad Shirwadkar 	if (state->fc_regions_used == state->fc_regions_size) {
16438016e29fSHarshad Shirwadkar 		state->fc_regions_size +=
16448016e29fSHarshad Shirwadkar 			EXT4_FC_REPLAY_REALLOC_INCREMENT;
16458016e29fSHarshad Shirwadkar 		state->fc_regions = krealloc(
16468016e29fSHarshad Shirwadkar 					state->fc_regions,
16478016e29fSHarshad Shirwadkar 					state->fc_regions_size *
16488016e29fSHarshad Shirwadkar 					sizeof(struct ext4_fc_alloc_region),
16498016e29fSHarshad Shirwadkar 					GFP_KERNEL);
16508016e29fSHarshad Shirwadkar 		if (!state->fc_regions)
16518016e29fSHarshad Shirwadkar 			return -ENOMEM;
16528016e29fSHarshad Shirwadkar 	}
16538016e29fSHarshad Shirwadkar 	region = &state->fc_regions[state->fc_regions_used++];
16548016e29fSHarshad Shirwadkar 	region->ino = ino;
16558016e29fSHarshad Shirwadkar 	region->lblk = lblk;
16568016e29fSHarshad Shirwadkar 	region->pblk = pblk;
16578016e29fSHarshad Shirwadkar 	region->len = len;
16588016e29fSHarshad Shirwadkar 
1659599ea31dSXin Yin 	if (replay)
1660599ea31dSXin Yin 		state->fc_regions_valid++;
1661599ea31dSXin Yin 
16628016e29fSHarshad Shirwadkar 	return 0;
16638016e29fSHarshad Shirwadkar }
16648016e29fSHarshad Shirwadkar 
16658016e29fSHarshad Shirwadkar /* Replay add range tag */
16668016e29fSHarshad Shirwadkar static int ext4_fc_replay_add_range(struct super_block *sb,
1667a7ba36bcSHarshad Shirwadkar 				    struct ext4_fc_tl *tl, u8 *val)
16688016e29fSHarshad Shirwadkar {
1669a7ba36bcSHarshad Shirwadkar 	struct ext4_fc_add_range fc_add_ex;
16708016e29fSHarshad Shirwadkar 	struct ext4_extent newex, *ex;
16718016e29fSHarshad Shirwadkar 	struct inode *inode;
16728016e29fSHarshad Shirwadkar 	ext4_lblk_t start, cur;
16738016e29fSHarshad Shirwadkar 	int remaining, len;
16748016e29fSHarshad Shirwadkar 	ext4_fsblk_t start_pblk;
16758016e29fSHarshad Shirwadkar 	struct ext4_map_blocks map;
16768016e29fSHarshad Shirwadkar 	struct ext4_ext_path *path = NULL;
16778016e29fSHarshad Shirwadkar 	int ret;
16788016e29fSHarshad Shirwadkar 
1679a7ba36bcSHarshad Shirwadkar 	memcpy(&fc_add_ex, val, sizeof(fc_add_ex));
1680a7ba36bcSHarshad Shirwadkar 	ex = (struct ext4_extent *)&fc_add_ex.fc_ex;
16818016e29fSHarshad Shirwadkar 
16828016e29fSHarshad Shirwadkar 	trace_ext4_fc_replay(sb, EXT4_FC_TAG_ADD_RANGE,
1683a7ba36bcSHarshad Shirwadkar 		le32_to_cpu(fc_add_ex.fc_ino), le32_to_cpu(ex->ee_block),
16848016e29fSHarshad Shirwadkar 		ext4_ext_get_actual_len(ex));
16858016e29fSHarshad Shirwadkar 
1686a7ba36bcSHarshad Shirwadkar 	inode = ext4_iget(sb, le32_to_cpu(fc_add_ex.fc_ino), EXT4_IGET_NORMAL);
168723dd561aSYi Li 	if (IS_ERR(inode)) {
16888016e29fSHarshad Shirwadkar 		jbd_debug(1, "Inode not found.");
16898016e29fSHarshad Shirwadkar 		return 0;
16908016e29fSHarshad Shirwadkar 	}
16918016e29fSHarshad Shirwadkar 
16928016e29fSHarshad Shirwadkar 	ret = ext4_fc_record_modified_inode(sb, inode->i_ino);
1693cdce59a1SRitesh Harjani 	if (ret)
1694cdce59a1SRitesh Harjani 		goto out;
16958016e29fSHarshad Shirwadkar 
16968016e29fSHarshad Shirwadkar 	start = le32_to_cpu(ex->ee_block);
16978016e29fSHarshad Shirwadkar 	start_pblk = ext4_ext_pblock(ex);
16988016e29fSHarshad Shirwadkar 	len = ext4_ext_get_actual_len(ex);
16998016e29fSHarshad Shirwadkar 
17008016e29fSHarshad Shirwadkar 	cur = start;
17018016e29fSHarshad Shirwadkar 	remaining = len;
17028016e29fSHarshad Shirwadkar 	jbd_debug(1, "ADD_RANGE, lblk %d, pblk %lld, len %d, unwritten %d, inode %ld\n",
17038016e29fSHarshad Shirwadkar 		  start, start_pblk, len, ext4_ext_is_unwritten(ex),
17048016e29fSHarshad Shirwadkar 		  inode->i_ino);
17058016e29fSHarshad Shirwadkar 
17068016e29fSHarshad Shirwadkar 	while (remaining > 0) {
17078016e29fSHarshad Shirwadkar 		map.m_lblk = cur;
17088016e29fSHarshad Shirwadkar 		map.m_len = remaining;
17098016e29fSHarshad Shirwadkar 		map.m_pblk = 0;
17108016e29fSHarshad Shirwadkar 		ret = ext4_map_blocks(NULL, inode, &map, 0);
17118016e29fSHarshad Shirwadkar 
1712cdce59a1SRitesh Harjani 		if (ret < 0)
1713cdce59a1SRitesh Harjani 			goto out;
17148016e29fSHarshad Shirwadkar 
17158016e29fSHarshad Shirwadkar 		if (ret == 0) {
17168016e29fSHarshad Shirwadkar 			/* Range is not mapped */
17178016e29fSHarshad Shirwadkar 			path = ext4_find_extent(inode, cur, NULL, 0);
1718cdce59a1SRitesh Harjani 			if (IS_ERR(path))
1719cdce59a1SRitesh Harjani 				goto out;
17208016e29fSHarshad Shirwadkar 			memset(&newex, 0, sizeof(newex));
17218016e29fSHarshad Shirwadkar 			newex.ee_block = cpu_to_le32(cur);
17228016e29fSHarshad Shirwadkar 			ext4_ext_store_pblock(
17238016e29fSHarshad Shirwadkar 				&newex, start_pblk + cur - start);
17248016e29fSHarshad Shirwadkar 			newex.ee_len = cpu_to_le16(map.m_len);
17258016e29fSHarshad Shirwadkar 			if (ext4_ext_is_unwritten(ex))
17268016e29fSHarshad Shirwadkar 				ext4_ext_mark_unwritten(&newex);
17278016e29fSHarshad Shirwadkar 			down_write(&EXT4_I(inode)->i_data_sem);
17288016e29fSHarshad Shirwadkar 			ret = ext4_ext_insert_extent(
17298016e29fSHarshad Shirwadkar 				NULL, inode, &path, &newex, 0);
17308016e29fSHarshad Shirwadkar 			up_write((&EXT4_I(inode)->i_data_sem));
17318016e29fSHarshad Shirwadkar 			ext4_ext_drop_refs(path);
17328016e29fSHarshad Shirwadkar 			kfree(path);
1733cdce59a1SRitesh Harjani 			if (ret)
1734cdce59a1SRitesh Harjani 				goto out;
17358016e29fSHarshad Shirwadkar 			goto next;
17368016e29fSHarshad Shirwadkar 		}
17378016e29fSHarshad Shirwadkar 
17388016e29fSHarshad Shirwadkar 		if (start_pblk + cur - start != map.m_pblk) {
17398016e29fSHarshad Shirwadkar 			/*
17408016e29fSHarshad Shirwadkar 			 * Logical to physical mapping changed. This can happen
17418016e29fSHarshad Shirwadkar 			 * if this range was removed and then reallocated to
17428016e29fSHarshad Shirwadkar 			 * map to new physical blocks during a fast commit.
17438016e29fSHarshad Shirwadkar 			 */
17448016e29fSHarshad Shirwadkar 			ret = ext4_ext_replay_update_ex(inode, cur, map.m_len,
17458016e29fSHarshad Shirwadkar 					ext4_ext_is_unwritten(ex),
17468016e29fSHarshad Shirwadkar 					start_pblk + cur - start);
1747cdce59a1SRitesh Harjani 			if (ret)
1748cdce59a1SRitesh Harjani 				goto out;
17498016e29fSHarshad Shirwadkar 			/*
17508016e29fSHarshad Shirwadkar 			 * Mark the old blocks as free since they aren't used
17518016e29fSHarshad Shirwadkar 			 * anymore. We maintain an array of all the modified
17528016e29fSHarshad Shirwadkar 			 * inodes. In case these blocks are still used at either
17538016e29fSHarshad Shirwadkar 			 * a different logical range in the same inode or in
17548016e29fSHarshad Shirwadkar 			 * some different inode, we will mark them as allocated
17558016e29fSHarshad Shirwadkar 			 * at the end of the FC replay using our array of
17568016e29fSHarshad Shirwadkar 			 * modified inodes.
17578016e29fSHarshad Shirwadkar 			 */
17588016e29fSHarshad Shirwadkar 			ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, 0);
17598016e29fSHarshad Shirwadkar 			goto next;
17608016e29fSHarshad Shirwadkar 		}
17618016e29fSHarshad Shirwadkar 
17628016e29fSHarshad Shirwadkar 		/* Range is mapped and needs a state change */
1763fcdf3c34SArnd Bergmann 		jbd_debug(1, "Converting from %ld to %d %lld",
17648016e29fSHarshad Shirwadkar 				map.m_flags & EXT4_MAP_UNWRITTEN,
17658016e29fSHarshad Shirwadkar 			ext4_ext_is_unwritten(ex), map.m_pblk);
17668016e29fSHarshad Shirwadkar 		ret = ext4_ext_replay_update_ex(inode, cur, map.m_len,
17678016e29fSHarshad Shirwadkar 					ext4_ext_is_unwritten(ex), map.m_pblk);
1768cdce59a1SRitesh Harjani 		if (ret)
1769cdce59a1SRitesh Harjani 			goto out;
17708016e29fSHarshad Shirwadkar 		/*
17718016e29fSHarshad Shirwadkar 		 * We may have split the extent tree while toggling the state.
17728016e29fSHarshad Shirwadkar 		 * Try to shrink the extent tree now.
17738016e29fSHarshad Shirwadkar 		 */
17748016e29fSHarshad Shirwadkar 		ext4_ext_replay_shrink_inode(inode, start + len);
17758016e29fSHarshad Shirwadkar next:
17768016e29fSHarshad Shirwadkar 		cur += map.m_len;
17778016e29fSHarshad Shirwadkar 		remaining -= map.m_len;
17788016e29fSHarshad Shirwadkar 	}
17798016e29fSHarshad Shirwadkar 	ext4_ext_replay_shrink_inode(inode, i_size_read(inode) >>
17808016e29fSHarshad Shirwadkar 					sb->s_blocksize_bits);
1781cdce59a1SRitesh Harjani out:
17828016e29fSHarshad Shirwadkar 	iput(inode);
17838016e29fSHarshad Shirwadkar 	return 0;
17848016e29fSHarshad Shirwadkar }
17858016e29fSHarshad Shirwadkar 
17868016e29fSHarshad Shirwadkar /* Replay DEL_RANGE tag */
17878016e29fSHarshad Shirwadkar static int
1788a7ba36bcSHarshad Shirwadkar ext4_fc_replay_del_range(struct super_block *sb, struct ext4_fc_tl *tl,
1789a7ba36bcSHarshad Shirwadkar 			 u8 *val)
17908016e29fSHarshad Shirwadkar {
17918016e29fSHarshad Shirwadkar 	struct inode *inode;
1792a7ba36bcSHarshad Shirwadkar 	struct ext4_fc_del_range lrange;
17938016e29fSHarshad Shirwadkar 	struct ext4_map_blocks map;
17948016e29fSHarshad Shirwadkar 	ext4_lblk_t cur, remaining;
17958016e29fSHarshad Shirwadkar 	int ret;
17968016e29fSHarshad Shirwadkar 
1797a7ba36bcSHarshad Shirwadkar 	memcpy(&lrange, val, sizeof(lrange));
1798a7ba36bcSHarshad Shirwadkar 	cur = le32_to_cpu(lrange.fc_lblk);
1799a7ba36bcSHarshad Shirwadkar 	remaining = le32_to_cpu(lrange.fc_len);
18008016e29fSHarshad Shirwadkar 
18018016e29fSHarshad Shirwadkar 	trace_ext4_fc_replay(sb, EXT4_FC_TAG_DEL_RANGE,
1802a7ba36bcSHarshad Shirwadkar 		le32_to_cpu(lrange.fc_ino), cur, remaining);
18038016e29fSHarshad Shirwadkar 
1804a7ba36bcSHarshad Shirwadkar 	inode = ext4_iget(sb, le32_to_cpu(lrange.fc_ino), EXT4_IGET_NORMAL);
180523dd561aSYi Li 	if (IS_ERR(inode)) {
1806a7ba36bcSHarshad Shirwadkar 		jbd_debug(1, "Inode %d not found", le32_to_cpu(lrange.fc_ino));
18078016e29fSHarshad Shirwadkar 		return 0;
18088016e29fSHarshad Shirwadkar 	}
18098016e29fSHarshad Shirwadkar 
18108016e29fSHarshad Shirwadkar 	ret = ext4_fc_record_modified_inode(sb, inode->i_ino);
1811cdce59a1SRitesh Harjani 	if (ret)
1812cdce59a1SRitesh Harjani 		goto out;
18138016e29fSHarshad Shirwadkar 
18148016e29fSHarshad Shirwadkar 	jbd_debug(1, "DEL_RANGE, inode %ld, lblk %d, len %d\n",
1815a7ba36bcSHarshad Shirwadkar 			inode->i_ino, le32_to_cpu(lrange.fc_lblk),
1816a7ba36bcSHarshad Shirwadkar 			le32_to_cpu(lrange.fc_len));
18178016e29fSHarshad Shirwadkar 	while (remaining > 0) {
18188016e29fSHarshad Shirwadkar 		map.m_lblk = cur;
18198016e29fSHarshad Shirwadkar 		map.m_len = remaining;
18208016e29fSHarshad Shirwadkar 
18218016e29fSHarshad Shirwadkar 		ret = ext4_map_blocks(NULL, inode, &map, 0);
1822cdce59a1SRitesh Harjani 		if (ret < 0)
1823cdce59a1SRitesh Harjani 			goto out;
18248016e29fSHarshad Shirwadkar 		if (ret > 0) {
18258016e29fSHarshad Shirwadkar 			remaining -= ret;
18268016e29fSHarshad Shirwadkar 			cur += ret;
18278016e29fSHarshad Shirwadkar 			ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, 0);
18288016e29fSHarshad Shirwadkar 		} else {
18298016e29fSHarshad Shirwadkar 			remaining -= map.m_len;
18308016e29fSHarshad Shirwadkar 			cur += map.m_len;
18318016e29fSHarshad Shirwadkar 		}
18328016e29fSHarshad Shirwadkar 	}
18338016e29fSHarshad Shirwadkar 
18340b5b5a62SXin Yin 	down_write(&EXT4_I(inode)->i_data_sem);
18358fca8a2bSXin Yin 	ret = ext4_ext_remove_space(inode, le32_to_cpu(lrange.fc_lblk),
18368fca8a2bSXin Yin 				le32_to_cpu(lrange.fc_lblk) +
18378fca8a2bSXin Yin 				le32_to_cpu(lrange.fc_len) - 1);
18380b5b5a62SXin Yin 	up_write(&EXT4_I(inode)->i_data_sem);
1839cdce59a1SRitesh Harjani 	if (ret)
1840cdce59a1SRitesh Harjani 		goto out;
18418016e29fSHarshad Shirwadkar 	ext4_ext_replay_shrink_inode(inode,
18428016e29fSHarshad Shirwadkar 		i_size_read(inode) >> sb->s_blocksize_bits);
18438016e29fSHarshad Shirwadkar 	ext4_mark_inode_dirty(NULL, inode);
1844cdce59a1SRitesh Harjani out:
18458016e29fSHarshad Shirwadkar 	iput(inode);
18468016e29fSHarshad Shirwadkar 	return 0;
18478016e29fSHarshad Shirwadkar }
18488016e29fSHarshad Shirwadkar 
18498016e29fSHarshad Shirwadkar static void ext4_fc_set_bitmaps_and_counters(struct super_block *sb)
18508016e29fSHarshad Shirwadkar {
18518016e29fSHarshad Shirwadkar 	struct ext4_fc_replay_state *state;
18528016e29fSHarshad Shirwadkar 	struct inode *inode;
18538016e29fSHarshad Shirwadkar 	struct ext4_ext_path *path = NULL;
18548016e29fSHarshad Shirwadkar 	struct ext4_map_blocks map;
18558016e29fSHarshad Shirwadkar 	int i, ret, j;
18568016e29fSHarshad Shirwadkar 	ext4_lblk_t cur, end;
18578016e29fSHarshad Shirwadkar 
18588016e29fSHarshad Shirwadkar 	state = &EXT4_SB(sb)->s_fc_replay_state;
18598016e29fSHarshad Shirwadkar 	for (i = 0; i < state->fc_modified_inodes_used; i++) {
18608016e29fSHarshad Shirwadkar 		inode = ext4_iget(sb, state->fc_modified_inodes[i],
18618016e29fSHarshad Shirwadkar 			EXT4_IGET_NORMAL);
186223dd561aSYi Li 		if (IS_ERR(inode)) {
18638016e29fSHarshad Shirwadkar 			jbd_debug(1, "Inode %d not found.",
18648016e29fSHarshad Shirwadkar 				state->fc_modified_inodes[i]);
18658016e29fSHarshad Shirwadkar 			continue;
18668016e29fSHarshad Shirwadkar 		}
18678016e29fSHarshad Shirwadkar 		cur = 0;
18688016e29fSHarshad Shirwadkar 		end = EXT_MAX_BLOCKS;
18691ebf2178SHarshad Shirwadkar 		if (ext4_test_inode_flag(inode, EXT4_INODE_INLINE_DATA)) {
18701ebf2178SHarshad Shirwadkar 			iput(inode);
18711ebf2178SHarshad Shirwadkar 			continue;
18721ebf2178SHarshad Shirwadkar 		}
18738016e29fSHarshad Shirwadkar 		while (cur < end) {
18748016e29fSHarshad Shirwadkar 			map.m_lblk = cur;
18758016e29fSHarshad Shirwadkar 			map.m_len = end - cur;
18768016e29fSHarshad Shirwadkar 
18778016e29fSHarshad Shirwadkar 			ret = ext4_map_blocks(NULL, inode, &map, 0);
18788016e29fSHarshad Shirwadkar 			if (ret < 0)
18798016e29fSHarshad Shirwadkar 				break;
18808016e29fSHarshad Shirwadkar 
18818016e29fSHarshad Shirwadkar 			if (ret > 0) {
18828016e29fSHarshad Shirwadkar 				path = ext4_find_extent(inode, map.m_lblk, NULL, 0);
188323dd561aSYi Li 				if (!IS_ERR(path)) {
18848016e29fSHarshad Shirwadkar 					for (j = 0; j < path->p_depth; j++)
18858016e29fSHarshad Shirwadkar 						ext4_mb_mark_bb(inode->i_sb,
18868016e29fSHarshad Shirwadkar 							path[j].p_block, 1, 1);
18878016e29fSHarshad Shirwadkar 					ext4_ext_drop_refs(path);
18888016e29fSHarshad Shirwadkar 					kfree(path);
18898016e29fSHarshad Shirwadkar 				}
18908016e29fSHarshad Shirwadkar 				cur += ret;
18918016e29fSHarshad Shirwadkar 				ext4_mb_mark_bb(inode->i_sb, map.m_pblk,
18928016e29fSHarshad Shirwadkar 							map.m_len, 1);
18938016e29fSHarshad Shirwadkar 			} else {
18948016e29fSHarshad Shirwadkar 				cur = cur + (map.m_len ? map.m_len : 1);
18958016e29fSHarshad Shirwadkar 			}
18968016e29fSHarshad Shirwadkar 		}
18978016e29fSHarshad Shirwadkar 		iput(inode);
18988016e29fSHarshad Shirwadkar 	}
18998016e29fSHarshad Shirwadkar }
19008016e29fSHarshad Shirwadkar 
19018016e29fSHarshad Shirwadkar /*
19028016e29fSHarshad Shirwadkar  * Check if block is in excluded regions for block allocation. The simple
19038016e29fSHarshad Shirwadkar  * allocator that runs during replay phase is calls this function to see
19048016e29fSHarshad Shirwadkar  * if it is okay to use a block.
19058016e29fSHarshad Shirwadkar  */
19068016e29fSHarshad Shirwadkar bool ext4_fc_replay_check_excluded(struct super_block *sb, ext4_fsblk_t blk)
19078016e29fSHarshad Shirwadkar {
19088016e29fSHarshad Shirwadkar 	int i;
19098016e29fSHarshad Shirwadkar 	struct ext4_fc_replay_state *state;
19108016e29fSHarshad Shirwadkar 
19118016e29fSHarshad Shirwadkar 	state = &EXT4_SB(sb)->s_fc_replay_state;
19128016e29fSHarshad Shirwadkar 	for (i = 0; i < state->fc_regions_valid; i++) {
19138016e29fSHarshad Shirwadkar 		if (state->fc_regions[i].ino == 0 ||
19148016e29fSHarshad Shirwadkar 			state->fc_regions[i].len == 0)
19158016e29fSHarshad Shirwadkar 			continue;
1916dbaafbadSRitesh Harjani 		if (in_range(blk, state->fc_regions[i].pblk,
1917dbaafbadSRitesh Harjani 					state->fc_regions[i].len))
19188016e29fSHarshad Shirwadkar 			return true;
19198016e29fSHarshad Shirwadkar 	}
19208016e29fSHarshad Shirwadkar 	return false;
19218016e29fSHarshad Shirwadkar }
19228016e29fSHarshad Shirwadkar 
19238016e29fSHarshad Shirwadkar /* Cleanup function called after replay */
19248016e29fSHarshad Shirwadkar void ext4_fc_replay_cleanup(struct super_block *sb)
19258016e29fSHarshad Shirwadkar {
19268016e29fSHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(sb);
19278016e29fSHarshad Shirwadkar 
19288016e29fSHarshad Shirwadkar 	sbi->s_mount_state &= ~EXT4_FC_REPLAY;
19298016e29fSHarshad Shirwadkar 	kfree(sbi->s_fc_replay_state.fc_regions);
19308016e29fSHarshad Shirwadkar 	kfree(sbi->s_fc_replay_state.fc_modified_inodes);
19318016e29fSHarshad Shirwadkar }
19328016e29fSHarshad Shirwadkar 
19338016e29fSHarshad Shirwadkar /*
19348016e29fSHarshad Shirwadkar  * Recovery Scan phase handler
19358016e29fSHarshad Shirwadkar  *
19368016e29fSHarshad Shirwadkar  * This function is called during the scan phase and is responsible
19378016e29fSHarshad Shirwadkar  * for doing following things:
19388016e29fSHarshad Shirwadkar  * - Make sure the fast commit area has valid tags for replay
19398016e29fSHarshad Shirwadkar  * - Count number of tags that need to be replayed by the replay handler
19408016e29fSHarshad Shirwadkar  * - Verify CRC
19418016e29fSHarshad Shirwadkar  * - Create a list of excluded blocks for allocation during replay phase
19428016e29fSHarshad Shirwadkar  *
19438016e29fSHarshad Shirwadkar  * This function returns JBD2_FC_REPLAY_CONTINUE to indicate that SCAN is
19448016e29fSHarshad Shirwadkar  * incomplete and JBD2 should send more blocks. It returns JBD2_FC_REPLAY_STOP
19458016e29fSHarshad Shirwadkar  * to indicate that scan has finished and JBD2 can now start replay phase.
19468016e29fSHarshad Shirwadkar  * It returns a negative error to indicate that there was an error. At the end
19478016e29fSHarshad Shirwadkar  * of a successful scan phase, sbi->s_fc_replay_state.fc_replay_num_tags is set
19488016e29fSHarshad Shirwadkar  * to indicate the number of tags that need to replayed during the replay phase.
19498016e29fSHarshad Shirwadkar  */
19508016e29fSHarshad Shirwadkar static int ext4_fc_replay_scan(journal_t *journal,
19518016e29fSHarshad Shirwadkar 				struct buffer_head *bh, int off,
19528016e29fSHarshad Shirwadkar 				tid_t expected_tid)
19538016e29fSHarshad Shirwadkar {
19548016e29fSHarshad Shirwadkar 	struct super_block *sb = journal->j_private;
19558016e29fSHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(sb);
19568016e29fSHarshad Shirwadkar 	struct ext4_fc_replay_state *state;
19578016e29fSHarshad Shirwadkar 	int ret = JBD2_FC_REPLAY_CONTINUE;
1958a7ba36bcSHarshad Shirwadkar 	struct ext4_fc_add_range ext;
1959a7ba36bcSHarshad Shirwadkar 	struct ext4_fc_tl tl;
1960a7ba36bcSHarshad Shirwadkar 	struct ext4_fc_tail tail;
1961a7ba36bcSHarshad Shirwadkar 	__u8 *start, *end, *cur, *val;
1962a7ba36bcSHarshad Shirwadkar 	struct ext4_fc_head head;
19638016e29fSHarshad Shirwadkar 	struct ext4_extent *ex;
19648016e29fSHarshad Shirwadkar 
19658016e29fSHarshad Shirwadkar 	state = &sbi->s_fc_replay_state;
19668016e29fSHarshad Shirwadkar 
19678016e29fSHarshad Shirwadkar 	start = (u8 *)bh->b_data;
19688016e29fSHarshad Shirwadkar 	end = (__u8 *)bh->b_data + journal->j_blocksize - 1;
19698016e29fSHarshad Shirwadkar 
19708016e29fSHarshad Shirwadkar 	if (state->fc_replay_expected_off == 0) {
19718016e29fSHarshad Shirwadkar 		state->fc_cur_tag = 0;
19728016e29fSHarshad Shirwadkar 		state->fc_replay_num_tags = 0;
19738016e29fSHarshad Shirwadkar 		state->fc_crc = 0;
19748016e29fSHarshad Shirwadkar 		state->fc_regions = NULL;
19758016e29fSHarshad Shirwadkar 		state->fc_regions_valid = state->fc_regions_used =
19768016e29fSHarshad Shirwadkar 			state->fc_regions_size = 0;
19778016e29fSHarshad Shirwadkar 		/* Check if we can stop early */
19788016e29fSHarshad Shirwadkar 		if (le16_to_cpu(((struct ext4_fc_tl *)start)->fc_tag)
19798016e29fSHarshad Shirwadkar 			!= EXT4_FC_TAG_HEAD)
19808016e29fSHarshad Shirwadkar 			return 0;
19818016e29fSHarshad Shirwadkar 	}
19828016e29fSHarshad Shirwadkar 
19838016e29fSHarshad Shirwadkar 	if (off != state->fc_replay_expected_off) {
19848016e29fSHarshad Shirwadkar 		ret = -EFSCORRUPTED;
19858016e29fSHarshad Shirwadkar 		goto out_err;
19868016e29fSHarshad Shirwadkar 	}
19878016e29fSHarshad Shirwadkar 
19888016e29fSHarshad Shirwadkar 	state->fc_replay_expected_off++;
1989a7ba36bcSHarshad Shirwadkar 	for (cur = start; cur < end; cur = cur + sizeof(tl) + le16_to_cpu(tl.fc_len)) {
1990a7ba36bcSHarshad Shirwadkar 		memcpy(&tl, cur, sizeof(tl));
1991a7ba36bcSHarshad Shirwadkar 		val = cur + sizeof(tl);
19928016e29fSHarshad Shirwadkar 		jbd_debug(3, "Scan phase, tag:%s, blk %lld\n",
1993a7ba36bcSHarshad Shirwadkar 			  tag2str(le16_to_cpu(tl.fc_tag)), bh->b_blocknr);
1994a7ba36bcSHarshad Shirwadkar 		switch (le16_to_cpu(tl.fc_tag)) {
19958016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_ADD_RANGE:
1996a7ba36bcSHarshad Shirwadkar 			memcpy(&ext, val, sizeof(ext));
1997a7ba36bcSHarshad Shirwadkar 			ex = (struct ext4_extent *)&ext.fc_ex;
19988016e29fSHarshad Shirwadkar 			ret = ext4_fc_record_regions(sb,
1999a7ba36bcSHarshad Shirwadkar 				le32_to_cpu(ext.fc_ino),
20008016e29fSHarshad Shirwadkar 				le32_to_cpu(ex->ee_block), ext4_ext_pblock(ex),
2001599ea31dSXin Yin 				ext4_ext_get_actual_len(ex), 0);
20028016e29fSHarshad Shirwadkar 			if (ret < 0)
20038016e29fSHarshad Shirwadkar 				break;
20048016e29fSHarshad Shirwadkar 			ret = JBD2_FC_REPLAY_CONTINUE;
20058016e29fSHarshad Shirwadkar 			fallthrough;
20068016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_DEL_RANGE:
20078016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_LINK:
20088016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_UNLINK:
20098016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_CREAT:
20108016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_INODE:
20118016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_PAD:
20128016e29fSHarshad Shirwadkar 			state->fc_cur_tag++;
2013a7ba36bcSHarshad Shirwadkar 			state->fc_crc = ext4_chksum(sbi, state->fc_crc, cur,
2014a7ba36bcSHarshad Shirwadkar 					sizeof(tl) + le16_to_cpu(tl.fc_len));
20158016e29fSHarshad Shirwadkar 			break;
20168016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_TAIL:
20178016e29fSHarshad Shirwadkar 			state->fc_cur_tag++;
2018a7ba36bcSHarshad Shirwadkar 			memcpy(&tail, val, sizeof(tail));
2019a7ba36bcSHarshad Shirwadkar 			state->fc_crc = ext4_chksum(sbi, state->fc_crc, cur,
2020a7ba36bcSHarshad Shirwadkar 						sizeof(tl) +
20218016e29fSHarshad Shirwadkar 						offsetof(struct ext4_fc_tail,
20228016e29fSHarshad Shirwadkar 						fc_crc));
2023a7ba36bcSHarshad Shirwadkar 			if (le32_to_cpu(tail.fc_tid) == expected_tid &&
2024a7ba36bcSHarshad Shirwadkar 				le32_to_cpu(tail.fc_crc) == state->fc_crc) {
20258016e29fSHarshad Shirwadkar 				state->fc_replay_num_tags = state->fc_cur_tag;
20268016e29fSHarshad Shirwadkar 				state->fc_regions_valid =
20278016e29fSHarshad Shirwadkar 					state->fc_regions_used;
20288016e29fSHarshad Shirwadkar 			} else {
20298016e29fSHarshad Shirwadkar 				ret = state->fc_replay_num_tags ?
20308016e29fSHarshad Shirwadkar 					JBD2_FC_REPLAY_STOP : -EFSBADCRC;
20318016e29fSHarshad Shirwadkar 			}
20328016e29fSHarshad Shirwadkar 			state->fc_crc = 0;
20338016e29fSHarshad Shirwadkar 			break;
20348016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_HEAD:
2035a7ba36bcSHarshad Shirwadkar 			memcpy(&head, val, sizeof(head));
2036a7ba36bcSHarshad Shirwadkar 			if (le32_to_cpu(head.fc_features) &
20378016e29fSHarshad Shirwadkar 				~EXT4_FC_SUPPORTED_FEATURES) {
20388016e29fSHarshad Shirwadkar 				ret = -EOPNOTSUPP;
20398016e29fSHarshad Shirwadkar 				break;
20408016e29fSHarshad Shirwadkar 			}
2041a7ba36bcSHarshad Shirwadkar 			if (le32_to_cpu(head.fc_tid) != expected_tid) {
20428016e29fSHarshad Shirwadkar 				ret = JBD2_FC_REPLAY_STOP;
20438016e29fSHarshad Shirwadkar 				break;
20448016e29fSHarshad Shirwadkar 			}
20458016e29fSHarshad Shirwadkar 			state->fc_cur_tag++;
2046a7ba36bcSHarshad Shirwadkar 			state->fc_crc = ext4_chksum(sbi, state->fc_crc, cur,
2047a7ba36bcSHarshad Shirwadkar 					    sizeof(tl) + le16_to_cpu(tl.fc_len));
20488016e29fSHarshad Shirwadkar 			break;
20498016e29fSHarshad Shirwadkar 		default:
20508016e29fSHarshad Shirwadkar 			ret = state->fc_replay_num_tags ?
20518016e29fSHarshad Shirwadkar 				JBD2_FC_REPLAY_STOP : -ECANCELED;
20528016e29fSHarshad Shirwadkar 		}
20538016e29fSHarshad Shirwadkar 		if (ret < 0 || ret == JBD2_FC_REPLAY_STOP)
20548016e29fSHarshad Shirwadkar 			break;
20558016e29fSHarshad Shirwadkar 	}
20568016e29fSHarshad Shirwadkar 
20578016e29fSHarshad Shirwadkar out_err:
20588016e29fSHarshad Shirwadkar 	trace_ext4_fc_replay_scan(sb, ret, off);
20598016e29fSHarshad Shirwadkar 	return ret;
20608016e29fSHarshad Shirwadkar }
20618016e29fSHarshad Shirwadkar 
20625b849b5fSHarshad Shirwadkar /*
20635b849b5fSHarshad Shirwadkar  * Main recovery path entry point.
20648016e29fSHarshad Shirwadkar  * The meaning of return codes is similar as above.
20655b849b5fSHarshad Shirwadkar  */
20665b849b5fSHarshad Shirwadkar static int ext4_fc_replay(journal_t *journal, struct buffer_head *bh,
20675b849b5fSHarshad Shirwadkar 				enum passtype pass, int off, tid_t expected_tid)
20685b849b5fSHarshad Shirwadkar {
20698016e29fSHarshad Shirwadkar 	struct super_block *sb = journal->j_private;
20708016e29fSHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(sb);
2071a7ba36bcSHarshad Shirwadkar 	struct ext4_fc_tl tl;
2072a7ba36bcSHarshad Shirwadkar 	__u8 *start, *end, *cur, *val;
20738016e29fSHarshad Shirwadkar 	int ret = JBD2_FC_REPLAY_CONTINUE;
20748016e29fSHarshad Shirwadkar 	struct ext4_fc_replay_state *state = &sbi->s_fc_replay_state;
2075a7ba36bcSHarshad Shirwadkar 	struct ext4_fc_tail tail;
20768016e29fSHarshad Shirwadkar 
20778016e29fSHarshad Shirwadkar 	if (pass == PASS_SCAN) {
20788016e29fSHarshad Shirwadkar 		state->fc_current_pass = PASS_SCAN;
20798016e29fSHarshad Shirwadkar 		return ext4_fc_replay_scan(journal, bh, off, expected_tid);
20808016e29fSHarshad Shirwadkar 	}
20818016e29fSHarshad Shirwadkar 
20828016e29fSHarshad Shirwadkar 	if (state->fc_current_pass != pass) {
20838016e29fSHarshad Shirwadkar 		state->fc_current_pass = pass;
20848016e29fSHarshad Shirwadkar 		sbi->s_mount_state |= EXT4_FC_REPLAY;
20858016e29fSHarshad Shirwadkar 	}
20868016e29fSHarshad Shirwadkar 	if (!sbi->s_fc_replay_state.fc_replay_num_tags) {
20878016e29fSHarshad Shirwadkar 		jbd_debug(1, "Replay stops\n");
20888016e29fSHarshad Shirwadkar 		ext4_fc_set_bitmaps_and_counters(sb);
20895b849b5fSHarshad Shirwadkar 		return 0;
20905b849b5fSHarshad Shirwadkar 	}
20915b849b5fSHarshad Shirwadkar 
20928016e29fSHarshad Shirwadkar #ifdef CONFIG_EXT4_DEBUG
20938016e29fSHarshad Shirwadkar 	if (sbi->s_fc_debug_max_replay && off >= sbi->s_fc_debug_max_replay) {
20948016e29fSHarshad Shirwadkar 		pr_warn("Dropping fc block %d because max_replay set\n", off);
20958016e29fSHarshad Shirwadkar 		return JBD2_FC_REPLAY_STOP;
20968016e29fSHarshad Shirwadkar 	}
20978016e29fSHarshad Shirwadkar #endif
20988016e29fSHarshad Shirwadkar 
20998016e29fSHarshad Shirwadkar 	start = (u8 *)bh->b_data;
21008016e29fSHarshad Shirwadkar 	end = (__u8 *)bh->b_data + journal->j_blocksize - 1;
21018016e29fSHarshad Shirwadkar 
2102a7ba36bcSHarshad Shirwadkar 	for (cur = start; cur < end; cur = cur + sizeof(tl) + le16_to_cpu(tl.fc_len)) {
2103a7ba36bcSHarshad Shirwadkar 		memcpy(&tl, cur, sizeof(tl));
2104a7ba36bcSHarshad Shirwadkar 		val = cur + sizeof(tl);
2105a7ba36bcSHarshad Shirwadkar 
21068016e29fSHarshad Shirwadkar 		if (state->fc_replay_num_tags == 0) {
21078016e29fSHarshad Shirwadkar 			ret = JBD2_FC_REPLAY_STOP;
21088016e29fSHarshad Shirwadkar 			ext4_fc_set_bitmaps_and_counters(sb);
21098016e29fSHarshad Shirwadkar 			break;
21108016e29fSHarshad Shirwadkar 		}
21118016e29fSHarshad Shirwadkar 		jbd_debug(3, "Replay phase, tag:%s\n",
2112a7ba36bcSHarshad Shirwadkar 				tag2str(le16_to_cpu(tl.fc_tag)));
21138016e29fSHarshad Shirwadkar 		state->fc_replay_num_tags--;
2114a7ba36bcSHarshad Shirwadkar 		switch (le16_to_cpu(tl.fc_tag)) {
21158016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_LINK:
2116a7ba36bcSHarshad Shirwadkar 			ret = ext4_fc_replay_link(sb, &tl, val);
21178016e29fSHarshad Shirwadkar 			break;
21188016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_UNLINK:
2119a7ba36bcSHarshad Shirwadkar 			ret = ext4_fc_replay_unlink(sb, &tl, val);
21208016e29fSHarshad Shirwadkar 			break;
21218016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_ADD_RANGE:
2122a7ba36bcSHarshad Shirwadkar 			ret = ext4_fc_replay_add_range(sb, &tl, val);
21238016e29fSHarshad Shirwadkar 			break;
21248016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_CREAT:
2125a7ba36bcSHarshad Shirwadkar 			ret = ext4_fc_replay_create(sb, &tl, val);
21268016e29fSHarshad Shirwadkar 			break;
21278016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_DEL_RANGE:
2128a7ba36bcSHarshad Shirwadkar 			ret = ext4_fc_replay_del_range(sb, &tl, val);
21298016e29fSHarshad Shirwadkar 			break;
21308016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_INODE:
2131a7ba36bcSHarshad Shirwadkar 			ret = ext4_fc_replay_inode(sb, &tl, val);
21328016e29fSHarshad Shirwadkar 			break;
21338016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_PAD:
21348016e29fSHarshad Shirwadkar 			trace_ext4_fc_replay(sb, EXT4_FC_TAG_PAD, 0,
2135a7ba36bcSHarshad Shirwadkar 					     le16_to_cpu(tl.fc_len), 0);
21368016e29fSHarshad Shirwadkar 			break;
21378016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_TAIL:
21388016e29fSHarshad Shirwadkar 			trace_ext4_fc_replay(sb, EXT4_FC_TAG_TAIL, 0,
2139a7ba36bcSHarshad Shirwadkar 					     le16_to_cpu(tl.fc_len), 0);
2140a7ba36bcSHarshad Shirwadkar 			memcpy(&tail, val, sizeof(tail));
2141a7ba36bcSHarshad Shirwadkar 			WARN_ON(le32_to_cpu(tail.fc_tid) != expected_tid);
21428016e29fSHarshad Shirwadkar 			break;
21438016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_HEAD:
21448016e29fSHarshad Shirwadkar 			break;
21458016e29fSHarshad Shirwadkar 		default:
2146a7ba36bcSHarshad Shirwadkar 			trace_ext4_fc_replay(sb, le16_to_cpu(tl.fc_tag), 0,
2147a7ba36bcSHarshad Shirwadkar 					     le16_to_cpu(tl.fc_len), 0);
21488016e29fSHarshad Shirwadkar 			ret = -ECANCELED;
21498016e29fSHarshad Shirwadkar 			break;
21508016e29fSHarshad Shirwadkar 		}
21518016e29fSHarshad Shirwadkar 		if (ret < 0)
21528016e29fSHarshad Shirwadkar 			break;
21538016e29fSHarshad Shirwadkar 		ret = JBD2_FC_REPLAY_CONTINUE;
21548016e29fSHarshad Shirwadkar 	}
21558016e29fSHarshad Shirwadkar 	return ret;
21568016e29fSHarshad Shirwadkar }
21578016e29fSHarshad Shirwadkar 
21586866d7b3SHarshad Shirwadkar void ext4_fc_init(struct super_block *sb, journal_t *journal)
21596866d7b3SHarshad Shirwadkar {
21605b849b5fSHarshad Shirwadkar 	/*
21615b849b5fSHarshad Shirwadkar 	 * We set replay callback even if fast commit disabled because we may
21625b849b5fSHarshad Shirwadkar 	 * could still have fast commit blocks that need to be replayed even if
21635b849b5fSHarshad Shirwadkar 	 * fast commit has now been turned off.
21645b849b5fSHarshad Shirwadkar 	 */
21655b849b5fSHarshad Shirwadkar 	journal->j_fc_replay_callback = ext4_fc_replay;
21666866d7b3SHarshad Shirwadkar 	if (!test_opt2(sb, JOURNAL_FAST_COMMIT))
21676866d7b3SHarshad Shirwadkar 		return;
2168ff780b91SHarshad Shirwadkar 	journal->j_fc_cleanup_callback = ext4_fc_cleanup;
21696866d7b3SHarshad Shirwadkar }
2170aa75f4d3SHarshad Shirwadkar 
/*
 * Human readable descriptions of why a fast commit was ineligible.
 * ext4_fc_info_show() indexes this table with values in the range
 * [0, EXT4_FC_REASON_MAX).
 *
 * NOTE(review): the entry order presumably mirrors the EXT4_FC_REASON_*
 * enum; confirm against its definition when adding a new reason.
 *
 * Both the strings and the pointer slots are const so that the whole table
 * is read-only.
 */
static const char * const fc_ineligible_reasons[] = {
	"Extended attributes changed",
	"Cross rename",
	"Journal flag changed",
	"Insufficient memory",
	"Swap boot",
	"Resize",
	"Dir renamed",
	"Falloc range op",
	"Data journalling",
	"FC Commit Failed"
};
2183ce8c59d1SHarshad Shirwadkar 
2184ce8c59d1SHarshad Shirwadkar int ext4_fc_info_show(struct seq_file *seq, void *v)
2185ce8c59d1SHarshad Shirwadkar {
2186ce8c59d1SHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB((struct super_block *)seq->private);
2187ce8c59d1SHarshad Shirwadkar 	struct ext4_fc_stats *stats = &sbi->s_fc_stats;
2188ce8c59d1SHarshad Shirwadkar 	int i;
2189ce8c59d1SHarshad Shirwadkar 
2190ce8c59d1SHarshad Shirwadkar 	if (v != SEQ_START_TOKEN)
2191ce8c59d1SHarshad Shirwadkar 		return 0;
2192ce8c59d1SHarshad Shirwadkar 
2193ce8c59d1SHarshad Shirwadkar 	seq_printf(seq,
2194ce8c59d1SHarshad Shirwadkar 		"fc stats:\n%ld commits\n%ld ineligible\n%ld numblks\n%lluus avg_commit_time\n",
2195ce8c59d1SHarshad Shirwadkar 		   stats->fc_num_commits, stats->fc_ineligible_commits,
2196ce8c59d1SHarshad Shirwadkar 		   stats->fc_numblks,
21970915e464SHarshad Shirwadkar 		   div_u64(stats->s_fc_avg_commit_time, 1000));
2198ce8c59d1SHarshad Shirwadkar 	seq_puts(seq, "Ineligible reasons:\n");
2199ce8c59d1SHarshad Shirwadkar 	for (i = 0; i < EXT4_FC_REASON_MAX; i++)
2200ce8c59d1SHarshad Shirwadkar 		seq_printf(seq, "\"%s\":\t%d\n", fc_ineligible_reasons[i],
2201ce8c59d1SHarshad Shirwadkar 			stats->fc_ineligible_reason_count[i]);
2202ce8c59d1SHarshad Shirwadkar 
2203ce8c59d1SHarshad Shirwadkar 	return 0;
2204ce8c59d1SHarshad Shirwadkar }
2205ce8c59d1SHarshad Shirwadkar 
2206aa75f4d3SHarshad Shirwadkar int __init ext4_fc_init_dentry_cache(void)
2207aa75f4d3SHarshad Shirwadkar {
2208aa75f4d3SHarshad Shirwadkar 	ext4_fc_dentry_cachep = KMEM_CACHE(ext4_fc_dentry_update,
2209aa75f4d3SHarshad Shirwadkar 					   SLAB_RECLAIM_ACCOUNT);
2210aa75f4d3SHarshad Shirwadkar 
2211aa75f4d3SHarshad Shirwadkar 	if (ext4_fc_dentry_cachep == NULL)
2212aa75f4d3SHarshad Shirwadkar 		return -ENOMEM;
2213aa75f4d3SHarshad Shirwadkar 
2214aa75f4d3SHarshad Shirwadkar 	return 0;
2215aa75f4d3SHarshad Shirwadkar }
2216ab047d51SSebastian Andrzej Siewior 
2217ab047d51SSebastian Andrzej Siewior void ext4_fc_destroy_dentry_cache(void)
2218ab047d51SSebastian Andrzej Siewior {
2219ab047d51SSebastian Andrzej Siewior 	kmem_cache_destroy(ext4_fc_dentry_cachep);
2220ab047d51SSebastian Andrzej Siewior }
2221