xref: /openbmc/linux/fs/ext4/fast_commit.c (revision 7bbbe241)
16866d7b3SHarshad Shirwadkar // SPDX-License-Identifier: GPL-2.0
26866d7b3SHarshad Shirwadkar 
36866d7b3SHarshad Shirwadkar /*
46866d7b3SHarshad Shirwadkar  * fs/ext4/fast_commit.c
56866d7b3SHarshad Shirwadkar  *
66866d7b3SHarshad Shirwadkar  * Written by Harshad Shirwadkar <harshadshirwadkar@gmail.com>
76866d7b3SHarshad Shirwadkar  *
86866d7b3SHarshad Shirwadkar  * Ext4 fast commits routines.
96866d7b3SHarshad Shirwadkar  */
10aa75f4d3SHarshad Shirwadkar #include "ext4.h"
116866d7b3SHarshad Shirwadkar #include "ext4_jbd2.h"
12aa75f4d3SHarshad Shirwadkar #include "ext4_extents.h"
13aa75f4d3SHarshad Shirwadkar #include "mballoc.h"
14aa75f4d3SHarshad Shirwadkar 
15aa75f4d3SHarshad Shirwadkar /*
16aa75f4d3SHarshad Shirwadkar  * Ext4 Fast Commits
17aa75f4d3SHarshad Shirwadkar  * -----------------
18aa75f4d3SHarshad Shirwadkar  *
19aa75f4d3SHarshad Shirwadkar  * Ext4 fast commits implement fine grained journalling for Ext4.
20aa75f4d3SHarshad Shirwadkar  *
21aa75f4d3SHarshad Shirwadkar  * Fast commits are organized as a log of tag-length-value (TLV) structs. (See
22aa75f4d3SHarshad Shirwadkar  * struct ext4_fc_tl). Each TLV contains some delta that is replayed TLV by
23aa75f4d3SHarshad Shirwadkar  * TLV during the recovery phase. For the scenarios for which we currently
24aa75f4d3SHarshad Shirwadkar  * don't have replay code, fast commit falls back to full commits.
25aa75f4d3SHarshad Shirwadkar  * Fast commits record delta in one of the following three categories.
26aa75f4d3SHarshad Shirwadkar  *
27aa75f4d3SHarshad Shirwadkar  * (A) Directory entry updates:
28aa75f4d3SHarshad Shirwadkar  *
29aa75f4d3SHarshad Shirwadkar  * - EXT4_FC_TAG_UNLINK		- records directory entry unlink
30aa75f4d3SHarshad Shirwadkar  * - EXT4_FC_TAG_LINK		- records directory entry link
31aa75f4d3SHarshad Shirwadkar  * - EXT4_FC_TAG_CREAT		- records inode and directory entry creation
32aa75f4d3SHarshad Shirwadkar  *
33aa75f4d3SHarshad Shirwadkar  * (B) File specific data range updates:
34aa75f4d3SHarshad Shirwadkar  *
35aa75f4d3SHarshad Shirwadkar  * - EXT4_FC_TAG_ADD_RANGE	- records addition of new blocks to an inode
36aa75f4d3SHarshad Shirwadkar  * - EXT4_FC_TAG_DEL_RANGE	- records deletion of blocks from an inode
37aa75f4d3SHarshad Shirwadkar  *
38aa75f4d3SHarshad Shirwadkar  * (C) Inode metadata (mtime / ctime etc):
39aa75f4d3SHarshad Shirwadkar  *
40aa75f4d3SHarshad Shirwadkar  * - EXT4_FC_TAG_INODE		- record the inode that should be replayed
41aa75f4d3SHarshad Shirwadkar  *				  during recovery. Note that iblocks field is
42aa75f4d3SHarshad Shirwadkar  *				  not replayed and instead derived during
43aa75f4d3SHarshad Shirwadkar  *				  replay.
44aa75f4d3SHarshad Shirwadkar  * Commit Operation
45aa75f4d3SHarshad Shirwadkar  * ----------------
46aa75f4d3SHarshad Shirwadkar  * With fast commits, we maintain all the directory entry operations in the
47aa75f4d3SHarshad Shirwadkar  * order in which they are issued in an in-memory queue. This queue is flushed
48aa75f4d3SHarshad Shirwadkar  * to disk during the commit operation. We also maintain a list of inodes
49aa75f4d3SHarshad Shirwadkar  * that need to be committed during a fast commit in another in memory queue of
50aa75f4d3SHarshad Shirwadkar  * inodes. During the commit operation, we commit in the following order:
51aa75f4d3SHarshad Shirwadkar  *
52aa75f4d3SHarshad Shirwadkar  * [1] Lock inodes for any further data updates by setting COMMITTING state
53aa75f4d3SHarshad Shirwadkar  * [2] Submit data buffers of all the inodes
54aa75f4d3SHarshad Shirwadkar  * [3] Wait for [2] to complete
55aa75f4d3SHarshad Shirwadkar  * [4] Commit all the directory entry updates in the fast commit space
56aa75f4d3SHarshad Shirwadkar  * [5] Commit all the changed inode structures
57aa75f4d3SHarshad Shirwadkar  * [6] Write tail tag (this tag ensures the atomicity, please read the following
58aa75f4d3SHarshad Shirwadkar  *     section for more details).
59aa75f4d3SHarshad Shirwadkar  * [7] Wait for [4], [5] and [6] to complete.
60aa75f4d3SHarshad Shirwadkar  *
61aa75f4d3SHarshad Shirwadkar  * All the inode updates must call ext4_fc_start_update() before starting an
62aa75f4d3SHarshad Shirwadkar  * update. If such an ongoing update is present, fast commit waits for it to
63aa75f4d3SHarshad Shirwadkar  * complete. The completion of such an update is marked by
64aa75f4d3SHarshad Shirwadkar  * ext4_fc_stop_update().
65aa75f4d3SHarshad Shirwadkar  *
66aa75f4d3SHarshad Shirwadkar  * Fast Commit Ineligibility
67aa75f4d3SHarshad Shirwadkar  * -------------------------
68*7bbbe241SHarshad Shirwadkar  *
 * Not all operations are supported by fast commits today (e.g. extended
 * attributes). Fast commit ineligibility is marked by calling
 * ext4_fc_mark_ineligible(): This makes the next fast commit operation fall
 * back to a full commit.
73aa75f4d3SHarshad Shirwadkar  *
74aa75f4d3SHarshad Shirwadkar  * Atomicity of commits
75aa75f4d3SHarshad Shirwadkar  * --------------------
76a740762fSHarshad Shirwadkar  * In order to guarantee atomicity during the commit operation, fast commit
77aa75f4d3SHarshad Shirwadkar  * uses "EXT4_FC_TAG_TAIL" tag that marks a fast commit as complete. Tail
78aa75f4d3SHarshad Shirwadkar  * tag contains CRC of the contents and TID of the transaction after which
79aa75f4d3SHarshad Shirwadkar  * this fast commit should be applied. Recovery code replays fast commit
80aa75f4d3SHarshad Shirwadkar  * logs only if there's at least 1 valid tail present. For every fast commit
81aa75f4d3SHarshad Shirwadkar  * operation, there is 1 tail. This means, we may end up with multiple tails
82aa75f4d3SHarshad Shirwadkar  * in the fast commit space. Here's an example:
83aa75f4d3SHarshad Shirwadkar  *
84aa75f4d3SHarshad Shirwadkar  * - Create a new file A and remove existing file B
85aa75f4d3SHarshad Shirwadkar  * - fsync()
86aa75f4d3SHarshad Shirwadkar  * - Append contents to file A
87aa75f4d3SHarshad Shirwadkar  * - Truncate file A
88aa75f4d3SHarshad Shirwadkar  * - fsync()
89aa75f4d3SHarshad Shirwadkar  *
90aa75f4d3SHarshad Shirwadkar  * The fast commit space at the end of above operations would look like this:
91aa75f4d3SHarshad Shirwadkar  *      [HEAD] [CREAT A] [UNLINK B] [TAIL] [ADD_RANGE A] [DEL_RANGE A] [TAIL]
92aa75f4d3SHarshad Shirwadkar  *             |<---  Fast Commit 1   --->|<---      Fast Commit 2     ---->|
93aa75f4d3SHarshad Shirwadkar  *
94aa75f4d3SHarshad Shirwadkar  * Replay code should thus check for all the valid tails in the FC area.
95aa75f4d3SHarshad Shirwadkar  *
96b1b7dce3SHarshad Shirwadkar  * Fast Commit Replay Idempotence
97b1b7dce3SHarshad Shirwadkar  * ------------------------------
98b1b7dce3SHarshad Shirwadkar  *
 * Fast commit tags are idempotent in nature provided the recovery code follows
100b1b7dce3SHarshad Shirwadkar  * certain rules. The guiding principle that the commit path follows while
101b1b7dce3SHarshad Shirwadkar  * committing is that it stores the result of a particular operation instead of
102b1b7dce3SHarshad Shirwadkar  * storing the procedure.
103b1b7dce3SHarshad Shirwadkar  *
104b1b7dce3SHarshad Shirwadkar  * Let's consider this rename operation: 'mv /a /b'. Let's assume dirent '/a'
105b1b7dce3SHarshad Shirwadkar  * was associated with inode 10. During fast commit, instead of storing this
106b1b7dce3SHarshad Shirwadkar  * operation as a procedure "rename a to b", we store the resulting file system
107b1b7dce3SHarshad Shirwadkar  * state as a "series" of outcomes:
108b1b7dce3SHarshad Shirwadkar  *
109b1b7dce3SHarshad Shirwadkar  * - Link dirent b to inode 10
110b1b7dce3SHarshad Shirwadkar  * - Unlink dirent a
111b1b7dce3SHarshad Shirwadkar  * - Inode <10> with valid refcount
112b1b7dce3SHarshad Shirwadkar  *
 * Now when recovery code runs, it needs to "enforce" this state on the file
114b1b7dce3SHarshad Shirwadkar  * system. This is what guarantees idempotence of fast commit replay.
115b1b7dce3SHarshad Shirwadkar  *
116b1b7dce3SHarshad Shirwadkar  * Let's take an example of a procedure that is not idempotent and see how fast
117b1b7dce3SHarshad Shirwadkar  * commits make it idempotent. Consider following sequence of operations:
118b1b7dce3SHarshad Shirwadkar  *
119b1b7dce3SHarshad Shirwadkar  *     rm A;    mv B A;    read A
120b1b7dce3SHarshad Shirwadkar  *  (x)     (y)        (z)
121b1b7dce3SHarshad Shirwadkar  *
122b1b7dce3SHarshad Shirwadkar  * (x), (y) and (z) are the points at which we can crash. If we store this
123b1b7dce3SHarshad Shirwadkar  * sequence of operations as is then the replay is not idempotent. Let's say
124b1b7dce3SHarshad Shirwadkar  * while in replay, we crash at (z). During the second replay, file A (which was
125b1b7dce3SHarshad Shirwadkar  * actually created as a result of "mv B A" operation) would get deleted. Thus,
126b1b7dce3SHarshad Shirwadkar  * file named A would be absent when we try to read A. So, this sequence of
127b1b7dce3SHarshad Shirwadkar  * operations is not idempotent. However, as mentioned above, instead of storing
128b1b7dce3SHarshad Shirwadkar  * the procedure fast commits store the outcome of each procedure. Thus the fast
129b1b7dce3SHarshad Shirwadkar  * commit log for above procedure would be as follows:
130b1b7dce3SHarshad Shirwadkar  *
131b1b7dce3SHarshad Shirwadkar  * (Let's assume dirent A was linked to inode 10 and dirent B was linked to
132b1b7dce3SHarshad Shirwadkar  * inode 11 before the replay)
133b1b7dce3SHarshad Shirwadkar  *
134b1b7dce3SHarshad Shirwadkar  *    [Unlink A]   [Link A to inode 11]   [Unlink B]   [Inode 11]
135b1b7dce3SHarshad Shirwadkar  * (w)          (x)                    (y)          (z)
136b1b7dce3SHarshad Shirwadkar  *
137b1b7dce3SHarshad Shirwadkar  * If we crash at (z), we will have file A linked to inode 11. During the second
138b1b7dce3SHarshad Shirwadkar  * replay, we will remove file A (inode 11). But we will create it back and make
139b1b7dce3SHarshad Shirwadkar  * it point to inode 11. We won't find B, so we'll just skip that step. At this
140b1b7dce3SHarshad Shirwadkar  * point, the refcount for inode 11 is not reliable, but that gets fixed by the
141b1b7dce3SHarshad Shirwadkar  * replay of last inode 11 tag. Crashes at points (w), (x) and (y) get handled
142b1b7dce3SHarshad Shirwadkar  * similarly. Thus, by converting a non-idempotent procedure into a series of
143b1b7dce3SHarshad Shirwadkar  * idempotent outcomes, fast commits ensured idempotence during the replay.
144b1b7dce3SHarshad Shirwadkar  *
145aa75f4d3SHarshad Shirwadkar  * TODOs
146aa75f4d3SHarshad Shirwadkar  * -----
147b1b7dce3SHarshad Shirwadkar  *
148b1b7dce3SHarshad Shirwadkar  * 0) Fast commit replay path hardening: Fast commit replay code should use
149b1b7dce3SHarshad Shirwadkar  *    journal handles to make sure all the updates it does during the replay
150b1b7dce3SHarshad Shirwadkar  *    path are atomic. With that if we crash during fast commit replay, after
151b1b7dce3SHarshad Shirwadkar  *    trying to do recovery again, we will find a file system where fast commit
152b1b7dce3SHarshad Shirwadkar  *    area is invalid (because new full commit would be found). In order to deal
153b1b7dce3SHarshad Shirwadkar  *    with that, fast commit replay code should ensure that the "FC_REPLAY"
154b1b7dce3SHarshad Shirwadkar  *    superblock state is persisted before starting the replay, so that after
155b1b7dce3SHarshad Shirwadkar  *    the crash, fast commit recovery code can look at that flag and perform
156b1b7dce3SHarshad Shirwadkar  *    fast commit recovery even if that area is invalidated by later full
157b1b7dce3SHarshad Shirwadkar  *    commits.
158b1b7dce3SHarshad Shirwadkar  *
159aa75f4d3SHarshad Shirwadkar  * 1) Make fast commit atomic updates more fine grained. Today, a fast commit
160aa75f4d3SHarshad Shirwadkar  *    eligible update must be protected within ext4_fc_start_update() and
161aa75f4d3SHarshad Shirwadkar  *    ext4_fc_stop_update(). These routines are called at much higher
162aa75f4d3SHarshad Shirwadkar  *    routines. This can be made more fine grained by combining with
163aa75f4d3SHarshad Shirwadkar  *    ext4_journal_start().
164aa75f4d3SHarshad Shirwadkar  *
165aa75f4d3SHarshad Shirwadkar  * 2) Same above for ext4_fc_start_ineligible() and ext4_fc_stop_ineligible()
166aa75f4d3SHarshad Shirwadkar  *
167aa75f4d3SHarshad Shirwadkar  * 3) Handle more ineligible cases.
168aa75f4d3SHarshad Shirwadkar  */
169aa75f4d3SHarshad Shirwadkar 
170aa75f4d3SHarshad Shirwadkar #include <trace/events/ext4.h>
171aa75f4d3SHarshad Shirwadkar static struct kmem_cache *ext4_fc_dentry_cachep;
172aa75f4d3SHarshad Shirwadkar 
173aa75f4d3SHarshad Shirwadkar static void ext4_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
174aa75f4d3SHarshad Shirwadkar {
175aa75f4d3SHarshad Shirwadkar 	BUFFER_TRACE(bh, "");
176aa75f4d3SHarshad Shirwadkar 	if (uptodate) {
177aa75f4d3SHarshad Shirwadkar 		ext4_debug("%s: Block %lld up-to-date",
178aa75f4d3SHarshad Shirwadkar 			   __func__, bh->b_blocknr);
179aa75f4d3SHarshad Shirwadkar 		set_buffer_uptodate(bh);
180aa75f4d3SHarshad Shirwadkar 	} else {
181aa75f4d3SHarshad Shirwadkar 		ext4_debug("%s: Block %lld not up-to-date",
182aa75f4d3SHarshad Shirwadkar 			   __func__, bh->b_blocknr);
183aa75f4d3SHarshad Shirwadkar 		clear_buffer_uptodate(bh);
184aa75f4d3SHarshad Shirwadkar 	}
185aa75f4d3SHarshad Shirwadkar 
186aa75f4d3SHarshad Shirwadkar 	unlock_buffer(bh);
187aa75f4d3SHarshad Shirwadkar }
188aa75f4d3SHarshad Shirwadkar 
189aa75f4d3SHarshad Shirwadkar static inline void ext4_fc_reset_inode(struct inode *inode)
190aa75f4d3SHarshad Shirwadkar {
191aa75f4d3SHarshad Shirwadkar 	struct ext4_inode_info *ei = EXT4_I(inode);
192aa75f4d3SHarshad Shirwadkar 
193aa75f4d3SHarshad Shirwadkar 	ei->i_fc_lblk_start = 0;
194aa75f4d3SHarshad Shirwadkar 	ei->i_fc_lblk_len = 0;
195aa75f4d3SHarshad Shirwadkar }
196aa75f4d3SHarshad Shirwadkar 
197aa75f4d3SHarshad Shirwadkar void ext4_fc_init_inode(struct inode *inode)
198aa75f4d3SHarshad Shirwadkar {
199aa75f4d3SHarshad Shirwadkar 	struct ext4_inode_info *ei = EXT4_I(inode);
200aa75f4d3SHarshad Shirwadkar 
201aa75f4d3SHarshad Shirwadkar 	ext4_fc_reset_inode(inode);
202aa75f4d3SHarshad Shirwadkar 	ext4_clear_inode_state(inode, EXT4_STATE_FC_COMMITTING);
203aa75f4d3SHarshad Shirwadkar 	INIT_LIST_HEAD(&ei->i_fc_list);
204aa75f4d3SHarshad Shirwadkar 	init_waitqueue_head(&ei->i_fc_wait);
205aa75f4d3SHarshad Shirwadkar 	atomic_set(&ei->i_fc_updates, 0);
206aa75f4d3SHarshad Shirwadkar }
207aa75f4d3SHarshad Shirwadkar 
208f6634e26SHarshad Shirwadkar /* This function must be called with sbi->s_fc_lock held. */
209f6634e26SHarshad Shirwadkar static void ext4_fc_wait_committing_inode(struct inode *inode)
210fa329e27STheodore Ts'o __releases(&EXT4_SB(inode->i_sb)->s_fc_lock)
211f6634e26SHarshad Shirwadkar {
212f6634e26SHarshad Shirwadkar 	wait_queue_head_t *wq;
213f6634e26SHarshad Shirwadkar 	struct ext4_inode_info *ei = EXT4_I(inode);
214f6634e26SHarshad Shirwadkar 
215f6634e26SHarshad Shirwadkar #if (BITS_PER_LONG < 64)
216f6634e26SHarshad Shirwadkar 	DEFINE_WAIT_BIT(wait, &ei->i_state_flags,
217f6634e26SHarshad Shirwadkar 			EXT4_STATE_FC_COMMITTING);
218f6634e26SHarshad Shirwadkar 	wq = bit_waitqueue(&ei->i_state_flags,
219f6634e26SHarshad Shirwadkar 				EXT4_STATE_FC_COMMITTING);
220f6634e26SHarshad Shirwadkar #else
221f6634e26SHarshad Shirwadkar 	DEFINE_WAIT_BIT(wait, &ei->i_flags,
222f6634e26SHarshad Shirwadkar 			EXT4_STATE_FC_COMMITTING);
223f6634e26SHarshad Shirwadkar 	wq = bit_waitqueue(&ei->i_flags,
224f6634e26SHarshad Shirwadkar 				EXT4_STATE_FC_COMMITTING);
225f6634e26SHarshad Shirwadkar #endif
226f6634e26SHarshad Shirwadkar 	lockdep_assert_held(&EXT4_SB(inode->i_sb)->s_fc_lock);
227f6634e26SHarshad Shirwadkar 	prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
228f6634e26SHarshad Shirwadkar 	spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
229f6634e26SHarshad Shirwadkar 	schedule();
230f6634e26SHarshad Shirwadkar 	finish_wait(wq, &wait.wq_entry);
231f6634e26SHarshad Shirwadkar }
232f6634e26SHarshad Shirwadkar 
233aa75f4d3SHarshad Shirwadkar /*
234aa75f4d3SHarshad Shirwadkar  * Inform Ext4's fast about start of an inode update
235aa75f4d3SHarshad Shirwadkar  *
236aa75f4d3SHarshad Shirwadkar  * This function is called by the high level call VFS callbacks before
237aa75f4d3SHarshad Shirwadkar  * performing any inode update. This function blocks if there's an ongoing
238aa75f4d3SHarshad Shirwadkar  * fast commit on the inode in question.
239aa75f4d3SHarshad Shirwadkar  */
240aa75f4d3SHarshad Shirwadkar void ext4_fc_start_update(struct inode *inode)
241aa75f4d3SHarshad Shirwadkar {
242aa75f4d3SHarshad Shirwadkar 	struct ext4_inode_info *ei = EXT4_I(inode);
243aa75f4d3SHarshad Shirwadkar 
2448016e29fSHarshad Shirwadkar 	if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) ||
2458016e29fSHarshad Shirwadkar 	    (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY))
246aa75f4d3SHarshad Shirwadkar 		return;
247aa75f4d3SHarshad Shirwadkar 
248aa75f4d3SHarshad Shirwadkar restart:
249aa75f4d3SHarshad Shirwadkar 	spin_lock(&EXT4_SB(inode->i_sb)->s_fc_lock);
250aa75f4d3SHarshad Shirwadkar 	if (list_empty(&ei->i_fc_list))
251aa75f4d3SHarshad Shirwadkar 		goto out;
252aa75f4d3SHarshad Shirwadkar 
253aa75f4d3SHarshad Shirwadkar 	if (ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING)) {
254f6634e26SHarshad Shirwadkar 		ext4_fc_wait_committing_inode(inode);
255aa75f4d3SHarshad Shirwadkar 		goto restart;
256aa75f4d3SHarshad Shirwadkar 	}
257aa75f4d3SHarshad Shirwadkar out:
258aa75f4d3SHarshad Shirwadkar 	atomic_inc(&ei->i_fc_updates);
259aa75f4d3SHarshad Shirwadkar 	spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
260aa75f4d3SHarshad Shirwadkar }
261aa75f4d3SHarshad Shirwadkar 
262aa75f4d3SHarshad Shirwadkar /*
263aa75f4d3SHarshad Shirwadkar  * Stop inode update and wake up waiting fast commits if any.
264aa75f4d3SHarshad Shirwadkar  */
265aa75f4d3SHarshad Shirwadkar void ext4_fc_stop_update(struct inode *inode)
266aa75f4d3SHarshad Shirwadkar {
267aa75f4d3SHarshad Shirwadkar 	struct ext4_inode_info *ei = EXT4_I(inode);
268aa75f4d3SHarshad Shirwadkar 
2698016e29fSHarshad Shirwadkar 	if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) ||
2708016e29fSHarshad Shirwadkar 	    (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY))
271aa75f4d3SHarshad Shirwadkar 		return;
272aa75f4d3SHarshad Shirwadkar 
273aa75f4d3SHarshad Shirwadkar 	if (atomic_dec_and_test(&ei->i_fc_updates))
274aa75f4d3SHarshad Shirwadkar 		wake_up_all(&ei->i_fc_wait);
275aa75f4d3SHarshad Shirwadkar }
276aa75f4d3SHarshad Shirwadkar 
277aa75f4d3SHarshad Shirwadkar /*
278aa75f4d3SHarshad Shirwadkar  * Remove inode from fast commit list. If the inode is being committed
279aa75f4d3SHarshad Shirwadkar  * we wait until inode commit is done.
280aa75f4d3SHarshad Shirwadkar  */
281aa75f4d3SHarshad Shirwadkar void ext4_fc_del(struct inode *inode)
282aa75f4d3SHarshad Shirwadkar {
283aa75f4d3SHarshad Shirwadkar 	struct ext4_inode_info *ei = EXT4_I(inode);
284aa75f4d3SHarshad Shirwadkar 
2858016e29fSHarshad Shirwadkar 	if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) ||
2868016e29fSHarshad Shirwadkar 	    (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY))
287aa75f4d3SHarshad Shirwadkar 		return;
288aa75f4d3SHarshad Shirwadkar 
289aa75f4d3SHarshad Shirwadkar restart:
290aa75f4d3SHarshad Shirwadkar 	spin_lock(&EXT4_SB(inode->i_sb)->s_fc_lock);
291aa75f4d3SHarshad Shirwadkar 	if (list_empty(&ei->i_fc_list)) {
292aa75f4d3SHarshad Shirwadkar 		spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
293aa75f4d3SHarshad Shirwadkar 		return;
294aa75f4d3SHarshad Shirwadkar 	}
295aa75f4d3SHarshad Shirwadkar 
296aa75f4d3SHarshad Shirwadkar 	if (ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING)) {
297f6634e26SHarshad Shirwadkar 		ext4_fc_wait_committing_inode(inode);
298aa75f4d3SHarshad Shirwadkar 		goto restart;
299aa75f4d3SHarshad Shirwadkar 	}
300aa75f4d3SHarshad Shirwadkar 	list_del_init(&ei->i_fc_list);
301aa75f4d3SHarshad Shirwadkar 	spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
302aa75f4d3SHarshad Shirwadkar }
303aa75f4d3SHarshad Shirwadkar 
304aa75f4d3SHarshad Shirwadkar /*
305aa75f4d3SHarshad Shirwadkar  * Mark file system as fast commit ineligible. This means that next commit
306aa75f4d3SHarshad Shirwadkar  * operation would result in a full jbd2 commit.
307aa75f4d3SHarshad Shirwadkar  */
308aa75f4d3SHarshad Shirwadkar void ext4_fc_mark_ineligible(struct super_block *sb, int reason)
309aa75f4d3SHarshad Shirwadkar {
310aa75f4d3SHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(sb);
311aa75f4d3SHarshad Shirwadkar 
3128016e29fSHarshad Shirwadkar 	if (!test_opt2(sb, JOURNAL_FAST_COMMIT) ||
3138016e29fSHarshad Shirwadkar 	    (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY))
3148016e29fSHarshad Shirwadkar 		return;
3158016e29fSHarshad Shirwadkar 
3169b5f6c9bSHarshad Shirwadkar 	ext4_set_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
317aa75f4d3SHarshad Shirwadkar 	WARN_ON(reason >= EXT4_FC_REASON_MAX);
318aa75f4d3SHarshad Shirwadkar 	sbi->s_fc_stats.fc_ineligible_reason_count[reason]++;
319aa75f4d3SHarshad Shirwadkar }
320aa75f4d3SHarshad Shirwadkar 
321aa75f4d3SHarshad Shirwadkar /*
322aa75f4d3SHarshad Shirwadkar  * Generic fast commit tracking function. If this is the first time this we are
323aa75f4d3SHarshad Shirwadkar  * called after a full commit, we initialize fast commit fields and then call
324aa75f4d3SHarshad Shirwadkar  * __fc_track_fn() with update = 0. If we have already been called after a full
325aa75f4d3SHarshad Shirwadkar  * commit, we pass update = 1. Based on that, the track function can determine
326aa75f4d3SHarshad Shirwadkar  * if it needs to track a field for the first time or if it needs to just
327aa75f4d3SHarshad Shirwadkar  * update the previously tracked value.
328aa75f4d3SHarshad Shirwadkar  *
329aa75f4d3SHarshad Shirwadkar  * If enqueue is set, this function enqueues the inode in fast commit list.
330aa75f4d3SHarshad Shirwadkar  */
331aa75f4d3SHarshad Shirwadkar static int ext4_fc_track_template(
332a80f7fcfSHarshad Shirwadkar 	handle_t *handle, struct inode *inode,
333a80f7fcfSHarshad Shirwadkar 	int (*__fc_track_fn)(struct inode *, void *, bool),
334aa75f4d3SHarshad Shirwadkar 	void *args, int enqueue)
335aa75f4d3SHarshad Shirwadkar {
336aa75f4d3SHarshad Shirwadkar 	bool update = false;
337aa75f4d3SHarshad Shirwadkar 	struct ext4_inode_info *ei = EXT4_I(inode);
338aa75f4d3SHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
339a80f7fcfSHarshad Shirwadkar 	tid_t tid = 0;
340aa75f4d3SHarshad Shirwadkar 	int ret;
341aa75f4d3SHarshad Shirwadkar 
3428016e29fSHarshad Shirwadkar 	if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) ||
3438016e29fSHarshad Shirwadkar 	    (sbi->s_mount_state & EXT4_FC_REPLAY))
344aa75f4d3SHarshad Shirwadkar 		return -EOPNOTSUPP;
345aa75f4d3SHarshad Shirwadkar 
346*7bbbe241SHarshad Shirwadkar 	if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE))
347aa75f4d3SHarshad Shirwadkar 		return -EINVAL;
348aa75f4d3SHarshad Shirwadkar 
349a80f7fcfSHarshad Shirwadkar 	tid = handle->h_transaction->t_tid;
350aa75f4d3SHarshad Shirwadkar 	mutex_lock(&ei->i_fc_lock);
351a80f7fcfSHarshad Shirwadkar 	if (tid == ei->i_sync_tid) {
352aa75f4d3SHarshad Shirwadkar 		update = true;
353aa75f4d3SHarshad Shirwadkar 	} else {
354aa75f4d3SHarshad Shirwadkar 		ext4_fc_reset_inode(inode);
355a80f7fcfSHarshad Shirwadkar 		ei->i_sync_tid = tid;
356aa75f4d3SHarshad Shirwadkar 	}
357aa75f4d3SHarshad Shirwadkar 	ret = __fc_track_fn(inode, args, update);
358aa75f4d3SHarshad Shirwadkar 	mutex_unlock(&ei->i_fc_lock);
359aa75f4d3SHarshad Shirwadkar 
360aa75f4d3SHarshad Shirwadkar 	if (!enqueue)
361aa75f4d3SHarshad Shirwadkar 		return ret;
362aa75f4d3SHarshad Shirwadkar 
363aa75f4d3SHarshad Shirwadkar 	spin_lock(&sbi->s_fc_lock);
364aa75f4d3SHarshad Shirwadkar 	if (list_empty(&EXT4_I(inode)->i_fc_list))
365aa75f4d3SHarshad Shirwadkar 		list_add_tail(&EXT4_I(inode)->i_fc_list,
3669b5f6c9bSHarshad Shirwadkar 				(ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_COMMITTING)) ?
367aa75f4d3SHarshad Shirwadkar 				&sbi->s_fc_q[FC_Q_STAGING] :
368aa75f4d3SHarshad Shirwadkar 				&sbi->s_fc_q[FC_Q_MAIN]);
369aa75f4d3SHarshad Shirwadkar 	spin_unlock(&sbi->s_fc_lock);
370aa75f4d3SHarshad Shirwadkar 
371aa75f4d3SHarshad Shirwadkar 	return ret;
372aa75f4d3SHarshad Shirwadkar }
373aa75f4d3SHarshad Shirwadkar 
/* Argument bundle handed to __track_dentry_update() through its void *arg. */
struct __track_dentry_update_args {
	/* Directory entry the operation applies to. */
	struct dentry *dentry;
	/* Operation tag (EXT4_FC_TAG_UNLINK, EXT4_FC_TAG_LINK or EXT4_FC_TAG_CREAT). */
	int op;
};
378aa75f4d3SHarshad Shirwadkar 
379aa75f4d3SHarshad Shirwadkar /* __track_fn for directory entry updates. Called with ei->i_fc_lock. */
380aa75f4d3SHarshad Shirwadkar static int __track_dentry_update(struct inode *inode, void *arg, bool update)
381aa75f4d3SHarshad Shirwadkar {
382aa75f4d3SHarshad Shirwadkar 	struct ext4_fc_dentry_update *node;
383aa75f4d3SHarshad Shirwadkar 	struct ext4_inode_info *ei = EXT4_I(inode);
384aa75f4d3SHarshad Shirwadkar 	struct __track_dentry_update_args *dentry_update =
385aa75f4d3SHarshad Shirwadkar 		(struct __track_dentry_update_args *)arg;
386aa75f4d3SHarshad Shirwadkar 	struct dentry *dentry = dentry_update->dentry;
387aa75f4d3SHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
388aa75f4d3SHarshad Shirwadkar 
389aa75f4d3SHarshad Shirwadkar 	mutex_unlock(&ei->i_fc_lock);
390aa75f4d3SHarshad Shirwadkar 	node = kmem_cache_alloc(ext4_fc_dentry_cachep, GFP_NOFS);
391aa75f4d3SHarshad Shirwadkar 	if (!node) {
392b21ebf14SHarshad Shirwadkar 		ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM);
393aa75f4d3SHarshad Shirwadkar 		mutex_lock(&ei->i_fc_lock);
394aa75f4d3SHarshad Shirwadkar 		return -ENOMEM;
395aa75f4d3SHarshad Shirwadkar 	}
396aa75f4d3SHarshad Shirwadkar 
397aa75f4d3SHarshad Shirwadkar 	node->fcd_op = dentry_update->op;
398aa75f4d3SHarshad Shirwadkar 	node->fcd_parent = dentry->d_parent->d_inode->i_ino;
399aa75f4d3SHarshad Shirwadkar 	node->fcd_ino = inode->i_ino;
400aa75f4d3SHarshad Shirwadkar 	if (dentry->d_name.len > DNAME_INLINE_LEN) {
401aa75f4d3SHarshad Shirwadkar 		node->fcd_name.name = kmalloc(dentry->d_name.len, GFP_NOFS);
402aa75f4d3SHarshad Shirwadkar 		if (!node->fcd_name.name) {
403aa75f4d3SHarshad Shirwadkar 			kmem_cache_free(ext4_fc_dentry_cachep, node);
404aa75f4d3SHarshad Shirwadkar 			ext4_fc_mark_ineligible(inode->i_sb,
405b21ebf14SHarshad Shirwadkar 				EXT4_FC_REASON_NOMEM);
406aa75f4d3SHarshad Shirwadkar 			mutex_lock(&ei->i_fc_lock);
407aa75f4d3SHarshad Shirwadkar 			return -ENOMEM;
408aa75f4d3SHarshad Shirwadkar 		}
409aa75f4d3SHarshad Shirwadkar 		memcpy((u8 *)node->fcd_name.name, dentry->d_name.name,
410aa75f4d3SHarshad Shirwadkar 			dentry->d_name.len);
411aa75f4d3SHarshad Shirwadkar 	} else {
412aa75f4d3SHarshad Shirwadkar 		memcpy(node->fcd_iname, dentry->d_name.name,
413aa75f4d3SHarshad Shirwadkar 			dentry->d_name.len);
414aa75f4d3SHarshad Shirwadkar 		node->fcd_name.name = node->fcd_iname;
415aa75f4d3SHarshad Shirwadkar 	}
416aa75f4d3SHarshad Shirwadkar 	node->fcd_name.len = dentry->d_name.len;
417aa75f4d3SHarshad Shirwadkar 
418aa75f4d3SHarshad Shirwadkar 	spin_lock(&sbi->s_fc_lock);
4199b5f6c9bSHarshad Shirwadkar 	if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_COMMITTING))
420aa75f4d3SHarshad Shirwadkar 		list_add_tail(&node->fcd_list,
421aa75f4d3SHarshad Shirwadkar 				&sbi->s_fc_dentry_q[FC_Q_STAGING]);
422aa75f4d3SHarshad Shirwadkar 	else
423aa75f4d3SHarshad Shirwadkar 		list_add_tail(&node->fcd_list, &sbi->s_fc_dentry_q[FC_Q_MAIN]);
424aa75f4d3SHarshad Shirwadkar 	spin_unlock(&sbi->s_fc_lock);
425aa75f4d3SHarshad Shirwadkar 	mutex_lock(&ei->i_fc_lock);
426aa75f4d3SHarshad Shirwadkar 
427aa75f4d3SHarshad Shirwadkar 	return 0;
428aa75f4d3SHarshad Shirwadkar }
429aa75f4d3SHarshad Shirwadkar 
430a80f7fcfSHarshad Shirwadkar void __ext4_fc_track_unlink(handle_t *handle,
431a80f7fcfSHarshad Shirwadkar 		struct inode *inode, struct dentry *dentry)
432aa75f4d3SHarshad Shirwadkar {
433aa75f4d3SHarshad Shirwadkar 	struct __track_dentry_update_args args;
434aa75f4d3SHarshad Shirwadkar 	int ret;
435aa75f4d3SHarshad Shirwadkar 
436aa75f4d3SHarshad Shirwadkar 	args.dentry = dentry;
437aa75f4d3SHarshad Shirwadkar 	args.op = EXT4_FC_TAG_UNLINK;
438aa75f4d3SHarshad Shirwadkar 
439a80f7fcfSHarshad Shirwadkar 	ret = ext4_fc_track_template(handle, inode, __track_dentry_update,
440aa75f4d3SHarshad Shirwadkar 					(void *)&args, 0);
441aa75f4d3SHarshad Shirwadkar 	trace_ext4_fc_track_unlink(inode, dentry, ret);
442aa75f4d3SHarshad Shirwadkar }
443aa75f4d3SHarshad Shirwadkar 
444a80f7fcfSHarshad Shirwadkar void ext4_fc_track_unlink(handle_t *handle, struct dentry *dentry)
445a80f7fcfSHarshad Shirwadkar {
446a80f7fcfSHarshad Shirwadkar 	__ext4_fc_track_unlink(handle, d_inode(dentry), dentry);
447a80f7fcfSHarshad Shirwadkar }
448a80f7fcfSHarshad Shirwadkar 
449a80f7fcfSHarshad Shirwadkar void __ext4_fc_track_link(handle_t *handle,
450a80f7fcfSHarshad Shirwadkar 	struct inode *inode, struct dentry *dentry)
451aa75f4d3SHarshad Shirwadkar {
452aa75f4d3SHarshad Shirwadkar 	struct __track_dentry_update_args args;
453aa75f4d3SHarshad Shirwadkar 	int ret;
454aa75f4d3SHarshad Shirwadkar 
455aa75f4d3SHarshad Shirwadkar 	args.dentry = dentry;
456aa75f4d3SHarshad Shirwadkar 	args.op = EXT4_FC_TAG_LINK;
457aa75f4d3SHarshad Shirwadkar 
458a80f7fcfSHarshad Shirwadkar 	ret = ext4_fc_track_template(handle, inode, __track_dentry_update,
459aa75f4d3SHarshad Shirwadkar 					(void *)&args, 0);
460aa75f4d3SHarshad Shirwadkar 	trace_ext4_fc_track_link(inode, dentry, ret);
461aa75f4d3SHarshad Shirwadkar }
462aa75f4d3SHarshad Shirwadkar 
463a80f7fcfSHarshad Shirwadkar void ext4_fc_track_link(handle_t *handle, struct dentry *dentry)
464a80f7fcfSHarshad Shirwadkar {
465a80f7fcfSHarshad Shirwadkar 	__ext4_fc_track_link(handle, d_inode(dentry), dentry);
466a80f7fcfSHarshad Shirwadkar }
467a80f7fcfSHarshad Shirwadkar 
4688210bb29SHarshad Shirwadkar void __ext4_fc_track_create(handle_t *handle, struct inode *inode,
4698210bb29SHarshad Shirwadkar 			  struct dentry *dentry)
470aa75f4d3SHarshad Shirwadkar {
471aa75f4d3SHarshad Shirwadkar 	struct __track_dentry_update_args args;
472aa75f4d3SHarshad Shirwadkar 	int ret;
473aa75f4d3SHarshad Shirwadkar 
474aa75f4d3SHarshad Shirwadkar 	args.dentry = dentry;
475aa75f4d3SHarshad Shirwadkar 	args.op = EXT4_FC_TAG_CREAT;
476aa75f4d3SHarshad Shirwadkar 
477a80f7fcfSHarshad Shirwadkar 	ret = ext4_fc_track_template(handle, inode, __track_dentry_update,
478aa75f4d3SHarshad Shirwadkar 					(void *)&args, 0);
479aa75f4d3SHarshad Shirwadkar 	trace_ext4_fc_track_create(inode, dentry, ret);
480aa75f4d3SHarshad Shirwadkar }
481aa75f4d3SHarshad Shirwadkar 
4828210bb29SHarshad Shirwadkar void ext4_fc_track_create(handle_t *handle, struct dentry *dentry)
4838210bb29SHarshad Shirwadkar {
4848210bb29SHarshad Shirwadkar 	__ext4_fc_track_create(handle, d_inode(dentry), dentry);
4858210bb29SHarshad Shirwadkar }
4868210bb29SHarshad Shirwadkar 
487aa75f4d3SHarshad Shirwadkar /* __track_fn for inode tracking */
488aa75f4d3SHarshad Shirwadkar static int __track_inode(struct inode *inode, void *arg, bool update)
489aa75f4d3SHarshad Shirwadkar {
490aa75f4d3SHarshad Shirwadkar 	if (update)
491aa75f4d3SHarshad Shirwadkar 		return -EEXIST;
492aa75f4d3SHarshad Shirwadkar 
493aa75f4d3SHarshad Shirwadkar 	EXT4_I(inode)->i_fc_lblk_len = 0;
494aa75f4d3SHarshad Shirwadkar 
495aa75f4d3SHarshad Shirwadkar 	return 0;
496aa75f4d3SHarshad Shirwadkar }
497aa75f4d3SHarshad Shirwadkar 
498a80f7fcfSHarshad Shirwadkar void ext4_fc_track_inode(handle_t *handle, struct inode *inode)
499aa75f4d3SHarshad Shirwadkar {
500aa75f4d3SHarshad Shirwadkar 	int ret;
501aa75f4d3SHarshad Shirwadkar 
502aa75f4d3SHarshad Shirwadkar 	if (S_ISDIR(inode->i_mode))
503aa75f4d3SHarshad Shirwadkar 		return;
504aa75f4d3SHarshad Shirwadkar 
505556e0319SHarshad Shirwadkar 	if (ext4_should_journal_data(inode)) {
506556e0319SHarshad Shirwadkar 		ext4_fc_mark_ineligible(inode->i_sb,
507556e0319SHarshad Shirwadkar 					EXT4_FC_REASON_INODE_JOURNAL_DATA);
508556e0319SHarshad Shirwadkar 		return;
509556e0319SHarshad Shirwadkar 	}
510556e0319SHarshad Shirwadkar 
511a80f7fcfSHarshad Shirwadkar 	ret = ext4_fc_track_template(handle, inode, __track_inode, NULL, 1);
512aa75f4d3SHarshad Shirwadkar 	trace_ext4_fc_track_inode(inode, ret);
513aa75f4d3SHarshad Shirwadkar }
514aa75f4d3SHarshad Shirwadkar 
515aa75f4d3SHarshad Shirwadkar struct __track_range_args {
516aa75f4d3SHarshad Shirwadkar 	ext4_lblk_t start, end;
517aa75f4d3SHarshad Shirwadkar };
518aa75f4d3SHarshad Shirwadkar 
519aa75f4d3SHarshad Shirwadkar /* __track_fn for tracking data updates */
520aa75f4d3SHarshad Shirwadkar static int __track_range(struct inode *inode, void *arg, bool update)
521aa75f4d3SHarshad Shirwadkar {
522aa75f4d3SHarshad Shirwadkar 	struct ext4_inode_info *ei = EXT4_I(inode);
523aa75f4d3SHarshad Shirwadkar 	ext4_lblk_t oldstart;
524aa75f4d3SHarshad Shirwadkar 	struct __track_range_args *__arg =
525aa75f4d3SHarshad Shirwadkar 		(struct __track_range_args *)arg;
526aa75f4d3SHarshad Shirwadkar 
527aa75f4d3SHarshad Shirwadkar 	if (inode->i_ino < EXT4_FIRST_INO(inode->i_sb)) {
528aa75f4d3SHarshad Shirwadkar 		ext4_debug("Special inode %ld being modified\n", inode->i_ino);
529aa75f4d3SHarshad Shirwadkar 		return -ECANCELED;
530aa75f4d3SHarshad Shirwadkar 	}
531aa75f4d3SHarshad Shirwadkar 
532aa75f4d3SHarshad Shirwadkar 	oldstart = ei->i_fc_lblk_start;
533aa75f4d3SHarshad Shirwadkar 
534aa75f4d3SHarshad Shirwadkar 	if (update && ei->i_fc_lblk_len > 0) {
535aa75f4d3SHarshad Shirwadkar 		ei->i_fc_lblk_start = min(ei->i_fc_lblk_start, __arg->start);
536aa75f4d3SHarshad Shirwadkar 		ei->i_fc_lblk_len =
537aa75f4d3SHarshad Shirwadkar 			max(oldstart + ei->i_fc_lblk_len - 1, __arg->end) -
538aa75f4d3SHarshad Shirwadkar 				ei->i_fc_lblk_start + 1;
539aa75f4d3SHarshad Shirwadkar 	} else {
540aa75f4d3SHarshad Shirwadkar 		ei->i_fc_lblk_start = __arg->start;
541aa75f4d3SHarshad Shirwadkar 		ei->i_fc_lblk_len = __arg->end - __arg->start + 1;
542aa75f4d3SHarshad Shirwadkar 	}
543aa75f4d3SHarshad Shirwadkar 
544aa75f4d3SHarshad Shirwadkar 	return 0;
545aa75f4d3SHarshad Shirwadkar }
546aa75f4d3SHarshad Shirwadkar 
547a80f7fcfSHarshad Shirwadkar void ext4_fc_track_range(handle_t *handle, struct inode *inode, ext4_lblk_t start,
548aa75f4d3SHarshad Shirwadkar 			 ext4_lblk_t end)
549aa75f4d3SHarshad Shirwadkar {
550aa75f4d3SHarshad Shirwadkar 	struct __track_range_args args;
551aa75f4d3SHarshad Shirwadkar 	int ret;
552aa75f4d3SHarshad Shirwadkar 
553aa75f4d3SHarshad Shirwadkar 	if (S_ISDIR(inode->i_mode))
554aa75f4d3SHarshad Shirwadkar 		return;
555aa75f4d3SHarshad Shirwadkar 
556aa75f4d3SHarshad Shirwadkar 	args.start = start;
557aa75f4d3SHarshad Shirwadkar 	args.end = end;
558aa75f4d3SHarshad Shirwadkar 
559a80f7fcfSHarshad Shirwadkar 	ret = ext4_fc_track_template(handle, inode,  __track_range, &args, 1);
560aa75f4d3SHarshad Shirwadkar 
561aa75f4d3SHarshad Shirwadkar 	trace_ext4_fc_track_range(inode, start, end, ret);
562aa75f4d3SHarshad Shirwadkar }
563aa75f4d3SHarshad Shirwadkar 
564e9f53353SDaejun Park static void ext4_fc_submit_bh(struct super_block *sb, bool is_tail)
565aa75f4d3SHarshad Shirwadkar {
566aa75f4d3SHarshad Shirwadkar 	int write_flags = REQ_SYNC;
567aa75f4d3SHarshad Shirwadkar 	struct buffer_head *bh = EXT4_SB(sb)->s_fc_bh;
568aa75f4d3SHarshad Shirwadkar 
569e9f53353SDaejun Park 	/* Add REQ_FUA | REQ_PREFLUSH only its tail */
570e9f53353SDaejun Park 	if (test_opt(sb, BARRIER) && is_tail)
571aa75f4d3SHarshad Shirwadkar 		write_flags |= REQ_FUA | REQ_PREFLUSH;
572aa75f4d3SHarshad Shirwadkar 	lock_buffer(bh);
573764b3fd3SHarshad Shirwadkar 	set_buffer_dirty(bh);
574aa75f4d3SHarshad Shirwadkar 	set_buffer_uptodate(bh);
575aa75f4d3SHarshad Shirwadkar 	bh->b_end_io = ext4_end_buffer_io_sync;
576aa75f4d3SHarshad Shirwadkar 	submit_bh(REQ_OP_WRITE, write_flags, bh);
577aa75f4d3SHarshad Shirwadkar 	EXT4_SB(sb)->s_fc_bh = NULL;
578aa75f4d3SHarshad Shirwadkar }
579aa75f4d3SHarshad Shirwadkar 
580aa75f4d3SHarshad Shirwadkar /* Ext4 commit path routines */
581aa75f4d3SHarshad Shirwadkar 
582aa75f4d3SHarshad Shirwadkar /* memzero and update CRC */
583aa75f4d3SHarshad Shirwadkar static void *ext4_fc_memzero(struct super_block *sb, void *dst, int len,
584aa75f4d3SHarshad Shirwadkar 				u32 *crc)
585aa75f4d3SHarshad Shirwadkar {
586aa75f4d3SHarshad Shirwadkar 	void *ret;
587aa75f4d3SHarshad Shirwadkar 
588aa75f4d3SHarshad Shirwadkar 	ret = memset(dst, 0, len);
589aa75f4d3SHarshad Shirwadkar 	if (crc)
590aa75f4d3SHarshad Shirwadkar 		*crc = ext4_chksum(EXT4_SB(sb), *crc, dst, len);
591aa75f4d3SHarshad Shirwadkar 	return ret;
592aa75f4d3SHarshad Shirwadkar }
593aa75f4d3SHarshad Shirwadkar 
594aa75f4d3SHarshad Shirwadkar /*
595aa75f4d3SHarshad Shirwadkar  * Allocate len bytes on a fast commit buffer.
596aa75f4d3SHarshad Shirwadkar  *
597aa75f4d3SHarshad Shirwadkar  * During the commit time this function is used to manage fast commit
598aa75f4d3SHarshad Shirwadkar  * block space. We don't split a fast commit log onto different
599aa75f4d3SHarshad Shirwadkar  * blocks. So this function makes sure that if there's not enough space
600aa75f4d3SHarshad Shirwadkar  * on the current block, the remaining space in the current block is
601aa75f4d3SHarshad Shirwadkar  * marked as unused by adding EXT4_FC_TAG_PAD tag. In that case,
602aa75f4d3SHarshad Shirwadkar  * new block is from jbd2 and CRC is updated to reflect the padding
603aa75f4d3SHarshad Shirwadkar  * we added.
604aa75f4d3SHarshad Shirwadkar  */
605aa75f4d3SHarshad Shirwadkar static u8 *ext4_fc_reserve_space(struct super_block *sb, int len, u32 *crc)
606aa75f4d3SHarshad Shirwadkar {
607aa75f4d3SHarshad Shirwadkar 	struct ext4_fc_tl *tl;
608aa75f4d3SHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(sb);
609aa75f4d3SHarshad Shirwadkar 	struct buffer_head *bh;
610aa75f4d3SHarshad Shirwadkar 	int bsize = sbi->s_journal->j_blocksize;
611aa75f4d3SHarshad Shirwadkar 	int ret, off = sbi->s_fc_bytes % bsize;
612aa75f4d3SHarshad Shirwadkar 	int pad_len;
613aa75f4d3SHarshad Shirwadkar 
614aa75f4d3SHarshad Shirwadkar 	/*
615aa75f4d3SHarshad Shirwadkar 	 * After allocating len, we should have space at least for a 0 byte
616aa75f4d3SHarshad Shirwadkar 	 * padding.
617aa75f4d3SHarshad Shirwadkar 	 */
618aa75f4d3SHarshad Shirwadkar 	if (len + sizeof(struct ext4_fc_tl) > bsize)
619aa75f4d3SHarshad Shirwadkar 		return NULL;
620aa75f4d3SHarshad Shirwadkar 
621aa75f4d3SHarshad Shirwadkar 	if (bsize - off - 1 > len + sizeof(struct ext4_fc_tl)) {
622aa75f4d3SHarshad Shirwadkar 		/*
623aa75f4d3SHarshad Shirwadkar 		 * Only allocate from current buffer if we have enough space for
624aa75f4d3SHarshad Shirwadkar 		 * this request AND we have space to add a zero byte padding.
625aa75f4d3SHarshad Shirwadkar 		 */
626aa75f4d3SHarshad Shirwadkar 		if (!sbi->s_fc_bh) {
627aa75f4d3SHarshad Shirwadkar 			ret = jbd2_fc_get_buf(EXT4_SB(sb)->s_journal, &bh);
628aa75f4d3SHarshad Shirwadkar 			if (ret)
629aa75f4d3SHarshad Shirwadkar 				return NULL;
630aa75f4d3SHarshad Shirwadkar 			sbi->s_fc_bh = bh;
631aa75f4d3SHarshad Shirwadkar 		}
632aa75f4d3SHarshad Shirwadkar 		sbi->s_fc_bytes += len;
633aa75f4d3SHarshad Shirwadkar 		return sbi->s_fc_bh->b_data + off;
634aa75f4d3SHarshad Shirwadkar 	}
635aa75f4d3SHarshad Shirwadkar 	/* Need to add PAD tag */
636aa75f4d3SHarshad Shirwadkar 	tl = (struct ext4_fc_tl *)(sbi->s_fc_bh->b_data + off);
637aa75f4d3SHarshad Shirwadkar 	tl->fc_tag = cpu_to_le16(EXT4_FC_TAG_PAD);
638aa75f4d3SHarshad Shirwadkar 	pad_len = bsize - off - 1 - sizeof(struct ext4_fc_tl);
639aa75f4d3SHarshad Shirwadkar 	tl->fc_len = cpu_to_le16(pad_len);
640aa75f4d3SHarshad Shirwadkar 	if (crc)
641aa75f4d3SHarshad Shirwadkar 		*crc = ext4_chksum(sbi, *crc, tl, sizeof(*tl));
642aa75f4d3SHarshad Shirwadkar 	if (pad_len > 0)
643aa75f4d3SHarshad Shirwadkar 		ext4_fc_memzero(sb, tl + 1, pad_len, crc);
644e9f53353SDaejun Park 	ext4_fc_submit_bh(sb, false);
645aa75f4d3SHarshad Shirwadkar 
646aa75f4d3SHarshad Shirwadkar 	ret = jbd2_fc_get_buf(EXT4_SB(sb)->s_journal, &bh);
647aa75f4d3SHarshad Shirwadkar 	if (ret)
648aa75f4d3SHarshad Shirwadkar 		return NULL;
649aa75f4d3SHarshad Shirwadkar 	sbi->s_fc_bh = bh;
650aa75f4d3SHarshad Shirwadkar 	sbi->s_fc_bytes = (sbi->s_fc_bytes / bsize + 1) * bsize + len;
651aa75f4d3SHarshad Shirwadkar 	return sbi->s_fc_bh->b_data;
652aa75f4d3SHarshad Shirwadkar }
653aa75f4d3SHarshad Shirwadkar 
654aa75f4d3SHarshad Shirwadkar /* memcpy to fc reserved space and update CRC */
655aa75f4d3SHarshad Shirwadkar static void *ext4_fc_memcpy(struct super_block *sb, void *dst, const void *src,
656aa75f4d3SHarshad Shirwadkar 				int len, u32 *crc)
657aa75f4d3SHarshad Shirwadkar {
658aa75f4d3SHarshad Shirwadkar 	if (crc)
659aa75f4d3SHarshad Shirwadkar 		*crc = ext4_chksum(EXT4_SB(sb), *crc, src, len);
660aa75f4d3SHarshad Shirwadkar 	return memcpy(dst, src, len);
661aa75f4d3SHarshad Shirwadkar }
662aa75f4d3SHarshad Shirwadkar 
663aa75f4d3SHarshad Shirwadkar /*
664aa75f4d3SHarshad Shirwadkar  * Complete a fast commit by writing tail tag.
665aa75f4d3SHarshad Shirwadkar  *
666aa75f4d3SHarshad Shirwadkar  * Writing tail tag marks the end of a fast commit. In order to guarantee
667aa75f4d3SHarshad Shirwadkar  * atomicity, after writing tail tag, even if there's space remaining
668aa75f4d3SHarshad Shirwadkar  * in the block, next commit shouldn't use it. That's why tail tag
669aa75f4d3SHarshad Shirwadkar  * has the length as that of the remaining space on the block.
670aa75f4d3SHarshad Shirwadkar  */
671aa75f4d3SHarshad Shirwadkar static int ext4_fc_write_tail(struct super_block *sb, u32 crc)
672aa75f4d3SHarshad Shirwadkar {
673aa75f4d3SHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(sb);
674aa75f4d3SHarshad Shirwadkar 	struct ext4_fc_tl tl;
675aa75f4d3SHarshad Shirwadkar 	struct ext4_fc_tail tail;
676aa75f4d3SHarshad Shirwadkar 	int off, bsize = sbi->s_journal->j_blocksize;
677aa75f4d3SHarshad Shirwadkar 	u8 *dst;
678aa75f4d3SHarshad Shirwadkar 
679aa75f4d3SHarshad Shirwadkar 	/*
680aa75f4d3SHarshad Shirwadkar 	 * ext4_fc_reserve_space takes care of allocating an extra block if
681aa75f4d3SHarshad Shirwadkar 	 * there's no enough space on this block for accommodating this tail.
682aa75f4d3SHarshad Shirwadkar 	 */
683aa75f4d3SHarshad Shirwadkar 	dst = ext4_fc_reserve_space(sb, sizeof(tl) + sizeof(tail), &crc);
684aa75f4d3SHarshad Shirwadkar 	if (!dst)
685aa75f4d3SHarshad Shirwadkar 		return -ENOSPC;
686aa75f4d3SHarshad Shirwadkar 
687aa75f4d3SHarshad Shirwadkar 	off = sbi->s_fc_bytes % bsize;
688aa75f4d3SHarshad Shirwadkar 
689aa75f4d3SHarshad Shirwadkar 	tl.fc_tag = cpu_to_le16(EXT4_FC_TAG_TAIL);
690aa75f4d3SHarshad Shirwadkar 	tl.fc_len = cpu_to_le16(bsize - off - 1 + sizeof(struct ext4_fc_tail));
691aa75f4d3SHarshad Shirwadkar 	sbi->s_fc_bytes = round_up(sbi->s_fc_bytes, bsize);
692aa75f4d3SHarshad Shirwadkar 
693aa75f4d3SHarshad Shirwadkar 	ext4_fc_memcpy(sb, dst, &tl, sizeof(tl), &crc);
694aa75f4d3SHarshad Shirwadkar 	dst += sizeof(tl);
695aa75f4d3SHarshad Shirwadkar 	tail.fc_tid = cpu_to_le32(sbi->s_journal->j_running_transaction->t_tid);
696aa75f4d3SHarshad Shirwadkar 	ext4_fc_memcpy(sb, dst, &tail.fc_tid, sizeof(tail.fc_tid), &crc);
697aa75f4d3SHarshad Shirwadkar 	dst += sizeof(tail.fc_tid);
698aa75f4d3SHarshad Shirwadkar 	tail.fc_crc = cpu_to_le32(crc);
699aa75f4d3SHarshad Shirwadkar 	ext4_fc_memcpy(sb, dst, &tail.fc_crc, sizeof(tail.fc_crc), NULL);
700aa75f4d3SHarshad Shirwadkar 
701e9f53353SDaejun Park 	ext4_fc_submit_bh(sb, true);
702aa75f4d3SHarshad Shirwadkar 
703aa75f4d3SHarshad Shirwadkar 	return 0;
704aa75f4d3SHarshad Shirwadkar }
705aa75f4d3SHarshad Shirwadkar 
706aa75f4d3SHarshad Shirwadkar /*
707aa75f4d3SHarshad Shirwadkar  * Adds tag, length, value and updates CRC. Returns true if tlv was added.
708aa75f4d3SHarshad Shirwadkar  * Returns false if there's not enough space.
709aa75f4d3SHarshad Shirwadkar  */
710aa75f4d3SHarshad Shirwadkar static bool ext4_fc_add_tlv(struct super_block *sb, u16 tag, u16 len, u8 *val,
711aa75f4d3SHarshad Shirwadkar 			   u32 *crc)
712aa75f4d3SHarshad Shirwadkar {
713aa75f4d3SHarshad Shirwadkar 	struct ext4_fc_tl tl;
714aa75f4d3SHarshad Shirwadkar 	u8 *dst;
715aa75f4d3SHarshad Shirwadkar 
716aa75f4d3SHarshad Shirwadkar 	dst = ext4_fc_reserve_space(sb, sizeof(tl) + len, crc);
717aa75f4d3SHarshad Shirwadkar 	if (!dst)
718aa75f4d3SHarshad Shirwadkar 		return false;
719aa75f4d3SHarshad Shirwadkar 
720aa75f4d3SHarshad Shirwadkar 	tl.fc_tag = cpu_to_le16(tag);
721aa75f4d3SHarshad Shirwadkar 	tl.fc_len = cpu_to_le16(len);
722aa75f4d3SHarshad Shirwadkar 
723aa75f4d3SHarshad Shirwadkar 	ext4_fc_memcpy(sb, dst, &tl, sizeof(tl), crc);
724aa75f4d3SHarshad Shirwadkar 	ext4_fc_memcpy(sb, dst + sizeof(tl), val, len, crc);
725aa75f4d3SHarshad Shirwadkar 
726aa75f4d3SHarshad Shirwadkar 	return true;
727aa75f4d3SHarshad Shirwadkar }
728aa75f4d3SHarshad Shirwadkar 
729aa75f4d3SHarshad Shirwadkar /* Same as above, but adds dentry tlv. */
730facec450SGuoqing Jiang static bool ext4_fc_add_dentry_tlv(struct super_block *sb, u32 *crc,
731facec450SGuoqing Jiang 				   struct ext4_fc_dentry_update *fc_dentry)
732aa75f4d3SHarshad Shirwadkar {
733aa75f4d3SHarshad Shirwadkar 	struct ext4_fc_dentry_info fcd;
734aa75f4d3SHarshad Shirwadkar 	struct ext4_fc_tl tl;
735facec450SGuoqing Jiang 	int dlen = fc_dentry->fcd_name.len;
736aa75f4d3SHarshad Shirwadkar 	u8 *dst = ext4_fc_reserve_space(sb, sizeof(tl) + sizeof(fcd) + dlen,
737aa75f4d3SHarshad Shirwadkar 					crc);
738aa75f4d3SHarshad Shirwadkar 
739aa75f4d3SHarshad Shirwadkar 	if (!dst)
740aa75f4d3SHarshad Shirwadkar 		return false;
741aa75f4d3SHarshad Shirwadkar 
742facec450SGuoqing Jiang 	fcd.fc_parent_ino = cpu_to_le32(fc_dentry->fcd_parent);
743facec450SGuoqing Jiang 	fcd.fc_ino = cpu_to_le32(fc_dentry->fcd_ino);
744facec450SGuoqing Jiang 	tl.fc_tag = cpu_to_le16(fc_dentry->fcd_op);
745aa75f4d3SHarshad Shirwadkar 	tl.fc_len = cpu_to_le16(sizeof(fcd) + dlen);
746aa75f4d3SHarshad Shirwadkar 	ext4_fc_memcpy(sb, dst, &tl, sizeof(tl), crc);
747aa75f4d3SHarshad Shirwadkar 	dst += sizeof(tl);
748aa75f4d3SHarshad Shirwadkar 	ext4_fc_memcpy(sb, dst, &fcd, sizeof(fcd), crc);
749aa75f4d3SHarshad Shirwadkar 	dst += sizeof(fcd);
750facec450SGuoqing Jiang 	ext4_fc_memcpy(sb, dst, fc_dentry->fcd_name.name, dlen, crc);
751aa75f4d3SHarshad Shirwadkar 	dst += dlen;
752aa75f4d3SHarshad Shirwadkar 
753aa75f4d3SHarshad Shirwadkar 	return true;
754aa75f4d3SHarshad Shirwadkar }
755aa75f4d3SHarshad Shirwadkar 
756aa75f4d3SHarshad Shirwadkar /*
757aa75f4d3SHarshad Shirwadkar  * Writes inode in the fast commit space under TLV with tag @tag.
758aa75f4d3SHarshad Shirwadkar  * Returns 0 on success, error on failure.
759aa75f4d3SHarshad Shirwadkar  */
760aa75f4d3SHarshad Shirwadkar static int ext4_fc_write_inode(struct inode *inode, u32 *crc)
761aa75f4d3SHarshad Shirwadkar {
762aa75f4d3SHarshad Shirwadkar 	struct ext4_inode_info *ei = EXT4_I(inode);
763aa75f4d3SHarshad Shirwadkar 	int inode_len = EXT4_GOOD_OLD_INODE_SIZE;
764aa75f4d3SHarshad Shirwadkar 	int ret;
765aa75f4d3SHarshad Shirwadkar 	struct ext4_iloc iloc;
766aa75f4d3SHarshad Shirwadkar 	struct ext4_fc_inode fc_inode;
767aa75f4d3SHarshad Shirwadkar 	struct ext4_fc_tl tl;
768aa75f4d3SHarshad Shirwadkar 	u8 *dst;
769aa75f4d3SHarshad Shirwadkar 
770aa75f4d3SHarshad Shirwadkar 	ret = ext4_get_inode_loc(inode, &iloc);
771aa75f4d3SHarshad Shirwadkar 	if (ret)
772aa75f4d3SHarshad Shirwadkar 		return ret;
773aa75f4d3SHarshad Shirwadkar 
7746c31a689SHarshad Shirwadkar 	if (ext4_test_inode_flag(inode, EXT4_INODE_INLINE_DATA))
7756c31a689SHarshad Shirwadkar 		inode_len = EXT4_INODE_SIZE(inode->i_sb);
7766c31a689SHarshad Shirwadkar 	else if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE)
777aa75f4d3SHarshad Shirwadkar 		inode_len += ei->i_extra_isize;
778aa75f4d3SHarshad Shirwadkar 
779aa75f4d3SHarshad Shirwadkar 	fc_inode.fc_ino = cpu_to_le32(inode->i_ino);
780aa75f4d3SHarshad Shirwadkar 	tl.fc_tag = cpu_to_le16(EXT4_FC_TAG_INODE);
781aa75f4d3SHarshad Shirwadkar 	tl.fc_len = cpu_to_le16(inode_len + sizeof(fc_inode.fc_ino));
782aa75f4d3SHarshad Shirwadkar 
783aa75f4d3SHarshad Shirwadkar 	dst = ext4_fc_reserve_space(inode->i_sb,
784aa75f4d3SHarshad Shirwadkar 			sizeof(tl) + inode_len + sizeof(fc_inode.fc_ino), crc);
785aa75f4d3SHarshad Shirwadkar 	if (!dst)
786aa75f4d3SHarshad Shirwadkar 		return -ECANCELED;
787aa75f4d3SHarshad Shirwadkar 
788aa75f4d3SHarshad Shirwadkar 	if (!ext4_fc_memcpy(inode->i_sb, dst, &tl, sizeof(tl), crc))
789aa75f4d3SHarshad Shirwadkar 		return -ECANCELED;
790aa75f4d3SHarshad Shirwadkar 	dst += sizeof(tl);
791aa75f4d3SHarshad Shirwadkar 	if (!ext4_fc_memcpy(inode->i_sb, dst, &fc_inode, sizeof(fc_inode), crc))
792aa75f4d3SHarshad Shirwadkar 		return -ECANCELED;
793aa75f4d3SHarshad Shirwadkar 	dst += sizeof(fc_inode);
794aa75f4d3SHarshad Shirwadkar 	if (!ext4_fc_memcpy(inode->i_sb, dst, (u8 *)ext4_raw_inode(&iloc),
795aa75f4d3SHarshad Shirwadkar 					inode_len, crc))
796aa75f4d3SHarshad Shirwadkar 		return -ECANCELED;
797aa75f4d3SHarshad Shirwadkar 
798aa75f4d3SHarshad Shirwadkar 	return 0;
799aa75f4d3SHarshad Shirwadkar }
800aa75f4d3SHarshad Shirwadkar 
801aa75f4d3SHarshad Shirwadkar /*
802aa75f4d3SHarshad Shirwadkar  * Writes updated data ranges for the inode in question. Updates CRC.
803aa75f4d3SHarshad Shirwadkar  * Returns 0 on success, error otherwise.
804aa75f4d3SHarshad Shirwadkar  */
805aa75f4d3SHarshad Shirwadkar static int ext4_fc_write_inode_data(struct inode *inode, u32 *crc)
806aa75f4d3SHarshad Shirwadkar {
807aa75f4d3SHarshad Shirwadkar 	ext4_lblk_t old_blk_size, cur_lblk_off, new_blk_size;
808aa75f4d3SHarshad Shirwadkar 	struct ext4_inode_info *ei = EXT4_I(inode);
809aa75f4d3SHarshad Shirwadkar 	struct ext4_map_blocks map;
810aa75f4d3SHarshad Shirwadkar 	struct ext4_fc_add_range fc_ext;
811aa75f4d3SHarshad Shirwadkar 	struct ext4_fc_del_range lrange;
812aa75f4d3SHarshad Shirwadkar 	struct ext4_extent *ex;
813aa75f4d3SHarshad Shirwadkar 	int ret;
814aa75f4d3SHarshad Shirwadkar 
815aa75f4d3SHarshad Shirwadkar 	mutex_lock(&ei->i_fc_lock);
816aa75f4d3SHarshad Shirwadkar 	if (ei->i_fc_lblk_len == 0) {
817aa75f4d3SHarshad Shirwadkar 		mutex_unlock(&ei->i_fc_lock);
818aa75f4d3SHarshad Shirwadkar 		return 0;
819aa75f4d3SHarshad Shirwadkar 	}
820aa75f4d3SHarshad Shirwadkar 	old_blk_size = ei->i_fc_lblk_start;
821aa75f4d3SHarshad Shirwadkar 	new_blk_size = ei->i_fc_lblk_start + ei->i_fc_lblk_len - 1;
822aa75f4d3SHarshad Shirwadkar 	ei->i_fc_lblk_len = 0;
823aa75f4d3SHarshad Shirwadkar 	mutex_unlock(&ei->i_fc_lock);
824aa75f4d3SHarshad Shirwadkar 
825aa75f4d3SHarshad Shirwadkar 	cur_lblk_off = old_blk_size;
826aa75f4d3SHarshad Shirwadkar 	jbd_debug(1, "%s: will try writing %d to %d for inode %ld\n",
827aa75f4d3SHarshad Shirwadkar 		  __func__, cur_lblk_off, new_blk_size, inode->i_ino);
828aa75f4d3SHarshad Shirwadkar 
829aa75f4d3SHarshad Shirwadkar 	while (cur_lblk_off <= new_blk_size) {
830aa75f4d3SHarshad Shirwadkar 		map.m_lblk = cur_lblk_off;
831aa75f4d3SHarshad Shirwadkar 		map.m_len = new_blk_size - cur_lblk_off + 1;
832aa75f4d3SHarshad Shirwadkar 		ret = ext4_map_blocks(NULL, inode, &map, 0);
833aa75f4d3SHarshad Shirwadkar 		if (ret < 0)
834aa75f4d3SHarshad Shirwadkar 			return -ECANCELED;
835aa75f4d3SHarshad Shirwadkar 
836aa75f4d3SHarshad Shirwadkar 		if (map.m_len == 0) {
837aa75f4d3SHarshad Shirwadkar 			cur_lblk_off++;
838aa75f4d3SHarshad Shirwadkar 			continue;
839aa75f4d3SHarshad Shirwadkar 		}
840aa75f4d3SHarshad Shirwadkar 
841aa75f4d3SHarshad Shirwadkar 		if (ret == 0) {
842aa75f4d3SHarshad Shirwadkar 			lrange.fc_ino = cpu_to_le32(inode->i_ino);
843aa75f4d3SHarshad Shirwadkar 			lrange.fc_lblk = cpu_to_le32(map.m_lblk);
844aa75f4d3SHarshad Shirwadkar 			lrange.fc_len = cpu_to_le32(map.m_len);
845aa75f4d3SHarshad Shirwadkar 			if (!ext4_fc_add_tlv(inode->i_sb, EXT4_FC_TAG_DEL_RANGE,
846aa75f4d3SHarshad Shirwadkar 					    sizeof(lrange), (u8 *)&lrange, crc))
847aa75f4d3SHarshad Shirwadkar 				return -ENOSPC;
848aa75f4d3SHarshad Shirwadkar 		} else {
849a2c2f082SHou Tao 			unsigned int max = (map.m_flags & EXT4_MAP_UNWRITTEN) ?
850a2c2f082SHou Tao 				EXT_UNWRITTEN_MAX_LEN : EXT_INIT_MAX_LEN;
851a2c2f082SHou Tao 
852a2c2f082SHou Tao 			/* Limit the number of blocks in one extent */
853a2c2f082SHou Tao 			map.m_len = min(max, map.m_len);
854a2c2f082SHou Tao 
855aa75f4d3SHarshad Shirwadkar 			fc_ext.fc_ino = cpu_to_le32(inode->i_ino);
856aa75f4d3SHarshad Shirwadkar 			ex = (struct ext4_extent *)&fc_ext.fc_ex;
857aa75f4d3SHarshad Shirwadkar 			ex->ee_block = cpu_to_le32(map.m_lblk);
858aa75f4d3SHarshad Shirwadkar 			ex->ee_len = cpu_to_le16(map.m_len);
859aa75f4d3SHarshad Shirwadkar 			ext4_ext_store_pblock(ex, map.m_pblk);
860aa75f4d3SHarshad Shirwadkar 			if (map.m_flags & EXT4_MAP_UNWRITTEN)
861aa75f4d3SHarshad Shirwadkar 				ext4_ext_mark_unwritten(ex);
862aa75f4d3SHarshad Shirwadkar 			else
863aa75f4d3SHarshad Shirwadkar 				ext4_ext_mark_initialized(ex);
864aa75f4d3SHarshad Shirwadkar 			if (!ext4_fc_add_tlv(inode->i_sb, EXT4_FC_TAG_ADD_RANGE,
865aa75f4d3SHarshad Shirwadkar 					    sizeof(fc_ext), (u8 *)&fc_ext, crc))
866aa75f4d3SHarshad Shirwadkar 				return -ENOSPC;
867aa75f4d3SHarshad Shirwadkar 		}
868aa75f4d3SHarshad Shirwadkar 
869aa75f4d3SHarshad Shirwadkar 		cur_lblk_off += map.m_len;
870aa75f4d3SHarshad Shirwadkar 	}
871aa75f4d3SHarshad Shirwadkar 
872aa75f4d3SHarshad Shirwadkar 	return 0;
873aa75f4d3SHarshad Shirwadkar }
874aa75f4d3SHarshad Shirwadkar 
875aa75f4d3SHarshad Shirwadkar 
876aa75f4d3SHarshad Shirwadkar /* Submit data for all the fast commit inodes */
877aa75f4d3SHarshad Shirwadkar static int ext4_fc_submit_inode_data_all(journal_t *journal)
878aa75f4d3SHarshad Shirwadkar {
879aa75f4d3SHarshad Shirwadkar 	struct super_block *sb = (struct super_block *)(journal->j_private);
880aa75f4d3SHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(sb);
881aa75f4d3SHarshad Shirwadkar 	struct ext4_inode_info *ei;
882aa75f4d3SHarshad Shirwadkar 	int ret = 0;
883aa75f4d3SHarshad Shirwadkar 
884aa75f4d3SHarshad Shirwadkar 	spin_lock(&sbi->s_fc_lock);
8859b5f6c9bSHarshad Shirwadkar 	ext4_set_mount_flag(sb, EXT4_MF_FC_COMMITTING);
88696e7c02dSDaejun Park 	list_for_each_entry(ei, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) {
887aa75f4d3SHarshad Shirwadkar 		ext4_set_inode_state(&ei->vfs_inode, EXT4_STATE_FC_COMMITTING);
888aa75f4d3SHarshad Shirwadkar 		while (atomic_read(&ei->i_fc_updates)) {
889aa75f4d3SHarshad Shirwadkar 			DEFINE_WAIT(wait);
890aa75f4d3SHarshad Shirwadkar 
891aa75f4d3SHarshad Shirwadkar 			prepare_to_wait(&ei->i_fc_wait, &wait,
892aa75f4d3SHarshad Shirwadkar 						TASK_UNINTERRUPTIBLE);
893aa75f4d3SHarshad Shirwadkar 			if (atomic_read(&ei->i_fc_updates)) {
894aa75f4d3SHarshad Shirwadkar 				spin_unlock(&sbi->s_fc_lock);
895aa75f4d3SHarshad Shirwadkar 				schedule();
896aa75f4d3SHarshad Shirwadkar 				spin_lock(&sbi->s_fc_lock);
897aa75f4d3SHarshad Shirwadkar 			}
898aa75f4d3SHarshad Shirwadkar 			finish_wait(&ei->i_fc_wait, &wait);
899aa75f4d3SHarshad Shirwadkar 		}
900aa75f4d3SHarshad Shirwadkar 		spin_unlock(&sbi->s_fc_lock);
901aa75f4d3SHarshad Shirwadkar 		ret = jbd2_submit_inode_data(ei->jinode);
902aa75f4d3SHarshad Shirwadkar 		if (ret)
903aa75f4d3SHarshad Shirwadkar 			return ret;
904aa75f4d3SHarshad Shirwadkar 		spin_lock(&sbi->s_fc_lock);
905aa75f4d3SHarshad Shirwadkar 	}
906aa75f4d3SHarshad Shirwadkar 	spin_unlock(&sbi->s_fc_lock);
907aa75f4d3SHarshad Shirwadkar 
908aa75f4d3SHarshad Shirwadkar 	return ret;
909aa75f4d3SHarshad Shirwadkar }
910aa75f4d3SHarshad Shirwadkar 
911aa75f4d3SHarshad Shirwadkar /* Wait for completion of data for all the fast commit inodes */
912aa75f4d3SHarshad Shirwadkar static int ext4_fc_wait_inode_data_all(journal_t *journal)
913aa75f4d3SHarshad Shirwadkar {
914aa75f4d3SHarshad Shirwadkar 	struct super_block *sb = (struct super_block *)(journal->j_private);
915aa75f4d3SHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(sb);
916aa75f4d3SHarshad Shirwadkar 	struct ext4_inode_info *pos, *n;
917aa75f4d3SHarshad Shirwadkar 	int ret = 0;
918aa75f4d3SHarshad Shirwadkar 
919aa75f4d3SHarshad Shirwadkar 	spin_lock(&sbi->s_fc_lock);
920aa75f4d3SHarshad Shirwadkar 	list_for_each_entry_safe(pos, n, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) {
921aa75f4d3SHarshad Shirwadkar 		if (!ext4_test_inode_state(&pos->vfs_inode,
922aa75f4d3SHarshad Shirwadkar 					   EXT4_STATE_FC_COMMITTING))
923aa75f4d3SHarshad Shirwadkar 			continue;
924aa75f4d3SHarshad Shirwadkar 		spin_unlock(&sbi->s_fc_lock);
925aa75f4d3SHarshad Shirwadkar 
926aa75f4d3SHarshad Shirwadkar 		ret = jbd2_wait_inode_data(journal, pos->jinode);
927aa75f4d3SHarshad Shirwadkar 		if (ret)
928aa75f4d3SHarshad Shirwadkar 			return ret;
929aa75f4d3SHarshad Shirwadkar 		spin_lock(&sbi->s_fc_lock);
930aa75f4d3SHarshad Shirwadkar 	}
931aa75f4d3SHarshad Shirwadkar 	spin_unlock(&sbi->s_fc_lock);
932aa75f4d3SHarshad Shirwadkar 
933aa75f4d3SHarshad Shirwadkar 	return 0;
934aa75f4d3SHarshad Shirwadkar }
935aa75f4d3SHarshad Shirwadkar 
936aa75f4d3SHarshad Shirwadkar /* Commit all the directory entry updates */
937aa75f4d3SHarshad Shirwadkar static int ext4_fc_commit_dentry_updates(journal_t *journal, u32 *crc)
938fa329e27STheodore Ts'o __acquires(&sbi->s_fc_lock)
939fa329e27STheodore Ts'o __releases(&sbi->s_fc_lock)
940aa75f4d3SHarshad Shirwadkar {
941aa75f4d3SHarshad Shirwadkar 	struct super_block *sb = (struct super_block *)(journal->j_private);
942aa75f4d3SHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(sb);
94396e7c02dSDaejun Park 	struct ext4_fc_dentry_update *fc_dentry, *fc_dentry_n;
944aa75f4d3SHarshad Shirwadkar 	struct inode *inode;
94596e7c02dSDaejun Park 	struct ext4_inode_info *ei, *ei_n;
946aa75f4d3SHarshad Shirwadkar 	int ret;
947aa75f4d3SHarshad Shirwadkar 
948aa75f4d3SHarshad Shirwadkar 	if (list_empty(&sbi->s_fc_dentry_q[FC_Q_MAIN]))
949aa75f4d3SHarshad Shirwadkar 		return 0;
95096e7c02dSDaejun Park 	list_for_each_entry_safe(fc_dentry, fc_dentry_n,
95196e7c02dSDaejun Park 				 &sbi->s_fc_dentry_q[FC_Q_MAIN], fcd_list) {
952aa75f4d3SHarshad Shirwadkar 		if (fc_dentry->fcd_op != EXT4_FC_TAG_CREAT) {
953aa75f4d3SHarshad Shirwadkar 			spin_unlock(&sbi->s_fc_lock);
954facec450SGuoqing Jiang 			if (!ext4_fc_add_dentry_tlv(sb, crc, fc_dentry)) {
955aa75f4d3SHarshad Shirwadkar 				ret = -ENOSPC;
956aa75f4d3SHarshad Shirwadkar 				goto lock_and_exit;
957aa75f4d3SHarshad Shirwadkar 			}
958aa75f4d3SHarshad Shirwadkar 			spin_lock(&sbi->s_fc_lock);
959aa75f4d3SHarshad Shirwadkar 			continue;
960aa75f4d3SHarshad Shirwadkar 		}
961aa75f4d3SHarshad Shirwadkar 
962aa75f4d3SHarshad Shirwadkar 		inode = NULL;
96396e7c02dSDaejun Park 		list_for_each_entry_safe(ei, ei_n, &sbi->s_fc_q[FC_Q_MAIN],
96496e7c02dSDaejun Park 					 i_fc_list) {
965aa75f4d3SHarshad Shirwadkar 			if (ei->vfs_inode.i_ino == fc_dentry->fcd_ino) {
966aa75f4d3SHarshad Shirwadkar 				inode = &ei->vfs_inode;
967aa75f4d3SHarshad Shirwadkar 				break;
968aa75f4d3SHarshad Shirwadkar 			}
969aa75f4d3SHarshad Shirwadkar 		}
970aa75f4d3SHarshad Shirwadkar 		/*
971aa75f4d3SHarshad Shirwadkar 		 * If we don't find inode in our list, then it was deleted,
972aa75f4d3SHarshad Shirwadkar 		 * in which case, we don't need to record it's create tag.
973aa75f4d3SHarshad Shirwadkar 		 */
974aa75f4d3SHarshad Shirwadkar 		if (!inode)
975aa75f4d3SHarshad Shirwadkar 			continue;
976aa75f4d3SHarshad Shirwadkar 		spin_unlock(&sbi->s_fc_lock);
977aa75f4d3SHarshad Shirwadkar 
978aa75f4d3SHarshad Shirwadkar 		/*
979aa75f4d3SHarshad Shirwadkar 		 * We first write the inode and then the create dirent. This
980aa75f4d3SHarshad Shirwadkar 		 * allows the recovery code to create an unnamed inode first
981aa75f4d3SHarshad Shirwadkar 		 * and then link it to a directory entry. This allows us
982aa75f4d3SHarshad Shirwadkar 		 * to use namei.c routines almost as is and simplifies
983aa75f4d3SHarshad Shirwadkar 		 * the recovery code.
984aa75f4d3SHarshad Shirwadkar 		 */
985aa75f4d3SHarshad Shirwadkar 		ret = ext4_fc_write_inode(inode, crc);
986aa75f4d3SHarshad Shirwadkar 		if (ret)
987aa75f4d3SHarshad Shirwadkar 			goto lock_and_exit;
988aa75f4d3SHarshad Shirwadkar 
989aa75f4d3SHarshad Shirwadkar 		ret = ext4_fc_write_inode_data(inode, crc);
990aa75f4d3SHarshad Shirwadkar 		if (ret)
991aa75f4d3SHarshad Shirwadkar 			goto lock_and_exit;
992aa75f4d3SHarshad Shirwadkar 
993facec450SGuoqing Jiang 		if (!ext4_fc_add_dentry_tlv(sb, crc, fc_dentry)) {
994aa75f4d3SHarshad Shirwadkar 			ret = -ENOSPC;
995aa75f4d3SHarshad Shirwadkar 			goto lock_and_exit;
996aa75f4d3SHarshad Shirwadkar 		}
997aa75f4d3SHarshad Shirwadkar 
998aa75f4d3SHarshad Shirwadkar 		spin_lock(&sbi->s_fc_lock);
999aa75f4d3SHarshad Shirwadkar 	}
1000aa75f4d3SHarshad Shirwadkar 	return 0;
1001aa75f4d3SHarshad Shirwadkar lock_and_exit:
1002aa75f4d3SHarshad Shirwadkar 	spin_lock(&sbi->s_fc_lock);
1003aa75f4d3SHarshad Shirwadkar 	return ret;
1004aa75f4d3SHarshad Shirwadkar }
1005aa75f4d3SHarshad Shirwadkar 
1006aa75f4d3SHarshad Shirwadkar static int ext4_fc_perform_commit(journal_t *journal)
1007aa75f4d3SHarshad Shirwadkar {
1008aa75f4d3SHarshad Shirwadkar 	struct super_block *sb = (struct super_block *)(journal->j_private);
1009aa75f4d3SHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(sb);
1010aa75f4d3SHarshad Shirwadkar 	struct ext4_inode_info *iter;
1011aa75f4d3SHarshad Shirwadkar 	struct ext4_fc_head head;
1012aa75f4d3SHarshad Shirwadkar 	struct inode *inode;
1013aa75f4d3SHarshad Shirwadkar 	struct blk_plug plug;
1014aa75f4d3SHarshad Shirwadkar 	int ret = 0;
1015aa75f4d3SHarshad Shirwadkar 	u32 crc = 0;
1016aa75f4d3SHarshad Shirwadkar 
1017aa75f4d3SHarshad Shirwadkar 	ret = ext4_fc_submit_inode_data_all(journal);
1018aa75f4d3SHarshad Shirwadkar 	if (ret)
1019aa75f4d3SHarshad Shirwadkar 		return ret;
1020aa75f4d3SHarshad Shirwadkar 
1021aa75f4d3SHarshad Shirwadkar 	ret = ext4_fc_wait_inode_data_all(journal);
1022aa75f4d3SHarshad Shirwadkar 	if (ret)
1023aa75f4d3SHarshad Shirwadkar 		return ret;
1024aa75f4d3SHarshad Shirwadkar 
1025da0c5d26SHarshad Shirwadkar 	/*
1026da0c5d26SHarshad Shirwadkar 	 * If file system device is different from journal device, issue a cache
1027da0c5d26SHarshad Shirwadkar 	 * flush before we start writing fast commit blocks.
1028da0c5d26SHarshad Shirwadkar 	 */
1029da0c5d26SHarshad Shirwadkar 	if (journal->j_fs_dev != journal->j_dev)
1030c6bf3f0eSChristoph Hellwig 		blkdev_issue_flush(journal->j_fs_dev);
1031da0c5d26SHarshad Shirwadkar 
1032aa75f4d3SHarshad Shirwadkar 	blk_start_plug(&plug);
1033aa75f4d3SHarshad Shirwadkar 	if (sbi->s_fc_bytes == 0) {
1034aa75f4d3SHarshad Shirwadkar 		/*
1035aa75f4d3SHarshad Shirwadkar 		 * Add a head tag only if this is the first fast commit
1036aa75f4d3SHarshad Shirwadkar 		 * in this TID.
1037aa75f4d3SHarshad Shirwadkar 		 */
1038aa75f4d3SHarshad Shirwadkar 		head.fc_features = cpu_to_le32(EXT4_FC_SUPPORTED_FEATURES);
1039aa75f4d3SHarshad Shirwadkar 		head.fc_tid = cpu_to_le32(
1040aa75f4d3SHarshad Shirwadkar 			sbi->s_journal->j_running_transaction->t_tid);
1041aa75f4d3SHarshad Shirwadkar 		if (!ext4_fc_add_tlv(sb, EXT4_FC_TAG_HEAD, sizeof(head),
1042e1262cd2SXu Yihang 			(u8 *)&head, &crc)) {
1043e1262cd2SXu Yihang 			ret = -ENOSPC;
1044aa75f4d3SHarshad Shirwadkar 			goto out;
1045aa75f4d3SHarshad Shirwadkar 		}
1046e1262cd2SXu Yihang 	}
1047aa75f4d3SHarshad Shirwadkar 
1048aa75f4d3SHarshad Shirwadkar 	spin_lock(&sbi->s_fc_lock);
1049aa75f4d3SHarshad Shirwadkar 	ret = ext4_fc_commit_dentry_updates(journal, &crc);
1050aa75f4d3SHarshad Shirwadkar 	if (ret) {
1051aa75f4d3SHarshad Shirwadkar 		spin_unlock(&sbi->s_fc_lock);
1052aa75f4d3SHarshad Shirwadkar 		goto out;
1053aa75f4d3SHarshad Shirwadkar 	}
1054aa75f4d3SHarshad Shirwadkar 
105596e7c02dSDaejun Park 	list_for_each_entry(iter, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) {
1056aa75f4d3SHarshad Shirwadkar 		inode = &iter->vfs_inode;
1057aa75f4d3SHarshad Shirwadkar 		if (!ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING))
1058aa75f4d3SHarshad Shirwadkar 			continue;
1059aa75f4d3SHarshad Shirwadkar 
1060aa75f4d3SHarshad Shirwadkar 		spin_unlock(&sbi->s_fc_lock);
1061aa75f4d3SHarshad Shirwadkar 		ret = ext4_fc_write_inode_data(inode, &crc);
1062aa75f4d3SHarshad Shirwadkar 		if (ret)
1063aa75f4d3SHarshad Shirwadkar 			goto out;
1064aa75f4d3SHarshad Shirwadkar 		ret = ext4_fc_write_inode(inode, &crc);
1065aa75f4d3SHarshad Shirwadkar 		if (ret)
1066aa75f4d3SHarshad Shirwadkar 			goto out;
1067aa75f4d3SHarshad Shirwadkar 		spin_lock(&sbi->s_fc_lock);
1068aa75f4d3SHarshad Shirwadkar 	}
1069aa75f4d3SHarshad Shirwadkar 	spin_unlock(&sbi->s_fc_lock);
1070aa75f4d3SHarshad Shirwadkar 
1071aa75f4d3SHarshad Shirwadkar 	ret = ext4_fc_write_tail(sb, crc);
1072aa75f4d3SHarshad Shirwadkar 
1073aa75f4d3SHarshad Shirwadkar out:
1074aa75f4d3SHarshad Shirwadkar 	blk_finish_plug(&plug);
1075aa75f4d3SHarshad Shirwadkar 	return ret;
1076aa75f4d3SHarshad Shirwadkar }
1077aa75f4d3SHarshad Shirwadkar 
1078aa75f4d3SHarshad Shirwadkar /*
1079aa75f4d3SHarshad Shirwadkar  * The main commit entry point. Performs a fast commit for transaction
1080aa75f4d3SHarshad Shirwadkar  * commit_tid if needed. If it's not possible to perform a fast commit
1081aa75f4d3SHarshad Shirwadkar  * due to various reasons, we fall back to full commit. Returns 0
1082aa75f4d3SHarshad Shirwadkar  * on success, error otherwise.
1083aa75f4d3SHarshad Shirwadkar  */
1084aa75f4d3SHarshad Shirwadkar int ext4_fc_commit(journal_t *journal, tid_t commit_tid)
1085aa75f4d3SHarshad Shirwadkar {
1086aa75f4d3SHarshad Shirwadkar 	struct super_block *sb = (struct super_block *)(journal->j_private);
1087aa75f4d3SHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(sb);
1088aa75f4d3SHarshad Shirwadkar 	int nblks = 0, ret, bsize = journal->j_blocksize;
1089aa75f4d3SHarshad Shirwadkar 	int subtid = atomic_read(&sbi->s_fc_subtid);
1090aa75f4d3SHarshad Shirwadkar 	int reason = EXT4_FC_REASON_OK, fc_bufs_before = 0;
1091aa75f4d3SHarshad Shirwadkar 	ktime_t start_time, commit_time;
1092aa75f4d3SHarshad Shirwadkar 
1093aa75f4d3SHarshad Shirwadkar 	trace_ext4_fc_commit_start(sb);
1094aa75f4d3SHarshad Shirwadkar 
1095aa75f4d3SHarshad Shirwadkar 	start_time = ktime_get();
1096aa75f4d3SHarshad Shirwadkar 
1097*7bbbe241SHarshad Shirwadkar 	if (!test_opt2(sb, JOURNAL_FAST_COMMIT))
1098*7bbbe241SHarshad Shirwadkar 		return jbd2_complete_transaction(journal, commit_tid);
1099aa75f4d3SHarshad Shirwadkar 
1100aa75f4d3SHarshad Shirwadkar restart_fc:
1101aa75f4d3SHarshad Shirwadkar 	ret = jbd2_fc_begin_commit(journal, commit_tid);
1102aa75f4d3SHarshad Shirwadkar 	if (ret == -EALREADY) {
1103aa75f4d3SHarshad Shirwadkar 		/* There was an ongoing commit, check if we need to restart */
1104aa75f4d3SHarshad Shirwadkar 		if (atomic_read(&sbi->s_fc_subtid) <= subtid &&
1105aa75f4d3SHarshad Shirwadkar 			commit_tid > journal->j_commit_sequence)
1106aa75f4d3SHarshad Shirwadkar 			goto restart_fc;
1107aa75f4d3SHarshad Shirwadkar 		reason = EXT4_FC_REASON_ALREADY_COMMITTED;
1108aa75f4d3SHarshad Shirwadkar 		goto out;
1109aa75f4d3SHarshad Shirwadkar 	} else if (ret) {
1110aa75f4d3SHarshad Shirwadkar 		sbi->s_fc_stats.fc_ineligible_reason_count[EXT4_FC_COMMIT_FAILED]++;
1111aa75f4d3SHarshad Shirwadkar 		reason = EXT4_FC_REASON_FC_START_FAILED;
1112aa75f4d3SHarshad Shirwadkar 		goto out;
1113aa75f4d3SHarshad Shirwadkar 	}
1114*7bbbe241SHarshad Shirwadkar 	/*
1115*7bbbe241SHarshad Shirwadkar 	 * After establishing journal barrier via jbd2_fc_begin_commit(), check
1116*7bbbe241SHarshad Shirwadkar 	 * if we are fast commit ineligible.
1117*7bbbe241SHarshad Shirwadkar 	 */
1118*7bbbe241SHarshad Shirwadkar 	if (ext4_test_mount_flag(sb, EXT4_MF_FC_INELIGIBLE)) {
1119*7bbbe241SHarshad Shirwadkar 		reason = EXT4_FC_REASON_INELIGIBLE;
1120*7bbbe241SHarshad Shirwadkar 		goto out;
1121*7bbbe241SHarshad Shirwadkar 	}
1122aa75f4d3SHarshad Shirwadkar 
1123aa75f4d3SHarshad Shirwadkar 	fc_bufs_before = (sbi->s_fc_bytes + bsize - 1) / bsize;
1124aa75f4d3SHarshad Shirwadkar 	ret = ext4_fc_perform_commit(journal);
1125aa75f4d3SHarshad Shirwadkar 	if (ret < 0) {
1126aa75f4d3SHarshad Shirwadkar 		sbi->s_fc_stats.fc_ineligible_reason_count[EXT4_FC_COMMIT_FAILED]++;
1127aa75f4d3SHarshad Shirwadkar 		reason = EXT4_FC_REASON_FC_FAILED;
1128aa75f4d3SHarshad Shirwadkar 		goto out;
1129aa75f4d3SHarshad Shirwadkar 	}
1130aa75f4d3SHarshad Shirwadkar 	nblks = (sbi->s_fc_bytes + bsize - 1) / bsize - fc_bufs_before;
1131aa75f4d3SHarshad Shirwadkar 	ret = jbd2_fc_wait_bufs(journal, nblks);
1132aa75f4d3SHarshad Shirwadkar 	if (ret < 0) {
1133aa75f4d3SHarshad Shirwadkar 		sbi->s_fc_stats.fc_ineligible_reason_count[EXT4_FC_COMMIT_FAILED]++;
1134aa75f4d3SHarshad Shirwadkar 		reason = EXT4_FC_REASON_FC_FAILED;
1135aa75f4d3SHarshad Shirwadkar 		goto out;
1136aa75f4d3SHarshad Shirwadkar 	}
1137aa75f4d3SHarshad Shirwadkar 	atomic_inc(&sbi->s_fc_subtid);
1138aa75f4d3SHarshad Shirwadkar 	jbd2_fc_end_commit(journal);
1139aa75f4d3SHarshad Shirwadkar out:
1140aa75f4d3SHarshad Shirwadkar 	spin_lock(&sbi->s_fc_lock);
1141aa75f4d3SHarshad Shirwadkar 	if (reason != EXT4_FC_REASON_OK &&
1142aa75f4d3SHarshad Shirwadkar 		reason != EXT4_FC_REASON_ALREADY_COMMITTED) {
1143aa75f4d3SHarshad Shirwadkar 		sbi->s_fc_stats.fc_ineligible_commits++;
1144aa75f4d3SHarshad Shirwadkar 	} else {
1145aa75f4d3SHarshad Shirwadkar 		sbi->s_fc_stats.fc_num_commits++;
1146aa75f4d3SHarshad Shirwadkar 		sbi->s_fc_stats.fc_numblks += nblks;
1147aa75f4d3SHarshad Shirwadkar 	}
1148aa75f4d3SHarshad Shirwadkar 	spin_unlock(&sbi->s_fc_lock);
1149aa75f4d3SHarshad Shirwadkar 	nblks = (reason == EXT4_FC_REASON_OK) ? nblks : 0;
1150aa75f4d3SHarshad Shirwadkar 	trace_ext4_fc_commit_stop(sb, nblks, reason);
1151aa75f4d3SHarshad Shirwadkar 	commit_time = ktime_to_ns(ktime_sub(ktime_get(), start_time));
1152aa75f4d3SHarshad Shirwadkar 	/*
1153aa75f4d3SHarshad Shirwadkar 	 * weight the commit time higher than the average time so we don't
1154aa75f4d3SHarshad Shirwadkar 	 * react too strongly to vast changes in the commit time
1155aa75f4d3SHarshad Shirwadkar 	 */
1156aa75f4d3SHarshad Shirwadkar 	if (likely(sbi->s_fc_avg_commit_time))
1157aa75f4d3SHarshad Shirwadkar 		sbi->s_fc_avg_commit_time = (commit_time +
1158aa75f4d3SHarshad Shirwadkar 				sbi->s_fc_avg_commit_time * 3) / 4;
1159aa75f4d3SHarshad Shirwadkar 	else
1160aa75f4d3SHarshad Shirwadkar 		sbi->s_fc_avg_commit_time = commit_time;
1161aa75f4d3SHarshad Shirwadkar 	jbd_debug(1,
1162aa75f4d3SHarshad Shirwadkar 		"Fast commit ended with blks = %d, reason = %d, subtid - %d",
1163aa75f4d3SHarshad Shirwadkar 		nblks, reason, subtid);
1164aa75f4d3SHarshad Shirwadkar 	if (reason == EXT4_FC_REASON_FC_FAILED)
11650bce577bSHarshad Shirwadkar 		return jbd2_fc_end_commit_fallback(journal);
1166aa75f4d3SHarshad Shirwadkar 	if (reason == EXT4_FC_REASON_FC_START_FAILED ||
1167aa75f4d3SHarshad Shirwadkar 		reason == EXT4_FC_REASON_INELIGIBLE)
1168aa75f4d3SHarshad Shirwadkar 		return jbd2_complete_transaction(journal, commit_tid);
1169aa75f4d3SHarshad Shirwadkar 	return 0;
1170aa75f4d3SHarshad Shirwadkar }
1171aa75f4d3SHarshad Shirwadkar 
1172ff780b91SHarshad Shirwadkar /*
1173ff780b91SHarshad Shirwadkar  * Fast commit cleanup routine. This is called after every fast commit and
1174ff780b91SHarshad Shirwadkar  * full commit. full is true if we are called after a full commit.
1175ff780b91SHarshad Shirwadkar  */
1176ff780b91SHarshad Shirwadkar static void ext4_fc_cleanup(journal_t *journal, int full)
1177ff780b91SHarshad Shirwadkar {
1178aa75f4d3SHarshad Shirwadkar 	struct super_block *sb = journal->j_private;
1179aa75f4d3SHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(sb);
118096e7c02dSDaejun Park 	struct ext4_inode_info *iter, *iter_n;
1181aa75f4d3SHarshad Shirwadkar 	struct ext4_fc_dentry_update *fc_dentry;
1182aa75f4d3SHarshad Shirwadkar 
1183aa75f4d3SHarshad Shirwadkar 	if (full && sbi->s_fc_bh)
1184aa75f4d3SHarshad Shirwadkar 		sbi->s_fc_bh = NULL;
1185aa75f4d3SHarshad Shirwadkar 
1186aa75f4d3SHarshad Shirwadkar 	jbd2_fc_release_bufs(journal);
1187aa75f4d3SHarshad Shirwadkar 
1188aa75f4d3SHarshad Shirwadkar 	spin_lock(&sbi->s_fc_lock);
118996e7c02dSDaejun Park 	list_for_each_entry_safe(iter, iter_n, &sbi->s_fc_q[FC_Q_MAIN],
119096e7c02dSDaejun Park 				 i_fc_list) {
1191aa75f4d3SHarshad Shirwadkar 		list_del_init(&iter->i_fc_list);
1192aa75f4d3SHarshad Shirwadkar 		ext4_clear_inode_state(&iter->vfs_inode,
1193aa75f4d3SHarshad Shirwadkar 				       EXT4_STATE_FC_COMMITTING);
1194aa75f4d3SHarshad Shirwadkar 		ext4_fc_reset_inode(&iter->vfs_inode);
1195aa75f4d3SHarshad Shirwadkar 		/* Make sure EXT4_STATE_FC_COMMITTING bit is clear */
1196aa75f4d3SHarshad Shirwadkar 		smp_mb();
1197aa75f4d3SHarshad Shirwadkar #if (BITS_PER_LONG < 64)
1198aa75f4d3SHarshad Shirwadkar 		wake_up_bit(&iter->i_state_flags, EXT4_STATE_FC_COMMITTING);
1199aa75f4d3SHarshad Shirwadkar #else
1200aa75f4d3SHarshad Shirwadkar 		wake_up_bit(&iter->i_flags, EXT4_STATE_FC_COMMITTING);
1201aa75f4d3SHarshad Shirwadkar #endif
1202aa75f4d3SHarshad Shirwadkar 	}
1203aa75f4d3SHarshad Shirwadkar 
1204aa75f4d3SHarshad Shirwadkar 	while (!list_empty(&sbi->s_fc_dentry_q[FC_Q_MAIN])) {
1205aa75f4d3SHarshad Shirwadkar 		fc_dentry = list_first_entry(&sbi->s_fc_dentry_q[FC_Q_MAIN],
1206aa75f4d3SHarshad Shirwadkar 					     struct ext4_fc_dentry_update,
1207aa75f4d3SHarshad Shirwadkar 					     fcd_list);
1208aa75f4d3SHarshad Shirwadkar 		list_del_init(&fc_dentry->fcd_list);
1209aa75f4d3SHarshad Shirwadkar 		spin_unlock(&sbi->s_fc_lock);
1210aa75f4d3SHarshad Shirwadkar 
1211aa75f4d3SHarshad Shirwadkar 		if (fc_dentry->fcd_name.name &&
1212aa75f4d3SHarshad Shirwadkar 			fc_dentry->fcd_name.len > DNAME_INLINE_LEN)
1213aa75f4d3SHarshad Shirwadkar 			kfree(fc_dentry->fcd_name.name);
1214aa75f4d3SHarshad Shirwadkar 		kmem_cache_free(ext4_fc_dentry_cachep, fc_dentry);
1215aa75f4d3SHarshad Shirwadkar 		spin_lock(&sbi->s_fc_lock);
1216aa75f4d3SHarshad Shirwadkar 	}
1217aa75f4d3SHarshad Shirwadkar 
1218aa75f4d3SHarshad Shirwadkar 	list_splice_init(&sbi->s_fc_dentry_q[FC_Q_STAGING],
1219aa75f4d3SHarshad Shirwadkar 				&sbi->s_fc_dentry_q[FC_Q_MAIN]);
1220aa75f4d3SHarshad Shirwadkar 	list_splice_init(&sbi->s_fc_q[FC_Q_STAGING],
122131e203e0SDaejun Park 				&sbi->s_fc_q[FC_Q_MAIN]);
1222aa75f4d3SHarshad Shirwadkar 
12239b5f6c9bSHarshad Shirwadkar 	ext4_clear_mount_flag(sb, EXT4_MF_FC_COMMITTING);
12249b5f6c9bSHarshad Shirwadkar 	ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
1225aa75f4d3SHarshad Shirwadkar 
1226aa75f4d3SHarshad Shirwadkar 	if (full)
1227aa75f4d3SHarshad Shirwadkar 		sbi->s_fc_bytes = 0;
1228aa75f4d3SHarshad Shirwadkar 	spin_unlock(&sbi->s_fc_lock);
1229aa75f4d3SHarshad Shirwadkar 	trace_ext4_fc_stats(sb);
1230ff780b91SHarshad Shirwadkar }
12316866d7b3SHarshad Shirwadkar 
12328016e29fSHarshad Shirwadkar /* Ext4 Replay Path Routines */
12338016e29fSHarshad Shirwadkar 
/*
 * Helper struct for dentry replay routines: the decoded form of a dentry
 * tag's value.
 */
struct dentry_info_args {
	int parent_ino;		/* inode number of the parent directory */
	int dname_len;		/* length of dname in bytes */
	int ino;		/* inode number the entry refers to */
	int inode_len;
	char *dname;		/* entry name, dname_len bytes long */
};
12398016e29fSHarshad Shirwadkar 
12408016e29fSHarshad Shirwadkar static inline void tl_to_darg(struct dentry_info_args *darg,
1241a7ba36bcSHarshad Shirwadkar 			      struct  ext4_fc_tl *tl, u8 *val)
12428016e29fSHarshad Shirwadkar {
1243a7ba36bcSHarshad Shirwadkar 	struct ext4_fc_dentry_info fcd;
12448016e29fSHarshad Shirwadkar 
1245a7ba36bcSHarshad Shirwadkar 	memcpy(&fcd, val, sizeof(fcd));
12468016e29fSHarshad Shirwadkar 
1247a7ba36bcSHarshad Shirwadkar 	darg->parent_ino = le32_to_cpu(fcd.fc_parent_ino);
1248a7ba36bcSHarshad Shirwadkar 	darg->ino = le32_to_cpu(fcd.fc_ino);
1249a7ba36bcSHarshad Shirwadkar 	darg->dname = val + offsetof(struct ext4_fc_dentry_info, fc_dname);
1250a7ba36bcSHarshad Shirwadkar 	darg->dname_len = le16_to_cpu(tl->fc_len) -
12518016e29fSHarshad Shirwadkar 		sizeof(struct ext4_fc_dentry_info);
12528016e29fSHarshad Shirwadkar }
12538016e29fSHarshad Shirwadkar 
12548016e29fSHarshad Shirwadkar /* Unlink replay function */
1255a7ba36bcSHarshad Shirwadkar static int ext4_fc_replay_unlink(struct super_block *sb, struct ext4_fc_tl *tl,
1256a7ba36bcSHarshad Shirwadkar 				 u8 *val)
12578016e29fSHarshad Shirwadkar {
12588016e29fSHarshad Shirwadkar 	struct inode *inode, *old_parent;
12598016e29fSHarshad Shirwadkar 	struct qstr entry;
12608016e29fSHarshad Shirwadkar 	struct dentry_info_args darg;
12618016e29fSHarshad Shirwadkar 	int ret = 0;
12628016e29fSHarshad Shirwadkar 
1263a7ba36bcSHarshad Shirwadkar 	tl_to_darg(&darg, tl, val);
12648016e29fSHarshad Shirwadkar 
12658016e29fSHarshad Shirwadkar 	trace_ext4_fc_replay(sb, EXT4_FC_TAG_UNLINK, darg.ino,
12668016e29fSHarshad Shirwadkar 			darg.parent_ino, darg.dname_len);
12678016e29fSHarshad Shirwadkar 
12688016e29fSHarshad Shirwadkar 	entry.name = darg.dname;
12698016e29fSHarshad Shirwadkar 	entry.len = darg.dname_len;
12708016e29fSHarshad Shirwadkar 	inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL);
12718016e29fSHarshad Shirwadkar 
127223dd561aSYi Li 	if (IS_ERR(inode)) {
12738016e29fSHarshad Shirwadkar 		jbd_debug(1, "Inode %d not found", darg.ino);
12748016e29fSHarshad Shirwadkar 		return 0;
12758016e29fSHarshad Shirwadkar 	}
12768016e29fSHarshad Shirwadkar 
12778016e29fSHarshad Shirwadkar 	old_parent = ext4_iget(sb, darg.parent_ino,
12788016e29fSHarshad Shirwadkar 				EXT4_IGET_NORMAL);
127923dd561aSYi Li 	if (IS_ERR(old_parent)) {
12808016e29fSHarshad Shirwadkar 		jbd_debug(1, "Dir with inode  %d not found", darg.parent_ino);
12818016e29fSHarshad Shirwadkar 		iput(inode);
12828016e29fSHarshad Shirwadkar 		return 0;
12838016e29fSHarshad Shirwadkar 	}
12848016e29fSHarshad Shirwadkar 
1285a80f7fcfSHarshad Shirwadkar 	ret = __ext4_unlink(NULL, old_parent, &entry, inode);
12868016e29fSHarshad Shirwadkar 	/* -ENOENT ok coz it might not exist anymore. */
12878016e29fSHarshad Shirwadkar 	if (ret == -ENOENT)
12888016e29fSHarshad Shirwadkar 		ret = 0;
12898016e29fSHarshad Shirwadkar 	iput(old_parent);
12908016e29fSHarshad Shirwadkar 	iput(inode);
12918016e29fSHarshad Shirwadkar 	return ret;
12928016e29fSHarshad Shirwadkar }
12938016e29fSHarshad Shirwadkar 
12948016e29fSHarshad Shirwadkar static int ext4_fc_replay_link_internal(struct super_block *sb,
12958016e29fSHarshad Shirwadkar 				struct dentry_info_args *darg,
12968016e29fSHarshad Shirwadkar 				struct inode *inode)
12978016e29fSHarshad Shirwadkar {
12988016e29fSHarshad Shirwadkar 	struct inode *dir = NULL;
12998016e29fSHarshad Shirwadkar 	struct dentry *dentry_dir = NULL, *dentry_inode = NULL;
13008016e29fSHarshad Shirwadkar 	struct qstr qstr_dname = QSTR_INIT(darg->dname, darg->dname_len);
13018016e29fSHarshad Shirwadkar 	int ret = 0;
13028016e29fSHarshad Shirwadkar 
13038016e29fSHarshad Shirwadkar 	dir = ext4_iget(sb, darg->parent_ino, EXT4_IGET_NORMAL);
13048016e29fSHarshad Shirwadkar 	if (IS_ERR(dir)) {
13058016e29fSHarshad Shirwadkar 		jbd_debug(1, "Dir with inode %d not found.", darg->parent_ino);
13068016e29fSHarshad Shirwadkar 		dir = NULL;
13078016e29fSHarshad Shirwadkar 		goto out;
13088016e29fSHarshad Shirwadkar 	}
13098016e29fSHarshad Shirwadkar 
13108016e29fSHarshad Shirwadkar 	dentry_dir = d_obtain_alias(dir);
13118016e29fSHarshad Shirwadkar 	if (IS_ERR(dentry_dir)) {
13128016e29fSHarshad Shirwadkar 		jbd_debug(1, "Failed to obtain dentry");
13138016e29fSHarshad Shirwadkar 		dentry_dir = NULL;
13148016e29fSHarshad Shirwadkar 		goto out;
13158016e29fSHarshad Shirwadkar 	}
13168016e29fSHarshad Shirwadkar 
13178016e29fSHarshad Shirwadkar 	dentry_inode = d_alloc(dentry_dir, &qstr_dname);
13188016e29fSHarshad Shirwadkar 	if (!dentry_inode) {
13198016e29fSHarshad Shirwadkar 		jbd_debug(1, "Inode dentry not created.");
13208016e29fSHarshad Shirwadkar 		ret = -ENOMEM;
13218016e29fSHarshad Shirwadkar 		goto out;
13228016e29fSHarshad Shirwadkar 	}
13238016e29fSHarshad Shirwadkar 
13248016e29fSHarshad Shirwadkar 	ret = __ext4_link(dir, inode, dentry_inode);
13258016e29fSHarshad Shirwadkar 	/*
13268016e29fSHarshad Shirwadkar 	 * It's possible that link already existed since data blocks
13278016e29fSHarshad Shirwadkar 	 * for the dir in question got persisted before we crashed OR
13288016e29fSHarshad Shirwadkar 	 * we replayed this tag and crashed before the entire replay
13298016e29fSHarshad Shirwadkar 	 * could complete.
13308016e29fSHarshad Shirwadkar 	 */
13318016e29fSHarshad Shirwadkar 	if (ret && ret != -EEXIST) {
13328016e29fSHarshad Shirwadkar 		jbd_debug(1, "Failed to link\n");
13338016e29fSHarshad Shirwadkar 		goto out;
13348016e29fSHarshad Shirwadkar 	}
13358016e29fSHarshad Shirwadkar 
13368016e29fSHarshad Shirwadkar 	ret = 0;
13378016e29fSHarshad Shirwadkar out:
13388016e29fSHarshad Shirwadkar 	if (dentry_dir) {
13398016e29fSHarshad Shirwadkar 		d_drop(dentry_dir);
13408016e29fSHarshad Shirwadkar 		dput(dentry_dir);
13418016e29fSHarshad Shirwadkar 	} else if (dir) {
13428016e29fSHarshad Shirwadkar 		iput(dir);
13438016e29fSHarshad Shirwadkar 	}
13448016e29fSHarshad Shirwadkar 	if (dentry_inode) {
13458016e29fSHarshad Shirwadkar 		d_drop(dentry_inode);
13468016e29fSHarshad Shirwadkar 		dput(dentry_inode);
13478016e29fSHarshad Shirwadkar 	}
13488016e29fSHarshad Shirwadkar 
13498016e29fSHarshad Shirwadkar 	return ret;
13508016e29fSHarshad Shirwadkar }
13518016e29fSHarshad Shirwadkar 
13528016e29fSHarshad Shirwadkar /* Link replay function */
1353a7ba36bcSHarshad Shirwadkar static int ext4_fc_replay_link(struct super_block *sb, struct ext4_fc_tl *tl,
1354a7ba36bcSHarshad Shirwadkar 			       u8 *val)
13558016e29fSHarshad Shirwadkar {
13568016e29fSHarshad Shirwadkar 	struct inode *inode;
13578016e29fSHarshad Shirwadkar 	struct dentry_info_args darg;
13588016e29fSHarshad Shirwadkar 	int ret = 0;
13598016e29fSHarshad Shirwadkar 
1360a7ba36bcSHarshad Shirwadkar 	tl_to_darg(&darg, tl, val);
13618016e29fSHarshad Shirwadkar 	trace_ext4_fc_replay(sb, EXT4_FC_TAG_LINK, darg.ino,
13628016e29fSHarshad Shirwadkar 			darg.parent_ino, darg.dname_len);
13638016e29fSHarshad Shirwadkar 
13648016e29fSHarshad Shirwadkar 	inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL);
136523dd561aSYi Li 	if (IS_ERR(inode)) {
13668016e29fSHarshad Shirwadkar 		jbd_debug(1, "Inode not found.");
13678016e29fSHarshad Shirwadkar 		return 0;
13688016e29fSHarshad Shirwadkar 	}
13698016e29fSHarshad Shirwadkar 
13708016e29fSHarshad Shirwadkar 	ret = ext4_fc_replay_link_internal(sb, &darg, inode);
13718016e29fSHarshad Shirwadkar 	iput(inode);
13728016e29fSHarshad Shirwadkar 	return ret;
13738016e29fSHarshad Shirwadkar }
13748016e29fSHarshad Shirwadkar 
13758016e29fSHarshad Shirwadkar /*
13768016e29fSHarshad Shirwadkar  * Record all the modified inodes during replay. We use this later to setup
13778016e29fSHarshad Shirwadkar  * block bitmaps correctly.
13788016e29fSHarshad Shirwadkar  */
13798016e29fSHarshad Shirwadkar static int ext4_fc_record_modified_inode(struct super_block *sb, int ino)
13808016e29fSHarshad Shirwadkar {
13818016e29fSHarshad Shirwadkar 	struct ext4_fc_replay_state *state;
13828016e29fSHarshad Shirwadkar 	int i;
13838016e29fSHarshad Shirwadkar 
13848016e29fSHarshad Shirwadkar 	state = &EXT4_SB(sb)->s_fc_replay_state;
13858016e29fSHarshad Shirwadkar 	for (i = 0; i < state->fc_modified_inodes_used; i++)
13868016e29fSHarshad Shirwadkar 		if (state->fc_modified_inodes[i] == ino)
13878016e29fSHarshad Shirwadkar 			return 0;
13888016e29fSHarshad Shirwadkar 	if (state->fc_modified_inodes_used == state->fc_modified_inodes_size) {
13898016e29fSHarshad Shirwadkar 		state->fc_modified_inodes_size +=
13908016e29fSHarshad Shirwadkar 			EXT4_FC_REPLAY_REALLOC_INCREMENT;
13918016e29fSHarshad Shirwadkar 		state->fc_modified_inodes = krealloc(
13928016e29fSHarshad Shirwadkar 					state->fc_modified_inodes, sizeof(int) *
13938016e29fSHarshad Shirwadkar 					state->fc_modified_inodes_size,
13948016e29fSHarshad Shirwadkar 					GFP_KERNEL);
13958016e29fSHarshad Shirwadkar 		if (!state->fc_modified_inodes)
13968016e29fSHarshad Shirwadkar 			return -ENOMEM;
13978016e29fSHarshad Shirwadkar 	}
13988016e29fSHarshad Shirwadkar 	state->fc_modified_inodes[state->fc_modified_inodes_used++] = ino;
13998016e29fSHarshad Shirwadkar 	return 0;
14008016e29fSHarshad Shirwadkar }
14018016e29fSHarshad Shirwadkar 
14028016e29fSHarshad Shirwadkar /*
14038016e29fSHarshad Shirwadkar  * Inode replay function
14048016e29fSHarshad Shirwadkar  */
1405a7ba36bcSHarshad Shirwadkar static int ext4_fc_replay_inode(struct super_block *sb, struct ext4_fc_tl *tl,
1406a7ba36bcSHarshad Shirwadkar 				u8 *val)
14078016e29fSHarshad Shirwadkar {
1408a7ba36bcSHarshad Shirwadkar 	struct ext4_fc_inode fc_inode;
14098016e29fSHarshad Shirwadkar 	struct ext4_inode *raw_inode;
14108016e29fSHarshad Shirwadkar 	struct ext4_inode *raw_fc_inode;
14118016e29fSHarshad Shirwadkar 	struct inode *inode = NULL;
14128016e29fSHarshad Shirwadkar 	struct ext4_iloc iloc;
14138016e29fSHarshad Shirwadkar 	int inode_len, ino, ret, tag = le16_to_cpu(tl->fc_tag);
14148016e29fSHarshad Shirwadkar 	struct ext4_extent_header *eh;
14158016e29fSHarshad Shirwadkar 
1416a7ba36bcSHarshad Shirwadkar 	memcpy(&fc_inode, val, sizeof(fc_inode));
14178016e29fSHarshad Shirwadkar 
1418a7ba36bcSHarshad Shirwadkar 	ino = le32_to_cpu(fc_inode.fc_ino);
14198016e29fSHarshad Shirwadkar 	trace_ext4_fc_replay(sb, tag, ino, 0, 0);
14208016e29fSHarshad Shirwadkar 
14218016e29fSHarshad Shirwadkar 	inode = ext4_iget(sb, ino, EXT4_IGET_NORMAL);
142223dd561aSYi Li 	if (!IS_ERR(inode)) {
14238016e29fSHarshad Shirwadkar 		ext4_ext_clear_bb(inode);
14248016e29fSHarshad Shirwadkar 		iput(inode);
14258016e29fSHarshad Shirwadkar 	}
142623dd561aSYi Li 	inode = NULL;
14278016e29fSHarshad Shirwadkar 
14288016e29fSHarshad Shirwadkar 	ext4_fc_record_modified_inode(sb, ino);
14298016e29fSHarshad Shirwadkar 
1430a7ba36bcSHarshad Shirwadkar 	raw_fc_inode = (struct ext4_inode *)
1431a7ba36bcSHarshad Shirwadkar 		(val + offsetof(struct ext4_fc_inode, fc_raw_inode));
14328016e29fSHarshad Shirwadkar 	ret = ext4_get_fc_inode_loc(sb, ino, &iloc);
14338016e29fSHarshad Shirwadkar 	if (ret)
14348016e29fSHarshad Shirwadkar 		goto out;
14358016e29fSHarshad Shirwadkar 
1436a7ba36bcSHarshad Shirwadkar 	inode_len = le16_to_cpu(tl->fc_len) - sizeof(struct ext4_fc_inode);
14378016e29fSHarshad Shirwadkar 	raw_inode = ext4_raw_inode(&iloc);
14388016e29fSHarshad Shirwadkar 
14398016e29fSHarshad Shirwadkar 	memcpy(raw_inode, raw_fc_inode, offsetof(struct ext4_inode, i_block));
14408016e29fSHarshad Shirwadkar 	memcpy(&raw_inode->i_generation, &raw_fc_inode->i_generation,
14418016e29fSHarshad Shirwadkar 		inode_len - offsetof(struct ext4_inode, i_generation));
14428016e29fSHarshad Shirwadkar 	if (le32_to_cpu(raw_inode->i_flags) & EXT4_EXTENTS_FL) {
14438016e29fSHarshad Shirwadkar 		eh = (struct ext4_extent_header *)(&raw_inode->i_block[0]);
14448016e29fSHarshad Shirwadkar 		if (eh->eh_magic != EXT4_EXT_MAGIC) {
14458016e29fSHarshad Shirwadkar 			memset(eh, 0, sizeof(*eh));
14468016e29fSHarshad Shirwadkar 			eh->eh_magic = EXT4_EXT_MAGIC;
14478016e29fSHarshad Shirwadkar 			eh->eh_max = cpu_to_le16(
14488016e29fSHarshad Shirwadkar 				(sizeof(raw_inode->i_block) -
14498016e29fSHarshad Shirwadkar 				 sizeof(struct ext4_extent_header))
14508016e29fSHarshad Shirwadkar 				 / sizeof(struct ext4_extent));
14518016e29fSHarshad Shirwadkar 		}
14528016e29fSHarshad Shirwadkar 	} else if (le32_to_cpu(raw_inode->i_flags) & EXT4_INLINE_DATA_FL) {
14538016e29fSHarshad Shirwadkar 		memcpy(raw_inode->i_block, raw_fc_inode->i_block,
14548016e29fSHarshad Shirwadkar 			sizeof(raw_inode->i_block));
14558016e29fSHarshad Shirwadkar 	}
14568016e29fSHarshad Shirwadkar 
14578016e29fSHarshad Shirwadkar 	/* Immediately update the inode on disk. */
14588016e29fSHarshad Shirwadkar 	ret = ext4_handle_dirty_metadata(NULL, NULL, iloc.bh);
14598016e29fSHarshad Shirwadkar 	if (ret)
14608016e29fSHarshad Shirwadkar 		goto out;
14618016e29fSHarshad Shirwadkar 	ret = sync_dirty_buffer(iloc.bh);
14628016e29fSHarshad Shirwadkar 	if (ret)
14638016e29fSHarshad Shirwadkar 		goto out;
14648016e29fSHarshad Shirwadkar 	ret = ext4_mark_inode_used(sb, ino);
14658016e29fSHarshad Shirwadkar 	if (ret)
14668016e29fSHarshad Shirwadkar 		goto out;
14678016e29fSHarshad Shirwadkar 
14688016e29fSHarshad Shirwadkar 	/* Given that we just wrote the inode on disk, this SHOULD succeed. */
14698016e29fSHarshad Shirwadkar 	inode = ext4_iget(sb, ino, EXT4_IGET_NORMAL);
147023dd561aSYi Li 	if (IS_ERR(inode)) {
14718016e29fSHarshad Shirwadkar 		jbd_debug(1, "Inode not found.");
14728016e29fSHarshad Shirwadkar 		return -EFSCORRUPTED;
14738016e29fSHarshad Shirwadkar 	}
14748016e29fSHarshad Shirwadkar 
14758016e29fSHarshad Shirwadkar 	/*
14768016e29fSHarshad Shirwadkar 	 * Our allocator could have made different decisions than before
14778016e29fSHarshad Shirwadkar 	 * crashing. This should be fixed but until then, we calculate
14788016e29fSHarshad Shirwadkar 	 * the number of blocks the inode.
14798016e29fSHarshad Shirwadkar 	 */
14801ebf2178SHarshad Shirwadkar 	if (!ext4_test_inode_flag(inode, EXT4_INODE_INLINE_DATA))
14818016e29fSHarshad Shirwadkar 		ext4_ext_replay_set_iblocks(inode);
14828016e29fSHarshad Shirwadkar 
14838016e29fSHarshad Shirwadkar 	inode->i_generation = le32_to_cpu(ext4_raw_inode(&iloc)->i_generation);
14848016e29fSHarshad Shirwadkar 	ext4_reset_inode_seed(inode);
14858016e29fSHarshad Shirwadkar 
14868016e29fSHarshad Shirwadkar 	ext4_inode_csum_set(inode, ext4_raw_inode(&iloc), EXT4_I(inode));
14878016e29fSHarshad Shirwadkar 	ret = ext4_handle_dirty_metadata(NULL, NULL, iloc.bh);
14888016e29fSHarshad Shirwadkar 	sync_dirty_buffer(iloc.bh);
14898016e29fSHarshad Shirwadkar 	brelse(iloc.bh);
14908016e29fSHarshad Shirwadkar out:
14918016e29fSHarshad Shirwadkar 	iput(inode);
14928016e29fSHarshad Shirwadkar 	if (!ret)
1493c6bf3f0eSChristoph Hellwig 		blkdev_issue_flush(sb->s_bdev);
14948016e29fSHarshad Shirwadkar 
14958016e29fSHarshad Shirwadkar 	return 0;
14968016e29fSHarshad Shirwadkar }
14978016e29fSHarshad Shirwadkar 
14988016e29fSHarshad Shirwadkar /*
14998016e29fSHarshad Shirwadkar  * Dentry create replay function.
15008016e29fSHarshad Shirwadkar  *
15018016e29fSHarshad Shirwadkar  * EXT4_FC_TAG_CREAT is preceded by EXT4_FC_TAG_INODE_FULL. Which means, the
15028016e29fSHarshad Shirwadkar  * inode for which we are trying to create a dentry here, should already have
15038016e29fSHarshad Shirwadkar  * been replayed before we start here.
15048016e29fSHarshad Shirwadkar  */
1505a7ba36bcSHarshad Shirwadkar static int ext4_fc_replay_create(struct super_block *sb, struct ext4_fc_tl *tl,
1506a7ba36bcSHarshad Shirwadkar 				 u8 *val)
15078016e29fSHarshad Shirwadkar {
15088016e29fSHarshad Shirwadkar 	int ret = 0;
15098016e29fSHarshad Shirwadkar 	struct inode *inode = NULL;
15108016e29fSHarshad Shirwadkar 	struct inode *dir = NULL;
15118016e29fSHarshad Shirwadkar 	struct dentry_info_args darg;
15128016e29fSHarshad Shirwadkar 
1513a7ba36bcSHarshad Shirwadkar 	tl_to_darg(&darg, tl, val);
15148016e29fSHarshad Shirwadkar 
15158016e29fSHarshad Shirwadkar 	trace_ext4_fc_replay(sb, EXT4_FC_TAG_CREAT, darg.ino,
15168016e29fSHarshad Shirwadkar 			darg.parent_ino, darg.dname_len);
15178016e29fSHarshad Shirwadkar 
15188016e29fSHarshad Shirwadkar 	/* This takes care of update group descriptor and other metadata */
15198016e29fSHarshad Shirwadkar 	ret = ext4_mark_inode_used(sb, darg.ino);
15208016e29fSHarshad Shirwadkar 	if (ret)
15218016e29fSHarshad Shirwadkar 		goto out;
15228016e29fSHarshad Shirwadkar 
15238016e29fSHarshad Shirwadkar 	inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL);
152423dd561aSYi Li 	if (IS_ERR(inode)) {
15258016e29fSHarshad Shirwadkar 		jbd_debug(1, "inode %d not found.", darg.ino);
15268016e29fSHarshad Shirwadkar 		inode = NULL;
15278016e29fSHarshad Shirwadkar 		ret = -EINVAL;
15288016e29fSHarshad Shirwadkar 		goto out;
15298016e29fSHarshad Shirwadkar 	}
15308016e29fSHarshad Shirwadkar 
15318016e29fSHarshad Shirwadkar 	if (S_ISDIR(inode->i_mode)) {
15328016e29fSHarshad Shirwadkar 		/*
15338016e29fSHarshad Shirwadkar 		 * If we are creating a directory, we need to make sure that the
15348016e29fSHarshad Shirwadkar 		 * dot and dot dot dirents are setup properly.
15358016e29fSHarshad Shirwadkar 		 */
15368016e29fSHarshad Shirwadkar 		dir = ext4_iget(sb, darg.parent_ino, EXT4_IGET_NORMAL);
153723dd561aSYi Li 		if (IS_ERR(dir)) {
15388016e29fSHarshad Shirwadkar 			jbd_debug(1, "Dir %d not found.", darg.ino);
15398016e29fSHarshad Shirwadkar 			goto out;
15408016e29fSHarshad Shirwadkar 		}
15418016e29fSHarshad Shirwadkar 		ret = ext4_init_new_dir(NULL, dir, inode);
15428016e29fSHarshad Shirwadkar 		iput(dir);
15438016e29fSHarshad Shirwadkar 		if (ret) {
15448016e29fSHarshad Shirwadkar 			ret = 0;
15458016e29fSHarshad Shirwadkar 			goto out;
15468016e29fSHarshad Shirwadkar 		}
15478016e29fSHarshad Shirwadkar 	}
15488016e29fSHarshad Shirwadkar 	ret = ext4_fc_replay_link_internal(sb, &darg, inode);
15498016e29fSHarshad Shirwadkar 	if (ret)
15508016e29fSHarshad Shirwadkar 		goto out;
15518016e29fSHarshad Shirwadkar 	set_nlink(inode, 1);
15528016e29fSHarshad Shirwadkar 	ext4_mark_inode_dirty(NULL, inode);
15538016e29fSHarshad Shirwadkar out:
15548016e29fSHarshad Shirwadkar 	if (inode)
15558016e29fSHarshad Shirwadkar 		iput(inode);
15568016e29fSHarshad Shirwadkar 	return ret;
15578016e29fSHarshad Shirwadkar }
15588016e29fSHarshad Shirwadkar 
15598016e29fSHarshad Shirwadkar /*
15608016e29fSHarshad Shirwadkar  * Record physical disk regions which are in use as per fast commit area. Our
15618016e29fSHarshad Shirwadkar  * simple replay phase allocator excludes these regions from allocation.
15628016e29fSHarshad Shirwadkar  */
15638016e29fSHarshad Shirwadkar static int ext4_fc_record_regions(struct super_block *sb, int ino,
15648016e29fSHarshad Shirwadkar 		ext4_lblk_t lblk, ext4_fsblk_t pblk, int len)
15658016e29fSHarshad Shirwadkar {
15668016e29fSHarshad Shirwadkar 	struct ext4_fc_replay_state *state;
15678016e29fSHarshad Shirwadkar 	struct ext4_fc_alloc_region *region;
15688016e29fSHarshad Shirwadkar 
15698016e29fSHarshad Shirwadkar 	state = &EXT4_SB(sb)->s_fc_replay_state;
15708016e29fSHarshad Shirwadkar 	if (state->fc_regions_used == state->fc_regions_size) {
15718016e29fSHarshad Shirwadkar 		state->fc_regions_size +=
15728016e29fSHarshad Shirwadkar 			EXT4_FC_REPLAY_REALLOC_INCREMENT;
15738016e29fSHarshad Shirwadkar 		state->fc_regions = krealloc(
15748016e29fSHarshad Shirwadkar 					state->fc_regions,
15758016e29fSHarshad Shirwadkar 					state->fc_regions_size *
15768016e29fSHarshad Shirwadkar 					sizeof(struct ext4_fc_alloc_region),
15778016e29fSHarshad Shirwadkar 					GFP_KERNEL);
15788016e29fSHarshad Shirwadkar 		if (!state->fc_regions)
15798016e29fSHarshad Shirwadkar 			return -ENOMEM;
15808016e29fSHarshad Shirwadkar 	}
15818016e29fSHarshad Shirwadkar 	region = &state->fc_regions[state->fc_regions_used++];
15828016e29fSHarshad Shirwadkar 	region->ino = ino;
15838016e29fSHarshad Shirwadkar 	region->lblk = lblk;
15848016e29fSHarshad Shirwadkar 	region->pblk = pblk;
15858016e29fSHarshad Shirwadkar 	region->len = len;
15868016e29fSHarshad Shirwadkar 
15878016e29fSHarshad Shirwadkar 	return 0;
15888016e29fSHarshad Shirwadkar }
15898016e29fSHarshad Shirwadkar 
15908016e29fSHarshad Shirwadkar /* Replay add range tag */
15918016e29fSHarshad Shirwadkar static int ext4_fc_replay_add_range(struct super_block *sb,
1592a7ba36bcSHarshad Shirwadkar 				    struct ext4_fc_tl *tl, u8 *val)
15938016e29fSHarshad Shirwadkar {
1594a7ba36bcSHarshad Shirwadkar 	struct ext4_fc_add_range fc_add_ex;
15958016e29fSHarshad Shirwadkar 	struct ext4_extent newex, *ex;
15968016e29fSHarshad Shirwadkar 	struct inode *inode;
15978016e29fSHarshad Shirwadkar 	ext4_lblk_t start, cur;
15988016e29fSHarshad Shirwadkar 	int remaining, len;
15998016e29fSHarshad Shirwadkar 	ext4_fsblk_t start_pblk;
16008016e29fSHarshad Shirwadkar 	struct ext4_map_blocks map;
16018016e29fSHarshad Shirwadkar 	struct ext4_ext_path *path = NULL;
16028016e29fSHarshad Shirwadkar 	int ret;
16038016e29fSHarshad Shirwadkar 
1604a7ba36bcSHarshad Shirwadkar 	memcpy(&fc_add_ex, val, sizeof(fc_add_ex));
1605a7ba36bcSHarshad Shirwadkar 	ex = (struct ext4_extent *)&fc_add_ex.fc_ex;
16068016e29fSHarshad Shirwadkar 
16078016e29fSHarshad Shirwadkar 	trace_ext4_fc_replay(sb, EXT4_FC_TAG_ADD_RANGE,
1608a7ba36bcSHarshad Shirwadkar 		le32_to_cpu(fc_add_ex.fc_ino), le32_to_cpu(ex->ee_block),
16098016e29fSHarshad Shirwadkar 		ext4_ext_get_actual_len(ex));
16108016e29fSHarshad Shirwadkar 
1611a7ba36bcSHarshad Shirwadkar 	inode = ext4_iget(sb, le32_to_cpu(fc_add_ex.fc_ino), EXT4_IGET_NORMAL);
161223dd561aSYi Li 	if (IS_ERR(inode)) {
16138016e29fSHarshad Shirwadkar 		jbd_debug(1, "Inode not found.");
16148016e29fSHarshad Shirwadkar 		return 0;
16158016e29fSHarshad Shirwadkar 	}
16168016e29fSHarshad Shirwadkar 
16178016e29fSHarshad Shirwadkar 	ret = ext4_fc_record_modified_inode(sb, inode->i_ino);
16188016e29fSHarshad Shirwadkar 
16198016e29fSHarshad Shirwadkar 	start = le32_to_cpu(ex->ee_block);
16208016e29fSHarshad Shirwadkar 	start_pblk = ext4_ext_pblock(ex);
16218016e29fSHarshad Shirwadkar 	len = ext4_ext_get_actual_len(ex);
16228016e29fSHarshad Shirwadkar 
16238016e29fSHarshad Shirwadkar 	cur = start;
16248016e29fSHarshad Shirwadkar 	remaining = len;
16258016e29fSHarshad Shirwadkar 	jbd_debug(1, "ADD_RANGE, lblk %d, pblk %lld, len %d, unwritten %d, inode %ld\n",
16268016e29fSHarshad Shirwadkar 		  start, start_pblk, len, ext4_ext_is_unwritten(ex),
16278016e29fSHarshad Shirwadkar 		  inode->i_ino);
16288016e29fSHarshad Shirwadkar 
16298016e29fSHarshad Shirwadkar 	while (remaining > 0) {
16308016e29fSHarshad Shirwadkar 		map.m_lblk = cur;
16318016e29fSHarshad Shirwadkar 		map.m_len = remaining;
16328016e29fSHarshad Shirwadkar 		map.m_pblk = 0;
16338016e29fSHarshad Shirwadkar 		ret = ext4_map_blocks(NULL, inode, &map, 0);
16348016e29fSHarshad Shirwadkar 
16358016e29fSHarshad Shirwadkar 		if (ret < 0) {
16368016e29fSHarshad Shirwadkar 			iput(inode);
16378016e29fSHarshad Shirwadkar 			return 0;
16388016e29fSHarshad Shirwadkar 		}
16398016e29fSHarshad Shirwadkar 
16408016e29fSHarshad Shirwadkar 		if (ret == 0) {
16418016e29fSHarshad Shirwadkar 			/* Range is not mapped */
16428016e29fSHarshad Shirwadkar 			path = ext4_find_extent(inode, cur, NULL, 0);
16438c9be1e5SHarshad Shirwadkar 			if (IS_ERR(path)) {
16448c9be1e5SHarshad Shirwadkar 				iput(inode);
16458c9be1e5SHarshad Shirwadkar 				return 0;
16468c9be1e5SHarshad Shirwadkar 			}
16478016e29fSHarshad Shirwadkar 			memset(&newex, 0, sizeof(newex));
16488016e29fSHarshad Shirwadkar 			newex.ee_block = cpu_to_le32(cur);
16498016e29fSHarshad Shirwadkar 			ext4_ext_store_pblock(
16508016e29fSHarshad Shirwadkar 				&newex, start_pblk + cur - start);
16518016e29fSHarshad Shirwadkar 			newex.ee_len = cpu_to_le16(map.m_len);
16528016e29fSHarshad Shirwadkar 			if (ext4_ext_is_unwritten(ex))
16538016e29fSHarshad Shirwadkar 				ext4_ext_mark_unwritten(&newex);
16548016e29fSHarshad Shirwadkar 			down_write(&EXT4_I(inode)->i_data_sem);
16558016e29fSHarshad Shirwadkar 			ret = ext4_ext_insert_extent(
16568016e29fSHarshad Shirwadkar 				NULL, inode, &path, &newex, 0);
16578016e29fSHarshad Shirwadkar 			up_write((&EXT4_I(inode)->i_data_sem));
16588016e29fSHarshad Shirwadkar 			ext4_ext_drop_refs(path);
16598016e29fSHarshad Shirwadkar 			kfree(path);
16608016e29fSHarshad Shirwadkar 			if (ret) {
16618016e29fSHarshad Shirwadkar 				iput(inode);
16628016e29fSHarshad Shirwadkar 				return 0;
16638016e29fSHarshad Shirwadkar 			}
16648016e29fSHarshad Shirwadkar 			goto next;
16658016e29fSHarshad Shirwadkar 		}
16668016e29fSHarshad Shirwadkar 
16678016e29fSHarshad Shirwadkar 		if (start_pblk + cur - start != map.m_pblk) {
16688016e29fSHarshad Shirwadkar 			/*
16698016e29fSHarshad Shirwadkar 			 * Logical to physical mapping changed. This can happen
16708016e29fSHarshad Shirwadkar 			 * if this range was removed and then reallocated to
16718016e29fSHarshad Shirwadkar 			 * map to new physical blocks during a fast commit.
16728016e29fSHarshad Shirwadkar 			 */
16738016e29fSHarshad Shirwadkar 			ret = ext4_ext_replay_update_ex(inode, cur, map.m_len,
16748016e29fSHarshad Shirwadkar 					ext4_ext_is_unwritten(ex),
16758016e29fSHarshad Shirwadkar 					start_pblk + cur - start);
16768016e29fSHarshad Shirwadkar 			if (ret) {
16778016e29fSHarshad Shirwadkar 				iput(inode);
16788016e29fSHarshad Shirwadkar 				return 0;
16798016e29fSHarshad Shirwadkar 			}
16808016e29fSHarshad Shirwadkar 			/*
16818016e29fSHarshad Shirwadkar 			 * Mark the old blocks as free since they aren't used
16828016e29fSHarshad Shirwadkar 			 * anymore. We maintain an array of all the modified
16838016e29fSHarshad Shirwadkar 			 * inodes. In case these blocks are still used at either
16848016e29fSHarshad Shirwadkar 			 * a different logical range in the same inode or in
16858016e29fSHarshad Shirwadkar 			 * some different inode, we will mark them as allocated
16868016e29fSHarshad Shirwadkar 			 * at the end of the FC replay using our array of
16878016e29fSHarshad Shirwadkar 			 * modified inodes.
16888016e29fSHarshad Shirwadkar 			 */
16898016e29fSHarshad Shirwadkar 			ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, 0);
16908016e29fSHarshad Shirwadkar 			goto next;
16918016e29fSHarshad Shirwadkar 		}
16928016e29fSHarshad Shirwadkar 
16938016e29fSHarshad Shirwadkar 		/* Range is mapped and needs a state change */
1694fcdf3c34SArnd Bergmann 		jbd_debug(1, "Converting from %ld to %d %lld",
16958016e29fSHarshad Shirwadkar 				map.m_flags & EXT4_MAP_UNWRITTEN,
16968016e29fSHarshad Shirwadkar 			ext4_ext_is_unwritten(ex), map.m_pblk);
16978016e29fSHarshad Shirwadkar 		ret = ext4_ext_replay_update_ex(inode, cur, map.m_len,
16988016e29fSHarshad Shirwadkar 					ext4_ext_is_unwritten(ex), map.m_pblk);
16998016e29fSHarshad Shirwadkar 		if (ret) {
17008016e29fSHarshad Shirwadkar 			iput(inode);
17018016e29fSHarshad Shirwadkar 			return 0;
17028016e29fSHarshad Shirwadkar 		}
17038016e29fSHarshad Shirwadkar 		/*
17048016e29fSHarshad Shirwadkar 		 * We may have split the extent tree while toggling the state.
17058016e29fSHarshad Shirwadkar 		 * Try to shrink the extent tree now.
17068016e29fSHarshad Shirwadkar 		 */
17078016e29fSHarshad Shirwadkar 		ext4_ext_replay_shrink_inode(inode, start + len);
17088016e29fSHarshad Shirwadkar next:
17098016e29fSHarshad Shirwadkar 		cur += map.m_len;
17108016e29fSHarshad Shirwadkar 		remaining -= map.m_len;
17118016e29fSHarshad Shirwadkar 	}
17128016e29fSHarshad Shirwadkar 	ext4_ext_replay_shrink_inode(inode, i_size_read(inode) >>
17138016e29fSHarshad Shirwadkar 					sb->s_blocksize_bits);
17148016e29fSHarshad Shirwadkar 	iput(inode);
17158016e29fSHarshad Shirwadkar 	return 0;
17168016e29fSHarshad Shirwadkar }
17178016e29fSHarshad Shirwadkar 
17188016e29fSHarshad Shirwadkar /* Replay DEL_RANGE tag */
17198016e29fSHarshad Shirwadkar static int
1720a7ba36bcSHarshad Shirwadkar ext4_fc_replay_del_range(struct super_block *sb, struct ext4_fc_tl *tl,
1721a7ba36bcSHarshad Shirwadkar 			 u8 *val)
17228016e29fSHarshad Shirwadkar {
17238016e29fSHarshad Shirwadkar 	struct inode *inode;
1724a7ba36bcSHarshad Shirwadkar 	struct ext4_fc_del_range lrange;
17258016e29fSHarshad Shirwadkar 	struct ext4_map_blocks map;
17268016e29fSHarshad Shirwadkar 	ext4_lblk_t cur, remaining;
17278016e29fSHarshad Shirwadkar 	int ret;
17288016e29fSHarshad Shirwadkar 
1729a7ba36bcSHarshad Shirwadkar 	memcpy(&lrange, val, sizeof(lrange));
1730a7ba36bcSHarshad Shirwadkar 	cur = le32_to_cpu(lrange.fc_lblk);
1731a7ba36bcSHarshad Shirwadkar 	remaining = le32_to_cpu(lrange.fc_len);
17328016e29fSHarshad Shirwadkar 
17338016e29fSHarshad Shirwadkar 	trace_ext4_fc_replay(sb, EXT4_FC_TAG_DEL_RANGE,
1734a7ba36bcSHarshad Shirwadkar 		le32_to_cpu(lrange.fc_ino), cur, remaining);
17358016e29fSHarshad Shirwadkar 
1736a7ba36bcSHarshad Shirwadkar 	inode = ext4_iget(sb, le32_to_cpu(lrange.fc_ino), EXT4_IGET_NORMAL);
173723dd561aSYi Li 	if (IS_ERR(inode)) {
1738a7ba36bcSHarshad Shirwadkar 		jbd_debug(1, "Inode %d not found", le32_to_cpu(lrange.fc_ino));
17398016e29fSHarshad Shirwadkar 		return 0;
17408016e29fSHarshad Shirwadkar 	}
17418016e29fSHarshad Shirwadkar 
17428016e29fSHarshad Shirwadkar 	ret = ext4_fc_record_modified_inode(sb, inode->i_ino);
17438016e29fSHarshad Shirwadkar 
17448016e29fSHarshad Shirwadkar 	jbd_debug(1, "DEL_RANGE, inode %ld, lblk %d, len %d\n",
1745a7ba36bcSHarshad Shirwadkar 			inode->i_ino, le32_to_cpu(lrange.fc_lblk),
1746a7ba36bcSHarshad Shirwadkar 			le32_to_cpu(lrange.fc_len));
17478016e29fSHarshad Shirwadkar 	while (remaining > 0) {
17488016e29fSHarshad Shirwadkar 		map.m_lblk = cur;
17498016e29fSHarshad Shirwadkar 		map.m_len = remaining;
17508016e29fSHarshad Shirwadkar 
17518016e29fSHarshad Shirwadkar 		ret = ext4_map_blocks(NULL, inode, &map, 0);
17528016e29fSHarshad Shirwadkar 		if (ret < 0) {
17538016e29fSHarshad Shirwadkar 			iput(inode);
17548016e29fSHarshad Shirwadkar 			return 0;
17558016e29fSHarshad Shirwadkar 		}
17568016e29fSHarshad Shirwadkar 		if (ret > 0) {
17578016e29fSHarshad Shirwadkar 			remaining -= ret;
17588016e29fSHarshad Shirwadkar 			cur += ret;
17598016e29fSHarshad Shirwadkar 			ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, 0);
17608016e29fSHarshad Shirwadkar 		} else {
17618016e29fSHarshad Shirwadkar 			remaining -= map.m_len;
17628016e29fSHarshad Shirwadkar 			cur += map.m_len;
17638016e29fSHarshad Shirwadkar 		}
17648016e29fSHarshad Shirwadkar 	}
17658016e29fSHarshad Shirwadkar 
17668016e29fSHarshad Shirwadkar 	ret = ext4_punch_hole(inode,
1767a7ba36bcSHarshad Shirwadkar 		le32_to_cpu(lrange.fc_lblk) << sb->s_blocksize_bits,
1768a7ba36bcSHarshad Shirwadkar 		le32_to_cpu(lrange.fc_len) <<  sb->s_blocksize_bits);
17698016e29fSHarshad Shirwadkar 	if (ret)
17708016e29fSHarshad Shirwadkar 		jbd_debug(1, "ext4_punch_hole returned %d", ret);
17718016e29fSHarshad Shirwadkar 	ext4_ext_replay_shrink_inode(inode,
17728016e29fSHarshad Shirwadkar 		i_size_read(inode) >> sb->s_blocksize_bits);
17738016e29fSHarshad Shirwadkar 	ext4_mark_inode_dirty(NULL, inode);
17748016e29fSHarshad Shirwadkar 	iput(inode);
17758016e29fSHarshad Shirwadkar 
17768016e29fSHarshad Shirwadkar 	return 0;
17778016e29fSHarshad Shirwadkar }
17788016e29fSHarshad Shirwadkar 
17798016e29fSHarshad Shirwadkar static void ext4_fc_set_bitmaps_and_counters(struct super_block *sb)
17808016e29fSHarshad Shirwadkar {
17818016e29fSHarshad Shirwadkar 	struct ext4_fc_replay_state *state;
17828016e29fSHarshad Shirwadkar 	struct inode *inode;
17838016e29fSHarshad Shirwadkar 	struct ext4_ext_path *path = NULL;
17848016e29fSHarshad Shirwadkar 	struct ext4_map_blocks map;
17858016e29fSHarshad Shirwadkar 	int i, ret, j;
17868016e29fSHarshad Shirwadkar 	ext4_lblk_t cur, end;
17878016e29fSHarshad Shirwadkar 
17888016e29fSHarshad Shirwadkar 	state = &EXT4_SB(sb)->s_fc_replay_state;
17898016e29fSHarshad Shirwadkar 	for (i = 0; i < state->fc_modified_inodes_used; i++) {
17908016e29fSHarshad Shirwadkar 		inode = ext4_iget(sb, state->fc_modified_inodes[i],
17918016e29fSHarshad Shirwadkar 			EXT4_IGET_NORMAL);
179223dd561aSYi Li 		if (IS_ERR(inode)) {
17938016e29fSHarshad Shirwadkar 			jbd_debug(1, "Inode %d not found.",
17948016e29fSHarshad Shirwadkar 				state->fc_modified_inodes[i]);
17958016e29fSHarshad Shirwadkar 			continue;
17968016e29fSHarshad Shirwadkar 		}
17978016e29fSHarshad Shirwadkar 		cur = 0;
17988016e29fSHarshad Shirwadkar 		end = EXT_MAX_BLOCKS;
17991ebf2178SHarshad Shirwadkar 		if (ext4_test_inode_flag(inode, EXT4_INODE_INLINE_DATA)) {
18001ebf2178SHarshad Shirwadkar 			iput(inode);
18011ebf2178SHarshad Shirwadkar 			continue;
18021ebf2178SHarshad Shirwadkar 		}
18038016e29fSHarshad Shirwadkar 		while (cur < end) {
18048016e29fSHarshad Shirwadkar 			map.m_lblk = cur;
18058016e29fSHarshad Shirwadkar 			map.m_len = end - cur;
18068016e29fSHarshad Shirwadkar 
18078016e29fSHarshad Shirwadkar 			ret = ext4_map_blocks(NULL, inode, &map, 0);
18088016e29fSHarshad Shirwadkar 			if (ret < 0)
18098016e29fSHarshad Shirwadkar 				break;
18108016e29fSHarshad Shirwadkar 
18118016e29fSHarshad Shirwadkar 			if (ret > 0) {
18128016e29fSHarshad Shirwadkar 				path = ext4_find_extent(inode, map.m_lblk, NULL, 0);
181323dd561aSYi Li 				if (!IS_ERR(path)) {
18148016e29fSHarshad Shirwadkar 					for (j = 0; j < path->p_depth; j++)
18158016e29fSHarshad Shirwadkar 						ext4_mb_mark_bb(inode->i_sb,
18168016e29fSHarshad Shirwadkar 							path[j].p_block, 1, 1);
18178016e29fSHarshad Shirwadkar 					ext4_ext_drop_refs(path);
18188016e29fSHarshad Shirwadkar 					kfree(path);
18198016e29fSHarshad Shirwadkar 				}
18208016e29fSHarshad Shirwadkar 				cur += ret;
18218016e29fSHarshad Shirwadkar 				ext4_mb_mark_bb(inode->i_sb, map.m_pblk,
18228016e29fSHarshad Shirwadkar 							map.m_len, 1);
18238016e29fSHarshad Shirwadkar 			} else {
18248016e29fSHarshad Shirwadkar 				cur = cur + (map.m_len ? map.m_len : 1);
18258016e29fSHarshad Shirwadkar 			}
18268016e29fSHarshad Shirwadkar 		}
18278016e29fSHarshad Shirwadkar 		iput(inode);
18288016e29fSHarshad Shirwadkar 	}
18298016e29fSHarshad Shirwadkar }
18308016e29fSHarshad Shirwadkar 
18318016e29fSHarshad Shirwadkar /*
18328016e29fSHarshad Shirwadkar  * Check if block is in excluded regions for block allocation. The simple
18338016e29fSHarshad Shirwadkar  * allocator that runs during replay phase is calls this function to see
18348016e29fSHarshad Shirwadkar  * if it is okay to use a block.
18358016e29fSHarshad Shirwadkar  */
18368016e29fSHarshad Shirwadkar bool ext4_fc_replay_check_excluded(struct super_block *sb, ext4_fsblk_t blk)
18378016e29fSHarshad Shirwadkar {
18388016e29fSHarshad Shirwadkar 	int i;
18398016e29fSHarshad Shirwadkar 	struct ext4_fc_replay_state *state;
18408016e29fSHarshad Shirwadkar 
18418016e29fSHarshad Shirwadkar 	state = &EXT4_SB(sb)->s_fc_replay_state;
18428016e29fSHarshad Shirwadkar 	for (i = 0; i < state->fc_regions_valid; i++) {
18438016e29fSHarshad Shirwadkar 		if (state->fc_regions[i].ino == 0 ||
18448016e29fSHarshad Shirwadkar 			state->fc_regions[i].len == 0)
18458016e29fSHarshad Shirwadkar 			continue;
18468016e29fSHarshad Shirwadkar 		if (blk >= state->fc_regions[i].pblk &&
18478016e29fSHarshad Shirwadkar 		    blk < state->fc_regions[i].pblk + state->fc_regions[i].len)
18488016e29fSHarshad Shirwadkar 			return true;
18498016e29fSHarshad Shirwadkar 	}
18508016e29fSHarshad Shirwadkar 	return false;
18518016e29fSHarshad Shirwadkar }
18528016e29fSHarshad Shirwadkar 
18538016e29fSHarshad Shirwadkar /* Cleanup function called after replay */
18548016e29fSHarshad Shirwadkar void ext4_fc_replay_cleanup(struct super_block *sb)
18558016e29fSHarshad Shirwadkar {
18568016e29fSHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(sb);
18578016e29fSHarshad Shirwadkar 
18588016e29fSHarshad Shirwadkar 	sbi->s_mount_state &= ~EXT4_FC_REPLAY;
18598016e29fSHarshad Shirwadkar 	kfree(sbi->s_fc_replay_state.fc_regions);
18608016e29fSHarshad Shirwadkar 	kfree(sbi->s_fc_replay_state.fc_modified_inodes);
18618016e29fSHarshad Shirwadkar }
18628016e29fSHarshad Shirwadkar 
18638016e29fSHarshad Shirwadkar /*
18648016e29fSHarshad Shirwadkar  * Recovery Scan phase handler
18658016e29fSHarshad Shirwadkar  *
18668016e29fSHarshad Shirwadkar  * This function is called during the scan phase and is responsible
18678016e29fSHarshad Shirwadkar  * for doing following things:
18688016e29fSHarshad Shirwadkar  * - Make sure the fast commit area has valid tags for replay
18698016e29fSHarshad Shirwadkar  * - Count number of tags that need to be replayed by the replay handler
18708016e29fSHarshad Shirwadkar  * - Verify CRC
18718016e29fSHarshad Shirwadkar  * - Create a list of excluded blocks for allocation during replay phase
18728016e29fSHarshad Shirwadkar  *
18738016e29fSHarshad Shirwadkar  * This function returns JBD2_FC_REPLAY_CONTINUE to indicate that SCAN is
18748016e29fSHarshad Shirwadkar  * incomplete and JBD2 should send more blocks. It returns JBD2_FC_REPLAY_STOP
18758016e29fSHarshad Shirwadkar  * to indicate that scan has finished and JBD2 can now start replay phase.
18768016e29fSHarshad Shirwadkar  * It returns a negative error to indicate that there was an error. At the end
18778016e29fSHarshad Shirwadkar  * of a successful scan phase, sbi->s_fc_replay_state.fc_replay_num_tags is set
18788016e29fSHarshad Shirwadkar  * to indicate the number of tags that need to replayed during the replay phase.
18798016e29fSHarshad Shirwadkar  */
18808016e29fSHarshad Shirwadkar static int ext4_fc_replay_scan(journal_t *journal,
18818016e29fSHarshad Shirwadkar 				struct buffer_head *bh, int off,
18828016e29fSHarshad Shirwadkar 				tid_t expected_tid)
18838016e29fSHarshad Shirwadkar {
18848016e29fSHarshad Shirwadkar 	struct super_block *sb = journal->j_private;
18858016e29fSHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(sb);
18868016e29fSHarshad Shirwadkar 	struct ext4_fc_replay_state *state;
18878016e29fSHarshad Shirwadkar 	int ret = JBD2_FC_REPLAY_CONTINUE;
1888a7ba36bcSHarshad Shirwadkar 	struct ext4_fc_add_range ext;
1889a7ba36bcSHarshad Shirwadkar 	struct ext4_fc_tl tl;
1890a7ba36bcSHarshad Shirwadkar 	struct ext4_fc_tail tail;
1891a7ba36bcSHarshad Shirwadkar 	__u8 *start, *end, *cur, *val;
1892a7ba36bcSHarshad Shirwadkar 	struct ext4_fc_head head;
18938016e29fSHarshad Shirwadkar 	struct ext4_extent *ex;
18948016e29fSHarshad Shirwadkar 
18958016e29fSHarshad Shirwadkar 	state = &sbi->s_fc_replay_state;
18968016e29fSHarshad Shirwadkar 
18978016e29fSHarshad Shirwadkar 	start = (u8 *)bh->b_data;
18988016e29fSHarshad Shirwadkar 	end = (__u8 *)bh->b_data + journal->j_blocksize - 1;
18998016e29fSHarshad Shirwadkar 
19008016e29fSHarshad Shirwadkar 	if (state->fc_replay_expected_off == 0) {
19018016e29fSHarshad Shirwadkar 		state->fc_cur_tag = 0;
19028016e29fSHarshad Shirwadkar 		state->fc_replay_num_tags = 0;
19038016e29fSHarshad Shirwadkar 		state->fc_crc = 0;
19048016e29fSHarshad Shirwadkar 		state->fc_regions = NULL;
19058016e29fSHarshad Shirwadkar 		state->fc_regions_valid = state->fc_regions_used =
19068016e29fSHarshad Shirwadkar 			state->fc_regions_size = 0;
19078016e29fSHarshad Shirwadkar 		/* Check if we can stop early */
19088016e29fSHarshad Shirwadkar 		if (le16_to_cpu(((struct ext4_fc_tl *)start)->fc_tag)
19098016e29fSHarshad Shirwadkar 			!= EXT4_FC_TAG_HEAD)
19108016e29fSHarshad Shirwadkar 			return 0;
19118016e29fSHarshad Shirwadkar 	}
19128016e29fSHarshad Shirwadkar 
19138016e29fSHarshad Shirwadkar 	if (off != state->fc_replay_expected_off) {
19148016e29fSHarshad Shirwadkar 		ret = -EFSCORRUPTED;
19158016e29fSHarshad Shirwadkar 		goto out_err;
19168016e29fSHarshad Shirwadkar 	}
19178016e29fSHarshad Shirwadkar 
19188016e29fSHarshad Shirwadkar 	state->fc_replay_expected_off++;
1919a7ba36bcSHarshad Shirwadkar 	for (cur = start; cur < end; cur = cur + sizeof(tl) + le16_to_cpu(tl.fc_len)) {
1920a7ba36bcSHarshad Shirwadkar 		memcpy(&tl, cur, sizeof(tl));
1921a7ba36bcSHarshad Shirwadkar 		val = cur + sizeof(tl);
19228016e29fSHarshad Shirwadkar 		jbd_debug(3, "Scan phase, tag:%s, blk %lld\n",
1923a7ba36bcSHarshad Shirwadkar 			  tag2str(le16_to_cpu(tl.fc_tag)), bh->b_blocknr);
1924a7ba36bcSHarshad Shirwadkar 		switch (le16_to_cpu(tl.fc_tag)) {
19258016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_ADD_RANGE:
1926a7ba36bcSHarshad Shirwadkar 			memcpy(&ext, val, sizeof(ext));
1927a7ba36bcSHarshad Shirwadkar 			ex = (struct ext4_extent *)&ext.fc_ex;
19288016e29fSHarshad Shirwadkar 			ret = ext4_fc_record_regions(sb,
1929a7ba36bcSHarshad Shirwadkar 				le32_to_cpu(ext.fc_ino),
19308016e29fSHarshad Shirwadkar 				le32_to_cpu(ex->ee_block), ext4_ext_pblock(ex),
19318016e29fSHarshad Shirwadkar 				ext4_ext_get_actual_len(ex));
19328016e29fSHarshad Shirwadkar 			if (ret < 0)
19338016e29fSHarshad Shirwadkar 				break;
19348016e29fSHarshad Shirwadkar 			ret = JBD2_FC_REPLAY_CONTINUE;
19358016e29fSHarshad Shirwadkar 			fallthrough;
19368016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_DEL_RANGE:
19378016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_LINK:
19388016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_UNLINK:
19398016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_CREAT:
19408016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_INODE:
19418016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_PAD:
19428016e29fSHarshad Shirwadkar 			state->fc_cur_tag++;
1943a7ba36bcSHarshad Shirwadkar 			state->fc_crc = ext4_chksum(sbi, state->fc_crc, cur,
1944a7ba36bcSHarshad Shirwadkar 					sizeof(tl) + le16_to_cpu(tl.fc_len));
19458016e29fSHarshad Shirwadkar 			break;
19468016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_TAIL:
19478016e29fSHarshad Shirwadkar 			state->fc_cur_tag++;
1948a7ba36bcSHarshad Shirwadkar 			memcpy(&tail, val, sizeof(tail));
1949a7ba36bcSHarshad Shirwadkar 			state->fc_crc = ext4_chksum(sbi, state->fc_crc, cur,
1950a7ba36bcSHarshad Shirwadkar 						sizeof(tl) +
19518016e29fSHarshad Shirwadkar 						offsetof(struct ext4_fc_tail,
19528016e29fSHarshad Shirwadkar 						fc_crc));
1953a7ba36bcSHarshad Shirwadkar 			if (le32_to_cpu(tail.fc_tid) == expected_tid &&
1954a7ba36bcSHarshad Shirwadkar 				le32_to_cpu(tail.fc_crc) == state->fc_crc) {
19558016e29fSHarshad Shirwadkar 				state->fc_replay_num_tags = state->fc_cur_tag;
19568016e29fSHarshad Shirwadkar 				state->fc_regions_valid =
19578016e29fSHarshad Shirwadkar 					state->fc_regions_used;
19588016e29fSHarshad Shirwadkar 			} else {
19598016e29fSHarshad Shirwadkar 				ret = state->fc_replay_num_tags ?
19608016e29fSHarshad Shirwadkar 					JBD2_FC_REPLAY_STOP : -EFSBADCRC;
19618016e29fSHarshad Shirwadkar 			}
19628016e29fSHarshad Shirwadkar 			state->fc_crc = 0;
19638016e29fSHarshad Shirwadkar 			break;
19648016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_HEAD:
1965a7ba36bcSHarshad Shirwadkar 			memcpy(&head, val, sizeof(head));
1966a7ba36bcSHarshad Shirwadkar 			if (le32_to_cpu(head.fc_features) &
19678016e29fSHarshad Shirwadkar 				~EXT4_FC_SUPPORTED_FEATURES) {
19688016e29fSHarshad Shirwadkar 				ret = -EOPNOTSUPP;
19698016e29fSHarshad Shirwadkar 				break;
19708016e29fSHarshad Shirwadkar 			}
1971a7ba36bcSHarshad Shirwadkar 			if (le32_to_cpu(head.fc_tid) != expected_tid) {
19728016e29fSHarshad Shirwadkar 				ret = JBD2_FC_REPLAY_STOP;
19738016e29fSHarshad Shirwadkar 				break;
19748016e29fSHarshad Shirwadkar 			}
19758016e29fSHarshad Shirwadkar 			state->fc_cur_tag++;
1976a7ba36bcSHarshad Shirwadkar 			state->fc_crc = ext4_chksum(sbi, state->fc_crc, cur,
1977a7ba36bcSHarshad Shirwadkar 					    sizeof(tl) + le16_to_cpu(tl.fc_len));
19788016e29fSHarshad Shirwadkar 			break;
19798016e29fSHarshad Shirwadkar 		default:
19808016e29fSHarshad Shirwadkar 			ret = state->fc_replay_num_tags ?
19818016e29fSHarshad Shirwadkar 				JBD2_FC_REPLAY_STOP : -ECANCELED;
19828016e29fSHarshad Shirwadkar 		}
19838016e29fSHarshad Shirwadkar 		if (ret < 0 || ret == JBD2_FC_REPLAY_STOP)
19848016e29fSHarshad Shirwadkar 			break;
19858016e29fSHarshad Shirwadkar 	}
19868016e29fSHarshad Shirwadkar 
19878016e29fSHarshad Shirwadkar out_err:
19888016e29fSHarshad Shirwadkar 	trace_ext4_fc_replay_scan(sb, ret, off);
19898016e29fSHarshad Shirwadkar 	return ret;
19908016e29fSHarshad Shirwadkar }
19918016e29fSHarshad Shirwadkar 
19925b849b5fSHarshad Shirwadkar /*
19935b849b5fSHarshad Shirwadkar  * Main recovery path entry point.
19948016e29fSHarshad Shirwadkar  * The meaning of return codes is similar as above.
19955b849b5fSHarshad Shirwadkar  */
19965b849b5fSHarshad Shirwadkar static int ext4_fc_replay(journal_t *journal, struct buffer_head *bh,
19975b849b5fSHarshad Shirwadkar 				enum passtype pass, int off, tid_t expected_tid)
19985b849b5fSHarshad Shirwadkar {
19998016e29fSHarshad Shirwadkar 	struct super_block *sb = journal->j_private;
20008016e29fSHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(sb);
2001a7ba36bcSHarshad Shirwadkar 	struct ext4_fc_tl tl;
2002a7ba36bcSHarshad Shirwadkar 	__u8 *start, *end, *cur, *val;
20038016e29fSHarshad Shirwadkar 	int ret = JBD2_FC_REPLAY_CONTINUE;
20048016e29fSHarshad Shirwadkar 	struct ext4_fc_replay_state *state = &sbi->s_fc_replay_state;
2005a7ba36bcSHarshad Shirwadkar 	struct ext4_fc_tail tail;
20068016e29fSHarshad Shirwadkar 
20078016e29fSHarshad Shirwadkar 	if (pass == PASS_SCAN) {
20088016e29fSHarshad Shirwadkar 		state->fc_current_pass = PASS_SCAN;
20098016e29fSHarshad Shirwadkar 		return ext4_fc_replay_scan(journal, bh, off, expected_tid);
20108016e29fSHarshad Shirwadkar 	}
20118016e29fSHarshad Shirwadkar 
20128016e29fSHarshad Shirwadkar 	if (state->fc_current_pass != pass) {
20138016e29fSHarshad Shirwadkar 		state->fc_current_pass = pass;
20148016e29fSHarshad Shirwadkar 		sbi->s_mount_state |= EXT4_FC_REPLAY;
20158016e29fSHarshad Shirwadkar 	}
20168016e29fSHarshad Shirwadkar 	if (!sbi->s_fc_replay_state.fc_replay_num_tags) {
20178016e29fSHarshad Shirwadkar 		jbd_debug(1, "Replay stops\n");
20188016e29fSHarshad Shirwadkar 		ext4_fc_set_bitmaps_and_counters(sb);
20195b849b5fSHarshad Shirwadkar 		return 0;
20205b849b5fSHarshad Shirwadkar 	}
20215b849b5fSHarshad Shirwadkar 
20228016e29fSHarshad Shirwadkar #ifdef CONFIG_EXT4_DEBUG
20238016e29fSHarshad Shirwadkar 	if (sbi->s_fc_debug_max_replay && off >= sbi->s_fc_debug_max_replay) {
20248016e29fSHarshad Shirwadkar 		pr_warn("Dropping fc block %d because max_replay set\n", off);
20258016e29fSHarshad Shirwadkar 		return JBD2_FC_REPLAY_STOP;
20268016e29fSHarshad Shirwadkar 	}
20278016e29fSHarshad Shirwadkar #endif
20288016e29fSHarshad Shirwadkar 
20298016e29fSHarshad Shirwadkar 	start = (u8 *)bh->b_data;
20308016e29fSHarshad Shirwadkar 	end = (__u8 *)bh->b_data + journal->j_blocksize - 1;
20318016e29fSHarshad Shirwadkar 
2032a7ba36bcSHarshad Shirwadkar 	for (cur = start; cur < end; cur = cur + sizeof(tl) + le16_to_cpu(tl.fc_len)) {
2033a7ba36bcSHarshad Shirwadkar 		memcpy(&tl, cur, sizeof(tl));
2034a7ba36bcSHarshad Shirwadkar 		val = cur + sizeof(tl);
2035a7ba36bcSHarshad Shirwadkar 
20368016e29fSHarshad Shirwadkar 		if (state->fc_replay_num_tags == 0) {
20378016e29fSHarshad Shirwadkar 			ret = JBD2_FC_REPLAY_STOP;
20388016e29fSHarshad Shirwadkar 			ext4_fc_set_bitmaps_and_counters(sb);
20398016e29fSHarshad Shirwadkar 			break;
20408016e29fSHarshad Shirwadkar 		}
20418016e29fSHarshad Shirwadkar 		jbd_debug(3, "Replay phase, tag:%s\n",
2042a7ba36bcSHarshad Shirwadkar 				tag2str(le16_to_cpu(tl.fc_tag)));
20438016e29fSHarshad Shirwadkar 		state->fc_replay_num_tags--;
2044a7ba36bcSHarshad Shirwadkar 		switch (le16_to_cpu(tl.fc_tag)) {
20458016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_LINK:
2046a7ba36bcSHarshad Shirwadkar 			ret = ext4_fc_replay_link(sb, &tl, val);
20478016e29fSHarshad Shirwadkar 			break;
20488016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_UNLINK:
2049a7ba36bcSHarshad Shirwadkar 			ret = ext4_fc_replay_unlink(sb, &tl, val);
20508016e29fSHarshad Shirwadkar 			break;
20518016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_ADD_RANGE:
2052a7ba36bcSHarshad Shirwadkar 			ret = ext4_fc_replay_add_range(sb, &tl, val);
20538016e29fSHarshad Shirwadkar 			break;
20548016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_CREAT:
2055a7ba36bcSHarshad Shirwadkar 			ret = ext4_fc_replay_create(sb, &tl, val);
20568016e29fSHarshad Shirwadkar 			break;
20578016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_DEL_RANGE:
2058a7ba36bcSHarshad Shirwadkar 			ret = ext4_fc_replay_del_range(sb, &tl, val);
20598016e29fSHarshad Shirwadkar 			break;
20608016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_INODE:
2061a7ba36bcSHarshad Shirwadkar 			ret = ext4_fc_replay_inode(sb, &tl, val);
20628016e29fSHarshad Shirwadkar 			break;
20638016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_PAD:
20648016e29fSHarshad Shirwadkar 			trace_ext4_fc_replay(sb, EXT4_FC_TAG_PAD, 0,
2065a7ba36bcSHarshad Shirwadkar 					     le16_to_cpu(tl.fc_len), 0);
20668016e29fSHarshad Shirwadkar 			break;
20678016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_TAIL:
20688016e29fSHarshad Shirwadkar 			trace_ext4_fc_replay(sb, EXT4_FC_TAG_TAIL, 0,
2069a7ba36bcSHarshad Shirwadkar 					     le16_to_cpu(tl.fc_len), 0);
2070a7ba36bcSHarshad Shirwadkar 			memcpy(&tail, val, sizeof(tail));
2071a7ba36bcSHarshad Shirwadkar 			WARN_ON(le32_to_cpu(tail.fc_tid) != expected_tid);
20728016e29fSHarshad Shirwadkar 			break;
20738016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_HEAD:
20748016e29fSHarshad Shirwadkar 			break;
20758016e29fSHarshad Shirwadkar 		default:
2076a7ba36bcSHarshad Shirwadkar 			trace_ext4_fc_replay(sb, le16_to_cpu(tl.fc_tag), 0,
2077a7ba36bcSHarshad Shirwadkar 					     le16_to_cpu(tl.fc_len), 0);
20788016e29fSHarshad Shirwadkar 			ret = -ECANCELED;
20798016e29fSHarshad Shirwadkar 			break;
20808016e29fSHarshad Shirwadkar 		}
20818016e29fSHarshad Shirwadkar 		if (ret < 0)
20828016e29fSHarshad Shirwadkar 			break;
20838016e29fSHarshad Shirwadkar 		ret = JBD2_FC_REPLAY_CONTINUE;
20848016e29fSHarshad Shirwadkar 	}
20858016e29fSHarshad Shirwadkar 	return ret;
20868016e29fSHarshad Shirwadkar }
20878016e29fSHarshad Shirwadkar 
20886866d7b3SHarshad Shirwadkar void ext4_fc_init(struct super_block *sb, journal_t *journal)
20896866d7b3SHarshad Shirwadkar {
20905b849b5fSHarshad Shirwadkar 	/*
20915b849b5fSHarshad Shirwadkar 	 * We set replay callback even if fast commit disabled because we may
20925b849b5fSHarshad Shirwadkar 	 * could still have fast commit blocks that need to be replayed even if
20935b849b5fSHarshad Shirwadkar 	 * fast commit has now been turned off.
20945b849b5fSHarshad Shirwadkar 	 */
20955b849b5fSHarshad Shirwadkar 	journal->j_fc_replay_callback = ext4_fc_replay;
20966866d7b3SHarshad Shirwadkar 	if (!test_opt2(sb, JOURNAL_FAST_COMMIT))
20976866d7b3SHarshad Shirwadkar 		return;
2098ff780b91SHarshad Shirwadkar 	journal->j_fc_cleanup_callback = ext4_fc_cleanup;
20996866d7b3SHarshad Shirwadkar }
2100aa75f4d3SHarshad Shirwadkar 
/*
 * Human-readable names of the fast commit ineligibility reasons, printed by
 * ext4_fc_info_show(), which indexes this table by reason number. Both the
 * strings and the table itself are read-only.
 */
static const char * const fc_ineligible_reasons[] = {
	"Extended attributes changed",
	"Cross rename",
	"Journal flag changed",
	"Insufficient memory",
	"Swap boot",
	"Resize",
	"Dir renamed",
	"Falloc range op",
	"Data journalling",
	"FC Commit Failed"
};
2113ce8c59d1SHarshad Shirwadkar 
2114ce8c59d1SHarshad Shirwadkar int ext4_fc_info_show(struct seq_file *seq, void *v)
2115ce8c59d1SHarshad Shirwadkar {
2116ce8c59d1SHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB((struct super_block *)seq->private);
2117ce8c59d1SHarshad Shirwadkar 	struct ext4_fc_stats *stats = &sbi->s_fc_stats;
2118ce8c59d1SHarshad Shirwadkar 	int i;
2119ce8c59d1SHarshad Shirwadkar 
2120ce8c59d1SHarshad Shirwadkar 	if (v != SEQ_START_TOKEN)
2121ce8c59d1SHarshad Shirwadkar 		return 0;
2122ce8c59d1SHarshad Shirwadkar 
2123ce8c59d1SHarshad Shirwadkar 	seq_printf(seq,
2124ce8c59d1SHarshad Shirwadkar 		"fc stats:\n%ld commits\n%ld ineligible\n%ld numblks\n%lluus avg_commit_time\n",
2125ce8c59d1SHarshad Shirwadkar 		   stats->fc_num_commits, stats->fc_ineligible_commits,
2126ce8c59d1SHarshad Shirwadkar 		   stats->fc_numblks,
2127ce8c59d1SHarshad Shirwadkar 		   div_u64(sbi->s_fc_avg_commit_time, 1000));
2128ce8c59d1SHarshad Shirwadkar 	seq_puts(seq, "Ineligible reasons:\n");
2129ce8c59d1SHarshad Shirwadkar 	for (i = 0; i < EXT4_FC_REASON_MAX; i++)
2130ce8c59d1SHarshad Shirwadkar 		seq_printf(seq, "\"%s\":\t%d\n", fc_ineligible_reasons[i],
2131ce8c59d1SHarshad Shirwadkar 			stats->fc_ineligible_reason_count[i]);
2132ce8c59d1SHarshad Shirwadkar 
2133ce8c59d1SHarshad Shirwadkar 	return 0;
2134ce8c59d1SHarshad Shirwadkar }
2135ce8c59d1SHarshad Shirwadkar 
2136aa75f4d3SHarshad Shirwadkar int __init ext4_fc_init_dentry_cache(void)
2137aa75f4d3SHarshad Shirwadkar {
2138aa75f4d3SHarshad Shirwadkar 	ext4_fc_dentry_cachep = KMEM_CACHE(ext4_fc_dentry_update,
2139aa75f4d3SHarshad Shirwadkar 					   SLAB_RECLAIM_ACCOUNT);
2140aa75f4d3SHarshad Shirwadkar 
2141aa75f4d3SHarshad Shirwadkar 	if (ext4_fc_dentry_cachep == NULL)
2142aa75f4d3SHarshad Shirwadkar 		return -ENOMEM;
2143aa75f4d3SHarshad Shirwadkar 
2144aa75f4d3SHarshad Shirwadkar 	return 0;
2145aa75f4d3SHarshad Shirwadkar }
2146