xref: /openbmc/linux/fs/ext4/fast_commit.c (revision ce8c59d1)
16866d7b3SHarshad Shirwadkar // SPDX-License-Identifier: GPL-2.0
26866d7b3SHarshad Shirwadkar 
36866d7b3SHarshad Shirwadkar /*
46866d7b3SHarshad Shirwadkar  * fs/ext4/fast_commit.c
56866d7b3SHarshad Shirwadkar  *
66866d7b3SHarshad Shirwadkar  * Written by Harshad Shirwadkar <harshadshirwadkar@gmail.com>
76866d7b3SHarshad Shirwadkar  *
86866d7b3SHarshad Shirwadkar  * Ext4 fast commits routines.
96866d7b3SHarshad Shirwadkar  */
10aa75f4d3SHarshad Shirwadkar #include "ext4.h"
116866d7b3SHarshad Shirwadkar #include "ext4_jbd2.h"
12aa75f4d3SHarshad Shirwadkar #include "ext4_extents.h"
13aa75f4d3SHarshad Shirwadkar #include "mballoc.h"
14aa75f4d3SHarshad Shirwadkar 
15aa75f4d3SHarshad Shirwadkar /*
16aa75f4d3SHarshad Shirwadkar  * Ext4 Fast Commits
17aa75f4d3SHarshad Shirwadkar  * -----------------
18aa75f4d3SHarshad Shirwadkar  *
19aa75f4d3SHarshad Shirwadkar  * Ext4 fast commits implement fine grained journalling for Ext4.
20aa75f4d3SHarshad Shirwadkar  *
21aa75f4d3SHarshad Shirwadkar  * Fast commits are organized as a log of tag-length-value (TLV) structs. (See
22aa75f4d3SHarshad Shirwadkar  * struct ext4_fc_tl). Each TLV contains some delta that is replayed TLV by
23aa75f4d3SHarshad Shirwadkar  * TLV during the recovery phase. For the scenarios for which we currently
24aa75f4d3SHarshad Shirwadkar  * don't have replay code, fast commit falls back to full commits.
25aa75f4d3SHarshad Shirwadkar  * Fast commits record delta in one of the following three categories.
26aa75f4d3SHarshad Shirwadkar  *
27aa75f4d3SHarshad Shirwadkar  * (A) Directory entry updates:
28aa75f4d3SHarshad Shirwadkar  *
29aa75f4d3SHarshad Shirwadkar  * - EXT4_FC_TAG_UNLINK		- records directory entry unlink
30aa75f4d3SHarshad Shirwadkar  * - EXT4_FC_TAG_LINK		- records directory entry link
31aa75f4d3SHarshad Shirwadkar  * - EXT4_FC_TAG_CREAT		- records inode and directory entry creation
32aa75f4d3SHarshad Shirwadkar  *
33aa75f4d3SHarshad Shirwadkar  * (B) File specific data range updates:
34aa75f4d3SHarshad Shirwadkar  *
35aa75f4d3SHarshad Shirwadkar  * - EXT4_FC_TAG_ADD_RANGE	- records addition of new blocks to an inode
36aa75f4d3SHarshad Shirwadkar  * - EXT4_FC_TAG_DEL_RANGE	- records deletion of blocks from an inode
37aa75f4d3SHarshad Shirwadkar  *
38aa75f4d3SHarshad Shirwadkar  * (C) Inode metadata (mtime / ctime etc):
39aa75f4d3SHarshad Shirwadkar  *
40aa75f4d3SHarshad Shirwadkar  * - EXT4_FC_TAG_INODE		- record the inode that should be replayed
41aa75f4d3SHarshad Shirwadkar  *				  during recovery. Note that iblocks field is
42aa75f4d3SHarshad Shirwadkar  *				  not replayed and instead derived during
43aa75f4d3SHarshad Shirwadkar  *				  replay.
44aa75f4d3SHarshad Shirwadkar  * Commit Operation
45aa75f4d3SHarshad Shirwadkar  * ----------------
46aa75f4d3SHarshad Shirwadkar  * With fast commits, we maintain all the directory entry operations in the
47aa75f4d3SHarshad Shirwadkar  * order in which they are issued in an in-memory queue. This queue is flushed
48aa75f4d3SHarshad Shirwadkar  * to disk during the commit operation. We also maintain a list of inodes
49aa75f4d3SHarshad Shirwadkar  * that need to be committed during a fast commit in another in memory queue of
50aa75f4d3SHarshad Shirwadkar  * inodes. During the commit operation, we commit in the following order:
51aa75f4d3SHarshad Shirwadkar  *
52aa75f4d3SHarshad Shirwadkar  * [1] Lock inodes for any further data updates by setting COMMITTING state
53aa75f4d3SHarshad Shirwadkar  * [2] Submit data buffers of all the inodes
54aa75f4d3SHarshad Shirwadkar  * [3] Wait for [2] to complete
55aa75f4d3SHarshad Shirwadkar  * [4] Commit all the directory entry updates in the fast commit space
56aa75f4d3SHarshad Shirwadkar  * [5] Commit all the changed inode structures
57aa75f4d3SHarshad Shirwadkar  * [6] Write tail tag (this tag ensures the atomicity, please read the following
58aa75f4d3SHarshad Shirwadkar  *     section for more details).
59aa75f4d3SHarshad Shirwadkar  * [7] Wait for [4], [5] and [6] to complete.
60aa75f4d3SHarshad Shirwadkar  *
61aa75f4d3SHarshad Shirwadkar  * All the inode updates must call ext4_fc_start_update() before starting an
62aa75f4d3SHarshad Shirwadkar  * update. If such an ongoing update is present, fast commit waits for it to
63aa75f4d3SHarshad Shirwadkar  * complete. The completion of such an update is marked by
64aa75f4d3SHarshad Shirwadkar  * ext4_fc_stop_update().
65aa75f4d3SHarshad Shirwadkar  *
66aa75f4d3SHarshad Shirwadkar  * Fast Commit Ineligibility
67aa75f4d3SHarshad Shirwadkar  * -------------------------
 * Not all operations are supported by fast commits today (e.g. extended
 * attributes). Fast commit ineligibility is marked by calling one of the
 * two following functions:
71aa75f4d3SHarshad Shirwadkar  *
72aa75f4d3SHarshad Shirwadkar  * - ext4_fc_mark_ineligible(): This makes next fast commit operation to fall
73aa75f4d3SHarshad Shirwadkar  *   back to full commit. This is useful in case of transient errors.
74aa75f4d3SHarshad Shirwadkar  *
 * - ext4_fc_start_ineligible() and ext4_fc_stop_ineligible() - This makes all
 *   the fast commits happening between ext4_fc_start_ineligible() and
 *   ext4_fc_stop_ineligible(), and one fast commit after the call to
 *   ext4_fc_stop_ineligible(), fall back to full commits. It is important to
 *   make one more fast commit fall back to a full commit after the stop call
 *   so that it is guaranteed that the fast commit ineligible operation
 *   contained within ext4_fc_start_ineligible() and
 *   ext4_fc_stop_ineligible() is followed by at least 1 full commit.
83aa75f4d3SHarshad Shirwadkar  *
84aa75f4d3SHarshad Shirwadkar  * Atomicity of commits
85aa75f4d3SHarshad Shirwadkar  * --------------------
 * In order to guarantee atomicity during the commit operation, fast commit
 * uses "EXT4_FC_TAG_TAIL" tag that marks a fast commit as complete. Tail
 * tag contains CRC of the contents and TID of the transaction after which
 * this fast commit should be applied. Recovery code replays fast commit
 * logs only if there's at least 1 valid tail present. For every fast commit
 * operation, there is 1 tail. This means, we may end up with multiple tails
 * in the fast commit space. Here's an example:
93aa75f4d3SHarshad Shirwadkar  *
94aa75f4d3SHarshad Shirwadkar  * - Create a new file A and remove existing file B
95aa75f4d3SHarshad Shirwadkar  * - fsync()
96aa75f4d3SHarshad Shirwadkar  * - Append contents to file A
97aa75f4d3SHarshad Shirwadkar  * - Truncate file A
98aa75f4d3SHarshad Shirwadkar  * - fsync()
99aa75f4d3SHarshad Shirwadkar  *
100aa75f4d3SHarshad Shirwadkar  * The fast commit space at the end of above operations would look like this:
101aa75f4d3SHarshad Shirwadkar  *      [HEAD] [CREAT A] [UNLINK B] [TAIL] [ADD_RANGE A] [DEL_RANGE A] [TAIL]
102aa75f4d3SHarshad Shirwadkar  *             |<---  Fast Commit 1   --->|<---      Fast Commit 2     ---->|
103aa75f4d3SHarshad Shirwadkar  *
104aa75f4d3SHarshad Shirwadkar  * Replay code should thus check for all the valid tails in the FC area.
105aa75f4d3SHarshad Shirwadkar  *
106aa75f4d3SHarshad Shirwadkar  * TODOs
107aa75f4d3SHarshad Shirwadkar  * -----
 * 1) Make fast commit atomic updates more fine grained. Today, a fast commit
 *    eligible update must be protected within ext4_fc_start_update() and
 *    ext4_fc_stop_update(). These routines are called from much higher
 *    level routines. This can be made more fine grained by combining with
 *    ext4_journal_start().
 *
 * 2) Same as above for ext4_fc_start_ineligible() and
 *    ext4_fc_stop_ineligible().
115aa75f4d3SHarshad Shirwadkar  *
116aa75f4d3SHarshad Shirwadkar  * 3) Handle more ineligible cases.
117aa75f4d3SHarshad Shirwadkar  */
118aa75f4d3SHarshad Shirwadkar 
119aa75f4d3SHarshad Shirwadkar #include <trace/events/ext4.h>
120aa75f4d3SHarshad Shirwadkar static struct kmem_cache *ext4_fc_dentry_cachep;
121aa75f4d3SHarshad Shirwadkar 
122aa75f4d3SHarshad Shirwadkar static void ext4_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
123aa75f4d3SHarshad Shirwadkar {
124aa75f4d3SHarshad Shirwadkar 	BUFFER_TRACE(bh, "");
125aa75f4d3SHarshad Shirwadkar 	if (uptodate) {
126aa75f4d3SHarshad Shirwadkar 		ext4_debug("%s: Block %lld up-to-date",
127aa75f4d3SHarshad Shirwadkar 			   __func__, bh->b_blocknr);
128aa75f4d3SHarshad Shirwadkar 		set_buffer_uptodate(bh);
129aa75f4d3SHarshad Shirwadkar 	} else {
130aa75f4d3SHarshad Shirwadkar 		ext4_debug("%s: Block %lld not up-to-date",
131aa75f4d3SHarshad Shirwadkar 			   __func__, bh->b_blocknr);
132aa75f4d3SHarshad Shirwadkar 		clear_buffer_uptodate(bh);
133aa75f4d3SHarshad Shirwadkar 	}
134aa75f4d3SHarshad Shirwadkar 
135aa75f4d3SHarshad Shirwadkar 	unlock_buffer(bh);
136aa75f4d3SHarshad Shirwadkar }
137aa75f4d3SHarshad Shirwadkar 
138aa75f4d3SHarshad Shirwadkar static inline void ext4_fc_reset_inode(struct inode *inode)
139aa75f4d3SHarshad Shirwadkar {
140aa75f4d3SHarshad Shirwadkar 	struct ext4_inode_info *ei = EXT4_I(inode);
141aa75f4d3SHarshad Shirwadkar 
142aa75f4d3SHarshad Shirwadkar 	ei->i_fc_lblk_start = 0;
143aa75f4d3SHarshad Shirwadkar 	ei->i_fc_lblk_len = 0;
144aa75f4d3SHarshad Shirwadkar }
145aa75f4d3SHarshad Shirwadkar 
146aa75f4d3SHarshad Shirwadkar void ext4_fc_init_inode(struct inode *inode)
147aa75f4d3SHarshad Shirwadkar {
148aa75f4d3SHarshad Shirwadkar 	struct ext4_inode_info *ei = EXT4_I(inode);
149aa75f4d3SHarshad Shirwadkar 
150aa75f4d3SHarshad Shirwadkar 	ext4_fc_reset_inode(inode);
151aa75f4d3SHarshad Shirwadkar 	ext4_clear_inode_state(inode, EXT4_STATE_FC_COMMITTING);
152aa75f4d3SHarshad Shirwadkar 	INIT_LIST_HEAD(&ei->i_fc_list);
153aa75f4d3SHarshad Shirwadkar 	init_waitqueue_head(&ei->i_fc_wait);
154aa75f4d3SHarshad Shirwadkar 	atomic_set(&ei->i_fc_updates, 0);
155aa75f4d3SHarshad Shirwadkar 	ei->i_fc_committed_subtid = 0;
156aa75f4d3SHarshad Shirwadkar }
157aa75f4d3SHarshad Shirwadkar 
158aa75f4d3SHarshad Shirwadkar /*
159aa75f4d3SHarshad Shirwadkar  * Inform Ext4's fast about start of an inode update
160aa75f4d3SHarshad Shirwadkar  *
161aa75f4d3SHarshad Shirwadkar  * This function is called by the high level call VFS callbacks before
162aa75f4d3SHarshad Shirwadkar  * performing any inode update. This function blocks if there's an ongoing
163aa75f4d3SHarshad Shirwadkar  * fast commit on the inode in question.
164aa75f4d3SHarshad Shirwadkar  */
165aa75f4d3SHarshad Shirwadkar void ext4_fc_start_update(struct inode *inode)
166aa75f4d3SHarshad Shirwadkar {
167aa75f4d3SHarshad Shirwadkar 	struct ext4_inode_info *ei = EXT4_I(inode);
168aa75f4d3SHarshad Shirwadkar 
1698016e29fSHarshad Shirwadkar 	if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) ||
1708016e29fSHarshad Shirwadkar 	    (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY))
171aa75f4d3SHarshad Shirwadkar 		return;
172aa75f4d3SHarshad Shirwadkar 
173aa75f4d3SHarshad Shirwadkar restart:
174aa75f4d3SHarshad Shirwadkar 	spin_lock(&EXT4_SB(inode->i_sb)->s_fc_lock);
175aa75f4d3SHarshad Shirwadkar 	if (list_empty(&ei->i_fc_list))
176aa75f4d3SHarshad Shirwadkar 		goto out;
177aa75f4d3SHarshad Shirwadkar 
178aa75f4d3SHarshad Shirwadkar 	if (ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING)) {
179aa75f4d3SHarshad Shirwadkar 		wait_queue_head_t *wq;
180aa75f4d3SHarshad Shirwadkar #if (BITS_PER_LONG < 64)
181aa75f4d3SHarshad Shirwadkar 		DEFINE_WAIT_BIT(wait, &ei->i_state_flags,
182aa75f4d3SHarshad Shirwadkar 				EXT4_STATE_FC_COMMITTING);
183aa75f4d3SHarshad Shirwadkar 		wq = bit_waitqueue(&ei->i_state_flags,
184aa75f4d3SHarshad Shirwadkar 				   EXT4_STATE_FC_COMMITTING);
185aa75f4d3SHarshad Shirwadkar #else
186aa75f4d3SHarshad Shirwadkar 		DEFINE_WAIT_BIT(wait, &ei->i_flags,
187aa75f4d3SHarshad Shirwadkar 				EXT4_STATE_FC_COMMITTING);
188aa75f4d3SHarshad Shirwadkar 		wq = bit_waitqueue(&ei->i_flags,
189aa75f4d3SHarshad Shirwadkar 				   EXT4_STATE_FC_COMMITTING);
190aa75f4d3SHarshad Shirwadkar #endif
191aa75f4d3SHarshad Shirwadkar 		prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
192aa75f4d3SHarshad Shirwadkar 		spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
193aa75f4d3SHarshad Shirwadkar 		schedule();
194aa75f4d3SHarshad Shirwadkar 		finish_wait(wq, &wait.wq_entry);
195aa75f4d3SHarshad Shirwadkar 		goto restart;
196aa75f4d3SHarshad Shirwadkar 	}
197aa75f4d3SHarshad Shirwadkar out:
198aa75f4d3SHarshad Shirwadkar 	atomic_inc(&ei->i_fc_updates);
199aa75f4d3SHarshad Shirwadkar 	spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
200aa75f4d3SHarshad Shirwadkar }
201aa75f4d3SHarshad Shirwadkar 
202aa75f4d3SHarshad Shirwadkar /*
203aa75f4d3SHarshad Shirwadkar  * Stop inode update and wake up waiting fast commits if any.
204aa75f4d3SHarshad Shirwadkar  */
205aa75f4d3SHarshad Shirwadkar void ext4_fc_stop_update(struct inode *inode)
206aa75f4d3SHarshad Shirwadkar {
207aa75f4d3SHarshad Shirwadkar 	struct ext4_inode_info *ei = EXT4_I(inode);
208aa75f4d3SHarshad Shirwadkar 
2098016e29fSHarshad Shirwadkar 	if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) ||
2108016e29fSHarshad Shirwadkar 	    (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY))
211aa75f4d3SHarshad Shirwadkar 		return;
212aa75f4d3SHarshad Shirwadkar 
213aa75f4d3SHarshad Shirwadkar 	if (atomic_dec_and_test(&ei->i_fc_updates))
214aa75f4d3SHarshad Shirwadkar 		wake_up_all(&ei->i_fc_wait);
215aa75f4d3SHarshad Shirwadkar }
216aa75f4d3SHarshad Shirwadkar 
217aa75f4d3SHarshad Shirwadkar /*
218aa75f4d3SHarshad Shirwadkar  * Remove inode from fast commit list. If the inode is being committed
219aa75f4d3SHarshad Shirwadkar  * we wait until inode commit is done.
220aa75f4d3SHarshad Shirwadkar  */
221aa75f4d3SHarshad Shirwadkar void ext4_fc_del(struct inode *inode)
222aa75f4d3SHarshad Shirwadkar {
223aa75f4d3SHarshad Shirwadkar 	struct ext4_inode_info *ei = EXT4_I(inode);
224aa75f4d3SHarshad Shirwadkar 
2258016e29fSHarshad Shirwadkar 	if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) ||
2268016e29fSHarshad Shirwadkar 	    (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY))
227aa75f4d3SHarshad Shirwadkar 		return;
228aa75f4d3SHarshad Shirwadkar 
229aa75f4d3SHarshad Shirwadkar restart:
230aa75f4d3SHarshad Shirwadkar 	spin_lock(&EXT4_SB(inode->i_sb)->s_fc_lock);
231aa75f4d3SHarshad Shirwadkar 	if (list_empty(&ei->i_fc_list)) {
232aa75f4d3SHarshad Shirwadkar 		spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
233aa75f4d3SHarshad Shirwadkar 		return;
234aa75f4d3SHarshad Shirwadkar 	}
235aa75f4d3SHarshad Shirwadkar 
236aa75f4d3SHarshad Shirwadkar 	if (ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING)) {
237aa75f4d3SHarshad Shirwadkar 		wait_queue_head_t *wq;
238aa75f4d3SHarshad Shirwadkar #if (BITS_PER_LONG < 64)
239aa75f4d3SHarshad Shirwadkar 		DEFINE_WAIT_BIT(wait, &ei->i_state_flags,
240aa75f4d3SHarshad Shirwadkar 				EXT4_STATE_FC_COMMITTING);
241aa75f4d3SHarshad Shirwadkar 		wq = bit_waitqueue(&ei->i_state_flags,
242aa75f4d3SHarshad Shirwadkar 				   EXT4_STATE_FC_COMMITTING);
243aa75f4d3SHarshad Shirwadkar #else
244aa75f4d3SHarshad Shirwadkar 		DEFINE_WAIT_BIT(wait, &ei->i_flags,
245aa75f4d3SHarshad Shirwadkar 				EXT4_STATE_FC_COMMITTING);
246aa75f4d3SHarshad Shirwadkar 		wq = bit_waitqueue(&ei->i_flags,
247aa75f4d3SHarshad Shirwadkar 				   EXT4_STATE_FC_COMMITTING);
248aa75f4d3SHarshad Shirwadkar #endif
249aa75f4d3SHarshad Shirwadkar 		prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
250aa75f4d3SHarshad Shirwadkar 		spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
251aa75f4d3SHarshad Shirwadkar 		schedule();
252aa75f4d3SHarshad Shirwadkar 		finish_wait(wq, &wait.wq_entry);
253aa75f4d3SHarshad Shirwadkar 		goto restart;
254aa75f4d3SHarshad Shirwadkar 	}
255aa75f4d3SHarshad Shirwadkar 	if (!list_empty(&ei->i_fc_list))
256aa75f4d3SHarshad Shirwadkar 		list_del_init(&ei->i_fc_list);
257aa75f4d3SHarshad Shirwadkar 	spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
258aa75f4d3SHarshad Shirwadkar }
259aa75f4d3SHarshad Shirwadkar 
260aa75f4d3SHarshad Shirwadkar /*
261aa75f4d3SHarshad Shirwadkar  * Mark file system as fast commit ineligible. This means that next commit
262aa75f4d3SHarshad Shirwadkar  * operation would result in a full jbd2 commit.
263aa75f4d3SHarshad Shirwadkar  */
264aa75f4d3SHarshad Shirwadkar void ext4_fc_mark_ineligible(struct super_block *sb, int reason)
265aa75f4d3SHarshad Shirwadkar {
266aa75f4d3SHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(sb);
267aa75f4d3SHarshad Shirwadkar 
2688016e29fSHarshad Shirwadkar 	if (!test_opt2(sb, JOURNAL_FAST_COMMIT) ||
2698016e29fSHarshad Shirwadkar 	    (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY))
2708016e29fSHarshad Shirwadkar 		return;
2718016e29fSHarshad Shirwadkar 
272aa75f4d3SHarshad Shirwadkar 	sbi->s_mount_state |= EXT4_FC_INELIGIBLE;
273aa75f4d3SHarshad Shirwadkar 	WARN_ON(reason >= EXT4_FC_REASON_MAX);
274aa75f4d3SHarshad Shirwadkar 	sbi->s_fc_stats.fc_ineligible_reason_count[reason]++;
275aa75f4d3SHarshad Shirwadkar }
276aa75f4d3SHarshad Shirwadkar 
277aa75f4d3SHarshad Shirwadkar /*
278aa75f4d3SHarshad Shirwadkar  * Start a fast commit ineligible update. Any commits that happen while
279aa75f4d3SHarshad Shirwadkar  * such an operation is in progress fall back to full commits.
280aa75f4d3SHarshad Shirwadkar  */
281aa75f4d3SHarshad Shirwadkar void ext4_fc_start_ineligible(struct super_block *sb, int reason)
282aa75f4d3SHarshad Shirwadkar {
283aa75f4d3SHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(sb);
284aa75f4d3SHarshad Shirwadkar 
2858016e29fSHarshad Shirwadkar 	if (!test_opt2(sb, JOURNAL_FAST_COMMIT) ||
2868016e29fSHarshad Shirwadkar 	    (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY))
2878016e29fSHarshad Shirwadkar 		return;
2888016e29fSHarshad Shirwadkar 
289aa75f4d3SHarshad Shirwadkar 	WARN_ON(reason >= EXT4_FC_REASON_MAX);
290aa75f4d3SHarshad Shirwadkar 	sbi->s_fc_stats.fc_ineligible_reason_count[reason]++;
291aa75f4d3SHarshad Shirwadkar 	atomic_inc(&sbi->s_fc_ineligible_updates);
292aa75f4d3SHarshad Shirwadkar }
293aa75f4d3SHarshad Shirwadkar 
294aa75f4d3SHarshad Shirwadkar /*
295aa75f4d3SHarshad Shirwadkar  * Stop a fast commit ineligible update. We set EXT4_FC_INELIGIBLE flag here
296aa75f4d3SHarshad Shirwadkar  * to ensure that after stopping the ineligible update, at least one full
297aa75f4d3SHarshad Shirwadkar  * commit takes place.
298aa75f4d3SHarshad Shirwadkar  */
299aa75f4d3SHarshad Shirwadkar void ext4_fc_stop_ineligible(struct super_block *sb)
300aa75f4d3SHarshad Shirwadkar {
3018016e29fSHarshad Shirwadkar 	if (!test_opt2(sb, JOURNAL_FAST_COMMIT) ||
3028016e29fSHarshad Shirwadkar 	    (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY))
3038016e29fSHarshad Shirwadkar 		return;
3048016e29fSHarshad Shirwadkar 
305aa75f4d3SHarshad Shirwadkar 	EXT4_SB(sb)->s_mount_state |= EXT4_FC_INELIGIBLE;
306aa75f4d3SHarshad Shirwadkar 	atomic_dec(&EXT4_SB(sb)->s_fc_ineligible_updates);
307aa75f4d3SHarshad Shirwadkar }
308aa75f4d3SHarshad Shirwadkar 
309aa75f4d3SHarshad Shirwadkar static inline int ext4_fc_is_ineligible(struct super_block *sb)
310aa75f4d3SHarshad Shirwadkar {
311aa75f4d3SHarshad Shirwadkar 	return (EXT4_SB(sb)->s_mount_state & EXT4_FC_INELIGIBLE) ||
312aa75f4d3SHarshad Shirwadkar 		atomic_read(&EXT4_SB(sb)->s_fc_ineligible_updates);
313aa75f4d3SHarshad Shirwadkar }
314aa75f4d3SHarshad Shirwadkar 
315aa75f4d3SHarshad Shirwadkar /*
316aa75f4d3SHarshad Shirwadkar  * Generic fast commit tracking function. If this is the first time this we are
317aa75f4d3SHarshad Shirwadkar  * called after a full commit, we initialize fast commit fields and then call
318aa75f4d3SHarshad Shirwadkar  * __fc_track_fn() with update = 0. If we have already been called after a full
319aa75f4d3SHarshad Shirwadkar  * commit, we pass update = 1. Based on that, the track function can determine
320aa75f4d3SHarshad Shirwadkar  * if it needs to track a field for the first time or if it needs to just
321aa75f4d3SHarshad Shirwadkar  * update the previously tracked value.
322aa75f4d3SHarshad Shirwadkar  *
323aa75f4d3SHarshad Shirwadkar  * If enqueue is set, this function enqueues the inode in fast commit list.
324aa75f4d3SHarshad Shirwadkar  */
325aa75f4d3SHarshad Shirwadkar static int ext4_fc_track_template(
326aa75f4d3SHarshad Shirwadkar 	struct inode *inode, int (*__fc_track_fn)(struct inode *, void *, bool),
327aa75f4d3SHarshad Shirwadkar 	void *args, int enqueue)
328aa75f4d3SHarshad Shirwadkar {
329aa75f4d3SHarshad Shirwadkar 	tid_t running_txn_tid;
330aa75f4d3SHarshad Shirwadkar 	bool update = false;
331aa75f4d3SHarshad Shirwadkar 	struct ext4_inode_info *ei = EXT4_I(inode);
332aa75f4d3SHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
333aa75f4d3SHarshad Shirwadkar 	int ret;
334aa75f4d3SHarshad Shirwadkar 
3358016e29fSHarshad Shirwadkar 	if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) ||
3368016e29fSHarshad Shirwadkar 	    (sbi->s_mount_state & EXT4_FC_REPLAY))
337aa75f4d3SHarshad Shirwadkar 		return -EOPNOTSUPP;
338aa75f4d3SHarshad Shirwadkar 
339aa75f4d3SHarshad Shirwadkar 	if (ext4_fc_is_ineligible(inode->i_sb))
340aa75f4d3SHarshad Shirwadkar 		return -EINVAL;
341aa75f4d3SHarshad Shirwadkar 
342aa75f4d3SHarshad Shirwadkar 	running_txn_tid = sbi->s_journal ?
343aa75f4d3SHarshad Shirwadkar 		sbi->s_journal->j_commit_sequence + 1 : 0;
344aa75f4d3SHarshad Shirwadkar 
345aa75f4d3SHarshad Shirwadkar 	mutex_lock(&ei->i_fc_lock);
346aa75f4d3SHarshad Shirwadkar 	if (running_txn_tid == ei->i_sync_tid) {
347aa75f4d3SHarshad Shirwadkar 		update = true;
348aa75f4d3SHarshad Shirwadkar 	} else {
349aa75f4d3SHarshad Shirwadkar 		ext4_fc_reset_inode(inode);
350aa75f4d3SHarshad Shirwadkar 		ei->i_sync_tid = running_txn_tid;
351aa75f4d3SHarshad Shirwadkar 	}
352aa75f4d3SHarshad Shirwadkar 	ret = __fc_track_fn(inode, args, update);
353aa75f4d3SHarshad Shirwadkar 	mutex_unlock(&ei->i_fc_lock);
354aa75f4d3SHarshad Shirwadkar 
355aa75f4d3SHarshad Shirwadkar 	if (!enqueue)
356aa75f4d3SHarshad Shirwadkar 		return ret;
357aa75f4d3SHarshad Shirwadkar 
358aa75f4d3SHarshad Shirwadkar 	spin_lock(&sbi->s_fc_lock);
359aa75f4d3SHarshad Shirwadkar 	if (list_empty(&EXT4_I(inode)->i_fc_list))
360aa75f4d3SHarshad Shirwadkar 		list_add_tail(&EXT4_I(inode)->i_fc_list,
361aa75f4d3SHarshad Shirwadkar 				(sbi->s_mount_state & EXT4_FC_COMMITTING) ?
362aa75f4d3SHarshad Shirwadkar 				&sbi->s_fc_q[FC_Q_STAGING] :
363aa75f4d3SHarshad Shirwadkar 				&sbi->s_fc_q[FC_Q_MAIN]);
364aa75f4d3SHarshad Shirwadkar 	spin_unlock(&sbi->s_fc_lock);
365aa75f4d3SHarshad Shirwadkar 
366aa75f4d3SHarshad Shirwadkar 	return ret;
367aa75f4d3SHarshad Shirwadkar }
368aa75f4d3SHarshad Shirwadkar 
/* Argument bundle handed to __track_dentry_update() through a void pointer. */
struct __track_dentry_update_args {
	/* Directory entry the operation applies to. */
	struct dentry *dentry;
	/* Operation tag, stored into the queued update's fcd_op. */
	int op;
};
373aa75f4d3SHarshad Shirwadkar 
374aa75f4d3SHarshad Shirwadkar /* __track_fn for directory entry updates. Called with ei->i_fc_lock. */
375aa75f4d3SHarshad Shirwadkar static int __track_dentry_update(struct inode *inode, void *arg, bool update)
376aa75f4d3SHarshad Shirwadkar {
377aa75f4d3SHarshad Shirwadkar 	struct ext4_fc_dentry_update *node;
378aa75f4d3SHarshad Shirwadkar 	struct ext4_inode_info *ei = EXT4_I(inode);
379aa75f4d3SHarshad Shirwadkar 	struct __track_dentry_update_args *dentry_update =
380aa75f4d3SHarshad Shirwadkar 		(struct __track_dentry_update_args *)arg;
381aa75f4d3SHarshad Shirwadkar 	struct dentry *dentry = dentry_update->dentry;
382aa75f4d3SHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
383aa75f4d3SHarshad Shirwadkar 
384aa75f4d3SHarshad Shirwadkar 	mutex_unlock(&ei->i_fc_lock);
385aa75f4d3SHarshad Shirwadkar 	node = kmem_cache_alloc(ext4_fc_dentry_cachep, GFP_NOFS);
386aa75f4d3SHarshad Shirwadkar 	if (!node) {
387aa75f4d3SHarshad Shirwadkar 		ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_MEM);
388aa75f4d3SHarshad Shirwadkar 		mutex_lock(&ei->i_fc_lock);
389aa75f4d3SHarshad Shirwadkar 		return -ENOMEM;
390aa75f4d3SHarshad Shirwadkar 	}
391aa75f4d3SHarshad Shirwadkar 
392aa75f4d3SHarshad Shirwadkar 	node->fcd_op = dentry_update->op;
393aa75f4d3SHarshad Shirwadkar 	node->fcd_parent = dentry->d_parent->d_inode->i_ino;
394aa75f4d3SHarshad Shirwadkar 	node->fcd_ino = inode->i_ino;
395aa75f4d3SHarshad Shirwadkar 	if (dentry->d_name.len > DNAME_INLINE_LEN) {
396aa75f4d3SHarshad Shirwadkar 		node->fcd_name.name = kmalloc(dentry->d_name.len, GFP_NOFS);
397aa75f4d3SHarshad Shirwadkar 		if (!node->fcd_name.name) {
398aa75f4d3SHarshad Shirwadkar 			kmem_cache_free(ext4_fc_dentry_cachep, node);
399aa75f4d3SHarshad Shirwadkar 			ext4_fc_mark_ineligible(inode->i_sb,
400aa75f4d3SHarshad Shirwadkar 				EXT4_FC_REASON_MEM);
401aa75f4d3SHarshad Shirwadkar 			mutex_lock(&ei->i_fc_lock);
402aa75f4d3SHarshad Shirwadkar 			return -ENOMEM;
403aa75f4d3SHarshad Shirwadkar 		}
404aa75f4d3SHarshad Shirwadkar 		memcpy((u8 *)node->fcd_name.name, dentry->d_name.name,
405aa75f4d3SHarshad Shirwadkar 			dentry->d_name.len);
406aa75f4d3SHarshad Shirwadkar 	} else {
407aa75f4d3SHarshad Shirwadkar 		memcpy(node->fcd_iname, dentry->d_name.name,
408aa75f4d3SHarshad Shirwadkar 			dentry->d_name.len);
409aa75f4d3SHarshad Shirwadkar 		node->fcd_name.name = node->fcd_iname;
410aa75f4d3SHarshad Shirwadkar 	}
411aa75f4d3SHarshad Shirwadkar 	node->fcd_name.len = dentry->d_name.len;
412aa75f4d3SHarshad Shirwadkar 
413aa75f4d3SHarshad Shirwadkar 	spin_lock(&sbi->s_fc_lock);
414aa75f4d3SHarshad Shirwadkar 	if (sbi->s_mount_state & EXT4_FC_COMMITTING)
415aa75f4d3SHarshad Shirwadkar 		list_add_tail(&node->fcd_list,
416aa75f4d3SHarshad Shirwadkar 				&sbi->s_fc_dentry_q[FC_Q_STAGING]);
417aa75f4d3SHarshad Shirwadkar 	else
418aa75f4d3SHarshad Shirwadkar 		list_add_tail(&node->fcd_list, &sbi->s_fc_dentry_q[FC_Q_MAIN]);
419aa75f4d3SHarshad Shirwadkar 	spin_unlock(&sbi->s_fc_lock);
420aa75f4d3SHarshad Shirwadkar 	mutex_lock(&ei->i_fc_lock);
421aa75f4d3SHarshad Shirwadkar 
422aa75f4d3SHarshad Shirwadkar 	return 0;
423aa75f4d3SHarshad Shirwadkar }
424aa75f4d3SHarshad Shirwadkar 
425aa75f4d3SHarshad Shirwadkar void ext4_fc_track_unlink(struct inode *inode, struct dentry *dentry)
426aa75f4d3SHarshad Shirwadkar {
427aa75f4d3SHarshad Shirwadkar 	struct __track_dentry_update_args args;
428aa75f4d3SHarshad Shirwadkar 	int ret;
429aa75f4d3SHarshad Shirwadkar 
430aa75f4d3SHarshad Shirwadkar 	args.dentry = dentry;
431aa75f4d3SHarshad Shirwadkar 	args.op = EXT4_FC_TAG_UNLINK;
432aa75f4d3SHarshad Shirwadkar 
433aa75f4d3SHarshad Shirwadkar 	ret = ext4_fc_track_template(inode, __track_dentry_update,
434aa75f4d3SHarshad Shirwadkar 					(void *)&args, 0);
435aa75f4d3SHarshad Shirwadkar 	trace_ext4_fc_track_unlink(inode, dentry, ret);
436aa75f4d3SHarshad Shirwadkar }
437aa75f4d3SHarshad Shirwadkar 
438aa75f4d3SHarshad Shirwadkar void ext4_fc_track_link(struct inode *inode, struct dentry *dentry)
439aa75f4d3SHarshad Shirwadkar {
440aa75f4d3SHarshad Shirwadkar 	struct __track_dentry_update_args args;
441aa75f4d3SHarshad Shirwadkar 	int ret;
442aa75f4d3SHarshad Shirwadkar 
443aa75f4d3SHarshad Shirwadkar 	args.dentry = dentry;
444aa75f4d3SHarshad Shirwadkar 	args.op = EXT4_FC_TAG_LINK;
445aa75f4d3SHarshad Shirwadkar 
446aa75f4d3SHarshad Shirwadkar 	ret = ext4_fc_track_template(inode, __track_dentry_update,
447aa75f4d3SHarshad Shirwadkar 					(void *)&args, 0);
448aa75f4d3SHarshad Shirwadkar 	trace_ext4_fc_track_link(inode, dentry, ret);
449aa75f4d3SHarshad Shirwadkar }
450aa75f4d3SHarshad Shirwadkar 
451aa75f4d3SHarshad Shirwadkar void ext4_fc_track_create(struct inode *inode, struct dentry *dentry)
452aa75f4d3SHarshad Shirwadkar {
453aa75f4d3SHarshad Shirwadkar 	struct __track_dentry_update_args args;
454aa75f4d3SHarshad Shirwadkar 	int ret;
455aa75f4d3SHarshad Shirwadkar 
456aa75f4d3SHarshad Shirwadkar 	args.dentry = dentry;
457aa75f4d3SHarshad Shirwadkar 	args.op = EXT4_FC_TAG_CREAT;
458aa75f4d3SHarshad Shirwadkar 
459aa75f4d3SHarshad Shirwadkar 	ret = ext4_fc_track_template(inode, __track_dentry_update,
460aa75f4d3SHarshad Shirwadkar 					(void *)&args, 0);
461aa75f4d3SHarshad Shirwadkar 	trace_ext4_fc_track_create(inode, dentry, ret);
462aa75f4d3SHarshad Shirwadkar }
463aa75f4d3SHarshad Shirwadkar 
464aa75f4d3SHarshad Shirwadkar /* __track_fn for inode tracking */
465aa75f4d3SHarshad Shirwadkar static int __track_inode(struct inode *inode, void *arg, bool update)
466aa75f4d3SHarshad Shirwadkar {
467aa75f4d3SHarshad Shirwadkar 	if (update)
468aa75f4d3SHarshad Shirwadkar 		return -EEXIST;
469aa75f4d3SHarshad Shirwadkar 
470aa75f4d3SHarshad Shirwadkar 	EXT4_I(inode)->i_fc_lblk_len = 0;
471aa75f4d3SHarshad Shirwadkar 
472aa75f4d3SHarshad Shirwadkar 	return 0;
473aa75f4d3SHarshad Shirwadkar }
474aa75f4d3SHarshad Shirwadkar 
475aa75f4d3SHarshad Shirwadkar void ext4_fc_track_inode(struct inode *inode)
476aa75f4d3SHarshad Shirwadkar {
477aa75f4d3SHarshad Shirwadkar 	int ret;
478aa75f4d3SHarshad Shirwadkar 
479aa75f4d3SHarshad Shirwadkar 	if (S_ISDIR(inode->i_mode))
480aa75f4d3SHarshad Shirwadkar 		return;
481aa75f4d3SHarshad Shirwadkar 
482aa75f4d3SHarshad Shirwadkar 	ret = ext4_fc_track_template(inode, __track_inode, NULL, 1);
483aa75f4d3SHarshad Shirwadkar 	trace_ext4_fc_track_inode(inode, ret);
484aa75f4d3SHarshad Shirwadkar }
485aa75f4d3SHarshad Shirwadkar 
486aa75f4d3SHarshad Shirwadkar struct __track_range_args {
487aa75f4d3SHarshad Shirwadkar 	ext4_lblk_t start, end;
488aa75f4d3SHarshad Shirwadkar };
489aa75f4d3SHarshad Shirwadkar 
490aa75f4d3SHarshad Shirwadkar /* __track_fn for tracking data updates */
491aa75f4d3SHarshad Shirwadkar static int __track_range(struct inode *inode, void *arg, bool update)
492aa75f4d3SHarshad Shirwadkar {
493aa75f4d3SHarshad Shirwadkar 	struct ext4_inode_info *ei = EXT4_I(inode);
494aa75f4d3SHarshad Shirwadkar 	ext4_lblk_t oldstart;
495aa75f4d3SHarshad Shirwadkar 	struct __track_range_args *__arg =
496aa75f4d3SHarshad Shirwadkar 		(struct __track_range_args *)arg;
497aa75f4d3SHarshad Shirwadkar 
498aa75f4d3SHarshad Shirwadkar 	if (inode->i_ino < EXT4_FIRST_INO(inode->i_sb)) {
499aa75f4d3SHarshad Shirwadkar 		ext4_debug("Special inode %ld being modified\n", inode->i_ino);
500aa75f4d3SHarshad Shirwadkar 		return -ECANCELED;
501aa75f4d3SHarshad Shirwadkar 	}
502aa75f4d3SHarshad Shirwadkar 
503aa75f4d3SHarshad Shirwadkar 	oldstart = ei->i_fc_lblk_start;
504aa75f4d3SHarshad Shirwadkar 
505aa75f4d3SHarshad Shirwadkar 	if (update && ei->i_fc_lblk_len > 0) {
506aa75f4d3SHarshad Shirwadkar 		ei->i_fc_lblk_start = min(ei->i_fc_lblk_start, __arg->start);
507aa75f4d3SHarshad Shirwadkar 		ei->i_fc_lblk_len =
508aa75f4d3SHarshad Shirwadkar 			max(oldstart + ei->i_fc_lblk_len - 1, __arg->end) -
509aa75f4d3SHarshad Shirwadkar 				ei->i_fc_lblk_start + 1;
510aa75f4d3SHarshad Shirwadkar 	} else {
511aa75f4d3SHarshad Shirwadkar 		ei->i_fc_lblk_start = __arg->start;
512aa75f4d3SHarshad Shirwadkar 		ei->i_fc_lblk_len = __arg->end - __arg->start + 1;
513aa75f4d3SHarshad Shirwadkar 	}
514aa75f4d3SHarshad Shirwadkar 
515aa75f4d3SHarshad Shirwadkar 	return 0;
516aa75f4d3SHarshad Shirwadkar }
517aa75f4d3SHarshad Shirwadkar 
518aa75f4d3SHarshad Shirwadkar void ext4_fc_track_range(struct inode *inode, ext4_lblk_t start,
519aa75f4d3SHarshad Shirwadkar 			 ext4_lblk_t end)
520aa75f4d3SHarshad Shirwadkar {
521aa75f4d3SHarshad Shirwadkar 	struct __track_range_args args;
522aa75f4d3SHarshad Shirwadkar 	int ret;
523aa75f4d3SHarshad Shirwadkar 
524aa75f4d3SHarshad Shirwadkar 	if (S_ISDIR(inode->i_mode))
525aa75f4d3SHarshad Shirwadkar 		return;
526aa75f4d3SHarshad Shirwadkar 
527aa75f4d3SHarshad Shirwadkar 	args.start = start;
528aa75f4d3SHarshad Shirwadkar 	args.end = end;
529aa75f4d3SHarshad Shirwadkar 
530aa75f4d3SHarshad Shirwadkar 	ret = ext4_fc_track_template(inode,  __track_range, &args, 1);
531aa75f4d3SHarshad Shirwadkar 
532aa75f4d3SHarshad Shirwadkar 	trace_ext4_fc_track_range(inode, start, end, ret);
533aa75f4d3SHarshad Shirwadkar }
534aa75f4d3SHarshad Shirwadkar 
535aa75f4d3SHarshad Shirwadkar static void ext4_fc_submit_bh(struct super_block *sb)
536aa75f4d3SHarshad Shirwadkar {
537aa75f4d3SHarshad Shirwadkar 	int write_flags = REQ_SYNC;
538aa75f4d3SHarshad Shirwadkar 	struct buffer_head *bh = EXT4_SB(sb)->s_fc_bh;
539aa75f4d3SHarshad Shirwadkar 
540aa75f4d3SHarshad Shirwadkar 	if (test_opt(sb, BARRIER))
541aa75f4d3SHarshad Shirwadkar 		write_flags |= REQ_FUA | REQ_PREFLUSH;
542aa75f4d3SHarshad Shirwadkar 	lock_buffer(bh);
543aa75f4d3SHarshad Shirwadkar 	clear_buffer_dirty(bh);
544aa75f4d3SHarshad Shirwadkar 	set_buffer_uptodate(bh);
545aa75f4d3SHarshad Shirwadkar 	bh->b_end_io = ext4_end_buffer_io_sync;
546aa75f4d3SHarshad Shirwadkar 	submit_bh(REQ_OP_WRITE, write_flags, bh);
547aa75f4d3SHarshad Shirwadkar 	EXT4_SB(sb)->s_fc_bh = NULL;
548aa75f4d3SHarshad Shirwadkar }
549aa75f4d3SHarshad Shirwadkar 
550aa75f4d3SHarshad Shirwadkar /* Ext4 commit path routines */
551aa75f4d3SHarshad Shirwadkar 
552aa75f4d3SHarshad Shirwadkar /* memzero and update CRC */
553aa75f4d3SHarshad Shirwadkar static void *ext4_fc_memzero(struct super_block *sb, void *dst, int len,
554aa75f4d3SHarshad Shirwadkar 				u32 *crc)
555aa75f4d3SHarshad Shirwadkar {
556aa75f4d3SHarshad Shirwadkar 	void *ret;
557aa75f4d3SHarshad Shirwadkar 
558aa75f4d3SHarshad Shirwadkar 	ret = memset(dst, 0, len);
559aa75f4d3SHarshad Shirwadkar 	if (crc)
560aa75f4d3SHarshad Shirwadkar 		*crc = ext4_chksum(EXT4_SB(sb), *crc, dst, len);
561aa75f4d3SHarshad Shirwadkar 	return ret;
562aa75f4d3SHarshad Shirwadkar }
563aa75f4d3SHarshad Shirwadkar 
564aa75f4d3SHarshad Shirwadkar /*
565aa75f4d3SHarshad Shirwadkar  * Allocate len bytes on a fast commit buffer.
566aa75f4d3SHarshad Shirwadkar  *
567aa75f4d3SHarshad Shirwadkar  * During the commit time this function is used to manage fast commit
568aa75f4d3SHarshad Shirwadkar  * block space. We don't split a fast commit log onto different
569aa75f4d3SHarshad Shirwadkar  * blocks. So this function makes sure that if there's not enough space
570aa75f4d3SHarshad Shirwadkar  * on the current block, the remaining space in the current block is
571aa75f4d3SHarshad Shirwadkar  * marked as unused by adding EXT4_FC_TAG_PAD tag. In that case,
572aa75f4d3SHarshad Shirwadkar  * new block is from jbd2 and CRC is updated to reflect the padding
573aa75f4d3SHarshad Shirwadkar  * we added.
574aa75f4d3SHarshad Shirwadkar  */
575aa75f4d3SHarshad Shirwadkar static u8 *ext4_fc_reserve_space(struct super_block *sb, int len, u32 *crc)
576aa75f4d3SHarshad Shirwadkar {
577aa75f4d3SHarshad Shirwadkar 	struct ext4_fc_tl *tl;
578aa75f4d3SHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(sb);
579aa75f4d3SHarshad Shirwadkar 	struct buffer_head *bh;
580aa75f4d3SHarshad Shirwadkar 	int bsize = sbi->s_journal->j_blocksize;
581aa75f4d3SHarshad Shirwadkar 	int ret, off = sbi->s_fc_bytes % bsize;
582aa75f4d3SHarshad Shirwadkar 	int pad_len;
583aa75f4d3SHarshad Shirwadkar 
584aa75f4d3SHarshad Shirwadkar 	/*
585aa75f4d3SHarshad Shirwadkar 	 * After allocating len, we should have space at least for a 0 byte
586aa75f4d3SHarshad Shirwadkar 	 * padding.
587aa75f4d3SHarshad Shirwadkar 	 */
588aa75f4d3SHarshad Shirwadkar 	if (len + sizeof(struct ext4_fc_tl) > bsize)
589aa75f4d3SHarshad Shirwadkar 		return NULL;
590aa75f4d3SHarshad Shirwadkar 
591aa75f4d3SHarshad Shirwadkar 	if (bsize - off - 1 > len + sizeof(struct ext4_fc_tl)) {
592aa75f4d3SHarshad Shirwadkar 		/*
593aa75f4d3SHarshad Shirwadkar 		 * Only allocate from current buffer if we have enough space for
594aa75f4d3SHarshad Shirwadkar 		 * this request AND we have space to add a zero byte padding.
595aa75f4d3SHarshad Shirwadkar 		 */
596aa75f4d3SHarshad Shirwadkar 		if (!sbi->s_fc_bh) {
597aa75f4d3SHarshad Shirwadkar 			ret = jbd2_fc_get_buf(EXT4_SB(sb)->s_journal, &bh);
598aa75f4d3SHarshad Shirwadkar 			if (ret)
599aa75f4d3SHarshad Shirwadkar 				return NULL;
600aa75f4d3SHarshad Shirwadkar 			sbi->s_fc_bh = bh;
601aa75f4d3SHarshad Shirwadkar 		}
602aa75f4d3SHarshad Shirwadkar 		sbi->s_fc_bytes += len;
603aa75f4d3SHarshad Shirwadkar 		return sbi->s_fc_bh->b_data + off;
604aa75f4d3SHarshad Shirwadkar 	}
605aa75f4d3SHarshad Shirwadkar 	/* Need to add PAD tag */
606aa75f4d3SHarshad Shirwadkar 	tl = (struct ext4_fc_tl *)(sbi->s_fc_bh->b_data + off);
607aa75f4d3SHarshad Shirwadkar 	tl->fc_tag = cpu_to_le16(EXT4_FC_TAG_PAD);
608aa75f4d3SHarshad Shirwadkar 	pad_len = bsize - off - 1 - sizeof(struct ext4_fc_tl);
609aa75f4d3SHarshad Shirwadkar 	tl->fc_len = cpu_to_le16(pad_len);
610aa75f4d3SHarshad Shirwadkar 	if (crc)
611aa75f4d3SHarshad Shirwadkar 		*crc = ext4_chksum(sbi, *crc, tl, sizeof(*tl));
612aa75f4d3SHarshad Shirwadkar 	if (pad_len > 0)
613aa75f4d3SHarshad Shirwadkar 		ext4_fc_memzero(sb, tl + 1, pad_len, crc);
614aa75f4d3SHarshad Shirwadkar 	ext4_fc_submit_bh(sb);
615aa75f4d3SHarshad Shirwadkar 
616aa75f4d3SHarshad Shirwadkar 	ret = jbd2_fc_get_buf(EXT4_SB(sb)->s_journal, &bh);
617aa75f4d3SHarshad Shirwadkar 	if (ret)
618aa75f4d3SHarshad Shirwadkar 		return NULL;
619aa75f4d3SHarshad Shirwadkar 	sbi->s_fc_bh = bh;
620aa75f4d3SHarshad Shirwadkar 	sbi->s_fc_bytes = (sbi->s_fc_bytes / bsize + 1) * bsize + len;
621aa75f4d3SHarshad Shirwadkar 	return sbi->s_fc_bh->b_data;
622aa75f4d3SHarshad Shirwadkar }
623aa75f4d3SHarshad Shirwadkar 
624aa75f4d3SHarshad Shirwadkar /* memcpy to fc reserved space and update CRC */
625aa75f4d3SHarshad Shirwadkar static void *ext4_fc_memcpy(struct super_block *sb, void *dst, const void *src,
626aa75f4d3SHarshad Shirwadkar 				int len, u32 *crc)
627aa75f4d3SHarshad Shirwadkar {
628aa75f4d3SHarshad Shirwadkar 	if (crc)
629aa75f4d3SHarshad Shirwadkar 		*crc = ext4_chksum(EXT4_SB(sb), *crc, src, len);
630aa75f4d3SHarshad Shirwadkar 	return memcpy(dst, src, len);
631aa75f4d3SHarshad Shirwadkar }
632aa75f4d3SHarshad Shirwadkar 
633aa75f4d3SHarshad Shirwadkar /*
634aa75f4d3SHarshad Shirwadkar  * Complete a fast commit by writing tail tag.
635aa75f4d3SHarshad Shirwadkar  *
636aa75f4d3SHarshad Shirwadkar  * Writing tail tag marks the end of a fast commit. In order to guarantee
637aa75f4d3SHarshad Shirwadkar  * atomicity, after writing tail tag, even if there's space remaining
638aa75f4d3SHarshad Shirwadkar  * in the block, next commit shouldn't use it. That's why tail tag
639aa75f4d3SHarshad Shirwadkar  * has the length as that of the remaining space on the block.
640aa75f4d3SHarshad Shirwadkar  */
641aa75f4d3SHarshad Shirwadkar static int ext4_fc_write_tail(struct super_block *sb, u32 crc)
642aa75f4d3SHarshad Shirwadkar {
643aa75f4d3SHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(sb);
644aa75f4d3SHarshad Shirwadkar 	struct ext4_fc_tl tl;
645aa75f4d3SHarshad Shirwadkar 	struct ext4_fc_tail tail;
646aa75f4d3SHarshad Shirwadkar 	int off, bsize = sbi->s_journal->j_blocksize;
647aa75f4d3SHarshad Shirwadkar 	u8 *dst;
648aa75f4d3SHarshad Shirwadkar 
649aa75f4d3SHarshad Shirwadkar 	/*
650aa75f4d3SHarshad Shirwadkar 	 * ext4_fc_reserve_space takes care of allocating an extra block if
651aa75f4d3SHarshad Shirwadkar 	 * there's no enough space on this block for accommodating this tail.
652aa75f4d3SHarshad Shirwadkar 	 */
653aa75f4d3SHarshad Shirwadkar 	dst = ext4_fc_reserve_space(sb, sizeof(tl) + sizeof(tail), &crc);
654aa75f4d3SHarshad Shirwadkar 	if (!dst)
655aa75f4d3SHarshad Shirwadkar 		return -ENOSPC;
656aa75f4d3SHarshad Shirwadkar 
657aa75f4d3SHarshad Shirwadkar 	off = sbi->s_fc_bytes % bsize;
658aa75f4d3SHarshad Shirwadkar 
659aa75f4d3SHarshad Shirwadkar 	tl.fc_tag = cpu_to_le16(EXT4_FC_TAG_TAIL);
660aa75f4d3SHarshad Shirwadkar 	tl.fc_len = cpu_to_le16(bsize - off - 1 + sizeof(struct ext4_fc_tail));
661aa75f4d3SHarshad Shirwadkar 	sbi->s_fc_bytes = round_up(sbi->s_fc_bytes, bsize);
662aa75f4d3SHarshad Shirwadkar 
663aa75f4d3SHarshad Shirwadkar 	ext4_fc_memcpy(sb, dst, &tl, sizeof(tl), &crc);
664aa75f4d3SHarshad Shirwadkar 	dst += sizeof(tl);
665aa75f4d3SHarshad Shirwadkar 	tail.fc_tid = cpu_to_le32(sbi->s_journal->j_running_transaction->t_tid);
666aa75f4d3SHarshad Shirwadkar 	ext4_fc_memcpy(sb, dst, &tail.fc_tid, sizeof(tail.fc_tid), &crc);
667aa75f4d3SHarshad Shirwadkar 	dst += sizeof(tail.fc_tid);
668aa75f4d3SHarshad Shirwadkar 	tail.fc_crc = cpu_to_le32(crc);
669aa75f4d3SHarshad Shirwadkar 	ext4_fc_memcpy(sb, dst, &tail.fc_crc, sizeof(tail.fc_crc), NULL);
670aa75f4d3SHarshad Shirwadkar 
671aa75f4d3SHarshad Shirwadkar 	ext4_fc_submit_bh(sb);
672aa75f4d3SHarshad Shirwadkar 
673aa75f4d3SHarshad Shirwadkar 	return 0;
674aa75f4d3SHarshad Shirwadkar }
675aa75f4d3SHarshad Shirwadkar 
676aa75f4d3SHarshad Shirwadkar /*
677aa75f4d3SHarshad Shirwadkar  * Adds tag, length, value and updates CRC. Returns true if tlv was added.
678aa75f4d3SHarshad Shirwadkar  * Returns false if there's not enough space.
679aa75f4d3SHarshad Shirwadkar  */
680aa75f4d3SHarshad Shirwadkar static bool ext4_fc_add_tlv(struct super_block *sb, u16 tag, u16 len, u8 *val,
681aa75f4d3SHarshad Shirwadkar 			   u32 *crc)
682aa75f4d3SHarshad Shirwadkar {
683aa75f4d3SHarshad Shirwadkar 	struct ext4_fc_tl tl;
684aa75f4d3SHarshad Shirwadkar 	u8 *dst;
685aa75f4d3SHarshad Shirwadkar 
686aa75f4d3SHarshad Shirwadkar 	dst = ext4_fc_reserve_space(sb, sizeof(tl) + len, crc);
687aa75f4d3SHarshad Shirwadkar 	if (!dst)
688aa75f4d3SHarshad Shirwadkar 		return false;
689aa75f4d3SHarshad Shirwadkar 
690aa75f4d3SHarshad Shirwadkar 	tl.fc_tag = cpu_to_le16(tag);
691aa75f4d3SHarshad Shirwadkar 	tl.fc_len = cpu_to_le16(len);
692aa75f4d3SHarshad Shirwadkar 
693aa75f4d3SHarshad Shirwadkar 	ext4_fc_memcpy(sb, dst, &tl, sizeof(tl), crc);
694aa75f4d3SHarshad Shirwadkar 	ext4_fc_memcpy(sb, dst + sizeof(tl), val, len, crc);
695aa75f4d3SHarshad Shirwadkar 
696aa75f4d3SHarshad Shirwadkar 	return true;
697aa75f4d3SHarshad Shirwadkar }
698aa75f4d3SHarshad Shirwadkar 
699aa75f4d3SHarshad Shirwadkar /* Same as above, but adds dentry tlv. */
700aa75f4d3SHarshad Shirwadkar static  bool ext4_fc_add_dentry_tlv(struct super_block *sb, u16 tag,
701aa75f4d3SHarshad Shirwadkar 					int parent_ino, int ino, int dlen,
702aa75f4d3SHarshad Shirwadkar 					const unsigned char *dname,
703aa75f4d3SHarshad Shirwadkar 					u32 *crc)
704aa75f4d3SHarshad Shirwadkar {
705aa75f4d3SHarshad Shirwadkar 	struct ext4_fc_dentry_info fcd;
706aa75f4d3SHarshad Shirwadkar 	struct ext4_fc_tl tl;
707aa75f4d3SHarshad Shirwadkar 	u8 *dst = ext4_fc_reserve_space(sb, sizeof(tl) + sizeof(fcd) + dlen,
708aa75f4d3SHarshad Shirwadkar 					crc);
709aa75f4d3SHarshad Shirwadkar 
710aa75f4d3SHarshad Shirwadkar 	if (!dst)
711aa75f4d3SHarshad Shirwadkar 		return false;
712aa75f4d3SHarshad Shirwadkar 
713aa75f4d3SHarshad Shirwadkar 	fcd.fc_parent_ino = cpu_to_le32(parent_ino);
714aa75f4d3SHarshad Shirwadkar 	fcd.fc_ino = cpu_to_le32(ino);
715aa75f4d3SHarshad Shirwadkar 	tl.fc_tag = cpu_to_le16(tag);
716aa75f4d3SHarshad Shirwadkar 	tl.fc_len = cpu_to_le16(sizeof(fcd) + dlen);
717aa75f4d3SHarshad Shirwadkar 	ext4_fc_memcpy(sb, dst, &tl, sizeof(tl), crc);
718aa75f4d3SHarshad Shirwadkar 	dst += sizeof(tl);
719aa75f4d3SHarshad Shirwadkar 	ext4_fc_memcpy(sb, dst, &fcd, sizeof(fcd), crc);
720aa75f4d3SHarshad Shirwadkar 	dst += sizeof(fcd);
721aa75f4d3SHarshad Shirwadkar 	ext4_fc_memcpy(sb, dst, dname, dlen, crc);
722aa75f4d3SHarshad Shirwadkar 	dst += dlen;
723aa75f4d3SHarshad Shirwadkar 
724aa75f4d3SHarshad Shirwadkar 	return true;
725aa75f4d3SHarshad Shirwadkar }
726aa75f4d3SHarshad Shirwadkar 
727aa75f4d3SHarshad Shirwadkar /*
728aa75f4d3SHarshad Shirwadkar  * Writes inode in the fast commit space under TLV with tag @tag.
729aa75f4d3SHarshad Shirwadkar  * Returns 0 on success, error on failure.
730aa75f4d3SHarshad Shirwadkar  */
731aa75f4d3SHarshad Shirwadkar static int ext4_fc_write_inode(struct inode *inode, u32 *crc)
732aa75f4d3SHarshad Shirwadkar {
733aa75f4d3SHarshad Shirwadkar 	struct ext4_inode_info *ei = EXT4_I(inode);
734aa75f4d3SHarshad Shirwadkar 	int inode_len = EXT4_GOOD_OLD_INODE_SIZE;
735aa75f4d3SHarshad Shirwadkar 	int ret;
736aa75f4d3SHarshad Shirwadkar 	struct ext4_iloc iloc;
737aa75f4d3SHarshad Shirwadkar 	struct ext4_fc_inode fc_inode;
738aa75f4d3SHarshad Shirwadkar 	struct ext4_fc_tl tl;
739aa75f4d3SHarshad Shirwadkar 	u8 *dst;
740aa75f4d3SHarshad Shirwadkar 
741aa75f4d3SHarshad Shirwadkar 	ret = ext4_get_inode_loc(inode, &iloc);
742aa75f4d3SHarshad Shirwadkar 	if (ret)
743aa75f4d3SHarshad Shirwadkar 		return ret;
744aa75f4d3SHarshad Shirwadkar 
745aa75f4d3SHarshad Shirwadkar 	if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE)
746aa75f4d3SHarshad Shirwadkar 		inode_len += ei->i_extra_isize;
747aa75f4d3SHarshad Shirwadkar 
748aa75f4d3SHarshad Shirwadkar 	fc_inode.fc_ino = cpu_to_le32(inode->i_ino);
749aa75f4d3SHarshad Shirwadkar 	tl.fc_tag = cpu_to_le16(EXT4_FC_TAG_INODE);
750aa75f4d3SHarshad Shirwadkar 	tl.fc_len = cpu_to_le16(inode_len + sizeof(fc_inode.fc_ino));
751aa75f4d3SHarshad Shirwadkar 
752aa75f4d3SHarshad Shirwadkar 	dst = ext4_fc_reserve_space(inode->i_sb,
753aa75f4d3SHarshad Shirwadkar 			sizeof(tl) + inode_len + sizeof(fc_inode.fc_ino), crc);
754aa75f4d3SHarshad Shirwadkar 	if (!dst)
755aa75f4d3SHarshad Shirwadkar 		return -ECANCELED;
756aa75f4d3SHarshad Shirwadkar 
757aa75f4d3SHarshad Shirwadkar 	if (!ext4_fc_memcpy(inode->i_sb, dst, &tl, sizeof(tl), crc))
758aa75f4d3SHarshad Shirwadkar 		return -ECANCELED;
759aa75f4d3SHarshad Shirwadkar 	dst += sizeof(tl);
760aa75f4d3SHarshad Shirwadkar 	if (!ext4_fc_memcpy(inode->i_sb, dst, &fc_inode, sizeof(fc_inode), crc))
761aa75f4d3SHarshad Shirwadkar 		return -ECANCELED;
762aa75f4d3SHarshad Shirwadkar 	dst += sizeof(fc_inode);
763aa75f4d3SHarshad Shirwadkar 	if (!ext4_fc_memcpy(inode->i_sb, dst, (u8 *)ext4_raw_inode(&iloc),
764aa75f4d3SHarshad Shirwadkar 					inode_len, crc))
765aa75f4d3SHarshad Shirwadkar 		return -ECANCELED;
766aa75f4d3SHarshad Shirwadkar 
767aa75f4d3SHarshad Shirwadkar 	return 0;
768aa75f4d3SHarshad Shirwadkar }
769aa75f4d3SHarshad Shirwadkar 
770aa75f4d3SHarshad Shirwadkar /*
771aa75f4d3SHarshad Shirwadkar  * Writes updated data ranges for the inode in question. Updates CRC.
772aa75f4d3SHarshad Shirwadkar  * Returns 0 on success, error otherwise.
773aa75f4d3SHarshad Shirwadkar  */
774aa75f4d3SHarshad Shirwadkar static int ext4_fc_write_inode_data(struct inode *inode, u32 *crc)
775aa75f4d3SHarshad Shirwadkar {
776aa75f4d3SHarshad Shirwadkar 	ext4_lblk_t old_blk_size, cur_lblk_off, new_blk_size;
777aa75f4d3SHarshad Shirwadkar 	struct ext4_inode_info *ei = EXT4_I(inode);
778aa75f4d3SHarshad Shirwadkar 	struct ext4_map_blocks map;
779aa75f4d3SHarshad Shirwadkar 	struct ext4_fc_add_range fc_ext;
780aa75f4d3SHarshad Shirwadkar 	struct ext4_fc_del_range lrange;
781aa75f4d3SHarshad Shirwadkar 	struct ext4_extent *ex;
782aa75f4d3SHarshad Shirwadkar 	int ret;
783aa75f4d3SHarshad Shirwadkar 
784aa75f4d3SHarshad Shirwadkar 	mutex_lock(&ei->i_fc_lock);
785aa75f4d3SHarshad Shirwadkar 	if (ei->i_fc_lblk_len == 0) {
786aa75f4d3SHarshad Shirwadkar 		mutex_unlock(&ei->i_fc_lock);
787aa75f4d3SHarshad Shirwadkar 		return 0;
788aa75f4d3SHarshad Shirwadkar 	}
789aa75f4d3SHarshad Shirwadkar 	old_blk_size = ei->i_fc_lblk_start;
790aa75f4d3SHarshad Shirwadkar 	new_blk_size = ei->i_fc_lblk_start + ei->i_fc_lblk_len - 1;
791aa75f4d3SHarshad Shirwadkar 	ei->i_fc_lblk_len = 0;
792aa75f4d3SHarshad Shirwadkar 	mutex_unlock(&ei->i_fc_lock);
793aa75f4d3SHarshad Shirwadkar 
794aa75f4d3SHarshad Shirwadkar 	cur_lblk_off = old_blk_size;
795aa75f4d3SHarshad Shirwadkar 	jbd_debug(1, "%s: will try writing %d to %d for inode %ld\n",
796aa75f4d3SHarshad Shirwadkar 		  __func__, cur_lblk_off, new_blk_size, inode->i_ino);
797aa75f4d3SHarshad Shirwadkar 
798aa75f4d3SHarshad Shirwadkar 	while (cur_lblk_off <= new_blk_size) {
799aa75f4d3SHarshad Shirwadkar 		map.m_lblk = cur_lblk_off;
800aa75f4d3SHarshad Shirwadkar 		map.m_len = new_blk_size - cur_lblk_off + 1;
801aa75f4d3SHarshad Shirwadkar 		ret = ext4_map_blocks(NULL, inode, &map, 0);
802aa75f4d3SHarshad Shirwadkar 		if (ret < 0)
803aa75f4d3SHarshad Shirwadkar 			return -ECANCELED;
804aa75f4d3SHarshad Shirwadkar 
805aa75f4d3SHarshad Shirwadkar 		if (map.m_len == 0) {
806aa75f4d3SHarshad Shirwadkar 			cur_lblk_off++;
807aa75f4d3SHarshad Shirwadkar 			continue;
808aa75f4d3SHarshad Shirwadkar 		}
809aa75f4d3SHarshad Shirwadkar 
810aa75f4d3SHarshad Shirwadkar 		if (ret == 0) {
811aa75f4d3SHarshad Shirwadkar 			lrange.fc_ino = cpu_to_le32(inode->i_ino);
812aa75f4d3SHarshad Shirwadkar 			lrange.fc_lblk = cpu_to_le32(map.m_lblk);
813aa75f4d3SHarshad Shirwadkar 			lrange.fc_len = cpu_to_le32(map.m_len);
814aa75f4d3SHarshad Shirwadkar 			if (!ext4_fc_add_tlv(inode->i_sb, EXT4_FC_TAG_DEL_RANGE,
815aa75f4d3SHarshad Shirwadkar 					    sizeof(lrange), (u8 *)&lrange, crc))
816aa75f4d3SHarshad Shirwadkar 				return -ENOSPC;
817aa75f4d3SHarshad Shirwadkar 		} else {
818aa75f4d3SHarshad Shirwadkar 			fc_ext.fc_ino = cpu_to_le32(inode->i_ino);
819aa75f4d3SHarshad Shirwadkar 			ex = (struct ext4_extent *)&fc_ext.fc_ex;
820aa75f4d3SHarshad Shirwadkar 			ex->ee_block = cpu_to_le32(map.m_lblk);
821aa75f4d3SHarshad Shirwadkar 			ex->ee_len = cpu_to_le16(map.m_len);
822aa75f4d3SHarshad Shirwadkar 			ext4_ext_store_pblock(ex, map.m_pblk);
823aa75f4d3SHarshad Shirwadkar 			if (map.m_flags & EXT4_MAP_UNWRITTEN)
824aa75f4d3SHarshad Shirwadkar 				ext4_ext_mark_unwritten(ex);
825aa75f4d3SHarshad Shirwadkar 			else
826aa75f4d3SHarshad Shirwadkar 				ext4_ext_mark_initialized(ex);
827aa75f4d3SHarshad Shirwadkar 			if (!ext4_fc_add_tlv(inode->i_sb, EXT4_FC_TAG_ADD_RANGE,
828aa75f4d3SHarshad Shirwadkar 					    sizeof(fc_ext), (u8 *)&fc_ext, crc))
829aa75f4d3SHarshad Shirwadkar 				return -ENOSPC;
830aa75f4d3SHarshad Shirwadkar 		}
831aa75f4d3SHarshad Shirwadkar 
832aa75f4d3SHarshad Shirwadkar 		cur_lblk_off += map.m_len;
833aa75f4d3SHarshad Shirwadkar 	}
834aa75f4d3SHarshad Shirwadkar 
835aa75f4d3SHarshad Shirwadkar 	return 0;
836aa75f4d3SHarshad Shirwadkar }
837aa75f4d3SHarshad Shirwadkar 
838aa75f4d3SHarshad Shirwadkar 
839aa75f4d3SHarshad Shirwadkar /* Submit data for all the fast commit inodes */
840aa75f4d3SHarshad Shirwadkar static int ext4_fc_submit_inode_data_all(journal_t *journal)
841aa75f4d3SHarshad Shirwadkar {
842aa75f4d3SHarshad Shirwadkar 	struct super_block *sb = (struct super_block *)(journal->j_private);
843aa75f4d3SHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(sb);
844aa75f4d3SHarshad Shirwadkar 	struct ext4_inode_info *ei;
845aa75f4d3SHarshad Shirwadkar 	struct list_head *pos;
846aa75f4d3SHarshad Shirwadkar 	int ret = 0;
847aa75f4d3SHarshad Shirwadkar 
848aa75f4d3SHarshad Shirwadkar 	spin_lock(&sbi->s_fc_lock);
849aa75f4d3SHarshad Shirwadkar 	sbi->s_mount_state |= EXT4_FC_COMMITTING;
850aa75f4d3SHarshad Shirwadkar 	list_for_each(pos, &sbi->s_fc_q[FC_Q_MAIN]) {
851aa75f4d3SHarshad Shirwadkar 		ei = list_entry(pos, struct ext4_inode_info, i_fc_list);
852aa75f4d3SHarshad Shirwadkar 		ext4_set_inode_state(&ei->vfs_inode, EXT4_STATE_FC_COMMITTING);
853aa75f4d3SHarshad Shirwadkar 		while (atomic_read(&ei->i_fc_updates)) {
854aa75f4d3SHarshad Shirwadkar 			DEFINE_WAIT(wait);
855aa75f4d3SHarshad Shirwadkar 
856aa75f4d3SHarshad Shirwadkar 			prepare_to_wait(&ei->i_fc_wait, &wait,
857aa75f4d3SHarshad Shirwadkar 						TASK_UNINTERRUPTIBLE);
858aa75f4d3SHarshad Shirwadkar 			if (atomic_read(&ei->i_fc_updates)) {
859aa75f4d3SHarshad Shirwadkar 				spin_unlock(&sbi->s_fc_lock);
860aa75f4d3SHarshad Shirwadkar 				schedule();
861aa75f4d3SHarshad Shirwadkar 				spin_lock(&sbi->s_fc_lock);
862aa75f4d3SHarshad Shirwadkar 			}
863aa75f4d3SHarshad Shirwadkar 			finish_wait(&ei->i_fc_wait, &wait);
864aa75f4d3SHarshad Shirwadkar 		}
865aa75f4d3SHarshad Shirwadkar 		spin_unlock(&sbi->s_fc_lock);
866aa75f4d3SHarshad Shirwadkar 		ret = jbd2_submit_inode_data(ei->jinode);
867aa75f4d3SHarshad Shirwadkar 		if (ret)
868aa75f4d3SHarshad Shirwadkar 			return ret;
869aa75f4d3SHarshad Shirwadkar 		spin_lock(&sbi->s_fc_lock);
870aa75f4d3SHarshad Shirwadkar 	}
871aa75f4d3SHarshad Shirwadkar 	spin_unlock(&sbi->s_fc_lock);
872aa75f4d3SHarshad Shirwadkar 
873aa75f4d3SHarshad Shirwadkar 	return ret;
874aa75f4d3SHarshad Shirwadkar }
875aa75f4d3SHarshad Shirwadkar 
876aa75f4d3SHarshad Shirwadkar /* Wait for completion of data for all the fast commit inodes */
877aa75f4d3SHarshad Shirwadkar static int ext4_fc_wait_inode_data_all(journal_t *journal)
878aa75f4d3SHarshad Shirwadkar {
879aa75f4d3SHarshad Shirwadkar 	struct super_block *sb = (struct super_block *)(journal->j_private);
880aa75f4d3SHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(sb);
881aa75f4d3SHarshad Shirwadkar 	struct ext4_inode_info *pos, *n;
882aa75f4d3SHarshad Shirwadkar 	int ret = 0;
883aa75f4d3SHarshad Shirwadkar 
884aa75f4d3SHarshad Shirwadkar 	spin_lock(&sbi->s_fc_lock);
885aa75f4d3SHarshad Shirwadkar 	list_for_each_entry_safe(pos, n, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) {
886aa75f4d3SHarshad Shirwadkar 		if (!ext4_test_inode_state(&pos->vfs_inode,
887aa75f4d3SHarshad Shirwadkar 					   EXT4_STATE_FC_COMMITTING))
888aa75f4d3SHarshad Shirwadkar 			continue;
889aa75f4d3SHarshad Shirwadkar 		spin_unlock(&sbi->s_fc_lock);
890aa75f4d3SHarshad Shirwadkar 
891aa75f4d3SHarshad Shirwadkar 		ret = jbd2_wait_inode_data(journal, pos->jinode);
892aa75f4d3SHarshad Shirwadkar 		if (ret)
893aa75f4d3SHarshad Shirwadkar 			return ret;
894aa75f4d3SHarshad Shirwadkar 		spin_lock(&sbi->s_fc_lock);
895aa75f4d3SHarshad Shirwadkar 	}
896aa75f4d3SHarshad Shirwadkar 	spin_unlock(&sbi->s_fc_lock);
897aa75f4d3SHarshad Shirwadkar 
898aa75f4d3SHarshad Shirwadkar 	return 0;
899aa75f4d3SHarshad Shirwadkar }
900aa75f4d3SHarshad Shirwadkar 
901aa75f4d3SHarshad Shirwadkar /* Commit all the directory entry updates */
902aa75f4d3SHarshad Shirwadkar static int ext4_fc_commit_dentry_updates(journal_t *journal, u32 *crc)
903aa75f4d3SHarshad Shirwadkar {
904aa75f4d3SHarshad Shirwadkar 	struct super_block *sb = (struct super_block *)(journal->j_private);
905aa75f4d3SHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(sb);
906aa75f4d3SHarshad Shirwadkar 	struct ext4_fc_dentry_update *fc_dentry;
907aa75f4d3SHarshad Shirwadkar 	struct inode *inode;
908aa75f4d3SHarshad Shirwadkar 	struct list_head *pos, *n, *fcd_pos, *fcd_n;
909aa75f4d3SHarshad Shirwadkar 	struct ext4_inode_info *ei;
910aa75f4d3SHarshad Shirwadkar 	int ret;
911aa75f4d3SHarshad Shirwadkar 
912aa75f4d3SHarshad Shirwadkar 	if (list_empty(&sbi->s_fc_dentry_q[FC_Q_MAIN]))
913aa75f4d3SHarshad Shirwadkar 		return 0;
914aa75f4d3SHarshad Shirwadkar 	list_for_each_safe(fcd_pos, fcd_n, &sbi->s_fc_dentry_q[FC_Q_MAIN]) {
915aa75f4d3SHarshad Shirwadkar 		fc_dentry = list_entry(fcd_pos, struct ext4_fc_dentry_update,
916aa75f4d3SHarshad Shirwadkar 					fcd_list);
917aa75f4d3SHarshad Shirwadkar 		if (fc_dentry->fcd_op != EXT4_FC_TAG_CREAT) {
918aa75f4d3SHarshad Shirwadkar 			spin_unlock(&sbi->s_fc_lock);
919aa75f4d3SHarshad Shirwadkar 			if (!ext4_fc_add_dentry_tlv(
920aa75f4d3SHarshad Shirwadkar 				sb, fc_dentry->fcd_op,
921aa75f4d3SHarshad Shirwadkar 				fc_dentry->fcd_parent, fc_dentry->fcd_ino,
922aa75f4d3SHarshad Shirwadkar 				fc_dentry->fcd_name.len,
923aa75f4d3SHarshad Shirwadkar 				fc_dentry->fcd_name.name, crc)) {
924aa75f4d3SHarshad Shirwadkar 				ret = -ENOSPC;
925aa75f4d3SHarshad Shirwadkar 				goto lock_and_exit;
926aa75f4d3SHarshad Shirwadkar 			}
927aa75f4d3SHarshad Shirwadkar 			spin_lock(&sbi->s_fc_lock);
928aa75f4d3SHarshad Shirwadkar 			continue;
929aa75f4d3SHarshad Shirwadkar 		}
930aa75f4d3SHarshad Shirwadkar 
931aa75f4d3SHarshad Shirwadkar 		inode = NULL;
932aa75f4d3SHarshad Shirwadkar 		list_for_each_safe(pos, n, &sbi->s_fc_q[FC_Q_MAIN]) {
933aa75f4d3SHarshad Shirwadkar 			ei = list_entry(pos, struct ext4_inode_info, i_fc_list);
934aa75f4d3SHarshad Shirwadkar 			if (ei->vfs_inode.i_ino == fc_dentry->fcd_ino) {
935aa75f4d3SHarshad Shirwadkar 				inode = &ei->vfs_inode;
936aa75f4d3SHarshad Shirwadkar 				break;
937aa75f4d3SHarshad Shirwadkar 			}
938aa75f4d3SHarshad Shirwadkar 		}
939aa75f4d3SHarshad Shirwadkar 		/*
940aa75f4d3SHarshad Shirwadkar 		 * If we don't find inode in our list, then it was deleted,
941aa75f4d3SHarshad Shirwadkar 		 * in which case, we don't need to record it's create tag.
942aa75f4d3SHarshad Shirwadkar 		 */
943aa75f4d3SHarshad Shirwadkar 		if (!inode)
944aa75f4d3SHarshad Shirwadkar 			continue;
945aa75f4d3SHarshad Shirwadkar 		spin_unlock(&sbi->s_fc_lock);
946aa75f4d3SHarshad Shirwadkar 
947aa75f4d3SHarshad Shirwadkar 		/*
948aa75f4d3SHarshad Shirwadkar 		 * We first write the inode and then the create dirent. This
949aa75f4d3SHarshad Shirwadkar 		 * allows the recovery code to create an unnamed inode first
950aa75f4d3SHarshad Shirwadkar 		 * and then link it to a directory entry. This allows us
951aa75f4d3SHarshad Shirwadkar 		 * to use namei.c routines almost as is and simplifies
952aa75f4d3SHarshad Shirwadkar 		 * the recovery code.
953aa75f4d3SHarshad Shirwadkar 		 */
954aa75f4d3SHarshad Shirwadkar 		ret = ext4_fc_write_inode(inode, crc);
955aa75f4d3SHarshad Shirwadkar 		if (ret)
956aa75f4d3SHarshad Shirwadkar 			goto lock_and_exit;
957aa75f4d3SHarshad Shirwadkar 
958aa75f4d3SHarshad Shirwadkar 		ret = ext4_fc_write_inode_data(inode, crc);
959aa75f4d3SHarshad Shirwadkar 		if (ret)
960aa75f4d3SHarshad Shirwadkar 			goto lock_and_exit;
961aa75f4d3SHarshad Shirwadkar 
962aa75f4d3SHarshad Shirwadkar 		if (!ext4_fc_add_dentry_tlv(
963aa75f4d3SHarshad Shirwadkar 			sb, fc_dentry->fcd_op,
964aa75f4d3SHarshad Shirwadkar 			fc_dentry->fcd_parent, fc_dentry->fcd_ino,
965aa75f4d3SHarshad Shirwadkar 			fc_dentry->fcd_name.len,
966aa75f4d3SHarshad Shirwadkar 			fc_dentry->fcd_name.name, crc)) {
967aa75f4d3SHarshad Shirwadkar 			spin_lock(&sbi->s_fc_lock);
968aa75f4d3SHarshad Shirwadkar 			ret = -ENOSPC;
969aa75f4d3SHarshad Shirwadkar 			goto lock_and_exit;
970aa75f4d3SHarshad Shirwadkar 		}
971aa75f4d3SHarshad Shirwadkar 
972aa75f4d3SHarshad Shirwadkar 		spin_lock(&sbi->s_fc_lock);
973aa75f4d3SHarshad Shirwadkar 	}
974aa75f4d3SHarshad Shirwadkar 	return 0;
975aa75f4d3SHarshad Shirwadkar lock_and_exit:
976aa75f4d3SHarshad Shirwadkar 	spin_lock(&sbi->s_fc_lock);
977aa75f4d3SHarshad Shirwadkar 	return ret;
978aa75f4d3SHarshad Shirwadkar }
979aa75f4d3SHarshad Shirwadkar 
980aa75f4d3SHarshad Shirwadkar static int ext4_fc_perform_commit(journal_t *journal)
981aa75f4d3SHarshad Shirwadkar {
982aa75f4d3SHarshad Shirwadkar 	struct super_block *sb = (struct super_block *)(journal->j_private);
983aa75f4d3SHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(sb);
984aa75f4d3SHarshad Shirwadkar 	struct ext4_inode_info *iter;
985aa75f4d3SHarshad Shirwadkar 	struct ext4_fc_head head;
986aa75f4d3SHarshad Shirwadkar 	struct list_head *pos;
987aa75f4d3SHarshad Shirwadkar 	struct inode *inode;
988aa75f4d3SHarshad Shirwadkar 	struct blk_plug plug;
989aa75f4d3SHarshad Shirwadkar 	int ret = 0;
990aa75f4d3SHarshad Shirwadkar 	u32 crc = 0;
991aa75f4d3SHarshad Shirwadkar 
992aa75f4d3SHarshad Shirwadkar 	ret = ext4_fc_submit_inode_data_all(journal);
993aa75f4d3SHarshad Shirwadkar 	if (ret)
994aa75f4d3SHarshad Shirwadkar 		return ret;
995aa75f4d3SHarshad Shirwadkar 
996aa75f4d3SHarshad Shirwadkar 	ret = ext4_fc_wait_inode_data_all(journal);
997aa75f4d3SHarshad Shirwadkar 	if (ret)
998aa75f4d3SHarshad Shirwadkar 		return ret;
999aa75f4d3SHarshad Shirwadkar 
1000aa75f4d3SHarshad Shirwadkar 	blk_start_plug(&plug);
1001aa75f4d3SHarshad Shirwadkar 	if (sbi->s_fc_bytes == 0) {
1002aa75f4d3SHarshad Shirwadkar 		/*
1003aa75f4d3SHarshad Shirwadkar 		 * Add a head tag only if this is the first fast commit
1004aa75f4d3SHarshad Shirwadkar 		 * in this TID.
1005aa75f4d3SHarshad Shirwadkar 		 */
1006aa75f4d3SHarshad Shirwadkar 		head.fc_features = cpu_to_le32(EXT4_FC_SUPPORTED_FEATURES);
1007aa75f4d3SHarshad Shirwadkar 		head.fc_tid = cpu_to_le32(
1008aa75f4d3SHarshad Shirwadkar 			sbi->s_journal->j_running_transaction->t_tid);
1009aa75f4d3SHarshad Shirwadkar 		if (!ext4_fc_add_tlv(sb, EXT4_FC_TAG_HEAD, sizeof(head),
1010aa75f4d3SHarshad Shirwadkar 			(u8 *)&head, &crc))
1011aa75f4d3SHarshad Shirwadkar 			goto out;
1012aa75f4d3SHarshad Shirwadkar 	}
1013aa75f4d3SHarshad Shirwadkar 
1014aa75f4d3SHarshad Shirwadkar 	spin_lock(&sbi->s_fc_lock);
1015aa75f4d3SHarshad Shirwadkar 	ret = ext4_fc_commit_dentry_updates(journal, &crc);
1016aa75f4d3SHarshad Shirwadkar 	if (ret) {
1017aa75f4d3SHarshad Shirwadkar 		spin_unlock(&sbi->s_fc_lock);
1018aa75f4d3SHarshad Shirwadkar 		goto out;
1019aa75f4d3SHarshad Shirwadkar 	}
1020aa75f4d3SHarshad Shirwadkar 
1021aa75f4d3SHarshad Shirwadkar 	list_for_each(pos, &sbi->s_fc_q[FC_Q_MAIN]) {
1022aa75f4d3SHarshad Shirwadkar 		iter = list_entry(pos, struct ext4_inode_info, i_fc_list);
1023aa75f4d3SHarshad Shirwadkar 		inode = &iter->vfs_inode;
1024aa75f4d3SHarshad Shirwadkar 		if (!ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING))
1025aa75f4d3SHarshad Shirwadkar 			continue;
1026aa75f4d3SHarshad Shirwadkar 
1027aa75f4d3SHarshad Shirwadkar 		spin_unlock(&sbi->s_fc_lock);
1028aa75f4d3SHarshad Shirwadkar 		ret = ext4_fc_write_inode_data(inode, &crc);
1029aa75f4d3SHarshad Shirwadkar 		if (ret)
1030aa75f4d3SHarshad Shirwadkar 			goto out;
1031aa75f4d3SHarshad Shirwadkar 		ret = ext4_fc_write_inode(inode, &crc);
1032aa75f4d3SHarshad Shirwadkar 		if (ret)
1033aa75f4d3SHarshad Shirwadkar 			goto out;
1034aa75f4d3SHarshad Shirwadkar 		spin_lock(&sbi->s_fc_lock);
1035aa75f4d3SHarshad Shirwadkar 		EXT4_I(inode)->i_fc_committed_subtid =
1036aa75f4d3SHarshad Shirwadkar 			atomic_read(&sbi->s_fc_subtid);
1037aa75f4d3SHarshad Shirwadkar 	}
1038aa75f4d3SHarshad Shirwadkar 	spin_unlock(&sbi->s_fc_lock);
1039aa75f4d3SHarshad Shirwadkar 
1040aa75f4d3SHarshad Shirwadkar 	ret = ext4_fc_write_tail(sb, crc);
1041aa75f4d3SHarshad Shirwadkar 
1042aa75f4d3SHarshad Shirwadkar out:
1043aa75f4d3SHarshad Shirwadkar 	blk_finish_plug(&plug);
1044aa75f4d3SHarshad Shirwadkar 	return ret;
1045aa75f4d3SHarshad Shirwadkar }
1046aa75f4d3SHarshad Shirwadkar 
1047aa75f4d3SHarshad Shirwadkar /*
1048aa75f4d3SHarshad Shirwadkar  * The main commit entry point. Performs a fast commit for transaction
1049aa75f4d3SHarshad Shirwadkar  * commit_tid if needed. If it's not possible to perform a fast commit
1050aa75f4d3SHarshad Shirwadkar  * due to various reasons, we fall back to full commit. Returns 0
1051aa75f4d3SHarshad Shirwadkar  * on success, error otherwise.
1052aa75f4d3SHarshad Shirwadkar  */
1053aa75f4d3SHarshad Shirwadkar int ext4_fc_commit(journal_t *journal, tid_t commit_tid)
1054aa75f4d3SHarshad Shirwadkar {
1055aa75f4d3SHarshad Shirwadkar 	struct super_block *sb = (struct super_block *)(journal->j_private);
1056aa75f4d3SHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(sb);
1057aa75f4d3SHarshad Shirwadkar 	int nblks = 0, ret, bsize = journal->j_blocksize;
1058aa75f4d3SHarshad Shirwadkar 	int subtid = atomic_read(&sbi->s_fc_subtid);
1059aa75f4d3SHarshad Shirwadkar 	int reason = EXT4_FC_REASON_OK, fc_bufs_before = 0;
1060aa75f4d3SHarshad Shirwadkar 	ktime_t start_time, commit_time;
1061aa75f4d3SHarshad Shirwadkar 
1062aa75f4d3SHarshad Shirwadkar 	trace_ext4_fc_commit_start(sb);
1063aa75f4d3SHarshad Shirwadkar 
1064aa75f4d3SHarshad Shirwadkar 	start_time = ktime_get();
1065aa75f4d3SHarshad Shirwadkar 
1066aa75f4d3SHarshad Shirwadkar 	if (!test_opt2(sb, JOURNAL_FAST_COMMIT) ||
1067aa75f4d3SHarshad Shirwadkar 		(ext4_fc_is_ineligible(sb))) {
1068aa75f4d3SHarshad Shirwadkar 		reason = EXT4_FC_REASON_INELIGIBLE;
1069aa75f4d3SHarshad Shirwadkar 		goto out;
1070aa75f4d3SHarshad Shirwadkar 	}
1071aa75f4d3SHarshad Shirwadkar 
1072aa75f4d3SHarshad Shirwadkar restart_fc:
1073aa75f4d3SHarshad Shirwadkar 	ret = jbd2_fc_begin_commit(journal, commit_tid);
1074aa75f4d3SHarshad Shirwadkar 	if (ret == -EALREADY) {
1075aa75f4d3SHarshad Shirwadkar 		/* There was an ongoing commit, check if we need to restart */
1076aa75f4d3SHarshad Shirwadkar 		if (atomic_read(&sbi->s_fc_subtid) <= subtid &&
1077aa75f4d3SHarshad Shirwadkar 			commit_tid > journal->j_commit_sequence)
1078aa75f4d3SHarshad Shirwadkar 			goto restart_fc;
1079aa75f4d3SHarshad Shirwadkar 		reason = EXT4_FC_REASON_ALREADY_COMMITTED;
1080aa75f4d3SHarshad Shirwadkar 		goto out;
1081aa75f4d3SHarshad Shirwadkar 	} else if (ret) {
1082aa75f4d3SHarshad Shirwadkar 		sbi->s_fc_stats.fc_ineligible_reason_count[EXT4_FC_COMMIT_FAILED]++;
1083aa75f4d3SHarshad Shirwadkar 		reason = EXT4_FC_REASON_FC_START_FAILED;
1084aa75f4d3SHarshad Shirwadkar 		goto out;
1085aa75f4d3SHarshad Shirwadkar 	}
1086aa75f4d3SHarshad Shirwadkar 
1087aa75f4d3SHarshad Shirwadkar 	fc_bufs_before = (sbi->s_fc_bytes + bsize - 1) / bsize;
1088aa75f4d3SHarshad Shirwadkar 	ret = ext4_fc_perform_commit(journal);
1089aa75f4d3SHarshad Shirwadkar 	if (ret < 0) {
1090aa75f4d3SHarshad Shirwadkar 		sbi->s_fc_stats.fc_ineligible_reason_count[EXT4_FC_COMMIT_FAILED]++;
1091aa75f4d3SHarshad Shirwadkar 		reason = EXT4_FC_REASON_FC_FAILED;
1092aa75f4d3SHarshad Shirwadkar 		goto out;
1093aa75f4d3SHarshad Shirwadkar 	}
1094aa75f4d3SHarshad Shirwadkar 	nblks = (sbi->s_fc_bytes + bsize - 1) / bsize - fc_bufs_before;
1095aa75f4d3SHarshad Shirwadkar 	ret = jbd2_fc_wait_bufs(journal, nblks);
1096aa75f4d3SHarshad Shirwadkar 	if (ret < 0) {
1097aa75f4d3SHarshad Shirwadkar 		sbi->s_fc_stats.fc_ineligible_reason_count[EXT4_FC_COMMIT_FAILED]++;
1098aa75f4d3SHarshad Shirwadkar 		reason = EXT4_FC_REASON_FC_FAILED;
1099aa75f4d3SHarshad Shirwadkar 		goto out;
1100aa75f4d3SHarshad Shirwadkar 	}
1101aa75f4d3SHarshad Shirwadkar 	atomic_inc(&sbi->s_fc_subtid);
1102aa75f4d3SHarshad Shirwadkar 	jbd2_fc_end_commit(journal);
1103aa75f4d3SHarshad Shirwadkar out:
1104aa75f4d3SHarshad Shirwadkar 	/* Has any ineligible update happened since we started? */
1105aa75f4d3SHarshad Shirwadkar 	if (reason == EXT4_FC_REASON_OK && ext4_fc_is_ineligible(sb)) {
1106aa75f4d3SHarshad Shirwadkar 		sbi->s_fc_stats.fc_ineligible_reason_count[EXT4_FC_COMMIT_FAILED]++;
1107aa75f4d3SHarshad Shirwadkar 		reason = EXT4_FC_REASON_INELIGIBLE;
1108aa75f4d3SHarshad Shirwadkar 	}
1109aa75f4d3SHarshad Shirwadkar 
1110aa75f4d3SHarshad Shirwadkar 	spin_lock(&sbi->s_fc_lock);
1111aa75f4d3SHarshad Shirwadkar 	if (reason != EXT4_FC_REASON_OK &&
1112aa75f4d3SHarshad Shirwadkar 		reason != EXT4_FC_REASON_ALREADY_COMMITTED) {
1113aa75f4d3SHarshad Shirwadkar 		sbi->s_fc_stats.fc_ineligible_commits++;
1114aa75f4d3SHarshad Shirwadkar 	} else {
1115aa75f4d3SHarshad Shirwadkar 		sbi->s_fc_stats.fc_num_commits++;
1116aa75f4d3SHarshad Shirwadkar 		sbi->s_fc_stats.fc_numblks += nblks;
1117aa75f4d3SHarshad Shirwadkar 	}
1118aa75f4d3SHarshad Shirwadkar 	spin_unlock(&sbi->s_fc_lock);
1119aa75f4d3SHarshad Shirwadkar 	nblks = (reason == EXT4_FC_REASON_OK) ? nblks : 0;
1120aa75f4d3SHarshad Shirwadkar 	trace_ext4_fc_commit_stop(sb, nblks, reason);
1121aa75f4d3SHarshad Shirwadkar 	commit_time = ktime_to_ns(ktime_sub(ktime_get(), start_time));
1122aa75f4d3SHarshad Shirwadkar 	/*
1123aa75f4d3SHarshad Shirwadkar 	 * weight the commit time higher than the average time so we don't
1124aa75f4d3SHarshad Shirwadkar 	 * react too strongly to vast changes in the commit time
1125aa75f4d3SHarshad Shirwadkar 	 */
1126aa75f4d3SHarshad Shirwadkar 	if (likely(sbi->s_fc_avg_commit_time))
1127aa75f4d3SHarshad Shirwadkar 		sbi->s_fc_avg_commit_time = (commit_time +
1128aa75f4d3SHarshad Shirwadkar 				sbi->s_fc_avg_commit_time * 3) / 4;
1129aa75f4d3SHarshad Shirwadkar 	else
1130aa75f4d3SHarshad Shirwadkar 		sbi->s_fc_avg_commit_time = commit_time;
1131aa75f4d3SHarshad Shirwadkar 	jbd_debug(1,
1132aa75f4d3SHarshad Shirwadkar 		"Fast commit ended with blks = %d, reason = %d, subtid - %d",
1133aa75f4d3SHarshad Shirwadkar 		nblks, reason, subtid);
1134aa75f4d3SHarshad Shirwadkar 	if (reason == EXT4_FC_REASON_FC_FAILED)
1135aa75f4d3SHarshad Shirwadkar 		return jbd2_fc_end_commit_fallback(journal, commit_tid);
1136aa75f4d3SHarshad Shirwadkar 	if (reason == EXT4_FC_REASON_FC_START_FAILED ||
1137aa75f4d3SHarshad Shirwadkar 		reason == EXT4_FC_REASON_INELIGIBLE)
1138aa75f4d3SHarshad Shirwadkar 		return jbd2_complete_transaction(journal, commit_tid);
1139aa75f4d3SHarshad Shirwadkar 	return 0;
1140aa75f4d3SHarshad Shirwadkar }
1141aa75f4d3SHarshad Shirwadkar 
1142ff780b91SHarshad Shirwadkar /*
1143ff780b91SHarshad Shirwadkar  * Fast commit cleanup routine. This is called after every fast commit and
1144ff780b91SHarshad Shirwadkar  * full commit. full is true if we are called after a full commit.
1145ff780b91SHarshad Shirwadkar  */
1146ff780b91SHarshad Shirwadkar static void ext4_fc_cleanup(journal_t *journal, int full)
1147ff780b91SHarshad Shirwadkar {
1148aa75f4d3SHarshad Shirwadkar 	struct super_block *sb = journal->j_private;
1149aa75f4d3SHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(sb);
1150aa75f4d3SHarshad Shirwadkar 	struct ext4_inode_info *iter;
1151aa75f4d3SHarshad Shirwadkar 	struct ext4_fc_dentry_update *fc_dentry;
1152aa75f4d3SHarshad Shirwadkar 	struct list_head *pos, *n;
1153aa75f4d3SHarshad Shirwadkar 
1154aa75f4d3SHarshad Shirwadkar 	if (full && sbi->s_fc_bh)
1155aa75f4d3SHarshad Shirwadkar 		sbi->s_fc_bh = NULL;
1156aa75f4d3SHarshad Shirwadkar 
1157aa75f4d3SHarshad Shirwadkar 	jbd2_fc_release_bufs(journal);
1158aa75f4d3SHarshad Shirwadkar 
1159aa75f4d3SHarshad Shirwadkar 	spin_lock(&sbi->s_fc_lock);
1160aa75f4d3SHarshad Shirwadkar 	list_for_each_safe(pos, n, &sbi->s_fc_q[FC_Q_MAIN]) {
1161aa75f4d3SHarshad Shirwadkar 		iter = list_entry(pos, struct ext4_inode_info, i_fc_list);
1162aa75f4d3SHarshad Shirwadkar 		list_del_init(&iter->i_fc_list);
1163aa75f4d3SHarshad Shirwadkar 		ext4_clear_inode_state(&iter->vfs_inode,
1164aa75f4d3SHarshad Shirwadkar 				       EXT4_STATE_FC_COMMITTING);
1165aa75f4d3SHarshad Shirwadkar 		ext4_fc_reset_inode(&iter->vfs_inode);
1166aa75f4d3SHarshad Shirwadkar 		/* Make sure EXT4_STATE_FC_COMMITTING bit is clear */
1167aa75f4d3SHarshad Shirwadkar 		smp_mb();
1168aa75f4d3SHarshad Shirwadkar #if (BITS_PER_LONG < 64)
1169aa75f4d3SHarshad Shirwadkar 		wake_up_bit(&iter->i_state_flags, EXT4_STATE_FC_COMMITTING);
1170aa75f4d3SHarshad Shirwadkar #else
1171aa75f4d3SHarshad Shirwadkar 		wake_up_bit(&iter->i_flags, EXT4_STATE_FC_COMMITTING);
1172aa75f4d3SHarshad Shirwadkar #endif
1173aa75f4d3SHarshad Shirwadkar 	}
1174aa75f4d3SHarshad Shirwadkar 
1175aa75f4d3SHarshad Shirwadkar 	while (!list_empty(&sbi->s_fc_dentry_q[FC_Q_MAIN])) {
1176aa75f4d3SHarshad Shirwadkar 		fc_dentry = list_first_entry(&sbi->s_fc_dentry_q[FC_Q_MAIN],
1177aa75f4d3SHarshad Shirwadkar 					     struct ext4_fc_dentry_update,
1178aa75f4d3SHarshad Shirwadkar 					     fcd_list);
1179aa75f4d3SHarshad Shirwadkar 		list_del_init(&fc_dentry->fcd_list);
1180aa75f4d3SHarshad Shirwadkar 		spin_unlock(&sbi->s_fc_lock);
1181aa75f4d3SHarshad Shirwadkar 
1182aa75f4d3SHarshad Shirwadkar 		if (fc_dentry->fcd_name.name &&
1183aa75f4d3SHarshad Shirwadkar 			fc_dentry->fcd_name.len > DNAME_INLINE_LEN)
1184aa75f4d3SHarshad Shirwadkar 			kfree(fc_dentry->fcd_name.name);
1185aa75f4d3SHarshad Shirwadkar 		kmem_cache_free(ext4_fc_dentry_cachep, fc_dentry);
1186aa75f4d3SHarshad Shirwadkar 		spin_lock(&sbi->s_fc_lock);
1187aa75f4d3SHarshad Shirwadkar 	}
1188aa75f4d3SHarshad Shirwadkar 
1189aa75f4d3SHarshad Shirwadkar 	list_splice_init(&sbi->s_fc_dentry_q[FC_Q_STAGING],
1190aa75f4d3SHarshad Shirwadkar 				&sbi->s_fc_dentry_q[FC_Q_MAIN]);
1191aa75f4d3SHarshad Shirwadkar 	list_splice_init(&sbi->s_fc_q[FC_Q_STAGING],
1192aa75f4d3SHarshad Shirwadkar 				&sbi->s_fc_q[FC_Q_STAGING]);
1193aa75f4d3SHarshad Shirwadkar 
1194aa75f4d3SHarshad Shirwadkar 	sbi->s_mount_state &= ~EXT4_FC_COMMITTING;
1195aa75f4d3SHarshad Shirwadkar 	sbi->s_mount_state &= ~EXT4_FC_INELIGIBLE;
1196aa75f4d3SHarshad Shirwadkar 
1197aa75f4d3SHarshad Shirwadkar 	if (full)
1198aa75f4d3SHarshad Shirwadkar 		sbi->s_fc_bytes = 0;
1199aa75f4d3SHarshad Shirwadkar 	spin_unlock(&sbi->s_fc_lock);
1200aa75f4d3SHarshad Shirwadkar 	trace_ext4_fc_stats(sb);
1201ff780b91SHarshad Shirwadkar }
12026866d7b3SHarshad Shirwadkar 
12038016e29fSHarshad Shirwadkar /* Ext4 Replay Path Routines */
12048016e29fSHarshad Shirwadkar 
12058016e29fSHarshad Shirwadkar /* Get length of a particular tlv */
12068016e29fSHarshad Shirwadkar static inline int ext4_fc_tag_len(struct ext4_fc_tl *tl)
12078016e29fSHarshad Shirwadkar {
12088016e29fSHarshad Shirwadkar 	return le16_to_cpu(tl->fc_len);
12098016e29fSHarshad Shirwadkar }
12108016e29fSHarshad Shirwadkar 
12118016e29fSHarshad Shirwadkar /* Get a pointer to "value" of a tlv */
12128016e29fSHarshad Shirwadkar static inline u8 *ext4_fc_tag_val(struct ext4_fc_tl *tl)
12138016e29fSHarshad Shirwadkar {
12148016e29fSHarshad Shirwadkar 	return (u8 *)tl + sizeof(*tl);
12158016e29fSHarshad Shirwadkar }
12168016e29fSHarshad Shirwadkar 
/* Decoded form of a dentry TLV, shared by the dentry replay routines */
struct dentry_info_args {
	int parent_ino;		/* inode number of the parent directory */
	int dname_len;		/* length of dname in bytes */
	int ino;		/* inode number the dentry refers to */
	int inode_len;
	char *dname;		/* entry name */
};
12228016e29fSHarshad Shirwadkar 
12238016e29fSHarshad Shirwadkar static inline void tl_to_darg(struct dentry_info_args *darg,
12248016e29fSHarshad Shirwadkar 				struct  ext4_fc_tl *tl)
12258016e29fSHarshad Shirwadkar {
12268016e29fSHarshad Shirwadkar 	struct ext4_fc_dentry_info *fcd;
12278016e29fSHarshad Shirwadkar 
12288016e29fSHarshad Shirwadkar 	fcd = (struct ext4_fc_dentry_info *)ext4_fc_tag_val(tl);
12298016e29fSHarshad Shirwadkar 
12308016e29fSHarshad Shirwadkar 	darg->parent_ino = le32_to_cpu(fcd->fc_parent_ino);
12318016e29fSHarshad Shirwadkar 	darg->ino = le32_to_cpu(fcd->fc_ino);
12328016e29fSHarshad Shirwadkar 	darg->dname = fcd->fc_dname;
12338016e29fSHarshad Shirwadkar 	darg->dname_len = ext4_fc_tag_len(tl) -
12348016e29fSHarshad Shirwadkar 			sizeof(struct ext4_fc_dentry_info);
12358016e29fSHarshad Shirwadkar }
12368016e29fSHarshad Shirwadkar 
12378016e29fSHarshad Shirwadkar /* Unlink replay function */
12388016e29fSHarshad Shirwadkar static int ext4_fc_replay_unlink(struct super_block *sb, struct ext4_fc_tl *tl)
12398016e29fSHarshad Shirwadkar {
12408016e29fSHarshad Shirwadkar 	struct inode *inode, *old_parent;
12418016e29fSHarshad Shirwadkar 	struct qstr entry;
12428016e29fSHarshad Shirwadkar 	struct dentry_info_args darg;
12438016e29fSHarshad Shirwadkar 	int ret = 0;
12448016e29fSHarshad Shirwadkar 
12458016e29fSHarshad Shirwadkar 	tl_to_darg(&darg, tl);
12468016e29fSHarshad Shirwadkar 
12478016e29fSHarshad Shirwadkar 	trace_ext4_fc_replay(sb, EXT4_FC_TAG_UNLINK, darg.ino,
12488016e29fSHarshad Shirwadkar 			darg.parent_ino, darg.dname_len);
12498016e29fSHarshad Shirwadkar 
12508016e29fSHarshad Shirwadkar 	entry.name = darg.dname;
12518016e29fSHarshad Shirwadkar 	entry.len = darg.dname_len;
12528016e29fSHarshad Shirwadkar 	inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL);
12538016e29fSHarshad Shirwadkar 
12548016e29fSHarshad Shirwadkar 	if (IS_ERR_OR_NULL(inode)) {
12558016e29fSHarshad Shirwadkar 		jbd_debug(1, "Inode %d not found", darg.ino);
12568016e29fSHarshad Shirwadkar 		return 0;
12578016e29fSHarshad Shirwadkar 	}
12588016e29fSHarshad Shirwadkar 
12598016e29fSHarshad Shirwadkar 	old_parent = ext4_iget(sb, darg.parent_ino,
12608016e29fSHarshad Shirwadkar 				EXT4_IGET_NORMAL);
12618016e29fSHarshad Shirwadkar 	if (IS_ERR_OR_NULL(old_parent)) {
12628016e29fSHarshad Shirwadkar 		jbd_debug(1, "Dir with inode  %d not found", darg.parent_ino);
12638016e29fSHarshad Shirwadkar 		iput(inode);
12648016e29fSHarshad Shirwadkar 		return 0;
12658016e29fSHarshad Shirwadkar 	}
12668016e29fSHarshad Shirwadkar 
12678016e29fSHarshad Shirwadkar 	ret = __ext4_unlink(old_parent, &entry, inode);
12688016e29fSHarshad Shirwadkar 	/* -ENOENT ok coz it might not exist anymore. */
12698016e29fSHarshad Shirwadkar 	if (ret == -ENOENT)
12708016e29fSHarshad Shirwadkar 		ret = 0;
12718016e29fSHarshad Shirwadkar 	iput(old_parent);
12728016e29fSHarshad Shirwadkar 	iput(inode);
12738016e29fSHarshad Shirwadkar 	return ret;
12748016e29fSHarshad Shirwadkar }
12758016e29fSHarshad Shirwadkar 
12768016e29fSHarshad Shirwadkar static int ext4_fc_replay_link_internal(struct super_block *sb,
12778016e29fSHarshad Shirwadkar 				struct dentry_info_args *darg,
12788016e29fSHarshad Shirwadkar 				struct inode *inode)
12798016e29fSHarshad Shirwadkar {
12808016e29fSHarshad Shirwadkar 	struct inode *dir = NULL;
12818016e29fSHarshad Shirwadkar 	struct dentry *dentry_dir = NULL, *dentry_inode = NULL;
12828016e29fSHarshad Shirwadkar 	struct qstr qstr_dname = QSTR_INIT(darg->dname, darg->dname_len);
12838016e29fSHarshad Shirwadkar 	int ret = 0;
12848016e29fSHarshad Shirwadkar 
12858016e29fSHarshad Shirwadkar 	dir = ext4_iget(sb, darg->parent_ino, EXT4_IGET_NORMAL);
12868016e29fSHarshad Shirwadkar 	if (IS_ERR(dir)) {
12878016e29fSHarshad Shirwadkar 		jbd_debug(1, "Dir with inode %d not found.", darg->parent_ino);
12888016e29fSHarshad Shirwadkar 		dir = NULL;
12898016e29fSHarshad Shirwadkar 		goto out;
12908016e29fSHarshad Shirwadkar 	}
12918016e29fSHarshad Shirwadkar 
12928016e29fSHarshad Shirwadkar 	dentry_dir = d_obtain_alias(dir);
12938016e29fSHarshad Shirwadkar 	if (IS_ERR(dentry_dir)) {
12948016e29fSHarshad Shirwadkar 		jbd_debug(1, "Failed to obtain dentry");
12958016e29fSHarshad Shirwadkar 		dentry_dir = NULL;
12968016e29fSHarshad Shirwadkar 		goto out;
12978016e29fSHarshad Shirwadkar 	}
12988016e29fSHarshad Shirwadkar 
12998016e29fSHarshad Shirwadkar 	dentry_inode = d_alloc(dentry_dir, &qstr_dname);
13008016e29fSHarshad Shirwadkar 	if (!dentry_inode) {
13018016e29fSHarshad Shirwadkar 		jbd_debug(1, "Inode dentry not created.");
13028016e29fSHarshad Shirwadkar 		ret = -ENOMEM;
13038016e29fSHarshad Shirwadkar 		goto out;
13048016e29fSHarshad Shirwadkar 	}
13058016e29fSHarshad Shirwadkar 
13068016e29fSHarshad Shirwadkar 	ret = __ext4_link(dir, inode, dentry_inode);
13078016e29fSHarshad Shirwadkar 	/*
13088016e29fSHarshad Shirwadkar 	 * It's possible that link already existed since data blocks
13098016e29fSHarshad Shirwadkar 	 * for the dir in question got persisted before we crashed OR
13108016e29fSHarshad Shirwadkar 	 * we replayed this tag and crashed before the entire replay
13118016e29fSHarshad Shirwadkar 	 * could complete.
13128016e29fSHarshad Shirwadkar 	 */
13138016e29fSHarshad Shirwadkar 	if (ret && ret != -EEXIST) {
13148016e29fSHarshad Shirwadkar 		jbd_debug(1, "Failed to link\n");
13158016e29fSHarshad Shirwadkar 		goto out;
13168016e29fSHarshad Shirwadkar 	}
13178016e29fSHarshad Shirwadkar 
13188016e29fSHarshad Shirwadkar 	ret = 0;
13198016e29fSHarshad Shirwadkar out:
13208016e29fSHarshad Shirwadkar 	if (dentry_dir) {
13218016e29fSHarshad Shirwadkar 		d_drop(dentry_dir);
13228016e29fSHarshad Shirwadkar 		dput(dentry_dir);
13238016e29fSHarshad Shirwadkar 	} else if (dir) {
13248016e29fSHarshad Shirwadkar 		iput(dir);
13258016e29fSHarshad Shirwadkar 	}
13268016e29fSHarshad Shirwadkar 	if (dentry_inode) {
13278016e29fSHarshad Shirwadkar 		d_drop(dentry_inode);
13288016e29fSHarshad Shirwadkar 		dput(dentry_inode);
13298016e29fSHarshad Shirwadkar 	}
13308016e29fSHarshad Shirwadkar 
13318016e29fSHarshad Shirwadkar 	return ret;
13328016e29fSHarshad Shirwadkar }
13338016e29fSHarshad Shirwadkar 
13348016e29fSHarshad Shirwadkar /* Link replay function */
13358016e29fSHarshad Shirwadkar static int ext4_fc_replay_link(struct super_block *sb, struct ext4_fc_tl *tl)
13368016e29fSHarshad Shirwadkar {
13378016e29fSHarshad Shirwadkar 	struct inode *inode;
13388016e29fSHarshad Shirwadkar 	struct dentry_info_args darg;
13398016e29fSHarshad Shirwadkar 	int ret = 0;
13408016e29fSHarshad Shirwadkar 
13418016e29fSHarshad Shirwadkar 	tl_to_darg(&darg, tl);
13428016e29fSHarshad Shirwadkar 	trace_ext4_fc_replay(sb, EXT4_FC_TAG_LINK, darg.ino,
13438016e29fSHarshad Shirwadkar 			darg.parent_ino, darg.dname_len);
13448016e29fSHarshad Shirwadkar 
13458016e29fSHarshad Shirwadkar 	inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL);
13468016e29fSHarshad Shirwadkar 	if (IS_ERR_OR_NULL(inode)) {
13478016e29fSHarshad Shirwadkar 		jbd_debug(1, "Inode not found.");
13488016e29fSHarshad Shirwadkar 		return 0;
13498016e29fSHarshad Shirwadkar 	}
13508016e29fSHarshad Shirwadkar 
13518016e29fSHarshad Shirwadkar 	ret = ext4_fc_replay_link_internal(sb, &darg, inode);
13528016e29fSHarshad Shirwadkar 	iput(inode);
13538016e29fSHarshad Shirwadkar 	return ret;
13548016e29fSHarshad Shirwadkar }
13558016e29fSHarshad Shirwadkar 
13568016e29fSHarshad Shirwadkar /*
13578016e29fSHarshad Shirwadkar  * Record all the modified inodes during replay. We use this later to setup
13588016e29fSHarshad Shirwadkar  * block bitmaps correctly.
13598016e29fSHarshad Shirwadkar  */
13608016e29fSHarshad Shirwadkar static int ext4_fc_record_modified_inode(struct super_block *sb, int ino)
13618016e29fSHarshad Shirwadkar {
13628016e29fSHarshad Shirwadkar 	struct ext4_fc_replay_state *state;
13638016e29fSHarshad Shirwadkar 	int i;
13648016e29fSHarshad Shirwadkar 
13658016e29fSHarshad Shirwadkar 	state = &EXT4_SB(sb)->s_fc_replay_state;
13668016e29fSHarshad Shirwadkar 	for (i = 0; i < state->fc_modified_inodes_used; i++)
13678016e29fSHarshad Shirwadkar 		if (state->fc_modified_inodes[i] == ino)
13688016e29fSHarshad Shirwadkar 			return 0;
13698016e29fSHarshad Shirwadkar 	if (state->fc_modified_inodes_used == state->fc_modified_inodes_size) {
13708016e29fSHarshad Shirwadkar 		state->fc_modified_inodes_size +=
13718016e29fSHarshad Shirwadkar 			EXT4_FC_REPLAY_REALLOC_INCREMENT;
13728016e29fSHarshad Shirwadkar 		state->fc_modified_inodes = krealloc(
13738016e29fSHarshad Shirwadkar 					state->fc_modified_inodes, sizeof(int) *
13748016e29fSHarshad Shirwadkar 					state->fc_modified_inodes_size,
13758016e29fSHarshad Shirwadkar 					GFP_KERNEL);
13768016e29fSHarshad Shirwadkar 		if (!state->fc_modified_inodes)
13778016e29fSHarshad Shirwadkar 			return -ENOMEM;
13788016e29fSHarshad Shirwadkar 	}
13798016e29fSHarshad Shirwadkar 	state->fc_modified_inodes[state->fc_modified_inodes_used++] = ino;
13808016e29fSHarshad Shirwadkar 	return 0;
13818016e29fSHarshad Shirwadkar }
13828016e29fSHarshad Shirwadkar 
13838016e29fSHarshad Shirwadkar /*
13848016e29fSHarshad Shirwadkar  * Inode replay function
13858016e29fSHarshad Shirwadkar  */
13868016e29fSHarshad Shirwadkar static int ext4_fc_replay_inode(struct super_block *sb, struct ext4_fc_tl *tl)
13878016e29fSHarshad Shirwadkar {
13888016e29fSHarshad Shirwadkar 	struct ext4_fc_inode *fc_inode;
13898016e29fSHarshad Shirwadkar 	struct ext4_inode *raw_inode;
13908016e29fSHarshad Shirwadkar 	struct ext4_inode *raw_fc_inode;
13918016e29fSHarshad Shirwadkar 	struct inode *inode = NULL;
13928016e29fSHarshad Shirwadkar 	struct ext4_iloc iloc;
13938016e29fSHarshad Shirwadkar 	int inode_len, ino, ret, tag = le16_to_cpu(tl->fc_tag);
13948016e29fSHarshad Shirwadkar 	struct ext4_extent_header *eh;
13958016e29fSHarshad Shirwadkar 
13968016e29fSHarshad Shirwadkar 	fc_inode = (struct ext4_fc_inode *)ext4_fc_tag_val(tl);
13978016e29fSHarshad Shirwadkar 
13988016e29fSHarshad Shirwadkar 	ino = le32_to_cpu(fc_inode->fc_ino);
13998016e29fSHarshad Shirwadkar 	trace_ext4_fc_replay(sb, tag, ino, 0, 0);
14008016e29fSHarshad Shirwadkar 
14018016e29fSHarshad Shirwadkar 	inode = ext4_iget(sb, ino, EXT4_IGET_NORMAL);
14028016e29fSHarshad Shirwadkar 	if (!IS_ERR_OR_NULL(inode)) {
14038016e29fSHarshad Shirwadkar 		ext4_ext_clear_bb(inode);
14048016e29fSHarshad Shirwadkar 		iput(inode);
14058016e29fSHarshad Shirwadkar 	}
14068016e29fSHarshad Shirwadkar 
14078016e29fSHarshad Shirwadkar 	ext4_fc_record_modified_inode(sb, ino);
14088016e29fSHarshad Shirwadkar 
14098016e29fSHarshad Shirwadkar 	raw_fc_inode = (struct ext4_inode *)fc_inode->fc_raw_inode;
14108016e29fSHarshad Shirwadkar 	ret = ext4_get_fc_inode_loc(sb, ino, &iloc);
14118016e29fSHarshad Shirwadkar 	if (ret)
14128016e29fSHarshad Shirwadkar 		goto out;
14138016e29fSHarshad Shirwadkar 
14148016e29fSHarshad Shirwadkar 	inode_len = ext4_fc_tag_len(tl) - sizeof(struct ext4_fc_inode);
14158016e29fSHarshad Shirwadkar 	raw_inode = ext4_raw_inode(&iloc);
14168016e29fSHarshad Shirwadkar 
14178016e29fSHarshad Shirwadkar 	memcpy(raw_inode, raw_fc_inode, offsetof(struct ext4_inode, i_block));
14188016e29fSHarshad Shirwadkar 	memcpy(&raw_inode->i_generation, &raw_fc_inode->i_generation,
14198016e29fSHarshad Shirwadkar 		inode_len - offsetof(struct ext4_inode, i_generation));
14208016e29fSHarshad Shirwadkar 	if (le32_to_cpu(raw_inode->i_flags) & EXT4_EXTENTS_FL) {
14218016e29fSHarshad Shirwadkar 		eh = (struct ext4_extent_header *)(&raw_inode->i_block[0]);
14228016e29fSHarshad Shirwadkar 		if (eh->eh_magic != EXT4_EXT_MAGIC) {
14238016e29fSHarshad Shirwadkar 			memset(eh, 0, sizeof(*eh));
14248016e29fSHarshad Shirwadkar 			eh->eh_magic = EXT4_EXT_MAGIC;
14258016e29fSHarshad Shirwadkar 			eh->eh_max = cpu_to_le16(
14268016e29fSHarshad Shirwadkar 				(sizeof(raw_inode->i_block) -
14278016e29fSHarshad Shirwadkar 				 sizeof(struct ext4_extent_header))
14288016e29fSHarshad Shirwadkar 				 / sizeof(struct ext4_extent));
14298016e29fSHarshad Shirwadkar 		}
14308016e29fSHarshad Shirwadkar 	} else if (le32_to_cpu(raw_inode->i_flags) & EXT4_INLINE_DATA_FL) {
14318016e29fSHarshad Shirwadkar 		memcpy(raw_inode->i_block, raw_fc_inode->i_block,
14328016e29fSHarshad Shirwadkar 			sizeof(raw_inode->i_block));
14338016e29fSHarshad Shirwadkar 	}
14348016e29fSHarshad Shirwadkar 
14358016e29fSHarshad Shirwadkar 	/* Immediately update the inode on disk. */
14368016e29fSHarshad Shirwadkar 	ret = ext4_handle_dirty_metadata(NULL, NULL, iloc.bh);
14378016e29fSHarshad Shirwadkar 	if (ret)
14388016e29fSHarshad Shirwadkar 		goto out;
14398016e29fSHarshad Shirwadkar 	ret = sync_dirty_buffer(iloc.bh);
14408016e29fSHarshad Shirwadkar 	if (ret)
14418016e29fSHarshad Shirwadkar 		goto out;
14428016e29fSHarshad Shirwadkar 	ret = ext4_mark_inode_used(sb, ino);
14438016e29fSHarshad Shirwadkar 	if (ret)
14448016e29fSHarshad Shirwadkar 		goto out;
14458016e29fSHarshad Shirwadkar 
14468016e29fSHarshad Shirwadkar 	/* Given that we just wrote the inode on disk, this SHOULD succeed. */
14478016e29fSHarshad Shirwadkar 	inode = ext4_iget(sb, ino, EXT4_IGET_NORMAL);
14488016e29fSHarshad Shirwadkar 	if (IS_ERR_OR_NULL(inode)) {
14498016e29fSHarshad Shirwadkar 		jbd_debug(1, "Inode not found.");
14508016e29fSHarshad Shirwadkar 		return -EFSCORRUPTED;
14518016e29fSHarshad Shirwadkar 	}
14528016e29fSHarshad Shirwadkar 
14538016e29fSHarshad Shirwadkar 	/*
14548016e29fSHarshad Shirwadkar 	 * Our allocator could have made different decisions than before
14558016e29fSHarshad Shirwadkar 	 * crashing. This should be fixed but until then, we calculate
14568016e29fSHarshad Shirwadkar 	 * the number of blocks the inode.
14578016e29fSHarshad Shirwadkar 	 */
14588016e29fSHarshad Shirwadkar 	ext4_ext_replay_set_iblocks(inode);
14598016e29fSHarshad Shirwadkar 
14608016e29fSHarshad Shirwadkar 	inode->i_generation = le32_to_cpu(ext4_raw_inode(&iloc)->i_generation);
14618016e29fSHarshad Shirwadkar 	ext4_reset_inode_seed(inode);
14628016e29fSHarshad Shirwadkar 
14638016e29fSHarshad Shirwadkar 	ext4_inode_csum_set(inode, ext4_raw_inode(&iloc), EXT4_I(inode));
14648016e29fSHarshad Shirwadkar 	ret = ext4_handle_dirty_metadata(NULL, NULL, iloc.bh);
14658016e29fSHarshad Shirwadkar 	sync_dirty_buffer(iloc.bh);
14668016e29fSHarshad Shirwadkar 	brelse(iloc.bh);
14678016e29fSHarshad Shirwadkar out:
14688016e29fSHarshad Shirwadkar 	iput(inode);
14698016e29fSHarshad Shirwadkar 	if (!ret)
14708016e29fSHarshad Shirwadkar 		blkdev_issue_flush(sb->s_bdev, GFP_KERNEL);
14718016e29fSHarshad Shirwadkar 
14728016e29fSHarshad Shirwadkar 	return 0;
14738016e29fSHarshad Shirwadkar }
14748016e29fSHarshad Shirwadkar 
14758016e29fSHarshad Shirwadkar /*
14768016e29fSHarshad Shirwadkar  * Dentry create replay function.
14778016e29fSHarshad Shirwadkar  *
14788016e29fSHarshad Shirwadkar  * EXT4_FC_TAG_CREAT is preceded by EXT4_FC_TAG_INODE_FULL. Which means, the
14798016e29fSHarshad Shirwadkar  * inode for which we are trying to create a dentry here, should already have
14808016e29fSHarshad Shirwadkar  * been replayed before we start here.
14818016e29fSHarshad Shirwadkar  */
14828016e29fSHarshad Shirwadkar static int ext4_fc_replay_create(struct super_block *sb, struct ext4_fc_tl *tl)
14838016e29fSHarshad Shirwadkar {
14848016e29fSHarshad Shirwadkar 	int ret = 0;
14858016e29fSHarshad Shirwadkar 	struct inode *inode = NULL;
14868016e29fSHarshad Shirwadkar 	struct inode *dir = NULL;
14878016e29fSHarshad Shirwadkar 	struct dentry_info_args darg;
14888016e29fSHarshad Shirwadkar 
14898016e29fSHarshad Shirwadkar 	tl_to_darg(&darg, tl);
14908016e29fSHarshad Shirwadkar 
14918016e29fSHarshad Shirwadkar 	trace_ext4_fc_replay(sb, EXT4_FC_TAG_CREAT, darg.ino,
14928016e29fSHarshad Shirwadkar 			darg.parent_ino, darg.dname_len);
14938016e29fSHarshad Shirwadkar 
14948016e29fSHarshad Shirwadkar 	/* This takes care of update group descriptor and other metadata */
14958016e29fSHarshad Shirwadkar 	ret = ext4_mark_inode_used(sb, darg.ino);
14968016e29fSHarshad Shirwadkar 	if (ret)
14978016e29fSHarshad Shirwadkar 		goto out;
14988016e29fSHarshad Shirwadkar 
14998016e29fSHarshad Shirwadkar 	inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL);
15008016e29fSHarshad Shirwadkar 	if (IS_ERR_OR_NULL(inode)) {
15018016e29fSHarshad Shirwadkar 		jbd_debug(1, "inode %d not found.", darg.ino);
15028016e29fSHarshad Shirwadkar 		inode = NULL;
15038016e29fSHarshad Shirwadkar 		ret = -EINVAL;
15048016e29fSHarshad Shirwadkar 		goto out;
15058016e29fSHarshad Shirwadkar 	}
15068016e29fSHarshad Shirwadkar 
15078016e29fSHarshad Shirwadkar 	if (S_ISDIR(inode->i_mode)) {
15088016e29fSHarshad Shirwadkar 		/*
15098016e29fSHarshad Shirwadkar 		 * If we are creating a directory, we need to make sure that the
15108016e29fSHarshad Shirwadkar 		 * dot and dot dot dirents are setup properly.
15118016e29fSHarshad Shirwadkar 		 */
15128016e29fSHarshad Shirwadkar 		dir = ext4_iget(sb, darg.parent_ino, EXT4_IGET_NORMAL);
15138016e29fSHarshad Shirwadkar 		if (IS_ERR_OR_NULL(dir)) {
15148016e29fSHarshad Shirwadkar 			jbd_debug(1, "Dir %d not found.", darg.ino);
15158016e29fSHarshad Shirwadkar 			goto out;
15168016e29fSHarshad Shirwadkar 		}
15178016e29fSHarshad Shirwadkar 		ret = ext4_init_new_dir(NULL, dir, inode);
15188016e29fSHarshad Shirwadkar 		iput(dir);
15198016e29fSHarshad Shirwadkar 		if (ret) {
15208016e29fSHarshad Shirwadkar 			ret = 0;
15218016e29fSHarshad Shirwadkar 			goto out;
15228016e29fSHarshad Shirwadkar 		}
15238016e29fSHarshad Shirwadkar 	}
15248016e29fSHarshad Shirwadkar 	ret = ext4_fc_replay_link_internal(sb, &darg, inode);
15258016e29fSHarshad Shirwadkar 	if (ret)
15268016e29fSHarshad Shirwadkar 		goto out;
15278016e29fSHarshad Shirwadkar 	set_nlink(inode, 1);
15288016e29fSHarshad Shirwadkar 	ext4_mark_inode_dirty(NULL, inode);
15298016e29fSHarshad Shirwadkar out:
15308016e29fSHarshad Shirwadkar 	if (inode)
15318016e29fSHarshad Shirwadkar 		iput(inode);
15328016e29fSHarshad Shirwadkar 	return ret;
15338016e29fSHarshad Shirwadkar }
15348016e29fSHarshad Shirwadkar 
15358016e29fSHarshad Shirwadkar /*
15368016e29fSHarshad Shirwadkar  * Record physical disk regions which are in use as per fast commit area. Our
15378016e29fSHarshad Shirwadkar  * simple replay phase allocator excludes these regions from allocation.
15388016e29fSHarshad Shirwadkar  */
15398016e29fSHarshad Shirwadkar static int ext4_fc_record_regions(struct super_block *sb, int ino,
15408016e29fSHarshad Shirwadkar 		ext4_lblk_t lblk, ext4_fsblk_t pblk, int len)
15418016e29fSHarshad Shirwadkar {
15428016e29fSHarshad Shirwadkar 	struct ext4_fc_replay_state *state;
15438016e29fSHarshad Shirwadkar 	struct ext4_fc_alloc_region *region;
15448016e29fSHarshad Shirwadkar 
15458016e29fSHarshad Shirwadkar 	state = &EXT4_SB(sb)->s_fc_replay_state;
15468016e29fSHarshad Shirwadkar 	if (state->fc_regions_used == state->fc_regions_size) {
15478016e29fSHarshad Shirwadkar 		state->fc_regions_size +=
15488016e29fSHarshad Shirwadkar 			EXT4_FC_REPLAY_REALLOC_INCREMENT;
15498016e29fSHarshad Shirwadkar 		state->fc_regions = krealloc(
15508016e29fSHarshad Shirwadkar 					state->fc_regions,
15518016e29fSHarshad Shirwadkar 					state->fc_regions_size *
15528016e29fSHarshad Shirwadkar 					sizeof(struct ext4_fc_alloc_region),
15538016e29fSHarshad Shirwadkar 					GFP_KERNEL);
15548016e29fSHarshad Shirwadkar 		if (!state->fc_regions)
15558016e29fSHarshad Shirwadkar 			return -ENOMEM;
15568016e29fSHarshad Shirwadkar 	}
15578016e29fSHarshad Shirwadkar 	region = &state->fc_regions[state->fc_regions_used++];
15588016e29fSHarshad Shirwadkar 	region->ino = ino;
15598016e29fSHarshad Shirwadkar 	region->lblk = lblk;
15608016e29fSHarshad Shirwadkar 	region->pblk = pblk;
15618016e29fSHarshad Shirwadkar 	region->len = len;
15628016e29fSHarshad Shirwadkar 
15638016e29fSHarshad Shirwadkar 	return 0;
15648016e29fSHarshad Shirwadkar }
15658016e29fSHarshad Shirwadkar 
15668016e29fSHarshad Shirwadkar /* Replay add range tag */
15678016e29fSHarshad Shirwadkar static int ext4_fc_replay_add_range(struct super_block *sb,
15688016e29fSHarshad Shirwadkar 				struct ext4_fc_tl *tl)
15698016e29fSHarshad Shirwadkar {
15708016e29fSHarshad Shirwadkar 	struct ext4_fc_add_range *fc_add_ex;
15718016e29fSHarshad Shirwadkar 	struct ext4_extent newex, *ex;
15728016e29fSHarshad Shirwadkar 	struct inode *inode;
15738016e29fSHarshad Shirwadkar 	ext4_lblk_t start, cur;
15748016e29fSHarshad Shirwadkar 	int remaining, len;
15758016e29fSHarshad Shirwadkar 	ext4_fsblk_t start_pblk;
15768016e29fSHarshad Shirwadkar 	struct ext4_map_blocks map;
15778016e29fSHarshad Shirwadkar 	struct ext4_ext_path *path = NULL;
15788016e29fSHarshad Shirwadkar 	int ret;
15798016e29fSHarshad Shirwadkar 
15808016e29fSHarshad Shirwadkar 	fc_add_ex = (struct ext4_fc_add_range *)ext4_fc_tag_val(tl);
15818016e29fSHarshad Shirwadkar 	ex = (struct ext4_extent *)&fc_add_ex->fc_ex;
15828016e29fSHarshad Shirwadkar 
15838016e29fSHarshad Shirwadkar 	trace_ext4_fc_replay(sb, EXT4_FC_TAG_ADD_RANGE,
15848016e29fSHarshad Shirwadkar 		le32_to_cpu(fc_add_ex->fc_ino), le32_to_cpu(ex->ee_block),
15858016e29fSHarshad Shirwadkar 		ext4_ext_get_actual_len(ex));
15868016e29fSHarshad Shirwadkar 
15878016e29fSHarshad Shirwadkar 	inode = ext4_iget(sb, le32_to_cpu(fc_add_ex->fc_ino),
15888016e29fSHarshad Shirwadkar 				EXT4_IGET_NORMAL);
15898016e29fSHarshad Shirwadkar 	if (IS_ERR_OR_NULL(inode)) {
15908016e29fSHarshad Shirwadkar 		jbd_debug(1, "Inode not found.");
15918016e29fSHarshad Shirwadkar 		return 0;
15928016e29fSHarshad Shirwadkar 	}
15938016e29fSHarshad Shirwadkar 
15948016e29fSHarshad Shirwadkar 	ret = ext4_fc_record_modified_inode(sb, inode->i_ino);
15958016e29fSHarshad Shirwadkar 
15968016e29fSHarshad Shirwadkar 	start = le32_to_cpu(ex->ee_block);
15978016e29fSHarshad Shirwadkar 	start_pblk = ext4_ext_pblock(ex);
15988016e29fSHarshad Shirwadkar 	len = ext4_ext_get_actual_len(ex);
15998016e29fSHarshad Shirwadkar 
16008016e29fSHarshad Shirwadkar 	cur = start;
16018016e29fSHarshad Shirwadkar 	remaining = len;
16028016e29fSHarshad Shirwadkar 	jbd_debug(1, "ADD_RANGE, lblk %d, pblk %lld, len %d, unwritten %d, inode %ld\n",
16038016e29fSHarshad Shirwadkar 		  start, start_pblk, len, ext4_ext_is_unwritten(ex),
16048016e29fSHarshad Shirwadkar 		  inode->i_ino);
16058016e29fSHarshad Shirwadkar 
16068016e29fSHarshad Shirwadkar 	while (remaining > 0) {
16078016e29fSHarshad Shirwadkar 		map.m_lblk = cur;
16088016e29fSHarshad Shirwadkar 		map.m_len = remaining;
16098016e29fSHarshad Shirwadkar 		map.m_pblk = 0;
16108016e29fSHarshad Shirwadkar 		ret = ext4_map_blocks(NULL, inode, &map, 0);
16118016e29fSHarshad Shirwadkar 
16128016e29fSHarshad Shirwadkar 		if (ret < 0) {
16138016e29fSHarshad Shirwadkar 			iput(inode);
16148016e29fSHarshad Shirwadkar 			return 0;
16158016e29fSHarshad Shirwadkar 		}
16168016e29fSHarshad Shirwadkar 
16178016e29fSHarshad Shirwadkar 		if (ret == 0) {
16188016e29fSHarshad Shirwadkar 			/* Range is not mapped */
16198016e29fSHarshad Shirwadkar 			path = ext4_find_extent(inode, cur, NULL, 0);
16208016e29fSHarshad Shirwadkar 			if (!path)
16218016e29fSHarshad Shirwadkar 				continue;
16228016e29fSHarshad Shirwadkar 			memset(&newex, 0, sizeof(newex));
16238016e29fSHarshad Shirwadkar 			newex.ee_block = cpu_to_le32(cur);
16248016e29fSHarshad Shirwadkar 			ext4_ext_store_pblock(
16258016e29fSHarshad Shirwadkar 				&newex, start_pblk + cur - start);
16268016e29fSHarshad Shirwadkar 			newex.ee_len = cpu_to_le16(map.m_len);
16278016e29fSHarshad Shirwadkar 			if (ext4_ext_is_unwritten(ex))
16288016e29fSHarshad Shirwadkar 				ext4_ext_mark_unwritten(&newex);
16298016e29fSHarshad Shirwadkar 			down_write(&EXT4_I(inode)->i_data_sem);
16308016e29fSHarshad Shirwadkar 			ret = ext4_ext_insert_extent(
16318016e29fSHarshad Shirwadkar 				NULL, inode, &path, &newex, 0);
16328016e29fSHarshad Shirwadkar 			up_write((&EXT4_I(inode)->i_data_sem));
16338016e29fSHarshad Shirwadkar 			ext4_ext_drop_refs(path);
16348016e29fSHarshad Shirwadkar 			kfree(path);
16358016e29fSHarshad Shirwadkar 			if (ret) {
16368016e29fSHarshad Shirwadkar 				iput(inode);
16378016e29fSHarshad Shirwadkar 				return 0;
16388016e29fSHarshad Shirwadkar 			}
16398016e29fSHarshad Shirwadkar 			goto next;
16408016e29fSHarshad Shirwadkar 		}
16418016e29fSHarshad Shirwadkar 
16428016e29fSHarshad Shirwadkar 		if (start_pblk + cur - start != map.m_pblk) {
16438016e29fSHarshad Shirwadkar 			/*
16448016e29fSHarshad Shirwadkar 			 * Logical to physical mapping changed. This can happen
16458016e29fSHarshad Shirwadkar 			 * if this range was removed and then reallocated to
16468016e29fSHarshad Shirwadkar 			 * map to new physical blocks during a fast commit.
16478016e29fSHarshad Shirwadkar 			 */
16488016e29fSHarshad Shirwadkar 			ret = ext4_ext_replay_update_ex(inode, cur, map.m_len,
16498016e29fSHarshad Shirwadkar 					ext4_ext_is_unwritten(ex),
16508016e29fSHarshad Shirwadkar 					start_pblk + cur - start);
16518016e29fSHarshad Shirwadkar 			if (ret) {
16528016e29fSHarshad Shirwadkar 				iput(inode);
16538016e29fSHarshad Shirwadkar 				return 0;
16548016e29fSHarshad Shirwadkar 			}
16558016e29fSHarshad Shirwadkar 			/*
16568016e29fSHarshad Shirwadkar 			 * Mark the old blocks as free since they aren't used
16578016e29fSHarshad Shirwadkar 			 * anymore. We maintain an array of all the modified
16588016e29fSHarshad Shirwadkar 			 * inodes. In case these blocks are still used at either
16598016e29fSHarshad Shirwadkar 			 * a different logical range in the same inode or in
16608016e29fSHarshad Shirwadkar 			 * some different inode, we will mark them as allocated
16618016e29fSHarshad Shirwadkar 			 * at the end of the FC replay using our array of
16628016e29fSHarshad Shirwadkar 			 * modified inodes.
16638016e29fSHarshad Shirwadkar 			 */
16648016e29fSHarshad Shirwadkar 			ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, 0);
16658016e29fSHarshad Shirwadkar 			goto next;
16668016e29fSHarshad Shirwadkar 		}
16678016e29fSHarshad Shirwadkar 
16688016e29fSHarshad Shirwadkar 		/* Range is mapped and needs a state change */
16698016e29fSHarshad Shirwadkar 		jbd_debug(1, "Converting from %d to %d %lld",
16708016e29fSHarshad Shirwadkar 				map.m_flags & EXT4_MAP_UNWRITTEN,
16718016e29fSHarshad Shirwadkar 			ext4_ext_is_unwritten(ex), map.m_pblk);
16728016e29fSHarshad Shirwadkar 		ret = ext4_ext_replay_update_ex(inode, cur, map.m_len,
16738016e29fSHarshad Shirwadkar 					ext4_ext_is_unwritten(ex), map.m_pblk);
16748016e29fSHarshad Shirwadkar 		if (ret) {
16758016e29fSHarshad Shirwadkar 			iput(inode);
16768016e29fSHarshad Shirwadkar 			return 0;
16778016e29fSHarshad Shirwadkar 		}
16788016e29fSHarshad Shirwadkar 		/*
16798016e29fSHarshad Shirwadkar 		 * We may have split the extent tree while toggling the state.
16808016e29fSHarshad Shirwadkar 		 * Try to shrink the extent tree now.
16818016e29fSHarshad Shirwadkar 		 */
16828016e29fSHarshad Shirwadkar 		ext4_ext_replay_shrink_inode(inode, start + len);
16838016e29fSHarshad Shirwadkar next:
16848016e29fSHarshad Shirwadkar 		cur += map.m_len;
16858016e29fSHarshad Shirwadkar 		remaining -= map.m_len;
16868016e29fSHarshad Shirwadkar 	}
16878016e29fSHarshad Shirwadkar 	ext4_ext_replay_shrink_inode(inode, i_size_read(inode) >>
16888016e29fSHarshad Shirwadkar 					sb->s_blocksize_bits);
16898016e29fSHarshad Shirwadkar 	iput(inode);
16908016e29fSHarshad Shirwadkar 	return 0;
16918016e29fSHarshad Shirwadkar }
16928016e29fSHarshad Shirwadkar 
16938016e29fSHarshad Shirwadkar /* Replay DEL_RANGE tag */
16948016e29fSHarshad Shirwadkar static int
16958016e29fSHarshad Shirwadkar ext4_fc_replay_del_range(struct super_block *sb, struct ext4_fc_tl *tl)
16968016e29fSHarshad Shirwadkar {
16978016e29fSHarshad Shirwadkar 	struct inode *inode;
16988016e29fSHarshad Shirwadkar 	struct ext4_fc_del_range *lrange;
16998016e29fSHarshad Shirwadkar 	struct ext4_map_blocks map;
17008016e29fSHarshad Shirwadkar 	ext4_lblk_t cur, remaining;
17018016e29fSHarshad Shirwadkar 	int ret;
17028016e29fSHarshad Shirwadkar 
17038016e29fSHarshad Shirwadkar 	lrange = (struct ext4_fc_del_range *)ext4_fc_tag_val(tl);
17048016e29fSHarshad Shirwadkar 	cur = le32_to_cpu(lrange->fc_lblk);
17058016e29fSHarshad Shirwadkar 	remaining = le32_to_cpu(lrange->fc_len);
17068016e29fSHarshad Shirwadkar 
17078016e29fSHarshad Shirwadkar 	trace_ext4_fc_replay(sb, EXT4_FC_TAG_DEL_RANGE,
17088016e29fSHarshad Shirwadkar 		le32_to_cpu(lrange->fc_ino), cur, remaining);
17098016e29fSHarshad Shirwadkar 
17108016e29fSHarshad Shirwadkar 	inode = ext4_iget(sb, le32_to_cpu(lrange->fc_ino), EXT4_IGET_NORMAL);
17118016e29fSHarshad Shirwadkar 	if (IS_ERR_OR_NULL(inode)) {
17128016e29fSHarshad Shirwadkar 		jbd_debug(1, "Inode %d not found", le32_to_cpu(lrange->fc_ino));
17138016e29fSHarshad Shirwadkar 		return 0;
17148016e29fSHarshad Shirwadkar 	}
17158016e29fSHarshad Shirwadkar 
17168016e29fSHarshad Shirwadkar 	ret = ext4_fc_record_modified_inode(sb, inode->i_ino);
17178016e29fSHarshad Shirwadkar 
17188016e29fSHarshad Shirwadkar 	jbd_debug(1, "DEL_RANGE, inode %ld, lblk %d, len %d\n",
17198016e29fSHarshad Shirwadkar 			inode->i_ino, le32_to_cpu(lrange->fc_lblk),
17208016e29fSHarshad Shirwadkar 			le32_to_cpu(lrange->fc_len));
17218016e29fSHarshad Shirwadkar 	while (remaining > 0) {
17228016e29fSHarshad Shirwadkar 		map.m_lblk = cur;
17238016e29fSHarshad Shirwadkar 		map.m_len = remaining;
17248016e29fSHarshad Shirwadkar 
17258016e29fSHarshad Shirwadkar 		ret = ext4_map_blocks(NULL, inode, &map, 0);
17268016e29fSHarshad Shirwadkar 		if (ret < 0) {
17278016e29fSHarshad Shirwadkar 			iput(inode);
17288016e29fSHarshad Shirwadkar 			return 0;
17298016e29fSHarshad Shirwadkar 		}
17308016e29fSHarshad Shirwadkar 		if (ret > 0) {
17318016e29fSHarshad Shirwadkar 			remaining -= ret;
17328016e29fSHarshad Shirwadkar 			cur += ret;
17338016e29fSHarshad Shirwadkar 			ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, 0);
17348016e29fSHarshad Shirwadkar 		} else {
17358016e29fSHarshad Shirwadkar 			remaining -= map.m_len;
17368016e29fSHarshad Shirwadkar 			cur += map.m_len;
17378016e29fSHarshad Shirwadkar 		}
17388016e29fSHarshad Shirwadkar 	}
17398016e29fSHarshad Shirwadkar 
17408016e29fSHarshad Shirwadkar 	ret = ext4_punch_hole(inode,
17418016e29fSHarshad Shirwadkar 		le32_to_cpu(lrange->fc_lblk) << sb->s_blocksize_bits,
17428016e29fSHarshad Shirwadkar 		le32_to_cpu(lrange->fc_len) <<  sb->s_blocksize_bits);
17438016e29fSHarshad Shirwadkar 	if (ret)
17448016e29fSHarshad Shirwadkar 		jbd_debug(1, "ext4_punch_hole returned %d", ret);
17458016e29fSHarshad Shirwadkar 	ext4_ext_replay_shrink_inode(inode,
17468016e29fSHarshad Shirwadkar 		i_size_read(inode) >> sb->s_blocksize_bits);
17478016e29fSHarshad Shirwadkar 	ext4_mark_inode_dirty(NULL, inode);
17488016e29fSHarshad Shirwadkar 	iput(inode);
17498016e29fSHarshad Shirwadkar 
17508016e29fSHarshad Shirwadkar 	return 0;
17518016e29fSHarshad Shirwadkar }
17528016e29fSHarshad Shirwadkar 
17538016e29fSHarshad Shirwadkar static inline const char *tag2str(u16 tag)
17548016e29fSHarshad Shirwadkar {
17558016e29fSHarshad Shirwadkar 	switch (tag) {
17568016e29fSHarshad Shirwadkar 	case EXT4_FC_TAG_LINK:
17578016e29fSHarshad Shirwadkar 		return "TAG_ADD_ENTRY";
17588016e29fSHarshad Shirwadkar 	case EXT4_FC_TAG_UNLINK:
17598016e29fSHarshad Shirwadkar 		return "TAG_DEL_ENTRY";
17608016e29fSHarshad Shirwadkar 	case EXT4_FC_TAG_ADD_RANGE:
17618016e29fSHarshad Shirwadkar 		return "TAG_ADD_RANGE";
17628016e29fSHarshad Shirwadkar 	case EXT4_FC_TAG_CREAT:
17638016e29fSHarshad Shirwadkar 		return "TAG_CREAT_DENTRY";
17648016e29fSHarshad Shirwadkar 	case EXT4_FC_TAG_DEL_RANGE:
17658016e29fSHarshad Shirwadkar 		return "TAG_DEL_RANGE";
17668016e29fSHarshad Shirwadkar 	case EXT4_FC_TAG_INODE:
17678016e29fSHarshad Shirwadkar 		return "TAG_INODE";
17688016e29fSHarshad Shirwadkar 	case EXT4_FC_TAG_PAD:
17698016e29fSHarshad Shirwadkar 		return "TAG_PAD";
17708016e29fSHarshad Shirwadkar 	case EXT4_FC_TAG_TAIL:
17718016e29fSHarshad Shirwadkar 		return "TAG_TAIL";
17728016e29fSHarshad Shirwadkar 	case EXT4_FC_TAG_HEAD:
17738016e29fSHarshad Shirwadkar 		return "TAG_HEAD";
17748016e29fSHarshad Shirwadkar 	default:
17758016e29fSHarshad Shirwadkar 		return "TAG_ERROR";
17768016e29fSHarshad Shirwadkar 	}
17778016e29fSHarshad Shirwadkar }
17788016e29fSHarshad Shirwadkar 
17798016e29fSHarshad Shirwadkar static void ext4_fc_set_bitmaps_and_counters(struct super_block *sb)
17808016e29fSHarshad Shirwadkar {
17818016e29fSHarshad Shirwadkar 	struct ext4_fc_replay_state *state;
17828016e29fSHarshad Shirwadkar 	struct inode *inode;
17838016e29fSHarshad Shirwadkar 	struct ext4_ext_path *path = NULL;
17848016e29fSHarshad Shirwadkar 	struct ext4_map_blocks map;
17858016e29fSHarshad Shirwadkar 	int i, ret, j;
17868016e29fSHarshad Shirwadkar 	ext4_lblk_t cur, end;
17878016e29fSHarshad Shirwadkar 
17888016e29fSHarshad Shirwadkar 	state = &EXT4_SB(sb)->s_fc_replay_state;
17898016e29fSHarshad Shirwadkar 	for (i = 0; i < state->fc_modified_inodes_used; i++) {
17908016e29fSHarshad Shirwadkar 		inode = ext4_iget(sb, state->fc_modified_inodes[i],
17918016e29fSHarshad Shirwadkar 			EXT4_IGET_NORMAL);
17928016e29fSHarshad Shirwadkar 		if (IS_ERR_OR_NULL(inode)) {
17938016e29fSHarshad Shirwadkar 			jbd_debug(1, "Inode %d not found.",
17948016e29fSHarshad Shirwadkar 				state->fc_modified_inodes[i]);
17958016e29fSHarshad Shirwadkar 			continue;
17968016e29fSHarshad Shirwadkar 		}
17978016e29fSHarshad Shirwadkar 		cur = 0;
17988016e29fSHarshad Shirwadkar 		end = EXT_MAX_BLOCKS;
17998016e29fSHarshad Shirwadkar 		while (cur < end) {
18008016e29fSHarshad Shirwadkar 			map.m_lblk = cur;
18018016e29fSHarshad Shirwadkar 			map.m_len = end - cur;
18028016e29fSHarshad Shirwadkar 
18038016e29fSHarshad Shirwadkar 			ret = ext4_map_blocks(NULL, inode, &map, 0);
18048016e29fSHarshad Shirwadkar 			if (ret < 0)
18058016e29fSHarshad Shirwadkar 				break;
18068016e29fSHarshad Shirwadkar 
18078016e29fSHarshad Shirwadkar 			if (ret > 0) {
18088016e29fSHarshad Shirwadkar 				path = ext4_find_extent(inode, map.m_lblk, NULL, 0);
18098016e29fSHarshad Shirwadkar 				if (!IS_ERR_OR_NULL(path)) {
18108016e29fSHarshad Shirwadkar 					for (j = 0; j < path->p_depth; j++)
18118016e29fSHarshad Shirwadkar 						ext4_mb_mark_bb(inode->i_sb,
18128016e29fSHarshad Shirwadkar 							path[j].p_block, 1, 1);
18138016e29fSHarshad Shirwadkar 					ext4_ext_drop_refs(path);
18148016e29fSHarshad Shirwadkar 					kfree(path);
18158016e29fSHarshad Shirwadkar 				}
18168016e29fSHarshad Shirwadkar 				cur += ret;
18178016e29fSHarshad Shirwadkar 				ext4_mb_mark_bb(inode->i_sb, map.m_pblk,
18188016e29fSHarshad Shirwadkar 							map.m_len, 1);
18198016e29fSHarshad Shirwadkar 			} else {
18208016e29fSHarshad Shirwadkar 				cur = cur + (map.m_len ? map.m_len : 1);
18218016e29fSHarshad Shirwadkar 			}
18228016e29fSHarshad Shirwadkar 		}
18238016e29fSHarshad Shirwadkar 		iput(inode);
18248016e29fSHarshad Shirwadkar 	}
18258016e29fSHarshad Shirwadkar }
18268016e29fSHarshad Shirwadkar 
18278016e29fSHarshad Shirwadkar /*
18288016e29fSHarshad Shirwadkar  * Check if block is in excluded regions for block allocation. The simple
18298016e29fSHarshad Shirwadkar  * allocator that runs during replay phase is calls this function to see
18308016e29fSHarshad Shirwadkar  * if it is okay to use a block.
18318016e29fSHarshad Shirwadkar  */
18328016e29fSHarshad Shirwadkar bool ext4_fc_replay_check_excluded(struct super_block *sb, ext4_fsblk_t blk)
18338016e29fSHarshad Shirwadkar {
18348016e29fSHarshad Shirwadkar 	int i;
18358016e29fSHarshad Shirwadkar 	struct ext4_fc_replay_state *state;
18368016e29fSHarshad Shirwadkar 
18378016e29fSHarshad Shirwadkar 	state = &EXT4_SB(sb)->s_fc_replay_state;
18388016e29fSHarshad Shirwadkar 	for (i = 0; i < state->fc_regions_valid; i++) {
18398016e29fSHarshad Shirwadkar 		if (state->fc_regions[i].ino == 0 ||
18408016e29fSHarshad Shirwadkar 			state->fc_regions[i].len == 0)
18418016e29fSHarshad Shirwadkar 			continue;
18428016e29fSHarshad Shirwadkar 		if (blk >= state->fc_regions[i].pblk &&
18438016e29fSHarshad Shirwadkar 		    blk < state->fc_regions[i].pblk + state->fc_regions[i].len)
18448016e29fSHarshad Shirwadkar 			return true;
18458016e29fSHarshad Shirwadkar 	}
18468016e29fSHarshad Shirwadkar 	return false;
18478016e29fSHarshad Shirwadkar }
18488016e29fSHarshad Shirwadkar 
18498016e29fSHarshad Shirwadkar /* Cleanup function called after replay */
18508016e29fSHarshad Shirwadkar void ext4_fc_replay_cleanup(struct super_block *sb)
18518016e29fSHarshad Shirwadkar {
18528016e29fSHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(sb);
18538016e29fSHarshad Shirwadkar 
18548016e29fSHarshad Shirwadkar 	sbi->s_mount_state &= ~EXT4_FC_REPLAY;
18558016e29fSHarshad Shirwadkar 	kfree(sbi->s_fc_replay_state.fc_regions);
18568016e29fSHarshad Shirwadkar 	kfree(sbi->s_fc_replay_state.fc_modified_inodes);
18578016e29fSHarshad Shirwadkar }
18588016e29fSHarshad Shirwadkar 
18598016e29fSHarshad Shirwadkar /*
18608016e29fSHarshad Shirwadkar  * Recovery Scan phase handler
18618016e29fSHarshad Shirwadkar  *
18628016e29fSHarshad Shirwadkar  * This function is called during the scan phase and is responsible
18638016e29fSHarshad Shirwadkar  * for doing following things:
18648016e29fSHarshad Shirwadkar  * - Make sure the fast commit area has valid tags for replay
18658016e29fSHarshad Shirwadkar  * - Count number of tags that need to be replayed by the replay handler
18668016e29fSHarshad Shirwadkar  * - Verify CRC
18678016e29fSHarshad Shirwadkar  * - Create a list of excluded blocks for allocation during replay phase
18688016e29fSHarshad Shirwadkar  *
18698016e29fSHarshad Shirwadkar  * This function returns JBD2_FC_REPLAY_CONTINUE to indicate that SCAN is
18708016e29fSHarshad Shirwadkar  * incomplete and JBD2 should send more blocks. It returns JBD2_FC_REPLAY_STOP
18718016e29fSHarshad Shirwadkar  * to indicate that scan has finished and JBD2 can now start replay phase.
18728016e29fSHarshad Shirwadkar  * It returns a negative error to indicate that there was an error. At the end
18738016e29fSHarshad Shirwadkar  * of a successful scan phase, sbi->s_fc_replay_state.fc_replay_num_tags is set
18748016e29fSHarshad Shirwadkar  * to indicate the number of tags that need to replayed during the replay phase.
18758016e29fSHarshad Shirwadkar  */
18768016e29fSHarshad Shirwadkar static int ext4_fc_replay_scan(journal_t *journal,
18778016e29fSHarshad Shirwadkar 				struct buffer_head *bh, int off,
18788016e29fSHarshad Shirwadkar 				tid_t expected_tid)
18798016e29fSHarshad Shirwadkar {
18808016e29fSHarshad Shirwadkar 	struct super_block *sb = journal->j_private;
18818016e29fSHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(sb);
18828016e29fSHarshad Shirwadkar 	struct ext4_fc_replay_state *state;
18838016e29fSHarshad Shirwadkar 	int ret = JBD2_FC_REPLAY_CONTINUE;
18848016e29fSHarshad Shirwadkar 	struct ext4_fc_add_range *ext;
18858016e29fSHarshad Shirwadkar 	struct ext4_fc_tl *tl;
18868016e29fSHarshad Shirwadkar 	struct ext4_fc_tail *tail;
18878016e29fSHarshad Shirwadkar 	__u8 *start, *end;
18888016e29fSHarshad Shirwadkar 	struct ext4_fc_head *head;
18898016e29fSHarshad Shirwadkar 	struct ext4_extent *ex;
18908016e29fSHarshad Shirwadkar 
18918016e29fSHarshad Shirwadkar 	state = &sbi->s_fc_replay_state;
18928016e29fSHarshad Shirwadkar 
18938016e29fSHarshad Shirwadkar 	start = (u8 *)bh->b_data;
18948016e29fSHarshad Shirwadkar 	end = (__u8 *)bh->b_data + journal->j_blocksize - 1;
18958016e29fSHarshad Shirwadkar 
18968016e29fSHarshad Shirwadkar 	if (state->fc_replay_expected_off == 0) {
18978016e29fSHarshad Shirwadkar 		state->fc_cur_tag = 0;
18988016e29fSHarshad Shirwadkar 		state->fc_replay_num_tags = 0;
18998016e29fSHarshad Shirwadkar 		state->fc_crc = 0;
19008016e29fSHarshad Shirwadkar 		state->fc_regions = NULL;
19018016e29fSHarshad Shirwadkar 		state->fc_regions_valid = state->fc_regions_used =
19028016e29fSHarshad Shirwadkar 			state->fc_regions_size = 0;
19038016e29fSHarshad Shirwadkar 		/* Check if we can stop early */
19048016e29fSHarshad Shirwadkar 		if (le16_to_cpu(((struct ext4_fc_tl *)start)->fc_tag)
19058016e29fSHarshad Shirwadkar 			!= EXT4_FC_TAG_HEAD)
19068016e29fSHarshad Shirwadkar 			return 0;
19078016e29fSHarshad Shirwadkar 	}
19088016e29fSHarshad Shirwadkar 
19098016e29fSHarshad Shirwadkar 	if (off != state->fc_replay_expected_off) {
19108016e29fSHarshad Shirwadkar 		ret = -EFSCORRUPTED;
19118016e29fSHarshad Shirwadkar 		goto out_err;
19128016e29fSHarshad Shirwadkar 	}
19138016e29fSHarshad Shirwadkar 
19148016e29fSHarshad Shirwadkar 	state->fc_replay_expected_off++;
19158016e29fSHarshad Shirwadkar 	fc_for_each_tl(start, end, tl) {
19168016e29fSHarshad Shirwadkar 		jbd_debug(3, "Scan phase, tag:%s, blk %lld\n",
19178016e29fSHarshad Shirwadkar 			  tag2str(le16_to_cpu(tl->fc_tag)), bh->b_blocknr);
19188016e29fSHarshad Shirwadkar 		switch (le16_to_cpu(tl->fc_tag)) {
19198016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_ADD_RANGE:
19208016e29fSHarshad Shirwadkar 			ext = (struct ext4_fc_add_range *)ext4_fc_tag_val(tl);
19218016e29fSHarshad Shirwadkar 			ex = (struct ext4_extent *)&ext->fc_ex;
19228016e29fSHarshad Shirwadkar 			ret = ext4_fc_record_regions(sb,
19238016e29fSHarshad Shirwadkar 				le32_to_cpu(ext->fc_ino),
19248016e29fSHarshad Shirwadkar 				le32_to_cpu(ex->ee_block), ext4_ext_pblock(ex),
19258016e29fSHarshad Shirwadkar 				ext4_ext_get_actual_len(ex));
19268016e29fSHarshad Shirwadkar 			if (ret < 0)
19278016e29fSHarshad Shirwadkar 				break;
19288016e29fSHarshad Shirwadkar 			ret = JBD2_FC_REPLAY_CONTINUE;
19298016e29fSHarshad Shirwadkar 			fallthrough;
19308016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_DEL_RANGE:
19318016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_LINK:
19328016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_UNLINK:
19338016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_CREAT:
19348016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_INODE:
19358016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_PAD:
19368016e29fSHarshad Shirwadkar 			state->fc_cur_tag++;
19378016e29fSHarshad Shirwadkar 			state->fc_crc = ext4_chksum(sbi, state->fc_crc, tl,
19388016e29fSHarshad Shirwadkar 					sizeof(*tl) + ext4_fc_tag_len(tl));
19398016e29fSHarshad Shirwadkar 			break;
19408016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_TAIL:
19418016e29fSHarshad Shirwadkar 			state->fc_cur_tag++;
19428016e29fSHarshad Shirwadkar 			tail = (struct ext4_fc_tail *)ext4_fc_tag_val(tl);
19438016e29fSHarshad Shirwadkar 			state->fc_crc = ext4_chksum(sbi, state->fc_crc, tl,
19448016e29fSHarshad Shirwadkar 						sizeof(*tl) +
19458016e29fSHarshad Shirwadkar 						offsetof(struct ext4_fc_tail,
19468016e29fSHarshad Shirwadkar 						fc_crc));
19478016e29fSHarshad Shirwadkar 			if (le32_to_cpu(tail->fc_tid) == expected_tid &&
19488016e29fSHarshad Shirwadkar 				le32_to_cpu(tail->fc_crc) == state->fc_crc) {
19498016e29fSHarshad Shirwadkar 				state->fc_replay_num_tags = state->fc_cur_tag;
19508016e29fSHarshad Shirwadkar 				state->fc_regions_valid =
19518016e29fSHarshad Shirwadkar 					state->fc_regions_used;
19528016e29fSHarshad Shirwadkar 			} else {
19538016e29fSHarshad Shirwadkar 				ret = state->fc_replay_num_tags ?
19548016e29fSHarshad Shirwadkar 					JBD2_FC_REPLAY_STOP : -EFSBADCRC;
19558016e29fSHarshad Shirwadkar 			}
19568016e29fSHarshad Shirwadkar 			state->fc_crc = 0;
19578016e29fSHarshad Shirwadkar 			break;
19588016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_HEAD:
19598016e29fSHarshad Shirwadkar 			head = (struct ext4_fc_head *)ext4_fc_tag_val(tl);
19608016e29fSHarshad Shirwadkar 			if (le32_to_cpu(head->fc_features) &
19618016e29fSHarshad Shirwadkar 				~EXT4_FC_SUPPORTED_FEATURES) {
19628016e29fSHarshad Shirwadkar 				ret = -EOPNOTSUPP;
19638016e29fSHarshad Shirwadkar 				break;
19648016e29fSHarshad Shirwadkar 			}
19658016e29fSHarshad Shirwadkar 			if (le32_to_cpu(head->fc_tid) != expected_tid) {
19668016e29fSHarshad Shirwadkar 				ret = JBD2_FC_REPLAY_STOP;
19678016e29fSHarshad Shirwadkar 				break;
19688016e29fSHarshad Shirwadkar 			}
19698016e29fSHarshad Shirwadkar 			state->fc_cur_tag++;
19708016e29fSHarshad Shirwadkar 			state->fc_crc = ext4_chksum(sbi, state->fc_crc, tl,
19718016e29fSHarshad Shirwadkar 					sizeof(*tl) + ext4_fc_tag_len(tl));
19728016e29fSHarshad Shirwadkar 			break;
19738016e29fSHarshad Shirwadkar 		default:
19748016e29fSHarshad Shirwadkar 			ret = state->fc_replay_num_tags ?
19758016e29fSHarshad Shirwadkar 				JBD2_FC_REPLAY_STOP : -ECANCELED;
19768016e29fSHarshad Shirwadkar 		}
19778016e29fSHarshad Shirwadkar 		if (ret < 0 || ret == JBD2_FC_REPLAY_STOP)
19788016e29fSHarshad Shirwadkar 			break;
19798016e29fSHarshad Shirwadkar 	}
19808016e29fSHarshad Shirwadkar 
19818016e29fSHarshad Shirwadkar out_err:
19828016e29fSHarshad Shirwadkar 	trace_ext4_fc_replay_scan(sb, ret, off);
19838016e29fSHarshad Shirwadkar 	return ret;
19848016e29fSHarshad Shirwadkar }
19858016e29fSHarshad Shirwadkar 
19865b849b5fSHarshad Shirwadkar /*
19875b849b5fSHarshad Shirwadkar  * Main recovery path entry point.
19888016e29fSHarshad Shirwadkar  * The meaning of return codes is similar as above.
19895b849b5fSHarshad Shirwadkar  */
19905b849b5fSHarshad Shirwadkar static int ext4_fc_replay(journal_t *journal, struct buffer_head *bh,
19915b849b5fSHarshad Shirwadkar 				enum passtype pass, int off, tid_t expected_tid)
19925b849b5fSHarshad Shirwadkar {
19938016e29fSHarshad Shirwadkar 	struct super_block *sb = journal->j_private;
19948016e29fSHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB(sb);
19958016e29fSHarshad Shirwadkar 	struct ext4_fc_tl *tl;
19968016e29fSHarshad Shirwadkar 	__u8 *start, *end;
19978016e29fSHarshad Shirwadkar 	int ret = JBD2_FC_REPLAY_CONTINUE;
19988016e29fSHarshad Shirwadkar 	struct ext4_fc_replay_state *state = &sbi->s_fc_replay_state;
19998016e29fSHarshad Shirwadkar 	struct ext4_fc_tail *tail;
20008016e29fSHarshad Shirwadkar 
20018016e29fSHarshad Shirwadkar 	if (pass == PASS_SCAN) {
20028016e29fSHarshad Shirwadkar 		state->fc_current_pass = PASS_SCAN;
20038016e29fSHarshad Shirwadkar 		return ext4_fc_replay_scan(journal, bh, off, expected_tid);
20048016e29fSHarshad Shirwadkar 	}
20058016e29fSHarshad Shirwadkar 
20068016e29fSHarshad Shirwadkar 	if (state->fc_current_pass != pass) {
20078016e29fSHarshad Shirwadkar 		state->fc_current_pass = pass;
20088016e29fSHarshad Shirwadkar 		sbi->s_mount_state |= EXT4_FC_REPLAY;
20098016e29fSHarshad Shirwadkar 	}
20108016e29fSHarshad Shirwadkar 	if (!sbi->s_fc_replay_state.fc_replay_num_tags) {
20118016e29fSHarshad Shirwadkar 		jbd_debug(1, "Replay stops\n");
20128016e29fSHarshad Shirwadkar 		ext4_fc_set_bitmaps_and_counters(sb);
20135b849b5fSHarshad Shirwadkar 		return 0;
20145b849b5fSHarshad Shirwadkar 	}
20155b849b5fSHarshad Shirwadkar 
20168016e29fSHarshad Shirwadkar #ifdef CONFIG_EXT4_DEBUG
20178016e29fSHarshad Shirwadkar 	if (sbi->s_fc_debug_max_replay && off >= sbi->s_fc_debug_max_replay) {
20188016e29fSHarshad Shirwadkar 		pr_warn("Dropping fc block %d because max_replay set\n", off);
20198016e29fSHarshad Shirwadkar 		return JBD2_FC_REPLAY_STOP;
20208016e29fSHarshad Shirwadkar 	}
20218016e29fSHarshad Shirwadkar #endif
20228016e29fSHarshad Shirwadkar 
20238016e29fSHarshad Shirwadkar 	start = (u8 *)bh->b_data;
20248016e29fSHarshad Shirwadkar 	end = (__u8 *)bh->b_data + journal->j_blocksize - 1;
20258016e29fSHarshad Shirwadkar 
20268016e29fSHarshad Shirwadkar 	fc_for_each_tl(start, end, tl) {
20278016e29fSHarshad Shirwadkar 		if (state->fc_replay_num_tags == 0) {
20288016e29fSHarshad Shirwadkar 			ret = JBD2_FC_REPLAY_STOP;
20298016e29fSHarshad Shirwadkar 			ext4_fc_set_bitmaps_and_counters(sb);
20308016e29fSHarshad Shirwadkar 			break;
20318016e29fSHarshad Shirwadkar 		}
20328016e29fSHarshad Shirwadkar 		jbd_debug(3, "Replay phase, tag:%s\n",
20338016e29fSHarshad Shirwadkar 				tag2str(le16_to_cpu(tl->fc_tag)));
20348016e29fSHarshad Shirwadkar 		state->fc_replay_num_tags--;
20358016e29fSHarshad Shirwadkar 		switch (le16_to_cpu(tl->fc_tag)) {
20368016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_LINK:
20378016e29fSHarshad Shirwadkar 			ret = ext4_fc_replay_link(sb, tl);
20388016e29fSHarshad Shirwadkar 			break;
20398016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_UNLINK:
20408016e29fSHarshad Shirwadkar 			ret = ext4_fc_replay_unlink(sb, tl);
20418016e29fSHarshad Shirwadkar 			break;
20428016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_ADD_RANGE:
20438016e29fSHarshad Shirwadkar 			ret = ext4_fc_replay_add_range(sb, tl);
20448016e29fSHarshad Shirwadkar 			break;
20458016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_CREAT:
20468016e29fSHarshad Shirwadkar 			ret = ext4_fc_replay_create(sb, tl);
20478016e29fSHarshad Shirwadkar 			break;
20488016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_DEL_RANGE:
20498016e29fSHarshad Shirwadkar 			ret = ext4_fc_replay_del_range(sb, tl);
20508016e29fSHarshad Shirwadkar 			break;
20518016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_INODE:
20528016e29fSHarshad Shirwadkar 			ret = ext4_fc_replay_inode(sb, tl);
20538016e29fSHarshad Shirwadkar 			break;
20548016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_PAD:
20558016e29fSHarshad Shirwadkar 			trace_ext4_fc_replay(sb, EXT4_FC_TAG_PAD, 0,
20568016e29fSHarshad Shirwadkar 				ext4_fc_tag_len(tl), 0);
20578016e29fSHarshad Shirwadkar 			break;
20588016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_TAIL:
20598016e29fSHarshad Shirwadkar 			trace_ext4_fc_replay(sb, EXT4_FC_TAG_TAIL, 0,
20608016e29fSHarshad Shirwadkar 				ext4_fc_tag_len(tl), 0);
20618016e29fSHarshad Shirwadkar 			tail = (struct ext4_fc_tail *)ext4_fc_tag_val(tl);
20628016e29fSHarshad Shirwadkar 			WARN_ON(le32_to_cpu(tail->fc_tid) != expected_tid);
20638016e29fSHarshad Shirwadkar 			break;
20648016e29fSHarshad Shirwadkar 		case EXT4_FC_TAG_HEAD:
20658016e29fSHarshad Shirwadkar 			break;
20668016e29fSHarshad Shirwadkar 		default:
20678016e29fSHarshad Shirwadkar 			trace_ext4_fc_replay(sb, le16_to_cpu(tl->fc_tag), 0,
20688016e29fSHarshad Shirwadkar 				ext4_fc_tag_len(tl), 0);
20698016e29fSHarshad Shirwadkar 			ret = -ECANCELED;
20708016e29fSHarshad Shirwadkar 			break;
20718016e29fSHarshad Shirwadkar 		}
20728016e29fSHarshad Shirwadkar 		if (ret < 0)
20738016e29fSHarshad Shirwadkar 			break;
20748016e29fSHarshad Shirwadkar 		ret = JBD2_FC_REPLAY_CONTINUE;
20758016e29fSHarshad Shirwadkar 	}
20768016e29fSHarshad Shirwadkar 	return ret;
20778016e29fSHarshad Shirwadkar }
20788016e29fSHarshad Shirwadkar 
20796866d7b3SHarshad Shirwadkar void ext4_fc_init(struct super_block *sb, journal_t *journal)
20806866d7b3SHarshad Shirwadkar {
20815b849b5fSHarshad Shirwadkar 	/*
20825b849b5fSHarshad Shirwadkar 	 * We set replay callback even if fast commit disabled because we may
20835b849b5fSHarshad Shirwadkar 	 * could still have fast commit blocks that need to be replayed even if
20845b849b5fSHarshad Shirwadkar 	 * fast commit has now been turned off.
20855b849b5fSHarshad Shirwadkar 	 */
20865b849b5fSHarshad Shirwadkar 	journal->j_fc_replay_callback = ext4_fc_replay;
20876866d7b3SHarshad Shirwadkar 	if (!test_opt2(sb, JOURNAL_FAST_COMMIT))
20886866d7b3SHarshad Shirwadkar 		return;
2089ff780b91SHarshad Shirwadkar 	journal->j_fc_cleanup_callback = ext4_fc_cleanup;
20906866d7b3SHarshad Shirwadkar 	if (jbd2_fc_init(journal, EXT4_NUM_FC_BLKS)) {
20916866d7b3SHarshad Shirwadkar 		pr_warn("Error while enabling fast commits, turning off.");
20926866d7b3SHarshad Shirwadkar 		ext4_clear_feature_fast_commit(sb);
20936866d7b3SHarshad Shirwadkar 	}
20946866d7b3SHarshad Shirwadkar }
2095aa75f4d3SHarshad Shirwadkar 
/*
 * Human-readable labels for the fast commit ineligibility reasons.
 * ext4_fc_info_show() prints entry i next to fc_ineligible_reason_count[i],
 * so the order here has to follow the reason numbering.
 */
const char *fc_ineligible_reasons[] = {
	"Extended attributes changed",
	"Cross rename",
	"Journal flag changed",
	"Insufficient memory",
	"Swap boot",
	"Resize",
	"Dir renamed",
	"Falloc range op",
	"FC Commit Failed",
};
2107ce8c59d1SHarshad Shirwadkar 
2108ce8c59d1SHarshad Shirwadkar int ext4_fc_info_show(struct seq_file *seq, void *v)
2109ce8c59d1SHarshad Shirwadkar {
2110ce8c59d1SHarshad Shirwadkar 	struct ext4_sb_info *sbi = EXT4_SB((struct super_block *)seq->private);
2111ce8c59d1SHarshad Shirwadkar 	struct ext4_fc_stats *stats = &sbi->s_fc_stats;
2112ce8c59d1SHarshad Shirwadkar 	int i;
2113ce8c59d1SHarshad Shirwadkar 
2114ce8c59d1SHarshad Shirwadkar 	if (v != SEQ_START_TOKEN)
2115ce8c59d1SHarshad Shirwadkar 		return 0;
2116ce8c59d1SHarshad Shirwadkar 
2117ce8c59d1SHarshad Shirwadkar 	seq_printf(seq,
2118ce8c59d1SHarshad Shirwadkar 		"fc stats:\n%ld commits\n%ld ineligible\n%ld numblks\n%lluus avg_commit_time\n",
2119ce8c59d1SHarshad Shirwadkar 		   stats->fc_num_commits, stats->fc_ineligible_commits,
2120ce8c59d1SHarshad Shirwadkar 		   stats->fc_numblks,
2121ce8c59d1SHarshad Shirwadkar 		   div_u64(sbi->s_fc_avg_commit_time, 1000));
2122ce8c59d1SHarshad Shirwadkar 	seq_puts(seq, "Ineligible reasons:\n");
2123ce8c59d1SHarshad Shirwadkar 	for (i = 0; i < EXT4_FC_REASON_MAX; i++)
2124ce8c59d1SHarshad Shirwadkar 		seq_printf(seq, "\"%s\":\t%d\n", fc_ineligible_reasons[i],
2125ce8c59d1SHarshad Shirwadkar 			stats->fc_ineligible_reason_count[i]);
2126ce8c59d1SHarshad Shirwadkar 
2127ce8c59d1SHarshad Shirwadkar 	return 0;
2128ce8c59d1SHarshad Shirwadkar }
2129ce8c59d1SHarshad Shirwadkar 
2130aa75f4d3SHarshad Shirwadkar int __init ext4_fc_init_dentry_cache(void)
2131aa75f4d3SHarshad Shirwadkar {
2132aa75f4d3SHarshad Shirwadkar 	ext4_fc_dentry_cachep = KMEM_CACHE(ext4_fc_dentry_update,
2133aa75f4d3SHarshad Shirwadkar 					   SLAB_RECLAIM_ACCOUNT);
2134aa75f4d3SHarshad Shirwadkar 
2135aa75f4d3SHarshad Shirwadkar 	if (ext4_fc_dentry_cachep == NULL)
2136aa75f4d3SHarshad Shirwadkar 		return -ENOMEM;
2137aa75f4d3SHarshad Shirwadkar 
2138aa75f4d3SHarshad Shirwadkar 	return 0;
2139aa75f4d3SHarshad Shirwadkar }
2140