xref: /openbmc/linux/fs/ext4/fast_commit.c (revision 0de459a3)
1 // SPDX-License-Identifier: GPL-2.0
2 
3 /*
4  * fs/ext4/fast_commit.c
5  *
6  * Written by Harshad Shirwadkar <harshadshirwadkar@gmail.com>
7  *
8  * Ext4 fast commits routines.
9  */
10 #include "ext4.h"
11 #include "ext4_jbd2.h"
12 #include "ext4_extents.h"
13 #include "mballoc.h"
14 
15 /*
16  * Ext4 Fast Commits
17  * -----------------
18  *
19  * Ext4 fast commits implement fine grained journalling for Ext4.
20  *
21  * Fast commits are organized as a log of tag-length-value (TLV) structs. (See
22  * struct ext4_fc_tl). Each TLV contains some delta that is replayed TLV by
23  * TLV during the recovery phase. For the scenarios for which we currently
24  * don't have replay code, fast commit falls back to full commits.
25  * Fast commits record delta in one of the following three categories.
26  *
27  * (A) Directory entry updates:
28  *
29  * - EXT4_FC_TAG_UNLINK		- records directory entry unlink
30  * - EXT4_FC_TAG_LINK		- records directory entry link
31  * - EXT4_FC_TAG_CREAT		- records inode and directory entry creation
32  *
33  * (B) File specific data range updates:
34  *
35  * - EXT4_FC_TAG_ADD_RANGE	- records addition of new blocks to an inode
36  * - EXT4_FC_TAG_DEL_RANGE	- records deletion of blocks from an inode
37  *
38  * (C) Inode metadata (mtime / ctime etc):
39  *
40  * - EXT4_FC_TAG_INODE		- record the inode that should be replayed
41  *				  during recovery. Note that iblocks field is
42  *				  not replayed and instead derived during
43  *				  replay.
44  * Commit Operation
45  * ----------------
46  * With fast commits, we maintain all the directory entry operations in the
47  * order in which they are issued in an in-memory queue. This queue is flushed
48  * to disk during the commit operation. We also maintain a list of inodes
49  * that need to be committed during a fast commit in another in memory queue of
50  * inodes. During the commit operation, we commit in the following order:
51  *
52  * [1] Lock inodes for any further data updates by setting COMMITTING state
53  * [2] Submit data buffers of all the inodes
54  * [3] Wait for [2] to complete
55  * [4] Commit all the directory entry updates in the fast commit space
56  * [5] Commit all the changed inode structures
57  * [6] Write tail tag (this tag ensures the atomicity, please read the following
58  *     section for more details).
59  * [7] Wait for [4], [5] and [6] to complete.
60  *
61  * All the inode updates must call ext4_fc_start_update() before starting an
62  * update. If such an ongoing update is present, fast commit waits for it to
63  * complete. The completion of such an update is marked by
64  * ext4_fc_stop_update().
65  *
66  * Fast Commit Ineligibility
67  * -------------------------
68  *
 * Not all operations are supported by fast commits today (e.g. extended
 * attributes). Fast commit ineligibility is marked by calling
 * ext4_fc_mark_ineligible(). This makes the next fast commit operation fall
 * back to a full commit.
73  *
74  * Atomicity of commits
75  * --------------------
76  * In order to guarantee atomicity during the commit operation, fast commit
77  * uses "EXT4_FC_TAG_TAIL" tag that marks a fast commit as complete. Tail
78  * tag contains CRC of the contents and TID of the transaction after which
79  * this fast commit should be applied. Recovery code replays fast commit
80  * logs only if there's at least 1 valid tail present. For every fast commit
81  * operation, there is 1 tail. This means, we may end up with multiple tails
82  * in the fast commit space. Here's an example:
83  *
84  * - Create a new file A and remove existing file B
85  * - fsync()
86  * - Append contents to file A
87  * - Truncate file A
88  * - fsync()
89  *
90  * The fast commit space at the end of above operations would look like this:
91  *      [HEAD] [CREAT A] [UNLINK B] [TAIL] [ADD_RANGE A] [DEL_RANGE A] [TAIL]
92  *             |<---  Fast Commit 1   --->|<---      Fast Commit 2     ---->|
93  *
94  * Replay code should thus check for all the valid tails in the FC area.
95  *
96  * Fast Commit Replay Idempotence
97  * ------------------------------
98  *
 * Fast commit tags are idempotent in nature provided the recovery code follows
100  * certain rules. The guiding principle that the commit path follows while
101  * committing is that it stores the result of a particular operation instead of
102  * storing the procedure.
103  *
104  * Let's consider this rename operation: 'mv /a /b'. Let's assume dirent '/a'
105  * was associated with inode 10. During fast commit, instead of storing this
106  * operation as a procedure "rename a to b", we store the resulting file system
107  * state as a "series" of outcomes:
108  *
109  * - Link dirent b to inode 10
110  * - Unlink dirent a
111  * - Inode <10> with valid refcount
112  *
 * Now when recovery code runs, it needs to "enforce" this state on the file
114  * system. This is what guarantees idempotence of fast commit replay.
115  *
116  * Let's take an example of a procedure that is not idempotent and see how fast
117  * commits make it idempotent. Consider following sequence of operations:
118  *
119  *     rm A;    mv B A;    read A
120  *  (x)     (y)        (z)
121  *
122  * (x), (y) and (z) are the points at which we can crash. If we store this
123  * sequence of operations as is then the replay is not idempotent. Let's say
124  * while in replay, we crash at (z). During the second replay, file A (which was
125  * actually created as a result of "mv B A" operation) would get deleted. Thus,
126  * file named A would be absent when we try to read A. So, this sequence of
127  * operations is not idempotent. However, as mentioned above, instead of storing
128  * the procedure fast commits store the outcome of each procedure. Thus the fast
129  * commit log for above procedure would be as follows:
130  *
131  * (Let's assume dirent A was linked to inode 10 and dirent B was linked to
132  * inode 11 before the replay)
133  *
134  *    [Unlink A]   [Link A to inode 11]   [Unlink B]   [Inode 11]
135  * (w)          (x)                    (y)          (z)
136  *
137  * If we crash at (z), we will have file A linked to inode 11. During the second
138  * replay, we will remove file A (inode 11). But we will create it back and make
139  * it point to inode 11. We won't find B, so we'll just skip that step. At this
140  * point, the refcount for inode 11 is not reliable, but that gets fixed by the
141  * replay of last inode 11 tag. Crashes at points (w), (x) and (y) get handled
142  * similarly. Thus, by converting a non-idempotent procedure into a series of
143  * idempotent outcomes, fast commits ensured idempotence during the replay.
144  *
145  * TODOs
146  * -----
147  *
148  * 0) Fast commit replay path hardening: Fast commit replay code should use
149  *    journal handles to make sure all the updates it does during the replay
150  *    path are atomic. With that if we crash during fast commit replay, after
151  *    trying to do recovery again, we will find a file system where fast commit
152  *    area is invalid (because new full commit would be found). In order to deal
153  *    with that, fast commit replay code should ensure that the "FC_REPLAY"
154  *    superblock state is persisted before starting the replay, so that after
155  *    the crash, fast commit recovery code can look at that flag and perform
156  *    fast commit recovery even if that area is invalidated by later full
157  *    commits.
158  *
159  * 1) Fast commit's commit path locks the entire file system during fast
160  *    commit. This has significant performance penalty. Instead of that, we
161  *    should use ext4_fc_start/stop_update functions to start inode level
162  *    updates from ext4_journal_start/stop. Once we do that we can drop file
163  *    system locking during commit path.
164  *
165  * 2) Handle more ineligible cases.
166  */
167 
168 #include <trace/events/ext4.h>
/*
 * Cache that backs struct ext4_fc_dentry_update nodes: within this file they
 * are allocated from it in __track_dentry_update() and handed back to it in
 * ext4_fc_del().
 */
static struct kmem_cache *ext4_fc_dentry_cachep;
170 
171 static void ext4_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
172 {
173 	BUFFER_TRACE(bh, "");
174 	if (uptodate) {
175 		ext4_debug("%s: Block %lld up-to-date",
176 			   __func__, bh->b_blocknr);
177 		set_buffer_uptodate(bh);
178 	} else {
179 		ext4_debug("%s: Block %lld not up-to-date",
180 			   __func__, bh->b_blocknr);
181 		clear_buffer_uptodate(bh);
182 	}
183 
184 	unlock_buffer(bh);
185 }
186 
187 static inline void ext4_fc_reset_inode(struct inode *inode)
188 {
189 	struct ext4_inode_info *ei = EXT4_I(inode);
190 
191 	ei->i_fc_lblk_start = 0;
192 	ei->i_fc_lblk_len = 0;
193 }
194 
195 void ext4_fc_init_inode(struct inode *inode)
196 {
197 	struct ext4_inode_info *ei = EXT4_I(inode);
198 
199 	ext4_fc_reset_inode(inode);
200 	ext4_clear_inode_state(inode, EXT4_STATE_FC_COMMITTING);
201 	INIT_LIST_HEAD(&ei->i_fc_list);
202 	INIT_LIST_HEAD(&ei->i_fc_dilist);
203 	init_waitqueue_head(&ei->i_fc_wait);
204 	atomic_set(&ei->i_fc_updates, 0);
205 }
206 
/*
 * Sleep once on the bit wait queue associated with the inode's
 * EXT4_STATE_FC_COMMITTING bit.
 *
 * This function must be called with sbi->s_fc_lock held. The lock is dropped
 * before sleeping and is NOT re-acquired on return, so callers have to take
 * it again and re-check the inode state themselves (the callers in this file
 * do so by jumping back to their "restart" label).
 *
 * NOTE(review): the wake-up side is not part of this function; presumably
 * whoever clears EXT4_STATE_FC_COMMITTING wakes this bit wait queue - confirm
 * against the commit cleanup path.
 */
static void ext4_fc_wait_committing_inode(struct inode *inode)
__releases(&EXT4_SB(inode->i_sb)->s_fc_lock)
{
	wait_queue_head_t *wq;
	struct ext4_inode_info *ei = EXT4_I(inode);

	/*
	 * The word that is used as the wait key differs with the size of long:
	 * i_state_flags when BITS_PER_LONG < 64, i_flags otherwise.
	 */
#if (BITS_PER_LONG < 64)
	DEFINE_WAIT_BIT(wait, &ei->i_state_flags,
			EXT4_STATE_FC_COMMITTING);
	wq = bit_waitqueue(&ei->i_state_flags,
				EXT4_STATE_FC_COMMITTING);
#else
	DEFINE_WAIT_BIT(wait, &ei->i_flags,
			EXT4_STATE_FC_COMMITTING);
	wq = bit_waitqueue(&ei->i_flags,
				EXT4_STATE_FC_COMMITTING);
#endif
	lockdep_assert_held(&EXT4_SB(inode->i_sb)->s_fc_lock);
	/* Queue ourselves while still holding the lock, then drop it. */
	prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
	spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
	schedule();
	finish_wait(wq, &wait.wq_entry);
}
231 
232 static bool ext4_fc_disabled(struct super_block *sb)
233 {
234 	return (!test_opt2(sb, JOURNAL_FAST_COMMIT) ||
235 		(EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY));
236 }
237 
238 /*
239  * Inform Ext4's fast about start of an inode update
240  *
241  * This function is called by the high level call VFS callbacks before
242  * performing any inode update. This function blocks if there's an ongoing
243  * fast commit on the inode in question.
244  */
245 void ext4_fc_start_update(struct inode *inode)
246 {
247 	struct ext4_inode_info *ei = EXT4_I(inode);
248 
249 	if (ext4_fc_disabled(inode->i_sb))
250 		return;
251 
252 restart:
253 	spin_lock(&EXT4_SB(inode->i_sb)->s_fc_lock);
254 	if (list_empty(&ei->i_fc_list))
255 		goto out;
256 
257 	if (ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING)) {
258 		ext4_fc_wait_committing_inode(inode);
259 		goto restart;
260 	}
261 out:
262 	atomic_inc(&ei->i_fc_updates);
263 	spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
264 }
265 
266 /*
267  * Stop inode update and wake up waiting fast commits if any.
268  */
269 void ext4_fc_stop_update(struct inode *inode)
270 {
271 	struct ext4_inode_info *ei = EXT4_I(inode);
272 
273 	if (ext4_fc_disabled(inode->i_sb))
274 		return;
275 
276 	if (atomic_dec_and_test(&ei->i_fc_updates))
277 		wake_up_all(&ei->i_fc_wait);
278 }
279 
280 /*
281  * Remove inode from fast commit list. If the inode is being committed
282  * we wait until inode commit is done.
283  */
284 void ext4_fc_del(struct inode *inode)
285 {
286 	struct ext4_inode_info *ei = EXT4_I(inode);
287 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
288 	struct ext4_fc_dentry_update *fc_dentry;
289 
290 	if (ext4_fc_disabled(inode->i_sb))
291 		return;
292 
293 restart:
294 	spin_lock(&EXT4_SB(inode->i_sb)->s_fc_lock);
295 	if (list_empty(&ei->i_fc_list) && list_empty(&ei->i_fc_dilist)) {
296 		spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
297 		return;
298 	}
299 
300 	if (ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING)) {
301 		ext4_fc_wait_committing_inode(inode);
302 		goto restart;
303 	}
304 
305 	if (!list_empty(&ei->i_fc_list))
306 		list_del_init(&ei->i_fc_list);
307 
308 	/*
309 	 * Since this inode is getting removed, let's also remove all FC
310 	 * dentry create references, since it is not needed to log it anyways.
311 	 */
312 	if (list_empty(&ei->i_fc_dilist)) {
313 		spin_unlock(&sbi->s_fc_lock);
314 		return;
315 	}
316 
317 	fc_dentry = list_first_entry(&ei->i_fc_dilist, struct ext4_fc_dentry_update, fcd_dilist);
318 	WARN_ON(fc_dentry->fcd_op != EXT4_FC_TAG_CREAT);
319 	list_del_init(&fc_dentry->fcd_list);
320 	list_del_init(&fc_dentry->fcd_dilist);
321 
322 	WARN_ON(!list_empty(&ei->i_fc_dilist));
323 	spin_unlock(&sbi->s_fc_lock);
324 
325 	if (fc_dentry->fcd_name.name &&
326 		fc_dentry->fcd_name.len > DNAME_INLINE_LEN)
327 		kfree(fc_dentry->fcd_name.name);
328 	kmem_cache_free(ext4_fc_dentry_cachep, fc_dentry);
329 
330 	return;
331 }
332 
333 /*
334  * Mark file system as fast commit ineligible, and record latest
335  * ineligible transaction tid. This means until the recorded
336  * transaction, commit operation would result in a full jbd2 commit.
337  */
338 void ext4_fc_mark_ineligible(struct super_block *sb, int reason, handle_t *handle)
339 {
340 	struct ext4_sb_info *sbi = EXT4_SB(sb);
341 	tid_t tid;
342 
343 	if (ext4_fc_disabled(sb))
344 		return;
345 
346 	ext4_set_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
347 	if (handle && !IS_ERR(handle))
348 		tid = handle->h_transaction->t_tid;
349 	else {
350 		read_lock(&sbi->s_journal->j_state_lock);
351 		tid = sbi->s_journal->j_running_transaction ?
352 				sbi->s_journal->j_running_transaction->t_tid : 0;
353 		read_unlock(&sbi->s_journal->j_state_lock);
354 	}
355 	spin_lock(&sbi->s_fc_lock);
356 	if (sbi->s_fc_ineligible_tid < tid)
357 		sbi->s_fc_ineligible_tid = tid;
358 	spin_unlock(&sbi->s_fc_lock);
359 	WARN_ON(reason >= EXT4_FC_REASON_MAX);
360 	sbi->s_fc_stats.fc_ineligible_reason_count[reason]++;
361 }
362 
363 /*
364  * Generic fast commit tracking function. If this is the first time this we are
365  * called after a full commit, we initialize fast commit fields and then call
366  * __fc_track_fn() with update = 0. If we have already been called after a full
367  * commit, we pass update = 1. Based on that, the track function can determine
368  * if it needs to track a field for the first time or if it needs to just
369  * update the previously tracked value.
370  *
371  * If enqueue is set, this function enqueues the inode in fast commit list.
372  */
373 static int ext4_fc_track_template(
374 	handle_t *handle, struct inode *inode,
375 	int (*__fc_track_fn)(struct inode *, void *, bool),
376 	void *args, int enqueue)
377 {
378 	bool update = false;
379 	struct ext4_inode_info *ei = EXT4_I(inode);
380 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
381 	tid_t tid = 0;
382 	int ret;
383 
384 	tid = handle->h_transaction->t_tid;
385 	mutex_lock(&ei->i_fc_lock);
386 	if (tid == ei->i_sync_tid) {
387 		update = true;
388 	} else {
389 		ext4_fc_reset_inode(inode);
390 		ei->i_sync_tid = tid;
391 	}
392 	ret = __fc_track_fn(inode, args, update);
393 	mutex_unlock(&ei->i_fc_lock);
394 
395 	if (!enqueue)
396 		return ret;
397 
398 	spin_lock(&sbi->s_fc_lock);
399 	if (list_empty(&EXT4_I(inode)->i_fc_list))
400 		list_add_tail(&EXT4_I(inode)->i_fc_list,
401 				(sbi->s_journal->j_flags & JBD2_FULL_COMMIT_ONGOING ||
402 				 sbi->s_journal->j_flags & JBD2_FAST_COMMIT_ONGOING) ?
403 				&sbi->s_fc_q[FC_Q_STAGING] :
404 				&sbi->s_fc_q[FC_Q_MAIN]);
405 	spin_unlock(&sbi->s_fc_lock);
406 
407 	return ret;
408 }
409 
/* Argument bundle handed to __track_dentry_update() through a void pointer. */
struct __track_dentry_update_args {
	/* Directory entry the operation applies to. */
	struct dentry *dentry;
	/* Tag stored in the record: EXT4_FC_TAG_UNLINK, _LINK or _CREAT. */
	int op;
};
414 
415 /* __track_fn for directory entry updates. Called with ei->i_fc_lock. */
416 static int __track_dentry_update(struct inode *inode, void *arg, bool update)
417 {
418 	struct ext4_fc_dentry_update *node;
419 	struct ext4_inode_info *ei = EXT4_I(inode);
420 	struct __track_dentry_update_args *dentry_update =
421 		(struct __track_dentry_update_args *)arg;
422 	struct dentry *dentry = dentry_update->dentry;
423 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
424 
425 	mutex_unlock(&ei->i_fc_lock);
426 	node = kmem_cache_alloc(ext4_fc_dentry_cachep, GFP_NOFS);
427 	if (!node) {
428 		ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM, NULL);
429 		mutex_lock(&ei->i_fc_lock);
430 		return -ENOMEM;
431 	}
432 
433 	node->fcd_op = dentry_update->op;
434 	node->fcd_parent = dentry->d_parent->d_inode->i_ino;
435 	node->fcd_ino = inode->i_ino;
436 	if (dentry->d_name.len > DNAME_INLINE_LEN) {
437 		node->fcd_name.name = kmalloc(dentry->d_name.len, GFP_NOFS);
438 		if (!node->fcd_name.name) {
439 			kmem_cache_free(ext4_fc_dentry_cachep, node);
440 			ext4_fc_mark_ineligible(inode->i_sb,
441 				EXT4_FC_REASON_NOMEM, NULL);
442 			mutex_lock(&ei->i_fc_lock);
443 			return -ENOMEM;
444 		}
445 		memcpy((u8 *)node->fcd_name.name, dentry->d_name.name,
446 			dentry->d_name.len);
447 	} else {
448 		memcpy(node->fcd_iname, dentry->d_name.name,
449 			dentry->d_name.len);
450 		node->fcd_name.name = node->fcd_iname;
451 	}
452 	node->fcd_name.len = dentry->d_name.len;
453 	INIT_LIST_HEAD(&node->fcd_dilist);
454 	spin_lock(&sbi->s_fc_lock);
455 	if (sbi->s_journal->j_flags & JBD2_FULL_COMMIT_ONGOING ||
456 		sbi->s_journal->j_flags & JBD2_FAST_COMMIT_ONGOING)
457 		list_add_tail(&node->fcd_list,
458 				&sbi->s_fc_dentry_q[FC_Q_STAGING]);
459 	else
460 		list_add_tail(&node->fcd_list, &sbi->s_fc_dentry_q[FC_Q_MAIN]);
461 
462 	/*
463 	 * This helps us keep a track of all fc_dentry updates which is part of
464 	 * this ext4 inode. So in case the inode is getting unlinked, before
465 	 * even we get a chance to fsync, we could remove all fc_dentry
466 	 * references while evicting the inode in ext4_fc_del().
467 	 * Also with this, we don't need to loop over all the inodes in
468 	 * sbi->s_fc_q to get the corresponding inode in
469 	 * ext4_fc_commit_dentry_updates().
470 	 */
471 	if (dentry_update->op == EXT4_FC_TAG_CREAT) {
472 		WARN_ON(!list_empty(&ei->i_fc_dilist));
473 		list_add_tail(&node->fcd_dilist, &ei->i_fc_dilist);
474 	}
475 	spin_unlock(&sbi->s_fc_lock);
476 	mutex_lock(&ei->i_fc_lock);
477 
478 	return 0;
479 }
480 
481 void __ext4_fc_track_unlink(handle_t *handle,
482 		struct inode *inode, struct dentry *dentry)
483 {
484 	struct __track_dentry_update_args args;
485 	int ret;
486 
487 	args.dentry = dentry;
488 	args.op = EXT4_FC_TAG_UNLINK;
489 
490 	ret = ext4_fc_track_template(handle, inode, __track_dentry_update,
491 					(void *)&args, 0);
492 	trace_ext4_fc_track_unlink(handle, inode, dentry, ret);
493 }
494 
495 void ext4_fc_track_unlink(handle_t *handle, struct dentry *dentry)
496 {
497 	struct inode *inode = d_inode(dentry);
498 
499 	if (ext4_fc_disabled(inode->i_sb))
500 		return;
501 
502 	if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE))
503 		return;
504 
505 	__ext4_fc_track_unlink(handle, inode, dentry);
506 }
507 
508 void __ext4_fc_track_link(handle_t *handle,
509 	struct inode *inode, struct dentry *dentry)
510 {
511 	struct __track_dentry_update_args args;
512 	int ret;
513 
514 	args.dentry = dentry;
515 	args.op = EXT4_FC_TAG_LINK;
516 
517 	ret = ext4_fc_track_template(handle, inode, __track_dentry_update,
518 					(void *)&args, 0);
519 	trace_ext4_fc_track_link(handle, inode, dentry, ret);
520 }
521 
522 void ext4_fc_track_link(handle_t *handle, struct dentry *dentry)
523 {
524 	struct inode *inode = d_inode(dentry);
525 
526 	if (ext4_fc_disabled(inode->i_sb))
527 		return;
528 
529 	if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE))
530 		return;
531 
532 	__ext4_fc_track_link(handle, inode, dentry);
533 }
534 
535 void __ext4_fc_track_create(handle_t *handle, struct inode *inode,
536 			  struct dentry *dentry)
537 {
538 	struct __track_dentry_update_args args;
539 	int ret;
540 
541 	args.dentry = dentry;
542 	args.op = EXT4_FC_TAG_CREAT;
543 
544 	ret = ext4_fc_track_template(handle, inode, __track_dentry_update,
545 					(void *)&args, 0);
546 	trace_ext4_fc_track_create(handle, inode, dentry, ret);
547 }
548 
549 void ext4_fc_track_create(handle_t *handle, struct dentry *dentry)
550 {
551 	struct inode *inode = d_inode(dentry);
552 
553 	if (ext4_fc_disabled(inode->i_sb))
554 		return;
555 
556 	if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE))
557 		return;
558 
559 	__ext4_fc_track_create(handle, inode, dentry);
560 }
561 
562 /* __track_fn for inode tracking */
563 static int __track_inode(struct inode *inode, void *arg, bool update)
564 {
565 	if (update)
566 		return -EEXIST;
567 
568 	EXT4_I(inode)->i_fc_lblk_len = 0;
569 
570 	return 0;
571 }
572 
573 void ext4_fc_track_inode(handle_t *handle, struct inode *inode)
574 {
575 	int ret;
576 
577 	if (S_ISDIR(inode->i_mode))
578 		return;
579 
580 	if (ext4_fc_disabled(inode->i_sb))
581 		return;
582 
583 	if (ext4_should_journal_data(inode)) {
584 		ext4_fc_mark_ineligible(inode->i_sb,
585 					EXT4_FC_REASON_INODE_JOURNAL_DATA, handle);
586 		return;
587 	}
588 
589 	if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE))
590 		return;
591 
592 	ret = ext4_fc_track_template(handle, inode, __track_inode, NULL, 1);
593 	trace_ext4_fc_track_inode(handle, inode, ret);
594 }
595 
596 struct __track_range_args {
597 	ext4_lblk_t start, end;
598 };
599 
600 /* __track_fn for tracking data updates */
601 static int __track_range(struct inode *inode, void *arg, bool update)
602 {
603 	struct ext4_inode_info *ei = EXT4_I(inode);
604 	ext4_lblk_t oldstart;
605 	struct __track_range_args *__arg =
606 		(struct __track_range_args *)arg;
607 
608 	if (inode->i_ino < EXT4_FIRST_INO(inode->i_sb)) {
609 		ext4_debug("Special inode %ld being modified\n", inode->i_ino);
610 		return -ECANCELED;
611 	}
612 
613 	oldstart = ei->i_fc_lblk_start;
614 
615 	if (update && ei->i_fc_lblk_len > 0) {
616 		ei->i_fc_lblk_start = min(ei->i_fc_lblk_start, __arg->start);
617 		ei->i_fc_lblk_len =
618 			max(oldstart + ei->i_fc_lblk_len - 1, __arg->end) -
619 				ei->i_fc_lblk_start + 1;
620 	} else {
621 		ei->i_fc_lblk_start = __arg->start;
622 		ei->i_fc_lblk_len = __arg->end - __arg->start + 1;
623 	}
624 
625 	return 0;
626 }
627 
628 void ext4_fc_track_range(handle_t *handle, struct inode *inode, ext4_lblk_t start,
629 			 ext4_lblk_t end)
630 {
631 	struct __track_range_args args;
632 	int ret;
633 
634 	if (S_ISDIR(inode->i_mode))
635 		return;
636 
637 	if (ext4_fc_disabled(inode->i_sb))
638 		return;
639 
640 	if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE))
641 		return;
642 
643 	args.start = start;
644 	args.end = end;
645 
646 	ret = ext4_fc_track_template(handle, inode,  __track_range, &args, 1);
647 
648 	trace_ext4_fc_track_range(handle, inode, start, end, ret);
649 }
650 
651 static void ext4_fc_submit_bh(struct super_block *sb, bool is_tail)
652 {
653 	blk_opf_t write_flags = REQ_SYNC;
654 	struct buffer_head *bh = EXT4_SB(sb)->s_fc_bh;
655 
656 	/* Add REQ_FUA | REQ_PREFLUSH only its tail */
657 	if (test_opt(sb, BARRIER) && is_tail)
658 		write_flags |= REQ_FUA | REQ_PREFLUSH;
659 	lock_buffer(bh);
660 	set_buffer_dirty(bh);
661 	set_buffer_uptodate(bh);
662 	bh->b_end_io = ext4_end_buffer_io_sync;
663 	submit_bh(REQ_OP_WRITE | write_flags, bh);
664 	EXT4_SB(sb)->s_fc_bh = NULL;
665 }
666 
667 /* Ext4 commit path routines */
668 
669 /* memzero and update CRC */
670 static void *ext4_fc_memzero(struct super_block *sb, void *dst, int len,
671 				u32 *crc)
672 {
673 	void *ret;
674 
675 	ret = memset(dst, 0, len);
676 	if (crc)
677 		*crc = ext4_chksum(EXT4_SB(sb), *crc, dst, len);
678 	return ret;
679 }
680 
681 /*
682  * Allocate len bytes on a fast commit buffer.
683  *
684  * During the commit time this function is used to manage fast commit
685  * block space. We don't split a fast commit log onto different
686  * blocks. So this function makes sure that if there's not enough space
687  * on the current block, the remaining space in the current block is
688  * marked as unused by adding EXT4_FC_TAG_PAD tag. In that case,
689  * new block is from jbd2 and CRC is updated to reflect the padding
690  * we added.
691  */
692 static u8 *ext4_fc_reserve_space(struct super_block *sb, int len, u32 *crc)
693 {
694 	struct ext4_fc_tl *tl;
695 	struct ext4_sb_info *sbi = EXT4_SB(sb);
696 	struct buffer_head *bh;
697 	int bsize = sbi->s_journal->j_blocksize;
698 	int ret, off = sbi->s_fc_bytes % bsize;
699 	int pad_len;
700 
701 	/*
702 	 * After allocating len, we should have space at least for a 0 byte
703 	 * padding.
704 	 */
705 	if (len + EXT4_FC_TAG_BASE_LEN > bsize)
706 		return NULL;
707 
708 	if (bsize - off - 1 > len + EXT4_FC_TAG_BASE_LEN) {
709 		/*
710 		 * Only allocate from current buffer if we have enough space for
711 		 * this request AND we have space to add a zero byte padding.
712 		 */
713 		if (!sbi->s_fc_bh) {
714 			ret = jbd2_fc_get_buf(EXT4_SB(sb)->s_journal, &bh);
715 			if (ret)
716 				return NULL;
717 			sbi->s_fc_bh = bh;
718 		}
719 		sbi->s_fc_bytes += len;
720 		return sbi->s_fc_bh->b_data + off;
721 	}
722 	/* Need to add PAD tag */
723 	tl = (struct ext4_fc_tl *)(sbi->s_fc_bh->b_data + off);
724 	tl->fc_tag = cpu_to_le16(EXT4_FC_TAG_PAD);
725 	pad_len = bsize - off - 1 - EXT4_FC_TAG_BASE_LEN;
726 	tl->fc_len = cpu_to_le16(pad_len);
727 	if (crc)
728 		*crc = ext4_chksum(sbi, *crc, tl, EXT4_FC_TAG_BASE_LEN);
729 	if (pad_len > 0)
730 		ext4_fc_memzero(sb, tl + 1, pad_len, crc);
731 	ext4_fc_submit_bh(sb, false);
732 
733 	ret = jbd2_fc_get_buf(EXT4_SB(sb)->s_journal, &bh);
734 	if (ret)
735 		return NULL;
736 	sbi->s_fc_bh = bh;
737 	sbi->s_fc_bytes = (sbi->s_fc_bytes / bsize + 1) * bsize + len;
738 	return sbi->s_fc_bh->b_data;
739 }
740 
741 /* memcpy to fc reserved space and update CRC */
742 static void *ext4_fc_memcpy(struct super_block *sb, void *dst, const void *src,
743 				int len, u32 *crc)
744 {
745 	if (crc)
746 		*crc = ext4_chksum(EXT4_SB(sb), *crc, src, len);
747 	return memcpy(dst, src, len);
748 }
749 
/*
 * Complete a fast commit by writing tail tag.
 *
 * Writing tail tag marks the end of a fast commit. In order to guarantee
 * atomicity, after writing tail tag, even if there's space remaining
 * in the block, next commit shouldn't use it. That's why tail tag
 * has the length as that of the remaining space on the block.
 *
 * @sb:  super block
 * @crc: checksum accumulated over the fast commit so far. It is passed by
 *       value; the tail header and the tid are folded into the local copy,
 *       and the result is what gets stored in the tail.
 *
 * Returns 0 on success and -ENOSPC if no space could be reserved for the
 * tail.
 */
static int ext4_fc_write_tail(struct super_block *sb, u32 crc)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_fc_tl tl;
	struct ext4_fc_tail tail;
	int off, bsize = sbi->s_journal->j_blocksize;
	u8 *dst;

	/*
	 * ext4_fc_reserve_space takes care of allocating an extra block if
	 * there's no enough space on this block for accommodating this tail.
	 */
	dst = ext4_fc_reserve_space(sb, EXT4_FC_TAG_BASE_LEN + sizeof(tail), &crc);
	if (!dst)
		return -ENOSPC;

	/*
	 * Offset within the block just past the bytes reserved above. It has to
	 * be sampled here: after the reservation, but before s_fc_bytes is
	 * rounded up below.
	 */
	off = sbi->s_fc_bytes % bsize;

	tl.fc_tag = cpu_to_le16(EXT4_FC_TAG_TAIL);
	tl.fc_len = cpu_to_le16(bsize - off - 1 + sizeof(struct ext4_fc_tail));
	/* The rest of this block is consumed; continue at the next block. */
	sbi->s_fc_bytes = round_up(sbi->s_fc_bytes, bsize);

	ext4_fc_memcpy(sb, dst, &tl, EXT4_FC_TAG_BASE_LEN, &crc);
	dst += EXT4_FC_TAG_BASE_LEN;
	tail.fc_tid = cpu_to_le32(sbi->s_journal->j_running_transaction->t_tid);
	ext4_fc_memcpy(sb, dst, &tail.fc_tid, sizeof(tail.fc_tid), &crc);
	dst += sizeof(tail.fc_tid);
	/* The stored checksum covers everything up to, but not including, itself. */
	tail.fc_crc = cpu_to_le32(crc);
	ext4_fc_memcpy(sb, dst, &tail.fc_crc, sizeof(tail.fc_crc), NULL);

	/* is_tail == true lets the submit helper add FUA/PREFLUSH if enabled. */
	ext4_fc_submit_bh(sb, true);

	return 0;
}
792 
793 /*
794  * Adds tag, length, value and updates CRC. Returns true if tlv was added.
795  * Returns false if there's not enough space.
796  */
797 static bool ext4_fc_add_tlv(struct super_block *sb, u16 tag, u16 len, u8 *val,
798 			   u32 *crc)
799 {
800 	struct ext4_fc_tl tl;
801 	u8 *dst;
802 
803 	dst = ext4_fc_reserve_space(sb, EXT4_FC_TAG_BASE_LEN + len, crc);
804 	if (!dst)
805 		return false;
806 
807 	tl.fc_tag = cpu_to_le16(tag);
808 	tl.fc_len = cpu_to_le16(len);
809 
810 	ext4_fc_memcpy(sb, dst, &tl, EXT4_FC_TAG_BASE_LEN, crc);
811 	ext4_fc_memcpy(sb, dst + EXT4_FC_TAG_BASE_LEN, val, len, crc);
812 
813 	return true;
814 }
815 
816 /* Same as above, but adds dentry tlv. */
817 static bool ext4_fc_add_dentry_tlv(struct super_block *sb, u32 *crc,
818 				   struct ext4_fc_dentry_update *fc_dentry)
819 {
820 	struct ext4_fc_dentry_info fcd;
821 	struct ext4_fc_tl tl;
822 	int dlen = fc_dentry->fcd_name.len;
823 	u8 *dst = ext4_fc_reserve_space(sb,
824 			EXT4_FC_TAG_BASE_LEN + sizeof(fcd) + dlen, crc);
825 
826 	if (!dst)
827 		return false;
828 
829 	fcd.fc_parent_ino = cpu_to_le32(fc_dentry->fcd_parent);
830 	fcd.fc_ino = cpu_to_le32(fc_dentry->fcd_ino);
831 	tl.fc_tag = cpu_to_le16(fc_dentry->fcd_op);
832 	tl.fc_len = cpu_to_le16(sizeof(fcd) + dlen);
833 	ext4_fc_memcpy(sb, dst, &tl, EXT4_FC_TAG_BASE_LEN, crc);
834 	dst += EXT4_FC_TAG_BASE_LEN;
835 	ext4_fc_memcpy(sb, dst, &fcd, sizeof(fcd), crc);
836 	dst += sizeof(fcd);
837 	ext4_fc_memcpy(sb, dst, fc_dentry->fcd_name.name, dlen, crc);
838 
839 	return true;
840 }
841 
842 /*
843  * Writes inode in the fast commit space under TLV with tag @tag.
844  * Returns 0 on success, error on failure.
845  */
846 static int ext4_fc_write_inode(struct inode *inode, u32 *crc)
847 {
848 	struct ext4_inode_info *ei = EXT4_I(inode);
849 	int inode_len = EXT4_GOOD_OLD_INODE_SIZE;
850 	int ret;
851 	struct ext4_iloc iloc;
852 	struct ext4_fc_inode fc_inode;
853 	struct ext4_fc_tl tl;
854 	u8 *dst;
855 
856 	ret = ext4_get_inode_loc(inode, &iloc);
857 	if (ret)
858 		return ret;
859 
860 	if (ext4_test_inode_flag(inode, EXT4_INODE_INLINE_DATA))
861 		inode_len = EXT4_INODE_SIZE(inode->i_sb);
862 	else if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE)
863 		inode_len += ei->i_extra_isize;
864 
865 	fc_inode.fc_ino = cpu_to_le32(inode->i_ino);
866 	tl.fc_tag = cpu_to_le16(EXT4_FC_TAG_INODE);
867 	tl.fc_len = cpu_to_le16(inode_len + sizeof(fc_inode.fc_ino));
868 
869 	ret = -ECANCELED;
870 	dst = ext4_fc_reserve_space(inode->i_sb,
871 		EXT4_FC_TAG_BASE_LEN + inode_len + sizeof(fc_inode.fc_ino), crc);
872 	if (!dst)
873 		goto err;
874 
875 	if (!ext4_fc_memcpy(inode->i_sb, dst, &tl, EXT4_FC_TAG_BASE_LEN, crc))
876 		goto err;
877 	dst += EXT4_FC_TAG_BASE_LEN;
878 	if (!ext4_fc_memcpy(inode->i_sb, dst, &fc_inode, sizeof(fc_inode), crc))
879 		goto err;
880 	dst += sizeof(fc_inode);
881 	if (!ext4_fc_memcpy(inode->i_sb, dst, (u8 *)ext4_raw_inode(&iloc),
882 					inode_len, crc))
883 		goto err;
884 	ret = 0;
885 err:
886 	brelse(iloc.bh);
887 	return ret;
888 }
889 
890 /*
891  * Writes updated data ranges for the inode in question. Updates CRC.
892  * Returns 0 on success, error otherwise.
893  */
894 static int ext4_fc_write_inode_data(struct inode *inode, u32 *crc)
895 {
896 	ext4_lblk_t old_blk_size, cur_lblk_off, new_blk_size;
897 	struct ext4_inode_info *ei = EXT4_I(inode);
898 	struct ext4_map_blocks map;
899 	struct ext4_fc_add_range fc_ext;
900 	struct ext4_fc_del_range lrange;
901 	struct ext4_extent *ex;
902 	int ret;
903 
904 	mutex_lock(&ei->i_fc_lock);
905 	if (ei->i_fc_lblk_len == 0) {
906 		mutex_unlock(&ei->i_fc_lock);
907 		return 0;
908 	}
909 	old_blk_size = ei->i_fc_lblk_start;
910 	new_blk_size = ei->i_fc_lblk_start + ei->i_fc_lblk_len - 1;
911 	ei->i_fc_lblk_len = 0;
912 	mutex_unlock(&ei->i_fc_lock);
913 
914 	cur_lblk_off = old_blk_size;
915 	ext4_debug("will try writing %d to %d for inode %ld\n",
916 		   cur_lblk_off, new_blk_size, inode->i_ino);
917 
918 	while (cur_lblk_off <= new_blk_size) {
919 		map.m_lblk = cur_lblk_off;
920 		map.m_len = new_blk_size - cur_lblk_off + 1;
921 		ret = ext4_map_blocks(NULL, inode, &map, 0);
922 		if (ret < 0)
923 			return -ECANCELED;
924 
925 		if (map.m_len == 0) {
926 			cur_lblk_off++;
927 			continue;
928 		}
929 
930 		if (ret == 0) {
931 			lrange.fc_ino = cpu_to_le32(inode->i_ino);
932 			lrange.fc_lblk = cpu_to_le32(map.m_lblk);
933 			lrange.fc_len = cpu_to_le32(map.m_len);
934 			if (!ext4_fc_add_tlv(inode->i_sb, EXT4_FC_TAG_DEL_RANGE,
935 					    sizeof(lrange), (u8 *)&lrange, crc))
936 				return -ENOSPC;
937 		} else {
938 			unsigned int max = (map.m_flags & EXT4_MAP_UNWRITTEN) ?
939 				EXT_UNWRITTEN_MAX_LEN : EXT_INIT_MAX_LEN;
940 
941 			/* Limit the number of blocks in one extent */
942 			map.m_len = min(max, map.m_len);
943 
944 			fc_ext.fc_ino = cpu_to_le32(inode->i_ino);
945 			ex = (struct ext4_extent *)&fc_ext.fc_ex;
946 			ex->ee_block = cpu_to_le32(map.m_lblk);
947 			ex->ee_len = cpu_to_le16(map.m_len);
948 			ext4_ext_store_pblock(ex, map.m_pblk);
949 			if (map.m_flags & EXT4_MAP_UNWRITTEN)
950 				ext4_ext_mark_unwritten(ex);
951 			else
952 				ext4_ext_mark_initialized(ex);
953 			if (!ext4_fc_add_tlv(inode->i_sb, EXT4_FC_TAG_ADD_RANGE,
954 					    sizeof(fc_ext), (u8 *)&fc_ext, crc))
955 				return -ENOSPC;
956 		}
957 
958 		cur_lblk_off += map.m_len;
959 	}
960 
961 	return 0;
962 }
963 
964 
965 /* Submit data for all the fast commit inodes */
966 static int ext4_fc_submit_inode_data_all(journal_t *journal)
967 {
968 	struct super_block *sb = journal->j_private;
969 	struct ext4_sb_info *sbi = EXT4_SB(sb);
970 	struct ext4_inode_info *ei;
971 	int ret = 0;
972 
973 	spin_lock(&sbi->s_fc_lock);
974 	list_for_each_entry(ei, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) {
975 		ext4_set_inode_state(&ei->vfs_inode, EXT4_STATE_FC_COMMITTING);
976 		while (atomic_read(&ei->i_fc_updates)) {
977 			DEFINE_WAIT(wait);
978 
979 			prepare_to_wait(&ei->i_fc_wait, &wait,
980 						TASK_UNINTERRUPTIBLE);
981 			if (atomic_read(&ei->i_fc_updates)) {
982 				spin_unlock(&sbi->s_fc_lock);
983 				schedule();
984 				spin_lock(&sbi->s_fc_lock);
985 			}
986 			finish_wait(&ei->i_fc_wait, &wait);
987 		}
988 		spin_unlock(&sbi->s_fc_lock);
989 		ret = jbd2_submit_inode_data(ei->jinode);
990 		if (ret)
991 			return ret;
992 		spin_lock(&sbi->s_fc_lock);
993 	}
994 	spin_unlock(&sbi->s_fc_lock);
995 
996 	return ret;
997 }
998 
999 /* Wait for completion of data for all the fast commit inodes */
1000 static int ext4_fc_wait_inode_data_all(journal_t *journal)
1001 {
1002 	struct super_block *sb = journal->j_private;
1003 	struct ext4_sb_info *sbi = EXT4_SB(sb);
1004 	struct ext4_inode_info *pos, *n;
1005 	int ret = 0;
1006 
1007 	spin_lock(&sbi->s_fc_lock);
1008 	list_for_each_entry_safe(pos, n, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) {
1009 		if (!ext4_test_inode_state(&pos->vfs_inode,
1010 					   EXT4_STATE_FC_COMMITTING))
1011 			continue;
1012 		spin_unlock(&sbi->s_fc_lock);
1013 
1014 		ret = jbd2_wait_inode_data(journal, pos->jinode);
1015 		if (ret)
1016 			return ret;
1017 		spin_lock(&sbi->s_fc_lock);
1018 	}
1019 	spin_unlock(&sbi->s_fc_lock);
1020 
1021 	return 0;
1022 }
1023 
1024 /* Commit all the directory entry updates */
1025 static int ext4_fc_commit_dentry_updates(journal_t *journal, u32 *crc)
1026 __acquires(&sbi->s_fc_lock)
1027 __releases(&sbi->s_fc_lock)
1028 {
1029 	struct super_block *sb = journal->j_private;
1030 	struct ext4_sb_info *sbi = EXT4_SB(sb);
1031 	struct ext4_fc_dentry_update *fc_dentry, *fc_dentry_n;
1032 	struct inode *inode;
1033 	struct ext4_inode_info *ei;
1034 	int ret;
1035 
1036 	if (list_empty(&sbi->s_fc_dentry_q[FC_Q_MAIN]))
1037 		return 0;
1038 	list_for_each_entry_safe(fc_dentry, fc_dentry_n,
1039 				 &sbi->s_fc_dentry_q[FC_Q_MAIN], fcd_list) {
1040 		if (fc_dentry->fcd_op != EXT4_FC_TAG_CREAT) {
1041 			spin_unlock(&sbi->s_fc_lock);
1042 			if (!ext4_fc_add_dentry_tlv(sb, crc, fc_dentry)) {
1043 				ret = -ENOSPC;
1044 				goto lock_and_exit;
1045 			}
1046 			spin_lock(&sbi->s_fc_lock);
1047 			continue;
1048 		}
1049 		/*
1050 		 * With fcd_dilist we need not loop in sbi->s_fc_q to get the
1051 		 * corresponding inode pointer
1052 		 */
1053 		WARN_ON(list_empty(&fc_dentry->fcd_dilist));
1054 		ei = list_first_entry(&fc_dentry->fcd_dilist,
1055 				struct ext4_inode_info, i_fc_dilist);
1056 		inode = &ei->vfs_inode;
1057 		WARN_ON(inode->i_ino != fc_dentry->fcd_ino);
1058 
1059 		spin_unlock(&sbi->s_fc_lock);
1060 
1061 		/*
1062 		 * We first write the inode and then the create dirent. This
1063 		 * allows the recovery code to create an unnamed inode first
1064 		 * and then link it to a directory entry. This allows us
1065 		 * to use namei.c routines almost as is and simplifies
1066 		 * the recovery code.
1067 		 */
1068 		ret = ext4_fc_write_inode(inode, crc);
1069 		if (ret)
1070 			goto lock_and_exit;
1071 
1072 		ret = ext4_fc_write_inode_data(inode, crc);
1073 		if (ret)
1074 			goto lock_and_exit;
1075 
1076 		if (!ext4_fc_add_dentry_tlv(sb, crc, fc_dentry)) {
1077 			ret = -ENOSPC;
1078 			goto lock_and_exit;
1079 		}
1080 
1081 		spin_lock(&sbi->s_fc_lock);
1082 	}
1083 	return 0;
1084 lock_and_exit:
1085 	spin_lock(&sbi->s_fc_lock);
1086 	return ret;
1087 }
1088 
1089 static int ext4_fc_perform_commit(journal_t *journal)
1090 {
1091 	struct super_block *sb = journal->j_private;
1092 	struct ext4_sb_info *sbi = EXT4_SB(sb);
1093 	struct ext4_inode_info *iter;
1094 	struct ext4_fc_head head;
1095 	struct inode *inode;
1096 	struct blk_plug plug;
1097 	int ret = 0;
1098 	u32 crc = 0;
1099 
1100 	ret = ext4_fc_submit_inode_data_all(journal);
1101 	if (ret)
1102 		return ret;
1103 
1104 	ret = ext4_fc_wait_inode_data_all(journal);
1105 	if (ret)
1106 		return ret;
1107 
1108 	/*
1109 	 * If file system device is different from journal device, issue a cache
1110 	 * flush before we start writing fast commit blocks.
1111 	 */
1112 	if (journal->j_fs_dev != journal->j_dev)
1113 		blkdev_issue_flush(journal->j_fs_dev);
1114 
1115 	blk_start_plug(&plug);
1116 	if (sbi->s_fc_bytes == 0) {
1117 		/*
1118 		 * Add a head tag only if this is the first fast commit
1119 		 * in this TID.
1120 		 */
1121 		head.fc_features = cpu_to_le32(EXT4_FC_SUPPORTED_FEATURES);
1122 		head.fc_tid = cpu_to_le32(
1123 			sbi->s_journal->j_running_transaction->t_tid);
1124 		if (!ext4_fc_add_tlv(sb, EXT4_FC_TAG_HEAD, sizeof(head),
1125 			(u8 *)&head, &crc)) {
1126 			ret = -ENOSPC;
1127 			goto out;
1128 		}
1129 	}
1130 
1131 	spin_lock(&sbi->s_fc_lock);
1132 	ret = ext4_fc_commit_dentry_updates(journal, &crc);
1133 	if (ret) {
1134 		spin_unlock(&sbi->s_fc_lock);
1135 		goto out;
1136 	}
1137 
1138 	list_for_each_entry(iter, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) {
1139 		inode = &iter->vfs_inode;
1140 		if (!ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING))
1141 			continue;
1142 
1143 		spin_unlock(&sbi->s_fc_lock);
1144 		ret = ext4_fc_write_inode_data(inode, &crc);
1145 		if (ret)
1146 			goto out;
1147 		ret = ext4_fc_write_inode(inode, &crc);
1148 		if (ret)
1149 			goto out;
1150 		spin_lock(&sbi->s_fc_lock);
1151 	}
1152 	spin_unlock(&sbi->s_fc_lock);
1153 
1154 	ret = ext4_fc_write_tail(sb, crc);
1155 
1156 out:
1157 	blk_finish_plug(&plug);
1158 	return ret;
1159 }
1160 
1161 static void ext4_fc_update_stats(struct super_block *sb, int status,
1162 				 u64 commit_time, int nblks, tid_t commit_tid)
1163 {
1164 	struct ext4_fc_stats *stats = &EXT4_SB(sb)->s_fc_stats;
1165 
1166 	ext4_debug("Fast commit ended with status = %d for tid %u",
1167 			status, commit_tid);
1168 	if (status == EXT4_FC_STATUS_OK) {
1169 		stats->fc_num_commits++;
1170 		stats->fc_numblks += nblks;
1171 		if (likely(stats->s_fc_avg_commit_time))
1172 			stats->s_fc_avg_commit_time =
1173 				(commit_time +
1174 				 stats->s_fc_avg_commit_time * 3) / 4;
1175 		else
1176 			stats->s_fc_avg_commit_time = commit_time;
1177 	} else if (status == EXT4_FC_STATUS_FAILED ||
1178 		   status == EXT4_FC_STATUS_INELIGIBLE) {
1179 		if (status == EXT4_FC_STATUS_FAILED)
1180 			stats->fc_failed_commits++;
1181 		stats->fc_ineligible_commits++;
1182 	} else {
1183 		stats->fc_skipped_commits++;
1184 	}
1185 	trace_ext4_fc_commit_stop(sb, nblks, status, commit_tid);
1186 }
1187 
1188 /*
1189  * The main commit entry point. Performs a fast commit for transaction
1190  * commit_tid if needed. If it's not possible to perform a fast commit
1191  * due to various reasons, we fall back to full commit. Returns 0
1192  * on success, error otherwise.
1193  */
1194 int ext4_fc_commit(journal_t *journal, tid_t commit_tid)
1195 {
1196 	struct super_block *sb = journal->j_private;
1197 	struct ext4_sb_info *sbi = EXT4_SB(sb);
1198 	int nblks = 0, ret, bsize = journal->j_blocksize;
1199 	int subtid = atomic_read(&sbi->s_fc_subtid);
1200 	int status = EXT4_FC_STATUS_OK, fc_bufs_before = 0;
1201 	ktime_t start_time, commit_time;
1202 
1203 	if (!test_opt2(sb, JOURNAL_FAST_COMMIT))
1204 		return jbd2_complete_transaction(journal, commit_tid);
1205 
1206 	trace_ext4_fc_commit_start(sb, commit_tid);
1207 
1208 	start_time = ktime_get();
1209 
1210 restart_fc:
1211 	ret = jbd2_fc_begin_commit(journal, commit_tid);
1212 	if (ret == -EALREADY) {
1213 		/* There was an ongoing commit, check if we need to restart */
1214 		if (atomic_read(&sbi->s_fc_subtid) <= subtid &&
1215 			commit_tid > journal->j_commit_sequence)
1216 			goto restart_fc;
1217 		ext4_fc_update_stats(sb, EXT4_FC_STATUS_SKIPPED, 0, 0,
1218 				commit_tid);
1219 		return 0;
1220 	} else if (ret) {
1221 		/*
1222 		 * Commit couldn't start. Just update stats and perform a
1223 		 * full commit.
1224 		 */
1225 		ext4_fc_update_stats(sb, EXT4_FC_STATUS_FAILED, 0, 0,
1226 				commit_tid);
1227 		return jbd2_complete_transaction(journal, commit_tid);
1228 	}
1229 
1230 	/*
1231 	 * After establishing journal barrier via jbd2_fc_begin_commit(), check
1232 	 * if we are fast commit ineligible.
1233 	 */
1234 	if (ext4_test_mount_flag(sb, EXT4_MF_FC_INELIGIBLE)) {
1235 		status = EXT4_FC_STATUS_INELIGIBLE;
1236 		goto fallback;
1237 	}
1238 
1239 	fc_bufs_before = (sbi->s_fc_bytes + bsize - 1) / bsize;
1240 	ret = ext4_fc_perform_commit(journal);
1241 	if (ret < 0) {
1242 		status = EXT4_FC_STATUS_FAILED;
1243 		goto fallback;
1244 	}
1245 	nblks = (sbi->s_fc_bytes + bsize - 1) / bsize - fc_bufs_before;
1246 	ret = jbd2_fc_wait_bufs(journal, nblks);
1247 	if (ret < 0) {
1248 		status = EXT4_FC_STATUS_FAILED;
1249 		goto fallback;
1250 	}
1251 	atomic_inc(&sbi->s_fc_subtid);
1252 	ret = jbd2_fc_end_commit(journal);
1253 	/*
1254 	 * weight the commit time higher than the average time so we
1255 	 * don't react too strongly to vast changes in the commit time
1256 	 */
1257 	commit_time = ktime_to_ns(ktime_sub(ktime_get(), start_time));
1258 	ext4_fc_update_stats(sb, status, commit_time, nblks, commit_tid);
1259 	return ret;
1260 
1261 fallback:
1262 	ret = jbd2_fc_end_commit_fallback(journal);
1263 	ext4_fc_update_stats(sb, status, 0, 0, commit_tid);
1264 	return ret;
1265 }
1266 
1267 /*
1268  * Fast commit cleanup routine. This is called after every fast commit and
1269  * full commit. full is true if we are called after a full commit.
1270  */
1271 static void ext4_fc_cleanup(journal_t *journal, int full, tid_t tid)
1272 {
1273 	struct super_block *sb = journal->j_private;
1274 	struct ext4_sb_info *sbi = EXT4_SB(sb);
1275 	struct ext4_inode_info *iter, *iter_n;
1276 	struct ext4_fc_dentry_update *fc_dentry;
1277 
1278 	if (full && sbi->s_fc_bh)
1279 		sbi->s_fc_bh = NULL;
1280 
1281 	trace_ext4_fc_cleanup(journal, full, tid);
1282 	jbd2_fc_release_bufs(journal);
1283 
1284 	spin_lock(&sbi->s_fc_lock);
1285 	list_for_each_entry_safe(iter, iter_n, &sbi->s_fc_q[FC_Q_MAIN],
1286 				 i_fc_list) {
1287 		list_del_init(&iter->i_fc_list);
1288 		ext4_clear_inode_state(&iter->vfs_inode,
1289 				       EXT4_STATE_FC_COMMITTING);
1290 		if (iter->i_sync_tid <= tid)
1291 			ext4_fc_reset_inode(&iter->vfs_inode);
1292 		/* Make sure EXT4_STATE_FC_COMMITTING bit is clear */
1293 		smp_mb();
1294 #if (BITS_PER_LONG < 64)
1295 		wake_up_bit(&iter->i_state_flags, EXT4_STATE_FC_COMMITTING);
1296 #else
1297 		wake_up_bit(&iter->i_flags, EXT4_STATE_FC_COMMITTING);
1298 #endif
1299 	}
1300 
1301 	while (!list_empty(&sbi->s_fc_dentry_q[FC_Q_MAIN])) {
1302 		fc_dentry = list_first_entry(&sbi->s_fc_dentry_q[FC_Q_MAIN],
1303 					     struct ext4_fc_dentry_update,
1304 					     fcd_list);
1305 		list_del_init(&fc_dentry->fcd_list);
1306 		list_del_init(&fc_dentry->fcd_dilist);
1307 		spin_unlock(&sbi->s_fc_lock);
1308 
1309 		if (fc_dentry->fcd_name.name &&
1310 			fc_dentry->fcd_name.len > DNAME_INLINE_LEN)
1311 			kfree(fc_dentry->fcd_name.name);
1312 		kmem_cache_free(ext4_fc_dentry_cachep, fc_dentry);
1313 		spin_lock(&sbi->s_fc_lock);
1314 	}
1315 
1316 	list_splice_init(&sbi->s_fc_dentry_q[FC_Q_STAGING],
1317 				&sbi->s_fc_dentry_q[FC_Q_MAIN]);
1318 	list_splice_init(&sbi->s_fc_q[FC_Q_STAGING],
1319 				&sbi->s_fc_q[FC_Q_MAIN]);
1320 
1321 	if (tid >= sbi->s_fc_ineligible_tid) {
1322 		sbi->s_fc_ineligible_tid = 0;
1323 		ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
1324 	}
1325 
1326 	if (full)
1327 		sbi->s_fc_bytes = 0;
1328 	spin_unlock(&sbi->s_fc_lock);
1329 	trace_ext4_fc_stats(sb);
1330 }
1331 
1332 /* Ext4 Replay Path Routines */
1333 
/*
 * Helper struct for dentry replay routines: the decoded fields of a
 * dentry TLV.
 */
struct dentry_info_args {
	int parent_ino;		/* inode number of the parent directory */
	int dname_len;		/* length of dname in bytes */
	int ino;		/* inode number the entry refers to */
	int inode_len;
	char *dname;		/* entry name, points into the TLV value */
};
1339 
1340 static inline void tl_to_darg(struct dentry_info_args *darg,
1341 			      struct ext4_fc_tl *tl, u8 *val)
1342 {
1343 	struct ext4_fc_dentry_info fcd;
1344 
1345 	memcpy(&fcd, val, sizeof(fcd));
1346 
1347 	darg->parent_ino = le32_to_cpu(fcd.fc_parent_ino);
1348 	darg->ino = le32_to_cpu(fcd.fc_ino);
1349 	darg->dname = val + offsetof(struct ext4_fc_dentry_info, fc_dname);
1350 	darg->dname_len = tl->fc_len - sizeof(struct ext4_fc_dentry_info);
1351 }
1352 
1353 static inline void ext4_fc_get_tl(struct ext4_fc_tl *tl, u8 *val)
1354 {
1355 	memcpy(tl, val, EXT4_FC_TAG_BASE_LEN);
1356 	tl->fc_len = le16_to_cpu(tl->fc_len);
1357 	tl->fc_tag = le16_to_cpu(tl->fc_tag);
1358 }
1359 
1360 /* Unlink replay function */
1361 static int ext4_fc_replay_unlink(struct super_block *sb, struct ext4_fc_tl *tl,
1362 				 u8 *val)
1363 {
1364 	struct inode *inode, *old_parent;
1365 	struct qstr entry;
1366 	struct dentry_info_args darg;
1367 	int ret = 0;
1368 
1369 	tl_to_darg(&darg, tl, val);
1370 
1371 	trace_ext4_fc_replay(sb, EXT4_FC_TAG_UNLINK, darg.ino,
1372 			darg.parent_ino, darg.dname_len);
1373 
1374 	entry.name = darg.dname;
1375 	entry.len = darg.dname_len;
1376 	inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL);
1377 
1378 	if (IS_ERR(inode)) {
1379 		ext4_debug("Inode %d not found", darg.ino);
1380 		return 0;
1381 	}
1382 
1383 	old_parent = ext4_iget(sb, darg.parent_ino,
1384 				EXT4_IGET_NORMAL);
1385 	if (IS_ERR(old_parent)) {
1386 		ext4_debug("Dir with inode %d not found", darg.parent_ino);
1387 		iput(inode);
1388 		return 0;
1389 	}
1390 
1391 	ret = __ext4_unlink(NULL, old_parent, &entry, inode);
1392 	/* -ENOENT ok coz it might not exist anymore. */
1393 	if (ret == -ENOENT)
1394 		ret = 0;
1395 	iput(old_parent);
1396 	iput(inode);
1397 	return ret;
1398 }
1399 
1400 static int ext4_fc_replay_link_internal(struct super_block *sb,
1401 				struct dentry_info_args *darg,
1402 				struct inode *inode)
1403 {
1404 	struct inode *dir = NULL;
1405 	struct dentry *dentry_dir = NULL, *dentry_inode = NULL;
1406 	struct qstr qstr_dname = QSTR_INIT(darg->dname, darg->dname_len);
1407 	int ret = 0;
1408 
1409 	dir = ext4_iget(sb, darg->parent_ino, EXT4_IGET_NORMAL);
1410 	if (IS_ERR(dir)) {
1411 		ext4_debug("Dir with inode %d not found.", darg->parent_ino);
1412 		dir = NULL;
1413 		goto out;
1414 	}
1415 
1416 	dentry_dir = d_obtain_alias(dir);
1417 	if (IS_ERR(dentry_dir)) {
1418 		ext4_debug("Failed to obtain dentry");
1419 		dentry_dir = NULL;
1420 		goto out;
1421 	}
1422 
1423 	dentry_inode = d_alloc(dentry_dir, &qstr_dname);
1424 	if (!dentry_inode) {
1425 		ext4_debug("Inode dentry not created.");
1426 		ret = -ENOMEM;
1427 		goto out;
1428 	}
1429 
1430 	ret = __ext4_link(dir, inode, dentry_inode);
1431 	/*
1432 	 * It's possible that link already existed since data blocks
1433 	 * for the dir in question got persisted before we crashed OR
1434 	 * we replayed this tag and crashed before the entire replay
1435 	 * could complete.
1436 	 */
1437 	if (ret && ret != -EEXIST) {
1438 		ext4_debug("Failed to link\n");
1439 		goto out;
1440 	}
1441 
1442 	ret = 0;
1443 out:
1444 	if (dentry_dir) {
1445 		d_drop(dentry_dir);
1446 		dput(dentry_dir);
1447 	} else if (dir) {
1448 		iput(dir);
1449 	}
1450 	if (dentry_inode) {
1451 		d_drop(dentry_inode);
1452 		dput(dentry_inode);
1453 	}
1454 
1455 	return ret;
1456 }
1457 
1458 /* Link replay function */
1459 static int ext4_fc_replay_link(struct super_block *sb, struct ext4_fc_tl *tl,
1460 			       u8 *val)
1461 {
1462 	struct inode *inode;
1463 	struct dentry_info_args darg;
1464 	int ret = 0;
1465 
1466 	tl_to_darg(&darg, tl, val);
1467 	trace_ext4_fc_replay(sb, EXT4_FC_TAG_LINK, darg.ino,
1468 			darg.parent_ino, darg.dname_len);
1469 
1470 	inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL);
1471 	if (IS_ERR(inode)) {
1472 		ext4_debug("Inode not found.");
1473 		return 0;
1474 	}
1475 
1476 	ret = ext4_fc_replay_link_internal(sb, &darg, inode);
1477 	iput(inode);
1478 	return ret;
1479 }
1480 
1481 /*
1482  * Record all the modified inodes during replay. We use this later to setup
1483  * block bitmaps correctly.
1484  */
1485 static int ext4_fc_record_modified_inode(struct super_block *sb, int ino)
1486 {
1487 	struct ext4_fc_replay_state *state;
1488 	int i;
1489 
1490 	state = &EXT4_SB(sb)->s_fc_replay_state;
1491 	for (i = 0; i < state->fc_modified_inodes_used; i++)
1492 		if (state->fc_modified_inodes[i] == ino)
1493 			return 0;
1494 	if (state->fc_modified_inodes_used == state->fc_modified_inodes_size) {
1495 		int *fc_modified_inodes;
1496 
1497 		fc_modified_inodes = krealloc(state->fc_modified_inodes,
1498 				sizeof(int) * (state->fc_modified_inodes_size +
1499 				EXT4_FC_REPLAY_REALLOC_INCREMENT),
1500 				GFP_KERNEL);
1501 		if (!fc_modified_inodes)
1502 			return -ENOMEM;
1503 		state->fc_modified_inodes = fc_modified_inodes;
1504 		state->fc_modified_inodes_size +=
1505 			EXT4_FC_REPLAY_REALLOC_INCREMENT;
1506 	}
1507 	state->fc_modified_inodes[state->fc_modified_inodes_used++] = ino;
1508 	return 0;
1509 }
1510 
1511 /*
1512  * Inode replay function
1513  */
1514 static int ext4_fc_replay_inode(struct super_block *sb, struct ext4_fc_tl *tl,
1515 				u8 *val)
1516 {
1517 	struct ext4_fc_inode fc_inode;
1518 	struct ext4_inode *raw_inode;
1519 	struct ext4_inode *raw_fc_inode;
1520 	struct inode *inode = NULL;
1521 	struct ext4_iloc iloc;
1522 	int inode_len, ino, ret, tag = tl->fc_tag;
1523 	struct ext4_extent_header *eh;
1524 
1525 	memcpy(&fc_inode, val, sizeof(fc_inode));
1526 
1527 	ino = le32_to_cpu(fc_inode.fc_ino);
1528 	trace_ext4_fc_replay(sb, tag, ino, 0, 0);
1529 
1530 	inode = ext4_iget(sb, ino, EXT4_IGET_NORMAL);
1531 	if (!IS_ERR(inode)) {
1532 		ext4_ext_clear_bb(inode);
1533 		iput(inode);
1534 	}
1535 	inode = NULL;
1536 
1537 	ret = ext4_fc_record_modified_inode(sb, ino);
1538 	if (ret)
1539 		goto out;
1540 
1541 	raw_fc_inode = (struct ext4_inode *)
1542 		(val + offsetof(struct ext4_fc_inode, fc_raw_inode));
1543 	ret = ext4_get_fc_inode_loc(sb, ino, &iloc);
1544 	if (ret)
1545 		goto out;
1546 
1547 	inode_len = tl->fc_len - sizeof(struct ext4_fc_inode);
1548 	raw_inode = ext4_raw_inode(&iloc);
1549 
1550 	memcpy(raw_inode, raw_fc_inode, offsetof(struct ext4_inode, i_block));
1551 	memcpy(&raw_inode->i_generation, &raw_fc_inode->i_generation,
1552 		inode_len - offsetof(struct ext4_inode, i_generation));
1553 	if (le32_to_cpu(raw_inode->i_flags) & EXT4_EXTENTS_FL) {
1554 		eh = (struct ext4_extent_header *)(&raw_inode->i_block[0]);
1555 		if (eh->eh_magic != EXT4_EXT_MAGIC) {
1556 			memset(eh, 0, sizeof(*eh));
1557 			eh->eh_magic = EXT4_EXT_MAGIC;
1558 			eh->eh_max = cpu_to_le16(
1559 				(sizeof(raw_inode->i_block) -
1560 				 sizeof(struct ext4_extent_header))
1561 				 / sizeof(struct ext4_extent));
1562 		}
1563 	} else if (le32_to_cpu(raw_inode->i_flags) & EXT4_INLINE_DATA_FL) {
1564 		memcpy(raw_inode->i_block, raw_fc_inode->i_block,
1565 			sizeof(raw_inode->i_block));
1566 	}
1567 
1568 	/* Immediately update the inode on disk. */
1569 	ret = ext4_handle_dirty_metadata(NULL, NULL, iloc.bh);
1570 	if (ret)
1571 		goto out;
1572 	ret = sync_dirty_buffer(iloc.bh);
1573 	if (ret)
1574 		goto out;
1575 	ret = ext4_mark_inode_used(sb, ino);
1576 	if (ret)
1577 		goto out;
1578 
1579 	/* Given that we just wrote the inode on disk, this SHOULD succeed. */
1580 	inode = ext4_iget(sb, ino, EXT4_IGET_NORMAL);
1581 	if (IS_ERR(inode)) {
1582 		ext4_debug("Inode not found.");
1583 		return -EFSCORRUPTED;
1584 	}
1585 
1586 	/*
1587 	 * Our allocator could have made different decisions than before
1588 	 * crashing. This should be fixed but until then, we calculate
1589 	 * the number of blocks the inode.
1590 	 */
1591 	if (!ext4_test_inode_flag(inode, EXT4_INODE_INLINE_DATA))
1592 		ext4_ext_replay_set_iblocks(inode);
1593 
1594 	inode->i_generation = le32_to_cpu(ext4_raw_inode(&iloc)->i_generation);
1595 	ext4_reset_inode_seed(inode);
1596 
1597 	ext4_inode_csum_set(inode, ext4_raw_inode(&iloc), EXT4_I(inode));
1598 	ret = ext4_handle_dirty_metadata(NULL, NULL, iloc.bh);
1599 	sync_dirty_buffer(iloc.bh);
1600 	brelse(iloc.bh);
1601 out:
1602 	iput(inode);
1603 	if (!ret)
1604 		blkdev_issue_flush(sb->s_bdev);
1605 
1606 	return 0;
1607 }
1608 
1609 /*
1610  * Dentry create replay function.
1611  *
1612  * EXT4_FC_TAG_CREAT is preceded by EXT4_FC_TAG_INODE_FULL. Which means, the
1613  * inode for which we are trying to create a dentry here, should already have
1614  * been replayed before we start here.
1615  */
1616 static int ext4_fc_replay_create(struct super_block *sb, struct ext4_fc_tl *tl,
1617 				 u8 *val)
1618 {
1619 	int ret = 0;
1620 	struct inode *inode = NULL;
1621 	struct inode *dir = NULL;
1622 	struct dentry_info_args darg;
1623 
1624 	tl_to_darg(&darg, tl, val);
1625 
1626 	trace_ext4_fc_replay(sb, EXT4_FC_TAG_CREAT, darg.ino,
1627 			darg.parent_ino, darg.dname_len);
1628 
1629 	/* This takes care of update group descriptor and other metadata */
1630 	ret = ext4_mark_inode_used(sb, darg.ino);
1631 	if (ret)
1632 		goto out;
1633 
1634 	inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL);
1635 	if (IS_ERR(inode)) {
1636 		ext4_debug("inode %d not found.", darg.ino);
1637 		inode = NULL;
1638 		ret = -EINVAL;
1639 		goto out;
1640 	}
1641 
1642 	if (S_ISDIR(inode->i_mode)) {
1643 		/*
1644 		 * If we are creating a directory, we need to make sure that the
1645 		 * dot and dot dot dirents are setup properly.
1646 		 */
1647 		dir = ext4_iget(sb, darg.parent_ino, EXT4_IGET_NORMAL);
1648 		if (IS_ERR(dir)) {
1649 			ext4_debug("Dir %d not found.", darg.ino);
1650 			goto out;
1651 		}
1652 		ret = ext4_init_new_dir(NULL, dir, inode);
1653 		iput(dir);
1654 		if (ret) {
1655 			ret = 0;
1656 			goto out;
1657 		}
1658 	}
1659 	ret = ext4_fc_replay_link_internal(sb, &darg, inode);
1660 	if (ret)
1661 		goto out;
1662 	set_nlink(inode, 1);
1663 	ext4_mark_inode_dirty(NULL, inode);
1664 out:
1665 	iput(inode);
1666 	return ret;
1667 }
1668 
1669 /*
1670  * Record physical disk regions which are in use as per fast commit area,
1671  * and used by inodes during replay phase. Our simple replay phase
1672  * allocator excludes these regions from allocation.
1673  */
1674 int ext4_fc_record_regions(struct super_block *sb, int ino,
1675 		ext4_lblk_t lblk, ext4_fsblk_t pblk, int len, int replay)
1676 {
1677 	struct ext4_fc_replay_state *state;
1678 	struct ext4_fc_alloc_region *region;
1679 
1680 	state = &EXT4_SB(sb)->s_fc_replay_state;
1681 	/*
1682 	 * during replay phase, the fc_regions_valid may not same as
1683 	 * fc_regions_used, update it when do new additions.
1684 	 */
1685 	if (replay && state->fc_regions_used != state->fc_regions_valid)
1686 		state->fc_regions_used = state->fc_regions_valid;
1687 	if (state->fc_regions_used == state->fc_regions_size) {
1688 		struct ext4_fc_alloc_region *fc_regions;
1689 
1690 		fc_regions = krealloc(state->fc_regions,
1691 				      sizeof(struct ext4_fc_alloc_region) *
1692 				      (state->fc_regions_size +
1693 				       EXT4_FC_REPLAY_REALLOC_INCREMENT),
1694 				      GFP_KERNEL);
1695 		if (!fc_regions)
1696 			return -ENOMEM;
1697 		state->fc_regions_size +=
1698 			EXT4_FC_REPLAY_REALLOC_INCREMENT;
1699 		state->fc_regions = fc_regions;
1700 	}
1701 	region = &state->fc_regions[state->fc_regions_used++];
1702 	region->ino = ino;
1703 	region->lblk = lblk;
1704 	region->pblk = pblk;
1705 	region->len = len;
1706 
1707 	if (replay)
1708 		state->fc_regions_valid++;
1709 
1710 	return 0;
1711 }
1712 
/*
 * Replay add range tag.
 *
 * Walks the logical range described by the extent in the TLV and, chunk by
 * chunk, either inserts a new extent (range not mapped), repoints the
 * existing mapping to the logged physical blocks (mapping changed) or
 * updates the written/unwritten state (mapping unchanged).
 *
 * Always returns 0: a missing inode and any error hit along the way are not
 * reported to the caller.
 */
static int ext4_fc_replay_add_range(struct super_block *sb,
				    struct ext4_fc_tl *tl, u8 *val)
{
	struct ext4_fc_add_range fc_add_ex;
	struct ext4_extent newex, *ex;
	struct inode *inode;
	ext4_lblk_t start, cur;
	int remaining, len;
	ext4_fsblk_t start_pblk;
	struct ext4_map_blocks map;
	struct ext4_ext_path *path = NULL;
	int ret;

	/* Work on a local copy of the TLV value. */
	memcpy(&fc_add_ex, val, sizeof(fc_add_ex));
	ex = (struct ext4_extent *)&fc_add_ex.fc_ex;

	trace_ext4_fc_replay(sb, EXT4_FC_TAG_ADD_RANGE,
		le32_to_cpu(fc_add_ex.fc_ino), le32_to_cpu(ex->ee_block),
		ext4_ext_get_actual_len(ex));

	inode = ext4_iget(sb, le32_to_cpu(fc_add_ex.fc_ino), EXT4_IGET_NORMAL);
	if (IS_ERR(inode)) {
		ext4_debug("Inode not found.");
		return 0;
	}

	ret = ext4_fc_record_modified_inode(sb, inode->i_ino);
	if (ret)
		goto out;

	start = le32_to_cpu(ex->ee_block);
	start_pblk = ext4_ext_pblock(ex);
	len = ext4_ext_get_actual_len(ex);

	cur = start;
	remaining = len;
	ext4_debug("ADD_RANGE, lblk %d, pblk %lld, len %d, unwritten %d, inode %ld\n",
		  start, start_pblk, len, ext4_ext_is_unwritten(ex),
		  inode->i_ino);

	/* Each iteration handles the chunk reported back in map.m_len. */
	while (remaining > 0) {
		map.m_lblk = cur;
		map.m_len = remaining;
		map.m_pblk = 0;
		ret = ext4_map_blocks(NULL, inode, &map, 0);

		if (ret < 0)
			goto out;

		if (ret == 0) {
			/* Range is not mapped */
			path = ext4_find_extent(inode, cur, NULL, 0);
			if (IS_ERR(path))
				goto out;
			memset(&newex, 0, sizeof(newex));
			newex.ee_block = cpu_to_le32(cur);
			/* Physical block at the same offset into the extent */
			ext4_ext_store_pblock(
				&newex, start_pblk + cur - start);
			newex.ee_len = cpu_to_le16(map.m_len);
			if (ext4_ext_is_unwritten(ex))
				ext4_ext_mark_unwritten(&newex);
			down_write(&EXT4_I(inode)->i_data_sem);
			ret = ext4_ext_insert_extent(
				NULL, inode, &path, &newex, 0);
			up_write((&EXT4_I(inode)->i_data_sem));
			ext4_free_ext_path(path);
			if (ret)
				goto out;
			goto next;
		}

		if (start_pblk + cur - start != map.m_pblk) {
			/*
			 * Logical to physical mapping changed. This can happen
			 * if this range was removed and then reallocated to
			 * map to new physical blocks during a fast commit.
			 */
			ret = ext4_ext_replay_update_ex(inode, cur, map.m_len,
					ext4_ext_is_unwritten(ex),
					start_pblk + cur - start);
			if (ret)
				goto out;
			/*
			 * Mark the old blocks as free since they aren't used
			 * anymore. We maintain an array of all the modified
			 * inodes. In case these blocks are still used at either
			 * a different logical range in the same inode or in
			 * some different inode, we will mark them as allocated
			 * at the end of the FC replay using our array of
			 * modified inodes.
			 */
			ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, 0);
			goto next;
		}

		/* Range is mapped and needs a state change */
		ext4_debug("Converting from %ld to %d %lld",
				map.m_flags & EXT4_MAP_UNWRITTEN,
			ext4_ext_is_unwritten(ex), map.m_pblk);
		ret = ext4_ext_replay_update_ex(inode, cur, map.m_len,
					ext4_ext_is_unwritten(ex), map.m_pblk);
		if (ret)
			goto out;
		/*
		 * We may have split the extent tree while toggling the state.
		 * Try to shrink the extent tree now.
		 */
		ext4_ext_replay_shrink_inode(inode, start + len);
next:
		/* Advance past the chunk just handled. */
		cur += map.m_len;
		remaining -= map.m_len;
	}
	/* i_size converted to a block count via s_blocksize_bits */
	ext4_ext_replay_shrink_inode(inode, i_size_read(inode) >>
					sb->s_blocksize_bits);
out:
	iput(inode);
	return 0;
}
1832 
1833 /* Replay DEL_RANGE tag */
1834 static int
1835 ext4_fc_replay_del_range(struct super_block *sb, struct ext4_fc_tl *tl,
1836 			 u8 *val)
1837 {
1838 	struct inode *inode;
1839 	struct ext4_fc_del_range lrange;
1840 	struct ext4_map_blocks map;
1841 	ext4_lblk_t cur, remaining;
1842 	int ret;
1843 
1844 	memcpy(&lrange, val, sizeof(lrange));
1845 	cur = le32_to_cpu(lrange.fc_lblk);
1846 	remaining = le32_to_cpu(lrange.fc_len);
1847 
1848 	trace_ext4_fc_replay(sb, EXT4_FC_TAG_DEL_RANGE,
1849 		le32_to_cpu(lrange.fc_ino), cur, remaining);
1850 
1851 	inode = ext4_iget(sb, le32_to_cpu(lrange.fc_ino), EXT4_IGET_NORMAL);
1852 	if (IS_ERR(inode)) {
1853 		ext4_debug("Inode %d not found", le32_to_cpu(lrange.fc_ino));
1854 		return 0;
1855 	}
1856 
1857 	ret = ext4_fc_record_modified_inode(sb, inode->i_ino);
1858 	if (ret)
1859 		goto out;
1860 
1861 	ext4_debug("DEL_RANGE, inode %ld, lblk %d, len %d\n",
1862 			inode->i_ino, le32_to_cpu(lrange.fc_lblk),
1863 			le32_to_cpu(lrange.fc_len));
1864 	while (remaining > 0) {
1865 		map.m_lblk = cur;
1866 		map.m_len = remaining;
1867 
1868 		ret = ext4_map_blocks(NULL, inode, &map, 0);
1869 		if (ret < 0)
1870 			goto out;
1871 		if (ret > 0) {
1872 			remaining -= ret;
1873 			cur += ret;
1874 			ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, 0);
1875 		} else {
1876 			remaining -= map.m_len;
1877 			cur += map.m_len;
1878 		}
1879 	}
1880 
1881 	down_write(&EXT4_I(inode)->i_data_sem);
1882 	ret = ext4_ext_remove_space(inode, le32_to_cpu(lrange.fc_lblk),
1883 				le32_to_cpu(lrange.fc_lblk) +
1884 				le32_to_cpu(lrange.fc_len) - 1);
1885 	up_write(&EXT4_I(inode)->i_data_sem);
1886 	if (ret)
1887 		goto out;
1888 	ext4_ext_replay_shrink_inode(inode,
1889 		i_size_read(inode) >> sb->s_blocksize_bits);
1890 	ext4_mark_inode_dirty(NULL, inode);
1891 out:
1892 	iput(inode);
1893 	return 0;
1894 }
1895 
1896 static void ext4_fc_set_bitmaps_and_counters(struct super_block *sb)
1897 {
1898 	struct ext4_fc_replay_state *state;
1899 	struct inode *inode;
1900 	struct ext4_ext_path *path = NULL;
1901 	struct ext4_map_blocks map;
1902 	int i, ret, j;
1903 	ext4_lblk_t cur, end;
1904 
1905 	state = &EXT4_SB(sb)->s_fc_replay_state;
1906 	for (i = 0; i < state->fc_modified_inodes_used; i++) {
1907 		inode = ext4_iget(sb, state->fc_modified_inodes[i],
1908 			EXT4_IGET_NORMAL);
1909 		if (IS_ERR(inode)) {
1910 			ext4_debug("Inode %d not found.",
1911 				state->fc_modified_inodes[i]);
1912 			continue;
1913 		}
1914 		cur = 0;
1915 		end = EXT_MAX_BLOCKS;
1916 		if (ext4_test_inode_flag(inode, EXT4_INODE_INLINE_DATA)) {
1917 			iput(inode);
1918 			continue;
1919 		}
1920 		while (cur < end) {
1921 			map.m_lblk = cur;
1922 			map.m_len = end - cur;
1923 
1924 			ret = ext4_map_blocks(NULL, inode, &map, 0);
1925 			if (ret < 0)
1926 				break;
1927 
1928 			if (ret > 0) {
1929 				path = ext4_find_extent(inode, map.m_lblk, NULL, 0);
1930 				if (!IS_ERR(path)) {
1931 					for (j = 0; j < path->p_depth; j++)
1932 						ext4_mb_mark_bb(inode->i_sb,
1933 							path[j].p_block, 1, 1);
1934 					ext4_free_ext_path(path);
1935 				}
1936 				cur += ret;
1937 				ext4_mb_mark_bb(inode->i_sb, map.m_pblk,
1938 							map.m_len, 1);
1939 			} else {
1940 				cur = cur + (map.m_len ? map.m_len : 1);
1941 			}
1942 		}
1943 		iput(inode);
1944 	}
1945 }
1946 
1947 /*
1948  * Check if block is in excluded regions for block allocation. The simple
1949  * allocator that runs during replay phase is calls this function to see
1950  * if it is okay to use a block.
1951  */
1952 bool ext4_fc_replay_check_excluded(struct super_block *sb, ext4_fsblk_t blk)
1953 {
1954 	int i;
1955 	struct ext4_fc_replay_state *state;
1956 
1957 	state = &EXT4_SB(sb)->s_fc_replay_state;
1958 	for (i = 0; i < state->fc_regions_valid; i++) {
1959 		if (state->fc_regions[i].ino == 0 ||
1960 			state->fc_regions[i].len == 0)
1961 			continue;
1962 		if (in_range(blk, state->fc_regions[i].pblk,
1963 					state->fc_regions[i].len))
1964 			return true;
1965 	}
1966 	return false;
1967 }
1968 
1969 /* Cleanup function called after replay */
1970 void ext4_fc_replay_cleanup(struct super_block *sb)
1971 {
1972 	struct ext4_sb_info *sbi = EXT4_SB(sb);
1973 
1974 	sbi->s_mount_state &= ~EXT4_FC_REPLAY;
1975 	kfree(sbi->s_fc_replay_state.fc_regions);
1976 	kfree(sbi->s_fc_replay_state.fc_modified_inodes);
1977 }
1978 
1979 static inline bool ext4_fc_tag_len_isvalid(struct ext4_fc_tl *tl,
1980 					   u8 *val, u8 *end)
1981 {
1982 	if (val + tl->fc_len > end)
1983 		return false;
1984 
1985 	/* Here only check ADD_RANGE/TAIL/HEAD which will read data when do
1986 	 * journal rescan before do CRC check. Other tags length check will
1987 	 * rely on CRC check.
1988 	 */
1989 	switch (tl->fc_tag) {
1990 	case EXT4_FC_TAG_ADD_RANGE:
1991 		return (sizeof(struct ext4_fc_add_range) == tl->fc_len);
1992 	case EXT4_FC_TAG_TAIL:
1993 		return (sizeof(struct ext4_fc_tail) <= tl->fc_len);
1994 	case EXT4_FC_TAG_HEAD:
1995 		return (sizeof(struct ext4_fc_head) == tl->fc_len);
1996 	case EXT4_FC_TAG_DEL_RANGE:
1997 	case EXT4_FC_TAG_LINK:
1998 	case EXT4_FC_TAG_UNLINK:
1999 	case EXT4_FC_TAG_CREAT:
2000 	case EXT4_FC_TAG_INODE:
2001 	case EXT4_FC_TAG_PAD:
2002 	default:
2003 		return true;
2004 	}
2005 }
2006 
2007 /*
2008  * Recovery Scan phase handler
2009  *
2010  * This function is called during the scan phase and is responsible
2011  * for doing following things:
2012  * - Make sure the fast commit area has valid tags for replay
2013  * - Count number of tags that need to be replayed by the replay handler
2014  * - Verify CRC
2015  * - Create a list of excluded blocks for allocation during replay phase
2016  *
2017  * This function returns JBD2_FC_REPLAY_CONTINUE to indicate that SCAN is
2018  * incomplete and JBD2 should send more blocks. It returns JBD2_FC_REPLAY_STOP
2019  * to indicate that scan has finished and JBD2 can now start replay phase.
2020  * It returns a negative error to indicate that there was an error. At the end
2021  * of a successful scan phase, sbi->s_fc_replay_state.fc_replay_num_tags is set
2022  * to indicate the number of tags that need to replayed during the replay phase.
2023  */
2024 static int ext4_fc_replay_scan(journal_t *journal,
2025 				struct buffer_head *bh, int off,
2026 				tid_t expected_tid)
2027 {
2028 	struct super_block *sb = journal->j_private;
2029 	struct ext4_sb_info *sbi = EXT4_SB(sb);
2030 	struct ext4_fc_replay_state *state;
2031 	int ret = JBD2_FC_REPLAY_CONTINUE;
2032 	struct ext4_fc_add_range ext;
2033 	struct ext4_fc_tl tl;
2034 	struct ext4_fc_tail tail;
2035 	__u8 *start, *end, *cur, *val;
2036 	struct ext4_fc_head head;
2037 	struct ext4_extent *ex;
2038 
2039 	state = &sbi->s_fc_replay_state;
2040 
2041 	start = (u8 *)bh->b_data;
2042 	end = (__u8 *)bh->b_data + journal->j_blocksize - 1;
2043 
2044 	if (state->fc_replay_expected_off == 0) {
2045 		state->fc_cur_tag = 0;
2046 		state->fc_replay_num_tags = 0;
2047 		state->fc_crc = 0;
2048 		state->fc_regions = NULL;
2049 		state->fc_regions_valid = state->fc_regions_used =
2050 			state->fc_regions_size = 0;
2051 		/* Check if we can stop early */
2052 		if (le16_to_cpu(((struct ext4_fc_tl *)start)->fc_tag)
2053 			!= EXT4_FC_TAG_HEAD)
2054 			return 0;
2055 	}
2056 
2057 	if (off != state->fc_replay_expected_off) {
2058 		ret = -EFSCORRUPTED;
2059 		goto out_err;
2060 	}
2061 
2062 	state->fc_replay_expected_off++;
2063 	for (cur = start; cur < end - EXT4_FC_TAG_BASE_LEN;
2064 	     cur = cur + EXT4_FC_TAG_BASE_LEN + tl.fc_len) {
2065 		ext4_fc_get_tl(&tl, cur);
2066 		val = cur + EXT4_FC_TAG_BASE_LEN;
2067 		if (!ext4_fc_tag_len_isvalid(&tl, val, end)) {
2068 			ret = state->fc_replay_num_tags ?
2069 				JBD2_FC_REPLAY_STOP : -ECANCELED;
2070 			goto out_err;
2071 		}
2072 		ext4_debug("Scan phase, tag:%s, blk %lld\n",
2073 			   tag2str(tl.fc_tag), bh->b_blocknr);
2074 		switch (tl.fc_tag) {
2075 		case EXT4_FC_TAG_ADD_RANGE:
2076 			memcpy(&ext, val, sizeof(ext));
2077 			ex = (struct ext4_extent *)&ext.fc_ex;
2078 			ret = ext4_fc_record_regions(sb,
2079 				le32_to_cpu(ext.fc_ino),
2080 				le32_to_cpu(ex->ee_block), ext4_ext_pblock(ex),
2081 				ext4_ext_get_actual_len(ex), 0);
2082 			if (ret < 0)
2083 				break;
2084 			ret = JBD2_FC_REPLAY_CONTINUE;
2085 			fallthrough;
2086 		case EXT4_FC_TAG_DEL_RANGE:
2087 		case EXT4_FC_TAG_LINK:
2088 		case EXT4_FC_TAG_UNLINK:
2089 		case EXT4_FC_TAG_CREAT:
2090 		case EXT4_FC_TAG_INODE:
2091 		case EXT4_FC_TAG_PAD:
2092 			state->fc_cur_tag++;
2093 			state->fc_crc = ext4_chksum(sbi, state->fc_crc, cur,
2094 				EXT4_FC_TAG_BASE_LEN + tl.fc_len);
2095 			break;
2096 		case EXT4_FC_TAG_TAIL:
2097 			state->fc_cur_tag++;
2098 			memcpy(&tail, val, sizeof(tail));
2099 			state->fc_crc = ext4_chksum(sbi, state->fc_crc, cur,
2100 						EXT4_FC_TAG_BASE_LEN +
2101 						offsetof(struct ext4_fc_tail,
2102 						fc_crc));
2103 			if (le32_to_cpu(tail.fc_tid) == expected_tid &&
2104 				le32_to_cpu(tail.fc_crc) == state->fc_crc) {
2105 				state->fc_replay_num_tags = state->fc_cur_tag;
2106 				state->fc_regions_valid =
2107 					state->fc_regions_used;
2108 			} else {
2109 				ret = state->fc_replay_num_tags ?
2110 					JBD2_FC_REPLAY_STOP : -EFSBADCRC;
2111 			}
2112 			state->fc_crc = 0;
2113 			break;
2114 		case EXT4_FC_TAG_HEAD:
2115 			memcpy(&head, val, sizeof(head));
2116 			if (le32_to_cpu(head.fc_features) &
2117 				~EXT4_FC_SUPPORTED_FEATURES) {
2118 				ret = -EOPNOTSUPP;
2119 				break;
2120 			}
2121 			if (le32_to_cpu(head.fc_tid) != expected_tid) {
2122 				ret = JBD2_FC_REPLAY_STOP;
2123 				break;
2124 			}
2125 			state->fc_cur_tag++;
2126 			state->fc_crc = ext4_chksum(sbi, state->fc_crc, cur,
2127 				EXT4_FC_TAG_BASE_LEN + tl.fc_len);
2128 			break;
2129 		default:
2130 			ret = state->fc_replay_num_tags ?
2131 				JBD2_FC_REPLAY_STOP : -ECANCELED;
2132 		}
2133 		if (ret < 0 || ret == JBD2_FC_REPLAY_STOP)
2134 			break;
2135 	}
2136 
2137 out_err:
2138 	trace_ext4_fc_replay_scan(sb, ret, off);
2139 	return ret;
2140 }
2141 
2142 /*
2143  * Main recovery path entry point.
2144  * The meaning of return codes is similar as above.
2145  */
2146 static int ext4_fc_replay(journal_t *journal, struct buffer_head *bh,
2147 				enum passtype pass, int off, tid_t expected_tid)
2148 {
2149 	struct super_block *sb = journal->j_private;
2150 	struct ext4_sb_info *sbi = EXT4_SB(sb);
2151 	struct ext4_fc_tl tl;
2152 	__u8 *start, *end, *cur, *val;
2153 	int ret = JBD2_FC_REPLAY_CONTINUE;
2154 	struct ext4_fc_replay_state *state = &sbi->s_fc_replay_state;
2155 	struct ext4_fc_tail tail;
2156 
2157 	if (pass == PASS_SCAN) {
2158 		state->fc_current_pass = PASS_SCAN;
2159 		return ext4_fc_replay_scan(journal, bh, off, expected_tid);
2160 	}
2161 
2162 	if (state->fc_current_pass != pass) {
2163 		state->fc_current_pass = pass;
2164 		sbi->s_mount_state |= EXT4_FC_REPLAY;
2165 	}
2166 	if (!sbi->s_fc_replay_state.fc_replay_num_tags) {
2167 		ext4_debug("Replay stops\n");
2168 		ext4_fc_set_bitmaps_and_counters(sb);
2169 		return 0;
2170 	}
2171 
2172 #ifdef CONFIG_EXT4_DEBUG
2173 	if (sbi->s_fc_debug_max_replay && off >= sbi->s_fc_debug_max_replay) {
2174 		pr_warn("Dropping fc block %d because max_replay set\n", off);
2175 		return JBD2_FC_REPLAY_STOP;
2176 	}
2177 #endif
2178 
2179 	start = (u8 *)bh->b_data;
2180 	end = (__u8 *)bh->b_data + journal->j_blocksize - 1;
2181 
2182 	for (cur = start; cur < end - EXT4_FC_TAG_BASE_LEN;
2183 	     cur = cur + EXT4_FC_TAG_BASE_LEN + tl.fc_len) {
2184 		ext4_fc_get_tl(&tl, cur);
2185 		val = cur + EXT4_FC_TAG_BASE_LEN;
2186 
2187 		if (state->fc_replay_num_tags == 0) {
2188 			ret = JBD2_FC_REPLAY_STOP;
2189 			ext4_fc_set_bitmaps_and_counters(sb);
2190 			break;
2191 		}
2192 
2193 		ext4_debug("Replay phase, tag:%s\n", tag2str(tl.fc_tag));
2194 		state->fc_replay_num_tags--;
2195 		switch (tl.fc_tag) {
2196 		case EXT4_FC_TAG_LINK:
2197 			ret = ext4_fc_replay_link(sb, &tl, val);
2198 			break;
2199 		case EXT4_FC_TAG_UNLINK:
2200 			ret = ext4_fc_replay_unlink(sb, &tl, val);
2201 			break;
2202 		case EXT4_FC_TAG_ADD_RANGE:
2203 			ret = ext4_fc_replay_add_range(sb, &tl, val);
2204 			break;
2205 		case EXT4_FC_TAG_CREAT:
2206 			ret = ext4_fc_replay_create(sb, &tl, val);
2207 			break;
2208 		case EXT4_FC_TAG_DEL_RANGE:
2209 			ret = ext4_fc_replay_del_range(sb, &tl, val);
2210 			break;
2211 		case EXT4_FC_TAG_INODE:
2212 			ret = ext4_fc_replay_inode(sb, &tl, val);
2213 			break;
2214 		case EXT4_FC_TAG_PAD:
2215 			trace_ext4_fc_replay(sb, EXT4_FC_TAG_PAD, 0,
2216 					     tl.fc_len, 0);
2217 			break;
2218 		case EXT4_FC_TAG_TAIL:
2219 			trace_ext4_fc_replay(sb, EXT4_FC_TAG_TAIL,
2220 					     0, tl.fc_len, 0);
2221 			memcpy(&tail, val, sizeof(tail));
2222 			WARN_ON(le32_to_cpu(tail.fc_tid) != expected_tid);
2223 			break;
2224 		case EXT4_FC_TAG_HEAD:
2225 			break;
2226 		default:
2227 			trace_ext4_fc_replay(sb, tl.fc_tag, 0, tl.fc_len, 0);
2228 			ret = -ECANCELED;
2229 			break;
2230 		}
2231 		if (ret < 0)
2232 			break;
2233 		ret = JBD2_FC_REPLAY_CONTINUE;
2234 	}
2235 	return ret;
2236 }
2237 
2238 void ext4_fc_init(struct super_block *sb, journal_t *journal)
2239 {
2240 	/*
2241 	 * We set replay callback even if fast commit disabled because we may
2242 	 * could still have fast commit blocks that need to be replayed even if
2243 	 * fast commit has now been turned off.
2244 	 */
2245 	journal->j_fc_replay_callback = ext4_fc_replay;
2246 	if (!test_opt2(sb, JOURNAL_FAST_COMMIT))
2247 		return;
2248 	journal->j_fc_cleanup_callback = ext4_fc_cleanup;
2249 }
2250 
/*
 * Human-readable descriptions of the reasons a fast commit can be ineligible,
 * printed by ext4_fc_info_show(), which indexes this table by reason number.
 *
 * Both the strings and the table itself are const, so no slot of this
 * lookup table can be reassigned at run time.
 */
static const char * const fc_ineligible_reasons[] = {
	"Extended attributes changed",
	"Cross rename",
	"Journal flag changed",
	"Insufficient memory",
	"Swap boot",
	"Resize",
	"Dir renamed",
	"Falloc range op",
	"Data journalling",
	"FC Commit Failed"
};
2263 
2264 int ext4_fc_info_show(struct seq_file *seq, void *v)
2265 {
2266 	struct ext4_sb_info *sbi = EXT4_SB((struct super_block *)seq->private);
2267 	struct ext4_fc_stats *stats = &sbi->s_fc_stats;
2268 	int i;
2269 
2270 	if (v != SEQ_START_TOKEN)
2271 		return 0;
2272 
2273 	seq_printf(seq,
2274 		"fc stats:\n%ld commits\n%ld ineligible\n%ld numblks\n%lluus avg_commit_time\n",
2275 		   stats->fc_num_commits, stats->fc_ineligible_commits,
2276 		   stats->fc_numblks,
2277 		   div_u64(stats->s_fc_avg_commit_time, 1000));
2278 	seq_puts(seq, "Ineligible reasons:\n");
2279 	for (i = 0; i < EXT4_FC_REASON_MAX; i++)
2280 		seq_printf(seq, "\"%s\":\t%d\n", fc_ineligible_reasons[i],
2281 			stats->fc_ineligible_reason_count[i]);
2282 
2283 	return 0;
2284 }
2285 
2286 int __init ext4_fc_init_dentry_cache(void)
2287 {
2288 	ext4_fc_dentry_cachep = KMEM_CACHE(ext4_fc_dentry_update,
2289 					   SLAB_RECLAIM_ACCOUNT);
2290 
2291 	if (ext4_fc_dentry_cachep == NULL)
2292 		return -ENOMEM;
2293 
2294 	return 0;
2295 }
2296 
2297 void ext4_fc_destroy_dentry_cache(void)
2298 {
2299 	kmem_cache_destroy(ext4_fc_dentry_cachep);
2300 }
2301