16866d7b3SHarshad Shirwadkar // SPDX-License-Identifier: GPL-2.0 26866d7b3SHarshad Shirwadkar 36866d7b3SHarshad Shirwadkar /* 46866d7b3SHarshad Shirwadkar * fs/ext4/fast_commit.c 56866d7b3SHarshad Shirwadkar * 66866d7b3SHarshad Shirwadkar * Written by Harshad Shirwadkar <harshadshirwadkar@gmail.com> 76866d7b3SHarshad Shirwadkar * 86866d7b3SHarshad Shirwadkar * Ext4 fast commits routines. 96866d7b3SHarshad Shirwadkar */ 10aa75f4d3SHarshad Shirwadkar #include "ext4.h" 116866d7b3SHarshad Shirwadkar #include "ext4_jbd2.h" 12aa75f4d3SHarshad Shirwadkar #include "ext4_extents.h" 13aa75f4d3SHarshad Shirwadkar #include "mballoc.h" 14aa75f4d3SHarshad Shirwadkar 15aa75f4d3SHarshad Shirwadkar /* 16aa75f4d3SHarshad Shirwadkar * Ext4 Fast Commits 17aa75f4d3SHarshad Shirwadkar * ----------------- 18aa75f4d3SHarshad Shirwadkar * 19aa75f4d3SHarshad Shirwadkar * Ext4 fast commits implement fine grained journalling for Ext4. 20aa75f4d3SHarshad Shirwadkar * 21aa75f4d3SHarshad Shirwadkar * Fast commits are organized as a log of tag-length-value (TLV) structs. (See 22aa75f4d3SHarshad Shirwadkar * struct ext4_fc_tl). Each TLV contains some delta that is replayed TLV by 23aa75f4d3SHarshad Shirwadkar * TLV during the recovery phase. For the scenarios for which we currently 24aa75f4d3SHarshad Shirwadkar * don't have replay code, fast commit falls back to full commits. 25aa75f4d3SHarshad Shirwadkar * Fast commits record delta in one of the following three categories. 
 *
 * (A) Directory entry updates:
 *
 * - EXT4_FC_TAG_UNLINK    - records directory entry unlink
 * - EXT4_FC_TAG_LINK      - records directory entry link
 * - EXT4_FC_TAG_CREAT     - records inode and directory entry creation
 *
 * (B) File specific data range updates:
 *
 * - EXT4_FC_TAG_ADD_RANGE - records addition of new blocks to an inode
 * - EXT4_FC_TAG_DEL_RANGE - records deletion of blocks from an inode
 *
 * (C) Inode metadata (mtime / ctime etc):
 *
 * - EXT4_FC_TAG_INODE     - record the inode that should be replayed
 *                           during recovery. Note that iblocks field is
 *                           not replayed and instead derived during
 *                           replay.
 *
 * Commit Operation
 * ----------------
 * With fast commits, we maintain all the directory entry operations in the
 * order in which they are issued in an in-memory queue. This queue is flushed
 * to disk during the commit operation. We also maintain a list of inodes
 * that need to be committed during a fast commit in another in-memory queue of
 * inodes.
 * During the commit operation, we commit in the following order:
 *
 * [1] Lock inodes for any further data updates by setting COMMITTING state
 * [2] Submit data buffers of all the inodes
 * [3] Wait for [2] to complete
 * [4] Commit all the directory entry updates in the fast commit space
 * [5] Commit all the changed inode structures
 * [6] Write tail tag (this tag ensures the atomicity, please read the following
 *     section for more details).
 * [7] Wait for [4], [5] and [6] to complete.
 *
 * All the inode updates must call ext4_fc_start_update() before starting an
 * update. If such an ongoing update is present, fast commit waits for it to
 * complete. The completion of such an update is marked by
 * ext4_fc_stop_update().
 *
 * Fast Commit Ineligibility
 * -------------------------
 * Not all operations are supported by fast commits today (e.g. extended
 * attributes). Fast commit ineligibility is marked by calling one of the
 * two following functions:
 *
 * - ext4_fc_mark_ineligible(): This makes the next fast commit operation fall
 *   back to a full commit. This is useful in case of transient errors.
 *
 * - ext4_fc_start_ineligible() and ext4_fc_stop_ineligible() - This makes all
 *   the fast commits happening between ext4_fc_start_ineligible() and
 *   ext4_fc_stop_ineligible() and one fast commit after the call to
 *   ext4_fc_stop_ineligible() fall back to full commits. It is important to
 *   make one more fast commit fall back to a full commit after the stop call so
 *   that it is guaranteed that the fast commit ineligible operation contained
 *   within ext4_fc_start_ineligible() and ext4_fc_stop_ineligible() is
 *   followed by at least 1 full commit.
 *
 * Atomicity of commits
 * --------------------
 * In order to guarantee atomicity during the commit operation, fast commit
 * uses "EXT4_FC_TAG_TAIL" tag that marks a fast commit as complete. Tail
 * tag contains CRC of the contents and TID of the transaction after which
 * this fast commit should be applied. Recovery code replays fast commit
 * logs only if there's at least 1 valid tail present. For every fast commit
 * operation, there is 1 tail. This means, we may end up with multiple tails
 * in the fast commit space.
 * Here's an example:
 *
 * - Create a new file A and remove existing file B
 * - fsync()
 * - Append contents to file A
 * - Truncate file A
 * - fsync()
 *
 * The fast commit space at the end of above operations would look like this:
 *      [HEAD] [CREAT A] [UNLINK B] [TAIL] [ADD_RANGE A] [DEL_RANGE A] [TAIL]
 *             |<---  Fast Commit 1   --->|<---      Fast Commit 2     ---->|
 *
 * Replay code should thus check for all the valid tails in the FC area.
 *
 * Fast Commit Replay Idempotence
 * ------------------------------
 *
 * Fast commit tags are idempotent in nature provided the recovery code follows
 * certain rules. The guiding principle that the commit path follows while
 * committing is that it stores the result of a particular operation instead of
 * storing the procedure.
 *
 * Let's consider this rename operation: 'mv /a /b'. Let's assume dirent '/a'
 * was associated with inode 10.
 * During fast commit, instead of storing this operation as a procedure
 * "rename a to b", we store the resulting file system state as a "series" of
 * outcomes:
 *
 * - Link dirent b to inode 10
 * - Unlink dirent a
 * - Inode <10> with valid refcount
 *
 * Now when recovery code runs, it needs to "enforce" this state on the file
 * system. This is what guarantees idempotence of fast commit replay.
 *
 * Let's take an example of a procedure that is not idempotent and see how fast
 * commits make it idempotent. Consider the following sequence of operations:
 *
 *     rm A;    mv B A;    read A
 *  (x)     (y)        (z)
 *
 * (x), (y) and (z) are the points at which we can crash. If we store this
 * sequence of operations as is then the replay is not idempotent. Let's say
 * while in replay, we crash at (z). During the second replay, file A (which was
 * actually created as a result of "mv B A" operation) would get deleted. Thus,
 * file named A would be absent when we try to read A. So, this sequence of
 * operations is not idempotent. However, as mentioned above, instead of storing
 * the procedure fast commits store the outcome of each procedure.
 * Thus the fast commit log for the above procedure would be as follows:
 *
 * (Let's assume dirent A was linked to inode 10 and dirent B was linked to
 * inode 11 before the replay)
 *
 *    [Unlink A]   [Link A to inode 11]   [Unlink B]   [Inode 11]
 * (w)          (x)                    (y)          (z)
 *
 * If we crash at (z), we will have file A linked to inode 11. During the second
 * replay, we will remove file A (inode 11). But we will create it back and make
 * it point to inode 11. We won't find B, so we'll just skip that step. At this
 * point, the refcount for inode 11 is not reliable, but that gets fixed by the
 * replay of last inode 11 tag. Crashes at points (w), (x) and (y) get handled
 * similarly. Thus, by converting a non-idempotent procedure into a series of
 * idempotent outcomes, fast commits ensured idempotence during the replay.
 *
 * TODOs
 * -----
 *
 * 0) Fast commit replay path hardening: Fast commit replay code should use
 *    journal handles to make sure all the updates it does during the replay
 *    path are atomic. With that if we crash during fast commit replay, after
 *    trying to do recovery again, we will find a file system where fast commit
 *    area is invalid (because new full commit would be found).
 *    In order to deal with that, fast commit replay code should ensure that
 *    the "FC_REPLAY" superblock state is persisted before starting the replay,
 *    so that after the crash, fast commit recovery code can look at that flag
 *    and perform fast commit recovery even if that area is invalidated by
 *    later full commits.
 *
 * 1) Make fast commit atomic updates more fine grained. Today, a fast commit
 *    eligible update must be protected within ext4_fc_start_update() and
 *    ext4_fc_stop_update(). These routines are called from much higher-level
 *    routines. This can be made more fine grained by combining with
 *    ext4_journal_start().
 *
 * 2) Same as above for ext4_fc_start_ineligible() and
 *    ext4_fc_stop_ineligible().
 *
 * 3) Handle more ineligible cases.
178aa75f4d3SHarshad Shirwadkar */ 179aa75f4d3SHarshad Shirwadkar 180aa75f4d3SHarshad Shirwadkar #include <trace/events/ext4.h> 181aa75f4d3SHarshad Shirwadkar static struct kmem_cache *ext4_fc_dentry_cachep; 182aa75f4d3SHarshad Shirwadkar 183aa75f4d3SHarshad Shirwadkar static void ext4_end_buffer_io_sync(struct buffer_head *bh, int uptodate) 184aa75f4d3SHarshad Shirwadkar { 185aa75f4d3SHarshad Shirwadkar BUFFER_TRACE(bh, ""); 186aa75f4d3SHarshad Shirwadkar if (uptodate) { 187aa75f4d3SHarshad Shirwadkar ext4_debug("%s: Block %lld up-to-date", 188aa75f4d3SHarshad Shirwadkar __func__, bh->b_blocknr); 189aa75f4d3SHarshad Shirwadkar set_buffer_uptodate(bh); 190aa75f4d3SHarshad Shirwadkar } else { 191aa75f4d3SHarshad Shirwadkar ext4_debug("%s: Block %lld not up-to-date", 192aa75f4d3SHarshad Shirwadkar __func__, bh->b_blocknr); 193aa75f4d3SHarshad Shirwadkar clear_buffer_uptodate(bh); 194aa75f4d3SHarshad Shirwadkar } 195aa75f4d3SHarshad Shirwadkar 196aa75f4d3SHarshad Shirwadkar unlock_buffer(bh); 197aa75f4d3SHarshad Shirwadkar } 198aa75f4d3SHarshad Shirwadkar 199aa75f4d3SHarshad Shirwadkar static inline void ext4_fc_reset_inode(struct inode *inode) 200aa75f4d3SHarshad Shirwadkar { 201aa75f4d3SHarshad Shirwadkar struct ext4_inode_info *ei = EXT4_I(inode); 202aa75f4d3SHarshad Shirwadkar 203aa75f4d3SHarshad Shirwadkar ei->i_fc_lblk_start = 0; 204aa75f4d3SHarshad Shirwadkar ei->i_fc_lblk_len = 0; 205aa75f4d3SHarshad Shirwadkar } 206aa75f4d3SHarshad Shirwadkar 207aa75f4d3SHarshad Shirwadkar void ext4_fc_init_inode(struct inode *inode) 208aa75f4d3SHarshad Shirwadkar { 209aa75f4d3SHarshad Shirwadkar struct ext4_inode_info *ei = EXT4_I(inode); 210aa75f4d3SHarshad Shirwadkar 211aa75f4d3SHarshad Shirwadkar ext4_fc_reset_inode(inode); 212aa75f4d3SHarshad Shirwadkar ext4_clear_inode_state(inode, EXT4_STATE_FC_COMMITTING); 213aa75f4d3SHarshad Shirwadkar INIT_LIST_HEAD(&ei->i_fc_list); 214aa75f4d3SHarshad Shirwadkar init_waitqueue_head(&ei->i_fc_wait); 215aa75f4d3SHarshad Shirwadkar 
atomic_set(&ei->i_fc_updates, 0); 216aa75f4d3SHarshad Shirwadkar } 217aa75f4d3SHarshad Shirwadkar 218f6634e26SHarshad Shirwadkar /* This function must be called with sbi->s_fc_lock held. */ 219f6634e26SHarshad Shirwadkar static void ext4_fc_wait_committing_inode(struct inode *inode) 220fa329e27STheodore Ts'o __releases(&EXT4_SB(inode->i_sb)->s_fc_lock) 221f6634e26SHarshad Shirwadkar { 222f6634e26SHarshad Shirwadkar wait_queue_head_t *wq; 223f6634e26SHarshad Shirwadkar struct ext4_inode_info *ei = EXT4_I(inode); 224f6634e26SHarshad Shirwadkar 225f6634e26SHarshad Shirwadkar #if (BITS_PER_LONG < 64) 226f6634e26SHarshad Shirwadkar DEFINE_WAIT_BIT(wait, &ei->i_state_flags, 227f6634e26SHarshad Shirwadkar EXT4_STATE_FC_COMMITTING); 228f6634e26SHarshad Shirwadkar wq = bit_waitqueue(&ei->i_state_flags, 229f6634e26SHarshad Shirwadkar EXT4_STATE_FC_COMMITTING); 230f6634e26SHarshad Shirwadkar #else 231f6634e26SHarshad Shirwadkar DEFINE_WAIT_BIT(wait, &ei->i_flags, 232f6634e26SHarshad Shirwadkar EXT4_STATE_FC_COMMITTING); 233f6634e26SHarshad Shirwadkar wq = bit_waitqueue(&ei->i_flags, 234f6634e26SHarshad Shirwadkar EXT4_STATE_FC_COMMITTING); 235f6634e26SHarshad Shirwadkar #endif 236f6634e26SHarshad Shirwadkar lockdep_assert_held(&EXT4_SB(inode->i_sb)->s_fc_lock); 237f6634e26SHarshad Shirwadkar prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE); 238f6634e26SHarshad Shirwadkar spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock); 239f6634e26SHarshad Shirwadkar schedule(); 240f6634e26SHarshad Shirwadkar finish_wait(wq, &wait.wq_entry); 241f6634e26SHarshad Shirwadkar } 242f6634e26SHarshad Shirwadkar 243aa75f4d3SHarshad Shirwadkar /* 244aa75f4d3SHarshad Shirwadkar * Inform Ext4's fast about start of an inode update 245aa75f4d3SHarshad Shirwadkar * 246aa75f4d3SHarshad Shirwadkar * This function is called by the high level call VFS callbacks before 247aa75f4d3SHarshad Shirwadkar * performing any inode update. 
This function blocks if there's an ongoing 248aa75f4d3SHarshad Shirwadkar * fast commit on the inode in question. 249aa75f4d3SHarshad Shirwadkar */ 250aa75f4d3SHarshad Shirwadkar void ext4_fc_start_update(struct inode *inode) 251aa75f4d3SHarshad Shirwadkar { 252aa75f4d3SHarshad Shirwadkar struct ext4_inode_info *ei = EXT4_I(inode); 253aa75f4d3SHarshad Shirwadkar 2548016e29fSHarshad Shirwadkar if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) || 2558016e29fSHarshad Shirwadkar (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)) 256aa75f4d3SHarshad Shirwadkar return; 257aa75f4d3SHarshad Shirwadkar 258aa75f4d3SHarshad Shirwadkar restart: 259aa75f4d3SHarshad Shirwadkar spin_lock(&EXT4_SB(inode->i_sb)->s_fc_lock); 260aa75f4d3SHarshad Shirwadkar if (list_empty(&ei->i_fc_list)) 261aa75f4d3SHarshad Shirwadkar goto out; 262aa75f4d3SHarshad Shirwadkar 263aa75f4d3SHarshad Shirwadkar if (ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING)) { 264f6634e26SHarshad Shirwadkar ext4_fc_wait_committing_inode(inode); 265aa75f4d3SHarshad Shirwadkar goto restart; 266aa75f4d3SHarshad Shirwadkar } 267aa75f4d3SHarshad Shirwadkar out: 268aa75f4d3SHarshad Shirwadkar atomic_inc(&ei->i_fc_updates); 269aa75f4d3SHarshad Shirwadkar spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock); 270aa75f4d3SHarshad Shirwadkar } 271aa75f4d3SHarshad Shirwadkar 272aa75f4d3SHarshad Shirwadkar /* 273aa75f4d3SHarshad Shirwadkar * Stop inode update and wake up waiting fast commits if any. 
274aa75f4d3SHarshad Shirwadkar */ 275aa75f4d3SHarshad Shirwadkar void ext4_fc_stop_update(struct inode *inode) 276aa75f4d3SHarshad Shirwadkar { 277aa75f4d3SHarshad Shirwadkar struct ext4_inode_info *ei = EXT4_I(inode); 278aa75f4d3SHarshad Shirwadkar 2798016e29fSHarshad Shirwadkar if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) || 2808016e29fSHarshad Shirwadkar (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)) 281aa75f4d3SHarshad Shirwadkar return; 282aa75f4d3SHarshad Shirwadkar 283aa75f4d3SHarshad Shirwadkar if (atomic_dec_and_test(&ei->i_fc_updates)) 284aa75f4d3SHarshad Shirwadkar wake_up_all(&ei->i_fc_wait); 285aa75f4d3SHarshad Shirwadkar } 286aa75f4d3SHarshad Shirwadkar 287aa75f4d3SHarshad Shirwadkar /* 288aa75f4d3SHarshad Shirwadkar * Remove inode from fast commit list. If the inode is being committed 289aa75f4d3SHarshad Shirwadkar * we wait until inode commit is done. 290aa75f4d3SHarshad Shirwadkar */ 291aa75f4d3SHarshad Shirwadkar void ext4_fc_del(struct inode *inode) 292aa75f4d3SHarshad Shirwadkar { 293aa75f4d3SHarshad Shirwadkar struct ext4_inode_info *ei = EXT4_I(inode); 294aa75f4d3SHarshad Shirwadkar 2958016e29fSHarshad Shirwadkar if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) || 2968016e29fSHarshad Shirwadkar (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)) 297aa75f4d3SHarshad Shirwadkar return; 298aa75f4d3SHarshad Shirwadkar 299aa75f4d3SHarshad Shirwadkar restart: 300aa75f4d3SHarshad Shirwadkar spin_lock(&EXT4_SB(inode->i_sb)->s_fc_lock); 301aa75f4d3SHarshad Shirwadkar if (list_empty(&ei->i_fc_list)) { 302aa75f4d3SHarshad Shirwadkar spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock); 303aa75f4d3SHarshad Shirwadkar return; 304aa75f4d3SHarshad Shirwadkar } 305aa75f4d3SHarshad Shirwadkar 306aa75f4d3SHarshad Shirwadkar if (ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING)) { 307f6634e26SHarshad Shirwadkar ext4_fc_wait_committing_inode(inode); 308aa75f4d3SHarshad Shirwadkar goto restart; 309aa75f4d3SHarshad Shirwadkar } 310aa75f4d3SHarshad 
Shirwadkar list_del_init(&ei->i_fc_list); 311aa75f4d3SHarshad Shirwadkar spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock); 312aa75f4d3SHarshad Shirwadkar } 313aa75f4d3SHarshad Shirwadkar 314aa75f4d3SHarshad Shirwadkar /* 315aa75f4d3SHarshad Shirwadkar * Mark file system as fast commit ineligible. This means that next commit 316aa75f4d3SHarshad Shirwadkar * operation would result in a full jbd2 commit. 317aa75f4d3SHarshad Shirwadkar */ 318aa75f4d3SHarshad Shirwadkar void ext4_fc_mark_ineligible(struct super_block *sb, int reason) 319aa75f4d3SHarshad Shirwadkar { 320aa75f4d3SHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB(sb); 321aa75f4d3SHarshad Shirwadkar 3228016e29fSHarshad Shirwadkar if (!test_opt2(sb, JOURNAL_FAST_COMMIT) || 3238016e29fSHarshad Shirwadkar (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY)) 3248016e29fSHarshad Shirwadkar return; 3258016e29fSHarshad Shirwadkar 3269b5f6c9bSHarshad Shirwadkar ext4_set_mount_flag(sb, EXT4_MF_FC_INELIGIBLE); 327aa75f4d3SHarshad Shirwadkar WARN_ON(reason >= EXT4_FC_REASON_MAX); 328aa75f4d3SHarshad Shirwadkar sbi->s_fc_stats.fc_ineligible_reason_count[reason]++; 329aa75f4d3SHarshad Shirwadkar } 330aa75f4d3SHarshad Shirwadkar 331aa75f4d3SHarshad Shirwadkar /* 332aa75f4d3SHarshad Shirwadkar * Start a fast commit ineligible update. Any commits that happen while 333aa75f4d3SHarshad Shirwadkar * such an operation is in progress fall back to full commits. 
334aa75f4d3SHarshad Shirwadkar */ 335aa75f4d3SHarshad Shirwadkar void ext4_fc_start_ineligible(struct super_block *sb, int reason) 336aa75f4d3SHarshad Shirwadkar { 337aa75f4d3SHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB(sb); 338aa75f4d3SHarshad Shirwadkar 3398016e29fSHarshad Shirwadkar if (!test_opt2(sb, JOURNAL_FAST_COMMIT) || 3408016e29fSHarshad Shirwadkar (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY)) 3418016e29fSHarshad Shirwadkar return; 3428016e29fSHarshad Shirwadkar 343aa75f4d3SHarshad Shirwadkar WARN_ON(reason >= EXT4_FC_REASON_MAX); 344aa75f4d3SHarshad Shirwadkar sbi->s_fc_stats.fc_ineligible_reason_count[reason]++; 345aa75f4d3SHarshad Shirwadkar atomic_inc(&sbi->s_fc_ineligible_updates); 346aa75f4d3SHarshad Shirwadkar } 347aa75f4d3SHarshad Shirwadkar 348aa75f4d3SHarshad Shirwadkar /* 349ababea77SHarshad Shirwadkar * Stop a fast commit ineligible update. We set EXT4_MF_FC_INELIGIBLE flag here 350aa75f4d3SHarshad Shirwadkar * to ensure that after stopping the ineligible update, at least one full 351aa75f4d3SHarshad Shirwadkar * commit takes place. 
352aa75f4d3SHarshad Shirwadkar */ 353aa75f4d3SHarshad Shirwadkar void ext4_fc_stop_ineligible(struct super_block *sb) 354aa75f4d3SHarshad Shirwadkar { 3558016e29fSHarshad Shirwadkar if (!test_opt2(sb, JOURNAL_FAST_COMMIT) || 3568016e29fSHarshad Shirwadkar (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY)) 3578016e29fSHarshad Shirwadkar return; 3588016e29fSHarshad Shirwadkar 3599b5f6c9bSHarshad Shirwadkar ext4_set_mount_flag(sb, EXT4_MF_FC_INELIGIBLE); 360aa75f4d3SHarshad Shirwadkar atomic_dec(&EXT4_SB(sb)->s_fc_ineligible_updates); 361aa75f4d3SHarshad Shirwadkar } 362aa75f4d3SHarshad Shirwadkar 363aa75f4d3SHarshad Shirwadkar static inline int ext4_fc_is_ineligible(struct super_block *sb) 364aa75f4d3SHarshad Shirwadkar { 3659b5f6c9bSHarshad Shirwadkar return (ext4_test_mount_flag(sb, EXT4_MF_FC_INELIGIBLE) || 3669b5f6c9bSHarshad Shirwadkar atomic_read(&EXT4_SB(sb)->s_fc_ineligible_updates)); 367aa75f4d3SHarshad Shirwadkar } 368aa75f4d3SHarshad Shirwadkar 369aa75f4d3SHarshad Shirwadkar /* 370aa75f4d3SHarshad Shirwadkar * Generic fast commit tracking function. If this is the first time this we are 371aa75f4d3SHarshad Shirwadkar * called after a full commit, we initialize fast commit fields and then call 372aa75f4d3SHarshad Shirwadkar * __fc_track_fn() with update = 0. If we have already been called after a full 373aa75f4d3SHarshad Shirwadkar * commit, we pass update = 1. Based on that, the track function can determine 374aa75f4d3SHarshad Shirwadkar * if it needs to track a field for the first time or if it needs to just 375aa75f4d3SHarshad Shirwadkar * update the previously tracked value. 376aa75f4d3SHarshad Shirwadkar * 377aa75f4d3SHarshad Shirwadkar * If enqueue is set, this function enqueues the inode in fast commit list. 
378aa75f4d3SHarshad Shirwadkar */ 379aa75f4d3SHarshad Shirwadkar static int ext4_fc_track_template( 380a80f7fcfSHarshad Shirwadkar handle_t *handle, struct inode *inode, 381a80f7fcfSHarshad Shirwadkar int (*__fc_track_fn)(struct inode *, void *, bool), 382aa75f4d3SHarshad Shirwadkar void *args, int enqueue) 383aa75f4d3SHarshad Shirwadkar { 384aa75f4d3SHarshad Shirwadkar bool update = false; 385aa75f4d3SHarshad Shirwadkar struct ext4_inode_info *ei = EXT4_I(inode); 386aa75f4d3SHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 387a80f7fcfSHarshad Shirwadkar tid_t tid = 0; 388aa75f4d3SHarshad Shirwadkar int ret; 389aa75f4d3SHarshad Shirwadkar 3908016e29fSHarshad Shirwadkar if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) || 3918016e29fSHarshad Shirwadkar (sbi->s_mount_state & EXT4_FC_REPLAY)) 392aa75f4d3SHarshad Shirwadkar return -EOPNOTSUPP; 393aa75f4d3SHarshad Shirwadkar 394aa75f4d3SHarshad Shirwadkar if (ext4_fc_is_ineligible(inode->i_sb)) 395aa75f4d3SHarshad Shirwadkar return -EINVAL; 396aa75f4d3SHarshad Shirwadkar 397a80f7fcfSHarshad Shirwadkar tid = handle->h_transaction->t_tid; 398aa75f4d3SHarshad Shirwadkar mutex_lock(&ei->i_fc_lock); 399a80f7fcfSHarshad Shirwadkar if (tid == ei->i_sync_tid) { 400aa75f4d3SHarshad Shirwadkar update = true; 401aa75f4d3SHarshad Shirwadkar } else { 402aa75f4d3SHarshad Shirwadkar ext4_fc_reset_inode(inode); 403a80f7fcfSHarshad Shirwadkar ei->i_sync_tid = tid; 404aa75f4d3SHarshad Shirwadkar } 405aa75f4d3SHarshad Shirwadkar ret = __fc_track_fn(inode, args, update); 406aa75f4d3SHarshad Shirwadkar mutex_unlock(&ei->i_fc_lock); 407aa75f4d3SHarshad Shirwadkar 408aa75f4d3SHarshad Shirwadkar if (!enqueue) 409aa75f4d3SHarshad Shirwadkar return ret; 410aa75f4d3SHarshad Shirwadkar 411aa75f4d3SHarshad Shirwadkar spin_lock(&sbi->s_fc_lock); 412aa75f4d3SHarshad Shirwadkar if (list_empty(&EXT4_I(inode)->i_fc_list)) 413aa75f4d3SHarshad Shirwadkar list_add_tail(&EXT4_I(inode)->i_fc_list, 4149b5f6c9bSHarshad Shirwadkar 
(ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_COMMITTING)) ? 415aa75f4d3SHarshad Shirwadkar &sbi->s_fc_q[FC_Q_STAGING] : 416aa75f4d3SHarshad Shirwadkar &sbi->s_fc_q[FC_Q_MAIN]); 417aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock); 418aa75f4d3SHarshad Shirwadkar 419aa75f4d3SHarshad Shirwadkar return ret; 420aa75f4d3SHarshad Shirwadkar } 421aa75f4d3SHarshad Shirwadkar 422aa75f4d3SHarshad Shirwadkar struct __track_dentry_update_args { 423aa75f4d3SHarshad Shirwadkar struct dentry *dentry; 424aa75f4d3SHarshad Shirwadkar int op; 425aa75f4d3SHarshad Shirwadkar }; 426aa75f4d3SHarshad Shirwadkar 427aa75f4d3SHarshad Shirwadkar /* __track_fn for directory entry updates. Called with ei->i_fc_lock. */ 428aa75f4d3SHarshad Shirwadkar static int __track_dentry_update(struct inode *inode, void *arg, bool update) 429aa75f4d3SHarshad Shirwadkar { 430aa75f4d3SHarshad Shirwadkar struct ext4_fc_dentry_update *node; 431aa75f4d3SHarshad Shirwadkar struct ext4_inode_info *ei = EXT4_I(inode); 432aa75f4d3SHarshad Shirwadkar struct __track_dentry_update_args *dentry_update = 433aa75f4d3SHarshad Shirwadkar (struct __track_dentry_update_args *)arg; 434aa75f4d3SHarshad Shirwadkar struct dentry *dentry = dentry_update->dentry; 435aa75f4d3SHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 436aa75f4d3SHarshad Shirwadkar 437aa75f4d3SHarshad Shirwadkar mutex_unlock(&ei->i_fc_lock); 438aa75f4d3SHarshad Shirwadkar node = kmem_cache_alloc(ext4_fc_dentry_cachep, GFP_NOFS); 439aa75f4d3SHarshad Shirwadkar if (!node) { 440b21ebf14SHarshad Shirwadkar ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM); 441aa75f4d3SHarshad Shirwadkar mutex_lock(&ei->i_fc_lock); 442aa75f4d3SHarshad Shirwadkar return -ENOMEM; 443aa75f4d3SHarshad Shirwadkar } 444aa75f4d3SHarshad Shirwadkar 445aa75f4d3SHarshad Shirwadkar node->fcd_op = dentry_update->op; 446aa75f4d3SHarshad Shirwadkar node->fcd_parent = dentry->d_parent->d_inode->i_ino; 447aa75f4d3SHarshad Shirwadkar node->fcd_ino = inode->i_ino; 
/* Long names get a heap copy; names that fit are kept inline in the node. */
	if (dentry->d_name.len > DNAME_INLINE_LEN) {
		node->fcd_name.name = kmalloc(dentry->d_name.len, GFP_NOFS);
		if (!node->fcd_name.name) {
			/* No memory for the name: drop the node and give up on fast commit. */
			kmem_cache_free(ext4_fc_dentry_cachep, node);
			ext4_fc_mark_ineligible(inode->i_sb,
				EXT4_FC_REASON_NOMEM);
			/* i_fc_lock is taken before returning, same as on the success path. */
			mutex_lock(&ei->i_fc_lock);
			return -ENOMEM;
		}
		memcpy((u8 *)node->fcd_name.name, dentry->d_name.name,
			dentry->d_name.len);
	} else {
		memcpy(node->fcd_iname, dentry->d_name.name,
			dentry->d_name.len);
		node->fcd_name.name = node->fcd_iname;
	}
	node->fcd_name.len = dentry->d_name.len;

	/*
	 * Updates that arrive while a fast commit is in progress go to the
	 * staging queue; otherwise they go to the main queue.
	 */
	spin_lock(&sbi->s_fc_lock);
	if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_COMMITTING))
		list_add_tail(&node->fcd_list,
				&sbi->s_fc_dentry_q[FC_Q_STAGING]);
	else
		list_add_tail(&node->fcd_list, &sbi->s_fc_dentry_q[FC_Q_MAIN]);
	spin_unlock(&sbi->s_fc_lock);
	mutex_lock(&ei->i_fc_lock);

	return 0;
}

/*
 * Track an unlink of @dentry against @inode.
 *
 * Packs the dentry and EXT4_FC_TAG_UNLINK into the callback arguments and
 * runs __track_dentry_update through ext4_fc_track_template(). The result is
 * not returned to the caller; it is only reported through the tracepoint.
 */
void __ext4_fc_track_unlink(handle_t *handle,
		struct inode *inode, struct dentry *dentry)
{
	struct __track_dentry_update_args args;
	int ret;

	args.dentry = dentry;
	args.op = EXT4_FC_TAG_UNLINK;

	ret = ext4_fc_track_template(handle, inode, __track_dentry_update,
					(void *)&args, 0);
	trace_ext4_fc_track_unlink(inode, dentry, ret);
}

/* Same as __ext4_fc_track_unlink(), using the inode attached to @dentry. */
void ext4_fc_track_unlink(handle_t *handle, struct dentry *dentry)
{
	__ext4_fc_track_unlink(handle, d_inode(dentry), dentry);
}

/*
 * Track a link of @dentry to @inode.
 *
 * Identical in shape to __ext4_fc_track_unlink(), but records
 * EXT4_FC_TAG_LINK. The result is only reported through the tracepoint.
 */
void __ext4_fc_track_link(handle_t *handle,
	struct inode *inode, struct dentry *dentry)
{
	struct __track_dentry_update_args args;
	int ret;

	args.dentry = dentry;
	args.op = EXT4_FC_TAG_LINK;

	ret = ext4_fc_track_template(handle, inode, __track_dentry_update,
					(void *)&args, 0);
	trace_ext4_fc_track_link(inode, dentry, ret);
}

/* Same as __ext4_fc_track_link(), using the inode attached to @dentry. */
void ext4_fc_track_link(handle_t *handle, struct dentry *dentry)
{
	__ext4_fc_track_link(handle, d_inode(dentry), dentry);
}

/*
 * Track the creation of @dentry for @inode.
 *
 * Identical in shape to __ext4_fc_track_unlink(), but records
 * EXT4_FC_TAG_CREAT. The result is only reported through the tracepoint.
 */
void __ext4_fc_track_create(handle_t *handle, struct inode *inode,
			  struct dentry *dentry)
{
	struct __track_dentry_update_args args;
	int ret;

	args.dentry = dentry;
	args.op = EXT4_FC_TAG_CREAT;

	ret = ext4_fc_track_template(handle, inode, __track_dentry_update,
					(void *)&args, 0);
	trace_ext4_fc_track_create(inode, dentry, ret);
}

/* Same as __ext4_fc_track_create(), using the inode attached to @dentry. */
void ext4_fc_track_create(handle_t *handle, struct dentry *dentry)
{
	__ext4_fc_track_create(handle, d_inode(dentry), dentry);
}

/*
 * __track_fn for inode tracking
 *
 * Returns -EEXIST when @update is set; otherwise clears the inode's tracked
 * block-range length and returns 0. @arg is unused.
 * NOTE(review): @update presumably means the inode was already being
 * tracked; confirm against ext4_fc_track_template().
 */
static int __track_inode(struct inode *inode, void *arg, bool update)
{
	if (update)
		return -EEXIST;

	EXT4_I(inode)->i_fc_lblk_len = 0;

	return 0;
}

/*
 * Track @inode for fast commit.
 *
 * Directories are skipped. An inode for which ext4_should_journal_data()
 * is true is not tracked either; instead the file system is marked
 * ineligible with EXT4_FC_REASON_INODE_JOURNAL_DATA.
 */
void ext4_fc_track_inode(handle_t *handle, struct inode *inode)
{
	int ret;

	if (S_ISDIR(inode->i_mode))
		return;

	if (ext4_should_journal_data(inode)) {
		ext4_fc_mark_ineligible(inode->i_sb,
					EXT4_FC_REASON_INODE_JOURNAL_DATA);
		return;
	}

	ret = ext4_fc_track_template(handle, inode, __track_inode, NULL, 1);
	trace_ext4_fc_track_inode(inode, ret);
}

/* Callback arguments for __track_range: the first and last block of a range. */
struct __track_range_args {
	ext4_lblk_t start, end;
};

/*
 * __track_fn for tracking data updates
 *
 * Records the logical block range [start, end] from @arg in the inode's
 * i_fc_lblk_start / i_fc_lblk_len. Inodes numbered below EXT4_FIRST_INO are
 * rejected with -ECANCELED.
 */
static int __track_range(struct inode *inode, void *arg, bool update)
{
	struct ext4_inode_info *ei = EXT4_I(inode);
	ext4_lblk_t oldstart;
	struct __track_range_args *__arg =
		(struct __track_range_args *)arg;

	if (inode->i_ino < EXT4_FIRST_INO(inode->i_sb)) {
		ext4_debug("Special inode %ld being modified\n", inode->i_ino);
		return -ECANCELED;
	}

	/* Saved because i_fc_lblk_start is overwritten before the old end is computed. */
	oldstart = ei->i_fc_lblk_start;

	if (update && ei->i_fc_lblk_len > 0) {
		/*
		 * A range is already recorded: widen it to the smallest single
		 * range covering both the old one and the new one.
		 */
		ei->i_fc_lblk_start = min(ei->i_fc_lblk_start, __arg->start);
		ei->i_fc_lblk_len =
			max(oldstart + ei->i_fc_lblk_len - 1, __arg->end) -
				ei->i_fc_lblk_start + 1;
	} else {
		/* Nothing recorded yet: the new range is taken as is. */
		ei->i_fc_lblk_start = __arg->start;
		ei->i_fc_lblk_len = __arg->end - __arg->start + 1;
	}

	return 0;
}

/*
 * Track a change to logical blocks @start..@end (inclusive) of @inode.
 * Directories are skipped. The result is only reported through the
 * tracepoint.
 */
void ext4_fc_track_range(handle_t *handle, struct inode *inode, ext4_lblk_t start,
			 ext4_lblk_t end)
{
	struct __track_range_args args;
	int ret;

	if (S_ISDIR(inode->i_mode))
		return;

	args.start = start;
	args.end = end;

	ret = ext4_fc_track_template(handle, inode, __track_range, &args, 1);

	trace_ext4_fc_track_range(inode, start, end, ret);
}

/*
 * Submit the current fast commit buffer (sbi->s_fc_bh) for writing and
 * clear sbi->s_fc_bh. @is_tail marks the buffer that ends a fast commit.
 */
static void ext4_fc_submit_bh(struct super_block *sb, bool is_tail)
{
	int write_flags = REQ_SYNC;
	struct buffer_head *bh = EXT4_SB(sb)->s_fc_bh;

	/* Add REQ_FUA | REQ_PREFLUSH only for the tail block, and only with barriers on */
	if (test_opt(sb, BARRIER) && is_tail)
		write_flags |= REQ_FUA | REQ_PREFLUSH;
	lock_buffer(bh);
	set_buffer_dirty(bh);
	set_buffer_uptodate(bh);
	bh->b_end_io = ext4_end_buffer_io_sync;
	submit_bh(REQ_OP_WRITE, write_flags, bh);
	EXT4_SB(sb)->s_fc_bh = NULL;
}

/* Ext4 commit path routines */

/* memzero and
update CRC */ 631aa75f4d3SHarshad Shirwadkar static void *ext4_fc_memzero(struct super_block *sb, void *dst, int len, 632aa75f4d3SHarshad Shirwadkar u32 *crc) 633aa75f4d3SHarshad Shirwadkar { 634aa75f4d3SHarshad Shirwadkar void *ret; 635aa75f4d3SHarshad Shirwadkar 636aa75f4d3SHarshad Shirwadkar ret = memset(dst, 0, len); 637aa75f4d3SHarshad Shirwadkar if (crc) 638aa75f4d3SHarshad Shirwadkar *crc = ext4_chksum(EXT4_SB(sb), *crc, dst, len); 639aa75f4d3SHarshad Shirwadkar return ret; 640aa75f4d3SHarshad Shirwadkar } 641aa75f4d3SHarshad Shirwadkar 642aa75f4d3SHarshad Shirwadkar /* 643aa75f4d3SHarshad Shirwadkar * Allocate len bytes on a fast commit buffer. 644aa75f4d3SHarshad Shirwadkar * 645aa75f4d3SHarshad Shirwadkar * During the commit time this function is used to manage fast commit 646aa75f4d3SHarshad Shirwadkar * block space. We don't split a fast commit log onto different 647aa75f4d3SHarshad Shirwadkar * blocks. So this function makes sure that if there's not enough space 648aa75f4d3SHarshad Shirwadkar * on the current block, the remaining space in the current block is 649aa75f4d3SHarshad Shirwadkar * marked as unused by adding EXT4_FC_TAG_PAD tag. In that case, 650aa75f4d3SHarshad Shirwadkar * new block is from jbd2 and CRC is updated to reflect the padding 651aa75f4d3SHarshad Shirwadkar * we added. 
652aa75f4d3SHarshad Shirwadkar */ 653aa75f4d3SHarshad Shirwadkar static u8 *ext4_fc_reserve_space(struct super_block *sb, int len, u32 *crc) 654aa75f4d3SHarshad Shirwadkar { 655aa75f4d3SHarshad Shirwadkar struct ext4_fc_tl *tl; 656aa75f4d3SHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB(sb); 657aa75f4d3SHarshad Shirwadkar struct buffer_head *bh; 658aa75f4d3SHarshad Shirwadkar int bsize = sbi->s_journal->j_blocksize; 659aa75f4d3SHarshad Shirwadkar int ret, off = sbi->s_fc_bytes % bsize; 660aa75f4d3SHarshad Shirwadkar int pad_len; 661aa75f4d3SHarshad Shirwadkar 662aa75f4d3SHarshad Shirwadkar /* 663aa75f4d3SHarshad Shirwadkar * After allocating len, we should have space at least for a 0 byte 664aa75f4d3SHarshad Shirwadkar * padding. 665aa75f4d3SHarshad Shirwadkar */ 666aa75f4d3SHarshad Shirwadkar if (len + sizeof(struct ext4_fc_tl) > bsize) 667aa75f4d3SHarshad Shirwadkar return NULL; 668aa75f4d3SHarshad Shirwadkar 669aa75f4d3SHarshad Shirwadkar if (bsize - off - 1 > len + sizeof(struct ext4_fc_tl)) { 670aa75f4d3SHarshad Shirwadkar /* 671aa75f4d3SHarshad Shirwadkar * Only allocate from current buffer if we have enough space for 672aa75f4d3SHarshad Shirwadkar * this request AND we have space to add a zero byte padding. 
673aa75f4d3SHarshad Shirwadkar */ 674aa75f4d3SHarshad Shirwadkar if (!sbi->s_fc_bh) { 675aa75f4d3SHarshad Shirwadkar ret = jbd2_fc_get_buf(EXT4_SB(sb)->s_journal, &bh); 676aa75f4d3SHarshad Shirwadkar if (ret) 677aa75f4d3SHarshad Shirwadkar return NULL; 678aa75f4d3SHarshad Shirwadkar sbi->s_fc_bh = bh; 679aa75f4d3SHarshad Shirwadkar } 680aa75f4d3SHarshad Shirwadkar sbi->s_fc_bytes += len; 681aa75f4d3SHarshad Shirwadkar return sbi->s_fc_bh->b_data + off; 682aa75f4d3SHarshad Shirwadkar } 683aa75f4d3SHarshad Shirwadkar /* Need to add PAD tag */ 684aa75f4d3SHarshad Shirwadkar tl = (struct ext4_fc_tl *)(sbi->s_fc_bh->b_data + off); 685aa75f4d3SHarshad Shirwadkar tl->fc_tag = cpu_to_le16(EXT4_FC_TAG_PAD); 686aa75f4d3SHarshad Shirwadkar pad_len = bsize - off - 1 - sizeof(struct ext4_fc_tl); 687aa75f4d3SHarshad Shirwadkar tl->fc_len = cpu_to_le16(pad_len); 688aa75f4d3SHarshad Shirwadkar if (crc) 689aa75f4d3SHarshad Shirwadkar *crc = ext4_chksum(sbi, *crc, tl, sizeof(*tl)); 690aa75f4d3SHarshad Shirwadkar if (pad_len > 0) 691aa75f4d3SHarshad Shirwadkar ext4_fc_memzero(sb, tl + 1, pad_len, crc); 692e9f53353SDaejun Park ext4_fc_submit_bh(sb, false); 693aa75f4d3SHarshad Shirwadkar 694aa75f4d3SHarshad Shirwadkar ret = jbd2_fc_get_buf(EXT4_SB(sb)->s_journal, &bh); 695aa75f4d3SHarshad Shirwadkar if (ret) 696aa75f4d3SHarshad Shirwadkar return NULL; 697aa75f4d3SHarshad Shirwadkar sbi->s_fc_bh = bh; 698aa75f4d3SHarshad Shirwadkar sbi->s_fc_bytes = (sbi->s_fc_bytes / bsize + 1) * bsize + len; 699aa75f4d3SHarshad Shirwadkar return sbi->s_fc_bh->b_data; 700aa75f4d3SHarshad Shirwadkar } 701aa75f4d3SHarshad Shirwadkar 702aa75f4d3SHarshad Shirwadkar /* memcpy to fc reserved space and update CRC */ 703aa75f4d3SHarshad Shirwadkar static void *ext4_fc_memcpy(struct super_block *sb, void *dst, const void *src, 704aa75f4d3SHarshad Shirwadkar int len, u32 *crc) 705aa75f4d3SHarshad Shirwadkar { 706aa75f4d3SHarshad Shirwadkar if (crc) 707aa75f4d3SHarshad Shirwadkar *crc = ext4_chksum(EXT4_SB(sb), 
*crc, src, len); 708aa75f4d3SHarshad Shirwadkar return memcpy(dst, src, len); 709aa75f4d3SHarshad Shirwadkar } 710aa75f4d3SHarshad Shirwadkar 711aa75f4d3SHarshad Shirwadkar /* 712aa75f4d3SHarshad Shirwadkar * Complete a fast commit by writing tail tag. 713aa75f4d3SHarshad Shirwadkar * 714aa75f4d3SHarshad Shirwadkar * Writing tail tag marks the end of a fast commit. In order to guarantee 715aa75f4d3SHarshad Shirwadkar * atomicity, after writing tail tag, even if there's space remaining 716aa75f4d3SHarshad Shirwadkar * in the block, next commit shouldn't use it. That's why tail tag 717aa75f4d3SHarshad Shirwadkar * has the length as that of the remaining space on the block. 718aa75f4d3SHarshad Shirwadkar */ 719aa75f4d3SHarshad Shirwadkar static int ext4_fc_write_tail(struct super_block *sb, u32 crc) 720aa75f4d3SHarshad Shirwadkar { 721aa75f4d3SHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB(sb); 722aa75f4d3SHarshad Shirwadkar struct ext4_fc_tl tl; 723aa75f4d3SHarshad Shirwadkar struct ext4_fc_tail tail; 724aa75f4d3SHarshad Shirwadkar int off, bsize = sbi->s_journal->j_blocksize; 725aa75f4d3SHarshad Shirwadkar u8 *dst; 726aa75f4d3SHarshad Shirwadkar 727aa75f4d3SHarshad Shirwadkar /* 728aa75f4d3SHarshad Shirwadkar * ext4_fc_reserve_space takes care of allocating an extra block if 729aa75f4d3SHarshad Shirwadkar * there's no enough space on this block for accommodating this tail. 
730aa75f4d3SHarshad Shirwadkar */ 731aa75f4d3SHarshad Shirwadkar dst = ext4_fc_reserve_space(sb, sizeof(tl) + sizeof(tail), &crc); 732aa75f4d3SHarshad Shirwadkar if (!dst) 733aa75f4d3SHarshad Shirwadkar return -ENOSPC; 734aa75f4d3SHarshad Shirwadkar 735aa75f4d3SHarshad Shirwadkar off = sbi->s_fc_bytes % bsize; 736aa75f4d3SHarshad Shirwadkar 737aa75f4d3SHarshad Shirwadkar tl.fc_tag = cpu_to_le16(EXT4_FC_TAG_TAIL); 738aa75f4d3SHarshad Shirwadkar tl.fc_len = cpu_to_le16(bsize - off - 1 + sizeof(struct ext4_fc_tail)); 739aa75f4d3SHarshad Shirwadkar sbi->s_fc_bytes = round_up(sbi->s_fc_bytes, bsize); 740aa75f4d3SHarshad Shirwadkar 741aa75f4d3SHarshad Shirwadkar ext4_fc_memcpy(sb, dst, &tl, sizeof(tl), &crc); 742aa75f4d3SHarshad Shirwadkar dst += sizeof(tl); 743aa75f4d3SHarshad Shirwadkar tail.fc_tid = cpu_to_le32(sbi->s_journal->j_running_transaction->t_tid); 744aa75f4d3SHarshad Shirwadkar ext4_fc_memcpy(sb, dst, &tail.fc_tid, sizeof(tail.fc_tid), &crc); 745aa75f4d3SHarshad Shirwadkar dst += sizeof(tail.fc_tid); 746aa75f4d3SHarshad Shirwadkar tail.fc_crc = cpu_to_le32(crc); 747aa75f4d3SHarshad Shirwadkar ext4_fc_memcpy(sb, dst, &tail.fc_crc, sizeof(tail.fc_crc), NULL); 748aa75f4d3SHarshad Shirwadkar 749e9f53353SDaejun Park ext4_fc_submit_bh(sb, true); 750aa75f4d3SHarshad Shirwadkar 751aa75f4d3SHarshad Shirwadkar return 0; 752aa75f4d3SHarshad Shirwadkar } 753aa75f4d3SHarshad Shirwadkar 754aa75f4d3SHarshad Shirwadkar /* 755aa75f4d3SHarshad Shirwadkar * Adds tag, length, value and updates CRC. Returns true if tlv was added. 756aa75f4d3SHarshad Shirwadkar * Returns false if there's not enough space. 
757aa75f4d3SHarshad Shirwadkar */ 758aa75f4d3SHarshad Shirwadkar static bool ext4_fc_add_tlv(struct super_block *sb, u16 tag, u16 len, u8 *val, 759aa75f4d3SHarshad Shirwadkar u32 *crc) 760aa75f4d3SHarshad Shirwadkar { 761aa75f4d3SHarshad Shirwadkar struct ext4_fc_tl tl; 762aa75f4d3SHarshad Shirwadkar u8 *dst; 763aa75f4d3SHarshad Shirwadkar 764aa75f4d3SHarshad Shirwadkar dst = ext4_fc_reserve_space(sb, sizeof(tl) + len, crc); 765aa75f4d3SHarshad Shirwadkar if (!dst) 766aa75f4d3SHarshad Shirwadkar return false; 767aa75f4d3SHarshad Shirwadkar 768aa75f4d3SHarshad Shirwadkar tl.fc_tag = cpu_to_le16(tag); 769aa75f4d3SHarshad Shirwadkar tl.fc_len = cpu_to_le16(len); 770aa75f4d3SHarshad Shirwadkar 771aa75f4d3SHarshad Shirwadkar ext4_fc_memcpy(sb, dst, &tl, sizeof(tl), crc); 772aa75f4d3SHarshad Shirwadkar ext4_fc_memcpy(sb, dst + sizeof(tl), val, len, crc); 773aa75f4d3SHarshad Shirwadkar 774aa75f4d3SHarshad Shirwadkar return true; 775aa75f4d3SHarshad Shirwadkar } 776aa75f4d3SHarshad Shirwadkar 777aa75f4d3SHarshad Shirwadkar /* Same as above, but adds dentry tlv. 
*/ 778facec450SGuoqing Jiang static bool ext4_fc_add_dentry_tlv(struct super_block *sb, u32 *crc, 779facec450SGuoqing Jiang struct ext4_fc_dentry_update *fc_dentry) 780aa75f4d3SHarshad Shirwadkar { 781aa75f4d3SHarshad Shirwadkar struct ext4_fc_dentry_info fcd; 782aa75f4d3SHarshad Shirwadkar struct ext4_fc_tl tl; 783facec450SGuoqing Jiang int dlen = fc_dentry->fcd_name.len; 784aa75f4d3SHarshad Shirwadkar u8 *dst = ext4_fc_reserve_space(sb, sizeof(tl) + sizeof(fcd) + dlen, 785aa75f4d3SHarshad Shirwadkar crc); 786aa75f4d3SHarshad Shirwadkar 787aa75f4d3SHarshad Shirwadkar if (!dst) 788aa75f4d3SHarshad Shirwadkar return false; 789aa75f4d3SHarshad Shirwadkar 790facec450SGuoqing Jiang fcd.fc_parent_ino = cpu_to_le32(fc_dentry->fcd_parent); 791facec450SGuoqing Jiang fcd.fc_ino = cpu_to_le32(fc_dentry->fcd_ino); 792facec450SGuoqing Jiang tl.fc_tag = cpu_to_le16(fc_dentry->fcd_op); 793aa75f4d3SHarshad Shirwadkar tl.fc_len = cpu_to_le16(sizeof(fcd) + dlen); 794aa75f4d3SHarshad Shirwadkar ext4_fc_memcpy(sb, dst, &tl, sizeof(tl), crc); 795aa75f4d3SHarshad Shirwadkar dst += sizeof(tl); 796aa75f4d3SHarshad Shirwadkar ext4_fc_memcpy(sb, dst, &fcd, sizeof(fcd), crc); 797aa75f4d3SHarshad Shirwadkar dst += sizeof(fcd); 798facec450SGuoqing Jiang ext4_fc_memcpy(sb, dst, fc_dentry->fcd_name.name, dlen, crc); 799aa75f4d3SHarshad Shirwadkar dst += dlen; 800aa75f4d3SHarshad Shirwadkar 801aa75f4d3SHarshad Shirwadkar return true; 802aa75f4d3SHarshad Shirwadkar } 803aa75f4d3SHarshad Shirwadkar 804aa75f4d3SHarshad Shirwadkar /* 805aa75f4d3SHarshad Shirwadkar * Writes inode in the fast commit space under TLV with tag @tag. 806aa75f4d3SHarshad Shirwadkar * Returns 0 on success, error on failure. 
807aa75f4d3SHarshad Shirwadkar */ 808aa75f4d3SHarshad Shirwadkar static int ext4_fc_write_inode(struct inode *inode, u32 *crc) 809aa75f4d3SHarshad Shirwadkar { 810aa75f4d3SHarshad Shirwadkar struct ext4_inode_info *ei = EXT4_I(inode); 811aa75f4d3SHarshad Shirwadkar int inode_len = EXT4_GOOD_OLD_INODE_SIZE; 812aa75f4d3SHarshad Shirwadkar int ret; 813aa75f4d3SHarshad Shirwadkar struct ext4_iloc iloc; 814aa75f4d3SHarshad Shirwadkar struct ext4_fc_inode fc_inode; 815aa75f4d3SHarshad Shirwadkar struct ext4_fc_tl tl; 816aa75f4d3SHarshad Shirwadkar u8 *dst; 817aa75f4d3SHarshad Shirwadkar 818aa75f4d3SHarshad Shirwadkar ret = ext4_get_inode_loc(inode, &iloc); 819aa75f4d3SHarshad Shirwadkar if (ret) 820aa75f4d3SHarshad Shirwadkar return ret; 821aa75f4d3SHarshad Shirwadkar 822aa75f4d3SHarshad Shirwadkar if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) 823aa75f4d3SHarshad Shirwadkar inode_len += ei->i_extra_isize; 824aa75f4d3SHarshad Shirwadkar 825aa75f4d3SHarshad Shirwadkar fc_inode.fc_ino = cpu_to_le32(inode->i_ino); 826aa75f4d3SHarshad Shirwadkar tl.fc_tag = cpu_to_le16(EXT4_FC_TAG_INODE); 827aa75f4d3SHarshad Shirwadkar tl.fc_len = cpu_to_le16(inode_len + sizeof(fc_inode.fc_ino)); 828aa75f4d3SHarshad Shirwadkar 829aa75f4d3SHarshad Shirwadkar dst = ext4_fc_reserve_space(inode->i_sb, 830aa75f4d3SHarshad Shirwadkar sizeof(tl) + inode_len + sizeof(fc_inode.fc_ino), crc); 831aa75f4d3SHarshad Shirwadkar if (!dst) 832aa75f4d3SHarshad Shirwadkar return -ECANCELED; 833aa75f4d3SHarshad Shirwadkar 834aa75f4d3SHarshad Shirwadkar if (!ext4_fc_memcpy(inode->i_sb, dst, &tl, sizeof(tl), crc)) 835aa75f4d3SHarshad Shirwadkar return -ECANCELED; 836aa75f4d3SHarshad Shirwadkar dst += sizeof(tl); 837aa75f4d3SHarshad Shirwadkar if (!ext4_fc_memcpy(inode->i_sb, dst, &fc_inode, sizeof(fc_inode), crc)) 838aa75f4d3SHarshad Shirwadkar return -ECANCELED; 839aa75f4d3SHarshad Shirwadkar dst += sizeof(fc_inode); 840aa75f4d3SHarshad Shirwadkar if (!ext4_fc_memcpy(inode->i_sb, dst, (u8 
*)ext4_raw_inode(&iloc), 841aa75f4d3SHarshad Shirwadkar inode_len, crc)) 842aa75f4d3SHarshad Shirwadkar return -ECANCELED; 843aa75f4d3SHarshad Shirwadkar 844aa75f4d3SHarshad Shirwadkar return 0; 845aa75f4d3SHarshad Shirwadkar } 846aa75f4d3SHarshad Shirwadkar 847aa75f4d3SHarshad Shirwadkar /* 848aa75f4d3SHarshad Shirwadkar * Writes updated data ranges for the inode in question. Updates CRC. 849aa75f4d3SHarshad Shirwadkar * Returns 0 on success, error otherwise. 850aa75f4d3SHarshad Shirwadkar */ 851aa75f4d3SHarshad Shirwadkar static int ext4_fc_write_inode_data(struct inode *inode, u32 *crc) 852aa75f4d3SHarshad Shirwadkar { 853aa75f4d3SHarshad Shirwadkar ext4_lblk_t old_blk_size, cur_lblk_off, new_blk_size; 854aa75f4d3SHarshad Shirwadkar struct ext4_inode_info *ei = EXT4_I(inode); 855aa75f4d3SHarshad Shirwadkar struct ext4_map_blocks map; 856aa75f4d3SHarshad Shirwadkar struct ext4_fc_add_range fc_ext; 857aa75f4d3SHarshad Shirwadkar struct ext4_fc_del_range lrange; 858aa75f4d3SHarshad Shirwadkar struct ext4_extent *ex; 859aa75f4d3SHarshad Shirwadkar int ret; 860aa75f4d3SHarshad Shirwadkar 861aa75f4d3SHarshad Shirwadkar mutex_lock(&ei->i_fc_lock); 862aa75f4d3SHarshad Shirwadkar if (ei->i_fc_lblk_len == 0) { 863aa75f4d3SHarshad Shirwadkar mutex_unlock(&ei->i_fc_lock); 864aa75f4d3SHarshad Shirwadkar return 0; 865aa75f4d3SHarshad Shirwadkar } 866aa75f4d3SHarshad Shirwadkar old_blk_size = ei->i_fc_lblk_start; 867aa75f4d3SHarshad Shirwadkar new_blk_size = ei->i_fc_lblk_start + ei->i_fc_lblk_len - 1; 868aa75f4d3SHarshad Shirwadkar ei->i_fc_lblk_len = 0; 869aa75f4d3SHarshad Shirwadkar mutex_unlock(&ei->i_fc_lock); 870aa75f4d3SHarshad Shirwadkar 871aa75f4d3SHarshad Shirwadkar cur_lblk_off = old_blk_size; 872aa75f4d3SHarshad Shirwadkar jbd_debug(1, "%s: will try writing %d to %d for inode %ld\n", 873aa75f4d3SHarshad Shirwadkar __func__, cur_lblk_off, new_blk_size, inode->i_ino); 874aa75f4d3SHarshad Shirwadkar 875aa75f4d3SHarshad Shirwadkar while (cur_lblk_off <= new_blk_size) { 
876aa75f4d3SHarshad Shirwadkar map.m_lblk = cur_lblk_off; 877aa75f4d3SHarshad Shirwadkar map.m_len = new_blk_size - cur_lblk_off + 1; 878aa75f4d3SHarshad Shirwadkar ret = ext4_map_blocks(NULL, inode, &map, 0); 879aa75f4d3SHarshad Shirwadkar if (ret < 0) 880aa75f4d3SHarshad Shirwadkar return -ECANCELED; 881aa75f4d3SHarshad Shirwadkar 882aa75f4d3SHarshad Shirwadkar if (map.m_len == 0) { 883aa75f4d3SHarshad Shirwadkar cur_lblk_off++; 884aa75f4d3SHarshad Shirwadkar continue; 885aa75f4d3SHarshad Shirwadkar } 886aa75f4d3SHarshad Shirwadkar 887aa75f4d3SHarshad Shirwadkar if (ret == 0) { 888aa75f4d3SHarshad Shirwadkar lrange.fc_ino = cpu_to_le32(inode->i_ino); 889aa75f4d3SHarshad Shirwadkar lrange.fc_lblk = cpu_to_le32(map.m_lblk); 890aa75f4d3SHarshad Shirwadkar lrange.fc_len = cpu_to_le32(map.m_len); 891aa75f4d3SHarshad Shirwadkar if (!ext4_fc_add_tlv(inode->i_sb, EXT4_FC_TAG_DEL_RANGE, 892aa75f4d3SHarshad Shirwadkar sizeof(lrange), (u8 *)&lrange, crc)) 893aa75f4d3SHarshad Shirwadkar return -ENOSPC; 894aa75f4d3SHarshad Shirwadkar } else { 895*a2c2f082SHou Tao unsigned int max = (map.m_flags & EXT4_MAP_UNWRITTEN) ? 
896*a2c2f082SHou Tao EXT_UNWRITTEN_MAX_LEN : EXT_INIT_MAX_LEN; 897*a2c2f082SHou Tao 898*a2c2f082SHou Tao /* Limit the number of blocks in one extent */ 899*a2c2f082SHou Tao map.m_len = min(max, map.m_len); 900*a2c2f082SHou Tao 901aa75f4d3SHarshad Shirwadkar fc_ext.fc_ino = cpu_to_le32(inode->i_ino); 902aa75f4d3SHarshad Shirwadkar ex = (struct ext4_extent *)&fc_ext.fc_ex; 903aa75f4d3SHarshad Shirwadkar ex->ee_block = cpu_to_le32(map.m_lblk); 904aa75f4d3SHarshad Shirwadkar ex->ee_len = cpu_to_le16(map.m_len); 905aa75f4d3SHarshad Shirwadkar ext4_ext_store_pblock(ex, map.m_pblk); 906aa75f4d3SHarshad Shirwadkar if (map.m_flags & EXT4_MAP_UNWRITTEN) 907aa75f4d3SHarshad Shirwadkar ext4_ext_mark_unwritten(ex); 908aa75f4d3SHarshad Shirwadkar else 909aa75f4d3SHarshad Shirwadkar ext4_ext_mark_initialized(ex); 910aa75f4d3SHarshad Shirwadkar if (!ext4_fc_add_tlv(inode->i_sb, EXT4_FC_TAG_ADD_RANGE, 911aa75f4d3SHarshad Shirwadkar sizeof(fc_ext), (u8 *)&fc_ext, crc)) 912aa75f4d3SHarshad Shirwadkar return -ENOSPC; 913aa75f4d3SHarshad Shirwadkar } 914aa75f4d3SHarshad Shirwadkar 915aa75f4d3SHarshad Shirwadkar cur_lblk_off += map.m_len; 916aa75f4d3SHarshad Shirwadkar } 917aa75f4d3SHarshad Shirwadkar 918aa75f4d3SHarshad Shirwadkar return 0; 919aa75f4d3SHarshad Shirwadkar } 920aa75f4d3SHarshad Shirwadkar 921aa75f4d3SHarshad Shirwadkar 922aa75f4d3SHarshad Shirwadkar /* Submit data for all the fast commit inodes */ 923aa75f4d3SHarshad Shirwadkar static int ext4_fc_submit_inode_data_all(journal_t *journal) 924aa75f4d3SHarshad Shirwadkar { 925aa75f4d3SHarshad Shirwadkar struct super_block *sb = (struct super_block *)(journal->j_private); 926aa75f4d3SHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB(sb); 927aa75f4d3SHarshad Shirwadkar struct ext4_inode_info *ei; 928aa75f4d3SHarshad Shirwadkar int ret = 0; 929aa75f4d3SHarshad Shirwadkar 930aa75f4d3SHarshad Shirwadkar spin_lock(&sbi->s_fc_lock); 9319b5f6c9bSHarshad Shirwadkar ext4_set_mount_flag(sb, EXT4_MF_FC_COMMITTING); 
93296e7c02dSDaejun Park list_for_each_entry(ei, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) { 933aa75f4d3SHarshad Shirwadkar ext4_set_inode_state(&ei->vfs_inode, EXT4_STATE_FC_COMMITTING); 934aa75f4d3SHarshad Shirwadkar while (atomic_read(&ei->i_fc_updates)) { 935aa75f4d3SHarshad Shirwadkar DEFINE_WAIT(wait); 936aa75f4d3SHarshad Shirwadkar 937aa75f4d3SHarshad Shirwadkar prepare_to_wait(&ei->i_fc_wait, &wait, 938aa75f4d3SHarshad Shirwadkar TASK_UNINTERRUPTIBLE); 939aa75f4d3SHarshad Shirwadkar if (atomic_read(&ei->i_fc_updates)) { 940aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock); 941aa75f4d3SHarshad Shirwadkar schedule(); 942aa75f4d3SHarshad Shirwadkar spin_lock(&sbi->s_fc_lock); 943aa75f4d3SHarshad Shirwadkar } 944aa75f4d3SHarshad Shirwadkar finish_wait(&ei->i_fc_wait, &wait); 945aa75f4d3SHarshad Shirwadkar } 946aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock); 947aa75f4d3SHarshad Shirwadkar ret = jbd2_submit_inode_data(ei->jinode); 948aa75f4d3SHarshad Shirwadkar if (ret) 949aa75f4d3SHarshad Shirwadkar return ret; 950aa75f4d3SHarshad Shirwadkar spin_lock(&sbi->s_fc_lock); 951aa75f4d3SHarshad Shirwadkar } 952aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock); 953aa75f4d3SHarshad Shirwadkar 954aa75f4d3SHarshad Shirwadkar return ret; 955aa75f4d3SHarshad Shirwadkar } 956aa75f4d3SHarshad Shirwadkar 957aa75f4d3SHarshad Shirwadkar /* Wait for completion of data for all the fast commit inodes */ 958aa75f4d3SHarshad Shirwadkar static int ext4_fc_wait_inode_data_all(journal_t *journal) 959aa75f4d3SHarshad Shirwadkar { 960aa75f4d3SHarshad Shirwadkar struct super_block *sb = (struct super_block *)(journal->j_private); 961aa75f4d3SHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB(sb); 962aa75f4d3SHarshad Shirwadkar struct ext4_inode_info *pos, *n; 963aa75f4d3SHarshad Shirwadkar int ret = 0; 964aa75f4d3SHarshad Shirwadkar 965aa75f4d3SHarshad Shirwadkar spin_lock(&sbi->s_fc_lock); 966aa75f4d3SHarshad Shirwadkar list_for_each_entry_safe(pos, n, 
&sbi->s_fc_q[FC_Q_MAIN], i_fc_list) { 967aa75f4d3SHarshad Shirwadkar if (!ext4_test_inode_state(&pos->vfs_inode, 968aa75f4d3SHarshad Shirwadkar EXT4_STATE_FC_COMMITTING)) 969aa75f4d3SHarshad Shirwadkar continue; 970aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock); 971aa75f4d3SHarshad Shirwadkar 972aa75f4d3SHarshad Shirwadkar ret = jbd2_wait_inode_data(journal, pos->jinode); 973aa75f4d3SHarshad Shirwadkar if (ret) 974aa75f4d3SHarshad Shirwadkar return ret; 975aa75f4d3SHarshad Shirwadkar spin_lock(&sbi->s_fc_lock); 976aa75f4d3SHarshad Shirwadkar } 977aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock); 978aa75f4d3SHarshad Shirwadkar 979aa75f4d3SHarshad Shirwadkar return 0; 980aa75f4d3SHarshad Shirwadkar } 981aa75f4d3SHarshad Shirwadkar 982aa75f4d3SHarshad Shirwadkar /* Commit all the directory entry updates */ 983aa75f4d3SHarshad Shirwadkar static int ext4_fc_commit_dentry_updates(journal_t *journal, u32 *crc) 984fa329e27STheodore Ts'o __acquires(&sbi->s_fc_lock) 985fa329e27STheodore Ts'o __releases(&sbi->s_fc_lock) 986aa75f4d3SHarshad Shirwadkar { 987aa75f4d3SHarshad Shirwadkar struct super_block *sb = (struct super_block *)(journal->j_private); 988aa75f4d3SHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB(sb); 98996e7c02dSDaejun Park struct ext4_fc_dentry_update *fc_dentry, *fc_dentry_n; 990aa75f4d3SHarshad Shirwadkar struct inode *inode; 99196e7c02dSDaejun Park struct ext4_inode_info *ei, *ei_n; 992aa75f4d3SHarshad Shirwadkar int ret; 993aa75f4d3SHarshad Shirwadkar 994aa75f4d3SHarshad Shirwadkar if (list_empty(&sbi->s_fc_dentry_q[FC_Q_MAIN])) 995aa75f4d3SHarshad Shirwadkar return 0; 99696e7c02dSDaejun Park list_for_each_entry_safe(fc_dentry, fc_dentry_n, 99796e7c02dSDaejun Park &sbi->s_fc_dentry_q[FC_Q_MAIN], fcd_list) { 998aa75f4d3SHarshad Shirwadkar if (fc_dentry->fcd_op != EXT4_FC_TAG_CREAT) { 999aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock); 1000facec450SGuoqing Jiang if (!ext4_fc_add_dentry_tlv(sb, crc, fc_dentry)) { 
1001aa75f4d3SHarshad Shirwadkar ret = -ENOSPC; 1002aa75f4d3SHarshad Shirwadkar goto lock_and_exit; 1003aa75f4d3SHarshad Shirwadkar } 1004aa75f4d3SHarshad Shirwadkar spin_lock(&sbi->s_fc_lock); 1005aa75f4d3SHarshad Shirwadkar continue; 1006aa75f4d3SHarshad Shirwadkar } 1007aa75f4d3SHarshad Shirwadkar 1008aa75f4d3SHarshad Shirwadkar inode = NULL; 100996e7c02dSDaejun Park list_for_each_entry_safe(ei, ei_n, &sbi->s_fc_q[FC_Q_MAIN], 101096e7c02dSDaejun Park i_fc_list) { 1011aa75f4d3SHarshad Shirwadkar if (ei->vfs_inode.i_ino == fc_dentry->fcd_ino) { 1012aa75f4d3SHarshad Shirwadkar inode = &ei->vfs_inode; 1013aa75f4d3SHarshad Shirwadkar break; 1014aa75f4d3SHarshad Shirwadkar } 1015aa75f4d3SHarshad Shirwadkar } 1016aa75f4d3SHarshad Shirwadkar /* 1017aa75f4d3SHarshad Shirwadkar * If we don't find inode in our list, then it was deleted, 1018aa75f4d3SHarshad Shirwadkar * in which case, we don't need to record it's create tag. 1019aa75f4d3SHarshad Shirwadkar */ 1020aa75f4d3SHarshad Shirwadkar if (!inode) 1021aa75f4d3SHarshad Shirwadkar continue; 1022aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock); 1023aa75f4d3SHarshad Shirwadkar 1024aa75f4d3SHarshad Shirwadkar /* 1025aa75f4d3SHarshad Shirwadkar * We first write the inode and then the create dirent. This 1026aa75f4d3SHarshad Shirwadkar * allows the recovery code to create an unnamed inode first 1027aa75f4d3SHarshad Shirwadkar * and then link it to a directory entry. This allows us 1028aa75f4d3SHarshad Shirwadkar * to use namei.c routines almost as is and simplifies 1029aa75f4d3SHarshad Shirwadkar * the recovery code. 
1030aa75f4d3SHarshad Shirwadkar */ 1031aa75f4d3SHarshad Shirwadkar ret = ext4_fc_write_inode(inode, crc); 1032aa75f4d3SHarshad Shirwadkar if (ret) 1033aa75f4d3SHarshad Shirwadkar goto lock_and_exit; 1034aa75f4d3SHarshad Shirwadkar 1035aa75f4d3SHarshad Shirwadkar ret = ext4_fc_write_inode_data(inode, crc); 1036aa75f4d3SHarshad Shirwadkar if (ret) 1037aa75f4d3SHarshad Shirwadkar goto lock_and_exit; 1038aa75f4d3SHarshad Shirwadkar 1039facec450SGuoqing Jiang if (!ext4_fc_add_dentry_tlv(sb, crc, fc_dentry)) { 1040aa75f4d3SHarshad Shirwadkar ret = -ENOSPC; 1041aa75f4d3SHarshad Shirwadkar goto lock_and_exit; 1042aa75f4d3SHarshad Shirwadkar } 1043aa75f4d3SHarshad Shirwadkar 1044aa75f4d3SHarshad Shirwadkar spin_lock(&sbi->s_fc_lock); 1045aa75f4d3SHarshad Shirwadkar } 1046aa75f4d3SHarshad Shirwadkar return 0; 1047aa75f4d3SHarshad Shirwadkar lock_and_exit: 1048aa75f4d3SHarshad Shirwadkar spin_lock(&sbi->s_fc_lock); 1049aa75f4d3SHarshad Shirwadkar return ret; 1050aa75f4d3SHarshad Shirwadkar } 1051aa75f4d3SHarshad Shirwadkar 1052aa75f4d3SHarshad Shirwadkar static int ext4_fc_perform_commit(journal_t *journal) 1053aa75f4d3SHarshad Shirwadkar { 1054aa75f4d3SHarshad Shirwadkar struct super_block *sb = (struct super_block *)(journal->j_private); 1055aa75f4d3SHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB(sb); 1056aa75f4d3SHarshad Shirwadkar struct ext4_inode_info *iter; 1057aa75f4d3SHarshad Shirwadkar struct ext4_fc_head head; 1058aa75f4d3SHarshad Shirwadkar struct inode *inode; 1059aa75f4d3SHarshad Shirwadkar struct blk_plug plug; 1060aa75f4d3SHarshad Shirwadkar int ret = 0; 1061aa75f4d3SHarshad Shirwadkar u32 crc = 0; 1062aa75f4d3SHarshad Shirwadkar 1063aa75f4d3SHarshad Shirwadkar ret = ext4_fc_submit_inode_data_all(journal); 1064aa75f4d3SHarshad Shirwadkar if (ret) 1065aa75f4d3SHarshad Shirwadkar return ret; 1066aa75f4d3SHarshad Shirwadkar 1067aa75f4d3SHarshad Shirwadkar ret = ext4_fc_wait_inode_data_all(journal); 1068aa75f4d3SHarshad Shirwadkar if (ret) 
1069aa75f4d3SHarshad Shirwadkar return ret; 1070aa75f4d3SHarshad Shirwadkar 1071da0c5d26SHarshad Shirwadkar /* 1072da0c5d26SHarshad Shirwadkar * If file system device is different from journal device, issue a cache 1073da0c5d26SHarshad Shirwadkar * flush before we start writing fast commit blocks. 1074da0c5d26SHarshad Shirwadkar */ 1075da0c5d26SHarshad Shirwadkar if (journal->j_fs_dev != journal->j_dev) 1076c6bf3f0eSChristoph Hellwig blkdev_issue_flush(journal->j_fs_dev); 1077da0c5d26SHarshad Shirwadkar 1078aa75f4d3SHarshad Shirwadkar blk_start_plug(&plug); 1079aa75f4d3SHarshad Shirwadkar if (sbi->s_fc_bytes == 0) { 1080aa75f4d3SHarshad Shirwadkar /* 1081aa75f4d3SHarshad Shirwadkar * Add a head tag only if this is the first fast commit 1082aa75f4d3SHarshad Shirwadkar * in this TID. 1083aa75f4d3SHarshad Shirwadkar */ 1084aa75f4d3SHarshad Shirwadkar head.fc_features = cpu_to_le32(EXT4_FC_SUPPORTED_FEATURES); 1085aa75f4d3SHarshad Shirwadkar head.fc_tid = cpu_to_le32( 1086aa75f4d3SHarshad Shirwadkar sbi->s_journal->j_running_transaction->t_tid); 1087aa75f4d3SHarshad Shirwadkar if (!ext4_fc_add_tlv(sb, EXT4_FC_TAG_HEAD, sizeof(head), 1088e1262cd2SXu Yihang (u8 *)&head, &crc)) { 1089e1262cd2SXu Yihang ret = -ENOSPC; 1090aa75f4d3SHarshad Shirwadkar goto out; 1091aa75f4d3SHarshad Shirwadkar } 1092e1262cd2SXu Yihang } 1093aa75f4d3SHarshad Shirwadkar 1094aa75f4d3SHarshad Shirwadkar spin_lock(&sbi->s_fc_lock); 1095aa75f4d3SHarshad Shirwadkar ret = ext4_fc_commit_dentry_updates(journal, &crc); 1096aa75f4d3SHarshad Shirwadkar if (ret) { 1097aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock); 1098aa75f4d3SHarshad Shirwadkar goto out; 1099aa75f4d3SHarshad Shirwadkar } 1100aa75f4d3SHarshad Shirwadkar 110196e7c02dSDaejun Park list_for_each_entry(iter, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) { 1102aa75f4d3SHarshad Shirwadkar inode = &iter->vfs_inode; 1103aa75f4d3SHarshad Shirwadkar if (!ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING)) 1104aa75f4d3SHarshad Shirwadkar 
continue; 1105aa75f4d3SHarshad Shirwadkar 1106aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock); 1107aa75f4d3SHarshad Shirwadkar ret = ext4_fc_write_inode_data(inode, &crc); 1108aa75f4d3SHarshad Shirwadkar if (ret) 1109aa75f4d3SHarshad Shirwadkar goto out; 1110aa75f4d3SHarshad Shirwadkar ret = ext4_fc_write_inode(inode, &crc); 1111aa75f4d3SHarshad Shirwadkar if (ret) 1112aa75f4d3SHarshad Shirwadkar goto out; 1113aa75f4d3SHarshad Shirwadkar spin_lock(&sbi->s_fc_lock); 1114aa75f4d3SHarshad Shirwadkar } 1115aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock); 1116aa75f4d3SHarshad Shirwadkar 1117aa75f4d3SHarshad Shirwadkar ret = ext4_fc_write_tail(sb, crc); 1118aa75f4d3SHarshad Shirwadkar 1119aa75f4d3SHarshad Shirwadkar out: 1120aa75f4d3SHarshad Shirwadkar blk_finish_plug(&plug); 1121aa75f4d3SHarshad Shirwadkar return ret; 1122aa75f4d3SHarshad Shirwadkar } 1123aa75f4d3SHarshad Shirwadkar 1124aa75f4d3SHarshad Shirwadkar /* 1125aa75f4d3SHarshad Shirwadkar * The main commit entry point. Performs a fast commit for transaction 1126aa75f4d3SHarshad Shirwadkar * commit_tid if needed. If it's not possible to perform a fast commit 1127aa75f4d3SHarshad Shirwadkar * due to various reasons, we fall back to full commit. Returns 0 1128aa75f4d3SHarshad Shirwadkar * on success, error otherwise. 
1129aa75f4d3SHarshad Shirwadkar */ 1130aa75f4d3SHarshad Shirwadkar int ext4_fc_commit(journal_t *journal, tid_t commit_tid) 1131aa75f4d3SHarshad Shirwadkar { 1132aa75f4d3SHarshad Shirwadkar struct super_block *sb = (struct super_block *)(journal->j_private); 1133aa75f4d3SHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB(sb); 1134aa75f4d3SHarshad Shirwadkar int nblks = 0, ret, bsize = journal->j_blocksize; 1135aa75f4d3SHarshad Shirwadkar int subtid = atomic_read(&sbi->s_fc_subtid); 1136aa75f4d3SHarshad Shirwadkar int reason = EXT4_FC_REASON_OK, fc_bufs_before = 0; 1137aa75f4d3SHarshad Shirwadkar ktime_t start_time, commit_time; 1138aa75f4d3SHarshad Shirwadkar 1139aa75f4d3SHarshad Shirwadkar trace_ext4_fc_commit_start(sb); 1140aa75f4d3SHarshad Shirwadkar 1141aa75f4d3SHarshad Shirwadkar start_time = ktime_get(); 1142aa75f4d3SHarshad Shirwadkar 1143aa75f4d3SHarshad Shirwadkar if (!test_opt2(sb, JOURNAL_FAST_COMMIT) || 1144aa75f4d3SHarshad Shirwadkar (ext4_fc_is_ineligible(sb))) { 1145aa75f4d3SHarshad Shirwadkar reason = EXT4_FC_REASON_INELIGIBLE; 1146aa75f4d3SHarshad Shirwadkar goto out; 1147aa75f4d3SHarshad Shirwadkar } 1148aa75f4d3SHarshad Shirwadkar 1149aa75f4d3SHarshad Shirwadkar restart_fc: 1150aa75f4d3SHarshad Shirwadkar ret = jbd2_fc_begin_commit(journal, commit_tid); 1151aa75f4d3SHarshad Shirwadkar if (ret == -EALREADY) { 1152aa75f4d3SHarshad Shirwadkar /* There was an ongoing commit, check if we need to restart */ 1153aa75f4d3SHarshad Shirwadkar if (atomic_read(&sbi->s_fc_subtid) <= subtid && 1154aa75f4d3SHarshad Shirwadkar commit_tid > journal->j_commit_sequence) 1155aa75f4d3SHarshad Shirwadkar goto restart_fc; 1156aa75f4d3SHarshad Shirwadkar reason = EXT4_FC_REASON_ALREADY_COMMITTED; 1157aa75f4d3SHarshad Shirwadkar goto out; 1158aa75f4d3SHarshad Shirwadkar } else if (ret) { 1159aa75f4d3SHarshad Shirwadkar sbi->s_fc_stats.fc_ineligible_reason_count[EXT4_FC_COMMIT_FAILED]++; 1160aa75f4d3SHarshad Shirwadkar reason = EXT4_FC_REASON_FC_START_FAILED; 
1161aa75f4d3SHarshad Shirwadkar goto out; 1162aa75f4d3SHarshad Shirwadkar } 1163aa75f4d3SHarshad Shirwadkar 1164aa75f4d3SHarshad Shirwadkar fc_bufs_before = (sbi->s_fc_bytes + bsize - 1) / bsize; 1165aa75f4d3SHarshad Shirwadkar ret = ext4_fc_perform_commit(journal); 1166aa75f4d3SHarshad Shirwadkar if (ret < 0) { 1167aa75f4d3SHarshad Shirwadkar sbi->s_fc_stats.fc_ineligible_reason_count[EXT4_FC_COMMIT_FAILED]++; 1168aa75f4d3SHarshad Shirwadkar reason = EXT4_FC_REASON_FC_FAILED; 1169aa75f4d3SHarshad Shirwadkar goto out; 1170aa75f4d3SHarshad Shirwadkar } 1171aa75f4d3SHarshad Shirwadkar nblks = (sbi->s_fc_bytes + bsize - 1) / bsize - fc_bufs_before; 1172aa75f4d3SHarshad Shirwadkar ret = jbd2_fc_wait_bufs(journal, nblks); 1173aa75f4d3SHarshad Shirwadkar if (ret < 0) { 1174aa75f4d3SHarshad Shirwadkar sbi->s_fc_stats.fc_ineligible_reason_count[EXT4_FC_COMMIT_FAILED]++; 1175aa75f4d3SHarshad Shirwadkar reason = EXT4_FC_REASON_FC_FAILED; 1176aa75f4d3SHarshad Shirwadkar goto out; 1177aa75f4d3SHarshad Shirwadkar } 1178aa75f4d3SHarshad Shirwadkar atomic_inc(&sbi->s_fc_subtid); 1179aa75f4d3SHarshad Shirwadkar jbd2_fc_end_commit(journal); 1180aa75f4d3SHarshad Shirwadkar out: 1181aa75f4d3SHarshad Shirwadkar /* Has any ineligible update happened since we started? 
*/ 1182aa75f4d3SHarshad Shirwadkar if (reason == EXT4_FC_REASON_OK && ext4_fc_is_ineligible(sb)) { 1183aa75f4d3SHarshad Shirwadkar sbi->s_fc_stats.fc_ineligible_reason_count[EXT4_FC_COMMIT_FAILED]++; 1184aa75f4d3SHarshad Shirwadkar reason = EXT4_FC_REASON_INELIGIBLE; 1185aa75f4d3SHarshad Shirwadkar } 1186aa75f4d3SHarshad Shirwadkar 1187aa75f4d3SHarshad Shirwadkar spin_lock(&sbi->s_fc_lock); 1188aa75f4d3SHarshad Shirwadkar if (reason != EXT4_FC_REASON_OK && 1189aa75f4d3SHarshad Shirwadkar reason != EXT4_FC_REASON_ALREADY_COMMITTED) { 1190aa75f4d3SHarshad Shirwadkar sbi->s_fc_stats.fc_ineligible_commits++; 1191aa75f4d3SHarshad Shirwadkar } else { 1192aa75f4d3SHarshad Shirwadkar sbi->s_fc_stats.fc_num_commits++; 1193aa75f4d3SHarshad Shirwadkar sbi->s_fc_stats.fc_numblks += nblks; 1194aa75f4d3SHarshad Shirwadkar } 1195aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock); 1196aa75f4d3SHarshad Shirwadkar nblks = (reason == EXT4_FC_REASON_OK) ? nblks : 0; 1197aa75f4d3SHarshad Shirwadkar trace_ext4_fc_commit_stop(sb, nblks, reason); 1198aa75f4d3SHarshad Shirwadkar commit_time = ktime_to_ns(ktime_sub(ktime_get(), start_time)); 1199aa75f4d3SHarshad Shirwadkar /* 1200aa75f4d3SHarshad Shirwadkar * weight the commit time higher than the average time so we don't 1201aa75f4d3SHarshad Shirwadkar * react too strongly to vast changes in the commit time 1202aa75f4d3SHarshad Shirwadkar */ 1203aa75f4d3SHarshad Shirwadkar if (likely(sbi->s_fc_avg_commit_time)) 1204aa75f4d3SHarshad Shirwadkar sbi->s_fc_avg_commit_time = (commit_time + 1205aa75f4d3SHarshad Shirwadkar sbi->s_fc_avg_commit_time * 3) / 4; 1206aa75f4d3SHarshad Shirwadkar else 1207aa75f4d3SHarshad Shirwadkar sbi->s_fc_avg_commit_time = commit_time; 1208aa75f4d3SHarshad Shirwadkar jbd_debug(1, 1209aa75f4d3SHarshad Shirwadkar "Fast commit ended with blks = %d, reason = %d, subtid - %d", 1210aa75f4d3SHarshad Shirwadkar nblks, reason, subtid); 1211aa75f4d3SHarshad Shirwadkar if (reason == EXT4_FC_REASON_FC_FAILED) 
12120bce577bSHarshad Shirwadkar return jbd2_fc_end_commit_fallback(journal); 1213aa75f4d3SHarshad Shirwadkar if (reason == EXT4_FC_REASON_FC_START_FAILED || 1214aa75f4d3SHarshad Shirwadkar reason == EXT4_FC_REASON_INELIGIBLE) 1215aa75f4d3SHarshad Shirwadkar return jbd2_complete_transaction(journal, commit_tid); 1216aa75f4d3SHarshad Shirwadkar return 0; 1217aa75f4d3SHarshad Shirwadkar } 1218aa75f4d3SHarshad Shirwadkar 1219ff780b91SHarshad Shirwadkar /* 1220ff780b91SHarshad Shirwadkar * Fast commit cleanup routine. This is called after every fast commit and 1221ff780b91SHarshad Shirwadkar * full commit. full is true if we are called after a full commit. 1222ff780b91SHarshad Shirwadkar */ 1223ff780b91SHarshad Shirwadkar static void ext4_fc_cleanup(journal_t *journal, int full) 1224ff780b91SHarshad Shirwadkar { 1225aa75f4d3SHarshad Shirwadkar struct super_block *sb = journal->j_private; 1226aa75f4d3SHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB(sb); 122796e7c02dSDaejun Park struct ext4_inode_info *iter, *iter_n; 1228aa75f4d3SHarshad Shirwadkar struct ext4_fc_dentry_update *fc_dentry; 1229aa75f4d3SHarshad Shirwadkar 1230aa75f4d3SHarshad Shirwadkar if (full && sbi->s_fc_bh) 1231aa75f4d3SHarshad Shirwadkar sbi->s_fc_bh = NULL; 1232aa75f4d3SHarshad Shirwadkar 1233aa75f4d3SHarshad Shirwadkar jbd2_fc_release_bufs(journal); 1234aa75f4d3SHarshad Shirwadkar 1235aa75f4d3SHarshad Shirwadkar spin_lock(&sbi->s_fc_lock); 123696e7c02dSDaejun Park list_for_each_entry_safe(iter, iter_n, &sbi->s_fc_q[FC_Q_MAIN], 123796e7c02dSDaejun Park i_fc_list) { 1238aa75f4d3SHarshad Shirwadkar list_del_init(&iter->i_fc_list); 1239aa75f4d3SHarshad Shirwadkar ext4_clear_inode_state(&iter->vfs_inode, 1240aa75f4d3SHarshad Shirwadkar EXT4_STATE_FC_COMMITTING); 1241aa75f4d3SHarshad Shirwadkar ext4_fc_reset_inode(&iter->vfs_inode); 1242aa75f4d3SHarshad Shirwadkar /* Make sure EXT4_STATE_FC_COMMITTING bit is clear */ 1243aa75f4d3SHarshad Shirwadkar smp_mb(); 1244aa75f4d3SHarshad Shirwadkar #if 
(BITS_PER_LONG < 64) 1245aa75f4d3SHarshad Shirwadkar wake_up_bit(&iter->i_state_flags, EXT4_STATE_FC_COMMITTING); 1246aa75f4d3SHarshad Shirwadkar #else 1247aa75f4d3SHarshad Shirwadkar wake_up_bit(&iter->i_flags, EXT4_STATE_FC_COMMITTING); 1248aa75f4d3SHarshad Shirwadkar #endif 1249aa75f4d3SHarshad Shirwadkar } 1250aa75f4d3SHarshad Shirwadkar 1251aa75f4d3SHarshad Shirwadkar while (!list_empty(&sbi->s_fc_dentry_q[FC_Q_MAIN])) { 1252aa75f4d3SHarshad Shirwadkar fc_dentry = list_first_entry(&sbi->s_fc_dentry_q[FC_Q_MAIN], 1253aa75f4d3SHarshad Shirwadkar struct ext4_fc_dentry_update, 1254aa75f4d3SHarshad Shirwadkar fcd_list); 1255aa75f4d3SHarshad Shirwadkar list_del_init(&fc_dentry->fcd_list); 1256aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock); 1257aa75f4d3SHarshad Shirwadkar 1258aa75f4d3SHarshad Shirwadkar if (fc_dentry->fcd_name.name && 1259aa75f4d3SHarshad Shirwadkar fc_dentry->fcd_name.len > DNAME_INLINE_LEN) 1260aa75f4d3SHarshad Shirwadkar kfree(fc_dentry->fcd_name.name); 1261aa75f4d3SHarshad Shirwadkar kmem_cache_free(ext4_fc_dentry_cachep, fc_dentry); 1262aa75f4d3SHarshad Shirwadkar spin_lock(&sbi->s_fc_lock); 1263aa75f4d3SHarshad Shirwadkar } 1264aa75f4d3SHarshad Shirwadkar 1265aa75f4d3SHarshad Shirwadkar list_splice_init(&sbi->s_fc_dentry_q[FC_Q_STAGING], 1266aa75f4d3SHarshad Shirwadkar &sbi->s_fc_dentry_q[FC_Q_MAIN]); 1267aa75f4d3SHarshad Shirwadkar list_splice_init(&sbi->s_fc_q[FC_Q_STAGING], 126831e203e0SDaejun Park &sbi->s_fc_q[FC_Q_MAIN]); 1269aa75f4d3SHarshad Shirwadkar 12709b5f6c9bSHarshad Shirwadkar ext4_clear_mount_flag(sb, EXT4_MF_FC_COMMITTING); 12719b5f6c9bSHarshad Shirwadkar ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE); 1272aa75f4d3SHarshad Shirwadkar 1273aa75f4d3SHarshad Shirwadkar if (full) 1274aa75f4d3SHarshad Shirwadkar sbi->s_fc_bytes = 0; 1275aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock); 1276aa75f4d3SHarshad Shirwadkar trace_ext4_fc_stats(sb); 1277ff780b91SHarshad Shirwadkar } 12786866d7b3SHarshad Shirwadkar 
12798016e29fSHarshad Shirwadkar /* Ext4 Replay Path Routines */ 12808016e29fSHarshad Shirwadkar 12818016e29fSHarshad Shirwadkar /* Helper struct for dentry replay routines */ 12828016e29fSHarshad Shirwadkar struct dentry_info_args { 12838016e29fSHarshad Shirwadkar int parent_ino, dname_len, ino, inode_len; 12848016e29fSHarshad Shirwadkar char *dname; 12858016e29fSHarshad Shirwadkar }; 12868016e29fSHarshad Shirwadkar 12878016e29fSHarshad Shirwadkar static inline void tl_to_darg(struct dentry_info_args *darg, 1288a7ba36bcSHarshad Shirwadkar struct ext4_fc_tl *tl, u8 *val) 12898016e29fSHarshad Shirwadkar { 1290a7ba36bcSHarshad Shirwadkar struct ext4_fc_dentry_info fcd; 12918016e29fSHarshad Shirwadkar 1292a7ba36bcSHarshad Shirwadkar memcpy(&fcd, val, sizeof(fcd)); 12938016e29fSHarshad Shirwadkar 1294a7ba36bcSHarshad Shirwadkar darg->parent_ino = le32_to_cpu(fcd.fc_parent_ino); 1295a7ba36bcSHarshad Shirwadkar darg->ino = le32_to_cpu(fcd.fc_ino); 1296a7ba36bcSHarshad Shirwadkar darg->dname = val + offsetof(struct ext4_fc_dentry_info, fc_dname); 1297a7ba36bcSHarshad Shirwadkar darg->dname_len = le16_to_cpu(tl->fc_len) - 12988016e29fSHarshad Shirwadkar sizeof(struct ext4_fc_dentry_info); 12998016e29fSHarshad Shirwadkar } 13008016e29fSHarshad Shirwadkar 13018016e29fSHarshad Shirwadkar /* Unlink replay function */ 1302a7ba36bcSHarshad Shirwadkar static int ext4_fc_replay_unlink(struct super_block *sb, struct ext4_fc_tl *tl, 1303a7ba36bcSHarshad Shirwadkar u8 *val) 13048016e29fSHarshad Shirwadkar { 13058016e29fSHarshad Shirwadkar struct inode *inode, *old_parent; 13068016e29fSHarshad Shirwadkar struct qstr entry; 13078016e29fSHarshad Shirwadkar struct dentry_info_args darg; 13088016e29fSHarshad Shirwadkar int ret = 0; 13098016e29fSHarshad Shirwadkar 1310a7ba36bcSHarshad Shirwadkar tl_to_darg(&darg, tl, val); 13118016e29fSHarshad Shirwadkar 13128016e29fSHarshad Shirwadkar trace_ext4_fc_replay(sb, EXT4_FC_TAG_UNLINK, darg.ino, 13138016e29fSHarshad Shirwadkar darg.parent_ino, 
darg.dname_len); 13148016e29fSHarshad Shirwadkar 13158016e29fSHarshad Shirwadkar entry.name = darg.dname; 13168016e29fSHarshad Shirwadkar entry.len = darg.dname_len; 13178016e29fSHarshad Shirwadkar inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL); 13188016e29fSHarshad Shirwadkar 131923dd561aSYi Li if (IS_ERR(inode)) { 13208016e29fSHarshad Shirwadkar jbd_debug(1, "Inode %d not found", darg.ino); 13218016e29fSHarshad Shirwadkar return 0; 13228016e29fSHarshad Shirwadkar } 13238016e29fSHarshad Shirwadkar 13248016e29fSHarshad Shirwadkar old_parent = ext4_iget(sb, darg.parent_ino, 13258016e29fSHarshad Shirwadkar EXT4_IGET_NORMAL); 132623dd561aSYi Li if (IS_ERR(old_parent)) { 13278016e29fSHarshad Shirwadkar jbd_debug(1, "Dir with inode %d not found", darg.parent_ino); 13288016e29fSHarshad Shirwadkar iput(inode); 13298016e29fSHarshad Shirwadkar return 0; 13308016e29fSHarshad Shirwadkar } 13318016e29fSHarshad Shirwadkar 1332a80f7fcfSHarshad Shirwadkar ret = __ext4_unlink(NULL, old_parent, &entry, inode); 13338016e29fSHarshad Shirwadkar /* -ENOENT ok coz it might not exist anymore. 
*/ 13348016e29fSHarshad Shirwadkar if (ret == -ENOENT) 13358016e29fSHarshad Shirwadkar ret = 0; 13368016e29fSHarshad Shirwadkar iput(old_parent); 13378016e29fSHarshad Shirwadkar iput(inode); 13388016e29fSHarshad Shirwadkar return ret; 13398016e29fSHarshad Shirwadkar } 13408016e29fSHarshad Shirwadkar 13418016e29fSHarshad Shirwadkar static int ext4_fc_replay_link_internal(struct super_block *sb, 13428016e29fSHarshad Shirwadkar struct dentry_info_args *darg, 13438016e29fSHarshad Shirwadkar struct inode *inode) 13448016e29fSHarshad Shirwadkar { 13458016e29fSHarshad Shirwadkar struct inode *dir = NULL; 13468016e29fSHarshad Shirwadkar struct dentry *dentry_dir = NULL, *dentry_inode = NULL; 13478016e29fSHarshad Shirwadkar struct qstr qstr_dname = QSTR_INIT(darg->dname, darg->dname_len); 13488016e29fSHarshad Shirwadkar int ret = 0; 13498016e29fSHarshad Shirwadkar 13508016e29fSHarshad Shirwadkar dir = ext4_iget(sb, darg->parent_ino, EXT4_IGET_NORMAL); 13518016e29fSHarshad Shirwadkar if (IS_ERR(dir)) { 13528016e29fSHarshad Shirwadkar jbd_debug(1, "Dir with inode %d not found.", darg->parent_ino); 13538016e29fSHarshad Shirwadkar dir = NULL; 13548016e29fSHarshad Shirwadkar goto out; 13558016e29fSHarshad Shirwadkar } 13568016e29fSHarshad Shirwadkar 13578016e29fSHarshad Shirwadkar dentry_dir = d_obtain_alias(dir); 13588016e29fSHarshad Shirwadkar if (IS_ERR(dentry_dir)) { 13598016e29fSHarshad Shirwadkar jbd_debug(1, "Failed to obtain dentry"); 13608016e29fSHarshad Shirwadkar dentry_dir = NULL; 13618016e29fSHarshad Shirwadkar goto out; 13628016e29fSHarshad Shirwadkar } 13638016e29fSHarshad Shirwadkar 13648016e29fSHarshad Shirwadkar dentry_inode = d_alloc(dentry_dir, &qstr_dname); 13658016e29fSHarshad Shirwadkar if (!dentry_inode) { 13668016e29fSHarshad Shirwadkar jbd_debug(1, "Inode dentry not created."); 13678016e29fSHarshad Shirwadkar ret = -ENOMEM; 13688016e29fSHarshad Shirwadkar goto out; 13698016e29fSHarshad Shirwadkar } 13708016e29fSHarshad Shirwadkar 13718016e29fSHarshad 
Shirwadkar ret = __ext4_link(dir, inode, dentry_inode); 13728016e29fSHarshad Shirwadkar /* 13738016e29fSHarshad Shirwadkar * It's possible that link already existed since data blocks 13748016e29fSHarshad Shirwadkar * for the dir in question got persisted before we crashed OR 13758016e29fSHarshad Shirwadkar * we replayed this tag and crashed before the entire replay 13768016e29fSHarshad Shirwadkar * could complete. 13778016e29fSHarshad Shirwadkar */ 13788016e29fSHarshad Shirwadkar if (ret && ret != -EEXIST) { 13798016e29fSHarshad Shirwadkar jbd_debug(1, "Failed to link\n"); 13808016e29fSHarshad Shirwadkar goto out; 13818016e29fSHarshad Shirwadkar } 13828016e29fSHarshad Shirwadkar 13838016e29fSHarshad Shirwadkar ret = 0; 13848016e29fSHarshad Shirwadkar out: 13858016e29fSHarshad Shirwadkar if (dentry_dir) { 13868016e29fSHarshad Shirwadkar d_drop(dentry_dir); 13878016e29fSHarshad Shirwadkar dput(dentry_dir); 13888016e29fSHarshad Shirwadkar } else if (dir) { 13898016e29fSHarshad Shirwadkar iput(dir); 13908016e29fSHarshad Shirwadkar } 13918016e29fSHarshad Shirwadkar if (dentry_inode) { 13928016e29fSHarshad Shirwadkar d_drop(dentry_inode); 13938016e29fSHarshad Shirwadkar dput(dentry_inode); 13948016e29fSHarshad Shirwadkar } 13958016e29fSHarshad Shirwadkar 13968016e29fSHarshad Shirwadkar return ret; 13978016e29fSHarshad Shirwadkar } 13988016e29fSHarshad Shirwadkar 13998016e29fSHarshad Shirwadkar /* Link replay function */ 1400a7ba36bcSHarshad Shirwadkar static int ext4_fc_replay_link(struct super_block *sb, struct ext4_fc_tl *tl, 1401a7ba36bcSHarshad Shirwadkar u8 *val) 14028016e29fSHarshad Shirwadkar { 14038016e29fSHarshad Shirwadkar struct inode *inode; 14048016e29fSHarshad Shirwadkar struct dentry_info_args darg; 14058016e29fSHarshad Shirwadkar int ret = 0; 14068016e29fSHarshad Shirwadkar 1407a7ba36bcSHarshad Shirwadkar tl_to_darg(&darg, tl, val); 14088016e29fSHarshad Shirwadkar trace_ext4_fc_replay(sb, EXT4_FC_TAG_LINK, darg.ino, 14098016e29fSHarshad Shirwadkar 
darg.parent_ino, darg.dname_len); 14108016e29fSHarshad Shirwadkar 14118016e29fSHarshad Shirwadkar inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL); 141223dd561aSYi Li if (IS_ERR(inode)) { 14138016e29fSHarshad Shirwadkar jbd_debug(1, "Inode not found."); 14148016e29fSHarshad Shirwadkar return 0; 14158016e29fSHarshad Shirwadkar } 14168016e29fSHarshad Shirwadkar 14178016e29fSHarshad Shirwadkar ret = ext4_fc_replay_link_internal(sb, &darg, inode); 14188016e29fSHarshad Shirwadkar iput(inode); 14198016e29fSHarshad Shirwadkar return ret; 14208016e29fSHarshad Shirwadkar } 14218016e29fSHarshad Shirwadkar 14228016e29fSHarshad Shirwadkar /* 14238016e29fSHarshad Shirwadkar * Record all the modified inodes during replay. We use this later to setup 14248016e29fSHarshad Shirwadkar * block bitmaps correctly. 14258016e29fSHarshad Shirwadkar */ 14268016e29fSHarshad Shirwadkar static int ext4_fc_record_modified_inode(struct super_block *sb, int ino) 14278016e29fSHarshad Shirwadkar { 14288016e29fSHarshad Shirwadkar struct ext4_fc_replay_state *state; 14298016e29fSHarshad Shirwadkar int i; 14308016e29fSHarshad Shirwadkar 14318016e29fSHarshad Shirwadkar state = &EXT4_SB(sb)->s_fc_replay_state; 14328016e29fSHarshad Shirwadkar for (i = 0; i < state->fc_modified_inodes_used; i++) 14338016e29fSHarshad Shirwadkar if (state->fc_modified_inodes[i] == ino) 14348016e29fSHarshad Shirwadkar return 0; 14358016e29fSHarshad Shirwadkar if (state->fc_modified_inodes_used == state->fc_modified_inodes_size) { 14368016e29fSHarshad Shirwadkar state->fc_modified_inodes_size += 14378016e29fSHarshad Shirwadkar EXT4_FC_REPLAY_REALLOC_INCREMENT; 14388016e29fSHarshad Shirwadkar state->fc_modified_inodes = krealloc( 14398016e29fSHarshad Shirwadkar state->fc_modified_inodes, sizeof(int) * 14408016e29fSHarshad Shirwadkar state->fc_modified_inodes_size, 14418016e29fSHarshad Shirwadkar GFP_KERNEL); 14428016e29fSHarshad Shirwadkar if (!state->fc_modified_inodes) 14438016e29fSHarshad Shirwadkar return -ENOMEM; 
14448016e29fSHarshad Shirwadkar } 14458016e29fSHarshad Shirwadkar state->fc_modified_inodes[state->fc_modified_inodes_used++] = ino; 14468016e29fSHarshad Shirwadkar return 0; 14478016e29fSHarshad Shirwadkar } 14488016e29fSHarshad Shirwadkar 14498016e29fSHarshad Shirwadkar /* 14508016e29fSHarshad Shirwadkar * Inode replay function 14518016e29fSHarshad Shirwadkar */ 1452a7ba36bcSHarshad Shirwadkar static int ext4_fc_replay_inode(struct super_block *sb, struct ext4_fc_tl *tl, 1453a7ba36bcSHarshad Shirwadkar u8 *val) 14548016e29fSHarshad Shirwadkar { 1455a7ba36bcSHarshad Shirwadkar struct ext4_fc_inode fc_inode; 14568016e29fSHarshad Shirwadkar struct ext4_inode *raw_inode; 14578016e29fSHarshad Shirwadkar struct ext4_inode *raw_fc_inode; 14588016e29fSHarshad Shirwadkar struct inode *inode = NULL; 14598016e29fSHarshad Shirwadkar struct ext4_iloc iloc; 14608016e29fSHarshad Shirwadkar int inode_len, ino, ret, tag = le16_to_cpu(tl->fc_tag); 14618016e29fSHarshad Shirwadkar struct ext4_extent_header *eh; 14628016e29fSHarshad Shirwadkar 1463a7ba36bcSHarshad Shirwadkar memcpy(&fc_inode, val, sizeof(fc_inode)); 14648016e29fSHarshad Shirwadkar 1465a7ba36bcSHarshad Shirwadkar ino = le32_to_cpu(fc_inode.fc_ino); 14668016e29fSHarshad Shirwadkar trace_ext4_fc_replay(sb, tag, ino, 0, 0); 14678016e29fSHarshad Shirwadkar 14688016e29fSHarshad Shirwadkar inode = ext4_iget(sb, ino, EXT4_IGET_NORMAL); 146923dd561aSYi Li if (!IS_ERR(inode)) { 14708016e29fSHarshad Shirwadkar ext4_ext_clear_bb(inode); 14718016e29fSHarshad Shirwadkar iput(inode); 14728016e29fSHarshad Shirwadkar } 147323dd561aSYi Li inode = NULL; 14748016e29fSHarshad Shirwadkar 14758016e29fSHarshad Shirwadkar ext4_fc_record_modified_inode(sb, ino); 14768016e29fSHarshad Shirwadkar 1477a7ba36bcSHarshad Shirwadkar raw_fc_inode = (struct ext4_inode *) 1478a7ba36bcSHarshad Shirwadkar (val + offsetof(struct ext4_fc_inode, fc_raw_inode)); 14798016e29fSHarshad Shirwadkar ret = ext4_get_fc_inode_loc(sb, ino, &iloc); 14808016e29fSHarshad 
Shirwadkar if (ret) 14818016e29fSHarshad Shirwadkar goto out; 14828016e29fSHarshad Shirwadkar 1483a7ba36bcSHarshad Shirwadkar inode_len = le16_to_cpu(tl->fc_len) - sizeof(struct ext4_fc_inode); 14848016e29fSHarshad Shirwadkar raw_inode = ext4_raw_inode(&iloc); 14858016e29fSHarshad Shirwadkar 14868016e29fSHarshad Shirwadkar memcpy(raw_inode, raw_fc_inode, offsetof(struct ext4_inode, i_block)); 14878016e29fSHarshad Shirwadkar memcpy(&raw_inode->i_generation, &raw_fc_inode->i_generation, 14888016e29fSHarshad Shirwadkar inode_len - offsetof(struct ext4_inode, i_generation)); 14898016e29fSHarshad Shirwadkar if (le32_to_cpu(raw_inode->i_flags) & EXT4_EXTENTS_FL) { 14908016e29fSHarshad Shirwadkar eh = (struct ext4_extent_header *)(&raw_inode->i_block[0]); 14918016e29fSHarshad Shirwadkar if (eh->eh_magic != EXT4_EXT_MAGIC) { 14928016e29fSHarshad Shirwadkar memset(eh, 0, sizeof(*eh)); 14938016e29fSHarshad Shirwadkar eh->eh_magic = EXT4_EXT_MAGIC; 14948016e29fSHarshad Shirwadkar eh->eh_max = cpu_to_le16( 14958016e29fSHarshad Shirwadkar (sizeof(raw_inode->i_block) - 14968016e29fSHarshad Shirwadkar sizeof(struct ext4_extent_header)) 14978016e29fSHarshad Shirwadkar / sizeof(struct ext4_extent)); 14988016e29fSHarshad Shirwadkar } 14998016e29fSHarshad Shirwadkar } else if (le32_to_cpu(raw_inode->i_flags) & EXT4_INLINE_DATA_FL) { 15008016e29fSHarshad Shirwadkar memcpy(raw_inode->i_block, raw_fc_inode->i_block, 15018016e29fSHarshad Shirwadkar sizeof(raw_inode->i_block)); 15028016e29fSHarshad Shirwadkar } 15038016e29fSHarshad Shirwadkar 15048016e29fSHarshad Shirwadkar /* Immediately update the inode on disk. 
*/ 15058016e29fSHarshad Shirwadkar ret = ext4_handle_dirty_metadata(NULL, NULL, iloc.bh); 15068016e29fSHarshad Shirwadkar if (ret) 15078016e29fSHarshad Shirwadkar goto out; 15088016e29fSHarshad Shirwadkar ret = sync_dirty_buffer(iloc.bh); 15098016e29fSHarshad Shirwadkar if (ret) 15108016e29fSHarshad Shirwadkar goto out; 15118016e29fSHarshad Shirwadkar ret = ext4_mark_inode_used(sb, ino); 15128016e29fSHarshad Shirwadkar if (ret) 15138016e29fSHarshad Shirwadkar goto out; 15148016e29fSHarshad Shirwadkar 15158016e29fSHarshad Shirwadkar /* Given that we just wrote the inode on disk, this SHOULD succeed. */ 15168016e29fSHarshad Shirwadkar inode = ext4_iget(sb, ino, EXT4_IGET_NORMAL); 151723dd561aSYi Li if (IS_ERR(inode)) { 15188016e29fSHarshad Shirwadkar jbd_debug(1, "Inode not found."); 15198016e29fSHarshad Shirwadkar return -EFSCORRUPTED; 15208016e29fSHarshad Shirwadkar } 15218016e29fSHarshad Shirwadkar 15228016e29fSHarshad Shirwadkar /* 15238016e29fSHarshad Shirwadkar * Our allocator could have made different decisions than before 15248016e29fSHarshad Shirwadkar * crashing. This should be fixed but until then, we calculate 15258016e29fSHarshad Shirwadkar * the number of blocks the inode. 
15268016e29fSHarshad Shirwadkar */ 15278016e29fSHarshad Shirwadkar ext4_ext_replay_set_iblocks(inode); 15288016e29fSHarshad Shirwadkar 15298016e29fSHarshad Shirwadkar inode->i_generation = le32_to_cpu(ext4_raw_inode(&iloc)->i_generation); 15308016e29fSHarshad Shirwadkar ext4_reset_inode_seed(inode); 15318016e29fSHarshad Shirwadkar 15328016e29fSHarshad Shirwadkar ext4_inode_csum_set(inode, ext4_raw_inode(&iloc), EXT4_I(inode)); 15338016e29fSHarshad Shirwadkar ret = ext4_handle_dirty_metadata(NULL, NULL, iloc.bh); 15348016e29fSHarshad Shirwadkar sync_dirty_buffer(iloc.bh); 15358016e29fSHarshad Shirwadkar brelse(iloc.bh); 15368016e29fSHarshad Shirwadkar out: 15378016e29fSHarshad Shirwadkar iput(inode); 15388016e29fSHarshad Shirwadkar if (!ret) 1539c6bf3f0eSChristoph Hellwig blkdev_issue_flush(sb->s_bdev); 15408016e29fSHarshad Shirwadkar 15418016e29fSHarshad Shirwadkar return 0; 15428016e29fSHarshad Shirwadkar } 15438016e29fSHarshad Shirwadkar 15448016e29fSHarshad Shirwadkar /* 15458016e29fSHarshad Shirwadkar * Dentry create replay function. 15468016e29fSHarshad Shirwadkar * 15478016e29fSHarshad Shirwadkar * EXT4_FC_TAG_CREAT is preceded by EXT4_FC_TAG_INODE_FULL. Which means, the 15488016e29fSHarshad Shirwadkar * inode for which we are trying to create a dentry here, should already have 15498016e29fSHarshad Shirwadkar * been replayed before we start here. 
15508016e29fSHarshad Shirwadkar */ 1551a7ba36bcSHarshad Shirwadkar static int ext4_fc_replay_create(struct super_block *sb, struct ext4_fc_tl *tl, 1552a7ba36bcSHarshad Shirwadkar u8 *val) 15538016e29fSHarshad Shirwadkar { 15548016e29fSHarshad Shirwadkar int ret = 0; 15558016e29fSHarshad Shirwadkar struct inode *inode = NULL; 15568016e29fSHarshad Shirwadkar struct inode *dir = NULL; 15578016e29fSHarshad Shirwadkar struct dentry_info_args darg; 15588016e29fSHarshad Shirwadkar 1559a7ba36bcSHarshad Shirwadkar tl_to_darg(&darg, tl, val); 15608016e29fSHarshad Shirwadkar 15618016e29fSHarshad Shirwadkar trace_ext4_fc_replay(sb, EXT4_FC_TAG_CREAT, darg.ino, 15628016e29fSHarshad Shirwadkar darg.parent_ino, darg.dname_len); 15638016e29fSHarshad Shirwadkar 15648016e29fSHarshad Shirwadkar /* This takes care of update group descriptor and other metadata */ 15658016e29fSHarshad Shirwadkar ret = ext4_mark_inode_used(sb, darg.ino); 15668016e29fSHarshad Shirwadkar if (ret) 15678016e29fSHarshad Shirwadkar goto out; 15688016e29fSHarshad Shirwadkar 15698016e29fSHarshad Shirwadkar inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL); 157023dd561aSYi Li if (IS_ERR(inode)) { 15718016e29fSHarshad Shirwadkar jbd_debug(1, "inode %d not found.", darg.ino); 15728016e29fSHarshad Shirwadkar inode = NULL; 15738016e29fSHarshad Shirwadkar ret = -EINVAL; 15748016e29fSHarshad Shirwadkar goto out; 15758016e29fSHarshad Shirwadkar } 15768016e29fSHarshad Shirwadkar 15778016e29fSHarshad Shirwadkar if (S_ISDIR(inode->i_mode)) { 15788016e29fSHarshad Shirwadkar /* 15798016e29fSHarshad Shirwadkar * If we are creating a directory, we need to make sure that the 15808016e29fSHarshad Shirwadkar * dot and dot dot dirents are setup properly. 
15818016e29fSHarshad Shirwadkar */ 15828016e29fSHarshad Shirwadkar dir = ext4_iget(sb, darg.parent_ino, EXT4_IGET_NORMAL); 158323dd561aSYi Li if (IS_ERR(dir)) { 15848016e29fSHarshad Shirwadkar jbd_debug(1, "Dir %d not found.", darg.ino); 15858016e29fSHarshad Shirwadkar goto out; 15868016e29fSHarshad Shirwadkar } 15878016e29fSHarshad Shirwadkar ret = ext4_init_new_dir(NULL, dir, inode); 15888016e29fSHarshad Shirwadkar iput(dir); 15898016e29fSHarshad Shirwadkar if (ret) { 15908016e29fSHarshad Shirwadkar ret = 0; 15918016e29fSHarshad Shirwadkar goto out; 15928016e29fSHarshad Shirwadkar } 15938016e29fSHarshad Shirwadkar } 15948016e29fSHarshad Shirwadkar ret = ext4_fc_replay_link_internal(sb, &darg, inode); 15958016e29fSHarshad Shirwadkar if (ret) 15968016e29fSHarshad Shirwadkar goto out; 15978016e29fSHarshad Shirwadkar set_nlink(inode, 1); 15988016e29fSHarshad Shirwadkar ext4_mark_inode_dirty(NULL, inode); 15998016e29fSHarshad Shirwadkar out: 16008016e29fSHarshad Shirwadkar if (inode) 16018016e29fSHarshad Shirwadkar iput(inode); 16028016e29fSHarshad Shirwadkar return ret; 16038016e29fSHarshad Shirwadkar } 16048016e29fSHarshad Shirwadkar 16058016e29fSHarshad Shirwadkar /* 16068016e29fSHarshad Shirwadkar * Record physical disk regions which are in use as per fast commit area. Our 16078016e29fSHarshad Shirwadkar * simple replay phase allocator excludes these regions from allocation. 
16088016e29fSHarshad Shirwadkar */ 16098016e29fSHarshad Shirwadkar static int ext4_fc_record_regions(struct super_block *sb, int ino, 16108016e29fSHarshad Shirwadkar ext4_lblk_t lblk, ext4_fsblk_t pblk, int len) 16118016e29fSHarshad Shirwadkar { 16128016e29fSHarshad Shirwadkar struct ext4_fc_replay_state *state; 16138016e29fSHarshad Shirwadkar struct ext4_fc_alloc_region *region; 16148016e29fSHarshad Shirwadkar 16158016e29fSHarshad Shirwadkar state = &EXT4_SB(sb)->s_fc_replay_state; 16168016e29fSHarshad Shirwadkar if (state->fc_regions_used == state->fc_regions_size) { 16178016e29fSHarshad Shirwadkar state->fc_regions_size += 16188016e29fSHarshad Shirwadkar EXT4_FC_REPLAY_REALLOC_INCREMENT; 16198016e29fSHarshad Shirwadkar state->fc_regions = krealloc( 16208016e29fSHarshad Shirwadkar state->fc_regions, 16218016e29fSHarshad Shirwadkar state->fc_regions_size * 16228016e29fSHarshad Shirwadkar sizeof(struct ext4_fc_alloc_region), 16238016e29fSHarshad Shirwadkar GFP_KERNEL); 16248016e29fSHarshad Shirwadkar if (!state->fc_regions) 16258016e29fSHarshad Shirwadkar return -ENOMEM; 16268016e29fSHarshad Shirwadkar } 16278016e29fSHarshad Shirwadkar region = &state->fc_regions[state->fc_regions_used++]; 16288016e29fSHarshad Shirwadkar region->ino = ino; 16298016e29fSHarshad Shirwadkar region->lblk = lblk; 16308016e29fSHarshad Shirwadkar region->pblk = pblk; 16318016e29fSHarshad Shirwadkar region->len = len; 16328016e29fSHarshad Shirwadkar 16338016e29fSHarshad Shirwadkar return 0; 16348016e29fSHarshad Shirwadkar } 16358016e29fSHarshad Shirwadkar 16368016e29fSHarshad Shirwadkar /* Replay add range tag */ 16378016e29fSHarshad Shirwadkar static int ext4_fc_replay_add_range(struct super_block *sb, 1638a7ba36bcSHarshad Shirwadkar struct ext4_fc_tl *tl, u8 *val) 16398016e29fSHarshad Shirwadkar { 1640a7ba36bcSHarshad Shirwadkar struct ext4_fc_add_range fc_add_ex; 16418016e29fSHarshad Shirwadkar struct ext4_extent newex, *ex; 16428016e29fSHarshad Shirwadkar struct inode *inode; 
16438016e29fSHarshad Shirwadkar ext4_lblk_t start, cur; 16448016e29fSHarshad Shirwadkar int remaining, len; 16458016e29fSHarshad Shirwadkar ext4_fsblk_t start_pblk; 16468016e29fSHarshad Shirwadkar struct ext4_map_blocks map; 16478016e29fSHarshad Shirwadkar struct ext4_ext_path *path = NULL; 16488016e29fSHarshad Shirwadkar int ret; 16498016e29fSHarshad Shirwadkar 1650a7ba36bcSHarshad Shirwadkar memcpy(&fc_add_ex, val, sizeof(fc_add_ex)); 1651a7ba36bcSHarshad Shirwadkar ex = (struct ext4_extent *)&fc_add_ex.fc_ex; 16528016e29fSHarshad Shirwadkar 16538016e29fSHarshad Shirwadkar trace_ext4_fc_replay(sb, EXT4_FC_TAG_ADD_RANGE, 1654a7ba36bcSHarshad Shirwadkar le32_to_cpu(fc_add_ex.fc_ino), le32_to_cpu(ex->ee_block), 16558016e29fSHarshad Shirwadkar ext4_ext_get_actual_len(ex)); 16568016e29fSHarshad Shirwadkar 1657a7ba36bcSHarshad Shirwadkar inode = ext4_iget(sb, le32_to_cpu(fc_add_ex.fc_ino), EXT4_IGET_NORMAL); 165823dd561aSYi Li if (IS_ERR(inode)) { 16598016e29fSHarshad Shirwadkar jbd_debug(1, "Inode not found."); 16608016e29fSHarshad Shirwadkar return 0; 16618016e29fSHarshad Shirwadkar } 16628016e29fSHarshad Shirwadkar 16638016e29fSHarshad Shirwadkar ret = ext4_fc_record_modified_inode(sb, inode->i_ino); 16648016e29fSHarshad Shirwadkar 16658016e29fSHarshad Shirwadkar start = le32_to_cpu(ex->ee_block); 16668016e29fSHarshad Shirwadkar start_pblk = ext4_ext_pblock(ex); 16678016e29fSHarshad Shirwadkar len = ext4_ext_get_actual_len(ex); 16688016e29fSHarshad Shirwadkar 16698016e29fSHarshad Shirwadkar cur = start; 16708016e29fSHarshad Shirwadkar remaining = len; 16718016e29fSHarshad Shirwadkar jbd_debug(1, "ADD_RANGE, lblk %d, pblk %lld, len %d, unwritten %d, inode %ld\n", 16728016e29fSHarshad Shirwadkar start, start_pblk, len, ext4_ext_is_unwritten(ex), 16738016e29fSHarshad Shirwadkar inode->i_ino); 16748016e29fSHarshad Shirwadkar 16758016e29fSHarshad Shirwadkar while (remaining > 0) { 16768016e29fSHarshad Shirwadkar map.m_lblk = cur; 16778016e29fSHarshad Shirwadkar map.m_len 
= remaining; 16788016e29fSHarshad Shirwadkar map.m_pblk = 0; 16798016e29fSHarshad Shirwadkar ret = ext4_map_blocks(NULL, inode, &map, 0); 16808016e29fSHarshad Shirwadkar 16818016e29fSHarshad Shirwadkar if (ret < 0) { 16828016e29fSHarshad Shirwadkar iput(inode); 16838016e29fSHarshad Shirwadkar return 0; 16848016e29fSHarshad Shirwadkar } 16858016e29fSHarshad Shirwadkar 16868016e29fSHarshad Shirwadkar if (ret == 0) { 16878016e29fSHarshad Shirwadkar /* Range is not mapped */ 16888016e29fSHarshad Shirwadkar path = ext4_find_extent(inode, cur, NULL, 0); 16898c9be1e5SHarshad Shirwadkar if (IS_ERR(path)) { 16908c9be1e5SHarshad Shirwadkar iput(inode); 16918c9be1e5SHarshad Shirwadkar return 0; 16928c9be1e5SHarshad Shirwadkar } 16938016e29fSHarshad Shirwadkar memset(&newex, 0, sizeof(newex)); 16948016e29fSHarshad Shirwadkar newex.ee_block = cpu_to_le32(cur); 16958016e29fSHarshad Shirwadkar ext4_ext_store_pblock( 16968016e29fSHarshad Shirwadkar &newex, start_pblk + cur - start); 16978016e29fSHarshad Shirwadkar newex.ee_len = cpu_to_le16(map.m_len); 16988016e29fSHarshad Shirwadkar if (ext4_ext_is_unwritten(ex)) 16998016e29fSHarshad Shirwadkar ext4_ext_mark_unwritten(&newex); 17008016e29fSHarshad Shirwadkar down_write(&EXT4_I(inode)->i_data_sem); 17018016e29fSHarshad Shirwadkar ret = ext4_ext_insert_extent( 17028016e29fSHarshad Shirwadkar NULL, inode, &path, &newex, 0); 17038016e29fSHarshad Shirwadkar up_write((&EXT4_I(inode)->i_data_sem)); 17048016e29fSHarshad Shirwadkar ext4_ext_drop_refs(path); 17058016e29fSHarshad Shirwadkar kfree(path); 17068016e29fSHarshad Shirwadkar if (ret) { 17078016e29fSHarshad Shirwadkar iput(inode); 17088016e29fSHarshad Shirwadkar return 0; 17098016e29fSHarshad Shirwadkar } 17108016e29fSHarshad Shirwadkar goto next; 17118016e29fSHarshad Shirwadkar } 17128016e29fSHarshad Shirwadkar 17138016e29fSHarshad Shirwadkar if (start_pblk + cur - start != map.m_pblk) { 17148016e29fSHarshad Shirwadkar /* 17158016e29fSHarshad Shirwadkar * Logical to physical 
mapping changed. This can happen 17168016e29fSHarshad Shirwadkar * if this range was removed and then reallocated to 17178016e29fSHarshad Shirwadkar * map to new physical blocks during a fast commit. 17188016e29fSHarshad Shirwadkar */ 17198016e29fSHarshad Shirwadkar ret = ext4_ext_replay_update_ex(inode, cur, map.m_len, 17208016e29fSHarshad Shirwadkar ext4_ext_is_unwritten(ex), 17218016e29fSHarshad Shirwadkar start_pblk + cur - start); 17228016e29fSHarshad Shirwadkar if (ret) { 17238016e29fSHarshad Shirwadkar iput(inode); 17248016e29fSHarshad Shirwadkar return 0; 17258016e29fSHarshad Shirwadkar } 17268016e29fSHarshad Shirwadkar /* 17278016e29fSHarshad Shirwadkar * Mark the old blocks as free since they aren't used 17288016e29fSHarshad Shirwadkar * anymore. We maintain an array of all the modified 17298016e29fSHarshad Shirwadkar * inodes. In case these blocks are still used at either 17308016e29fSHarshad Shirwadkar * a different logical range in the same inode or in 17318016e29fSHarshad Shirwadkar * some different inode, we will mark them as allocated 17328016e29fSHarshad Shirwadkar * at the end of the FC replay using our array of 17338016e29fSHarshad Shirwadkar * modified inodes. 
17348016e29fSHarshad Shirwadkar */ 17358016e29fSHarshad Shirwadkar ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, 0); 17368016e29fSHarshad Shirwadkar goto next; 17378016e29fSHarshad Shirwadkar } 17388016e29fSHarshad Shirwadkar 17398016e29fSHarshad Shirwadkar /* Range is mapped and needs a state change */ 1740fcdf3c34SArnd Bergmann jbd_debug(1, "Converting from %ld to %d %lld", 17418016e29fSHarshad Shirwadkar map.m_flags & EXT4_MAP_UNWRITTEN, 17428016e29fSHarshad Shirwadkar ext4_ext_is_unwritten(ex), map.m_pblk); 17438016e29fSHarshad Shirwadkar ret = ext4_ext_replay_update_ex(inode, cur, map.m_len, 17448016e29fSHarshad Shirwadkar ext4_ext_is_unwritten(ex), map.m_pblk); 17458016e29fSHarshad Shirwadkar if (ret) { 17468016e29fSHarshad Shirwadkar iput(inode); 17478016e29fSHarshad Shirwadkar return 0; 17488016e29fSHarshad Shirwadkar } 17498016e29fSHarshad Shirwadkar /* 17508016e29fSHarshad Shirwadkar * We may have split the extent tree while toggling the state. 17518016e29fSHarshad Shirwadkar * Try to shrink the extent tree now. 
17528016e29fSHarshad Shirwadkar */ 17538016e29fSHarshad Shirwadkar ext4_ext_replay_shrink_inode(inode, start + len); 17548016e29fSHarshad Shirwadkar next: 17558016e29fSHarshad Shirwadkar cur += map.m_len; 17568016e29fSHarshad Shirwadkar remaining -= map.m_len; 17578016e29fSHarshad Shirwadkar } 17588016e29fSHarshad Shirwadkar ext4_ext_replay_shrink_inode(inode, i_size_read(inode) >> 17598016e29fSHarshad Shirwadkar sb->s_blocksize_bits); 17608016e29fSHarshad Shirwadkar iput(inode); 17618016e29fSHarshad Shirwadkar return 0; 17628016e29fSHarshad Shirwadkar } 17638016e29fSHarshad Shirwadkar 17648016e29fSHarshad Shirwadkar /* Replay DEL_RANGE tag */ 17658016e29fSHarshad Shirwadkar static int 1766a7ba36bcSHarshad Shirwadkar ext4_fc_replay_del_range(struct super_block *sb, struct ext4_fc_tl *tl, 1767a7ba36bcSHarshad Shirwadkar u8 *val) 17688016e29fSHarshad Shirwadkar { 17698016e29fSHarshad Shirwadkar struct inode *inode; 1770a7ba36bcSHarshad Shirwadkar struct ext4_fc_del_range lrange; 17718016e29fSHarshad Shirwadkar struct ext4_map_blocks map; 17728016e29fSHarshad Shirwadkar ext4_lblk_t cur, remaining; 17738016e29fSHarshad Shirwadkar int ret; 17748016e29fSHarshad Shirwadkar 1775a7ba36bcSHarshad Shirwadkar memcpy(&lrange, val, sizeof(lrange)); 1776a7ba36bcSHarshad Shirwadkar cur = le32_to_cpu(lrange.fc_lblk); 1777a7ba36bcSHarshad Shirwadkar remaining = le32_to_cpu(lrange.fc_len); 17788016e29fSHarshad Shirwadkar 17798016e29fSHarshad Shirwadkar trace_ext4_fc_replay(sb, EXT4_FC_TAG_DEL_RANGE, 1780a7ba36bcSHarshad Shirwadkar le32_to_cpu(lrange.fc_ino), cur, remaining); 17818016e29fSHarshad Shirwadkar 1782a7ba36bcSHarshad Shirwadkar inode = ext4_iget(sb, le32_to_cpu(lrange.fc_ino), EXT4_IGET_NORMAL); 178323dd561aSYi Li if (IS_ERR(inode)) { 1784a7ba36bcSHarshad Shirwadkar jbd_debug(1, "Inode %d not found", le32_to_cpu(lrange.fc_ino)); 17858016e29fSHarshad Shirwadkar return 0; 17868016e29fSHarshad Shirwadkar } 17878016e29fSHarshad Shirwadkar 17888016e29fSHarshad Shirwadkar ret = 
ext4_fc_record_modified_inode(sb, inode->i_ino); 17898016e29fSHarshad Shirwadkar 17908016e29fSHarshad Shirwadkar jbd_debug(1, "DEL_RANGE, inode %ld, lblk %d, len %d\n", 1791a7ba36bcSHarshad Shirwadkar inode->i_ino, le32_to_cpu(lrange.fc_lblk), 1792a7ba36bcSHarshad Shirwadkar le32_to_cpu(lrange.fc_len)); 17938016e29fSHarshad Shirwadkar while (remaining > 0) { 17948016e29fSHarshad Shirwadkar map.m_lblk = cur; 17958016e29fSHarshad Shirwadkar map.m_len = remaining; 17968016e29fSHarshad Shirwadkar 17978016e29fSHarshad Shirwadkar ret = ext4_map_blocks(NULL, inode, &map, 0); 17988016e29fSHarshad Shirwadkar if (ret < 0) { 17998016e29fSHarshad Shirwadkar iput(inode); 18008016e29fSHarshad Shirwadkar return 0; 18018016e29fSHarshad Shirwadkar } 18028016e29fSHarshad Shirwadkar if (ret > 0) { 18038016e29fSHarshad Shirwadkar remaining -= ret; 18048016e29fSHarshad Shirwadkar cur += ret; 18058016e29fSHarshad Shirwadkar ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, 0); 18068016e29fSHarshad Shirwadkar } else { 18078016e29fSHarshad Shirwadkar remaining -= map.m_len; 18088016e29fSHarshad Shirwadkar cur += map.m_len; 18098016e29fSHarshad Shirwadkar } 18108016e29fSHarshad Shirwadkar } 18118016e29fSHarshad Shirwadkar 18128016e29fSHarshad Shirwadkar ret = ext4_punch_hole(inode, 1813a7ba36bcSHarshad Shirwadkar le32_to_cpu(lrange.fc_lblk) << sb->s_blocksize_bits, 1814a7ba36bcSHarshad Shirwadkar le32_to_cpu(lrange.fc_len) << sb->s_blocksize_bits); 18158016e29fSHarshad Shirwadkar if (ret) 18168016e29fSHarshad Shirwadkar jbd_debug(1, "ext4_punch_hole returned %d", ret); 18178016e29fSHarshad Shirwadkar ext4_ext_replay_shrink_inode(inode, 18188016e29fSHarshad Shirwadkar i_size_read(inode) >> sb->s_blocksize_bits); 18198016e29fSHarshad Shirwadkar ext4_mark_inode_dirty(NULL, inode); 18208016e29fSHarshad Shirwadkar iput(inode); 18218016e29fSHarshad Shirwadkar 18228016e29fSHarshad Shirwadkar return 0; 18238016e29fSHarshad Shirwadkar } 18248016e29fSHarshad Shirwadkar 18258016e29fSHarshad 
Shirwadkar static void ext4_fc_set_bitmaps_and_counters(struct super_block *sb) 18268016e29fSHarshad Shirwadkar { 18278016e29fSHarshad Shirwadkar struct ext4_fc_replay_state *state; 18288016e29fSHarshad Shirwadkar struct inode *inode; 18298016e29fSHarshad Shirwadkar struct ext4_ext_path *path = NULL; 18308016e29fSHarshad Shirwadkar struct ext4_map_blocks map; 18318016e29fSHarshad Shirwadkar int i, ret, j; 18328016e29fSHarshad Shirwadkar ext4_lblk_t cur, end; 18338016e29fSHarshad Shirwadkar 18348016e29fSHarshad Shirwadkar state = &EXT4_SB(sb)->s_fc_replay_state; 18358016e29fSHarshad Shirwadkar for (i = 0; i < state->fc_modified_inodes_used; i++) { 18368016e29fSHarshad Shirwadkar inode = ext4_iget(sb, state->fc_modified_inodes[i], 18378016e29fSHarshad Shirwadkar EXT4_IGET_NORMAL); 183823dd561aSYi Li if (IS_ERR(inode)) { 18398016e29fSHarshad Shirwadkar jbd_debug(1, "Inode %d not found.", 18408016e29fSHarshad Shirwadkar state->fc_modified_inodes[i]); 18418016e29fSHarshad Shirwadkar continue; 18428016e29fSHarshad Shirwadkar } 18438016e29fSHarshad Shirwadkar cur = 0; 18448016e29fSHarshad Shirwadkar end = EXT_MAX_BLOCKS; 18458016e29fSHarshad Shirwadkar while (cur < end) { 18468016e29fSHarshad Shirwadkar map.m_lblk = cur; 18478016e29fSHarshad Shirwadkar map.m_len = end - cur; 18488016e29fSHarshad Shirwadkar 18498016e29fSHarshad Shirwadkar ret = ext4_map_blocks(NULL, inode, &map, 0); 18508016e29fSHarshad Shirwadkar if (ret < 0) 18518016e29fSHarshad Shirwadkar break; 18528016e29fSHarshad Shirwadkar 18538016e29fSHarshad Shirwadkar if (ret > 0) { 18548016e29fSHarshad Shirwadkar path = ext4_find_extent(inode, map.m_lblk, NULL, 0); 185523dd561aSYi Li if (!IS_ERR(path)) { 18568016e29fSHarshad Shirwadkar for (j = 0; j < path->p_depth; j++) 18578016e29fSHarshad Shirwadkar ext4_mb_mark_bb(inode->i_sb, 18588016e29fSHarshad Shirwadkar path[j].p_block, 1, 1); 18598016e29fSHarshad Shirwadkar ext4_ext_drop_refs(path); 18608016e29fSHarshad Shirwadkar kfree(path); 18618016e29fSHarshad 
Shirwadkar } 18628016e29fSHarshad Shirwadkar cur += ret; 18638016e29fSHarshad Shirwadkar ext4_mb_mark_bb(inode->i_sb, map.m_pblk, 18648016e29fSHarshad Shirwadkar map.m_len, 1); 18658016e29fSHarshad Shirwadkar } else { 18668016e29fSHarshad Shirwadkar cur = cur + (map.m_len ? map.m_len : 1); 18678016e29fSHarshad Shirwadkar } 18688016e29fSHarshad Shirwadkar } 18698016e29fSHarshad Shirwadkar iput(inode); 18708016e29fSHarshad Shirwadkar } 18718016e29fSHarshad Shirwadkar } 18728016e29fSHarshad Shirwadkar 18738016e29fSHarshad Shirwadkar /* 18748016e29fSHarshad Shirwadkar * Check if block is in excluded regions for block allocation. The simple 18758016e29fSHarshad Shirwadkar * allocator that runs during replay phase is calls this function to see 18768016e29fSHarshad Shirwadkar * if it is okay to use a block. 18778016e29fSHarshad Shirwadkar */ 18788016e29fSHarshad Shirwadkar bool ext4_fc_replay_check_excluded(struct super_block *sb, ext4_fsblk_t blk) 18798016e29fSHarshad Shirwadkar { 18808016e29fSHarshad Shirwadkar int i; 18818016e29fSHarshad Shirwadkar struct ext4_fc_replay_state *state; 18828016e29fSHarshad Shirwadkar 18838016e29fSHarshad Shirwadkar state = &EXT4_SB(sb)->s_fc_replay_state; 18848016e29fSHarshad Shirwadkar for (i = 0; i < state->fc_regions_valid; i++) { 18858016e29fSHarshad Shirwadkar if (state->fc_regions[i].ino == 0 || 18868016e29fSHarshad Shirwadkar state->fc_regions[i].len == 0) 18878016e29fSHarshad Shirwadkar continue; 18888016e29fSHarshad Shirwadkar if (blk >= state->fc_regions[i].pblk && 18898016e29fSHarshad Shirwadkar blk < state->fc_regions[i].pblk + state->fc_regions[i].len) 18908016e29fSHarshad Shirwadkar return true; 18918016e29fSHarshad Shirwadkar } 18928016e29fSHarshad Shirwadkar return false; 18938016e29fSHarshad Shirwadkar } 18948016e29fSHarshad Shirwadkar 18958016e29fSHarshad Shirwadkar /* Cleanup function called after replay */ 18968016e29fSHarshad Shirwadkar void ext4_fc_replay_cleanup(struct super_block *sb) 18978016e29fSHarshad 
Shirwadkar { 18988016e29fSHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB(sb); 18998016e29fSHarshad Shirwadkar 19008016e29fSHarshad Shirwadkar sbi->s_mount_state &= ~EXT4_FC_REPLAY; 19018016e29fSHarshad Shirwadkar kfree(sbi->s_fc_replay_state.fc_regions); 19028016e29fSHarshad Shirwadkar kfree(sbi->s_fc_replay_state.fc_modified_inodes); 19038016e29fSHarshad Shirwadkar } 19048016e29fSHarshad Shirwadkar 19058016e29fSHarshad Shirwadkar /* 19068016e29fSHarshad Shirwadkar * Recovery Scan phase handler 19078016e29fSHarshad Shirwadkar * 19088016e29fSHarshad Shirwadkar * This function is called during the scan phase and is responsible 19098016e29fSHarshad Shirwadkar * for doing following things: 19108016e29fSHarshad Shirwadkar * - Make sure the fast commit area has valid tags for replay 19118016e29fSHarshad Shirwadkar * - Count number of tags that need to be replayed by the replay handler 19128016e29fSHarshad Shirwadkar * - Verify CRC 19138016e29fSHarshad Shirwadkar * - Create a list of excluded blocks for allocation during replay phase 19148016e29fSHarshad Shirwadkar * 19158016e29fSHarshad Shirwadkar * This function returns JBD2_FC_REPLAY_CONTINUE to indicate that SCAN is 19168016e29fSHarshad Shirwadkar * incomplete and JBD2 should send more blocks. It returns JBD2_FC_REPLAY_STOP 19178016e29fSHarshad Shirwadkar * to indicate that scan has finished and JBD2 can now start replay phase. 19188016e29fSHarshad Shirwadkar * It returns a negative error to indicate that there was an error. At the end 19198016e29fSHarshad Shirwadkar * of a successful scan phase, sbi->s_fc_replay_state.fc_replay_num_tags is set 19208016e29fSHarshad Shirwadkar * to indicate the number of tags that need to replayed during the replay phase. 
19218016e29fSHarshad Shirwadkar */ 19228016e29fSHarshad Shirwadkar static int ext4_fc_replay_scan(journal_t *journal, 19238016e29fSHarshad Shirwadkar struct buffer_head *bh, int off, 19248016e29fSHarshad Shirwadkar tid_t expected_tid) 19258016e29fSHarshad Shirwadkar { 19268016e29fSHarshad Shirwadkar struct super_block *sb = journal->j_private; 19278016e29fSHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB(sb); 19288016e29fSHarshad Shirwadkar struct ext4_fc_replay_state *state; 19298016e29fSHarshad Shirwadkar int ret = JBD2_FC_REPLAY_CONTINUE; 1930a7ba36bcSHarshad Shirwadkar struct ext4_fc_add_range ext; 1931a7ba36bcSHarshad Shirwadkar struct ext4_fc_tl tl; 1932a7ba36bcSHarshad Shirwadkar struct ext4_fc_tail tail; 1933a7ba36bcSHarshad Shirwadkar __u8 *start, *end, *cur, *val; 1934a7ba36bcSHarshad Shirwadkar struct ext4_fc_head head; 19358016e29fSHarshad Shirwadkar struct ext4_extent *ex; 19368016e29fSHarshad Shirwadkar 19378016e29fSHarshad Shirwadkar state = &sbi->s_fc_replay_state; 19388016e29fSHarshad Shirwadkar 19398016e29fSHarshad Shirwadkar start = (u8 *)bh->b_data; 19408016e29fSHarshad Shirwadkar end = (__u8 *)bh->b_data + journal->j_blocksize - 1; 19418016e29fSHarshad Shirwadkar 19428016e29fSHarshad Shirwadkar if (state->fc_replay_expected_off == 0) { 19438016e29fSHarshad Shirwadkar state->fc_cur_tag = 0; 19448016e29fSHarshad Shirwadkar state->fc_replay_num_tags = 0; 19458016e29fSHarshad Shirwadkar state->fc_crc = 0; 19468016e29fSHarshad Shirwadkar state->fc_regions = NULL; 19478016e29fSHarshad Shirwadkar state->fc_regions_valid = state->fc_regions_used = 19488016e29fSHarshad Shirwadkar state->fc_regions_size = 0; 19498016e29fSHarshad Shirwadkar /* Check if we can stop early */ 19508016e29fSHarshad Shirwadkar if (le16_to_cpu(((struct ext4_fc_tl *)start)->fc_tag) 19518016e29fSHarshad Shirwadkar != EXT4_FC_TAG_HEAD) 19528016e29fSHarshad Shirwadkar return 0; 19538016e29fSHarshad Shirwadkar } 19548016e29fSHarshad Shirwadkar 19558016e29fSHarshad Shirwadkar if 
(off != state->fc_replay_expected_off) { 19568016e29fSHarshad Shirwadkar ret = -EFSCORRUPTED; 19578016e29fSHarshad Shirwadkar goto out_err; 19588016e29fSHarshad Shirwadkar } 19598016e29fSHarshad Shirwadkar 19608016e29fSHarshad Shirwadkar state->fc_replay_expected_off++; 1961a7ba36bcSHarshad Shirwadkar for (cur = start; cur < end; cur = cur + sizeof(tl) + le16_to_cpu(tl.fc_len)) { 1962a7ba36bcSHarshad Shirwadkar memcpy(&tl, cur, sizeof(tl)); 1963a7ba36bcSHarshad Shirwadkar val = cur + sizeof(tl); 19648016e29fSHarshad Shirwadkar jbd_debug(3, "Scan phase, tag:%s, blk %lld\n", 1965a7ba36bcSHarshad Shirwadkar tag2str(le16_to_cpu(tl.fc_tag)), bh->b_blocknr); 1966a7ba36bcSHarshad Shirwadkar switch (le16_to_cpu(tl.fc_tag)) { 19678016e29fSHarshad Shirwadkar case EXT4_FC_TAG_ADD_RANGE: 1968a7ba36bcSHarshad Shirwadkar memcpy(&ext, val, sizeof(ext)); 1969a7ba36bcSHarshad Shirwadkar ex = (struct ext4_extent *)&ext.fc_ex; 19708016e29fSHarshad Shirwadkar ret = ext4_fc_record_regions(sb, 1971a7ba36bcSHarshad Shirwadkar le32_to_cpu(ext.fc_ino), 19728016e29fSHarshad Shirwadkar le32_to_cpu(ex->ee_block), ext4_ext_pblock(ex), 19738016e29fSHarshad Shirwadkar ext4_ext_get_actual_len(ex)); 19748016e29fSHarshad Shirwadkar if (ret < 0) 19758016e29fSHarshad Shirwadkar break; 19768016e29fSHarshad Shirwadkar ret = JBD2_FC_REPLAY_CONTINUE; 19778016e29fSHarshad Shirwadkar fallthrough; 19788016e29fSHarshad Shirwadkar case EXT4_FC_TAG_DEL_RANGE: 19798016e29fSHarshad Shirwadkar case EXT4_FC_TAG_LINK: 19808016e29fSHarshad Shirwadkar case EXT4_FC_TAG_UNLINK: 19818016e29fSHarshad Shirwadkar case EXT4_FC_TAG_CREAT: 19828016e29fSHarshad Shirwadkar case EXT4_FC_TAG_INODE: 19838016e29fSHarshad Shirwadkar case EXT4_FC_TAG_PAD: 19848016e29fSHarshad Shirwadkar state->fc_cur_tag++; 1985a7ba36bcSHarshad Shirwadkar state->fc_crc = ext4_chksum(sbi, state->fc_crc, cur, 1986a7ba36bcSHarshad Shirwadkar sizeof(tl) + le16_to_cpu(tl.fc_len)); 19878016e29fSHarshad Shirwadkar break; 19888016e29fSHarshad Shirwadkar case 
EXT4_FC_TAG_TAIL: 19898016e29fSHarshad Shirwadkar state->fc_cur_tag++; 1990a7ba36bcSHarshad Shirwadkar memcpy(&tail, val, sizeof(tail)); 1991a7ba36bcSHarshad Shirwadkar state->fc_crc = ext4_chksum(sbi, state->fc_crc, cur, 1992a7ba36bcSHarshad Shirwadkar sizeof(tl) + 19938016e29fSHarshad Shirwadkar offsetof(struct ext4_fc_tail, 19948016e29fSHarshad Shirwadkar fc_crc)); 1995a7ba36bcSHarshad Shirwadkar if (le32_to_cpu(tail.fc_tid) == expected_tid && 1996a7ba36bcSHarshad Shirwadkar le32_to_cpu(tail.fc_crc) == state->fc_crc) { 19978016e29fSHarshad Shirwadkar state->fc_replay_num_tags = state->fc_cur_tag; 19988016e29fSHarshad Shirwadkar state->fc_regions_valid = 19998016e29fSHarshad Shirwadkar state->fc_regions_used; 20008016e29fSHarshad Shirwadkar } else { 20018016e29fSHarshad Shirwadkar ret = state->fc_replay_num_tags ? 20028016e29fSHarshad Shirwadkar JBD2_FC_REPLAY_STOP : -EFSBADCRC; 20038016e29fSHarshad Shirwadkar } 20048016e29fSHarshad Shirwadkar state->fc_crc = 0; 20058016e29fSHarshad Shirwadkar break; 20068016e29fSHarshad Shirwadkar case EXT4_FC_TAG_HEAD: 2007a7ba36bcSHarshad Shirwadkar memcpy(&head, val, sizeof(head)); 2008a7ba36bcSHarshad Shirwadkar if (le32_to_cpu(head.fc_features) & 20098016e29fSHarshad Shirwadkar ~EXT4_FC_SUPPORTED_FEATURES) { 20108016e29fSHarshad Shirwadkar ret = -EOPNOTSUPP; 20118016e29fSHarshad Shirwadkar break; 20128016e29fSHarshad Shirwadkar } 2013a7ba36bcSHarshad Shirwadkar if (le32_to_cpu(head.fc_tid) != expected_tid) { 20148016e29fSHarshad Shirwadkar ret = JBD2_FC_REPLAY_STOP; 20158016e29fSHarshad Shirwadkar break; 20168016e29fSHarshad Shirwadkar } 20178016e29fSHarshad Shirwadkar state->fc_cur_tag++; 2018a7ba36bcSHarshad Shirwadkar state->fc_crc = ext4_chksum(sbi, state->fc_crc, cur, 2019a7ba36bcSHarshad Shirwadkar sizeof(tl) + le16_to_cpu(tl.fc_len)); 20208016e29fSHarshad Shirwadkar break; 20218016e29fSHarshad Shirwadkar default: 20228016e29fSHarshad Shirwadkar ret = state->fc_replay_num_tags ? 
20238016e29fSHarshad Shirwadkar JBD2_FC_REPLAY_STOP : -ECANCELED; 20248016e29fSHarshad Shirwadkar } 20258016e29fSHarshad Shirwadkar if (ret < 0 || ret == JBD2_FC_REPLAY_STOP) 20268016e29fSHarshad Shirwadkar break; 20278016e29fSHarshad Shirwadkar } 20288016e29fSHarshad Shirwadkar 20298016e29fSHarshad Shirwadkar out_err: 20308016e29fSHarshad Shirwadkar trace_ext4_fc_replay_scan(sb, ret, off); 20318016e29fSHarshad Shirwadkar return ret; 20328016e29fSHarshad Shirwadkar } 20338016e29fSHarshad Shirwadkar 20345b849b5fSHarshad Shirwadkar /* 20355b849b5fSHarshad Shirwadkar * Main recovery path entry point. 20368016e29fSHarshad Shirwadkar * The meaning of return codes is similar as above. 20375b849b5fSHarshad Shirwadkar */ 20385b849b5fSHarshad Shirwadkar static int ext4_fc_replay(journal_t *journal, struct buffer_head *bh, 20395b849b5fSHarshad Shirwadkar enum passtype pass, int off, tid_t expected_tid) 20405b849b5fSHarshad Shirwadkar { 20418016e29fSHarshad Shirwadkar struct super_block *sb = journal->j_private; 20428016e29fSHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB(sb); 2043a7ba36bcSHarshad Shirwadkar struct ext4_fc_tl tl; 2044a7ba36bcSHarshad Shirwadkar __u8 *start, *end, *cur, *val; 20458016e29fSHarshad Shirwadkar int ret = JBD2_FC_REPLAY_CONTINUE; 20468016e29fSHarshad Shirwadkar struct ext4_fc_replay_state *state = &sbi->s_fc_replay_state; 2047a7ba36bcSHarshad Shirwadkar struct ext4_fc_tail tail; 20488016e29fSHarshad Shirwadkar 20498016e29fSHarshad Shirwadkar if (pass == PASS_SCAN) { 20508016e29fSHarshad Shirwadkar state->fc_current_pass = PASS_SCAN; 20518016e29fSHarshad Shirwadkar return ext4_fc_replay_scan(journal, bh, off, expected_tid); 20528016e29fSHarshad Shirwadkar } 20538016e29fSHarshad Shirwadkar 20548016e29fSHarshad Shirwadkar if (state->fc_current_pass != pass) { 20558016e29fSHarshad Shirwadkar state->fc_current_pass = pass; 20568016e29fSHarshad Shirwadkar sbi->s_mount_state |= EXT4_FC_REPLAY; 20578016e29fSHarshad Shirwadkar } 20588016e29fSHarshad 
Shirwadkar if (!sbi->s_fc_replay_state.fc_replay_num_tags) { 20598016e29fSHarshad Shirwadkar jbd_debug(1, "Replay stops\n"); 20608016e29fSHarshad Shirwadkar ext4_fc_set_bitmaps_and_counters(sb); 20615b849b5fSHarshad Shirwadkar return 0; 20625b849b5fSHarshad Shirwadkar } 20635b849b5fSHarshad Shirwadkar 20648016e29fSHarshad Shirwadkar #ifdef CONFIG_EXT4_DEBUG 20658016e29fSHarshad Shirwadkar if (sbi->s_fc_debug_max_replay && off >= sbi->s_fc_debug_max_replay) { 20668016e29fSHarshad Shirwadkar pr_warn("Dropping fc block %d because max_replay set\n", off); 20678016e29fSHarshad Shirwadkar return JBD2_FC_REPLAY_STOP; 20688016e29fSHarshad Shirwadkar } 20698016e29fSHarshad Shirwadkar #endif 20708016e29fSHarshad Shirwadkar 20718016e29fSHarshad Shirwadkar start = (u8 *)bh->b_data; 20728016e29fSHarshad Shirwadkar end = (__u8 *)bh->b_data + journal->j_blocksize - 1; 20738016e29fSHarshad Shirwadkar 2074a7ba36bcSHarshad Shirwadkar for (cur = start; cur < end; cur = cur + sizeof(tl) + le16_to_cpu(tl.fc_len)) { 2075a7ba36bcSHarshad Shirwadkar memcpy(&tl, cur, sizeof(tl)); 2076a7ba36bcSHarshad Shirwadkar val = cur + sizeof(tl); 2077a7ba36bcSHarshad Shirwadkar 20788016e29fSHarshad Shirwadkar if (state->fc_replay_num_tags == 0) { 20798016e29fSHarshad Shirwadkar ret = JBD2_FC_REPLAY_STOP; 20808016e29fSHarshad Shirwadkar ext4_fc_set_bitmaps_and_counters(sb); 20818016e29fSHarshad Shirwadkar break; 20828016e29fSHarshad Shirwadkar } 20838016e29fSHarshad Shirwadkar jbd_debug(3, "Replay phase, tag:%s\n", 2084a7ba36bcSHarshad Shirwadkar tag2str(le16_to_cpu(tl.fc_tag))); 20858016e29fSHarshad Shirwadkar state->fc_replay_num_tags--; 2086a7ba36bcSHarshad Shirwadkar switch (le16_to_cpu(tl.fc_tag)) { 20878016e29fSHarshad Shirwadkar case EXT4_FC_TAG_LINK: 2088a7ba36bcSHarshad Shirwadkar ret = ext4_fc_replay_link(sb, &tl, val); 20898016e29fSHarshad Shirwadkar break; 20908016e29fSHarshad Shirwadkar case EXT4_FC_TAG_UNLINK: 2091a7ba36bcSHarshad Shirwadkar ret = ext4_fc_replay_unlink(sb, &tl, val); 
20928016e29fSHarshad Shirwadkar break; 20938016e29fSHarshad Shirwadkar case EXT4_FC_TAG_ADD_RANGE: 2094a7ba36bcSHarshad Shirwadkar ret = ext4_fc_replay_add_range(sb, &tl, val); 20958016e29fSHarshad Shirwadkar break; 20968016e29fSHarshad Shirwadkar case EXT4_FC_TAG_CREAT: 2097a7ba36bcSHarshad Shirwadkar ret = ext4_fc_replay_create(sb, &tl, val); 20988016e29fSHarshad Shirwadkar break; 20998016e29fSHarshad Shirwadkar case EXT4_FC_TAG_DEL_RANGE: 2100a7ba36bcSHarshad Shirwadkar ret = ext4_fc_replay_del_range(sb, &tl, val); 21018016e29fSHarshad Shirwadkar break; 21028016e29fSHarshad Shirwadkar case EXT4_FC_TAG_INODE: 2103a7ba36bcSHarshad Shirwadkar ret = ext4_fc_replay_inode(sb, &tl, val); 21048016e29fSHarshad Shirwadkar break; 21058016e29fSHarshad Shirwadkar case EXT4_FC_TAG_PAD: 21068016e29fSHarshad Shirwadkar trace_ext4_fc_replay(sb, EXT4_FC_TAG_PAD, 0, 2107a7ba36bcSHarshad Shirwadkar le16_to_cpu(tl.fc_len), 0); 21088016e29fSHarshad Shirwadkar break; 21098016e29fSHarshad Shirwadkar case EXT4_FC_TAG_TAIL: 21108016e29fSHarshad Shirwadkar trace_ext4_fc_replay(sb, EXT4_FC_TAG_TAIL, 0, 2111a7ba36bcSHarshad Shirwadkar le16_to_cpu(tl.fc_len), 0); 2112a7ba36bcSHarshad Shirwadkar memcpy(&tail, val, sizeof(tail)); 2113a7ba36bcSHarshad Shirwadkar WARN_ON(le32_to_cpu(tail.fc_tid) != expected_tid); 21148016e29fSHarshad Shirwadkar break; 21158016e29fSHarshad Shirwadkar case EXT4_FC_TAG_HEAD: 21168016e29fSHarshad Shirwadkar break; 21178016e29fSHarshad Shirwadkar default: 2118a7ba36bcSHarshad Shirwadkar trace_ext4_fc_replay(sb, le16_to_cpu(tl.fc_tag), 0, 2119a7ba36bcSHarshad Shirwadkar le16_to_cpu(tl.fc_len), 0); 21208016e29fSHarshad Shirwadkar ret = -ECANCELED; 21218016e29fSHarshad Shirwadkar break; 21228016e29fSHarshad Shirwadkar } 21238016e29fSHarshad Shirwadkar if (ret < 0) 21248016e29fSHarshad Shirwadkar break; 21258016e29fSHarshad Shirwadkar ret = JBD2_FC_REPLAY_CONTINUE; 21268016e29fSHarshad Shirwadkar } 21278016e29fSHarshad Shirwadkar return ret; 21288016e29fSHarshad 
Shirwadkar } 21298016e29fSHarshad Shirwadkar 21306866d7b3SHarshad Shirwadkar void ext4_fc_init(struct super_block *sb, journal_t *journal) 21316866d7b3SHarshad Shirwadkar { 21325b849b5fSHarshad Shirwadkar /* 21335b849b5fSHarshad Shirwadkar * We set replay callback even if fast commit disabled because we may 21345b849b5fSHarshad Shirwadkar * could still have fast commit blocks that need to be replayed even if 21355b849b5fSHarshad Shirwadkar * fast commit has now been turned off. 21365b849b5fSHarshad Shirwadkar */ 21375b849b5fSHarshad Shirwadkar journal->j_fc_replay_callback = ext4_fc_replay; 21386866d7b3SHarshad Shirwadkar if (!test_opt2(sb, JOURNAL_FAST_COMMIT)) 21396866d7b3SHarshad Shirwadkar return; 2140ff780b91SHarshad Shirwadkar journal->j_fc_cleanup_callback = ext4_fc_cleanup; 21416866d7b3SHarshad Shirwadkar } 2142aa75f4d3SHarshad Shirwadkar 2143fa329e27STheodore Ts'o static const char *fc_ineligible_reasons[] = { 2144ce8c59d1SHarshad Shirwadkar "Extended attributes changed", 2145ce8c59d1SHarshad Shirwadkar "Cross rename", 2146ce8c59d1SHarshad Shirwadkar "Journal flag changed", 2147ce8c59d1SHarshad Shirwadkar "Insufficient memory", 2148ce8c59d1SHarshad Shirwadkar "Swap boot", 2149ce8c59d1SHarshad Shirwadkar "Resize", 2150ce8c59d1SHarshad Shirwadkar "Dir renamed", 2151ce8c59d1SHarshad Shirwadkar "Falloc range op", 2152556e0319SHarshad Shirwadkar "Data journalling", 2153ce8c59d1SHarshad Shirwadkar "FC Commit Failed" 2154ce8c59d1SHarshad Shirwadkar }; 2155ce8c59d1SHarshad Shirwadkar 2156ce8c59d1SHarshad Shirwadkar int ext4_fc_info_show(struct seq_file *seq, void *v) 2157ce8c59d1SHarshad Shirwadkar { 2158ce8c59d1SHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB((struct super_block *)seq->private); 2159ce8c59d1SHarshad Shirwadkar struct ext4_fc_stats *stats = &sbi->s_fc_stats; 2160ce8c59d1SHarshad Shirwadkar int i; 2161ce8c59d1SHarshad Shirwadkar 2162ce8c59d1SHarshad Shirwadkar if (v != SEQ_START_TOKEN) 2163ce8c59d1SHarshad Shirwadkar return 0; 
2164ce8c59d1SHarshad Shirwadkar 2165ce8c59d1SHarshad Shirwadkar seq_printf(seq, 2166ce8c59d1SHarshad Shirwadkar "fc stats:\n%ld commits\n%ld ineligible\n%ld numblks\n%lluus avg_commit_time\n", 2167ce8c59d1SHarshad Shirwadkar stats->fc_num_commits, stats->fc_ineligible_commits, 2168ce8c59d1SHarshad Shirwadkar stats->fc_numblks, 2169ce8c59d1SHarshad Shirwadkar div_u64(sbi->s_fc_avg_commit_time, 1000)); 2170ce8c59d1SHarshad Shirwadkar seq_puts(seq, "Ineligible reasons:\n"); 2171ce8c59d1SHarshad Shirwadkar for (i = 0; i < EXT4_FC_REASON_MAX; i++) 2172ce8c59d1SHarshad Shirwadkar seq_printf(seq, "\"%s\":\t%d\n", fc_ineligible_reasons[i], 2173ce8c59d1SHarshad Shirwadkar stats->fc_ineligible_reason_count[i]); 2174ce8c59d1SHarshad Shirwadkar 2175ce8c59d1SHarshad Shirwadkar return 0; 2176ce8c59d1SHarshad Shirwadkar } 2177ce8c59d1SHarshad Shirwadkar 2178aa75f4d3SHarshad Shirwadkar int __init ext4_fc_init_dentry_cache(void) 2179aa75f4d3SHarshad Shirwadkar { 2180aa75f4d3SHarshad Shirwadkar ext4_fc_dentry_cachep = KMEM_CACHE(ext4_fc_dentry_update, 2181aa75f4d3SHarshad Shirwadkar SLAB_RECLAIM_ACCOUNT); 2182aa75f4d3SHarshad Shirwadkar 2183aa75f4d3SHarshad Shirwadkar if (ext4_fc_dentry_cachep == NULL) 2184aa75f4d3SHarshad Shirwadkar return -ENOMEM; 2185aa75f4d3SHarshad Shirwadkar 2186aa75f4d3SHarshad Shirwadkar return 0; 2187aa75f4d3SHarshad Shirwadkar } 2188