16866d7b3SHarshad Shirwadkar // SPDX-License-Identifier: GPL-2.0 26866d7b3SHarshad Shirwadkar 36866d7b3SHarshad Shirwadkar /* 46866d7b3SHarshad Shirwadkar * fs/ext4/fast_commit.c 56866d7b3SHarshad Shirwadkar * 66866d7b3SHarshad Shirwadkar * Written by Harshad Shirwadkar <harshadshirwadkar@gmail.com> 76866d7b3SHarshad Shirwadkar * 86866d7b3SHarshad Shirwadkar * Ext4 fast commits routines. 96866d7b3SHarshad Shirwadkar */ 10aa75f4d3SHarshad Shirwadkar #include "ext4.h" 116866d7b3SHarshad Shirwadkar #include "ext4_jbd2.h" 12aa75f4d3SHarshad Shirwadkar #include "ext4_extents.h" 13aa75f4d3SHarshad Shirwadkar #include "mballoc.h" 14aa75f4d3SHarshad Shirwadkar 15aa75f4d3SHarshad Shirwadkar /* 16aa75f4d3SHarshad Shirwadkar * Ext4 Fast Commits 17aa75f4d3SHarshad Shirwadkar * ----------------- 18aa75f4d3SHarshad Shirwadkar * 19aa75f4d3SHarshad Shirwadkar * Ext4 fast commits implement fine grained journalling for Ext4. 20aa75f4d3SHarshad Shirwadkar * 21aa75f4d3SHarshad Shirwadkar * Fast commits are organized as a log of tag-length-value (TLV) structs. (See 22aa75f4d3SHarshad Shirwadkar * struct ext4_fc_tl). Each TLV contains some delta that is replayed TLV by 23aa75f4d3SHarshad Shirwadkar * TLV during the recovery phase. For the scenarios for which we currently 24aa75f4d3SHarshad Shirwadkar * don't have replay code, fast commit falls back to full commits. 25aa75f4d3SHarshad Shirwadkar * Fast commits record delta in one of the following three categories. 
26aa75f4d3SHarshad Shirwadkar * 27aa75f4d3SHarshad Shirwadkar * (A) Directory entry updates: 28aa75f4d3SHarshad Shirwadkar * 29aa75f4d3SHarshad Shirwadkar * - EXT4_FC_TAG_UNLINK - records directory entry unlink 30aa75f4d3SHarshad Shirwadkar * - EXT4_FC_TAG_LINK - records directory entry link 31aa75f4d3SHarshad Shirwadkar * - EXT4_FC_TAG_CREAT - records inode and directory entry creation 32aa75f4d3SHarshad Shirwadkar * 33aa75f4d3SHarshad Shirwadkar * (B) File specific data range updates: 34aa75f4d3SHarshad Shirwadkar * 35aa75f4d3SHarshad Shirwadkar * - EXT4_FC_TAG_ADD_RANGE - records addition of new blocks to an inode 36aa75f4d3SHarshad Shirwadkar * - EXT4_FC_TAG_DEL_RANGE - records deletion of blocks from an inode 37aa75f4d3SHarshad Shirwadkar * 38aa75f4d3SHarshad Shirwadkar * (C) Inode metadata (mtime / ctime etc): 39aa75f4d3SHarshad Shirwadkar * 40aa75f4d3SHarshad Shirwadkar * - EXT4_FC_TAG_INODE - record the inode that should be replayed 41aa75f4d3SHarshad Shirwadkar * during recovery. Note that iblocks field is 42aa75f4d3SHarshad Shirwadkar * not replayed and instead derived during 43aa75f4d3SHarshad Shirwadkar * replay. 44aa75f4d3SHarshad Shirwadkar * Commit Operation 45aa75f4d3SHarshad Shirwadkar * ---------------- 46aa75f4d3SHarshad Shirwadkar * With fast commits, we maintain all the directory entry operations in the 47aa75f4d3SHarshad Shirwadkar * order in which they are issued in an in-memory queue. This queue is flushed 48aa75f4d3SHarshad Shirwadkar * to disk during the commit operation. We also maintain a list of inodes 49aa75f4d3SHarshad Shirwadkar * that need to be committed during a fast commit in another in memory queue of 50aa75f4d3SHarshad Shirwadkar * inodes. 
During the commit operation, we commit in the following order:
 *
 * [1] Lock inodes for any further data updates by setting COMMITTING state
 * [2] Submit data buffers of all the inodes
 * [3] Wait for [2] to complete
 * [4] Commit all the directory entry updates in the fast commit space
 * [5] Commit all the changed inode structures
 * [6] Write tail tag (this tag ensures the atomicity, please read the following
 *     section for more details).
 * [7] Wait for [4], [5] and [6] to complete.
 *
 * All the inode updates must call ext4_fc_start_update() before starting an
 * update. If such an ongoing update is present, fast commit waits for it to
 * complete. The completion of such an update is marked by
 * ext4_fc_stop_update().
 *
 * Fast Commit Ineligibility
 * -------------------------
 *
 * Not all operations are supported by fast commits today (e.g. extended
 * attributes). Fast commit ineligibility is marked by calling
 * ext4_fc_mark_ineligible(): This makes the next fast commit operation fall
 * back to a full commit.
*
 * Atomicity of commits
 * --------------------
 * In order to guarantee atomicity during the commit operation, fast commit
 * uses "EXT4_FC_TAG_TAIL" tag that marks a fast commit as complete. Tail
 * tag contains CRC of the contents and TID of the transaction after which
 * this fast commit should be applied. Recovery code replays fast commit
 * logs only if there's at least 1 valid tail present. For every fast commit
 * operation, there is 1 tail. This means, we may end up with multiple tails
 * in the fast commit space. Here's an example:
 *
 * - Create a new file A and remove existing file B
 * - fsync()
 * - Append contents to file A
 * - Truncate file A
 * - fsync()
 *
 * The fast commit space at the end of above operations would look like this:
 *      [HEAD] [CREAT A] [UNLINK B] [TAIL] [ADD_RANGE A] [DEL_RANGE A] [TAIL]
 *             |<---  Fast Commit 1   --->|<---      Fast Commit 2     ---->|
 *
 * Replay code should thus check for all the valid tails in the FC area.
 *
 * Fast Commit Replay Idempotence
 * ------------------------------
 *
 * Fast commit tags are idempotent in nature provided the recovery code follows
 * certain rules.
The guiding principle that the commit path follows while
 * committing is that it stores the result of a particular operation instead of
 * storing the procedure.
 *
 * Let's consider this rename operation: 'mv /a /b'. Let's assume dirent '/a'
 * was associated with inode 10. During fast commit, instead of storing this
 * operation as a procedure "rename a to b", we store the resulting file system
 * state as a "series" of outcomes:
 *
 * - Link dirent b to inode 10
 * - Unlink dirent a
 * - Inode <10> with valid refcount
 *
 * Now when recovery code runs, it needs to "enforce" this state on the file
 * system. This is what guarantees idempotence of fast commit replay.
 *
 * Let's take an example of a procedure that is not idempotent and see how fast
 * commits make it idempotent. Consider the following sequence of operations:
 *
 *     rm A;    mv B A;    read A
 *  (x)     (y)        (z)
 *
 * (x), (y) and (z) are the points at which we can crash. If we store this
 * sequence of operations as is then the replay is not idempotent. Let's say
 * while in replay, we crash at (z). During the second replay, file A (which was
 * actually created as a result of "mv B A" operation) would get deleted.
Thus, 126b1b7dce3SHarshad Shirwadkar * file named A would be absent when we try to read A. So, this sequence of 127b1b7dce3SHarshad Shirwadkar * operations is not idempotent. However, as mentioned above, instead of storing 128b1b7dce3SHarshad Shirwadkar * the procedure fast commits store the outcome of each procedure. Thus the fast 129b1b7dce3SHarshad Shirwadkar * commit log for above procedure would be as follows: 130b1b7dce3SHarshad Shirwadkar * 131b1b7dce3SHarshad Shirwadkar * (Let's assume dirent A was linked to inode 10 and dirent B was linked to 132b1b7dce3SHarshad Shirwadkar * inode 11 before the replay) 133b1b7dce3SHarshad Shirwadkar * 134b1b7dce3SHarshad Shirwadkar * [Unlink A] [Link A to inode 11] [Unlink B] [Inode 11] 135b1b7dce3SHarshad Shirwadkar * (w) (x) (y) (z) 136b1b7dce3SHarshad Shirwadkar * 137b1b7dce3SHarshad Shirwadkar * If we crash at (z), we will have file A linked to inode 11. During the second 138b1b7dce3SHarshad Shirwadkar * replay, we will remove file A (inode 11). But we will create it back and make 139b1b7dce3SHarshad Shirwadkar * it point to inode 11. We won't find B, so we'll just skip that step. At this 140b1b7dce3SHarshad Shirwadkar * point, the refcount for inode 11 is not reliable, but that gets fixed by the 141b1b7dce3SHarshad Shirwadkar * replay of last inode 11 tag. Crashes at points (w), (x) and (y) get handled 142b1b7dce3SHarshad Shirwadkar * similarly. Thus, by converting a non-idempotent procedure into a series of 143b1b7dce3SHarshad Shirwadkar * idempotent outcomes, fast commits ensured idempotence during the replay. 144b1b7dce3SHarshad Shirwadkar * 145aa75f4d3SHarshad Shirwadkar * TODOs 146aa75f4d3SHarshad Shirwadkar * ----- 147b1b7dce3SHarshad Shirwadkar * 148b1b7dce3SHarshad Shirwadkar * 0) Fast commit replay path hardening: Fast commit replay code should use 149b1b7dce3SHarshad Shirwadkar * journal handles to make sure all the updates it does during the replay 150b1b7dce3SHarshad Shirwadkar * path are atomic. 
With that if we crash during fast commit replay, after 151b1b7dce3SHarshad Shirwadkar * trying to do recovery again, we will find a file system where fast commit 152b1b7dce3SHarshad Shirwadkar * area is invalid (because new full commit would be found). In order to deal 153b1b7dce3SHarshad Shirwadkar * with that, fast commit replay code should ensure that the "FC_REPLAY" 154b1b7dce3SHarshad Shirwadkar * superblock state is persisted before starting the replay, so that after 155b1b7dce3SHarshad Shirwadkar * the crash, fast commit recovery code can look at that flag and perform 156b1b7dce3SHarshad Shirwadkar * fast commit recovery even if that area is invalidated by later full 157b1b7dce3SHarshad Shirwadkar * commits. 158b1b7dce3SHarshad Shirwadkar * 159d1199b94SHarshad Shirwadkar * 1) Fast commit's commit path locks the entire file system during fast 160d1199b94SHarshad Shirwadkar * commit. This has significant performance penalty. Instead of that, we 161d1199b94SHarshad Shirwadkar * should use ext4_fc_start/stop_update functions to start inode level 162d1199b94SHarshad Shirwadkar * updates from ext4_journal_start/stop. Once we do that we can drop file 163d1199b94SHarshad Shirwadkar * system locking during commit path. 164aa75f4d3SHarshad Shirwadkar * 165d1199b94SHarshad Shirwadkar * 2) Handle more ineligible cases. 
166aa75f4d3SHarshad Shirwadkar */ 167aa75f4d3SHarshad Shirwadkar 168aa75f4d3SHarshad Shirwadkar #include <trace/events/ext4.h> 169aa75f4d3SHarshad Shirwadkar static struct kmem_cache *ext4_fc_dentry_cachep; 170aa75f4d3SHarshad Shirwadkar 171aa75f4d3SHarshad Shirwadkar static void ext4_end_buffer_io_sync(struct buffer_head *bh, int uptodate) 172aa75f4d3SHarshad Shirwadkar { 173aa75f4d3SHarshad Shirwadkar BUFFER_TRACE(bh, ""); 174aa75f4d3SHarshad Shirwadkar if (uptodate) { 175aa75f4d3SHarshad Shirwadkar ext4_debug("%s: Block %lld up-to-date", 176aa75f4d3SHarshad Shirwadkar __func__, bh->b_blocknr); 177aa75f4d3SHarshad Shirwadkar set_buffer_uptodate(bh); 178aa75f4d3SHarshad Shirwadkar } else { 179aa75f4d3SHarshad Shirwadkar ext4_debug("%s: Block %lld not up-to-date", 180aa75f4d3SHarshad Shirwadkar __func__, bh->b_blocknr); 181aa75f4d3SHarshad Shirwadkar clear_buffer_uptodate(bh); 182aa75f4d3SHarshad Shirwadkar } 183aa75f4d3SHarshad Shirwadkar 184aa75f4d3SHarshad Shirwadkar unlock_buffer(bh); 185aa75f4d3SHarshad Shirwadkar } 186aa75f4d3SHarshad Shirwadkar 187aa75f4d3SHarshad Shirwadkar static inline void ext4_fc_reset_inode(struct inode *inode) 188aa75f4d3SHarshad Shirwadkar { 189aa75f4d3SHarshad Shirwadkar struct ext4_inode_info *ei = EXT4_I(inode); 190aa75f4d3SHarshad Shirwadkar 191aa75f4d3SHarshad Shirwadkar ei->i_fc_lblk_start = 0; 192aa75f4d3SHarshad Shirwadkar ei->i_fc_lblk_len = 0; 193aa75f4d3SHarshad Shirwadkar } 194aa75f4d3SHarshad Shirwadkar 195aa75f4d3SHarshad Shirwadkar void ext4_fc_init_inode(struct inode *inode) 196aa75f4d3SHarshad Shirwadkar { 197aa75f4d3SHarshad Shirwadkar struct ext4_inode_info *ei = EXT4_I(inode); 198aa75f4d3SHarshad Shirwadkar 199aa75f4d3SHarshad Shirwadkar ext4_fc_reset_inode(inode); 200aa75f4d3SHarshad Shirwadkar ext4_clear_inode_state(inode, EXT4_STATE_FC_COMMITTING); 201aa75f4d3SHarshad Shirwadkar INIT_LIST_HEAD(&ei->i_fc_list); 202b3998b3bSRitesh Harjani INIT_LIST_HEAD(&ei->i_fc_dilist); 203aa75f4d3SHarshad Shirwadkar 
init_waitqueue_head(&ei->i_fc_wait); 204aa75f4d3SHarshad Shirwadkar atomic_set(&ei->i_fc_updates, 0); 205aa75f4d3SHarshad Shirwadkar } 206aa75f4d3SHarshad Shirwadkar 207f6634e26SHarshad Shirwadkar /* This function must be called with sbi->s_fc_lock held. */ 208f6634e26SHarshad Shirwadkar static void ext4_fc_wait_committing_inode(struct inode *inode) 209fa329e27STheodore Ts'o __releases(&EXT4_SB(inode->i_sb)->s_fc_lock) 210f6634e26SHarshad Shirwadkar { 211f6634e26SHarshad Shirwadkar wait_queue_head_t *wq; 212f6634e26SHarshad Shirwadkar struct ext4_inode_info *ei = EXT4_I(inode); 213f6634e26SHarshad Shirwadkar 214f6634e26SHarshad Shirwadkar #if (BITS_PER_LONG < 64) 215f6634e26SHarshad Shirwadkar DEFINE_WAIT_BIT(wait, &ei->i_state_flags, 216f6634e26SHarshad Shirwadkar EXT4_STATE_FC_COMMITTING); 217f6634e26SHarshad Shirwadkar wq = bit_waitqueue(&ei->i_state_flags, 218f6634e26SHarshad Shirwadkar EXT4_STATE_FC_COMMITTING); 219f6634e26SHarshad Shirwadkar #else 220f6634e26SHarshad Shirwadkar DEFINE_WAIT_BIT(wait, &ei->i_flags, 221f6634e26SHarshad Shirwadkar EXT4_STATE_FC_COMMITTING); 222f6634e26SHarshad Shirwadkar wq = bit_waitqueue(&ei->i_flags, 223f6634e26SHarshad Shirwadkar EXT4_STATE_FC_COMMITTING); 224f6634e26SHarshad Shirwadkar #endif 225f6634e26SHarshad Shirwadkar lockdep_assert_held(&EXT4_SB(inode->i_sb)->s_fc_lock); 226f6634e26SHarshad Shirwadkar prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE); 227f6634e26SHarshad Shirwadkar spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock); 228f6634e26SHarshad Shirwadkar schedule(); 229f6634e26SHarshad Shirwadkar finish_wait(wq, &wait.wq_entry); 230f6634e26SHarshad Shirwadkar } 231f6634e26SHarshad Shirwadkar 232aa75f4d3SHarshad Shirwadkar /* 233aa75f4d3SHarshad Shirwadkar * Inform Ext4's fast about start of an inode update 234aa75f4d3SHarshad Shirwadkar * 235aa75f4d3SHarshad Shirwadkar * This function is called by the high level call VFS callbacks before 236aa75f4d3SHarshad Shirwadkar * performing any inode update. 
This function blocks if there's an ongoing 237aa75f4d3SHarshad Shirwadkar * fast commit on the inode in question. 238aa75f4d3SHarshad Shirwadkar */ 239aa75f4d3SHarshad Shirwadkar void ext4_fc_start_update(struct inode *inode) 240aa75f4d3SHarshad Shirwadkar { 241aa75f4d3SHarshad Shirwadkar struct ext4_inode_info *ei = EXT4_I(inode); 242aa75f4d3SHarshad Shirwadkar 2438016e29fSHarshad Shirwadkar if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) || 2448016e29fSHarshad Shirwadkar (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)) 245aa75f4d3SHarshad Shirwadkar return; 246aa75f4d3SHarshad Shirwadkar 247aa75f4d3SHarshad Shirwadkar restart: 248aa75f4d3SHarshad Shirwadkar spin_lock(&EXT4_SB(inode->i_sb)->s_fc_lock); 249aa75f4d3SHarshad Shirwadkar if (list_empty(&ei->i_fc_list)) 250aa75f4d3SHarshad Shirwadkar goto out; 251aa75f4d3SHarshad Shirwadkar 252aa75f4d3SHarshad Shirwadkar if (ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING)) { 253f6634e26SHarshad Shirwadkar ext4_fc_wait_committing_inode(inode); 254aa75f4d3SHarshad Shirwadkar goto restart; 255aa75f4d3SHarshad Shirwadkar } 256aa75f4d3SHarshad Shirwadkar out: 257aa75f4d3SHarshad Shirwadkar atomic_inc(&ei->i_fc_updates); 258aa75f4d3SHarshad Shirwadkar spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock); 259aa75f4d3SHarshad Shirwadkar } 260aa75f4d3SHarshad Shirwadkar 261aa75f4d3SHarshad Shirwadkar /* 262aa75f4d3SHarshad Shirwadkar * Stop inode update and wake up waiting fast commits if any. 
263aa75f4d3SHarshad Shirwadkar */ 264aa75f4d3SHarshad Shirwadkar void ext4_fc_stop_update(struct inode *inode) 265aa75f4d3SHarshad Shirwadkar { 266aa75f4d3SHarshad Shirwadkar struct ext4_inode_info *ei = EXT4_I(inode); 267aa75f4d3SHarshad Shirwadkar 2688016e29fSHarshad Shirwadkar if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) || 2698016e29fSHarshad Shirwadkar (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)) 270aa75f4d3SHarshad Shirwadkar return; 271aa75f4d3SHarshad Shirwadkar 272aa75f4d3SHarshad Shirwadkar if (atomic_dec_and_test(&ei->i_fc_updates)) 273aa75f4d3SHarshad Shirwadkar wake_up_all(&ei->i_fc_wait); 274aa75f4d3SHarshad Shirwadkar } 275aa75f4d3SHarshad Shirwadkar 276aa75f4d3SHarshad Shirwadkar /* 277aa75f4d3SHarshad Shirwadkar * Remove inode from fast commit list. If the inode is being committed 278aa75f4d3SHarshad Shirwadkar * we wait until inode commit is done. 279aa75f4d3SHarshad Shirwadkar */ 280aa75f4d3SHarshad Shirwadkar void ext4_fc_del(struct inode *inode) 281aa75f4d3SHarshad Shirwadkar { 282aa75f4d3SHarshad Shirwadkar struct ext4_inode_info *ei = EXT4_I(inode); 283b3998b3bSRitesh Harjani struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 284b3998b3bSRitesh Harjani struct ext4_fc_dentry_update *fc_dentry; 285aa75f4d3SHarshad Shirwadkar 2868016e29fSHarshad Shirwadkar if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) || 2878016e29fSHarshad Shirwadkar (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)) 288aa75f4d3SHarshad Shirwadkar return; 289aa75f4d3SHarshad Shirwadkar 290aa75f4d3SHarshad Shirwadkar restart: 291aa75f4d3SHarshad Shirwadkar spin_lock(&EXT4_SB(inode->i_sb)->s_fc_lock); 292b3998b3bSRitesh Harjani if (list_empty(&ei->i_fc_list) && list_empty(&ei->i_fc_dilist)) { 293aa75f4d3SHarshad Shirwadkar spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock); 294aa75f4d3SHarshad Shirwadkar return; 295aa75f4d3SHarshad Shirwadkar } 296aa75f4d3SHarshad Shirwadkar 297aa75f4d3SHarshad Shirwadkar if (ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING)) 
{ 298f6634e26SHarshad Shirwadkar ext4_fc_wait_committing_inode(inode); 299aa75f4d3SHarshad Shirwadkar goto restart; 300aa75f4d3SHarshad Shirwadkar } 301b3998b3bSRitesh Harjani 302b3998b3bSRitesh Harjani if (!list_empty(&ei->i_fc_list)) 303aa75f4d3SHarshad Shirwadkar list_del_init(&ei->i_fc_list); 304b3998b3bSRitesh Harjani 305b3998b3bSRitesh Harjani /* 306b3998b3bSRitesh Harjani * Since this inode is getting removed, let's also remove all FC 307b3998b3bSRitesh Harjani * dentry create references, since it is not needed to log it anyways. 308b3998b3bSRitesh Harjani */ 309b3998b3bSRitesh Harjani if (list_empty(&ei->i_fc_dilist)) { 310b3998b3bSRitesh Harjani spin_unlock(&sbi->s_fc_lock); 311b3998b3bSRitesh Harjani return; 312b3998b3bSRitesh Harjani } 313b3998b3bSRitesh Harjani 314b3998b3bSRitesh Harjani fc_dentry = list_first_entry(&ei->i_fc_dilist, struct ext4_fc_dentry_update, fcd_dilist); 315b3998b3bSRitesh Harjani WARN_ON(fc_dentry->fcd_op != EXT4_FC_TAG_CREAT); 316b3998b3bSRitesh Harjani list_del_init(&fc_dentry->fcd_list); 317b3998b3bSRitesh Harjani list_del_init(&fc_dentry->fcd_dilist); 318b3998b3bSRitesh Harjani 319b3998b3bSRitesh Harjani WARN_ON(!list_empty(&ei->i_fc_dilist)); 320b3998b3bSRitesh Harjani spin_unlock(&sbi->s_fc_lock); 321b3998b3bSRitesh Harjani 322b3998b3bSRitesh Harjani if (fc_dentry->fcd_name.name && 323b3998b3bSRitesh Harjani fc_dentry->fcd_name.len > DNAME_INLINE_LEN) 324b3998b3bSRitesh Harjani kfree(fc_dentry->fcd_name.name); 325b3998b3bSRitesh Harjani kmem_cache_free(ext4_fc_dentry_cachep, fc_dentry); 326b3998b3bSRitesh Harjani 327b3998b3bSRitesh Harjani return; 328aa75f4d3SHarshad Shirwadkar } 329aa75f4d3SHarshad Shirwadkar 330aa75f4d3SHarshad Shirwadkar /* 331e85c81baSXin Yin * Mark file system as fast commit ineligible, and record latest 332e85c81baSXin Yin * ineligible transaction tid. This means until the recorded 333e85c81baSXin Yin * transaction, commit operation would result in a full jbd2 commit. 
334aa75f4d3SHarshad Shirwadkar */ 335e85c81baSXin Yin void ext4_fc_mark_ineligible(struct super_block *sb, int reason, handle_t *handle) 336aa75f4d3SHarshad Shirwadkar { 337aa75f4d3SHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB(sb); 338e85c81baSXin Yin tid_t tid; 339aa75f4d3SHarshad Shirwadkar 3408016e29fSHarshad Shirwadkar if (!test_opt2(sb, JOURNAL_FAST_COMMIT) || 3418016e29fSHarshad Shirwadkar (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY)) 3428016e29fSHarshad Shirwadkar return; 3438016e29fSHarshad Shirwadkar 3449b5f6c9bSHarshad Shirwadkar ext4_set_mount_flag(sb, EXT4_MF_FC_INELIGIBLE); 345e85c81baSXin Yin if (handle && !IS_ERR(handle)) 346e85c81baSXin Yin tid = handle->h_transaction->t_tid; 347e85c81baSXin Yin else { 348e85c81baSXin Yin read_lock(&sbi->s_journal->j_state_lock); 349e85c81baSXin Yin tid = sbi->s_journal->j_running_transaction ? 350e85c81baSXin Yin sbi->s_journal->j_running_transaction->t_tid : 0; 351e85c81baSXin Yin read_unlock(&sbi->s_journal->j_state_lock); 352e85c81baSXin Yin } 353e85c81baSXin Yin spin_lock(&sbi->s_fc_lock); 354e85c81baSXin Yin if (sbi->s_fc_ineligible_tid < tid) 355e85c81baSXin Yin sbi->s_fc_ineligible_tid = tid; 356e85c81baSXin Yin spin_unlock(&sbi->s_fc_lock); 357aa75f4d3SHarshad Shirwadkar WARN_ON(reason >= EXT4_FC_REASON_MAX); 358aa75f4d3SHarshad Shirwadkar sbi->s_fc_stats.fc_ineligible_reason_count[reason]++; 359aa75f4d3SHarshad Shirwadkar } 360aa75f4d3SHarshad Shirwadkar 361aa75f4d3SHarshad Shirwadkar /* 362aa75f4d3SHarshad Shirwadkar * Generic fast commit tracking function. If this is the first time this we are 363aa75f4d3SHarshad Shirwadkar * called after a full commit, we initialize fast commit fields and then call 364aa75f4d3SHarshad Shirwadkar * __fc_track_fn() with update = 0. If we have already been called after a full 365aa75f4d3SHarshad Shirwadkar * commit, we pass update = 1. 
Based on that, the track function can determine 366aa75f4d3SHarshad Shirwadkar * if it needs to track a field for the first time or if it needs to just 367aa75f4d3SHarshad Shirwadkar * update the previously tracked value. 368aa75f4d3SHarshad Shirwadkar * 369aa75f4d3SHarshad Shirwadkar * If enqueue is set, this function enqueues the inode in fast commit list. 370aa75f4d3SHarshad Shirwadkar */ 371aa75f4d3SHarshad Shirwadkar static int ext4_fc_track_template( 372a80f7fcfSHarshad Shirwadkar handle_t *handle, struct inode *inode, 373a80f7fcfSHarshad Shirwadkar int (*__fc_track_fn)(struct inode *, void *, bool), 374aa75f4d3SHarshad Shirwadkar void *args, int enqueue) 375aa75f4d3SHarshad Shirwadkar { 376aa75f4d3SHarshad Shirwadkar bool update = false; 377aa75f4d3SHarshad Shirwadkar struct ext4_inode_info *ei = EXT4_I(inode); 378aa75f4d3SHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 379a80f7fcfSHarshad Shirwadkar tid_t tid = 0; 380aa75f4d3SHarshad Shirwadkar int ret; 381aa75f4d3SHarshad Shirwadkar 382a80f7fcfSHarshad Shirwadkar tid = handle->h_transaction->t_tid; 383aa75f4d3SHarshad Shirwadkar mutex_lock(&ei->i_fc_lock); 384a80f7fcfSHarshad Shirwadkar if (tid == ei->i_sync_tid) { 385aa75f4d3SHarshad Shirwadkar update = true; 386aa75f4d3SHarshad Shirwadkar } else { 387aa75f4d3SHarshad Shirwadkar ext4_fc_reset_inode(inode); 388a80f7fcfSHarshad Shirwadkar ei->i_sync_tid = tid; 389aa75f4d3SHarshad Shirwadkar } 390aa75f4d3SHarshad Shirwadkar ret = __fc_track_fn(inode, args, update); 391aa75f4d3SHarshad Shirwadkar mutex_unlock(&ei->i_fc_lock); 392aa75f4d3SHarshad Shirwadkar 393aa75f4d3SHarshad Shirwadkar if (!enqueue) 394aa75f4d3SHarshad Shirwadkar return ret; 395aa75f4d3SHarshad Shirwadkar 396aa75f4d3SHarshad Shirwadkar spin_lock(&sbi->s_fc_lock); 397aa75f4d3SHarshad Shirwadkar if (list_empty(&EXT4_I(inode)->i_fc_list)) 398aa75f4d3SHarshad Shirwadkar list_add_tail(&EXT4_I(inode)->i_fc_list, 399bdc8a53aSXin Yin (sbi->s_journal->j_flags & 
JBD2_FULL_COMMIT_ONGOING || 400bdc8a53aSXin Yin sbi->s_journal->j_flags & JBD2_FAST_COMMIT_ONGOING) ? 401aa75f4d3SHarshad Shirwadkar &sbi->s_fc_q[FC_Q_STAGING] : 402aa75f4d3SHarshad Shirwadkar &sbi->s_fc_q[FC_Q_MAIN]); 403aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock); 404aa75f4d3SHarshad Shirwadkar 405aa75f4d3SHarshad Shirwadkar return ret; 406aa75f4d3SHarshad Shirwadkar } 407aa75f4d3SHarshad Shirwadkar 408aa75f4d3SHarshad Shirwadkar struct __track_dentry_update_args { 409aa75f4d3SHarshad Shirwadkar struct dentry *dentry; 410aa75f4d3SHarshad Shirwadkar int op; 411aa75f4d3SHarshad Shirwadkar }; 412aa75f4d3SHarshad Shirwadkar 413aa75f4d3SHarshad Shirwadkar /* __track_fn for directory entry updates. Called with ei->i_fc_lock. */ 414aa75f4d3SHarshad Shirwadkar static int __track_dentry_update(struct inode *inode, void *arg, bool update) 415aa75f4d3SHarshad Shirwadkar { 416aa75f4d3SHarshad Shirwadkar struct ext4_fc_dentry_update *node; 417aa75f4d3SHarshad Shirwadkar struct ext4_inode_info *ei = EXT4_I(inode); 418aa75f4d3SHarshad Shirwadkar struct __track_dentry_update_args *dentry_update = 419aa75f4d3SHarshad Shirwadkar (struct __track_dentry_update_args *)arg; 420aa75f4d3SHarshad Shirwadkar struct dentry *dentry = dentry_update->dentry; 421aa75f4d3SHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 422aa75f4d3SHarshad Shirwadkar 423aa75f4d3SHarshad Shirwadkar mutex_unlock(&ei->i_fc_lock); 424aa75f4d3SHarshad Shirwadkar node = kmem_cache_alloc(ext4_fc_dentry_cachep, GFP_NOFS); 425aa75f4d3SHarshad Shirwadkar if (!node) { 426e85c81baSXin Yin ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM, NULL); 427aa75f4d3SHarshad Shirwadkar mutex_lock(&ei->i_fc_lock); 428aa75f4d3SHarshad Shirwadkar return -ENOMEM; 429aa75f4d3SHarshad Shirwadkar } 430aa75f4d3SHarshad Shirwadkar 431aa75f4d3SHarshad Shirwadkar node->fcd_op = dentry_update->op; 432aa75f4d3SHarshad Shirwadkar node->fcd_parent = dentry->d_parent->d_inode->i_ino; 433aa75f4d3SHarshad 
Shirwadkar node->fcd_ino = inode->i_ino; 434aa75f4d3SHarshad Shirwadkar if (dentry->d_name.len > DNAME_INLINE_LEN) { 435aa75f4d3SHarshad Shirwadkar node->fcd_name.name = kmalloc(dentry->d_name.len, GFP_NOFS); 436aa75f4d3SHarshad Shirwadkar if (!node->fcd_name.name) { 437aa75f4d3SHarshad Shirwadkar kmem_cache_free(ext4_fc_dentry_cachep, node); 438aa75f4d3SHarshad Shirwadkar ext4_fc_mark_ineligible(inode->i_sb, 439e85c81baSXin Yin EXT4_FC_REASON_NOMEM, NULL); 440aa75f4d3SHarshad Shirwadkar mutex_lock(&ei->i_fc_lock); 441aa75f4d3SHarshad Shirwadkar return -ENOMEM; 442aa75f4d3SHarshad Shirwadkar } 443aa75f4d3SHarshad Shirwadkar memcpy((u8 *)node->fcd_name.name, dentry->d_name.name, 444aa75f4d3SHarshad Shirwadkar dentry->d_name.len); 445aa75f4d3SHarshad Shirwadkar } else { 446aa75f4d3SHarshad Shirwadkar memcpy(node->fcd_iname, dentry->d_name.name, 447aa75f4d3SHarshad Shirwadkar dentry->d_name.len); 448aa75f4d3SHarshad Shirwadkar node->fcd_name.name = node->fcd_iname; 449aa75f4d3SHarshad Shirwadkar } 450aa75f4d3SHarshad Shirwadkar node->fcd_name.len = dentry->d_name.len; 451b3998b3bSRitesh Harjani INIT_LIST_HEAD(&node->fcd_dilist); 452aa75f4d3SHarshad Shirwadkar spin_lock(&sbi->s_fc_lock); 453bdc8a53aSXin Yin if (sbi->s_journal->j_flags & JBD2_FULL_COMMIT_ONGOING || 454bdc8a53aSXin Yin sbi->s_journal->j_flags & JBD2_FAST_COMMIT_ONGOING) 455aa75f4d3SHarshad Shirwadkar list_add_tail(&node->fcd_list, 456aa75f4d3SHarshad Shirwadkar &sbi->s_fc_dentry_q[FC_Q_STAGING]); 457aa75f4d3SHarshad Shirwadkar else 458aa75f4d3SHarshad Shirwadkar list_add_tail(&node->fcd_list, &sbi->s_fc_dentry_q[FC_Q_MAIN]); 459b3998b3bSRitesh Harjani 460b3998b3bSRitesh Harjani /* 461b3998b3bSRitesh Harjani * This helps us keep a track of all fc_dentry updates which is part of 462b3998b3bSRitesh Harjani * this ext4 inode. 
So in case the inode is getting unlinked, before 463b3998b3bSRitesh Harjani * even we get a chance to fsync, we could remove all fc_dentry 464b3998b3bSRitesh Harjani * references while evicting the inode in ext4_fc_del(). 465b3998b3bSRitesh Harjani * Also with this, we don't need to loop over all the inodes in 466b3998b3bSRitesh Harjani * sbi->s_fc_q to get the corresponding inode in 467b3998b3bSRitesh Harjani * ext4_fc_commit_dentry_updates(). 468b3998b3bSRitesh Harjani */ 469b3998b3bSRitesh Harjani if (dentry_update->op == EXT4_FC_TAG_CREAT) { 470b3998b3bSRitesh Harjani WARN_ON(!list_empty(&ei->i_fc_dilist)); 471b3998b3bSRitesh Harjani list_add_tail(&node->fcd_dilist, &ei->i_fc_dilist); 472b3998b3bSRitesh Harjani } 473aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock); 474aa75f4d3SHarshad Shirwadkar mutex_lock(&ei->i_fc_lock); 475aa75f4d3SHarshad Shirwadkar 476aa75f4d3SHarshad Shirwadkar return 0; 477aa75f4d3SHarshad Shirwadkar } 478aa75f4d3SHarshad Shirwadkar 479a80f7fcfSHarshad Shirwadkar void __ext4_fc_track_unlink(handle_t *handle, 480a80f7fcfSHarshad Shirwadkar struct inode *inode, struct dentry *dentry) 481aa75f4d3SHarshad Shirwadkar { 482aa75f4d3SHarshad Shirwadkar struct __track_dentry_update_args args; 483aa75f4d3SHarshad Shirwadkar int ret; 484aa75f4d3SHarshad Shirwadkar 485aa75f4d3SHarshad Shirwadkar args.dentry = dentry; 486aa75f4d3SHarshad Shirwadkar args.op = EXT4_FC_TAG_UNLINK; 487aa75f4d3SHarshad Shirwadkar 488a80f7fcfSHarshad Shirwadkar ret = ext4_fc_track_template(handle, inode, __track_dentry_update, 489aa75f4d3SHarshad Shirwadkar (void *)&args, 0); 4901d2e2440SRitesh Harjani trace_ext4_fc_track_unlink(handle, inode, dentry, ret); 491aa75f4d3SHarshad Shirwadkar } 492aa75f4d3SHarshad Shirwadkar 493a80f7fcfSHarshad Shirwadkar void ext4_fc_track_unlink(handle_t *handle, struct dentry *dentry) 494a80f7fcfSHarshad Shirwadkar { 49578be0471SRitesh Harjani struct inode *inode = d_inode(dentry); 49678be0471SRitesh Harjani struct ext4_sb_info *sbi 
= EXT4_SB(inode->i_sb); 49778be0471SRitesh Harjani 49878be0471SRitesh Harjani if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) || 49978be0471SRitesh Harjani (sbi->s_mount_state & EXT4_FC_REPLAY)) 50078be0471SRitesh Harjani return; 50178be0471SRitesh Harjani 50278be0471SRitesh Harjani if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE)) 50378be0471SRitesh Harjani return; 50478be0471SRitesh Harjani 50578be0471SRitesh Harjani __ext4_fc_track_unlink(handle, inode, dentry); 506a80f7fcfSHarshad Shirwadkar } 507a80f7fcfSHarshad Shirwadkar 508a80f7fcfSHarshad Shirwadkar void __ext4_fc_track_link(handle_t *handle, 509a80f7fcfSHarshad Shirwadkar struct inode *inode, struct dentry *dentry) 510aa75f4d3SHarshad Shirwadkar { 511aa75f4d3SHarshad Shirwadkar struct __track_dentry_update_args args; 512aa75f4d3SHarshad Shirwadkar int ret; 513aa75f4d3SHarshad Shirwadkar 514aa75f4d3SHarshad Shirwadkar args.dentry = dentry; 515aa75f4d3SHarshad Shirwadkar args.op = EXT4_FC_TAG_LINK; 516aa75f4d3SHarshad Shirwadkar 517a80f7fcfSHarshad Shirwadkar ret = ext4_fc_track_template(handle, inode, __track_dentry_update, 518aa75f4d3SHarshad Shirwadkar (void *)&args, 0); 5191d2e2440SRitesh Harjani trace_ext4_fc_track_link(handle, inode, dentry, ret); 520aa75f4d3SHarshad Shirwadkar } 521aa75f4d3SHarshad Shirwadkar 522a80f7fcfSHarshad Shirwadkar void ext4_fc_track_link(handle_t *handle, struct dentry *dentry) 523a80f7fcfSHarshad Shirwadkar { 52478be0471SRitesh Harjani struct inode *inode = d_inode(dentry); 52578be0471SRitesh Harjani struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 52678be0471SRitesh Harjani 52778be0471SRitesh Harjani if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) || 52878be0471SRitesh Harjani (sbi->s_mount_state & EXT4_FC_REPLAY)) 52978be0471SRitesh Harjani return; 53078be0471SRitesh Harjani 53178be0471SRitesh Harjani if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE)) 53278be0471SRitesh Harjani return; 53378be0471SRitesh Harjani 53478be0471SRitesh Harjani 
__ext4_fc_track_link(handle, inode, dentry); 535a80f7fcfSHarshad Shirwadkar } 536a80f7fcfSHarshad Shirwadkar 5378210bb29SHarshad Shirwadkar void __ext4_fc_track_create(handle_t *handle, struct inode *inode, 5388210bb29SHarshad Shirwadkar struct dentry *dentry) 539aa75f4d3SHarshad Shirwadkar { 540aa75f4d3SHarshad Shirwadkar struct __track_dentry_update_args args; 541aa75f4d3SHarshad Shirwadkar int ret; 542aa75f4d3SHarshad Shirwadkar 543aa75f4d3SHarshad Shirwadkar args.dentry = dentry; 544aa75f4d3SHarshad Shirwadkar args.op = EXT4_FC_TAG_CREAT; 545aa75f4d3SHarshad Shirwadkar 546a80f7fcfSHarshad Shirwadkar ret = ext4_fc_track_template(handle, inode, __track_dentry_update, 547aa75f4d3SHarshad Shirwadkar (void *)&args, 0); 5481d2e2440SRitesh Harjani trace_ext4_fc_track_create(handle, inode, dentry, ret); 549aa75f4d3SHarshad Shirwadkar } 550aa75f4d3SHarshad Shirwadkar 5518210bb29SHarshad Shirwadkar void ext4_fc_track_create(handle_t *handle, struct dentry *dentry) 5528210bb29SHarshad Shirwadkar { 55378be0471SRitesh Harjani struct inode *inode = d_inode(dentry); 55478be0471SRitesh Harjani struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 55578be0471SRitesh Harjani 55678be0471SRitesh Harjani if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) || 55778be0471SRitesh Harjani (sbi->s_mount_state & EXT4_FC_REPLAY)) 55878be0471SRitesh Harjani return; 55978be0471SRitesh Harjani 56078be0471SRitesh Harjani if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE)) 56178be0471SRitesh Harjani return; 56278be0471SRitesh Harjani 56378be0471SRitesh Harjani __ext4_fc_track_create(handle, inode, dentry); 5648210bb29SHarshad Shirwadkar } 5658210bb29SHarshad Shirwadkar 566aa75f4d3SHarshad Shirwadkar /* __track_fn for inode tracking */ 567aa75f4d3SHarshad Shirwadkar static int __track_inode(struct inode *inode, void *arg, bool update) 568aa75f4d3SHarshad Shirwadkar { 569aa75f4d3SHarshad Shirwadkar if (update) 570aa75f4d3SHarshad Shirwadkar return -EEXIST; 571aa75f4d3SHarshad Shirwadkar 
572aa75f4d3SHarshad Shirwadkar EXT4_I(inode)->i_fc_lblk_len = 0; 573aa75f4d3SHarshad Shirwadkar 574aa75f4d3SHarshad Shirwadkar return 0; 575aa75f4d3SHarshad Shirwadkar } 576aa75f4d3SHarshad Shirwadkar 577a80f7fcfSHarshad Shirwadkar void ext4_fc_track_inode(handle_t *handle, struct inode *inode) 578aa75f4d3SHarshad Shirwadkar { 57978be0471SRitesh Harjani struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 580aa75f4d3SHarshad Shirwadkar int ret; 581aa75f4d3SHarshad Shirwadkar 582aa75f4d3SHarshad Shirwadkar if (S_ISDIR(inode->i_mode)) 583aa75f4d3SHarshad Shirwadkar return; 584aa75f4d3SHarshad Shirwadkar 585556e0319SHarshad Shirwadkar if (ext4_should_journal_data(inode)) { 586556e0319SHarshad Shirwadkar ext4_fc_mark_ineligible(inode->i_sb, 587e85c81baSXin Yin EXT4_FC_REASON_INODE_JOURNAL_DATA, handle); 588556e0319SHarshad Shirwadkar return; 589556e0319SHarshad Shirwadkar } 590556e0319SHarshad Shirwadkar 59178be0471SRitesh Harjani if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) || 59278be0471SRitesh Harjani (sbi->s_mount_state & EXT4_FC_REPLAY)) 59378be0471SRitesh Harjani return; 59478be0471SRitesh Harjani 59578be0471SRitesh Harjani if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE)) 59678be0471SRitesh Harjani return; 59778be0471SRitesh Harjani 598a80f7fcfSHarshad Shirwadkar ret = ext4_fc_track_template(handle, inode, __track_inode, NULL, 1); 5991d2e2440SRitesh Harjani trace_ext4_fc_track_inode(handle, inode, ret); 600aa75f4d3SHarshad Shirwadkar } 601aa75f4d3SHarshad Shirwadkar 602aa75f4d3SHarshad Shirwadkar struct __track_range_args { 603aa75f4d3SHarshad Shirwadkar ext4_lblk_t start, end; 604aa75f4d3SHarshad Shirwadkar }; 605aa75f4d3SHarshad Shirwadkar 606aa75f4d3SHarshad Shirwadkar /* __track_fn for tracking data updates */ 607aa75f4d3SHarshad Shirwadkar static int __track_range(struct inode *inode, void *arg, bool update) 608aa75f4d3SHarshad Shirwadkar { 609aa75f4d3SHarshad Shirwadkar struct ext4_inode_info *ei = EXT4_I(inode); 610aa75f4d3SHarshad Shirwadkar 
ext4_lblk_t oldstart; 611aa75f4d3SHarshad Shirwadkar struct __track_range_args *__arg = 612aa75f4d3SHarshad Shirwadkar (struct __track_range_args *)arg; 613aa75f4d3SHarshad Shirwadkar 614aa75f4d3SHarshad Shirwadkar if (inode->i_ino < EXT4_FIRST_INO(inode->i_sb)) { 615aa75f4d3SHarshad Shirwadkar ext4_debug("Special inode %ld being modified\n", inode->i_ino); 616aa75f4d3SHarshad Shirwadkar return -ECANCELED; 617aa75f4d3SHarshad Shirwadkar } 618aa75f4d3SHarshad Shirwadkar 619aa75f4d3SHarshad Shirwadkar oldstart = ei->i_fc_lblk_start; 620aa75f4d3SHarshad Shirwadkar 621aa75f4d3SHarshad Shirwadkar if (update && ei->i_fc_lblk_len > 0) { 622aa75f4d3SHarshad Shirwadkar ei->i_fc_lblk_start = min(ei->i_fc_lblk_start, __arg->start); 623aa75f4d3SHarshad Shirwadkar ei->i_fc_lblk_len = 624aa75f4d3SHarshad Shirwadkar max(oldstart + ei->i_fc_lblk_len - 1, __arg->end) - 625aa75f4d3SHarshad Shirwadkar ei->i_fc_lblk_start + 1; 626aa75f4d3SHarshad Shirwadkar } else { 627aa75f4d3SHarshad Shirwadkar ei->i_fc_lblk_start = __arg->start; 628aa75f4d3SHarshad Shirwadkar ei->i_fc_lblk_len = __arg->end - __arg->start + 1; 629aa75f4d3SHarshad Shirwadkar } 630aa75f4d3SHarshad Shirwadkar 631aa75f4d3SHarshad Shirwadkar return 0; 632aa75f4d3SHarshad Shirwadkar } 633aa75f4d3SHarshad Shirwadkar 634a80f7fcfSHarshad Shirwadkar void ext4_fc_track_range(handle_t *handle, struct inode *inode, ext4_lblk_t start, 635aa75f4d3SHarshad Shirwadkar ext4_lblk_t end) 636aa75f4d3SHarshad Shirwadkar { 63778be0471SRitesh Harjani struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 638aa75f4d3SHarshad Shirwadkar struct __track_range_args args; 639aa75f4d3SHarshad Shirwadkar int ret; 640aa75f4d3SHarshad Shirwadkar 641aa75f4d3SHarshad Shirwadkar if (S_ISDIR(inode->i_mode)) 642aa75f4d3SHarshad Shirwadkar return; 643aa75f4d3SHarshad Shirwadkar 64478be0471SRitesh Harjani if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) || 64578be0471SRitesh Harjani (sbi->s_mount_state & EXT4_FC_REPLAY)) 64678be0471SRitesh Harjani return; 
64778be0471SRitesh Harjani 64878be0471SRitesh Harjani if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE)) 64978be0471SRitesh Harjani return; 65078be0471SRitesh Harjani 651aa75f4d3SHarshad Shirwadkar args.start = start; 652aa75f4d3SHarshad Shirwadkar args.end = end; 653aa75f4d3SHarshad Shirwadkar 654a80f7fcfSHarshad Shirwadkar ret = ext4_fc_track_template(handle, inode, __track_range, &args, 1); 655aa75f4d3SHarshad Shirwadkar 6561d2e2440SRitesh Harjani trace_ext4_fc_track_range(handle, inode, start, end, ret); 657aa75f4d3SHarshad Shirwadkar } 658aa75f4d3SHarshad Shirwadkar 659e9f53353SDaejun Park static void ext4_fc_submit_bh(struct super_block *sb, bool is_tail) 660aa75f4d3SHarshad Shirwadkar { 66167c0f556SBart Van Assche blk_opf_t write_flags = REQ_SYNC; 662aa75f4d3SHarshad Shirwadkar struct buffer_head *bh = EXT4_SB(sb)->s_fc_bh; 663aa75f4d3SHarshad Shirwadkar 664e9f53353SDaejun Park /* Add REQ_FUA | REQ_PREFLUSH only its tail */ 665e9f53353SDaejun Park if (test_opt(sb, BARRIER) && is_tail) 666aa75f4d3SHarshad Shirwadkar write_flags |= REQ_FUA | REQ_PREFLUSH; 667aa75f4d3SHarshad Shirwadkar lock_buffer(bh); 668764b3fd3SHarshad Shirwadkar set_buffer_dirty(bh); 669aa75f4d3SHarshad Shirwadkar set_buffer_uptodate(bh); 670aa75f4d3SHarshad Shirwadkar bh->b_end_io = ext4_end_buffer_io_sync; 6711420c4a5SBart Van Assche submit_bh(REQ_OP_WRITE | write_flags, bh); 672aa75f4d3SHarshad Shirwadkar EXT4_SB(sb)->s_fc_bh = NULL; 673aa75f4d3SHarshad Shirwadkar } 674aa75f4d3SHarshad Shirwadkar 675aa75f4d3SHarshad Shirwadkar /* Ext4 commit path routines */ 676aa75f4d3SHarshad Shirwadkar 677aa75f4d3SHarshad Shirwadkar /* memzero and update CRC */ 678aa75f4d3SHarshad Shirwadkar static void *ext4_fc_memzero(struct super_block *sb, void *dst, int len, 679aa75f4d3SHarshad Shirwadkar u32 *crc) 680aa75f4d3SHarshad Shirwadkar { 681aa75f4d3SHarshad Shirwadkar void *ret; 682aa75f4d3SHarshad Shirwadkar 683aa75f4d3SHarshad Shirwadkar ret = memset(dst, 0, len); 684aa75f4d3SHarshad 
Shirwadkar if (crc) 685aa75f4d3SHarshad Shirwadkar *crc = ext4_chksum(EXT4_SB(sb), *crc, dst, len); 686aa75f4d3SHarshad Shirwadkar return ret; 687aa75f4d3SHarshad Shirwadkar } 688aa75f4d3SHarshad Shirwadkar 689aa75f4d3SHarshad Shirwadkar /* 690aa75f4d3SHarshad Shirwadkar * Allocate len bytes on a fast commit buffer. 691aa75f4d3SHarshad Shirwadkar * 692aa75f4d3SHarshad Shirwadkar * During the commit time this function is used to manage fast commit 693aa75f4d3SHarshad Shirwadkar * block space. We don't split a fast commit log onto different 694aa75f4d3SHarshad Shirwadkar * blocks. So this function makes sure that if there's not enough space 695aa75f4d3SHarshad Shirwadkar * on the current block, the remaining space in the current block is 696aa75f4d3SHarshad Shirwadkar * marked as unused by adding EXT4_FC_TAG_PAD tag. In that case, 697aa75f4d3SHarshad Shirwadkar * new block is from jbd2 and CRC is updated to reflect the padding 698aa75f4d3SHarshad Shirwadkar * we added. 699aa75f4d3SHarshad Shirwadkar */ 700aa75f4d3SHarshad Shirwadkar static u8 *ext4_fc_reserve_space(struct super_block *sb, int len, u32 *crc) 701aa75f4d3SHarshad Shirwadkar { 702aa75f4d3SHarshad Shirwadkar struct ext4_fc_tl *tl; 703aa75f4d3SHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB(sb); 704aa75f4d3SHarshad Shirwadkar struct buffer_head *bh; 705aa75f4d3SHarshad Shirwadkar int bsize = sbi->s_journal->j_blocksize; 706aa75f4d3SHarshad Shirwadkar int ret, off = sbi->s_fc_bytes % bsize; 707aa75f4d3SHarshad Shirwadkar int pad_len; 708aa75f4d3SHarshad Shirwadkar 709aa75f4d3SHarshad Shirwadkar /* 710aa75f4d3SHarshad Shirwadkar * After allocating len, we should have space at least for a 0 byte 711aa75f4d3SHarshad Shirwadkar * padding. 
712aa75f4d3SHarshad Shirwadkar */ 713aa75f4d3SHarshad Shirwadkar if (len + sizeof(struct ext4_fc_tl) > bsize) 714aa75f4d3SHarshad Shirwadkar return NULL; 715aa75f4d3SHarshad Shirwadkar 716aa75f4d3SHarshad Shirwadkar if (bsize - off - 1 > len + sizeof(struct ext4_fc_tl)) { 717aa75f4d3SHarshad Shirwadkar /* 718aa75f4d3SHarshad Shirwadkar * Only allocate from current buffer if we have enough space for 719aa75f4d3SHarshad Shirwadkar * this request AND we have space to add a zero byte padding. 720aa75f4d3SHarshad Shirwadkar */ 721aa75f4d3SHarshad Shirwadkar if (!sbi->s_fc_bh) { 722aa75f4d3SHarshad Shirwadkar ret = jbd2_fc_get_buf(EXT4_SB(sb)->s_journal, &bh); 723aa75f4d3SHarshad Shirwadkar if (ret) 724aa75f4d3SHarshad Shirwadkar return NULL; 725aa75f4d3SHarshad Shirwadkar sbi->s_fc_bh = bh; 726aa75f4d3SHarshad Shirwadkar } 727aa75f4d3SHarshad Shirwadkar sbi->s_fc_bytes += len; 728aa75f4d3SHarshad Shirwadkar return sbi->s_fc_bh->b_data + off; 729aa75f4d3SHarshad Shirwadkar } 730aa75f4d3SHarshad Shirwadkar /* Need to add PAD tag */ 731aa75f4d3SHarshad Shirwadkar tl = (struct ext4_fc_tl *)(sbi->s_fc_bh->b_data + off); 732aa75f4d3SHarshad Shirwadkar tl->fc_tag = cpu_to_le16(EXT4_FC_TAG_PAD); 733aa75f4d3SHarshad Shirwadkar pad_len = bsize - off - 1 - sizeof(struct ext4_fc_tl); 734aa75f4d3SHarshad Shirwadkar tl->fc_len = cpu_to_le16(pad_len); 735aa75f4d3SHarshad Shirwadkar if (crc) 736aa75f4d3SHarshad Shirwadkar *crc = ext4_chksum(sbi, *crc, tl, sizeof(*tl)); 737aa75f4d3SHarshad Shirwadkar if (pad_len > 0) 738aa75f4d3SHarshad Shirwadkar ext4_fc_memzero(sb, tl + 1, pad_len, crc); 739e9f53353SDaejun Park ext4_fc_submit_bh(sb, false); 740aa75f4d3SHarshad Shirwadkar 741aa75f4d3SHarshad Shirwadkar ret = jbd2_fc_get_buf(EXT4_SB(sb)->s_journal, &bh); 742aa75f4d3SHarshad Shirwadkar if (ret) 743aa75f4d3SHarshad Shirwadkar return NULL; 744aa75f4d3SHarshad Shirwadkar sbi->s_fc_bh = bh; 745aa75f4d3SHarshad Shirwadkar sbi->s_fc_bytes = (sbi->s_fc_bytes / bsize + 1) * bsize + len; 
746aa75f4d3SHarshad Shirwadkar return sbi->s_fc_bh->b_data; 747aa75f4d3SHarshad Shirwadkar } 748aa75f4d3SHarshad Shirwadkar 749aa75f4d3SHarshad Shirwadkar /* memcpy to fc reserved space and update CRC */ 750aa75f4d3SHarshad Shirwadkar static void *ext4_fc_memcpy(struct super_block *sb, void *dst, const void *src, 751aa75f4d3SHarshad Shirwadkar int len, u32 *crc) 752aa75f4d3SHarshad Shirwadkar { 753aa75f4d3SHarshad Shirwadkar if (crc) 754aa75f4d3SHarshad Shirwadkar *crc = ext4_chksum(EXT4_SB(sb), *crc, src, len); 755aa75f4d3SHarshad Shirwadkar return memcpy(dst, src, len); 756aa75f4d3SHarshad Shirwadkar } 757aa75f4d3SHarshad Shirwadkar 758aa75f4d3SHarshad Shirwadkar /* 759aa75f4d3SHarshad Shirwadkar * Complete a fast commit by writing tail tag. 760aa75f4d3SHarshad Shirwadkar * 761aa75f4d3SHarshad Shirwadkar * Writing tail tag marks the end of a fast commit. In order to guarantee 762aa75f4d3SHarshad Shirwadkar * atomicity, after writing tail tag, even if there's space remaining 763aa75f4d3SHarshad Shirwadkar * in the block, next commit shouldn't use it. That's why tail tag 764aa75f4d3SHarshad Shirwadkar * has the length as that of the remaining space on the block. 765aa75f4d3SHarshad Shirwadkar */ 766aa75f4d3SHarshad Shirwadkar static int ext4_fc_write_tail(struct super_block *sb, u32 crc) 767aa75f4d3SHarshad Shirwadkar { 768aa75f4d3SHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB(sb); 769aa75f4d3SHarshad Shirwadkar struct ext4_fc_tl tl; 770aa75f4d3SHarshad Shirwadkar struct ext4_fc_tail tail; 771aa75f4d3SHarshad Shirwadkar int off, bsize = sbi->s_journal->j_blocksize; 772aa75f4d3SHarshad Shirwadkar u8 *dst; 773aa75f4d3SHarshad Shirwadkar 774aa75f4d3SHarshad Shirwadkar /* 775aa75f4d3SHarshad Shirwadkar * ext4_fc_reserve_space takes care of allocating an extra block if 776aa75f4d3SHarshad Shirwadkar * there's no enough space on this block for accommodating this tail. 
777aa75f4d3SHarshad Shirwadkar */ 778aa75f4d3SHarshad Shirwadkar dst = ext4_fc_reserve_space(sb, sizeof(tl) + sizeof(tail), &crc); 779aa75f4d3SHarshad Shirwadkar if (!dst) 780aa75f4d3SHarshad Shirwadkar return -ENOSPC; 781aa75f4d3SHarshad Shirwadkar 782aa75f4d3SHarshad Shirwadkar off = sbi->s_fc_bytes % bsize; 783aa75f4d3SHarshad Shirwadkar 784aa75f4d3SHarshad Shirwadkar tl.fc_tag = cpu_to_le16(EXT4_FC_TAG_TAIL); 785aa75f4d3SHarshad Shirwadkar tl.fc_len = cpu_to_le16(bsize - off - 1 + sizeof(struct ext4_fc_tail)); 786aa75f4d3SHarshad Shirwadkar sbi->s_fc_bytes = round_up(sbi->s_fc_bytes, bsize); 787aa75f4d3SHarshad Shirwadkar 788aa75f4d3SHarshad Shirwadkar ext4_fc_memcpy(sb, dst, &tl, sizeof(tl), &crc); 789aa75f4d3SHarshad Shirwadkar dst += sizeof(tl); 790aa75f4d3SHarshad Shirwadkar tail.fc_tid = cpu_to_le32(sbi->s_journal->j_running_transaction->t_tid); 791aa75f4d3SHarshad Shirwadkar ext4_fc_memcpy(sb, dst, &tail.fc_tid, sizeof(tail.fc_tid), &crc); 792aa75f4d3SHarshad Shirwadkar dst += sizeof(tail.fc_tid); 793aa75f4d3SHarshad Shirwadkar tail.fc_crc = cpu_to_le32(crc); 794aa75f4d3SHarshad Shirwadkar ext4_fc_memcpy(sb, dst, &tail.fc_crc, sizeof(tail.fc_crc), NULL); 795aa75f4d3SHarshad Shirwadkar 796e9f53353SDaejun Park ext4_fc_submit_bh(sb, true); 797aa75f4d3SHarshad Shirwadkar 798aa75f4d3SHarshad Shirwadkar return 0; 799aa75f4d3SHarshad Shirwadkar } 800aa75f4d3SHarshad Shirwadkar 801aa75f4d3SHarshad Shirwadkar /* 802aa75f4d3SHarshad Shirwadkar * Adds tag, length, value and updates CRC. Returns true if tlv was added. 803aa75f4d3SHarshad Shirwadkar * Returns false if there's not enough space. 
804aa75f4d3SHarshad Shirwadkar */ 805aa75f4d3SHarshad Shirwadkar static bool ext4_fc_add_tlv(struct super_block *sb, u16 tag, u16 len, u8 *val, 806aa75f4d3SHarshad Shirwadkar u32 *crc) 807aa75f4d3SHarshad Shirwadkar { 808aa75f4d3SHarshad Shirwadkar struct ext4_fc_tl tl; 809aa75f4d3SHarshad Shirwadkar u8 *dst; 810aa75f4d3SHarshad Shirwadkar 811aa75f4d3SHarshad Shirwadkar dst = ext4_fc_reserve_space(sb, sizeof(tl) + len, crc); 812aa75f4d3SHarshad Shirwadkar if (!dst) 813aa75f4d3SHarshad Shirwadkar return false; 814aa75f4d3SHarshad Shirwadkar 815aa75f4d3SHarshad Shirwadkar tl.fc_tag = cpu_to_le16(tag); 816aa75f4d3SHarshad Shirwadkar tl.fc_len = cpu_to_le16(len); 817aa75f4d3SHarshad Shirwadkar 818aa75f4d3SHarshad Shirwadkar ext4_fc_memcpy(sb, dst, &tl, sizeof(tl), crc); 819aa75f4d3SHarshad Shirwadkar ext4_fc_memcpy(sb, dst + sizeof(tl), val, len, crc); 820aa75f4d3SHarshad Shirwadkar 821aa75f4d3SHarshad Shirwadkar return true; 822aa75f4d3SHarshad Shirwadkar } 823aa75f4d3SHarshad Shirwadkar 824aa75f4d3SHarshad Shirwadkar /* Same as above, but adds dentry tlv. 
*/ 825facec450SGuoqing Jiang static bool ext4_fc_add_dentry_tlv(struct super_block *sb, u32 *crc, 826facec450SGuoqing Jiang struct ext4_fc_dentry_update *fc_dentry) 827aa75f4d3SHarshad Shirwadkar { 828aa75f4d3SHarshad Shirwadkar struct ext4_fc_dentry_info fcd; 829aa75f4d3SHarshad Shirwadkar struct ext4_fc_tl tl; 830facec450SGuoqing Jiang int dlen = fc_dentry->fcd_name.len; 831aa75f4d3SHarshad Shirwadkar u8 *dst = ext4_fc_reserve_space(sb, sizeof(tl) + sizeof(fcd) + dlen, 832aa75f4d3SHarshad Shirwadkar crc); 833aa75f4d3SHarshad Shirwadkar 834aa75f4d3SHarshad Shirwadkar if (!dst) 835aa75f4d3SHarshad Shirwadkar return false; 836aa75f4d3SHarshad Shirwadkar 837facec450SGuoqing Jiang fcd.fc_parent_ino = cpu_to_le32(fc_dentry->fcd_parent); 838facec450SGuoqing Jiang fcd.fc_ino = cpu_to_le32(fc_dentry->fcd_ino); 839facec450SGuoqing Jiang tl.fc_tag = cpu_to_le16(fc_dentry->fcd_op); 840aa75f4d3SHarshad Shirwadkar tl.fc_len = cpu_to_le16(sizeof(fcd) + dlen); 841aa75f4d3SHarshad Shirwadkar ext4_fc_memcpy(sb, dst, &tl, sizeof(tl), crc); 842aa75f4d3SHarshad Shirwadkar dst += sizeof(tl); 843aa75f4d3SHarshad Shirwadkar ext4_fc_memcpy(sb, dst, &fcd, sizeof(fcd), crc); 844aa75f4d3SHarshad Shirwadkar dst += sizeof(fcd); 845facec450SGuoqing Jiang ext4_fc_memcpy(sb, dst, fc_dentry->fcd_name.name, dlen, crc); 846aa75f4d3SHarshad Shirwadkar 847aa75f4d3SHarshad Shirwadkar return true; 848aa75f4d3SHarshad Shirwadkar } 849aa75f4d3SHarshad Shirwadkar 850aa75f4d3SHarshad Shirwadkar /* 851aa75f4d3SHarshad Shirwadkar * Writes inode in the fast commit space under TLV with tag @tag. 852aa75f4d3SHarshad Shirwadkar * Returns 0 on success, error on failure. 
853aa75f4d3SHarshad Shirwadkar */ 854aa75f4d3SHarshad Shirwadkar static int ext4_fc_write_inode(struct inode *inode, u32 *crc) 855aa75f4d3SHarshad Shirwadkar { 856aa75f4d3SHarshad Shirwadkar struct ext4_inode_info *ei = EXT4_I(inode); 857aa75f4d3SHarshad Shirwadkar int inode_len = EXT4_GOOD_OLD_INODE_SIZE; 858aa75f4d3SHarshad Shirwadkar int ret; 859aa75f4d3SHarshad Shirwadkar struct ext4_iloc iloc; 860aa75f4d3SHarshad Shirwadkar struct ext4_fc_inode fc_inode; 861aa75f4d3SHarshad Shirwadkar struct ext4_fc_tl tl; 862aa75f4d3SHarshad Shirwadkar u8 *dst; 863aa75f4d3SHarshad Shirwadkar 864aa75f4d3SHarshad Shirwadkar ret = ext4_get_inode_loc(inode, &iloc); 865aa75f4d3SHarshad Shirwadkar if (ret) 866aa75f4d3SHarshad Shirwadkar return ret; 867aa75f4d3SHarshad Shirwadkar 8686c31a689SHarshad Shirwadkar if (ext4_test_inode_flag(inode, EXT4_INODE_INLINE_DATA)) 8696c31a689SHarshad Shirwadkar inode_len = EXT4_INODE_SIZE(inode->i_sb); 8706c31a689SHarshad Shirwadkar else if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) 871aa75f4d3SHarshad Shirwadkar inode_len += ei->i_extra_isize; 872aa75f4d3SHarshad Shirwadkar 873aa75f4d3SHarshad Shirwadkar fc_inode.fc_ino = cpu_to_le32(inode->i_ino); 874aa75f4d3SHarshad Shirwadkar tl.fc_tag = cpu_to_le16(EXT4_FC_TAG_INODE); 875aa75f4d3SHarshad Shirwadkar tl.fc_len = cpu_to_le16(inode_len + sizeof(fc_inode.fc_ino)); 876aa75f4d3SHarshad Shirwadkar 877*ccbf8eebSYe Bin ret = -ECANCELED; 878aa75f4d3SHarshad Shirwadkar dst = ext4_fc_reserve_space(inode->i_sb, 879aa75f4d3SHarshad Shirwadkar sizeof(tl) + inode_len + sizeof(fc_inode.fc_ino), crc); 880aa75f4d3SHarshad Shirwadkar if (!dst) 881*ccbf8eebSYe Bin goto err; 882aa75f4d3SHarshad Shirwadkar 883aa75f4d3SHarshad Shirwadkar if (!ext4_fc_memcpy(inode->i_sb, dst, &tl, sizeof(tl), crc)) 884*ccbf8eebSYe Bin goto err; 885aa75f4d3SHarshad Shirwadkar dst += sizeof(tl); 886aa75f4d3SHarshad Shirwadkar if (!ext4_fc_memcpy(inode->i_sb, dst, &fc_inode, sizeof(fc_inode), crc)) 887*ccbf8eebSYe Bin goto 
err; 888aa75f4d3SHarshad Shirwadkar dst += sizeof(fc_inode); 889aa75f4d3SHarshad Shirwadkar if (!ext4_fc_memcpy(inode->i_sb, dst, (u8 *)ext4_raw_inode(&iloc), 890aa75f4d3SHarshad Shirwadkar inode_len, crc)) 891*ccbf8eebSYe Bin goto err; 892*ccbf8eebSYe Bin ret = 0; 893*ccbf8eebSYe Bin err: 894*ccbf8eebSYe Bin brelse(iloc.bh); 895*ccbf8eebSYe Bin return ret; 896aa75f4d3SHarshad Shirwadkar } 897aa75f4d3SHarshad Shirwadkar 898aa75f4d3SHarshad Shirwadkar /* 899aa75f4d3SHarshad Shirwadkar * Writes updated data ranges for the inode in question. Updates CRC. 900aa75f4d3SHarshad Shirwadkar * Returns 0 on success, error otherwise. 901aa75f4d3SHarshad Shirwadkar */ 902aa75f4d3SHarshad Shirwadkar static int ext4_fc_write_inode_data(struct inode *inode, u32 *crc) 903aa75f4d3SHarshad Shirwadkar { 904aa75f4d3SHarshad Shirwadkar ext4_lblk_t old_blk_size, cur_lblk_off, new_blk_size; 905aa75f4d3SHarshad Shirwadkar struct ext4_inode_info *ei = EXT4_I(inode); 906aa75f4d3SHarshad Shirwadkar struct ext4_map_blocks map; 907aa75f4d3SHarshad Shirwadkar struct ext4_fc_add_range fc_ext; 908aa75f4d3SHarshad Shirwadkar struct ext4_fc_del_range lrange; 909aa75f4d3SHarshad Shirwadkar struct ext4_extent *ex; 910aa75f4d3SHarshad Shirwadkar int ret; 911aa75f4d3SHarshad Shirwadkar 912aa75f4d3SHarshad Shirwadkar mutex_lock(&ei->i_fc_lock); 913aa75f4d3SHarshad Shirwadkar if (ei->i_fc_lblk_len == 0) { 914aa75f4d3SHarshad Shirwadkar mutex_unlock(&ei->i_fc_lock); 915aa75f4d3SHarshad Shirwadkar return 0; 916aa75f4d3SHarshad Shirwadkar } 917aa75f4d3SHarshad Shirwadkar old_blk_size = ei->i_fc_lblk_start; 918aa75f4d3SHarshad Shirwadkar new_blk_size = ei->i_fc_lblk_start + ei->i_fc_lblk_len - 1; 919aa75f4d3SHarshad Shirwadkar ei->i_fc_lblk_len = 0; 920aa75f4d3SHarshad Shirwadkar mutex_unlock(&ei->i_fc_lock); 921aa75f4d3SHarshad Shirwadkar 922aa75f4d3SHarshad Shirwadkar cur_lblk_off = old_blk_size; 9234978c659SJan Kara ext4_debug("will try writing %d to %d for inode %ld\n", 9244978c659SJan Kara cur_lblk_off, 
new_blk_size, inode->i_ino); 925aa75f4d3SHarshad Shirwadkar 926aa75f4d3SHarshad Shirwadkar while (cur_lblk_off <= new_blk_size) { 927aa75f4d3SHarshad Shirwadkar map.m_lblk = cur_lblk_off; 928aa75f4d3SHarshad Shirwadkar map.m_len = new_blk_size - cur_lblk_off + 1; 929aa75f4d3SHarshad Shirwadkar ret = ext4_map_blocks(NULL, inode, &map, 0); 930aa75f4d3SHarshad Shirwadkar if (ret < 0) 931aa75f4d3SHarshad Shirwadkar return -ECANCELED; 932aa75f4d3SHarshad Shirwadkar 933aa75f4d3SHarshad Shirwadkar if (map.m_len == 0) { 934aa75f4d3SHarshad Shirwadkar cur_lblk_off++; 935aa75f4d3SHarshad Shirwadkar continue; 936aa75f4d3SHarshad Shirwadkar } 937aa75f4d3SHarshad Shirwadkar 938aa75f4d3SHarshad Shirwadkar if (ret == 0) { 939aa75f4d3SHarshad Shirwadkar lrange.fc_ino = cpu_to_le32(inode->i_ino); 940aa75f4d3SHarshad Shirwadkar lrange.fc_lblk = cpu_to_le32(map.m_lblk); 941aa75f4d3SHarshad Shirwadkar lrange.fc_len = cpu_to_le32(map.m_len); 942aa75f4d3SHarshad Shirwadkar if (!ext4_fc_add_tlv(inode->i_sb, EXT4_FC_TAG_DEL_RANGE, 943aa75f4d3SHarshad Shirwadkar sizeof(lrange), (u8 *)&lrange, crc)) 944aa75f4d3SHarshad Shirwadkar return -ENOSPC; 945aa75f4d3SHarshad Shirwadkar } else { 946a2c2f082SHou Tao unsigned int max = (map.m_flags & EXT4_MAP_UNWRITTEN) ? 
947a2c2f082SHou Tao EXT_UNWRITTEN_MAX_LEN : EXT_INIT_MAX_LEN; 948a2c2f082SHou Tao 949a2c2f082SHou Tao /* Limit the number of blocks in one extent */ 950a2c2f082SHou Tao map.m_len = min(max, map.m_len); 951a2c2f082SHou Tao 952aa75f4d3SHarshad Shirwadkar fc_ext.fc_ino = cpu_to_le32(inode->i_ino); 953aa75f4d3SHarshad Shirwadkar ex = (struct ext4_extent *)&fc_ext.fc_ex; 954aa75f4d3SHarshad Shirwadkar ex->ee_block = cpu_to_le32(map.m_lblk); 955aa75f4d3SHarshad Shirwadkar ex->ee_len = cpu_to_le16(map.m_len); 956aa75f4d3SHarshad Shirwadkar ext4_ext_store_pblock(ex, map.m_pblk); 957aa75f4d3SHarshad Shirwadkar if (map.m_flags & EXT4_MAP_UNWRITTEN) 958aa75f4d3SHarshad Shirwadkar ext4_ext_mark_unwritten(ex); 959aa75f4d3SHarshad Shirwadkar else 960aa75f4d3SHarshad Shirwadkar ext4_ext_mark_initialized(ex); 961aa75f4d3SHarshad Shirwadkar if (!ext4_fc_add_tlv(inode->i_sb, EXT4_FC_TAG_ADD_RANGE, 962aa75f4d3SHarshad Shirwadkar sizeof(fc_ext), (u8 *)&fc_ext, crc)) 963aa75f4d3SHarshad Shirwadkar return -ENOSPC; 964aa75f4d3SHarshad Shirwadkar } 965aa75f4d3SHarshad Shirwadkar 966aa75f4d3SHarshad Shirwadkar cur_lblk_off += map.m_len; 967aa75f4d3SHarshad Shirwadkar } 968aa75f4d3SHarshad Shirwadkar 969aa75f4d3SHarshad Shirwadkar return 0; 970aa75f4d3SHarshad Shirwadkar } 971aa75f4d3SHarshad Shirwadkar 972aa75f4d3SHarshad Shirwadkar 973aa75f4d3SHarshad Shirwadkar /* Submit data for all the fast commit inodes */ 974aa75f4d3SHarshad Shirwadkar static int ext4_fc_submit_inode_data_all(journal_t *journal) 975aa75f4d3SHarshad Shirwadkar { 976c30365b9SYu Zhe struct super_block *sb = journal->j_private; 977aa75f4d3SHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB(sb); 978aa75f4d3SHarshad Shirwadkar struct ext4_inode_info *ei; 979aa75f4d3SHarshad Shirwadkar int ret = 0; 980aa75f4d3SHarshad Shirwadkar 981aa75f4d3SHarshad Shirwadkar spin_lock(&sbi->s_fc_lock); 98296e7c02dSDaejun Park list_for_each_entry(ei, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) { 983aa75f4d3SHarshad Shirwadkar 
ext4_set_inode_state(&ei->vfs_inode, EXT4_STATE_FC_COMMITTING); 984aa75f4d3SHarshad Shirwadkar while (atomic_read(&ei->i_fc_updates)) { 985aa75f4d3SHarshad Shirwadkar DEFINE_WAIT(wait); 986aa75f4d3SHarshad Shirwadkar 987aa75f4d3SHarshad Shirwadkar prepare_to_wait(&ei->i_fc_wait, &wait, 988aa75f4d3SHarshad Shirwadkar TASK_UNINTERRUPTIBLE); 989aa75f4d3SHarshad Shirwadkar if (atomic_read(&ei->i_fc_updates)) { 990aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock); 991aa75f4d3SHarshad Shirwadkar schedule(); 992aa75f4d3SHarshad Shirwadkar spin_lock(&sbi->s_fc_lock); 993aa75f4d3SHarshad Shirwadkar } 994aa75f4d3SHarshad Shirwadkar finish_wait(&ei->i_fc_wait, &wait); 995aa75f4d3SHarshad Shirwadkar } 996aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock); 997aa75f4d3SHarshad Shirwadkar ret = jbd2_submit_inode_data(ei->jinode); 998aa75f4d3SHarshad Shirwadkar if (ret) 999aa75f4d3SHarshad Shirwadkar return ret; 1000aa75f4d3SHarshad Shirwadkar spin_lock(&sbi->s_fc_lock); 1001aa75f4d3SHarshad Shirwadkar } 1002aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock); 1003aa75f4d3SHarshad Shirwadkar 1004aa75f4d3SHarshad Shirwadkar return ret; 1005aa75f4d3SHarshad Shirwadkar } 1006aa75f4d3SHarshad Shirwadkar 1007aa75f4d3SHarshad Shirwadkar /* Wait for completion of data for all the fast commit inodes */ 1008aa75f4d3SHarshad Shirwadkar static int ext4_fc_wait_inode_data_all(journal_t *journal) 1009aa75f4d3SHarshad Shirwadkar { 1010c30365b9SYu Zhe struct super_block *sb = journal->j_private; 1011aa75f4d3SHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB(sb); 1012aa75f4d3SHarshad Shirwadkar struct ext4_inode_info *pos, *n; 1013aa75f4d3SHarshad Shirwadkar int ret = 0; 1014aa75f4d3SHarshad Shirwadkar 1015aa75f4d3SHarshad Shirwadkar spin_lock(&sbi->s_fc_lock); 1016aa75f4d3SHarshad Shirwadkar list_for_each_entry_safe(pos, n, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) { 1017aa75f4d3SHarshad Shirwadkar if (!ext4_test_inode_state(&pos->vfs_inode, 1018aa75f4d3SHarshad Shirwadkar 
EXT4_STATE_FC_COMMITTING)) 1019aa75f4d3SHarshad Shirwadkar continue; 1020aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock); 1021aa75f4d3SHarshad Shirwadkar 1022aa75f4d3SHarshad Shirwadkar ret = jbd2_wait_inode_data(journal, pos->jinode); 1023aa75f4d3SHarshad Shirwadkar if (ret) 1024aa75f4d3SHarshad Shirwadkar return ret; 1025aa75f4d3SHarshad Shirwadkar spin_lock(&sbi->s_fc_lock); 1026aa75f4d3SHarshad Shirwadkar } 1027aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock); 1028aa75f4d3SHarshad Shirwadkar 1029aa75f4d3SHarshad Shirwadkar return 0; 1030aa75f4d3SHarshad Shirwadkar } 1031aa75f4d3SHarshad Shirwadkar 1032aa75f4d3SHarshad Shirwadkar /* Commit all the directory entry updates */ 1033aa75f4d3SHarshad Shirwadkar static int ext4_fc_commit_dentry_updates(journal_t *journal, u32 *crc) 1034fa329e27STheodore Ts'o __acquires(&sbi->s_fc_lock) 1035fa329e27STheodore Ts'o __releases(&sbi->s_fc_lock) 1036aa75f4d3SHarshad Shirwadkar { 1037c30365b9SYu Zhe struct super_block *sb = journal->j_private; 1038aa75f4d3SHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB(sb); 103996e7c02dSDaejun Park struct ext4_fc_dentry_update *fc_dentry, *fc_dentry_n; 1040aa75f4d3SHarshad Shirwadkar struct inode *inode; 1041b3998b3bSRitesh Harjani struct ext4_inode_info *ei; 1042aa75f4d3SHarshad Shirwadkar int ret; 1043aa75f4d3SHarshad Shirwadkar 1044aa75f4d3SHarshad Shirwadkar if (list_empty(&sbi->s_fc_dentry_q[FC_Q_MAIN])) 1045aa75f4d3SHarshad Shirwadkar return 0; 104696e7c02dSDaejun Park list_for_each_entry_safe(fc_dentry, fc_dentry_n, 104796e7c02dSDaejun Park &sbi->s_fc_dentry_q[FC_Q_MAIN], fcd_list) { 1048aa75f4d3SHarshad Shirwadkar if (fc_dentry->fcd_op != EXT4_FC_TAG_CREAT) { 1049aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock); 1050facec450SGuoqing Jiang if (!ext4_fc_add_dentry_tlv(sb, crc, fc_dentry)) { 1051aa75f4d3SHarshad Shirwadkar ret = -ENOSPC; 1052aa75f4d3SHarshad Shirwadkar goto lock_and_exit; 1053aa75f4d3SHarshad Shirwadkar } 1054aa75f4d3SHarshad Shirwadkar 
spin_lock(&sbi->s_fc_lock); 1055aa75f4d3SHarshad Shirwadkar continue; 1056aa75f4d3SHarshad Shirwadkar } 1057aa75f4d3SHarshad Shirwadkar /* 1058b3998b3bSRitesh Harjani * With fcd_dilist we need not loop in sbi->s_fc_q to get the 1059b3998b3bSRitesh Harjani * corresponding inode pointer 1060aa75f4d3SHarshad Shirwadkar */ 1061b3998b3bSRitesh Harjani WARN_ON(list_empty(&fc_dentry->fcd_dilist)); 1062b3998b3bSRitesh Harjani ei = list_first_entry(&fc_dentry->fcd_dilist, 1063b3998b3bSRitesh Harjani struct ext4_inode_info, i_fc_dilist); 1064b3998b3bSRitesh Harjani inode = &ei->vfs_inode; 1065b3998b3bSRitesh Harjani WARN_ON(inode->i_ino != fc_dentry->fcd_ino); 1066b3998b3bSRitesh Harjani 1067aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock); 1068aa75f4d3SHarshad Shirwadkar 1069aa75f4d3SHarshad Shirwadkar /* 1070aa75f4d3SHarshad Shirwadkar * We first write the inode and then the create dirent. This 1071aa75f4d3SHarshad Shirwadkar * allows the recovery code to create an unnamed inode first 1072aa75f4d3SHarshad Shirwadkar * and then link it to a directory entry. This allows us 1073aa75f4d3SHarshad Shirwadkar * to use namei.c routines almost as is and simplifies 1074aa75f4d3SHarshad Shirwadkar * the recovery code. 
1075aa75f4d3SHarshad Shirwadkar */ 1076aa75f4d3SHarshad Shirwadkar ret = ext4_fc_write_inode(inode, crc); 1077aa75f4d3SHarshad Shirwadkar if (ret) 1078aa75f4d3SHarshad Shirwadkar goto lock_and_exit; 1079aa75f4d3SHarshad Shirwadkar 1080aa75f4d3SHarshad Shirwadkar ret = ext4_fc_write_inode_data(inode, crc); 1081aa75f4d3SHarshad Shirwadkar if (ret) 1082aa75f4d3SHarshad Shirwadkar goto lock_and_exit; 1083aa75f4d3SHarshad Shirwadkar 1084facec450SGuoqing Jiang if (!ext4_fc_add_dentry_tlv(sb, crc, fc_dentry)) { 1085aa75f4d3SHarshad Shirwadkar ret = -ENOSPC; 1086aa75f4d3SHarshad Shirwadkar goto lock_and_exit; 1087aa75f4d3SHarshad Shirwadkar } 1088aa75f4d3SHarshad Shirwadkar 1089aa75f4d3SHarshad Shirwadkar spin_lock(&sbi->s_fc_lock); 1090aa75f4d3SHarshad Shirwadkar } 1091aa75f4d3SHarshad Shirwadkar return 0; 1092aa75f4d3SHarshad Shirwadkar lock_and_exit: 1093aa75f4d3SHarshad Shirwadkar spin_lock(&sbi->s_fc_lock); 1094aa75f4d3SHarshad Shirwadkar return ret; 1095aa75f4d3SHarshad Shirwadkar } 1096aa75f4d3SHarshad Shirwadkar 1097aa75f4d3SHarshad Shirwadkar static int ext4_fc_perform_commit(journal_t *journal) 1098aa75f4d3SHarshad Shirwadkar { 1099c30365b9SYu Zhe struct super_block *sb = journal->j_private; 1100aa75f4d3SHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB(sb); 1101aa75f4d3SHarshad Shirwadkar struct ext4_inode_info *iter; 1102aa75f4d3SHarshad Shirwadkar struct ext4_fc_head head; 1103aa75f4d3SHarshad Shirwadkar struct inode *inode; 1104aa75f4d3SHarshad Shirwadkar struct blk_plug plug; 1105aa75f4d3SHarshad Shirwadkar int ret = 0; 1106aa75f4d3SHarshad Shirwadkar u32 crc = 0; 1107aa75f4d3SHarshad Shirwadkar 1108aa75f4d3SHarshad Shirwadkar ret = ext4_fc_submit_inode_data_all(journal); 1109aa75f4d3SHarshad Shirwadkar if (ret) 1110aa75f4d3SHarshad Shirwadkar return ret; 1111aa75f4d3SHarshad Shirwadkar 1112aa75f4d3SHarshad Shirwadkar ret = ext4_fc_wait_inode_data_all(journal); 1113aa75f4d3SHarshad Shirwadkar if (ret) 1114aa75f4d3SHarshad Shirwadkar return ret; 
1115aa75f4d3SHarshad Shirwadkar 1116da0c5d26SHarshad Shirwadkar /* 1117da0c5d26SHarshad Shirwadkar * If file system device is different from journal device, issue a cache 1118da0c5d26SHarshad Shirwadkar * flush before we start writing fast commit blocks. 1119da0c5d26SHarshad Shirwadkar */ 1120da0c5d26SHarshad Shirwadkar if (journal->j_fs_dev != journal->j_dev) 1121c6bf3f0eSChristoph Hellwig blkdev_issue_flush(journal->j_fs_dev); 1122da0c5d26SHarshad Shirwadkar 1123aa75f4d3SHarshad Shirwadkar blk_start_plug(&plug); 1124aa75f4d3SHarshad Shirwadkar if (sbi->s_fc_bytes == 0) { 1125aa75f4d3SHarshad Shirwadkar /* 1126aa75f4d3SHarshad Shirwadkar * Add a head tag only if this is the first fast commit 1127aa75f4d3SHarshad Shirwadkar * in this TID. 1128aa75f4d3SHarshad Shirwadkar */ 1129aa75f4d3SHarshad Shirwadkar head.fc_features = cpu_to_le32(EXT4_FC_SUPPORTED_FEATURES); 1130aa75f4d3SHarshad Shirwadkar head.fc_tid = cpu_to_le32( 1131aa75f4d3SHarshad Shirwadkar sbi->s_journal->j_running_transaction->t_tid); 1132aa75f4d3SHarshad Shirwadkar if (!ext4_fc_add_tlv(sb, EXT4_FC_TAG_HEAD, sizeof(head), 1133e1262cd2SXu Yihang (u8 *)&head, &crc)) { 1134e1262cd2SXu Yihang ret = -ENOSPC; 1135aa75f4d3SHarshad Shirwadkar goto out; 1136aa75f4d3SHarshad Shirwadkar } 1137e1262cd2SXu Yihang } 1138aa75f4d3SHarshad Shirwadkar 1139aa75f4d3SHarshad Shirwadkar spin_lock(&sbi->s_fc_lock); 1140aa75f4d3SHarshad Shirwadkar ret = ext4_fc_commit_dentry_updates(journal, &crc); 1141aa75f4d3SHarshad Shirwadkar if (ret) { 1142aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock); 1143aa75f4d3SHarshad Shirwadkar goto out; 1144aa75f4d3SHarshad Shirwadkar } 1145aa75f4d3SHarshad Shirwadkar 114696e7c02dSDaejun Park list_for_each_entry(iter, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) { 1147aa75f4d3SHarshad Shirwadkar inode = &iter->vfs_inode; 1148aa75f4d3SHarshad Shirwadkar if (!ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING)) 1149aa75f4d3SHarshad Shirwadkar continue; 1150aa75f4d3SHarshad Shirwadkar 
1151aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock); 1152aa75f4d3SHarshad Shirwadkar ret = ext4_fc_write_inode_data(inode, &crc); 1153aa75f4d3SHarshad Shirwadkar if (ret) 1154aa75f4d3SHarshad Shirwadkar goto out; 1155aa75f4d3SHarshad Shirwadkar ret = ext4_fc_write_inode(inode, &crc); 1156aa75f4d3SHarshad Shirwadkar if (ret) 1157aa75f4d3SHarshad Shirwadkar goto out; 1158aa75f4d3SHarshad Shirwadkar spin_lock(&sbi->s_fc_lock); 1159aa75f4d3SHarshad Shirwadkar } 1160aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock); 1161aa75f4d3SHarshad Shirwadkar 1162aa75f4d3SHarshad Shirwadkar ret = ext4_fc_write_tail(sb, crc); 1163aa75f4d3SHarshad Shirwadkar 1164aa75f4d3SHarshad Shirwadkar out: 1165aa75f4d3SHarshad Shirwadkar blk_finish_plug(&plug); 1166aa75f4d3SHarshad Shirwadkar return ret; 1167aa75f4d3SHarshad Shirwadkar } 1168aa75f4d3SHarshad Shirwadkar 11690915e464SHarshad Shirwadkar static void ext4_fc_update_stats(struct super_block *sb, int status, 1170d9bf099cSRitesh Harjani u64 commit_time, int nblks, tid_t commit_tid) 11710915e464SHarshad Shirwadkar { 11720915e464SHarshad Shirwadkar struct ext4_fc_stats *stats = &EXT4_SB(sb)->s_fc_stats; 11730915e464SHarshad Shirwadkar 11744978c659SJan Kara ext4_debug("Fast commit ended with status = %d for tid %u", 1175d9bf099cSRitesh Harjani status, commit_tid); 11760915e464SHarshad Shirwadkar if (status == EXT4_FC_STATUS_OK) { 11770915e464SHarshad Shirwadkar stats->fc_num_commits++; 11780915e464SHarshad Shirwadkar stats->fc_numblks += nblks; 11790915e464SHarshad Shirwadkar if (likely(stats->s_fc_avg_commit_time)) 11800915e464SHarshad Shirwadkar stats->s_fc_avg_commit_time = 11810915e464SHarshad Shirwadkar (commit_time + 11820915e464SHarshad Shirwadkar stats->s_fc_avg_commit_time * 3) / 4; 11830915e464SHarshad Shirwadkar else 11840915e464SHarshad Shirwadkar stats->s_fc_avg_commit_time = commit_time; 11850915e464SHarshad Shirwadkar } else if (status == EXT4_FC_STATUS_FAILED || 11860915e464SHarshad Shirwadkar status == 
EXT4_FC_STATUS_INELIGIBLE) { 11870915e464SHarshad Shirwadkar if (status == EXT4_FC_STATUS_FAILED) 11880915e464SHarshad Shirwadkar stats->fc_failed_commits++; 11890915e464SHarshad Shirwadkar stats->fc_ineligible_commits++; 11900915e464SHarshad Shirwadkar } else { 11910915e464SHarshad Shirwadkar stats->fc_skipped_commits++; 11920915e464SHarshad Shirwadkar } 11935641ace5SRitesh Harjani trace_ext4_fc_commit_stop(sb, nblks, status, commit_tid); 11940915e464SHarshad Shirwadkar } 11950915e464SHarshad Shirwadkar 1196aa75f4d3SHarshad Shirwadkar /* 1197aa75f4d3SHarshad Shirwadkar * The main commit entry point. Performs a fast commit for transaction 1198aa75f4d3SHarshad Shirwadkar * commit_tid if needed. If it's not possible to perform a fast commit 1199aa75f4d3SHarshad Shirwadkar * due to various reasons, we fall back to full commit. Returns 0 1200aa75f4d3SHarshad Shirwadkar * on success, error otherwise. 1201aa75f4d3SHarshad Shirwadkar */ 1202aa75f4d3SHarshad Shirwadkar int ext4_fc_commit(journal_t *journal, tid_t commit_tid) 1203aa75f4d3SHarshad Shirwadkar { 1204c30365b9SYu Zhe struct super_block *sb = journal->j_private; 1205aa75f4d3SHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB(sb); 1206aa75f4d3SHarshad Shirwadkar int nblks = 0, ret, bsize = journal->j_blocksize; 1207aa75f4d3SHarshad Shirwadkar int subtid = atomic_read(&sbi->s_fc_subtid); 12080915e464SHarshad Shirwadkar int status = EXT4_FC_STATUS_OK, fc_bufs_before = 0; 1209aa75f4d3SHarshad Shirwadkar ktime_t start_time, commit_time; 1210aa75f4d3SHarshad Shirwadkar 12117f142440SRitesh Harjani if (!test_opt2(sb, JOURNAL_FAST_COMMIT)) 12127f142440SRitesh Harjani return jbd2_complete_transaction(journal, commit_tid); 12137f142440SRitesh Harjani 12145641ace5SRitesh Harjani trace_ext4_fc_commit_start(sb, commit_tid); 1215aa75f4d3SHarshad Shirwadkar 1216aa75f4d3SHarshad Shirwadkar start_time = ktime_get(); 1217aa75f4d3SHarshad Shirwadkar 1218aa75f4d3SHarshad Shirwadkar restart_fc: 1219aa75f4d3SHarshad Shirwadkar ret = 
jbd2_fc_begin_commit(journal, commit_tid); 1220aa75f4d3SHarshad Shirwadkar if (ret == -EALREADY) { 1221aa75f4d3SHarshad Shirwadkar /* There was an ongoing commit, check if we need to restart */ 1222aa75f4d3SHarshad Shirwadkar if (atomic_read(&sbi->s_fc_subtid) <= subtid && 1223aa75f4d3SHarshad Shirwadkar commit_tid > journal->j_commit_sequence) 1224aa75f4d3SHarshad Shirwadkar goto restart_fc; 1225d9bf099cSRitesh Harjani ext4_fc_update_stats(sb, EXT4_FC_STATUS_SKIPPED, 0, 0, 1226d9bf099cSRitesh Harjani commit_tid); 12270915e464SHarshad Shirwadkar return 0; 1228aa75f4d3SHarshad Shirwadkar } else if (ret) { 12290915e464SHarshad Shirwadkar /* 12300915e464SHarshad Shirwadkar * Commit couldn't start. Just update stats and perform a 12310915e464SHarshad Shirwadkar * full commit. 12320915e464SHarshad Shirwadkar */ 1233d9bf099cSRitesh Harjani ext4_fc_update_stats(sb, EXT4_FC_STATUS_FAILED, 0, 0, 1234d9bf099cSRitesh Harjani commit_tid); 12350915e464SHarshad Shirwadkar return jbd2_complete_transaction(journal, commit_tid); 1236aa75f4d3SHarshad Shirwadkar } 12370915e464SHarshad Shirwadkar 12387bbbe241SHarshad Shirwadkar /* 12397bbbe241SHarshad Shirwadkar * After establishing journal barrier via jbd2_fc_begin_commit(), check 12407bbbe241SHarshad Shirwadkar * if we are fast commit ineligible. 
12417bbbe241SHarshad Shirwadkar */ 12427bbbe241SHarshad Shirwadkar if (ext4_test_mount_flag(sb, EXT4_MF_FC_INELIGIBLE)) { 12430915e464SHarshad Shirwadkar status = EXT4_FC_STATUS_INELIGIBLE; 12440915e464SHarshad Shirwadkar goto fallback; 12457bbbe241SHarshad Shirwadkar } 1246aa75f4d3SHarshad Shirwadkar 1247aa75f4d3SHarshad Shirwadkar fc_bufs_before = (sbi->s_fc_bytes + bsize - 1) / bsize; 1248aa75f4d3SHarshad Shirwadkar ret = ext4_fc_perform_commit(journal); 1249aa75f4d3SHarshad Shirwadkar if (ret < 0) { 12500915e464SHarshad Shirwadkar status = EXT4_FC_STATUS_FAILED; 12510915e464SHarshad Shirwadkar goto fallback; 1252aa75f4d3SHarshad Shirwadkar } 1253aa75f4d3SHarshad Shirwadkar nblks = (sbi->s_fc_bytes + bsize - 1) / bsize - fc_bufs_before; 1254aa75f4d3SHarshad Shirwadkar ret = jbd2_fc_wait_bufs(journal, nblks); 1255aa75f4d3SHarshad Shirwadkar if (ret < 0) { 12560915e464SHarshad Shirwadkar status = EXT4_FC_STATUS_FAILED; 12570915e464SHarshad Shirwadkar goto fallback; 1258aa75f4d3SHarshad Shirwadkar } 1259aa75f4d3SHarshad Shirwadkar atomic_inc(&sbi->s_fc_subtid); 12600915e464SHarshad Shirwadkar ret = jbd2_fc_end_commit(journal); 1261aa75f4d3SHarshad Shirwadkar /* 12620915e464SHarshad Shirwadkar * weight the commit time higher than the average time so we 12630915e464SHarshad Shirwadkar * don't react too strongly to vast changes in the commit time 1264aa75f4d3SHarshad Shirwadkar */ 12650915e464SHarshad Shirwadkar commit_time = ktime_to_ns(ktime_sub(ktime_get(), start_time)); 1266d9bf099cSRitesh Harjani ext4_fc_update_stats(sb, status, commit_time, nblks, commit_tid); 12670915e464SHarshad Shirwadkar return ret; 12680915e464SHarshad Shirwadkar 12690915e464SHarshad Shirwadkar fallback: 12700915e464SHarshad Shirwadkar ret = jbd2_fc_end_commit_fallback(journal); 1271d9bf099cSRitesh Harjani ext4_fc_update_stats(sb, status, 0, 0, commit_tid); 12720915e464SHarshad Shirwadkar return ret; 1273aa75f4d3SHarshad Shirwadkar } 1274aa75f4d3SHarshad Shirwadkar 1275ff780b91SHarshad 
Shirwadkar /* 1276ff780b91SHarshad Shirwadkar * Fast commit cleanup routine. This is called after every fast commit and 1277ff780b91SHarshad Shirwadkar * full commit. full is true if we are called after a full commit. 1278ff780b91SHarshad Shirwadkar */ 1279e85c81baSXin Yin static void ext4_fc_cleanup(journal_t *journal, int full, tid_t tid) 1280ff780b91SHarshad Shirwadkar { 1281aa75f4d3SHarshad Shirwadkar struct super_block *sb = journal->j_private; 1282aa75f4d3SHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB(sb); 128396e7c02dSDaejun Park struct ext4_inode_info *iter, *iter_n; 1284aa75f4d3SHarshad Shirwadkar struct ext4_fc_dentry_update *fc_dentry; 1285aa75f4d3SHarshad Shirwadkar 1286aa75f4d3SHarshad Shirwadkar if (full && sbi->s_fc_bh) 1287aa75f4d3SHarshad Shirwadkar sbi->s_fc_bh = NULL; 1288aa75f4d3SHarshad Shirwadkar 128908f4c42aSRitesh Harjani trace_ext4_fc_cleanup(journal, full, tid); 1290aa75f4d3SHarshad Shirwadkar jbd2_fc_release_bufs(journal); 1291aa75f4d3SHarshad Shirwadkar 1292aa75f4d3SHarshad Shirwadkar spin_lock(&sbi->s_fc_lock); 129396e7c02dSDaejun Park list_for_each_entry_safe(iter, iter_n, &sbi->s_fc_q[FC_Q_MAIN], 129496e7c02dSDaejun Park i_fc_list) { 1295aa75f4d3SHarshad Shirwadkar list_del_init(&iter->i_fc_list); 1296aa75f4d3SHarshad Shirwadkar ext4_clear_inode_state(&iter->vfs_inode, 1297aa75f4d3SHarshad Shirwadkar EXT4_STATE_FC_COMMITTING); 1298bdc8a53aSXin Yin if (iter->i_sync_tid <= tid) 1299aa75f4d3SHarshad Shirwadkar ext4_fc_reset_inode(&iter->vfs_inode); 1300aa75f4d3SHarshad Shirwadkar /* Make sure EXT4_STATE_FC_COMMITTING bit is clear */ 1301aa75f4d3SHarshad Shirwadkar smp_mb(); 1302aa75f4d3SHarshad Shirwadkar #if (BITS_PER_LONG < 64) 1303aa75f4d3SHarshad Shirwadkar wake_up_bit(&iter->i_state_flags, EXT4_STATE_FC_COMMITTING); 1304aa75f4d3SHarshad Shirwadkar #else 1305aa75f4d3SHarshad Shirwadkar wake_up_bit(&iter->i_flags, EXT4_STATE_FC_COMMITTING); 1306aa75f4d3SHarshad Shirwadkar #endif 1307aa75f4d3SHarshad Shirwadkar } 
1308aa75f4d3SHarshad Shirwadkar 1309aa75f4d3SHarshad Shirwadkar while (!list_empty(&sbi->s_fc_dentry_q[FC_Q_MAIN])) { 1310aa75f4d3SHarshad Shirwadkar fc_dentry = list_first_entry(&sbi->s_fc_dentry_q[FC_Q_MAIN], 1311aa75f4d3SHarshad Shirwadkar struct ext4_fc_dentry_update, 1312aa75f4d3SHarshad Shirwadkar fcd_list); 1313aa75f4d3SHarshad Shirwadkar list_del_init(&fc_dentry->fcd_list); 1314b3998b3bSRitesh Harjani list_del_init(&fc_dentry->fcd_dilist); 1315aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock); 1316aa75f4d3SHarshad Shirwadkar 1317aa75f4d3SHarshad Shirwadkar if (fc_dentry->fcd_name.name && 1318aa75f4d3SHarshad Shirwadkar fc_dentry->fcd_name.len > DNAME_INLINE_LEN) 1319aa75f4d3SHarshad Shirwadkar kfree(fc_dentry->fcd_name.name); 1320aa75f4d3SHarshad Shirwadkar kmem_cache_free(ext4_fc_dentry_cachep, fc_dentry); 1321aa75f4d3SHarshad Shirwadkar spin_lock(&sbi->s_fc_lock); 1322aa75f4d3SHarshad Shirwadkar } 1323aa75f4d3SHarshad Shirwadkar 1324aa75f4d3SHarshad Shirwadkar list_splice_init(&sbi->s_fc_dentry_q[FC_Q_STAGING], 1325aa75f4d3SHarshad Shirwadkar &sbi->s_fc_dentry_q[FC_Q_MAIN]); 1326aa75f4d3SHarshad Shirwadkar list_splice_init(&sbi->s_fc_q[FC_Q_STAGING], 132731e203e0SDaejun Park &sbi->s_fc_q[FC_Q_MAIN]); 1328aa75f4d3SHarshad Shirwadkar 1329e85c81baSXin Yin if (tid >= sbi->s_fc_ineligible_tid) { 1330e85c81baSXin Yin sbi->s_fc_ineligible_tid = 0; 13319b5f6c9bSHarshad Shirwadkar ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE); 1332e85c81baSXin Yin } 1333aa75f4d3SHarshad Shirwadkar 1334aa75f4d3SHarshad Shirwadkar if (full) 1335aa75f4d3SHarshad Shirwadkar sbi->s_fc_bytes = 0; 1336aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock); 1337aa75f4d3SHarshad Shirwadkar trace_ext4_fc_stats(sb); 1338ff780b91SHarshad Shirwadkar } 13396866d7b3SHarshad Shirwadkar 13408016e29fSHarshad Shirwadkar /* Ext4 Replay Path Routines */ 13418016e29fSHarshad Shirwadkar 13428016e29fSHarshad Shirwadkar /* Helper struct for dentry replay routines */ 13438016e29fSHarshad 
Shirwadkar struct dentry_info_args { 13448016e29fSHarshad Shirwadkar int parent_ino, dname_len, ino, inode_len; 13458016e29fSHarshad Shirwadkar char *dname; 13468016e29fSHarshad Shirwadkar }; 13478016e29fSHarshad Shirwadkar 13488016e29fSHarshad Shirwadkar static inline void tl_to_darg(struct dentry_info_args *darg, 1349a7ba36bcSHarshad Shirwadkar struct ext4_fc_tl *tl, u8 *val) 13508016e29fSHarshad Shirwadkar { 1351a7ba36bcSHarshad Shirwadkar struct ext4_fc_dentry_info fcd; 13528016e29fSHarshad Shirwadkar 1353a7ba36bcSHarshad Shirwadkar memcpy(&fcd, val, sizeof(fcd)); 13548016e29fSHarshad Shirwadkar 1355a7ba36bcSHarshad Shirwadkar darg->parent_ino = le32_to_cpu(fcd.fc_parent_ino); 1356a7ba36bcSHarshad Shirwadkar darg->ino = le32_to_cpu(fcd.fc_ino); 1357a7ba36bcSHarshad Shirwadkar darg->dname = val + offsetof(struct ext4_fc_dentry_info, fc_dname); 1358a7ba36bcSHarshad Shirwadkar darg->dname_len = le16_to_cpu(tl->fc_len) - 13598016e29fSHarshad Shirwadkar sizeof(struct ext4_fc_dentry_info); 13608016e29fSHarshad Shirwadkar } 13618016e29fSHarshad Shirwadkar 13628016e29fSHarshad Shirwadkar /* Unlink replay function */ 1363a7ba36bcSHarshad Shirwadkar static int ext4_fc_replay_unlink(struct super_block *sb, struct ext4_fc_tl *tl, 1364a7ba36bcSHarshad Shirwadkar u8 *val) 13658016e29fSHarshad Shirwadkar { 13668016e29fSHarshad Shirwadkar struct inode *inode, *old_parent; 13678016e29fSHarshad Shirwadkar struct qstr entry; 13688016e29fSHarshad Shirwadkar struct dentry_info_args darg; 13698016e29fSHarshad Shirwadkar int ret = 0; 13708016e29fSHarshad Shirwadkar 1371a7ba36bcSHarshad Shirwadkar tl_to_darg(&darg, tl, val); 13728016e29fSHarshad Shirwadkar 13738016e29fSHarshad Shirwadkar trace_ext4_fc_replay(sb, EXT4_FC_TAG_UNLINK, darg.ino, 13748016e29fSHarshad Shirwadkar darg.parent_ino, darg.dname_len); 13758016e29fSHarshad Shirwadkar 13768016e29fSHarshad Shirwadkar entry.name = darg.dname; 13778016e29fSHarshad Shirwadkar entry.len = darg.dname_len; 13788016e29fSHarshad Shirwadkar 
inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL); 13798016e29fSHarshad Shirwadkar 138023dd561aSYi Li if (IS_ERR(inode)) { 13814978c659SJan Kara ext4_debug("Inode %d not found", darg.ino); 13828016e29fSHarshad Shirwadkar return 0; 13838016e29fSHarshad Shirwadkar } 13848016e29fSHarshad Shirwadkar 13858016e29fSHarshad Shirwadkar old_parent = ext4_iget(sb, darg.parent_ino, 13868016e29fSHarshad Shirwadkar EXT4_IGET_NORMAL); 138723dd561aSYi Li if (IS_ERR(old_parent)) { 13884978c659SJan Kara ext4_debug("Dir with inode %d not found", darg.parent_ino); 13898016e29fSHarshad Shirwadkar iput(inode); 13908016e29fSHarshad Shirwadkar return 0; 13918016e29fSHarshad Shirwadkar } 13928016e29fSHarshad Shirwadkar 1393a80f7fcfSHarshad Shirwadkar ret = __ext4_unlink(NULL, old_parent, &entry, inode); 13948016e29fSHarshad Shirwadkar /* -ENOENT ok coz it might not exist anymore. */ 13958016e29fSHarshad Shirwadkar if (ret == -ENOENT) 13968016e29fSHarshad Shirwadkar ret = 0; 13978016e29fSHarshad Shirwadkar iput(old_parent); 13988016e29fSHarshad Shirwadkar iput(inode); 13998016e29fSHarshad Shirwadkar return ret; 14008016e29fSHarshad Shirwadkar } 14018016e29fSHarshad Shirwadkar 14028016e29fSHarshad Shirwadkar static int ext4_fc_replay_link_internal(struct super_block *sb, 14038016e29fSHarshad Shirwadkar struct dentry_info_args *darg, 14048016e29fSHarshad Shirwadkar struct inode *inode) 14058016e29fSHarshad Shirwadkar { 14068016e29fSHarshad Shirwadkar struct inode *dir = NULL; 14078016e29fSHarshad Shirwadkar struct dentry *dentry_dir = NULL, *dentry_inode = NULL; 14088016e29fSHarshad Shirwadkar struct qstr qstr_dname = QSTR_INIT(darg->dname, darg->dname_len); 14098016e29fSHarshad Shirwadkar int ret = 0; 14108016e29fSHarshad Shirwadkar 14118016e29fSHarshad Shirwadkar dir = ext4_iget(sb, darg->parent_ino, EXT4_IGET_NORMAL); 14128016e29fSHarshad Shirwadkar if (IS_ERR(dir)) { 14134978c659SJan Kara ext4_debug("Dir with inode %d not found.", darg->parent_ino); 14148016e29fSHarshad Shirwadkar dir = 
NULL; 14158016e29fSHarshad Shirwadkar goto out; 14168016e29fSHarshad Shirwadkar } 14178016e29fSHarshad Shirwadkar 14188016e29fSHarshad Shirwadkar dentry_dir = d_obtain_alias(dir); 14198016e29fSHarshad Shirwadkar if (IS_ERR(dentry_dir)) { 14204978c659SJan Kara ext4_debug("Failed to obtain dentry"); 14218016e29fSHarshad Shirwadkar dentry_dir = NULL; 14228016e29fSHarshad Shirwadkar goto out; 14238016e29fSHarshad Shirwadkar } 14248016e29fSHarshad Shirwadkar 14258016e29fSHarshad Shirwadkar dentry_inode = d_alloc(dentry_dir, &qstr_dname); 14268016e29fSHarshad Shirwadkar if (!dentry_inode) { 14274978c659SJan Kara ext4_debug("Inode dentry not created."); 14288016e29fSHarshad Shirwadkar ret = -ENOMEM; 14298016e29fSHarshad Shirwadkar goto out; 14308016e29fSHarshad Shirwadkar } 14318016e29fSHarshad Shirwadkar 14328016e29fSHarshad Shirwadkar ret = __ext4_link(dir, inode, dentry_inode); 14338016e29fSHarshad Shirwadkar /* 14348016e29fSHarshad Shirwadkar * It's possible that link already existed since data blocks 14358016e29fSHarshad Shirwadkar * for the dir in question got persisted before we crashed OR 14368016e29fSHarshad Shirwadkar * we replayed this tag and crashed before the entire replay 14378016e29fSHarshad Shirwadkar * could complete. 
14388016e29fSHarshad Shirwadkar */ 14398016e29fSHarshad Shirwadkar if (ret && ret != -EEXIST) { 14404978c659SJan Kara ext4_debug("Failed to link\n"); 14418016e29fSHarshad Shirwadkar goto out; 14428016e29fSHarshad Shirwadkar } 14438016e29fSHarshad Shirwadkar 14448016e29fSHarshad Shirwadkar ret = 0; 14458016e29fSHarshad Shirwadkar out: 14468016e29fSHarshad Shirwadkar if (dentry_dir) { 14478016e29fSHarshad Shirwadkar d_drop(dentry_dir); 14488016e29fSHarshad Shirwadkar dput(dentry_dir); 14498016e29fSHarshad Shirwadkar } else if (dir) { 14508016e29fSHarshad Shirwadkar iput(dir); 14518016e29fSHarshad Shirwadkar } 14528016e29fSHarshad Shirwadkar if (dentry_inode) { 14538016e29fSHarshad Shirwadkar d_drop(dentry_inode); 14548016e29fSHarshad Shirwadkar dput(dentry_inode); 14558016e29fSHarshad Shirwadkar } 14568016e29fSHarshad Shirwadkar 14578016e29fSHarshad Shirwadkar return ret; 14588016e29fSHarshad Shirwadkar } 14598016e29fSHarshad Shirwadkar 14608016e29fSHarshad Shirwadkar /* Link replay function */ 1461a7ba36bcSHarshad Shirwadkar static int ext4_fc_replay_link(struct super_block *sb, struct ext4_fc_tl *tl, 1462a7ba36bcSHarshad Shirwadkar u8 *val) 14638016e29fSHarshad Shirwadkar { 14648016e29fSHarshad Shirwadkar struct inode *inode; 14658016e29fSHarshad Shirwadkar struct dentry_info_args darg; 14668016e29fSHarshad Shirwadkar int ret = 0; 14678016e29fSHarshad Shirwadkar 1468a7ba36bcSHarshad Shirwadkar tl_to_darg(&darg, tl, val); 14698016e29fSHarshad Shirwadkar trace_ext4_fc_replay(sb, EXT4_FC_TAG_LINK, darg.ino, 14708016e29fSHarshad Shirwadkar darg.parent_ino, darg.dname_len); 14718016e29fSHarshad Shirwadkar 14728016e29fSHarshad Shirwadkar inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL); 147323dd561aSYi Li if (IS_ERR(inode)) { 14744978c659SJan Kara ext4_debug("Inode not found."); 14758016e29fSHarshad Shirwadkar return 0; 14768016e29fSHarshad Shirwadkar } 14778016e29fSHarshad Shirwadkar 14788016e29fSHarshad Shirwadkar ret = ext4_fc_replay_link_internal(sb, &darg, inode); 
14798016e29fSHarshad Shirwadkar iput(inode); 14808016e29fSHarshad Shirwadkar return ret; 14818016e29fSHarshad Shirwadkar } 14828016e29fSHarshad Shirwadkar 14838016e29fSHarshad Shirwadkar /* 14848016e29fSHarshad Shirwadkar * Record all the modified inodes during replay. We use this later to setup 14858016e29fSHarshad Shirwadkar * block bitmaps correctly. 14868016e29fSHarshad Shirwadkar */ 14878016e29fSHarshad Shirwadkar static int ext4_fc_record_modified_inode(struct super_block *sb, int ino) 14888016e29fSHarshad Shirwadkar { 14898016e29fSHarshad Shirwadkar struct ext4_fc_replay_state *state; 14908016e29fSHarshad Shirwadkar int i; 14918016e29fSHarshad Shirwadkar 14928016e29fSHarshad Shirwadkar state = &EXT4_SB(sb)->s_fc_replay_state; 14938016e29fSHarshad Shirwadkar for (i = 0; i < state->fc_modified_inodes_used; i++) 14948016e29fSHarshad Shirwadkar if (state->fc_modified_inodes[i] == ino) 14958016e29fSHarshad Shirwadkar return 0; 14968016e29fSHarshad Shirwadkar if (state->fc_modified_inodes_used == state->fc_modified_inodes_size) { 14978016e29fSHarshad Shirwadkar state->fc_modified_inodes = krealloc( 1498cdce59a1SRitesh Harjani state->fc_modified_inodes, 1499cdce59a1SRitesh Harjani sizeof(int) * (state->fc_modified_inodes_size + 1500cdce59a1SRitesh Harjani EXT4_FC_REPLAY_REALLOC_INCREMENT), 15018016e29fSHarshad Shirwadkar GFP_KERNEL); 15028016e29fSHarshad Shirwadkar if (!state->fc_modified_inodes) 15038016e29fSHarshad Shirwadkar return -ENOMEM; 1504cdce59a1SRitesh Harjani state->fc_modified_inodes_size += 1505cdce59a1SRitesh Harjani EXT4_FC_REPLAY_REALLOC_INCREMENT; 15068016e29fSHarshad Shirwadkar } 15078016e29fSHarshad Shirwadkar state->fc_modified_inodes[state->fc_modified_inodes_used++] = ino; 15088016e29fSHarshad Shirwadkar return 0; 15098016e29fSHarshad Shirwadkar } 15108016e29fSHarshad Shirwadkar 15118016e29fSHarshad Shirwadkar /* 15128016e29fSHarshad Shirwadkar * Inode replay function 15138016e29fSHarshad Shirwadkar */ 1514a7ba36bcSHarshad Shirwadkar static 
int ext4_fc_replay_inode(struct super_block *sb, struct ext4_fc_tl *tl, 1515a7ba36bcSHarshad Shirwadkar u8 *val) 15168016e29fSHarshad Shirwadkar { 1517a7ba36bcSHarshad Shirwadkar struct ext4_fc_inode fc_inode; 15188016e29fSHarshad Shirwadkar struct ext4_inode *raw_inode; 15198016e29fSHarshad Shirwadkar struct ext4_inode *raw_fc_inode; 15208016e29fSHarshad Shirwadkar struct inode *inode = NULL; 15218016e29fSHarshad Shirwadkar struct ext4_iloc iloc; 15228016e29fSHarshad Shirwadkar int inode_len, ino, ret, tag = le16_to_cpu(tl->fc_tag); 15238016e29fSHarshad Shirwadkar struct ext4_extent_header *eh; 15248016e29fSHarshad Shirwadkar 1525a7ba36bcSHarshad Shirwadkar memcpy(&fc_inode, val, sizeof(fc_inode)); 15268016e29fSHarshad Shirwadkar 1527a7ba36bcSHarshad Shirwadkar ino = le32_to_cpu(fc_inode.fc_ino); 15288016e29fSHarshad Shirwadkar trace_ext4_fc_replay(sb, tag, ino, 0, 0); 15298016e29fSHarshad Shirwadkar 15308016e29fSHarshad Shirwadkar inode = ext4_iget(sb, ino, EXT4_IGET_NORMAL); 153123dd561aSYi Li if (!IS_ERR(inode)) { 15328016e29fSHarshad Shirwadkar ext4_ext_clear_bb(inode); 15338016e29fSHarshad Shirwadkar iput(inode); 15348016e29fSHarshad Shirwadkar } 153523dd561aSYi Li inode = NULL; 15368016e29fSHarshad Shirwadkar 1537cdce59a1SRitesh Harjani ret = ext4_fc_record_modified_inode(sb, ino); 1538cdce59a1SRitesh Harjani if (ret) 1539cdce59a1SRitesh Harjani goto out; 15408016e29fSHarshad Shirwadkar 1541a7ba36bcSHarshad Shirwadkar raw_fc_inode = (struct ext4_inode *) 1542a7ba36bcSHarshad Shirwadkar (val + offsetof(struct ext4_fc_inode, fc_raw_inode)); 15438016e29fSHarshad Shirwadkar ret = ext4_get_fc_inode_loc(sb, ino, &iloc); 15448016e29fSHarshad Shirwadkar if (ret) 15458016e29fSHarshad Shirwadkar goto out; 15468016e29fSHarshad Shirwadkar 1547a7ba36bcSHarshad Shirwadkar inode_len = le16_to_cpu(tl->fc_len) - sizeof(struct ext4_fc_inode); 15488016e29fSHarshad Shirwadkar raw_inode = ext4_raw_inode(&iloc); 15498016e29fSHarshad Shirwadkar 15508016e29fSHarshad Shirwadkar 
memcpy(raw_inode, raw_fc_inode, offsetof(struct ext4_inode, i_block)); 15518016e29fSHarshad Shirwadkar memcpy(&raw_inode->i_generation, &raw_fc_inode->i_generation, 15528016e29fSHarshad Shirwadkar inode_len - offsetof(struct ext4_inode, i_generation)); 15538016e29fSHarshad Shirwadkar if (le32_to_cpu(raw_inode->i_flags) & EXT4_EXTENTS_FL) { 15548016e29fSHarshad Shirwadkar eh = (struct ext4_extent_header *)(&raw_inode->i_block[0]); 15558016e29fSHarshad Shirwadkar if (eh->eh_magic != EXT4_EXT_MAGIC) { 15568016e29fSHarshad Shirwadkar memset(eh, 0, sizeof(*eh)); 15578016e29fSHarshad Shirwadkar eh->eh_magic = EXT4_EXT_MAGIC; 15588016e29fSHarshad Shirwadkar eh->eh_max = cpu_to_le16( 15598016e29fSHarshad Shirwadkar (sizeof(raw_inode->i_block) - 15608016e29fSHarshad Shirwadkar sizeof(struct ext4_extent_header)) 15618016e29fSHarshad Shirwadkar / sizeof(struct ext4_extent)); 15628016e29fSHarshad Shirwadkar } 15638016e29fSHarshad Shirwadkar } else if (le32_to_cpu(raw_inode->i_flags) & EXT4_INLINE_DATA_FL) { 15648016e29fSHarshad Shirwadkar memcpy(raw_inode->i_block, raw_fc_inode->i_block, 15658016e29fSHarshad Shirwadkar sizeof(raw_inode->i_block)); 15668016e29fSHarshad Shirwadkar } 15678016e29fSHarshad Shirwadkar 15688016e29fSHarshad Shirwadkar /* Immediately update the inode on disk. */ 15698016e29fSHarshad Shirwadkar ret = ext4_handle_dirty_metadata(NULL, NULL, iloc.bh); 15708016e29fSHarshad Shirwadkar if (ret) 15718016e29fSHarshad Shirwadkar goto out; 15728016e29fSHarshad Shirwadkar ret = sync_dirty_buffer(iloc.bh); 15738016e29fSHarshad Shirwadkar if (ret) 15748016e29fSHarshad Shirwadkar goto out; 15758016e29fSHarshad Shirwadkar ret = ext4_mark_inode_used(sb, ino); 15768016e29fSHarshad Shirwadkar if (ret) 15778016e29fSHarshad Shirwadkar goto out; 15788016e29fSHarshad Shirwadkar 15798016e29fSHarshad Shirwadkar /* Given that we just wrote the inode on disk, this SHOULD succeed. 
*/ 15808016e29fSHarshad Shirwadkar inode = ext4_iget(sb, ino, EXT4_IGET_NORMAL); 158123dd561aSYi Li if (IS_ERR(inode)) { 15824978c659SJan Kara ext4_debug("Inode not found."); 15838016e29fSHarshad Shirwadkar return -EFSCORRUPTED; 15848016e29fSHarshad Shirwadkar } 15858016e29fSHarshad Shirwadkar 15868016e29fSHarshad Shirwadkar /* 15878016e29fSHarshad Shirwadkar * Our allocator could have made different decisions than before 15888016e29fSHarshad Shirwadkar * crashing. This should be fixed but until then, we calculate 15898016e29fSHarshad Shirwadkar * the number of blocks the inode. 15908016e29fSHarshad Shirwadkar */ 15911ebf2178SHarshad Shirwadkar if (!ext4_test_inode_flag(inode, EXT4_INODE_INLINE_DATA)) 15928016e29fSHarshad Shirwadkar ext4_ext_replay_set_iblocks(inode); 15938016e29fSHarshad Shirwadkar 15948016e29fSHarshad Shirwadkar inode->i_generation = le32_to_cpu(ext4_raw_inode(&iloc)->i_generation); 15958016e29fSHarshad Shirwadkar ext4_reset_inode_seed(inode); 15968016e29fSHarshad Shirwadkar 15978016e29fSHarshad Shirwadkar ext4_inode_csum_set(inode, ext4_raw_inode(&iloc), EXT4_I(inode)); 15988016e29fSHarshad Shirwadkar ret = ext4_handle_dirty_metadata(NULL, NULL, iloc.bh); 15998016e29fSHarshad Shirwadkar sync_dirty_buffer(iloc.bh); 16008016e29fSHarshad Shirwadkar brelse(iloc.bh); 16018016e29fSHarshad Shirwadkar out: 16028016e29fSHarshad Shirwadkar iput(inode); 16038016e29fSHarshad Shirwadkar if (!ret) 1604c6bf3f0eSChristoph Hellwig blkdev_issue_flush(sb->s_bdev); 16058016e29fSHarshad Shirwadkar 16068016e29fSHarshad Shirwadkar return 0; 16078016e29fSHarshad Shirwadkar } 16088016e29fSHarshad Shirwadkar 16098016e29fSHarshad Shirwadkar /* 16108016e29fSHarshad Shirwadkar * Dentry create replay function. 16118016e29fSHarshad Shirwadkar * 16128016e29fSHarshad Shirwadkar * EXT4_FC_TAG_CREAT is preceded by EXT4_FC_TAG_INODE_FULL. 
Which means, the 16138016e29fSHarshad Shirwadkar * inode for which we are trying to create a dentry here, should already have 16148016e29fSHarshad Shirwadkar * been replayed before we start here. 16158016e29fSHarshad Shirwadkar */ 1616a7ba36bcSHarshad Shirwadkar static int ext4_fc_replay_create(struct super_block *sb, struct ext4_fc_tl *tl, 1617a7ba36bcSHarshad Shirwadkar u8 *val) 16188016e29fSHarshad Shirwadkar { 16198016e29fSHarshad Shirwadkar int ret = 0; 16208016e29fSHarshad Shirwadkar struct inode *inode = NULL; 16218016e29fSHarshad Shirwadkar struct inode *dir = NULL; 16228016e29fSHarshad Shirwadkar struct dentry_info_args darg; 16238016e29fSHarshad Shirwadkar 1624a7ba36bcSHarshad Shirwadkar tl_to_darg(&darg, tl, val); 16258016e29fSHarshad Shirwadkar 16268016e29fSHarshad Shirwadkar trace_ext4_fc_replay(sb, EXT4_FC_TAG_CREAT, darg.ino, 16278016e29fSHarshad Shirwadkar darg.parent_ino, darg.dname_len); 16288016e29fSHarshad Shirwadkar 16298016e29fSHarshad Shirwadkar /* This takes care of update group descriptor and other metadata */ 16308016e29fSHarshad Shirwadkar ret = ext4_mark_inode_used(sb, darg.ino); 16318016e29fSHarshad Shirwadkar if (ret) 16328016e29fSHarshad Shirwadkar goto out; 16338016e29fSHarshad Shirwadkar 16348016e29fSHarshad Shirwadkar inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL); 163523dd561aSYi Li if (IS_ERR(inode)) { 16364978c659SJan Kara ext4_debug("inode %d not found.", darg.ino); 16378016e29fSHarshad Shirwadkar inode = NULL; 16388016e29fSHarshad Shirwadkar ret = -EINVAL; 16398016e29fSHarshad Shirwadkar goto out; 16408016e29fSHarshad Shirwadkar } 16418016e29fSHarshad Shirwadkar 16428016e29fSHarshad Shirwadkar if (S_ISDIR(inode->i_mode)) { 16438016e29fSHarshad Shirwadkar /* 16448016e29fSHarshad Shirwadkar * If we are creating a directory, we need to make sure that the 16458016e29fSHarshad Shirwadkar * dot and dot dot dirents are setup properly. 
16468016e29fSHarshad Shirwadkar */ 16478016e29fSHarshad Shirwadkar dir = ext4_iget(sb, darg.parent_ino, EXT4_IGET_NORMAL); 164823dd561aSYi Li if (IS_ERR(dir)) { 16494978c659SJan Kara ext4_debug("Dir %d not found.", darg.ino); 16508016e29fSHarshad Shirwadkar goto out; 16518016e29fSHarshad Shirwadkar } 16528016e29fSHarshad Shirwadkar ret = ext4_init_new_dir(NULL, dir, inode); 16538016e29fSHarshad Shirwadkar iput(dir); 16548016e29fSHarshad Shirwadkar if (ret) { 16558016e29fSHarshad Shirwadkar ret = 0; 16568016e29fSHarshad Shirwadkar goto out; 16578016e29fSHarshad Shirwadkar } 16588016e29fSHarshad Shirwadkar } 16598016e29fSHarshad Shirwadkar ret = ext4_fc_replay_link_internal(sb, &darg, inode); 16608016e29fSHarshad Shirwadkar if (ret) 16618016e29fSHarshad Shirwadkar goto out; 16628016e29fSHarshad Shirwadkar set_nlink(inode, 1); 16638016e29fSHarshad Shirwadkar ext4_mark_inode_dirty(NULL, inode); 16648016e29fSHarshad Shirwadkar out: 16658016e29fSHarshad Shirwadkar iput(inode); 16668016e29fSHarshad Shirwadkar return ret; 16678016e29fSHarshad Shirwadkar } 16688016e29fSHarshad Shirwadkar 16698016e29fSHarshad Shirwadkar /* 1670599ea31dSXin Yin * Record physical disk regions which are in use as per fast commit area, 1671599ea31dSXin Yin * and used by inodes during replay phase. Our simple replay phase 1672599ea31dSXin Yin * allocator excludes these regions from allocation. 
16738016e29fSHarshad Shirwadkar */ 1674599ea31dSXin Yin int ext4_fc_record_regions(struct super_block *sb, int ino, 1675599ea31dSXin Yin ext4_lblk_t lblk, ext4_fsblk_t pblk, int len, int replay) 16768016e29fSHarshad Shirwadkar { 16778016e29fSHarshad Shirwadkar struct ext4_fc_replay_state *state; 16788016e29fSHarshad Shirwadkar struct ext4_fc_alloc_region *region; 16798016e29fSHarshad Shirwadkar 16808016e29fSHarshad Shirwadkar state = &EXT4_SB(sb)->s_fc_replay_state; 1681599ea31dSXin Yin /* 1682599ea31dSXin Yin * during replay phase, the fc_regions_valid may not same as 1683599ea31dSXin Yin * fc_regions_used, update it when do new additions. 1684599ea31dSXin Yin */ 1685599ea31dSXin Yin if (replay && state->fc_regions_used != state->fc_regions_valid) 1686599ea31dSXin Yin state->fc_regions_used = state->fc_regions_valid; 16878016e29fSHarshad Shirwadkar if (state->fc_regions_used == state->fc_regions_size) { 16888016e29fSHarshad Shirwadkar state->fc_regions_size += 16898016e29fSHarshad Shirwadkar EXT4_FC_REPLAY_REALLOC_INCREMENT; 16908016e29fSHarshad Shirwadkar state->fc_regions = krealloc( 16918016e29fSHarshad Shirwadkar state->fc_regions, 16928016e29fSHarshad Shirwadkar state->fc_regions_size * 16938016e29fSHarshad Shirwadkar sizeof(struct ext4_fc_alloc_region), 16948016e29fSHarshad Shirwadkar GFP_KERNEL); 16958016e29fSHarshad Shirwadkar if (!state->fc_regions) 16968016e29fSHarshad Shirwadkar return -ENOMEM; 16978016e29fSHarshad Shirwadkar } 16988016e29fSHarshad Shirwadkar region = &state->fc_regions[state->fc_regions_used++]; 16998016e29fSHarshad Shirwadkar region->ino = ino; 17008016e29fSHarshad Shirwadkar region->lblk = lblk; 17018016e29fSHarshad Shirwadkar region->pblk = pblk; 17028016e29fSHarshad Shirwadkar region->len = len; 17038016e29fSHarshad Shirwadkar 1704599ea31dSXin Yin if (replay) 1705599ea31dSXin Yin state->fc_regions_valid++; 1706599ea31dSXin Yin 17078016e29fSHarshad Shirwadkar return 0; 17088016e29fSHarshad Shirwadkar } 17098016e29fSHarshad 
Shirwadkar 17108016e29fSHarshad Shirwadkar /* Replay add range tag */ 17118016e29fSHarshad Shirwadkar static int ext4_fc_replay_add_range(struct super_block *sb, 1712a7ba36bcSHarshad Shirwadkar struct ext4_fc_tl *tl, u8 *val) 17138016e29fSHarshad Shirwadkar { 1714a7ba36bcSHarshad Shirwadkar struct ext4_fc_add_range fc_add_ex; 17158016e29fSHarshad Shirwadkar struct ext4_extent newex, *ex; 17168016e29fSHarshad Shirwadkar struct inode *inode; 17178016e29fSHarshad Shirwadkar ext4_lblk_t start, cur; 17188016e29fSHarshad Shirwadkar int remaining, len; 17198016e29fSHarshad Shirwadkar ext4_fsblk_t start_pblk; 17208016e29fSHarshad Shirwadkar struct ext4_map_blocks map; 17218016e29fSHarshad Shirwadkar struct ext4_ext_path *path = NULL; 17228016e29fSHarshad Shirwadkar int ret; 17238016e29fSHarshad Shirwadkar 1724a7ba36bcSHarshad Shirwadkar memcpy(&fc_add_ex, val, sizeof(fc_add_ex)); 1725a7ba36bcSHarshad Shirwadkar ex = (struct ext4_extent *)&fc_add_ex.fc_ex; 17268016e29fSHarshad Shirwadkar 17278016e29fSHarshad Shirwadkar trace_ext4_fc_replay(sb, EXT4_FC_TAG_ADD_RANGE, 1728a7ba36bcSHarshad Shirwadkar le32_to_cpu(fc_add_ex.fc_ino), le32_to_cpu(ex->ee_block), 17298016e29fSHarshad Shirwadkar ext4_ext_get_actual_len(ex)); 17308016e29fSHarshad Shirwadkar 1731a7ba36bcSHarshad Shirwadkar inode = ext4_iget(sb, le32_to_cpu(fc_add_ex.fc_ino), EXT4_IGET_NORMAL); 173223dd561aSYi Li if (IS_ERR(inode)) { 17334978c659SJan Kara ext4_debug("Inode not found."); 17348016e29fSHarshad Shirwadkar return 0; 17358016e29fSHarshad Shirwadkar } 17368016e29fSHarshad Shirwadkar 17378016e29fSHarshad Shirwadkar ret = ext4_fc_record_modified_inode(sb, inode->i_ino); 1738cdce59a1SRitesh Harjani if (ret) 1739cdce59a1SRitesh Harjani goto out; 17408016e29fSHarshad Shirwadkar 17418016e29fSHarshad Shirwadkar start = le32_to_cpu(ex->ee_block); 17428016e29fSHarshad Shirwadkar start_pblk = ext4_ext_pblock(ex); 17438016e29fSHarshad Shirwadkar len = ext4_ext_get_actual_len(ex); 17448016e29fSHarshad Shirwadkar 
17458016e29fSHarshad Shirwadkar cur = start; 17468016e29fSHarshad Shirwadkar remaining = len; 17474978c659SJan Kara ext4_debug("ADD_RANGE, lblk %d, pblk %lld, len %d, unwritten %d, inode %ld\n", 17488016e29fSHarshad Shirwadkar start, start_pblk, len, ext4_ext_is_unwritten(ex), 17498016e29fSHarshad Shirwadkar inode->i_ino); 17508016e29fSHarshad Shirwadkar 17518016e29fSHarshad Shirwadkar while (remaining > 0) { 17528016e29fSHarshad Shirwadkar map.m_lblk = cur; 17538016e29fSHarshad Shirwadkar map.m_len = remaining; 17548016e29fSHarshad Shirwadkar map.m_pblk = 0; 17558016e29fSHarshad Shirwadkar ret = ext4_map_blocks(NULL, inode, &map, 0); 17568016e29fSHarshad Shirwadkar 1757cdce59a1SRitesh Harjani if (ret < 0) 1758cdce59a1SRitesh Harjani goto out; 17598016e29fSHarshad Shirwadkar 17608016e29fSHarshad Shirwadkar if (ret == 0) { 17618016e29fSHarshad Shirwadkar /* Range is not mapped */ 17628016e29fSHarshad Shirwadkar path = ext4_find_extent(inode, cur, NULL, 0); 1763cdce59a1SRitesh Harjani if (IS_ERR(path)) 1764cdce59a1SRitesh Harjani goto out; 17658016e29fSHarshad Shirwadkar memset(&newex, 0, sizeof(newex)); 17668016e29fSHarshad Shirwadkar newex.ee_block = cpu_to_le32(cur); 17678016e29fSHarshad Shirwadkar ext4_ext_store_pblock( 17688016e29fSHarshad Shirwadkar &newex, start_pblk + cur - start); 17698016e29fSHarshad Shirwadkar newex.ee_len = cpu_to_le16(map.m_len); 17708016e29fSHarshad Shirwadkar if (ext4_ext_is_unwritten(ex)) 17718016e29fSHarshad Shirwadkar ext4_ext_mark_unwritten(&newex); 17728016e29fSHarshad Shirwadkar down_write(&EXT4_I(inode)->i_data_sem); 17738016e29fSHarshad Shirwadkar ret = ext4_ext_insert_extent( 17748016e29fSHarshad Shirwadkar NULL, inode, &path, &newex, 0); 17758016e29fSHarshad Shirwadkar up_write((&EXT4_I(inode)->i_data_sem)); 17768016e29fSHarshad Shirwadkar ext4_ext_drop_refs(path); 17778016e29fSHarshad Shirwadkar kfree(path); 1778cdce59a1SRitesh Harjani if (ret) 1779cdce59a1SRitesh Harjani goto out; 17808016e29fSHarshad Shirwadkar goto next; 
17818016e29fSHarshad Shirwadkar } 17828016e29fSHarshad Shirwadkar 17838016e29fSHarshad Shirwadkar if (start_pblk + cur - start != map.m_pblk) { 17848016e29fSHarshad Shirwadkar /* 17858016e29fSHarshad Shirwadkar * Logical to physical mapping changed. This can happen 17868016e29fSHarshad Shirwadkar * if this range was removed and then reallocated to 17878016e29fSHarshad Shirwadkar * map to new physical blocks during a fast commit. 17888016e29fSHarshad Shirwadkar */ 17898016e29fSHarshad Shirwadkar ret = ext4_ext_replay_update_ex(inode, cur, map.m_len, 17908016e29fSHarshad Shirwadkar ext4_ext_is_unwritten(ex), 17918016e29fSHarshad Shirwadkar start_pblk + cur - start); 1792cdce59a1SRitesh Harjani if (ret) 1793cdce59a1SRitesh Harjani goto out; 17948016e29fSHarshad Shirwadkar /* 17958016e29fSHarshad Shirwadkar * Mark the old blocks as free since they aren't used 17968016e29fSHarshad Shirwadkar * anymore. We maintain an array of all the modified 17978016e29fSHarshad Shirwadkar * inodes. In case these blocks are still used at either 17988016e29fSHarshad Shirwadkar * a different logical range in the same inode or in 17998016e29fSHarshad Shirwadkar * some different inode, we will mark them as allocated 18008016e29fSHarshad Shirwadkar * at the end of the FC replay using our array of 18018016e29fSHarshad Shirwadkar * modified inodes. 
18028016e29fSHarshad Shirwadkar */ 18038016e29fSHarshad Shirwadkar ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, 0); 18048016e29fSHarshad Shirwadkar goto next; 18058016e29fSHarshad Shirwadkar } 18068016e29fSHarshad Shirwadkar 18078016e29fSHarshad Shirwadkar /* Range is mapped and needs a state change */ 18084978c659SJan Kara ext4_debug("Converting from %ld to %d %lld", 18098016e29fSHarshad Shirwadkar map.m_flags & EXT4_MAP_UNWRITTEN, 18108016e29fSHarshad Shirwadkar ext4_ext_is_unwritten(ex), map.m_pblk); 18118016e29fSHarshad Shirwadkar ret = ext4_ext_replay_update_ex(inode, cur, map.m_len, 18128016e29fSHarshad Shirwadkar ext4_ext_is_unwritten(ex), map.m_pblk); 1813cdce59a1SRitesh Harjani if (ret) 1814cdce59a1SRitesh Harjani goto out; 18158016e29fSHarshad Shirwadkar /* 18168016e29fSHarshad Shirwadkar * We may have split the extent tree while toggling the state. 18178016e29fSHarshad Shirwadkar * Try to shrink the extent tree now. 18188016e29fSHarshad Shirwadkar */ 18198016e29fSHarshad Shirwadkar ext4_ext_replay_shrink_inode(inode, start + len); 18208016e29fSHarshad Shirwadkar next: 18218016e29fSHarshad Shirwadkar cur += map.m_len; 18228016e29fSHarshad Shirwadkar remaining -= map.m_len; 18238016e29fSHarshad Shirwadkar } 18248016e29fSHarshad Shirwadkar ext4_ext_replay_shrink_inode(inode, i_size_read(inode) >> 18258016e29fSHarshad Shirwadkar sb->s_blocksize_bits); 1826cdce59a1SRitesh Harjani out: 18278016e29fSHarshad Shirwadkar iput(inode); 18288016e29fSHarshad Shirwadkar return 0; 18298016e29fSHarshad Shirwadkar } 18308016e29fSHarshad Shirwadkar 18318016e29fSHarshad Shirwadkar /* Replay DEL_RANGE tag */ 18328016e29fSHarshad Shirwadkar static int 1833a7ba36bcSHarshad Shirwadkar ext4_fc_replay_del_range(struct super_block *sb, struct ext4_fc_tl *tl, 1834a7ba36bcSHarshad Shirwadkar u8 *val) 18358016e29fSHarshad Shirwadkar { 18368016e29fSHarshad Shirwadkar struct inode *inode; 1837a7ba36bcSHarshad Shirwadkar struct ext4_fc_del_range lrange; 18388016e29fSHarshad 
Shirwadkar struct ext4_map_blocks map; 18398016e29fSHarshad Shirwadkar ext4_lblk_t cur, remaining; 18408016e29fSHarshad Shirwadkar int ret; 18418016e29fSHarshad Shirwadkar 1842a7ba36bcSHarshad Shirwadkar memcpy(&lrange, val, sizeof(lrange)); 1843a7ba36bcSHarshad Shirwadkar cur = le32_to_cpu(lrange.fc_lblk); 1844a7ba36bcSHarshad Shirwadkar remaining = le32_to_cpu(lrange.fc_len); 18458016e29fSHarshad Shirwadkar 18468016e29fSHarshad Shirwadkar trace_ext4_fc_replay(sb, EXT4_FC_TAG_DEL_RANGE, 1847a7ba36bcSHarshad Shirwadkar le32_to_cpu(lrange.fc_ino), cur, remaining); 18488016e29fSHarshad Shirwadkar 1849a7ba36bcSHarshad Shirwadkar inode = ext4_iget(sb, le32_to_cpu(lrange.fc_ino), EXT4_IGET_NORMAL); 185023dd561aSYi Li if (IS_ERR(inode)) { 18514978c659SJan Kara ext4_debug("Inode %d not found", le32_to_cpu(lrange.fc_ino)); 18528016e29fSHarshad Shirwadkar return 0; 18538016e29fSHarshad Shirwadkar } 18548016e29fSHarshad Shirwadkar 18558016e29fSHarshad Shirwadkar ret = ext4_fc_record_modified_inode(sb, inode->i_ino); 1856cdce59a1SRitesh Harjani if (ret) 1857cdce59a1SRitesh Harjani goto out; 18588016e29fSHarshad Shirwadkar 18594978c659SJan Kara ext4_debug("DEL_RANGE, inode %ld, lblk %d, len %d\n", 1860a7ba36bcSHarshad Shirwadkar inode->i_ino, le32_to_cpu(lrange.fc_lblk), 1861a7ba36bcSHarshad Shirwadkar le32_to_cpu(lrange.fc_len)); 18628016e29fSHarshad Shirwadkar while (remaining > 0) { 18638016e29fSHarshad Shirwadkar map.m_lblk = cur; 18648016e29fSHarshad Shirwadkar map.m_len = remaining; 18658016e29fSHarshad Shirwadkar 18668016e29fSHarshad Shirwadkar ret = ext4_map_blocks(NULL, inode, &map, 0); 1867cdce59a1SRitesh Harjani if (ret < 0) 1868cdce59a1SRitesh Harjani goto out; 18698016e29fSHarshad Shirwadkar if (ret > 0) { 18708016e29fSHarshad Shirwadkar remaining -= ret; 18718016e29fSHarshad Shirwadkar cur += ret; 18728016e29fSHarshad Shirwadkar ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, 0); 18738016e29fSHarshad Shirwadkar } else { 18748016e29fSHarshad Shirwadkar 
remaining -= map.m_len; 18758016e29fSHarshad Shirwadkar cur += map.m_len; 18768016e29fSHarshad Shirwadkar } 18778016e29fSHarshad Shirwadkar } 18788016e29fSHarshad Shirwadkar 18790b5b5a62SXin Yin down_write(&EXT4_I(inode)->i_data_sem); 18808fca8a2bSXin Yin ret = ext4_ext_remove_space(inode, le32_to_cpu(lrange.fc_lblk), 18818fca8a2bSXin Yin le32_to_cpu(lrange.fc_lblk) + 18828fca8a2bSXin Yin le32_to_cpu(lrange.fc_len) - 1); 18830b5b5a62SXin Yin up_write(&EXT4_I(inode)->i_data_sem); 1884cdce59a1SRitesh Harjani if (ret) 1885cdce59a1SRitesh Harjani goto out; 18868016e29fSHarshad Shirwadkar ext4_ext_replay_shrink_inode(inode, 18878016e29fSHarshad Shirwadkar i_size_read(inode) >> sb->s_blocksize_bits); 18888016e29fSHarshad Shirwadkar ext4_mark_inode_dirty(NULL, inode); 1889cdce59a1SRitesh Harjani out: 18908016e29fSHarshad Shirwadkar iput(inode); 18918016e29fSHarshad Shirwadkar return 0; 18928016e29fSHarshad Shirwadkar } 18938016e29fSHarshad Shirwadkar 18948016e29fSHarshad Shirwadkar static void ext4_fc_set_bitmaps_and_counters(struct super_block *sb) 18958016e29fSHarshad Shirwadkar { 18968016e29fSHarshad Shirwadkar struct ext4_fc_replay_state *state; 18978016e29fSHarshad Shirwadkar struct inode *inode; 18988016e29fSHarshad Shirwadkar struct ext4_ext_path *path = NULL; 18998016e29fSHarshad Shirwadkar struct ext4_map_blocks map; 19008016e29fSHarshad Shirwadkar int i, ret, j; 19018016e29fSHarshad Shirwadkar ext4_lblk_t cur, end; 19028016e29fSHarshad Shirwadkar 19038016e29fSHarshad Shirwadkar state = &EXT4_SB(sb)->s_fc_replay_state; 19048016e29fSHarshad Shirwadkar for (i = 0; i < state->fc_modified_inodes_used; i++) { 19058016e29fSHarshad Shirwadkar inode = ext4_iget(sb, state->fc_modified_inodes[i], 19068016e29fSHarshad Shirwadkar EXT4_IGET_NORMAL); 190723dd561aSYi Li if (IS_ERR(inode)) { 19084978c659SJan Kara ext4_debug("Inode %d not found.", 19098016e29fSHarshad Shirwadkar state->fc_modified_inodes[i]); 19108016e29fSHarshad Shirwadkar continue; 19118016e29fSHarshad 
Shirwadkar } 19128016e29fSHarshad Shirwadkar cur = 0; 19138016e29fSHarshad Shirwadkar end = EXT_MAX_BLOCKS; 19141ebf2178SHarshad Shirwadkar if (ext4_test_inode_flag(inode, EXT4_INODE_INLINE_DATA)) { 19151ebf2178SHarshad Shirwadkar iput(inode); 19161ebf2178SHarshad Shirwadkar continue; 19171ebf2178SHarshad Shirwadkar } 19188016e29fSHarshad Shirwadkar while (cur < end) { 19198016e29fSHarshad Shirwadkar map.m_lblk = cur; 19208016e29fSHarshad Shirwadkar map.m_len = end - cur; 19218016e29fSHarshad Shirwadkar 19228016e29fSHarshad Shirwadkar ret = ext4_map_blocks(NULL, inode, &map, 0); 19238016e29fSHarshad Shirwadkar if (ret < 0) 19248016e29fSHarshad Shirwadkar break; 19258016e29fSHarshad Shirwadkar 19268016e29fSHarshad Shirwadkar if (ret > 0) { 19278016e29fSHarshad Shirwadkar path = ext4_find_extent(inode, map.m_lblk, NULL, 0); 192823dd561aSYi Li if (!IS_ERR(path)) { 19298016e29fSHarshad Shirwadkar for (j = 0; j < path->p_depth; j++) 19308016e29fSHarshad Shirwadkar ext4_mb_mark_bb(inode->i_sb, 19318016e29fSHarshad Shirwadkar path[j].p_block, 1, 1); 19328016e29fSHarshad Shirwadkar ext4_ext_drop_refs(path); 19338016e29fSHarshad Shirwadkar kfree(path); 19348016e29fSHarshad Shirwadkar } 19358016e29fSHarshad Shirwadkar cur += ret; 19368016e29fSHarshad Shirwadkar ext4_mb_mark_bb(inode->i_sb, map.m_pblk, 19378016e29fSHarshad Shirwadkar map.m_len, 1); 19388016e29fSHarshad Shirwadkar } else { 19398016e29fSHarshad Shirwadkar cur = cur + (map.m_len ? map.m_len : 1); 19408016e29fSHarshad Shirwadkar } 19418016e29fSHarshad Shirwadkar } 19428016e29fSHarshad Shirwadkar iput(inode); 19438016e29fSHarshad Shirwadkar } 19448016e29fSHarshad Shirwadkar } 19458016e29fSHarshad Shirwadkar 19468016e29fSHarshad Shirwadkar /* 19478016e29fSHarshad Shirwadkar * Check if block is in excluded regions for block allocation. The simple 19488016e29fSHarshad Shirwadkar * allocator that runs during replay phase is calls this function to see 19498016e29fSHarshad Shirwadkar * if it is okay to use a block. 
19508016e29fSHarshad Shirwadkar */ 19518016e29fSHarshad Shirwadkar bool ext4_fc_replay_check_excluded(struct super_block *sb, ext4_fsblk_t blk) 19528016e29fSHarshad Shirwadkar { 19538016e29fSHarshad Shirwadkar int i; 19548016e29fSHarshad Shirwadkar struct ext4_fc_replay_state *state; 19558016e29fSHarshad Shirwadkar 19568016e29fSHarshad Shirwadkar state = &EXT4_SB(sb)->s_fc_replay_state; 19578016e29fSHarshad Shirwadkar for (i = 0; i < state->fc_regions_valid; i++) { 19588016e29fSHarshad Shirwadkar if (state->fc_regions[i].ino == 0 || 19598016e29fSHarshad Shirwadkar state->fc_regions[i].len == 0) 19608016e29fSHarshad Shirwadkar continue; 1961dbaafbadSRitesh Harjani if (in_range(blk, state->fc_regions[i].pblk, 1962dbaafbadSRitesh Harjani state->fc_regions[i].len)) 19638016e29fSHarshad Shirwadkar return true; 19648016e29fSHarshad Shirwadkar } 19658016e29fSHarshad Shirwadkar return false; 19668016e29fSHarshad Shirwadkar } 19678016e29fSHarshad Shirwadkar 19688016e29fSHarshad Shirwadkar /* Cleanup function called after replay */ 19698016e29fSHarshad Shirwadkar void ext4_fc_replay_cleanup(struct super_block *sb) 19708016e29fSHarshad Shirwadkar { 19718016e29fSHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB(sb); 19728016e29fSHarshad Shirwadkar 19738016e29fSHarshad Shirwadkar sbi->s_mount_state &= ~EXT4_FC_REPLAY; 19748016e29fSHarshad Shirwadkar kfree(sbi->s_fc_replay_state.fc_regions); 19758016e29fSHarshad Shirwadkar kfree(sbi->s_fc_replay_state.fc_modified_inodes); 19768016e29fSHarshad Shirwadkar } 19778016e29fSHarshad Shirwadkar 19788016e29fSHarshad Shirwadkar /* 19798016e29fSHarshad Shirwadkar * Recovery Scan phase handler 19808016e29fSHarshad Shirwadkar * 19818016e29fSHarshad Shirwadkar * This function is called during the scan phase and is responsible 19828016e29fSHarshad Shirwadkar * for doing following things: 19838016e29fSHarshad Shirwadkar * - Make sure the fast commit area has valid tags for replay 19848016e29fSHarshad Shirwadkar * - Count number of tags that 
need to be replayed by the replay handler 19858016e29fSHarshad Shirwadkar * - Verify CRC 19868016e29fSHarshad Shirwadkar * - Create a list of excluded blocks for allocation during replay phase 19878016e29fSHarshad Shirwadkar * 19888016e29fSHarshad Shirwadkar * This function returns JBD2_FC_REPLAY_CONTINUE to indicate that SCAN is 19898016e29fSHarshad Shirwadkar * incomplete and JBD2 should send more blocks. It returns JBD2_FC_REPLAY_STOP 19908016e29fSHarshad Shirwadkar * to indicate that scan has finished and JBD2 can now start replay phase. 19918016e29fSHarshad Shirwadkar * It returns a negative error to indicate that there was an error. At the end 19928016e29fSHarshad Shirwadkar * of a successful scan phase, sbi->s_fc_replay_state.fc_replay_num_tags is set 19938016e29fSHarshad Shirwadkar * to indicate the number of tags that need to replayed during the replay phase. 19948016e29fSHarshad Shirwadkar */ 19958016e29fSHarshad Shirwadkar static int ext4_fc_replay_scan(journal_t *journal, 19968016e29fSHarshad Shirwadkar struct buffer_head *bh, int off, 19978016e29fSHarshad Shirwadkar tid_t expected_tid) 19988016e29fSHarshad Shirwadkar { 19998016e29fSHarshad Shirwadkar struct super_block *sb = journal->j_private; 20008016e29fSHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB(sb); 20018016e29fSHarshad Shirwadkar struct ext4_fc_replay_state *state; 20028016e29fSHarshad Shirwadkar int ret = JBD2_FC_REPLAY_CONTINUE; 2003a7ba36bcSHarshad Shirwadkar struct ext4_fc_add_range ext; 2004a7ba36bcSHarshad Shirwadkar struct ext4_fc_tl tl; 2005a7ba36bcSHarshad Shirwadkar struct ext4_fc_tail tail; 2006a7ba36bcSHarshad Shirwadkar __u8 *start, *end, *cur, *val; 2007a7ba36bcSHarshad Shirwadkar struct ext4_fc_head head; 20088016e29fSHarshad Shirwadkar struct ext4_extent *ex; 20098016e29fSHarshad Shirwadkar 20108016e29fSHarshad Shirwadkar state = &sbi->s_fc_replay_state; 20118016e29fSHarshad Shirwadkar 20128016e29fSHarshad Shirwadkar start = (u8 *)bh->b_data; 20138016e29fSHarshad 
Shirwadkar end = (__u8 *)bh->b_data + journal->j_blocksize - 1; 20148016e29fSHarshad Shirwadkar 20158016e29fSHarshad Shirwadkar if (state->fc_replay_expected_off == 0) { 20168016e29fSHarshad Shirwadkar state->fc_cur_tag = 0; 20178016e29fSHarshad Shirwadkar state->fc_replay_num_tags = 0; 20188016e29fSHarshad Shirwadkar state->fc_crc = 0; 20198016e29fSHarshad Shirwadkar state->fc_regions = NULL; 20208016e29fSHarshad Shirwadkar state->fc_regions_valid = state->fc_regions_used = 20218016e29fSHarshad Shirwadkar state->fc_regions_size = 0; 20228016e29fSHarshad Shirwadkar /* Check if we can stop early */ 20238016e29fSHarshad Shirwadkar if (le16_to_cpu(((struct ext4_fc_tl *)start)->fc_tag) 20248016e29fSHarshad Shirwadkar != EXT4_FC_TAG_HEAD) 20258016e29fSHarshad Shirwadkar return 0; 20268016e29fSHarshad Shirwadkar } 20278016e29fSHarshad Shirwadkar 20288016e29fSHarshad Shirwadkar if (off != state->fc_replay_expected_off) { 20298016e29fSHarshad Shirwadkar ret = -EFSCORRUPTED; 20308016e29fSHarshad Shirwadkar goto out_err; 20318016e29fSHarshad Shirwadkar } 20328016e29fSHarshad Shirwadkar 20338016e29fSHarshad Shirwadkar state->fc_replay_expected_off++; 2034a7ba36bcSHarshad Shirwadkar for (cur = start; cur < end; cur = cur + sizeof(tl) + le16_to_cpu(tl.fc_len)) { 2035a7ba36bcSHarshad Shirwadkar memcpy(&tl, cur, sizeof(tl)); 2036a7ba36bcSHarshad Shirwadkar val = cur + sizeof(tl); 20374978c659SJan Kara ext4_debug("Scan phase, tag:%s, blk %lld\n", 2038a7ba36bcSHarshad Shirwadkar tag2str(le16_to_cpu(tl.fc_tag)), bh->b_blocknr); 2039a7ba36bcSHarshad Shirwadkar switch (le16_to_cpu(tl.fc_tag)) { 20408016e29fSHarshad Shirwadkar case EXT4_FC_TAG_ADD_RANGE: 2041a7ba36bcSHarshad Shirwadkar memcpy(&ext, val, sizeof(ext)); 2042a7ba36bcSHarshad Shirwadkar ex = (struct ext4_extent *)&ext.fc_ex; 20438016e29fSHarshad Shirwadkar ret = ext4_fc_record_regions(sb, 2044a7ba36bcSHarshad Shirwadkar le32_to_cpu(ext.fc_ino), 20458016e29fSHarshad Shirwadkar le32_to_cpu(ex->ee_block), ext4_ext_pblock(ex), 
2046599ea31dSXin Yin ext4_ext_get_actual_len(ex), 0); 20478016e29fSHarshad Shirwadkar if (ret < 0) 20488016e29fSHarshad Shirwadkar break; 20498016e29fSHarshad Shirwadkar ret = JBD2_FC_REPLAY_CONTINUE; 20508016e29fSHarshad Shirwadkar fallthrough; 20518016e29fSHarshad Shirwadkar case EXT4_FC_TAG_DEL_RANGE: 20528016e29fSHarshad Shirwadkar case EXT4_FC_TAG_LINK: 20538016e29fSHarshad Shirwadkar case EXT4_FC_TAG_UNLINK: 20548016e29fSHarshad Shirwadkar case EXT4_FC_TAG_CREAT: 20558016e29fSHarshad Shirwadkar case EXT4_FC_TAG_INODE: 20568016e29fSHarshad Shirwadkar case EXT4_FC_TAG_PAD: 20578016e29fSHarshad Shirwadkar state->fc_cur_tag++; 2058a7ba36bcSHarshad Shirwadkar state->fc_crc = ext4_chksum(sbi, state->fc_crc, cur, 2059a7ba36bcSHarshad Shirwadkar sizeof(tl) + le16_to_cpu(tl.fc_len)); 20608016e29fSHarshad Shirwadkar break; 20618016e29fSHarshad Shirwadkar case EXT4_FC_TAG_TAIL: 20628016e29fSHarshad Shirwadkar state->fc_cur_tag++; 2063a7ba36bcSHarshad Shirwadkar memcpy(&tail, val, sizeof(tail)); 2064a7ba36bcSHarshad Shirwadkar state->fc_crc = ext4_chksum(sbi, state->fc_crc, cur, 2065a7ba36bcSHarshad Shirwadkar sizeof(tl) + 20668016e29fSHarshad Shirwadkar offsetof(struct ext4_fc_tail, 20678016e29fSHarshad Shirwadkar fc_crc)); 2068a7ba36bcSHarshad Shirwadkar if (le32_to_cpu(tail.fc_tid) == expected_tid && 2069a7ba36bcSHarshad Shirwadkar le32_to_cpu(tail.fc_crc) == state->fc_crc) { 20708016e29fSHarshad Shirwadkar state->fc_replay_num_tags = state->fc_cur_tag; 20718016e29fSHarshad Shirwadkar state->fc_regions_valid = 20728016e29fSHarshad Shirwadkar state->fc_regions_used; 20738016e29fSHarshad Shirwadkar } else { 20748016e29fSHarshad Shirwadkar ret = state->fc_replay_num_tags ? 
20758016e29fSHarshad Shirwadkar JBD2_FC_REPLAY_STOP : -EFSBADCRC; 20768016e29fSHarshad Shirwadkar } 20778016e29fSHarshad Shirwadkar state->fc_crc = 0; 20788016e29fSHarshad Shirwadkar break; 20798016e29fSHarshad Shirwadkar case EXT4_FC_TAG_HEAD: 2080a7ba36bcSHarshad Shirwadkar memcpy(&head, val, sizeof(head)); 2081a7ba36bcSHarshad Shirwadkar if (le32_to_cpu(head.fc_features) & 20828016e29fSHarshad Shirwadkar ~EXT4_FC_SUPPORTED_FEATURES) { 20838016e29fSHarshad Shirwadkar ret = -EOPNOTSUPP; 20848016e29fSHarshad Shirwadkar break; 20858016e29fSHarshad Shirwadkar } 2086a7ba36bcSHarshad Shirwadkar if (le32_to_cpu(head.fc_tid) != expected_tid) { 20878016e29fSHarshad Shirwadkar ret = JBD2_FC_REPLAY_STOP; 20888016e29fSHarshad Shirwadkar break; 20898016e29fSHarshad Shirwadkar } 20908016e29fSHarshad Shirwadkar state->fc_cur_tag++; 2091a7ba36bcSHarshad Shirwadkar state->fc_crc = ext4_chksum(sbi, state->fc_crc, cur, 2092a7ba36bcSHarshad Shirwadkar sizeof(tl) + le16_to_cpu(tl.fc_len)); 20938016e29fSHarshad Shirwadkar break; 20948016e29fSHarshad Shirwadkar default: 20958016e29fSHarshad Shirwadkar ret = state->fc_replay_num_tags ? 20968016e29fSHarshad Shirwadkar JBD2_FC_REPLAY_STOP : -ECANCELED; 20978016e29fSHarshad Shirwadkar } 20988016e29fSHarshad Shirwadkar if (ret < 0 || ret == JBD2_FC_REPLAY_STOP) 20998016e29fSHarshad Shirwadkar break; 21008016e29fSHarshad Shirwadkar } 21018016e29fSHarshad Shirwadkar 21028016e29fSHarshad Shirwadkar out_err: 21038016e29fSHarshad Shirwadkar trace_ext4_fc_replay_scan(sb, ret, off); 21048016e29fSHarshad Shirwadkar return ret; 21058016e29fSHarshad Shirwadkar } 21068016e29fSHarshad Shirwadkar 21075b849b5fSHarshad Shirwadkar /* 21085b849b5fSHarshad Shirwadkar * Main recovery path entry point. 21098016e29fSHarshad Shirwadkar * The meaning of return codes is similar as above. 
21105b849b5fSHarshad Shirwadkar */ 21115b849b5fSHarshad Shirwadkar static int ext4_fc_replay(journal_t *journal, struct buffer_head *bh, 21125b849b5fSHarshad Shirwadkar enum passtype pass, int off, tid_t expected_tid) 21135b849b5fSHarshad Shirwadkar { 21148016e29fSHarshad Shirwadkar struct super_block *sb = journal->j_private; 21158016e29fSHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB(sb); 2116a7ba36bcSHarshad Shirwadkar struct ext4_fc_tl tl; 2117a7ba36bcSHarshad Shirwadkar __u8 *start, *end, *cur, *val; 21188016e29fSHarshad Shirwadkar int ret = JBD2_FC_REPLAY_CONTINUE; 21198016e29fSHarshad Shirwadkar struct ext4_fc_replay_state *state = &sbi->s_fc_replay_state; 2120a7ba36bcSHarshad Shirwadkar struct ext4_fc_tail tail; 21218016e29fSHarshad Shirwadkar 21228016e29fSHarshad Shirwadkar if (pass == PASS_SCAN) { 21238016e29fSHarshad Shirwadkar state->fc_current_pass = PASS_SCAN; 21248016e29fSHarshad Shirwadkar return ext4_fc_replay_scan(journal, bh, off, expected_tid); 21258016e29fSHarshad Shirwadkar } 21268016e29fSHarshad Shirwadkar 21278016e29fSHarshad Shirwadkar if (state->fc_current_pass != pass) { 21288016e29fSHarshad Shirwadkar state->fc_current_pass = pass; 21298016e29fSHarshad Shirwadkar sbi->s_mount_state |= EXT4_FC_REPLAY; 21308016e29fSHarshad Shirwadkar } 21318016e29fSHarshad Shirwadkar if (!sbi->s_fc_replay_state.fc_replay_num_tags) { 21324978c659SJan Kara ext4_debug("Replay stops\n"); 21338016e29fSHarshad Shirwadkar ext4_fc_set_bitmaps_and_counters(sb); 21345b849b5fSHarshad Shirwadkar return 0; 21355b849b5fSHarshad Shirwadkar } 21365b849b5fSHarshad Shirwadkar 21378016e29fSHarshad Shirwadkar #ifdef CONFIG_EXT4_DEBUG 21388016e29fSHarshad Shirwadkar if (sbi->s_fc_debug_max_replay && off >= sbi->s_fc_debug_max_replay) { 21398016e29fSHarshad Shirwadkar pr_warn("Dropping fc block %d because max_replay set\n", off); 21408016e29fSHarshad Shirwadkar return JBD2_FC_REPLAY_STOP; 21418016e29fSHarshad Shirwadkar } 21428016e29fSHarshad Shirwadkar #endif 
21438016e29fSHarshad Shirwadkar 21448016e29fSHarshad Shirwadkar start = (u8 *)bh->b_data; 21458016e29fSHarshad Shirwadkar end = (__u8 *)bh->b_data + journal->j_blocksize - 1; 21468016e29fSHarshad Shirwadkar 2147a7ba36bcSHarshad Shirwadkar for (cur = start; cur < end; cur = cur + sizeof(tl) + le16_to_cpu(tl.fc_len)) { 2148a7ba36bcSHarshad Shirwadkar memcpy(&tl, cur, sizeof(tl)); 2149a7ba36bcSHarshad Shirwadkar val = cur + sizeof(tl); 2150a7ba36bcSHarshad Shirwadkar 21518016e29fSHarshad Shirwadkar if (state->fc_replay_num_tags == 0) { 21528016e29fSHarshad Shirwadkar ret = JBD2_FC_REPLAY_STOP; 21538016e29fSHarshad Shirwadkar ext4_fc_set_bitmaps_and_counters(sb); 21548016e29fSHarshad Shirwadkar break; 21558016e29fSHarshad Shirwadkar } 21564978c659SJan Kara ext4_debug("Replay phase, tag:%s\n", 2157a7ba36bcSHarshad Shirwadkar tag2str(le16_to_cpu(tl.fc_tag))); 21588016e29fSHarshad Shirwadkar state->fc_replay_num_tags--; 2159a7ba36bcSHarshad Shirwadkar switch (le16_to_cpu(tl.fc_tag)) { 21608016e29fSHarshad Shirwadkar case EXT4_FC_TAG_LINK: 2161a7ba36bcSHarshad Shirwadkar ret = ext4_fc_replay_link(sb, &tl, val); 21628016e29fSHarshad Shirwadkar break; 21638016e29fSHarshad Shirwadkar case EXT4_FC_TAG_UNLINK: 2164a7ba36bcSHarshad Shirwadkar ret = ext4_fc_replay_unlink(sb, &tl, val); 21658016e29fSHarshad Shirwadkar break; 21668016e29fSHarshad Shirwadkar case EXT4_FC_TAG_ADD_RANGE: 2167a7ba36bcSHarshad Shirwadkar ret = ext4_fc_replay_add_range(sb, &tl, val); 21688016e29fSHarshad Shirwadkar break; 21698016e29fSHarshad Shirwadkar case EXT4_FC_TAG_CREAT: 2170a7ba36bcSHarshad Shirwadkar ret = ext4_fc_replay_create(sb, &tl, val); 21718016e29fSHarshad Shirwadkar break; 21728016e29fSHarshad Shirwadkar case EXT4_FC_TAG_DEL_RANGE: 2173a7ba36bcSHarshad Shirwadkar ret = ext4_fc_replay_del_range(sb, &tl, val); 21748016e29fSHarshad Shirwadkar break; 21758016e29fSHarshad Shirwadkar case EXT4_FC_TAG_INODE: 2176a7ba36bcSHarshad Shirwadkar ret = ext4_fc_replay_inode(sb, &tl, val); 
21778016e29fSHarshad Shirwadkar break; 21788016e29fSHarshad Shirwadkar case EXT4_FC_TAG_PAD: 21798016e29fSHarshad Shirwadkar trace_ext4_fc_replay(sb, EXT4_FC_TAG_PAD, 0, 2180a7ba36bcSHarshad Shirwadkar le16_to_cpu(tl.fc_len), 0); 21818016e29fSHarshad Shirwadkar break; 21828016e29fSHarshad Shirwadkar case EXT4_FC_TAG_TAIL: 21838016e29fSHarshad Shirwadkar trace_ext4_fc_replay(sb, EXT4_FC_TAG_TAIL, 0, 2184a7ba36bcSHarshad Shirwadkar le16_to_cpu(tl.fc_len), 0); 2185a7ba36bcSHarshad Shirwadkar memcpy(&tail, val, sizeof(tail)); 2186a7ba36bcSHarshad Shirwadkar WARN_ON(le32_to_cpu(tail.fc_tid) != expected_tid); 21878016e29fSHarshad Shirwadkar break; 21888016e29fSHarshad Shirwadkar case EXT4_FC_TAG_HEAD: 21898016e29fSHarshad Shirwadkar break; 21908016e29fSHarshad Shirwadkar default: 2191a7ba36bcSHarshad Shirwadkar trace_ext4_fc_replay(sb, le16_to_cpu(tl.fc_tag), 0, 2192a7ba36bcSHarshad Shirwadkar le16_to_cpu(tl.fc_len), 0); 21938016e29fSHarshad Shirwadkar ret = -ECANCELED; 21948016e29fSHarshad Shirwadkar break; 21958016e29fSHarshad Shirwadkar } 21968016e29fSHarshad Shirwadkar if (ret < 0) 21978016e29fSHarshad Shirwadkar break; 21988016e29fSHarshad Shirwadkar ret = JBD2_FC_REPLAY_CONTINUE; 21998016e29fSHarshad Shirwadkar } 22008016e29fSHarshad Shirwadkar return ret; 22018016e29fSHarshad Shirwadkar } 22028016e29fSHarshad Shirwadkar 22036866d7b3SHarshad Shirwadkar void ext4_fc_init(struct super_block *sb, journal_t *journal) 22046866d7b3SHarshad Shirwadkar { 22055b849b5fSHarshad Shirwadkar /* 22065b849b5fSHarshad Shirwadkar * We set replay callback even if fast commit disabled because we may 22075b849b5fSHarshad Shirwadkar * could still have fast commit blocks that need to be replayed even if 22085b849b5fSHarshad Shirwadkar * fast commit has now been turned off. 
22095b849b5fSHarshad Shirwadkar */ 22105b849b5fSHarshad Shirwadkar journal->j_fc_replay_callback = ext4_fc_replay; 22116866d7b3SHarshad Shirwadkar if (!test_opt2(sb, JOURNAL_FAST_COMMIT)) 22126866d7b3SHarshad Shirwadkar return; 2213ff780b91SHarshad Shirwadkar journal->j_fc_cleanup_callback = ext4_fc_cleanup; 22146866d7b3SHarshad Shirwadkar } 2215aa75f4d3SHarshad Shirwadkar 2216fa329e27STheodore Ts'o static const char *fc_ineligible_reasons[] = { 2217ce8c59d1SHarshad Shirwadkar "Extended attributes changed", 2218ce8c59d1SHarshad Shirwadkar "Cross rename", 2219ce8c59d1SHarshad Shirwadkar "Journal flag changed", 2220ce8c59d1SHarshad Shirwadkar "Insufficient memory", 2221ce8c59d1SHarshad Shirwadkar "Swap boot", 2222ce8c59d1SHarshad Shirwadkar "Resize", 2223ce8c59d1SHarshad Shirwadkar "Dir renamed", 2224ce8c59d1SHarshad Shirwadkar "Falloc range op", 2225556e0319SHarshad Shirwadkar "Data journalling", 2226ce8c59d1SHarshad Shirwadkar "FC Commit Failed" 2227ce8c59d1SHarshad Shirwadkar }; 2228ce8c59d1SHarshad Shirwadkar 2229ce8c59d1SHarshad Shirwadkar int ext4_fc_info_show(struct seq_file *seq, void *v) 2230ce8c59d1SHarshad Shirwadkar { 2231ce8c59d1SHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB((struct super_block *)seq->private); 2232ce8c59d1SHarshad Shirwadkar struct ext4_fc_stats *stats = &sbi->s_fc_stats; 2233ce8c59d1SHarshad Shirwadkar int i; 2234ce8c59d1SHarshad Shirwadkar 2235ce8c59d1SHarshad Shirwadkar if (v != SEQ_START_TOKEN) 2236ce8c59d1SHarshad Shirwadkar return 0; 2237ce8c59d1SHarshad Shirwadkar 2238ce8c59d1SHarshad Shirwadkar seq_printf(seq, 2239ce8c59d1SHarshad Shirwadkar "fc stats:\n%ld commits\n%ld ineligible\n%ld numblks\n%lluus avg_commit_time\n", 2240ce8c59d1SHarshad Shirwadkar stats->fc_num_commits, stats->fc_ineligible_commits, 2241ce8c59d1SHarshad Shirwadkar stats->fc_numblks, 22420915e464SHarshad Shirwadkar div_u64(stats->s_fc_avg_commit_time, 1000)); 2243ce8c59d1SHarshad Shirwadkar seq_puts(seq, "Ineligible reasons:\n"); 2244ce8c59d1SHarshad 
Shirwadkar for (i = 0; i < EXT4_FC_REASON_MAX; i++) 2245ce8c59d1SHarshad Shirwadkar seq_printf(seq, "\"%s\":\t%d\n", fc_ineligible_reasons[i], 2246ce8c59d1SHarshad Shirwadkar stats->fc_ineligible_reason_count[i]); 2247ce8c59d1SHarshad Shirwadkar 2248ce8c59d1SHarshad Shirwadkar return 0; 2249ce8c59d1SHarshad Shirwadkar } 2250ce8c59d1SHarshad Shirwadkar 2251aa75f4d3SHarshad Shirwadkar int __init ext4_fc_init_dentry_cache(void) 2252aa75f4d3SHarshad Shirwadkar { 2253aa75f4d3SHarshad Shirwadkar ext4_fc_dentry_cachep = KMEM_CACHE(ext4_fc_dentry_update, 2254aa75f4d3SHarshad Shirwadkar SLAB_RECLAIM_ACCOUNT); 2255aa75f4d3SHarshad Shirwadkar 2256aa75f4d3SHarshad Shirwadkar if (ext4_fc_dentry_cachep == NULL) 2257aa75f4d3SHarshad Shirwadkar return -ENOMEM; 2258aa75f4d3SHarshad Shirwadkar 2259aa75f4d3SHarshad Shirwadkar return 0; 2260aa75f4d3SHarshad Shirwadkar } 2261ab047d51SSebastian Andrzej Siewior 2262ab047d51SSebastian Andrzej Siewior void ext4_fc_destroy_dentry_cache(void) 2263ab047d51SSebastian Andrzej Siewior { 2264ab047d51SSebastian Andrzej Siewior kmem_cache_destroy(ext4_fc_dentry_cachep); 2265ab047d51SSebastian Andrzej Siewior } 2266