16866d7b3SHarshad Shirwadkar // SPDX-License-Identifier: GPL-2.0 26866d7b3SHarshad Shirwadkar 36866d7b3SHarshad Shirwadkar /* 46866d7b3SHarshad Shirwadkar * fs/ext4/fast_commit.c 56866d7b3SHarshad Shirwadkar * 66866d7b3SHarshad Shirwadkar * Written by Harshad Shirwadkar <harshadshirwadkar@gmail.com> 76866d7b3SHarshad Shirwadkar * 86866d7b3SHarshad Shirwadkar * Ext4 fast commits routines. 96866d7b3SHarshad Shirwadkar */ 10aa75f4d3SHarshad Shirwadkar #include "ext4.h" 116866d7b3SHarshad Shirwadkar #include "ext4_jbd2.h" 12aa75f4d3SHarshad Shirwadkar #include "ext4_extents.h" 13aa75f4d3SHarshad Shirwadkar #include "mballoc.h" 14aa75f4d3SHarshad Shirwadkar 15aa75f4d3SHarshad Shirwadkar /* 16aa75f4d3SHarshad Shirwadkar * Ext4 Fast Commits 17aa75f4d3SHarshad Shirwadkar * ----------------- 18aa75f4d3SHarshad Shirwadkar * 19aa75f4d3SHarshad Shirwadkar * Ext4 fast commits implement fine grained journalling for Ext4. 20aa75f4d3SHarshad Shirwadkar * 21aa75f4d3SHarshad Shirwadkar * Fast commits are organized as a log of tag-length-value (TLV) structs. (See 22aa75f4d3SHarshad Shirwadkar * struct ext4_fc_tl). Each TLV contains some delta that is replayed TLV by 23aa75f4d3SHarshad Shirwadkar * TLV during the recovery phase. For the scenarios for which we currently 24aa75f4d3SHarshad Shirwadkar * don't have replay code, fast commit falls back to full commits. 25aa75f4d3SHarshad Shirwadkar * Fast commits record delta in one of the following three categories. 26aa75f4d3SHarshad Shirwadkar * 27aa75f4d3SHarshad Shirwadkar * (A) Directory entry updates: 28aa75f4d3SHarshad Shirwadkar * 29aa75f4d3SHarshad Shirwadkar * - EXT4_FC_TAG_UNLINK - records directory entry unlink 30aa75f4d3SHarshad Shirwadkar * - EXT4_FC_TAG_LINK - records directory entry link 31aa75f4d3SHarshad Shirwadkar * - EXT4_FC_TAG_CREAT - records inode and directory entry creation 32aa75f4d3SHarshad Shirwadkar * 33aa75f4d3SHarshad Shirwadkar * (B) File specific data range updates: 34aa75f4d3SHarshad Shirwadkar * 35aa75f4d3SHarshad Shirwadkar * - EXT4_FC_TAG_ADD_RANGE - records addition of new blocks to an inode 36aa75f4d3SHarshad Shirwadkar * - EXT4_FC_TAG_DEL_RANGE - records deletion of blocks from an inode 37aa75f4d3SHarshad Shirwadkar * 38aa75f4d3SHarshad Shirwadkar * (C) Inode metadata (mtime / ctime etc): 39aa75f4d3SHarshad Shirwadkar * 40aa75f4d3SHarshad Shirwadkar * - EXT4_FC_TAG_INODE - record the inode that should be replayed 41aa75f4d3SHarshad Shirwadkar * during recovery. Note that iblocks field is 42aa75f4d3SHarshad Shirwadkar * not replayed and instead derived during 43aa75f4d3SHarshad Shirwadkar * replay. 44aa75f4d3SHarshad Shirwadkar * Commit Operation 45aa75f4d3SHarshad Shirwadkar * ---------------- 46aa75f4d3SHarshad Shirwadkar * With fast commits, we maintain all the directory entry operations in the 47aa75f4d3SHarshad Shirwadkar * order in which they are issued in an in-memory queue. This queue is flushed 48aa75f4d3SHarshad Shirwadkar * to disk during the commit operation. We also maintain a list of inodes 49aa75f4d3SHarshad Shirwadkar * that need to be committed during a fast commit in another in memory queue of 50aa75f4d3SHarshad Shirwadkar * inodes. During the commit operation, we commit in the following order: 51aa75f4d3SHarshad Shirwadkar * 52aa75f4d3SHarshad Shirwadkar * [1] Lock inodes for any further data updates by setting COMMITTING state 53aa75f4d3SHarshad Shirwadkar * [2] Submit data buffers of all the inodes 54aa75f4d3SHarshad Shirwadkar * [3] Wait for [2] to complete 55aa75f4d3SHarshad Shirwadkar * [4] Commit all the directory entry updates in the fast commit space 56aa75f4d3SHarshad Shirwadkar * [5] Commit all the changed inode structures 57aa75f4d3SHarshad Shirwadkar * [6] Write tail tag (this tag ensures the atomicity, please read the following 58aa75f4d3SHarshad Shirwadkar * section for more details). 59aa75f4d3SHarshad Shirwadkar * [7] Wait for [4], [5] and [6] to complete. 60aa75f4d3SHarshad Shirwadkar * 61aa75f4d3SHarshad Shirwadkar * All the inode updates must call ext4_fc_start_update() before starting an 62aa75f4d3SHarshad Shirwadkar * update. If such an ongoing update is present, fast commit waits for it to 63aa75f4d3SHarshad Shirwadkar * complete. The completion of such an update is marked by 64aa75f4d3SHarshad Shirwadkar * ext4_fc_stop_update(). 65aa75f4d3SHarshad Shirwadkar * 66aa75f4d3SHarshad Shirwadkar * Fast Commit Ineligibility 67aa75f4d3SHarshad Shirwadkar * ------------------------- 687bbbe241SHarshad Shirwadkar * 69aa75f4d3SHarshad Shirwadkar * Not all operations are supported by fast commits today (e.g extended 707bbbe241SHarshad Shirwadkar * attributes). Fast commit ineligibility is marked by calling 717bbbe241SHarshad Shirwadkar * ext4_fc_mark_ineligible(): This makes next fast commit operation to fall back 727bbbe241SHarshad Shirwadkar * to full commit. 73aa75f4d3SHarshad Shirwadkar * 74aa75f4d3SHarshad Shirwadkar * Atomicity of commits 75aa75f4d3SHarshad Shirwadkar * -------------------- 76a740762fSHarshad Shirwadkar * In order to guarantee atomicity during the commit operation, fast commit 77aa75f4d3SHarshad Shirwadkar * uses "EXT4_FC_TAG_TAIL" tag that marks a fast commit as complete. Tail 78aa75f4d3SHarshad Shirwadkar * tag contains CRC of the contents and TID of the transaction after which 79aa75f4d3SHarshad Shirwadkar * this fast commit should be applied. Recovery code replays fast commit 80aa75f4d3SHarshad Shirwadkar * logs only if there's at least 1 valid tail present. For every fast commit 81aa75f4d3SHarshad Shirwadkar * operation, there is 1 tail. This means, we may end up with multiple tails 82aa75f4d3SHarshad Shirwadkar * in the fast commit space. Here's an example: 83aa75f4d3SHarshad Shirwadkar * 84aa75f4d3SHarshad Shirwadkar * - Create a new file A and remove existing file B 85aa75f4d3SHarshad Shirwadkar * - fsync() 86aa75f4d3SHarshad Shirwadkar * - Append contents to file A 87aa75f4d3SHarshad Shirwadkar * - Truncate file A 88aa75f4d3SHarshad Shirwadkar * - fsync() 89aa75f4d3SHarshad Shirwadkar * 90aa75f4d3SHarshad Shirwadkar * The fast commit space at the end of above operations would look like this: 91aa75f4d3SHarshad Shirwadkar * [HEAD] [CREAT A] [UNLINK B] [TAIL] [ADD_RANGE A] [DEL_RANGE A] [TAIL] 92aa75f4d3SHarshad Shirwadkar * |<--- Fast Commit 1 --->|<--- Fast Commit 2 ---->| 93aa75f4d3SHarshad Shirwadkar * 94aa75f4d3SHarshad Shirwadkar * Replay code should thus check for all the valid tails in the FC area. 95aa75f4d3SHarshad Shirwadkar * 96b1b7dce3SHarshad Shirwadkar * Fast Commit Replay Idempotence 97b1b7dce3SHarshad Shirwadkar * ------------------------------ 98b1b7dce3SHarshad Shirwadkar * 99b1b7dce3SHarshad Shirwadkar * Fast commits tags are idempotent in nature provided the recovery code follows 100b1b7dce3SHarshad Shirwadkar * certain rules. The guiding principle that the commit path follows while 101b1b7dce3SHarshad Shirwadkar * committing is that it stores the result of a particular operation instead of 102b1b7dce3SHarshad Shirwadkar * storing the procedure. 103b1b7dce3SHarshad Shirwadkar * 104b1b7dce3SHarshad Shirwadkar * Let's consider this rename operation: 'mv /a /b'. Let's assume dirent '/a' 105b1b7dce3SHarshad Shirwadkar * was associated with inode 10. During fast commit, instead of storing this 106b1b7dce3SHarshad Shirwadkar * operation as a procedure "rename a to b", we store the resulting file system 107b1b7dce3SHarshad Shirwadkar * state as a "series" of outcomes: 108b1b7dce3SHarshad Shirwadkar * 109b1b7dce3SHarshad Shirwadkar * - Link dirent b to inode 10 110b1b7dce3SHarshad Shirwadkar * - Unlink dirent a 111b1b7dce3SHarshad Shirwadkar * - Inode <10> with valid refcount 112b1b7dce3SHarshad Shirwadkar * 113b1b7dce3SHarshad Shirwadkar * Now when recovery code runs, it needs "enforce" this state on the file 114b1b7dce3SHarshad Shirwadkar * system. This is what guarantees idempotence of fast commit replay. 115b1b7dce3SHarshad Shirwadkar * 116b1b7dce3SHarshad Shirwadkar * Let's take an example of a procedure that is not idempotent and see how fast 117b1b7dce3SHarshad Shirwadkar * commits make it idempotent. Consider following sequence of operations: 118b1b7dce3SHarshad Shirwadkar * 119b1b7dce3SHarshad Shirwadkar * rm A; mv B A; read A 120b1b7dce3SHarshad Shirwadkar * (x) (y) (z) 121b1b7dce3SHarshad Shirwadkar * 122b1b7dce3SHarshad Shirwadkar * (x), (y) and (z) are the points at which we can crash. If we store this 123b1b7dce3SHarshad Shirwadkar * sequence of operations as is then the replay is not idempotent. Let's say 124b1b7dce3SHarshad Shirwadkar * while in replay, we crash at (z). During the second replay, file A (which was 125b1b7dce3SHarshad Shirwadkar * actually created as a result of "mv B A" operation) would get deleted. Thus, 126b1b7dce3SHarshad Shirwadkar * file named A would be absent when we try to read A. So, this sequence of 127b1b7dce3SHarshad Shirwadkar * operations is not idempotent. However, as mentioned above, instead of storing 128b1b7dce3SHarshad Shirwadkar * the procedure fast commits store the outcome of each procedure. Thus the fast 129b1b7dce3SHarshad Shirwadkar * commit log for above procedure would be as follows: 130b1b7dce3SHarshad Shirwadkar * 131b1b7dce3SHarshad Shirwadkar * (Let's assume dirent A was linked to inode 10 and dirent B was linked to 132b1b7dce3SHarshad Shirwadkar * inode 11 before the replay) 133b1b7dce3SHarshad Shirwadkar * 134b1b7dce3SHarshad Shirwadkar * [Unlink A] [Link A to inode 11] [Unlink B] [Inode 11] 135b1b7dce3SHarshad Shirwadkar * (w) (x) (y) (z) 136b1b7dce3SHarshad Shirwadkar * 137b1b7dce3SHarshad Shirwadkar * If we crash at (z), we will have file A linked to inode 11. During the second 138b1b7dce3SHarshad Shirwadkar * replay, we will remove file A (inode 11). But we will create it back and make 139b1b7dce3SHarshad Shirwadkar * it point to inode 11. We won't find B, so we'll just skip that step. At this 140b1b7dce3SHarshad Shirwadkar * point, the refcount for inode 11 is not reliable, but that gets fixed by the 141b1b7dce3SHarshad Shirwadkar * replay of last inode 11 tag. Crashes at points (w), (x) and (y) get handled 142b1b7dce3SHarshad Shirwadkar * similarly. Thus, by converting a non-idempotent procedure into a series of 143b1b7dce3SHarshad Shirwadkar * idempotent outcomes, fast commits ensured idempotence during the replay. 144b1b7dce3SHarshad Shirwadkar * 145aa75f4d3SHarshad Shirwadkar * TODOs 146aa75f4d3SHarshad Shirwadkar * ----- 147b1b7dce3SHarshad Shirwadkar * 148b1b7dce3SHarshad Shirwadkar * 0) Fast commit replay path hardening: Fast commit replay code should use 149b1b7dce3SHarshad Shirwadkar * journal handles to make sure all the updates it does during the replay 150b1b7dce3SHarshad Shirwadkar * path are atomic. With that if we crash during fast commit replay, after 151b1b7dce3SHarshad Shirwadkar * trying to do recovery again, we will find a file system where fast commit 152b1b7dce3SHarshad Shirwadkar * area is invalid (because new full commit would be found). In order to deal 153b1b7dce3SHarshad Shirwadkar * with that, fast commit replay code should ensure that the "FC_REPLAY" 154b1b7dce3SHarshad Shirwadkar * superblock state is persisted before starting the replay, so that after 155b1b7dce3SHarshad Shirwadkar * the crash, fast commit recovery code can look at that flag and perform 156b1b7dce3SHarshad Shirwadkar * fast commit recovery even if that area is invalidated by later full 157b1b7dce3SHarshad Shirwadkar * commits. 158b1b7dce3SHarshad Shirwadkar * 159d1199b94SHarshad Shirwadkar * 1) Fast commit's commit path locks the entire file system during fast 160d1199b94SHarshad Shirwadkar * commit. This has significant performance penalty. Instead of that, we 161d1199b94SHarshad Shirwadkar * should use ext4_fc_start/stop_update functions to start inode level 162d1199b94SHarshad Shirwadkar * updates from ext4_journal_start/stop. Once we do that we can drop file 163d1199b94SHarshad Shirwadkar * system locking during commit path. 164aa75f4d3SHarshad Shirwadkar * 165d1199b94SHarshad Shirwadkar * 2) Handle more ineligible cases. 166aa75f4d3SHarshad Shirwadkar */ 167aa75f4d3SHarshad Shirwadkar 168aa75f4d3SHarshad Shirwadkar #include <trace/events/ext4.h> 169aa75f4d3SHarshad Shirwadkar static struct kmem_cache *ext4_fc_dentry_cachep; 170aa75f4d3SHarshad Shirwadkar 171aa75f4d3SHarshad Shirwadkar static void ext4_end_buffer_io_sync(struct buffer_head *bh, int uptodate) 172aa75f4d3SHarshad Shirwadkar { 173aa75f4d3SHarshad Shirwadkar BUFFER_TRACE(bh, ""); 174aa75f4d3SHarshad Shirwadkar if (uptodate) { 175aa75f4d3SHarshad Shirwadkar ext4_debug("%s: Block %lld up-to-date", 176aa75f4d3SHarshad Shirwadkar __func__, bh->b_blocknr); 177aa75f4d3SHarshad Shirwadkar set_buffer_uptodate(bh); 178aa75f4d3SHarshad Shirwadkar } else { 179aa75f4d3SHarshad Shirwadkar ext4_debug("%s: Block %lld not up-to-date", 180aa75f4d3SHarshad Shirwadkar __func__, bh->b_blocknr); 181aa75f4d3SHarshad Shirwadkar clear_buffer_uptodate(bh); 182aa75f4d3SHarshad Shirwadkar } 183aa75f4d3SHarshad Shirwadkar 184aa75f4d3SHarshad Shirwadkar unlock_buffer(bh); 185aa75f4d3SHarshad Shirwadkar } 186aa75f4d3SHarshad Shirwadkar 187aa75f4d3SHarshad Shirwadkar static inline void ext4_fc_reset_inode(struct inode *inode) 188aa75f4d3SHarshad Shirwadkar { 189aa75f4d3SHarshad Shirwadkar struct ext4_inode_info *ei = EXT4_I(inode); 190aa75f4d3SHarshad Shirwadkar 191aa75f4d3SHarshad Shirwadkar ei->i_fc_lblk_start = 0; 192aa75f4d3SHarshad Shirwadkar ei->i_fc_lblk_len = 0; 193aa75f4d3SHarshad Shirwadkar } 194aa75f4d3SHarshad Shirwadkar 195aa75f4d3SHarshad Shirwadkar void ext4_fc_init_inode(struct inode *inode) 196aa75f4d3SHarshad Shirwadkar { 197aa75f4d3SHarshad Shirwadkar struct ext4_inode_info *ei = EXT4_I(inode); 198aa75f4d3SHarshad Shirwadkar 199aa75f4d3SHarshad Shirwadkar ext4_fc_reset_inode(inode); 200aa75f4d3SHarshad Shirwadkar ext4_clear_inode_state(inode, EXT4_STATE_FC_COMMITTING); 201aa75f4d3SHarshad Shirwadkar INIT_LIST_HEAD(&ei->i_fc_list); 202b3998b3bSRitesh Harjani INIT_LIST_HEAD(&ei->i_fc_dilist); 203aa75f4d3SHarshad Shirwadkar init_waitqueue_head(&ei->i_fc_wait); 204aa75f4d3SHarshad Shirwadkar atomic_set(&ei->i_fc_updates, 0); 205aa75f4d3SHarshad Shirwadkar } 206aa75f4d3SHarshad Shirwadkar 207f6634e26SHarshad Shirwadkar /* This function must be called with sbi->s_fc_lock held. */ 208f6634e26SHarshad Shirwadkar static void ext4_fc_wait_committing_inode(struct inode *inode) 209fa329e27STheodore Ts'o __releases(&EXT4_SB(inode->i_sb)->s_fc_lock) 210f6634e26SHarshad Shirwadkar { 211f6634e26SHarshad Shirwadkar wait_queue_head_t *wq; 212f6634e26SHarshad Shirwadkar struct ext4_inode_info *ei = EXT4_I(inode); 213f6634e26SHarshad Shirwadkar 214f6634e26SHarshad Shirwadkar #if (BITS_PER_LONG < 64) 215f6634e26SHarshad Shirwadkar DEFINE_WAIT_BIT(wait, &ei->i_state_flags, 216f6634e26SHarshad Shirwadkar EXT4_STATE_FC_COMMITTING); 217f6634e26SHarshad Shirwadkar wq = bit_waitqueue(&ei->i_state_flags, 218f6634e26SHarshad Shirwadkar EXT4_STATE_FC_COMMITTING); 219f6634e26SHarshad Shirwadkar #else 220f6634e26SHarshad Shirwadkar DEFINE_WAIT_BIT(wait, &ei->i_flags, 221f6634e26SHarshad Shirwadkar EXT4_STATE_FC_COMMITTING); 222f6634e26SHarshad Shirwadkar wq = bit_waitqueue(&ei->i_flags, 223f6634e26SHarshad Shirwadkar EXT4_STATE_FC_COMMITTING); 224f6634e26SHarshad Shirwadkar #endif 225f6634e26SHarshad Shirwadkar lockdep_assert_held(&EXT4_SB(inode->i_sb)->s_fc_lock); 226f6634e26SHarshad Shirwadkar prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE); 227f6634e26SHarshad Shirwadkar spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock); 228f6634e26SHarshad Shirwadkar schedule(); 229f6634e26SHarshad Shirwadkar finish_wait(wq, &wait.wq_entry); 230f6634e26SHarshad Shirwadkar } 231f6634e26SHarshad Shirwadkar 232b7b80a35SYe Bin static bool ext4_fc_disabled(struct super_block *sb) 233b7b80a35SYe Bin { 234b7b80a35SYe Bin return (!test_opt2(sb, JOURNAL_FAST_COMMIT) || 235b7b80a35SYe Bin (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY)); 236b7b80a35SYe Bin } 237b7b80a35SYe Bin 238aa75f4d3SHarshad Shirwadkar /* 239aa75f4d3SHarshad Shirwadkar * Inform Ext4's fast about start of an inode update 240aa75f4d3SHarshad Shirwadkar * 241aa75f4d3SHarshad Shirwadkar * This function is called by the high level call VFS callbacks before 242aa75f4d3SHarshad Shirwadkar * performing any inode update. This function blocks if there's an ongoing 243aa75f4d3SHarshad Shirwadkar * fast commit on the inode in question. 244aa75f4d3SHarshad Shirwadkar */ 245aa75f4d3SHarshad Shirwadkar void ext4_fc_start_update(struct inode *inode) 246aa75f4d3SHarshad Shirwadkar { 247aa75f4d3SHarshad Shirwadkar struct ext4_inode_info *ei = EXT4_I(inode); 248aa75f4d3SHarshad Shirwadkar 249b7b80a35SYe Bin if (ext4_fc_disabled(inode->i_sb)) 250aa75f4d3SHarshad Shirwadkar return; 251aa75f4d3SHarshad Shirwadkar 252aa75f4d3SHarshad Shirwadkar restart: 253aa75f4d3SHarshad Shirwadkar spin_lock(&EXT4_SB(inode->i_sb)->s_fc_lock); 254aa75f4d3SHarshad Shirwadkar if (list_empty(&ei->i_fc_list)) 255aa75f4d3SHarshad Shirwadkar goto out; 256aa75f4d3SHarshad Shirwadkar 257aa75f4d3SHarshad Shirwadkar if (ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING)) { 258f6634e26SHarshad Shirwadkar ext4_fc_wait_committing_inode(inode); 259aa75f4d3SHarshad Shirwadkar goto restart; 260aa75f4d3SHarshad Shirwadkar } 261aa75f4d3SHarshad Shirwadkar out: 262aa75f4d3SHarshad Shirwadkar atomic_inc(&ei->i_fc_updates); 263aa75f4d3SHarshad Shirwadkar spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock); 264aa75f4d3SHarshad Shirwadkar } 265aa75f4d3SHarshad Shirwadkar 266aa75f4d3SHarshad Shirwadkar /* 267aa75f4d3SHarshad Shirwadkar * Stop inode update and wake up waiting fast commits if any. 268aa75f4d3SHarshad Shirwadkar */ 269aa75f4d3SHarshad Shirwadkar void ext4_fc_stop_update(struct inode *inode) 270aa75f4d3SHarshad Shirwadkar { 271aa75f4d3SHarshad Shirwadkar struct ext4_inode_info *ei = EXT4_I(inode); 272aa75f4d3SHarshad Shirwadkar 273b7b80a35SYe Bin if (ext4_fc_disabled(inode->i_sb)) 274aa75f4d3SHarshad Shirwadkar return; 275aa75f4d3SHarshad Shirwadkar 276aa75f4d3SHarshad Shirwadkar if (atomic_dec_and_test(&ei->i_fc_updates)) 277aa75f4d3SHarshad Shirwadkar wake_up_all(&ei->i_fc_wait); 278aa75f4d3SHarshad Shirwadkar } 279aa75f4d3SHarshad Shirwadkar 280aa75f4d3SHarshad Shirwadkar /* 281aa75f4d3SHarshad Shirwadkar * Remove inode from fast commit list. If the inode is being committed 282aa75f4d3SHarshad Shirwadkar * we wait until inode commit is done. 283aa75f4d3SHarshad Shirwadkar */ 284aa75f4d3SHarshad Shirwadkar void ext4_fc_del(struct inode *inode) 285aa75f4d3SHarshad Shirwadkar { 286aa75f4d3SHarshad Shirwadkar struct ext4_inode_info *ei = EXT4_I(inode); 287b3998b3bSRitesh Harjani struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 288b3998b3bSRitesh Harjani struct ext4_fc_dentry_update *fc_dentry; 289aa75f4d3SHarshad Shirwadkar 290b7b80a35SYe Bin if (ext4_fc_disabled(inode->i_sb)) 291aa75f4d3SHarshad Shirwadkar return; 292aa75f4d3SHarshad Shirwadkar 293aa75f4d3SHarshad Shirwadkar restart: 294aa75f4d3SHarshad Shirwadkar spin_lock(&EXT4_SB(inode->i_sb)->s_fc_lock); 295b3998b3bSRitesh Harjani if (list_empty(&ei->i_fc_list) && list_empty(&ei->i_fc_dilist)) { 296aa75f4d3SHarshad Shirwadkar spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock); 297aa75f4d3SHarshad Shirwadkar return; 298aa75f4d3SHarshad Shirwadkar } 299aa75f4d3SHarshad Shirwadkar 300aa75f4d3SHarshad Shirwadkar if (ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING)) { 301f6634e26SHarshad Shirwadkar ext4_fc_wait_committing_inode(inode); 302aa75f4d3SHarshad Shirwadkar goto restart; 303aa75f4d3SHarshad Shirwadkar } 304b3998b3bSRitesh Harjani 305b3998b3bSRitesh Harjani if (!list_empty(&ei->i_fc_list)) 306aa75f4d3SHarshad Shirwadkar list_del_init(&ei->i_fc_list); 307b3998b3bSRitesh Harjani 308b3998b3bSRitesh Harjani /* 309b3998b3bSRitesh Harjani * Since this inode is getting removed, let's also remove all FC 310b3998b3bSRitesh Harjani * dentry create references, since it is not needed to log it anyways. 311b3998b3bSRitesh Harjani */ 312b3998b3bSRitesh Harjani if (list_empty(&ei->i_fc_dilist)) { 313b3998b3bSRitesh Harjani spin_unlock(&sbi->s_fc_lock); 314b3998b3bSRitesh Harjani return; 315b3998b3bSRitesh Harjani } 316b3998b3bSRitesh Harjani 317b3998b3bSRitesh Harjani fc_dentry = list_first_entry(&ei->i_fc_dilist, struct ext4_fc_dentry_update, fcd_dilist); 318b3998b3bSRitesh Harjani WARN_ON(fc_dentry->fcd_op != EXT4_FC_TAG_CREAT); 319b3998b3bSRitesh Harjani list_del_init(&fc_dentry->fcd_list); 320b3998b3bSRitesh Harjani list_del_init(&fc_dentry->fcd_dilist); 321b3998b3bSRitesh Harjani 322b3998b3bSRitesh Harjani WARN_ON(!list_empty(&ei->i_fc_dilist)); 323b3998b3bSRitesh Harjani spin_unlock(&sbi->s_fc_lock); 324b3998b3bSRitesh Harjani 325b3998b3bSRitesh Harjani if (fc_dentry->fcd_name.name && 326b3998b3bSRitesh Harjani fc_dentry->fcd_name.len > DNAME_INLINE_LEN) 327b3998b3bSRitesh Harjani kfree(fc_dentry->fcd_name.name); 328b3998b3bSRitesh Harjani kmem_cache_free(ext4_fc_dentry_cachep, fc_dentry); 329b3998b3bSRitesh Harjani 330b3998b3bSRitesh Harjani return; 331aa75f4d3SHarshad Shirwadkar } 332aa75f4d3SHarshad Shirwadkar 333aa75f4d3SHarshad Shirwadkar /* 334e85c81baSXin Yin * Mark file system as fast commit ineligible, and record latest 335e85c81baSXin Yin * ineligible transaction tid. This means until the recorded 336e85c81baSXin Yin * transaction, commit operation would result in a full jbd2 commit. 337aa75f4d3SHarshad Shirwadkar */ 338e85c81baSXin Yin void ext4_fc_mark_ineligible(struct super_block *sb, int reason, handle_t *handle) 339aa75f4d3SHarshad Shirwadkar { 340aa75f4d3SHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB(sb); 341e85c81baSXin Yin tid_t tid; 3428c762b4eSLuis Henriques (SUSE) bool has_transaction = true; 3438c762b4eSLuis Henriques (SUSE) bool is_ineligible; 344aa75f4d3SHarshad Shirwadkar 345b7b80a35SYe Bin if (ext4_fc_disabled(sb)) 3468016e29fSHarshad Shirwadkar return; 3478016e29fSHarshad Shirwadkar 348e85c81baSXin Yin if (handle && !IS_ERR(handle)) 349e85c81baSXin Yin tid = handle->h_transaction->t_tid; 350e85c81baSXin Yin else { 351e85c81baSXin Yin read_lock(&sbi->s_journal->j_state_lock); 3528c762b4eSLuis Henriques (SUSE) if (sbi->s_journal->j_running_transaction) 3538c762b4eSLuis Henriques (SUSE) tid = sbi->s_journal->j_running_transaction->t_tid; 3548c762b4eSLuis Henriques (SUSE) else 3558c762b4eSLuis Henriques (SUSE) has_transaction = false; 356e85c81baSXin Yin read_unlock(&sbi->s_journal->j_state_lock); 357e85c81baSXin Yin } 358e85c81baSXin Yin spin_lock(&sbi->s_fc_lock); 3598c762b4eSLuis Henriques (SUSE) is_ineligible = ext4_test_mount_flag(sb, EXT4_MF_FC_INELIGIBLE); 3608c762b4eSLuis Henriques (SUSE) if (has_transaction && 3618c762b4eSLuis Henriques (SUSE) (!is_ineligible || 3628c762b4eSLuis Henriques (SUSE) (is_ineligible && tid_gt(tid, sbi->s_fc_ineligible_tid)))) 363e85c81baSXin Yin sbi->s_fc_ineligible_tid = tid; 3648c762b4eSLuis Henriques (SUSE) ext4_set_mount_flag(sb, EXT4_MF_FC_INELIGIBLE); 365e85c81baSXin Yin spin_unlock(&sbi->s_fc_lock); 366aa75f4d3SHarshad Shirwadkar WARN_ON(reason >= EXT4_FC_REASON_MAX); 367aa75f4d3SHarshad Shirwadkar sbi->s_fc_stats.fc_ineligible_reason_count[reason]++; 368aa75f4d3SHarshad Shirwadkar } 369aa75f4d3SHarshad Shirwadkar 370aa75f4d3SHarshad Shirwadkar /* 371aa75f4d3SHarshad Shirwadkar * Generic fast commit tracking function. If this is the first time this we are 372aa75f4d3SHarshad Shirwadkar * called after a full commit, we initialize fast commit fields and then call 373aa75f4d3SHarshad Shirwadkar * __fc_track_fn() with update = 0. If we have already been called after a full 374aa75f4d3SHarshad Shirwadkar * commit, we pass update = 1. Based on that, the track function can determine 375aa75f4d3SHarshad Shirwadkar * if it needs to track a field for the first time or if it needs to just 376aa75f4d3SHarshad Shirwadkar * update the previously tracked value. 377aa75f4d3SHarshad Shirwadkar * 378aa75f4d3SHarshad Shirwadkar * If enqueue is set, this function enqueues the inode in fast commit list. 379aa75f4d3SHarshad Shirwadkar */ 380aa75f4d3SHarshad Shirwadkar static int ext4_fc_track_template( 381a80f7fcfSHarshad Shirwadkar handle_t *handle, struct inode *inode, 382*c5771f1cSLuis Henriques (SUSE) int (*__fc_track_fn)(handle_t *handle, struct inode *, void *, bool), 383aa75f4d3SHarshad Shirwadkar void *args, int enqueue) 384aa75f4d3SHarshad Shirwadkar { 385aa75f4d3SHarshad Shirwadkar bool update = false; 386aa75f4d3SHarshad Shirwadkar struct ext4_inode_info *ei = EXT4_I(inode); 387aa75f4d3SHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 388a80f7fcfSHarshad Shirwadkar tid_t tid = 0; 389aa75f4d3SHarshad Shirwadkar int ret; 390aa75f4d3SHarshad Shirwadkar 391a80f7fcfSHarshad Shirwadkar tid = handle->h_transaction->t_tid; 392aa75f4d3SHarshad Shirwadkar mutex_lock(&ei->i_fc_lock); 393a80f7fcfSHarshad Shirwadkar if (tid == ei->i_sync_tid) { 394aa75f4d3SHarshad Shirwadkar update = true; 395aa75f4d3SHarshad Shirwadkar } else { 396aa75f4d3SHarshad Shirwadkar ext4_fc_reset_inode(inode); 397a80f7fcfSHarshad Shirwadkar ei->i_sync_tid = tid; 398aa75f4d3SHarshad Shirwadkar } 399*c5771f1cSLuis Henriques (SUSE) ret = __fc_track_fn(handle, inode, args, update); 400aa75f4d3SHarshad Shirwadkar mutex_unlock(&ei->i_fc_lock); 401aa75f4d3SHarshad Shirwadkar 402aa75f4d3SHarshad Shirwadkar if (!enqueue) 403aa75f4d3SHarshad Shirwadkar return ret; 404aa75f4d3SHarshad Shirwadkar 405aa75f4d3SHarshad Shirwadkar spin_lock(&sbi->s_fc_lock); 406aa75f4d3SHarshad Shirwadkar if (list_empty(&EXT4_I(inode)->i_fc_list)) 407aa75f4d3SHarshad Shirwadkar list_add_tail(&EXT4_I(inode)->i_fc_list, 408bdc8a53aSXin Yin (sbi->s_journal->j_flags & JBD2_FULL_COMMIT_ONGOING || 409bdc8a53aSXin Yin sbi->s_journal->j_flags & JBD2_FAST_COMMIT_ONGOING) ? 410aa75f4d3SHarshad Shirwadkar &sbi->s_fc_q[FC_Q_STAGING] : 411aa75f4d3SHarshad Shirwadkar &sbi->s_fc_q[FC_Q_MAIN]); 412aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock); 413aa75f4d3SHarshad Shirwadkar 414aa75f4d3SHarshad Shirwadkar return ret; 415aa75f4d3SHarshad Shirwadkar } 416aa75f4d3SHarshad Shirwadkar 417aa75f4d3SHarshad Shirwadkar struct __track_dentry_update_args { 418aa75f4d3SHarshad Shirwadkar struct dentry *dentry; 419aa75f4d3SHarshad Shirwadkar int op; 420aa75f4d3SHarshad Shirwadkar }; 421aa75f4d3SHarshad Shirwadkar 422aa75f4d3SHarshad Shirwadkar /* __track_fn for directory entry updates. Called with ei->i_fc_lock. */ 423*c5771f1cSLuis Henriques (SUSE) static int __track_dentry_update(handle_t *handle, struct inode *inode, 424*c5771f1cSLuis Henriques (SUSE) void *arg, bool update) 425aa75f4d3SHarshad Shirwadkar { 426aa75f4d3SHarshad Shirwadkar struct ext4_fc_dentry_update *node; 427aa75f4d3SHarshad Shirwadkar struct ext4_inode_info *ei = EXT4_I(inode); 428aa75f4d3SHarshad Shirwadkar struct __track_dentry_update_args *dentry_update = 429aa75f4d3SHarshad Shirwadkar (struct __track_dentry_update_args *)arg; 430aa75f4d3SHarshad Shirwadkar struct dentry *dentry = dentry_update->dentry; 4310fbcb525SEric Biggers struct inode *dir = dentry->d_parent->d_inode; 4320fbcb525SEric Biggers struct super_block *sb = inode->i_sb; 4330fbcb525SEric Biggers struct ext4_sb_info *sbi = EXT4_SB(sb); 434aa75f4d3SHarshad Shirwadkar 435aa75f4d3SHarshad Shirwadkar mutex_unlock(&ei->i_fc_lock); 4360fbcb525SEric Biggers 4370fbcb525SEric Biggers if (IS_ENCRYPTED(dir)) { 4380fbcb525SEric Biggers ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_ENCRYPTED_FILENAME, 439*c5771f1cSLuis Henriques (SUSE) handle); 4400fbcb525SEric Biggers mutex_lock(&ei->i_fc_lock); 4410fbcb525SEric Biggers return -EOPNOTSUPP; 4420fbcb525SEric Biggers } 4430fbcb525SEric Biggers 444aa75f4d3SHarshad Shirwadkar node = kmem_cache_alloc(ext4_fc_dentry_cachep, GFP_NOFS); 445aa75f4d3SHarshad Shirwadkar if (!node) { 446*c5771f1cSLuis Henriques (SUSE) ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_NOMEM, handle); 447aa75f4d3SHarshad Shirwadkar mutex_lock(&ei->i_fc_lock); 448aa75f4d3SHarshad Shirwadkar return -ENOMEM; 449aa75f4d3SHarshad Shirwadkar } 450aa75f4d3SHarshad Shirwadkar 451aa75f4d3SHarshad Shirwadkar node->fcd_op = dentry_update->op; 4520fbcb525SEric Biggers node->fcd_parent = dir->i_ino; 453aa75f4d3SHarshad Shirwadkar node->fcd_ino = inode->i_ino; 454aa75f4d3SHarshad Shirwadkar if (dentry->d_name.len > DNAME_INLINE_LEN) { 455aa75f4d3SHarshad Shirwadkar node->fcd_name.name = kmalloc(dentry->d_name.len, GFP_NOFS); 456aa75f4d3SHarshad Shirwadkar if (!node->fcd_name.name) { 457aa75f4d3SHarshad Shirwadkar kmem_cache_free(ext4_fc_dentry_cachep, node); 458*c5771f1cSLuis Henriques (SUSE) ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_NOMEM, handle); 459aa75f4d3SHarshad Shirwadkar mutex_lock(&ei->i_fc_lock); 460aa75f4d3SHarshad Shirwadkar return -ENOMEM; 461aa75f4d3SHarshad Shirwadkar } 462aa75f4d3SHarshad Shirwadkar memcpy((u8 *)node->fcd_name.name, dentry->d_name.name, 463aa75f4d3SHarshad Shirwadkar dentry->d_name.len); 464aa75f4d3SHarshad Shirwadkar } else { 465aa75f4d3SHarshad Shirwadkar memcpy(node->fcd_iname, dentry->d_name.name, 466aa75f4d3SHarshad Shirwadkar dentry->d_name.len); 467aa75f4d3SHarshad Shirwadkar node->fcd_name.name = node->fcd_iname; 468aa75f4d3SHarshad Shirwadkar } 469aa75f4d3SHarshad Shirwadkar node->fcd_name.len = dentry->d_name.len; 470b3998b3bSRitesh Harjani INIT_LIST_HEAD(&node->fcd_dilist); 471aa75f4d3SHarshad Shirwadkar spin_lock(&sbi->s_fc_lock); 472bdc8a53aSXin Yin if (sbi->s_journal->j_flags & JBD2_FULL_COMMIT_ONGOING || 473bdc8a53aSXin Yin sbi->s_journal->j_flags & JBD2_FAST_COMMIT_ONGOING) 474aa75f4d3SHarshad Shirwadkar list_add_tail(&node->fcd_list, 475aa75f4d3SHarshad Shirwadkar &sbi->s_fc_dentry_q[FC_Q_STAGING]); 476aa75f4d3SHarshad Shirwadkar else 477aa75f4d3SHarshad Shirwadkar list_add_tail(&node->fcd_list, &sbi->s_fc_dentry_q[FC_Q_MAIN]); 478b3998b3bSRitesh Harjani 479b3998b3bSRitesh Harjani /* 480b3998b3bSRitesh Harjani * This helps us keep a track of all fc_dentry updates which is part of 481b3998b3bSRitesh Harjani * this ext4 inode. So in case the inode is getting unlinked, before 482b3998b3bSRitesh Harjani * even we get a chance to fsync, we could remove all fc_dentry 483b3998b3bSRitesh Harjani * references while evicting the inode in ext4_fc_del(). 484b3998b3bSRitesh Harjani * Also with this, we don't need to loop over all the inodes in 485b3998b3bSRitesh Harjani * sbi->s_fc_q to get the corresponding inode in 486b3998b3bSRitesh Harjani * ext4_fc_commit_dentry_updates(). 487b3998b3bSRitesh Harjani */ 488b3998b3bSRitesh Harjani if (dentry_update->op == EXT4_FC_TAG_CREAT) { 489b3998b3bSRitesh Harjani WARN_ON(!list_empty(&ei->i_fc_dilist)); 490b3998b3bSRitesh Harjani list_add_tail(&node->fcd_dilist, &ei->i_fc_dilist); 491b3998b3bSRitesh Harjani } 492aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock); 493aa75f4d3SHarshad Shirwadkar mutex_lock(&ei->i_fc_lock); 494aa75f4d3SHarshad Shirwadkar 495aa75f4d3SHarshad Shirwadkar return 0; 496aa75f4d3SHarshad Shirwadkar } 497aa75f4d3SHarshad Shirwadkar 498a80f7fcfSHarshad Shirwadkar void __ext4_fc_track_unlink(handle_t *handle, 499a80f7fcfSHarshad Shirwadkar struct inode *inode, struct dentry *dentry) 500aa75f4d3SHarshad Shirwadkar { 501aa75f4d3SHarshad Shirwadkar struct __track_dentry_update_args args; 502aa75f4d3SHarshad Shirwadkar int ret; 503aa75f4d3SHarshad Shirwadkar 504aa75f4d3SHarshad Shirwadkar args.dentry = dentry; 505aa75f4d3SHarshad Shirwadkar args.op = EXT4_FC_TAG_UNLINK; 506aa75f4d3SHarshad Shirwadkar 507a80f7fcfSHarshad Shirwadkar ret = ext4_fc_track_template(handle, inode, __track_dentry_update, 508aa75f4d3SHarshad Shirwadkar (void *)&args, 0); 5091d2e2440SRitesh Harjani trace_ext4_fc_track_unlink(handle, inode, dentry, ret); 510aa75f4d3SHarshad Shirwadkar } 511aa75f4d3SHarshad Shirwadkar 512a80f7fcfSHarshad Shirwadkar void ext4_fc_track_unlink(handle_t *handle, struct dentry *dentry) 513a80f7fcfSHarshad Shirwadkar { 51478be0471SRitesh Harjani struct inode *inode = d_inode(dentry); 51578be0471SRitesh Harjani 516b7b80a35SYe Bin if (ext4_fc_disabled(inode->i_sb)) 51778be0471SRitesh Harjani return; 51878be0471SRitesh Harjani 51978be0471SRitesh Harjani if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE)) 52078be0471SRitesh Harjani return; 52178be0471SRitesh Harjani 52278be0471SRitesh Harjani __ext4_fc_track_unlink(handle, inode, dentry); 523a80f7fcfSHarshad Shirwadkar } 524a80f7fcfSHarshad Shirwadkar 525a80f7fcfSHarshad Shirwadkar void __ext4_fc_track_link(handle_t *handle, 526a80f7fcfSHarshad Shirwadkar struct inode *inode, struct dentry *dentry) 527aa75f4d3SHarshad Shirwadkar { 528aa75f4d3SHarshad Shirwadkar struct __track_dentry_update_args args; 529aa75f4d3SHarshad Shirwadkar int ret; 530aa75f4d3SHarshad Shirwadkar 531aa75f4d3SHarshad Shirwadkar args.dentry = dentry; 532aa75f4d3SHarshad Shirwadkar args.op = EXT4_FC_TAG_LINK; 533aa75f4d3SHarshad Shirwadkar 534a80f7fcfSHarshad Shirwadkar ret = ext4_fc_track_template(handle, inode, __track_dentry_update, 535aa75f4d3SHarshad Shirwadkar (void *)&args, 0); 5361d2e2440SRitesh Harjani trace_ext4_fc_track_link(handle, inode, dentry, ret); 537aa75f4d3SHarshad Shirwadkar } 538aa75f4d3SHarshad Shirwadkar 539a80f7fcfSHarshad Shirwadkar void ext4_fc_track_link(handle_t *handle, struct dentry *dentry) 540a80f7fcfSHarshad Shirwadkar { 54178be0471SRitesh Harjani struct inode *inode = d_inode(dentry); 54278be0471SRitesh Harjani 543b7b80a35SYe Bin if (ext4_fc_disabled(inode->i_sb)) 54478be0471SRitesh Harjani return; 54578be0471SRitesh Harjani 54678be0471SRitesh Harjani if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE)) 54778be0471SRitesh Harjani return; 54878be0471SRitesh Harjani 54978be0471SRitesh Harjani __ext4_fc_track_link(handle, inode, dentry); 550a80f7fcfSHarshad Shirwadkar } 551a80f7fcfSHarshad Shirwadkar 5528210bb29SHarshad Shirwadkar void __ext4_fc_track_create(handle_t *handle, struct inode *inode, 5538210bb29SHarshad Shirwadkar struct dentry *dentry) 554aa75f4d3SHarshad Shirwadkar { 555aa75f4d3SHarshad Shirwadkar struct __track_dentry_update_args args; 556aa75f4d3SHarshad Shirwadkar int ret; 557aa75f4d3SHarshad Shirwadkar 558aa75f4d3SHarshad Shirwadkar args.dentry = dentry; 559aa75f4d3SHarshad Shirwadkar args.op = EXT4_FC_TAG_CREAT; 560aa75f4d3SHarshad Shirwadkar 561a80f7fcfSHarshad Shirwadkar ret = ext4_fc_track_template(handle, inode, __track_dentry_update, 562aa75f4d3SHarshad Shirwadkar (void *)&args, 0); 5631d2e2440SRitesh Harjani trace_ext4_fc_track_create(handle, inode, dentry, ret); 564aa75f4d3SHarshad Shirwadkar } 565aa75f4d3SHarshad Shirwadkar 5668210bb29SHarshad Shirwadkar void ext4_fc_track_create(handle_t *handle, struct dentry *dentry) 5678210bb29SHarshad Shirwadkar { 56878be0471SRitesh Harjani struct inode *inode = d_inode(dentry); 56978be0471SRitesh Harjani 570b7b80a35SYe Bin if (ext4_fc_disabled(inode->i_sb)) 57178be0471SRitesh Harjani return; 57278be0471SRitesh Harjani 57378be0471SRitesh Harjani if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE)) 57478be0471SRitesh Harjani return; 57578be0471SRitesh Harjani 57678be0471SRitesh Harjani __ext4_fc_track_create(handle, inode, dentry); 5778210bb29SHarshad Shirwadkar } 5788210bb29SHarshad Shirwadkar 579aa75f4d3SHarshad Shirwadkar /* __track_fn for inode tracking */ 580*c5771f1cSLuis Henriques (SUSE) static int __track_inode(handle_t *handle, struct inode *inode, void *arg, 581*c5771f1cSLuis Henriques (SUSE) bool update) 582aa75f4d3SHarshad Shirwadkar { 583aa75f4d3SHarshad Shirwadkar if (update) 584aa75f4d3SHarshad Shirwadkar return -EEXIST; 585aa75f4d3SHarshad Shirwadkar 586aa75f4d3SHarshad Shirwadkar EXT4_I(inode)->i_fc_lblk_len = 0; 587aa75f4d3SHarshad Shirwadkar 588aa75f4d3SHarshad Shirwadkar return 0; 589aa75f4d3SHarshad Shirwadkar } 590aa75f4d3SHarshad Shirwadkar 591a80f7fcfSHarshad Shirwadkar void ext4_fc_track_inode(handle_t *handle, struct inode *inode) 592aa75f4d3SHarshad Shirwadkar { 593aa75f4d3SHarshad Shirwadkar int ret; 594aa75f4d3SHarshad Shirwadkar 595aa75f4d3SHarshad Shirwadkar if (S_ISDIR(inode->i_mode)) 596aa75f4d3SHarshad Shirwadkar return; 597aa75f4d3SHarshad Shirwadkar 598e64e6ca9SYe Bin if (ext4_fc_disabled(inode->i_sb)) 599e64e6ca9SYe Bin return; 600e64e6ca9SYe Bin 601556e0319SHarshad Shirwadkar if (ext4_should_journal_data(inode)) { 602556e0319SHarshad Shirwadkar ext4_fc_mark_ineligible(inode->i_sb, 603e85c81baSXin Yin EXT4_FC_REASON_INODE_JOURNAL_DATA, handle); 604556e0319SHarshad Shirwadkar return; 605556e0319SHarshad Shirwadkar } 606556e0319SHarshad Shirwadkar 60778be0471SRitesh Harjani if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE)) 60878be0471SRitesh Harjani return; 60978be0471SRitesh Harjani 610a80f7fcfSHarshad Shirwadkar ret = ext4_fc_track_template(handle, inode, __track_inode, NULL, 1); 6111d2e2440SRitesh Harjani trace_ext4_fc_track_inode(handle, inode, ret); 612aa75f4d3SHarshad Shirwadkar } 613aa75f4d3SHarshad Shirwadkar 614aa75f4d3SHarshad Shirwadkar struct __track_range_args { 615aa75f4d3SHarshad Shirwadkar ext4_lblk_t start, end; 616aa75f4d3SHarshad Shirwadkar }; 617aa75f4d3SHarshad Shirwadkar 618aa75f4d3SHarshad Shirwadkar /* __track_fn for tracking data updates */ 619*c5771f1cSLuis Henriques (SUSE) static int __track_range(handle_t *handle, struct inode *inode, void *arg, 620*c5771f1cSLuis Henriques (SUSE) bool update) 621aa75f4d3SHarshad Shirwadkar { 622aa75f4d3SHarshad Shirwadkar struct ext4_inode_info *ei = EXT4_I(inode); 623aa75f4d3SHarshad Shirwadkar ext4_lblk_t oldstart; 624aa75f4d3SHarshad Shirwadkar struct __track_range_args *__arg = 625aa75f4d3SHarshad Shirwadkar (struct __track_range_args *)arg; 626aa75f4d3SHarshad Shirwadkar 627aa75f4d3SHarshad Shirwadkar if (inode->i_ino < EXT4_FIRST_INO(inode->i_sb)) { 628aa75f4d3SHarshad Shirwadkar ext4_debug("Special inode %ld being modified\n", inode->i_ino); 629aa75f4d3SHarshad Shirwadkar return -ECANCELED; 630aa75f4d3SHarshad Shirwadkar } 631aa75f4d3SHarshad Shirwadkar 632aa75f4d3SHarshad Shirwadkar oldstart = ei->i_fc_lblk_start; 633aa75f4d3SHarshad Shirwadkar 634aa75f4d3SHarshad Shirwadkar if (update && ei->i_fc_lblk_len > 0) { 635aa75f4d3SHarshad Shirwadkar ei->i_fc_lblk_start = min(ei->i_fc_lblk_start, __arg->start); 636aa75f4d3SHarshad Shirwadkar ei->i_fc_lblk_len = 637aa75f4d3SHarshad Shirwadkar max(oldstart + ei->i_fc_lblk_len - 1, __arg->end) - 638aa75f4d3SHarshad Shirwadkar ei->i_fc_lblk_start + 1; 639aa75f4d3SHarshad Shirwadkar } else { 640aa75f4d3SHarshad Shirwadkar ei->i_fc_lblk_start = __arg->start; 641aa75f4d3SHarshad Shirwadkar ei->i_fc_lblk_len = __arg->end - __arg->start + 1; 642aa75f4d3SHarshad Shirwadkar } 643aa75f4d3SHarshad Shirwadkar 644aa75f4d3SHarshad Shirwadkar return 0; 645aa75f4d3SHarshad Shirwadkar } 646aa75f4d3SHarshad Shirwadkar 647a80f7fcfSHarshad Shirwadkar void ext4_fc_track_range(handle_t *handle, struct inode *inode, ext4_lblk_t start, 648aa75f4d3SHarshad Shirwadkar ext4_lblk_t end) 649aa75f4d3SHarshad Shirwadkar { 650aa75f4d3SHarshad Shirwadkar struct __track_range_args args; 651aa75f4d3SHarshad Shirwadkar int ret; 652aa75f4d3SHarshad Shirwadkar 653aa75f4d3SHarshad Shirwadkar if (S_ISDIR(inode->i_mode)) 654aa75f4d3SHarshad Shirwadkar return; 655aa75f4d3SHarshad Shirwadkar 656b7b80a35SYe Bin if (ext4_fc_disabled(inode->i_sb)) 65778be0471SRitesh Harjani return; 65878be0471SRitesh Harjani 65978be0471SRitesh Harjani if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE)) 66078be0471SRitesh Harjani return; 66178be0471SRitesh Harjani 6626b18e4efSLuis Henriques (SUSE) if (ext4_has_inline_data(inode)) { 6636b18e4efSLuis Henriques (SUSE) ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR, 6646b18e4efSLuis Henriques (SUSE) handle); 6656b18e4efSLuis Henriques (SUSE) return; 6666b18e4efSLuis Henriques (SUSE) } 6676b18e4efSLuis Henriques (SUSE) 668aa75f4d3SHarshad Shirwadkar args.start = start; 669aa75f4d3SHarshad Shirwadkar args.end = end; 670aa75f4d3SHarshad Shirwadkar 671a80f7fcfSHarshad Shirwadkar ret = ext4_fc_track_template(handle, inode, __track_range, &args, 1); 672aa75f4d3SHarshad Shirwadkar 6731d2e2440SRitesh Harjani trace_ext4_fc_track_range(handle, inode, start, end, ret); 674aa75f4d3SHarshad Shirwadkar } 675aa75f4d3SHarshad Shirwadkar 676e9f53353SDaejun Park static void ext4_fc_submit_bh(struct super_block *sb, bool is_tail) 677aa75f4d3SHarshad Shirwadkar { 67867c0f556SBart Van Assche blk_opf_t write_flags = REQ_SYNC; 679aa75f4d3SHarshad Shirwadkar struct buffer_head *bh = EXT4_SB(sb)->s_fc_bh; 680aa75f4d3SHarshad Shirwadkar 681e9f53353SDaejun Park /* Add REQ_FUA | REQ_PREFLUSH only its tail */ 682e9f53353SDaejun Park if (test_opt(sb, BARRIER) && is_tail) 683aa75f4d3SHarshad Shirwadkar write_flags |= REQ_FUA | REQ_PREFLUSH; 684aa75f4d3SHarshad Shirwadkar lock_buffer(bh); 685764b3fd3SHarshad Shirwadkar set_buffer_dirty(bh); 686aa75f4d3SHarshad Shirwadkar set_buffer_uptodate(bh); 687aa75f4d3SHarshad Shirwadkar bh->b_end_io = ext4_end_buffer_io_sync; 6881420c4a5SBart Van Assche submit_bh(REQ_OP_WRITE | write_flags, bh); 689aa75f4d3SHarshad Shirwadkar EXT4_SB(sb)->s_fc_bh = NULL; 690aa75f4d3SHarshad Shirwadkar } 691aa75f4d3SHarshad Shirwadkar 692aa75f4d3SHarshad Shirwadkar /* Ext4 commit path routines */ 693aa75f4d3SHarshad Shirwadkar 694aa75f4d3SHarshad Shirwadkar /* 695aa75f4d3SHarshad Shirwadkar * Allocate len bytes on a fast commit buffer. 696aa75f4d3SHarshad Shirwadkar * 697aa75f4d3SHarshad Shirwadkar * During the commit time this function is used to manage fast commit 698aa75f4d3SHarshad Shirwadkar * block space. We don't split a fast commit log onto different 699aa75f4d3SHarshad Shirwadkar * blocks. So this function makes sure that if there's not enough space 700aa75f4d3SHarshad Shirwadkar * on the current block, the remaining space in the current block is 701aa75f4d3SHarshad Shirwadkar * marked as unused by adding EXT4_FC_TAG_PAD tag. In that case, 702aa75f4d3SHarshad Shirwadkar * new block is from jbd2 and CRC is updated to reflect the padding 703aa75f4d3SHarshad Shirwadkar * we added. 704aa75f4d3SHarshad Shirwadkar */ 705aa75f4d3SHarshad Shirwadkar static u8 *ext4_fc_reserve_space(struct super_block *sb, int len, u32 *crc) 706aa75f4d3SHarshad Shirwadkar { 7078415ce07SEric Biggers struct ext4_fc_tl tl; 708aa75f4d3SHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB(sb); 709aa75f4d3SHarshad Shirwadkar struct buffer_head *bh; 710aa75f4d3SHarshad Shirwadkar int bsize = sbi->s_journal->j_blocksize; 711aa75f4d3SHarshad Shirwadkar int ret, off = sbi->s_fc_bytes % bsize; 71248a6a66dSEric Biggers int remaining; 7138415ce07SEric Biggers u8 *dst; 714aa75f4d3SHarshad Shirwadkar 715aa75f4d3SHarshad Shirwadkar /* 71648a6a66dSEric Biggers * If 'len' is too long to fit in any block alongside a PAD tlv, then we 71748a6a66dSEric Biggers * cannot fulfill the request. 718aa75f4d3SHarshad Shirwadkar */ 71948a6a66dSEric Biggers if (len > bsize - EXT4_FC_TAG_BASE_LEN) 720aa75f4d3SHarshad Shirwadkar return NULL; 721aa75f4d3SHarshad Shirwadkar 722aa75f4d3SHarshad Shirwadkar if (!sbi->s_fc_bh) { 723aa75f4d3SHarshad Shirwadkar ret = jbd2_fc_get_buf(EXT4_SB(sb)->s_journal, &bh); 724aa75f4d3SHarshad Shirwadkar if (ret) 725aa75f4d3SHarshad Shirwadkar return NULL; 726aa75f4d3SHarshad Shirwadkar sbi->s_fc_bh = bh; 727aa75f4d3SHarshad Shirwadkar } 7288415ce07SEric Biggers dst = sbi->s_fc_bh->b_data + off; 72948a6a66dSEric Biggers 73048a6a66dSEric Biggers /* 73148a6a66dSEric Biggers * Allocate the bytes in the current block if we can do so while still 73248a6a66dSEric Biggers * leaving enough space for a PAD tlv. 73348a6a66dSEric Biggers */ 73448a6a66dSEric Biggers remaining = bsize - EXT4_FC_TAG_BASE_LEN - off; 73548a6a66dSEric Biggers if (len <= remaining) { 73648a6a66dSEric Biggers sbi->s_fc_bytes += len; 73748a6a66dSEric Biggers return dst; 7388415ce07SEric Biggers } 73948a6a66dSEric Biggers 74048a6a66dSEric Biggers /* 74148a6a66dSEric Biggers * Else, terminate the current block with a PAD tlv, then allocate a new 74248a6a66dSEric Biggers * block and allocate the bytes at the start of that new block. 74348a6a66dSEric Biggers */ 74448a6a66dSEric Biggers 74548a6a66dSEric Biggers tl.fc_tag = cpu_to_le16(EXT4_FC_TAG_PAD); 74648a6a66dSEric Biggers tl.fc_len = cpu_to_le16(remaining); 7478805dbcbSEric Biggers memcpy(dst, &tl, EXT4_FC_TAG_BASE_LEN); 7488805dbcbSEric Biggers memset(dst + EXT4_FC_TAG_BASE_LEN, 0, remaining); 7498805dbcbSEric Biggers *crc = ext4_chksum(sbi, *crc, sbi->s_fc_bh->b_data, bsize); 750594bc43bSEric Biggers 751e9f53353SDaejun Park ext4_fc_submit_bh(sb, false); 752aa75f4d3SHarshad Shirwadkar 753aa75f4d3SHarshad Shirwadkar ret = jbd2_fc_get_buf(EXT4_SB(sb)->s_journal, &bh); 754aa75f4d3SHarshad Shirwadkar if (ret) 755aa75f4d3SHarshad Shirwadkar return NULL; 756aa75f4d3SHarshad Shirwadkar sbi->s_fc_bh = bh; 75748a6a66dSEric Biggers sbi->s_fc_bytes += bsize - off + len; 758aa75f4d3SHarshad Shirwadkar return sbi->s_fc_bh->b_data; 759aa75f4d3SHarshad Shirwadkar } 760aa75f4d3SHarshad Shirwadkar 761aa75f4d3SHarshad Shirwadkar /* 762aa75f4d3SHarshad Shirwadkar * Complete a fast commit by writing tail tag. 763aa75f4d3SHarshad Shirwadkar * 764aa75f4d3SHarshad Shirwadkar * Writing tail tag marks the end of a fast commit. In order to guarantee 765aa75f4d3SHarshad Shirwadkar * atomicity, after writing tail tag, even if there's space remaining 766aa75f4d3SHarshad Shirwadkar * in the block, next commit shouldn't use it. That's why tail tag 767aa75f4d3SHarshad Shirwadkar * has the length as that of the remaining space on the block. 768aa75f4d3SHarshad Shirwadkar */ 769aa75f4d3SHarshad Shirwadkar static int ext4_fc_write_tail(struct super_block *sb, u32 crc) 770aa75f4d3SHarshad Shirwadkar { 771aa75f4d3SHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB(sb); 772aa75f4d3SHarshad Shirwadkar struct ext4_fc_tl tl; 773aa75f4d3SHarshad Shirwadkar struct ext4_fc_tail tail; 774aa75f4d3SHarshad Shirwadkar int off, bsize = sbi->s_journal->j_blocksize; 775aa75f4d3SHarshad Shirwadkar u8 *dst; 776aa75f4d3SHarshad Shirwadkar 777aa75f4d3SHarshad Shirwadkar /* 778aa75f4d3SHarshad Shirwadkar * ext4_fc_reserve_space takes care of allocating an extra block if 779aa75f4d3SHarshad Shirwadkar * there's no enough space on this block for accommodating this tail. 780aa75f4d3SHarshad Shirwadkar */ 781fdc2a3c7SYe Bin dst = ext4_fc_reserve_space(sb, EXT4_FC_TAG_BASE_LEN + sizeof(tail), &crc); 782aa75f4d3SHarshad Shirwadkar if (!dst) 783aa75f4d3SHarshad Shirwadkar return -ENOSPC; 784aa75f4d3SHarshad Shirwadkar 785aa75f4d3SHarshad Shirwadkar off = sbi->s_fc_bytes % bsize; 786aa75f4d3SHarshad Shirwadkar 787aa75f4d3SHarshad Shirwadkar tl.fc_tag = cpu_to_le16(EXT4_FC_TAG_TAIL); 78848a6a66dSEric Biggers tl.fc_len = cpu_to_le16(bsize - off + sizeof(struct ext4_fc_tail)); 789aa75f4d3SHarshad Shirwadkar sbi->s_fc_bytes = round_up(sbi->s_fc_bytes, bsize); 790aa75f4d3SHarshad Shirwadkar 7918805dbcbSEric Biggers memcpy(dst, &tl, EXT4_FC_TAG_BASE_LEN); 792fdc2a3c7SYe Bin dst += EXT4_FC_TAG_BASE_LEN; 793aa75f4d3SHarshad Shirwadkar tail.fc_tid = cpu_to_le32(sbi->s_journal->j_running_transaction->t_tid); 7948805dbcbSEric Biggers memcpy(dst, &tail.fc_tid, sizeof(tail.fc_tid)); 795aa75f4d3SHarshad Shirwadkar dst += sizeof(tail.fc_tid); 7968805dbcbSEric Biggers crc = ext4_chksum(sbi, crc, sbi->s_fc_bh->b_data, 7978805dbcbSEric Biggers dst - (u8 *)sbi->s_fc_bh->b_data); 798aa75f4d3SHarshad Shirwadkar tail.fc_crc = cpu_to_le32(crc); 7998805dbcbSEric Biggers memcpy(dst, &tail.fc_crc, sizeof(tail.fc_crc)); 800594bc43bSEric Biggers dst += sizeof(tail.fc_crc); 801594bc43bSEric Biggers memset(dst, 0, bsize - off); /* Don't leak uninitialized memory. */ 802aa75f4d3SHarshad Shirwadkar 803e9f53353SDaejun Park ext4_fc_submit_bh(sb, true); 804aa75f4d3SHarshad Shirwadkar 805aa75f4d3SHarshad Shirwadkar return 0; 806aa75f4d3SHarshad Shirwadkar } 807aa75f4d3SHarshad Shirwadkar 808aa75f4d3SHarshad Shirwadkar /* 809aa75f4d3SHarshad Shirwadkar * Adds tag, length, value and updates CRC. Returns true if tlv was added. 810aa75f4d3SHarshad Shirwadkar * Returns false if there's not enough space. 811aa75f4d3SHarshad Shirwadkar */ 812aa75f4d3SHarshad Shirwadkar static bool ext4_fc_add_tlv(struct super_block *sb, u16 tag, u16 len, u8 *val, 813aa75f4d3SHarshad Shirwadkar u32 *crc) 814aa75f4d3SHarshad Shirwadkar { 815aa75f4d3SHarshad Shirwadkar struct ext4_fc_tl tl; 816aa75f4d3SHarshad Shirwadkar u8 *dst; 817aa75f4d3SHarshad Shirwadkar 818fdc2a3c7SYe Bin dst = ext4_fc_reserve_space(sb, EXT4_FC_TAG_BASE_LEN + len, crc); 819aa75f4d3SHarshad Shirwadkar if (!dst) 820aa75f4d3SHarshad Shirwadkar return false; 821aa75f4d3SHarshad Shirwadkar 822aa75f4d3SHarshad Shirwadkar tl.fc_tag = cpu_to_le16(tag); 823aa75f4d3SHarshad Shirwadkar tl.fc_len = cpu_to_le16(len); 824aa75f4d3SHarshad Shirwadkar 8258805dbcbSEric Biggers memcpy(dst, &tl, EXT4_FC_TAG_BASE_LEN); 8268805dbcbSEric Biggers memcpy(dst + EXT4_FC_TAG_BASE_LEN, val, len); 827aa75f4d3SHarshad Shirwadkar 828aa75f4d3SHarshad Shirwadkar return true; 829aa75f4d3SHarshad Shirwadkar } 830aa75f4d3SHarshad Shirwadkar 831aa75f4d3SHarshad Shirwadkar /* Same as above, but adds dentry tlv. */ 832facec450SGuoqing Jiang static bool ext4_fc_add_dentry_tlv(struct super_block *sb, u32 *crc, 833facec450SGuoqing Jiang struct ext4_fc_dentry_update *fc_dentry) 834aa75f4d3SHarshad Shirwadkar { 835aa75f4d3SHarshad Shirwadkar struct ext4_fc_dentry_info fcd; 836aa75f4d3SHarshad Shirwadkar struct ext4_fc_tl tl; 837facec450SGuoqing Jiang int dlen = fc_dentry->fcd_name.len; 838fdc2a3c7SYe Bin u8 *dst = ext4_fc_reserve_space(sb, 839fdc2a3c7SYe Bin EXT4_FC_TAG_BASE_LEN + sizeof(fcd) + dlen, crc); 840aa75f4d3SHarshad Shirwadkar 841aa75f4d3SHarshad Shirwadkar if (!dst) 842aa75f4d3SHarshad Shirwadkar return false; 843aa75f4d3SHarshad Shirwadkar 844facec450SGuoqing Jiang fcd.fc_parent_ino = cpu_to_le32(fc_dentry->fcd_parent); 845facec450SGuoqing Jiang fcd.fc_ino = cpu_to_le32(fc_dentry->fcd_ino); 846facec450SGuoqing Jiang tl.fc_tag = cpu_to_le16(fc_dentry->fcd_op); 847aa75f4d3SHarshad Shirwadkar tl.fc_len = cpu_to_le16(sizeof(fcd) + dlen); 8488805dbcbSEric Biggers memcpy(dst, &tl, EXT4_FC_TAG_BASE_LEN); 849fdc2a3c7SYe Bin dst += EXT4_FC_TAG_BASE_LEN; 8508805dbcbSEric Biggers memcpy(dst, &fcd, sizeof(fcd)); 851aa75f4d3SHarshad Shirwadkar dst += sizeof(fcd); 8528805dbcbSEric Biggers memcpy(dst, fc_dentry->fcd_name.name, dlen); 853aa75f4d3SHarshad Shirwadkar 854aa75f4d3SHarshad Shirwadkar return true; 855aa75f4d3SHarshad Shirwadkar } 856aa75f4d3SHarshad Shirwadkar 857aa75f4d3SHarshad Shirwadkar /* 858aa75f4d3SHarshad Shirwadkar * Writes inode in the fast commit space under TLV with tag @tag. 859aa75f4d3SHarshad Shirwadkar * Returns 0 on success, error on failure. 860aa75f4d3SHarshad Shirwadkar */ 861aa75f4d3SHarshad Shirwadkar static int ext4_fc_write_inode(struct inode *inode, u32 *crc) 862aa75f4d3SHarshad Shirwadkar { 863aa75f4d3SHarshad Shirwadkar struct ext4_inode_info *ei = EXT4_I(inode); 864aa75f4d3SHarshad Shirwadkar int inode_len = EXT4_GOOD_OLD_INODE_SIZE; 865aa75f4d3SHarshad Shirwadkar int ret; 866aa75f4d3SHarshad Shirwadkar struct ext4_iloc iloc; 867aa75f4d3SHarshad Shirwadkar struct ext4_fc_inode fc_inode; 868aa75f4d3SHarshad Shirwadkar struct ext4_fc_tl tl; 869aa75f4d3SHarshad Shirwadkar u8 *dst; 870aa75f4d3SHarshad Shirwadkar 871aa75f4d3SHarshad Shirwadkar ret = ext4_get_inode_loc(inode, &iloc); 872aa75f4d3SHarshad Shirwadkar if (ret) 873aa75f4d3SHarshad Shirwadkar return ret; 874aa75f4d3SHarshad Shirwadkar 8756c31a689SHarshad Shirwadkar if (ext4_test_inode_flag(inode, EXT4_INODE_INLINE_DATA)) 8766c31a689SHarshad Shirwadkar inode_len = EXT4_INODE_SIZE(inode->i_sb); 8776c31a689SHarshad Shirwadkar else if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) 878aa75f4d3SHarshad Shirwadkar inode_len += ei->i_extra_isize; 879aa75f4d3SHarshad Shirwadkar 880aa75f4d3SHarshad Shirwadkar fc_inode.fc_ino = cpu_to_le32(inode->i_ino); 881aa75f4d3SHarshad Shirwadkar tl.fc_tag = cpu_to_le16(EXT4_FC_TAG_INODE); 882aa75f4d3SHarshad Shirwadkar tl.fc_len = cpu_to_le16(inode_len + sizeof(fc_inode.fc_ino)); 883aa75f4d3SHarshad Shirwadkar 884ccbf8eebSYe Bin ret = -ECANCELED; 885aa75f4d3SHarshad Shirwadkar dst = ext4_fc_reserve_space(inode->i_sb, 886fdc2a3c7SYe Bin EXT4_FC_TAG_BASE_LEN + inode_len + sizeof(fc_inode.fc_ino), crc); 887aa75f4d3SHarshad Shirwadkar if (!dst) 888ccbf8eebSYe Bin goto err; 889aa75f4d3SHarshad Shirwadkar 8908805dbcbSEric Biggers memcpy(dst, &tl, EXT4_FC_TAG_BASE_LEN); 891fdc2a3c7SYe Bin dst += EXT4_FC_TAG_BASE_LEN; 8928805dbcbSEric Biggers memcpy(dst, &fc_inode, sizeof(fc_inode)); 893aa75f4d3SHarshad Shirwadkar dst += sizeof(fc_inode); 8948805dbcbSEric Biggers memcpy(dst, (u8 *)ext4_raw_inode(&iloc), inode_len); 895ccbf8eebSYe Bin ret = 0; 896ccbf8eebSYe Bin err: 897ccbf8eebSYe Bin brelse(iloc.bh); 898ccbf8eebSYe Bin return ret; 899aa75f4d3SHarshad Shirwadkar } 900aa75f4d3SHarshad Shirwadkar 901aa75f4d3SHarshad Shirwadkar /* 902aa75f4d3SHarshad Shirwadkar * Writes updated data ranges for the inode in question. Updates CRC. 903aa75f4d3SHarshad Shirwadkar * Returns 0 on success, error otherwise. 904aa75f4d3SHarshad Shirwadkar */ 905aa75f4d3SHarshad Shirwadkar static int ext4_fc_write_inode_data(struct inode *inode, u32 *crc) 906aa75f4d3SHarshad Shirwadkar { 907aa75f4d3SHarshad Shirwadkar ext4_lblk_t old_blk_size, cur_lblk_off, new_blk_size; 908aa75f4d3SHarshad Shirwadkar struct ext4_inode_info *ei = EXT4_I(inode); 909aa75f4d3SHarshad Shirwadkar struct ext4_map_blocks map; 910aa75f4d3SHarshad Shirwadkar struct ext4_fc_add_range fc_ext; 911aa75f4d3SHarshad Shirwadkar struct ext4_fc_del_range lrange; 912aa75f4d3SHarshad Shirwadkar struct ext4_extent *ex; 913aa75f4d3SHarshad Shirwadkar int ret; 914aa75f4d3SHarshad Shirwadkar 915aa75f4d3SHarshad Shirwadkar mutex_lock(&ei->i_fc_lock); 916aa75f4d3SHarshad Shirwadkar if (ei->i_fc_lblk_len == 0) { 917aa75f4d3SHarshad Shirwadkar mutex_unlock(&ei->i_fc_lock); 918aa75f4d3SHarshad Shirwadkar return 0; 919aa75f4d3SHarshad Shirwadkar } 920aa75f4d3SHarshad Shirwadkar old_blk_size = ei->i_fc_lblk_start; 921aa75f4d3SHarshad Shirwadkar new_blk_size = ei->i_fc_lblk_start + ei->i_fc_lblk_len - 1; 922aa75f4d3SHarshad Shirwadkar ei->i_fc_lblk_len = 0; 923aa75f4d3SHarshad Shirwadkar mutex_unlock(&ei->i_fc_lock); 924aa75f4d3SHarshad Shirwadkar 925aa75f4d3SHarshad Shirwadkar cur_lblk_off = old_blk_size; 9264978c659SJan Kara ext4_debug("will try writing %d to %d for inode %ld\n", 9274978c659SJan Kara cur_lblk_off, new_blk_size, inode->i_ino); 928aa75f4d3SHarshad Shirwadkar 929aa75f4d3SHarshad Shirwadkar while (cur_lblk_off <= new_blk_size) { 930aa75f4d3SHarshad Shirwadkar map.m_lblk = cur_lblk_off; 931aa75f4d3SHarshad Shirwadkar map.m_len = new_blk_size - cur_lblk_off + 1; 932aa75f4d3SHarshad Shirwadkar ret = ext4_map_blocks(NULL, inode, &map, 0); 933aa75f4d3SHarshad Shirwadkar if (ret < 0) 934aa75f4d3SHarshad Shirwadkar return -ECANCELED; 935aa75f4d3SHarshad Shirwadkar 936aa75f4d3SHarshad Shirwadkar if (map.m_len == 0) { 937aa75f4d3SHarshad Shirwadkar cur_lblk_off++; 938aa75f4d3SHarshad Shirwadkar continue; 939aa75f4d3SHarshad Shirwadkar } 940aa75f4d3SHarshad Shirwadkar 941aa75f4d3SHarshad Shirwadkar if (ret == 0) { 942aa75f4d3SHarshad Shirwadkar lrange.fc_ino = cpu_to_le32(inode->i_ino); 943aa75f4d3SHarshad Shirwadkar lrange.fc_lblk = cpu_to_le32(map.m_lblk); 944aa75f4d3SHarshad Shirwadkar lrange.fc_len = cpu_to_le32(map.m_len); 945aa75f4d3SHarshad Shirwadkar if (!ext4_fc_add_tlv(inode->i_sb, EXT4_FC_TAG_DEL_RANGE, 946aa75f4d3SHarshad Shirwadkar sizeof(lrange), (u8 *)&lrange, crc)) 947aa75f4d3SHarshad Shirwadkar return -ENOSPC; 948aa75f4d3SHarshad Shirwadkar } else { 949a2c2f082SHou Tao unsigned int max = (map.m_flags & EXT4_MAP_UNWRITTEN) ? 950a2c2f082SHou Tao EXT_UNWRITTEN_MAX_LEN : EXT_INIT_MAX_LEN; 951a2c2f082SHou Tao 952a2c2f082SHou Tao /* Limit the number of blocks in one extent */ 953a2c2f082SHou Tao map.m_len = min(max, map.m_len); 954a2c2f082SHou Tao 955aa75f4d3SHarshad Shirwadkar fc_ext.fc_ino = cpu_to_le32(inode->i_ino); 956aa75f4d3SHarshad Shirwadkar ex = (struct ext4_extent *)&fc_ext.fc_ex; 957aa75f4d3SHarshad Shirwadkar ex->ee_block = cpu_to_le32(map.m_lblk); 958aa75f4d3SHarshad Shirwadkar ex->ee_len = cpu_to_le16(map.m_len); 959aa75f4d3SHarshad Shirwadkar ext4_ext_store_pblock(ex, map.m_pblk); 960aa75f4d3SHarshad Shirwadkar if (map.m_flags & EXT4_MAP_UNWRITTEN) 961aa75f4d3SHarshad Shirwadkar ext4_ext_mark_unwritten(ex); 962aa75f4d3SHarshad Shirwadkar else 963aa75f4d3SHarshad Shirwadkar ext4_ext_mark_initialized(ex); 964aa75f4d3SHarshad Shirwadkar if (!ext4_fc_add_tlv(inode->i_sb, EXT4_FC_TAG_ADD_RANGE, 965aa75f4d3SHarshad Shirwadkar sizeof(fc_ext), (u8 *)&fc_ext, crc)) 966aa75f4d3SHarshad Shirwadkar return -ENOSPC; 967aa75f4d3SHarshad Shirwadkar } 968aa75f4d3SHarshad Shirwadkar 969aa75f4d3SHarshad Shirwadkar cur_lblk_off += map.m_len; 970aa75f4d3SHarshad Shirwadkar } 971aa75f4d3SHarshad Shirwadkar 972aa75f4d3SHarshad Shirwadkar return 0; 973aa75f4d3SHarshad Shirwadkar } 974aa75f4d3SHarshad Shirwadkar 975aa75f4d3SHarshad Shirwadkar 976aa75f4d3SHarshad Shirwadkar /* Submit data for all the fast commit inodes */ 977aa75f4d3SHarshad Shirwadkar static int ext4_fc_submit_inode_data_all(journal_t *journal) 978aa75f4d3SHarshad Shirwadkar { 979c30365b9SYu Zhe struct super_block *sb = journal->j_private; 980aa75f4d3SHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB(sb); 981aa75f4d3SHarshad Shirwadkar struct ext4_inode_info *ei; 982aa75f4d3SHarshad Shirwadkar int ret = 0; 983aa75f4d3SHarshad Shirwadkar 984aa75f4d3SHarshad Shirwadkar spin_lock(&sbi->s_fc_lock); 98596e7c02dSDaejun Park list_for_each_entry(ei, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) { 986aa75f4d3SHarshad Shirwadkar ext4_set_inode_state(&ei->vfs_inode, EXT4_STATE_FC_COMMITTING); 987aa75f4d3SHarshad Shirwadkar while (atomic_read(&ei->i_fc_updates)) { 988aa75f4d3SHarshad Shirwadkar DEFINE_WAIT(wait); 989aa75f4d3SHarshad Shirwadkar 990aa75f4d3SHarshad Shirwadkar prepare_to_wait(&ei->i_fc_wait, &wait, 991aa75f4d3SHarshad Shirwadkar TASK_UNINTERRUPTIBLE); 992aa75f4d3SHarshad Shirwadkar if (atomic_read(&ei->i_fc_updates)) { 993aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock); 994aa75f4d3SHarshad Shirwadkar schedule(); 995aa75f4d3SHarshad Shirwadkar spin_lock(&sbi->s_fc_lock); 996aa75f4d3SHarshad Shirwadkar } 997aa75f4d3SHarshad Shirwadkar finish_wait(&ei->i_fc_wait, &wait); 998aa75f4d3SHarshad Shirwadkar } 999aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock); 1000f30ff35fSJan Kara ret = jbd2_submit_inode_data(journal, ei->jinode); 1001aa75f4d3SHarshad Shirwadkar if (ret) 1002aa75f4d3SHarshad Shirwadkar return ret; 1003aa75f4d3SHarshad Shirwadkar spin_lock(&sbi->s_fc_lock); 1004aa75f4d3SHarshad Shirwadkar } 1005aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock); 1006aa75f4d3SHarshad Shirwadkar 1007aa75f4d3SHarshad Shirwadkar return ret; 1008aa75f4d3SHarshad Shirwadkar } 1009aa75f4d3SHarshad Shirwadkar 1010aa75f4d3SHarshad Shirwadkar /* Wait for completion of data for all the fast commit inodes */ 1011aa75f4d3SHarshad Shirwadkar static int ext4_fc_wait_inode_data_all(journal_t *journal) 1012aa75f4d3SHarshad Shirwadkar { 1013c30365b9SYu Zhe struct super_block *sb = journal->j_private; 1014aa75f4d3SHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB(sb); 1015aa75f4d3SHarshad Shirwadkar struct ext4_inode_info *pos, *n; 1016aa75f4d3SHarshad Shirwadkar int ret = 0; 1017aa75f4d3SHarshad Shirwadkar 1018aa75f4d3SHarshad Shirwadkar spin_lock(&sbi->s_fc_lock); 1019aa75f4d3SHarshad Shirwadkar list_for_each_entry_safe(pos, n, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) { 1020aa75f4d3SHarshad Shirwadkar if (!ext4_test_inode_state(&pos->vfs_inode, 1021aa75f4d3SHarshad Shirwadkar EXT4_STATE_FC_COMMITTING)) 1022aa75f4d3SHarshad Shirwadkar continue; 1023aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock); 1024aa75f4d3SHarshad Shirwadkar 1025aa75f4d3SHarshad Shirwadkar ret = jbd2_wait_inode_data(journal, pos->jinode); 1026aa75f4d3SHarshad Shirwadkar if (ret) 1027aa75f4d3SHarshad Shirwadkar return ret; 1028aa75f4d3SHarshad Shirwadkar spin_lock(&sbi->s_fc_lock); 1029aa75f4d3SHarshad Shirwadkar } 1030aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock); 1031aa75f4d3SHarshad Shirwadkar 1032aa75f4d3SHarshad Shirwadkar return 0; 1033aa75f4d3SHarshad Shirwadkar } 1034aa75f4d3SHarshad Shirwadkar 1035aa75f4d3SHarshad Shirwadkar /* Commit all the directory entry updates */ 1036aa75f4d3SHarshad Shirwadkar static int ext4_fc_commit_dentry_updates(journal_t *journal, u32 *crc) 1037fa329e27STheodore Ts'o __acquires(&sbi->s_fc_lock) 1038fa329e27STheodore Ts'o __releases(&sbi->s_fc_lock) 1039aa75f4d3SHarshad Shirwadkar { 1040c30365b9SYu Zhe struct super_block *sb = journal->j_private; 1041aa75f4d3SHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB(sb); 104296e7c02dSDaejun Park struct ext4_fc_dentry_update *fc_dentry, *fc_dentry_n; 1043aa75f4d3SHarshad Shirwadkar struct inode *inode; 1044b3998b3bSRitesh Harjani struct ext4_inode_info *ei; 1045aa75f4d3SHarshad Shirwadkar int ret; 1046aa75f4d3SHarshad Shirwadkar 1047aa75f4d3SHarshad Shirwadkar if (list_empty(&sbi->s_fc_dentry_q[FC_Q_MAIN])) 1048aa75f4d3SHarshad Shirwadkar return 0; 104996e7c02dSDaejun Park list_for_each_entry_safe(fc_dentry, fc_dentry_n, 105096e7c02dSDaejun Park &sbi->s_fc_dentry_q[FC_Q_MAIN], fcd_list) { 1051aa75f4d3SHarshad Shirwadkar if (fc_dentry->fcd_op != EXT4_FC_TAG_CREAT) { 1052aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock); 1053facec450SGuoqing Jiang if (!ext4_fc_add_dentry_tlv(sb, crc, fc_dentry)) { 1054aa75f4d3SHarshad Shirwadkar ret = -ENOSPC; 1055aa75f4d3SHarshad Shirwadkar goto lock_and_exit; 1056aa75f4d3SHarshad Shirwadkar } 1057aa75f4d3SHarshad Shirwadkar spin_lock(&sbi->s_fc_lock); 1058aa75f4d3SHarshad Shirwadkar continue; 1059aa75f4d3SHarshad Shirwadkar } 1060aa75f4d3SHarshad Shirwadkar /* 1061b3998b3bSRitesh Harjani * With fcd_dilist we need not loop in sbi->s_fc_q to get the 1062b3998b3bSRitesh Harjani * corresponding inode pointer 1063aa75f4d3SHarshad Shirwadkar */ 1064b3998b3bSRitesh Harjani WARN_ON(list_empty(&fc_dentry->fcd_dilist)); 1065b3998b3bSRitesh Harjani ei = list_first_entry(&fc_dentry->fcd_dilist, 1066b3998b3bSRitesh Harjani struct ext4_inode_info, i_fc_dilist); 1067b3998b3bSRitesh Harjani inode = &ei->vfs_inode; 1068b3998b3bSRitesh Harjani WARN_ON(inode->i_ino != fc_dentry->fcd_ino); 1069b3998b3bSRitesh Harjani 1070aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock); 1071aa75f4d3SHarshad Shirwadkar 1072aa75f4d3SHarshad Shirwadkar /* 1073aa75f4d3SHarshad Shirwadkar * We first write the inode and then the create dirent. This 1074aa75f4d3SHarshad Shirwadkar * allows the recovery code to create an unnamed inode first 1075aa75f4d3SHarshad Shirwadkar * and then link it to a directory entry. This allows us 1076aa75f4d3SHarshad Shirwadkar * to use namei.c routines almost as is and simplifies 1077aa75f4d3SHarshad Shirwadkar * the recovery code. 1078aa75f4d3SHarshad Shirwadkar */ 1079aa75f4d3SHarshad Shirwadkar ret = ext4_fc_write_inode(inode, crc); 1080aa75f4d3SHarshad Shirwadkar if (ret) 1081aa75f4d3SHarshad Shirwadkar goto lock_and_exit; 1082aa75f4d3SHarshad Shirwadkar 1083aa75f4d3SHarshad Shirwadkar ret = ext4_fc_write_inode_data(inode, crc); 1084aa75f4d3SHarshad Shirwadkar if (ret) 1085aa75f4d3SHarshad Shirwadkar goto lock_and_exit; 1086aa75f4d3SHarshad Shirwadkar 1087facec450SGuoqing Jiang if (!ext4_fc_add_dentry_tlv(sb, crc, fc_dentry)) { 1088aa75f4d3SHarshad Shirwadkar ret = -ENOSPC; 1089aa75f4d3SHarshad Shirwadkar goto lock_and_exit; 1090aa75f4d3SHarshad Shirwadkar } 1091aa75f4d3SHarshad Shirwadkar 1092aa75f4d3SHarshad Shirwadkar spin_lock(&sbi->s_fc_lock); 1093aa75f4d3SHarshad Shirwadkar } 1094aa75f4d3SHarshad Shirwadkar return 0; 1095aa75f4d3SHarshad Shirwadkar lock_and_exit: 1096aa75f4d3SHarshad Shirwadkar spin_lock(&sbi->s_fc_lock); 1097aa75f4d3SHarshad Shirwadkar return ret; 1098aa75f4d3SHarshad Shirwadkar } 1099aa75f4d3SHarshad Shirwadkar 1100aa75f4d3SHarshad Shirwadkar static int ext4_fc_perform_commit(journal_t *journal) 1101aa75f4d3SHarshad Shirwadkar { 1102c30365b9SYu Zhe struct super_block *sb = journal->j_private; 1103aa75f4d3SHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB(sb); 1104aa75f4d3SHarshad Shirwadkar struct ext4_inode_info *iter; 1105aa75f4d3SHarshad Shirwadkar struct ext4_fc_head head; 1106aa75f4d3SHarshad Shirwadkar struct inode *inode; 1107aa75f4d3SHarshad Shirwadkar struct blk_plug plug; 1108aa75f4d3SHarshad Shirwadkar int ret = 0; 1109aa75f4d3SHarshad Shirwadkar u32 crc = 0; 1110aa75f4d3SHarshad Shirwadkar 1111aa75f4d3SHarshad Shirwadkar ret = ext4_fc_submit_inode_data_all(journal); 1112aa75f4d3SHarshad Shirwadkar if (ret) 1113aa75f4d3SHarshad Shirwadkar return ret; 1114aa75f4d3SHarshad Shirwadkar 1115aa75f4d3SHarshad Shirwadkar ret = ext4_fc_wait_inode_data_all(journal); 1116aa75f4d3SHarshad Shirwadkar if (ret) 1117aa75f4d3SHarshad Shirwadkar return ret; 1118aa75f4d3SHarshad Shirwadkar 1119da0c5d26SHarshad Shirwadkar /* 1120da0c5d26SHarshad Shirwadkar * If file system device is different from journal device, issue a cache 1121da0c5d26SHarshad Shirwadkar * flush before we start writing fast commit blocks. 1122da0c5d26SHarshad Shirwadkar */ 1123da0c5d26SHarshad Shirwadkar if (journal->j_fs_dev != journal->j_dev) 1124c6bf3f0eSChristoph Hellwig blkdev_issue_flush(journal->j_fs_dev); 1125da0c5d26SHarshad Shirwadkar 1126aa75f4d3SHarshad Shirwadkar blk_start_plug(&plug); 1127aa75f4d3SHarshad Shirwadkar if (sbi->s_fc_bytes == 0) { 1128aa75f4d3SHarshad Shirwadkar /* 1129aa75f4d3SHarshad Shirwadkar * Add a head tag only if this is the first fast commit 1130aa75f4d3SHarshad Shirwadkar * in this TID. 1131aa75f4d3SHarshad Shirwadkar */ 1132aa75f4d3SHarshad Shirwadkar head.fc_features = cpu_to_le32(EXT4_FC_SUPPORTED_FEATURES); 1133aa75f4d3SHarshad Shirwadkar head.fc_tid = cpu_to_le32( 1134aa75f4d3SHarshad Shirwadkar sbi->s_journal->j_running_transaction->t_tid); 1135aa75f4d3SHarshad Shirwadkar if (!ext4_fc_add_tlv(sb, EXT4_FC_TAG_HEAD, sizeof(head), 1136e1262cd2SXu Yihang (u8 *)&head, &crc)) { 1137e1262cd2SXu Yihang ret = -ENOSPC; 1138aa75f4d3SHarshad Shirwadkar goto out; 1139aa75f4d3SHarshad Shirwadkar } 1140e1262cd2SXu Yihang } 1141aa75f4d3SHarshad Shirwadkar 1142aa75f4d3SHarshad Shirwadkar spin_lock(&sbi->s_fc_lock); 1143aa75f4d3SHarshad Shirwadkar ret = ext4_fc_commit_dentry_updates(journal, &crc); 1144aa75f4d3SHarshad Shirwadkar if (ret) { 1145aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock); 1146aa75f4d3SHarshad Shirwadkar goto out; 1147aa75f4d3SHarshad Shirwadkar } 1148aa75f4d3SHarshad Shirwadkar 114996e7c02dSDaejun Park list_for_each_entry(iter, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) { 1150aa75f4d3SHarshad Shirwadkar inode = &iter->vfs_inode; 1151aa75f4d3SHarshad Shirwadkar if (!ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING)) 1152aa75f4d3SHarshad Shirwadkar continue; 1153aa75f4d3SHarshad Shirwadkar 1154aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock); 1155aa75f4d3SHarshad Shirwadkar ret = ext4_fc_write_inode_data(inode, &crc); 1156aa75f4d3SHarshad Shirwadkar if (ret) 1157aa75f4d3SHarshad Shirwadkar goto out; 1158aa75f4d3SHarshad Shirwadkar ret = ext4_fc_write_inode(inode, &crc); 1159aa75f4d3SHarshad Shirwadkar if (ret) 1160aa75f4d3SHarshad Shirwadkar goto out; 1161aa75f4d3SHarshad Shirwadkar spin_lock(&sbi->s_fc_lock); 1162aa75f4d3SHarshad Shirwadkar } 1163aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock); 1164aa75f4d3SHarshad Shirwadkar 1165aa75f4d3SHarshad Shirwadkar ret = ext4_fc_write_tail(sb, crc); 1166aa75f4d3SHarshad Shirwadkar 1167aa75f4d3SHarshad Shirwadkar out: 1168aa75f4d3SHarshad Shirwadkar blk_finish_plug(&plug); 1169aa75f4d3SHarshad Shirwadkar return ret; 1170aa75f4d3SHarshad Shirwadkar } 1171aa75f4d3SHarshad Shirwadkar 11720915e464SHarshad Shirwadkar static void ext4_fc_update_stats(struct super_block *sb, int status, 1173d9bf099cSRitesh Harjani u64 commit_time, int nblks, tid_t commit_tid) 11740915e464SHarshad Shirwadkar { 11750915e464SHarshad Shirwadkar struct ext4_fc_stats *stats = &EXT4_SB(sb)->s_fc_stats; 11760915e464SHarshad Shirwadkar 11774978c659SJan Kara ext4_debug("Fast commit ended with status = %d for tid %u", 1178d9bf099cSRitesh Harjani status, commit_tid); 11790915e464SHarshad Shirwadkar if (status == EXT4_FC_STATUS_OK) { 11800915e464SHarshad Shirwadkar stats->fc_num_commits++; 11810915e464SHarshad Shirwadkar stats->fc_numblks += nblks; 11820915e464SHarshad Shirwadkar if (likely(stats->s_fc_avg_commit_time)) 11830915e464SHarshad Shirwadkar stats->s_fc_avg_commit_time = 11840915e464SHarshad Shirwadkar (commit_time + 11850915e464SHarshad Shirwadkar stats->s_fc_avg_commit_time * 3) / 4; 11860915e464SHarshad Shirwadkar else 11870915e464SHarshad Shirwadkar stats->s_fc_avg_commit_time = commit_time; 11880915e464SHarshad Shirwadkar } else if (status == EXT4_FC_STATUS_FAILED || 11890915e464SHarshad Shirwadkar status == EXT4_FC_STATUS_INELIGIBLE) { 11900915e464SHarshad Shirwadkar if (status == EXT4_FC_STATUS_FAILED) 11910915e464SHarshad Shirwadkar stats->fc_failed_commits++; 11920915e464SHarshad Shirwadkar stats->fc_ineligible_commits++; 11930915e464SHarshad Shirwadkar } else { 11940915e464SHarshad Shirwadkar stats->fc_skipped_commits++; 11950915e464SHarshad Shirwadkar } 11965641ace5SRitesh Harjani trace_ext4_fc_commit_stop(sb, nblks, status, commit_tid); 11970915e464SHarshad Shirwadkar } 11980915e464SHarshad Shirwadkar 1199aa75f4d3SHarshad Shirwadkar /* 1200aa75f4d3SHarshad Shirwadkar * The main commit entry point. Performs a fast commit for transaction 1201aa75f4d3SHarshad Shirwadkar * commit_tid if needed. If it's not possible to perform a fast commit 1202aa75f4d3SHarshad Shirwadkar * due to various reasons, we fall back to full commit. Returns 0 1203aa75f4d3SHarshad Shirwadkar * on success, error otherwise. 1204aa75f4d3SHarshad Shirwadkar */ 1205aa75f4d3SHarshad Shirwadkar int ext4_fc_commit(journal_t *journal, tid_t commit_tid) 1206aa75f4d3SHarshad Shirwadkar { 1207c30365b9SYu Zhe struct super_block *sb = journal->j_private; 1208aa75f4d3SHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB(sb); 1209aa75f4d3SHarshad Shirwadkar int nblks = 0, ret, bsize = journal->j_blocksize; 1210aa75f4d3SHarshad Shirwadkar int subtid = atomic_read(&sbi->s_fc_subtid); 12110915e464SHarshad Shirwadkar int status = EXT4_FC_STATUS_OK, fc_bufs_before = 0; 1212aa75f4d3SHarshad Shirwadkar ktime_t start_time, commit_time; 1213aa75f4d3SHarshad Shirwadkar 12147f142440SRitesh Harjani if (!test_opt2(sb, JOURNAL_FAST_COMMIT)) 12157f142440SRitesh Harjani return jbd2_complete_transaction(journal, commit_tid); 12167f142440SRitesh Harjani 12175641ace5SRitesh Harjani trace_ext4_fc_commit_start(sb, commit_tid); 1218aa75f4d3SHarshad Shirwadkar 1219aa75f4d3SHarshad Shirwadkar start_time = ktime_get(); 1220aa75f4d3SHarshad Shirwadkar 1221aa75f4d3SHarshad Shirwadkar restart_fc: 1222aa75f4d3SHarshad Shirwadkar ret = jbd2_fc_begin_commit(journal, commit_tid); 1223aa75f4d3SHarshad Shirwadkar if (ret == -EALREADY) { 1224aa75f4d3SHarshad Shirwadkar /* There was an ongoing commit, check if we need to restart */ 1225aa75f4d3SHarshad Shirwadkar if (atomic_read(&sbi->s_fc_subtid) <= subtid && 12263236afd1SLuis Henriques (SUSE) tid_gt(commit_tid, journal->j_commit_sequence)) 1227aa75f4d3SHarshad Shirwadkar goto restart_fc; 1228d9bf099cSRitesh Harjani ext4_fc_update_stats(sb, EXT4_FC_STATUS_SKIPPED, 0, 0, 1229d9bf099cSRitesh Harjani commit_tid); 12300915e464SHarshad Shirwadkar return 0; 1231aa75f4d3SHarshad Shirwadkar } else if (ret) { 12320915e464SHarshad Shirwadkar /* 12330915e464SHarshad Shirwadkar * Commit couldn't start. Just update stats and perform a 12340915e464SHarshad Shirwadkar * full commit. 12350915e464SHarshad Shirwadkar */ 1236d9bf099cSRitesh Harjani ext4_fc_update_stats(sb, EXT4_FC_STATUS_FAILED, 0, 0, 1237d9bf099cSRitesh Harjani commit_tid); 12380915e464SHarshad Shirwadkar return jbd2_complete_transaction(journal, commit_tid); 1239aa75f4d3SHarshad Shirwadkar } 12400915e464SHarshad Shirwadkar 12417bbbe241SHarshad Shirwadkar /* 12427bbbe241SHarshad Shirwadkar * After establishing journal barrier via jbd2_fc_begin_commit(), check 12437bbbe241SHarshad Shirwadkar * if we are fast commit ineligible. 12447bbbe241SHarshad Shirwadkar */ 12457bbbe241SHarshad Shirwadkar if (ext4_test_mount_flag(sb, EXT4_MF_FC_INELIGIBLE)) { 12460915e464SHarshad Shirwadkar status = EXT4_FC_STATUS_INELIGIBLE; 12470915e464SHarshad Shirwadkar goto fallback; 12487bbbe241SHarshad Shirwadkar } 1249aa75f4d3SHarshad Shirwadkar 1250aa75f4d3SHarshad Shirwadkar fc_bufs_before = (sbi->s_fc_bytes + bsize - 1) / bsize; 1251aa75f4d3SHarshad Shirwadkar ret = ext4_fc_perform_commit(journal); 1252aa75f4d3SHarshad Shirwadkar if (ret < 0) { 12530915e464SHarshad Shirwadkar status = EXT4_FC_STATUS_FAILED; 12540915e464SHarshad Shirwadkar goto fallback; 1255aa75f4d3SHarshad Shirwadkar } 1256aa75f4d3SHarshad Shirwadkar nblks = (sbi->s_fc_bytes + bsize - 1) / bsize - fc_bufs_before; 1257aa75f4d3SHarshad Shirwadkar ret = jbd2_fc_wait_bufs(journal, nblks); 1258aa75f4d3SHarshad Shirwadkar if (ret < 0) { 12590915e464SHarshad Shirwadkar status = EXT4_FC_STATUS_FAILED; 12600915e464SHarshad Shirwadkar goto fallback; 1261aa75f4d3SHarshad Shirwadkar } 1262aa75f4d3SHarshad Shirwadkar atomic_inc(&sbi->s_fc_subtid); 12630915e464SHarshad Shirwadkar ret = jbd2_fc_end_commit(journal); 1264aa75f4d3SHarshad Shirwadkar /* 12650915e464SHarshad Shirwadkar * weight the commit time higher than the average time so we 12660915e464SHarshad Shirwadkar * don't react too strongly to vast changes in the commit time 1267aa75f4d3SHarshad Shirwadkar */ 12680915e464SHarshad Shirwadkar commit_time = ktime_to_ns(ktime_sub(ktime_get(), start_time)); 1269d9bf099cSRitesh Harjani ext4_fc_update_stats(sb, status, commit_time, nblks, commit_tid); 12700915e464SHarshad Shirwadkar return ret; 12710915e464SHarshad Shirwadkar 12720915e464SHarshad Shirwadkar fallback: 12730915e464SHarshad Shirwadkar ret = jbd2_fc_end_commit_fallback(journal); 1274d9bf099cSRitesh Harjani ext4_fc_update_stats(sb, status, 0, 0, commit_tid); 12750915e464SHarshad Shirwadkar return ret; 1276aa75f4d3SHarshad Shirwadkar } 1277aa75f4d3SHarshad Shirwadkar 1278ff780b91SHarshad Shirwadkar /* 1279ff780b91SHarshad Shirwadkar * Fast commit cleanup routine. This is called after every fast commit and 1280ff780b91SHarshad Shirwadkar * full commit. full is true if we are called after a full commit. 1281ff780b91SHarshad Shirwadkar */ 1282e85c81baSXin Yin static void ext4_fc_cleanup(journal_t *journal, int full, tid_t tid) 1283ff780b91SHarshad Shirwadkar { 1284aa75f4d3SHarshad Shirwadkar struct super_block *sb = journal->j_private; 1285aa75f4d3SHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB(sb); 128696e7c02dSDaejun Park struct ext4_inode_info *iter, *iter_n; 1287aa75f4d3SHarshad Shirwadkar struct ext4_fc_dentry_update *fc_dentry; 1288aa75f4d3SHarshad Shirwadkar 1289aa75f4d3SHarshad Shirwadkar if (full && sbi->s_fc_bh) 1290aa75f4d3SHarshad Shirwadkar sbi->s_fc_bh = NULL; 1291aa75f4d3SHarshad Shirwadkar 129208f4c42aSRitesh Harjani trace_ext4_fc_cleanup(journal, full, tid); 1293aa75f4d3SHarshad Shirwadkar jbd2_fc_release_bufs(journal); 1294aa75f4d3SHarshad Shirwadkar 1295aa75f4d3SHarshad Shirwadkar spin_lock(&sbi->s_fc_lock); 129696e7c02dSDaejun Park list_for_each_entry_safe(iter, iter_n, &sbi->s_fc_q[FC_Q_MAIN], 129796e7c02dSDaejun Park i_fc_list) { 1298aa75f4d3SHarshad Shirwadkar list_del_init(&iter->i_fc_list); 1299aa75f4d3SHarshad Shirwadkar ext4_clear_inode_state(&iter->vfs_inode, 1300aa75f4d3SHarshad Shirwadkar EXT4_STATE_FC_COMMITTING); 1301d13a3558SLuis Henriques (SUSE) if (tid_geq(tid, iter->i_sync_tid)) { 1302aa75f4d3SHarshad Shirwadkar ext4_fc_reset_inode(&iter->vfs_inode); 1303d13a3558SLuis Henriques (SUSE) } else if (full) { 1304d13a3558SLuis Henriques (SUSE) /* 1305d13a3558SLuis Henriques (SUSE) * We are called after a full commit, inode has been 1306d13a3558SLuis Henriques (SUSE) * modified while the commit was running. Re-enqueue 1307d13a3558SLuis Henriques (SUSE) * the inode into STAGING, which will then be splice 1308d13a3558SLuis Henriques (SUSE) * back into MAIN. This cannot happen during 1309d13a3558SLuis Henriques (SUSE) * fastcommit because the journal is locked all the 1310d13a3558SLuis Henriques (SUSE) * time in that case (and tid doesn't increase so 1311d13a3558SLuis Henriques (SUSE) * tid check above isn't reliable). 1312d13a3558SLuis Henriques (SUSE) */ 1313d13a3558SLuis Henriques (SUSE) list_add_tail(&EXT4_I(&iter->vfs_inode)->i_fc_list, 1314d13a3558SLuis Henriques (SUSE) &sbi->s_fc_q[FC_Q_STAGING]); 1315d13a3558SLuis Henriques (SUSE) } 1316aa75f4d3SHarshad Shirwadkar /* Make sure EXT4_STATE_FC_COMMITTING bit is clear */ 1317aa75f4d3SHarshad Shirwadkar smp_mb(); 1318aa75f4d3SHarshad Shirwadkar #if (BITS_PER_LONG < 64) 1319aa75f4d3SHarshad Shirwadkar wake_up_bit(&iter->i_state_flags, EXT4_STATE_FC_COMMITTING); 1320aa75f4d3SHarshad Shirwadkar #else 1321aa75f4d3SHarshad Shirwadkar wake_up_bit(&iter->i_flags, EXT4_STATE_FC_COMMITTING); 1322aa75f4d3SHarshad Shirwadkar #endif 1323aa75f4d3SHarshad Shirwadkar } 1324aa75f4d3SHarshad Shirwadkar 1325aa75f4d3SHarshad Shirwadkar while (!list_empty(&sbi->s_fc_dentry_q[FC_Q_MAIN])) { 1326aa75f4d3SHarshad Shirwadkar fc_dentry = list_first_entry(&sbi->s_fc_dentry_q[FC_Q_MAIN], 1327aa75f4d3SHarshad Shirwadkar struct ext4_fc_dentry_update, 1328aa75f4d3SHarshad Shirwadkar fcd_list); 1329aa75f4d3SHarshad Shirwadkar list_del_init(&fc_dentry->fcd_list); 1330b3998b3bSRitesh Harjani list_del_init(&fc_dentry->fcd_dilist); 1331aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock); 1332aa75f4d3SHarshad Shirwadkar 1333aa75f4d3SHarshad Shirwadkar if (fc_dentry->fcd_name.name && 1334aa75f4d3SHarshad Shirwadkar fc_dentry->fcd_name.len > DNAME_INLINE_LEN) 1335aa75f4d3SHarshad Shirwadkar kfree(fc_dentry->fcd_name.name); 1336aa75f4d3SHarshad Shirwadkar kmem_cache_free(ext4_fc_dentry_cachep, fc_dentry); 1337aa75f4d3SHarshad Shirwadkar spin_lock(&sbi->s_fc_lock); 1338aa75f4d3SHarshad Shirwadkar } 1339aa75f4d3SHarshad Shirwadkar 1340aa75f4d3SHarshad Shirwadkar list_splice_init(&sbi->s_fc_dentry_q[FC_Q_STAGING], 1341aa75f4d3SHarshad Shirwadkar &sbi->s_fc_dentry_q[FC_Q_MAIN]); 1342aa75f4d3SHarshad Shirwadkar list_splice_init(&sbi->s_fc_q[FC_Q_STAGING], 134331e203e0SDaejun Park &sbi->s_fc_q[FC_Q_MAIN]); 1344aa75f4d3SHarshad Shirwadkar 13453236afd1SLuis Henriques (SUSE) if (tid_geq(tid, sbi->s_fc_ineligible_tid)) { 1346e85c81baSXin Yin sbi->s_fc_ineligible_tid = 0; 13479b5f6c9bSHarshad Shirwadkar ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE); 1348e85c81baSXin Yin } 1349aa75f4d3SHarshad Shirwadkar 1350aa75f4d3SHarshad Shirwadkar if (full) 1351aa75f4d3SHarshad Shirwadkar sbi->s_fc_bytes = 0; 1352aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock); 1353aa75f4d3SHarshad Shirwadkar trace_ext4_fc_stats(sb); 1354ff780b91SHarshad Shirwadkar } 13556866d7b3SHarshad Shirwadkar 13568016e29fSHarshad Shirwadkar /* Ext4 Replay Path Routines */ 13578016e29fSHarshad Shirwadkar 13588016e29fSHarshad Shirwadkar /* Helper struct for dentry replay routines */ 13598016e29fSHarshad Shirwadkar struct dentry_info_args { 13608016e29fSHarshad Shirwadkar int parent_ino, dname_len, ino, inode_len; 13618016e29fSHarshad Shirwadkar char *dname; 13628016e29fSHarshad Shirwadkar }; 13638016e29fSHarshad Shirwadkar 136411768cfdSEric Biggers /* Same as struct ext4_fc_tl, but uses native endianness fields */ 136511768cfdSEric Biggers struct ext4_fc_tl_mem { 136611768cfdSEric Biggers u16 fc_tag; 136711768cfdSEric Biggers u16 fc_len; 136811768cfdSEric Biggers }; 136911768cfdSEric Biggers 13708016e29fSHarshad Shirwadkar static inline void tl_to_darg(struct dentry_info_args *darg, 137111768cfdSEric Biggers struct ext4_fc_tl_mem *tl, u8 *val) 13728016e29fSHarshad Shirwadkar { 1373a7ba36bcSHarshad Shirwadkar struct ext4_fc_dentry_info fcd; 13748016e29fSHarshad Shirwadkar 1375a7ba36bcSHarshad Shirwadkar memcpy(&fcd, val, sizeof(fcd)); 13768016e29fSHarshad Shirwadkar 1377a7ba36bcSHarshad Shirwadkar darg->parent_ino = le32_to_cpu(fcd.fc_parent_ino); 1378a7ba36bcSHarshad Shirwadkar darg->ino = le32_to_cpu(fcd.fc_ino); 1379a7ba36bcSHarshad Shirwadkar darg->dname = val + offsetof(struct ext4_fc_dentry_info, fc_dname); 1380dcc58274SYe Bin darg->dname_len = tl->fc_len - sizeof(struct ext4_fc_dentry_info); 1381dcc58274SYe Bin } 1382dcc58274SYe Bin 138311768cfdSEric Biggers static inline void ext4_fc_get_tl(struct ext4_fc_tl_mem *tl, u8 *val) 1384dcc58274SYe Bin { 138511768cfdSEric Biggers struct ext4_fc_tl tl_disk; 138611768cfdSEric Biggers 138711768cfdSEric Biggers memcpy(&tl_disk, val, EXT4_FC_TAG_BASE_LEN); 138811768cfdSEric Biggers tl->fc_len = le16_to_cpu(tl_disk.fc_len); 138911768cfdSEric Biggers tl->fc_tag = le16_to_cpu(tl_disk.fc_tag); 13908016e29fSHarshad Shirwadkar } 13918016e29fSHarshad Shirwadkar 13928016e29fSHarshad Shirwadkar /* Unlink replay function */ 139311768cfdSEric Biggers static int ext4_fc_replay_unlink(struct super_block *sb, 139411768cfdSEric Biggers struct ext4_fc_tl_mem *tl, u8 *val) 13958016e29fSHarshad Shirwadkar { 13968016e29fSHarshad Shirwadkar struct inode *inode, *old_parent; 13978016e29fSHarshad Shirwadkar struct qstr entry; 13988016e29fSHarshad Shirwadkar struct dentry_info_args darg; 13998016e29fSHarshad Shirwadkar int ret = 0; 14008016e29fSHarshad Shirwadkar 1401a7ba36bcSHarshad Shirwadkar tl_to_darg(&darg, tl, val); 14028016e29fSHarshad Shirwadkar 14038016e29fSHarshad Shirwadkar trace_ext4_fc_replay(sb, EXT4_FC_TAG_UNLINK, darg.ino, 14048016e29fSHarshad Shirwadkar darg.parent_ino, darg.dname_len); 14058016e29fSHarshad Shirwadkar 14068016e29fSHarshad Shirwadkar entry.name = darg.dname; 14078016e29fSHarshad Shirwadkar entry.len = darg.dname_len; 14088016e29fSHarshad Shirwadkar inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL); 14098016e29fSHarshad Shirwadkar 141023dd561aSYi Li if (IS_ERR(inode)) { 14114978c659SJan Kara ext4_debug("Inode %d not found", darg.ino); 14128016e29fSHarshad Shirwadkar return 0; 14138016e29fSHarshad Shirwadkar } 14148016e29fSHarshad Shirwadkar 14158016e29fSHarshad Shirwadkar old_parent = ext4_iget(sb, darg.parent_ino, 14168016e29fSHarshad Shirwadkar EXT4_IGET_NORMAL); 141723dd561aSYi Li if (IS_ERR(old_parent)) { 14184978c659SJan Kara ext4_debug("Dir with inode %d not found", darg.parent_ino); 14198016e29fSHarshad Shirwadkar iput(inode); 14208016e29fSHarshad Shirwadkar return 0; 14218016e29fSHarshad Shirwadkar } 14228016e29fSHarshad Shirwadkar 14234c0d5778SEric Biggers ret = __ext4_unlink(old_parent, &entry, inode, NULL); 14248016e29fSHarshad Shirwadkar /* -ENOENT ok coz it might not exist anymore. */ 14258016e29fSHarshad Shirwadkar if (ret == -ENOENT) 14268016e29fSHarshad Shirwadkar ret = 0; 14278016e29fSHarshad Shirwadkar iput(old_parent); 14288016e29fSHarshad Shirwadkar iput(inode); 14298016e29fSHarshad Shirwadkar return ret; 14308016e29fSHarshad Shirwadkar } 14318016e29fSHarshad Shirwadkar 14328016e29fSHarshad Shirwadkar static int ext4_fc_replay_link_internal(struct super_block *sb, 14338016e29fSHarshad Shirwadkar struct dentry_info_args *darg, 14348016e29fSHarshad Shirwadkar struct inode *inode) 14358016e29fSHarshad Shirwadkar { 14368016e29fSHarshad Shirwadkar struct inode *dir = NULL; 14378016e29fSHarshad Shirwadkar struct dentry *dentry_dir = NULL, *dentry_inode = NULL; 14388016e29fSHarshad Shirwadkar struct qstr qstr_dname = QSTR_INIT(darg->dname, darg->dname_len); 14398016e29fSHarshad Shirwadkar int ret = 0; 14408016e29fSHarshad Shirwadkar 14418016e29fSHarshad Shirwadkar dir = ext4_iget(sb, darg->parent_ino, EXT4_IGET_NORMAL); 14428016e29fSHarshad Shirwadkar if (IS_ERR(dir)) { 14434978c659SJan Kara ext4_debug("Dir with inode %d not found.", darg->parent_ino); 14448016e29fSHarshad Shirwadkar dir = NULL; 14458016e29fSHarshad Shirwadkar goto out; 14468016e29fSHarshad Shirwadkar } 14478016e29fSHarshad Shirwadkar 14488016e29fSHarshad Shirwadkar dentry_dir = d_obtain_alias(dir); 14498016e29fSHarshad Shirwadkar if (IS_ERR(dentry_dir)) { 14504978c659SJan Kara ext4_debug("Failed to obtain dentry"); 14518016e29fSHarshad Shirwadkar dentry_dir = NULL; 14528016e29fSHarshad Shirwadkar goto out; 14538016e29fSHarshad Shirwadkar } 14548016e29fSHarshad Shirwadkar 14558016e29fSHarshad Shirwadkar dentry_inode = d_alloc(dentry_dir, &qstr_dname); 14568016e29fSHarshad Shirwadkar if (!dentry_inode) { 14574978c659SJan Kara ext4_debug("Inode dentry not created."); 14588016e29fSHarshad Shirwadkar ret = -ENOMEM; 14598016e29fSHarshad Shirwadkar goto out; 14608016e29fSHarshad Shirwadkar } 14618016e29fSHarshad Shirwadkar 14628016e29fSHarshad Shirwadkar ret = __ext4_link(dir, inode, dentry_inode); 14638016e29fSHarshad Shirwadkar /* 14648016e29fSHarshad Shirwadkar * It's possible that link already existed since data blocks 14658016e29fSHarshad Shirwadkar * for the dir in question got persisted before we crashed OR 14668016e29fSHarshad Shirwadkar * we replayed this tag and crashed before the entire replay 14678016e29fSHarshad Shirwadkar * could complete. 14688016e29fSHarshad Shirwadkar */ 14698016e29fSHarshad Shirwadkar if (ret && ret != -EEXIST) { 14704978c659SJan Kara ext4_debug("Failed to link\n"); 14718016e29fSHarshad Shirwadkar goto out; 14728016e29fSHarshad Shirwadkar } 14738016e29fSHarshad Shirwadkar 14748016e29fSHarshad Shirwadkar ret = 0; 14758016e29fSHarshad Shirwadkar out: 14768016e29fSHarshad Shirwadkar if (dentry_dir) { 14778016e29fSHarshad Shirwadkar d_drop(dentry_dir); 14788016e29fSHarshad Shirwadkar dput(dentry_dir); 14798016e29fSHarshad Shirwadkar } else if (dir) { 14808016e29fSHarshad Shirwadkar iput(dir); 14818016e29fSHarshad Shirwadkar } 14828016e29fSHarshad Shirwadkar if (dentry_inode) { 14838016e29fSHarshad Shirwadkar d_drop(dentry_inode); 14848016e29fSHarshad Shirwadkar dput(dentry_inode); 14858016e29fSHarshad Shirwadkar } 14868016e29fSHarshad Shirwadkar 14878016e29fSHarshad Shirwadkar return ret; 14888016e29fSHarshad Shirwadkar } 14898016e29fSHarshad Shirwadkar 14908016e29fSHarshad Shirwadkar /* Link replay function */ 149111768cfdSEric Biggers static int ext4_fc_replay_link(struct super_block *sb, 149211768cfdSEric Biggers struct ext4_fc_tl_mem *tl, u8 *val) 14938016e29fSHarshad Shirwadkar { 14948016e29fSHarshad Shirwadkar struct inode *inode; 14958016e29fSHarshad Shirwadkar struct dentry_info_args darg; 14968016e29fSHarshad Shirwadkar int ret = 0; 14978016e29fSHarshad Shirwadkar 1498a7ba36bcSHarshad Shirwadkar tl_to_darg(&darg, tl, val); 14998016e29fSHarshad Shirwadkar trace_ext4_fc_replay(sb, EXT4_FC_TAG_LINK, darg.ino, 15008016e29fSHarshad Shirwadkar darg.parent_ino, darg.dname_len); 15018016e29fSHarshad Shirwadkar 15028016e29fSHarshad Shirwadkar inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL); 150323dd561aSYi Li if (IS_ERR(inode)) { 15044978c659SJan Kara ext4_debug("Inode not found."); 15058016e29fSHarshad Shirwadkar return 0; 15068016e29fSHarshad Shirwadkar } 15078016e29fSHarshad Shirwadkar 15088016e29fSHarshad Shirwadkar ret = ext4_fc_replay_link_internal(sb, &darg, inode); 15098016e29fSHarshad Shirwadkar iput(inode); 15108016e29fSHarshad Shirwadkar return ret; 15118016e29fSHarshad Shirwadkar } 15128016e29fSHarshad Shirwadkar 15138016e29fSHarshad Shirwadkar /* 15148016e29fSHarshad Shirwadkar * Record all the modified inodes during replay. We use this later to setup 15158016e29fSHarshad Shirwadkar * block bitmaps correctly. 15168016e29fSHarshad Shirwadkar */ 15178016e29fSHarshad Shirwadkar static int ext4_fc_record_modified_inode(struct super_block *sb, int ino) 15188016e29fSHarshad Shirwadkar { 15198016e29fSHarshad Shirwadkar struct ext4_fc_replay_state *state; 15208016e29fSHarshad Shirwadkar int i; 15218016e29fSHarshad Shirwadkar 15228016e29fSHarshad Shirwadkar state = &EXT4_SB(sb)->s_fc_replay_state; 15238016e29fSHarshad Shirwadkar for (i = 0; i < state->fc_modified_inodes_used; i++) 15248016e29fSHarshad Shirwadkar if (state->fc_modified_inodes[i] == ino) 15258016e29fSHarshad Shirwadkar return 0; 15268016e29fSHarshad Shirwadkar if (state->fc_modified_inodes_used == state->fc_modified_inodes_size) { 15279305721aSYe Bin int *fc_modified_inodes; 15289305721aSYe Bin 15299305721aSYe Bin fc_modified_inodes = krealloc(state->fc_modified_inodes, 1530cdce59a1SRitesh Harjani sizeof(int) * (state->fc_modified_inodes_size + 1531cdce59a1SRitesh Harjani EXT4_FC_REPLAY_REALLOC_INCREMENT), 15328016e29fSHarshad Shirwadkar GFP_KERNEL); 15339305721aSYe Bin if (!fc_modified_inodes) 15348016e29fSHarshad Shirwadkar return -ENOMEM; 15359305721aSYe Bin state->fc_modified_inodes = fc_modified_inodes; 1536cdce59a1SRitesh Harjani state->fc_modified_inodes_size += 1537cdce59a1SRitesh Harjani EXT4_FC_REPLAY_REALLOC_INCREMENT; 15388016e29fSHarshad Shirwadkar } 15398016e29fSHarshad Shirwadkar state->fc_modified_inodes[state->fc_modified_inodes_used++] = ino; 15408016e29fSHarshad Shirwadkar return 0; 15418016e29fSHarshad Shirwadkar } 15428016e29fSHarshad Shirwadkar 15438016e29fSHarshad Shirwadkar /* 15448016e29fSHarshad Shirwadkar * Inode replay function 15458016e29fSHarshad Shirwadkar */ 154611768cfdSEric Biggers static int ext4_fc_replay_inode(struct super_block *sb, 154711768cfdSEric Biggers struct ext4_fc_tl_mem *tl, u8 *val) 15488016e29fSHarshad Shirwadkar { 1549a7ba36bcSHarshad Shirwadkar struct ext4_fc_inode fc_inode; 15508016e29fSHarshad Shirwadkar struct ext4_inode *raw_inode; 15518016e29fSHarshad Shirwadkar struct ext4_inode *raw_fc_inode; 15528016e29fSHarshad Shirwadkar struct inode *inode = NULL; 15538016e29fSHarshad Shirwadkar struct ext4_iloc iloc; 1554dcc58274SYe Bin int inode_len, ino, ret, tag = tl->fc_tag; 15558016e29fSHarshad Shirwadkar struct ext4_extent_header *eh; 15560d043351STheodore Ts'o size_t off_gen = offsetof(struct ext4_inode, i_generation); 15578016e29fSHarshad Shirwadkar 1558a7ba36bcSHarshad Shirwadkar memcpy(&fc_inode, val, sizeof(fc_inode)); 15598016e29fSHarshad Shirwadkar 1560a7ba36bcSHarshad Shirwadkar ino = le32_to_cpu(fc_inode.fc_ino); 15618016e29fSHarshad Shirwadkar trace_ext4_fc_replay(sb, tag, ino, 0, 0); 15628016e29fSHarshad Shirwadkar 15638016e29fSHarshad Shirwadkar inode = ext4_iget(sb, ino, EXT4_IGET_NORMAL); 156423dd561aSYi Li if (!IS_ERR(inode)) { 15658016e29fSHarshad Shirwadkar ext4_ext_clear_bb(inode); 15668016e29fSHarshad Shirwadkar iput(inode); 15678016e29fSHarshad Shirwadkar } 156823dd561aSYi Li inode = NULL; 15698016e29fSHarshad Shirwadkar 1570cdce59a1SRitesh Harjani ret = ext4_fc_record_modified_inode(sb, ino); 1571cdce59a1SRitesh Harjani if (ret) 1572cdce59a1SRitesh Harjani goto out; 15738016e29fSHarshad Shirwadkar 1574a7ba36bcSHarshad Shirwadkar raw_fc_inode = (struct ext4_inode *) 1575a7ba36bcSHarshad Shirwadkar (val + offsetof(struct ext4_fc_inode, fc_raw_inode)); 15768016e29fSHarshad Shirwadkar ret = ext4_get_fc_inode_loc(sb, ino, &iloc); 15778016e29fSHarshad Shirwadkar if (ret) 15788016e29fSHarshad Shirwadkar goto out; 15798016e29fSHarshad Shirwadkar 1580dcc58274SYe Bin inode_len = tl->fc_len - sizeof(struct ext4_fc_inode); 15818016e29fSHarshad Shirwadkar raw_inode = ext4_raw_inode(&iloc); 15828016e29fSHarshad Shirwadkar 15838016e29fSHarshad Shirwadkar memcpy(raw_inode, raw_fc_inode, offsetof(struct ext4_inode, i_block)); 15840d043351STheodore Ts'o memcpy((u8 *)raw_inode + off_gen, (u8 *)raw_fc_inode + off_gen, 15850d043351STheodore Ts'o inode_len - off_gen); 15868016e29fSHarshad Shirwadkar if (le32_to_cpu(raw_inode->i_flags) & EXT4_EXTENTS_FL) { 15878016e29fSHarshad Shirwadkar eh = (struct ext4_extent_header *)(&raw_inode->i_block[0]); 15888016e29fSHarshad Shirwadkar if (eh->eh_magic != EXT4_EXT_MAGIC) { 15898016e29fSHarshad Shirwadkar memset(eh, 0, sizeof(*eh)); 15908016e29fSHarshad Shirwadkar eh->eh_magic = EXT4_EXT_MAGIC; 15918016e29fSHarshad Shirwadkar eh->eh_max = cpu_to_le16( 15928016e29fSHarshad Shirwadkar (sizeof(raw_inode->i_block) - 15938016e29fSHarshad Shirwadkar sizeof(struct ext4_extent_header)) 15948016e29fSHarshad Shirwadkar / sizeof(struct ext4_extent)); 15958016e29fSHarshad Shirwadkar } 15968016e29fSHarshad Shirwadkar } else if (le32_to_cpu(raw_inode->i_flags) & EXT4_INLINE_DATA_FL) { 15978016e29fSHarshad Shirwadkar memcpy(raw_inode->i_block, raw_fc_inode->i_block, 15988016e29fSHarshad Shirwadkar sizeof(raw_inode->i_block)); 15998016e29fSHarshad Shirwadkar } 16008016e29fSHarshad Shirwadkar 16018016e29fSHarshad Shirwadkar /* Immediately update the inode on disk. */ 16028016e29fSHarshad Shirwadkar ret = ext4_handle_dirty_metadata(NULL, NULL, iloc.bh); 16038016e29fSHarshad Shirwadkar if (ret) 16048016e29fSHarshad Shirwadkar goto out; 16058016e29fSHarshad Shirwadkar ret = sync_dirty_buffer(iloc.bh); 16068016e29fSHarshad Shirwadkar if (ret) 16078016e29fSHarshad Shirwadkar goto out; 16088016e29fSHarshad Shirwadkar ret = ext4_mark_inode_used(sb, ino); 16098016e29fSHarshad Shirwadkar if (ret) 16108016e29fSHarshad Shirwadkar goto out; 16118016e29fSHarshad Shirwadkar 16128016e29fSHarshad Shirwadkar /* Given that we just wrote the inode on disk, this SHOULD succeed. */ 16138016e29fSHarshad Shirwadkar inode = ext4_iget(sb, ino, EXT4_IGET_NORMAL); 161423dd561aSYi Li if (IS_ERR(inode)) { 16154978c659SJan Kara ext4_debug("Inode not found."); 16168016e29fSHarshad Shirwadkar return -EFSCORRUPTED; 16178016e29fSHarshad Shirwadkar } 16188016e29fSHarshad Shirwadkar 16198016e29fSHarshad Shirwadkar /* 16208016e29fSHarshad Shirwadkar * Our allocator could have made different decisions than before 16218016e29fSHarshad Shirwadkar * crashing. This should be fixed but until then, we calculate 16228016e29fSHarshad Shirwadkar * the number of blocks the inode. 16238016e29fSHarshad Shirwadkar */ 16241ebf2178SHarshad Shirwadkar if (!ext4_test_inode_flag(inode, EXT4_INODE_INLINE_DATA)) 16258016e29fSHarshad Shirwadkar ext4_ext_replay_set_iblocks(inode); 16268016e29fSHarshad Shirwadkar 16278016e29fSHarshad Shirwadkar inode->i_generation = le32_to_cpu(ext4_raw_inode(&iloc)->i_generation); 16288016e29fSHarshad Shirwadkar ext4_reset_inode_seed(inode); 16298016e29fSHarshad Shirwadkar 16308016e29fSHarshad Shirwadkar ext4_inode_csum_set(inode, ext4_raw_inode(&iloc), EXT4_I(inode)); 16318016e29fSHarshad Shirwadkar ret = ext4_handle_dirty_metadata(NULL, NULL, iloc.bh); 16328016e29fSHarshad Shirwadkar sync_dirty_buffer(iloc.bh); 16338016e29fSHarshad Shirwadkar brelse(iloc.bh); 16348016e29fSHarshad Shirwadkar out: 16358016e29fSHarshad Shirwadkar iput(inode); 16368016e29fSHarshad Shirwadkar if (!ret) 1637c6bf3f0eSChristoph Hellwig blkdev_issue_flush(sb->s_bdev); 16388016e29fSHarshad Shirwadkar 16398016e29fSHarshad Shirwadkar return 0; 16408016e29fSHarshad Shirwadkar } 16418016e29fSHarshad Shirwadkar 16428016e29fSHarshad Shirwadkar /* 16438016e29fSHarshad Shirwadkar * Dentry create replay function. 16448016e29fSHarshad Shirwadkar * 16458016e29fSHarshad Shirwadkar * EXT4_FC_TAG_CREAT is preceded by EXT4_FC_TAG_INODE_FULL. Which means, the 16468016e29fSHarshad Shirwadkar * inode for which we are trying to create a dentry here, should already have 16478016e29fSHarshad Shirwadkar * been replayed before we start here. 16488016e29fSHarshad Shirwadkar */ 164911768cfdSEric Biggers static int ext4_fc_replay_create(struct super_block *sb, 165011768cfdSEric Biggers struct ext4_fc_tl_mem *tl, u8 *val) 16518016e29fSHarshad Shirwadkar { 16528016e29fSHarshad Shirwadkar int ret = 0; 16538016e29fSHarshad Shirwadkar struct inode *inode = NULL; 16548016e29fSHarshad Shirwadkar struct inode *dir = NULL; 16558016e29fSHarshad Shirwadkar struct dentry_info_args darg; 16568016e29fSHarshad Shirwadkar 1657a7ba36bcSHarshad Shirwadkar tl_to_darg(&darg, tl, val); 16588016e29fSHarshad Shirwadkar 16598016e29fSHarshad Shirwadkar trace_ext4_fc_replay(sb, EXT4_FC_TAG_CREAT, darg.ino, 16608016e29fSHarshad Shirwadkar darg.parent_ino, darg.dname_len); 16618016e29fSHarshad Shirwadkar 16628016e29fSHarshad Shirwadkar /* This takes care of update group descriptor and other metadata */ 16638016e29fSHarshad Shirwadkar ret = ext4_mark_inode_used(sb, darg.ino); 16648016e29fSHarshad Shirwadkar if (ret) 16658016e29fSHarshad Shirwadkar goto out; 16668016e29fSHarshad Shirwadkar 16678016e29fSHarshad Shirwadkar inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL); 166823dd561aSYi Li if (IS_ERR(inode)) { 16694978c659SJan Kara ext4_debug("inode %d not found.", darg.ino); 16708016e29fSHarshad Shirwadkar inode = NULL; 16718016e29fSHarshad Shirwadkar ret = -EINVAL; 16728016e29fSHarshad Shirwadkar goto out; 16738016e29fSHarshad Shirwadkar } 16748016e29fSHarshad Shirwadkar 16758016e29fSHarshad Shirwadkar if (S_ISDIR(inode->i_mode)) { 16768016e29fSHarshad Shirwadkar /* 16778016e29fSHarshad Shirwadkar * If we are creating a directory, we need to make sure that the 16788016e29fSHarshad Shirwadkar * dot and dot dot dirents are setup properly. 16798016e29fSHarshad Shirwadkar */ 16808016e29fSHarshad Shirwadkar dir = ext4_iget(sb, darg.parent_ino, EXT4_IGET_NORMAL); 168123dd561aSYi Li if (IS_ERR(dir)) { 16824978c659SJan Kara ext4_debug("Dir %d not found.", darg.ino); 16838016e29fSHarshad Shirwadkar goto out; 16848016e29fSHarshad Shirwadkar } 16858016e29fSHarshad Shirwadkar ret = ext4_init_new_dir(NULL, dir, inode); 16868016e29fSHarshad Shirwadkar iput(dir); 16878016e29fSHarshad Shirwadkar if (ret) { 16888016e29fSHarshad Shirwadkar ret = 0; 16898016e29fSHarshad Shirwadkar goto out; 16908016e29fSHarshad Shirwadkar } 16918016e29fSHarshad Shirwadkar } 16928016e29fSHarshad Shirwadkar ret = ext4_fc_replay_link_internal(sb, &darg, inode); 16938016e29fSHarshad Shirwadkar if (ret) 16948016e29fSHarshad Shirwadkar goto out; 16958016e29fSHarshad Shirwadkar set_nlink(inode, 1); 16968016e29fSHarshad Shirwadkar ext4_mark_inode_dirty(NULL, inode); 16978016e29fSHarshad Shirwadkar out: 16988016e29fSHarshad Shirwadkar iput(inode); 16998016e29fSHarshad Shirwadkar return ret; 17008016e29fSHarshad Shirwadkar } 17018016e29fSHarshad Shirwadkar 17028016e29fSHarshad Shirwadkar /* 1703599ea31dSXin Yin * Record physical disk regions which are in use as per fast commit area, 1704599ea31dSXin Yin * and used by inodes during replay phase. Our simple replay phase 1705599ea31dSXin Yin * allocator excludes these regions from allocation. 17068016e29fSHarshad Shirwadkar */ 1707599ea31dSXin Yin int ext4_fc_record_regions(struct super_block *sb, int ino, 1708599ea31dSXin Yin ext4_lblk_t lblk, ext4_fsblk_t pblk, int len, int replay) 17098016e29fSHarshad Shirwadkar { 17108016e29fSHarshad Shirwadkar struct ext4_fc_replay_state *state; 17118016e29fSHarshad Shirwadkar struct ext4_fc_alloc_region *region; 17128016e29fSHarshad Shirwadkar 17138016e29fSHarshad Shirwadkar state = &EXT4_SB(sb)->s_fc_replay_state; 1714599ea31dSXin Yin /* 1715599ea31dSXin Yin * during replay phase, the fc_regions_valid may not same as 1716599ea31dSXin Yin * fc_regions_used, update it when do new additions. 1717599ea31dSXin Yin */ 1718599ea31dSXin Yin if (replay && state->fc_regions_used != state->fc_regions_valid) 1719599ea31dSXin Yin state->fc_regions_used = state->fc_regions_valid; 17208016e29fSHarshad Shirwadkar if (state->fc_regions_used == state->fc_regions_size) { 17217069d105SYe Bin struct ext4_fc_alloc_region *fc_regions; 17227069d105SYe Bin 17237069d105SYe Bin fc_regions = krealloc(state->fc_regions, 172427cd4978SYe Bin sizeof(struct ext4_fc_alloc_region) * 172527cd4978SYe Bin (state->fc_regions_size + 172627cd4978SYe Bin EXT4_FC_REPLAY_REALLOC_INCREMENT), 17278016e29fSHarshad Shirwadkar GFP_KERNEL); 17287069d105SYe Bin if (!fc_regions) 17298016e29fSHarshad Shirwadkar return -ENOMEM; 173027cd4978SYe Bin state->fc_regions_size += 173127cd4978SYe Bin EXT4_FC_REPLAY_REALLOC_INCREMENT; 17327069d105SYe Bin state->fc_regions = fc_regions; 17338016e29fSHarshad Shirwadkar } 17348016e29fSHarshad Shirwadkar region = &state->fc_regions[state->fc_regions_used++]; 17358016e29fSHarshad Shirwadkar region->ino = ino; 17368016e29fSHarshad Shirwadkar region->lblk = lblk; 17378016e29fSHarshad Shirwadkar region->pblk = pblk; 17388016e29fSHarshad Shirwadkar region->len = len; 17398016e29fSHarshad Shirwadkar 1740599ea31dSXin Yin if (replay) 1741599ea31dSXin Yin state->fc_regions_valid++; 1742599ea31dSXin Yin 17438016e29fSHarshad Shirwadkar return 0; 17448016e29fSHarshad Shirwadkar } 17458016e29fSHarshad Shirwadkar 17468016e29fSHarshad Shirwadkar /* Replay add range tag */ 17478016e29fSHarshad Shirwadkar static int ext4_fc_replay_add_range(struct super_block *sb, 174811768cfdSEric Biggers struct ext4_fc_tl_mem *tl, u8 *val) 17498016e29fSHarshad Shirwadkar { 1750a7ba36bcSHarshad Shirwadkar struct ext4_fc_add_range fc_add_ex; 17518016e29fSHarshad Shirwadkar struct ext4_extent newex, *ex; 17528016e29fSHarshad Shirwadkar struct inode *inode; 17538016e29fSHarshad Shirwadkar ext4_lblk_t start, cur; 17548016e29fSHarshad Shirwadkar int remaining, len; 17558016e29fSHarshad Shirwadkar ext4_fsblk_t start_pblk; 17568016e29fSHarshad Shirwadkar struct ext4_map_blocks map; 17578016e29fSHarshad Shirwadkar struct ext4_ext_path *path = NULL; 17588016e29fSHarshad Shirwadkar int ret; 17598016e29fSHarshad Shirwadkar 1760a7ba36bcSHarshad Shirwadkar memcpy(&fc_add_ex, val, sizeof(fc_add_ex)); 1761a7ba36bcSHarshad Shirwadkar ex = (struct ext4_extent *)&fc_add_ex.fc_ex; 17628016e29fSHarshad Shirwadkar 17638016e29fSHarshad Shirwadkar trace_ext4_fc_replay(sb, EXT4_FC_TAG_ADD_RANGE, 1764a7ba36bcSHarshad Shirwadkar le32_to_cpu(fc_add_ex.fc_ino), le32_to_cpu(ex->ee_block), 17658016e29fSHarshad Shirwadkar ext4_ext_get_actual_len(ex)); 17668016e29fSHarshad Shirwadkar 1767a7ba36bcSHarshad Shirwadkar inode = ext4_iget(sb, le32_to_cpu(fc_add_ex.fc_ino), EXT4_IGET_NORMAL); 176823dd561aSYi Li if (IS_ERR(inode)) { 17694978c659SJan Kara ext4_debug("Inode not found."); 17708016e29fSHarshad Shirwadkar return 0; 17718016e29fSHarshad Shirwadkar } 17728016e29fSHarshad Shirwadkar 17738016e29fSHarshad Shirwadkar ret = ext4_fc_record_modified_inode(sb, inode->i_ino); 1774cdce59a1SRitesh Harjani if (ret) 1775cdce59a1SRitesh Harjani goto out; 17768016e29fSHarshad Shirwadkar 17778016e29fSHarshad Shirwadkar start = le32_to_cpu(ex->ee_block); 17788016e29fSHarshad Shirwadkar start_pblk = ext4_ext_pblock(ex); 17798016e29fSHarshad Shirwadkar len = ext4_ext_get_actual_len(ex); 17808016e29fSHarshad Shirwadkar 17818016e29fSHarshad Shirwadkar cur = start; 17828016e29fSHarshad Shirwadkar remaining = len; 17834978c659SJan Kara ext4_debug("ADD_RANGE, lblk %d, pblk %lld, len %d, unwritten %d, inode %ld\n", 17848016e29fSHarshad Shirwadkar start, start_pblk, len, ext4_ext_is_unwritten(ex), 17858016e29fSHarshad Shirwadkar inode->i_ino); 17868016e29fSHarshad Shirwadkar 17878016e29fSHarshad Shirwadkar while (remaining > 0) { 17888016e29fSHarshad Shirwadkar map.m_lblk = cur; 17898016e29fSHarshad Shirwadkar map.m_len = remaining; 17908016e29fSHarshad Shirwadkar map.m_pblk = 0; 17918016e29fSHarshad Shirwadkar ret = ext4_map_blocks(NULL, inode, &map, 0); 17928016e29fSHarshad Shirwadkar 1793cdce59a1SRitesh Harjani if (ret < 0) 1794cdce59a1SRitesh Harjani goto out; 17958016e29fSHarshad Shirwadkar 17968016e29fSHarshad Shirwadkar if (ret == 0) { 17978016e29fSHarshad Shirwadkar /* Range is not mapped */ 17988016e29fSHarshad Shirwadkar path = ext4_find_extent(inode, cur, NULL, 0); 1799cdce59a1SRitesh Harjani if (IS_ERR(path)) 1800cdce59a1SRitesh Harjani goto out; 18018016e29fSHarshad Shirwadkar memset(&newex, 0, sizeof(newex)); 18028016e29fSHarshad Shirwadkar newex.ee_block = cpu_to_le32(cur); 18038016e29fSHarshad Shirwadkar ext4_ext_store_pblock( 18048016e29fSHarshad Shirwadkar &newex, start_pblk + cur - start); 18058016e29fSHarshad Shirwadkar newex.ee_len = cpu_to_le16(map.m_len); 18068016e29fSHarshad Shirwadkar if (ext4_ext_is_unwritten(ex)) 18078016e29fSHarshad Shirwadkar ext4_ext_mark_unwritten(&newex); 18088016e29fSHarshad Shirwadkar down_write(&EXT4_I(inode)->i_data_sem); 18098016e29fSHarshad Shirwadkar ret = ext4_ext_insert_extent( 18108016e29fSHarshad Shirwadkar NULL, inode, &path, &newex, 0); 18118016e29fSHarshad Shirwadkar up_write((&EXT4_I(inode)->i_data_sem)); 18127ff5fddaSYe Bin ext4_free_ext_path(path); 1813cdce59a1SRitesh Harjani if (ret) 1814cdce59a1SRitesh Harjani goto out; 18158016e29fSHarshad Shirwadkar goto next; 18168016e29fSHarshad Shirwadkar } 18178016e29fSHarshad Shirwadkar 18188016e29fSHarshad Shirwadkar if (start_pblk + cur - start != map.m_pblk) { 18198016e29fSHarshad Shirwadkar /* 18208016e29fSHarshad Shirwadkar * Logical to physical mapping changed. This can happen 18218016e29fSHarshad Shirwadkar * if this range was removed and then reallocated to 18228016e29fSHarshad Shirwadkar * map to new physical blocks during a fast commit. 18238016e29fSHarshad Shirwadkar */ 18248016e29fSHarshad Shirwadkar ret = ext4_ext_replay_update_ex(inode, cur, map.m_len, 18258016e29fSHarshad Shirwadkar ext4_ext_is_unwritten(ex), 18268016e29fSHarshad Shirwadkar start_pblk + cur - start); 1827cdce59a1SRitesh Harjani if (ret) 1828cdce59a1SRitesh Harjani goto out; 18298016e29fSHarshad Shirwadkar /* 18308016e29fSHarshad Shirwadkar * Mark the old blocks as free since they aren't used 18318016e29fSHarshad Shirwadkar * anymore. We maintain an array of all the modified 18328016e29fSHarshad Shirwadkar * inodes. In case these blocks are still used at either 18338016e29fSHarshad Shirwadkar * a different logical range in the same inode or in 18348016e29fSHarshad Shirwadkar * some different inode, we will mark them as allocated 18358016e29fSHarshad Shirwadkar * at the end of the FC replay using our array of 18368016e29fSHarshad Shirwadkar * modified inodes. 18378016e29fSHarshad Shirwadkar */ 18388016e29fSHarshad Shirwadkar ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, 0); 18398016e29fSHarshad Shirwadkar goto next; 18408016e29fSHarshad Shirwadkar } 18418016e29fSHarshad Shirwadkar 18428016e29fSHarshad Shirwadkar /* Range is mapped and needs a state change */ 18434978c659SJan Kara ext4_debug("Converting from %ld to %d %lld", 18448016e29fSHarshad Shirwadkar map.m_flags & EXT4_MAP_UNWRITTEN, 18458016e29fSHarshad Shirwadkar ext4_ext_is_unwritten(ex), map.m_pblk); 18468016e29fSHarshad Shirwadkar ret = ext4_ext_replay_update_ex(inode, cur, map.m_len, 18478016e29fSHarshad Shirwadkar ext4_ext_is_unwritten(ex), map.m_pblk); 1848cdce59a1SRitesh Harjani if (ret) 1849cdce59a1SRitesh Harjani goto out; 18508016e29fSHarshad Shirwadkar /* 18518016e29fSHarshad Shirwadkar * We may have split the extent tree while toggling the state. 18528016e29fSHarshad Shirwadkar * Try to shrink the extent tree now. 18538016e29fSHarshad Shirwadkar */ 18548016e29fSHarshad Shirwadkar ext4_ext_replay_shrink_inode(inode, start + len); 18558016e29fSHarshad Shirwadkar next: 18568016e29fSHarshad Shirwadkar cur += map.m_len; 18578016e29fSHarshad Shirwadkar remaining -= map.m_len; 18588016e29fSHarshad Shirwadkar } 18598016e29fSHarshad Shirwadkar ext4_ext_replay_shrink_inode(inode, i_size_read(inode) >> 18608016e29fSHarshad Shirwadkar sb->s_blocksize_bits); 1861cdce59a1SRitesh Harjani out: 18628016e29fSHarshad Shirwadkar iput(inode); 18638016e29fSHarshad Shirwadkar return 0; 18648016e29fSHarshad Shirwadkar } 18658016e29fSHarshad Shirwadkar 18668016e29fSHarshad Shirwadkar /* Replay DEL_RANGE tag */ 18678016e29fSHarshad Shirwadkar static int 186811768cfdSEric Biggers ext4_fc_replay_del_range(struct super_block *sb, 186911768cfdSEric Biggers struct ext4_fc_tl_mem *tl, u8 *val) 18708016e29fSHarshad Shirwadkar { 18718016e29fSHarshad Shirwadkar struct inode *inode; 1872a7ba36bcSHarshad Shirwadkar struct ext4_fc_del_range lrange; 18738016e29fSHarshad Shirwadkar struct ext4_map_blocks map; 18748016e29fSHarshad Shirwadkar ext4_lblk_t cur, remaining; 18758016e29fSHarshad Shirwadkar int ret; 18768016e29fSHarshad Shirwadkar 1877a7ba36bcSHarshad Shirwadkar memcpy(&lrange, val, sizeof(lrange)); 1878a7ba36bcSHarshad Shirwadkar cur = le32_to_cpu(lrange.fc_lblk); 1879a7ba36bcSHarshad Shirwadkar remaining = le32_to_cpu(lrange.fc_len); 18808016e29fSHarshad Shirwadkar 18818016e29fSHarshad Shirwadkar trace_ext4_fc_replay(sb, EXT4_FC_TAG_DEL_RANGE, 1882a7ba36bcSHarshad Shirwadkar le32_to_cpu(lrange.fc_ino), cur, remaining); 18838016e29fSHarshad Shirwadkar 1884a7ba36bcSHarshad Shirwadkar inode = ext4_iget(sb, le32_to_cpu(lrange.fc_ino), EXT4_IGET_NORMAL); 188523dd561aSYi Li if (IS_ERR(inode)) { 18864978c659SJan Kara ext4_debug("Inode %d not found", le32_to_cpu(lrange.fc_ino)); 18878016e29fSHarshad Shirwadkar return 0; 18888016e29fSHarshad Shirwadkar } 18898016e29fSHarshad Shirwadkar 18908016e29fSHarshad Shirwadkar ret = ext4_fc_record_modified_inode(sb, inode->i_ino); 1891cdce59a1SRitesh Harjani if (ret) 1892cdce59a1SRitesh Harjani goto out; 18938016e29fSHarshad Shirwadkar 18944978c659SJan Kara ext4_debug("DEL_RANGE, inode %ld, lblk %d, len %d\n", 1895a7ba36bcSHarshad Shirwadkar inode->i_ino, le32_to_cpu(lrange.fc_lblk), 1896a7ba36bcSHarshad Shirwadkar le32_to_cpu(lrange.fc_len)); 18978016e29fSHarshad Shirwadkar while (remaining > 0) { 18988016e29fSHarshad Shirwadkar map.m_lblk = cur; 18998016e29fSHarshad Shirwadkar map.m_len = remaining; 19008016e29fSHarshad Shirwadkar 19018016e29fSHarshad Shirwadkar ret = ext4_map_blocks(NULL, inode, &map, 0); 1902cdce59a1SRitesh Harjani if (ret < 0) 1903cdce59a1SRitesh Harjani goto out; 19048016e29fSHarshad Shirwadkar if (ret > 0) { 19058016e29fSHarshad Shirwadkar remaining -= ret; 19068016e29fSHarshad Shirwadkar cur += ret; 19078016e29fSHarshad Shirwadkar ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, 0); 19088016e29fSHarshad Shirwadkar } else { 19098016e29fSHarshad Shirwadkar remaining -= map.m_len; 19108016e29fSHarshad Shirwadkar cur += map.m_len; 19118016e29fSHarshad Shirwadkar } 19128016e29fSHarshad Shirwadkar } 19138016e29fSHarshad Shirwadkar 19140b5b5a62SXin Yin down_write(&EXT4_I(inode)->i_data_sem); 19158fca8a2bSXin Yin ret = ext4_ext_remove_space(inode, le32_to_cpu(lrange.fc_lblk), 19168fca8a2bSXin Yin le32_to_cpu(lrange.fc_lblk) + 19178fca8a2bSXin Yin le32_to_cpu(lrange.fc_len) - 1); 19180b5b5a62SXin Yin up_write(&EXT4_I(inode)->i_data_sem); 1919cdce59a1SRitesh Harjani if (ret) 1920cdce59a1SRitesh Harjani goto out; 19218016e29fSHarshad Shirwadkar ext4_ext_replay_shrink_inode(inode, 19228016e29fSHarshad Shirwadkar i_size_read(inode) >> sb->s_blocksize_bits); 19238016e29fSHarshad Shirwadkar ext4_mark_inode_dirty(NULL, inode); 1924cdce59a1SRitesh Harjani out: 19258016e29fSHarshad Shirwadkar iput(inode); 19268016e29fSHarshad Shirwadkar return 0; 19278016e29fSHarshad Shirwadkar } 19288016e29fSHarshad Shirwadkar 19298016e29fSHarshad Shirwadkar static void ext4_fc_set_bitmaps_and_counters(struct super_block *sb) 19308016e29fSHarshad Shirwadkar { 19318016e29fSHarshad Shirwadkar struct ext4_fc_replay_state *state; 19328016e29fSHarshad Shirwadkar struct inode *inode; 19338016e29fSHarshad Shirwadkar struct ext4_ext_path *path = NULL; 19348016e29fSHarshad Shirwadkar struct ext4_map_blocks map; 19358016e29fSHarshad Shirwadkar int i, ret, j; 19368016e29fSHarshad Shirwadkar ext4_lblk_t cur, end; 19378016e29fSHarshad Shirwadkar 19388016e29fSHarshad Shirwadkar state = &EXT4_SB(sb)->s_fc_replay_state; 19398016e29fSHarshad Shirwadkar for (i = 0; i < state->fc_modified_inodes_used; i++) { 19408016e29fSHarshad Shirwadkar inode = ext4_iget(sb, state->fc_modified_inodes[i], 19418016e29fSHarshad Shirwadkar EXT4_IGET_NORMAL); 194223dd561aSYi Li if (IS_ERR(inode)) { 19434978c659SJan Kara ext4_debug("Inode %d not found.", 19448016e29fSHarshad Shirwadkar state->fc_modified_inodes[i]); 19458016e29fSHarshad Shirwadkar continue; 19468016e29fSHarshad Shirwadkar } 19478016e29fSHarshad Shirwadkar cur = 0; 19488016e29fSHarshad Shirwadkar end = EXT_MAX_BLOCKS; 19491ebf2178SHarshad Shirwadkar if (ext4_test_inode_flag(inode, EXT4_INODE_INLINE_DATA)) { 19501ebf2178SHarshad Shirwadkar iput(inode); 19511ebf2178SHarshad Shirwadkar continue; 19521ebf2178SHarshad Shirwadkar } 19538016e29fSHarshad Shirwadkar while (cur < end) { 19548016e29fSHarshad Shirwadkar map.m_lblk = cur; 19558016e29fSHarshad Shirwadkar map.m_len = end - cur; 19568016e29fSHarshad Shirwadkar 19578016e29fSHarshad Shirwadkar ret = ext4_map_blocks(NULL, inode, &map, 0); 19588016e29fSHarshad Shirwadkar if (ret < 0) 19598016e29fSHarshad Shirwadkar break; 19608016e29fSHarshad Shirwadkar 19618016e29fSHarshad Shirwadkar if (ret > 0) { 19628016e29fSHarshad Shirwadkar path = ext4_find_extent(inode, map.m_lblk, NULL, 0); 196323dd561aSYi Li if (!IS_ERR(path)) { 19648016e29fSHarshad Shirwadkar for (j = 0; j < path->p_depth; j++) 19658016e29fSHarshad Shirwadkar ext4_mb_mark_bb(inode->i_sb, 19668016e29fSHarshad Shirwadkar path[j].p_block, 1, 1); 19677ff5fddaSYe Bin ext4_free_ext_path(path); 19688016e29fSHarshad Shirwadkar } 19698016e29fSHarshad Shirwadkar cur += ret; 19708016e29fSHarshad Shirwadkar ext4_mb_mark_bb(inode->i_sb, map.m_pblk, 19718016e29fSHarshad Shirwadkar map.m_len, 1); 19728016e29fSHarshad Shirwadkar } else { 19738016e29fSHarshad Shirwadkar cur = cur + (map.m_len ? map.m_len : 1); 19748016e29fSHarshad Shirwadkar } 19758016e29fSHarshad Shirwadkar } 19768016e29fSHarshad Shirwadkar iput(inode); 19778016e29fSHarshad Shirwadkar } 19788016e29fSHarshad Shirwadkar } 19798016e29fSHarshad Shirwadkar 19808016e29fSHarshad Shirwadkar /* 19818016e29fSHarshad Shirwadkar * Check if block is in excluded regions for block allocation. The simple 19828016e29fSHarshad Shirwadkar * allocator that runs during replay phase is calls this function to see 19838016e29fSHarshad Shirwadkar * if it is okay to use a block. 19848016e29fSHarshad Shirwadkar */ 19858016e29fSHarshad Shirwadkar bool ext4_fc_replay_check_excluded(struct super_block *sb, ext4_fsblk_t blk) 19868016e29fSHarshad Shirwadkar { 19878016e29fSHarshad Shirwadkar int i; 19888016e29fSHarshad Shirwadkar struct ext4_fc_replay_state *state; 19898016e29fSHarshad Shirwadkar 19908016e29fSHarshad Shirwadkar state = &EXT4_SB(sb)->s_fc_replay_state; 19918016e29fSHarshad Shirwadkar for (i = 0; i < state->fc_regions_valid; i++) { 19928016e29fSHarshad Shirwadkar if (state->fc_regions[i].ino == 0 || 19938016e29fSHarshad Shirwadkar state->fc_regions[i].len == 0) 19948016e29fSHarshad Shirwadkar continue; 1995dbaafbadSRitesh Harjani if (in_range(blk, state->fc_regions[i].pblk, 1996dbaafbadSRitesh Harjani state->fc_regions[i].len)) 19978016e29fSHarshad Shirwadkar return true; 19988016e29fSHarshad Shirwadkar } 19998016e29fSHarshad Shirwadkar return false; 20008016e29fSHarshad Shirwadkar } 20018016e29fSHarshad Shirwadkar 20028016e29fSHarshad Shirwadkar /* Cleanup function called after replay */ 20038016e29fSHarshad Shirwadkar void ext4_fc_replay_cleanup(struct super_block *sb) 20048016e29fSHarshad Shirwadkar { 20058016e29fSHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB(sb); 20068016e29fSHarshad Shirwadkar 20078016e29fSHarshad Shirwadkar sbi->s_mount_state &= ~EXT4_FC_REPLAY; 20088016e29fSHarshad Shirwadkar kfree(sbi->s_fc_replay_state.fc_regions); 20098016e29fSHarshad Shirwadkar kfree(sbi->s_fc_replay_state.fc_modified_inodes); 20108016e29fSHarshad Shirwadkar } 20118016e29fSHarshad Shirwadkar 201264b4a25cSEric Biggers static bool ext4_fc_value_len_isvalid(struct ext4_sb_info *sbi, 201364b4a25cSEric Biggers int tag, int len) 20141b45cc5cSYe Bin { 201564b4a25cSEric Biggers switch (tag) { 20161b45cc5cSYe Bin case EXT4_FC_TAG_ADD_RANGE: 201764b4a25cSEric Biggers return len == sizeof(struct ext4_fc_add_range); 20181b45cc5cSYe Bin case EXT4_FC_TAG_DEL_RANGE: 201964b4a25cSEric Biggers return len == sizeof(struct ext4_fc_del_range); 202064b4a25cSEric Biggers case EXT4_FC_TAG_CREAT: 20211b45cc5cSYe Bin case EXT4_FC_TAG_LINK: 20221b45cc5cSYe Bin case EXT4_FC_TAG_UNLINK: 202364b4a25cSEric Biggers len -= sizeof(struct ext4_fc_dentry_info); 202464b4a25cSEric Biggers return len >= 1 && len <= EXT4_NAME_LEN; 20251b45cc5cSYe Bin case EXT4_FC_TAG_INODE: 202664b4a25cSEric Biggers len -= sizeof(struct ext4_fc_inode); 202764b4a25cSEric Biggers return len >= EXT4_GOOD_OLD_INODE_SIZE && 202864b4a25cSEric Biggers len <= sbi->s_inode_size; 20291b45cc5cSYe Bin case EXT4_FC_TAG_PAD: 203064b4a25cSEric Biggers return true; /* padding can have any length */ 203164b4a25cSEric Biggers case EXT4_FC_TAG_TAIL: 203264b4a25cSEric Biggers return len >= sizeof(struct ext4_fc_tail); 203364b4a25cSEric Biggers case EXT4_FC_TAG_HEAD: 203464b4a25cSEric Biggers return len == sizeof(struct ext4_fc_head); 20351b45cc5cSYe Bin } 203664b4a25cSEric Biggers return false; 20371b45cc5cSYe Bin } 20381b45cc5cSYe Bin 20398016e29fSHarshad Shirwadkar /* 20408016e29fSHarshad Shirwadkar * Recovery Scan phase handler 20418016e29fSHarshad Shirwadkar * 20428016e29fSHarshad Shirwadkar * This function is called during the scan phase and is responsible 20438016e29fSHarshad Shirwadkar * for doing following things: 20448016e29fSHarshad Shirwadkar * - Make sure the fast commit area has valid tags for replay 20458016e29fSHarshad Shirwadkar * - Count number of tags that need to be replayed by the replay handler 20468016e29fSHarshad Shirwadkar * - Verify CRC 20478016e29fSHarshad Shirwadkar * - Create a list of excluded blocks for allocation during replay phase 20488016e29fSHarshad Shirwadkar * 20498016e29fSHarshad Shirwadkar * This function returns JBD2_FC_REPLAY_CONTINUE to indicate that SCAN is 20508016e29fSHarshad Shirwadkar * incomplete and JBD2 should send more blocks. It returns JBD2_FC_REPLAY_STOP 20518016e29fSHarshad Shirwadkar * to indicate that scan has finished and JBD2 can now start replay phase. 20528016e29fSHarshad Shirwadkar * It returns a negative error to indicate that there was an error. At the end 20538016e29fSHarshad Shirwadkar * of a successful scan phase, sbi->s_fc_replay_state.fc_replay_num_tags is set 20548016e29fSHarshad Shirwadkar * to indicate the number of tags that need to replayed during the replay phase. 20558016e29fSHarshad Shirwadkar */ 20568016e29fSHarshad Shirwadkar static int ext4_fc_replay_scan(journal_t *journal, 20578016e29fSHarshad Shirwadkar struct buffer_head *bh, int off, 20588016e29fSHarshad Shirwadkar tid_t expected_tid) 20598016e29fSHarshad Shirwadkar { 20608016e29fSHarshad Shirwadkar struct super_block *sb = journal->j_private; 20618016e29fSHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB(sb); 20628016e29fSHarshad Shirwadkar struct ext4_fc_replay_state *state; 20638016e29fSHarshad Shirwadkar int ret = JBD2_FC_REPLAY_CONTINUE; 2064a7ba36bcSHarshad Shirwadkar struct ext4_fc_add_range ext; 206511768cfdSEric Biggers struct ext4_fc_tl_mem tl; 2066a7ba36bcSHarshad Shirwadkar struct ext4_fc_tail tail; 2067a7ba36bcSHarshad Shirwadkar __u8 *start, *end, *cur, *val; 2068a7ba36bcSHarshad Shirwadkar struct ext4_fc_head head; 20698016e29fSHarshad Shirwadkar struct ext4_extent *ex; 20708016e29fSHarshad Shirwadkar 20718016e29fSHarshad Shirwadkar state = &sbi->s_fc_replay_state; 20728016e29fSHarshad Shirwadkar 20738016e29fSHarshad Shirwadkar start = (u8 *)bh->b_data; 207448a6a66dSEric Biggers end = start + journal->j_blocksize; 20758016e29fSHarshad Shirwadkar 20768016e29fSHarshad Shirwadkar if (state->fc_replay_expected_off == 0) { 20778016e29fSHarshad Shirwadkar state->fc_cur_tag = 0; 20788016e29fSHarshad Shirwadkar state->fc_replay_num_tags = 0; 20798016e29fSHarshad Shirwadkar state->fc_crc = 0; 20808016e29fSHarshad Shirwadkar state->fc_regions = NULL; 20818016e29fSHarshad Shirwadkar state->fc_regions_valid = state->fc_regions_used = 20828016e29fSHarshad Shirwadkar state->fc_regions_size = 0; 20838016e29fSHarshad Shirwadkar /* Check if we can stop early */ 20848016e29fSHarshad Shirwadkar if (le16_to_cpu(((struct ext4_fc_tl *)start)->fc_tag) 20858016e29fSHarshad Shirwadkar != EXT4_FC_TAG_HEAD) 20868016e29fSHarshad Shirwadkar return 0; 20878016e29fSHarshad Shirwadkar } 20888016e29fSHarshad Shirwadkar 20898016e29fSHarshad Shirwadkar if (off != state->fc_replay_expected_off) { 20908016e29fSHarshad Shirwadkar ret = -EFSCORRUPTED; 20918016e29fSHarshad Shirwadkar goto out_err; 20928016e29fSHarshad Shirwadkar } 20938016e29fSHarshad Shirwadkar 20948016e29fSHarshad Shirwadkar state->fc_replay_expected_off++; 209548a6a66dSEric Biggers for (cur = start; cur <= end - EXT4_FC_TAG_BASE_LEN; 2096dcc58274SYe Bin cur = cur + EXT4_FC_TAG_BASE_LEN + tl.fc_len) { 2097dcc58274SYe Bin ext4_fc_get_tl(&tl, cur); 2098fdc2a3c7SYe Bin val = cur + EXT4_FC_TAG_BASE_LEN; 209964b4a25cSEric Biggers if (tl.fc_len > end - val || 210064b4a25cSEric Biggers !ext4_fc_value_len_isvalid(sbi, tl.fc_tag, tl.fc_len)) { 21011b45cc5cSYe Bin ret = state->fc_replay_num_tags ? 21021b45cc5cSYe Bin JBD2_FC_REPLAY_STOP : -ECANCELED; 21031b45cc5cSYe Bin goto out_err; 21041b45cc5cSYe Bin } 21054978c659SJan Kara ext4_debug("Scan phase, tag:%s, blk %lld\n", 2106dcc58274SYe Bin tag2str(tl.fc_tag), bh->b_blocknr); 2107dcc58274SYe Bin switch (tl.fc_tag) { 21088016e29fSHarshad Shirwadkar case EXT4_FC_TAG_ADD_RANGE: 2109a7ba36bcSHarshad Shirwadkar memcpy(&ext, val, sizeof(ext)); 2110a7ba36bcSHarshad Shirwadkar ex = (struct ext4_extent *)&ext.fc_ex; 21118016e29fSHarshad Shirwadkar ret = ext4_fc_record_regions(sb, 2112a7ba36bcSHarshad Shirwadkar le32_to_cpu(ext.fc_ino), 21138016e29fSHarshad Shirwadkar le32_to_cpu(ex->ee_block), ext4_ext_pblock(ex), 2114599ea31dSXin Yin ext4_ext_get_actual_len(ex), 0); 21158016e29fSHarshad Shirwadkar if (ret < 0) 21168016e29fSHarshad Shirwadkar break; 21178016e29fSHarshad Shirwadkar ret = JBD2_FC_REPLAY_CONTINUE; 21188016e29fSHarshad Shirwadkar fallthrough; 21198016e29fSHarshad Shirwadkar case EXT4_FC_TAG_DEL_RANGE: 21208016e29fSHarshad Shirwadkar case EXT4_FC_TAG_LINK: 21218016e29fSHarshad Shirwadkar case EXT4_FC_TAG_UNLINK: 21228016e29fSHarshad Shirwadkar case EXT4_FC_TAG_CREAT: 21238016e29fSHarshad Shirwadkar case EXT4_FC_TAG_INODE: 21248016e29fSHarshad Shirwadkar case EXT4_FC_TAG_PAD: 21258016e29fSHarshad Shirwadkar state->fc_cur_tag++; 2126a7ba36bcSHarshad Shirwadkar state->fc_crc = ext4_chksum(sbi, state->fc_crc, cur, 2127dcc58274SYe Bin EXT4_FC_TAG_BASE_LEN + tl.fc_len); 21288016e29fSHarshad Shirwadkar break; 21298016e29fSHarshad Shirwadkar case EXT4_FC_TAG_TAIL: 21308016e29fSHarshad Shirwadkar state->fc_cur_tag++; 2131a7ba36bcSHarshad Shirwadkar memcpy(&tail, val, sizeof(tail)); 2132a7ba36bcSHarshad Shirwadkar state->fc_crc = ext4_chksum(sbi, state->fc_crc, cur, 2133fdc2a3c7SYe Bin EXT4_FC_TAG_BASE_LEN + 21348016e29fSHarshad Shirwadkar offsetof(struct ext4_fc_tail, 21358016e29fSHarshad Shirwadkar fc_crc)); 2136a7ba36bcSHarshad Shirwadkar if (le32_to_cpu(tail.fc_tid) == expected_tid && 2137a7ba36bcSHarshad Shirwadkar le32_to_cpu(tail.fc_crc) == state->fc_crc) { 21388016e29fSHarshad Shirwadkar state->fc_replay_num_tags = state->fc_cur_tag; 21398016e29fSHarshad Shirwadkar state->fc_regions_valid = 21408016e29fSHarshad Shirwadkar state->fc_regions_used; 21418016e29fSHarshad Shirwadkar } else { 21428016e29fSHarshad Shirwadkar ret = state->fc_replay_num_tags ? 21438016e29fSHarshad Shirwadkar JBD2_FC_REPLAY_STOP : -EFSBADCRC; 21448016e29fSHarshad Shirwadkar } 21458016e29fSHarshad Shirwadkar state->fc_crc = 0; 21468016e29fSHarshad Shirwadkar break; 21478016e29fSHarshad Shirwadkar case EXT4_FC_TAG_HEAD: 2148a7ba36bcSHarshad Shirwadkar memcpy(&head, val, sizeof(head)); 2149a7ba36bcSHarshad Shirwadkar if (le32_to_cpu(head.fc_features) & 21508016e29fSHarshad Shirwadkar ~EXT4_FC_SUPPORTED_FEATURES) { 21518016e29fSHarshad Shirwadkar ret = -EOPNOTSUPP; 21528016e29fSHarshad Shirwadkar break; 21538016e29fSHarshad Shirwadkar } 2154a7ba36bcSHarshad Shirwadkar if (le32_to_cpu(head.fc_tid) != expected_tid) { 21558016e29fSHarshad Shirwadkar ret = JBD2_FC_REPLAY_STOP; 21568016e29fSHarshad Shirwadkar break; 21578016e29fSHarshad Shirwadkar } 21588016e29fSHarshad Shirwadkar state->fc_cur_tag++; 2159a7ba36bcSHarshad Shirwadkar state->fc_crc = ext4_chksum(sbi, state->fc_crc, cur, 2160dcc58274SYe Bin EXT4_FC_TAG_BASE_LEN + tl.fc_len); 21618016e29fSHarshad Shirwadkar break; 21628016e29fSHarshad Shirwadkar default: 21638016e29fSHarshad Shirwadkar ret = state->fc_replay_num_tags ? 21648016e29fSHarshad Shirwadkar JBD2_FC_REPLAY_STOP : -ECANCELED; 21658016e29fSHarshad Shirwadkar } 21668016e29fSHarshad Shirwadkar if (ret < 0 || ret == JBD2_FC_REPLAY_STOP) 21678016e29fSHarshad Shirwadkar break; 21688016e29fSHarshad Shirwadkar } 21698016e29fSHarshad Shirwadkar 21708016e29fSHarshad Shirwadkar out_err: 21718016e29fSHarshad Shirwadkar trace_ext4_fc_replay_scan(sb, ret, off); 21728016e29fSHarshad Shirwadkar return ret; 21738016e29fSHarshad Shirwadkar } 21748016e29fSHarshad Shirwadkar 21755b849b5fSHarshad Shirwadkar /* 21765b849b5fSHarshad Shirwadkar * Main recovery path entry point. 21778016e29fSHarshad Shirwadkar * The meaning of return codes is similar as above. 21785b849b5fSHarshad Shirwadkar */ 21795b849b5fSHarshad Shirwadkar static int ext4_fc_replay(journal_t *journal, struct buffer_head *bh, 21805b849b5fSHarshad Shirwadkar enum passtype pass, int off, tid_t expected_tid) 21815b849b5fSHarshad Shirwadkar { 21828016e29fSHarshad Shirwadkar struct super_block *sb = journal->j_private; 21838016e29fSHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB(sb); 218411768cfdSEric Biggers struct ext4_fc_tl_mem tl; 2185a7ba36bcSHarshad Shirwadkar __u8 *start, *end, *cur, *val; 21868016e29fSHarshad Shirwadkar int ret = JBD2_FC_REPLAY_CONTINUE; 21878016e29fSHarshad Shirwadkar struct ext4_fc_replay_state *state = &sbi->s_fc_replay_state; 2188a7ba36bcSHarshad Shirwadkar struct ext4_fc_tail tail; 21898016e29fSHarshad Shirwadkar 21908016e29fSHarshad Shirwadkar if (pass == PASS_SCAN) { 21918016e29fSHarshad Shirwadkar state->fc_current_pass = PASS_SCAN; 21928016e29fSHarshad Shirwadkar return ext4_fc_replay_scan(journal, bh, off, expected_tid); 21938016e29fSHarshad Shirwadkar } 21948016e29fSHarshad Shirwadkar 21958016e29fSHarshad Shirwadkar if (state->fc_current_pass != pass) { 21968016e29fSHarshad Shirwadkar state->fc_current_pass = pass; 21978016e29fSHarshad Shirwadkar sbi->s_mount_state |= EXT4_FC_REPLAY; 21988016e29fSHarshad Shirwadkar } 21998016e29fSHarshad Shirwadkar if (!sbi->s_fc_replay_state.fc_replay_num_tags) { 22004978c659SJan Kara ext4_debug("Replay stops\n"); 22018016e29fSHarshad Shirwadkar ext4_fc_set_bitmaps_and_counters(sb); 22025b849b5fSHarshad Shirwadkar return 0; 22035b849b5fSHarshad Shirwadkar } 22045b849b5fSHarshad Shirwadkar 22058016e29fSHarshad Shirwadkar #ifdef CONFIG_EXT4_DEBUG 22068016e29fSHarshad Shirwadkar if (sbi->s_fc_debug_max_replay && off >= sbi->s_fc_debug_max_replay) { 22078016e29fSHarshad Shirwadkar pr_warn("Dropping fc block %d because max_replay set\n", off); 22088016e29fSHarshad Shirwadkar return JBD2_FC_REPLAY_STOP; 22098016e29fSHarshad Shirwadkar } 22108016e29fSHarshad Shirwadkar #endif 22118016e29fSHarshad Shirwadkar 22128016e29fSHarshad Shirwadkar start = (u8 *)bh->b_data; 221348a6a66dSEric Biggers end = start + journal->j_blocksize; 22148016e29fSHarshad Shirwadkar 221548a6a66dSEric Biggers for (cur = start; cur <= end - EXT4_FC_TAG_BASE_LEN; 2216dcc58274SYe Bin cur = cur + EXT4_FC_TAG_BASE_LEN + tl.fc_len) { 2217dcc58274SYe Bin ext4_fc_get_tl(&tl, cur); 2218fdc2a3c7SYe Bin val = cur + EXT4_FC_TAG_BASE_LEN; 2219a7ba36bcSHarshad Shirwadkar 22208016e29fSHarshad Shirwadkar if (state->fc_replay_num_tags == 0) { 22218016e29fSHarshad Shirwadkar ret = JBD2_FC_REPLAY_STOP; 22228016e29fSHarshad Shirwadkar ext4_fc_set_bitmaps_and_counters(sb); 22238016e29fSHarshad Shirwadkar break; 22248016e29fSHarshad Shirwadkar } 22251b45cc5cSYe Bin 2226dcc58274SYe Bin ext4_debug("Replay phase, tag:%s\n", tag2str(tl.fc_tag)); 22278016e29fSHarshad Shirwadkar state->fc_replay_num_tags--; 2228dcc58274SYe Bin switch (tl.fc_tag) { 22298016e29fSHarshad Shirwadkar case EXT4_FC_TAG_LINK: 2230a7ba36bcSHarshad Shirwadkar ret = ext4_fc_replay_link(sb, &tl, val); 22318016e29fSHarshad Shirwadkar break; 22328016e29fSHarshad Shirwadkar case EXT4_FC_TAG_UNLINK: 2233a7ba36bcSHarshad Shirwadkar ret = ext4_fc_replay_unlink(sb, &tl, val); 22348016e29fSHarshad Shirwadkar break; 22358016e29fSHarshad Shirwadkar case EXT4_FC_TAG_ADD_RANGE: 2236a7ba36bcSHarshad Shirwadkar ret = ext4_fc_replay_add_range(sb, &tl, val); 22378016e29fSHarshad Shirwadkar break; 22388016e29fSHarshad Shirwadkar case EXT4_FC_TAG_CREAT: 2239a7ba36bcSHarshad Shirwadkar ret = ext4_fc_replay_create(sb, &tl, val); 22408016e29fSHarshad Shirwadkar break; 22418016e29fSHarshad Shirwadkar case EXT4_FC_TAG_DEL_RANGE: 2242a7ba36bcSHarshad Shirwadkar ret = ext4_fc_replay_del_range(sb, &tl, val); 22438016e29fSHarshad Shirwadkar break; 22448016e29fSHarshad Shirwadkar case EXT4_FC_TAG_INODE: 2245a7ba36bcSHarshad Shirwadkar ret = ext4_fc_replay_inode(sb, &tl, val); 22468016e29fSHarshad Shirwadkar break; 22478016e29fSHarshad Shirwadkar case EXT4_FC_TAG_PAD: 22488016e29fSHarshad Shirwadkar trace_ext4_fc_replay(sb, EXT4_FC_TAG_PAD, 0, 2249dcc58274SYe Bin tl.fc_len, 0); 22508016e29fSHarshad Shirwadkar break; 22518016e29fSHarshad Shirwadkar case EXT4_FC_TAG_TAIL: 2252dcc58274SYe Bin trace_ext4_fc_replay(sb, EXT4_FC_TAG_TAIL, 2253dcc58274SYe Bin 0, tl.fc_len, 0); 2254a7ba36bcSHarshad Shirwadkar memcpy(&tail, val, sizeof(tail)); 2255a7ba36bcSHarshad Shirwadkar WARN_ON(le32_to_cpu(tail.fc_tid) != expected_tid); 22568016e29fSHarshad Shirwadkar break; 22578016e29fSHarshad Shirwadkar case EXT4_FC_TAG_HEAD: 22588016e29fSHarshad Shirwadkar break; 22598016e29fSHarshad Shirwadkar default: 2260dcc58274SYe Bin trace_ext4_fc_replay(sb, tl.fc_tag, 0, tl.fc_len, 0); 22618016e29fSHarshad Shirwadkar ret = -ECANCELED; 22628016e29fSHarshad Shirwadkar break; 22638016e29fSHarshad Shirwadkar } 22648016e29fSHarshad Shirwadkar if (ret < 0) 22658016e29fSHarshad Shirwadkar break; 22668016e29fSHarshad Shirwadkar ret = JBD2_FC_REPLAY_CONTINUE; 22678016e29fSHarshad Shirwadkar } 22688016e29fSHarshad Shirwadkar return ret; 22698016e29fSHarshad Shirwadkar } 22708016e29fSHarshad Shirwadkar 22716866d7b3SHarshad Shirwadkar void ext4_fc_init(struct super_block *sb, journal_t *journal) 22726866d7b3SHarshad Shirwadkar { 22735b849b5fSHarshad Shirwadkar /* 22745b849b5fSHarshad Shirwadkar * We set replay callback even if fast commit disabled because we may 22755b849b5fSHarshad Shirwadkar * could still have fast commit blocks that need to be replayed even if 22765b849b5fSHarshad Shirwadkar * fast commit has now been turned off. 22775b849b5fSHarshad Shirwadkar */ 22785b849b5fSHarshad Shirwadkar journal->j_fc_replay_callback = ext4_fc_replay; 22796866d7b3SHarshad Shirwadkar if (!test_opt2(sb, JOURNAL_FAST_COMMIT)) 22806866d7b3SHarshad Shirwadkar return; 2281ff780b91SHarshad Shirwadkar journal->j_fc_cleanup_callback = ext4_fc_cleanup; 22826866d7b3SHarshad Shirwadkar } 2283aa75f4d3SHarshad Shirwadkar 22840fbcb525SEric Biggers static const char * const fc_ineligible_reasons[] = { 22850fbcb525SEric Biggers [EXT4_FC_REASON_XATTR] = "Extended attributes changed", 22860fbcb525SEric Biggers [EXT4_FC_REASON_CROSS_RENAME] = "Cross rename", 22870fbcb525SEric Biggers [EXT4_FC_REASON_JOURNAL_FLAG_CHANGE] = "Journal flag changed", 22880fbcb525SEric Biggers [EXT4_FC_REASON_NOMEM] = "Insufficient memory", 22890fbcb525SEric Biggers [EXT4_FC_REASON_SWAP_BOOT] = "Swap boot", 22900fbcb525SEric Biggers [EXT4_FC_REASON_RESIZE] = "Resize", 22910fbcb525SEric Biggers [EXT4_FC_REASON_RENAME_DIR] = "Dir renamed", 22920fbcb525SEric Biggers [EXT4_FC_REASON_FALLOC_RANGE] = "Falloc range op", 22930fbcb525SEric Biggers [EXT4_FC_REASON_INODE_JOURNAL_DATA] = "Data journalling", 22940fbcb525SEric Biggers [EXT4_FC_REASON_ENCRYPTED_FILENAME] = "Encrypted filename", 2295ce8c59d1SHarshad Shirwadkar }; 2296ce8c59d1SHarshad Shirwadkar 2297ce8c59d1SHarshad Shirwadkar int ext4_fc_info_show(struct seq_file *seq, void *v) 2298ce8c59d1SHarshad Shirwadkar { 2299ce8c59d1SHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB((struct super_block *)seq->private); 2300ce8c59d1SHarshad Shirwadkar struct ext4_fc_stats *stats = &sbi->s_fc_stats; 2301ce8c59d1SHarshad Shirwadkar int i; 2302ce8c59d1SHarshad Shirwadkar 2303ce8c59d1SHarshad Shirwadkar if (v != SEQ_START_TOKEN) 2304ce8c59d1SHarshad Shirwadkar return 0; 2305ce8c59d1SHarshad Shirwadkar 2306ce8c59d1SHarshad Shirwadkar seq_printf(seq, 2307ce8c59d1SHarshad Shirwadkar "fc stats:\n%ld commits\n%ld ineligible\n%ld numblks\n%lluus avg_commit_time\n", 2308ce8c59d1SHarshad Shirwadkar stats->fc_num_commits, stats->fc_ineligible_commits, 2309ce8c59d1SHarshad Shirwadkar stats->fc_numblks, 23100915e464SHarshad Shirwadkar div_u64(stats->s_fc_avg_commit_time, 1000)); 2311ce8c59d1SHarshad Shirwadkar seq_puts(seq, "Ineligible reasons:\n"); 2312ce8c59d1SHarshad Shirwadkar for (i = 0; i < EXT4_FC_REASON_MAX; i++) 2313ce8c59d1SHarshad Shirwadkar seq_printf(seq, "\"%s\":\t%d\n", fc_ineligible_reasons[i], 2314ce8c59d1SHarshad Shirwadkar stats->fc_ineligible_reason_count[i]); 2315ce8c59d1SHarshad Shirwadkar 2316ce8c59d1SHarshad Shirwadkar return 0; 2317ce8c59d1SHarshad Shirwadkar } 2318ce8c59d1SHarshad Shirwadkar 2319aa75f4d3SHarshad Shirwadkar int __init ext4_fc_init_dentry_cache(void) 2320aa75f4d3SHarshad Shirwadkar { 2321aa75f4d3SHarshad Shirwadkar ext4_fc_dentry_cachep = KMEM_CACHE(ext4_fc_dentry_update, 2322aa75f4d3SHarshad Shirwadkar SLAB_RECLAIM_ACCOUNT); 2323aa75f4d3SHarshad Shirwadkar 2324aa75f4d3SHarshad Shirwadkar if (ext4_fc_dentry_cachep == NULL) 2325aa75f4d3SHarshad Shirwadkar return -ENOMEM; 2326aa75f4d3SHarshad Shirwadkar 2327aa75f4d3SHarshad Shirwadkar return 0; 2328aa75f4d3SHarshad Shirwadkar } 2329ab047d51SSebastian Andrzej Siewior 2330ab047d51SSebastian Andrzej Siewior void ext4_fc_destroy_dentry_cache(void) 2331ab047d51SSebastian Andrzej Siewior { 2332ab047d51SSebastian Andrzej Siewior kmem_cache_destroy(ext4_fc_dentry_cachep); 2333ab047d51SSebastian Andrzej Siewior } 2334