16866d7b3SHarshad Shirwadkar // SPDX-License-Identifier: GPL-2.0 26866d7b3SHarshad Shirwadkar 36866d7b3SHarshad Shirwadkar /* 46866d7b3SHarshad Shirwadkar * fs/ext4/fast_commit.c 56866d7b3SHarshad Shirwadkar * 66866d7b3SHarshad Shirwadkar * Written by Harshad Shirwadkar <harshadshirwadkar@gmail.com> 76866d7b3SHarshad Shirwadkar * 86866d7b3SHarshad Shirwadkar * Ext4 fast commits routines. 96866d7b3SHarshad Shirwadkar */ 10aa75f4d3SHarshad Shirwadkar #include "ext4.h" 116866d7b3SHarshad Shirwadkar #include "ext4_jbd2.h" 12aa75f4d3SHarshad Shirwadkar #include "ext4_extents.h" 13aa75f4d3SHarshad Shirwadkar #include "mballoc.h" 14aa75f4d3SHarshad Shirwadkar 15aa75f4d3SHarshad Shirwadkar /* 16aa75f4d3SHarshad Shirwadkar * Ext4 Fast Commits 17aa75f4d3SHarshad Shirwadkar * ----------------- 18aa75f4d3SHarshad Shirwadkar * 19aa75f4d3SHarshad Shirwadkar * Ext4 fast commits implement fine grained journalling for Ext4. 20aa75f4d3SHarshad Shirwadkar * 21aa75f4d3SHarshad Shirwadkar * Fast commits are organized as a log of tag-length-value (TLV) structs. (See 22aa75f4d3SHarshad Shirwadkar * struct ext4_fc_tl). Each TLV contains some delta that is replayed TLV by 23aa75f4d3SHarshad Shirwadkar * TLV during the recovery phase. For the scenarios for which we currently 24aa75f4d3SHarshad Shirwadkar * don't have replay code, fast commit falls back to full commits. 25aa75f4d3SHarshad Shirwadkar * Fast commits record delta in one of the following three categories. 26aa75f4d3SHarshad Shirwadkar * 27aa75f4d3SHarshad Shirwadkar * (A) Directory entry updates: 28aa75f4d3SHarshad Shirwadkar * 29aa75f4d3SHarshad Shirwadkar * - EXT4_FC_TAG_UNLINK - records directory entry unlink 30aa75f4d3SHarshad Shirwadkar * - EXT4_FC_TAG_LINK - records directory entry link 31aa75f4d3SHarshad Shirwadkar * - EXT4_FC_TAG_CREAT - records inode and directory entry creation 32aa75f4d3SHarshad Shirwadkar * 33aa75f4d3SHarshad Shirwadkar * (B) File specific data range updates: 34aa75f4d3SHarshad Shirwadkar * 35aa75f4d3SHarshad Shirwadkar * - EXT4_FC_TAG_ADD_RANGE - records addition of new blocks to an inode 36aa75f4d3SHarshad Shirwadkar * - EXT4_FC_TAG_DEL_RANGE - records deletion of blocks from an inode 37aa75f4d3SHarshad Shirwadkar * 38aa75f4d3SHarshad Shirwadkar * (C) Inode metadata (mtime / ctime etc): 39aa75f4d3SHarshad Shirwadkar * 40aa75f4d3SHarshad Shirwadkar * - EXT4_FC_TAG_INODE - record the inode that should be replayed 41aa75f4d3SHarshad Shirwadkar * during recovery. Note that iblocks field is 42aa75f4d3SHarshad Shirwadkar * not replayed and instead derived during 43aa75f4d3SHarshad Shirwadkar * replay. 44aa75f4d3SHarshad Shirwadkar * Commit Operation 45aa75f4d3SHarshad Shirwadkar * ---------------- 46aa75f4d3SHarshad Shirwadkar * With fast commits, we maintain all the directory entry operations in the 47aa75f4d3SHarshad Shirwadkar * order in which they are issued in an in-memory queue. This queue is flushed 48aa75f4d3SHarshad Shirwadkar * to disk during the commit operation. We also maintain a list of inodes 49aa75f4d3SHarshad Shirwadkar * that need to be committed during a fast commit in another in memory queue of 50aa75f4d3SHarshad Shirwadkar * inodes. During the commit operation, we commit in the following order: 51aa75f4d3SHarshad Shirwadkar * 52aa75f4d3SHarshad Shirwadkar * [1] Lock inodes for any further data updates by setting COMMITTING state 53aa75f4d3SHarshad Shirwadkar * [2] Submit data buffers of all the inodes 54aa75f4d3SHarshad Shirwadkar * [3] Wait for [2] to complete 55aa75f4d3SHarshad Shirwadkar * [4] Commit all the directory entry updates in the fast commit space 56aa75f4d3SHarshad Shirwadkar * [5] Commit all the changed inode structures 57aa75f4d3SHarshad Shirwadkar * [6] Write tail tag (this tag ensures the atomicity, please read the following 58aa75f4d3SHarshad Shirwadkar * section for more details). 59aa75f4d3SHarshad Shirwadkar * [7] Wait for [4], [5] and [6] to complete. 60aa75f4d3SHarshad Shirwadkar * 61aa75f4d3SHarshad Shirwadkar * All the inode updates must call ext4_fc_start_update() before starting an 62aa75f4d3SHarshad Shirwadkar * update. If such an ongoing update is present, fast commit waits for it to 63aa75f4d3SHarshad Shirwadkar * complete. The completion of such an update is marked by 64aa75f4d3SHarshad Shirwadkar * ext4_fc_stop_update(). 65aa75f4d3SHarshad Shirwadkar * 66aa75f4d3SHarshad Shirwadkar * Fast Commit Ineligibility 67aa75f4d3SHarshad Shirwadkar * ------------------------- 687bbbe241SHarshad Shirwadkar * 69aa75f4d3SHarshad Shirwadkar * Not all operations are supported by fast commits today (e.g extended 707bbbe241SHarshad Shirwadkar * attributes). Fast commit ineligibility is marked by calling 717bbbe241SHarshad Shirwadkar * ext4_fc_mark_ineligible(): This makes next fast commit operation to fall back 727bbbe241SHarshad Shirwadkar * to full commit. 73aa75f4d3SHarshad Shirwadkar * 74aa75f4d3SHarshad Shirwadkar * Atomicity of commits 75aa75f4d3SHarshad Shirwadkar * -------------------- 76a740762fSHarshad Shirwadkar * In order to guarantee atomicity during the commit operation, fast commit 77aa75f4d3SHarshad Shirwadkar * uses "EXT4_FC_TAG_TAIL" tag that marks a fast commit as complete. Tail 78aa75f4d3SHarshad Shirwadkar * tag contains CRC of the contents and TID of the transaction after which 79aa75f4d3SHarshad Shirwadkar * this fast commit should be applied. Recovery code replays fast commit 80aa75f4d3SHarshad Shirwadkar * logs only if there's at least 1 valid tail present. For every fast commit 81aa75f4d3SHarshad Shirwadkar * operation, there is 1 tail. This means, we may end up with multiple tails 82aa75f4d3SHarshad Shirwadkar * in the fast commit space. Here's an example: 83aa75f4d3SHarshad Shirwadkar * 84aa75f4d3SHarshad Shirwadkar * - Create a new file A and remove existing file B 85aa75f4d3SHarshad Shirwadkar * - fsync() 86aa75f4d3SHarshad Shirwadkar * - Append contents to file A 87aa75f4d3SHarshad Shirwadkar * - Truncate file A 88aa75f4d3SHarshad Shirwadkar * - fsync() 89aa75f4d3SHarshad Shirwadkar * 90aa75f4d3SHarshad Shirwadkar * The fast commit space at the end of above operations would look like this: 91aa75f4d3SHarshad Shirwadkar * [HEAD] [CREAT A] [UNLINK B] [TAIL] [ADD_RANGE A] [DEL_RANGE A] [TAIL] 92aa75f4d3SHarshad Shirwadkar * |<--- Fast Commit 1 --->|<--- Fast Commit 2 ---->| 93aa75f4d3SHarshad Shirwadkar * 94aa75f4d3SHarshad Shirwadkar * Replay code should thus check for all the valid tails in the FC area. 95aa75f4d3SHarshad Shirwadkar * 96b1b7dce3SHarshad Shirwadkar * Fast Commit Replay Idempotence 97b1b7dce3SHarshad Shirwadkar * ------------------------------ 98b1b7dce3SHarshad Shirwadkar * 99b1b7dce3SHarshad Shirwadkar * Fast commits tags are idempotent in nature provided the recovery code follows 100b1b7dce3SHarshad Shirwadkar * certain rules. The guiding principle that the commit path follows while 101b1b7dce3SHarshad Shirwadkar * committing is that it stores the result of a particular operation instead of 102b1b7dce3SHarshad Shirwadkar * storing the procedure. 103b1b7dce3SHarshad Shirwadkar * 104b1b7dce3SHarshad Shirwadkar * Let's consider this rename operation: 'mv /a /b'. Let's assume dirent '/a' 105b1b7dce3SHarshad Shirwadkar * was associated with inode 10. During fast commit, instead of storing this 106b1b7dce3SHarshad Shirwadkar * operation as a procedure "rename a to b", we store the resulting file system 107b1b7dce3SHarshad Shirwadkar * state as a "series" of outcomes: 108b1b7dce3SHarshad Shirwadkar * 109b1b7dce3SHarshad Shirwadkar * - Link dirent b to inode 10 110b1b7dce3SHarshad Shirwadkar * - Unlink dirent a 111b1b7dce3SHarshad Shirwadkar * - Inode <10> with valid refcount 112b1b7dce3SHarshad Shirwadkar * 113b1b7dce3SHarshad Shirwadkar * Now when recovery code runs, it needs "enforce" this state on the file 114b1b7dce3SHarshad Shirwadkar * system. This is what guarantees idempotence of fast commit replay. 115b1b7dce3SHarshad Shirwadkar * 116b1b7dce3SHarshad Shirwadkar * Let's take an example of a procedure that is not idempotent and see how fast 117b1b7dce3SHarshad Shirwadkar * commits make it idempotent. Consider following sequence of operations: 118b1b7dce3SHarshad Shirwadkar * 119b1b7dce3SHarshad Shirwadkar * rm A; mv B A; read A 120b1b7dce3SHarshad Shirwadkar * (x) (y) (z) 121b1b7dce3SHarshad Shirwadkar * 122b1b7dce3SHarshad Shirwadkar * (x), (y) and (z) are the points at which we can crash. If we store this 123b1b7dce3SHarshad Shirwadkar * sequence of operations as is then the replay is not idempotent. Let's say 124b1b7dce3SHarshad Shirwadkar * while in replay, we crash at (z). During the second replay, file A (which was 125b1b7dce3SHarshad Shirwadkar * actually created as a result of "mv B A" operation) would get deleted. Thus, 126b1b7dce3SHarshad Shirwadkar * file named A would be absent when we try to read A. So, this sequence of 127b1b7dce3SHarshad Shirwadkar * operations is not idempotent. However, as mentioned above, instead of storing 128b1b7dce3SHarshad Shirwadkar * the procedure fast commits store the outcome of each procedure. Thus the fast 129b1b7dce3SHarshad Shirwadkar * commit log for above procedure would be as follows: 130b1b7dce3SHarshad Shirwadkar * 131b1b7dce3SHarshad Shirwadkar * (Let's assume dirent A was linked to inode 10 and dirent B was linked to 132b1b7dce3SHarshad Shirwadkar * inode 11 before the replay) 133b1b7dce3SHarshad Shirwadkar * 134b1b7dce3SHarshad Shirwadkar * [Unlink A] [Link A to inode 11] [Unlink B] [Inode 11] 135b1b7dce3SHarshad Shirwadkar * (w) (x) (y) (z) 136b1b7dce3SHarshad Shirwadkar * 137b1b7dce3SHarshad Shirwadkar * If we crash at (z), we will have file A linked to inode 11. During the second 138b1b7dce3SHarshad Shirwadkar * replay, we will remove file A (inode 11). But we will create it back and make 139b1b7dce3SHarshad Shirwadkar * it point to inode 11. We won't find B, so we'll just skip that step. At this 140b1b7dce3SHarshad Shirwadkar * point, the refcount for inode 11 is not reliable, but that gets fixed by the 141b1b7dce3SHarshad Shirwadkar * replay of last inode 11 tag. Crashes at points (w), (x) and (y) get handled 142b1b7dce3SHarshad Shirwadkar * similarly. Thus, by converting a non-idempotent procedure into a series of 143b1b7dce3SHarshad Shirwadkar * idempotent outcomes, fast commits ensured idempotence during the replay. 144b1b7dce3SHarshad Shirwadkar * 145aa75f4d3SHarshad Shirwadkar * TODOs 146aa75f4d3SHarshad Shirwadkar * ----- 147b1b7dce3SHarshad Shirwadkar * 148b1b7dce3SHarshad Shirwadkar * 0) Fast commit replay path hardening: Fast commit replay code should use 149b1b7dce3SHarshad Shirwadkar * journal handles to make sure all the updates it does during the replay 150b1b7dce3SHarshad Shirwadkar * path are atomic. With that if we crash during fast commit replay, after 151b1b7dce3SHarshad Shirwadkar * trying to do recovery again, we will find a file system where fast commit 152b1b7dce3SHarshad Shirwadkar * area is invalid (because new full commit would be found). In order to deal 153b1b7dce3SHarshad Shirwadkar * with that, fast commit replay code should ensure that the "FC_REPLAY" 154b1b7dce3SHarshad Shirwadkar * superblock state is persisted before starting the replay, so that after 155b1b7dce3SHarshad Shirwadkar * the crash, fast commit recovery code can look at that flag and perform 156b1b7dce3SHarshad Shirwadkar * fast commit recovery even if that area is invalidated by later full 157b1b7dce3SHarshad Shirwadkar * commits. 158b1b7dce3SHarshad Shirwadkar * 159d1199b94SHarshad Shirwadkar * 1) Fast commit's commit path locks the entire file system during fast 160d1199b94SHarshad Shirwadkar * commit. This has significant performance penalty. Instead of that, we 161d1199b94SHarshad Shirwadkar * should use ext4_fc_start/stop_update functions to start inode level 162d1199b94SHarshad Shirwadkar * updates from ext4_journal_start/stop. Once we do that we can drop file 163d1199b94SHarshad Shirwadkar * system locking during commit path. 164aa75f4d3SHarshad Shirwadkar * 165d1199b94SHarshad Shirwadkar * 2) Handle more ineligible cases. 166aa75f4d3SHarshad Shirwadkar */ 167aa75f4d3SHarshad Shirwadkar 168aa75f4d3SHarshad Shirwadkar #include <trace/events/ext4.h> 169aa75f4d3SHarshad Shirwadkar static struct kmem_cache *ext4_fc_dentry_cachep; 170aa75f4d3SHarshad Shirwadkar 171aa75f4d3SHarshad Shirwadkar static void ext4_end_buffer_io_sync(struct buffer_head *bh, int uptodate) 172aa75f4d3SHarshad Shirwadkar { 173aa75f4d3SHarshad Shirwadkar BUFFER_TRACE(bh, ""); 174aa75f4d3SHarshad Shirwadkar if (uptodate) { 175aa75f4d3SHarshad Shirwadkar ext4_debug("%s: Block %lld up-to-date", 176aa75f4d3SHarshad Shirwadkar __func__, bh->b_blocknr); 177aa75f4d3SHarshad Shirwadkar set_buffer_uptodate(bh); 178aa75f4d3SHarshad Shirwadkar } else { 179aa75f4d3SHarshad Shirwadkar ext4_debug("%s: Block %lld not up-to-date", 180aa75f4d3SHarshad Shirwadkar __func__, bh->b_blocknr); 181aa75f4d3SHarshad Shirwadkar clear_buffer_uptodate(bh); 182aa75f4d3SHarshad Shirwadkar } 183aa75f4d3SHarshad Shirwadkar 184aa75f4d3SHarshad Shirwadkar unlock_buffer(bh); 185aa75f4d3SHarshad Shirwadkar } 186aa75f4d3SHarshad Shirwadkar 187aa75f4d3SHarshad Shirwadkar static inline void ext4_fc_reset_inode(struct inode *inode) 188aa75f4d3SHarshad Shirwadkar { 189aa75f4d3SHarshad Shirwadkar struct ext4_inode_info *ei = EXT4_I(inode); 190aa75f4d3SHarshad Shirwadkar 191aa75f4d3SHarshad Shirwadkar ei->i_fc_lblk_start = 0; 192aa75f4d3SHarshad Shirwadkar ei->i_fc_lblk_len = 0; 193aa75f4d3SHarshad Shirwadkar } 194aa75f4d3SHarshad Shirwadkar 195aa75f4d3SHarshad Shirwadkar void ext4_fc_init_inode(struct inode *inode) 196aa75f4d3SHarshad Shirwadkar { 197aa75f4d3SHarshad Shirwadkar struct ext4_inode_info *ei = EXT4_I(inode); 198aa75f4d3SHarshad Shirwadkar 199aa75f4d3SHarshad Shirwadkar ext4_fc_reset_inode(inode); 200aa75f4d3SHarshad Shirwadkar ext4_clear_inode_state(inode, EXT4_STATE_FC_COMMITTING); 201aa75f4d3SHarshad Shirwadkar INIT_LIST_HEAD(&ei->i_fc_list); 202aa75f4d3SHarshad Shirwadkar init_waitqueue_head(&ei->i_fc_wait); 203aa75f4d3SHarshad Shirwadkar atomic_set(&ei->i_fc_updates, 0); 204aa75f4d3SHarshad Shirwadkar } 205aa75f4d3SHarshad Shirwadkar 206f6634e26SHarshad Shirwadkar /* This function must be called with sbi->s_fc_lock held. */ 207f6634e26SHarshad Shirwadkar static void ext4_fc_wait_committing_inode(struct inode *inode) 208fa329e27STheodore Ts'o __releases(&EXT4_SB(inode->i_sb)->s_fc_lock) 209f6634e26SHarshad Shirwadkar { 210f6634e26SHarshad Shirwadkar wait_queue_head_t *wq; 211f6634e26SHarshad Shirwadkar struct ext4_inode_info *ei = EXT4_I(inode); 212f6634e26SHarshad Shirwadkar 213f6634e26SHarshad Shirwadkar #if (BITS_PER_LONG < 64) 214f6634e26SHarshad Shirwadkar DEFINE_WAIT_BIT(wait, &ei->i_state_flags, 215f6634e26SHarshad Shirwadkar EXT4_STATE_FC_COMMITTING); 216f6634e26SHarshad Shirwadkar wq = bit_waitqueue(&ei->i_state_flags, 217f6634e26SHarshad Shirwadkar EXT4_STATE_FC_COMMITTING); 218f6634e26SHarshad Shirwadkar #else 219f6634e26SHarshad Shirwadkar DEFINE_WAIT_BIT(wait, &ei->i_flags, 220f6634e26SHarshad Shirwadkar EXT4_STATE_FC_COMMITTING); 221f6634e26SHarshad Shirwadkar wq = bit_waitqueue(&ei->i_flags, 222f6634e26SHarshad Shirwadkar EXT4_STATE_FC_COMMITTING); 223f6634e26SHarshad Shirwadkar #endif 224f6634e26SHarshad Shirwadkar lockdep_assert_held(&EXT4_SB(inode->i_sb)->s_fc_lock); 225f6634e26SHarshad Shirwadkar prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE); 226f6634e26SHarshad Shirwadkar spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock); 227f6634e26SHarshad Shirwadkar schedule(); 228f6634e26SHarshad Shirwadkar finish_wait(wq, &wait.wq_entry); 229f6634e26SHarshad Shirwadkar } 230f6634e26SHarshad Shirwadkar 231aa75f4d3SHarshad Shirwadkar /* 232aa75f4d3SHarshad Shirwadkar * Inform Ext4's fast about start of an inode update 233aa75f4d3SHarshad Shirwadkar * 234aa75f4d3SHarshad Shirwadkar * This function is called by the high level call VFS callbacks before 235aa75f4d3SHarshad Shirwadkar * performing any inode update. This function blocks if there's an ongoing 236aa75f4d3SHarshad Shirwadkar * fast commit on the inode in question. 237aa75f4d3SHarshad Shirwadkar */ 238aa75f4d3SHarshad Shirwadkar void ext4_fc_start_update(struct inode *inode) 239aa75f4d3SHarshad Shirwadkar { 240aa75f4d3SHarshad Shirwadkar struct ext4_inode_info *ei = EXT4_I(inode); 241aa75f4d3SHarshad Shirwadkar 2428016e29fSHarshad Shirwadkar if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) || 2438016e29fSHarshad Shirwadkar (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)) 244aa75f4d3SHarshad Shirwadkar return; 245aa75f4d3SHarshad Shirwadkar 246aa75f4d3SHarshad Shirwadkar restart: 247aa75f4d3SHarshad Shirwadkar spin_lock(&EXT4_SB(inode->i_sb)->s_fc_lock); 248aa75f4d3SHarshad Shirwadkar if (list_empty(&ei->i_fc_list)) 249aa75f4d3SHarshad Shirwadkar goto out; 250aa75f4d3SHarshad Shirwadkar 251aa75f4d3SHarshad Shirwadkar if (ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING)) { 252f6634e26SHarshad Shirwadkar ext4_fc_wait_committing_inode(inode); 253aa75f4d3SHarshad Shirwadkar goto restart; 254aa75f4d3SHarshad Shirwadkar } 255aa75f4d3SHarshad Shirwadkar out: 256aa75f4d3SHarshad Shirwadkar atomic_inc(&ei->i_fc_updates); 257aa75f4d3SHarshad Shirwadkar spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock); 258aa75f4d3SHarshad Shirwadkar } 259aa75f4d3SHarshad Shirwadkar 260aa75f4d3SHarshad Shirwadkar /* 261aa75f4d3SHarshad Shirwadkar * Stop inode update and wake up waiting fast commits if any. 262aa75f4d3SHarshad Shirwadkar */ 263aa75f4d3SHarshad Shirwadkar void ext4_fc_stop_update(struct inode *inode) 264aa75f4d3SHarshad Shirwadkar { 265aa75f4d3SHarshad Shirwadkar struct ext4_inode_info *ei = EXT4_I(inode); 266aa75f4d3SHarshad Shirwadkar 2678016e29fSHarshad Shirwadkar if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) || 2688016e29fSHarshad Shirwadkar (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)) 269aa75f4d3SHarshad Shirwadkar return; 270aa75f4d3SHarshad Shirwadkar 271aa75f4d3SHarshad Shirwadkar if (atomic_dec_and_test(&ei->i_fc_updates)) 272aa75f4d3SHarshad Shirwadkar wake_up_all(&ei->i_fc_wait); 273aa75f4d3SHarshad Shirwadkar } 274aa75f4d3SHarshad Shirwadkar 275aa75f4d3SHarshad Shirwadkar /* 276aa75f4d3SHarshad Shirwadkar * Remove inode from fast commit list. If the inode is being committed 277aa75f4d3SHarshad Shirwadkar * we wait until inode commit is done. 278aa75f4d3SHarshad Shirwadkar */ 279aa75f4d3SHarshad Shirwadkar void ext4_fc_del(struct inode *inode) 280aa75f4d3SHarshad Shirwadkar { 281aa75f4d3SHarshad Shirwadkar struct ext4_inode_info *ei = EXT4_I(inode); 282aa75f4d3SHarshad Shirwadkar 2838016e29fSHarshad Shirwadkar if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) || 2848016e29fSHarshad Shirwadkar (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)) 285aa75f4d3SHarshad Shirwadkar return; 286aa75f4d3SHarshad Shirwadkar 287aa75f4d3SHarshad Shirwadkar restart: 288aa75f4d3SHarshad Shirwadkar spin_lock(&EXT4_SB(inode->i_sb)->s_fc_lock); 289aa75f4d3SHarshad Shirwadkar if (list_empty(&ei->i_fc_list)) { 290aa75f4d3SHarshad Shirwadkar spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock); 291aa75f4d3SHarshad Shirwadkar return; 292aa75f4d3SHarshad Shirwadkar } 293aa75f4d3SHarshad Shirwadkar 294aa75f4d3SHarshad Shirwadkar if (ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING)) { 295f6634e26SHarshad Shirwadkar ext4_fc_wait_committing_inode(inode); 296aa75f4d3SHarshad Shirwadkar goto restart; 297aa75f4d3SHarshad Shirwadkar } 298aa75f4d3SHarshad Shirwadkar list_del_init(&ei->i_fc_list); 299aa75f4d3SHarshad Shirwadkar spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock); 300aa75f4d3SHarshad Shirwadkar } 301aa75f4d3SHarshad Shirwadkar 302aa75f4d3SHarshad Shirwadkar /* 303aa75f4d3SHarshad Shirwadkar * Mark file system as fast commit ineligible. This means that next commit 304aa75f4d3SHarshad Shirwadkar * operation would result in a full jbd2 commit. 305aa75f4d3SHarshad Shirwadkar */ 306aa75f4d3SHarshad Shirwadkar void ext4_fc_mark_ineligible(struct super_block *sb, int reason) 307aa75f4d3SHarshad Shirwadkar { 308aa75f4d3SHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB(sb); 309aa75f4d3SHarshad Shirwadkar 3108016e29fSHarshad Shirwadkar if (!test_opt2(sb, JOURNAL_FAST_COMMIT) || 3118016e29fSHarshad Shirwadkar (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY)) 3128016e29fSHarshad Shirwadkar return; 3138016e29fSHarshad Shirwadkar 3149b5f6c9bSHarshad Shirwadkar ext4_set_mount_flag(sb, EXT4_MF_FC_INELIGIBLE); 315aa75f4d3SHarshad Shirwadkar WARN_ON(reason >= EXT4_FC_REASON_MAX); 316aa75f4d3SHarshad Shirwadkar sbi->s_fc_stats.fc_ineligible_reason_count[reason]++; 317aa75f4d3SHarshad Shirwadkar } 318aa75f4d3SHarshad Shirwadkar 319aa75f4d3SHarshad Shirwadkar /* 320aa75f4d3SHarshad Shirwadkar * Generic fast commit tracking function. If this is the first time this we are 321aa75f4d3SHarshad Shirwadkar * called after a full commit, we initialize fast commit fields and then call 322aa75f4d3SHarshad Shirwadkar * __fc_track_fn() with update = 0. If we have already been called after a full 323aa75f4d3SHarshad Shirwadkar * commit, we pass update = 1. Based on that, the track function can determine 324aa75f4d3SHarshad Shirwadkar * if it needs to track a field for the first time or if it needs to just 325aa75f4d3SHarshad Shirwadkar * update the previously tracked value. 326aa75f4d3SHarshad Shirwadkar * 327aa75f4d3SHarshad Shirwadkar * If enqueue is set, this function enqueues the inode in fast commit list. 328aa75f4d3SHarshad Shirwadkar */ 329aa75f4d3SHarshad Shirwadkar static int ext4_fc_track_template( 330a80f7fcfSHarshad Shirwadkar handle_t *handle, struct inode *inode, 331a80f7fcfSHarshad Shirwadkar int (*__fc_track_fn)(struct inode *, void *, bool), 332aa75f4d3SHarshad Shirwadkar void *args, int enqueue) 333aa75f4d3SHarshad Shirwadkar { 334aa75f4d3SHarshad Shirwadkar bool update = false; 335aa75f4d3SHarshad Shirwadkar struct ext4_inode_info *ei = EXT4_I(inode); 336aa75f4d3SHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 337a80f7fcfSHarshad Shirwadkar tid_t tid = 0; 338aa75f4d3SHarshad Shirwadkar int ret; 339aa75f4d3SHarshad Shirwadkar 3408016e29fSHarshad Shirwadkar if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) || 3418016e29fSHarshad Shirwadkar (sbi->s_mount_state & EXT4_FC_REPLAY)) 342aa75f4d3SHarshad Shirwadkar return -EOPNOTSUPP; 343aa75f4d3SHarshad Shirwadkar 3447bbbe241SHarshad Shirwadkar if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE)) 345aa75f4d3SHarshad Shirwadkar return -EINVAL; 346aa75f4d3SHarshad Shirwadkar 347a80f7fcfSHarshad Shirwadkar tid = handle->h_transaction->t_tid; 348aa75f4d3SHarshad Shirwadkar mutex_lock(&ei->i_fc_lock); 349a80f7fcfSHarshad Shirwadkar if (tid == ei->i_sync_tid) { 350aa75f4d3SHarshad Shirwadkar update = true; 351aa75f4d3SHarshad Shirwadkar } else { 352aa75f4d3SHarshad Shirwadkar ext4_fc_reset_inode(inode); 353a80f7fcfSHarshad Shirwadkar ei->i_sync_tid = tid; 354aa75f4d3SHarshad Shirwadkar } 355aa75f4d3SHarshad Shirwadkar ret = __fc_track_fn(inode, args, update); 356aa75f4d3SHarshad Shirwadkar mutex_unlock(&ei->i_fc_lock); 357aa75f4d3SHarshad Shirwadkar 358aa75f4d3SHarshad Shirwadkar if (!enqueue) 359aa75f4d3SHarshad Shirwadkar return ret; 360aa75f4d3SHarshad Shirwadkar 361aa75f4d3SHarshad Shirwadkar spin_lock(&sbi->s_fc_lock); 362aa75f4d3SHarshad Shirwadkar if (list_empty(&EXT4_I(inode)->i_fc_list)) 363aa75f4d3SHarshad Shirwadkar list_add_tail(&EXT4_I(inode)->i_fc_list, 3649b5f6c9bSHarshad Shirwadkar (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_COMMITTING)) ? 365aa75f4d3SHarshad Shirwadkar &sbi->s_fc_q[FC_Q_STAGING] : 366aa75f4d3SHarshad Shirwadkar &sbi->s_fc_q[FC_Q_MAIN]); 367aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock); 368aa75f4d3SHarshad Shirwadkar 369aa75f4d3SHarshad Shirwadkar return ret; 370aa75f4d3SHarshad Shirwadkar } 371aa75f4d3SHarshad Shirwadkar 372aa75f4d3SHarshad Shirwadkar struct __track_dentry_update_args { 373aa75f4d3SHarshad Shirwadkar struct dentry *dentry; 374aa75f4d3SHarshad Shirwadkar int op; 375aa75f4d3SHarshad Shirwadkar }; 376aa75f4d3SHarshad Shirwadkar 377aa75f4d3SHarshad Shirwadkar /* __track_fn for directory entry updates. Called with ei->i_fc_lock. */ 378aa75f4d3SHarshad Shirwadkar static int __track_dentry_update(struct inode *inode, void *arg, bool update) 379aa75f4d3SHarshad Shirwadkar { 380aa75f4d3SHarshad Shirwadkar struct ext4_fc_dentry_update *node; 381aa75f4d3SHarshad Shirwadkar struct ext4_inode_info *ei = EXT4_I(inode); 382aa75f4d3SHarshad Shirwadkar struct __track_dentry_update_args *dentry_update = 383aa75f4d3SHarshad Shirwadkar (struct __track_dentry_update_args *)arg; 384aa75f4d3SHarshad Shirwadkar struct dentry *dentry = dentry_update->dentry; 385aa75f4d3SHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 386aa75f4d3SHarshad Shirwadkar 387aa75f4d3SHarshad Shirwadkar mutex_unlock(&ei->i_fc_lock); 388aa75f4d3SHarshad Shirwadkar node = kmem_cache_alloc(ext4_fc_dentry_cachep, GFP_NOFS); 389aa75f4d3SHarshad Shirwadkar if (!node) { 390b21ebf14SHarshad Shirwadkar ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM); 391aa75f4d3SHarshad Shirwadkar mutex_lock(&ei->i_fc_lock); 392aa75f4d3SHarshad Shirwadkar return -ENOMEM; 393aa75f4d3SHarshad Shirwadkar } 394aa75f4d3SHarshad Shirwadkar 395aa75f4d3SHarshad Shirwadkar node->fcd_op = dentry_update->op; 396aa75f4d3SHarshad Shirwadkar node->fcd_parent = dentry->d_parent->d_inode->i_ino; 397aa75f4d3SHarshad Shirwadkar node->fcd_ino = inode->i_ino; 398aa75f4d3SHarshad Shirwadkar if (dentry->d_name.len > DNAME_INLINE_LEN) { 399aa75f4d3SHarshad Shirwadkar node->fcd_name.name = kmalloc(dentry->d_name.len, GFP_NOFS); 400aa75f4d3SHarshad Shirwadkar if (!node->fcd_name.name) { 401aa75f4d3SHarshad Shirwadkar kmem_cache_free(ext4_fc_dentry_cachep, node); 402aa75f4d3SHarshad Shirwadkar ext4_fc_mark_ineligible(inode->i_sb, 403b21ebf14SHarshad Shirwadkar EXT4_FC_REASON_NOMEM); 404aa75f4d3SHarshad Shirwadkar mutex_lock(&ei->i_fc_lock); 405aa75f4d3SHarshad Shirwadkar return -ENOMEM; 406aa75f4d3SHarshad Shirwadkar } 407aa75f4d3SHarshad Shirwadkar memcpy((u8 *)node->fcd_name.name, dentry->d_name.name, 408aa75f4d3SHarshad Shirwadkar dentry->d_name.len); 409aa75f4d3SHarshad Shirwadkar } else { 410aa75f4d3SHarshad Shirwadkar memcpy(node->fcd_iname, dentry->d_name.name, 411aa75f4d3SHarshad Shirwadkar dentry->d_name.len); 412aa75f4d3SHarshad Shirwadkar node->fcd_name.name = node->fcd_iname; 413aa75f4d3SHarshad Shirwadkar } 414aa75f4d3SHarshad Shirwadkar node->fcd_name.len = dentry->d_name.len; 415aa75f4d3SHarshad Shirwadkar 416aa75f4d3SHarshad Shirwadkar spin_lock(&sbi->s_fc_lock); 4179b5f6c9bSHarshad Shirwadkar if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_COMMITTING)) 418aa75f4d3SHarshad Shirwadkar list_add_tail(&node->fcd_list, 419aa75f4d3SHarshad Shirwadkar &sbi->s_fc_dentry_q[FC_Q_STAGING]); 420aa75f4d3SHarshad Shirwadkar else 421aa75f4d3SHarshad Shirwadkar list_add_tail(&node->fcd_list, &sbi->s_fc_dentry_q[FC_Q_MAIN]); 422aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock); 423aa75f4d3SHarshad Shirwadkar mutex_lock(&ei->i_fc_lock); 424aa75f4d3SHarshad Shirwadkar 425aa75f4d3SHarshad Shirwadkar return 0; 426aa75f4d3SHarshad Shirwadkar } 427aa75f4d3SHarshad Shirwadkar 428a80f7fcfSHarshad Shirwadkar void __ext4_fc_track_unlink(handle_t *handle, 429a80f7fcfSHarshad Shirwadkar struct inode *inode, struct dentry *dentry) 430aa75f4d3SHarshad Shirwadkar { 431aa75f4d3SHarshad Shirwadkar struct __track_dentry_update_args args; 432aa75f4d3SHarshad Shirwadkar int ret; 433aa75f4d3SHarshad Shirwadkar 434aa75f4d3SHarshad Shirwadkar args.dentry = dentry; 435aa75f4d3SHarshad Shirwadkar args.op = EXT4_FC_TAG_UNLINK; 436aa75f4d3SHarshad Shirwadkar 437a80f7fcfSHarshad Shirwadkar ret = ext4_fc_track_template(handle, inode, __track_dentry_update, 438aa75f4d3SHarshad Shirwadkar (void *)&args, 0); 439aa75f4d3SHarshad Shirwadkar trace_ext4_fc_track_unlink(inode, dentry, ret); 440aa75f4d3SHarshad Shirwadkar } 441aa75f4d3SHarshad Shirwadkar 442a80f7fcfSHarshad Shirwadkar void ext4_fc_track_unlink(handle_t *handle, struct dentry *dentry) 443a80f7fcfSHarshad Shirwadkar { 444a80f7fcfSHarshad Shirwadkar __ext4_fc_track_unlink(handle, d_inode(dentry), dentry); 445a80f7fcfSHarshad Shirwadkar } 446a80f7fcfSHarshad Shirwadkar 447a80f7fcfSHarshad Shirwadkar void __ext4_fc_track_link(handle_t *handle, 448a80f7fcfSHarshad Shirwadkar struct inode *inode, struct dentry *dentry) 449aa75f4d3SHarshad Shirwadkar { 450aa75f4d3SHarshad Shirwadkar struct __track_dentry_update_args args; 451aa75f4d3SHarshad Shirwadkar int ret; 452aa75f4d3SHarshad Shirwadkar 453aa75f4d3SHarshad Shirwadkar args.dentry = dentry; 454aa75f4d3SHarshad Shirwadkar args.op = EXT4_FC_TAG_LINK; 455aa75f4d3SHarshad Shirwadkar 456a80f7fcfSHarshad Shirwadkar ret = ext4_fc_track_template(handle, inode, __track_dentry_update, 457aa75f4d3SHarshad Shirwadkar (void *)&args, 0); 458aa75f4d3SHarshad Shirwadkar trace_ext4_fc_track_link(inode, dentry, ret); 459aa75f4d3SHarshad Shirwadkar } 460aa75f4d3SHarshad Shirwadkar 461a80f7fcfSHarshad Shirwadkar void ext4_fc_track_link(handle_t *handle, struct dentry *dentry) 462a80f7fcfSHarshad Shirwadkar { 463a80f7fcfSHarshad Shirwadkar __ext4_fc_track_link(handle, d_inode(dentry), dentry); 464a80f7fcfSHarshad Shirwadkar } 465a80f7fcfSHarshad Shirwadkar 4668210bb29SHarshad Shirwadkar void __ext4_fc_track_create(handle_t *handle, struct inode *inode, 4678210bb29SHarshad Shirwadkar struct dentry *dentry) 468aa75f4d3SHarshad Shirwadkar { 469aa75f4d3SHarshad Shirwadkar struct __track_dentry_update_args args; 470aa75f4d3SHarshad Shirwadkar int ret; 471aa75f4d3SHarshad Shirwadkar 472aa75f4d3SHarshad Shirwadkar args.dentry = dentry; 473aa75f4d3SHarshad Shirwadkar args.op = EXT4_FC_TAG_CREAT; 474aa75f4d3SHarshad Shirwadkar 475a80f7fcfSHarshad Shirwadkar ret = ext4_fc_track_template(handle, inode, __track_dentry_update, 476aa75f4d3SHarshad Shirwadkar (void *)&args, 0); 477aa75f4d3SHarshad Shirwadkar trace_ext4_fc_track_create(inode, dentry, ret); 478aa75f4d3SHarshad Shirwadkar } 479aa75f4d3SHarshad Shirwadkar 4808210bb29SHarshad Shirwadkar void ext4_fc_track_create(handle_t *handle, struct dentry *dentry) 4818210bb29SHarshad Shirwadkar { 4828210bb29SHarshad Shirwadkar __ext4_fc_track_create(handle, d_inode(dentry), dentry); 4838210bb29SHarshad Shirwadkar } 4848210bb29SHarshad Shirwadkar 485aa75f4d3SHarshad Shirwadkar /* __track_fn for inode tracking */ 486aa75f4d3SHarshad Shirwadkar static int __track_inode(struct inode *inode, void *arg, bool update) 487aa75f4d3SHarshad Shirwadkar { 488aa75f4d3SHarshad Shirwadkar if (update) 489aa75f4d3SHarshad Shirwadkar return -EEXIST; 490aa75f4d3SHarshad Shirwadkar 491aa75f4d3SHarshad Shirwadkar EXT4_I(inode)->i_fc_lblk_len = 0; 492aa75f4d3SHarshad Shirwadkar 493aa75f4d3SHarshad Shirwadkar return 0; 494aa75f4d3SHarshad Shirwadkar } 495aa75f4d3SHarshad Shirwadkar 496a80f7fcfSHarshad Shirwadkar void ext4_fc_track_inode(handle_t *handle, struct inode *inode) 497aa75f4d3SHarshad Shirwadkar { 498aa75f4d3SHarshad Shirwadkar int ret; 499aa75f4d3SHarshad Shirwadkar 500aa75f4d3SHarshad Shirwadkar if (S_ISDIR(inode->i_mode)) 501aa75f4d3SHarshad Shirwadkar return; 502aa75f4d3SHarshad Shirwadkar 503556e0319SHarshad Shirwadkar if (ext4_should_journal_data(inode)) { 504556e0319SHarshad Shirwadkar ext4_fc_mark_ineligible(inode->i_sb, 505556e0319SHarshad Shirwadkar EXT4_FC_REASON_INODE_JOURNAL_DATA); 506556e0319SHarshad Shirwadkar return; 507556e0319SHarshad Shirwadkar } 508556e0319SHarshad Shirwadkar 509a80f7fcfSHarshad Shirwadkar ret = ext4_fc_track_template(handle, inode, __track_inode, NULL, 1); 510aa75f4d3SHarshad Shirwadkar trace_ext4_fc_track_inode(inode, ret); 511aa75f4d3SHarshad Shirwadkar } 512aa75f4d3SHarshad Shirwadkar 513aa75f4d3SHarshad Shirwadkar struct __track_range_args { 514aa75f4d3SHarshad Shirwadkar ext4_lblk_t start, end; 515aa75f4d3SHarshad Shirwadkar }; 516aa75f4d3SHarshad Shirwadkar 517aa75f4d3SHarshad Shirwadkar /* __track_fn for tracking data updates */ 518aa75f4d3SHarshad Shirwadkar static int __track_range(struct inode *inode, void *arg, bool update) 519aa75f4d3SHarshad Shirwadkar { 520aa75f4d3SHarshad Shirwadkar struct ext4_inode_info *ei = EXT4_I(inode); 521aa75f4d3SHarshad Shirwadkar ext4_lblk_t oldstart; 522aa75f4d3SHarshad Shirwadkar struct __track_range_args *__arg = 523aa75f4d3SHarshad Shirwadkar (struct __track_range_args *)arg; 524aa75f4d3SHarshad Shirwadkar 525aa75f4d3SHarshad Shirwadkar if (inode->i_ino < EXT4_FIRST_INO(inode->i_sb)) { 526aa75f4d3SHarshad Shirwadkar ext4_debug("Special inode %ld being modified\n", inode->i_ino); 527aa75f4d3SHarshad Shirwadkar return -ECANCELED; 528aa75f4d3SHarshad Shirwadkar } 529aa75f4d3SHarshad Shirwadkar 530aa75f4d3SHarshad Shirwadkar oldstart = ei->i_fc_lblk_start; 531aa75f4d3SHarshad Shirwadkar 532aa75f4d3SHarshad Shirwadkar if (update && ei->i_fc_lblk_len > 0) { 533aa75f4d3SHarshad Shirwadkar ei->i_fc_lblk_start = min(ei->i_fc_lblk_start, __arg->start); 534aa75f4d3SHarshad Shirwadkar ei->i_fc_lblk_len = 535aa75f4d3SHarshad Shirwadkar max(oldstart + ei->i_fc_lblk_len - 1, __arg->end) - 536aa75f4d3SHarshad Shirwadkar ei->i_fc_lblk_start + 1; 537aa75f4d3SHarshad Shirwadkar } else { 538aa75f4d3SHarshad Shirwadkar ei->i_fc_lblk_start = __arg->start; 539aa75f4d3SHarshad Shirwadkar ei->i_fc_lblk_len = __arg->end - __arg->start + 1; 540aa75f4d3SHarshad Shirwadkar } 541aa75f4d3SHarshad Shirwadkar 542aa75f4d3SHarshad Shirwadkar return 0; 543aa75f4d3SHarshad Shirwadkar } 544aa75f4d3SHarshad Shirwadkar 545a80f7fcfSHarshad Shirwadkar void ext4_fc_track_range(handle_t *handle, struct inode *inode, ext4_lblk_t start, 546aa75f4d3SHarshad Shirwadkar ext4_lblk_t end) 547aa75f4d3SHarshad Shirwadkar { 548aa75f4d3SHarshad Shirwadkar struct __track_range_args args; 549aa75f4d3SHarshad Shirwadkar int ret; 550aa75f4d3SHarshad Shirwadkar 551aa75f4d3SHarshad Shirwadkar if (S_ISDIR(inode->i_mode)) 552aa75f4d3SHarshad Shirwadkar return; 553aa75f4d3SHarshad Shirwadkar 554aa75f4d3SHarshad Shirwadkar args.start = start; 555aa75f4d3SHarshad Shirwadkar args.end = end; 556aa75f4d3SHarshad Shirwadkar 557a80f7fcfSHarshad Shirwadkar ret = ext4_fc_track_template(handle, inode, __track_range, &args, 1); 558aa75f4d3SHarshad Shirwadkar 559aa75f4d3SHarshad Shirwadkar trace_ext4_fc_track_range(inode, start, end, ret); 560aa75f4d3SHarshad Shirwadkar } 561aa75f4d3SHarshad Shirwadkar 562e9f53353SDaejun Park static void ext4_fc_submit_bh(struct super_block *sb, bool is_tail) 563aa75f4d3SHarshad Shirwadkar { 564aa75f4d3SHarshad Shirwadkar int write_flags = REQ_SYNC; 565aa75f4d3SHarshad Shirwadkar struct buffer_head *bh = EXT4_SB(sb)->s_fc_bh; 566aa75f4d3SHarshad Shirwadkar 567e9f53353SDaejun Park /* Add REQ_FUA | REQ_PREFLUSH only its tail */ 568e9f53353SDaejun Park if (test_opt(sb, BARRIER) && is_tail) 569aa75f4d3SHarshad Shirwadkar write_flags |= REQ_FUA | REQ_PREFLUSH; 570aa75f4d3SHarshad Shirwadkar lock_buffer(bh); 571764b3fd3SHarshad Shirwadkar set_buffer_dirty(bh); 572aa75f4d3SHarshad Shirwadkar set_buffer_uptodate(bh); 573aa75f4d3SHarshad Shirwadkar bh->b_end_io = ext4_end_buffer_io_sync; 574aa75f4d3SHarshad Shirwadkar submit_bh(REQ_OP_WRITE, write_flags, bh); 575aa75f4d3SHarshad Shirwadkar EXT4_SB(sb)->s_fc_bh = NULL; 576aa75f4d3SHarshad Shirwadkar } 577aa75f4d3SHarshad Shirwadkar 578aa75f4d3SHarshad Shirwadkar /* Ext4 commit path routines */ 579aa75f4d3SHarshad Shirwadkar 580aa75f4d3SHarshad Shirwadkar /* memzero and update CRC */ 581aa75f4d3SHarshad Shirwadkar static void *ext4_fc_memzero(struct super_block *sb, void *dst, int len, 582aa75f4d3SHarshad Shirwadkar u32 *crc) 583aa75f4d3SHarshad Shirwadkar { 584aa75f4d3SHarshad Shirwadkar void *ret; 585aa75f4d3SHarshad Shirwadkar 586aa75f4d3SHarshad Shirwadkar ret = memset(dst, 0, len); 587aa75f4d3SHarshad Shirwadkar if (crc) 588aa75f4d3SHarshad Shirwadkar *crc = ext4_chksum(EXT4_SB(sb), *crc, dst, len); 589aa75f4d3SHarshad Shirwadkar return ret; 590aa75f4d3SHarshad Shirwadkar } 591aa75f4d3SHarshad Shirwadkar 592aa75f4d3SHarshad Shirwadkar /* 593aa75f4d3SHarshad Shirwadkar * Allocate len bytes on a fast commit buffer. 594aa75f4d3SHarshad Shirwadkar * 595aa75f4d3SHarshad Shirwadkar * During the commit time this function is used to manage fast commit 596aa75f4d3SHarshad Shirwadkar * block space. We don't split a fast commit log onto different 597aa75f4d3SHarshad Shirwadkar * blocks. So this function makes sure that if there's not enough space 598aa75f4d3SHarshad Shirwadkar * on the current block, the remaining space in the current block is 599aa75f4d3SHarshad Shirwadkar * marked as unused by adding EXT4_FC_TAG_PAD tag. In that case, 600aa75f4d3SHarshad Shirwadkar * new block is from jbd2 and CRC is updated to reflect the padding 601aa75f4d3SHarshad Shirwadkar * we added. 602aa75f4d3SHarshad Shirwadkar */ 603aa75f4d3SHarshad Shirwadkar static u8 *ext4_fc_reserve_space(struct super_block *sb, int len, u32 *crc) 604aa75f4d3SHarshad Shirwadkar { 605aa75f4d3SHarshad Shirwadkar struct ext4_fc_tl *tl; 606aa75f4d3SHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB(sb); 607aa75f4d3SHarshad Shirwadkar struct buffer_head *bh; 608aa75f4d3SHarshad Shirwadkar int bsize = sbi->s_journal->j_blocksize; 609aa75f4d3SHarshad Shirwadkar int ret, off = sbi->s_fc_bytes % bsize; 610aa75f4d3SHarshad Shirwadkar int pad_len; 611aa75f4d3SHarshad Shirwadkar 612aa75f4d3SHarshad Shirwadkar /* 613aa75f4d3SHarshad Shirwadkar * After allocating len, we should have space at least for a 0 byte 614aa75f4d3SHarshad Shirwadkar * padding. 615aa75f4d3SHarshad Shirwadkar */ 616aa75f4d3SHarshad Shirwadkar if (len + sizeof(struct ext4_fc_tl) > bsize) 617aa75f4d3SHarshad Shirwadkar return NULL; 618aa75f4d3SHarshad Shirwadkar 619aa75f4d3SHarshad Shirwadkar if (bsize - off - 1 > len + sizeof(struct ext4_fc_tl)) { 620aa75f4d3SHarshad Shirwadkar /* 621aa75f4d3SHarshad Shirwadkar * Only allocate from current buffer if we have enough space for 622aa75f4d3SHarshad Shirwadkar * this request AND we have space to add a zero byte padding. 623aa75f4d3SHarshad Shirwadkar */ 624aa75f4d3SHarshad Shirwadkar if (!sbi->s_fc_bh) { 625aa75f4d3SHarshad Shirwadkar ret = jbd2_fc_get_buf(EXT4_SB(sb)->s_journal, &bh); 626aa75f4d3SHarshad Shirwadkar if (ret) 627aa75f4d3SHarshad Shirwadkar return NULL; 628aa75f4d3SHarshad Shirwadkar sbi->s_fc_bh = bh; 629aa75f4d3SHarshad Shirwadkar } 630aa75f4d3SHarshad Shirwadkar sbi->s_fc_bytes += len; 631aa75f4d3SHarshad Shirwadkar return sbi->s_fc_bh->b_data + off; 632aa75f4d3SHarshad Shirwadkar } 633aa75f4d3SHarshad Shirwadkar /* Need to add PAD tag */ 634aa75f4d3SHarshad Shirwadkar tl = (struct ext4_fc_tl *)(sbi->s_fc_bh->b_data + off); 635aa75f4d3SHarshad Shirwadkar tl->fc_tag = cpu_to_le16(EXT4_FC_TAG_PAD); 636aa75f4d3SHarshad Shirwadkar pad_len = bsize - off - 1 - sizeof(struct ext4_fc_tl); 637aa75f4d3SHarshad Shirwadkar tl->fc_len = cpu_to_le16(pad_len); 638aa75f4d3SHarshad Shirwadkar if (crc) 639aa75f4d3SHarshad Shirwadkar *crc = ext4_chksum(sbi, *crc, tl, sizeof(*tl)); 640aa75f4d3SHarshad Shirwadkar if (pad_len > 0) 641aa75f4d3SHarshad Shirwadkar ext4_fc_memzero(sb, tl + 1, pad_len, crc); 642e9f53353SDaejun Park ext4_fc_submit_bh(sb, false); 643aa75f4d3SHarshad Shirwadkar 644aa75f4d3SHarshad Shirwadkar ret = jbd2_fc_get_buf(EXT4_SB(sb)->s_journal, &bh); 645aa75f4d3SHarshad Shirwadkar if (ret) 646aa75f4d3SHarshad Shirwadkar return NULL; 647aa75f4d3SHarshad Shirwadkar sbi->s_fc_bh = bh; 648aa75f4d3SHarshad Shirwadkar sbi->s_fc_bytes = (sbi->s_fc_bytes / bsize + 1) * bsize + len; 649aa75f4d3SHarshad Shirwadkar return sbi->s_fc_bh->b_data; 650aa75f4d3SHarshad Shirwadkar } 651aa75f4d3SHarshad Shirwadkar 652aa75f4d3SHarshad Shirwadkar /* memcpy to fc reserved space and update CRC */ 653aa75f4d3SHarshad Shirwadkar static void *ext4_fc_memcpy(struct super_block *sb, void *dst, const void *src, 654aa75f4d3SHarshad Shirwadkar int len, u32 *crc) 655aa75f4d3SHarshad Shirwadkar { 656aa75f4d3SHarshad Shirwadkar if (crc) 657aa75f4d3SHarshad Shirwadkar *crc = ext4_chksum(EXT4_SB(sb), *crc, src, len); 658aa75f4d3SHarshad Shirwadkar return memcpy(dst, src, len); 659aa75f4d3SHarshad Shirwadkar } 660aa75f4d3SHarshad Shirwadkar 661aa75f4d3SHarshad Shirwadkar /* 662aa75f4d3SHarshad Shirwadkar * Complete a fast commit by writing tail tag. 663aa75f4d3SHarshad Shirwadkar * 664aa75f4d3SHarshad Shirwadkar * Writing tail tag marks the end of a fast commit. In order to guarantee 665aa75f4d3SHarshad Shirwadkar * atomicity, after writing tail tag, even if there's space remaining 666aa75f4d3SHarshad Shirwadkar * in the block, next commit shouldn't use it. That's why tail tag 667aa75f4d3SHarshad Shirwadkar * has the length as that of the remaining space on the block. 668aa75f4d3SHarshad Shirwadkar */ 669aa75f4d3SHarshad Shirwadkar static int ext4_fc_write_tail(struct super_block *sb, u32 crc) 670aa75f4d3SHarshad Shirwadkar { 671aa75f4d3SHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB(sb); 672aa75f4d3SHarshad Shirwadkar struct ext4_fc_tl tl; 673aa75f4d3SHarshad Shirwadkar struct ext4_fc_tail tail; 674aa75f4d3SHarshad Shirwadkar int off, bsize = sbi->s_journal->j_blocksize; 675aa75f4d3SHarshad Shirwadkar u8 *dst; 676aa75f4d3SHarshad Shirwadkar 677aa75f4d3SHarshad Shirwadkar /* 678aa75f4d3SHarshad Shirwadkar * ext4_fc_reserve_space takes care of allocating an extra block if 679aa75f4d3SHarshad Shirwadkar * there's no enough space on this block for accommodating this tail. 680aa75f4d3SHarshad Shirwadkar */ 681aa75f4d3SHarshad Shirwadkar dst = ext4_fc_reserve_space(sb, sizeof(tl) + sizeof(tail), &crc); 682aa75f4d3SHarshad Shirwadkar if (!dst) 683aa75f4d3SHarshad Shirwadkar return -ENOSPC; 684aa75f4d3SHarshad Shirwadkar 685aa75f4d3SHarshad Shirwadkar off = sbi->s_fc_bytes % bsize; 686aa75f4d3SHarshad Shirwadkar 687aa75f4d3SHarshad Shirwadkar tl.fc_tag = cpu_to_le16(EXT4_FC_TAG_TAIL); 688aa75f4d3SHarshad Shirwadkar tl.fc_len = cpu_to_le16(bsize - off - 1 + sizeof(struct ext4_fc_tail)); 689aa75f4d3SHarshad Shirwadkar sbi->s_fc_bytes = round_up(sbi->s_fc_bytes, bsize); 690aa75f4d3SHarshad Shirwadkar 691aa75f4d3SHarshad Shirwadkar ext4_fc_memcpy(sb, dst, &tl, sizeof(tl), &crc); 692aa75f4d3SHarshad Shirwadkar dst += sizeof(tl); 693aa75f4d3SHarshad Shirwadkar tail.fc_tid = cpu_to_le32(sbi->s_journal->j_running_transaction->t_tid); 694aa75f4d3SHarshad Shirwadkar ext4_fc_memcpy(sb, dst, &tail.fc_tid, sizeof(tail.fc_tid), &crc); 695aa75f4d3SHarshad Shirwadkar dst += sizeof(tail.fc_tid); 696aa75f4d3SHarshad Shirwadkar tail.fc_crc = cpu_to_le32(crc); 697aa75f4d3SHarshad Shirwadkar ext4_fc_memcpy(sb, dst, &tail.fc_crc, sizeof(tail.fc_crc), NULL); 698aa75f4d3SHarshad Shirwadkar 699e9f53353SDaejun Park ext4_fc_submit_bh(sb, true); 700aa75f4d3SHarshad Shirwadkar 701aa75f4d3SHarshad Shirwadkar return 0; 702aa75f4d3SHarshad Shirwadkar } 703aa75f4d3SHarshad Shirwadkar 704aa75f4d3SHarshad Shirwadkar /* 705aa75f4d3SHarshad Shirwadkar * Adds tag, length, value and updates CRC. Returns true if tlv was added. 706aa75f4d3SHarshad Shirwadkar * Returns false if there's not enough space. 707aa75f4d3SHarshad Shirwadkar */ 708aa75f4d3SHarshad Shirwadkar static bool ext4_fc_add_tlv(struct super_block *sb, u16 tag, u16 len, u8 *val, 709aa75f4d3SHarshad Shirwadkar u32 *crc) 710aa75f4d3SHarshad Shirwadkar { 711aa75f4d3SHarshad Shirwadkar struct ext4_fc_tl tl; 712aa75f4d3SHarshad Shirwadkar u8 *dst; 713aa75f4d3SHarshad Shirwadkar 714aa75f4d3SHarshad Shirwadkar dst = ext4_fc_reserve_space(sb, sizeof(tl) + len, crc); 715aa75f4d3SHarshad Shirwadkar if (!dst) 716aa75f4d3SHarshad Shirwadkar return false; 717aa75f4d3SHarshad Shirwadkar 718aa75f4d3SHarshad Shirwadkar tl.fc_tag = cpu_to_le16(tag); 719aa75f4d3SHarshad Shirwadkar tl.fc_len = cpu_to_le16(len); 720aa75f4d3SHarshad Shirwadkar 721aa75f4d3SHarshad Shirwadkar ext4_fc_memcpy(sb, dst, &tl, sizeof(tl), crc); 722aa75f4d3SHarshad Shirwadkar ext4_fc_memcpy(sb, dst + sizeof(tl), val, len, crc); 723aa75f4d3SHarshad Shirwadkar 724aa75f4d3SHarshad Shirwadkar return true; 725aa75f4d3SHarshad Shirwadkar } 726aa75f4d3SHarshad Shirwadkar 727aa75f4d3SHarshad Shirwadkar /* Same as above, but adds dentry tlv. */ 728facec450SGuoqing Jiang static bool ext4_fc_add_dentry_tlv(struct super_block *sb, u32 *crc, 729facec450SGuoqing Jiang struct ext4_fc_dentry_update *fc_dentry) 730aa75f4d3SHarshad Shirwadkar { 731aa75f4d3SHarshad Shirwadkar struct ext4_fc_dentry_info fcd; 732aa75f4d3SHarshad Shirwadkar struct ext4_fc_tl tl; 733facec450SGuoqing Jiang int dlen = fc_dentry->fcd_name.len; 734aa75f4d3SHarshad Shirwadkar u8 *dst = ext4_fc_reserve_space(sb, sizeof(tl) + sizeof(fcd) + dlen, 735aa75f4d3SHarshad Shirwadkar crc); 736aa75f4d3SHarshad Shirwadkar 737aa75f4d3SHarshad Shirwadkar if (!dst) 738aa75f4d3SHarshad Shirwadkar return false; 739aa75f4d3SHarshad Shirwadkar 740facec450SGuoqing Jiang fcd.fc_parent_ino = cpu_to_le32(fc_dentry->fcd_parent); 741facec450SGuoqing Jiang fcd.fc_ino = cpu_to_le32(fc_dentry->fcd_ino); 742facec450SGuoqing Jiang tl.fc_tag = cpu_to_le16(fc_dentry->fcd_op); 743aa75f4d3SHarshad Shirwadkar tl.fc_len = cpu_to_le16(sizeof(fcd) + dlen); 744aa75f4d3SHarshad Shirwadkar ext4_fc_memcpy(sb, dst, &tl, sizeof(tl), crc); 745aa75f4d3SHarshad Shirwadkar dst += sizeof(tl); 746aa75f4d3SHarshad Shirwadkar ext4_fc_memcpy(sb, dst, &fcd, sizeof(fcd), crc); 747aa75f4d3SHarshad Shirwadkar dst += sizeof(fcd); 748facec450SGuoqing Jiang ext4_fc_memcpy(sb, dst, fc_dentry->fcd_name.name, dlen, crc); 749aa75f4d3SHarshad Shirwadkar 750aa75f4d3SHarshad Shirwadkar return true; 751aa75f4d3SHarshad Shirwadkar } 752aa75f4d3SHarshad Shirwadkar 753aa75f4d3SHarshad Shirwadkar /* 754aa75f4d3SHarshad Shirwadkar * Writes inode in the fast commit space under TLV with tag @tag. 755aa75f4d3SHarshad Shirwadkar * Returns 0 on success, error on failure. 756aa75f4d3SHarshad Shirwadkar */ 757aa75f4d3SHarshad Shirwadkar static int ext4_fc_write_inode(struct inode *inode, u32 *crc) 758aa75f4d3SHarshad Shirwadkar { 759aa75f4d3SHarshad Shirwadkar struct ext4_inode_info *ei = EXT4_I(inode); 760aa75f4d3SHarshad Shirwadkar int inode_len = EXT4_GOOD_OLD_INODE_SIZE; 761aa75f4d3SHarshad Shirwadkar int ret; 762aa75f4d3SHarshad Shirwadkar struct ext4_iloc iloc; 763aa75f4d3SHarshad Shirwadkar struct ext4_fc_inode fc_inode; 764aa75f4d3SHarshad Shirwadkar struct ext4_fc_tl tl; 765aa75f4d3SHarshad Shirwadkar u8 *dst; 766aa75f4d3SHarshad Shirwadkar 767aa75f4d3SHarshad Shirwadkar ret = ext4_get_inode_loc(inode, &iloc); 768aa75f4d3SHarshad Shirwadkar if (ret) 769aa75f4d3SHarshad Shirwadkar return ret; 770aa75f4d3SHarshad Shirwadkar 7716c31a689SHarshad Shirwadkar if (ext4_test_inode_flag(inode, EXT4_INODE_INLINE_DATA)) 7726c31a689SHarshad Shirwadkar inode_len = EXT4_INODE_SIZE(inode->i_sb); 7736c31a689SHarshad Shirwadkar else if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) 774aa75f4d3SHarshad Shirwadkar inode_len += ei->i_extra_isize; 775aa75f4d3SHarshad Shirwadkar 776aa75f4d3SHarshad Shirwadkar fc_inode.fc_ino = cpu_to_le32(inode->i_ino); 777aa75f4d3SHarshad Shirwadkar tl.fc_tag = cpu_to_le16(EXT4_FC_TAG_INODE); 778aa75f4d3SHarshad Shirwadkar tl.fc_len = cpu_to_le16(inode_len + sizeof(fc_inode.fc_ino)); 779aa75f4d3SHarshad Shirwadkar 780aa75f4d3SHarshad Shirwadkar dst = ext4_fc_reserve_space(inode->i_sb, 781aa75f4d3SHarshad Shirwadkar sizeof(tl) + inode_len + sizeof(fc_inode.fc_ino), crc); 782aa75f4d3SHarshad Shirwadkar if (!dst) 783aa75f4d3SHarshad Shirwadkar return -ECANCELED; 784aa75f4d3SHarshad Shirwadkar 785aa75f4d3SHarshad Shirwadkar if (!ext4_fc_memcpy(inode->i_sb, dst, &tl, sizeof(tl), crc)) 786aa75f4d3SHarshad Shirwadkar return -ECANCELED; 787aa75f4d3SHarshad Shirwadkar dst += sizeof(tl); 788aa75f4d3SHarshad Shirwadkar if (!ext4_fc_memcpy(inode->i_sb, dst, &fc_inode, sizeof(fc_inode), crc)) 789aa75f4d3SHarshad Shirwadkar return -ECANCELED; 790aa75f4d3SHarshad Shirwadkar dst += sizeof(fc_inode); 791aa75f4d3SHarshad Shirwadkar if (!ext4_fc_memcpy(inode->i_sb, dst, (u8 *)ext4_raw_inode(&iloc), 792aa75f4d3SHarshad Shirwadkar inode_len, crc)) 793aa75f4d3SHarshad Shirwadkar return -ECANCELED; 794aa75f4d3SHarshad Shirwadkar 795aa75f4d3SHarshad Shirwadkar return 0; 796aa75f4d3SHarshad Shirwadkar } 797aa75f4d3SHarshad Shirwadkar 798aa75f4d3SHarshad Shirwadkar /* 799aa75f4d3SHarshad Shirwadkar * Writes updated data ranges for the inode in question. Updates CRC. 800aa75f4d3SHarshad Shirwadkar * Returns 0 on success, error otherwise. 801aa75f4d3SHarshad Shirwadkar */ 802aa75f4d3SHarshad Shirwadkar static int ext4_fc_write_inode_data(struct inode *inode, u32 *crc) 803aa75f4d3SHarshad Shirwadkar { 804aa75f4d3SHarshad Shirwadkar ext4_lblk_t old_blk_size, cur_lblk_off, new_blk_size; 805aa75f4d3SHarshad Shirwadkar struct ext4_inode_info *ei = EXT4_I(inode); 806aa75f4d3SHarshad Shirwadkar struct ext4_map_blocks map; 807aa75f4d3SHarshad Shirwadkar struct ext4_fc_add_range fc_ext; 808aa75f4d3SHarshad Shirwadkar struct ext4_fc_del_range lrange; 809aa75f4d3SHarshad Shirwadkar struct ext4_extent *ex; 810aa75f4d3SHarshad Shirwadkar int ret; 811aa75f4d3SHarshad Shirwadkar 812aa75f4d3SHarshad Shirwadkar mutex_lock(&ei->i_fc_lock); 813aa75f4d3SHarshad Shirwadkar if (ei->i_fc_lblk_len == 0) { 814aa75f4d3SHarshad Shirwadkar mutex_unlock(&ei->i_fc_lock); 815aa75f4d3SHarshad Shirwadkar return 0; 816aa75f4d3SHarshad Shirwadkar } 817aa75f4d3SHarshad Shirwadkar old_blk_size = ei->i_fc_lblk_start; 818aa75f4d3SHarshad Shirwadkar new_blk_size = ei->i_fc_lblk_start + ei->i_fc_lblk_len - 1; 819aa75f4d3SHarshad Shirwadkar ei->i_fc_lblk_len = 0; 820aa75f4d3SHarshad Shirwadkar mutex_unlock(&ei->i_fc_lock); 821aa75f4d3SHarshad Shirwadkar 822aa75f4d3SHarshad Shirwadkar cur_lblk_off = old_blk_size; 823aa75f4d3SHarshad Shirwadkar jbd_debug(1, "%s: will try writing %d to %d for inode %ld\n", 824aa75f4d3SHarshad Shirwadkar __func__, cur_lblk_off, new_blk_size, inode->i_ino); 825aa75f4d3SHarshad Shirwadkar 826aa75f4d3SHarshad Shirwadkar while (cur_lblk_off <= new_blk_size) { 827aa75f4d3SHarshad Shirwadkar map.m_lblk = cur_lblk_off; 828aa75f4d3SHarshad Shirwadkar map.m_len = new_blk_size - cur_lblk_off + 1; 829aa75f4d3SHarshad Shirwadkar ret = ext4_map_blocks(NULL, inode, &map, 0); 830aa75f4d3SHarshad Shirwadkar if (ret < 0) 831aa75f4d3SHarshad Shirwadkar return -ECANCELED; 832aa75f4d3SHarshad Shirwadkar 833aa75f4d3SHarshad Shirwadkar if (map.m_len == 0) { 834aa75f4d3SHarshad Shirwadkar cur_lblk_off++; 835aa75f4d3SHarshad Shirwadkar continue; 836aa75f4d3SHarshad Shirwadkar } 837aa75f4d3SHarshad Shirwadkar 838aa75f4d3SHarshad Shirwadkar if (ret == 0) { 839aa75f4d3SHarshad Shirwadkar lrange.fc_ino = cpu_to_le32(inode->i_ino); 840aa75f4d3SHarshad Shirwadkar lrange.fc_lblk = cpu_to_le32(map.m_lblk); 841aa75f4d3SHarshad Shirwadkar lrange.fc_len = cpu_to_le32(map.m_len); 842aa75f4d3SHarshad Shirwadkar if (!ext4_fc_add_tlv(inode->i_sb, EXT4_FC_TAG_DEL_RANGE, 843aa75f4d3SHarshad Shirwadkar sizeof(lrange), (u8 *)&lrange, crc)) 844aa75f4d3SHarshad Shirwadkar return -ENOSPC; 845aa75f4d3SHarshad Shirwadkar } else { 846a2c2f082SHou Tao unsigned int max = (map.m_flags & EXT4_MAP_UNWRITTEN) ? 847a2c2f082SHou Tao EXT_UNWRITTEN_MAX_LEN : EXT_INIT_MAX_LEN; 848a2c2f082SHou Tao 849a2c2f082SHou Tao /* Limit the number of blocks in one extent */ 850a2c2f082SHou Tao map.m_len = min(max, map.m_len); 851a2c2f082SHou Tao 852aa75f4d3SHarshad Shirwadkar fc_ext.fc_ino = cpu_to_le32(inode->i_ino); 853aa75f4d3SHarshad Shirwadkar ex = (struct ext4_extent *)&fc_ext.fc_ex; 854aa75f4d3SHarshad Shirwadkar ex->ee_block = cpu_to_le32(map.m_lblk); 855aa75f4d3SHarshad Shirwadkar ex->ee_len = cpu_to_le16(map.m_len); 856aa75f4d3SHarshad Shirwadkar ext4_ext_store_pblock(ex, map.m_pblk); 857aa75f4d3SHarshad Shirwadkar if (map.m_flags & EXT4_MAP_UNWRITTEN) 858aa75f4d3SHarshad Shirwadkar ext4_ext_mark_unwritten(ex); 859aa75f4d3SHarshad Shirwadkar else 860aa75f4d3SHarshad Shirwadkar ext4_ext_mark_initialized(ex); 861aa75f4d3SHarshad Shirwadkar if (!ext4_fc_add_tlv(inode->i_sb, EXT4_FC_TAG_ADD_RANGE, 862aa75f4d3SHarshad Shirwadkar sizeof(fc_ext), (u8 *)&fc_ext, crc)) 863aa75f4d3SHarshad Shirwadkar return -ENOSPC; 864aa75f4d3SHarshad Shirwadkar } 865aa75f4d3SHarshad Shirwadkar 866aa75f4d3SHarshad Shirwadkar cur_lblk_off += map.m_len; 867aa75f4d3SHarshad Shirwadkar } 868aa75f4d3SHarshad Shirwadkar 869aa75f4d3SHarshad Shirwadkar return 0; 870aa75f4d3SHarshad Shirwadkar } 871aa75f4d3SHarshad Shirwadkar 872aa75f4d3SHarshad Shirwadkar 873aa75f4d3SHarshad Shirwadkar /* Submit data for all the fast commit inodes */ 874aa75f4d3SHarshad Shirwadkar static int ext4_fc_submit_inode_data_all(journal_t *journal) 875aa75f4d3SHarshad Shirwadkar { 876aa75f4d3SHarshad Shirwadkar struct super_block *sb = (struct super_block *)(journal->j_private); 877aa75f4d3SHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB(sb); 878aa75f4d3SHarshad Shirwadkar struct ext4_inode_info *ei; 879aa75f4d3SHarshad Shirwadkar int ret = 0; 880aa75f4d3SHarshad Shirwadkar 881aa75f4d3SHarshad Shirwadkar spin_lock(&sbi->s_fc_lock); 8829b5f6c9bSHarshad Shirwadkar ext4_set_mount_flag(sb, EXT4_MF_FC_COMMITTING); 88396e7c02dSDaejun Park list_for_each_entry(ei, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) { 884aa75f4d3SHarshad Shirwadkar ext4_set_inode_state(&ei->vfs_inode, EXT4_STATE_FC_COMMITTING); 885aa75f4d3SHarshad Shirwadkar while (atomic_read(&ei->i_fc_updates)) { 886aa75f4d3SHarshad Shirwadkar DEFINE_WAIT(wait); 887aa75f4d3SHarshad Shirwadkar 888aa75f4d3SHarshad Shirwadkar prepare_to_wait(&ei->i_fc_wait, &wait, 889aa75f4d3SHarshad Shirwadkar TASK_UNINTERRUPTIBLE); 890aa75f4d3SHarshad Shirwadkar if (atomic_read(&ei->i_fc_updates)) { 891aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock); 892aa75f4d3SHarshad Shirwadkar schedule(); 893aa75f4d3SHarshad Shirwadkar spin_lock(&sbi->s_fc_lock); 894aa75f4d3SHarshad Shirwadkar } 895aa75f4d3SHarshad Shirwadkar finish_wait(&ei->i_fc_wait, &wait); 896aa75f4d3SHarshad Shirwadkar } 897aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock); 898aa75f4d3SHarshad Shirwadkar ret = jbd2_submit_inode_data(ei->jinode); 899aa75f4d3SHarshad Shirwadkar if (ret) 900aa75f4d3SHarshad Shirwadkar return ret; 901aa75f4d3SHarshad Shirwadkar spin_lock(&sbi->s_fc_lock); 902aa75f4d3SHarshad Shirwadkar } 903aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock); 904aa75f4d3SHarshad Shirwadkar 905aa75f4d3SHarshad Shirwadkar return ret; 906aa75f4d3SHarshad Shirwadkar } 907aa75f4d3SHarshad Shirwadkar 908aa75f4d3SHarshad Shirwadkar /* Wait for completion of data for all the fast commit inodes */ 909aa75f4d3SHarshad Shirwadkar static int ext4_fc_wait_inode_data_all(journal_t *journal) 910aa75f4d3SHarshad Shirwadkar { 911aa75f4d3SHarshad Shirwadkar struct super_block *sb = (struct super_block *)(journal->j_private); 912aa75f4d3SHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB(sb); 913aa75f4d3SHarshad Shirwadkar struct ext4_inode_info *pos, *n; 914aa75f4d3SHarshad Shirwadkar int ret = 0; 915aa75f4d3SHarshad Shirwadkar 916aa75f4d3SHarshad Shirwadkar spin_lock(&sbi->s_fc_lock); 917aa75f4d3SHarshad Shirwadkar list_for_each_entry_safe(pos, n, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) { 918aa75f4d3SHarshad Shirwadkar if (!ext4_test_inode_state(&pos->vfs_inode, 919aa75f4d3SHarshad Shirwadkar EXT4_STATE_FC_COMMITTING)) 920aa75f4d3SHarshad Shirwadkar continue; 921aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock); 922aa75f4d3SHarshad Shirwadkar 923aa75f4d3SHarshad Shirwadkar ret = jbd2_wait_inode_data(journal, pos->jinode); 924aa75f4d3SHarshad Shirwadkar if (ret) 925aa75f4d3SHarshad Shirwadkar return ret; 926aa75f4d3SHarshad Shirwadkar spin_lock(&sbi->s_fc_lock); 927aa75f4d3SHarshad Shirwadkar } 928aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock); 929aa75f4d3SHarshad Shirwadkar 930aa75f4d3SHarshad Shirwadkar return 0; 931aa75f4d3SHarshad Shirwadkar } 932aa75f4d3SHarshad Shirwadkar 933aa75f4d3SHarshad Shirwadkar /* Commit all the directory entry updates */ 934aa75f4d3SHarshad Shirwadkar static int ext4_fc_commit_dentry_updates(journal_t *journal, u32 *crc) 935fa329e27STheodore Ts'o __acquires(&sbi->s_fc_lock) 936fa329e27STheodore Ts'o __releases(&sbi->s_fc_lock) 937aa75f4d3SHarshad Shirwadkar { 938aa75f4d3SHarshad Shirwadkar struct super_block *sb = (struct super_block *)(journal->j_private); 939aa75f4d3SHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB(sb); 94096e7c02dSDaejun Park struct ext4_fc_dentry_update *fc_dentry, *fc_dentry_n; 941aa75f4d3SHarshad Shirwadkar struct inode *inode; 94296e7c02dSDaejun Park struct ext4_inode_info *ei, *ei_n; 943aa75f4d3SHarshad Shirwadkar int ret; 944aa75f4d3SHarshad Shirwadkar 945aa75f4d3SHarshad Shirwadkar if (list_empty(&sbi->s_fc_dentry_q[FC_Q_MAIN])) 946aa75f4d3SHarshad Shirwadkar return 0; 94796e7c02dSDaejun Park list_for_each_entry_safe(fc_dentry, fc_dentry_n, 94896e7c02dSDaejun Park &sbi->s_fc_dentry_q[FC_Q_MAIN], fcd_list) { 949aa75f4d3SHarshad Shirwadkar if (fc_dentry->fcd_op != EXT4_FC_TAG_CREAT) { 950aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock); 951facec450SGuoqing Jiang if (!ext4_fc_add_dentry_tlv(sb, crc, fc_dentry)) { 952aa75f4d3SHarshad Shirwadkar ret = -ENOSPC; 953aa75f4d3SHarshad Shirwadkar goto lock_and_exit; 954aa75f4d3SHarshad Shirwadkar } 955aa75f4d3SHarshad Shirwadkar spin_lock(&sbi->s_fc_lock); 956aa75f4d3SHarshad Shirwadkar continue; 957aa75f4d3SHarshad Shirwadkar } 958aa75f4d3SHarshad Shirwadkar 959aa75f4d3SHarshad Shirwadkar inode = NULL; 96096e7c02dSDaejun Park list_for_each_entry_safe(ei, ei_n, &sbi->s_fc_q[FC_Q_MAIN], 96196e7c02dSDaejun Park i_fc_list) { 962aa75f4d3SHarshad Shirwadkar if (ei->vfs_inode.i_ino == fc_dentry->fcd_ino) { 963aa75f4d3SHarshad Shirwadkar inode = &ei->vfs_inode; 964aa75f4d3SHarshad Shirwadkar break; 965aa75f4d3SHarshad Shirwadkar } 966aa75f4d3SHarshad Shirwadkar } 967aa75f4d3SHarshad Shirwadkar /* 968aa75f4d3SHarshad Shirwadkar * If we don't find inode in our list, then it was deleted, 969aa75f4d3SHarshad Shirwadkar * in which case, we don't need to record it's create tag. 970aa75f4d3SHarshad Shirwadkar */ 971aa75f4d3SHarshad Shirwadkar if (!inode) 972aa75f4d3SHarshad Shirwadkar continue; 973aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock); 974aa75f4d3SHarshad Shirwadkar 975aa75f4d3SHarshad Shirwadkar /* 976aa75f4d3SHarshad Shirwadkar * We first write the inode and then the create dirent. This 977aa75f4d3SHarshad Shirwadkar * allows the recovery code to create an unnamed inode first 978aa75f4d3SHarshad Shirwadkar * and then link it to a directory entry. This allows us 979aa75f4d3SHarshad Shirwadkar * to use namei.c routines almost as is and simplifies 980aa75f4d3SHarshad Shirwadkar * the recovery code. 981aa75f4d3SHarshad Shirwadkar */ 982aa75f4d3SHarshad Shirwadkar ret = ext4_fc_write_inode(inode, crc); 983aa75f4d3SHarshad Shirwadkar if (ret) 984aa75f4d3SHarshad Shirwadkar goto lock_and_exit; 985aa75f4d3SHarshad Shirwadkar 986aa75f4d3SHarshad Shirwadkar ret = ext4_fc_write_inode_data(inode, crc); 987aa75f4d3SHarshad Shirwadkar if (ret) 988aa75f4d3SHarshad Shirwadkar goto lock_and_exit; 989aa75f4d3SHarshad Shirwadkar 990facec450SGuoqing Jiang if (!ext4_fc_add_dentry_tlv(sb, crc, fc_dentry)) { 991aa75f4d3SHarshad Shirwadkar ret = -ENOSPC; 992aa75f4d3SHarshad Shirwadkar goto lock_and_exit; 993aa75f4d3SHarshad Shirwadkar } 994aa75f4d3SHarshad Shirwadkar 995aa75f4d3SHarshad Shirwadkar spin_lock(&sbi->s_fc_lock); 996aa75f4d3SHarshad Shirwadkar } 997aa75f4d3SHarshad Shirwadkar return 0; 998aa75f4d3SHarshad Shirwadkar lock_and_exit: 999aa75f4d3SHarshad Shirwadkar spin_lock(&sbi->s_fc_lock); 1000aa75f4d3SHarshad Shirwadkar return ret; 1001aa75f4d3SHarshad Shirwadkar } 1002aa75f4d3SHarshad Shirwadkar 1003aa75f4d3SHarshad Shirwadkar static int ext4_fc_perform_commit(journal_t *journal) 1004aa75f4d3SHarshad Shirwadkar { 1005aa75f4d3SHarshad Shirwadkar struct super_block *sb = (struct super_block *)(journal->j_private); 1006aa75f4d3SHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB(sb); 1007aa75f4d3SHarshad Shirwadkar struct ext4_inode_info *iter; 1008aa75f4d3SHarshad Shirwadkar struct ext4_fc_head head; 1009aa75f4d3SHarshad Shirwadkar struct inode *inode; 1010aa75f4d3SHarshad Shirwadkar struct blk_plug plug; 1011aa75f4d3SHarshad Shirwadkar int ret = 0; 1012aa75f4d3SHarshad Shirwadkar u32 crc = 0; 1013aa75f4d3SHarshad Shirwadkar 1014aa75f4d3SHarshad Shirwadkar ret = ext4_fc_submit_inode_data_all(journal); 1015aa75f4d3SHarshad Shirwadkar if (ret) 1016aa75f4d3SHarshad Shirwadkar return ret; 1017aa75f4d3SHarshad Shirwadkar 1018aa75f4d3SHarshad Shirwadkar ret = ext4_fc_wait_inode_data_all(journal); 1019aa75f4d3SHarshad Shirwadkar if (ret) 1020aa75f4d3SHarshad Shirwadkar return ret; 1021aa75f4d3SHarshad Shirwadkar 1022da0c5d26SHarshad Shirwadkar /* 1023da0c5d26SHarshad Shirwadkar * If file system device is different from journal device, issue a cache 1024da0c5d26SHarshad Shirwadkar * flush before we start writing fast commit blocks. 1025da0c5d26SHarshad Shirwadkar */ 1026da0c5d26SHarshad Shirwadkar if (journal->j_fs_dev != journal->j_dev) 1027c6bf3f0eSChristoph Hellwig blkdev_issue_flush(journal->j_fs_dev); 1028da0c5d26SHarshad Shirwadkar 1029aa75f4d3SHarshad Shirwadkar blk_start_plug(&plug); 1030aa75f4d3SHarshad Shirwadkar if (sbi->s_fc_bytes == 0) { 1031aa75f4d3SHarshad Shirwadkar /* 1032aa75f4d3SHarshad Shirwadkar * Add a head tag only if this is the first fast commit 1033aa75f4d3SHarshad Shirwadkar * in this TID. 1034aa75f4d3SHarshad Shirwadkar */ 1035aa75f4d3SHarshad Shirwadkar head.fc_features = cpu_to_le32(EXT4_FC_SUPPORTED_FEATURES); 1036aa75f4d3SHarshad Shirwadkar head.fc_tid = cpu_to_le32( 1037aa75f4d3SHarshad Shirwadkar sbi->s_journal->j_running_transaction->t_tid); 1038aa75f4d3SHarshad Shirwadkar if (!ext4_fc_add_tlv(sb, EXT4_FC_TAG_HEAD, sizeof(head), 1039e1262cd2SXu Yihang (u8 *)&head, &crc)) { 1040e1262cd2SXu Yihang ret = -ENOSPC; 1041aa75f4d3SHarshad Shirwadkar goto out; 1042aa75f4d3SHarshad Shirwadkar } 1043e1262cd2SXu Yihang } 1044aa75f4d3SHarshad Shirwadkar 1045aa75f4d3SHarshad Shirwadkar spin_lock(&sbi->s_fc_lock); 1046aa75f4d3SHarshad Shirwadkar ret = ext4_fc_commit_dentry_updates(journal, &crc); 1047aa75f4d3SHarshad Shirwadkar if (ret) { 1048aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock); 1049aa75f4d3SHarshad Shirwadkar goto out; 1050aa75f4d3SHarshad Shirwadkar } 1051aa75f4d3SHarshad Shirwadkar 105296e7c02dSDaejun Park list_for_each_entry(iter, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) { 1053aa75f4d3SHarshad Shirwadkar inode = &iter->vfs_inode; 1054aa75f4d3SHarshad Shirwadkar if (!ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING)) 1055aa75f4d3SHarshad Shirwadkar continue; 1056aa75f4d3SHarshad Shirwadkar 1057aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock); 1058aa75f4d3SHarshad Shirwadkar ret = ext4_fc_write_inode_data(inode, &crc); 1059aa75f4d3SHarshad Shirwadkar if (ret) 1060aa75f4d3SHarshad Shirwadkar goto out; 1061aa75f4d3SHarshad Shirwadkar ret = ext4_fc_write_inode(inode, &crc); 1062aa75f4d3SHarshad Shirwadkar if (ret) 1063aa75f4d3SHarshad Shirwadkar goto out; 1064aa75f4d3SHarshad Shirwadkar spin_lock(&sbi->s_fc_lock); 1065aa75f4d3SHarshad Shirwadkar } 1066aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock); 1067aa75f4d3SHarshad Shirwadkar 1068aa75f4d3SHarshad Shirwadkar ret = ext4_fc_write_tail(sb, crc); 1069aa75f4d3SHarshad Shirwadkar 1070aa75f4d3SHarshad Shirwadkar out: 1071aa75f4d3SHarshad Shirwadkar blk_finish_plug(&plug); 1072aa75f4d3SHarshad Shirwadkar return ret; 1073aa75f4d3SHarshad Shirwadkar } 1074aa75f4d3SHarshad Shirwadkar 10750915e464SHarshad Shirwadkar static void ext4_fc_update_stats(struct super_block *sb, int status, 10760915e464SHarshad Shirwadkar u64 commit_time, int nblks) 10770915e464SHarshad Shirwadkar { 10780915e464SHarshad Shirwadkar struct ext4_fc_stats *stats = &EXT4_SB(sb)->s_fc_stats; 10790915e464SHarshad Shirwadkar 10800915e464SHarshad Shirwadkar jbd_debug(1, "Fast commit ended with status = %d", status); 10810915e464SHarshad Shirwadkar if (status == EXT4_FC_STATUS_OK) { 10820915e464SHarshad Shirwadkar stats->fc_num_commits++; 10830915e464SHarshad Shirwadkar stats->fc_numblks += nblks; 10840915e464SHarshad Shirwadkar if (likely(stats->s_fc_avg_commit_time)) 10850915e464SHarshad Shirwadkar stats->s_fc_avg_commit_time = 10860915e464SHarshad Shirwadkar (commit_time + 10870915e464SHarshad Shirwadkar stats->s_fc_avg_commit_time * 3) / 4; 10880915e464SHarshad Shirwadkar else 10890915e464SHarshad Shirwadkar stats->s_fc_avg_commit_time = commit_time; 10900915e464SHarshad Shirwadkar } else if (status == EXT4_FC_STATUS_FAILED || 10910915e464SHarshad Shirwadkar status == EXT4_FC_STATUS_INELIGIBLE) { 10920915e464SHarshad Shirwadkar if (status == EXT4_FC_STATUS_FAILED) 10930915e464SHarshad Shirwadkar stats->fc_failed_commits++; 10940915e464SHarshad Shirwadkar stats->fc_ineligible_commits++; 10950915e464SHarshad Shirwadkar } else { 10960915e464SHarshad Shirwadkar stats->fc_skipped_commits++; 10970915e464SHarshad Shirwadkar } 10980915e464SHarshad Shirwadkar trace_ext4_fc_commit_stop(sb, nblks, status); 10990915e464SHarshad Shirwadkar } 11000915e464SHarshad Shirwadkar 1101aa75f4d3SHarshad Shirwadkar /* 1102aa75f4d3SHarshad Shirwadkar * The main commit entry point. Performs a fast commit for transaction 1103aa75f4d3SHarshad Shirwadkar * commit_tid if needed. If it's not possible to perform a fast commit 1104aa75f4d3SHarshad Shirwadkar * due to various reasons, we fall back to full commit. Returns 0 1105aa75f4d3SHarshad Shirwadkar * on success, error otherwise. 1106aa75f4d3SHarshad Shirwadkar */ 1107aa75f4d3SHarshad Shirwadkar int ext4_fc_commit(journal_t *journal, tid_t commit_tid) 1108aa75f4d3SHarshad Shirwadkar { 1109aa75f4d3SHarshad Shirwadkar struct super_block *sb = (struct super_block *)(journal->j_private); 1110aa75f4d3SHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB(sb); 1111aa75f4d3SHarshad Shirwadkar int nblks = 0, ret, bsize = journal->j_blocksize; 1112aa75f4d3SHarshad Shirwadkar int subtid = atomic_read(&sbi->s_fc_subtid); 11130915e464SHarshad Shirwadkar int status = EXT4_FC_STATUS_OK, fc_bufs_before = 0; 1114aa75f4d3SHarshad Shirwadkar ktime_t start_time, commit_time; 1115aa75f4d3SHarshad Shirwadkar 1116aa75f4d3SHarshad Shirwadkar trace_ext4_fc_commit_start(sb); 1117aa75f4d3SHarshad Shirwadkar 1118aa75f4d3SHarshad Shirwadkar start_time = ktime_get(); 1119aa75f4d3SHarshad Shirwadkar 11207bbbe241SHarshad Shirwadkar if (!test_opt2(sb, JOURNAL_FAST_COMMIT)) 11217bbbe241SHarshad Shirwadkar return jbd2_complete_transaction(journal, commit_tid); 1122aa75f4d3SHarshad Shirwadkar 1123aa75f4d3SHarshad Shirwadkar restart_fc: 1124aa75f4d3SHarshad Shirwadkar ret = jbd2_fc_begin_commit(journal, commit_tid); 1125aa75f4d3SHarshad Shirwadkar if (ret == -EALREADY) { 1126aa75f4d3SHarshad Shirwadkar /* There was an ongoing commit, check if we need to restart */ 1127aa75f4d3SHarshad Shirwadkar if (atomic_read(&sbi->s_fc_subtid) <= subtid && 1128aa75f4d3SHarshad Shirwadkar commit_tid > journal->j_commit_sequence) 1129aa75f4d3SHarshad Shirwadkar goto restart_fc; 11300915e464SHarshad Shirwadkar ext4_fc_update_stats(sb, EXT4_FC_STATUS_SKIPPED, 0, 0); 11310915e464SHarshad Shirwadkar return 0; 1132aa75f4d3SHarshad Shirwadkar } else if (ret) { 11330915e464SHarshad Shirwadkar /* 11340915e464SHarshad Shirwadkar * Commit couldn't start. Just update stats and perform a 11350915e464SHarshad Shirwadkar * full commit. 11360915e464SHarshad Shirwadkar */ 11370915e464SHarshad Shirwadkar ext4_fc_update_stats(sb, EXT4_FC_STATUS_FAILED, 0, 0); 11380915e464SHarshad Shirwadkar return jbd2_complete_transaction(journal, commit_tid); 1139aa75f4d3SHarshad Shirwadkar } 11400915e464SHarshad Shirwadkar 11417bbbe241SHarshad Shirwadkar /* 11427bbbe241SHarshad Shirwadkar * After establishing journal barrier via jbd2_fc_begin_commit(), check 11437bbbe241SHarshad Shirwadkar * if we are fast commit ineligible. 11447bbbe241SHarshad Shirwadkar */ 11457bbbe241SHarshad Shirwadkar if (ext4_test_mount_flag(sb, EXT4_MF_FC_INELIGIBLE)) { 11460915e464SHarshad Shirwadkar status = EXT4_FC_STATUS_INELIGIBLE; 11470915e464SHarshad Shirwadkar goto fallback; 11487bbbe241SHarshad Shirwadkar } 1149aa75f4d3SHarshad Shirwadkar 1150aa75f4d3SHarshad Shirwadkar fc_bufs_before = (sbi->s_fc_bytes + bsize - 1) / bsize; 1151aa75f4d3SHarshad Shirwadkar ret = ext4_fc_perform_commit(journal); 1152aa75f4d3SHarshad Shirwadkar if (ret < 0) { 11530915e464SHarshad Shirwadkar status = EXT4_FC_STATUS_FAILED; 11540915e464SHarshad Shirwadkar goto fallback; 1155aa75f4d3SHarshad Shirwadkar } 1156aa75f4d3SHarshad Shirwadkar nblks = (sbi->s_fc_bytes + bsize - 1) / bsize - fc_bufs_before; 1157aa75f4d3SHarshad Shirwadkar ret = jbd2_fc_wait_bufs(journal, nblks); 1158aa75f4d3SHarshad Shirwadkar if (ret < 0) { 11590915e464SHarshad Shirwadkar status = EXT4_FC_STATUS_FAILED; 11600915e464SHarshad Shirwadkar goto fallback; 1161aa75f4d3SHarshad Shirwadkar } 1162aa75f4d3SHarshad Shirwadkar atomic_inc(&sbi->s_fc_subtid); 11630915e464SHarshad Shirwadkar ret = jbd2_fc_end_commit(journal); 1164aa75f4d3SHarshad Shirwadkar /* 11650915e464SHarshad Shirwadkar * weight the commit time higher than the average time so we 11660915e464SHarshad Shirwadkar * don't react too strongly to vast changes in the commit time 1167aa75f4d3SHarshad Shirwadkar */ 11680915e464SHarshad Shirwadkar commit_time = ktime_to_ns(ktime_sub(ktime_get(), start_time)); 11690915e464SHarshad Shirwadkar ext4_fc_update_stats(sb, status, commit_time, nblks); 11700915e464SHarshad Shirwadkar return ret; 11710915e464SHarshad Shirwadkar 11720915e464SHarshad Shirwadkar fallback: 11730915e464SHarshad Shirwadkar ret = jbd2_fc_end_commit_fallback(journal); 11740915e464SHarshad Shirwadkar ext4_fc_update_stats(sb, status, 0, 0); 11750915e464SHarshad Shirwadkar return ret; 1176aa75f4d3SHarshad Shirwadkar } 1177aa75f4d3SHarshad Shirwadkar 1178ff780b91SHarshad Shirwadkar /* 1179ff780b91SHarshad Shirwadkar * Fast commit cleanup routine. This is called after every fast commit and 1180ff780b91SHarshad Shirwadkar * full commit. full is true if we are called after a full commit. 1181ff780b91SHarshad Shirwadkar */ 1182ff780b91SHarshad Shirwadkar static void ext4_fc_cleanup(journal_t *journal, int full) 1183ff780b91SHarshad Shirwadkar { 1184aa75f4d3SHarshad Shirwadkar struct super_block *sb = journal->j_private; 1185aa75f4d3SHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB(sb); 118696e7c02dSDaejun Park struct ext4_inode_info *iter, *iter_n; 1187aa75f4d3SHarshad Shirwadkar struct ext4_fc_dentry_update *fc_dentry; 1188aa75f4d3SHarshad Shirwadkar 1189aa75f4d3SHarshad Shirwadkar if (full && sbi->s_fc_bh) 1190aa75f4d3SHarshad Shirwadkar sbi->s_fc_bh = NULL; 1191aa75f4d3SHarshad Shirwadkar 1192aa75f4d3SHarshad Shirwadkar jbd2_fc_release_bufs(journal); 1193aa75f4d3SHarshad Shirwadkar 1194aa75f4d3SHarshad Shirwadkar spin_lock(&sbi->s_fc_lock); 119596e7c02dSDaejun Park list_for_each_entry_safe(iter, iter_n, &sbi->s_fc_q[FC_Q_MAIN], 119696e7c02dSDaejun Park i_fc_list) { 1197aa75f4d3SHarshad Shirwadkar list_del_init(&iter->i_fc_list); 1198aa75f4d3SHarshad Shirwadkar ext4_clear_inode_state(&iter->vfs_inode, 1199aa75f4d3SHarshad Shirwadkar EXT4_STATE_FC_COMMITTING); 1200aa75f4d3SHarshad Shirwadkar ext4_fc_reset_inode(&iter->vfs_inode); 1201aa75f4d3SHarshad Shirwadkar /* Make sure EXT4_STATE_FC_COMMITTING bit is clear */ 1202aa75f4d3SHarshad Shirwadkar smp_mb(); 1203aa75f4d3SHarshad Shirwadkar #if (BITS_PER_LONG < 64) 1204aa75f4d3SHarshad Shirwadkar wake_up_bit(&iter->i_state_flags, EXT4_STATE_FC_COMMITTING); 1205aa75f4d3SHarshad Shirwadkar #else 1206aa75f4d3SHarshad Shirwadkar wake_up_bit(&iter->i_flags, EXT4_STATE_FC_COMMITTING); 1207aa75f4d3SHarshad Shirwadkar #endif 1208aa75f4d3SHarshad Shirwadkar } 1209aa75f4d3SHarshad Shirwadkar 1210aa75f4d3SHarshad Shirwadkar while (!list_empty(&sbi->s_fc_dentry_q[FC_Q_MAIN])) { 1211aa75f4d3SHarshad Shirwadkar fc_dentry = list_first_entry(&sbi->s_fc_dentry_q[FC_Q_MAIN], 1212aa75f4d3SHarshad Shirwadkar struct ext4_fc_dentry_update, 1213aa75f4d3SHarshad Shirwadkar fcd_list); 1214aa75f4d3SHarshad Shirwadkar list_del_init(&fc_dentry->fcd_list); 1215aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock); 1216aa75f4d3SHarshad Shirwadkar 1217aa75f4d3SHarshad Shirwadkar if (fc_dentry->fcd_name.name && 1218aa75f4d3SHarshad Shirwadkar fc_dentry->fcd_name.len > DNAME_INLINE_LEN) 1219aa75f4d3SHarshad Shirwadkar kfree(fc_dentry->fcd_name.name); 1220aa75f4d3SHarshad Shirwadkar kmem_cache_free(ext4_fc_dentry_cachep, fc_dentry); 1221aa75f4d3SHarshad Shirwadkar spin_lock(&sbi->s_fc_lock); 1222aa75f4d3SHarshad Shirwadkar } 1223aa75f4d3SHarshad Shirwadkar 1224aa75f4d3SHarshad Shirwadkar list_splice_init(&sbi->s_fc_dentry_q[FC_Q_STAGING], 1225aa75f4d3SHarshad Shirwadkar &sbi->s_fc_dentry_q[FC_Q_MAIN]); 1226aa75f4d3SHarshad Shirwadkar list_splice_init(&sbi->s_fc_q[FC_Q_STAGING], 122731e203e0SDaejun Park &sbi->s_fc_q[FC_Q_MAIN]); 1228aa75f4d3SHarshad Shirwadkar 12299b5f6c9bSHarshad Shirwadkar ext4_clear_mount_flag(sb, EXT4_MF_FC_COMMITTING); 12309b5f6c9bSHarshad Shirwadkar ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE); 1231aa75f4d3SHarshad Shirwadkar 1232aa75f4d3SHarshad Shirwadkar if (full) 1233aa75f4d3SHarshad Shirwadkar sbi->s_fc_bytes = 0; 1234aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock); 1235aa75f4d3SHarshad Shirwadkar trace_ext4_fc_stats(sb); 1236ff780b91SHarshad Shirwadkar } 12376866d7b3SHarshad Shirwadkar 12388016e29fSHarshad Shirwadkar /* Ext4 Replay Path Routines */ 12398016e29fSHarshad Shirwadkar 12408016e29fSHarshad Shirwadkar /* Helper struct for dentry replay routines */ 12418016e29fSHarshad Shirwadkar struct dentry_info_args { 12428016e29fSHarshad Shirwadkar int parent_ino, dname_len, ino, inode_len; 12438016e29fSHarshad Shirwadkar char *dname; 12448016e29fSHarshad Shirwadkar }; 12458016e29fSHarshad Shirwadkar 12468016e29fSHarshad Shirwadkar static inline void tl_to_darg(struct dentry_info_args *darg, 1247a7ba36bcSHarshad Shirwadkar struct ext4_fc_tl *tl, u8 *val) 12488016e29fSHarshad Shirwadkar { 1249a7ba36bcSHarshad Shirwadkar struct ext4_fc_dentry_info fcd; 12508016e29fSHarshad Shirwadkar 1251a7ba36bcSHarshad Shirwadkar memcpy(&fcd, val, sizeof(fcd)); 12528016e29fSHarshad Shirwadkar 1253a7ba36bcSHarshad Shirwadkar darg->parent_ino = le32_to_cpu(fcd.fc_parent_ino); 1254a7ba36bcSHarshad Shirwadkar darg->ino = le32_to_cpu(fcd.fc_ino); 1255a7ba36bcSHarshad Shirwadkar darg->dname = val + offsetof(struct ext4_fc_dentry_info, fc_dname); 1256a7ba36bcSHarshad Shirwadkar darg->dname_len = le16_to_cpu(tl->fc_len) - 12578016e29fSHarshad Shirwadkar sizeof(struct ext4_fc_dentry_info); 12588016e29fSHarshad Shirwadkar } 12598016e29fSHarshad Shirwadkar 12608016e29fSHarshad Shirwadkar /* Unlink replay function */ 1261a7ba36bcSHarshad Shirwadkar static int ext4_fc_replay_unlink(struct super_block *sb, struct ext4_fc_tl *tl, 1262a7ba36bcSHarshad Shirwadkar u8 *val) 12638016e29fSHarshad Shirwadkar { 12648016e29fSHarshad Shirwadkar struct inode *inode, *old_parent; 12658016e29fSHarshad Shirwadkar struct qstr entry; 12668016e29fSHarshad Shirwadkar struct dentry_info_args darg; 12678016e29fSHarshad Shirwadkar int ret = 0; 12688016e29fSHarshad Shirwadkar 1269a7ba36bcSHarshad Shirwadkar tl_to_darg(&darg, tl, val); 12708016e29fSHarshad Shirwadkar 12718016e29fSHarshad Shirwadkar trace_ext4_fc_replay(sb, EXT4_FC_TAG_UNLINK, darg.ino, 12728016e29fSHarshad Shirwadkar darg.parent_ino, darg.dname_len); 12738016e29fSHarshad Shirwadkar 12748016e29fSHarshad Shirwadkar entry.name = darg.dname; 12758016e29fSHarshad Shirwadkar entry.len = darg.dname_len; 12768016e29fSHarshad Shirwadkar inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL); 12778016e29fSHarshad Shirwadkar 127823dd561aSYi Li if (IS_ERR(inode)) { 12798016e29fSHarshad Shirwadkar jbd_debug(1, "Inode %d not found", darg.ino); 12808016e29fSHarshad Shirwadkar return 0; 12818016e29fSHarshad Shirwadkar } 12828016e29fSHarshad Shirwadkar 12838016e29fSHarshad Shirwadkar old_parent = ext4_iget(sb, darg.parent_ino, 12848016e29fSHarshad Shirwadkar EXT4_IGET_NORMAL); 128523dd561aSYi Li if (IS_ERR(old_parent)) { 12868016e29fSHarshad Shirwadkar jbd_debug(1, "Dir with inode %d not found", darg.parent_ino); 12878016e29fSHarshad Shirwadkar iput(inode); 12888016e29fSHarshad Shirwadkar return 0; 12898016e29fSHarshad Shirwadkar } 12908016e29fSHarshad Shirwadkar 1291a80f7fcfSHarshad Shirwadkar ret = __ext4_unlink(NULL, old_parent, &entry, inode); 12928016e29fSHarshad Shirwadkar /* -ENOENT ok coz it might not exist anymore. */ 12938016e29fSHarshad Shirwadkar if (ret == -ENOENT) 12948016e29fSHarshad Shirwadkar ret = 0; 12958016e29fSHarshad Shirwadkar iput(old_parent); 12968016e29fSHarshad Shirwadkar iput(inode); 12978016e29fSHarshad Shirwadkar return ret; 12988016e29fSHarshad Shirwadkar } 12998016e29fSHarshad Shirwadkar 13008016e29fSHarshad Shirwadkar static int ext4_fc_replay_link_internal(struct super_block *sb, 13018016e29fSHarshad Shirwadkar struct dentry_info_args *darg, 13028016e29fSHarshad Shirwadkar struct inode *inode) 13038016e29fSHarshad Shirwadkar { 13048016e29fSHarshad Shirwadkar struct inode *dir = NULL; 13058016e29fSHarshad Shirwadkar struct dentry *dentry_dir = NULL, *dentry_inode = NULL; 13068016e29fSHarshad Shirwadkar struct qstr qstr_dname = QSTR_INIT(darg->dname, darg->dname_len); 13078016e29fSHarshad Shirwadkar int ret = 0; 13088016e29fSHarshad Shirwadkar 13098016e29fSHarshad Shirwadkar dir = ext4_iget(sb, darg->parent_ino, EXT4_IGET_NORMAL); 13108016e29fSHarshad Shirwadkar if (IS_ERR(dir)) { 13118016e29fSHarshad Shirwadkar jbd_debug(1, "Dir with inode %d not found.", darg->parent_ino); 13128016e29fSHarshad Shirwadkar dir = NULL; 13138016e29fSHarshad Shirwadkar goto out; 13148016e29fSHarshad Shirwadkar } 13158016e29fSHarshad Shirwadkar 13168016e29fSHarshad Shirwadkar dentry_dir = d_obtain_alias(dir); 13178016e29fSHarshad Shirwadkar if (IS_ERR(dentry_dir)) { 13188016e29fSHarshad Shirwadkar jbd_debug(1, "Failed to obtain dentry"); 13198016e29fSHarshad Shirwadkar dentry_dir = NULL; 13208016e29fSHarshad Shirwadkar goto out; 13218016e29fSHarshad Shirwadkar } 13228016e29fSHarshad Shirwadkar 13238016e29fSHarshad Shirwadkar dentry_inode = d_alloc(dentry_dir, &qstr_dname); 13248016e29fSHarshad Shirwadkar if (!dentry_inode) { 13258016e29fSHarshad Shirwadkar jbd_debug(1, "Inode dentry not created."); 13268016e29fSHarshad Shirwadkar ret = -ENOMEM; 13278016e29fSHarshad Shirwadkar goto out; 13288016e29fSHarshad Shirwadkar } 13298016e29fSHarshad Shirwadkar 13308016e29fSHarshad Shirwadkar ret = __ext4_link(dir, inode, dentry_inode); 13318016e29fSHarshad Shirwadkar /* 13328016e29fSHarshad Shirwadkar * It's possible that link already existed since data blocks 13338016e29fSHarshad Shirwadkar * for the dir in question got persisted before we crashed OR 13348016e29fSHarshad Shirwadkar * we replayed this tag and crashed before the entire replay 13358016e29fSHarshad Shirwadkar * could complete. 13368016e29fSHarshad Shirwadkar */ 13378016e29fSHarshad Shirwadkar if (ret && ret != -EEXIST) { 13388016e29fSHarshad Shirwadkar jbd_debug(1, "Failed to link\n"); 13398016e29fSHarshad Shirwadkar goto out; 13408016e29fSHarshad Shirwadkar } 13418016e29fSHarshad Shirwadkar 13428016e29fSHarshad Shirwadkar ret = 0; 13438016e29fSHarshad Shirwadkar out: 13448016e29fSHarshad Shirwadkar if (dentry_dir) { 13458016e29fSHarshad Shirwadkar d_drop(dentry_dir); 13468016e29fSHarshad Shirwadkar dput(dentry_dir); 13478016e29fSHarshad Shirwadkar } else if (dir) { 13488016e29fSHarshad Shirwadkar iput(dir); 13498016e29fSHarshad Shirwadkar } 13508016e29fSHarshad Shirwadkar if (dentry_inode) { 13518016e29fSHarshad Shirwadkar d_drop(dentry_inode); 13528016e29fSHarshad Shirwadkar dput(dentry_inode); 13538016e29fSHarshad Shirwadkar } 13548016e29fSHarshad Shirwadkar 13558016e29fSHarshad Shirwadkar return ret; 13568016e29fSHarshad Shirwadkar } 13578016e29fSHarshad Shirwadkar 13588016e29fSHarshad Shirwadkar /* Link replay function */ 1359a7ba36bcSHarshad Shirwadkar static int ext4_fc_replay_link(struct super_block *sb, struct ext4_fc_tl *tl, 1360a7ba36bcSHarshad Shirwadkar u8 *val) 13618016e29fSHarshad Shirwadkar { 13628016e29fSHarshad Shirwadkar struct inode *inode; 13638016e29fSHarshad Shirwadkar struct dentry_info_args darg; 13648016e29fSHarshad Shirwadkar int ret = 0; 13658016e29fSHarshad Shirwadkar 1366a7ba36bcSHarshad Shirwadkar tl_to_darg(&darg, tl, val); 13678016e29fSHarshad Shirwadkar trace_ext4_fc_replay(sb, EXT4_FC_TAG_LINK, darg.ino, 13688016e29fSHarshad Shirwadkar darg.parent_ino, darg.dname_len); 13698016e29fSHarshad Shirwadkar 13708016e29fSHarshad Shirwadkar inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL); 137123dd561aSYi Li if (IS_ERR(inode)) { 13728016e29fSHarshad Shirwadkar jbd_debug(1, "Inode not found."); 13738016e29fSHarshad Shirwadkar return 0; 13748016e29fSHarshad Shirwadkar } 13758016e29fSHarshad Shirwadkar 13768016e29fSHarshad Shirwadkar ret = ext4_fc_replay_link_internal(sb, &darg, inode); 13778016e29fSHarshad Shirwadkar iput(inode); 13788016e29fSHarshad Shirwadkar return ret; 13798016e29fSHarshad Shirwadkar } 13808016e29fSHarshad Shirwadkar 13818016e29fSHarshad Shirwadkar /* 13828016e29fSHarshad Shirwadkar * Record all the modified inodes during replay. We use this later to setup 13838016e29fSHarshad Shirwadkar * block bitmaps correctly. 13848016e29fSHarshad Shirwadkar */ 13858016e29fSHarshad Shirwadkar static int ext4_fc_record_modified_inode(struct super_block *sb, int ino) 13868016e29fSHarshad Shirwadkar { 13878016e29fSHarshad Shirwadkar struct ext4_fc_replay_state *state; 13888016e29fSHarshad Shirwadkar int i; 13898016e29fSHarshad Shirwadkar 13908016e29fSHarshad Shirwadkar state = &EXT4_SB(sb)->s_fc_replay_state; 13918016e29fSHarshad Shirwadkar for (i = 0; i < state->fc_modified_inodes_used; i++) 13928016e29fSHarshad Shirwadkar if (state->fc_modified_inodes[i] == ino) 13938016e29fSHarshad Shirwadkar return 0; 13948016e29fSHarshad Shirwadkar if (state->fc_modified_inodes_used == state->fc_modified_inodes_size) { 13958016e29fSHarshad Shirwadkar state->fc_modified_inodes_size += 13968016e29fSHarshad Shirwadkar EXT4_FC_REPLAY_REALLOC_INCREMENT; 13978016e29fSHarshad Shirwadkar state->fc_modified_inodes = krealloc( 13988016e29fSHarshad Shirwadkar state->fc_modified_inodes, sizeof(int) * 13998016e29fSHarshad Shirwadkar state->fc_modified_inodes_size, 14008016e29fSHarshad Shirwadkar GFP_KERNEL); 14018016e29fSHarshad Shirwadkar if (!state->fc_modified_inodes) 14028016e29fSHarshad Shirwadkar return -ENOMEM; 14038016e29fSHarshad Shirwadkar } 14048016e29fSHarshad Shirwadkar state->fc_modified_inodes[state->fc_modified_inodes_used++] = ino; 14058016e29fSHarshad Shirwadkar return 0; 14068016e29fSHarshad Shirwadkar } 14078016e29fSHarshad Shirwadkar 14088016e29fSHarshad Shirwadkar /* 14098016e29fSHarshad Shirwadkar * Inode replay function 14108016e29fSHarshad Shirwadkar */ 1411a7ba36bcSHarshad Shirwadkar static int ext4_fc_replay_inode(struct super_block *sb, struct ext4_fc_tl *tl, 1412a7ba36bcSHarshad Shirwadkar u8 *val) 14138016e29fSHarshad Shirwadkar { 1414a7ba36bcSHarshad Shirwadkar struct ext4_fc_inode fc_inode; 14158016e29fSHarshad Shirwadkar struct ext4_inode *raw_inode; 14168016e29fSHarshad Shirwadkar struct ext4_inode *raw_fc_inode; 14178016e29fSHarshad Shirwadkar struct inode *inode = NULL; 14188016e29fSHarshad Shirwadkar struct ext4_iloc iloc; 14198016e29fSHarshad Shirwadkar int inode_len, ino, ret, tag = le16_to_cpu(tl->fc_tag); 14208016e29fSHarshad Shirwadkar struct ext4_extent_header *eh; 14218016e29fSHarshad Shirwadkar 1422a7ba36bcSHarshad Shirwadkar memcpy(&fc_inode, val, sizeof(fc_inode)); 14238016e29fSHarshad Shirwadkar 1424a7ba36bcSHarshad Shirwadkar ino = le32_to_cpu(fc_inode.fc_ino); 14258016e29fSHarshad Shirwadkar trace_ext4_fc_replay(sb, tag, ino, 0, 0); 14268016e29fSHarshad Shirwadkar 14278016e29fSHarshad Shirwadkar inode = ext4_iget(sb, ino, EXT4_IGET_NORMAL); 142823dd561aSYi Li if (!IS_ERR(inode)) { 14298016e29fSHarshad Shirwadkar ext4_ext_clear_bb(inode); 14308016e29fSHarshad Shirwadkar iput(inode); 14318016e29fSHarshad Shirwadkar } 143223dd561aSYi Li inode = NULL; 14338016e29fSHarshad Shirwadkar 14348016e29fSHarshad Shirwadkar ext4_fc_record_modified_inode(sb, ino); 14358016e29fSHarshad Shirwadkar 1436a7ba36bcSHarshad Shirwadkar raw_fc_inode = (struct ext4_inode *) 1437a7ba36bcSHarshad Shirwadkar (val + offsetof(struct ext4_fc_inode, fc_raw_inode)); 14388016e29fSHarshad Shirwadkar ret = ext4_get_fc_inode_loc(sb, ino, &iloc); 14398016e29fSHarshad Shirwadkar if (ret) 14408016e29fSHarshad Shirwadkar goto out; 14418016e29fSHarshad Shirwadkar 1442a7ba36bcSHarshad Shirwadkar inode_len = le16_to_cpu(tl->fc_len) - sizeof(struct ext4_fc_inode); 14438016e29fSHarshad Shirwadkar raw_inode = ext4_raw_inode(&iloc); 14448016e29fSHarshad Shirwadkar 14458016e29fSHarshad Shirwadkar memcpy(raw_inode, raw_fc_inode, offsetof(struct ext4_inode, i_block)); 14468016e29fSHarshad Shirwadkar memcpy(&raw_inode->i_generation, &raw_fc_inode->i_generation, 14478016e29fSHarshad Shirwadkar inode_len - offsetof(struct ext4_inode, i_generation)); 14488016e29fSHarshad Shirwadkar if (le32_to_cpu(raw_inode->i_flags) & EXT4_EXTENTS_FL) { 14498016e29fSHarshad Shirwadkar eh = (struct ext4_extent_header *)(&raw_inode->i_block[0]); 14508016e29fSHarshad Shirwadkar if (eh->eh_magic != EXT4_EXT_MAGIC) { 14518016e29fSHarshad Shirwadkar memset(eh, 0, sizeof(*eh)); 14528016e29fSHarshad Shirwadkar eh->eh_magic = EXT4_EXT_MAGIC; 14538016e29fSHarshad Shirwadkar eh->eh_max = cpu_to_le16( 14548016e29fSHarshad Shirwadkar (sizeof(raw_inode->i_block) - 14558016e29fSHarshad Shirwadkar sizeof(struct ext4_extent_header)) 14568016e29fSHarshad Shirwadkar / sizeof(struct ext4_extent)); 14578016e29fSHarshad Shirwadkar } 14588016e29fSHarshad Shirwadkar } else if (le32_to_cpu(raw_inode->i_flags) & EXT4_INLINE_DATA_FL) { 14598016e29fSHarshad Shirwadkar memcpy(raw_inode->i_block, raw_fc_inode->i_block, 14608016e29fSHarshad Shirwadkar sizeof(raw_inode->i_block)); 14618016e29fSHarshad Shirwadkar } 14628016e29fSHarshad Shirwadkar 14638016e29fSHarshad Shirwadkar /* Immediately update the inode on disk. */ 14648016e29fSHarshad Shirwadkar ret = ext4_handle_dirty_metadata(NULL, NULL, iloc.bh); 14658016e29fSHarshad Shirwadkar if (ret) 14668016e29fSHarshad Shirwadkar goto out; 14678016e29fSHarshad Shirwadkar ret = sync_dirty_buffer(iloc.bh); 14688016e29fSHarshad Shirwadkar if (ret) 14698016e29fSHarshad Shirwadkar goto out; 14708016e29fSHarshad Shirwadkar ret = ext4_mark_inode_used(sb, ino); 14718016e29fSHarshad Shirwadkar if (ret) 14728016e29fSHarshad Shirwadkar goto out; 14738016e29fSHarshad Shirwadkar 14748016e29fSHarshad Shirwadkar /* Given that we just wrote the inode on disk, this SHOULD succeed. */ 14758016e29fSHarshad Shirwadkar inode = ext4_iget(sb, ino, EXT4_IGET_NORMAL); 147623dd561aSYi Li if (IS_ERR(inode)) { 14778016e29fSHarshad Shirwadkar jbd_debug(1, "Inode not found."); 14788016e29fSHarshad Shirwadkar return -EFSCORRUPTED; 14798016e29fSHarshad Shirwadkar } 14808016e29fSHarshad Shirwadkar 14818016e29fSHarshad Shirwadkar /* 14828016e29fSHarshad Shirwadkar * Our allocator could have made different decisions than before 14838016e29fSHarshad Shirwadkar * crashing. This should be fixed but until then, we calculate 14848016e29fSHarshad Shirwadkar * the number of blocks the inode. 14858016e29fSHarshad Shirwadkar */ 14861ebf2178SHarshad Shirwadkar if (!ext4_test_inode_flag(inode, EXT4_INODE_INLINE_DATA)) 14878016e29fSHarshad Shirwadkar ext4_ext_replay_set_iblocks(inode); 14888016e29fSHarshad Shirwadkar 14898016e29fSHarshad Shirwadkar inode->i_generation = le32_to_cpu(ext4_raw_inode(&iloc)->i_generation); 14908016e29fSHarshad Shirwadkar ext4_reset_inode_seed(inode); 14918016e29fSHarshad Shirwadkar 14928016e29fSHarshad Shirwadkar ext4_inode_csum_set(inode, ext4_raw_inode(&iloc), EXT4_I(inode)); 14938016e29fSHarshad Shirwadkar ret = ext4_handle_dirty_metadata(NULL, NULL, iloc.bh); 14948016e29fSHarshad Shirwadkar sync_dirty_buffer(iloc.bh); 14958016e29fSHarshad Shirwadkar brelse(iloc.bh); 14968016e29fSHarshad Shirwadkar out: 14978016e29fSHarshad Shirwadkar iput(inode); 14988016e29fSHarshad Shirwadkar if (!ret) 1499c6bf3f0eSChristoph Hellwig blkdev_issue_flush(sb->s_bdev); 15008016e29fSHarshad Shirwadkar 15018016e29fSHarshad Shirwadkar return 0; 15028016e29fSHarshad Shirwadkar } 15038016e29fSHarshad Shirwadkar 15048016e29fSHarshad Shirwadkar /* 15058016e29fSHarshad Shirwadkar * Dentry create replay function. 15068016e29fSHarshad Shirwadkar * 15078016e29fSHarshad Shirwadkar * EXT4_FC_TAG_CREAT is preceded by EXT4_FC_TAG_INODE_FULL. Which means, the 15088016e29fSHarshad Shirwadkar * inode for which we are trying to create a dentry here, should already have 15098016e29fSHarshad Shirwadkar * been replayed before we start here. 15108016e29fSHarshad Shirwadkar */ 1511a7ba36bcSHarshad Shirwadkar static int ext4_fc_replay_create(struct super_block *sb, struct ext4_fc_tl *tl, 1512a7ba36bcSHarshad Shirwadkar u8 *val) 15138016e29fSHarshad Shirwadkar { 15148016e29fSHarshad Shirwadkar int ret = 0; 15158016e29fSHarshad Shirwadkar struct inode *inode = NULL; 15168016e29fSHarshad Shirwadkar struct inode *dir = NULL; 15178016e29fSHarshad Shirwadkar struct dentry_info_args darg; 15188016e29fSHarshad Shirwadkar 1519a7ba36bcSHarshad Shirwadkar tl_to_darg(&darg, tl, val); 15208016e29fSHarshad Shirwadkar 15218016e29fSHarshad Shirwadkar trace_ext4_fc_replay(sb, EXT4_FC_TAG_CREAT, darg.ino, 15228016e29fSHarshad Shirwadkar darg.parent_ino, darg.dname_len); 15238016e29fSHarshad Shirwadkar 15248016e29fSHarshad Shirwadkar /* This takes care of update group descriptor and other metadata */ 15258016e29fSHarshad Shirwadkar ret = ext4_mark_inode_used(sb, darg.ino); 15268016e29fSHarshad Shirwadkar if (ret) 15278016e29fSHarshad Shirwadkar goto out; 15288016e29fSHarshad Shirwadkar 15298016e29fSHarshad Shirwadkar inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL); 153023dd561aSYi Li if (IS_ERR(inode)) { 15318016e29fSHarshad Shirwadkar jbd_debug(1, "inode %d not found.", darg.ino); 15328016e29fSHarshad Shirwadkar inode = NULL; 15338016e29fSHarshad Shirwadkar ret = -EINVAL; 15348016e29fSHarshad Shirwadkar goto out; 15358016e29fSHarshad Shirwadkar } 15368016e29fSHarshad Shirwadkar 15378016e29fSHarshad Shirwadkar if (S_ISDIR(inode->i_mode)) { 15388016e29fSHarshad Shirwadkar /* 15398016e29fSHarshad Shirwadkar * If we are creating a directory, we need to make sure that the 15408016e29fSHarshad Shirwadkar * dot and dot dot dirents are setup properly. 15418016e29fSHarshad Shirwadkar */ 15428016e29fSHarshad Shirwadkar dir = ext4_iget(sb, darg.parent_ino, EXT4_IGET_NORMAL); 154323dd561aSYi Li if (IS_ERR(dir)) { 15448016e29fSHarshad Shirwadkar jbd_debug(1, "Dir %d not found.", darg.ino); 15458016e29fSHarshad Shirwadkar goto out; 15468016e29fSHarshad Shirwadkar } 15478016e29fSHarshad Shirwadkar ret = ext4_init_new_dir(NULL, dir, inode); 15488016e29fSHarshad Shirwadkar iput(dir); 15498016e29fSHarshad Shirwadkar if (ret) { 15508016e29fSHarshad Shirwadkar ret = 0; 15518016e29fSHarshad Shirwadkar goto out; 15528016e29fSHarshad Shirwadkar } 15538016e29fSHarshad Shirwadkar } 15548016e29fSHarshad Shirwadkar ret = ext4_fc_replay_link_internal(sb, &darg, inode); 15558016e29fSHarshad Shirwadkar if (ret) 15568016e29fSHarshad Shirwadkar goto out; 15578016e29fSHarshad Shirwadkar set_nlink(inode, 1); 15588016e29fSHarshad Shirwadkar ext4_mark_inode_dirty(NULL, inode); 15598016e29fSHarshad Shirwadkar out: 15608016e29fSHarshad Shirwadkar if (inode) 15618016e29fSHarshad Shirwadkar iput(inode); 15628016e29fSHarshad Shirwadkar return ret; 15638016e29fSHarshad Shirwadkar } 15648016e29fSHarshad Shirwadkar 15658016e29fSHarshad Shirwadkar /* 1566*599ea31dSXin Yin * Record physical disk regions which are in use as per fast commit area, 1567*599ea31dSXin Yin * and used by inodes during replay phase. Our simple replay phase 1568*599ea31dSXin Yin * allocator excludes these regions from allocation. 15698016e29fSHarshad Shirwadkar */ 1570*599ea31dSXin Yin int ext4_fc_record_regions(struct super_block *sb, int ino, 1571*599ea31dSXin Yin ext4_lblk_t lblk, ext4_fsblk_t pblk, int len, int replay) 15728016e29fSHarshad Shirwadkar { 15738016e29fSHarshad Shirwadkar struct ext4_fc_replay_state *state; 15748016e29fSHarshad Shirwadkar struct ext4_fc_alloc_region *region; 15758016e29fSHarshad Shirwadkar 15768016e29fSHarshad Shirwadkar state = &EXT4_SB(sb)->s_fc_replay_state; 1577*599ea31dSXin Yin /* 1578*599ea31dSXin Yin * during replay phase, the fc_regions_valid may not same as 1579*599ea31dSXin Yin * fc_regions_used, update it when do new additions. 1580*599ea31dSXin Yin */ 1581*599ea31dSXin Yin if (replay && state->fc_regions_used != state->fc_regions_valid) 1582*599ea31dSXin Yin state->fc_regions_used = state->fc_regions_valid; 15838016e29fSHarshad Shirwadkar if (state->fc_regions_used == state->fc_regions_size) { 15848016e29fSHarshad Shirwadkar state->fc_regions_size += 15858016e29fSHarshad Shirwadkar EXT4_FC_REPLAY_REALLOC_INCREMENT; 15868016e29fSHarshad Shirwadkar state->fc_regions = krealloc( 15878016e29fSHarshad Shirwadkar state->fc_regions, 15888016e29fSHarshad Shirwadkar state->fc_regions_size * 15898016e29fSHarshad Shirwadkar sizeof(struct ext4_fc_alloc_region), 15908016e29fSHarshad Shirwadkar GFP_KERNEL); 15918016e29fSHarshad Shirwadkar if (!state->fc_regions) 15928016e29fSHarshad Shirwadkar return -ENOMEM; 15938016e29fSHarshad Shirwadkar } 15948016e29fSHarshad Shirwadkar region = &state->fc_regions[state->fc_regions_used++]; 15958016e29fSHarshad Shirwadkar region->ino = ino; 15968016e29fSHarshad Shirwadkar region->lblk = lblk; 15978016e29fSHarshad Shirwadkar region->pblk = pblk; 15988016e29fSHarshad Shirwadkar region->len = len; 15998016e29fSHarshad Shirwadkar 1600*599ea31dSXin Yin if (replay) 1601*599ea31dSXin Yin state->fc_regions_valid++; 1602*599ea31dSXin Yin 16038016e29fSHarshad Shirwadkar return 0; 16048016e29fSHarshad Shirwadkar } 16058016e29fSHarshad Shirwadkar 16068016e29fSHarshad Shirwadkar /* Replay add range tag */ 16078016e29fSHarshad Shirwadkar static int ext4_fc_replay_add_range(struct super_block *sb, 1608a7ba36bcSHarshad Shirwadkar struct ext4_fc_tl *tl, u8 *val) 16098016e29fSHarshad Shirwadkar { 1610a7ba36bcSHarshad Shirwadkar struct ext4_fc_add_range fc_add_ex; 16118016e29fSHarshad Shirwadkar struct ext4_extent newex, *ex; 16128016e29fSHarshad Shirwadkar struct inode *inode; 16138016e29fSHarshad Shirwadkar ext4_lblk_t start, cur; 16148016e29fSHarshad Shirwadkar int remaining, len; 16158016e29fSHarshad Shirwadkar ext4_fsblk_t start_pblk; 16168016e29fSHarshad Shirwadkar struct ext4_map_blocks map; 16178016e29fSHarshad Shirwadkar struct ext4_ext_path *path = NULL; 16188016e29fSHarshad Shirwadkar int ret; 16198016e29fSHarshad Shirwadkar 1620a7ba36bcSHarshad Shirwadkar memcpy(&fc_add_ex, val, sizeof(fc_add_ex)); 1621a7ba36bcSHarshad Shirwadkar ex = (struct ext4_extent *)&fc_add_ex.fc_ex; 16228016e29fSHarshad Shirwadkar 16238016e29fSHarshad Shirwadkar trace_ext4_fc_replay(sb, EXT4_FC_TAG_ADD_RANGE, 1624a7ba36bcSHarshad Shirwadkar le32_to_cpu(fc_add_ex.fc_ino), le32_to_cpu(ex->ee_block), 16258016e29fSHarshad Shirwadkar ext4_ext_get_actual_len(ex)); 16268016e29fSHarshad Shirwadkar 1627a7ba36bcSHarshad Shirwadkar inode = ext4_iget(sb, le32_to_cpu(fc_add_ex.fc_ino), EXT4_IGET_NORMAL); 162823dd561aSYi Li if (IS_ERR(inode)) { 16298016e29fSHarshad Shirwadkar jbd_debug(1, "Inode not found."); 16308016e29fSHarshad Shirwadkar return 0; 16318016e29fSHarshad Shirwadkar } 16328016e29fSHarshad Shirwadkar 16338016e29fSHarshad Shirwadkar ret = ext4_fc_record_modified_inode(sb, inode->i_ino); 16348016e29fSHarshad Shirwadkar 16358016e29fSHarshad Shirwadkar start = le32_to_cpu(ex->ee_block); 16368016e29fSHarshad Shirwadkar start_pblk = ext4_ext_pblock(ex); 16378016e29fSHarshad Shirwadkar len = ext4_ext_get_actual_len(ex); 16388016e29fSHarshad Shirwadkar 16398016e29fSHarshad Shirwadkar cur = start; 16408016e29fSHarshad Shirwadkar remaining = len; 16418016e29fSHarshad Shirwadkar jbd_debug(1, "ADD_RANGE, lblk %d, pblk %lld, len %d, unwritten %d, inode %ld\n", 16428016e29fSHarshad Shirwadkar start, start_pblk, len, ext4_ext_is_unwritten(ex), 16438016e29fSHarshad Shirwadkar inode->i_ino); 16448016e29fSHarshad Shirwadkar 16458016e29fSHarshad Shirwadkar while (remaining > 0) { 16468016e29fSHarshad Shirwadkar map.m_lblk = cur; 16478016e29fSHarshad Shirwadkar map.m_len = remaining; 16488016e29fSHarshad Shirwadkar map.m_pblk = 0; 16498016e29fSHarshad Shirwadkar ret = ext4_map_blocks(NULL, inode, &map, 0); 16508016e29fSHarshad Shirwadkar 16518016e29fSHarshad Shirwadkar if (ret < 0) { 16528016e29fSHarshad Shirwadkar iput(inode); 16538016e29fSHarshad Shirwadkar return 0; 16548016e29fSHarshad Shirwadkar } 16558016e29fSHarshad Shirwadkar 16568016e29fSHarshad Shirwadkar if (ret == 0) { 16578016e29fSHarshad Shirwadkar /* Range is not mapped */ 16588016e29fSHarshad Shirwadkar path = ext4_find_extent(inode, cur, NULL, 0); 16598c9be1e5SHarshad Shirwadkar if (IS_ERR(path)) { 16608c9be1e5SHarshad Shirwadkar iput(inode); 16618c9be1e5SHarshad Shirwadkar return 0; 16628c9be1e5SHarshad Shirwadkar } 16638016e29fSHarshad Shirwadkar memset(&newex, 0, sizeof(newex)); 16648016e29fSHarshad Shirwadkar newex.ee_block = cpu_to_le32(cur); 16658016e29fSHarshad Shirwadkar ext4_ext_store_pblock( 16668016e29fSHarshad Shirwadkar &newex, start_pblk + cur - start); 16678016e29fSHarshad Shirwadkar newex.ee_len = cpu_to_le16(map.m_len); 16688016e29fSHarshad Shirwadkar if (ext4_ext_is_unwritten(ex)) 16698016e29fSHarshad Shirwadkar ext4_ext_mark_unwritten(&newex); 16708016e29fSHarshad Shirwadkar down_write(&EXT4_I(inode)->i_data_sem); 16718016e29fSHarshad Shirwadkar ret = ext4_ext_insert_extent( 16728016e29fSHarshad Shirwadkar NULL, inode, &path, &newex, 0); 16738016e29fSHarshad Shirwadkar up_write((&EXT4_I(inode)->i_data_sem)); 16748016e29fSHarshad Shirwadkar ext4_ext_drop_refs(path); 16758016e29fSHarshad Shirwadkar kfree(path); 16768016e29fSHarshad Shirwadkar if (ret) { 16778016e29fSHarshad Shirwadkar iput(inode); 16788016e29fSHarshad Shirwadkar return 0; 16798016e29fSHarshad Shirwadkar } 16808016e29fSHarshad Shirwadkar goto next; 16818016e29fSHarshad Shirwadkar } 16828016e29fSHarshad Shirwadkar 16838016e29fSHarshad Shirwadkar if (start_pblk + cur - start != map.m_pblk) { 16848016e29fSHarshad Shirwadkar /* 16858016e29fSHarshad Shirwadkar * Logical to physical mapping changed. This can happen 16868016e29fSHarshad Shirwadkar * if this range was removed and then reallocated to 16878016e29fSHarshad Shirwadkar * map to new physical blocks during a fast commit. 16888016e29fSHarshad Shirwadkar */ 16898016e29fSHarshad Shirwadkar ret = ext4_ext_replay_update_ex(inode, cur, map.m_len, 16908016e29fSHarshad Shirwadkar ext4_ext_is_unwritten(ex), 16918016e29fSHarshad Shirwadkar start_pblk + cur - start); 16928016e29fSHarshad Shirwadkar if (ret) { 16938016e29fSHarshad Shirwadkar iput(inode); 16948016e29fSHarshad Shirwadkar return 0; 16958016e29fSHarshad Shirwadkar } 16968016e29fSHarshad Shirwadkar /* 16978016e29fSHarshad Shirwadkar * Mark the old blocks as free since they aren't used 16988016e29fSHarshad Shirwadkar * anymore. We maintain an array of all the modified 16998016e29fSHarshad Shirwadkar * inodes. In case these blocks are still used at either 17008016e29fSHarshad Shirwadkar * a different logical range in the same inode or in 17018016e29fSHarshad Shirwadkar * some different inode, we will mark them as allocated 17028016e29fSHarshad Shirwadkar * at the end of the FC replay using our array of 17038016e29fSHarshad Shirwadkar * modified inodes. 17048016e29fSHarshad Shirwadkar */ 17058016e29fSHarshad Shirwadkar ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, 0); 17068016e29fSHarshad Shirwadkar goto next; 17078016e29fSHarshad Shirwadkar } 17088016e29fSHarshad Shirwadkar 17098016e29fSHarshad Shirwadkar /* Range is mapped and needs a state change */ 1710fcdf3c34SArnd Bergmann jbd_debug(1, "Converting from %ld to %d %lld", 17118016e29fSHarshad Shirwadkar map.m_flags & EXT4_MAP_UNWRITTEN, 17128016e29fSHarshad Shirwadkar ext4_ext_is_unwritten(ex), map.m_pblk); 17138016e29fSHarshad Shirwadkar ret = ext4_ext_replay_update_ex(inode, cur, map.m_len, 17148016e29fSHarshad Shirwadkar ext4_ext_is_unwritten(ex), map.m_pblk); 17158016e29fSHarshad Shirwadkar if (ret) { 17168016e29fSHarshad Shirwadkar iput(inode); 17178016e29fSHarshad Shirwadkar return 0; 17188016e29fSHarshad Shirwadkar } 17198016e29fSHarshad Shirwadkar /* 17208016e29fSHarshad Shirwadkar * We may have split the extent tree while toggling the state. 17218016e29fSHarshad Shirwadkar * Try to shrink the extent tree now. 17228016e29fSHarshad Shirwadkar */ 17238016e29fSHarshad Shirwadkar ext4_ext_replay_shrink_inode(inode, start + len); 17248016e29fSHarshad Shirwadkar next: 17258016e29fSHarshad Shirwadkar cur += map.m_len; 17268016e29fSHarshad Shirwadkar remaining -= map.m_len; 17278016e29fSHarshad Shirwadkar } 17288016e29fSHarshad Shirwadkar ext4_ext_replay_shrink_inode(inode, i_size_read(inode) >> 17298016e29fSHarshad Shirwadkar sb->s_blocksize_bits); 17308016e29fSHarshad Shirwadkar iput(inode); 17318016e29fSHarshad Shirwadkar return 0; 17328016e29fSHarshad Shirwadkar } 17338016e29fSHarshad Shirwadkar 17348016e29fSHarshad Shirwadkar /* Replay DEL_RANGE tag */ 17358016e29fSHarshad Shirwadkar static int 1736a7ba36bcSHarshad Shirwadkar ext4_fc_replay_del_range(struct super_block *sb, struct ext4_fc_tl *tl, 1737a7ba36bcSHarshad Shirwadkar u8 *val) 17388016e29fSHarshad Shirwadkar { 17398016e29fSHarshad Shirwadkar struct inode *inode; 1740a7ba36bcSHarshad Shirwadkar struct ext4_fc_del_range lrange; 17418016e29fSHarshad Shirwadkar struct ext4_map_blocks map; 17428016e29fSHarshad Shirwadkar ext4_lblk_t cur, remaining; 17438016e29fSHarshad Shirwadkar int ret; 17448016e29fSHarshad Shirwadkar 1745a7ba36bcSHarshad Shirwadkar memcpy(&lrange, val, sizeof(lrange)); 1746a7ba36bcSHarshad Shirwadkar cur = le32_to_cpu(lrange.fc_lblk); 1747a7ba36bcSHarshad Shirwadkar remaining = le32_to_cpu(lrange.fc_len); 17488016e29fSHarshad Shirwadkar 17498016e29fSHarshad Shirwadkar trace_ext4_fc_replay(sb, EXT4_FC_TAG_DEL_RANGE, 1750a7ba36bcSHarshad Shirwadkar le32_to_cpu(lrange.fc_ino), cur, remaining); 17518016e29fSHarshad Shirwadkar 1752a7ba36bcSHarshad Shirwadkar inode = ext4_iget(sb, le32_to_cpu(lrange.fc_ino), EXT4_IGET_NORMAL); 175323dd561aSYi Li if (IS_ERR(inode)) { 1754a7ba36bcSHarshad Shirwadkar jbd_debug(1, "Inode %d not found", le32_to_cpu(lrange.fc_ino)); 17558016e29fSHarshad Shirwadkar return 0; 17568016e29fSHarshad Shirwadkar } 17578016e29fSHarshad Shirwadkar 17588016e29fSHarshad Shirwadkar ret = ext4_fc_record_modified_inode(sb, inode->i_ino); 17598016e29fSHarshad Shirwadkar 17608016e29fSHarshad Shirwadkar jbd_debug(1, "DEL_RANGE, inode %ld, lblk %d, len %d\n", 1761a7ba36bcSHarshad Shirwadkar inode->i_ino, le32_to_cpu(lrange.fc_lblk), 1762a7ba36bcSHarshad Shirwadkar le32_to_cpu(lrange.fc_len)); 17638016e29fSHarshad Shirwadkar while (remaining > 0) { 17648016e29fSHarshad Shirwadkar map.m_lblk = cur; 17658016e29fSHarshad Shirwadkar map.m_len = remaining; 17668016e29fSHarshad Shirwadkar 17678016e29fSHarshad Shirwadkar ret = ext4_map_blocks(NULL, inode, &map, 0); 17688016e29fSHarshad Shirwadkar if (ret < 0) { 17698016e29fSHarshad Shirwadkar iput(inode); 17708016e29fSHarshad Shirwadkar return 0; 17718016e29fSHarshad Shirwadkar } 17728016e29fSHarshad Shirwadkar if (ret > 0) { 17738016e29fSHarshad Shirwadkar remaining -= ret; 17748016e29fSHarshad Shirwadkar cur += ret; 17758016e29fSHarshad Shirwadkar ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, 0); 17768016e29fSHarshad Shirwadkar } else { 17778016e29fSHarshad Shirwadkar remaining -= map.m_len; 17788016e29fSHarshad Shirwadkar cur += map.m_len; 17798016e29fSHarshad Shirwadkar } 17808016e29fSHarshad Shirwadkar } 17818016e29fSHarshad Shirwadkar 17820b5b5a62SXin Yin down_write(&EXT4_I(inode)->i_data_sem); 17830b5b5a62SXin Yin ret = ext4_ext_remove_space(inode, lrange.fc_lblk, 17840b5b5a62SXin Yin lrange.fc_lblk + lrange.fc_len - 1); 17850b5b5a62SXin Yin up_write(&EXT4_I(inode)->i_data_sem); 17860b5b5a62SXin Yin if (ret) { 17870b5b5a62SXin Yin iput(inode); 17880b5b5a62SXin Yin return 0; 17890b5b5a62SXin Yin } 17908016e29fSHarshad Shirwadkar ext4_ext_replay_shrink_inode(inode, 17918016e29fSHarshad Shirwadkar i_size_read(inode) >> sb->s_blocksize_bits); 17928016e29fSHarshad Shirwadkar ext4_mark_inode_dirty(NULL, inode); 17938016e29fSHarshad Shirwadkar iput(inode); 17948016e29fSHarshad Shirwadkar 17958016e29fSHarshad Shirwadkar return 0; 17968016e29fSHarshad Shirwadkar } 17978016e29fSHarshad Shirwadkar 17988016e29fSHarshad Shirwadkar static void ext4_fc_set_bitmaps_and_counters(struct super_block *sb) 17998016e29fSHarshad Shirwadkar { 18008016e29fSHarshad Shirwadkar struct ext4_fc_replay_state *state; 18018016e29fSHarshad Shirwadkar struct inode *inode; 18028016e29fSHarshad Shirwadkar struct ext4_ext_path *path = NULL; 18038016e29fSHarshad Shirwadkar struct ext4_map_blocks map; 18048016e29fSHarshad Shirwadkar int i, ret, j; 18058016e29fSHarshad Shirwadkar ext4_lblk_t cur, end; 18068016e29fSHarshad Shirwadkar 18078016e29fSHarshad Shirwadkar state = &EXT4_SB(sb)->s_fc_replay_state; 18088016e29fSHarshad Shirwadkar for (i = 0; i < state->fc_modified_inodes_used; i++) { 18098016e29fSHarshad Shirwadkar inode = ext4_iget(sb, state->fc_modified_inodes[i], 18108016e29fSHarshad Shirwadkar EXT4_IGET_NORMAL); 181123dd561aSYi Li if (IS_ERR(inode)) { 18128016e29fSHarshad Shirwadkar jbd_debug(1, "Inode %d not found.", 18138016e29fSHarshad Shirwadkar state->fc_modified_inodes[i]); 18148016e29fSHarshad Shirwadkar continue; 18158016e29fSHarshad Shirwadkar } 18168016e29fSHarshad Shirwadkar cur = 0; 18178016e29fSHarshad Shirwadkar end = EXT_MAX_BLOCKS; 18181ebf2178SHarshad Shirwadkar if (ext4_test_inode_flag(inode, EXT4_INODE_INLINE_DATA)) { 18191ebf2178SHarshad Shirwadkar iput(inode); 18201ebf2178SHarshad Shirwadkar continue; 18211ebf2178SHarshad Shirwadkar } 18228016e29fSHarshad Shirwadkar while (cur < end) { 18238016e29fSHarshad Shirwadkar map.m_lblk = cur; 18248016e29fSHarshad Shirwadkar map.m_len = end - cur; 18258016e29fSHarshad Shirwadkar 18268016e29fSHarshad Shirwadkar ret = ext4_map_blocks(NULL, inode, &map, 0); 18278016e29fSHarshad Shirwadkar if (ret < 0) 18288016e29fSHarshad Shirwadkar break; 18298016e29fSHarshad Shirwadkar 18308016e29fSHarshad Shirwadkar if (ret > 0) { 18318016e29fSHarshad Shirwadkar path = ext4_find_extent(inode, map.m_lblk, NULL, 0); 183223dd561aSYi Li if (!IS_ERR(path)) { 18338016e29fSHarshad Shirwadkar for (j = 0; j < path->p_depth; j++) 18348016e29fSHarshad Shirwadkar ext4_mb_mark_bb(inode->i_sb, 18358016e29fSHarshad Shirwadkar path[j].p_block, 1, 1); 18368016e29fSHarshad Shirwadkar ext4_ext_drop_refs(path); 18378016e29fSHarshad Shirwadkar kfree(path); 18388016e29fSHarshad Shirwadkar } 18398016e29fSHarshad Shirwadkar cur += ret; 18408016e29fSHarshad Shirwadkar ext4_mb_mark_bb(inode->i_sb, map.m_pblk, 18418016e29fSHarshad Shirwadkar map.m_len, 1); 18428016e29fSHarshad Shirwadkar } else { 18438016e29fSHarshad Shirwadkar cur = cur + (map.m_len ? map.m_len : 1); 18448016e29fSHarshad Shirwadkar } 18458016e29fSHarshad Shirwadkar } 18468016e29fSHarshad Shirwadkar iput(inode); 18478016e29fSHarshad Shirwadkar } 18488016e29fSHarshad Shirwadkar } 18498016e29fSHarshad Shirwadkar 18508016e29fSHarshad Shirwadkar /* 18518016e29fSHarshad Shirwadkar * Check if block is in excluded regions for block allocation. The simple 18528016e29fSHarshad Shirwadkar * allocator that runs during replay phase is calls this function to see 18538016e29fSHarshad Shirwadkar * if it is okay to use a block. 18548016e29fSHarshad Shirwadkar */ 18558016e29fSHarshad Shirwadkar bool ext4_fc_replay_check_excluded(struct super_block *sb, ext4_fsblk_t blk) 18568016e29fSHarshad Shirwadkar { 18578016e29fSHarshad Shirwadkar int i; 18588016e29fSHarshad Shirwadkar struct ext4_fc_replay_state *state; 18598016e29fSHarshad Shirwadkar 18608016e29fSHarshad Shirwadkar state = &EXT4_SB(sb)->s_fc_replay_state; 18618016e29fSHarshad Shirwadkar for (i = 0; i < state->fc_regions_valid; i++) { 18628016e29fSHarshad Shirwadkar if (state->fc_regions[i].ino == 0 || 18638016e29fSHarshad Shirwadkar state->fc_regions[i].len == 0) 18648016e29fSHarshad Shirwadkar continue; 18658016e29fSHarshad Shirwadkar if (blk >= state->fc_regions[i].pblk && 18668016e29fSHarshad Shirwadkar blk < state->fc_regions[i].pblk + state->fc_regions[i].len) 18678016e29fSHarshad Shirwadkar return true; 18688016e29fSHarshad Shirwadkar } 18698016e29fSHarshad Shirwadkar return false; 18708016e29fSHarshad Shirwadkar } 18718016e29fSHarshad Shirwadkar 18728016e29fSHarshad Shirwadkar /* Cleanup function called after replay */ 18738016e29fSHarshad Shirwadkar void ext4_fc_replay_cleanup(struct super_block *sb) 18748016e29fSHarshad Shirwadkar { 18758016e29fSHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB(sb); 18768016e29fSHarshad Shirwadkar 18778016e29fSHarshad Shirwadkar sbi->s_mount_state &= ~EXT4_FC_REPLAY; 18788016e29fSHarshad Shirwadkar kfree(sbi->s_fc_replay_state.fc_regions); 18798016e29fSHarshad Shirwadkar kfree(sbi->s_fc_replay_state.fc_modified_inodes); 18808016e29fSHarshad Shirwadkar } 18818016e29fSHarshad Shirwadkar 18828016e29fSHarshad Shirwadkar /* 18838016e29fSHarshad Shirwadkar * Recovery Scan phase handler 18848016e29fSHarshad Shirwadkar * 18858016e29fSHarshad Shirwadkar * This function is called during the scan phase and is responsible 18868016e29fSHarshad Shirwadkar * for doing following things: 18878016e29fSHarshad Shirwadkar * - Make sure the fast commit area has valid tags for replay 18888016e29fSHarshad Shirwadkar * - Count number of tags that need to be replayed by the replay handler 18898016e29fSHarshad Shirwadkar * - Verify CRC 18908016e29fSHarshad Shirwadkar * - Create a list of excluded blocks for allocation during replay phase 18918016e29fSHarshad Shirwadkar * 18928016e29fSHarshad Shirwadkar * This function returns JBD2_FC_REPLAY_CONTINUE to indicate that SCAN is 18938016e29fSHarshad Shirwadkar * incomplete and JBD2 should send more blocks. It returns JBD2_FC_REPLAY_STOP 18948016e29fSHarshad Shirwadkar * to indicate that scan has finished and JBD2 can now start replay phase. 18958016e29fSHarshad Shirwadkar * It returns a negative error to indicate that there was an error. At the end 18968016e29fSHarshad Shirwadkar * of a successful scan phase, sbi->s_fc_replay_state.fc_replay_num_tags is set 18978016e29fSHarshad Shirwadkar * to indicate the number of tags that need to replayed during the replay phase. 18988016e29fSHarshad Shirwadkar */ 18998016e29fSHarshad Shirwadkar static int ext4_fc_replay_scan(journal_t *journal, 19008016e29fSHarshad Shirwadkar struct buffer_head *bh, int off, 19018016e29fSHarshad Shirwadkar tid_t expected_tid) 19028016e29fSHarshad Shirwadkar { 19038016e29fSHarshad Shirwadkar struct super_block *sb = journal->j_private; 19048016e29fSHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB(sb); 19058016e29fSHarshad Shirwadkar struct ext4_fc_replay_state *state; 19068016e29fSHarshad Shirwadkar int ret = JBD2_FC_REPLAY_CONTINUE; 1907a7ba36bcSHarshad Shirwadkar struct ext4_fc_add_range ext; 1908a7ba36bcSHarshad Shirwadkar struct ext4_fc_tl tl; 1909a7ba36bcSHarshad Shirwadkar struct ext4_fc_tail tail; 1910a7ba36bcSHarshad Shirwadkar __u8 *start, *end, *cur, *val; 1911a7ba36bcSHarshad Shirwadkar struct ext4_fc_head head; 19128016e29fSHarshad Shirwadkar struct ext4_extent *ex; 19138016e29fSHarshad Shirwadkar 19148016e29fSHarshad Shirwadkar state = &sbi->s_fc_replay_state; 19158016e29fSHarshad Shirwadkar 19168016e29fSHarshad Shirwadkar start = (u8 *)bh->b_data; 19178016e29fSHarshad Shirwadkar end = (__u8 *)bh->b_data + journal->j_blocksize - 1; 19188016e29fSHarshad Shirwadkar 19198016e29fSHarshad Shirwadkar if (state->fc_replay_expected_off == 0) { 19208016e29fSHarshad Shirwadkar state->fc_cur_tag = 0; 19218016e29fSHarshad Shirwadkar state->fc_replay_num_tags = 0; 19228016e29fSHarshad Shirwadkar state->fc_crc = 0; 19238016e29fSHarshad Shirwadkar state->fc_regions = NULL; 19248016e29fSHarshad Shirwadkar state->fc_regions_valid = state->fc_regions_used = 19258016e29fSHarshad Shirwadkar state->fc_regions_size = 0; 19268016e29fSHarshad Shirwadkar /* Check if we can stop early */ 19278016e29fSHarshad Shirwadkar if (le16_to_cpu(((struct ext4_fc_tl *)start)->fc_tag) 19288016e29fSHarshad Shirwadkar != EXT4_FC_TAG_HEAD) 19298016e29fSHarshad Shirwadkar return 0; 19308016e29fSHarshad Shirwadkar } 19318016e29fSHarshad Shirwadkar 19328016e29fSHarshad Shirwadkar if (off != state->fc_replay_expected_off) { 19338016e29fSHarshad Shirwadkar ret = -EFSCORRUPTED; 19348016e29fSHarshad Shirwadkar goto out_err; 19358016e29fSHarshad Shirwadkar } 19368016e29fSHarshad Shirwadkar 19378016e29fSHarshad Shirwadkar state->fc_replay_expected_off++; 1938a7ba36bcSHarshad Shirwadkar for (cur = start; cur < end; cur = cur + sizeof(tl) + le16_to_cpu(tl.fc_len)) { 1939a7ba36bcSHarshad Shirwadkar memcpy(&tl, cur, sizeof(tl)); 1940a7ba36bcSHarshad Shirwadkar val = cur + sizeof(tl); 19418016e29fSHarshad Shirwadkar jbd_debug(3, "Scan phase, tag:%s, blk %lld\n", 1942a7ba36bcSHarshad Shirwadkar tag2str(le16_to_cpu(tl.fc_tag)), bh->b_blocknr); 1943a7ba36bcSHarshad Shirwadkar switch (le16_to_cpu(tl.fc_tag)) { 19448016e29fSHarshad Shirwadkar case EXT4_FC_TAG_ADD_RANGE: 1945a7ba36bcSHarshad Shirwadkar memcpy(&ext, val, sizeof(ext)); 1946a7ba36bcSHarshad Shirwadkar ex = (struct ext4_extent *)&ext.fc_ex; 19478016e29fSHarshad Shirwadkar ret = ext4_fc_record_regions(sb, 1948a7ba36bcSHarshad Shirwadkar le32_to_cpu(ext.fc_ino), 19498016e29fSHarshad Shirwadkar le32_to_cpu(ex->ee_block), ext4_ext_pblock(ex), 1950*599ea31dSXin Yin ext4_ext_get_actual_len(ex), 0); 19518016e29fSHarshad Shirwadkar if (ret < 0) 19528016e29fSHarshad Shirwadkar break; 19538016e29fSHarshad Shirwadkar ret = JBD2_FC_REPLAY_CONTINUE; 19548016e29fSHarshad Shirwadkar fallthrough; 19558016e29fSHarshad Shirwadkar case EXT4_FC_TAG_DEL_RANGE: 19568016e29fSHarshad Shirwadkar case EXT4_FC_TAG_LINK: 19578016e29fSHarshad Shirwadkar case EXT4_FC_TAG_UNLINK: 19588016e29fSHarshad Shirwadkar case EXT4_FC_TAG_CREAT: 19598016e29fSHarshad Shirwadkar case EXT4_FC_TAG_INODE: 19608016e29fSHarshad Shirwadkar case EXT4_FC_TAG_PAD: 19618016e29fSHarshad Shirwadkar state->fc_cur_tag++; 1962a7ba36bcSHarshad Shirwadkar state->fc_crc = ext4_chksum(sbi, state->fc_crc, cur, 1963a7ba36bcSHarshad Shirwadkar sizeof(tl) + le16_to_cpu(tl.fc_len)); 19648016e29fSHarshad Shirwadkar break; 19658016e29fSHarshad Shirwadkar case EXT4_FC_TAG_TAIL: 19668016e29fSHarshad Shirwadkar state->fc_cur_tag++; 1967a7ba36bcSHarshad Shirwadkar memcpy(&tail, val, sizeof(tail)); 1968a7ba36bcSHarshad Shirwadkar state->fc_crc = ext4_chksum(sbi, state->fc_crc, cur, 1969a7ba36bcSHarshad Shirwadkar sizeof(tl) + 19708016e29fSHarshad Shirwadkar offsetof(struct ext4_fc_tail, 19718016e29fSHarshad Shirwadkar fc_crc)); 1972a7ba36bcSHarshad Shirwadkar if (le32_to_cpu(tail.fc_tid) == expected_tid && 1973a7ba36bcSHarshad Shirwadkar le32_to_cpu(tail.fc_crc) == state->fc_crc) { 19748016e29fSHarshad Shirwadkar state->fc_replay_num_tags = state->fc_cur_tag; 19758016e29fSHarshad Shirwadkar state->fc_regions_valid = 19768016e29fSHarshad Shirwadkar state->fc_regions_used; 19778016e29fSHarshad Shirwadkar } else { 19788016e29fSHarshad Shirwadkar ret = state->fc_replay_num_tags ? 19798016e29fSHarshad Shirwadkar JBD2_FC_REPLAY_STOP : -EFSBADCRC; 19808016e29fSHarshad Shirwadkar } 19818016e29fSHarshad Shirwadkar state->fc_crc = 0; 19828016e29fSHarshad Shirwadkar break; 19838016e29fSHarshad Shirwadkar case EXT4_FC_TAG_HEAD: 1984a7ba36bcSHarshad Shirwadkar memcpy(&head, val, sizeof(head)); 1985a7ba36bcSHarshad Shirwadkar if (le32_to_cpu(head.fc_features) & 19868016e29fSHarshad Shirwadkar ~EXT4_FC_SUPPORTED_FEATURES) { 19878016e29fSHarshad Shirwadkar ret = -EOPNOTSUPP; 19888016e29fSHarshad Shirwadkar break; 19898016e29fSHarshad Shirwadkar } 1990a7ba36bcSHarshad Shirwadkar if (le32_to_cpu(head.fc_tid) != expected_tid) { 19918016e29fSHarshad Shirwadkar ret = JBD2_FC_REPLAY_STOP; 19928016e29fSHarshad Shirwadkar break; 19938016e29fSHarshad Shirwadkar } 19948016e29fSHarshad Shirwadkar state->fc_cur_tag++; 1995a7ba36bcSHarshad Shirwadkar state->fc_crc = ext4_chksum(sbi, state->fc_crc, cur, 1996a7ba36bcSHarshad Shirwadkar sizeof(tl) + le16_to_cpu(tl.fc_len)); 19978016e29fSHarshad Shirwadkar break; 19988016e29fSHarshad Shirwadkar default: 19998016e29fSHarshad Shirwadkar ret = state->fc_replay_num_tags ? 20008016e29fSHarshad Shirwadkar JBD2_FC_REPLAY_STOP : -ECANCELED; 20018016e29fSHarshad Shirwadkar } 20028016e29fSHarshad Shirwadkar if (ret < 0 || ret == JBD2_FC_REPLAY_STOP) 20038016e29fSHarshad Shirwadkar break; 20048016e29fSHarshad Shirwadkar } 20058016e29fSHarshad Shirwadkar 20068016e29fSHarshad Shirwadkar out_err: 20078016e29fSHarshad Shirwadkar trace_ext4_fc_replay_scan(sb, ret, off); 20088016e29fSHarshad Shirwadkar return ret; 20098016e29fSHarshad Shirwadkar } 20108016e29fSHarshad Shirwadkar 20115b849b5fSHarshad Shirwadkar /* 20125b849b5fSHarshad Shirwadkar * Main recovery path entry point. 20138016e29fSHarshad Shirwadkar * The meaning of return codes is similar as above. 20145b849b5fSHarshad Shirwadkar */ 20155b849b5fSHarshad Shirwadkar static int ext4_fc_replay(journal_t *journal, struct buffer_head *bh, 20165b849b5fSHarshad Shirwadkar enum passtype pass, int off, tid_t expected_tid) 20175b849b5fSHarshad Shirwadkar { 20188016e29fSHarshad Shirwadkar struct super_block *sb = journal->j_private; 20198016e29fSHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB(sb); 2020a7ba36bcSHarshad Shirwadkar struct ext4_fc_tl tl; 2021a7ba36bcSHarshad Shirwadkar __u8 *start, *end, *cur, *val; 20228016e29fSHarshad Shirwadkar int ret = JBD2_FC_REPLAY_CONTINUE; 20238016e29fSHarshad Shirwadkar struct ext4_fc_replay_state *state = &sbi->s_fc_replay_state; 2024a7ba36bcSHarshad Shirwadkar struct ext4_fc_tail tail; 20258016e29fSHarshad Shirwadkar 20268016e29fSHarshad Shirwadkar if (pass == PASS_SCAN) { 20278016e29fSHarshad Shirwadkar state->fc_current_pass = PASS_SCAN; 20288016e29fSHarshad Shirwadkar return ext4_fc_replay_scan(journal, bh, off, expected_tid); 20298016e29fSHarshad Shirwadkar } 20308016e29fSHarshad Shirwadkar 20318016e29fSHarshad Shirwadkar if (state->fc_current_pass != pass) { 20328016e29fSHarshad Shirwadkar state->fc_current_pass = pass; 20338016e29fSHarshad Shirwadkar sbi->s_mount_state |= EXT4_FC_REPLAY; 20348016e29fSHarshad Shirwadkar } 20358016e29fSHarshad Shirwadkar if (!sbi->s_fc_replay_state.fc_replay_num_tags) { 20368016e29fSHarshad Shirwadkar jbd_debug(1, "Replay stops\n"); 20378016e29fSHarshad Shirwadkar ext4_fc_set_bitmaps_and_counters(sb); 20385b849b5fSHarshad Shirwadkar return 0; 20395b849b5fSHarshad Shirwadkar } 20405b849b5fSHarshad Shirwadkar 20418016e29fSHarshad Shirwadkar #ifdef CONFIG_EXT4_DEBUG 20428016e29fSHarshad Shirwadkar if (sbi->s_fc_debug_max_replay && off >= sbi->s_fc_debug_max_replay) { 20438016e29fSHarshad Shirwadkar pr_warn("Dropping fc block %d because max_replay set\n", off); 20448016e29fSHarshad Shirwadkar return JBD2_FC_REPLAY_STOP; 20458016e29fSHarshad Shirwadkar } 20468016e29fSHarshad Shirwadkar #endif 20478016e29fSHarshad Shirwadkar 20488016e29fSHarshad Shirwadkar start = (u8 *)bh->b_data; 20498016e29fSHarshad Shirwadkar end = (__u8 *)bh->b_data + journal->j_blocksize - 1; 20508016e29fSHarshad Shirwadkar 2051a7ba36bcSHarshad Shirwadkar for (cur = start; cur < end; cur = cur + sizeof(tl) + le16_to_cpu(tl.fc_len)) { 2052a7ba36bcSHarshad Shirwadkar memcpy(&tl, cur, sizeof(tl)); 2053a7ba36bcSHarshad Shirwadkar val = cur + sizeof(tl); 2054a7ba36bcSHarshad Shirwadkar 20558016e29fSHarshad Shirwadkar if (state->fc_replay_num_tags == 0) { 20568016e29fSHarshad Shirwadkar ret = JBD2_FC_REPLAY_STOP; 20578016e29fSHarshad Shirwadkar ext4_fc_set_bitmaps_and_counters(sb); 20588016e29fSHarshad Shirwadkar break; 20598016e29fSHarshad Shirwadkar } 20608016e29fSHarshad Shirwadkar jbd_debug(3, "Replay phase, tag:%s\n", 2061a7ba36bcSHarshad Shirwadkar tag2str(le16_to_cpu(tl.fc_tag))); 20628016e29fSHarshad Shirwadkar state->fc_replay_num_tags--; 2063a7ba36bcSHarshad Shirwadkar switch (le16_to_cpu(tl.fc_tag)) { 20648016e29fSHarshad Shirwadkar case EXT4_FC_TAG_LINK: 2065a7ba36bcSHarshad Shirwadkar ret = ext4_fc_replay_link(sb, &tl, val); 20668016e29fSHarshad Shirwadkar break; 20678016e29fSHarshad Shirwadkar case EXT4_FC_TAG_UNLINK: 2068a7ba36bcSHarshad Shirwadkar ret = ext4_fc_replay_unlink(sb, &tl, val); 20698016e29fSHarshad Shirwadkar break; 20708016e29fSHarshad Shirwadkar case EXT4_FC_TAG_ADD_RANGE: 2071a7ba36bcSHarshad Shirwadkar ret = ext4_fc_replay_add_range(sb, &tl, val); 20728016e29fSHarshad Shirwadkar break; 20738016e29fSHarshad Shirwadkar case EXT4_FC_TAG_CREAT: 2074a7ba36bcSHarshad Shirwadkar ret = ext4_fc_replay_create(sb, &tl, val); 20758016e29fSHarshad Shirwadkar break; 20768016e29fSHarshad Shirwadkar case EXT4_FC_TAG_DEL_RANGE: 2077a7ba36bcSHarshad Shirwadkar ret = ext4_fc_replay_del_range(sb, &tl, val); 20788016e29fSHarshad Shirwadkar break; 20798016e29fSHarshad Shirwadkar case EXT4_FC_TAG_INODE: 2080a7ba36bcSHarshad Shirwadkar ret = ext4_fc_replay_inode(sb, &tl, val); 20818016e29fSHarshad Shirwadkar break; 20828016e29fSHarshad Shirwadkar case EXT4_FC_TAG_PAD: 20838016e29fSHarshad Shirwadkar trace_ext4_fc_replay(sb, EXT4_FC_TAG_PAD, 0, 2084a7ba36bcSHarshad Shirwadkar le16_to_cpu(tl.fc_len), 0); 20858016e29fSHarshad Shirwadkar break; 20868016e29fSHarshad Shirwadkar case EXT4_FC_TAG_TAIL: 20878016e29fSHarshad Shirwadkar trace_ext4_fc_replay(sb, EXT4_FC_TAG_TAIL, 0, 2088a7ba36bcSHarshad Shirwadkar le16_to_cpu(tl.fc_len), 0); 2089a7ba36bcSHarshad Shirwadkar memcpy(&tail, val, sizeof(tail)); 2090a7ba36bcSHarshad Shirwadkar WARN_ON(le32_to_cpu(tail.fc_tid) != expected_tid); 20918016e29fSHarshad Shirwadkar break; 20928016e29fSHarshad Shirwadkar case EXT4_FC_TAG_HEAD: 20938016e29fSHarshad Shirwadkar break; 20948016e29fSHarshad Shirwadkar default: 2095a7ba36bcSHarshad Shirwadkar trace_ext4_fc_replay(sb, le16_to_cpu(tl.fc_tag), 0, 2096a7ba36bcSHarshad Shirwadkar le16_to_cpu(tl.fc_len), 0); 20978016e29fSHarshad Shirwadkar ret = -ECANCELED; 20988016e29fSHarshad Shirwadkar break; 20998016e29fSHarshad Shirwadkar } 21008016e29fSHarshad Shirwadkar if (ret < 0) 21018016e29fSHarshad Shirwadkar break; 21028016e29fSHarshad Shirwadkar ret = JBD2_FC_REPLAY_CONTINUE; 21038016e29fSHarshad Shirwadkar } 21048016e29fSHarshad Shirwadkar return ret; 21058016e29fSHarshad Shirwadkar } 21068016e29fSHarshad Shirwadkar 21076866d7b3SHarshad Shirwadkar void ext4_fc_init(struct super_block *sb, journal_t *journal) 21086866d7b3SHarshad Shirwadkar { 21095b849b5fSHarshad Shirwadkar /* 21105b849b5fSHarshad Shirwadkar * We set replay callback even if fast commit disabled because we may 21115b849b5fSHarshad Shirwadkar * could still have fast commit blocks that need to be replayed even if 21125b849b5fSHarshad Shirwadkar * fast commit has now been turned off. 21135b849b5fSHarshad Shirwadkar */ 21145b849b5fSHarshad Shirwadkar journal->j_fc_replay_callback = ext4_fc_replay; 21156866d7b3SHarshad Shirwadkar if (!test_opt2(sb, JOURNAL_FAST_COMMIT)) 21166866d7b3SHarshad Shirwadkar return; 2117ff780b91SHarshad Shirwadkar journal->j_fc_cleanup_callback = ext4_fc_cleanup; 21186866d7b3SHarshad Shirwadkar } 2119aa75f4d3SHarshad Shirwadkar 2120fa329e27STheodore Ts'o static const char *fc_ineligible_reasons[] = { 2121ce8c59d1SHarshad Shirwadkar "Extended attributes changed", 2122ce8c59d1SHarshad Shirwadkar "Cross rename", 2123ce8c59d1SHarshad Shirwadkar "Journal flag changed", 2124ce8c59d1SHarshad Shirwadkar "Insufficient memory", 2125ce8c59d1SHarshad Shirwadkar "Swap boot", 2126ce8c59d1SHarshad Shirwadkar "Resize", 2127ce8c59d1SHarshad Shirwadkar "Dir renamed", 2128ce8c59d1SHarshad Shirwadkar "Falloc range op", 2129556e0319SHarshad Shirwadkar "Data journalling", 2130ce8c59d1SHarshad Shirwadkar "FC Commit Failed" 2131ce8c59d1SHarshad Shirwadkar }; 2132ce8c59d1SHarshad Shirwadkar 2133ce8c59d1SHarshad Shirwadkar int ext4_fc_info_show(struct seq_file *seq, void *v) 2134ce8c59d1SHarshad Shirwadkar { 2135ce8c59d1SHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB((struct super_block *)seq->private); 2136ce8c59d1SHarshad Shirwadkar struct ext4_fc_stats *stats = &sbi->s_fc_stats; 2137ce8c59d1SHarshad Shirwadkar int i; 2138ce8c59d1SHarshad Shirwadkar 2139ce8c59d1SHarshad Shirwadkar if (v != SEQ_START_TOKEN) 2140ce8c59d1SHarshad Shirwadkar return 0; 2141ce8c59d1SHarshad Shirwadkar 2142ce8c59d1SHarshad Shirwadkar seq_printf(seq, 2143ce8c59d1SHarshad Shirwadkar "fc stats:\n%ld commits\n%ld ineligible\n%ld numblks\n%lluus avg_commit_time\n", 2144ce8c59d1SHarshad Shirwadkar stats->fc_num_commits, stats->fc_ineligible_commits, 2145ce8c59d1SHarshad Shirwadkar stats->fc_numblks, 21460915e464SHarshad Shirwadkar div_u64(stats->s_fc_avg_commit_time, 1000)); 2147ce8c59d1SHarshad Shirwadkar seq_puts(seq, "Ineligible reasons:\n"); 2148ce8c59d1SHarshad Shirwadkar for (i = 0; i < EXT4_FC_REASON_MAX; i++) 2149ce8c59d1SHarshad Shirwadkar seq_printf(seq, "\"%s\":\t%d\n", fc_ineligible_reasons[i], 2150ce8c59d1SHarshad Shirwadkar stats->fc_ineligible_reason_count[i]); 2151ce8c59d1SHarshad Shirwadkar 2152ce8c59d1SHarshad Shirwadkar return 0; 2153ce8c59d1SHarshad Shirwadkar } 2154ce8c59d1SHarshad Shirwadkar 2155aa75f4d3SHarshad Shirwadkar int __init ext4_fc_init_dentry_cache(void) 2156aa75f4d3SHarshad Shirwadkar { 2157aa75f4d3SHarshad Shirwadkar ext4_fc_dentry_cachep = KMEM_CACHE(ext4_fc_dentry_update, 2158aa75f4d3SHarshad Shirwadkar SLAB_RECLAIM_ACCOUNT); 2159aa75f4d3SHarshad Shirwadkar 2160aa75f4d3SHarshad Shirwadkar if (ext4_fc_dentry_cachep == NULL) 2161aa75f4d3SHarshad Shirwadkar return -ENOMEM; 2162aa75f4d3SHarshad Shirwadkar 2163aa75f4d3SHarshad Shirwadkar return 0; 2164aa75f4d3SHarshad Shirwadkar } 2165ab047d51SSebastian Andrzej Siewior 2166ab047d51SSebastian Andrzej Siewior void ext4_fc_destroy_dentry_cache(void) 2167ab047d51SSebastian Andrzej Siewior { 2168ab047d51SSebastian Andrzej Siewior kmem_cache_destroy(ext4_fc_dentry_cachep); 2169ab047d51SSebastian Andrzej Siewior } 2170