16866d7b3SHarshad Shirwadkar // SPDX-License-Identifier: GPL-2.0 26866d7b3SHarshad Shirwadkar 36866d7b3SHarshad Shirwadkar /* 46866d7b3SHarshad Shirwadkar * fs/ext4/fast_commit.c 56866d7b3SHarshad Shirwadkar * 66866d7b3SHarshad Shirwadkar * Written by Harshad Shirwadkar <harshadshirwadkar@gmail.com> 76866d7b3SHarshad Shirwadkar * 86866d7b3SHarshad Shirwadkar * Ext4 fast commits routines. 96866d7b3SHarshad Shirwadkar */ 10aa75f4d3SHarshad Shirwadkar #include "ext4.h" 116866d7b3SHarshad Shirwadkar #include "ext4_jbd2.h" 12aa75f4d3SHarshad Shirwadkar #include "ext4_extents.h" 13aa75f4d3SHarshad Shirwadkar #include "mballoc.h" 14aa75f4d3SHarshad Shirwadkar 15aa75f4d3SHarshad Shirwadkar /* 16aa75f4d3SHarshad Shirwadkar * Ext4 Fast Commits 17aa75f4d3SHarshad Shirwadkar * ----------------- 18aa75f4d3SHarshad Shirwadkar * 19aa75f4d3SHarshad Shirwadkar * Ext4 fast commits implement fine grained journalling for Ext4. 20aa75f4d3SHarshad Shirwadkar * 21aa75f4d3SHarshad Shirwadkar * Fast commits are organized as a log of tag-length-value (TLV) structs. (See 22aa75f4d3SHarshad Shirwadkar * struct ext4_fc_tl). Each TLV contains some delta that is replayed TLV by 23aa75f4d3SHarshad Shirwadkar * TLV during the recovery phase. For the scenarios for which we currently 24aa75f4d3SHarshad Shirwadkar * don't have replay code, fast commit falls back to full commits. 25aa75f4d3SHarshad Shirwadkar * Fast commits record delta in one of the following three categories. 
 *
 * (A) Directory entry updates:
 *
 * - EXT4_FC_TAG_UNLINK		- records directory entry unlink
 * - EXT4_FC_TAG_LINK		- records directory entry link
 * - EXT4_FC_TAG_CREAT		- records inode and directory entry creation
 *
 * (B) File specific data range updates:
 *
 * - EXT4_FC_TAG_ADD_RANGE	- records addition of new blocks to an inode
 * - EXT4_FC_TAG_DEL_RANGE	- records deletion of blocks from an inode
 *
 * (C) Inode metadata (mtime / ctime etc):
 *
 * - EXT4_FC_TAG_INODE		- record the inode that should be replayed
 *				  during recovery. Note that iblocks field is
 *				  not replayed and instead derived during
 *				  replay.
 * Commit Operation
 * ----------------
 * With fast commits, we maintain all the directory entry operations in the
 * order in which they are issued in an in-memory queue. This queue is flushed
 * to disk during the commit operation. We also maintain a list of inodes
 * that need to be committed during a fast commit in another in memory queue of
 * inodes. During the commit operation, we commit in the following order:
 *
 * [1] Lock inodes for any further data updates by setting COMMITTING state
 * [2] Submit data buffers of all the inodes
 * [3] Wait for [2] to complete
 * [4] Commit all the directory entry updates in the fast commit space
 * [5] Commit all the changed inode structures
 * [6] Write tail tag (this tag ensures the atomicity, please read the following
 *     section for more details).
 * [7] Wait for [4], [5] and [6] to complete.
 *
 * All the inode updates must call ext4_fc_start_update() before starting an
 * update. If such an ongoing update is present, fast commit waits for it to
 * complete. The completion of such an update is marked by
 * ext4_fc_stop_update().
 *
 * Fast Commit Ineligibility
 * -------------------------
 * Not all operations are supported by fast commits today (e.g extended
 * attributes). Fast commit ineligibility is marked by calling one of the
 * two following functions:
 *
 * - ext4_fc_mark_ineligible(): This makes next fast commit operation to fall
 *   back to full commit. This is useful in case of transient errors.
 *
 * - ext4_fc_start_ineligible() and ext4_fc_stop_ineligible() - This makes all
 *   the fast commits happening between ext4_fc_start_ineligible() and
 *   ext4_fc_stop_ineligible() and one fast commit after the call to
 *   ext4_fc_stop_ineligible() to fall back to full commits. It is important to
 *   make one more fast commit to fall back to full commit after stop call so
 *   that it is guaranteed that the fast commit ineligible operation contained
 *   within ext4_fc_start_ineligible() and ext4_fc_stop_ineligible() is
 *   followed by at least 1 full commit.
 *
 * Atomicity of commits
 * --------------------
 * In order to guarantee atomicity during the commit operation, fast commit
 * uses "EXT4_FC_TAG_TAIL" tag that marks a fast commit as complete. Tail
 * tag contains CRC of the contents and TID of the transaction after which
 * this fast commit should be applied. Recovery code replays fast commit
 * logs only if there's at least 1 valid tail present. For every fast commit
 * operation, there is 1 tail. This means, we may end up with multiple tails
 * in the fast commit space. Here's an example:
 *
 * - Create a new file A and remove existing file B
 * - fsync()
 * - Append contents to file A
 * - Truncate file A
 * - fsync()
 *
 * The fast commit space at the end of above operations would look like this:
 *      [HEAD] [CREAT A] [UNLINK B] [TAIL] [ADD_RANGE A] [DEL_RANGE A] [TAIL]
 *             |<---  Fast Commit 1  --->|<---     Fast Commit 2     ---->|
 *
 * Replay code should thus check for all the valid tails in the FC area.
 *
 * TODOs
 * -----
 * 1) Make fast commit atomic updates more fine grained. Today, a fast commit
 *    eligible update must be protected within ext4_fc_start_update() and
 *    ext4_fc_stop_update(). These routines are called at much higher
 *    routines. This can be made more fine grained by combining with
 *    ext4_journal_start().
 *
 * 2) Same above for ext4_fc_start_ineligible() and ext4_fc_stop_ineligible()
 *
 * 3) Handle more ineligible cases.
117aa75f4d3SHarshad Shirwadkar */ 118aa75f4d3SHarshad Shirwadkar 119aa75f4d3SHarshad Shirwadkar #include <trace/events/ext4.h> 120aa75f4d3SHarshad Shirwadkar static struct kmem_cache *ext4_fc_dentry_cachep; 121aa75f4d3SHarshad Shirwadkar 122aa75f4d3SHarshad Shirwadkar static void ext4_end_buffer_io_sync(struct buffer_head *bh, int uptodate) 123aa75f4d3SHarshad Shirwadkar { 124aa75f4d3SHarshad Shirwadkar BUFFER_TRACE(bh, ""); 125aa75f4d3SHarshad Shirwadkar if (uptodate) { 126aa75f4d3SHarshad Shirwadkar ext4_debug("%s: Block %lld up-to-date", 127aa75f4d3SHarshad Shirwadkar __func__, bh->b_blocknr); 128aa75f4d3SHarshad Shirwadkar set_buffer_uptodate(bh); 129aa75f4d3SHarshad Shirwadkar } else { 130aa75f4d3SHarshad Shirwadkar ext4_debug("%s: Block %lld not up-to-date", 131aa75f4d3SHarshad Shirwadkar __func__, bh->b_blocknr); 132aa75f4d3SHarshad Shirwadkar clear_buffer_uptodate(bh); 133aa75f4d3SHarshad Shirwadkar } 134aa75f4d3SHarshad Shirwadkar 135aa75f4d3SHarshad Shirwadkar unlock_buffer(bh); 136aa75f4d3SHarshad Shirwadkar } 137aa75f4d3SHarshad Shirwadkar 138aa75f4d3SHarshad Shirwadkar static inline void ext4_fc_reset_inode(struct inode *inode) 139aa75f4d3SHarshad Shirwadkar { 140aa75f4d3SHarshad Shirwadkar struct ext4_inode_info *ei = EXT4_I(inode); 141aa75f4d3SHarshad Shirwadkar 142aa75f4d3SHarshad Shirwadkar ei->i_fc_lblk_start = 0; 143aa75f4d3SHarshad Shirwadkar ei->i_fc_lblk_len = 0; 144aa75f4d3SHarshad Shirwadkar } 145aa75f4d3SHarshad Shirwadkar 146aa75f4d3SHarshad Shirwadkar void ext4_fc_init_inode(struct inode *inode) 147aa75f4d3SHarshad Shirwadkar { 148aa75f4d3SHarshad Shirwadkar struct ext4_inode_info *ei = EXT4_I(inode); 149aa75f4d3SHarshad Shirwadkar 150aa75f4d3SHarshad Shirwadkar ext4_fc_reset_inode(inode); 151aa75f4d3SHarshad Shirwadkar ext4_clear_inode_state(inode, EXT4_STATE_FC_COMMITTING); 152aa75f4d3SHarshad Shirwadkar INIT_LIST_HEAD(&ei->i_fc_list); 153aa75f4d3SHarshad Shirwadkar init_waitqueue_head(&ei->i_fc_wait); 154aa75f4d3SHarshad Shirwadkar 
atomic_set(&ei->i_fc_updates, 0); 155aa75f4d3SHarshad Shirwadkar ei->i_fc_committed_subtid = 0; 156aa75f4d3SHarshad Shirwadkar } 157aa75f4d3SHarshad Shirwadkar 158aa75f4d3SHarshad Shirwadkar /* 159aa75f4d3SHarshad Shirwadkar * Inform Ext4's fast about start of an inode update 160aa75f4d3SHarshad Shirwadkar * 161aa75f4d3SHarshad Shirwadkar * This function is called by the high level call VFS callbacks before 162aa75f4d3SHarshad Shirwadkar * performing any inode update. This function blocks if there's an ongoing 163aa75f4d3SHarshad Shirwadkar * fast commit on the inode in question. 164aa75f4d3SHarshad Shirwadkar */ 165aa75f4d3SHarshad Shirwadkar void ext4_fc_start_update(struct inode *inode) 166aa75f4d3SHarshad Shirwadkar { 167aa75f4d3SHarshad Shirwadkar struct ext4_inode_info *ei = EXT4_I(inode); 168aa75f4d3SHarshad Shirwadkar 169*8016e29fSHarshad Shirwadkar if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) || 170*8016e29fSHarshad Shirwadkar (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)) 171aa75f4d3SHarshad Shirwadkar return; 172aa75f4d3SHarshad Shirwadkar 173aa75f4d3SHarshad Shirwadkar restart: 174aa75f4d3SHarshad Shirwadkar spin_lock(&EXT4_SB(inode->i_sb)->s_fc_lock); 175aa75f4d3SHarshad Shirwadkar if (list_empty(&ei->i_fc_list)) 176aa75f4d3SHarshad Shirwadkar goto out; 177aa75f4d3SHarshad Shirwadkar 178aa75f4d3SHarshad Shirwadkar if (ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING)) { 179aa75f4d3SHarshad Shirwadkar wait_queue_head_t *wq; 180aa75f4d3SHarshad Shirwadkar #if (BITS_PER_LONG < 64) 181aa75f4d3SHarshad Shirwadkar DEFINE_WAIT_BIT(wait, &ei->i_state_flags, 182aa75f4d3SHarshad Shirwadkar EXT4_STATE_FC_COMMITTING); 183aa75f4d3SHarshad Shirwadkar wq = bit_waitqueue(&ei->i_state_flags, 184aa75f4d3SHarshad Shirwadkar EXT4_STATE_FC_COMMITTING); 185aa75f4d3SHarshad Shirwadkar #else 186aa75f4d3SHarshad Shirwadkar DEFINE_WAIT_BIT(wait, &ei->i_flags, 187aa75f4d3SHarshad Shirwadkar EXT4_STATE_FC_COMMITTING); 188aa75f4d3SHarshad Shirwadkar wq = 
bit_waitqueue(&ei->i_flags, 189aa75f4d3SHarshad Shirwadkar EXT4_STATE_FC_COMMITTING); 190aa75f4d3SHarshad Shirwadkar #endif 191aa75f4d3SHarshad Shirwadkar prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE); 192aa75f4d3SHarshad Shirwadkar spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock); 193aa75f4d3SHarshad Shirwadkar schedule(); 194aa75f4d3SHarshad Shirwadkar finish_wait(wq, &wait.wq_entry); 195aa75f4d3SHarshad Shirwadkar goto restart; 196aa75f4d3SHarshad Shirwadkar } 197aa75f4d3SHarshad Shirwadkar out: 198aa75f4d3SHarshad Shirwadkar atomic_inc(&ei->i_fc_updates); 199aa75f4d3SHarshad Shirwadkar spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock); 200aa75f4d3SHarshad Shirwadkar } 201aa75f4d3SHarshad Shirwadkar 202aa75f4d3SHarshad Shirwadkar /* 203aa75f4d3SHarshad Shirwadkar * Stop inode update and wake up waiting fast commits if any. 204aa75f4d3SHarshad Shirwadkar */ 205aa75f4d3SHarshad Shirwadkar void ext4_fc_stop_update(struct inode *inode) 206aa75f4d3SHarshad Shirwadkar { 207aa75f4d3SHarshad Shirwadkar struct ext4_inode_info *ei = EXT4_I(inode); 208aa75f4d3SHarshad Shirwadkar 209*8016e29fSHarshad Shirwadkar if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) || 210*8016e29fSHarshad Shirwadkar (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)) 211aa75f4d3SHarshad Shirwadkar return; 212aa75f4d3SHarshad Shirwadkar 213aa75f4d3SHarshad Shirwadkar if (atomic_dec_and_test(&ei->i_fc_updates)) 214aa75f4d3SHarshad Shirwadkar wake_up_all(&ei->i_fc_wait); 215aa75f4d3SHarshad Shirwadkar } 216aa75f4d3SHarshad Shirwadkar 217aa75f4d3SHarshad Shirwadkar /* 218aa75f4d3SHarshad Shirwadkar * Remove inode from fast commit list. If the inode is being committed 219aa75f4d3SHarshad Shirwadkar * we wait until inode commit is done. 
220aa75f4d3SHarshad Shirwadkar */ 221aa75f4d3SHarshad Shirwadkar void ext4_fc_del(struct inode *inode) 222aa75f4d3SHarshad Shirwadkar { 223aa75f4d3SHarshad Shirwadkar struct ext4_inode_info *ei = EXT4_I(inode); 224aa75f4d3SHarshad Shirwadkar 225*8016e29fSHarshad Shirwadkar if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) || 226*8016e29fSHarshad Shirwadkar (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)) 227aa75f4d3SHarshad Shirwadkar return; 228aa75f4d3SHarshad Shirwadkar 229aa75f4d3SHarshad Shirwadkar restart: 230aa75f4d3SHarshad Shirwadkar spin_lock(&EXT4_SB(inode->i_sb)->s_fc_lock); 231aa75f4d3SHarshad Shirwadkar if (list_empty(&ei->i_fc_list)) { 232aa75f4d3SHarshad Shirwadkar spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock); 233aa75f4d3SHarshad Shirwadkar return; 234aa75f4d3SHarshad Shirwadkar } 235aa75f4d3SHarshad Shirwadkar 236aa75f4d3SHarshad Shirwadkar if (ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING)) { 237aa75f4d3SHarshad Shirwadkar wait_queue_head_t *wq; 238aa75f4d3SHarshad Shirwadkar #if (BITS_PER_LONG < 64) 239aa75f4d3SHarshad Shirwadkar DEFINE_WAIT_BIT(wait, &ei->i_state_flags, 240aa75f4d3SHarshad Shirwadkar EXT4_STATE_FC_COMMITTING); 241aa75f4d3SHarshad Shirwadkar wq = bit_waitqueue(&ei->i_state_flags, 242aa75f4d3SHarshad Shirwadkar EXT4_STATE_FC_COMMITTING); 243aa75f4d3SHarshad Shirwadkar #else 244aa75f4d3SHarshad Shirwadkar DEFINE_WAIT_BIT(wait, &ei->i_flags, 245aa75f4d3SHarshad Shirwadkar EXT4_STATE_FC_COMMITTING); 246aa75f4d3SHarshad Shirwadkar wq = bit_waitqueue(&ei->i_flags, 247aa75f4d3SHarshad Shirwadkar EXT4_STATE_FC_COMMITTING); 248aa75f4d3SHarshad Shirwadkar #endif 249aa75f4d3SHarshad Shirwadkar prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE); 250aa75f4d3SHarshad Shirwadkar spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock); 251aa75f4d3SHarshad Shirwadkar schedule(); 252aa75f4d3SHarshad Shirwadkar finish_wait(wq, &wait.wq_entry); 253aa75f4d3SHarshad Shirwadkar goto restart; 254aa75f4d3SHarshad Shirwadkar } 
255aa75f4d3SHarshad Shirwadkar if (!list_empty(&ei->i_fc_list)) 256aa75f4d3SHarshad Shirwadkar list_del_init(&ei->i_fc_list); 257aa75f4d3SHarshad Shirwadkar spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock); 258aa75f4d3SHarshad Shirwadkar } 259aa75f4d3SHarshad Shirwadkar 260aa75f4d3SHarshad Shirwadkar /* 261aa75f4d3SHarshad Shirwadkar * Mark file system as fast commit ineligible. This means that next commit 262aa75f4d3SHarshad Shirwadkar * operation would result in a full jbd2 commit. 263aa75f4d3SHarshad Shirwadkar */ 264aa75f4d3SHarshad Shirwadkar void ext4_fc_mark_ineligible(struct super_block *sb, int reason) 265aa75f4d3SHarshad Shirwadkar { 266aa75f4d3SHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB(sb); 267aa75f4d3SHarshad Shirwadkar 268*8016e29fSHarshad Shirwadkar if (!test_opt2(sb, JOURNAL_FAST_COMMIT) || 269*8016e29fSHarshad Shirwadkar (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY)) 270*8016e29fSHarshad Shirwadkar return; 271*8016e29fSHarshad Shirwadkar 272aa75f4d3SHarshad Shirwadkar sbi->s_mount_state |= EXT4_FC_INELIGIBLE; 273aa75f4d3SHarshad Shirwadkar WARN_ON(reason >= EXT4_FC_REASON_MAX); 274aa75f4d3SHarshad Shirwadkar sbi->s_fc_stats.fc_ineligible_reason_count[reason]++; 275aa75f4d3SHarshad Shirwadkar } 276aa75f4d3SHarshad Shirwadkar 277aa75f4d3SHarshad Shirwadkar /* 278aa75f4d3SHarshad Shirwadkar * Start a fast commit ineligible update. Any commits that happen while 279aa75f4d3SHarshad Shirwadkar * such an operation is in progress fall back to full commits. 
280aa75f4d3SHarshad Shirwadkar */ 281aa75f4d3SHarshad Shirwadkar void ext4_fc_start_ineligible(struct super_block *sb, int reason) 282aa75f4d3SHarshad Shirwadkar { 283aa75f4d3SHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB(sb); 284aa75f4d3SHarshad Shirwadkar 285*8016e29fSHarshad Shirwadkar if (!test_opt2(sb, JOURNAL_FAST_COMMIT) || 286*8016e29fSHarshad Shirwadkar (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY)) 287*8016e29fSHarshad Shirwadkar return; 288*8016e29fSHarshad Shirwadkar 289aa75f4d3SHarshad Shirwadkar WARN_ON(reason >= EXT4_FC_REASON_MAX); 290aa75f4d3SHarshad Shirwadkar sbi->s_fc_stats.fc_ineligible_reason_count[reason]++; 291aa75f4d3SHarshad Shirwadkar atomic_inc(&sbi->s_fc_ineligible_updates); 292aa75f4d3SHarshad Shirwadkar } 293aa75f4d3SHarshad Shirwadkar 294aa75f4d3SHarshad Shirwadkar /* 295aa75f4d3SHarshad Shirwadkar * Stop a fast commit ineligible update. We set EXT4_FC_INELIGIBLE flag here 296aa75f4d3SHarshad Shirwadkar * to ensure that after stopping the ineligible update, at least one full 297aa75f4d3SHarshad Shirwadkar * commit takes place. 
298aa75f4d3SHarshad Shirwadkar */ 299aa75f4d3SHarshad Shirwadkar void ext4_fc_stop_ineligible(struct super_block *sb) 300aa75f4d3SHarshad Shirwadkar { 301*8016e29fSHarshad Shirwadkar if (!test_opt2(sb, JOURNAL_FAST_COMMIT) || 302*8016e29fSHarshad Shirwadkar (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY)) 303*8016e29fSHarshad Shirwadkar return; 304*8016e29fSHarshad Shirwadkar 305aa75f4d3SHarshad Shirwadkar EXT4_SB(sb)->s_mount_state |= EXT4_FC_INELIGIBLE; 306aa75f4d3SHarshad Shirwadkar atomic_dec(&EXT4_SB(sb)->s_fc_ineligible_updates); 307aa75f4d3SHarshad Shirwadkar } 308aa75f4d3SHarshad Shirwadkar 309aa75f4d3SHarshad Shirwadkar static inline int ext4_fc_is_ineligible(struct super_block *sb) 310aa75f4d3SHarshad Shirwadkar { 311aa75f4d3SHarshad Shirwadkar return (EXT4_SB(sb)->s_mount_state & EXT4_FC_INELIGIBLE) || 312aa75f4d3SHarshad Shirwadkar atomic_read(&EXT4_SB(sb)->s_fc_ineligible_updates); 313aa75f4d3SHarshad Shirwadkar } 314aa75f4d3SHarshad Shirwadkar 315aa75f4d3SHarshad Shirwadkar /* 316aa75f4d3SHarshad Shirwadkar * Generic fast commit tracking function. If this is the first time this we are 317aa75f4d3SHarshad Shirwadkar * called after a full commit, we initialize fast commit fields and then call 318aa75f4d3SHarshad Shirwadkar * __fc_track_fn() with update = 0. If we have already been called after a full 319aa75f4d3SHarshad Shirwadkar * commit, we pass update = 1. Based on that, the track function can determine 320aa75f4d3SHarshad Shirwadkar * if it needs to track a field for the first time or if it needs to just 321aa75f4d3SHarshad Shirwadkar * update the previously tracked value. 322aa75f4d3SHarshad Shirwadkar * 323aa75f4d3SHarshad Shirwadkar * If enqueue is set, this function enqueues the inode in fast commit list. 
324aa75f4d3SHarshad Shirwadkar */ 325aa75f4d3SHarshad Shirwadkar static int ext4_fc_track_template( 326aa75f4d3SHarshad Shirwadkar struct inode *inode, int (*__fc_track_fn)(struct inode *, void *, bool), 327aa75f4d3SHarshad Shirwadkar void *args, int enqueue) 328aa75f4d3SHarshad Shirwadkar { 329aa75f4d3SHarshad Shirwadkar tid_t running_txn_tid; 330aa75f4d3SHarshad Shirwadkar bool update = false; 331aa75f4d3SHarshad Shirwadkar struct ext4_inode_info *ei = EXT4_I(inode); 332aa75f4d3SHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 333aa75f4d3SHarshad Shirwadkar int ret; 334aa75f4d3SHarshad Shirwadkar 335*8016e29fSHarshad Shirwadkar if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) || 336*8016e29fSHarshad Shirwadkar (sbi->s_mount_state & EXT4_FC_REPLAY)) 337aa75f4d3SHarshad Shirwadkar return -EOPNOTSUPP; 338aa75f4d3SHarshad Shirwadkar 339aa75f4d3SHarshad Shirwadkar if (ext4_fc_is_ineligible(inode->i_sb)) 340aa75f4d3SHarshad Shirwadkar return -EINVAL; 341aa75f4d3SHarshad Shirwadkar 342aa75f4d3SHarshad Shirwadkar running_txn_tid = sbi->s_journal ? 
343aa75f4d3SHarshad Shirwadkar sbi->s_journal->j_commit_sequence + 1 : 0; 344aa75f4d3SHarshad Shirwadkar 345aa75f4d3SHarshad Shirwadkar mutex_lock(&ei->i_fc_lock); 346aa75f4d3SHarshad Shirwadkar if (running_txn_tid == ei->i_sync_tid) { 347aa75f4d3SHarshad Shirwadkar update = true; 348aa75f4d3SHarshad Shirwadkar } else { 349aa75f4d3SHarshad Shirwadkar ext4_fc_reset_inode(inode); 350aa75f4d3SHarshad Shirwadkar ei->i_sync_tid = running_txn_tid; 351aa75f4d3SHarshad Shirwadkar } 352aa75f4d3SHarshad Shirwadkar ret = __fc_track_fn(inode, args, update); 353aa75f4d3SHarshad Shirwadkar mutex_unlock(&ei->i_fc_lock); 354aa75f4d3SHarshad Shirwadkar 355aa75f4d3SHarshad Shirwadkar if (!enqueue) 356aa75f4d3SHarshad Shirwadkar return ret; 357aa75f4d3SHarshad Shirwadkar 358aa75f4d3SHarshad Shirwadkar spin_lock(&sbi->s_fc_lock); 359aa75f4d3SHarshad Shirwadkar if (list_empty(&EXT4_I(inode)->i_fc_list)) 360aa75f4d3SHarshad Shirwadkar list_add_tail(&EXT4_I(inode)->i_fc_list, 361aa75f4d3SHarshad Shirwadkar (sbi->s_mount_state & EXT4_FC_COMMITTING) ? 362aa75f4d3SHarshad Shirwadkar &sbi->s_fc_q[FC_Q_STAGING] : 363aa75f4d3SHarshad Shirwadkar &sbi->s_fc_q[FC_Q_MAIN]); 364aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock); 365aa75f4d3SHarshad Shirwadkar 366aa75f4d3SHarshad Shirwadkar return ret; 367aa75f4d3SHarshad Shirwadkar } 368aa75f4d3SHarshad Shirwadkar 369aa75f4d3SHarshad Shirwadkar struct __track_dentry_update_args { 370aa75f4d3SHarshad Shirwadkar struct dentry *dentry; 371aa75f4d3SHarshad Shirwadkar int op; 372aa75f4d3SHarshad Shirwadkar }; 373aa75f4d3SHarshad Shirwadkar 374aa75f4d3SHarshad Shirwadkar /* __track_fn for directory entry updates. Called with ei->i_fc_lock. 
*/ 375aa75f4d3SHarshad Shirwadkar static int __track_dentry_update(struct inode *inode, void *arg, bool update) 376aa75f4d3SHarshad Shirwadkar { 377aa75f4d3SHarshad Shirwadkar struct ext4_fc_dentry_update *node; 378aa75f4d3SHarshad Shirwadkar struct ext4_inode_info *ei = EXT4_I(inode); 379aa75f4d3SHarshad Shirwadkar struct __track_dentry_update_args *dentry_update = 380aa75f4d3SHarshad Shirwadkar (struct __track_dentry_update_args *)arg; 381aa75f4d3SHarshad Shirwadkar struct dentry *dentry = dentry_update->dentry; 382aa75f4d3SHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 383aa75f4d3SHarshad Shirwadkar 384aa75f4d3SHarshad Shirwadkar mutex_unlock(&ei->i_fc_lock); 385aa75f4d3SHarshad Shirwadkar node = kmem_cache_alloc(ext4_fc_dentry_cachep, GFP_NOFS); 386aa75f4d3SHarshad Shirwadkar if (!node) { 387aa75f4d3SHarshad Shirwadkar ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_MEM); 388aa75f4d3SHarshad Shirwadkar mutex_lock(&ei->i_fc_lock); 389aa75f4d3SHarshad Shirwadkar return -ENOMEM; 390aa75f4d3SHarshad Shirwadkar } 391aa75f4d3SHarshad Shirwadkar 392aa75f4d3SHarshad Shirwadkar node->fcd_op = dentry_update->op; 393aa75f4d3SHarshad Shirwadkar node->fcd_parent = dentry->d_parent->d_inode->i_ino; 394aa75f4d3SHarshad Shirwadkar node->fcd_ino = inode->i_ino; 395aa75f4d3SHarshad Shirwadkar if (dentry->d_name.len > DNAME_INLINE_LEN) { 396aa75f4d3SHarshad Shirwadkar node->fcd_name.name = kmalloc(dentry->d_name.len, GFP_NOFS); 397aa75f4d3SHarshad Shirwadkar if (!node->fcd_name.name) { 398aa75f4d3SHarshad Shirwadkar kmem_cache_free(ext4_fc_dentry_cachep, node); 399aa75f4d3SHarshad Shirwadkar ext4_fc_mark_ineligible(inode->i_sb, 400aa75f4d3SHarshad Shirwadkar EXT4_FC_REASON_MEM); 401aa75f4d3SHarshad Shirwadkar mutex_lock(&ei->i_fc_lock); 402aa75f4d3SHarshad Shirwadkar return -ENOMEM; 403aa75f4d3SHarshad Shirwadkar } 404aa75f4d3SHarshad Shirwadkar memcpy((u8 *)node->fcd_name.name, dentry->d_name.name, 405aa75f4d3SHarshad Shirwadkar dentry->d_name.len); 
406aa75f4d3SHarshad Shirwadkar } else { 407aa75f4d3SHarshad Shirwadkar memcpy(node->fcd_iname, dentry->d_name.name, 408aa75f4d3SHarshad Shirwadkar dentry->d_name.len); 409aa75f4d3SHarshad Shirwadkar node->fcd_name.name = node->fcd_iname; 410aa75f4d3SHarshad Shirwadkar } 411aa75f4d3SHarshad Shirwadkar node->fcd_name.len = dentry->d_name.len; 412aa75f4d3SHarshad Shirwadkar 413aa75f4d3SHarshad Shirwadkar spin_lock(&sbi->s_fc_lock); 414aa75f4d3SHarshad Shirwadkar if (sbi->s_mount_state & EXT4_FC_COMMITTING) 415aa75f4d3SHarshad Shirwadkar list_add_tail(&node->fcd_list, 416aa75f4d3SHarshad Shirwadkar &sbi->s_fc_dentry_q[FC_Q_STAGING]); 417aa75f4d3SHarshad Shirwadkar else 418aa75f4d3SHarshad Shirwadkar list_add_tail(&node->fcd_list, &sbi->s_fc_dentry_q[FC_Q_MAIN]); 419aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock); 420aa75f4d3SHarshad Shirwadkar mutex_lock(&ei->i_fc_lock); 421aa75f4d3SHarshad Shirwadkar 422aa75f4d3SHarshad Shirwadkar return 0; 423aa75f4d3SHarshad Shirwadkar } 424aa75f4d3SHarshad Shirwadkar 425aa75f4d3SHarshad Shirwadkar void ext4_fc_track_unlink(struct inode *inode, struct dentry *dentry) 426aa75f4d3SHarshad Shirwadkar { 427aa75f4d3SHarshad Shirwadkar struct __track_dentry_update_args args; 428aa75f4d3SHarshad Shirwadkar int ret; 429aa75f4d3SHarshad Shirwadkar 430aa75f4d3SHarshad Shirwadkar args.dentry = dentry; 431aa75f4d3SHarshad Shirwadkar args.op = EXT4_FC_TAG_UNLINK; 432aa75f4d3SHarshad Shirwadkar 433aa75f4d3SHarshad Shirwadkar ret = ext4_fc_track_template(inode, __track_dentry_update, 434aa75f4d3SHarshad Shirwadkar (void *)&args, 0); 435aa75f4d3SHarshad Shirwadkar trace_ext4_fc_track_unlink(inode, dentry, ret); 436aa75f4d3SHarshad Shirwadkar } 437aa75f4d3SHarshad Shirwadkar 438aa75f4d3SHarshad Shirwadkar void ext4_fc_track_link(struct inode *inode, struct dentry *dentry) 439aa75f4d3SHarshad Shirwadkar { 440aa75f4d3SHarshad Shirwadkar struct __track_dentry_update_args args; 441aa75f4d3SHarshad Shirwadkar int ret; 442aa75f4d3SHarshad 
Shirwadkar 443aa75f4d3SHarshad Shirwadkar args.dentry = dentry; 444aa75f4d3SHarshad Shirwadkar args.op = EXT4_FC_TAG_LINK; 445aa75f4d3SHarshad Shirwadkar 446aa75f4d3SHarshad Shirwadkar ret = ext4_fc_track_template(inode, __track_dentry_update, 447aa75f4d3SHarshad Shirwadkar (void *)&args, 0); 448aa75f4d3SHarshad Shirwadkar trace_ext4_fc_track_link(inode, dentry, ret); 449aa75f4d3SHarshad Shirwadkar } 450aa75f4d3SHarshad Shirwadkar 451aa75f4d3SHarshad Shirwadkar void ext4_fc_track_create(struct inode *inode, struct dentry *dentry) 452aa75f4d3SHarshad Shirwadkar { 453aa75f4d3SHarshad Shirwadkar struct __track_dentry_update_args args; 454aa75f4d3SHarshad Shirwadkar int ret; 455aa75f4d3SHarshad Shirwadkar 456aa75f4d3SHarshad Shirwadkar args.dentry = dentry; 457aa75f4d3SHarshad Shirwadkar args.op = EXT4_FC_TAG_CREAT; 458aa75f4d3SHarshad Shirwadkar 459aa75f4d3SHarshad Shirwadkar ret = ext4_fc_track_template(inode, __track_dentry_update, 460aa75f4d3SHarshad Shirwadkar (void *)&args, 0); 461aa75f4d3SHarshad Shirwadkar trace_ext4_fc_track_create(inode, dentry, ret); 462aa75f4d3SHarshad Shirwadkar } 463aa75f4d3SHarshad Shirwadkar 464aa75f4d3SHarshad Shirwadkar /* __track_fn for inode tracking */ 465aa75f4d3SHarshad Shirwadkar static int __track_inode(struct inode *inode, void *arg, bool update) 466aa75f4d3SHarshad Shirwadkar { 467aa75f4d3SHarshad Shirwadkar if (update) 468aa75f4d3SHarshad Shirwadkar return -EEXIST; 469aa75f4d3SHarshad Shirwadkar 470aa75f4d3SHarshad Shirwadkar EXT4_I(inode)->i_fc_lblk_len = 0; 471aa75f4d3SHarshad Shirwadkar 472aa75f4d3SHarshad Shirwadkar return 0; 473aa75f4d3SHarshad Shirwadkar } 474aa75f4d3SHarshad Shirwadkar 475aa75f4d3SHarshad Shirwadkar void ext4_fc_track_inode(struct inode *inode) 476aa75f4d3SHarshad Shirwadkar { 477aa75f4d3SHarshad Shirwadkar int ret; 478aa75f4d3SHarshad Shirwadkar 479aa75f4d3SHarshad Shirwadkar if (S_ISDIR(inode->i_mode)) 480aa75f4d3SHarshad Shirwadkar return; 481aa75f4d3SHarshad Shirwadkar 482aa75f4d3SHarshad 
Shirwadkar ret = ext4_fc_track_template(inode, __track_inode, NULL, 1); 483aa75f4d3SHarshad Shirwadkar trace_ext4_fc_track_inode(inode, ret); 484aa75f4d3SHarshad Shirwadkar } 485aa75f4d3SHarshad Shirwadkar 486aa75f4d3SHarshad Shirwadkar struct __track_range_args { 487aa75f4d3SHarshad Shirwadkar ext4_lblk_t start, end; 488aa75f4d3SHarshad Shirwadkar }; 489aa75f4d3SHarshad Shirwadkar 490aa75f4d3SHarshad Shirwadkar /* __track_fn for tracking data updates */ 491aa75f4d3SHarshad Shirwadkar static int __track_range(struct inode *inode, void *arg, bool update) 492aa75f4d3SHarshad Shirwadkar { 493aa75f4d3SHarshad Shirwadkar struct ext4_inode_info *ei = EXT4_I(inode); 494aa75f4d3SHarshad Shirwadkar ext4_lblk_t oldstart; 495aa75f4d3SHarshad Shirwadkar struct __track_range_args *__arg = 496aa75f4d3SHarshad Shirwadkar (struct __track_range_args *)arg; 497aa75f4d3SHarshad Shirwadkar 498aa75f4d3SHarshad Shirwadkar if (inode->i_ino < EXT4_FIRST_INO(inode->i_sb)) { 499aa75f4d3SHarshad Shirwadkar ext4_debug("Special inode %ld being modified\n", inode->i_ino); 500aa75f4d3SHarshad Shirwadkar return -ECANCELED; 501aa75f4d3SHarshad Shirwadkar } 502aa75f4d3SHarshad Shirwadkar 503aa75f4d3SHarshad Shirwadkar oldstart = ei->i_fc_lblk_start; 504aa75f4d3SHarshad Shirwadkar 505aa75f4d3SHarshad Shirwadkar if (update && ei->i_fc_lblk_len > 0) { 506aa75f4d3SHarshad Shirwadkar ei->i_fc_lblk_start = min(ei->i_fc_lblk_start, __arg->start); 507aa75f4d3SHarshad Shirwadkar ei->i_fc_lblk_len = 508aa75f4d3SHarshad Shirwadkar max(oldstart + ei->i_fc_lblk_len - 1, __arg->end) - 509aa75f4d3SHarshad Shirwadkar ei->i_fc_lblk_start + 1; 510aa75f4d3SHarshad Shirwadkar } else { 511aa75f4d3SHarshad Shirwadkar ei->i_fc_lblk_start = __arg->start; 512aa75f4d3SHarshad Shirwadkar ei->i_fc_lblk_len = __arg->end - __arg->start + 1; 513aa75f4d3SHarshad Shirwadkar } 514aa75f4d3SHarshad Shirwadkar 515aa75f4d3SHarshad Shirwadkar return 0; 516aa75f4d3SHarshad Shirwadkar } 517aa75f4d3SHarshad Shirwadkar 518aa75f4d3SHarshad 
Shirwadkar void ext4_fc_track_range(struct inode *inode, ext4_lblk_t start, 519aa75f4d3SHarshad Shirwadkar ext4_lblk_t end) 520aa75f4d3SHarshad Shirwadkar { 521aa75f4d3SHarshad Shirwadkar struct __track_range_args args; 522aa75f4d3SHarshad Shirwadkar int ret; 523aa75f4d3SHarshad Shirwadkar 524aa75f4d3SHarshad Shirwadkar if (S_ISDIR(inode->i_mode)) 525aa75f4d3SHarshad Shirwadkar return; 526aa75f4d3SHarshad Shirwadkar 527aa75f4d3SHarshad Shirwadkar args.start = start; 528aa75f4d3SHarshad Shirwadkar args.end = end; 529aa75f4d3SHarshad Shirwadkar 530aa75f4d3SHarshad Shirwadkar ret = ext4_fc_track_template(inode, __track_range, &args, 1); 531aa75f4d3SHarshad Shirwadkar 532aa75f4d3SHarshad Shirwadkar trace_ext4_fc_track_range(inode, start, end, ret); 533aa75f4d3SHarshad Shirwadkar } 534aa75f4d3SHarshad Shirwadkar 535aa75f4d3SHarshad Shirwadkar static void ext4_fc_submit_bh(struct super_block *sb) 536aa75f4d3SHarshad Shirwadkar { 537aa75f4d3SHarshad Shirwadkar int write_flags = REQ_SYNC; 538aa75f4d3SHarshad Shirwadkar struct buffer_head *bh = EXT4_SB(sb)->s_fc_bh; 539aa75f4d3SHarshad Shirwadkar 540aa75f4d3SHarshad Shirwadkar if (test_opt(sb, BARRIER)) 541aa75f4d3SHarshad Shirwadkar write_flags |= REQ_FUA | REQ_PREFLUSH; 542aa75f4d3SHarshad Shirwadkar lock_buffer(bh); 543aa75f4d3SHarshad Shirwadkar clear_buffer_dirty(bh); 544aa75f4d3SHarshad Shirwadkar set_buffer_uptodate(bh); 545aa75f4d3SHarshad Shirwadkar bh->b_end_io = ext4_end_buffer_io_sync; 546aa75f4d3SHarshad Shirwadkar submit_bh(REQ_OP_WRITE, write_flags, bh); 547aa75f4d3SHarshad Shirwadkar EXT4_SB(sb)->s_fc_bh = NULL; 548aa75f4d3SHarshad Shirwadkar } 549aa75f4d3SHarshad Shirwadkar 550aa75f4d3SHarshad Shirwadkar /* Ext4 commit path routines */ 551aa75f4d3SHarshad Shirwadkar 552aa75f4d3SHarshad Shirwadkar /* memzero and update CRC */ 553aa75f4d3SHarshad Shirwadkar static void *ext4_fc_memzero(struct super_block *sb, void *dst, int len, 554aa75f4d3SHarshad Shirwadkar u32 *crc) 555aa75f4d3SHarshad Shirwadkar { 
/* memzero and update CRC */
static void *ext4_fc_memzero(struct super_block *sb, void *dst, int len,
				u32 *crc)
{
	void *ret;

	ret = memset(dst, 0, len);
	/* Fold the zeroed bytes into the running commit checksum, if given. */
	if (crc)
		*crc = ext4_chksum(EXT4_SB(sb), *crc, dst, len);
	return ret;
}

/*
 * Allocate len bytes on a fast commit buffer.
 *
 * During the commit time this function is used to manage fast commit
 * block space. We don't split a fast commit log onto different
 * blocks. So this function makes sure that if there's not enough space
 * on the current block, the remaining space in the current block is
 * marked as unused by adding EXT4_FC_TAG_PAD tag. In that case,
 * new block is from jbd2 and CRC is updated to reflect the padding
 * we added.
 */
static u8 *ext4_fc_reserve_space(struct super_block *sb, int len, u32 *crc)
{
	struct ext4_fc_tl *tl;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct buffer_head *bh;
	int bsize = sbi->s_journal->j_blocksize;
	int ret, off = sbi->s_fc_bytes % bsize;	/* offset into current block */
	int pad_len;

	/*
	 * After allocating len, we should have space at least for a 0 byte
	 * padding.
	 */
	if (len + sizeof(struct ext4_fc_tl) > bsize)
		return NULL;

	/*
	 * NOTE(review): the "- 1" below reserves one spare byte beyond the
	 * request plus a pad TLV header; verify the boundary arithmetic,
	 * this accounting is easy to get off-by-one.
	 */
	if (bsize - off - 1 > len + sizeof(struct ext4_fc_tl)) {
		/*
		 * Only allocate from current buffer if we have enough space for
		 * this request AND we have space to add a zero byte padding.
		 */
		if (!sbi->s_fc_bh) {
			/* Lazily get a fast-commit block from jbd2. */
			ret = jbd2_fc_get_buf(EXT4_SB(sb)->s_journal, &bh);
			if (ret)
				return NULL;
			sbi->s_fc_bh = bh;
		}
		sbi->s_fc_bytes += len;
		return sbi->s_fc_bh->b_data + off;
	}
	/* Need to add PAD tag */
	tl = (struct ext4_fc_tl *)(sbi->s_fc_bh->b_data + off);
	tl->fc_tag = cpu_to_le16(EXT4_FC_TAG_PAD);
	pad_len = bsize - off - 1 - sizeof(struct ext4_fc_tl);
	tl->fc_len = cpu_to_le16(pad_len);
	if (crc)
		*crc = ext4_chksum(sbi, *crc, tl, sizeof(*tl));
	if (pad_len > 0)
		ext4_fc_memzero(sb, tl + 1, pad_len, crc);
	/* Flush the now-full block and start the next one. */
	ext4_fc_submit_bh(sb);

	ret = jbd2_fc_get_buf(EXT4_SB(sb)->s_journal, &bh);
	if (ret)
		return NULL;
	sbi->s_fc_bh = bh;
	/* Account the skipped pad bytes plus the new allocation. */
	sbi->s_fc_bytes = (sbi->s_fc_bytes / bsize + 1) * bsize + len;
	return sbi->s_fc_bh->b_data;
}

/* memcpy to fc reserved space and update CRC */
static void *ext4_fc_memcpy(struct super_block *sb, void *dst, const void *src,
				int len, u32 *crc)
{
	/* Checksum is computed over the source bytes before copying. */
	if (crc)
		*crc = ext4_chksum(EXT4_SB(sb), *crc, src, len);
	return memcpy(dst, src, len);
}
/*
 * Complete a fast commit by writing tail tag.
 *
 * Writing tail tag marks the end of a fast commit. In order to guarantee
 * atomicity, after writing tail tag, even if there's space remaining
 * in the block, next commit shouldn't use it. That's why tail tag
 * has the length as that of the remaining space on the block.
 */
static int ext4_fc_write_tail(struct super_block *sb, u32 crc)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_fc_tl tl;
	struct ext4_fc_tail tail;
	int off, bsize = sbi->s_journal->j_blocksize;
	u8 *dst;

	/*
	 * ext4_fc_reserve_space takes care of allocating an extra block if
	 * there's no enough space on this block for accommodating this tail.
	 */
	dst = ext4_fc_reserve_space(sb, sizeof(tl) + sizeof(tail), &crc);
	if (!dst)
		return -ENOSPC;

	off = sbi->s_fc_bytes % bsize;

	tl.fc_tag = cpu_to_le16(EXT4_FC_TAG_TAIL);
	/* Tail length claims the rest of the block so it can't be reused. */
	tl.fc_len = cpu_to_le16(bsize - off - 1 + sizeof(struct ext4_fc_tail));
	/* Round the byte count up so the next commit starts on a new block. */
	sbi->s_fc_bytes = round_up(sbi->s_fc_bytes, bsize);

	ext4_fc_memcpy(sb, dst, &tl, sizeof(tl), &crc);
	dst += sizeof(tl);
	tail.fc_tid = cpu_to_le32(sbi->s_journal->j_running_transaction->t_tid);
	ext4_fc_memcpy(sb, dst, &tail.fc_tid, sizeof(tail.fc_tid), &crc);
	dst += sizeof(tail.fc_tid);
	/*
	 * The CRC field is copied with a NULL crc pointer so the stored
	 * checksum does not cover its own bytes.
	 */
	tail.fc_crc = cpu_to_le32(crc);
	ext4_fc_memcpy(sb, dst, &tail.fc_crc, sizeof(tail.fc_crc), NULL);

	ext4_fc_submit_bh(sb);

	return 0;
}
679aa75f4d3SHarshad Shirwadkar */ 680aa75f4d3SHarshad Shirwadkar static bool ext4_fc_add_tlv(struct super_block *sb, u16 tag, u16 len, u8 *val, 681aa75f4d3SHarshad Shirwadkar u32 *crc) 682aa75f4d3SHarshad Shirwadkar { 683aa75f4d3SHarshad Shirwadkar struct ext4_fc_tl tl; 684aa75f4d3SHarshad Shirwadkar u8 *dst; 685aa75f4d3SHarshad Shirwadkar 686aa75f4d3SHarshad Shirwadkar dst = ext4_fc_reserve_space(sb, sizeof(tl) + len, crc); 687aa75f4d3SHarshad Shirwadkar if (!dst) 688aa75f4d3SHarshad Shirwadkar return false; 689aa75f4d3SHarshad Shirwadkar 690aa75f4d3SHarshad Shirwadkar tl.fc_tag = cpu_to_le16(tag); 691aa75f4d3SHarshad Shirwadkar tl.fc_len = cpu_to_le16(len); 692aa75f4d3SHarshad Shirwadkar 693aa75f4d3SHarshad Shirwadkar ext4_fc_memcpy(sb, dst, &tl, sizeof(tl), crc); 694aa75f4d3SHarshad Shirwadkar ext4_fc_memcpy(sb, dst + sizeof(tl), val, len, crc); 695aa75f4d3SHarshad Shirwadkar 696aa75f4d3SHarshad Shirwadkar return true; 697aa75f4d3SHarshad Shirwadkar } 698aa75f4d3SHarshad Shirwadkar 699aa75f4d3SHarshad Shirwadkar /* Same as above, but adds dentry tlv. 
*/ 700aa75f4d3SHarshad Shirwadkar static bool ext4_fc_add_dentry_tlv(struct super_block *sb, u16 tag, 701aa75f4d3SHarshad Shirwadkar int parent_ino, int ino, int dlen, 702aa75f4d3SHarshad Shirwadkar const unsigned char *dname, 703aa75f4d3SHarshad Shirwadkar u32 *crc) 704aa75f4d3SHarshad Shirwadkar { 705aa75f4d3SHarshad Shirwadkar struct ext4_fc_dentry_info fcd; 706aa75f4d3SHarshad Shirwadkar struct ext4_fc_tl tl; 707aa75f4d3SHarshad Shirwadkar u8 *dst = ext4_fc_reserve_space(sb, sizeof(tl) + sizeof(fcd) + dlen, 708aa75f4d3SHarshad Shirwadkar crc); 709aa75f4d3SHarshad Shirwadkar 710aa75f4d3SHarshad Shirwadkar if (!dst) 711aa75f4d3SHarshad Shirwadkar return false; 712aa75f4d3SHarshad Shirwadkar 713aa75f4d3SHarshad Shirwadkar fcd.fc_parent_ino = cpu_to_le32(parent_ino); 714aa75f4d3SHarshad Shirwadkar fcd.fc_ino = cpu_to_le32(ino); 715aa75f4d3SHarshad Shirwadkar tl.fc_tag = cpu_to_le16(tag); 716aa75f4d3SHarshad Shirwadkar tl.fc_len = cpu_to_le16(sizeof(fcd) + dlen); 717aa75f4d3SHarshad Shirwadkar ext4_fc_memcpy(sb, dst, &tl, sizeof(tl), crc); 718aa75f4d3SHarshad Shirwadkar dst += sizeof(tl); 719aa75f4d3SHarshad Shirwadkar ext4_fc_memcpy(sb, dst, &fcd, sizeof(fcd), crc); 720aa75f4d3SHarshad Shirwadkar dst += sizeof(fcd); 721aa75f4d3SHarshad Shirwadkar ext4_fc_memcpy(sb, dst, dname, dlen, crc); 722aa75f4d3SHarshad Shirwadkar dst += dlen; 723aa75f4d3SHarshad Shirwadkar 724aa75f4d3SHarshad Shirwadkar return true; 725aa75f4d3SHarshad Shirwadkar } 726aa75f4d3SHarshad Shirwadkar 727aa75f4d3SHarshad Shirwadkar /* 728aa75f4d3SHarshad Shirwadkar * Writes inode in the fast commit space under TLV with tag @tag. 729aa75f4d3SHarshad Shirwadkar * Returns 0 on success, error on failure. 
730aa75f4d3SHarshad Shirwadkar */ 731aa75f4d3SHarshad Shirwadkar static int ext4_fc_write_inode(struct inode *inode, u32 *crc) 732aa75f4d3SHarshad Shirwadkar { 733aa75f4d3SHarshad Shirwadkar struct ext4_inode_info *ei = EXT4_I(inode); 734aa75f4d3SHarshad Shirwadkar int inode_len = EXT4_GOOD_OLD_INODE_SIZE; 735aa75f4d3SHarshad Shirwadkar int ret; 736aa75f4d3SHarshad Shirwadkar struct ext4_iloc iloc; 737aa75f4d3SHarshad Shirwadkar struct ext4_fc_inode fc_inode; 738aa75f4d3SHarshad Shirwadkar struct ext4_fc_tl tl; 739aa75f4d3SHarshad Shirwadkar u8 *dst; 740aa75f4d3SHarshad Shirwadkar 741aa75f4d3SHarshad Shirwadkar ret = ext4_get_inode_loc(inode, &iloc); 742aa75f4d3SHarshad Shirwadkar if (ret) 743aa75f4d3SHarshad Shirwadkar return ret; 744aa75f4d3SHarshad Shirwadkar 745aa75f4d3SHarshad Shirwadkar if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) 746aa75f4d3SHarshad Shirwadkar inode_len += ei->i_extra_isize; 747aa75f4d3SHarshad Shirwadkar 748aa75f4d3SHarshad Shirwadkar fc_inode.fc_ino = cpu_to_le32(inode->i_ino); 749aa75f4d3SHarshad Shirwadkar tl.fc_tag = cpu_to_le16(EXT4_FC_TAG_INODE); 750aa75f4d3SHarshad Shirwadkar tl.fc_len = cpu_to_le16(inode_len + sizeof(fc_inode.fc_ino)); 751aa75f4d3SHarshad Shirwadkar 752aa75f4d3SHarshad Shirwadkar dst = ext4_fc_reserve_space(inode->i_sb, 753aa75f4d3SHarshad Shirwadkar sizeof(tl) + inode_len + sizeof(fc_inode.fc_ino), crc); 754aa75f4d3SHarshad Shirwadkar if (!dst) 755aa75f4d3SHarshad Shirwadkar return -ECANCELED; 756aa75f4d3SHarshad Shirwadkar 757aa75f4d3SHarshad Shirwadkar if (!ext4_fc_memcpy(inode->i_sb, dst, &tl, sizeof(tl), crc)) 758aa75f4d3SHarshad Shirwadkar return -ECANCELED; 759aa75f4d3SHarshad Shirwadkar dst += sizeof(tl); 760aa75f4d3SHarshad Shirwadkar if (!ext4_fc_memcpy(inode->i_sb, dst, &fc_inode, sizeof(fc_inode), crc)) 761aa75f4d3SHarshad Shirwadkar return -ECANCELED; 762aa75f4d3SHarshad Shirwadkar dst += sizeof(fc_inode); 763aa75f4d3SHarshad Shirwadkar if (!ext4_fc_memcpy(inode->i_sb, dst, (u8 
/*
 * Writes updated data ranges for the inode in question. Updates CRC.
 * Returns 0 on success, error otherwise.
 */
static int ext4_fc_write_inode_data(struct inode *inode, u32 *crc)
{
	ext4_lblk_t old_blk_size, cur_lblk_off, new_blk_size;
	struct ext4_inode_info *ei = EXT4_I(inode);
	struct ext4_map_blocks map;
	struct ext4_fc_add_range fc_ext;
	struct ext4_fc_del_range lrange;
	struct ext4_extent *ex;
	int ret;

	/* Snapshot and reset the tracked range atomically under i_fc_lock. */
	mutex_lock(&ei->i_fc_lock);
	if (ei->i_fc_lblk_len == 0) {
		/* Nothing tracked for this inode in this commit window. */
		mutex_unlock(&ei->i_fc_lock);
		return 0;
	}
	old_blk_size = ei->i_fc_lblk_start;
	new_blk_size = ei->i_fc_lblk_start + ei->i_fc_lblk_len - 1;
	ei->i_fc_lblk_len = 0;
	mutex_unlock(&ei->i_fc_lock);

	cur_lblk_off = old_blk_size;
	jbd_debug(1, "%s: will try writing %d to %d for inode %ld\n",
		  __func__, cur_lblk_off, new_blk_size, inode->i_ino);

	/* Walk the range, emitting one TLV per mapped extent or hole. */
	while (cur_lblk_off <= new_blk_size) {
		map.m_lblk = cur_lblk_off;
		map.m_len = new_blk_size - cur_lblk_off + 1;
		/* Read-only lookup: NULL handle, no allocation flags. */
		ret = ext4_map_blocks(NULL, inode, &map, 0);
		if (ret < 0)
			return -ECANCELED;

		if (map.m_len == 0) {
			cur_lblk_off++;
			continue;
		}

		if (ret == 0) {
			/* Unmapped (hole): record a deleted range. */
			lrange.fc_ino = cpu_to_le32(inode->i_ino);
			lrange.fc_lblk = cpu_to_le32(map.m_lblk);
			lrange.fc_len = cpu_to_le32(map.m_len);
			if (!ext4_fc_add_tlv(inode->i_sb, EXT4_FC_TAG_DEL_RANGE,
					    sizeof(lrange), (u8 *)&lrange, crc))
				return -ENOSPC;
		} else {
			/* Mapped: encode the extent and record an add range. */
			fc_ext.fc_ino = cpu_to_le32(inode->i_ino);
			ex = (struct ext4_extent *)&fc_ext.fc_ex;
			ex->ee_block = cpu_to_le32(map.m_lblk);
			ex->ee_len = cpu_to_le16(map.m_len);
			ext4_ext_store_pblock(ex, map.m_pblk);
			if (map.m_flags & EXT4_MAP_UNWRITTEN)
				ext4_ext_mark_unwritten(ex);
			else
				ext4_ext_mark_initialized(ex);
			if (!ext4_fc_add_tlv(inode->i_sb, EXT4_FC_TAG_ADD_RANGE,
					    sizeof(fc_ext), (u8 *)&fc_ext, crc))
				return -ENOSPC;
		}

		cur_lblk_off += map.m_len;
	}

	return 0;
}
/* Submit data for all the fast commit inodes */
static int ext4_fc_submit_inode_data_all(journal_t *journal)
{
	struct super_block *sb = (struct super_block *)(journal->j_private);
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_inode_info *ei;
	struct list_head *pos;
	int ret = 0;

	spin_lock(&sbi->s_fc_lock);
	sbi->s_mount_state |= EXT4_FC_COMMITTING;
	list_for_each(pos, &sbi->s_fc_q[FC_Q_MAIN]) {
		ei = list_entry(pos, struct ext4_inode_info, i_fc_list);
		ext4_set_inode_state(&ei->vfs_inode, EXT4_STATE_FC_COMMITTING);
		/*
		 * Wait for in-flight updates on this inode to drain.
		 * i_fc_updates counts ongoing modifications; s_fc_lock is
		 * dropped around schedule() and re-taken afterwards.
		 */
		while (atomic_read(&ei->i_fc_updates)) {
			DEFINE_WAIT(wait);

			prepare_to_wait(&ei->i_fc_wait, &wait,
						TASK_UNINTERRUPTIBLE);
			if (atomic_read(&ei->i_fc_updates)) {
				spin_unlock(&sbi->s_fc_lock);
				schedule();
				spin_lock(&sbi->s_fc_lock);
			}
			finish_wait(&ei->i_fc_wait, &wait);
		}
		/*
		 * NOTE(review): the lock is dropped while submitting; list
		 * iteration resumes afterwards on a possibly changed list —
		 * confirm commit-path serialization makes this safe.
		 */
		spin_unlock(&sbi->s_fc_lock);
		ret = jbd2_submit_inode_data(ei->jinode);
		if (ret)
			return ret;
		spin_lock(&sbi->s_fc_lock);
	}
	spin_unlock(&sbi->s_fc_lock);

	return ret;
}

/* Wait for completion of data for all the fast commit inodes */
static int ext4_fc_wait_inode_data_all(journal_t *journal)
{
	struct super_block *sb = (struct super_block *)(journal->j_private);
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_inode_info *pos, *n;
	int ret = 0;

	spin_lock(&sbi->s_fc_lock);
	list_for_each_entry_safe(pos, n, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) {
		/* Only wait on inodes flagged by submit_inode_data_all(). */
		if (!ext4_test_inode_state(&pos->vfs_inode,
					   EXT4_STATE_FC_COMMITTING))
			continue;
		spin_unlock(&sbi->s_fc_lock);

		ret = jbd2_wait_inode_data(journal, pos->jinode);
		if (ret)
			return ret;
		spin_lock(&sbi->s_fc_lock);
	}
	spin_unlock(&sbi->s_fc_lock);

	return 0;
}
*)(journal->j_private); 905aa75f4d3SHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB(sb); 906aa75f4d3SHarshad Shirwadkar struct ext4_fc_dentry_update *fc_dentry; 907aa75f4d3SHarshad Shirwadkar struct inode *inode; 908aa75f4d3SHarshad Shirwadkar struct list_head *pos, *n, *fcd_pos, *fcd_n; 909aa75f4d3SHarshad Shirwadkar struct ext4_inode_info *ei; 910aa75f4d3SHarshad Shirwadkar int ret; 911aa75f4d3SHarshad Shirwadkar 912aa75f4d3SHarshad Shirwadkar if (list_empty(&sbi->s_fc_dentry_q[FC_Q_MAIN])) 913aa75f4d3SHarshad Shirwadkar return 0; 914aa75f4d3SHarshad Shirwadkar list_for_each_safe(fcd_pos, fcd_n, &sbi->s_fc_dentry_q[FC_Q_MAIN]) { 915aa75f4d3SHarshad Shirwadkar fc_dentry = list_entry(fcd_pos, struct ext4_fc_dentry_update, 916aa75f4d3SHarshad Shirwadkar fcd_list); 917aa75f4d3SHarshad Shirwadkar if (fc_dentry->fcd_op != EXT4_FC_TAG_CREAT) { 918aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock); 919aa75f4d3SHarshad Shirwadkar if (!ext4_fc_add_dentry_tlv( 920aa75f4d3SHarshad Shirwadkar sb, fc_dentry->fcd_op, 921aa75f4d3SHarshad Shirwadkar fc_dentry->fcd_parent, fc_dentry->fcd_ino, 922aa75f4d3SHarshad Shirwadkar fc_dentry->fcd_name.len, 923aa75f4d3SHarshad Shirwadkar fc_dentry->fcd_name.name, crc)) { 924aa75f4d3SHarshad Shirwadkar ret = -ENOSPC; 925aa75f4d3SHarshad Shirwadkar goto lock_and_exit; 926aa75f4d3SHarshad Shirwadkar } 927aa75f4d3SHarshad Shirwadkar spin_lock(&sbi->s_fc_lock); 928aa75f4d3SHarshad Shirwadkar continue; 929aa75f4d3SHarshad Shirwadkar } 930aa75f4d3SHarshad Shirwadkar 931aa75f4d3SHarshad Shirwadkar inode = NULL; 932aa75f4d3SHarshad Shirwadkar list_for_each_safe(pos, n, &sbi->s_fc_q[FC_Q_MAIN]) { 933aa75f4d3SHarshad Shirwadkar ei = list_entry(pos, struct ext4_inode_info, i_fc_list); 934aa75f4d3SHarshad Shirwadkar if (ei->vfs_inode.i_ino == fc_dentry->fcd_ino) { 935aa75f4d3SHarshad Shirwadkar inode = &ei->vfs_inode; 936aa75f4d3SHarshad Shirwadkar break; 937aa75f4d3SHarshad Shirwadkar } 938aa75f4d3SHarshad Shirwadkar } 939aa75f4d3SHarshad 
Shirwadkar /* 940aa75f4d3SHarshad Shirwadkar * If we don't find inode in our list, then it was deleted, 941aa75f4d3SHarshad Shirwadkar * in which case, we don't need to record it's create tag. 942aa75f4d3SHarshad Shirwadkar */ 943aa75f4d3SHarshad Shirwadkar if (!inode) 944aa75f4d3SHarshad Shirwadkar continue; 945aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock); 946aa75f4d3SHarshad Shirwadkar 947aa75f4d3SHarshad Shirwadkar /* 948aa75f4d3SHarshad Shirwadkar * We first write the inode and then the create dirent. This 949aa75f4d3SHarshad Shirwadkar * allows the recovery code to create an unnamed inode first 950aa75f4d3SHarshad Shirwadkar * and then link it to a directory entry. This allows us 951aa75f4d3SHarshad Shirwadkar * to use namei.c routines almost as is and simplifies 952aa75f4d3SHarshad Shirwadkar * the recovery code. 953aa75f4d3SHarshad Shirwadkar */ 954aa75f4d3SHarshad Shirwadkar ret = ext4_fc_write_inode(inode, crc); 955aa75f4d3SHarshad Shirwadkar if (ret) 956aa75f4d3SHarshad Shirwadkar goto lock_and_exit; 957aa75f4d3SHarshad Shirwadkar 958aa75f4d3SHarshad Shirwadkar ret = ext4_fc_write_inode_data(inode, crc); 959aa75f4d3SHarshad Shirwadkar if (ret) 960aa75f4d3SHarshad Shirwadkar goto lock_and_exit; 961aa75f4d3SHarshad Shirwadkar 962aa75f4d3SHarshad Shirwadkar if (!ext4_fc_add_dentry_tlv( 963aa75f4d3SHarshad Shirwadkar sb, fc_dentry->fcd_op, 964aa75f4d3SHarshad Shirwadkar fc_dentry->fcd_parent, fc_dentry->fcd_ino, 965aa75f4d3SHarshad Shirwadkar fc_dentry->fcd_name.len, 966aa75f4d3SHarshad Shirwadkar fc_dentry->fcd_name.name, crc)) { 967aa75f4d3SHarshad Shirwadkar spin_lock(&sbi->s_fc_lock); 968aa75f4d3SHarshad Shirwadkar ret = -ENOSPC; 969aa75f4d3SHarshad Shirwadkar goto lock_and_exit; 970aa75f4d3SHarshad Shirwadkar } 971aa75f4d3SHarshad Shirwadkar 972aa75f4d3SHarshad Shirwadkar spin_lock(&sbi->s_fc_lock); 973aa75f4d3SHarshad Shirwadkar } 974aa75f4d3SHarshad Shirwadkar return 0; 975aa75f4d3SHarshad Shirwadkar lock_and_exit: 976aa75f4d3SHarshad 
Shirwadkar spin_lock(&sbi->s_fc_lock); 977aa75f4d3SHarshad Shirwadkar return ret; 978aa75f4d3SHarshad Shirwadkar } 979aa75f4d3SHarshad Shirwadkar 980aa75f4d3SHarshad Shirwadkar static int ext4_fc_perform_commit(journal_t *journal) 981aa75f4d3SHarshad Shirwadkar { 982aa75f4d3SHarshad Shirwadkar struct super_block *sb = (struct super_block *)(journal->j_private); 983aa75f4d3SHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB(sb); 984aa75f4d3SHarshad Shirwadkar struct ext4_inode_info *iter; 985aa75f4d3SHarshad Shirwadkar struct ext4_fc_head head; 986aa75f4d3SHarshad Shirwadkar struct list_head *pos; 987aa75f4d3SHarshad Shirwadkar struct inode *inode; 988aa75f4d3SHarshad Shirwadkar struct blk_plug plug; 989aa75f4d3SHarshad Shirwadkar int ret = 0; 990aa75f4d3SHarshad Shirwadkar u32 crc = 0; 991aa75f4d3SHarshad Shirwadkar 992aa75f4d3SHarshad Shirwadkar ret = ext4_fc_submit_inode_data_all(journal); 993aa75f4d3SHarshad Shirwadkar if (ret) 994aa75f4d3SHarshad Shirwadkar return ret; 995aa75f4d3SHarshad Shirwadkar 996aa75f4d3SHarshad Shirwadkar ret = ext4_fc_wait_inode_data_all(journal); 997aa75f4d3SHarshad Shirwadkar if (ret) 998aa75f4d3SHarshad Shirwadkar return ret; 999aa75f4d3SHarshad Shirwadkar 1000aa75f4d3SHarshad Shirwadkar blk_start_plug(&plug); 1001aa75f4d3SHarshad Shirwadkar if (sbi->s_fc_bytes == 0) { 1002aa75f4d3SHarshad Shirwadkar /* 1003aa75f4d3SHarshad Shirwadkar * Add a head tag only if this is the first fast commit 1004aa75f4d3SHarshad Shirwadkar * in this TID. 
1005aa75f4d3SHarshad Shirwadkar */ 1006aa75f4d3SHarshad Shirwadkar head.fc_features = cpu_to_le32(EXT4_FC_SUPPORTED_FEATURES); 1007aa75f4d3SHarshad Shirwadkar head.fc_tid = cpu_to_le32( 1008aa75f4d3SHarshad Shirwadkar sbi->s_journal->j_running_transaction->t_tid); 1009aa75f4d3SHarshad Shirwadkar if (!ext4_fc_add_tlv(sb, EXT4_FC_TAG_HEAD, sizeof(head), 1010aa75f4d3SHarshad Shirwadkar (u8 *)&head, &crc)) 1011aa75f4d3SHarshad Shirwadkar goto out; 1012aa75f4d3SHarshad Shirwadkar } 1013aa75f4d3SHarshad Shirwadkar 1014aa75f4d3SHarshad Shirwadkar spin_lock(&sbi->s_fc_lock); 1015aa75f4d3SHarshad Shirwadkar ret = ext4_fc_commit_dentry_updates(journal, &crc); 1016aa75f4d3SHarshad Shirwadkar if (ret) { 1017aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock); 1018aa75f4d3SHarshad Shirwadkar goto out; 1019aa75f4d3SHarshad Shirwadkar } 1020aa75f4d3SHarshad Shirwadkar 1021aa75f4d3SHarshad Shirwadkar list_for_each(pos, &sbi->s_fc_q[FC_Q_MAIN]) { 1022aa75f4d3SHarshad Shirwadkar iter = list_entry(pos, struct ext4_inode_info, i_fc_list); 1023aa75f4d3SHarshad Shirwadkar inode = &iter->vfs_inode; 1024aa75f4d3SHarshad Shirwadkar if (!ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING)) 1025aa75f4d3SHarshad Shirwadkar continue; 1026aa75f4d3SHarshad Shirwadkar 1027aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock); 1028aa75f4d3SHarshad Shirwadkar ret = ext4_fc_write_inode_data(inode, &crc); 1029aa75f4d3SHarshad Shirwadkar if (ret) 1030aa75f4d3SHarshad Shirwadkar goto out; 1031aa75f4d3SHarshad Shirwadkar ret = ext4_fc_write_inode(inode, &crc); 1032aa75f4d3SHarshad Shirwadkar if (ret) 1033aa75f4d3SHarshad Shirwadkar goto out; 1034aa75f4d3SHarshad Shirwadkar spin_lock(&sbi->s_fc_lock); 1035aa75f4d3SHarshad Shirwadkar EXT4_I(inode)->i_fc_committed_subtid = 1036aa75f4d3SHarshad Shirwadkar atomic_read(&sbi->s_fc_subtid); 1037aa75f4d3SHarshad Shirwadkar } 1038aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock); 1039aa75f4d3SHarshad Shirwadkar 1040aa75f4d3SHarshad Shirwadkar 
/*
 * The main commit entry point. Performs a fast commit for transaction
 * commit_tid if needed. If it's not possible to perform a fast commit
 * due to various reasons, we fall back to full commit. Returns 0
 * on success, error otherwise.
 */
int ext4_fc_commit(journal_t *journal, tid_t commit_tid)
{
	struct super_block *sb = (struct super_block *)(journal->j_private);
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	int nblks = 0, ret, bsize = journal->j_blocksize;
	int subtid = atomic_read(&sbi->s_fc_subtid);
	int reason = EXT4_FC_REASON_OK, fc_bufs_before = 0;
	ktime_t start_time, commit_time;

	trace_ext4_fc_commit_start(sb);

	start_time = ktime_get();

	/* Fall back to full commit if fast commit is disabled or ineligible. */
	if (!test_opt2(sb, JOURNAL_FAST_COMMIT) ||
		(ext4_fc_is_ineligible(sb))) {
		reason = EXT4_FC_REASON_INELIGIBLE;
		goto out;
	}

restart_fc:
	ret = jbd2_fc_begin_commit(journal, commit_tid);
	if (ret == -EALREADY) {
		/* There was an ongoing commit, check if we need to restart */
		if (atomic_read(&sbi->s_fc_subtid) <= subtid &&
			commit_tid > journal->j_commit_sequence)
			goto restart_fc;
		reason = EXT4_FC_REASON_ALREADY_COMMITTED;
		goto out;
	} else if (ret) {
		sbi->s_fc_stats.fc_ineligible_reason_count[EXT4_FC_COMMIT_FAILED]++;
		reason = EXT4_FC_REASON_FC_START_FAILED;
		goto out;
	}

	/* Snapshot the block count so nblks reflects just this commit. */
	fc_bufs_before = (sbi->s_fc_bytes + bsize - 1) / bsize;
	ret = ext4_fc_perform_commit(journal);
	if (ret < 0) {
		sbi->s_fc_stats.fc_ineligible_reason_count[EXT4_FC_COMMIT_FAILED]++;
		reason = EXT4_FC_REASON_FC_FAILED;
		goto out;
	}
	nblks = (sbi->s_fc_bytes + bsize - 1) / bsize - fc_bufs_before;
	ret = jbd2_fc_wait_bufs(journal, nblks);
	if (ret < 0) {
		sbi->s_fc_stats.fc_ineligible_reason_count[EXT4_FC_COMMIT_FAILED]++;
		reason = EXT4_FC_REASON_FC_FAILED;
		goto out;
	}
	atomic_inc(&sbi->s_fc_subtid);
	jbd2_fc_end_commit(journal);
out:
	/* Has any ineligible update happened since we started? */
	if (reason == EXT4_FC_REASON_OK && ext4_fc_is_ineligible(sb)) {
		sbi->s_fc_stats.fc_ineligible_reason_count[EXT4_FC_COMMIT_FAILED]++;
		reason = EXT4_FC_REASON_INELIGIBLE;
	}

	spin_lock(&sbi->s_fc_lock);
	if (reason != EXT4_FC_REASON_OK &&
		reason != EXT4_FC_REASON_ALREADY_COMMITTED) {
		sbi->s_fc_stats.fc_ineligible_commits++;
	} else {
		sbi->s_fc_stats.fc_num_commits++;
		sbi->s_fc_stats.fc_numblks += nblks;
	}
	spin_unlock(&sbi->s_fc_lock);
	nblks = (reason == EXT4_FC_REASON_OK) ? nblks : 0;
	trace_ext4_fc_commit_stop(sb, nblks, reason);
	commit_time = ktime_to_ns(ktime_sub(ktime_get(), start_time));
	/*
	 * weight the commit time higher than the average time so we don't
	 * react too strongly to vast changes in the commit time
	 */
	if (likely(sbi->s_fc_avg_commit_time))
		sbi->s_fc_avg_commit_time = (commit_time +
				sbi->s_fc_avg_commit_time * 3) / 4;
	else
		sbi->s_fc_avg_commit_time = commit_time;
	jbd_debug(1,
		"Fast commit ended with blks = %d, reason = %d, subtid - %d",
		nblks, reason, subtid);
	/* On failure, complete the transaction via the full-commit paths. */
	if (reason == EXT4_FC_REASON_FC_FAILED)
		return jbd2_fc_end_commit_fallback(journal, commit_tid);
	if (reason == EXT4_FC_REASON_FC_START_FAILED ||
		reason == EXT4_FC_REASON_INELIGIBLE)
		return jbd2_complete_transaction(journal, commit_tid);
	return 0;
}
EXT4_STATE_FC_COMMITTING bit is clear */ 1167aa75f4d3SHarshad Shirwadkar smp_mb(); 1168aa75f4d3SHarshad Shirwadkar #if (BITS_PER_LONG < 64) 1169aa75f4d3SHarshad Shirwadkar wake_up_bit(&iter->i_state_flags, EXT4_STATE_FC_COMMITTING); 1170aa75f4d3SHarshad Shirwadkar #else 1171aa75f4d3SHarshad Shirwadkar wake_up_bit(&iter->i_flags, EXT4_STATE_FC_COMMITTING); 1172aa75f4d3SHarshad Shirwadkar #endif 1173aa75f4d3SHarshad Shirwadkar } 1174aa75f4d3SHarshad Shirwadkar 1175aa75f4d3SHarshad Shirwadkar while (!list_empty(&sbi->s_fc_dentry_q[FC_Q_MAIN])) { 1176aa75f4d3SHarshad Shirwadkar fc_dentry = list_first_entry(&sbi->s_fc_dentry_q[FC_Q_MAIN], 1177aa75f4d3SHarshad Shirwadkar struct ext4_fc_dentry_update, 1178aa75f4d3SHarshad Shirwadkar fcd_list); 1179aa75f4d3SHarshad Shirwadkar list_del_init(&fc_dentry->fcd_list); 1180aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock); 1181aa75f4d3SHarshad Shirwadkar 1182aa75f4d3SHarshad Shirwadkar if (fc_dentry->fcd_name.name && 1183aa75f4d3SHarshad Shirwadkar fc_dentry->fcd_name.len > DNAME_INLINE_LEN) 1184aa75f4d3SHarshad Shirwadkar kfree(fc_dentry->fcd_name.name); 1185aa75f4d3SHarshad Shirwadkar kmem_cache_free(ext4_fc_dentry_cachep, fc_dentry); 1186aa75f4d3SHarshad Shirwadkar spin_lock(&sbi->s_fc_lock); 1187aa75f4d3SHarshad Shirwadkar } 1188aa75f4d3SHarshad Shirwadkar 1189aa75f4d3SHarshad Shirwadkar list_splice_init(&sbi->s_fc_dentry_q[FC_Q_STAGING], 1190aa75f4d3SHarshad Shirwadkar &sbi->s_fc_dentry_q[FC_Q_MAIN]); 1191aa75f4d3SHarshad Shirwadkar list_splice_init(&sbi->s_fc_q[FC_Q_STAGING], 1192aa75f4d3SHarshad Shirwadkar &sbi->s_fc_q[FC_Q_STAGING]); 1193aa75f4d3SHarshad Shirwadkar 1194aa75f4d3SHarshad Shirwadkar sbi->s_mount_state &= ~EXT4_FC_COMMITTING; 1195aa75f4d3SHarshad Shirwadkar sbi->s_mount_state &= ~EXT4_FC_INELIGIBLE; 1196aa75f4d3SHarshad Shirwadkar 1197aa75f4d3SHarshad Shirwadkar if (full) 1198aa75f4d3SHarshad Shirwadkar sbi->s_fc_bytes = 0; 1199aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock); 
1200aa75f4d3SHarshad Shirwadkar trace_ext4_fc_stats(sb); 1201ff780b91SHarshad Shirwadkar } 12026866d7b3SHarshad Shirwadkar 1203*8016e29fSHarshad Shirwadkar /* Ext4 Replay Path Routines */ 1204*8016e29fSHarshad Shirwadkar 1205*8016e29fSHarshad Shirwadkar /* Get length of a particular tlv */ 1206*8016e29fSHarshad Shirwadkar static inline int ext4_fc_tag_len(struct ext4_fc_tl *tl) 1207*8016e29fSHarshad Shirwadkar { 1208*8016e29fSHarshad Shirwadkar return le16_to_cpu(tl->fc_len); 1209*8016e29fSHarshad Shirwadkar } 1210*8016e29fSHarshad Shirwadkar 1211*8016e29fSHarshad Shirwadkar /* Get a pointer to "value" of a tlv */ 1212*8016e29fSHarshad Shirwadkar static inline u8 *ext4_fc_tag_val(struct ext4_fc_tl *tl) 1213*8016e29fSHarshad Shirwadkar { 1214*8016e29fSHarshad Shirwadkar return (u8 *)tl + sizeof(*tl); 1215*8016e29fSHarshad Shirwadkar } 1216*8016e29fSHarshad Shirwadkar 1217*8016e29fSHarshad Shirwadkar /* Helper struct for dentry replay routines */ 1218*8016e29fSHarshad Shirwadkar struct dentry_info_args { 1219*8016e29fSHarshad Shirwadkar int parent_ino, dname_len, ino, inode_len; 1220*8016e29fSHarshad Shirwadkar char *dname; 1221*8016e29fSHarshad Shirwadkar }; 1222*8016e29fSHarshad Shirwadkar 1223*8016e29fSHarshad Shirwadkar static inline void tl_to_darg(struct dentry_info_args *darg, 1224*8016e29fSHarshad Shirwadkar struct ext4_fc_tl *tl) 1225*8016e29fSHarshad Shirwadkar { 1226*8016e29fSHarshad Shirwadkar struct ext4_fc_dentry_info *fcd; 1227*8016e29fSHarshad Shirwadkar 1228*8016e29fSHarshad Shirwadkar fcd = (struct ext4_fc_dentry_info *)ext4_fc_tag_val(tl); 1229*8016e29fSHarshad Shirwadkar 1230*8016e29fSHarshad Shirwadkar darg->parent_ino = le32_to_cpu(fcd->fc_parent_ino); 1231*8016e29fSHarshad Shirwadkar darg->ino = le32_to_cpu(fcd->fc_ino); 1232*8016e29fSHarshad Shirwadkar darg->dname = fcd->fc_dname; 1233*8016e29fSHarshad Shirwadkar darg->dname_len = ext4_fc_tag_len(tl) - 1234*8016e29fSHarshad Shirwadkar sizeof(struct ext4_fc_dentry_info); 1235*8016e29fSHarshad 
Shirwadkar } 1236*8016e29fSHarshad Shirwadkar 1237*8016e29fSHarshad Shirwadkar /* Unlink replay function */ 1238*8016e29fSHarshad Shirwadkar static int ext4_fc_replay_unlink(struct super_block *sb, struct ext4_fc_tl *tl) 1239*8016e29fSHarshad Shirwadkar { 1240*8016e29fSHarshad Shirwadkar struct inode *inode, *old_parent; 1241*8016e29fSHarshad Shirwadkar struct qstr entry; 1242*8016e29fSHarshad Shirwadkar struct dentry_info_args darg; 1243*8016e29fSHarshad Shirwadkar int ret = 0; 1244*8016e29fSHarshad Shirwadkar 1245*8016e29fSHarshad Shirwadkar tl_to_darg(&darg, tl); 1246*8016e29fSHarshad Shirwadkar 1247*8016e29fSHarshad Shirwadkar trace_ext4_fc_replay(sb, EXT4_FC_TAG_UNLINK, darg.ino, 1248*8016e29fSHarshad Shirwadkar darg.parent_ino, darg.dname_len); 1249*8016e29fSHarshad Shirwadkar 1250*8016e29fSHarshad Shirwadkar entry.name = darg.dname; 1251*8016e29fSHarshad Shirwadkar entry.len = darg.dname_len; 1252*8016e29fSHarshad Shirwadkar inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL); 1253*8016e29fSHarshad Shirwadkar 1254*8016e29fSHarshad Shirwadkar if (IS_ERR_OR_NULL(inode)) { 1255*8016e29fSHarshad Shirwadkar jbd_debug(1, "Inode %d not found", darg.ino); 1256*8016e29fSHarshad Shirwadkar return 0; 1257*8016e29fSHarshad Shirwadkar } 1258*8016e29fSHarshad Shirwadkar 1259*8016e29fSHarshad Shirwadkar old_parent = ext4_iget(sb, darg.parent_ino, 1260*8016e29fSHarshad Shirwadkar EXT4_IGET_NORMAL); 1261*8016e29fSHarshad Shirwadkar if (IS_ERR_OR_NULL(old_parent)) { 1262*8016e29fSHarshad Shirwadkar jbd_debug(1, "Dir with inode %d not found", darg.parent_ino); 1263*8016e29fSHarshad Shirwadkar iput(inode); 1264*8016e29fSHarshad Shirwadkar return 0; 1265*8016e29fSHarshad Shirwadkar } 1266*8016e29fSHarshad Shirwadkar 1267*8016e29fSHarshad Shirwadkar ret = __ext4_unlink(old_parent, &entry, inode); 1268*8016e29fSHarshad Shirwadkar /* -ENOENT ok coz it might not exist anymore. 
*/ 1269*8016e29fSHarshad Shirwadkar if (ret == -ENOENT) 1270*8016e29fSHarshad Shirwadkar ret = 0; 1271*8016e29fSHarshad Shirwadkar iput(old_parent); 1272*8016e29fSHarshad Shirwadkar iput(inode); 1273*8016e29fSHarshad Shirwadkar return ret; 1274*8016e29fSHarshad Shirwadkar } 1275*8016e29fSHarshad Shirwadkar 1276*8016e29fSHarshad Shirwadkar static int ext4_fc_replay_link_internal(struct super_block *sb, 1277*8016e29fSHarshad Shirwadkar struct dentry_info_args *darg, 1278*8016e29fSHarshad Shirwadkar struct inode *inode) 1279*8016e29fSHarshad Shirwadkar { 1280*8016e29fSHarshad Shirwadkar struct inode *dir = NULL; 1281*8016e29fSHarshad Shirwadkar struct dentry *dentry_dir = NULL, *dentry_inode = NULL; 1282*8016e29fSHarshad Shirwadkar struct qstr qstr_dname = QSTR_INIT(darg->dname, darg->dname_len); 1283*8016e29fSHarshad Shirwadkar int ret = 0; 1284*8016e29fSHarshad Shirwadkar 1285*8016e29fSHarshad Shirwadkar dir = ext4_iget(sb, darg->parent_ino, EXT4_IGET_NORMAL); 1286*8016e29fSHarshad Shirwadkar if (IS_ERR(dir)) { 1287*8016e29fSHarshad Shirwadkar jbd_debug(1, "Dir with inode %d not found.", darg->parent_ino); 1288*8016e29fSHarshad Shirwadkar dir = NULL; 1289*8016e29fSHarshad Shirwadkar goto out; 1290*8016e29fSHarshad Shirwadkar } 1291*8016e29fSHarshad Shirwadkar 1292*8016e29fSHarshad Shirwadkar dentry_dir = d_obtain_alias(dir); 1293*8016e29fSHarshad Shirwadkar if (IS_ERR(dentry_dir)) { 1294*8016e29fSHarshad Shirwadkar jbd_debug(1, "Failed to obtain dentry"); 1295*8016e29fSHarshad Shirwadkar dentry_dir = NULL; 1296*8016e29fSHarshad Shirwadkar goto out; 1297*8016e29fSHarshad Shirwadkar } 1298*8016e29fSHarshad Shirwadkar 1299*8016e29fSHarshad Shirwadkar dentry_inode = d_alloc(dentry_dir, &qstr_dname); 1300*8016e29fSHarshad Shirwadkar if (!dentry_inode) { 1301*8016e29fSHarshad Shirwadkar jbd_debug(1, "Inode dentry not created."); 1302*8016e29fSHarshad Shirwadkar ret = -ENOMEM; 1303*8016e29fSHarshad Shirwadkar goto out; 1304*8016e29fSHarshad Shirwadkar } 
1305*8016e29fSHarshad Shirwadkar 1306*8016e29fSHarshad Shirwadkar ret = __ext4_link(dir, inode, dentry_inode); 1307*8016e29fSHarshad Shirwadkar /* 1308*8016e29fSHarshad Shirwadkar * It's possible that link already existed since data blocks 1309*8016e29fSHarshad Shirwadkar * for the dir in question got persisted before we crashed OR 1310*8016e29fSHarshad Shirwadkar * we replayed this tag and crashed before the entire replay 1311*8016e29fSHarshad Shirwadkar * could complete. 1312*8016e29fSHarshad Shirwadkar */ 1313*8016e29fSHarshad Shirwadkar if (ret && ret != -EEXIST) { 1314*8016e29fSHarshad Shirwadkar jbd_debug(1, "Failed to link\n"); 1315*8016e29fSHarshad Shirwadkar goto out; 1316*8016e29fSHarshad Shirwadkar } 1317*8016e29fSHarshad Shirwadkar 1318*8016e29fSHarshad Shirwadkar ret = 0; 1319*8016e29fSHarshad Shirwadkar out: 1320*8016e29fSHarshad Shirwadkar if (dentry_dir) { 1321*8016e29fSHarshad Shirwadkar d_drop(dentry_dir); 1322*8016e29fSHarshad Shirwadkar dput(dentry_dir); 1323*8016e29fSHarshad Shirwadkar } else if (dir) { 1324*8016e29fSHarshad Shirwadkar iput(dir); 1325*8016e29fSHarshad Shirwadkar } 1326*8016e29fSHarshad Shirwadkar if (dentry_inode) { 1327*8016e29fSHarshad Shirwadkar d_drop(dentry_inode); 1328*8016e29fSHarshad Shirwadkar dput(dentry_inode); 1329*8016e29fSHarshad Shirwadkar } 1330*8016e29fSHarshad Shirwadkar 1331*8016e29fSHarshad Shirwadkar return ret; 1332*8016e29fSHarshad Shirwadkar } 1333*8016e29fSHarshad Shirwadkar 1334*8016e29fSHarshad Shirwadkar /* Link replay function */ 1335*8016e29fSHarshad Shirwadkar static int ext4_fc_replay_link(struct super_block *sb, struct ext4_fc_tl *tl) 1336*8016e29fSHarshad Shirwadkar { 1337*8016e29fSHarshad Shirwadkar struct inode *inode; 1338*8016e29fSHarshad Shirwadkar struct dentry_info_args darg; 1339*8016e29fSHarshad Shirwadkar int ret = 0; 1340*8016e29fSHarshad Shirwadkar 1341*8016e29fSHarshad Shirwadkar tl_to_darg(&darg, tl); 1342*8016e29fSHarshad Shirwadkar trace_ext4_fc_replay(sb, EXT4_FC_TAG_LINK, 
darg.ino, 1343*8016e29fSHarshad Shirwadkar darg.parent_ino, darg.dname_len); 1344*8016e29fSHarshad Shirwadkar 1345*8016e29fSHarshad Shirwadkar inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL); 1346*8016e29fSHarshad Shirwadkar if (IS_ERR_OR_NULL(inode)) { 1347*8016e29fSHarshad Shirwadkar jbd_debug(1, "Inode not found."); 1348*8016e29fSHarshad Shirwadkar return 0; 1349*8016e29fSHarshad Shirwadkar } 1350*8016e29fSHarshad Shirwadkar 1351*8016e29fSHarshad Shirwadkar ret = ext4_fc_replay_link_internal(sb, &darg, inode); 1352*8016e29fSHarshad Shirwadkar iput(inode); 1353*8016e29fSHarshad Shirwadkar return ret; 1354*8016e29fSHarshad Shirwadkar } 1355*8016e29fSHarshad Shirwadkar 1356*8016e29fSHarshad Shirwadkar /* 1357*8016e29fSHarshad Shirwadkar * Record all the modified inodes during replay. We use this later to setup 1358*8016e29fSHarshad Shirwadkar * block bitmaps correctly. 1359*8016e29fSHarshad Shirwadkar */ 1360*8016e29fSHarshad Shirwadkar static int ext4_fc_record_modified_inode(struct super_block *sb, int ino) 1361*8016e29fSHarshad Shirwadkar { 1362*8016e29fSHarshad Shirwadkar struct ext4_fc_replay_state *state; 1363*8016e29fSHarshad Shirwadkar int i; 1364*8016e29fSHarshad Shirwadkar 1365*8016e29fSHarshad Shirwadkar state = &EXT4_SB(sb)->s_fc_replay_state; 1366*8016e29fSHarshad Shirwadkar for (i = 0; i < state->fc_modified_inodes_used; i++) 1367*8016e29fSHarshad Shirwadkar if (state->fc_modified_inodes[i] == ino) 1368*8016e29fSHarshad Shirwadkar return 0; 1369*8016e29fSHarshad Shirwadkar if (state->fc_modified_inodes_used == state->fc_modified_inodes_size) { 1370*8016e29fSHarshad Shirwadkar state->fc_modified_inodes_size += 1371*8016e29fSHarshad Shirwadkar EXT4_FC_REPLAY_REALLOC_INCREMENT; 1372*8016e29fSHarshad Shirwadkar state->fc_modified_inodes = krealloc( 1373*8016e29fSHarshad Shirwadkar state->fc_modified_inodes, sizeof(int) * 1374*8016e29fSHarshad Shirwadkar state->fc_modified_inodes_size, 1375*8016e29fSHarshad Shirwadkar GFP_KERNEL); 1376*8016e29fSHarshad 
Shirwadkar if (!state->fc_modified_inodes) 1377*8016e29fSHarshad Shirwadkar return -ENOMEM; 1378*8016e29fSHarshad Shirwadkar } 1379*8016e29fSHarshad Shirwadkar state->fc_modified_inodes[state->fc_modified_inodes_used++] = ino; 1380*8016e29fSHarshad Shirwadkar return 0; 1381*8016e29fSHarshad Shirwadkar } 1382*8016e29fSHarshad Shirwadkar 1383*8016e29fSHarshad Shirwadkar /* 1384*8016e29fSHarshad Shirwadkar * Inode replay function 1385*8016e29fSHarshad Shirwadkar */ 1386*8016e29fSHarshad Shirwadkar static int ext4_fc_replay_inode(struct super_block *sb, struct ext4_fc_tl *tl) 1387*8016e29fSHarshad Shirwadkar { 1388*8016e29fSHarshad Shirwadkar struct ext4_fc_inode *fc_inode; 1389*8016e29fSHarshad Shirwadkar struct ext4_inode *raw_inode; 1390*8016e29fSHarshad Shirwadkar struct ext4_inode *raw_fc_inode; 1391*8016e29fSHarshad Shirwadkar struct inode *inode = NULL; 1392*8016e29fSHarshad Shirwadkar struct ext4_iloc iloc; 1393*8016e29fSHarshad Shirwadkar int inode_len, ino, ret, tag = le16_to_cpu(tl->fc_tag); 1394*8016e29fSHarshad Shirwadkar struct ext4_extent_header *eh; 1395*8016e29fSHarshad Shirwadkar 1396*8016e29fSHarshad Shirwadkar fc_inode = (struct ext4_fc_inode *)ext4_fc_tag_val(tl); 1397*8016e29fSHarshad Shirwadkar 1398*8016e29fSHarshad Shirwadkar ino = le32_to_cpu(fc_inode->fc_ino); 1399*8016e29fSHarshad Shirwadkar trace_ext4_fc_replay(sb, tag, ino, 0, 0); 1400*8016e29fSHarshad Shirwadkar 1401*8016e29fSHarshad Shirwadkar inode = ext4_iget(sb, ino, EXT4_IGET_NORMAL); 1402*8016e29fSHarshad Shirwadkar if (!IS_ERR_OR_NULL(inode)) { 1403*8016e29fSHarshad Shirwadkar ext4_ext_clear_bb(inode); 1404*8016e29fSHarshad Shirwadkar iput(inode); 1405*8016e29fSHarshad Shirwadkar } 1406*8016e29fSHarshad Shirwadkar 1407*8016e29fSHarshad Shirwadkar ext4_fc_record_modified_inode(sb, ino); 1408*8016e29fSHarshad Shirwadkar 1409*8016e29fSHarshad Shirwadkar raw_fc_inode = (struct ext4_inode *)fc_inode->fc_raw_inode; 1410*8016e29fSHarshad Shirwadkar ret = ext4_get_fc_inode_loc(sb, ino, 
&iloc); 1411*8016e29fSHarshad Shirwadkar if (ret) 1412*8016e29fSHarshad Shirwadkar goto out; 1413*8016e29fSHarshad Shirwadkar 1414*8016e29fSHarshad Shirwadkar inode_len = ext4_fc_tag_len(tl) - sizeof(struct ext4_fc_inode); 1415*8016e29fSHarshad Shirwadkar raw_inode = ext4_raw_inode(&iloc); 1416*8016e29fSHarshad Shirwadkar 1417*8016e29fSHarshad Shirwadkar memcpy(raw_inode, raw_fc_inode, offsetof(struct ext4_inode, i_block)); 1418*8016e29fSHarshad Shirwadkar memcpy(&raw_inode->i_generation, &raw_fc_inode->i_generation, 1419*8016e29fSHarshad Shirwadkar inode_len - offsetof(struct ext4_inode, i_generation)); 1420*8016e29fSHarshad Shirwadkar if (le32_to_cpu(raw_inode->i_flags) & EXT4_EXTENTS_FL) { 1421*8016e29fSHarshad Shirwadkar eh = (struct ext4_extent_header *)(&raw_inode->i_block[0]); 1422*8016e29fSHarshad Shirwadkar if (eh->eh_magic != EXT4_EXT_MAGIC) { 1423*8016e29fSHarshad Shirwadkar memset(eh, 0, sizeof(*eh)); 1424*8016e29fSHarshad Shirwadkar eh->eh_magic = EXT4_EXT_MAGIC; 1425*8016e29fSHarshad Shirwadkar eh->eh_max = cpu_to_le16( 1426*8016e29fSHarshad Shirwadkar (sizeof(raw_inode->i_block) - 1427*8016e29fSHarshad Shirwadkar sizeof(struct ext4_extent_header)) 1428*8016e29fSHarshad Shirwadkar / sizeof(struct ext4_extent)); 1429*8016e29fSHarshad Shirwadkar } 1430*8016e29fSHarshad Shirwadkar } else if (le32_to_cpu(raw_inode->i_flags) & EXT4_INLINE_DATA_FL) { 1431*8016e29fSHarshad Shirwadkar memcpy(raw_inode->i_block, raw_fc_inode->i_block, 1432*8016e29fSHarshad Shirwadkar sizeof(raw_inode->i_block)); 1433*8016e29fSHarshad Shirwadkar } 1434*8016e29fSHarshad Shirwadkar 1435*8016e29fSHarshad Shirwadkar /* Immediately update the inode on disk. 
*/ 1436*8016e29fSHarshad Shirwadkar ret = ext4_handle_dirty_metadata(NULL, NULL, iloc.bh); 1437*8016e29fSHarshad Shirwadkar if (ret) 1438*8016e29fSHarshad Shirwadkar goto out; 1439*8016e29fSHarshad Shirwadkar ret = sync_dirty_buffer(iloc.bh); 1440*8016e29fSHarshad Shirwadkar if (ret) 1441*8016e29fSHarshad Shirwadkar goto out; 1442*8016e29fSHarshad Shirwadkar ret = ext4_mark_inode_used(sb, ino); 1443*8016e29fSHarshad Shirwadkar if (ret) 1444*8016e29fSHarshad Shirwadkar goto out; 1445*8016e29fSHarshad Shirwadkar 1446*8016e29fSHarshad Shirwadkar /* Given that we just wrote the inode on disk, this SHOULD succeed. */ 1447*8016e29fSHarshad Shirwadkar inode = ext4_iget(sb, ino, EXT4_IGET_NORMAL); 1448*8016e29fSHarshad Shirwadkar if (IS_ERR_OR_NULL(inode)) { 1449*8016e29fSHarshad Shirwadkar jbd_debug(1, "Inode not found."); 1450*8016e29fSHarshad Shirwadkar return -EFSCORRUPTED; 1451*8016e29fSHarshad Shirwadkar } 1452*8016e29fSHarshad Shirwadkar 1453*8016e29fSHarshad Shirwadkar /* 1454*8016e29fSHarshad Shirwadkar * Our allocator could have made different decisions than before 1455*8016e29fSHarshad Shirwadkar * crashing. This should be fixed but until then, we calculate 1456*8016e29fSHarshad Shirwadkar * the number of blocks the inode. 
1457*8016e29fSHarshad Shirwadkar */ 1458*8016e29fSHarshad Shirwadkar ext4_ext_replay_set_iblocks(inode); 1459*8016e29fSHarshad Shirwadkar 1460*8016e29fSHarshad Shirwadkar inode->i_generation = le32_to_cpu(ext4_raw_inode(&iloc)->i_generation); 1461*8016e29fSHarshad Shirwadkar ext4_reset_inode_seed(inode); 1462*8016e29fSHarshad Shirwadkar 1463*8016e29fSHarshad Shirwadkar ext4_inode_csum_set(inode, ext4_raw_inode(&iloc), EXT4_I(inode)); 1464*8016e29fSHarshad Shirwadkar ret = ext4_handle_dirty_metadata(NULL, NULL, iloc.bh); 1465*8016e29fSHarshad Shirwadkar sync_dirty_buffer(iloc.bh); 1466*8016e29fSHarshad Shirwadkar brelse(iloc.bh); 1467*8016e29fSHarshad Shirwadkar out: 1468*8016e29fSHarshad Shirwadkar iput(inode); 1469*8016e29fSHarshad Shirwadkar if (!ret) 1470*8016e29fSHarshad Shirwadkar blkdev_issue_flush(sb->s_bdev, GFP_KERNEL); 1471*8016e29fSHarshad Shirwadkar 1472*8016e29fSHarshad Shirwadkar return 0; 1473*8016e29fSHarshad Shirwadkar } 1474*8016e29fSHarshad Shirwadkar 1475*8016e29fSHarshad Shirwadkar /* 1476*8016e29fSHarshad Shirwadkar * Dentry create replay function. 1477*8016e29fSHarshad Shirwadkar * 1478*8016e29fSHarshad Shirwadkar * EXT4_FC_TAG_CREAT is preceded by EXT4_FC_TAG_INODE_FULL. Which means, the 1479*8016e29fSHarshad Shirwadkar * inode for which we are trying to create a dentry here, should already have 1480*8016e29fSHarshad Shirwadkar * been replayed before we start here. 
1481*8016e29fSHarshad Shirwadkar */ 1482*8016e29fSHarshad Shirwadkar static int ext4_fc_replay_create(struct super_block *sb, struct ext4_fc_tl *tl) 1483*8016e29fSHarshad Shirwadkar { 1484*8016e29fSHarshad Shirwadkar int ret = 0; 1485*8016e29fSHarshad Shirwadkar struct inode *inode = NULL; 1486*8016e29fSHarshad Shirwadkar struct inode *dir = NULL; 1487*8016e29fSHarshad Shirwadkar struct dentry_info_args darg; 1488*8016e29fSHarshad Shirwadkar 1489*8016e29fSHarshad Shirwadkar tl_to_darg(&darg, tl); 1490*8016e29fSHarshad Shirwadkar 1491*8016e29fSHarshad Shirwadkar trace_ext4_fc_replay(sb, EXT4_FC_TAG_CREAT, darg.ino, 1492*8016e29fSHarshad Shirwadkar darg.parent_ino, darg.dname_len); 1493*8016e29fSHarshad Shirwadkar 1494*8016e29fSHarshad Shirwadkar /* This takes care of update group descriptor and other metadata */ 1495*8016e29fSHarshad Shirwadkar ret = ext4_mark_inode_used(sb, darg.ino); 1496*8016e29fSHarshad Shirwadkar if (ret) 1497*8016e29fSHarshad Shirwadkar goto out; 1498*8016e29fSHarshad Shirwadkar 1499*8016e29fSHarshad Shirwadkar inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL); 1500*8016e29fSHarshad Shirwadkar if (IS_ERR_OR_NULL(inode)) { 1501*8016e29fSHarshad Shirwadkar jbd_debug(1, "inode %d not found.", darg.ino); 1502*8016e29fSHarshad Shirwadkar inode = NULL; 1503*8016e29fSHarshad Shirwadkar ret = -EINVAL; 1504*8016e29fSHarshad Shirwadkar goto out; 1505*8016e29fSHarshad Shirwadkar } 1506*8016e29fSHarshad Shirwadkar 1507*8016e29fSHarshad Shirwadkar if (S_ISDIR(inode->i_mode)) { 1508*8016e29fSHarshad Shirwadkar /* 1509*8016e29fSHarshad Shirwadkar * If we are creating a directory, we need to make sure that the 1510*8016e29fSHarshad Shirwadkar * dot and dot dot dirents are setup properly. 
1511*8016e29fSHarshad Shirwadkar */ 1512*8016e29fSHarshad Shirwadkar dir = ext4_iget(sb, darg.parent_ino, EXT4_IGET_NORMAL); 1513*8016e29fSHarshad Shirwadkar if (IS_ERR_OR_NULL(dir)) { 1514*8016e29fSHarshad Shirwadkar jbd_debug(1, "Dir %d not found.", darg.ino); 1515*8016e29fSHarshad Shirwadkar goto out; 1516*8016e29fSHarshad Shirwadkar } 1517*8016e29fSHarshad Shirwadkar ret = ext4_init_new_dir(NULL, dir, inode); 1518*8016e29fSHarshad Shirwadkar iput(dir); 1519*8016e29fSHarshad Shirwadkar if (ret) { 1520*8016e29fSHarshad Shirwadkar ret = 0; 1521*8016e29fSHarshad Shirwadkar goto out; 1522*8016e29fSHarshad Shirwadkar } 1523*8016e29fSHarshad Shirwadkar } 1524*8016e29fSHarshad Shirwadkar ret = ext4_fc_replay_link_internal(sb, &darg, inode); 1525*8016e29fSHarshad Shirwadkar if (ret) 1526*8016e29fSHarshad Shirwadkar goto out; 1527*8016e29fSHarshad Shirwadkar set_nlink(inode, 1); 1528*8016e29fSHarshad Shirwadkar ext4_mark_inode_dirty(NULL, inode); 1529*8016e29fSHarshad Shirwadkar out: 1530*8016e29fSHarshad Shirwadkar if (inode) 1531*8016e29fSHarshad Shirwadkar iput(inode); 1532*8016e29fSHarshad Shirwadkar return ret; 1533*8016e29fSHarshad Shirwadkar } 1534*8016e29fSHarshad Shirwadkar 1535*8016e29fSHarshad Shirwadkar /* 1536*8016e29fSHarshad Shirwadkar * Record physical disk regions which are in use as per fast commit area. Our 1537*8016e29fSHarshad Shirwadkar * simple replay phase allocator excludes these regions from allocation. 
1538*8016e29fSHarshad Shirwadkar */ 1539*8016e29fSHarshad Shirwadkar static int ext4_fc_record_regions(struct super_block *sb, int ino, 1540*8016e29fSHarshad Shirwadkar ext4_lblk_t lblk, ext4_fsblk_t pblk, int len) 1541*8016e29fSHarshad Shirwadkar { 1542*8016e29fSHarshad Shirwadkar struct ext4_fc_replay_state *state; 1543*8016e29fSHarshad Shirwadkar struct ext4_fc_alloc_region *region; 1544*8016e29fSHarshad Shirwadkar 1545*8016e29fSHarshad Shirwadkar state = &EXT4_SB(sb)->s_fc_replay_state; 1546*8016e29fSHarshad Shirwadkar if (state->fc_regions_used == state->fc_regions_size) { 1547*8016e29fSHarshad Shirwadkar state->fc_regions_size += 1548*8016e29fSHarshad Shirwadkar EXT4_FC_REPLAY_REALLOC_INCREMENT; 1549*8016e29fSHarshad Shirwadkar state->fc_regions = krealloc( 1550*8016e29fSHarshad Shirwadkar state->fc_regions, 1551*8016e29fSHarshad Shirwadkar state->fc_regions_size * 1552*8016e29fSHarshad Shirwadkar sizeof(struct ext4_fc_alloc_region), 1553*8016e29fSHarshad Shirwadkar GFP_KERNEL); 1554*8016e29fSHarshad Shirwadkar if (!state->fc_regions) 1555*8016e29fSHarshad Shirwadkar return -ENOMEM; 1556*8016e29fSHarshad Shirwadkar } 1557*8016e29fSHarshad Shirwadkar region = &state->fc_regions[state->fc_regions_used++]; 1558*8016e29fSHarshad Shirwadkar region->ino = ino; 1559*8016e29fSHarshad Shirwadkar region->lblk = lblk; 1560*8016e29fSHarshad Shirwadkar region->pblk = pblk; 1561*8016e29fSHarshad Shirwadkar region->len = len; 1562*8016e29fSHarshad Shirwadkar 1563*8016e29fSHarshad Shirwadkar return 0; 1564*8016e29fSHarshad Shirwadkar } 1565*8016e29fSHarshad Shirwadkar 1566*8016e29fSHarshad Shirwadkar /* Replay add range tag */ 1567*8016e29fSHarshad Shirwadkar static int ext4_fc_replay_add_range(struct super_block *sb, 1568*8016e29fSHarshad Shirwadkar struct ext4_fc_tl *tl) 1569*8016e29fSHarshad Shirwadkar { 1570*8016e29fSHarshad Shirwadkar struct ext4_fc_add_range *fc_add_ex; 1571*8016e29fSHarshad Shirwadkar struct ext4_extent newex, *ex; 1572*8016e29fSHarshad Shirwadkar 
struct inode *inode; 1573*8016e29fSHarshad Shirwadkar ext4_lblk_t start, cur; 1574*8016e29fSHarshad Shirwadkar int remaining, len; 1575*8016e29fSHarshad Shirwadkar ext4_fsblk_t start_pblk; 1576*8016e29fSHarshad Shirwadkar struct ext4_map_blocks map; 1577*8016e29fSHarshad Shirwadkar struct ext4_ext_path *path = NULL; 1578*8016e29fSHarshad Shirwadkar int ret; 1579*8016e29fSHarshad Shirwadkar 1580*8016e29fSHarshad Shirwadkar fc_add_ex = (struct ext4_fc_add_range *)ext4_fc_tag_val(tl); 1581*8016e29fSHarshad Shirwadkar ex = (struct ext4_extent *)&fc_add_ex->fc_ex; 1582*8016e29fSHarshad Shirwadkar 1583*8016e29fSHarshad Shirwadkar trace_ext4_fc_replay(sb, EXT4_FC_TAG_ADD_RANGE, 1584*8016e29fSHarshad Shirwadkar le32_to_cpu(fc_add_ex->fc_ino), le32_to_cpu(ex->ee_block), 1585*8016e29fSHarshad Shirwadkar ext4_ext_get_actual_len(ex)); 1586*8016e29fSHarshad Shirwadkar 1587*8016e29fSHarshad Shirwadkar inode = ext4_iget(sb, le32_to_cpu(fc_add_ex->fc_ino), 1588*8016e29fSHarshad Shirwadkar EXT4_IGET_NORMAL); 1589*8016e29fSHarshad Shirwadkar if (IS_ERR_OR_NULL(inode)) { 1590*8016e29fSHarshad Shirwadkar jbd_debug(1, "Inode not found."); 1591*8016e29fSHarshad Shirwadkar return 0; 1592*8016e29fSHarshad Shirwadkar } 1593*8016e29fSHarshad Shirwadkar 1594*8016e29fSHarshad Shirwadkar ret = ext4_fc_record_modified_inode(sb, inode->i_ino); 1595*8016e29fSHarshad Shirwadkar 1596*8016e29fSHarshad Shirwadkar start = le32_to_cpu(ex->ee_block); 1597*8016e29fSHarshad Shirwadkar start_pblk = ext4_ext_pblock(ex); 1598*8016e29fSHarshad Shirwadkar len = ext4_ext_get_actual_len(ex); 1599*8016e29fSHarshad Shirwadkar 1600*8016e29fSHarshad Shirwadkar cur = start; 1601*8016e29fSHarshad Shirwadkar remaining = len; 1602*8016e29fSHarshad Shirwadkar jbd_debug(1, "ADD_RANGE, lblk %d, pblk %lld, len %d, unwritten %d, inode %ld\n", 1603*8016e29fSHarshad Shirwadkar start, start_pblk, len, ext4_ext_is_unwritten(ex), 1604*8016e29fSHarshad Shirwadkar inode->i_ino); 1605*8016e29fSHarshad Shirwadkar 
1606*8016e29fSHarshad Shirwadkar while (remaining > 0) { 1607*8016e29fSHarshad Shirwadkar map.m_lblk = cur; 1608*8016e29fSHarshad Shirwadkar map.m_len = remaining; 1609*8016e29fSHarshad Shirwadkar map.m_pblk = 0; 1610*8016e29fSHarshad Shirwadkar ret = ext4_map_blocks(NULL, inode, &map, 0); 1611*8016e29fSHarshad Shirwadkar 1612*8016e29fSHarshad Shirwadkar if (ret < 0) { 1613*8016e29fSHarshad Shirwadkar iput(inode); 1614*8016e29fSHarshad Shirwadkar return 0; 1615*8016e29fSHarshad Shirwadkar } 1616*8016e29fSHarshad Shirwadkar 1617*8016e29fSHarshad Shirwadkar if (ret == 0) { 1618*8016e29fSHarshad Shirwadkar /* Range is not mapped */ 1619*8016e29fSHarshad Shirwadkar path = ext4_find_extent(inode, cur, NULL, 0); 1620*8016e29fSHarshad Shirwadkar if (!path) 1621*8016e29fSHarshad Shirwadkar continue; 1622*8016e29fSHarshad Shirwadkar memset(&newex, 0, sizeof(newex)); 1623*8016e29fSHarshad Shirwadkar newex.ee_block = cpu_to_le32(cur); 1624*8016e29fSHarshad Shirwadkar ext4_ext_store_pblock( 1625*8016e29fSHarshad Shirwadkar &newex, start_pblk + cur - start); 1626*8016e29fSHarshad Shirwadkar newex.ee_len = cpu_to_le16(map.m_len); 1627*8016e29fSHarshad Shirwadkar if (ext4_ext_is_unwritten(ex)) 1628*8016e29fSHarshad Shirwadkar ext4_ext_mark_unwritten(&newex); 1629*8016e29fSHarshad Shirwadkar down_write(&EXT4_I(inode)->i_data_sem); 1630*8016e29fSHarshad Shirwadkar ret = ext4_ext_insert_extent( 1631*8016e29fSHarshad Shirwadkar NULL, inode, &path, &newex, 0); 1632*8016e29fSHarshad Shirwadkar up_write((&EXT4_I(inode)->i_data_sem)); 1633*8016e29fSHarshad Shirwadkar ext4_ext_drop_refs(path); 1634*8016e29fSHarshad Shirwadkar kfree(path); 1635*8016e29fSHarshad Shirwadkar if (ret) { 1636*8016e29fSHarshad Shirwadkar iput(inode); 1637*8016e29fSHarshad Shirwadkar return 0; 1638*8016e29fSHarshad Shirwadkar } 1639*8016e29fSHarshad Shirwadkar goto next; 1640*8016e29fSHarshad Shirwadkar } 1641*8016e29fSHarshad Shirwadkar 1642*8016e29fSHarshad Shirwadkar if (start_pblk + cur - start != map.m_pblk) 
{ 1643*8016e29fSHarshad Shirwadkar /* 1644*8016e29fSHarshad Shirwadkar * Logical to physical mapping changed. This can happen 1645*8016e29fSHarshad Shirwadkar * if this range was removed and then reallocated to 1646*8016e29fSHarshad Shirwadkar * map to new physical blocks during a fast commit. 1647*8016e29fSHarshad Shirwadkar */ 1648*8016e29fSHarshad Shirwadkar ret = ext4_ext_replay_update_ex(inode, cur, map.m_len, 1649*8016e29fSHarshad Shirwadkar ext4_ext_is_unwritten(ex), 1650*8016e29fSHarshad Shirwadkar start_pblk + cur - start); 1651*8016e29fSHarshad Shirwadkar if (ret) { 1652*8016e29fSHarshad Shirwadkar iput(inode); 1653*8016e29fSHarshad Shirwadkar return 0; 1654*8016e29fSHarshad Shirwadkar } 1655*8016e29fSHarshad Shirwadkar /* 1656*8016e29fSHarshad Shirwadkar * Mark the old blocks as free since they aren't used 1657*8016e29fSHarshad Shirwadkar * anymore. We maintain an array of all the modified 1658*8016e29fSHarshad Shirwadkar * inodes. In case these blocks are still used at either 1659*8016e29fSHarshad Shirwadkar * a different logical range in the same inode or in 1660*8016e29fSHarshad Shirwadkar * some different inode, we will mark them as allocated 1661*8016e29fSHarshad Shirwadkar * at the end of the FC replay using our array of 1662*8016e29fSHarshad Shirwadkar * modified inodes. 
1663*8016e29fSHarshad Shirwadkar */ 1664*8016e29fSHarshad Shirwadkar ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, 0); 1665*8016e29fSHarshad Shirwadkar goto next; 1666*8016e29fSHarshad Shirwadkar } 1667*8016e29fSHarshad Shirwadkar 1668*8016e29fSHarshad Shirwadkar /* Range is mapped and needs a state change */ 1669*8016e29fSHarshad Shirwadkar jbd_debug(1, "Converting from %d to %d %lld", 1670*8016e29fSHarshad Shirwadkar map.m_flags & EXT4_MAP_UNWRITTEN, 1671*8016e29fSHarshad Shirwadkar ext4_ext_is_unwritten(ex), map.m_pblk); 1672*8016e29fSHarshad Shirwadkar ret = ext4_ext_replay_update_ex(inode, cur, map.m_len, 1673*8016e29fSHarshad Shirwadkar ext4_ext_is_unwritten(ex), map.m_pblk); 1674*8016e29fSHarshad Shirwadkar if (ret) { 1675*8016e29fSHarshad Shirwadkar iput(inode); 1676*8016e29fSHarshad Shirwadkar return 0; 1677*8016e29fSHarshad Shirwadkar } 1678*8016e29fSHarshad Shirwadkar /* 1679*8016e29fSHarshad Shirwadkar * We may have split the extent tree while toggling the state. 1680*8016e29fSHarshad Shirwadkar * Try to shrink the extent tree now. 
1681*8016e29fSHarshad Shirwadkar */ 1682*8016e29fSHarshad Shirwadkar ext4_ext_replay_shrink_inode(inode, start + len); 1683*8016e29fSHarshad Shirwadkar next: 1684*8016e29fSHarshad Shirwadkar cur += map.m_len; 1685*8016e29fSHarshad Shirwadkar remaining -= map.m_len; 1686*8016e29fSHarshad Shirwadkar } 1687*8016e29fSHarshad Shirwadkar ext4_ext_replay_shrink_inode(inode, i_size_read(inode) >> 1688*8016e29fSHarshad Shirwadkar sb->s_blocksize_bits); 1689*8016e29fSHarshad Shirwadkar iput(inode); 1690*8016e29fSHarshad Shirwadkar return 0; 1691*8016e29fSHarshad Shirwadkar } 1692*8016e29fSHarshad Shirwadkar 1693*8016e29fSHarshad Shirwadkar /* Replay DEL_RANGE tag */ 1694*8016e29fSHarshad Shirwadkar static int 1695*8016e29fSHarshad Shirwadkar ext4_fc_replay_del_range(struct super_block *sb, struct ext4_fc_tl *tl) 1696*8016e29fSHarshad Shirwadkar { 1697*8016e29fSHarshad Shirwadkar struct inode *inode; 1698*8016e29fSHarshad Shirwadkar struct ext4_fc_del_range *lrange; 1699*8016e29fSHarshad Shirwadkar struct ext4_map_blocks map; 1700*8016e29fSHarshad Shirwadkar ext4_lblk_t cur, remaining; 1701*8016e29fSHarshad Shirwadkar int ret; 1702*8016e29fSHarshad Shirwadkar 1703*8016e29fSHarshad Shirwadkar lrange = (struct ext4_fc_del_range *)ext4_fc_tag_val(tl); 1704*8016e29fSHarshad Shirwadkar cur = le32_to_cpu(lrange->fc_lblk); 1705*8016e29fSHarshad Shirwadkar remaining = le32_to_cpu(lrange->fc_len); 1706*8016e29fSHarshad Shirwadkar 1707*8016e29fSHarshad Shirwadkar trace_ext4_fc_replay(sb, EXT4_FC_TAG_DEL_RANGE, 1708*8016e29fSHarshad Shirwadkar le32_to_cpu(lrange->fc_ino), cur, remaining); 1709*8016e29fSHarshad Shirwadkar 1710*8016e29fSHarshad Shirwadkar inode = ext4_iget(sb, le32_to_cpu(lrange->fc_ino), EXT4_IGET_NORMAL); 1711*8016e29fSHarshad Shirwadkar if (IS_ERR_OR_NULL(inode)) { 1712*8016e29fSHarshad Shirwadkar jbd_debug(1, "Inode %d not found", le32_to_cpu(lrange->fc_ino)); 1713*8016e29fSHarshad Shirwadkar return 0; 1714*8016e29fSHarshad Shirwadkar } 1715*8016e29fSHarshad 
Shirwadkar 1716*8016e29fSHarshad Shirwadkar ret = ext4_fc_record_modified_inode(sb, inode->i_ino); 1717*8016e29fSHarshad Shirwadkar 1718*8016e29fSHarshad Shirwadkar jbd_debug(1, "DEL_RANGE, inode %ld, lblk %d, len %d\n", 1719*8016e29fSHarshad Shirwadkar inode->i_ino, le32_to_cpu(lrange->fc_lblk), 1720*8016e29fSHarshad Shirwadkar le32_to_cpu(lrange->fc_len)); 1721*8016e29fSHarshad Shirwadkar while (remaining > 0) { 1722*8016e29fSHarshad Shirwadkar map.m_lblk = cur; 1723*8016e29fSHarshad Shirwadkar map.m_len = remaining; 1724*8016e29fSHarshad Shirwadkar 1725*8016e29fSHarshad Shirwadkar ret = ext4_map_blocks(NULL, inode, &map, 0); 1726*8016e29fSHarshad Shirwadkar if (ret < 0) { 1727*8016e29fSHarshad Shirwadkar iput(inode); 1728*8016e29fSHarshad Shirwadkar return 0; 1729*8016e29fSHarshad Shirwadkar } 1730*8016e29fSHarshad Shirwadkar if (ret > 0) { 1731*8016e29fSHarshad Shirwadkar remaining -= ret; 1732*8016e29fSHarshad Shirwadkar cur += ret; 1733*8016e29fSHarshad Shirwadkar ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, 0); 1734*8016e29fSHarshad Shirwadkar } else { 1735*8016e29fSHarshad Shirwadkar remaining -= map.m_len; 1736*8016e29fSHarshad Shirwadkar cur += map.m_len; 1737*8016e29fSHarshad Shirwadkar } 1738*8016e29fSHarshad Shirwadkar } 1739*8016e29fSHarshad Shirwadkar 1740*8016e29fSHarshad Shirwadkar ret = ext4_punch_hole(inode, 1741*8016e29fSHarshad Shirwadkar le32_to_cpu(lrange->fc_lblk) << sb->s_blocksize_bits, 1742*8016e29fSHarshad Shirwadkar le32_to_cpu(lrange->fc_len) << sb->s_blocksize_bits); 1743*8016e29fSHarshad Shirwadkar if (ret) 1744*8016e29fSHarshad Shirwadkar jbd_debug(1, "ext4_punch_hole returned %d", ret); 1745*8016e29fSHarshad Shirwadkar ext4_ext_replay_shrink_inode(inode, 1746*8016e29fSHarshad Shirwadkar i_size_read(inode) >> sb->s_blocksize_bits); 1747*8016e29fSHarshad Shirwadkar ext4_mark_inode_dirty(NULL, inode); 1748*8016e29fSHarshad Shirwadkar iput(inode); 1749*8016e29fSHarshad Shirwadkar 1750*8016e29fSHarshad Shirwadkar return 0; 
1751*8016e29fSHarshad Shirwadkar } 1752*8016e29fSHarshad Shirwadkar 1753*8016e29fSHarshad Shirwadkar static inline const char *tag2str(u16 tag) 1754*8016e29fSHarshad Shirwadkar { 1755*8016e29fSHarshad Shirwadkar switch (tag) { 1756*8016e29fSHarshad Shirwadkar case EXT4_FC_TAG_LINK: 1757*8016e29fSHarshad Shirwadkar return "TAG_ADD_ENTRY"; 1758*8016e29fSHarshad Shirwadkar case EXT4_FC_TAG_UNLINK: 1759*8016e29fSHarshad Shirwadkar return "TAG_DEL_ENTRY"; 1760*8016e29fSHarshad Shirwadkar case EXT4_FC_TAG_ADD_RANGE: 1761*8016e29fSHarshad Shirwadkar return "TAG_ADD_RANGE"; 1762*8016e29fSHarshad Shirwadkar case EXT4_FC_TAG_CREAT: 1763*8016e29fSHarshad Shirwadkar return "TAG_CREAT_DENTRY"; 1764*8016e29fSHarshad Shirwadkar case EXT4_FC_TAG_DEL_RANGE: 1765*8016e29fSHarshad Shirwadkar return "TAG_DEL_RANGE"; 1766*8016e29fSHarshad Shirwadkar case EXT4_FC_TAG_INODE: 1767*8016e29fSHarshad Shirwadkar return "TAG_INODE"; 1768*8016e29fSHarshad Shirwadkar case EXT4_FC_TAG_PAD: 1769*8016e29fSHarshad Shirwadkar return "TAG_PAD"; 1770*8016e29fSHarshad Shirwadkar case EXT4_FC_TAG_TAIL: 1771*8016e29fSHarshad Shirwadkar return "TAG_TAIL"; 1772*8016e29fSHarshad Shirwadkar case EXT4_FC_TAG_HEAD: 1773*8016e29fSHarshad Shirwadkar return "TAG_HEAD"; 1774*8016e29fSHarshad Shirwadkar default: 1775*8016e29fSHarshad Shirwadkar return "TAG_ERROR"; 1776*8016e29fSHarshad Shirwadkar } 1777*8016e29fSHarshad Shirwadkar } 1778*8016e29fSHarshad Shirwadkar 1779*8016e29fSHarshad Shirwadkar static void ext4_fc_set_bitmaps_and_counters(struct super_block *sb) 1780*8016e29fSHarshad Shirwadkar { 1781*8016e29fSHarshad Shirwadkar struct ext4_fc_replay_state *state; 1782*8016e29fSHarshad Shirwadkar struct inode *inode; 1783*8016e29fSHarshad Shirwadkar struct ext4_ext_path *path = NULL; 1784*8016e29fSHarshad Shirwadkar struct ext4_map_blocks map; 1785*8016e29fSHarshad Shirwadkar int i, ret, j; 1786*8016e29fSHarshad Shirwadkar ext4_lblk_t cur, end; 1787*8016e29fSHarshad Shirwadkar 1788*8016e29fSHarshad Shirwadkar 
state = &EXT4_SB(sb)->s_fc_replay_state; 1789*8016e29fSHarshad Shirwadkar for (i = 0; i < state->fc_modified_inodes_used; i++) { 1790*8016e29fSHarshad Shirwadkar inode = ext4_iget(sb, state->fc_modified_inodes[i], 1791*8016e29fSHarshad Shirwadkar EXT4_IGET_NORMAL); 1792*8016e29fSHarshad Shirwadkar if (IS_ERR_OR_NULL(inode)) { 1793*8016e29fSHarshad Shirwadkar jbd_debug(1, "Inode %d not found.", 1794*8016e29fSHarshad Shirwadkar state->fc_modified_inodes[i]); 1795*8016e29fSHarshad Shirwadkar continue; 1796*8016e29fSHarshad Shirwadkar } 1797*8016e29fSHarshad Shirwadkar cur = 0; 1798*8016e29fSHarshad Shirwadkar end = EXT_MAX_BLOCKS; 1799*8016e29fSHarshad Shirwadkar while (cur < end) { 1800*8016e29fSHarshad Shirwadkar map.m_lblk = cur; 1801*8016e29fSHarshad Shirwadkar map.m_len = end - cur; 1802*8016e29fSHarshad Shirwadkar 1803*8016e29fSHarshad Shirwadkar ret = ext4_map_blocks(NULL, inode, &map, 0); 1804*8016e29fSHarshad Shirwadkar if (ret < 0) 1805*8016e29fSHarshad Shirwadkar break; 1806*8016e29fSHarshad Shirwadkar 1807*8016e29fSHarshad Shirwadkar if (ret > 0) { 1808*8016e29fSHarshad Shirwadkar path = ext4_find_extent(inode, map.m_lblk, NULL, 0); 1809*8016e29fSHarshad Shirwadkar if (!IS_ERR_OR_NULL(path)) { 1810*8016e29fSHarshad Shirwadkar for (j = 0; j < path->p_depth; j++) 1811*8016e29fSHarshad Shirwadkar ext4_mb_mark_bb(inode->i_sb, 1812*8016e29fSHarshad Shirwadkar path[j].p_block, 1, 1); 1813*8016e29fSHarshad Shirwadkar ext4_ext_drop_refs(path); 1814*8016e29fSHarshad Shirwadkar kfree(path); 1815*8016e29fSHarshad Shirwadkar } 1816*8016e29fSHarshad Shirwadkar cur += ret; 1817*8016e29fSHarshad Shirwadkar ext4_mb_mark_bb(inode->i_sb, map.m_pblk, 1818*8016e29fSHarshad Shirwadkar map.m_len, 1); 1819*8016e29fSHarshad Shirwadkar } else { 1820*8016e29fSHarshad Shirwadkar cur = cur + (map.m_len ? 
map.m_len : 1); 1821*8016e29fSHarshad Shirwadkar } 1822*8016e29fSHarshad Shirwadkar } 1823*8016e29fSHarshad Shirwadkar iput(inode); 1824*8016e29fSHarshad Shirwadkar } 1825*8016e29fSHarshad Shirwadkar } 1826*8016e29fSHarshad Shirwadkar 1827*8016e29fSHarshad Shirwadkar /* 1828*8016e29fSHarshad Shirwadkar * Check if block is in excluded regions for block allocation. The simple 1829*8016e29fSHarshad Shirwadkar * allocator that runs during replay phase is calls this function to see 1830*8016e29fSHarshad Shirwadkar * if it is okay to use a block. 1831*8016e29fSHarshad Shirwadkar */ 1832*8016e29fSHarshad Shirwadkar bool ext4_fc_replay_check_excluded(struct super_block *sb, ext4_fsblk_t blk) 1833*8016e29fSHarshad Shirwadkar { 1834*8016e29fSHarshad Shirwadkar int i; 1835*8016e29fSHarshad Shirwadkar struct ext4_fc_replay_state *state; 1836*8016e29fSHarshad Shirwadkar 1837*8016e29fSHarshad Shirwadkar state = &EXT4_SB(sb)->s_fc_replay_state; 1838*8016e29fSHarshad Shirwadkar for (i = 0; i < state->fc_regions_valid; i++) { 1839*8016e29fSHarshad Shirwadkar if (state->fc_regions[i].ino == 0 || 1840*8016e29fSHarshad Shirwadkar state->fc_regions[i].len == 0) 1841*8016e29fSHarshad Shirwadkar continue; 1842*8016e29fSHarshad Shirwadkar if (blk >= state->fc_regions[i].pblk && 1843*8016e29fSHarshad Shirwadkar blk < state->fc_regions[i].pblk + state->fc_regions[i].len) 1844*8016e29fSHarshad Shirwadkar return true; 1845*8016e29fSHarshad Shirwadkar } 1846*8016e29fSHarshad Shirwadkar return false; 1847*8016e29fSHarshad Shirwadkar } 1848*8016e29fSHarshad Shirwadkar 1849*8016e29fSHarshad Shirwadkar /* Cleanup function called after replay */ 1850*8016e29fSHarshad Shirwadkar void ext4_fc_replay_cleanup(struct super_block *sb) 1851*8016e29fSHarshad Shirwadkar { 1852*8016e29fSHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB(sb); 1853*8016e29fSHarshad Shirwadkar 1854*8016e29fSHarshad Shirwadkar sbi->s_mount_state &= ~EXT4_FC_REPLAY; 1855*8016e29fSHarshad Shirwadkar 
kfree(sbi->s_fc_replay_state.fc_regions); 1856*8016e29fSHarshad Shirwadkar kfree(sbi->s_fc_replay_state.fc_modified_inodes); 1857*8016e29fSHarshad Shirwadkar } 1858*8016e29fSHarshad Shirwadkar 1859*8016e29fSHarshad Shirwadkar /* 1860*8016e29fSHarshad Shirwadkar * Recovery Scan phase handler 1861*8016e29fSHarshad Shirwadkar * 1862*8016e29fSHarshad Shirwadkar * This function is called during the scan phase and is responsible 1863*8016e29fSHarshad Shirwadkar * for doing following things: 1864*8016e29fSHarshad Shirwadkar * - Make sure the fast commit area has valid tags for replay 1865*8016e29fSHarshad Shirwadkar * - Count number of tags that need to be replayed by the replay handler 1866*8016e29fSHarshad Shirwadkar * - Verify CRC 1867*8016e29fSHarshad Shirwadkar * - Create a list of excluded blocks for allocation during replay phase 1868*8016e29fSHarshad Shirwadkar * 1869*8016e29fSHarshad Shirwadkar * This function returns JBD2_FC_REPLAY_CONTINUE to indicate that SCAN is 1870*8016e29fSHarshad Shirwadkar * incomplete and JBD2 should send more blocks. It returns JBD2_FC_REPLAY_STOP 1871*8016e29fSHarshad Shirwadkar * to indicate that scan has finished and JBD2 can now start replay phase. 1872*8016e29fSHarshad Shirwadkar * It returns a negative error to indicate that there was an error. At the end 1873*8016e29fSHarshad Shirwadkar * of a successful scan phase, sbi->s_fc_replay_state.fc_replay_num_tags is set 1874*8016e29fSHarshad Shirwadkar * to indicate the number of tags that need to replayed during the replay phase. 
1875*8016e29fSHarshad Shirwadkar */ 1876*8016e29fSHarshad Shirwadkar static int ext4_fc_replay_scan(journal_t *journal, 1877*8016e29fSHarshad Shirwadkar struct buffer_head *bh, int off, 1878*8016e29fSHarshad Shirwadkar tid_t expected_tid) 1879*8016e29fSHarshad Shirwadkar { 1880*8016e29fSHarshad Shirwadkar struct super_block *sb = journal->j_private; 1881*8016e29fSHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB(sb); 1882*8016e29fSHarshad Shirwadkar struct ext4_fc_replay_state *state; 1883*8016e29fSHarshad Shirwadkar int ret = JBD2_FC_REPLAY_CONTINUE; 1884*8016e29fSHarshad Shirwadkar struct ext4_fc_add_range *ext; 1885*8016e29fSHarshad Shirwadkar struct ext4_fc_tl *tl; 1886*8016e29fSHarshad Shirwadkar struct ext4_fc_tail *tail; 1887*8016e29fSHarshad Shirwadkar __u8 *start, *end; 1888*8016e29fSHarshad Shirwadkar struct ext4_fc_head *head; 1889*8016e29fSHarshad Shirwadkar struct ext4_extent *ex; 1890*8016e29fSHarshad Shirwadkar 1891*8016e29fSHarshad Shirwadkar state = &sbi->s_fc_replay_state; 1892*8016e29fSHarshad Shirwadkar 1893*8016e29fSHarshad Shirwadkar start = (u8 *)bh->b_data; 1894*8016e29fSHarshad Shirwadkar end = (__u8 *)bh->b_data + journal->j_blocksize - 1; 1895*8016e29fSHarshad Shirwadkar 1896*8016e29fSHarshad Shirwadkar if (state->fc_replay_expected_off == 0) { 1897*8016e29fSHarshad Shirwadkar state->fc_cur_tag = 0; 1898*8016e29fSHarshad Shirwadkar state->fc_replay_num_tags = 0; 1899*8016e29fSHarshad Shirwadkar state->fc_crc = 0; 1900*8016e29fSHarshad Shirwadkar state->fc_regions = NULL; 1901*8016e29fSHarshad Shirwadkar state->fc_regions_valid = state->fc_regions_used = 1902*8016e29fSHarshad Shirwadkar state->fc_regions_size = 0; 1903*8016e29fSHarshad Shirwadkar /* Check if we can stop early */ 1904*8016e29fSHarshad Shirwadkar if (le16_to_cpu(((struct ext4_fc_tl *)start)->fc_tag) 1905*8016e29fSHarshad Shirwadkar != EXT4_FC_TAG_HEAD) 1906*8016e29fSHarshad Shirwadkar return 0; 1907*8016e29fSHarshad Shirwadkar } 1908*8016e29fSHarshad Shirwadkar 
1909*8016e29fSHarshad Shirwadkar if (off != state->fc_replay_expected_off) { 1910*8016e29fSHarshad Shirwadkar ret = -EFSCORRUPTED; 1911*8016e29fSHarshad Shirwadkar goto out_err; 1912*8016e29fSHarshad Shirwadkar } 1913*8016e29fSHarshad Shirwadkar 1914*8016e29fSHarshad Shirwadkar state->fc_replay_expected_off++; 1915*8016e29fSHarshad Shirwadkar fc_for_each_tl(start, end, tl) { 1916*8016e29fSHarshad Shirwadkar jbd_debug(3, "Scan phase, tag:%s, blk %lld\n", 1917*8016e29fSHarshad Shirwadkar tag2str(le16_to_cpu(tl->fc_tag)), bh->b_blocknr); 1918*8016e29fSHarshad Shirwadkar switch (le16_to_cpu(tl->fc_tag)) { 1919*8016e29fSHarshad Shirwadkar case EXT4_FC_TAG_ADD_RANGE: 1920*8016e29fSHarshad Shirwadkar ext = (struct ext4_fc_add_range *)ext4_fc_tag_val(tl); 1921*8016e29fSHarshad Shirwadkar ex = (struct ext4_extent *)&ext->fc_ex; 1922*8016e29fSHarshad Shirwadkar ret = ext4_fc_record_regions(sb, 1923*8016e29fSHarshad Shirwadkar le32_to_cpu(ext->fc_ino), 1924*8016e29fSHarshad Shirwadkar le32_to_cpu(ex->ee_block), ext4_ext_pblock(ex), 1925*8016e29fSHarshad Shirwadkar ext4_ext_get_actual_len(ex)); 1926*8016e29fSHarshad Shirwadkar if (ret < 0) 1927*8016e29fSHarshad Shirwadkar break; 1928*8016e29fSHarshad Shirwadkar ret = JBD2_FC_REPLAY_CONTINUE; 1929*8016e29fSHarshad Shirwadkar fallthrough; 1930*8016e29fSHarshad Shirwadkar case EXT4_FC_TAG_DEL_RANGE: 1931*8016e29fSHarshad Shirwadkar case EXT4_FC_TAG_LINK: 1932*8016e29fSHarshad Shirwadkar case EXT4_FC_TAG_UNLINK: 1933*8016e29fSHarshad Shirwadkar case EXT4_FC_TAG_CREAT: 1934*8016e29fSHarshad Shirwadkar case EXT4_FC_TAG_INODE: 1935*8016e29fSHarshad Shirwadkar case EXT4_FC_TAG_PAD: 1936*8016e29fSHarshad Shirwadkar state->fc_cur_tag++; 1937*8016e29fSHarshad Shirwadkar state->fc_crc = ext4_chksum(sbi, state->fc_crc, tl, 1938*8016e29fSHarshad Shirwadkar sizeof(*tl) + ext4_fc_tag_len(tl)); 1939*8016e29fSHarshad Shirwadkar break; 1940*8016e29fSHarshad Shirwadkar case EXT4_FC_TAG_TAIL: 1941*8016e29fSHarshad Shirwadkar state->fc_cur_tag++; 
1942*8016e29fSHarshad Shirwadkar tail = (struct ext4_fc_tail *)ext4_fc_tag_val(tl); 1943*8016e29fSHarshad Shirwadkar state->fc_crc = ext4_chksum(sbi, state->fc_crc, tl, 1944*8016e29fSHarshad Shirwadkar sizeof(*tl) + 1945*8016e29fSHarshad Shirwadkar offsetof(struct ext4_fc_tail, 1946*8016e29fSHarshad Shirwadkar fc_crc)); 1947*8016e29fSHarshad Shirwadkar if (le32_to_cpu(tail->fc_tid) == expected_tid && 1948*8016e29fSHarshad Shirwadkar le32_to_cpu(tail->fc_crc) == state->fc_crc) { 1949*8016e29fSHarshad Shirwadkar state->fc_replay_num_tags = state->fc_cur_tag; 1950*8016e29fSHarshad Shirwadkar state->fc_regions_valid = 1951*8016e29fSHarshad Shirwadkar state->fc_regions_used; 1952*8016e29fSHarshad Shirwadkar } else { 1953*8016e29fSHarshad Shirwadkar ret = state->fc_replay_num_tags ? 1954*8016e29fSHarshad Shirwadkar JBD2_FC_REPLAY_STOP : -EFSBADCRC; 1955*8016e29fSHarshad Shirwadkar } 1956*8016e29fSHarshad Shirwadkar state->fc_crc = 0; 1957*8016e29fSHarshad Shirwadkar break; 1958*8016e29fSHarshad Shirwadkar case EXT4_FC_TAG_HEAD: 1959*8016e29fSHarshad Shirwadkar head = (struct ext4_fc_head *)ext4_fc_tag_val(tl); 1960*8016e29fSHarshad Shirwadkar if (le32_to_cpu(head->fc_features) & 1961*8016e29fSHarshad Shirwadkar ~EXT4_FC_SUPPORTED_FEATURES) { 1962*8016e29fSHarshad Shirwadkar ret = -EOPNOTSUPP; 1963*8016e29fSHarshad Shirwadkar break; 1964*8016e29fSHarshad Shirwadkar } 1965*8016e29fSHarshad Shirwadkar if (le32_to_cpu(head->fc_tid) != expected_tid) { 1966*8016e29fSHarshad Shirwadkar ret = JBD2_FC_REPLAY_STOP; 1967*8016e29fSHarshad Shirwadkar break; 1968*8016e29fSHarshad Shirwadkar } 1969*8016e29fSHarshad Shirwadkar state->fc_cur_tag++; 1970*8016e29fSHarshad Shirwadkar state->fc_crc = ext4_chksum(sbi, state->fc_crc, tl, 1971*8016e29fSHarshad Shirwadkar sizeof(*tl) + ext4_fc_tag_len(tl)); 1972*8016e29fSHarshad Shirwadkar break; 1973*8016e29fSHarshad Shirwadkar default: 1974*8016e29fSHarshad Shirwadkar ret = state->fc_replay_num_tags ? 
1975*8016e29fSHarshad Shirwadkar JBD2_FC_REPLAY_STOP : -ECANCELED; 1976*8016e29fSHarshad Shirwadkar } 1977*8016e29fSHarshad Shirwadkar if (ret < 0 || ret == JBD2_FC_REPLAY_STOP) 1978*8016e29fSHarshad Shirwadkar break; 1979*8016e29fSHarshad Shirwadkar } 1980*8016e29fSHarshad Shirwadkar 1981*8016e29fSHarshad Shirwadkar out_err: 1982*8016e29fSHarshad Shirwadkar trace_ext4_fc_replay_scan(sb, ret, off); 1983*8016e29fSHarshad Shirwadkar return ret; 1984*8016e29fSHarshad Shirwadkar } 1985*8016e29fSHarshad Shirwadkar 19865b849b5fSHarshad Shirwadkar /* 19875b849b5fSHarshad Shirwadkar * Main recovery path entry point. 1988*8016e29fSHarshad Shirwadkar * The meaning of return codes is similar as above. 19895b849b5fSHarshad Shirwadkar */ 19905b849b5fSHarshad Shirwadkar static int ext4_fc_replay(journal_t *journal, struct buffer_head *bh, 19915b849b5fSHarshad Shirwadkar enum passtype pass, int off, tid_t expected_tid) 19925b849b5fSHarshad Shirwadkar { 1993*8016e29fSHarshad Shirwadkar struct super_block *sb = journal->j_private; 1994*8016e29fSHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB(sb); 1995*8016e29fSHarshad Shirwadkar struct ext4_fc_tl *tl; 1996*8016e29fSHarshad Shirwadkar __u8 *start, *end; 1997*8016e29fSHarshad Shirwadkar int ret = JBD2_FC_REPLAY_CONTINUE; 1998*8016e29fSHarshad Shirwadkar struct ext4_fc_replay_state *state = &sbi->s_fc_replay_state; 1999*8016e29fSHarshad Shirwadkar struct ext4_fc_tail *tail; 2000*8016e29fSHarshad Shirwadkar 2001*8016e29fSHarshad Shirwadkar if (pass == PASS_SCAN) { 2002*8016e29fSHarshad Shirwadkar state->fc_current_pass = PASS_SCAN; 2003*8016e29fSHarshad Shirwadkar return ext4_fc_replay_scan(journal, bh, off, expected_tid); 2004*8016e29fSHarshad Shirwadkar } 2005*8016e29fSHarshad Shirwadkar 2006*8016e29fSHarshad Shirwadkar if (state->fc_current_pass != pass) { 2007*8016e29fSHarshad Shirwadkar state->fc_current_pass = pass; 2008*8016e29fSHarshad Shirwadkar sbi->s_mount_state |= EXT4_FC_REPLAY; 2009*8016e29fSHarshad Shirwadkar } 
2010*8016e29fSHarshad Shirwadkar if (!sbi->s_fc_replay_state.fc_replay_num_tags) { 2011*8016e29fSHarshad Shirwadkar jbd_debug(1, "Replay stops\n"); 2012*8016e29fSHarshad Shirwadkar ext4_fc_set_bitmaps_and_counters(sb); 20135b849b5fSHarshad Shirwadkar return 0; 20145b849b5fSHarshad Shirwadkar } 20155b849b5fSHarshad Shirwadkar 2016*8016e29fSHarshad Shirwadkar #ifdef CONFIG_EXT4_DEBUG 2017*8016e29fSHarshad Shirwadkar if (sbi->s_fc_debug_max_replay && off >= sbi->s_fc_debug_max_replay) { 2018*8016e29fSHarshad Shirwadkar pr_warn("Dropping fc block %d because max_replay set\n", off); 2019*8016e29fSHarshad Shirwadkar return JBD2_FC_REPLAY_STOP; 2020*8016e29fSHarshad Shirwadkar } 2021*8016e29fSHarshad Shirwadkar #endif 2022*8016e29fSHarshad Shirwadkar 2023*8016e29fSHarshad Shirwadkar start = (u8 *)bh->b_data; 2024*8016e29fSHarshad Shirwadkar end = (__u8 *)bh->b_data + journal->j_blocksize - 1; 2025*8016e29fSHarshad Shirwadkar 2026*8016e29fSHarshad Shirwadkar fc_for_each_tl(start, end, tl) { 2027*8016e29fSHarshad Shirwadkar if (state->fc_replay_num_tags == 0) { 2028*8016e29fSHarshad Shirwadkar ret = JBD2_FC_REPLAY_STOP; 2029*8016e29fSHarshad Shirwadkar ext4_fc_set_bitmaps_and_counters(sb); 2030*8016e29fSHarshad Shirwadkar break; 2031*8016e29fSHarshad Shirwadkar } 2032*8016e29fSHarshad Shirwadkar jbd_debug(3, "Replay phase, tag:%s\n", 2033*8016e29fSHarshad Shirwadkar tag2str(le16_to_cpu(tl->fc_tag))); 2034*8016e29fSHarshad Shirwadkar state->fc_replay_num_tags--; 2035*8016e29fSHarshad Shirwadkar switch (le16_to_cpu(tl->fc_tag)) { 2036*8016e29fSHarshad Shirwadkar case EXT4_FC_TAG_LINK: 2037*8016e29fSHarshad Shirwadkar ret = ext4_fc_replay_link(sb, tl); 2038*8016e29fSHarshad Shirwadkar break; 2039*8016e29fSHarshad Shirwadkar case EXT4_FC_TAG_UNLINK: 2040*8016e29fSHarshad Shirwadkar ret = ext4_fc_replay_unlink(sb, tl); 2041*8016e29fSHarshad Shirwadkar break; 2042*8016e29fSHarshad Shirwadkar case EXT4_FC_TAG_ADD_RANGE: 2043*8016e29fSHarshad Shirwadkar ret = 
ext4_fc_replay_add_range(sb, tl); 2044*8016e29fSHarshad Shirwadkar break; 2045*8016e29fSHarshad Shirwadkar case EXT4_FC_TAG_CREAT: 2046*8016e29fSHarshad Shirwadkar ret = ext4_fc_replay_create(sb, tl); 2047*8016e29fSHarshad Shirwadkar break; 2048*8016e29fSHarshad Shirwadkar case EXT4_FC_TAG_DEL_RANGE: 2049*8016e29fSHarshad Shirwadkar ret = ext4_fc_replay_del_range(sb, tl); 2050*8016e29fSHarshad Shirwadkar break; 2051*8016e29fSHarshad Shirwadkar case EXT4_FC_TAG_INODE: 2052*8016e29fSHarshad Shirwadkar ret = ext4_fc_replay_inode(sb, tl); 2053*8016e29fSHarshad Shirwadkar break; 2054*8016e29fSHarshad Shirwadkar case EXT4_FC_TAG_PAD: 2055*8016e29fSHarshad Shirwadkar trace_ext4_fc_replay(sb, EXT4_FC_TAG_PAD, 0, 2056*8016e29fSHarshad Shirwadkar ext4_fc_tag_len(tl), 0); 2057*8016e29fSHarshad Shirwadkar break; 2058*8016e29fSHarshad Shirwadkar case EXT4_FC_TAG_TAIL: 2059*8016e29fSHarshad Shirwadkar trace_ext4_fc_replay(sb, EXT4_FC_TAG_TAIL, 0, 2060*8016e29fSHarshad Shirwadkar ext4_fc_tag_len(tl), 0); 2061*8016e29fSHarshad Shirwadkar tail = (struct ext4_fc_tail *)ext4_fc_tag_val(tl); 2062*8016e29fSHarshad Shirwadkar WARN_ON(le32_to_cpu(tail->fc_tid) != expected_tid); 2063*8016e29fSHarshad Shirwadkar break; 2064*8016e29fSHarshad Shirwadkar case EXT4_FC_TAG_HEAD: 2065*8016e29fSHarshad Shirwadkar break; 2066*8016e29fSHarshad Shirwadkar default: 2067*8016e29fSHarshad Shirwadkar trace_ext4_fc_replay(sb, le16_to_cpu(tl->fc_tag), 0, 2068*8016e29fSHarshad Shirwadkar ext4_fc_tag_len(tl), 0); 2069*8016e29fSHarshad Shirwadkar ret = -ECANCELED; 2070*8016e29fSHarshad Shirwadkar break; 2071*8016e29fSHarshad Shirwadkar } 2072*8016e29fSHarshad Shirwadkar if (ret < 0) 2073*8016e29fSHarshad Shirwadkar break; 2074*8016e29fSHarshad Shirwadkar ret = JBD2_FC_REPLAY_CONTINUE; 2075*8016e29fSHarshad Shirwadkar } 2076*8016e29fSHarshad Shirwadkar return ret; 2077*8016e29fSHarshad Shirwadkar } 2078*8016e29fSHarshad Shirwadkar 20796866d7b3SHarshad Shirwadkar void ext4_fc_init(struct super_block *sb, 
journal_t *journal) 20806866d7b3SHarshad Shirwadkar { 20815b849b5fSHarshad Shirwadkar /* 20825b849b5fSHarshad Shirwadkar * We set replay callback even if fast commit disabled because we may 20835b849b5fSHarshad Shirwadkar * could still have fast commit blocks that need to be replayed even if 20845b849b5fSHarshad Shirwadkar * fast commit has now been turned off. 20855b849b5fSHarshad Shirwadkar */ 20865b849b5fSHarshad Shirwadkar journal->j_fc_replay_callback = ext4_fc_replay; 20876866d7b3SHarshad Shirwadkar if (!test_opt2(sb, JOURNAL_FAST_COMMIT)) 20886866d7b3SHarshad Shirwadkar return; 2089ff780b91SHarshad Shirwadkar journal->j_fc_cleanup_callback = ext4_fc_cleanup; 20906866d7b3SHarshad Shirwadkar if (jbd2_fc_init(journal, EXT4_NUM_FC_BLKS)) { 20916866d7b3SHarshad Shirwadkar pr_warn("Error while enabling fast commits, turning off."); 20926866d7b3SHarshad Shirwadkar ext4_clear_feature_fast_commit(sb); 20936866d7b3SHarshad Shirwadkar } 20946866d7b3SHarshad Shirwadkar } 2095aa75f4d3SHarshad Shirwadkar 2096aa75f4d3SHarshad Shirwadkar int __init ext4_fc_init_dentry_cache(void) 2097aa75f4d3SHarshad Shirwadkar { 2098aa75f4d3SHarshad Shirwadkar ext4_fc_dentry_cachep = KMEM_CACHE(ext4_fc_dentry_update, 2099aa75f4d3SHarshad Shirwadkar SLAB_RECLAIM_ACCOUNT); 2100aa75f4d3SHarshad Shirwadkar 2101aa75f4d3SHarshad Shirwadkar if (ext4_fc_dentry_cachep == NULL) 2102aa75f4d3SHarshad Shirwadkar return -ENOMEM; 2103aa75f4d3SHarshad Shirwadkar 2104aa75f4d3SHarshad Shirwadkar return 0; 2105aa75f4d3SHarshad Shirwadkar } 2106