// SPDX-License-Identifier: GPL-2.0

/*
 * fs/ext4/fast_commit.c
 *
 * Written by Harshad Shirwadkar <harshadshirwadkar@gmail.com>
 *
 * Ext4 fast commit routines.
 */
#include "ext4.h"
#include "ext4_jbd2.h"
#include "ext4_extents.h"
#include "mballoc.h"

/*
 * Ext4 Fast Commits
 * -----------------
 *
 * Ext4 fast commits implement fine grained journalling for Ext4.
 *
 * Fast commits are organized as a log of tag-length-value (TLV) structs. (See
 * struct ext4_fc_tl). Each TLV contains some delta that is replayed TLV by
 * TLV during the recovery phase. For the scenarios for which we currently
 * don't have replay code, fast commit falls back to full commits.
 * Fast commits record deltas in one of the following three categories.
 *
 * (A) Directory entry updates:
 *
 * - EXT4_FC_TAG_UNLINK		- records directory entry unlink
 * - EXT4_FC_TAG_LINK		- records directory entry link
 * - EXT4_FC_TAG_CREAT		- records inode and directory entry creation
 *
 * (B) File specific data range updates:
 *
 * - EXT4_FC_TAG_ADD_RANGE	- records addition of new blocks to an inode
 * - EXT4_FC_TAG_DEL_RANGE	- records deletion of blocks from an inode
 *
 * (C) Inode metadata (mtime / ctime etc):
 *
 * - EXT4_FC_TAG_INODE		- record the inode that should be replayed
 *				  during recovery. Note that iblocks field is
 *				  not replayed and instead derived during
 *				  replay.
 * Commit Operation
 * ----------------
 * With fast commits, we maintain all the directory entry operations in the
 * order in which they are issued in an in-memory queue. This queue is flushed
 * to disk during the commit operation. We also maintain a list of inodes
 * that need to be committed during a fast commit in another in-memory queue of
 * inodes.
 * During the commit operation, we commit in the following order:
 *
 * [1] Lock inodes for any further data updates by setting COMMITTING state
 * [2] Submit data buffers of all the inodes
 * [3] Wait for [2] to complete
 * [4] Commit all the directory entry updates in the fast commit space
 * [5] Commit all the changed inode structures
 * [6] Write tail tag (this tag ensures the atomicity, please read the following
 *     section for more details).
 * [7] Wait for [4], [5] and [6] to complete.
 *
 * All the inode updates must call ext4_fc_start_update() before starting an
 * update. If such an ongoing update is present, fast commit waits for it to
 * complete. The completion of such an update is marked by
 * ext4_fc_stop_update().
 *
 * Fast Commit Ineligibility
 * -------------------------
 * Not all operations are supported by fast commits today (e.g. extended
 * attributes). Fast commit ineligibility is marked by calling one of the
 * two following functions:
 *
 * - ext4_fc_mark_ineligible(): This makes the next fast commit operation
 *   fall back to a full commit. This is useful in case of transient errors.
 *
 * - ext4_fc_start_ineligible() and ext4_fc_stop_ineligible() - This makes all
 *   the fast commits that happen between ext4_fc_start_ineligible() and
 *   ext4_fc_stop_ineligible(), plus the first fast commit after the call to
 *   ext4_fc_stop_ineligible(), fall back to full commits. It is important to
 *   make one more fast commit fall back to a full commit after the stop call,
 *   so that it is guaranteed that the fast commit ineligible operation
 *   contained within ext4_fc_start_ineligible() and ext4_fc_stop_ineligible()
 *   is followed by at least 1 full commit.
 *
 * Atomicity of commits
 * --------------------
 * In order to guarantee atomicity during the commit operation, fast commit
 * uses "EXT4_FC_TAG_TAIL" tag that marks a fast commit as complete. Tail
 * tag contains CRC of the contents and TID of the transaction after which
 * this fast commit should be applied. Recovery code replays fast commit
 * logs only if there's at least 1 valid tail present. For every fast commit
 * operation, there is 1 tail. This means, we may end up with multiple tails
 * in the fast commit space.
 * Here's an example:
 *
 * - Create a new file A and remove existing file B
 * - fsync()
 * - Append contents to file A
 * - Truncate file A
 * - fsync()
 *
 * The fast commit space at the end of above operations would look like this:
 *      [HEAD] [CREAT A] [UNLINK B] [TAIL] [ADD_RANGE A] [DEL_RANGE A] [TAIL]
 *             |<---   Fast Commit 1   --->|<---   Fast Commit 2   ---->|
 *
 * Replay code should thus check for all the valid tails in the FC area.
 *
 * TODOs
 * -----
 * 1) Make fast commit atomic updates more fine grained. Today, a fast commit
 *    eligible update must be protected within ext4_fc_start_update() and
 *    ext4_fc_stop_update(). These routines are called at a much higher level
 *    than the updates they protect. This can be made more fine grained by
 *    combining with ext4_journal_start().
 *
 * 2) Same as above for ext4_fc_start_ineligible() and ext4_fc_stop_ineligible()
 *
 * 3) Handle more ineligible cases.
 */

#include <trace/events/ext4.h>
static struct kmem_cache *ext4_fc_dentry_cachep;

static void ext4_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
{
	BUFFER_TRACE(bh, "");
	if (uptodate) {
		ext4_debug("%s: Block %lld up-to-date",
			   __func__, bh->b_blocknr);
		set_buffer_uptodate(bh);
	} else {
		ext4_debug("%s: Block %lld not up-to-date",
			   __func__, bh->b_blocknr);
		clear_buffer_uptodate(bh);
	}

	unlock_buffer(bh);
}

static inline void ext4_fc_reset_inode(struct inode *inode)
{
	struct ext4_inode_info *ei = EXT4_I(inode);

	ei->i_fc_lblk_start = 0;
	ei->i_fc_lblk_len = 0;
}

void ext4_fc_init_inode(struct inode *inode)
{
	struct ext4_inode_info *ei = EXT4_I(inode);

	ext4_fc_reset_inode(inode);
	ext4_clear_inode_state(inode, EXT4_STATE_FC_COMMITTING);
	INIT_LIST_HEAD(&ei->i_fc_list);
	init_waitqueue_head(&ei->i_fc_wait);
	atomic_set(&ei->i_fc_updates, 0);
	ei->i_fc_committed_subtid = 0;
}

/*
 * Inform Ext4's fast commits about the start of an inode update
 *
 * This function is called by the high level VFS callbacks before
 * performing any inode update. This function blocks if there's an ongoing
 * fast commit on the inode in question.
 */
void ext4_fc_start_update(struct inode *inode)
{
	struct ext4_inode_info *ei = EXT4_I(inode);

	if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) ||
	    (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY))
		return;

restart:
	spin_lock(&EXT4_SB(inode->i_sb)->s_fc_lock);
	if (list_empty(&ei->i_fc_list))
		goto out;

	if (ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING)) {
		wait_queue_head_t *wq;
#if (BITS_PER_LONG < 64)
		DEFINE_WAIT_BIT(wait, &ei->i_state_flags,
				EXT4_STATE_FC_COMMITTING);
		wq = bit_waitqueue(&ei->i_state_flags,
				   EXT4_STATE_FC_COMMITTING);
#else
		DEFINE_WAIT_BIT(wait, &ei->i_flags,
				EXT4_STATE_FC_COMMITTING);
		wq = bit_waitqueue(&ei->i_flags,
				   EXT4_STATE_FC_COMMITTING);
#endif
		prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
		spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
		schedule();
		finish_wait(wq, &wait.wq_entry);
		goto restart;
	}
out:
	atomic_inc(&ei->i_fc_updates);
	spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
}

/*
 * Stop inode update and wake up waiting fast commits if any.
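 *
 * This pairs with a preceding ext4_fc_start_update(). When the last pending
 * update on the inode drops, a fast commit waiting for the inode's updates
 * to finish (in ext4_fc_submit_inode_data_all()) is woken up via i_fc_wait.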
 */
void ext4_fc_stop_update(struct inode *inode)
{
	struct ext4_inode_info *ei = EXT4_I(inode);

	if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) ||
	    (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY))
		return;

	if (atomic_dec_and_test(&ei->i_fc_updates))
		wake_up_all(&ei->i_fc_wait);
}

/*
 * Remove inode from fast commit list. If the inode is being committed
 * we wait until inode commit is done.
 */
void ext4_fc_del(struct inode *inode)
{
	struct ext4_inode_info *ei = EXT4_I(inode);

	if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) ||
	    (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY))
		return;

restart:
	spin_lock(&EXT4_SB(inode->i_sb)->s_fc_lock);
	if (list_empty(&ei->i_fc_list)) {
		spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
		return;
	}

	if (ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING)) {
		wait_queue_head_t *wq;
#if (BITS_PER_LONG < 64)
		DEFINE_WAIT_BIT(wait, &ei->i_state_flags,
				EXT4_STATE_FC_COMMITTING);
		wq = bit_waitqueue(&ei->i_state_flags,
				   EXT4_STATE_FC_COMMITTING);
#else
		DEFINE_WAIT_BIT(wait, &ei->i_flags,
				EXT4_STATE_FC_COMMITTING);
		wq = bit_waitqueue(&ei->i_flags,
				   EXT4_STATE_FC_COMMITTING);
#endif
		prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
		spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
		schedule();
		finish_wait(wq, &wait.wq_entry);
		goto restart;
	}
	if (!list_empty(&ei->i_fc_list))
		list_del_init(&ei->i_fc_list);
	spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
}

/*
 * Mark file system as fast commit ineligible. This means that next commit
 * operation would result in a full jbd2 commit.
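 *
 * The EXT4_FC_INELIGIBLE bit set here is what ext4_fc_is_ineligible() checks
 * at commit time; the reason code only feeds the per-reason counters in
 * sbi->s_fc_stats and is sanity-checked against EXT4_FC_REASON_MAX.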
 */
void ext4_fc_mark_ineligible(struct super_block *sb, int reason)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);

	if (!test_opt2(sb, JOURNAL_FAST_COMMIT) ||
	    (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY))
		return;

	sbi->s_mount_state |= EXT4_FC_INELIGIBLE;
	WARN_ON(reason >= EXT4_FC_REASON_MAX);
	sbi->s_fc_stats.fc_ineligible_reason_count[reason]++;
}

/*
 * Start a fast commit ineligible update. Any commits that happen while
 * such an operation is in progress fall back to full commits.
 */
void ext4_fc_start_ineligible(struct super_block *sb, int reason)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);

	if (!test_opt2(sb, JOURNAL_FAST_COMMIT) ||
	    (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY))
		return;

	WARN_ON(reason >= EXT4_FC_REASON_MAX);
	sbi->s_fc_stats.fc_ineligible_reason_count[reason]++;
	atomic_inc(&sbi->s_fc_ineligible_updates);
}

/*
 * Stop a fast commit ineligible update. We set EXT4_FC_INELIGIBLE flag here
 * to ensure that after stopping the ineligible update, at least one full
 * commit takes place.
 */
void ext4_fc_stop_ineligible(struct super_block *sb)
{
	if (!test_opt2(sb, JOURNAL_FAST_COMMIT) ||
	    (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY))
		return;

	EXT4_SB(sb)->s_mount_state |= EXT4_FC_INELIGIBLE;
	atomic_dec(&EXT4_SB(sb)->s_fc_ineligible_updates);
}

static inline int ext4_fc_is_ineligible(struct super_block *sb)
{
	return (EXT4_SB(sb)->s_mount_state & EXT4_FC_INELIGIBLE) ||
		atomic_read(&EXT4_SB(sb)->s_fc_ineligible_updates);
}

/*
 * Generic fast commit tracking function. If this is the first time we are
 * called after a full commit, we initialize fast commit fields and then call
 * __fc_track_fn() with update = 0. If we have already been called after a full
 * commit, we pass update = 1.
 * Based on that, the track function can determine
 * if it needs to track a field for the first time or if it needs to just
 * update the previously tracked value.
 *
 * If enqueue is set, this function enqueues the inode in fast commit list.
 */
static int ext4_fc_track_template(
	struct inode *inode, int (*__fc_track_fn)(struct inode *, void *, bool),
	void *args, int enqueue)
{
	tid_t running_txn_tid;
	bool update = false;
	struct ext4_inode_info *ei = EXT4_I(inode);
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	int ret;

	if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) ||
	    (sbi->s_mount_state & EXT4_FC_REPLAY))
		return -EOPNOTSUPP;

	if (ext4_fc_is_ineligible(inode->i_sb))
		return -EINVAL;

	running_txn_tid = sbi->s_journal ?
		sbi->s_journal->j_commit_sequence + 1 : 0;

	mutex_lock(&ei->i_fc_lock);
	if (running_txn_tid == ei->i_sync_tid) {
		update = true;
	} else {
		ext4_fc_reset_inode(inode);
		ei->i_sync_tid = running_txn_tid;
	}
	ret = __fc_track_fn(inode, args, update);
	mutex_unlock(&ei->i_fc_lock);

	if (!enqueue)
		return ret;

	spin_lock(&sbi->s_fc_lock);
	if (list_empty(&EXT4_I(inode)->i_fc_list))
		list_add_tail(&EXT4_I(inode)->i_fc_list,
				(sbi->s_mount_state & EXT4_FC_COMMITTING) ?
				&sbi->s_fc_q[FC_Q_STAGING] :
				&sbi->s_fc_q[FC_Q_MAIN]);
	spin_unlock(&sbi->s_fc_lock);

	return ret;
}

struct __track_dentry_update_args {
	struct dentry *dentry;
	int op;
};

/* __track_fn for directory entry updates. Called with ei->i_fc_lock.
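 * The lock is dropped while the ext4_fc_dentry_update node (and, for names
 * longer than DNAME_INLINE_LEN, the name copy) is allocated with GFP_NOFS,
 * and re-acquired before returning.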
 */
static int __track_dentry_update(struct inode *inode, void *arg, bool update)
{
	struct ext4_fc_dentry_update *node;
	struct ext4_inode_info *ei = EXT4_I(inode);
	struct __track_dentry_update_args *dentry_update =
		(struct __track_dentry_update_args *)arg;
	struct dentry *dentry = dentry_update->dentry;
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);

	mutex_unlock(&ei->i_fc_lock);
	node = kmem_cache_alloc(ext4_fc_dentry_cachep, GFP_NOFS);
	if (!node) {
		ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_MEM);
		mutex_lock(&ei->i_fc_lock);
		return -ENOMEM;
	}

	node->fcd_op = dentry_update->op;
	node->fcd_parent = dentry->d_parent->d_inode->i_ino;
	node->fcd_ino = inode->i_ino;
	if (dentry->d_name.len > DNAME_INLINE_LEN) {
		node->fcd_name.name = kmalloc(dentry->d_name.len, GFP_NOFS);
		if (!node->fcd_name.name) {
			kmem_cache_free(ext4_fc_dentry_cachep, node);
			ext4_fc_mark_ineligible(inode->i_sb,
						EXT4_FC_REASON_MEM);
			mutex_lock(&ei->i_fc_lock);
			return -ENOMEM;
		}
		memcpy((u8 *)node->fcd_name.name, dentry->d_name.name,
			dentry->d_name.len);
	} else {
		memcpy(node->fcd_iname, dentry->d_name.name,
			dentry->d_name.len);
		node->fcd_name.name = node->fcd_iname;
	}
	node->fcd_name.len = dentry->d_name.len;

	spin_lock(&sbi->s_fc_lock);
	if (sbi->s_mount_state & EXT4_FC_COMMITTING)
		list_add_tail(&node->fcd_list,
				&sbi->s_fc_dentry_q[FC_Q_STAGING]);
	else
		list_add_tail(&node->fcd_list, &sbi->s_fc_dentry_q[FC_Q_MAIN]);
	spin_unlock(&sbi->s_fc_lock);
	mutex_lock(&ei->i_fc_lock);

	return 0;
}

void ext4_fc_track_unlink(struct inode *inode, struct dentry *dentry)
{
	struct __track_dentry_update_args args;
	int ret;

	args.dentry = dentry;
	args.op = EXT4_FC_TAG_UNLINK;

	ret = ext4_fc_track_template(inode, __track_dentry_update,
					(void *)&args, 0);
	trace_ext4_fc_track_unlink(inode, dentry, ret);
}

void ext4_fc_track_link(struct inode *inode, struct dentry *dentry)
{
	struct __track_dentry_update_args args;
	int ret;

	args.dentry = dentry;
	args.op = EXT4_FC_TAG_LINK;

	ret = ext4_fc_track_template(inode, __track_dentry_update,
					(void *)&args, 0);
	trace_ext4_fc_track_link(inode, dentry, ret);
}

void ext4_fc_track_create(struct inode *inode, struct dentry *dentry)
{
	struct __track_dentry_update_args args;
	int ret;

	args.dentry = dentry;
	args.op = EXT4_FC_TAG_CREAT;

	ret = ext4_fc_track_template(inode, __track_dentry_update,
					(void *)&args, 0);
	trace_ext4_fc_track_create(inode, dentry, ret);
}

/* __track_fn for inode tracking */
static int __track_inode(struct inode *inode, void *arg, bool update)
{
	if (update)
		return -EEXIST;

	EXT4_I(inode)->i_fc_lblk_len = 0;

	return 0;
}

void ext4_fc_track_inode(struct inode *inode)
{
	int ret;

	if (S_ISDIR(inode->i_mode))
		return;

	ret = ext4_fc_track_template(inode, __track_inode, NULL, 1);
	trace_ext4_fc_track_inode(inode, ret);
}

struct __track_range_args {
	ext4_lblk_t start, end;
};

/* __track_fn for tracking data updates */
static int __track_range(struct inode *inode, void *arg, bool update)
{
	struct ext4_inode_info *ei = EXT4_I(inode);
	ext4_lblk_t oldstart;
	struct __track_range_args *__arg =
		(struct __track_range_args *)arg;

	if (inode->i_ino < EXT4_FIRST_INO(inode->i_sb)) {
		ext4_debug("Special inode %ld being modified\n", inode->i_ino);
		return -ECANCELED;
	}

	oldstart = ei->i_fc_lblk_start;

	if (update && ei->i_fc_lblk_len > 0) {
		ei->i_fc_lblk_start = min(ei->i_fc_lblk_start, __arg->start);
		ei->i_fc_lblk_len =
			max(oldstart + ei->i_fc_lblk_len - 1, __arg->end) -
				ei->i_fc_lblk_start + 1;
	} else {
		ei->i_fc_lblk_start = __arg->start;
		ei->i_fc_lblk_len = __arg->end - __arg->start + 1;
	}

	return 0;
}

void ext4_fc_track_range(struct inode *inode, ext4_lblk_t start,
			 ext4_lblk_t end)
{
	struct __track_range_args args;
	int ret;

	if (S_ISDIR(inode->i_mode))
		return;

	args.start = start;
	args.end = end;

	ret = ext4_fc_track_template(inode, __track_range, &args, 1);

	trace_ext4_fc_track_range(inode, start, end, ret);
}

static void ext4_fc_submit_bh(struct super_block *sb)
{
	int write_flags = REQ_SYNC;
	struct buffer_head *bh = EXT4_SB(sb)->s_fc_bh;

	if (test_opt(sb, BARRIER))
		write_flags |= REQ_FUA | REQ_PREFLUSH;
	lock_buffer(bh);
	clear_buffer_dirty(bh);
	set_buffer_uptodate(bh);
	bh->b_end_io = ext4_end_buffer_io_sync;
	submit_bh(REQ_OP_WRITE, write_flags, bh);
	EXT4_SB(sb)->s_fc_bh = NULL;
}

/* Ext4 commit path routines */

/* memzero and update CRC */
static void *ext4_fc_memzero(struct super_block *sb, void *dst, int len,
				u32 *crc)
{
	void *ret;

	ret = memset(dst, 0, len);
	if (crc)
		*crc = ext4_chksum(EXT4_SB(sb), *crc, dst, len);
	return ret;
}

/*
 * Allocate len bytes on a fast commit buffer.
 *
 * During the commit time this function is used to manage fast commit
 * block space.
 * We don't split a fast commit log onto different
 * blocks. So this function makes sure that if there's not enough space
 * on the current block, the remaining space in the current block is
 * marked as unused by adding EXT4_FC_TAG_PAD tag. In that case, a new
 * block is requested from jbd2 and the CRC is updated to reflect the
 * padding we added.
 */
static u8 *ext4_fc_reserve_space(struct super_block *sb, int len, u32 *crc)
{
	struct ext4_fc_tl *tl;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct buffer_head *bh;
	int bsize = sbi->s_journal->j_blocksize;
	int ret, off = sbi->s_fc_bytes % bsize;
	int pad_len;

	/*
	 * After allocating len, we should have space at least for a 0 byte
	 * padding.
	 */
	if (len + sizeof(struct ext4_fc_tl) > bsize)
		return NULL;

	if (bsize - off - 1 > len + sizeof(struct ext4_fc_tl)) {
		/*
		 * Only allocate from current buffer if we have enough space for
		 * this request AND we have space to add a zero byte padding.
		 */
		if (!sbi->s_fc_bh) {
			ret = jbd2_fc_get_buf(EXT4_SB(sb)->s_journal, &bh);
			if (ret)
				return NULL;
			sbi->s_fc_bh = bh;
		}
		sbi->s_fc_bytes += len;
		return sbi->s_fc_bh->b_data + off;
	}
	/* Need to add PAD tag */
	tl = (struct ext4_fc_tl *)(sbi->s_fc_bh->b_data + off);
	tl->fc_tag = cpu_to_le16(EXT4_FC_TAG_PAD);
	pad_len = bsize - off - 1 - sizeof(struct ext4_fc_tl);
	tl->fc_len = cpu_to_le16(pad_len);
	if (crc)
		*crc = ext4_chksum(sbi, *crc, tl, sizeof(*tl));
	if (pad_len > 0)
		ext4_fc_memzero(sb, tl + 1, pad_len, crc);
	ext4_fc_submit_bh(sb);

	ret = jbd2_fc_get_buf(EXT4_SB(sb)->s_journal, &bh);
	if (ret)
		return NULL;
	sbi->s_fc_bh = bh;
	sbi->s_fc_bytes = (sbi->s_fc_bytes / bsize + 1) * bsize + len;
	return sbi->s_fc_bh->b_data;
}

/* memcpy to fc reserved space and update CRC */
static void *ext4_fc_memcpy(struct super_block *sb, void *dst, const void *src,
			    int len, u32 *crc)
{
	if (crc)
		*crc = ext4_chksum(EXT4_SB(sb), *crc, src, len);
	return memcpy(dst, src, len);
}

/*
 * Complete a fast commit by writing tail tag.
 *
 * Writing tail tag marks the end of a fast commit. In order to guarantee
 * atomicity, after writing tail tag, even if there's space remaining
 * in the block, next commit shouldn't use it. That's why the tail tag
 * carries a length covering the remaining space on the block.
 */
static int ext4_fc_write_tail(struct super_block *sb, u32 crc)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_fc_tl tl;
	struct ext4_fc_tail tail;
	int off, bsize = sbi->s_journal->j_blocksize;
	u8 *dst;

	/*
	 * ext4_fc_reserve_space takes care of allocating an extra block if
	 * there's not enough space on this block for accommodating this tail.
	 */
	dst = ext4_fc_reserve_space(sb, sizeof(tl) + sizeof(tail), &crc);
	if (!dst)
		return -ENOSPC;

	off = sbi->s_fc_bytes % bsize;

	tl.fc_tag = cpu_to_le16(EXT4_FC_TAG_TAIL);
	tl.fc_len = cpu_to_le16(bsize - off - 1 + sizeof(struct ext4_fc_tail));
	sbi->s_fc_bytes = round_up(sbi->s_fc_bytes, bsize);

	ext4_fc_memcpy(sb, dst, &tl, sizeof(tl), &crc);
	dst += sizeof(tl);
	tail.fc_tid = cpu_to_le32(sbi->s_journal->j_running_transaction->t_tid);
	ext4_fc_memcpy(sb, dst, &tail.fc_tid, sizeof(tail.fc_tid), &crc);
	dst += sizeof(tail.fc_tid);
	tail.fc_crc = cpu_to_le32(crc);
	ext4_fc_memcpy(sb, dst, &tail.fc_crc, sizeof(tail.fc_crc), NULL);

	ext4_fc_submit_bh(sb);

	return 0;
}

/*
 * Adds tag, length, value and updates CRC. Returns true if tlv was added.
 * Returns false if there's not enough space.
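 *
 * The value bytes are copied right after the struct ext4_fc_tl header in the
 * space carved out by ext4_fc_reserve_space(), and both header and value are
 * folded into *crc.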
 */
static bool ext4_fc_add_tlv(struct super_block *sb, u16 tag, u16 len, u8 *val,
			    u32 *crc)
{
	struct ext4_fc_tl tl;
	u8 *dst;

	dst = ext4_fc_reserve_space(sb, sizeof(tl) + len, crc);
	if (!dst)
		return false;

	tl.fc_tag = cpu_to_le16(tag);
	tl.fc_len = cpu_to_le16(len);

	ext4_fc_memcpy(sb, dst, &tl, sizeof(tl), crc);
	ext4_fc_memcpy(sb, dst + sizeof(tl), val, len, crc);

	return true;
}

/* Same as above, but adds dentry tlv. */
static bool ext4_fc_add_dentry_tlv(struct super_block *sb, u16 tag,
					int parent_ino, int ino, int dlen,
					const unsigned char *dname,
					u32 *crc)
{
	struct ext4_fc_dentry_info fcd;
	struct ext4_fc_tl tl;
	u8 *dst = ext4_fc_reserve_space(sb, sizeof(tl) + sizeof(fcd) + dlen,
					crc);

	if (!dst)
		return false;

	fcd.fc_parent_ino = cpu_to_le32(parent_ino);
	fcd.fc_ino = cpu_to_le32(ino);
	tl.fc_tag = cpu_to_le16(tag);
	tl.fc_len = cpu_to_le16(sizeof(fcd) + dlen);
	ext4_fc_memcpy(sb, dst, &tl, sizeof(tl), crc);
	dst += sizeof(tl);
	ext4_fc_memcpy(sb, dst, &fcd, sizeof(fcd), crc);
	dst += sizeof(fcd);
	ext4_fc_memcpy(sb, dst, dname, dlen, crc);
	dst += dlen;

	return true;
}

/*
 * Writes inode in the fast commit space under the EXT4_FC_TAG_INODE TLV.
 * Returns 0 on success, error on failure.
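 *
 * The on-disk inode is copied verbatim from its inode table buffer; the copy
 * covers the 128-byte good old inode plus i_extra_isize bytes when the
 * filesystem uses larger inodes.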
 */
static int ext4_fc_write_inode(struct inode *inode, u32 *crc)
{
	struct ext4_inode_info *ei = EXT4_I(inode);
	int inode_len = EXT4_GOOD_OLD_INODE_SIZE;
	int ret;
	struct ext4_iloc iloc;
	struct ext4_fc_inode fc_inode;
	struct ext4_fc_tl tl;
	u8 *dst;

	ret = ext4_get_inode_loc(inode, &iloc);
	if (ret)
		return ret;

	if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE)
		inode_len += ei->i_extra_isize;

	fc_inode.fc_ino = cpu_to_le32(inode->i_ino);
	tl.fc_tag = cpu_to_le16(EXT4_FC_TAG_INODE);
	tl.fc_len = cpu_to_le16(inode_len + sizeof(fc_inode.fc_ino));

	dst = ext4_fc_reserve_space(inode->i_sb,
			sizeof(tl) + inode_len + sizeof(fc_inode.fc_ino), crc);
	if (!dst)
		return -ECANCELED;

	if (!ext4_fc_memcpy(inode->i_sb, dst, &tl, sizeof(tl), crc))
		return -ECANCELED;
	dst += sizeof(tl);
	if (!ext4_fc_memcpy(inode->i_sb, dst, &fc_inode, sizeof(fc_inode), crc))
		return -ECANCELED;
	dst += sizeof(fc_inode);
	if (!ext4_fc_memcpy(inode->i_sb, dst, (u8 *)ext4_raw_inode(&iloc),
					inode_len, crc))
		return -ECANCELED;

	return 0;
}

/*
 * Writes updated data ranges for the inode in question. Updates CRC.
 * Returns 0 on success, error otherwise.
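 *
 * The tracked logical range [i_fc_lblk_start, i_fc_lblk_start + i_fc_lblk_len)
 * is walked with ext4_map_blocks(); mapped extents are logged as
 * EXT4_FC_TAG_ADD_RANGE and holes as EXT4_FC_TAG_DEL_RANGE.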
 */
static int ext4_fc_write_inode_data(struct inode *inode, u32 *crc)
{
	ext4_lblk_t old_blk_size, cur_lblk_off, new_blk_size;
	struct ext4_inode_info *ei = EXT4_I(inode);
	struct ext4_map_blocks map;
	struct ext4_fc_add_range fc_ext;
	struct ext4_fc_del_range lrange;
	struct ext4_extent *ex;
	int ret;

	mutex_lock(&ei->i_fc_lock);
	if (ei->i_fc_lblk_len == 0) {
		mutex_unlock(&ei->i_fc_lock);
		return 0;
	}
	old_blk_size = ei->i_fc_lblk_start;
	new_blk_size = ei->i_fc_lblk_start + ei->i_fc_lblk_len - 1;
	ei->i_fc_lblk_len = 0;
	mutex_unlock(&ei->i_fc_lock);

	cur_lblk_off = old_blk_size;
	jbd_debug(1, "%s: will try writing %d to %d for inode %ld\n",
		  __func__, cur_lblk_off, new_blk_size, inode->i_ino);

	while (cur_lblk_off <= new_blk_size) {
		map.m_lblk = cur_lblk_off;
		map.m_len = new_blk_size - cur_lblk_off + 1;
		ret = ext4_map_blocks(NULL, inode, &map, 0);
		if (ret < 0)
			return -ECANCELED;

		if (map.m_len == 0) {
			cur_lblk_off++;
			continue;
		}

		if (ret == 0) {
			lrange.fc_ino = cpu_to_le32(inode->i_ino);
			lrange.fc_lblk = cpu_to_le32(map.m_lblk);
			lrange.fc_len = cpu_to_le32(map.m_len);
			if (!ext4_fc_add_tlv(inode->i_sb, EXT4_FC_TAG_DEL_RANGE,
					    sizeof(lrange), (u8 *)&lrange, crc))
				return -ENOSPC;
		} else {
			fc_ext.fc_ino = cpu_to_le32(inode->i_ino);
			ex = (struct ext4_extent *)&fc_ext.fc_ex;
			ex->ee_block = cpu_to_le32(map.m_lblk);
			ex->ee_len = cpu_to_le16(map.m_len);
			ext4_ext_store_pblock(ex, map.m_pblk);
			if (map.m_flags & EXT4_MAP_UNWRITTEN)
				ext4_ext_mark_unwritten(ex);
			else
				ext4_ext_mark_initialized(ex);
			if (!ext4_fc_add_tlv(inode->i_sb, EXT4_FC_TAG_ADD_RANGE,
					    sizeof(fc_ext), (u8 *)&fc_ext, crc))
				return -ENOSPC;
		}

		cur_lblk_off += map.m_len;
	}
	return 0;
}


/* Submit data for all the fast commit inodes */
static int ext4_fc_submit_inode_data_all(journal_t *journal)
{
	struct super_block *sb = (struct super_block *)(journal->j_private);
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_inode_info *ei;
	struct list_head *pos;
	int ret = 0;

	spin_lock(&sbi->s_fc_lock);
	sbi->s_mount_state |= EXT4_FC_COMMITTING;
	list_for_each(pos, &sbi->s_fc_q[FC_Q_MAIN]) {
		ei = list_entry(pos, struct ext4_inode_info, i_fc_list);
		ext4_set_inode_state(&ei->vfs_inode, EXT4_STATE_FC_COMMITTING);
		while (atomic_read(&ei->i_fc_updates)) {
			DEFINE_WAIT(wait);

			prepare_to_wait(&ei->i_fc_wait, &wait,
						TASK_UNINTERRUPTIBLE);
			if (atomic_read(&ei->i_fc_updates)) {
				spin_unlock(&sbi->s_fc_lock);
				schedule();
				spin_lock(&sbi->s_fc_lock);
			}
			finish_wait(&ei->i_fc_wait, &wait);
		}
		spin_unlock(&sbi->s_fc_lock);
		ret = jbd2_submit_inode_data(ei->jinode);
		if (ret)
			return ret;
		spin_lock(&sbi->s_fc_lock);
	}
	spin_unlock(&sbi->s_fc_lock);

	return ret;
}

/* Wait for completion of data for all the fast commit inodes */
static int ext4_fc_wait_inode_data_all(journal_t *journal)
{
	struct super_block *sb = (struct super_block *)(journal->j_private);
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_inode_info *pos, *n;
	int ret = 0;

	spin_lock(&sbi->s_fc_lock);
	list_for_each_entry_safe(pos, n, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) {
		if (!ext4_test_inode_state(&pos->vfs_inode,
					   EXT4_STATE_FC_COMMITTING))
			continue;
		spin_unlock(&sbi->s_fc_lock);

		ret = jbd2_wait_inode_data(journal, pos->jinode);
		if (ret)
			return ret;
		spin_lock(&sbi->s_fc_lock);
	}
	spin_unlock(&sbi->s_fc_lock);
	return 0;
}

/* Commit all the directory entry updates */
static int ext4_fc_commit_dentry_updates(journal_t *journal, u32 *crc)
{
	struct super_block *sb = (struct super_block *)(journal->j_private);
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_fc_dentry_update *fc_dentry;
	struct inode *inode;
	struct list_head *pos, *n, *fcd_pos, *fcd_n;
	struct ext4_inode_info *ei;
	int ret;

	if (list_empty(&sbi->s_fc_dentry_q[FC_Q_MAIN]))
		return 0;
	list_for_each_safe(fcd_pos, fcd_n, &sbi->s_fc_dentry_q[FC_Q_MAIN]) {
		fc_dentry = list_entry(fcd_pos, struct ext4_fc_dentry_update,
					fcd_list);
		if (fc_dentry->fcd_op != EXT4_FC_TAG_CREAT) {
			spin_unlock(&sbi->s_fc_lock);
			if (!ext4_fc_add_dentry_tlv(
					sb, fc_dentry->fcd_op,
					fc_dentry->fcd_parent, fc_dentry->fcd_ino,
					fc_dentry->fcd_name.len,
					fc_dentry->fcd_name.name, crc)) {
				ret = -ENOSPC;
				goto lock_and_exit;
			}
			spin_lock(&sbi->s_fc_lock);
			continue;
		}

		inode = NULL;
		list_for_each_safe(pos, n, &sbi->s_fc_q[FC_Q_MAIN]) {
			ei = list_entry(pos, struct ext4_inode_info, i_fc_list);
			if (ei->vfs_inode.i_ino == fc_dentry->fcd_ino) {
				inode = &ei->vfs_inode;
				break;
			}
		}
		/*
		 * If we don't find inode in our list, then it was deleted,
		 * in which case, we don't need to record its create tag.
		 */
		if (!inode)
			continue;
		spin_unlock(&sbi->s_fc_lock);

		/*
		 * We first write the inode and then the create dirent. This
		 * allows the recovery code to create an unnamed inode first
		 * and then link it to a directory entry. This allows us
		 * to use namei.c routines almost as is and simplifies
		 * the recovery code.
		 */
		ret = ext4_fc_write_inode(inode, crc);
		if (ret)
			goto lock_and_exit;

		ret = ext4_fc_write_inode_data(inode, crc);
		if (ret)
			goto lock_and_exit;

		if (!ext4_fc_add_dentry_tlv(
				sb, fc_dentry->fcd_op,
				fc_dentry->fcd_parent, fc_dentry->fcd_ino,
				fc_dentry->fcd_name.len,
				fc_dentry->fcd_name.name, crc)) {
			ret = -ENOSPC;
			goto lock_and_exit;
		}

		spin_lock(&sbi->s_fc_lock);
	}
	return 0;
lock_and_exit:
	spin_lock(&sbi->s_fc_lock);
	return ret;
}

static int ext4_fc_perform_commit(journal_t *journal)
{
	struct super_block *sb = (struct super_block *)(journal->j_private);
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_inode_info *iter;
	struct ext4_fc_head head;
	struct list_head *pos;
	struct inode *inode;
	struct blk_plug plug;
	int ret = 0;
	u32 crc = 0;

	ret = ext4_fc_submit_inode_data_all(journal);
	if (ret)
		return ret;

	ret = ext4_fc_wait_inode_data_all(journal);
	if (ret)
		return ret;

	blk_start_plug(&plug);
	if (sbi->s_fc_bytes == 0) {
		/*
		 * Add a head tag only if this is the first fast commit
		 * in this TID.
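		 * (A non-zero s_fc_bytes means an earlier fast commit against
		 * this transaction already wrote the head tag; s_fc_bytes is
		 * reset only once the fast commit area is cleaned up after a
		 * full commit.)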
1005aa75f4d3SHarshad Shirwadkar */ 1006aa75f4d3SHarshad Shirwadkar head.fc_features = cpu_to_le32(EXT4_FC_SUPPORTED_FEATURES); 1007aa75f4d3SHarshad Shirwadkar head.fc_tid = cpu_to_le32( 1008aa75f4d3SHarshad Shirwadkar sbi->s_journal->j_running_transaction->t_tid); 1009aa75f4d3SHarshad Shirwadkar if (!ext4_fc_add_tlv(sb, EXT4_FC_TAG_HEAD, sizeof(head), 1010aa75f4d3SHarshad Shirwadkar (u8 *)&head, &crc)) 1011aa75f4d3SHarshad Shirwadkar goto out; 1012aa75f4d3SHarshad Shirwadkar } 1013aa75f4d3SHarshad Shirwadkar 1014aa75f4d3SHarshad Shirwadkar spin_lock(&sbi->s_fc_lock); 1015aa75f4d3SHarshad Shirwadkar ret = ext4_fc_commit_dentry_updates(journal, &crc); 1016aa75f4d3SHarshad Shirwadkar if (ret) { 1017aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock); 1018aa75f4d3SHarshad Shirwadkar goto out; 1019aa75f4d3SHarshad Shirwadkar } 1020aa75f4d3SHarshad Shirwadkar 1021aa75f4d3SHarshad Shirwadkar list_for_each(pos, &sbi->s_fc_q[FC_Q_MAIN]) { 1022aa75f4d3SHarshad Shirwadkar iter = list_entry(pos, struct ext4_inode_info, i_fc_list); 1023aa75f4d3SHarshad Shirwadkar inode = &iter->vfs_inode; 1024aa75f4d3SHarshad Shirwadkar if (!ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING)) 1025aa75f4d3SHarshad Shirwadkar continue; 1026aa75f4d3SHarshad Shirwadkar 1027aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock); 1028aa75f4d3SHarshad Shirwadkar ret = ext4_fc_write_inode_data(inode, &crc); 1029aa75f4d3SHarshad Shirwadkar if (ret) 1030aa75f4d3SHarshad Shirwadkar goto out; 1031aa75f4d3SHarshad Shirwadkar ret = ext4_fc_write_inode(inode, &crc); 1032aa75f4d3SHarshad Shirwadkar if (ret) 1033aa75f4d3SHarshad Shirwadkar goto out; 1034aa75f4d3SHarshad Shirwadkar spin_lock(&sbi->s_fc_lock); 1035aa75f4d3SHarshad Shirwadkar EXT4_I(inode)->i_fc_committed_subtid = 1036aa75f4d3SHarshad Shirwadkar atomic_read(&sbi->s_fc_subtid); 1037aa75f4d3SHarshad Shirwadkar } 1038aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock); 1039aa75f4d3SHarshad Shirwadkar 1040aa75f4d3SHarshad Shirwadkar ret = ext4_fc_write_tail(sb, crc); 1041aa75f4d3SHarshad Shirwadkar 1042aa75f4d3SHarshad Shirwadkar out: 1043aa75f4d3SHarshad Shirwadkar blk_finish_plug(&plug); 1044aa75f4d3SHarshad Shirwadkar return ret; 1045aa75f4d3SHarshad Shirwadkar } 1046aa75f4d3SHarshad Shirwadkar 1047aa75f4d3SHarshad Shirwadkar /* 1048aa75f4d3SHarshad Shirwadkar * The main commit entry point. Performs a fast commit for transaction 1049aa75f4d3SHarshad Shirwadkar * commit_tid if needed. If it's not possible to perform a fast commit 1050aa75f4d3SHarshad Shirwadkar * due to various reasons, we fall back to full commit. Returns 0 1051aa75f4d3SHarshad Shirwadkar * on success, error otherwise. 
1052aa75f4d3SHarshad Shirwadkar */ 1053aa75f4d3SHarshad Shirwadkar int ext4_fc_commit(journal_t *journal, tid_t commit_tid) 1054aa75f4d3SHarshad Shirwadkar { 1055aa75f4d3SHarshad Shirwadkar struct super_block *sb = (struct super_block *)(journal->j_private); 1056aa75f4d3SHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB(sb); 1057aa75f4d3SHarshad Shirwadkar int nblks = 0, ret, bsize = journal->j_blocksize; 1058aa75f4d3SHarshad Shirwadkar int subtid = atomic_read(&sbi->s_fc_subtid); 1059aa75f4d3SHarshad Shirwadkar int reason = EXT4_FC_REASON_OK, fc_bufs_before = 0; 1060aa75f4d3SHarshad Shirwadkar ktime_t start_time, commit_time; 1061aa75f4d3SHarshad Shirwadkar 1062aa75f4d3SHarshad Shirwadkar trace_ext4_fc_commit_start(sb); 1063aa75f4d3SHarshad Shirwadkar 1064aa75f4d3SHarshad Shirwadkar start_time = ktime_get(); 1065aa75f4d3SHarshad Shirwadkar 1066aa75f4d3SHarshad Shirwadkar if (!test_opt2(sb, JOURNAL_FAST_COMMIT) || 1067aa75f4d3SHarshad Shirwadkar (ext4_fc_is_ineligible(sb))) { 1068aa75f4d3SHarshad Shirwadkar reason = EXT4_FC_REASON_INELIGIBLE; 1069aa75f4d3SHarshad Shirwadkar goto out; 1070aa75f4d3SHarshad Shirwadkar } 1071aa75f4d3SHarshad Shirwadkar 1072aa75f4d3SHarshad Shirwadkar restart_fc: 1073aa75f4d3SHarshad Shirwadkar ret = jbd2_fc_begin_commit(journal, commit_tid); 1074aa75f4d3SHarshad Shirwadkar if (ret == -EALREADY) { 1075aa75f4d3SHarshad Shirwadkar /* There was an ongoing commit, check if we need to restart */ 1076aa75f4d3SHarshad Shirwadkar if (atomic_read(&sbi->s_fc_subtid) <= subtid && 1077aa75f4d3SHarshad Shirwadkar commit_tid > journal->j_commit_sequence) 1078aa75f4d3SHarshad Shirwadkar goto restart_fc; 1079aa75f4d3SHarshad Shirwadkar reason = EXT4_FC_REASON_ALREADY_COMMITTED; 1080aa75f4d3SHarshad Shirwadkar goto out; 1081aa75f4d3SHarshad Shirwadkar } else if (ret) { 1082aa75f4d3SHarshad Shirwadkar sbi->s_fc_stats.fc_ineligible_reason_count[EXT4_FC_COMMIT_FAILED]++; 1083aa75f4d3SHarshad Shirwadkar reason = EXT4_FC_REASON_FC_START_FAILED; 1084aa75f4d3SHarshad Shirwadkar goto out; 1085aa75f4d3SHarshad Shirwadkar } 1086aa75f4d3SHarshad Shirwadkar 1087aa75f4d3SHarshad Shirwadkar fc_bufs_before = (sbi->s_fc_bytes + bsize - 1) / bsize; 1088aa75f4d3SHarshad Shirwadkar ret = ext4_fc_perform_commit(journal); 1089aa75f4d3SHarshad Shirwadkar if (ret < 0) { 1090aa75f4d3SHarshad Shirwadkar sbi->s_fc_stats.fc_ineligible_reason_count[EXT4_FC_COMMIT_FAILED]++; 1091aa75f4d3SHarshad Shirwadkar reason = EXT4_FC_REASON_FC_FAILED; 1092aa75f4d3SHarshad Shirwadkar goto out; 1093aa75f4d3SHarshad Shirwadkar } 1094aa75f4d3SHarshad Shirwadkar nblks = (sbi->s_fc_bytes + bsize - 1) / bsize - fc_bufs_before; 1095aa75f4d3SHarshad Shirwadkar ret = jbd2_fc_wait_bufs(journal, nblks); 1096aa75f4d3SHarshad Shirwadkar if (ret < 0) { 1097aa75f4d3SHarshad Shirwadkar sbi->s_fc_stats.fc_ineligible_reason_count[EXT4_FC_COMMIT_FAILED]++; 1098aa75f4d3SHarshad Shirwadkar reason = EXT4_FC_REASON_FC_FAILED; 1099aa75f4d3SHarshad Shirwadkar goto out; 1100aa75f4d3SHarshad Shirwadkar } 1101aa75f4d3SHarshad Shirwadkar atomic_inc(&sbi->s_fc_subtid); 1102aa75f4d3SHarshad Shirwadkar jbd2_fc_end_commit(journal); 1103aa75f4d3SHarshad Shirwadkar out: 1104aa75f4d3SHarshad Shirwadkar /* Has any ineligible update happened since we started? 
*/ 1105aa75f4d3SHarshad Shirwadkar if (reason == EXT4_FC_REASON_OK && ext4_fc_is_ineligible(sb)) { 1106aa75f4d3SHarshad Shirwadkar sbi->s_fc_stats.fc_ineligible_reason_count[EXT4_FC_COMMIT_FAILED]++; 1107aa75f4d3SHarshad Shirwadkar reason = EXT4_FC_REASON_INELIGIBLE; 1108aa75f4d3SHarshad Shirwadkar } 1109aa75f4d3SHarshad Shirwadkar 1110aa75f4d3SHarshad Shirwadkar spin_lock(&sbi->s_fc_lock); 1111aa75f4d3SHarshad Shirwadkar if (reason != EXT4_FC_REASON_OK && 1112aa75f4d3SHarshad Shirwadkar reason != EXT4_FC_REASON_ALREADY_COMMITTED) { 1113aa75f4d3SHarshad Shirwadkar sbi->s_fc_stats.fc_ineligible_commits++; 1114aa75f4d3SHarshad Shirwadkar } else { 1115aa75f4d3SHarshad Shirwadkar sbi->s_fc_stats.fc_num_commits++; 1116aa75f4d3SHarshad Shirwadkar sbi->s_fc_stats.fc_numblks += nblks; 1117aa75f4d3SHarshad Shirwadkar } 1118aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock); 1119aa75f4d3SHarshad Shirwadkar nblks = (reason == EXT4_FC_REASON_OK) ? nblks : 0; 1120aa75f4d3SHarshad Shirwadkar trace_ext4_fc_commit_stop(sb, nblks, reason); 1121aa75f4d3SHarshad Shirwadkar commit_time = ktime_to_ns(ktime_sub(ktime_get(), start_time)); 1122aa75f4d3SHarshad Shirwadkar /* 1123aa75f4d3SHarshad Shirwadkar * weight the commit time higher than the average time so we don't 1124aa75f4d3SHarshad Shirwadkar * react too strongly to vast changes in the commit time 1125aa75f4d3SHarshad Shirwadkar */ 1126aa75f4d3SHarshad Shirwadkar if (likely(sbi->s_fc_avg_commit_time)) 1127aa75f4d3SHarshad Shirwadkar sbi->s_fc_avg_commit_time = (commit_time + 1128aa75f4d3SHarshad Shirwadkar sbi->s_fc_avg_commit_time * 3) / 4; 1129aa75f4d3SHarshad Shirwadkar else 1130aa75f4d3SHarshad Shirwadkar sbi->s_fc_avg_commit_time = commit_time; 1131aa75f4d3SHarshad Shirwadkar jbd_debug(1, 1132aa75f4d3SHarshad Shirwadkar "Fast commit ended with blks = %d, reason = %d, subtid - %d", 1133aa75f4d3SHarshad Shirwadkar nblks, reason, subtid); 1134aa75f4d3SHarshad Shirwadkar if (reason == EXT4_FC_REASON_FC_FAILED) 1135aa75f4d3SHarshad Shirwadkar return jbd2_fc_end_commit_fallback(journal, commit_tid); 1136aa75f4d3SHarshad Shirwadkar if (reason == EXT4_FC_REASON_FC_START_FAILED || 1137aa75f4d3SHarshad Shirwadkar reason == EXT4_FC_REASON_INELIGIBLE) 1138aa75f4d3SHarshad Shirwadkar return jbd2_complete_transaction(journal, commit_tid); 1139aa75f4d3SHarshad Shirwadkar return 0; 1140aa75f4d3SHarshad Shirwadkar } 1141aa75f4d3SHarshad Shirwadkar 1142ff780b91SHarshad Shirwadkar /* 1143ff780b91SHarshad Shirwadkar * Fast commit cleanup routine. This is called after every fast commit and 1144ff780b91SHarshad Shirwadkar * full commit. full is true if we are called after a full commit. 
1145ff780b91SHarshad Shirwadkar */
1146ff780b91SHarshad Shirwadkar static void ext4_fc_cleanup(journal_t *journal, int full)
1147ff780b91SHarshad Shirwadkar {
1148aa75f4d3SHarshad Shirwadkar struct super_block *sb = journal->j_private;
1149aa75f4d3SHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB(sb);
1150aa75f4d3SHarshad Shirwadkar struct ext4_inode_info *iter;
1151aa75f4d3SHarshad Shirwadkar struct ext4_fc_dentry_update *fc_dentry;
1152aa75f4d3SHarshad Shirwadkar struct list_head *pos, *n;
1153aa75f4d3SHarshad Shirwadkar 
1154aa75f4d3SHarshad Shirwadkar if (full && sbi->s_fc_bh)
1155aa75f4d3SHarshad Shirwadkar sbi->s_fc_bh = NULL;
1156aa75f4d3SHarshad Shirwadkar 
1157aa75f4d3SHarshad Shirwadkar jbd2_fc_release_bufs(journal);
1158aa75f4d3SHarshad Shirwadkar 
1159aa75f4d3SHarshad Shirwadkar spin_lock(&sbi->s_fc_lock);
1160aa75f4d3SHarshad Shirwadkar list_for_each_safe(pos, n, &sbi->s_fc_q[FC_Q_MAIN]) {
1161aa75f4d3SHarshad Shirwadkar iter = list_entry(pos, struct ext4_inode_info, i_fc_list);
1162aa75f4d3SHarshad Shirwadkar list_del_init(&iter->i_fc_list);
1163aa75f4d3SHarshad Shirwadkar ext4_clear_inode_state(&iter->vfs_inode,
1164aa75f4d3SHarshad Shirwadkar EXT4_STATE_FC_COMMITTING);
1165aa75f4d3SHarshad Shirwadkar ext4_fc_reset_inode(&iter->vfs_inode);
1166aa75f4d3SHarshad Shirwadkar /* Make sure EXT4_STATE_FC_COMMITTING bit is clear */
1167aa75f4d3SHarshad Shirwadkar smp_mb();
1168aa75f4d3SHarshad Shirwadkar #if (BITS_PER_LONG < 64)
1169aa75f4d3SHarshad Shirwadkar wake_up_bit(&iter->i_state_flags, EXT4_STATE_FC_COMMITTING);
1170aa75f4d3SHarshad Shirwadkar #else
1171aa75f4d3SHarshad Shirwadkar wake_up_bit(&iter->i_flags, EXT4_STATE_FC_COMMITTING);
1172aa75f4d3SHarshad Shirwadkar #endif
1173aa75f4d3SHarshad Shirwadkar }
1174aa75f4d3SHarshad Shirwadkar 
1175aa75f4d3SHarshad Shirwadkar while (!list_empty(&sbi->s_fc_dentry_q[FC_Q_MAIN])) {
1176aa75f4d3SHarshad Shirwadkar fc_dentry = list_first_entry(&sbi->s_fc_dentry_q[FC_Q_MAIN],
1177aa75f4d3SHarshad Shirwadkar struct ext4_fc_dentry_update,
1178aa75f4d3SHarshad Shirwadkar fcd_list);
1179aa75f4d3SHarshad Shirwadkar list_del_init(&fc_dentry->fcd_list);
1180aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock);
1181aa75f4d3SHarshad Shirwadkar 
1182aa75f4d3SHarshad Shirwadkar if (fc_dentry->fcd_name.name &&
1183aa75f4d3SHarshad Shirwadkar fc_dentry->fcd_name.len > DNAME_INLINE_LEN)
1184aa75f4d3SHarshad Shirwadkar kfree(fc_dentry->fcd_name.name);
1185aa75f4d3SHarshad Shirwadkar kmem_cache_free(ext4_fc_dentry_cachep, fc_dentry);
1186aa75f4d3SHarshad Shirwadkar spin_lock(&sbi->s_fc_lock);
1187aa75f4d3SHarshad Shirwadkar }
1188aa75f4d3SHarshad Shirwadkar 
1189aa75f4d3SHarshad Shirwadkar list_splice_init(&sbi->s_fc_dentry_q[FC_Q_STAGING],
1190aa75f4d3SHarshad Shirwadkar &sbi->s_fc_dentry_q[FC_Q_MAIN]);
1191aa75f4d3SHarshad Shirwadkar list_splice_init(&sbi->s_fc_q[FC_Q_STAGING],
1192aa75f4d3SHarshad Shirwadkar &sbi->s_fc_q[FC_Q_MAIN]);
1193aa75f4d3SHarshad Shirwadkar 
1194aa75f4d3SHarshad Shirwadkar sbi->s_mount_state &= ~EXT4_FC_COMMITTING;
1195aa75f4d3SHarshad Shirwadkar sbi->s_mount_state &= ~EXT4_FC_INELIGIBLE;
1196aa75f4d3SHarshad Shirwadkar 
1197aa75f4d3SHarshad Shirwadkar if (full)
1198aa75f4d3SHarshad Shirwadkar sbi->s_fc_bytes = 0;
1199aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock);
1200aa75f4d3SHarshad Shirwadkar trace_ext4_fc_stats(sb);
1201ff780b91SHarshad Shirwadkar }
12026866d7b3SHarshad Shirwadkar 
12038016e29fSHarshad Shirwadkar /* Ext4 Replay Path Routines */
12048016e29fSHarshad Shirwadkar 
12058016e29fSHarshad Shirwadkar 
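/*
 * The replay helpers below all consume the same tag-length-value (TLV) stream
 * that the commit path wrote into the fast commit area. As a rough,
 * illustrative sketch (not code compiled anywhere in this file), a handler for
 * a single fast commit block walks the stream like this, using
 * fc_for_each_tl() and the accessors defined just below; handle_tag() is a
 * hypothetical placeholder, and bh/journal are as passed to ext4_fc_replay():
 *
 *	struct ext4_fc_tl *tl;
 *	u8 *start = (u8 *)bh->b_data;
 *	u8 *end = start + journal->j_blocksize - 1;
 *
 *	fc_for_each_tl(start, end, tl) {
 *		u16 tag = le16_to_cpu(tl->fc_tag);
 *		u8 *val = ext4_fc_tag_val(tl);
 *
 *		handle_tag(tag, val, ext4_fc_tag_len(tl));
 *	}
 *
 * This is the pattern ext4_fc_replay_scan() and ext4_fc_replay() follow
 * further down.
 */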
/* Get length of a particular tlv */ 12068016e29fSHarshad Shirwadkar static inline int ext4_fc_tag_len(struct ext4_fc_tl *tl) 12078016e29fSHarshad Shirwadkar { 12088016e29fSHarshad Shirwadkar return le16_to_cpu(tl->fc_len); 12098016e29fSHarshad Shirwadkar } 12108016e29fSHarshad Shirwadkar 12118016e29fSHarshad Shirwadkar /* Get a pointer to "value" of a tlv */ 12128016e29fSHarshad Shirwadkar static inline u8 *ext4_fc_tag_val(struct ext4_fc_tl *tl) 12138016e29fSHarshad Shirwadkar { 12148016e29fSHarshad Shirwadkar return (u8 *)tl + sizeof(*tl); 12158016e29fSHarshad Shirwadkar } 12168016e29fSHarshad Shirwadkar 12178016e29fSHarshad Shirwadkar /* Helper struct for dentry replay routines */ 12188016e29fSHarshad Shirwadkar struct dentry_info_args { 12198016e29fSHarshad Shirwadkar int parent_ino, dname_len, ino, inode_len; 12208016e29fSHarshad Shirwadkar char *dname; 12218016e29fSHarshad Shirwadkar }; 12228016e29fSHarshad Shirwadkar 12238016e29fSHarshad Shirwadkar static inline void tl_to_darg(struct dentry_info_args *darg, 12248016e29fSHarshad Shirwadkar struct ext4_fc_tl *tl) 12258016e29fSHarshad Shirwadkar { 12268016e29fSHarshad Shirwadkar struct ext4_fc_dentry_info *fcd; 12278016e29fSHarshad Shirwadkar 12288016e29fSHarshad Shirwadkar fcd = (struct ext4_fc_dentry_info *)ext4_fc_tag_val(tl); 12298016e29fSHarshad Shirwadkar 12308016e29fSHarshad Shirwadkar darg->parent_ino = le32_to_cpu(fcd->fc_parent_ino); 12318016e29fSHarshad Shirwadkar darg->ino = le32_to_cpu(fcd->fc_ino); 12328016e29fSHarshad Shirwadkar darg->dname = fcd->fc_dname; 12338016e29fSHarshad Shirwadkar darg->dname_len = ext4_fc_tag_len(tl) - 12348016e29fSHarshad Shirwadkar sizeof(struct ext4_fc_dentry_info); 12358016e29fSHarshad Shirwadkar } 12368016e29fSHarshad Shirwadkar 12378016e29fSHarshad Shirwadkar /* Unlink replay function */ 12388016e29fSHarshad Shirwadkar static int ext4_fc_replay_unlink(struct super_block *sb, struct ext4_fc_tl *tl) 12398016e29fSHarshad Shirwadkar { 12408016e29fSHarshad Shirwadkar struct inode *inode, *old_parent; 12418016e29fSHarshad Shirwadkar struct qstr entry; 12428016e29fSHarshad Shirwadkar struct dentry_info_args darg; 12438016e29fSHarshad Shirwadkar int ret = 0; 12448016e29fSHarshad Shirwadkar 12458016e29fSHarshad Shirwadkar tl_to_darg(&darg, tl); 12468016e29fSHarshad Shirwadkar 12478016e29fSHarshad Shirwadkar trace_ext4_fc_replay(sb, EXT4_FC_TAG_UNLINK, darg.ino, 12488016e29fSHarshad Shirwadkar darg.parent_ino, darg.dname_len); 12498016e29fSHarshad Shirwadkar 12508016e29fSHarshad Shirwadkar entry.name = darg.dname; 12518016e29fSHarshad Shirwadkar entry.len = darg.dname_len; 12528016e29fSHarshad Shirwadkar inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL); 12538016e29fSHarshad Shirwadkar 12548016e29fSHarshad Shirwadkar if (IS_ERR_OR_NULL(inode)) { 12558016e29fSHarshad Shirwadkar jbd_debug(1, "Inode %d not found", darg.ino); 12568016e29fSHarshad Shirwadkar return 0; 12578016e29fSHarshad Shirwadkar } 12588016e29fSHarshad Shirwadkar 12598016e29fSHarshad Shirwadkar old_parent = ext4_iget(sb, darg.parent_ino, 12608016e29fSHarshad Shirwadkar EXT4_IGET_NORMAL); 12618016e29fSHarshad Shirwadkar if (IS_ERR_OR_NULL(old_parent)) { 12628016e29fSHarshad Shirwadkar jbd_debug(1, "Dir with inode %d not found", darg.parent_ino); 12638016e29fSHarshad Shirwadkar iput(inode); 12648016e29fSHarshad Shirwadkar return 0; 12658016e29fSHarshad Shirwadkar } 12668016e29fSHarshad Shirwadkar 12678016e29fSHarshad Shirwadkar ret = __ext4_unlink(old_parent, &entry, inode); 12688016e29fSHarshad Shirwadkar /* -ENOENT ok coz it might 
not exist anymore. */ 12698016e29fSHarshad Shirwadkar if (ret == -ENOENT) 12708016e29fSHarshad Shirwadkar ret = 0; 12718016e29fSHarshad Shirwadkar iput(old_parent); 12728016e29fSHarshad Shirwadkar iput(inode); 12738016e29fSHarshad Shirwadkar return ret; 12748016e29fSHarshad Shirwadkar } 12758016e29fSHarshad Shirwadkar 12768016e29fSHarshad Shirwadkar static int ext4_fc_replay_link_internal(struct super_block *sb, 12778016e29fSHarshad Shirwadkar struct dentry_info_args *darg, 12788016e29fSHarshad Shirwadkar struct inode *inode) 12798016e29fSHarshad Shirwadkar { 12808016e29fSHarshad Shirwadkar struct inode *dir = NULL; 12818016e29fSHarshad Shirwadkar struct dentry *dentry_dir = NULL, *dentry_inode = NULL; 12828016e29fSHarshad Shirwadkar struct qstr qstr_dname = QSTR_INIT(darg->dname, darg->dname_len); 12838016e29fSHarshad Shirwadkar int ret = 0; 12848016e29fSHarshad Shirwadkar 12858016e29fSHarshad Shirwadkar dir = ext4_iget(sb, darg->parent_ino, EXT4_IGET_NORMAL); 12868016e29fSHarshad Shirwadkar if (IS_ERR(dir)) { 12878016e29fSHarshad Shirwadkar jbd_debug(1, "Dir with inode %d not found.", darg->parent_ino); 12888016e29fSHarshad Shirwadkar dir = NULL; 12898016e29fSHarshad Shirwadkar goto out; 12908016e29fSHarshad Shirwadkar } 12918016e29fSHarshad Shirwadkar 12928016e29fSHarshad Shirwadkar dentry_dir = d_obtain_alias(dir); 12938016e29fSHarshad Shirwadkar if (IS_ERR(dentry_dir)) { 12948016e29fSHarshad Shirwadkar jbd_debug(1, "Failed to obtain dentry"); 12958016e29fSHarshad Shirwadkar dentry_dir = NULL; 12968016e29fSHarshad Shirwadkar goto out; 12978016e29fSHarshad Shirwadkar } 12988016e29fSHarshad Shirwadkar 12998016e29fSHarshad Shirwadkar dentry_inode = d_alloc(dentry_dir, &qstr_dname); 13008016e29fSHarshad Shirwadkar if (!dentry_inode) { 13018016e29fSHarshad Shirwadkar jbd_debug(1, "Inode dentry not created."); 13028016e29fSHarshad Shirwadkar ret = -ENOMEM; 13038016e29fSHarshad Shirwadkar goto out; 13048016e29fSHarshad Shirwadkar } 13058016e29fSHarshad Shirwadkar 13068016e29fSHarshad Shirwadkar ret = __ext4_link(dir, inode, dentry_inode); 13078016e29fSHarshad Shirwadkar /* 13088016e29fSHarshad Shirwadkar * It's possible that link already existed since data blocks 13098016e29fSHarshad Shirwadkar * for the dir in question got persisted before we crashed OR 13108016e29fSHarshad Shirwadkar * we replayed this tag and crashed before the entire replay 13118016e29fSHarshad Shirwadkar * could complete. 
13128016e29fSHarshad Shirwadkar */ 13138016e29fSHarshad Shirwadkar if (ret && ret != -EEXIST) { 13148016e29fSHarshad Shirwadkar jbd_debug(1, "Failed to link\n"); 13158016e29fSHarshad Shirwadkar goto out; 13168016e29fSHarshad Shirwadkar } 13178016e29fSHarshad Shirwadkar 13188016e29fSHarshad Shirwadkar ret = 0; 13198016e29fSHarshad Shirwadkar out: 13208016e29fSHarshad Shirwadkar if (dentry_dir) { 13218016e29fSHarshad Shirwadkar d_drop(dentry_dir); 13228016e29fSHarshad Shirwadkar dput(dentry_dir); 13238016e29fSHarshad Shirwadkar } else if (dir) { 13248016e29fSHarshad Shirwadkar iput(dir); 13258016e29fSHarshad Shirwadkar } 13268016e29fSHarshad Shirwadkar if (dentry_inode) { 13278016e29fSHarshad Shirwadkar d_drop(dentry_inode); 13288016e29fSHarshad Shirwadkar dput(dentry_inode); 13298016e29fSHarshad Shirwadkar } 13308016e29fSHarshad Shirwadkar 13318016e29fSHarshad Shirwadkar return ret; 13328016e29fSHarshad Shirwadkar } 13338016e29fSHarshad Shirwadkar 13348016e29fSHarshad Shirwadkar /* Link replay function */ 13358016e29fSHarshad Shirwadkar static int ext4_fc_replay_link(struct super_block *sb, struct ext4_fc_tl *tl) 13368016e29fSHarshad Shirwadkar { 13378016e29fSHarshad Shirwadkar struct inode *inode; 13388016e29fSHarshad Shirwadkar struct dentry_info_args darg; 13398016e29fSHarshad Shirwadkar int ret = 0; 13408016e29fSHarshad Shirwadkar 13418016e29fSHarshad Shirwadkar tl_to_darg(&darg, tl); 13428016e29fSHarshad Shirwadkar trace_ext4_fc_replay(sb, EXT4_FC_TAG_LINK, darg.ino, 13438016e29fSHarshad Shirwadkar darg.parent_ino, darg.dname_len); 13448016e29fSHarshad Shirwadkar 13458016e29fSHarshad Shirwadkar inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL); 13468016e29fSHarshad Shirwadkar if (IS_ERR_OR_NULL(inode)) { 13478016e29fSHarshad Shirwadkar jbd_debug(1, "Inode not found."); 13488016e29fSHarshad Shirwadkar return 0; 13498016e29fSHarshad Shirwadkar } 13508016e29fSHarshad Shirwadkar 13518016e29fSHarshad Shirwadkar ret = ext4_fc_replay_link_internal(sb, &darg, inode); 13528016e29fSHarshad Shirwadkar iput(inode); 13538016e29fSHarshad Shirwadkar return ret; 13548016e29fSHarshad Shirwadkar } 13558016e29fSHarshad Shirwadkar 13568016e29fSHarshad Shirwadkar /* 13578016e29fSHarshad Shirwadkar * Record all the modified inodes during replay. We use this later to setup 13588016e29fSHarshad Shirwadkar * block bitmaps correctly. 
13598016e29fSHarshad Shirwadkar */ 13608016e29fSHarshad Shirwadkar static int ext4_fc_record_modified_inode(struct super_block *sb, int ino) 13618016e29fSHarshad Shirwadkar { 13628016e29fSHarshad Shirwadkar struct ext4_fc_replay_state *state; 13638016e29fSHarshad Shirwadkar int i; 13648016e29fSHarshad Shirwadkar 13658016e29fSHarshad Shirwadkar state = &EXT4_SB(sb)->s_fc_replay_state; 13668016e29fSHarshad Shirwadkar for (i = 0; i < state->fc_modified_inodes_used; i++) 13678016e29fSHarshad Shirwadkar if (state->fc_modified_inodes[i] == ino) 13688016e29fSHarshad Shirwadkar return 0; 13698016e29fSHarshad Shirwadkar if (state->fc_modified_inodes_used == state->fc_modified_inodes_size) { 13708016e29fSHarshad Shirwadkar state->fc_modified_inodes_size += 13718016e29fSHarshad Shirwadkar EXT4_FC_REPLAY_REALLOC_INCREMENT; 13728016e29fSHarshad Shirwadkar state->fc_modified_inodes = krealloc( 13738016e29fSHarshad Shirwadkar state->fc_modified_inodes, sizeof(int) * 13748016e29fSHarshad Shirwadkar state->fc_modified_inodes_size, 13758016e29fSHarshad Shirwadkar GFP_KERNEL); 13768016e29fSHarshad Shirwadkar if (!state->fc_modified_inodes) 13778016e29fSHarshad Shirwadkar return -ENOMEM; 13788016e29fSHarshad Shirwadkar } 13798016e29fSHarshad Shirwadkar state->fc_modified_inodes[state->fc_modified_inodes_used++] = ino; 13808016e29fSHarshad Shirwadkar return 0; 13818016e29fSHarshad Shirwadkar } 13828016e29fSHarshad Shirwadkar 13838016e29fSHarshad Shirwadkar /* 13848016e29fSHarshad Shirwadkar * Inode replay function 13858016e29fSHarshad Shirwadkar */ 13868016e29fSHarshad Shirwadkar static int ext4_fc_replay_inode(struct super_block *sb, struct ext4_fc_tl *tl) 13878016e29fSHarshad Shirwadkar { 13888016e29fSHarshad Shirwadkar struct ext4_fc_inode *fc_inode; 13898016e29fSHarshad Shirwadkar struct ext4_inode *raw_inode; 13908016e29fSHarshad Shirwadkar struct ext4_inode *raw_fc_inode; 13918016e29fSHarshad Shirwadkar struct inode *inode = NULL; 13928016e29fSHarshad Shirwadkar struct ext4_iloc iloc; 13938016e29fSHarshad Shirwadkar int inode_len, ino, ret, tag = le16_to_cpu(tl->fc_tag); 13948016e29fSHarshad Shirwadkar struct ext4_extent_header *eh; 13958016e29fSHarshad Shirwadkar 13968016e29fSHarshad Shirwadkar fc_inode = (struct ext4_fc_inode *)ext4_fc_tag_val(tl); 13978016e29fSHarshad Shirwadkar 13988016e29fSHarshad Shirwadkar ino = le32_to_cpu(fc_inode->fc_ino); 13998016e29fSHarshad Shirwadkar trace_ext4_fc_replay(sb, tag, ino, 0, 0); 14008016e29fSHarshad Shirwadkar 14018016e29fSHarshad Shirwadkar inode = ext4_iget(sb, ino, EXT4_IGET_NORMAL); 14028016e29fSHarshad Shirwadkar if (!IS_ERR_OR_NULL(inode)) { 14038016e29fSHarshad Shirwadkar ext4_ext_clear_bb(inode); 14048016e29fSHarshad Shirwadkar iput(inode); 14058016e29fSHarshad Shirwadkar } 14068016e29fSHarshad Shirwadkar 14078016e29fSHarshad Shirwadkar ext4_fc_record_modified_inode(sb, ino); 14088016e29fSHarshad Shirwadkar 14098016e29fSHarshad Shirwadkar raw_fc_inode = (struct ext4_inode *)fc_inode->fc_raw_inode; 14108016e29fSHarshad Shirwadkar ret = ext4_get_fc_inode_loc(sb, ino, &iloc); 14118016e29fSHarshad Shirwadkar if (ret) 14128016e29fSHarshad Shirwadkar goto out; 14138016e29fSHarshad Shirwadkar 14148016e29fSHarshad Shirwadkar inode_len = ext4_fc_tag_len(tl) - sizeof(struct ext4_fc_inode); 14158016e29fSHarshad Shirwadkar raw_inode = ext4_raw_inode(&iloc); 14168016e29fSHarshad Shirwadkar 14178016e29fSHarshad Shirwadkar memcpy(raw_inode, raw_fc_inode, offsetof(struct ext4_inode, i_block)); 14188016e29fSHarshad Shirwadkar memcpy(&raw_inode->i_generation, 
&raw_fc_inode->i_generation, 14198016e29fSHarshad Shirwadkar inode_len - offsetof(struct ext4_inode, i_generation)); 14208016e29fSHarshad Shirwadkar if (le32_to_cpu(raw_inode->i_flags) & EXT4_EXTENTS_FL) { 14218016e29fSHarshad Shirwadkar eh = (struct ext4_extent_header *)(&raw_inode->i_block[0]); 14228016e29fSHarshad Shirwadkar if (eh->eh_magic != EXT4_EXT_MAGIC) { 14238016e29fSHarshad Shirwadkar memset(eh, 0, sizeof(*eh)); 14248016e29fSHarshad Shirwadkar eh->eh_magic = EXT4_EXT_MAGIC; 14258016e29fSHarshad Shirwadkar eh->eh_max = cpu_to_le16( 14268016e29fSHarshad Shirwadkar (sizeof(raw_inode->i_block) - 14278016e29fSHarshad Shirwadkar sizeof(struct ext4_extent_header)) 14288016e29fSHarshad Shirwadkar / sizeof(struct ext4_extent)); 14298016e29fSHarshad Shirwadkar } 14308016e29fSHarshad Shirwadkar } else if (le32_to_cpu(raw_inode->i_flags) & EXT4_INLINE_DATA_FL) { 14318016e29fSHarshad Shirwadkar memcpy(raw_inode->i_block, raw_fc_inode->i_block, 14328016e29fSHarshad Shirwadkar sizeof(raw_inode->i_block)); 14338016e29fSHarshad Shirwadkar } 14348016e29fSHarshad Shirwadkar 14358016e29fSHarshad Shirwadkar /* Immediately update the inode on disk. */ 14368016e29fSHarshad Shirwadkar ret = ext4_handle_dirty_metadata(NULL, NULL, iloc.bh); 14378016e29fSHarshad Shirwadkar if (ret) 14388016e29fSHarshad Shirwadkar goto out; 14398016e29fSHarshad Shirwadkar ret = sync_dirty_buffer(iloc.bh); 14408016e29fSHarshad Shirwadkar if (ret) 14418016e29fSHarshad Shirwadkar goto out; 14428016e29fSHarshad Shirwadkar ret = ext4_mark_inode_used(sb, ino); 14438016e29fSHarshad Shirwadkar if (ret) 14448016e29fSHarshad Shirwadkar goto out; 14458016e29fSHarshad Shirwadkar 14468016e29fSHarshad Shirwadkar /* Given that we just wrote the inode on disk, this SHOULD succeed. */ 14478016e29fSHarshad Shirwadkar inode = ext4_iget(sb, ino, EXT4_IGET_NORMAL); 14488016e29fSHarshad Shirwadkar if (IS_ERR_OR_NULL(inode)) { 14498016e29fSHarshad Shirwadkar jbd_debug(1, "Inode not found."); 14508016e29fSHarshad Shirwadkar return -EFSCORRUPTED; 14518016e29fSHarshad Shirwadkar } 14528016e29fSHarshad Shirwadkar 14538016e29fSHarshad Shirwadkar /* 14548016e29fSHarshad Shirwadkar * Our allocator could have made different decisions than before 14558016e29fSHarshad Shirwadkar * crashing. This should be fixed but until then, we calculate 14568016e29fSHarshad Shirwadkar * the number of blocks the inode. 14578016e29fSHarshad Shirwadkar */ 14588016e29fSHarshad Shirwadkar ext4_ext_replay_set_iblocks(inode); 14598016e29fSHarshad Shirwadkar 14608016e29fSHarshad Shirwadkar inode->i_generation = le32_to_cpu(ext4_raw_inode(&iloc)->i_generation); 14618016e29fSHarshad Shirwadkar ext4_reset_inode_seed(inode); 14628016e29fSHarshad Shirwadkar 14638016e29fSHarshad Shirwadkar ext4_inode_csum_set(inode, ext4_raw_inode(&iloc), EXT4_I(inode)); 14648016e29fSHarshad Shirwadkar ret = ext4_handle_dirty_metadata(NULL, NULL, iloc.bh); 14658016e29fSHarshad Shirwadkar sync_dirty_buffer(iloc.bh); 14668016e29fSHarshad Shirwadkar brelse(iloc.bh); 14678016e29fSHarshad Shirwadkar out: 14688016e29fSHarshad Shirwadkar iput(inode); 14698016e29fSHarshad Shirwadkar if (!ret) 14708016e29fSHarshad Shirwadkar blkdev_issue_flush(sb->s_bdev, GFP_KERNEL); 14718016e29fSHarshad Shirwadkar 14728016e29fSHarshad Shirwadkar return 0; 14738016e29fSHarshad Shirwadkar } 14748016e29fSHarshad Shirwadkar 14758016e29fSHarshad Shirwadkar /* 14768016e29fSHarshad Shirwadkar * Dentry create replay function. 
14778016e29fSHarshad Shirwadkar *
14788016e29fSHarshad Shirwadkar * An EXT4_FC_TAG_CREAT tag is always preceded by an EXT4_FC_TAG_INODE tag,
14798016e29fSHarshad Shirwadkar * so the inode for which we are trying to create a dentry here should
14808016e29fSHarshad Shirwadkar * already have been replayed before we get here.
14818016e29fSHarshad Shirwadkar */
14828016e29fSHarshad Shirwadkar static int ext4_fc_replay_create(struct super_block *sb, struct ext4_fc_tl *tl)
14838016e29fSHarshad Shirwadkar {
14848016e29fSHarshad Shirwadkar int ret = 0;
14858016e29fSHarshad Shirwadkar struct inode *inode = NULL;
14868016e29fSHarshad Shirwadkar struct inode *dir = NULL;
14878016e29fSHarshad Shirwadkar struct dentry_info_args darg;
14888016e29fSHarshad Shirwadkar 
14898016e29fSHarshad Shirwadkar tl_to_darg(&darg, tl);
14908016e29fSHarshad Shirwadkar 
14918016e29fSHarshad Shirwadkar trace_ext4_fc_replay(sb, EXT4_FC_TAG_CREAT, darg.ino,
14928016e29fSHarshad Shirwadkar darg.parent_ino, darg.dname_len);
14938016e29fSHarshad Shirwadkar 
14948016e29fSHarshad Shirwadkar /* This takes care of updating the group descriptor and other metadata */
14958016e29fSHarshad Shirwadkar ret = ext4_mark_inode_used(sb, darg.ino);
14968016e29fSHarshad Shirwadkar if (ret)
14978016e29fSHarshad Shirwadkar goto out;
14988016e29fSHarshad Shirwadkar 
14998016e29fSHarshad Shirwadkar inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL);
15008016e29fSHarshad Shirwadkar if (IS_ERR_OR_NULL(inode)) {
15018016e29fSHarshad Shirwadkar jbd_debug(1, "inode %d not found.", darg.ino);
15028016e29fSHarshad Shirwadkar inode = NULL;
15038016e29fSHarshad Shirwadkar ret = -EINVAL;
15048016e29fSHarshad Shirwadkar goto out;
15058016e29fSHarshad Shirwadkar }
15068016e29fSHarshad Shirwadkar 
15078016e29fSHarshad Shirwadkar if (S_ISDIR(inode->i_mode)) {
15088016e29fSHarshad Shirwadkar /*
15098016e29fSHarshad Shirwadkar * If we are creating a directory, we need to make sure that the
15108016e29fSHarshad Shirwadkar * dot and dot dot dirents are setup properly.
15118016e29fSHarshad Shirwadkar */
15128016e29fSHarshad Shirwadkar dir = ext4_iget(sb, darg.parent_ino, EXT4_IGET_NORMAL);
15138016e29fSHarshad Shirwadkar if (IS_ERR_OR_NULL(dir)) {
15148016e29fSHarshad Shirwadkar jbd_debug(1, "Dir %d not found.", darg.parent_ino);
15158016e29fSHarshad Shirwadkar goto out;
15168016e29fSHarshad Shirwadkar }
15178016e29fSHarshad Shirwadkar ret = ext4_init_new_dir(NULL, dir, inode);
15188016e29fSHarshad Shirwadkar iput(dir);
15198016e29fSHarshad Shirwadkar if (ret) {
15208016e29fSHarshad Shirwadkar ret = 0;
15218016e29fSHarshad Shirwadkar goto out;
15228016e29fSHarshad Shirwadkar }
15238016e29fSHarshad Shirwadkar }
15248016e29fSHarshad Shirwadkar ret = ext4_fc_replay_link_internal(sb, &darg, inode);
15258016e29fSHarshad Shirwadkar if (ret)
15268016e29fSHarshad Shirwadkar goto out;
15278016e29fSHarshad Shirwadkar set_nlink(inode, 1);
15288016e29fSHarshad Shirwadkar ext4_mark_inode_dirty(NULL, inode);
15298016e29fSHarshad Shirwadkar out:
15308016e29fSHarshad Shirwadkar if (inode)
15318016e29fSHarshad Shirwadkar iput(inode);
15328016e29fSHarshad Shirwadkar return ret;
15338016e29fSHarshad Shirwadkar }
15348016e29fSHarshad Shirwadkar 
15358016e29fSHarshad Shirwadkar /*
15368016e29fSHarshad Shirwadkar * Record physical disk regions which are in use as per the fast commit area.
15378016e29fSHarshad Shirwadkar * Our simple replay phase allocator excludes these regions from allocation.
15388016e29fSHarshad Shirwadkar */ 15398016e29fSHarshad Shirwadkar static int ext4_fc_record_regions(struct super_block *sb, int ino, 15408016e29fSHarshad Shirwadkar ext4_lblk_t lblk, ext4_fsblk_t pblk, int len) 15418016e29fSHarshad Shirwadkar { 15428016e29fSHarshad Shirwadkar struct ext4_fc_replay_state *state; 15438016e29fSHarshad Shirwadkar struct ext4_fc_alloc_region *region; 15448016e29fSHarshad Shirwadkar 15458016e29fSHarshad Shirwadkar state = &EXT4_SB(sb)->s_fc_replay_state; 15468016e29fSHarshad Shirwadkar if (state->fc_regions_used == state->fc_regions_size) { 15478016e29fSHarshad Shirwadkar state->fc_regions_size += 15488016e29fSHarshad Shirwadkar EXT4_FC_REPLAY_REALLOC_INCREMENT; 15498016e29fSHarshad Shirwadkar state->fc_regions = krealloc( 15508016e29fSHarshad Shirwadkar state->fc_regions, 15518016e29fSHarshad Shirwadkar state->fc_regions_size * 15528016e29fSHarshad Shirwadkar sizeof(struct ext4_fc_alloc_region), 15538016e29fSHarshad Shirwadkar GFP_KERNEL); 15548016e29fSHarshad Shirwadkar if (!state->fc_regions) 15558016e29fSHarshad Shirwadkar return -ENOMEM; 15568016e29fSHarshad Shirwadkar } 15578016e29fSHarshad Shirwadkar region = &state->fc_regions[state->fc_regions_used++]; 15588016e29fSHarshad Shirwadkar region->ino = ino; 15598016e29fSHarshad Shirwadkar region->lblk = lblk; 15608016e29fSHarshad Shirwadkar region->pblk = pblk; 15618016e29fSHarshad Shirwadkar region->len = len; 15628016e29fSHarshad Shirwadkar 15638016e29fSHarshad Shirwadkar return 0; 15648016e29fSHarshad Shirwadkar } 15658016e29fSHarshad Shirwadkar 15668016e29fSHarshad Shirwadkar /* Replay add range tag */ 15678016e29fSHarshad Shirwadkar static int ext4_fc_replay_add_range(struct super_block *sb, 15688016e29fSHarshad Shirwadkar struct ext4_fc_tl *tl) 15698016e29fSHarshad Shirwadkar { 15708016e29fSHarshad Shirwadkar struct ext4_fc_add_range *fc_add_ex; 15718016e29fSHarshad Shirwadkar struct ext4_extent newex, *ex; 15728016e29fSHarshad Shirwadkar struct inode *inode; 15738016e29fSHarshad Shirwadkar ext4_lblk_t start, cur; 15748016e29fSHarshad Shirwadkar int remaining, len; 15758016e29fSHarshad Shirwadkar ext4_fsblk_t start_pblk; 15768016e29fSHarshad Shirwadkar struct ext4_map_blocks map; 15778016e29fSHarshad Shirwadkar struct ext4_ext_path *path = NULL; 15788016e29fSHarshad Shirwadkar int ret; 15798016e29fSHarshad Shirwadkar 15808016e29fSHarshad Shirwadkar fc_add_ex = (struct ext4_fc_add_range *)ext4_fc_tag_val(tl); 15818016e29fSHarshad Shirwadkar ex = (struct ext4_extent *)&fc_add_ex->fc_ex; 15828016e29fSHarshad Shirwadkar 15838016e29fSHarshad Shirwadkar trace_ext4_fc_replay(sb, EXT4_FC_TAG_ADD_RANGE, 15848016e29fSHarshad Shirwadkar le32_to_cpu(fc_add_ex->fc_ino), le32_to_cpu(ex->ee_block), 15858016e29fSHarshad Shirwadkar ext4_ext_get_actual_len(ex)); 15868016e29fSHarshad Shirwadkar 15878016e29fSHarshad Shirwadkar inode = ext4_iget(sb, le32_to_cpu(fc_add_ex->fc_ino), 15888016e29fSHarshad Shirwadkar EXT4_IGET_NORMAL); 15898016e29fSHarshad Shirwadkar if (IS_ERR_OR_NULL(inode)) { 15908016e29fSHarshad Shirwadkar jbd_debug(1, "Inode not found."); 15918016e29fSHarshad Shirwadkar return 0; 15928016e29fSHarshad Shirwadkar } 15938016e29fSHarshad Shirwadkar 15948016e29fSHarshad Shirwadkar ret = ext4_fc_record_modified_inode(sb, inode->i_ino); 15958016e29fSHarshad Shirwadkar 15968016e29fSHarshad Shirwadkar start = le32_to_cpu(ex->ee_block); 15978016e29fSHarshad Shirwadkar start_pblk = ext4_ext_pblock(ex); 15988016e29fSHarshad Shirwadkar len = ext4_ext_get_actual_len(ex); 15998016e29fSHarshad Shirwadkar 
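	/*
	 * Walk the range [start, start + len) in chunks, as reported by
	 * ext4_map_blocks(). For each chunk there are three cases below:
	 * it is not mapped at all (insert a new extent), it is mapped to a
	 * different physical block (rewrite the mapping and free the old
	 * blocks), or it is mapped correctly but the written/unwritten state
	 * differs (just toggle the state).
	 */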
16008016e29fSHarshad Shirwadkar cur = start; 16018016e29fSHarshad Shirwadkar remaining = len; 16028016e29fSHarshad Shirwadkar jbd_debug(1, "ADD_RANGE, lblk %d, pblk %lld, len %d, unwritten %d, inode %ld\n", 16038016e29fSHarshad Shirwadkar start, start_pblk, len, ext4_ext_is_unwritten(ex), 16048016e29fSHarshad Shirwadkar inode->i_ino); 16058016e29fSHarshad Shirwadkar 16068016e29fSHarshad Shirwadkar while (remaining > 0) { 16078016e29fSHarshad Shirwadkar map.m_lblk = cur; 16088016e29fSHarshad Shirwadkar map.m_len = remaining; 16098016e29fSHarshad Shirwadkar map.m_pblk = 0; 16108016e29fSHarshad Shirwadkar ret = ext4_map_blocks(NULL, inode, &map, 0); 16118016e29fSHarshad Shirwadkar 16128016e29fSHarshad Shirwadkar if (ret < 0) { 16138016e29fSHarshad Shirwadkar iput(inode); 16148016e29fSHarshad Shirwadkar return 0; 16158016e29fSHarshad Shirwadkar } 16168016e29fSHarshad Shirwadkar 16178016e29fSHarshad Shirwadkar if (ret == 0) { 16188016e29fSHarshad Shirwadkar /* Range is not mapped */ 16198016e29fSHarshad Shirwadkar path = ext4_find_extent(inode, cur, NULL, 0); 16208016e29fSHarshad Shirwadkar if (!path) 16218016e29fSHarshad Shirwadkar continue; 16228016e29fSHarshad Shirwadkar memset(&newex, 0, sizeof(newex)); 16238016e29fSHarshad Shirwadkar newex.ee_block = cpu_to_le32(cur); 16248016e29fSHarshad Shirwadkar ext4_ext_store_pblock( 16258016e29fSHarshad Shirwadkar &newex, start_pblk + cur - start); 16268016e29fSHarshad Shirwadkar newex.ee_len = cpu_to_le16(map.m_len); 16278016e29fSHarshad Shirwadkar if (ext4_ext_is_unwritten(ex)) 16288016e29fSHarshad Shirwadkar ext4_ext_mark_unwritten(&newex); 16298016e29fSHarshad Shirwadkar down_write(&EXT4_I(inode)->i_data_sem); 16308016e29fSHarshad Shirwadkar ret = ext4_ext_insert_extent( 16318016e29fSHarshad Shirwadkar NULL, inode, &path, &newex, 0); 16328016e29fSHarshad Shirwadkar up_write((&EXT4_I(inode)->i_data_sem)); 16338016e29fSHarshad Shirwadkar ext4_ext_drop_refs(path); 16348016e29fSHarshad Shirwadkar kfree(path); 16358016e29fSHarshad Shirwadkar if (ret) { 16368016e29fSHarshad Shirwadkar iput(inode); 16378016e29fSHarshad Shirwadkar return 0; 16388016e29fSHarshad Shirwadkar } 16398016e29fSHarshad Shirwadkar goto next; 16408016e29fSHarshad Shirwadkar } 16418016e29fSHarshad Shirwadkar 16428016e29fSHarshad Shirwadkar if (start_pblk + cur - start != map.m_pblk) { 16438016e29fSHarshad Shirwadkar /* 16448016e29fSHarshad Shirwadkar * Logical to physical mapping changed. This can happen 16458016e29fSHarshad Shirwadkar * if this range was removed and then reallocated to 16468016e29fSHarshad Shirwadkar * map to new physical blocks during a fast commit. 16478016e29fSHarshad Shirwadkar */ 16488016e29fSHarshad Shirwadkar ret = ext4_ext_replay_update_ex(inode, cur, map.m_len, 16498016e29fSHarshad Shirwadkar ext4_ext_is_unwritten(ex), 16508016e29fSHarshad Shirwadkar start_pblk + cur - start); 16518016e29fSHarshad Shirwadkar if (ret) { 16528016e29fSHarshad Shirwadkar iput(inode); 16538016e29fSHarshad Shirwadkar return 0; 16548016e29fSHarshad Shirwadkar } 16558016e29fSHarshad Shirwadkar /* 16568016e29fSHarshad Shirwadkar * Mark the old blocks as free since they aren't used 16578016e29fSHarshad Shirwadkar * anymore. We maintain an array of all the modified 16588016e29fSHarshad Shirwadkar * inodes. 
In case these blocks are still used at either 16598016e29fSHarshad Shirwadkar * a different logical range in the same inode or in 16608016e29fSHarshad Shirwadkar * some different inode, we will mark them as allocated 16618016e29fSHarshad Shirwadkar * at the end of the FC replay using our array of 16628016e29fSHarshad Shirwadkar * modified inodes. 16638016e29fSHarshad Shirwadkar */ 16648016e29fSHarshad Shirwadkar ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, 0); 16658016e29fSHarshad Shirwadkar goto next; 16668016e29fSHarshad Shirwadkar } 16678016e29fSHarshad Shirwadkar 16688016e29fSHarshad Shirwadkar /* Range is mapped and needs a state change */ 16698016e29fSHarshad Shirwadkar jbd_debug(1, "Converting from %d to %d %lld", 16708016e29fSHarshad Shirwadkar map.m_flags & EXT4_MAP_UNWRITTEN, 16718016e29fSHarshad Shirwadkar ext4_ext_is_unwritten(ex), map.m_pblk); 16728016e29fSHarshad Shirwadkar ret = ext4_ext_replay_update_ex(inode, cur, map.m_len, 16738016e29fSHarshad Shirwadkar ext4_ext_is_unwritten(ex), map.m_pblk); 16748016e29fSHarshad Shirwadkar if (ret) { 16758016e29fSHarshad Shirwadkar iput(inode); 16768016e29fSHarshad Shirwadkar return 0; 16778016e29fSHarshad Shirwadkar } 16788016e29fSHarshad Shirwadkar /* 16798016e29fSHarshad Shirwadkar * We may have split the extent tree while toggling the state. 16808016e29fSHarshad Shirwadkar * Try to shrink the extent tree now. 16818016e29fSHarshad Shirwadkar */ 16828016e29fSHarshad Shirwadkar ext4_ext_replay_shrink_inode(inode, start + len); 16838016e29fSHarshad Shirwadkar next: 16848016e29fSHarshad Shirwadkar cur += map.m_len; 16858016e29fSHarshad Shirwadkar remaining -= map.m_len; 16868016e29fSHarshad Shirwadkar } 16878016e29fSHarshad Shirwadkar ext4_ext_replay_shrink_inode(inode, i_size_read(inode) >> 16888016e29fSHarshad Shirwadkar sb->s_blocksize_bits); 16898016e29fSHarshad Shirwadkar iput(inode); 16908016e29fSHarshad Shirwadkar return 0; 16918016e29fSHarshad Shirwadkar } 16928016e29fSHarshad Shirwadkar 16938016e29fSHarshad Shirwadkar /* Replay DEL_RANGE tag */ 16948016e29fSHarshad Shirwadkar static int 16958016e29fSHarshad Shirwadkar ext4_fc_replay_del_range(struct super_block *sb, struct ext4_fc_tl *tl) 16968016e29fSHarshad Shirwadkar { 16978016e29fSHarshad Shirwadkar struct inode *inode; 16988016e29fSHarshad Shirwadkar struct ext4_fc_del_range *lrange; 16998016e29fSHarshad Shirwadkar struct ext4_map_blocks map; 17008016e29fSHarshad Shirwadkar ext4_lblk_t cur, remaining; 17018016e29fSHarshad Shirwadkar int ret; 17028016e29fSHarshad Shirwadkar 17038016e29fSHarshad Shirwadkar lrange = (struct ext4_fc_del_range *)ext4_fc_tag_val(tl); 17048016e29fSHarshad Shirwadkar cur = le32_to_cpu(lrange->fc_lblk); 17058016e29fSHarshad Shirwadkar remaining = le32_to_cpu(lrange->fc_len); 17068016e29fSHarshad Shirwadkar 17078016e29fSHarshad Shirwadkar trace_ext4_fc_replay(sb, EXT4_FC_TAG_DEL_RANGE, 17088016e29fSHarshad Shirwadkar le32_to_cpu(lrange->fc_ino), cur, remaining); 17098016e29fSHarshad Shirwadkar 17108016e29fSHarshad Shirwadkar inode = ext4_iget(sb, le32_to_cpu(lrange->fc_ino), EXT4_IGET_NORMAL); 17118016e29fSHarshad Shirwadkar if (IS_ERR_OR_NULL(inode)) { 17128016e29fSHarshad Shirwadkar jbd_debug(1, "Inode %d not found", le32_to_cpu(lrange->fc_ino)); 17138016e29fSHarshad Shirwadkar return 0; 17148016e29fSHarshad Shirwadkar } 17158016e29fSHarshad Shirwadkar 17168016e29fSHarshad Shirwadkar ret = ext4_fc_record_modified_inode(sb, inode->i_ino); 17178016e29fSHarshad Shirwadkar 17188016e29fSHarshad Shirwadkar jbd_debug(1, "DEL_RANGE, inode %ld, lblk 
%d, len %d\n", 17198016e29fSHarshad Shirwadkar inode->i_ino, le32_to_cpu(lrange->fc_lblk), 17208016e29fSHarshad Shirwadkar le32_to_cpu(lrange->fc_len)); 17218016e29fSHarshad Shirwadkar while (remaining > 0) { 17228016e29fSHarshad Shirwadkar map.m_lblk = cur; 17238016e29fSHarshad Shirwadkar map.m_len = remaining; 17248016e29fSHarshad Shirwadkar 17258016e29fSHarshad Shirwadkar ret = ext4_map_blocks(NULL, inode, &map, 0); 17268016e29fSHarshad Shirwadkar if (ret < 0) { 17278016e29fSHarshad Shirwadkar iput(inode); 17288016e29fSHarshad Shirwadkar return 0; 17298016e29fSHarshad Shirwadkar } 17308016e29fSHarshad Shirwadkar if (ret > 0) { 17318016e29fSHarshad Shirwadkar remaining -= ret; 17328016e29fSHarshad Shirwadkar cur += ret; 17338016e29fSHarshad Shirwadkar ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, 0); 17348016e29fSHarshad Shirwadkar } else { 17358016e29fSHarshad Shirwadkar remaining -= map.m_len; 17368016e29fSHarshad Shirwadkar cur += map.m_len; 17378016e29fSHarshad Shirwadkar } 17388016e29fSHarshad Shirwadkar } 17398016e29fSHarshad Shirwadkar 17408016e29fSHarshad Shirwadkar ret = ext4_punch_hole(inode, 17418016e29fSHarshad Shirwadkar le32_to_cpu(lrange->fc_lblk) << sb->s_blocksize_bits, 17428016e29fSHarshad Shirwadkar le32_to_cpu(lrange->fc_len) << sb->s_blocksize_bits); 17438016e29fSHarshad Shirwadkar if (ret) 17448016e29fSHarshad Shirwadkar jbd_debug(1, "ext4_punch_hole returned %d", ret); 17458016e29fSHarshad Shirwadkar ext4_ext_replay_shrink_inode(inode, 17468016e29fSHarshad Shirwadkar i_size_read(inode) >> sb->s_blocksize_bits); 17478016e29fSHarshad Shirwadkar ext4_mark_inode_dirty(NULL, inode); 17488016e29fSHarshad Shirwadkar iput(inode); 17498016e29fSHarshad Shirwadkar 17508016e29fSHarshad Shirwadkar return 0; 17518016e29fSHarshad Shirwadkar } 17528016e29fSHarshad Shirwadkar 17538016e29fSHarshad Shirwadkar static inline const char *tag2str(u16 tag) 17548016e29fSHarshad Shirwadkar { 17558016e29fSHarshad Shirwadkar switch (tag) { 17568016e29fSHarshad Shirwadkar case EXT4_FC_TAG_LINK: 17578016e29fSHarshad Shirwadkar return "TAG_ADD_ENTRY"; 17588016e29fSHarshad Shirwadkar case EXT4_FC_TAG_UNLINK: 17598016e29fSHarshad Shirwadkar return "TAG_DEL_ENTRY"; 17608016e29fSHarshad Shirwadkar case EXT4_FC_TAG_ADD_RANGE: 17618016e29fSHarshad Shirwadkar return "TAG_ADD_RANGE"; 17628016e29fSHarshad Shirwadkar case EXT4_FC_TAG_CREAT: 17638016e29fSHarshad Shirwadkar return "TAG_CREAT_DENTRY"; 17648016e29fSHarshad Shirwadkar case EXT4_FC_TAG_DEL_RANGE: 17658016e29fSHarshad Shirwadkar return "TAG_DEL_RANGE"; 17668016e29fSHarshad Shirwadkar case EXT4_FC_TAG_INODE: 17678016e29fSHarshad Shirwadkar return "TAG_INODE"; 17688016e29fSHarshad Shirwadkar case EXT4_FC_TAG_PAD: 17698016e29fSHarshad Shirwadkar return "TAG_PAD"; 17708016e29fSHarshad Shirwadkar case EXT4_FC_TAG_TAIL: 17718016e29fSHarshad Shirwadkar return "TAG_TAIL"; 17728016e29fSHarshad Shirwadkar case EXT4_FC_TAG_HEAD: 17738016e29fSHarshad Shirwadkar return "TAG_HEAD"; 17748016e29fSHarshad Shirwadkar default: 17758016e29fSHarshad Shirwadkar return "TAG_ERROR"; 17768016e29fSHarshad Shirwadkar } 17778016e29fSHarshad Shirwadkar } 17788016e29fSHarshad Shirwadkar 17798016e29fSHarshad Shirwadkar static void ext4_fc_set_bitmaps_and_counters(struct super_block *sb) 17808016e29fSHarshad Shirwadkar { 17818016e29fSHarshad Shirwadkar struct ext4_fc_replay_state *state; 17828016e29fSHarshad Shirwadkar struct inode *inode; 17838016e29fSHarshad Shirwadkar struct ext4_ext_path *path = NULL; 17848016e29fSHarshad Shirwadkar struct ext4_map_blocks map; 
17858016e29fSHarshad Shirwadkar int i, ret, j;
17868016e29fSHarshad Shirwadkar ext4_lblk_t cur, end;
17878016e29fSHarshad Shirwadkar 
17888016e29fSHarshad Shirwadkar state = &EXT4_SB(sb)->s_fc_replay_state;
17898016e29fSHarshad Shirwadkar for (i = 0; i < state->fc_modified_inodes_used; i++) {
17908016e29fSHarshad Shirwadkar inode = ext4_iget(sb, state->fc_modified_inodes[i],
17918016e29fSHarshad Shirwadkar EXT4_IGET_NORMAL);
17928016e29fSHarshad Shirwadkar if (IS_ERR_OR_NULL(inode)) {
17938016e29fSHarshad Shirwadkar jbd_debug(1, "Inode %d not found.",
17948016e29fSHarshad Shirwadkar state->fc_modified_inodes[i]);
17958016e29fSHarshad Shirwadkar continue;
17968016e29fSHarshad Shirwadkar }
17978016e29fSHarshad Shirwadkar cur = 0;
17988016e29fSHarshad Shirwadkar end = EXT_MAX_BLOCKS;
17998016e29fSHarshad Shirwadkar while (cur < end) {
18008016e29fSHarshad Shirwadkar map.m_lblk = cur;
18018016e29fSHarshad Shirwadkar map.m_len = end - cur;
18028016e29fSHarshad Shirwadkar 
18038016e29fSHarshad Shirwadkar ret = ext4_map_blocks(NULL, inode, &map, 0);
18048016e29fSHarshad Shirwadkar if (ret < 0)
18058016e29fSHarshad Shirwadkar break;
18068016e29fSHarshad Shirwadkar 
18078016e29fSHarshad Shirwadkar if (ret > 0) {
18088016e29fSHarshad Shirwadkar path = ext4_find_extent(inode, map.m_lblk, NULL, 0);
18098016e29fSHarshad Shirwadkar if (!IS_ERR_OR_NULL(path)) {
18108016e29fSHarshad Shirwadkar for (j = 0; j < path->p_depth; j++)
18118016e29fSHarshad Shirwadkar ext4_mb_mark_bb(inode->i_sb,
18128016e29fSHarshad Shirwadkar path[j].p_block, 1, 1);
18138016e29fSHarshad Shirwadkar ext4_ext_drop_refs(path);
18148016e29fSHarshad Shirwadkar kfree(path);
18158016e29fSHarshad Shirwadkar }
18168016e29fSHarshad Shirwadkar cur += ret;
18178016e29fSHarshad Shirwadkar ext4_mb_mark_bb(inode->i_sb, map.m_pblk,
18188016e29fSHarshad Shirwadkar map.m_len, 1);
18198016e29fSHarshad Shirwadkar } else {
18208016e29fSHarshad Shirwadkar cur = cur + (map.m_len ? map.m_len : 1);
18218016e29fSHarshad Shirwadkar }
18228016e29fSHarshad Shirwadkar }
18238016e29fSHarshad Shirwadkar iput(inode);
18248016e29fSHarshad Shirwadkar }
18258016e29fSHarshad Shirwadkar }
18268016e29fSHarshad Shirwadkar 
18278016e29fSHarshad Shirwadkar /*
18288016e29fSHarshad Shirwadkar * Check if a block is in the excluded regions for block allocation. The simple
18298016e29fSHarshad Shirwadkar * allocator that runs during the replay phase calls this function to see
18308016e29fSHarshad Shirwadkar * if it is okay to use a block.
18318016e29fSHarshad Shirwadkar */
18328016e29fSHarshad Shirwadkar bool ext4_fc_replay_check_excluded(struct super_block *sb, ext4_fsblk_t blk)
18338016e29fSHarshad Shirwadkar {
18348016e29fSHarshad Shirwadkar int i;
18358016e29fSHarshad Shirwadkar struct ext4_fc_replay_state *state;
18368016e29fSHarshad Shirwadkar 
18378016e29fSHarshad Shirwadkar state = &EXT4_SB(sb)->s_fc_replay_state;
18388016e29fSHarshad Shirwadkar for (i = 0; i < state->fc_regions_valid; i++) {
18398016e29fSHarshad Shirwadkar if (state->fc_regions[i].ino == 0 ||
18408016e29fSHarshad Shirwadkar state->fc_regions[i].len == 0)
18418016e29fSHarshad Shirwadkar continue;
18428016e29fSHarshad Shirwadkar if (blk >= state->fc_regions[i].pblk &&
18438016e29fSHarshad Shirwadkar blk < state->fc_regions[i].pblk + state->fc_regions[i].len)
18448016e29fSHarshad Shirwadkar return true;
18458016e29fSHarshad Shirwadkar }
18468016e29fSHarshad Shirwadkar return false;
18478016e29fSHarshad Shirwadkar }
18488016e29fSHarshad Shirwadkar 
18498016e29fSHarshad Shirwadkar /* Cleanup function called after replay */
18508016e29fSHarshad Shirwadkar void ext4_fc_replay_cleanup(struct super_block *sb)
18518016e29fSHarshad Shirwadkar {
18528016e29fSHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB(sb);
18538016e29fSHarshad Shirwadkar 
18548016e29fSHarshad Shirwadkar sbi->s_mount_state &= ~EXT4_FC_REPLAY;
18558016e29fSHarshad Shirwadkar kfree(sbi->s_fc_replay_state.fc_regions);
18568016e29fSHarshad Shirwadkar kfree(sbi->s_fc_replay_state.fc_modified_inodes);
18578016e29fSHarshad Shirwadkar }
18588016e29fSHarshad Shirwadkar 
18598016e29fSHarshad Shirwadkar /*
18608016e29fSHarshad Shirwadkar * Recovery Scan phase handler
18618016e29fSHarshad Shirwadkar *
18628016e29fSHarshad Shirwadkar * This function is called during the scan phase and is responsible
18638016e29fSHarshad Shirwadkar * for doing the following things:
18648016e29fSHarshad Shirwadkar * - Make sure the fast commit area has valid tags for replay
18658016e29fSHarshad Shirwadkar * - Count number of tags that need to be replayed by the replay handler
18668016e29fSHarshad Shirwadkar * - Verify CRC
18678016e29fSHarshad Shirwadkar * - Create a list of excluded blocks for allocation during replay phase
18688016e29fSHarshad Shirwadkar *
18698016e29fSHarshad Shirwadkar * This function returns JBD2_FC_REPLAY_CONTINUE to indicate that SCAN is
18708016e29fSHarshad Shirwadkar * incomplete and JBD2 should send more blocks. It returns JBD2_FC_REPLAY_STOP
18718016e29fSHarshad Shirwadkar * to indicate that scan has finished and JBD2 can now start the replay phase.
18728016e29fSHarshad Shirwadkar * It returns a negative error code if something went wrong. At the end
18738016e29fSHarshad Shirwadkar * of a successful scan phase, sbi->s_fc_replay_state.fc_replay_num_tags is set
18748016e29fSHarshad Shirwadkar * to indicate the number of tags that need to be replayed during the replay phase.
18758016e29fSHarshad Shirwadkar */ 18768016e29fSHarshad Shirwadkar static int ext4_fc_replay_scan(journal_t *journal, 18778016e29fSHarshad Shirwadkar struct buffer_head *bh, int off, 18788016e29fSHarshad Shirwadkar tid_t expected_tid) 18798016e29fSHarshad Shirwadkar { 18808016e29fSHarshad Shirwadkar struct super_block *sb = journal->j_private; 18818016e29fSHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB(sb); 18828016e29fSHarshad Shirwadkar struct ext4_fc_replay_state *state; 18838016e29fSHarshad Shirwadkar int ret = JBD2_FC_REPLAY_CONTINUE; 18848016e29fSHarshad Shirwadkar struct ext4_fc_add_range *ext; 18858016e29fSHarshad Shirwadkar struct ext4_fc_tl *tl; 18868016e29fSHarshad Shirwadkar struct ext4_fc_tail *tail; 18878016e29fSHarshad Shirwadkar __u8 *start, *end; 18888016e29fSHarshad Shirwadkar struct ext4_fc_head *head; 18898016e29fSHarshad Shirwadkar struct ext4_extent *ex; 18908016e29fSHarshad Shirwadkar 18918016e29fSHarshad Shirwadkar state = &sbi->s_fc_replay_state; 18928016e29fSHarshad Shirwadkar 18938016e29fSHarshad Shirwadkar start = (u8 *)bh->b_data; 18948016e29fSHarshad Shirwadkar end = (__u8 *)bh->b_data + journal->j_blocksize - 1; 18958016e29fSHarshad Shirwadkar 18968016e29fSHarshad Shirwadkar if (state->fc_replay_expected_off == 0) { 18978016e29fSHarshad Shirwadkar state->fc_cur_tag = 0; 18988016e29fSHarshad Shirwadkar state->fc_replay_num_tags = 0; 18998016e29fSHarshad Shirwadkar state->fc_crc = 0; 19008016e29fSHarshad Shirwadkar state->fc_regions = NULL; 19018016e29fSHarshad Shirwadkar state->fc_regions_valid = state->fc_regions_used = 19028016e29fSHarshad Shirwadkar state->fc_regions_size = 0; 19038016e29fSHarshad Shirwadkar /* Check if we can stop early */ 19048016e29fSHarshad Shirwadkar if (le16_to_cpu(((struct ext4_fc_tl *)start)->fc_tag) 19058016e29fSHarshad Shirwadkar != EXT4_FC_TAG_HEAD) 19068016e29fSHarshad Shirwadkar return 0; 19078016e29fSHarshad Shirwadkar } 19088016e29fSHarshad Shirwadkar 19098016e29fSHarshad Shirwadkar if (off != state->fc_replay_expected_off) { 19108016e29fSHarshad Shirwadkar ret = -EFSCORRUPTED; 19118016e29fSHarshad Shirwadkar goto out_err; 19128016e29fSHarshad Shirwadkar } 19138016e29fSHarshad Shirwadkar 19148016e29fSHarshad Shirwadkar state->fc_replay_expected_off++; 19158016e29fSHarshad Shirwadkar fc_for_each_tl(start, end, tl) { 19168016e29fSHarshad Shirwadkar jbd_debug(3, "Scan phase, tag:%s, blk %lld\n", 19178016e29fSHarshad Shirwadkar tag2str(le16_to_cpu(tl->fc_tag)), bh->b_blocknr); 19188016e29fSHarshad Shirwadkar switch (le16_to_cpu(tl->fc_tag)) { 19198016e29fSHarshad Shirwadkar case EXT4_FC_TAG_ADD_RANGE: 19208016e29fSHarshad Shirwadkar ext = (struct ext4_fc_add_range *)ext4_fc_tag_val(tl); 19218016e29fSHarshad Shirwadkar ex = (struct ext4_extent *)&ext->fc_ex; 19228016e29fSHarshad Shirwadkar ret = ext4_fc_record_regions(sb, 19238016e29fSHarshad Shirwadkar le32_to_cpu(ext->fc_ino), 19248016e29fSHarshad Shirwadkar le32_to_cpu(ex->ee_block), ext4_ext_pblock(ex), 19258016e29fSHarshad Shirwadkar ext4_ext_get_actual_len(ex)); 19268016e29fSHarshad Shirwadkar if (ret < 0) 19278016e29fSHarshad Shirwadkar break; 19288016e29fSHarshad Shirwadkar ret = JBD2_FC_REPLAY_CONTINUE; 19298016e29fSHarshad Shirwadkar fallthrough; 19308016e29fSHarshad Shirwadkar case EXT4_FC_TAG_DEL_RANGE: 19318016e29fSHarshad Shirwadkar case EXT4_FC_TAG_LINK: 19328016e29fSHarshad Shirwadkar case EXT4_FC_TAG_UNLINK: 19338016e29fSHarshad Shirwadkar case EXT4_FC_TAG_CREAT: 19348016e29fSHarshad Shirwadkar case EXT4_FC_TAG_INODE: 19358016e29fSHarshad Shirwadkar case 
EXT4_FC_TAG_PAD: 19368016e29fSHarshad Shirwadkar state->fc_cur_tag++; 19378016e29fSHarshad Shirwadkar state->fc_crc = ext4_chksum(sbi, state->fc_crc, tl, 19388016e29fSHarshad Shirwadkar sizeof(*tl) + ext4_fc_tag_len(tl)); 19398016e29fSHarshad Shirwadkar break; 19408016e29fSHarshad Shirwadkar case EXT4_FC_TAG_TAIL: 19418016e29fSHarshad Shirwadkar state->fc_cur_tag++; 19428016e29fSHarshad Shirwadkar tail = (struct ext4_fc_tail *)ext4_fc_tag_val(tl); 19438016e29fSHarshad Shirwadkar state->fc_crc = ext4_chksum(sbi, state->fc_crc, tl, 19448016e29fSHarshad Shirwadkar sizeof(*tl) + 19458016e29fSHarshad Shirwadkar offsetof(struct ext4_fc_tail, 19468016e29fSHarshad Shirwadkar fc_crc)); 19478016e29fSHarshad Shirwadkar if (le32_to_cpu(tail->fc_tid) == expected_tid && 19488016e29fSHarshad Shirwadkar le32_to_cpu(tail->fc_crc) == state->fc_crc) { 19498016e29fSHarshad Shirwadkar state->fc_replay_num_tags = state->fc_cur_tag; 19508016e29fSHarshad Shirwadkar state->fc_regions_valid = 19518016e29fSHarshad Shirwadkar state->fc_regions_used; 19528016e29fSHarshad Shirwadkar } else { 19538016e29fSHarshad Shirwadkar ret = state->fc_replay_num_tags ? 19548016e29fSHarshad Shirwadkar JBD2_FC_REPLAY_STOP : -EFSBADCRC; 19558016e29fSHarshad Shirwadkar } 19568016e29fSHarshad Shirwadkar state->fc_crc = 0; 19578016e29fSHarshad Shirwadkar break; 19588016e29fSHarshad Shirwadkar case EXT4_FC_TAG_HEAD: 19598016e29fSHarshad Shirwadkar head = (struct ext4_fc_head *)ext4_fc_tag_val(tl); 19608016e29fSHarshad Shirwadkar if (le32_to_cpu(head->fc_features) & 19618016e29fSHarshad Shirwadkar ~EXT4_FC_SUPPORTED_FEATURES) { 19628016e29fSHarshad Shirwadkar ret = -EOPNOTSUPP; 19638016e29fSHarshad Shirwadkar break; 19648016e29fSHarshad Shirwadkar } 19658016e29fSHarshad Shirwadkar if (le32_to_cpu(head->fc_tid) != expected_tid) { 19668016e29fSHarshad Shirwadkar ret = JBD2_FC_REPLAY_STOP; 19678016e29fSHarshad Shirwadkar break; 19688016e29fSHarshad Shirwadkar } 19698016e29fSHarshad Shirwadkar state->fc_cur_tag++; 19708016e29fSHarshad Shirwadkar state->fc_crc = ext4_chksum(sbi, state->fc_crc, tl, 19718016e29fSHarshad Shirwadkar sizeof(*tl) + ext4_fc_tag_len(tl)); 19728016e29fSHarshad Shirwadkar break; 19738016e29fSHarshad Shirwadkar default: 19748016e29fSHarshad Shirwadkar ret = state->fc_replay_num_tags ? 19758016e29fSHarshad Shirwadkar JBD2_FC_REPLAY_STOP : -ECANCELED; 19768016e29fSHarshad Shirwadkar } 19778016e29fSHarshad Shirwadkar if (ret < 0 || ret == JBD2_FC_REPLAY_STOP) 19788016e29fSHarshad Shirwadkar break; 19798016e29fSHarshad Shirwadkar } 19808016e29fSHarshad Shirwadkar 19818016e29fSHarshad Shirwadkar out_err: 19828016e29fSHarshad Shirwadkar trace_ext4_fc_replay_scan(sb, ret, off); 19838016e29fSHarshad Shirwadkar return ret; 19848016e29fSHarshad Shirwadkar } 19858016e29fSHarshad Shirwadkar 19865b849b5fSHarshad Shirwadkar /* 19875b849b5fSHarshad Shirwadkar * Main recovery path entry point. 19888016e29fSHarshad Shirwadkar * The meaning of return codes is similar as above. 
19895b849b5fSHarshad Shirwadkar */ 19905b849b5fSHarshad Shirwadkar static int ext4_fc_replay(journal_t *journal, struct buffer_head *bh, 19915b849b5fSHarshad Shirwadkar enum passtype pass, int off, tid_t expected_tid) 19925b849b5fSHarshad Shirwadkar { 19938016e29fSHarshad Shirwadkar struct super_block *sb = journal->j_private; 19948016e29fSHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB(sb); 19958016e29fSHarshad Shirwadkar struct ext4_fc_tl *tl; 19968016e29fSHarshad Shirwadkar __u8 *start, *end; 19978016e29fSHarshad Shirwadkar int ret = JBD2_FC_REPLAY_CONTINUE; 19988016e29fSHarshad Shirwadkar struct ext4_fc_replay_state *state = &sbi->s_fc_replay_state; 19998016e29fSHarshad Shirwadkar struct ext4_fc_tail *tail; 20008016e29fSHarshad Shirwadkar 20018016e29fSHarshad Shirwadkar if (pass == PASS_SCAN) { 20028016e29fSHarshad Shirwadkar state->fc_current_pass = PASS_SCAN; 20038016e29fSHarshad Shirwadkar return ext4_fc_replay_scan(journal, bh, off, expected_tid); 20048016e29fSHarshad Shirwadkar } 20058016e29fSHarshad Shirwadkar 20068016e29fSHarshad Shirwadkar if (state->fc_current_pass != pass) { 20078016e29fSHarshad Shirwadkar state->fc_current_pass = pass; 20088016e29fSHarshad Shirwadkar sbi->s_mount_state |= EXT4_FC_REPLAY; 20098016e29fSHarshad Shirwadkar } 20108016e29fSHarshad Shirwadkar if (!sbi->s_fc_replay_state.fc_replay_num_tags) { 20118016e29fSHarshad Shirwadkar jbd_debug(1, "Replay stops\n"); 20128016e29fSHarshad Shirwadkar ext4_fc_set_bitmaps_and_counters(sb); 20135b849b5fSHarshad Shirwadkar return 0; 20145b849b5fSHarshad Shirwadkar } 20155b849b5fSHarshad Shirwadkar 20168016e29fSHarshad Shirwadkar #ifdef CONFIG_EXT4_DEBUG 20178016e29fSHarshad Shirwadkar if (sbi->s_fc_debug_max_replay && off >= sbi->s_fc_debug_max_replay) { 20188016e29fSHarshad Shirwadkar pr_warn("Dropping fc block %d because max_replay set\n", off); 20198016e29fSHarshad Shirwadkar return JBD2_FC_REPLAY_STOP; 20208016e29fSHarshad Shirwadkar } 20218016e29fSHarshad Shirwadkar #endif 20228016e29fSHarshad Shirwadkar 20238016e29fSHarshad Shirwadkar start = (u8 *)bh->b_data; 20248016e29fSHarshad Shirwadkar end = (__u8 *)bh->b_data + journal->j_blocksize - 1; 20258016e29fSHarshad Shirwadkar 20268016e29fSHarshad Shirwadkar fc_for_each_tl(start, end, tl) { 20278016e29fSHarshad Shirwadkar if (state->fc_replay_num_tags == 0) { 20288016e29fSHarshad Shirwadkar ret = JBD2_FC_REPLAY_STOP; 20298016e29fSHarshad Shirwadkar ext4_fc_set_bitmaps_and_counters(sb); 20308016e29fSHarshad Shirwadkar break; 20318016e29fSHarshad Shirwadkar } 20328016e29fSHarshad Shirwadkar jbd_debug(3, "Replay phase, tag:%s\n", 20338016e29fSHarshad Shirwadkar tag2str(le16_to_cpu(tl->fc_tag))); 20348016e29fSHarshad Shirwadkar state->fc_replay_num_tags--; 20358016e29fSHarshad Shirwadkar switch (le16_to_cpu(tl->fc_tag)) { 20368016e29fSHarshad Shirwadkar case EXT4_FC_TAG_LINK: 20378016e29fSHarshad Shirwadkar ret = ext4_fc_replay_link(sb, tl); 20388016e29fSHarshad Shirwadkar break; 20398016e29fSHarshad Shirwadkar case EXT4_FC_TAG_UNLINK: 20408016e29fSHarshad Shirwadkar ret = ext4_fc_replay_unlink(sb, tl); 20418016e29fSHarshad Shirwadkar break; 20428016e29fSHarshad Shirwadkar case EXT4_FC_TAG_ADD_RANGE: 20438016e29fSHarshad Shirwadkar ret = ext4_fc_replay_add_range(sb, tl); 20448016e29fSHarshad Shirwadkar break; 20458016e29fSHarshad Shirwadkar case EXT4_FC_TAG_CREAT: 20468016e29fSHarshad Shirwadkar ret = ext4_fc_replay_create(sb, tl); 20478016e29fSHarshad Shirwadkar break; 20488016e29fSHarshad Shirwadkar case EXT4_FC_TAG_DEL_RANGE: 20498016e29fSHarshad Shirwadkar 
			ret = ext4_fc_replay_del_range(sb, tl);
			break;
		case EXT4_FC_TAG_INODE:
			ret = ext4_fc_replay_inode(sb, tl);
			break;
		case EXT4_FC_TAG_PAD:
			trace_ext4_fc_replay(sb, EXT4_FC_TAG_PAD, 0,
					     ext4_fc_tag_len(tl), 0);
			break;
		case EXT4_FC_TAG_TAIL:
			trace_ext4_fc_replay(sb, EXT4_FC_TAG_TAIL, 0,
					     ext4_fc_tag_len(tl), 0);
			tail = (struct ext4_fc_tail *)ext4_fc_tag_val(tl);
			WARN_ON(le32_to_cpu(tail->fc_tid) != expected_tid);
			break;
		case EXT4_FC_TAG_HEAD:
			break;
		default:
			trace_ext4_fc_replay(sb, le16_to_cpu(tl->fc_tag), 0,
					     ext4_fc_tag_len(tl), 0);
			ret = -ECANCELED;
			break;
		}
		if (ret < 0)
			break;
		ret = JBD2_FC_REPLAY_CONTINUE;
	}
	return ret;
}

void ext4_fc_init(struct super_block *sb, journal_t *journal)
{
	/*
	 * We set the replay callback even if fast commit is disabled, because
	 * we could still have fast commit blocks that need to be replayed
	 * even if fast commit has now been turned off.
	 */
	journal->j_fc_replay_callback = ext4_fc_replay;
	if (!test_opt2(sb, JOURNAL_FAST_COMMIT))
		return;
	journal->j_fc_cleanup_callback = ext4_fc_cleanup;
	if (jbd2_fc_init(journal, EXT4_NUM_FC_BLKS)) {
		pr_warn("Error while enabling fast commits, turning off.");
		ext4_clear_feature_fast_commit(sb);
	}
}

/*
 * Human-readable descriptions of the fast commit ineligibility reasons,
 * indexed by the corresponding EXT4_FC_REASON_* codes.
 */
const char *fc_ineligible_reasons[] = {
	"Extended attributes changed",
	"Cross rename",
	"Journal flag changed",
	"Insufficient memory",
	"Swap boot",
	"Resize",
	"Dir renamed",
	"Falloc range op",
	"FC Commit Failed"
};

/* seq_file show handler that dumps fast commit statistics. */
int ext4_fc_info_show(struct seq_file *seq, void *v)
{
	struct ext4_sb_info *sbi = EXT4_SB((struct super_block *)seq->private);
	struct ext4_fc_stats *stats = &sbi->s_fc_stats;
	int i;

	if (v != SEQ_START_TOKEN)
		return 0;

	seq_printf(seq,
		"fc stats:\n%ld commits\n%ld ineligible\n%ld numblks\n%lluus avg_commit_time\n",
		   stats->fc_num_commits, stats->fc_ineligible_commits,
		   stats->fc_numblks,
		   div_u64(sbi->s_fc_avg_commit_time, 1000));
	seq_puts(seq, "Ineligible reasons:\n");
	for (i = 0; i < EXT4_FC_REASON_MAX; i++)
		seq_printf(seq, "\"%s\":\t%d\n", fc_ineligible_reasons[i],
			stats->fc_ineligible_reason_count[i]);

	return 0;
}

/* Create the slab cache used for in-memory fast commit dentry updates. */
int __init ext4_fc_init_dentry_cache(void)
{
	ext4_fc_dentry_cachep = KMEM_CACHE(ext4_fc_dentry_update,
					   SLAB_RECLAIM_ACCOUNT);

	if (ext4_fc_dentry_cachep == NULL)
		return -ENOMEM;

	return 0;
}
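
/*
 * Example (illustrative sketch, not part of this file): ext4_fc_info_show()
 * follows the seq_file single-show pattern, so a caller elsewhere in ext4
 * could expose it through procfs roughly as below, passing the super_block
 * as the seq_file private data. The registration site, proc directory and
 * file name used here are assumptions for illustration only.
 *
 *	proc_create_single_data("fc_info", 0444, sbi->s_proc,
 *				ext4_fc_info_show, sb);
 *
 * With single_open()-based registration, the show callback is invoked once
 * with v == SEQ_START_TOKEN, which is why the handler above returns early
 * for any other value of v.
 */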