16866d7b3SHarshad Shirwadkar // SPDX-License-Identifier: GPL-2.0 26866d7b3SHarshad Shirwadkar 36866d7b3SHarshad Shirwadkar /* 46866d7b3SHarshad Shirwadkar * fs/ext4/fast_commit.c 56866d7b3SHarshad Shirwadkar * 66866d7b3SHarshad Shirwadkar * Written by Harshad Shirwadkar <harshadshirwadkar@gmail.com> 76866d7b3SHarshad Shirwadkar * 86866d7b3SHarshad Shirwadkar * Ext4 fast commits routines. 96866d7b3SHarshad Shirwadkar */ 10*aa75f4d3SHarshad Shirwadkar #include "ext4.h" 116866d7b3SHarshad Shirwadkar #include "ext4_jbd2.h" 12*aa75f4d3SHarshad Shirwadkar #include "ext4_extents.h" 13*aa75f4d3SHarshad Shirwadkar #include "mballoc.h" 14*aa75f4d3SHarshad Shirwadkar 15*aa75f4d3SHarshad Shirwadkar /* 16*aa75f4d3SHarshad Shirwadkar * Ext4 Fast Commits 17*aa75f4d3SHarshad Shirwadkar * ----------------- 18*aa75f4d3SHarshad Shirwadkar * 19*aa75f4d3SHarshad Shirwadkar * Ext4 fast commits implement fine grained journalling for Ext4. 20*aa75f4d3SHarshad Shirwadkar * 21*aa75f4d3SHarshad Shirwadkar * Fast commits are organized as a log of tag-length-value (TLV) structs. (See 22*aa75f4d3SHarshad Shirwadkar * struct ext4_fc_tl). Each TLV contains some delta that is replayed TLV by 23*aa75f4d3SHarshad Shirwadkar * TLV during the recovery phase. For the scenarios for which we currently 24*aa75f4d3SHarshad Shirwadkar * don't have replay code, fast commit falls back to full commits. 25*aa75f4d3SHarshad Shirwadkar * Fast commits record delta in one of the following three categories. 26*aa75f4d3SHarshad Shirwadkar * 27*aa75f4d3SHarshad Shirwadkar * (A) Directory entry updates: 28*aa75f4d3SHarshad Shirwadkar * 29*aa75f4d3SHarshad Shirwadkar * - EXT4_FC_TAG_UNLINK - records directory entry unlink 30*aa75f4d3SHarshad Shirwadkar * - EXT4_FC_TAG_LINK - records directory entry link 31*aa75f4d3SHarshad Shirwadkar * - EXT4_FC_TAG_CREAT - records inode and directory entry creation 32*aa75f4d3SHarshad Shirwadkar * 33*aa75f4d3SHarshad Shirwadkar * (B) File specific data range updates: 34*aa75f4d3SHarshad Shirwadkar * 35*aa75f4d3SHarshad Shirwadkar * - EXT4_FC_TAG_ADD_RANGE - records addition of new blocks to an inode 36*aa75f4d3SHarshad Shirwadkar * - EXT4_FC_TAG_DEL_RANGE - records deletion of blocks from an inode 37*aa75f4d3SHarshad Shirwadkar * 38*aa75f4d3SHarshad Shirwadkar * (C) Inode metadata (mtime / ctime etc): 39*aa75f4d3SHarshad Shirwadkar * 40*aa75f4d3SHarshad Shirwadkar * - EXT4_FC_TAG_INODE - record the inode that should be replayed 41*aa75f4d3SHarshad Shirwadkar * during recovery. Note that iblocks field is 42*aa75f4d3SHarshad Shirwadkar * not replayed and instead derived during 43*aa75f4d3SHarshad Shirwadkar * replay. 44*aa75f4d3SHarshad Shirwadkar * Commit Operation 45*aa75f4d3SHarshad Shirwadkar * ---------------- 46*aa75f4d3SHarshad Shirwadkar * With fast commits, we maintain all the directory entry operations in the 47*aa75f4d3SHarshad Shirwadkar * order in which they are issued in an in-memory queue. This queue is flushed 48*aa75f4d3SHarshad Shirwadkar * to disk during the commit operation. We also maintain a list of inodes 49*aa75f4d3SHarshad Shirwadkar * that need to be committed during a fast commit in another in memory queue of 50*aa75f4d3SHarshad Shirwadkar * inodes. During the commit operation, we commit in the following order: 51*aa75f4d3SHarshad Shirwadkar * 52*aa75f4d3SHarshad Shirwadkar * [1] Lock inodes for any further data updates by setting COMMITTING state 53*aa75f4d3SHarshad Shirwadkar * [2] Submit data buffers of all the inodes 54*aa75f4d3SHarshad Shirwadkar * [3] Wait for [2] to complete 55*aa75f4d3SHarshad Shirwadkar * [4] Commit all the directory entry updates in the fast commit space 56*aa75f4d3SHarshad Shirwadkar * [5] Commit all the changed inode structures 57*aa75f4d3SHarshad Shirwadkar * [6] Write tail tag (this tag ensures the atomicity, please read the following 58*aa75f4d3SHarshad Shirwadkar * section for more details). 59*aa75f4d3SHarshad Shirwadkar * [7] Wait for [4], [5] and [6] to complete. 60*aa75f4d3SHarshad Shirwadkar * 61*aa75f4d3SHarshad Shirwadkar * All the inode updates must call ext4_fc_start_update() before starting an 62*aa75f4d3SHarshad Shirwadkar * update. If such an ongoing update is present, fast commit waits for it to 63*aa75f4d3SHarshad Shirwadkar * complete. The completion of such an update is marked by 64*aa75f4d3SHarshad Shirwadkar * ext4_fc_stop_update(). 65*aa75f4d3SHarshad Shirwadkar * 66*aa75f4d3SHarshad Shirwadkar * Fast Commit Ineligibility 67*aa75f4d3SHarshad Shirwadkar * ------------------------- 68*aa75f4d3SHarshad Shirwadkar * Not all operations are supported by fast commits today (e.g extended 69*aa75f4d3SHarshad Shirwadkar * attributes). Fast commit ineligiblity is marked by calling one of the 70*aa75f4d3SHarshad Shirwadkar * two following functions: 71*aa75f4d3SHarshad Shirwadkar * 72*aa75f4d3SHarshad Shirwadkar * - ext4_fc_mark_ineligible(): This makes next fast commit operation to fall 73*aa75f4d3SHarshad Shirwadkar * back to full commit. This is useful in case of transient errors. 74*aa75f4d3SHarshad Shirwadkar * 75*aa75f4d3SHarshad Shirwadkar * - ext4_fc_start_ineligible() and ext4_fc_stop_ineligible() - This makes all 76*aa75f4d3SHarshad Shirwadkar * the fast commits happening between ext4_fc_start_ineligible() and 77*aa75f4d3SHarshad Shirwadkar * ext4_fc_stop_ineligible() and one fast commit after the call to 78*aa75f4d3SHarshad Shirwadkar * ext4_fc_stop_ineligible() to fall back to full commits. It is important to 79*aa75f4d3SHarshad Shirwadkar * make one more fast commit to fall back to full commit after stop call so 80*aa75f4d3SHarshad Shirwadkar * that it guaranteed that the fast commit ineligible operation contained 81*aa75f4d3SHarshad Shirwadkar * within ext4_fc_start_ineligible() and ext4_fc_stop_ineligible() is 82*aa75f4d3SHarshad Shirwadkar * followed by at least 1 full commit. 83*aa75f4d3SHarshad Shirwadkar * 84*aa75f4d3SHarshad Shirwadkar * Atomicity of commits 85*aa75f4d3SHarshad Shirwadkar * -------------------- 86*aa75f4d3SHarshad Shirwadkar * In order to gaurantee atomicity during the commit operation, fast commit 87*aa75f4d3SHarshad Shirwadkar * uses "EXT4_FC_TAG_TAIL" tag that marks a fast commit as complete. Tail 88*aa75f4d3SHarshad Shirwadkar * tag contains CRC of the contents and TID of the transaction after which 89*aa75f4d3SHarshad Shirwadkar * this fast commit should be applied. Recovery code replays fast commit 90*aa75f4d3SHarshad Shirwadkar * logs only if there's at least 1 valid tail present. For every fast commit 91*aa75f4d3SHarshad Shirwadkar * operation, there is 1 tail. This means, we may end up with multiple tails 92*aa75f4d3SHarshad Shirwadkar * in the fast commit space. Here's an example: 93*aa75f4d3SHarshad Shirwadkar * 94*aa75f4d3SHarshad Shirwadkar * - Create a new file A and remove existing file B 95*aa75f4d3SHarshad Shirwadkar * - fsync() 96*aa75f4d3SHarshad Shirwadkar * - Append contents to file A 97*aa75f4d3SHarshad Shirwadkar * - Truncate file A 98*aa75f4d3SHarshad Shirwadkar * - fsync() 99*aa75f4d3SHarshad Shirwadkar * 100*aa75f4d3SHarshad Shirwadkar * The fast commit space at the end of above operations would look like this: 101*aa75f4d3SHarshad Shirwadkar * [HEAD] [CREAT A] [UNLINK B] [TAIL] [ADD_RANGE A] [DEL_RANGE A] [TAIL] 102*aa75f4d3SHarshad Shirwadkar * |<--- Fast Commit 1 --->|<--- Fast Commit 2 ---->| 103*aa75f4d3SHarshad Shirwadkar * 104*aa75f4d3SHarshad Shirwadkar * Replay code should thus check for all the valid tails in the FC area. 105*aa75f4d3SHarshad Shirwadkar * 106*aa75f4d3SHarshad Shirwadkar * TODOs 107*aa75f4d3SHarshad Shirwadkar * ----- 108*aa75f4d3SHarshad Shirwadkar * 1) Make fast commit atomic updates more fine grained. Today, a fast commit 109*aa75f4d3SHarshad Shirwadkar * eligible update must be protected within ext4_fc_start_update() and 110*aa75f4d3SHarshad Shirwadkar * ext4_fc_stop_update(). These routines are called at much higher 111*aa75f4d3SHarshad Shirwadkar * routines. This can be made more fine grained by combining with 112*aa75f4d3SHarshad Shirwadkar * ext4_journal_start(). 113*aa75f4d3SHarshad Shirwadkar * 114*aa75f4d3SHarshad Shirwadkar * 2) Same above for ext4_fc_start_ineligible() and ext4_fc_stop_ineligible() 115*aa75f4d3SHarshad Shirwadkar * 116*aa75f4d3SHarshad Shirwadkar * 3) Handle more ineligible cases. 117*aa75f4d3SHarshad Shirwadkar */ 118*aa75f4d3SHarshad Shirwadkar 119*aa75f4d3SHarshad Shirwadkar #include <trace/events/ext4.h> 120*aa75f4d3SHarshad Shirwadkar static struct kmem_cache *ext4_fc_dentry_cachep; 121*aa75f4d3SHarshad Shirwadkar 122*aa75f4d3SHarshad Shirwadkar static void ext4_end_buffer_io_sync(struct buffer_head *bh, int uptodate) 123*aa75f4d3SHarshad Shirwadkar { 124*aa75f4d3SHarshad Shirwadkar BUFFER_TRACE(bh, ""); 125*aa75f4d3SHarshad Shirwadkar if (uptodate) { 126*aa75f4d3SHarshad Shirwadkar ext4_debug("%s: Block %lld up-to-date", 127*aa75f4d3SHarshad Shirwadkar __func__, bh->b_blocknr); 128*aa75f4d3SHarshad Shirwadkar set_buffer_uptodate(bh); 129*aa75f4d3SHarshad Shirwadkar } else { 130*aa75f4d3SHarshad Shirwadkar ext4_debug("%s: Block %lld not up-to-date", 131*aa75f4d3SHarshad Shirwadkar __func__, bh->b_blocknr); 132*aa75f4d3SHarshad Shirwadkar clear_buffer_uptodate(bh); 133*aa75f4d3SHarshad Shirwadkar } 134*aa75f4d3SHarshad Shirwadkar 135*aa75f4d3SHarshad Shirwadkar unlock_buffer(bh); 136*aa75f4d3SHarshad Shirwadkar } 137*aa75f4d3SHarshad Shirwadkar 138*aa75f4d3SHarshad Shirwadkar static inline void ext4_fc_reset_inode(struct inode *inode) 139*aa75f4d3SHarshad Shirwadkar { 140*aa75f4d3SHarshad Shirwadkar struct ext4_inode_info *ei = EXT4_I(inode); 141*aa75f4d3SHarshad Shirwadkar 142*aa75f4d3SHarshad Shirwadkar ei->i_fc_lblk_start = 0; 143*aa75f4d3SHarshad Shirwadkar ei->i_fc_lblk_len = 0; 144*aa75f4d3SHarshad Shirwadkar } 145*aa75f4d3SHarshad Shirwadkar 146*aa75f4d3SHarshad Shirwadkar void ext4_fc_init_inode(struct inode *inode) 147*aa75f4d3SHarshad Shirwadkar { 148*aa75f4d3SHarshad Shirwadkar struct ext4_inode_info *ei = EXT4_I(inode); 149*aa75f4d3SHarshad Shirwadkar 150*aa75f4d3SHarshad Shirwadkar ext4_fc_reset_inode(inode); 151*aa75f4d3SHarshad Shirwadkar ext4_clear_inode_state(inode, EXT4_STATE_FC_COMMITTING); 152*aa75f4d3SHarshad Shirwadkar INIT_LIST_HEAD(&ei->i_fc_list); 153*aa75f4d3SHarshad Shirwadkar init_waitqueue_head(&ei->i_fc_wait); 154*aa75f4d3SHarshad Shirwadkar atomic_set(&ei->i_fc_updates, 0); 155*aa75f4d3SHarshad Shirwadkar ei->i_fc_committed_subtid = 0; 156*aa75f4d3SHarshad Shirwadkar } 157*aa75f4d3SHarshad Shirwadkar 158*aa75f4d3SHarshad Shirwadkar /* 159*aa75f4d3SHarshad Shirwadkar * Inform Ext4's fast about start of an inode update 160*aa75f4d3SHarshad Shirwadkar * 161*aa75f4d3SHarshad Shirwadkar * This function is called by the high level call VFS callbacks before 162*aa75f4d3SHarshad Shirwadkar * performing any inode update. This function blocks if there's an ongoing 163*aa75f4d3SHarshad Shirwadkar * fast commit on the inode in question. 164*aa75f4d3SHarshad Shirwadkar */ 165*aa75f4d3SHarshad Shirwadkar void ext4_fc_start_update(struct inode *inode) 166*aa75f4d3SHarshad Shirwadkar { 167*aa75f4d3SHarshad Shirwadkar struct ext4_inode_info *ei = EXT4_I(inode); 168*aa75f4d3SHarshad Shirwadkar 169*aa75f4d3SHarshad Shirwadkar if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT)) 170*aa75f4d3SHarshad Shirwadkar return; 171*aa75f4d3SHarshad Shirwadkar 172*aa75f4d3SHarshad Shirwadkar restart: 173*aa75f4d3SHarshad Shirwadkar spin_lock(&EXT4_SB(inode->i_sb)->s_fc_lock); 174*aa75f4d3SHarshad Shirwadkar if (list_empty(&ei->i_fc_list)) 175*aa75f4d3SHarshad Shirwadkar goto out; 176*aa75f4d3SHarshad Shirwadkar 177*aa75f4d3SHarshad Shirwadkar if (ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING)) { 178*aa75f4d3SHarshad Shirwadkar wait_queue_head_t *wq; 179*aa75f4d3SHarshad Shirwadkar #if (BITS_PER_LONG < 64) 180*aa75f4d3SHarshad Shirwadkar DEFINE_WAIT_BIT(wait, &ei->i_state_flags, 181*aa75f4d3SHarshad Shirwadkar EXT4_STATE_FC_COMMITTING); 182*aa75f4d3SHarshad Shirwadkar wq = bit_waitqueue(&ei->i_state_flags, 183*aa75f4d3SHarshad Shirwadkar EXT4_STATE_FC_COMMITTING); 184*aa75f4d3SHarshad Shirwadkar #else 185*aa75f4d3SHarshad Shirwadkar DEFINE_WAIT_BIT(wait, &ei->i_flags, 186*aa75f4d3SHarshad Shirwadkar EXT4_STATE_FC_COMMITTING); 187*aa75f4d3SHarshad Shirwadkar wq = bit_waitqueue(&ei->i_flags, 188*aa75f4d3SHarshad Shirwadkar EXT4_STATE_FC_COMMITTING); 189*aa75f4d3SHarshad Shirwadkar #endif 190*aa75f4d3SHarshad Shirwadkar prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE); 191*aa75f4d3SHarshad Shirwadkar spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock); 192*aa75f4d3SHarshad Shirwadkar schedule(); 193*aa75f4d3SHarshad Shirwadkar finish_wait(wq, &wait.wq_entry); 194*aa75f4d3SHarshad Shirwadkar goto restart; 195*aa75f4d3SHarshad Shirwadkar } 196*aa75f4d3SHarshad Shirwadkar out: 197*aa75f4d3SHarshad Shirwadkar atomic_inc(&ei->i_fc_updates); 198*aa75f4d3SHarshad Shirwadkar spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock); 199*aa75f4d3SHarshad Shirwadkar } 200*aa75f4d3SHarshad Shirwadkar 201*aa75f4d3SHarshad Shirwadkar /* 202*aa75f4d3SHarshad Shirwadkar * Stop inode update and wake up waiting fast commits if any. 203*aa75f4d3SHarshad Shirwadkar */ 204*aa75f4d3SHarshad Shirwadkar void ext4_fc_stop_update(struct inode *inode) 205*aa75f4d3SHarshad Shirwadkar { 206*aa75f4d3SHarshad Shirwadkar struct ext4_inode_info *ei = EXT4_I(inode); 207*aa75f4d3SHarshad Shirwadkar 208*aa75f4d3SHarshad Shirwadkar if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT)) 209*aa75f4d3SHarshad Shirwadkar return; 210*aa75f4d3SHarshad Shirwadkar 211*aa75f4d3SHarshad Shirwadkar if (atomic_dec_and_test(&ei->i_fc_updates)) 212*aa75f4d3SHarshad Shirwadkar wake_up_all(&ei->i_fc_wait); 213*aa75f4d3SHarshad Shirwadkar } 214*aa75f4d3SHarshad Shirwadkar 215*aa75f4d3SHarshad Shirwadkar /* 216*aa75f4d3SHarshad Shirwadkar * Remove inode from fast commit list. If the inode is being committed 217*aa75f4d3SHarshad Shirwadkar * we wait until inode commit is done. 218*aa75f4d3SHarshad Shirwadkar */ 219*aa75f4d3SHarshad Shirwadkar void ext4_fc_del(struct inode *inode) 220*aa75f4d3SHarshad Shirwadkar { 221*aa75f4d3SHarshad Shirwadkar struct ext4_inode_info *ei = EXT4_I(inode); 222*aa75f4d3SHarshad Shirwadkar 223*aa75f4d3SHarshad Shirwadkar if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT)) 224*aa75f4d3SHarshad Shirwadkar return; 225*aa75f4d3SHarshad Shirwadkar 226*aa75f4d3SHarshad Shirwadkar 227*aa75f4d3SHarshad Shirwadkar if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT)) 228*aa75f4d3SHarshad Shirwadkar return; 229*aa75f4d3SHarshad Shirwadkar 230*aa75f4d3SHarshad Shirwadkar restart: 231*aa75f4d3SHarshad Shirwadkar spin_lock(&EXT4_SB(inode->i_sb)->s_fc_lock); 232*aa75f4d3SHarshad Shirwadkar if (list_empty(&ei->i_fc_list)) { 233*aa75f4d3SHarshad Shirwadkar spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock); 234*aa75f4d3SHarshad Shirwadkar return; 235*aa75f4d3SHarshad Shirwadkar } 236*aa75f4d3SHarshad Shirwadkar 237*aa75f4d3SHarshad Shirwadkar if (ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING)) { 238*aa75f4d3SHarshad Shirwadkar wait_queue_head_t *wq; 239*aa75f4d3SHarshad Shirwadkar #if (BITS_PER_LONG < 64) 240*aa75f4d3SHarshad Shirwadkar DEFINE_WAIT_BIT(wait, &ei->i_state_flags, 241*aa75f4d3SHarshad Shirwadkar EXT4_STATE_FC_COMMITTING); 242*aa75f4d3SHarshad Shirwadkar wq = bit_waitqueue(&ei->i_state_flags, 243*aa75f4d3SHarshad Shirwadkar EXT4_STATE_FC_COMMITTING); 244*aa75f4d3SHarshad Shirwadkar #else 245*aa75f4d3SHarshad Shirwadkar DEFINE_WAIT_BIT(wait, &ei->i_flags, 246*aa75f4d3SHarshad Shirwadkar EXT4_STATE_FC_COMMITTING); 247*aa75f4d3SHarshad Shirwadkar wq = bit_waitqueue(&ei->i_flags, 248*aa75f4d3SHarshad Shirwadkar EXT4_STATE_FC_COMMITTING); 249*aa75f4d3SHarshad Shirwadkar #endif 250*aa75f4d3SHarshad Shirwadkar prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE); 251*aa75f4d3SHarshad Shirwadkar spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock); 252*aa75f4d3SHarshad Shirwadkar schedule(); 253*aa75f4d3SHarshad Shirwadkar finish_wait(wq, &wait.wq_entry); 254*aa75f4d3SHarshad Shirwadkar goto restart; 255*aa75f4d3SHarshad Shirwadkar } 256*aa75f4d3SHarshad Shirwadkar if (!list_empty(&ei->i_fc_list)) 257*aa75f4d3SHarshad Shirwadkar list_del_init(&ei->i_fc_list); 258*aa75f4d3SHarshad Shirwadkar spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock); 259*aa75f4d3SHarshad Shirwadkar } 260*aa75f4d3SHarshad Shirwadkar 261*aa75f4d3SHarshad Shirwadkar /* 262*aa75f4d3SHarshad Shirwadkar * Mark file system as fast commit ineligible. This means that next commit 263*aa75f4d3SHarshad Shirwadkar * operation would result in a full jbd2 commit. 264*aa75f4d3SHarshad Shirwadkar */ 265*aa75f4d3SHarshad Shirwadkar void ext4_fc_mark_ineligible(struct super_block *sb, int reason) 266*aa75f4d3SHarshad Shirwadkar { 267*aa75f4d3SHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB(sb); 268*aa75f4d3SHarshad Shirwadkar 269*aa75f4d3SHarshad Shirwadkar sbi->s_mount_state |= EXT4_FC_INELIGIBLE; 270*aa75f4d3SHarshad Shirwadkar WARN_ON(reason >= EXT4_FC_REASON_MAX); 271*aa75f4d3SHarshad Shirwadkar sbi->s_fc_stats.fc_ineligible_reason_count[reason]++; 272*aa75f4d3SHarshad Shirwadkar } 273*aa75f4d3SHarshad Shirwadkar 274*aa75f4d3SHarshad Shirwadkar /* 275*aa75f4d3SHarshad Shirwadkar * Start a fast commit ineligible update. Any commits that happen while 276*aa75f4d3SHarshad Shirwadkar * such an operation is in progress fall back to full commits. 277*aa75f4d3SHarshad Shirwadkar */ 278*aa75f4d3SHarshad Shirwadkar void ext4_fc_start_ineligible(struct super_block *sb, int reason) 279*aa75f4d3SHarshad Shirwadkar { 280*aa75f4d3SHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB(sb); 281*aa75f4d3SHarshad Shirwadkar 282*aa75f4d3SHarshad Shirwadkar WARN_ON(reason >= EXT4_FC_REASON_MAX); 283*aa75f4d3SHarshad Shirwadkar sbi->s_fc_stats.fc_ineligible_reason_count[reason]++; 284*aa75f4d3SHarshad Shirwadkar atomic_inc(&sbi->s_fc_ineligible_updates); 285*aa75f4d3SHarshad Shirwadkar } 286*aa75f4d3SHarshad Shirwadkar 287*aa75f4d3SHarshad Shirwadkar /* 288*aa75f4d3SHarshad Shirwadkar * Stop a fast commit ineligible update. We set EXT4_FC_INELIGIBLE flag here 289*aa75f4d3SHarshad Shirwadkar * to ensure that after stopping the ineligible update, at least one full 290*aa75f4d3SHarshad Shirwadkar * commit takes place. 291*aa75f4d3SHarshad Shirwadkar */ 292*aa75f4d3SHarshad Shirwadkar void ext4_fc_stop_ineligible(struct super_block *sb) 293*aa75f4d3SHarshad Shirwadkar { 294*aa75f4d3SHarshad Shirwadkar EXT4_SB(sb)->s_mount_state |= EXT4_FC_INELIGIBLE; 295*aa75f4d3SHarshad Shirwadkar atomic_dec(&EXT4_SB(sb)->s_fc_ineligible_updates); 296*aa75f4d3SHarshad Shirwadkar } 297*aa75f4d3SHarshad Shirwadkar 298*aa75f4d3SHarshad Shirwadkar static inline int ext4_fc_is_ineligible(struct super_block *sb) 299*aa75f4d3SHarshad Shirwadkar { 300*aa75f4d3SHarshad Shirwadkar return (EXT4_SB(sb)->s_mount_state & EXT4_FC_INELIGIBLE) || 301*aa75f4d3SHarshad Shirwadkar atomic_read(&EXT4_SB(sb)->s_fc_ineligible_updates); 302*aa75f4d3SHarshad Shirwadkar } 303*aa75f4d3SHarshad Shirwadkar 304*aa75f4d3SHarshad Shirwadkar /* 305*aa75f4d3SHarshad Shirwadkar * Generic fast commit tracking function. If this is the first time this we are 306*aa75f4d3SHarshad Shirwadkar * called after a full commit, we initialize fast commit fields and then call 307*aa75f4d3SHarshad Shirwadkar * __fc_track_fn() with update = 0. If we have already been called after a full 308*aa75f4d3SHarshad Shirwadkar * commit, we pass update = 1. Based on that, the track function can determine 309*aa75f4d3SHarshad Shirwadkar * if it needs to track a field for the first time or if it needs to just 310*aa75f4d3SHarshad Shirwadkar * update the previously tracked value. 311*aa75f4d3SHarshad Shirwadkar * 312*aa75f4d3SHarshad Shirwadkar * If enqueue is set, this function enqueues the inode in fast commit list. 313*aa75f4d3SHarshad Shirwadkar */ 314*aa75f4d3SHarshad Shirwadkar static int ext4_fc_track_template( 315*aa75f4d3SHarshad Shirwadkar struct inode *inode, int (*__fc_track_fn)(struct inode *, void *, bool), 316*aa75f4d3SHarshad Shirwadkar void *args, int enqueue) 317*aa75f4d3SHarshad Shirwadkar { 318*aa75f4d3SHarshad Shirwadkar tid_t running_txn_tid; 319*aa75f4d3SHarshad Shirwadkar bool update = false; 320*aa75f4d3SHarshad Shirwadkar struct ext4_inode_info *ei = EXT4_I(inode); 321*aa75f4d3SHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 322*aa75f4d3SHarshad Shirwadkar int ret; 323*aa75f4d3SHarshad Shirwadkar 324*aa75f4d3SHarshad Shirwadkar if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT)) 325*aa75f4d3SHarshad Shirwadkar return -EOPNOTSUPP; 326*aa75f4d3SHarshad Shirwadkar 327*aa75f4d3SHarshad Shirwadkar if (ext4_fc_is_ineligible(inode->i_sb)) 328*aa75f4d3SHarshad Shirwadkar return -EINVAL; 329*aa75f4d3SHarshad Shirwadkar 330*aa75f4d3SHarshad Shirwadkar running_txn_tid = sbi->s_journal ? 331*aa75f4d3SHarshad Shirwadkar sbi->s_journal->j_commit_sequence + 1 : 0; 332*aa75f4d3SHarshad Shirwadkar 333*aa75f4d3SHarshad Shirwadkar mutex_lock(&ei->i_fc_lock); 334*aa75f4d3SHarshad Shirwadkar if (running_txn_tid == ei->i_sync_tid) { 335*aa75f4d3SHarshad Shirwadkar update = true; 336*aa75f4d3SHarshad Shirwadkar } else { 337*aa75f4d3SHarshad Shirwadkar ext4_fc_reset_inode(inode); 338*aa75f4d3SHarshad Shirwadkar ei->i_sync_tid = running_txn_tid; 339*aa75f4d3SHarshad Shirwadkar } 340*aa75f4d3SHarshad Shirwadkar ret = __fc_track_fn(inode, args, update); 341*aa75f4d3SHarshad Shirwadkar mutex_unlock(&ei->i_fc_lock); 342*aa75f4d3SHarshad Shirwadkar 343*aa75f4d3SHarshad Shirwadkar if (!enqueue) 344*aa75f4d3SHarshad Shirwadkar return ret; 345*aa75f4d3SHarshad Shirwadkar 346*aa75f4d3SHarshad Shirwadkar spin_lock(&sbi->s_fc_lock); 347*aa75f4d3SHarshad Shirwadkar if (list_empty(&EXT4_I(inode)->i_fc_list)) 348*aa75f4d3SHarshad Shirwadkar list_add_tail(&EXT4_I(inode)->i_fc_list, 349*aa75f4d3SHarshad Shirwadkar (sbi->s_mount_state & EXT4_FC_COMMITTING) ? 350*aa75f4d3SHarshad Shirwadkar &sbi->s_fc_q[FC_Q_STAGING] : 351*aa75f4d3SHarshad Shirwadkar &sbi->s_fc_q[FC_Q_MAIN]); 352*aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock); 353*aa75f4d3SHarshad Shirwadkar 354*aa75f4d3SHarshad Shirwadkar return ret; 355*aa75f4d3SHarshad Shirwadkar } 356*aa75f4d3SHarshad Shirwadkar 357*aa75f4d3SHarshad Shirwadkar struct __track_dentry_update_args { 358*aa75f4d3SHarshad Shirwadkar struct dentry *dentry; 359*aa75f4d3SHarshad Shirwadkar int op; 360*aa75f4d3SHarshad Shirwadkar }; 361*aa75f4d3SHarshad Shirwadkar 362*aa75f4d3SHarshad Shirwadkar /* __track_fn for directory entry updates. Called with ei->i_fc_lock. */ 363*aa75f4d3SHarshad Shirwadkar static int __track_dentry_update(struct inode *inode, void *arg, bool update) 364*aa75f4d3SHarshad Shirwadkar { 365*aa75f4d3SHarshad Shirwadkar struct ext4_fc_dentry_update *node; 366*aa75f4d3SHarshad Shirwadkar struct ext4_inode_info *ei = EXT4_I(inode); 367*aa75f4d3SHarshad Shirwadkar struct __track_dentry_update_args *dentry_update = 368*aa75f4d3SHarshad Shirwadkar (struct __track_dentry_update_args *)arg; 369*aa75f4d3SHarshad Shirwadkar struct dentry *dentry = dentry_update->dentry; 370*aa75f4d3SHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 371*aa75f4d3SHarshad Shirwadkar 372*aa75f4d3SHarshad Shirwadkar mutex_unlock(&ei->i_fc_lock); 373*aa75f4d3SHarshad Shirwadkar node = kmem_cache_alloc(ext4_fc_dentry_cachep, GFP_NOFS); 374*aa75f4d3SHarshad Shirwadkar if (!node) { 375*aa75f4d3SHarshad Shirwadkar ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_MEM); 376*aa75f4d3SHarshad Shirwadkar mutex_lock(&ei->i_fc_lock); 377*aa75f4d3SHarshad Shirwadkar return -ENOMEM; 378*aa75f4d3SHarshad Shirwadkar } 379*aa75f4d3SHarshad Shirwadkar 380*aa75f4d3SHarshad Shirwadkar node->fcd_op = dentry_update->op; 381*aa75f4d3SHarshad Shirwadkar node->fcd_parent = dentry->d_parent->d_inode->i_ino; 382*aa75f4d3SHarshad Shirwadkar node->fcd_ino = inode->i_ino; 383*aa75f4d3SHarshad Shirwadkar if (dentry->d_name.len > DNAME_INLINE_LEN) { 384*aa75f4d3SHarshad Shirwadkar node->fcd_name.name = kmalloc(dentry->d_name.len, GFP_NOFS); 385*aa75f4d3SHarshad Shirwadkar if (!node->fcd_name.name) { 386*aa75f4d3SHarshad Shirwadkar kmem_cache_free(ext4_fc_dentry_cachep, node); 387*aa75f4d3SHarshad Shirwadkar ext4_fc_mark_ineligible(inode->i_sb, 388*aa75f4d3SHarshad Shirwadkar EXT4_FC_REASON_MEM); 389*aa75f4d3SHarshad Shirwadkar mutex_lock(&ei->i_fc_lock); 390*aa75f4d3SHarshad Shirwadkar return -ENOMEM; 391*aa75f4d3SHarshad Shirwadkar } 392*aa75f4d3SHarshad Shirwadkar memcpy((u8 *)node->fcd_name.name, dentry->d_name.name, 393*aa75f4d3SHarshad Shirwadkar dentry->d_name.len); 394*aa75f4d3SHarshad Shirwadkar } else { 395*aa75f4d3SHarshad Shirwadkar memcpy(node->fcd_iname, dentry->d_name.name, 396*aa75f4d3SHarshad Shirwadkar dentry->d_name.len); 397*aa75f4d3SHarshad Shirwadkar node->fcd_name.name = node->fcd_iname; 398*aa75f4d3SHarshad Shirwadkar } 399*aa75f4d3SHarshad Shirwadkar node->fcd_name.len = dentry->d_name.len; 400*aa75f4d3SHarshad Shirwadkar 401*aa75f4d3SHarshad Shirwadkar spin_lock(&sbi->s_fc_lock); 402*aa75f4d3SHarshad Shirwadkar if (sbi->s_mount_state & EXT4_FC_COMMITTING) 403*aa75f4d3SHarshad Shirwadkar list_add_tail(&node->fcd_list, 404*aa75f4d3SHarshad Shirwadkar &sbi->s_fc_dentry_q[FC_Q_STAGING]); 405*aa75f4d3SHarshad Shirwadkar else 406*aa75f4d3SHarshad Shirwadkar list_add_tail(&node->fcd_list, &sbi->s_fc_dentry_q[FC_Q_MAIN]); 407*aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock); 408*aa75f4d3SHarshad Shirwadkar mutex_lock(&ei->i_fc_lock); 409*aa75f4d3SHarshad Shirwadkar 410*aa75f4d3SHarshad Shirwadkar return 0; 411*aa75f4d3SHarshad Shirwadkar } 412*aa75f4d3SHarshad Shirwadkar 413*aa75f4d3SHarshad Shirwadkar void ext4_fc_track_unlink(struct inode *inode, struct dentry *dentry) 414*aa75f4d3SHarshad Shirwadkar { 415*aa75f4d3SHarshad Shirwadkar struct __track_dentry_update_args args; 416*aa75f4d3SHarshad Shirwadkar int ret; 417*aa75f4d3SHarshad Shirwadkar 418*aa75f4d3SHarshad Shirwadkar args.dentry = dentry; 419*aa75f4d3SHarshad Shirwadkar args.op = EXT4_FC_TAG_UNLINK; 420*aa75f4d3SHarshad Shirwadkar 421*aa75f4d3SHarshad Shirwadkar ret = ext4_fc_track_template(inode, __track_dentry_update, 422*aa75f4d3SHarshad Shirwadkar (void *)&args, 0); 423*aa75f4d3SHarshad Shirwadkar trace_ext4_fc_track_unlink(inode, dentry, ret); 424*aa75f4d3SHarshad Shirwadkar } 425*aa75f4d3SHarshad Shirwadkar 426*aa75f4d3SHarshad Shirwadkar void ext4_fc_track_link(struct inode *inode, struct dentry *dentry) 427*aa75f4d3SHarshad Shirwadkar { 428*aa75f4d3SHarshad Shirwadkar struct __track_dentry_update_args args; 429*aa75f4d3SHarshad Shirwadkar int ret; 430*aa75f4d3SHarshad Shirwadkar 431*aa75f4d3SHarshad Shirwadkar args.dentry = dentry; 432*aa75f4d3SHarshad Shirwadkar args.op = EXT4_FC_TAG_LINK; 433*aa75f4d3SHarshad Shirwadkar 434*aa75f4d3SHarshad Shirwadkar ret = ext4_fc_track_template(inode, __track_dentry_update, 435*aa75f4d3SHarshad Shirwadkar (void *)&args, 0); 436*aa75f4d3SHarshad Shirwadkar trace_ext4_fc_track_link(inode, dentry, ret); 437*aa75f4d3SHarshad Shirwadkar } 438*aa75f4d3SHarshad Shirwadkar 439*aa75f4d3SHarshad Shirwadkar void ext4_fc_track_create(struct inode *inode, struct dentry *dentry) 440*aa75f4d3SHarshad Shirwadkar { 441*aa75f4d3SHarshad Shirwadkar struct __track_dentry_update_args args; 442*aa75f4d3SHarshad Shirwadkar int ret; 443*aa75f4d3SHarshad Shirwadkar 444*aa75f4d3SHarshad Shirwadkar args.dentry = dentry; 445*aa75f4d3SHarshad Shirwadkar args.op = EXT4_FC_TAG_CREAT; 446*aa75f4d3SHarshad Shirwadkar 447*aa75f4d3SHarshad Shirwadkar ret = ext4_fc_track_template(inode, __track_dentry_update, 448*aa75f4d3SHarshad Shirwadkar (void *)&args, 0); 449*aa75f4d3SHarshad Shirwadkar trace_ext4_fc_track_create(inode, dentry, ret); 450*aa75f4d3SHarshad Shirwadkar } 451*aa75f4d3SHarshad Shirwadkar 452*aa75f4d3SHarshad Shirwadkar /* __track_fn for inode tracking */ 453*aa75f4d3SHarshad Shirwadkar static int __track_inode(struct inode *inode, void *arg, bool update) 454*aa75f4d3SHarshad Shirwadkar { 455*aa75f4d3SHarshad Shirwadkar if (update) 456*aa75f4d3SHarshad Shirwadkar return -EEXIST; 457*aa75f4d3SHarshad Shirwadkar 458*aa75f4d3SHarshad Shirwadkar EXT4_I(inode)->i_fc_lblk_len = 0; 459*aa75f4d3SHarshad Shirwadkar 460*aa75f4d3SHarshad Shirwadkar return 0; 461*aa75f4d3SHarshad Shirwadkar } 462*aa75f4d3SHarshad Shirwadkar 463*aa75f4d3SHarshad Shirwadkar void ext4_fc_track_inode(struct inode *inode) 464*aa75f4d3SHarshad Shirwadkar { 465*aa75f4d3SHarshad Shirwadkar int ret; 466*aa75f4d3SHarshad Shirwadkar 467*aa75f4d3SHarshad Shirwadkar if (S_ISDIR(inode->i_mode)) 468*aa75f4d3SHarshad Shirwadkar return; 469*aa75f4d3SHarshad Shirwadkar 470*aa75f4d3SHarshad Shirwadkar ret = ext4_fc_track_template(inode, __track_inode, NULL, 1); 471*aa75f4d3SHarshad Shirwadkar trace_ext4_fc_track_inode(inode, ret); 472*aa75f4d3SHarshad Shirwadkar } 473*aa75f4d3SHarshad Shirwadkar 474*aa75f4d3SHarshad Shirwadkar struct __track_range_args { 475*aa75f4d3SHarshad Shirwadkar ext4_lblk_t start, end; 476*aa75f4d3SHarshad Shirwadkar }; 477*aa75f4d3SHarshad Shirwadkar 478*aa75f4d3SHarshad Shirwadkar /* __track_fn for tracking data updates */ 479*aa75f4d3SHarshad Shirwadkar static int __track_range(struct inode *inode, void *arg, bool update) 480*aa75f4d3SHarshad Shirwadkar { 481*aa75f4d3SHarshad Shirwadkar struct ext4_inode_info *ei = EXT4_I(inode); 482*aa75f4d3SHarshad Shirwadkar ext4_lblk_t oldstart; 483*aa75f4d3SHarshad Shirwadkar struct __track_range_args *__arg = 484*aa75f4d3SHarshad Shirwadkar (struct __track_range_args *)arg; 485*aa75f4d3SHarshad Shirwadkar 486*aa75f4d3SHarshad Shirwadkar if (inode->i_ino < EXT4_FIRST_INO(inode->i_sb)) { 487*aa75f4d3SHarshad Shirwadkar ext4_debug("Special inode %ld being modified\n", inode->i_ino); 488*aa75f4d3SHarshad Shirwadkar return -ECANCELED; 489*aa75f4d3SHarshad Shirwadkar } 490*aa75f4d3SHarshad Shirwadkar 491*aa75f4d3SHarshad Shirwadkar oldstart = ei->i_fc_lblk_start; 492*aa75f4d3SHarshad Shirwadkar 493*aa75f4d3SHarshad Shirwadkar if (update && ei->i_fc_lblk_len > 0) { 494*aa75f4d3SHarshad Shirwadkar ei->i_fc_lblk_start = min(ei->i_fc_lblk_start, __arg->start); 495*aa75f4d3SHarshad Shirwadkar ei->i_fc_lblk_len = 496*aa75f4d3SHarshad Shirwadkar max(oldstart + ei->i_fc_lblk_len - 1, __arg->end) - 497*aa75f4d3SHarshad Shirwadkar ei->i_fc_lblk_start + 1; 498*aa75f4d3SHarshad Shirwadkar } else { 499*aa75f4d3SHarshad Shirwadkar ei->i_fc_lblk_start = __arg->start; 500*aa75f4d3SHarshad Shirwadkar ei->i_fc_lblk_len = __arg->end - __arg->start + 1; 501*aa75f4d3SHarshad Shirwadkar } 502*aa75f4d3SHarshad Shirwadkar 503*aa75f4d3SHarshad Shirwadkar return 0; 504*aa75f4d3SHarshad Shirwadkar } 505*aa75f4d3SHarshad Shirwadkar 506*aa75f4d3SHarshad Shirwadkar void ext4_fc_track_range(struct inode *inode, ext4_lblk_t start, 507*aa75f4d3SHarshad Shirwadkar ext4_lblk_t end) 508*aa75f4d3SHarshad Shirwadkar { 509*aa75f4d3SHarshad Shirwadkar struct __track_range_args args; 510*aa75f4d3SHarshad Shirwadkar int ret; 511*aa75f4d3SHarshad Shirwadkar 512*aa75f4d3SHarshad Shirwadkar if (S_ISDIR(inode->i_mode)) 513*aa75f4d3SHarshad Shirwadkar return; 514*aa75f4d3SHarshad Shirwadkar 515*aa75f4d3SHarshad Shirwadkar args.start = start; 516*aa75f4d3SHarshad Shirwadkar args.end = end; 517*aa75f4d3SHarshad Shirwadkar 518*aa75f4d3SHarshad Shirwadkar ret = ext4_fc_track_template(inode, __track_range, &args, 1); 519*aa75f4d3SHarshad Shirwadkar 520*aa75f4d3SHarshad Shirwadkar trace_ext4_fc_track_range(inode, start, end, ret); 521*aa75f4d3SHarshad Shirwadkar } 522*aa75f4d3SHarshad Shirwadkar 523*aa75f4d3SHarshad Shirwadkar static void ext4_fc_submit_bh(struct super_block *sb) 524*aa75f4d3SHarshad Shirwadkar { 525*aa75f4d3SHarshad Shirwadkar int write_flags = REQ_SYNC; 526*aa75f4d3SHarshad Shirwadkar struct buffer_head *bh = EXT4_SB(sb)->s_fc_bh; 527*aa75f4d3SHarshad Shirwadkar 528*aa75f4d3SHarshad Shirwadkar if (test_opt(sb, BARRIER)) 529*aa75f4d3SHarshad Shirwadkar write_flags |= REQ_FUA | REQ_PREFLUSH; 530*aa75f4d3SHarshad Shirwadkar lock_buffer(bh); 531*aa75f4d3SHarshad Shirwadkar clear_buffer_dirty(bh); 532*aa75f4d3SHarshad Shirwadkar set_buffer_uptodate(bh); 533*aa75f4d3SHarshad Shirwadkar bh->b_end_io = ext4_end_buffer_io_sync; 534*aa75f4d3SHarshad Shirwadkar submit_bh(REQ_OP_WRITE, write_flags, bh); 535*aa75f4d3SHarshad Shirwadkar EXT4_SB(sb)->s_fc_bh = NULL; 536*aa75f4d3SHarshad Shirwadkar } 537*aa75f4d3SHarshad Shirwadkar 538*aa75f4d3SHarshad Shirwadkar /* Ext4 commit path routines */ 539*aa75f4d3SHarshad Shirwadkar 540*aa75f4d3SHarshad Shirwadkar /* memzero and update CRC */ 541*aa75f4d3SHarshad Shirwadkar static void *ext4_fc_memzero(struct super_block *sb, void *dst, int len, 542*aa75f4d3SHarshad Shirwadkar u32 *crc) 543*aa75f4d3SHarshad Shirwadkar { 544*aa75f4d3SHarshad Shirwadkar void *ret; 545*aa75f4d3SHarshad Shirwadkar 546*aa75f4d3SHarshad Shirwadkar ret = memset(dst, 0, len); 547*aa75f4d3SHarshad Shirwadkar if (crc) 548*aa75f4d3SHarshad Shirwadkar *crc = ext4_chksum(EXT4_SB(sb), *crc, dst, len); 549*aa75f4d3SHarshad Shirwadkar return ret; 550*aa75f4d3SHarshad Shirwadkar } 551*aa75f4d3SHarshad Shirwadkar 552*aa75f4d3SHarshad Shirwadkar /* 553*aa75f4d3SHarshad Shirwadkar * Allocate len bytes on a fast commit buffer. 554*aa75f4d3SHarshad Shirwadkar * 555*aa75f4d3SHarshad Shirwadkar * During the commit time this function is used to manage fast commit 556*aa75f4d3SHarshad Shirwadkar * block space. We don't split a fast commit log onto different 557*aa75f4d3SHarshad Shirwadkar * blocks. So this function makes sure that if there's not enough space 558*aa75f4d3SHarshad Shirwadkar * on the current block, the remaining space in the current block is 559*aa75f4d3SHarshad Shirwadkar * marked as unused by adding EXT4_FC_TAG_PAD tag. In that case, 560*aa75f4d3SHarshad Shirwadkar * new block is from jbd2 and CRC is updated to reflect the padding 561*aa75f4d3SHarshad Shirwadkar * we added. 562*aa75f4d3SHarshad Shirwadkar */ 563*aa75f4d3SHarshad Shirwadkar static u8 *ext4_fc_reserve_space(struct super_block *sb, int len, u32 *crc) 564*aa75f4d3SHarshad Shirwadkar { 565*aa75f4d3SHarshad Shirwadkar struct ext4_fc_tl *tl; 566*aa75f4d3SHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB(sb); 567*aa75f4d3SHarshad Shirwadkar struct buffer_head *bh; 568*aa75f4d3SHarshad Shirwadkar int bsize = sbi->s_journal->j_blocksize; 569*aa75f4d3SHarshad Shirwadkar int ret, off = sbi->s_fc_bytes % bsize; 570*aa75f4d3SHarshad Shirwadkar int pad_len; 571*aa75f4d3SHarshad Shirwadkar 572*aa75f4d3SHarshad Shirwadkar /* 573*aa75f4d3SHarshad Shirwadkar * After allocating len, we should have space at least for a 0 byte 574*aa75f4d3SHarshad Shirwadkar * padding. 575*aa75f4d3SHarshad Shirwadkar */ 576*aa75f4d3SHarshad Shirwadkar if (len + sizeof(struct ext4_fc_tl) > bsize) 577*aa75f4d3SHarshad Shirwadkar return NULL; 578*aa75f4d3SHarshad Shirwadkar 579*aa75f4d3SHarshad Shirwadkar if (bsize - off - 1 > len + sizeof(struct ext4_fc_tl)) { 580*aa75f4d3SHarshad Shirwadkar /* 581*aa75f4d3SHarshad Shirwadkar * Only allocate from current buffer if we have enough space for 582*aa75f4d3SHarshad Shirwadkar * this request AND we have space to add a zero byte padding. 583*aa75f4d3SHarshad Shirwadkar */ 584*aa75f4d3SHarshad Shirwadkar if (!sbi->s_fc_bh) { 585*aa75f4d3SHarshad Shirwadkar ret = jbd2_fc_get_buf(EXT4_SB(sb)->s_journal, &bh); 586*aa75f4d3SHarshad Shirwadkar if (ret) 587*aa75f4d3SHarshad Shirwadkar return NULL; 588*aa75f4d3SHarshad Shirwadkar sbi->s_fc_bh = bh; 589*aa75f4d3SHarshad Shirwadkar } 590*aa75f4d3SHarshad Shirwadkar sbi->s_fc_bytes += len; 591*aa75f4d3SHarshad Shirwadkar return sbi->s_fc_bh->b_data + off; 592*aa75f4d3SHarshad Shirwadkar } 593*aa75f4d3SHarshad Shirwadkar /* Need to add PAD tag */ 594*aa75f4d3SHarshad Shirwadkar tl = (struct ext4_fc_tl *)(sbi->s_fc_bh->b_data + off); 595*aa75f4d3SHarshad Shirwadkar tl->fc_tag = cpu_to_le16(EXT4_FC_TAG_PAD); 596*aa75f4d3SHarshad Shirwadkar pad_len = bsize - off - 1 - sizeof(struct ext4_fc_tl); 597*aa75f4d3SHarshad Shirwadkar tl->fc_len = cpu_to_le16(pad_len); 598*aa75f4d3SHarshad Shirwadkar if (crc) 599*aa75f4d3SHarshad Shirwadkar *crc = ext4_chksum(sbi, *crc, tl, sizeof(*tl)); 600*aa75f4d3SHarshad Shirwadkar if (pad_len > 0) 601*aa75f4d3SHarshad Shirwadkar ext4_fc_memzero(sb, tl + 1, pad_len, crc); 602*aa75f4d3SHarshad Shirwadkar ext4_fc_submit_bh(sb); 603*aa75f4d3SHarshad Shirwadkar 604*aa75f4d3SHarshad Shirwadkar ret = jbd2_fc_get_buf(EXT4_SB(sb)->s_journal, &bh); 605*aa75f4d3SHarshad Shirwadkar if (ret) 606*aa75f4d3SHarshad Shirwadkar return NULL; 607*aa75f4d3SHarshad Shirwadkar sbi->s_fc_bh = bh; 608*aa75f4d3SHarshad Shirwadkar sbi->s_fc_bytes = (sbi->s_fc_bytes / bsize + 1) * bsize + len; 609*aa75f4d3SHarshad Shirwadkar return sbi->s_fc_bh->b_data; 610*aa75f4d3SHarshad Shirwadkar } 611*aa75f4d3SHarshad Shirwadkar 612*aa75f4d3SHarshad Shirwadkar /* memcpy to fc reserved space and update CRC */ 613*aa75f4d3SHarshad Shirwadkar static void *ext4_fc_memcpy(struct super_block *sb, void *dst, const void *src, 614*aa75f4d3SHarshad Shirwadkar int len, u32 *crc) 615*aa75f4d3SHarshad Shirwadkar { 616*aa75f4d3SHarshad Shirwadkar if (crc) 617*aa75f4d3SHarshad Shirwadkar *crc = ext4_chksum(EXT4_SB(sb), *crc, src, len); 618*aa75f4d3SHarshad Shirwadkar return memcpy(dst, src, len); 619*aa75f4d3SHarshad Shirwadkar } 620*aa75f4d3SHarshad Shirwadkar 621*aa75f4d3SHarshad Shirwadkar /* 622*aa75f4d3SHarshad Shirwadkar * Complete a fast commit by writing tail tag. 623*aa75f4d3SHarshad Shirwadkar * 624*aa75f4d3SHarshad Shirwadkar * Writing tail tag marks the end of a fast commit. In order to guarantee 625*aa75f4d3SHarshad Shirwadkar * atomicity, after writing tail tag, even if there's space remaining 626*aa75f4d3SHarshad Shirwadkar * in the block, next commit shouldn't use it. That's why tail tag 627*aa75f4d3SHarshad Shirwadkar * has the length as that of the remaining space on the block. 628*aa75f4d3SHarshad Shirwadkar */ 629*aa75f4d3SHarshad Shirwadkar static int ext4_fc_write_tail(struct super_block *sb, u32 crc) 630*aa75f4d3SHarshad Shirwadkar { 631*aa75f4d3SHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB(sb); 632*aa75f4d3SHarshad Shirwadkar struct ext4_fc_tl tl; 633*aa75f4d3SHarshad Shirwadkar struct ext4_fc_tail tail; 634*aa75f4d3SHarshad Shirwadkar int off, bsize = sbi->s_journal->j_blocksize; 635*aa75f4d3SHarshad Shirwadkar u8 *dst; 636*aa75f4d3SHarshad Shirwadkar 637*aa75f4d3SHarshad Shirwadkar /* 638*aa75f4d3SHarshad Shirwadkar * ext4_fc_reserve_space takes care of allocating an extra block if 639*aa75f4d3SHarshad Shirwadkar * there's no enough space on this block for accommodating this tail. 640*aa75f4d3SHarshad Shirwadkar */ 641*aa75f4d3SHarshad Shirwadkar dst = ext4_fc_reserve_space(sb, sizeof(tl) + sizeof(tail), &crc); 642*aa75f4d3SHarshad Shirwadkar if (!dst) 643*aa75f4d3SHarshad Shirwadkar return -ENOSPC; 644*aa75f4d3SHarshad Shirwadkar 645*aa75f4d3SHarshad Shirwadkar off = sbi->s_fc_bytes % bsize; 646*aa75f4d3SHarshad Shirwadkar 647*aa75f4d3SHarshad Shirwadkar tl.fc_tag = cpu_to_le16(EXT4_FC_TAG_TAIL); 648*aa75f4d3SHarshad Shirwadkar tl.fc_len = cpu_to_le16(bsize - off - 1 + sizeof(struct ext4_fc_tail)); 649*aa75f4d3SHarshad Shirwadkar sbi->s_fc_bytes = round_up(sbi->s_fc_bytes, bsize); 650*aa75f4d3SHarshad Shirwadkar 651*aa75f4d3SHarshad Shirwadkar ext4_fc_memcpy(sb, dst, &tl, sizeof(tl), &crc); 652*aa75f4d3SHarshad Shirwadkar dst += sizeof(tl); 653*aa75f4d3SHarshad Shirwadkar tail.fc_tid = cpu_to_le32(sbi->s_journal->j_running_transaction->t_tid); 654*aa75f4d3SHarshad Shirwadkar ext4_fc_memcpy(sb, dst, &tail.fc_tid, sizeof(tail.fc_tid), &crc); 655*aa75f4d3SHarshad Shirwadkar dst += sizeof(tail.fc_tid); 656*aa75f4d3SHarshad Shirwadkar tail.fc_crc = cpu_to_le32(crc); 657*aa75f4d3SHarshad Shirwadkar ext4_fc_memcpy(sb, dst, &tail.fc_crc, sizeof(tail.fc_crc), NULL); 658*aa75f4d3SHarshad Shirwadkar 659*aa75f4d3SHarshad Shirwadkar ext4_fc_submit_bh(sb); 660*aa75f4d3SHarshad Shirwadkar 661*aa75f4d3SHarshad Shirwadkar return 0; 662*aa75f4d3SHarshad Shirwadkar } 663*aa75f4d3SHarshad Shirwadkar 664*aa75f4d3SHarshad Shirwadkar /* 665*aa75f4d3SHarshad Shirwadkar * Adds tag, length, value and updates CRC. Returns true if tlv was added. 666*aa75f4d3SHarshad Shirwadkar * Returns false if there's not enough space. 667*aa75f4d3SHarshad Shirwadkar */ 668*aa75f4d3SHarshad Shirwadkar static bool ext4_fc_add_tlv(struct super_block *sb, u16 tag, u16 len, u8 *val, 669*aa75f4d3SHarshad Shirwadkar u32 *crc) 670*aa75f4d3SHarshad Shirwadkar { 671*aa75f4d3SHarshad Shirwadkar struct ext4_fc_tl tl; 672*aa75f4d3SHarshad Shirwadkar u8 *dst; 673*aa75f4d3SHarshad Shirwadkar 674*aa75f4d3SHarshad Shirwadkar dst = ext4_fc_reserve_space(sb, sizeof(tl) + len, crc); 675*aa75f4d3SHarshad Shirwadkar if (!dst) 676*aa75f4d3SHarshad Shirwadkar return false; 677*aa75f4d3SHarshad Shirwadkar 678*aa75f4d3SHarshad Shirwadkar tl.fc_tag = cpu_to_le16(tag); 679*aa75f4d3SHarshad Shirwadkar tl.fc_len = cpu_to_le16(len); 680*aa75f4d3SHarshad Shirwadkar 681*aa75f4d3SHarshad Shirwadkar ext4_fc_memcpy(sb, dst, &tl, sizeof(tl), crc); 682*aa75f4d3SHarshad Shirwadkar ext4_fc_memcpy(sb, dst + sizeof(tl), val, len, crc); 683*aa75f4d3SHarshad Shirwadkar 684*aa75f4d3SHarshad Shirwadkar return true; 685*aa75f4d3SHarshad Shirwadkar } 686*aa75f4d3SHarshad Shirwadkar 687*aa75f4d3SHarshad Shirwadkar /* Same as above, but adds dentry tlv. */ 688*aa75f4d3SHarshad Shirwadkar static bool ext4_fc_add_dentry_tlv(struct super_block *sb, u16 tag, 689*aa75f4d3SHarshad Shirwadkar int parent_ino, int ino, int dlen, 690*aa75f4d3SHarshad Shirwadkar const unsigned char *dname, 691*aa75f4d3SHarshad Shirwadkar u32 *crc) 692*aa75f4d3SHarshad Shirwadkar { 693*aa75f4d3SHarshad Shirwadkar struct ext4_fc_dentry_info fcd; 694*aa75f4d3SHarshad Shirwadkar struct ext4_fc_tl tl; 695*aa75f4d3SHarshad Shirwadkar u8 *dst = ext4_fc_reserve_space(sb, sizeof(tl) + sizeof(fcd) + dlen, 696*aa75f4d3SHarshad Shirwadkar crc); 697*aa75f4d3SHarshad Shirwadkar 698*aa75f4d3SHarshad Shirwadkar if (!dst) 699*aa75f4d3SHarshad Shirwadkar return false; 700*aa75f4d3SHarshad Shirwadkar 701*aa75f4d3SHarshad Shirwadkar fcd.fc_parent_ino = cpu_to_le32(parent_ino); 702*aa75f4d3SHarshad Shirwadkar fcd.fc_ino = cpu_to_le32(ino); 703*aa75f4d3SHarshad Shirwadkar tl.fc_tag = cpu_to_le16(tag); 704*aa75f4d3SHarshad Shirwadkar tl.fc_len = cpu_to_le16(sizeof(fcd) + dlen); 705*aa75f4d3SHarshad Shirwadkar ext4_fc_memcpy(sb, dst, &tl, sizeof(tl), crc); 706*aa75f4d3SHarshad Shirwadkar dst += sizeof(tl); 707*aa75f4d3SHarshad Shirwadkar ext4_fc_memcpy(sb, dst, &fcd, sizeof(fcd), crc); 708*aa75f4d3SHarshad Shirwadkar dst += sizeof(fcd); 709*aa75f4d3SHarshad Shirwadkar ext4_fc_memcpy(sb, dst, dname, dlen, crc); 710*aa75f4d3SHarshad Shirwadkar dst += dlen; 711*aa75f4d3SHarshad Shirwadkar 712*aa75f4d3SHarshad Shirwadkar return true; 713*aa75f4d3SHarshad Shirwadkar } 714*aa75f4d3SHarshad Shirwadkar 715*aa75f4d3SHarshad Shirwadkar /* 716*aa75f4d3SHarshad Shirwadkar * Writes inode in the fast commit space under TLV with tag @tag. 717*aa75f4d3SHarshad Shirwadkar * Returns 0 on success, error on failure. 718*aa75f4d3SHarshad Shirwadkar */ 719*aa75f4d3SHarshad Shirwadkar static int ext4_fc_write_inode(struct inode *inode, u32 *crc) 720*aa75f4d3SHarshad Shirwadkar { 721*aa75f4d3SHarshad Shirwadkar struct ext4_inode_info *ei = EXT4_I(inode); 722*aa75f4d3SHarshad Shirwadkar int inode_len = EXT4_GOOD_OLD_INODE_SIZE; 723*aa75f4d3SHarshad Shirwadkar int ret; 724*aa75f4d3SHarshad Shirwadkar struct ext4_iloc iloc; 725*aa75f4d3SHarshad Shirwadkar struct ext4_fc_inode fc_inode; 726*aa75f4d3SHarshad Shirwadkar struct ext4_fc_tl tl; 727*aa75f4d3SHarshad Shirwadkar u8 *dst; 728*aa75f4d3SHarshad Shirwadkar 729*aa75f4d3SHarshad Shirwadkar ret = ext4_get_inode_loc(inode, &iloc); 730*aa75f4d3SHarshad Shirwadkar if (ret) 731*aa75f4d3SHarshad Shirwadkar return ret; 732*aa75f4d3SHarshad Shirwadkar 733*aa75f4d3SHarshad Shirwadkar if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) 734*aa75f4d3SHarshad Shirwadkar inode_len += ei->i_extra_isize; 735*aa75f4d3SHarshad Shirwadkar 736*aa75f4d3SHarshad Shirwadkar fc_inode.fc_ino = cpu_to_le32(inode->i_ino); 737*aa75f4d3SHarshad Shirwadkar tl.fc_tag = cpu_to_le16(EXT4_FC_TAG_INODE); 738*aa75f4d3SHarshad Shirwadkar tl.fc_len = cpu_to_le16(inode_len + sizeof(fc_inode.fc_ino)); 739*aa75f4d3SHarshad Shirwadkar 740*aa75f4d3SHarshad Shirwadkar dst = ext4_fc_reserve_space(inode->i_sb, 741*aa75f4d3SHarshad Shirwadkar sizeof(tl) + inode_len + sizeof(fc_inode.fc_ino), crc); 742*aa75f4d3SHarshad Shirwadkar if (!dst) 743*aa75f4d3SHarshad Shirwadkar return -ECANCELED; 744*aa75f4d3SHarshad Shirwadkar 745*aa75f4d3SHarshad Shirwadkar if (!ext4_fc_memcpy(inode->i_sb, dst, &tl, sizeof(tl), crc)) 746*aa75f4d3SHarshad Shirwadkar return -ECANCELED; 747*aa75f4d3SHarshad Shirwadkar dst += sizeof(tl); 748*aa75f4d3SHarshad Shirwadkar if (!ext4_fc_memcpy(inode->i_sb, dst, &fc_inode, sizeof(fc_inode), crc)) 749*aa75f4d3SHarshad Shirwadkar return -ECANCELED; 750*aa75f4d3SHarshad Shirwadkar dst += sizeof(fc_inode); 751*aa75f4d3SHarshad Shirwadkar if (!ext4_fc_memcpy(inode->i_sb, dst, (u8 *)ext4_raw_inode(&iloc), 752*aa75f4d3SHarshad Shirwadkar inode_len, crc)) 753*aa75f4d3SHarshad Shirwadkar return -ECANCELED; 754*aa75f4d3SHarshad Shirwadkar 755*aa75f4d3SHarshad Shirwadkar return 0; 756*aa75f4d3SHarshad Shirwadkar } 757*aa75f4d3SHarshad Shirwadkar 758*aa75f4d3SHarshad Shirwadkar /* 759*aa75f4d3SHarshad Shirwadkar * Writes updated data ranges for the inode in question. Updates CRC. 760*aa75f4d3SHarshad Shirwadkar * Returns 0 on success, error otherwise. 761*aa75f4d3SHarshad Shirwadkar */ 762*aa75f4d3SHarshad Shirwadkar static int ext4_fc_write_inode_data(struct inode *inode, u32 *crc) 763*aa75f4d3SHarshad Shirwadkar { 764*aa75f4d3SHarshad Shirwadkar ext4_lblk_t old_blk_size, cur_lblk_off, new_blk_size; 765*aa75f4d3SHarshad Shirwadkar struct ext4_inode_info *ei = EXT4_I(inode); 766*aa75f4d3SHarshad Shirwadkar struct ext4_map_blocks map; 767*aa75f4d3SHarshad Shirwadkar struct ext4_fc_add_range fc_ext; 768*aa75f4d3SHarshad Shirwadkar struct ext4_fc_del_range lrange; 769*aa75f4d3SHarshad Shirwadkar struct ext4_extent *ex; 770*aa75f4d3SHarshad Shirwadkar int ret; 771*aa75f4d3SHarshad Shirwadkar 772*aa75f4d3SHarshad Shirwadkar mutex_lock(&ei->i_fc_lock); 773*aa75f4d3SHarshad Shirwadkar if (ei->i_fc_lblk_len == 0) { 774*aa75f4d3SHarshad Shirwadkar mutex_unlock(&ei->i_fc_lock); 775*aa75f4d3SHarshad Shirwadkar return 0; 776*aa75f4d3SHarshad Shirwadkar } 777*aa75f4d3SHarshad Shirwadkar old_blk_size = ei->i_fc_lblk_start; 778*aa75f4d3SHarshad Shirwadkar new_blk_size = ei->i_fc_lblk_start + ei->i_fc_lblk_len - 1; 779*aa75f4d3SHarshad Shirwadkar ei->i_fc_lblk_len = 0; 780*aa75f4d3SHarshad Shirwadkar mutex_unlock(&ei->i_fc_lock); 781*aa75f4d3SHarshad Shirwadkar 782*aa75f4d3SHarshad Shirwadkar cur_lblk_off = old_blk_size; 783*aa75f4d3SHarshad Shirwadkar jbd_debug(1, "%s: will try writing %d to %d for inode %ld\n", 784*aa75f4d3SHarshad Shirwadkar __func__, cur_lblk_off, new_blk_size, inode->i_ino); 785*aa75f4d3SHarshad Shirwadkar 786*aa75f4d3SHarshad Shirwadkar while (cur_lblk_off <= new_blk_size) { 787*aa75f4d3SHarshad Shirwadkar map.m_lblk = cur_lblk_off; 788*aa75f4d3SHarshad Shirwadkar map.m_len = new_blk_size - cur_lblk_off + 1; 789*aa75f4d3SHarshad Shirwadkar ret = ext4_map_blocks(NULL, inode, &map, 0); 790*aa75f4d3SHarshad Shirwadkar if (ret < 0) 791*aa75f4d3SHarshad Shirwadkar return -ECANCELED; 792*aa75f4d3SHarshad Shirwadkar 793*aa75f4d3SHarshad Shirwadkar if (map.m_len == 0) { 794*aa75f4d3SHarshad Shirwadkar cur_lblk_off++; 795*aa75f4d3SHarshad Shirwadkar continue; 796*aa75f4d3SHarshad Shirwadkar } 797*aa75f4d3SHarshad Shirwadkar 798*aa75f4d3SHarshad Shirwadkar if (ret == 0) { 799*aa75f4d3SHarshad Shirwadkar lrange.fc_ino = cpu_to_le32(inode->i_ino); 800*aa75f4d3SHarshad Shirwadkar lrange.fc_lblk = cpu_to_le32(map.m_lblk); 801*aa75f4d3SHarshad Shirwadkar lrange.fc_len = cpu_to_le32(map.m_len); 802*aa75f4d3SHarshad Shirwadkar if (!ext4_fc_add_tlv(inode->i_sb, EXT4_FC_TAG_DEL_RANGE, 803*aa75f4d3SHarshad Shirwadkar sizeof(lrange), (u8 *)&lrange, crc)) 804*aa75f4d3SHarshad Shirwadkar return -ENOSPC; 805*aa75f4d3SHarshad Shirwadkar } else { 806*aa75f4d3SHarshad Shirwadkar fc_ext.fc_ino = cpu_to_le32(inode->i_ino); 807*aa75f4d3SHarshad Shirwadkar ex = (struct ext4_extent *)&fc_ext.fc_ex; 808*aa75f4d3SHarshad Shirwadkar ex->ee_block = cpu_to_le32(map.m_lblk); 809*aa75f4d3SHarshad Shirwadkar ex->ee_len = cpu_to_le16(map.m_len); 810*aa75f4d3SHarshad Shirwadkar ext4_ext_store_pblock(ex, map.m_pblk); 811*aa75f4d3SHarshad Shirwadkar if (map.m_flags & EXT4_MAP_UNWRITTEN) 812*aa75f4d3SHarshad Shirwadkar ext4_ext_mark_unwritten(ex); 813*aa75f4d3SHarshad Shirwadkar else 814*aa75f4d3SHarshad Shirwadkar ext4_ext_mark_initialized(ex); 815*aa75f4d3SHarshad Shirwadkar if (!ext4_fc_add_tlv(inode->i_sb, EXT4_FC_TAG_ADD_RANGE, 816*aa75f4d3SHarshad Shirwadkar sizeof(fc_ext), (u8 *)&fc_ext, crc)) 817*aa75f4d3SHarshad Shirwadkar return -ENOSPC; 818*aa75f4d3SHarshad Shirwadkar } 819*aa75f4d3SHarshad Shirwadkar 820*aa75f4d3SHarshad Shirwadkar cur_lblk_off += map.m_len; 821*aa75f4d3SHarshad Shirwadkar } 822*aa75f4d3SHarshad Shirwadkar 823*aa75f4d3SHarshad Shirwadkar return 0; 824*aa75f4d3SHarshad Shirwadkar } 825*aa75f4d3SHarshad Shirwadkar 826*aa75f4d3SHarshad Shirwadkar 827*aa75f4d3SHarshad Shirwadkar /* Submit data for all the fast commit inodes */ 828*aa75f4d3SHarshad Shirwadkar static int ext4_fc_submit_inode_data_all(journal_t *journal) 829*aa75f4d3SHarshad Shirwadkar { 830*aa75f4d3SHarshad Shirwadkar struct super_block *sb = (struct super_block *)(journal->j_private); 831*aa75f4d3SHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB(sb); 832*aa75f4d3SHarshad Shirwadkar struct ext4_inode_info *ei; 833*aa75f4d3SHarshad Shirwadkar struct list_head *pos; 834*aa75f4d3SHarshad Shirwadkar int ret = 0; 835*aa75f4d3SHarshad Shirwadkar 836*aa75f4d3SHarshad Shirwadkar spin_lock(&sbi->s_fc_lock); 837*aa75f4d3SHarshad Shirwadkar sbi->s_mount_state |= EXT4_FC_COMMITTING; 838*aa75f4d3SHarshad Shirwadkar list_for_each(pos, &sbi->s_fc_q[FC_Q_MAIN]) { 839*aa75f4d3SHarshad Shirwadkar ei = list_entry(pos, struct ext4_inode_info, i_fc_list); 840*aa75f4d3SHarshad Shirwadkar ext4_set_inode_state(&ei->vfs_inode, EXT4_STATE_FC_COMMITTING); 841*aa75f4d3SHarshad Shirwadkar while (atomic_read(&ei->i_fc_updates)) { 842*aa75f4d3SHarshad Shirwadkar DEFINE_WAIT(wait); 843*aa75f4d3SHarshad Shirwadkar 844*aa75f4d3SHarshad Shirwadkar prepare_to_wait(&ei->i_fc_wait, &wait, 845*aa75f4d3SHarshad Shirwadkar TASK_UNINTERRUPTIBLE); 846*aa75f4d3SHarshad Shirwadkar if (atomic_read(&ei->i_fc_updates)) { 847*aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock); 848*aa75f4d3SHarshad Shirwadkar schedule(); 849*aa75f4d3SHarshad Shirwadkar spin_lock(&sbi->s_fc_lock); 850*aa75f4d3SHarshad Shirwadkar } 851*aa75f4d3SHarshad Shirwadkar finish_wait(&ei->i_fc_wait, &wait); 852*aa75f4d3SHarshad Shirwadkar } 853*aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock); 854*aa75f4d3SHarshad Shirwadkar ret = jbd2_submit_inode_data(ei->jinode); 855*aa75f4d3SHarshad Shirwadkar if (ret) 856*aa75f4d3SHarshad Shirwadkar return ret; 857*aa75f4d3SHarshad Shirwadkar spin_lock(&sbi->s_fc_lock); 858*aa75f4d3SHarshad Shirwadkar } 859*aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock); 860*aa75f4d3SHarshad Shirwadkar 861*aa75f4d3SHarshad Shirwadkar return ret; 862*aa75f4d3SHarshad Shirwadkar } 863*aa75f4d3SHarshad Shirwadkar 864*aa75f4d3SHarshad Shirwadkar /* Wait for completion of data for all the fast commit inodes */ 865*aa75f4d3SHarshad Shirwadkar static int ext4_fc_wait_inode_data_all(journal_t *journal) 866*aa75f4d3SHarshad Shirwadkar { 867*aa75f4d3SHarshad Shirwadkar struct super_block *sb = (struct super_block *)(journal->j_private); 868*aa75f4d3SHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB(sb); 869*aa75f4d3SHarshad Shirwadkar struct ext4_inode_info *pos, *n; 870*aa75f4d3SHarshad Shirwadkar int ret = 0; 871*aa75f4d3SHarshad Shirwadkar 872*aa75f4d3SHarshad Shirwadkar spin_lock(&sbi->s_fc_lock); 873*aa75f4d3SHarshad Shirwadkar list_for_each_entry_safe(pos, n, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) { 874*aa75f4d3SHarshad Shirwadkar if (!ext4_test_inode_state(&pos->vfs_inode, 875*aa75f4d3SHarshad Shirwadkar EXT4_STATE_FC_COMMITTING)) 876*aa75f4d3SHarshad Shirwadkar continue; 877*aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock); 878*aa75f4d3SHarshad Shirwadkar 879*aa75f4d3SHarshad Shirwadkar ret = jbd2_wait_inode_data(journal, pos->jinode); 880*aa75f4d3SHarshad Shirwadkar if (ret) 881*aa75f4d3SHarshad Shirwadkar return ret; 882*aa75f4d3SHarshad Shirwadkar spin_lock(&sbi->s_fc_lock); 883*aa75f4d3SHarshad Shirwadkar } 884*aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock); 885*aa75f4d3SHarshad Shirwadkar 886*aa75f4d3SHarshad Shirwadkar return 0; 887*aa75f4d3SHarshad Shirwadkar } 888*aa75f4d3SHarshad Shirwadkar 889*aa75f4d3SHarshad Shirwadkar /* Commit all the directory entry updates */ 890*aa75f4d3SHarshad Shirwadkar static int ext4_fc_commit_dentry_updates(journal_t *journal, u32 *crc) 891*aa75f4d3SHarshad Shirwadkar { 892*aa75f4d3SHarshad Shirwadkar struct super_block *sb = (struct super_block *)(journal->j_private); 893*aa75f4d3SHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB(sb); 894*aa75f4d3SHarshad Shirwadkar struct ext4_fc_dentry_update *fc_dentry; 895*aa75f4d3SHarshad Shirwadkar struct inode *inode; 896*aa75f4d3SHarshad Shirwadkar struct list_head *pos, *n, *fcd_pos, *fcd_n; 897*aa75f4d3SHarshad Shirwadkar struct ext4_inode_info *ei; 898*aa75f4d3SHarshad Shirwadkar int ret; 899*aa75f4d3SHarshad Shirwadkar 900*aa75f4d3SHarshad Shirwadkar if (list_empty(&sbi->s_fc_dentry_q[FC_Q_MAIN])) 901*aa75f4d3SHarshad Shirwadkar return 0; 902*aa75f4d3SHarshad Shirwadkar list_for_each_safe(fcd_pos, fcd_n, &sbi->s_fc_dentry_q[FC_Q_MAIN]) { 903*aa75f4d3SHarshad Shirwadkar fc_dentry = list_entry(fcd_pos, struct ext4_fc_dentry_update, 904*aa75f4d3SHarshad Shirwadkar fcd_list); 905*aa75f4d3SHarshad Shirwadkar if (fc_dentry->fcd_op != EXT4_FC_TAG_CREAT) { 906*aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock); 907*aa75f4d3SHarshad Shirwadkar if (!ext4_fc_add_dentry_tlv( 908*aa75f4d3SHarshad Shirwadkar sb, fc_dentry->fcd_op, 909*aa75f4d3SHarshad Shirwadkar fc_dentry->fcd_parent, fc_dentry->fcd_ino, 910*aa75f4d3SHarshad Shirwadkar fc_dentry->fcd_name.len, 911*aa75f4d3SHarshad Shirwadkar fc_dentry->fcd_name.name, crc)) { 912*aa75f4d3SHarshad Shirwadkar ret = -ENOSPC; 913*aa75f4d3SHarshad Shirwadkar goto lock_and_exit; 914*aa75f4d3SHarshad Shirwadkar } 915*aa75f4d3SHarshad Shirwadkar spin_lock(&sbi->s_fc_lock); 916*aa75f4d3SHarshad Shirwadkar continue; 917*aa75f4d3SHarshad Shirwadkar } 918*aa75f4d3SHarshad Shirwadkar 919*aa75f4d3SHarshad Shirwadkar inode = NULL; 920*aa75f4d3SHarshad Shirwadkar list_for_each_safe(pos, n, &sbi->s_fc_q[FC_Q_MAIN]) { 921*aa75f4d3SHarshad Shirwadkar ei = list_entry(pos, struct ext4_inode_info, i_fc_list); 922*aa75f4d3SHarshad Shirwadkar if (ei->vfs_inode.i_ino == fc_dentry->fcd_ino) { 923*aa75f4d3SHarshad Shirwadkar inode = &ei->vfs_inode; 924*aa75f4d3SHarshad Shirwadkar break; 925*aa75f4d3SHarshad Shirwadkar } 926*aa75f4d3SHarshad Shirwadkar } 927*aa75f4d3SHarshad Shirwadkar /* 928*aa75f4d3SHarshad Shirwadkar * If we don't find inode in our list, then it was deleted, 929*aa75f4d3SHarshad Shirwadkar * in which case, we don't need to record it's create tag. 930*aa75f4d3SHarshad Shirwadkar */ 931*aa75f4d3SHarshad Shirwadkar if (!inode) 932*aa75f4d3SHarshad Shirwadkar continue; 933*aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock); 934*aa75f4d3SHarshad Shirwadkar 935*aa75f4d3SHarshad Shirwadkar /* 936*aa75f4d3SHarshad Shirwadkar * We first write the inode and then the create dirent. This 937*aa75f4d3SHarshad Shirwadkar * allows the recovery code to create an unnamed inode first 938*aa75f4d3SHarshad Shirwadkar * and then link it to a directory entry. This allows us 939*aa75f4d3SHarshad Shirwadkar * to use namei.c routines almost as is and simplifies 940*aa75f4d3SHarshad Shirwadkar * the recovery code. 941*aa75f4d3SHarshad Shirwadkar */ 942*aa75f4d3SHarshad Shirwadkar ret = ext4_fc_write_inode(inode, crc); 943*aa75f4d3SHarshad Shirwadkar if (ret) 944*aa75f4d3SHarshad Shirwadkar goto lock_and_exit; 945*aa75f4d3SHarshad Shirwadkar 946*aa75f4d3SHarshad Shirwadkar ret = ext4_fc_write_inode_data(inode, crc); 947*aa75f4d3SHarshad Shirwadkar if (ret) 948*aa75f4d3SHarshad Shirwadkar goto lock_and_exit; 949*aa75f4d3SHarshad Shirwadkar 950*aa75f4d3SHarshad Shirwadkar if (!ext4_fc_add_dentry_tlv( 951*aa75f4d3SHarshad Shirwadkar sb, fc_dentry->fcd_op, 952*aa75f4d3SHarshad Shirwadkar fc_dentry->fcd_parent, fc_dentry->fcd_ino, 953*aa75f4d3SHarshad Shirwadkar fc_dentry->fcd_name.len, 954*aa75f4d3SHarshad Shirwadkar fc_dentry->fcd_name.name, crc)) { 955*aa75f4d3SHarshad Shirwadkar spin_lock(&sbi->s_fc_lock); 956*aa75f4d3SHarshad Shirwadkar ret = -ENOSPC; 957*aa75f4d3SHarshad Shirwadkar goto lock_and_exit; 958*aa75f4d3SHarshad Shirwadkar } 959*aa75f4d3SHarshad Shirwadkar 960*aa75f4d3SHarshad Shirwadkar spin_lock(&sbi->s_fc_lock); 961*aa75f4d3SHarshad Shirwadkar } 962*aa75f4d3SHarshad Shirwadkar return 0; 963*aa75f4d3SHarshad Shirwadkar lock_and_exit: 964*aa75f4d3SHarshad Shirwadkar spin_lock(&sbi->s_fc_lock); 965*aa75f4d3SHarshad Shirwadkar return ret; 966*aa75f4d3SHarshad Shirwadkar } 967*aa75f4d3SHarshad Shirwadkar 968*aa75f4d3SHarshad Shirwadkar static int ext4_fc_perform_commit(journal_t *journal) 969*aa75f4d3SHarshad Shirwadkar { 970*aa75f4d3SHarshad Shirwadkar struct super_block *sb = (struct super_block *)(journal->j_private); 971*aa75f4d3SHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB(sb); 972*aa75f4d3SHarshad Shirwadkar struct ext4_inode_info *iter; 973*aa75f4d3SHarshad Shirwadkar struct ext4_fc_head head; 974*aa75f4d3SHarshad Shirwadkar struct list_head *pos; 975*aa75f4d3SHarshad Shirwadkar struct inode *inode; 976*aa75f4d3SHarshad Shirwadkar struct blk_plug plug; 977*aa75f4d3SHarshad Shirwadkar int ret = 0; 978*aa75f4d3SHarshad Shirwadkar u32 crc = 0; 979*aa75f4d3SHarshad Shirwadkar 980*aa75f4d3SHarshad Shirwadkar ret = ext4_fc_submit_inode_data_all(journal); 981*aa75f4d3SHarshad Shirwadkar if (ret) 982*aa75f4d3SHarshad Shirwadkar return ret; 983*aa75f4d3SHarshad Shirwadkar 984*aa75f4d3SHarshad Shirwadkar ret = ext4_fc_wait_inode_data_all(journal); 985*aa75f4d3SHarshad Shirwadkar if (ret) 986*aa75f4d3SHarshad Shirwadkar return ret; 987*aa75f4d3SHarshad Shirwadkar 988*aa75f4d3SHarshad Shirwadkar blk_start_plug(&plug); 989*aa75f4d3SHarshad Shirwadkar if (sbi->s_fc_bytes == 0) { 990*aa75f4d3SHarshad Shirwadkar /* 991*aa75f4d3SHarshad Shirwadkar * Add a head tag only if this is the first fast commit 992*aa75f4d3SHarshad Shirwadkar * in this TID. 993*aa75f4d3SHarshad Shirwadkar */ 994*aa75f4d3SHarshad Shirwadkar head.fc_features = cpu_to_le32(EXT4_FC_SUPPORTED_FEATURES); 995*aa75f4d3SHarshad Shirwadkar head.fc_tid = cpu_to_le32( 996*aa75f4d3SHarshad Shirwadkar sbi->s_journal->j_running_transaction->t_tid); 997*aa75f4d3SHarshad Shirwadkar if (!ext4_fc_add_tlv(sb, EXT4_FC_TAG_HEAD, sizeof(head), 998*aa75f4d3SHarshad Shirwadkar (u8 *)&head, &crc)) 999*aa75f4d3SHarshad Shirwadkar goto out; 1000*aa75f4d3SHarshad Shirwadkar } 1001*aa75f4d3SHarshad Shirwadkar 1002*aa75f4d3SHarshad Shirwadkar spin_lock(&sbi->s_fc_lock); 1003*aa75f4d3SHarshad Shirwadkar ret = ext4_fc_commit_dentry_updates(journal, &crc); 1004*aa75f4d3SHarshad Shirwadkar if (ret) { 1005*aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock); 1006*aa75f4d3SHarshad Shirwadkar goto out; 1007*aa75f4d3SHarshad Shirwadkar } 1008*aa75f4d3SHarshad Shirwadkar 1009*aa75f4d3SHarshad Shirwadkar list_for_each(pos, &sbi->s_fc_q[FC_Q_MAIN]) { 1010*aa75f4d3SHarshad Shirwadkar iter = list_entry(pos, struct ext4_inode_info, i_fc_list); 1011*aa75f4d3SHarshad Shirwadkar inode = &iter->vfs_inode; 1012*aa75f4d3SHarshad Shirwadkar if (!ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING)) 1013*aa75f4d3SHarshad Shirwadkar continue; 1014*aa75f4d3SHarshad Shirwadkar 1015*aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock); 1016*aa75f4d3SHarshad Shirwadkar ret = ext4_fc_write_inode_data(inode, &crc); 1017*aa75f4d3SHarshad Shirwadkar if (ret) 1018*aa75f4d3SHarshad Shirwadkar goto out; 1019*aa75f4d3SHarshad Shirwadkar ret = ext4_fc_write_inode(inode, &crc); 1020*aa75f4d3SHarshad Shirwadkar if (ret) 1021*aa75f4d3SHarshad Shirwadkar goto out; 1022*aa75f4d3SHarshad Shirwadkar spin_lock(&sbi->s_fc_lock); 1023*aa75f4d3SHarshad Shirwadkar EXT4_I(inode)->i_fc_committed_subtid = 1024*aa75f4d3SHarshad Shirwadkar atomic_read(&sbi->s_fc_subtid); 1025*aa75f4d3SHarshad Shirwadkar } 1026*aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock); 1027*aa75f4d3SHarshad Shirwadkar 1028*aa75f4d3SHarshad Shirwadkar ret = ext4_fc_write_tail(sb, crc); 1029*aa75f4d3SHarshad Shirwadkar 1030*aa75f4d3SHarshad Shirwadkar out: 1031*aa75f4d3SHarshad Shirwadkar blk_finish_plug(&plug); 1032*aa75f4d3SHarshad Shirwadkar return ret; 1033*aa75f4d3SHarshad Shirwadkar } 1034*aa75f4d3SHarshad Shirwadkar 1035*aa75f4d3SHarshad Shirwadkar /* 1036*aa75f4d3SHarshad Shirwadkar * The main commit entry point. Performs a fast commit for transaction 1037*aa75f4d3SHarshad Shirwadkar * commit_tid if needed. If it's not possible to perform a fast commit 1038*aa75f4d3SHarshad Shirwadkar * due to various reasons, we fall back to full commit. Returns 0 1039*aa75f4d3SHarshad Shirwadkar * on success, error otherwise. 1040*aa75f4d3SHarshad Shirwadkar */ 1041*aa75f4d3SHarshad Shirwadkar int ext4_fc_commit(journal_t *journal, tid_t commit_tid) 1042*aa75f4d3SHarshad Shirwadkar { 1043*aa75f4d3SHarshad Shirwadkar struct super_block *sb = (struct super_block *)(journal->j_private); 1044*aa75f4d3SHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB(sb); 1045*aa75f4d3SHarshad Shirwadkar int nblks = 0, ret, bsize = journal->j_blocksize; 1046*aa75f4d3SHarshad Shirwadkar int subtid = atomic_read(&sbi->s_fc_subtid); 1047*aa75f4d3SHarshad Shirwadkar int reason = EXT4_FC_REASON_OK, fc_bufs_before = 0; 1048*aa75f4d3SHarshad Shirwadkar ktime_t start_time, commit_time; 1049*aa75f4d3SHarshad Shirwadkar 1050*aa75f4d3SHarshad Shirwadkar trace_ext4_fc_commit_start(sb); 1051*aa75f4d3SHarshad Shirwadkar 1052*aa75f4d3SHarshad Shirwadkar start_time = ktime_get(); 1053*aa75f4d3SHarshad Shirwadkar 1054*aa75f4d3SHarshad Shirwadkar if (!test_opt2(sb, JOURNAL_FAST_COMMIT) || 1055*aa75f4d3SHarshad Shirwadkar (ext4_fc_is_ineligible(sb))) { 1056*aa75f4d3SHarshad Shirwadkar reason = EXT4_FC_REASON_INELIGIBLE; 1057*aa75f4d3SHarshad Shirwadkar goto out; 1058*aa75f4d3SHarshad Shirwadkar } 1059*aa75f4d3SHarshad Shirwadkar 1060*aa75f4d3SHarshad Shirwadkar restart_fc: 1061*aa75f4d3SHarshad Shirwadkar ret = jbd2_fc_begin_commit(journal, commit_tid); 1062*aa75f4d3SHarshad Shirwadkar if (ret == -EALREADY) { 1063*aa75f4d3SHarshad Shirwadkar /* There was an ongoing commit, check if we need to restart */ 1064*aa75f4d3SHarshad Shirwadkar if (atomic_read(&sbi->s_fc_subtid) <= subtid && 1065*aa75f4d3SHarshad Shirwadkar commit_tid > journal->j_commit_sequence) 1066*aa75f4d3SHarshad Shirwadkar goto restart_fc; 1067*aa75f4d3SHarshad Shirwadkar reason = EXT4_FC_REASON_ALREADY_COMMITTED; 1068*aa75f4d3SHarshad Shirwadkar goto out; 1069*aa75f4d3SHarshad Shirwadkar } else if (ret) { 1070*aa75f4d3SHarshad Shirwadkar sbi->s_fc_stats.fc_ineligible_reason_count[EXT4_FC_COMMIT_FAILED]++; 1071*aa75f4d3SHarshad Shirwadkar reason = EXT4_FC_REASON_FC_START_FAILED; 1072*aa75f4d3SHarshad Shirwadkar goto out; 1073*aa75f4d3SHarshad Shirwadkar } 1074*aa75f4d3SHarshad Shirwadkar 1075*aa75f4d3SHarshad Shirwadkar fc_bufs_before = (sbi->s_fc_bytes + bsize - 1) / bsize; 1076*aa75f4d3SHarshad Shirwadkar ret = ext4_fc_perform_commit(journal); 1077*aa75f4d3SHarshad Shirwadkar if (ret < 0) { 1078*aa75f4d3SHarshad Shirwadkar sbi->s_fc_stats.fc_ineligible_reason_count[EXT4_FC_COMMIT_FAILED]++; 1079*aa75f4d3SHarshad Shirwadkar reason = EXT4_FC_REASON_FC_FAILED; 1080*aa75f4d3SHarshad Shirwadkar goto out; 1081*aa75f4d3SHarshad Shirwadkar } 1082*aa75f4d3SHarshad Shirwadkar nblks = (sbi->s_fc_bytes + bsize - 1) / bsize - fc_bufs_before; 1083*aa75f4d3SHarshad Shirwadkar ret = jbd2_fc_wait_bufs(journal, nblks); 1084*aa75f4d3SHarshad Shirwadkar if (ret < 0) { 1085*aa75f4d3SHarshad Shirwadkar sbi->s_fc_stats.fc_ineligible_reason_count[EXT4_FC_COMMIT_FAILED]++; 1086*aa75f4d3SHarshad Shirwadkar reason = EXT4_FC_REASON_FC_FAILED; 1087*aa75f4d3SHarshad Shirwadkar goto out; 1088*aa75f4d3SHarshad Shirwadkar } 1089*aa75f4d3SHarshad Shirwadkar atomic_inc(&sbi->s_fc_subtid); 1090*aa75f4d3SHarshad Shirwadkar jbd2_fc_end_commit(journal); 1091*aa75f4d3SHarshad Shirwadkar out: 1092*aa75f4d3SHarshad Shirwadkar /* Has any ineligible update happened since we started? */ 1093*aa75f4d3SHarshad Shirwadkar if (reason == EXT4_FC_REASON_OK && ext4_fc_is_ineligible(sb)) { 1094*aa75f4d3SHarshad Shirwadkar sbi->s_fc_stats.fc_ineligible_reason_count[EXT4_FC_COMMIT_FAILED]++; 1095*aa75f4d3SHarshad Shirwadkar reason = EXT4_FC_REASON_INELIGIBLE; 1096*aa75f4d3SHarshad Shirwadkar } 1097*aa75f4d3SHarshad Shirwadkar 1098*aa75f4d3SHarshad Shirwadkar spin_lock(&sbi->s_fc_lock); 1099*aa75f4d3SHarshad Shirwadkar if (reason != EXT4_FC_REASON_OK && 1100*aa75f4d3SHarshad Shirwadkar reason != EXT4_FC_REASON_ALREADY_COMMITTED) { 1101*aa75f4d3SHarshad Shirwadkar sbi->s_fc_stats.fc_ineligible_commits++; 1102*aa75f4d3SHarshad Shirwadkar } else { 1103*aa75f4d3SHarshad Shirwadkar sbi->s_fc_stats.fc_num_commits++; 1104*aa75f4d3SHarshad Shirwadkar sbi->s_fc_stats.fc_numblks += nblks; 1105*aa75f4d3SHarshad Shirwadkar } 1106*aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock); 1107*aa75f4d3SHarshad Shirwadkar nblks = (reason == EXT4_FC_REASON_OK) ? nblks : 0; 1108*aa75f4d3SHarshad Shirwadkar trace_ext4_fc_commit_stop(sb, nblks, reason); 1109*aa75f4d3SHarshad Shirwadkar commit_time = ktime_to_ns(ktime_sub(ktime_get(), start_time)); 1110*aa75f4d3SHarshad Shirwadkar /* 1111*aa75f4d3SHarshad Shirwadkar * weight the commit time higher than the average time so we don't 1112*aa75f4d3SHarshad Shirwadkar * react too strongly to vast changes in the commit time 1113*aa75f4d3SHarshad Shirwadkar */ 1114*aa75f4d3SHarshad Shirwadkar if (likely(sbi->s_fc_avg_commit_time)) 1115*aa75f4d3SHarshad Shirwadkar sbi->s_fc_avg_commit_time = (commit_time + 1116*aa75f4d3SHarshad Shirwadkar sbi->s_fc_avg_commit_time * 3) / 4; 1117*aa75f4d3SHarshad Shirwadkar else 1118*aa75f4d3SHarshad Shirwadkar sbi->s_fc_avg_commit_time = commit_time; 1119*aa75f4d3SHarshad Shirwadkar jbd_debug(1, 1120*aa75f4d3SHarshad Shirwadkar "Fast commit ended with blks = %d, reason = %d, subtid - %d", 1121*aa75f4d3SHarshad Shirwadkar nblks, reason, subtid); 1122*aa75f4d3SHarshad Shirwadkar if (reason == EXT4_FC_REASON_FC_FAILED) 1123*aa75f4d3SHarshad Shirwadkar return jbd2_fc_end_commit_fallback(journal, commit_tid); 1124*aa75f4d3SHarshad Shirwadkar if (reason == EXT4_FC_REASON_FC_START_FAILED || 1125*aa75f4d3SHarshad Shirwadkar reason == EXT4_FC_REASON_INELIGIBLE) 1126*aa75f4d3SHarshad Shirwadkar return jbd2_complete_transaction(journal, commit_tid); 1127*aa75f4d3SHarshad Shirwadkar return 0; 1128*aa75f4d3SHarshad Shirwadkar } 1129*aa75f4d3SHarshad Shirwadkar 1130ff780b91SHarshad Shirwadkar /* 1131ff780b91SHarshad Shirwadkar * Fast commit cleanup routine. This is called after every fast commit and 1132ff780b91SHarshad Shirwadkar * full commit. full is true if we are called after a full commit. 1133ff780b91SHarshad Shirwadkar */ 1134ff780b91SHarshad Shirwadkar static void ext4_fc_cleanup(journal_t *journal, int full) 1135ff780b91SHarshad Shirwadkar { 1136*aa75f4d3SHarshad Shirwadkar struct super_block *sb = journal->j_private; 1137*aa75f4d3SHarshad Shirwadkar struct ext4_sb_info *sbi = EXT4_SB(sb); 1138*aa75f4d3SHarshad Shirwadkar struct ext4_inode_info *iter; 1139*aa75f4d3SHarshad Shirwadkar struct ext4_fc_dentry_update *fc_dentry; 1140*aa75f4d3SHarshad Shirwadkar struct list_head *pos, *n; 1141*aa75f4d3SHarshad Shirwadkar 1142*aa75f4d3SHarshad Shirwadkar if (full && sbi->s_fc_bh) 1143*aa75f4d3SHarshad Shirwadkar sbi->s_fc_bh = NULL; 1144*aa75f4d3SHarshad Shirwadkar 1145*aa75f4d3SHarshad Shirwadkar jbd2_fc_release_bufs(journal); 1146*aa75f4d3SHarshad Shirwadkar 1147*aa75f4d3SHarshad Shirwadkar spin_lock(&sbi->s_fc_lock); 1148*aa75f4d3SHarshad Shirwadkar list_for_each_safe(pos, n, &sbi->s_fc_q[FC_Q_MAIN]) { 1149*aa75f4d3SHarshad Shirwadkar iter = list_entry(pos, struct ext4_inode_info, i_fc_list); 1150*aa75f4d3SHarshad Shirwadkar list_del_init(&iter->i_fc_list); 1151*aa75f4d3SHarshad Shirwadkar ext4_clear_inode_state(&iter->vfs_inode, 1152*aa75f4d3SHarshad Shirwadkar EXT4_STATE_FC_COMMITTING); 1153*aa75f4d3SHarshad Shirwadkar ext4_fc_reset_inode(&iter->vfs_inode); 1154*aa75f4d3SHarshad Shirwadkar /* Make sure EXT4_STATE_FC_COMMITTING bit is clear */ 1155*aa75f4d3SHarshad Shirwadkar smp_mb(); 1156*aa75f4d3SHarshad Shirwadkar #if (BITS_PER_LONG < 64) 1157*aa75f4d3SHarshad Shirwadkar wake_up_bit(&iter->i_state_flags, EXT4_STATE_FC_COMMITTING); 1158*aa75f4d3SHarshad Shirwadkar #else 1159*aa75f4d3SHarshad Shirwadkar wake_up_bit(&iter->i_flags, EXT4_STATE_FC_COMMITTING); 1160*aa75f4d3SHarshad Shirwadkar #endif 1161*aa75f4d3SHarshad Shirwadkar } 1162*aa75f4d3SHarshad Shirwadkar 1163*aa75f4d3SHarshad Shirwadkar while (!list_empty(&sbi->s_fc_dentry_q[FC_Q_MAIN])) { 1164*aa75f4d3SHarshad Shirwadkar fc_dentry = list_first_entry(&sbi->s_fc_dentry_q[FC_Q_MAIN], 1165*aa75f4d3SHarshad Shirwadkar struct ext4_fc_dentry_update, 1166*aa75f4d3SHarshad Shirwadkar fcd_list); 1167*aa75f4d3SHarshad Shirwadkar list_del_init(&fc_dentry->fcd_list); 1168*aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock); 1169*aa75f4d3SHarshad Shirwadkar 1170*aa75f4d3SHarshad Shirwadkar if (fc_dentry->fcd_name.name && 1171*aa75f4d3SHarshad Shirwadkar fc_dentry->fcd_name.len > DNAME_INLINE_LEN) 1172*aa75f4d3SHarshad Shirwadkar kfree(fc_dentry->fcd_name.name); 1173*aa75f4d3SHarshad Shirwadkar kmem_cache_free(ext4_fc_dentry_cachep, fc_dentry); 1174*aa75f4d3SHarshad Shirwadkar spin_lock(&sbi->s_fc_lock); 1175*aa75f4d3SHarshad Shirwadkar } 1176*aa75f4d3SHarshad Shirwadkar 1177*aa75f4d3SHarshad Shirwadkar list_splice_init(&sbi->s_fc_dentry_q[FC_Q_STAGING], 1178*aa75f4d3SHarshad Shirwadkar &sbi->s_fc_dentry_q[FC_Q_MAIN]); 1179*aa75f4d3SHarshad Shirwadkar list_splice_init(&sbi->s_fc_q[FC_Q_STAGING], 1180*aa75f4d3SHarshad Shirwadkar &sbi->s_fc_q[FC_Q_STAGING]); 1181*aa75f4d3SHarshad Shirwadkar 1182*aa75f4d3SHarshad Shirwadkar sbi->s_mount_state &= ~EXT4_FC_COMMITTING; 1183*aa75f4d3SHarshad Shirwadkar sbi->s_mount_state &= ~EXT4_FC_INELIGIBLE; 1184*aa75f4d3SHarshad Shirwadkar 1185*aa75f4d3SHarshad Shirwadkar if (full) 1186*aa75f4d3SHarshad Shirwadkar sbi->s_fc_bytes = 0; 1187*aa75f4d3SHarshad Shirwadkar spin_unlock(&sbi->s_fc_lock); 1188*aa75f4d3SHarshad Shirwadkar trace_ext4_fc_stats(sb); 1189ff780b91SHarshad Shirwadkar } 11906866d7b3SHarshad Shirwadkar 11916866d7b3SHarshad Shirwadkar void ext4_fc_init(struct super_block *sb, journal_t *journal) 11926866d7b3SHarshad Shirwadkar { 11936866d7b3SHarshad Shirwadkar if (!test_opt2(sb, JOURNAL_FAST_COMMIT)) 11946866d7b3SHarshad Shirwadkar return; 1195ff780b91SHarshad Shirwadkar journal->j_fc_cleanup_callback = ext4_fc_cleanup; 11966866d7b3SHarshad Shirwadkar if (jbd2_fc_init(journal, EXT4_NUM_FC_BLKS)) { 11976866d7b3SHarshad Shirwadkar pr_warn("Error while enabling fast commits, turning off."); 11986866d7b3SHarshad Shirwadkar ext4_clear_feature_fast_commit(sb); 11996866d7b3SHarshad Shirwadkar } 12006866d7b3SHarshad Shirwadkar } 1201*aa75f4d3SHarshad Shirwadkar 1202*aa75f4d3SHarshad Shirwadkar int __init ext4_fc_init_dentry_cache(void) 1203*aa75f4d3SHarshad Shirwadkar { 1204*aa75f4d3SHarshad Shirwadkar ext4_fc_dentry_cachep = KMEM_CACHE(ext4_fc_dentry_update, 1205*aa75f4d3SHarshad Shirwadkar SLAB_RECLAIM_ACCOUNT); 1206*aa75f4d3SHarshad Shirwadkar 1207*aa75f4d3SHarshad Shirwadkar if (ext4_fc_dentry_cachep == NULL) 1208*aa75f4d3SHarshad Shirwadkar return -ENOMEM; 1209*aa75f4d3SHarshad Shirwadkar 1210*aa75f4d3SHarshad Shirwadkar return 0; 1211*aa75f4d3SHarshad Shirwadkar } 1212