1e02119d5SChris Mason /* 2e02119d5SChris Mason * Copyright (C) 2008 Oracle. All rights reserved. 3e02119d5SChris Mason * 4e02119d5SChris Mason * This program is free software; you can redistribute it and/or 5e02119d5SChris Mason * modify it under the terms of the GNU General Public 6e02119d5SChris Mason * License v2 as published by the Free Software Foundation. 7e02119d5SChris Mason * 8e02119d5SChris Mason * This program is distributed in the hope that it will be useful, 9e02119d5SChris Mason * but WITHOUT ANY WARRANTY; without even the implied warranty of 10e02119d5SChris Mason * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11e02119d5SChris Mason * General Public License for more details. 12e02119d5SChris Mason * 13e02119d5SChris Mason * You should have received a copy of the GNU General Public 14e02119d5SChris Mason * License along with this program; if not, write to the 15e02119d5SChris Mason * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 16e02119d5SChris Mason * Boston, MA 021110-1307, USA. 
17e02119d5SChris Mason */ 18e02119d5SChris Mason 19e02119d5SChris Mason #include <linux/sched.h> 205a0e3ad6STejun Heo #include <linux/slab.h> 21c6adc9ccSMiao Xie #include <linux/blkdev.h> 225dc562c5SJosef Bacik #include <linux/list_sort.h> 23995946ddSMiao Xie #include "tree-log.h" 24e02119d5SChris Mason #include "disk-io.h" 25e02119d5SChris Mason #include "locking.h" 26e02119d5SChris Mason #include "print-tree.h" 27f186373fSMark Fasheh #include "backref.h" 28f186373fSMark Fasheh #include "hash.h" 29e02119d5SChris Mason 30e02119d5SChris Mason /* magic values for the inode_only field in btrfs_log_inode: 31e02119d5SChris Mason * 32e02119d5SChris Mason * LOG_INODE_ALL means to log everything 33e02119d5SChris Mason * LOG_INODE_EXISTS means to log just enough to recreate the inode 34e02119d5SChris Mason * during log replay 35e02119d5SChris Mason */ 36e02119d5SChris Mason #define LOG_INODE_ALL 0 37e02119d5SChris Mason #define LOG_INODE_EXISTS 1 38e02119d5SChris Mason 39e02119d5SChris Mason /* 4012fcfd22SChris Mason * directory trouble cases 4112fcfd22SChris Mason * 4212fcfd22SChris Mason * 1) on rename or unlink, if the inode being unlinked isn't in the fsync 4312fcfd22SChris Mason * log, we must force a full commit before doing an fsync of the directory 4412fcfd22SChris Mason * where the unlink was done. 4512fcfd22SChris Mason * ---> record transid of last unlink/rename per directory 4612fcfd22SChris Mason * 4712fcfd22SChris Mason * mkdir foo/some_dir 4812fcfd22SChris Mason * normal commit 4912fcfd22SChris Mason * rename foo/some_dir foo2/some_dir 5012fcfd22SChris Mason * mkdir foo/some_dir 5112fcfd22SChris Mason * fsync foo/some_dir/some_file 5212fcfd22SChris Mason * 5312fcfd22SChris Mason * The fsync above will unlink the original some_dir without recording 5412fcfd22SChris Mason * it in its new location (foo2). 
After a crash, some_dir will be gone 5512fcfd22SChris Mason * unless the fsync of some_file forces a full commit 5612fcfd22SChris Mason * 5712fcfd22SChris Mason * 2) we must log any new names for any file or dir that is in the fsync 5812fcfd22SChris Mason * log. ---> check inode while renaming/linking. 5912fcfd22SChris Mason * 6012fcfd22SChris Mason * 2a) we must log any new names for any file or dir during rename 6112fcfd22SChris Mason * when the directory they are being removed from was logged. 6212fcfd22SChris Mason * ---> check inode and old parent dir during rename 6312fcfd22SChris Mason * 6412fcfd22SChris Mason * 2a is actually the more important variant. Without the extra logging 6512fcfd22SChris Mason * a crash might unlink the old name without recreating the new one 6612fcfd22SChris Mason * 6712fcfd22SChris Mason * 3) after a crash, we must go through any directories with a link count 6812fcfd22SChris Mason * of zero and redo the rm -rf 6912fcfd22SChris Mason * 7012fcfd22SChris Mason * mkdir f1/foo 7112fcfd22SChris Mason * normal commit 7212fcfd22SChris Mason * rm -rf f1/foo 7312fcfd22SChris Mason * fsync(f1) 7412fcfd22SChris Mason * 7512fcfd22SChris Mason * The directory f1/foo was fully removed from the FS, but fsync was never 7612fcfd22SChris Mason * called on f1/foo, only its parent dir. After a crash the rm -rf must 7712fcfd22SChris Mason * be replayed. This must be able to recurse down the entire 7812fcfd22SChris Mason * directory tree. The inode link count fixup code takes care of the 7912fcfd22SChris Mason * ugly details. 8012fcfd22SChris Mason */ 8112fcfd22SChris Mason 8212fcfd22SChris Mason /* 83e02119d5SChris Mason * stages for the tree walking. The first 84e02119d5SChris Mason * stage (0) is to only pin down the blocks we find 85e02119d5SChris Mason * the second stage (1) is to make sure that all the inodes 86e02119d5SChris Mason * we find in the log are created in the subvolume. 
87e02119d5SChris Mason * 88e02119d5SChris Mason * The last stage is to deal with directories and links and extents 89e02119d5SChris Mason * and all the other fun semantics 90e02119d5SChris Mason */ 91e02119d5SChris Mason #define LOG_WALK_PIN_ONLY 0 92e02119d5SChris Mason #define LOG_WALK_REPLAY_INODES 1 93dd8e7217SJosef Bacik #define LOG_WALK_REPLAY_DIR_INDEX 2 94dd8e7217SJosef Bacik #define LOG_WALK_REPLAY_ALL 3 95e02119d5SChris Mason 9612fcfd22SChris Mason static int btrfs_log_inode(struct btrfs_trans_handle *trans, 97e02119d5SChris Mason struct btrfs_root *root, struct inode *inode, 9849dae1bcSFilipe Manana int inode_only, 9949dae1bcSFilipe Manana const loff_t start, 1008407f553SFilipe Manana const loff_t end, 1018407f553SFilipe Manana struct btrfs_log_ctx *ctx); 102ec051c0fSYan Zheng static int link_to_fixup_dir(struct btrfs_trans_handle *trans, 103ec051c0fSYan Zheng struct btrfs_root *root, 104ec051c0fSYan Zheng struct btrfs_path *path, u64 objectid); 10512fcfd22SChris Mason static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans, 10612fcfd22SChris Mason struct btrfs_root *root, 10712fcfd22SChris Mason struct btrfs_root *log, 10812fcfd22SChris Mason struct btrfs_path *path, 10912fcfd22SChris Mason u64 dirid, int del_all); 110e02119d5SChris Mason 111e02119d5SChris Mason /* 112e02119d5SChris Mason * tree logging is a special write ahead log used to make sure that 113e02119d5SChris Mason * fsyncs and O_SYNCs can happen without doing full tree commits. 114e02119d5SChris Mason * 115e02119d5SChris Mason * Full tree commits are expensive because they require commonly 116e02119d5SChris Mason * modified blocks to be recowed, creating many dirty pages in the 117e02119d5SChris Mason * extent tree and a 4x-6x higher write load than ext3. 
118e02119d5SChris Mason * 119e02119d5SChris Mason * Instead of doing a tree commit on every fsync, we use the 120e02119d5SChris Mason * key ranges and transaction ids to find items for a given file or directory 121e02119d5SChris Mason * that have changed in this transaction. Those items are copied into 122e02119d5SChris Mason * a special tree (one per subvolume root), that tree is written to disk 123e02119d5SChris Mason * and then the fsync is considered complete. 124e02119d5SChris Mason * 125e02119d5SChris Mason * After a crash, items are copied out of the log-tree back into the 126e02119d5SChris Mason * subvolume tree. Any file data extents found are recorded in the extent 127e02119d5SChris Mason * allocation tree, and the log-tree freed. 128e02119d5SChris Mason * 129e02119d5SChris Mason * The log tree is read three times, once to pin down all the extents it is 130e02119d5SChris Mason * using in ram, once to create all the inodes logged in the tree 131e02119d5SChris Mason * and once to do all the other items. 
132e02119d5SChris Mason */ 133e02119d5SChris Mason 134e02119d5SChris Mason /* 135e02119d5SChris Mason * start a sub transaction and setup the log tree 136e02119d5SChris Mason * this increments the log tree writer count to make the people 137e02119d5SChris Mason * syncing the tree wait for us to finish 138e02119d5SChris Mason */ 139e02119d5SChris Mason static int start_log_trans(struct btrfs_trans_handle *trans, 1408b050d35SMiao Xie struct btrfs_root *root, 1418b050d35SMiao Xie struct btrfs_log_ctx *ctx) 142e02119d5SChris Mason { 1438b050d35SMiao Xie int index; 144e02119d5SChris Mason int ret; 1457237f183SYan Zheng 1467237f183SYan Zheng mutex_lock(&root->log_mutex); 1477237f183SYan Zheng if (root->log_root) { 148995946ddSMiao Xie if (btrfs_need_log_full_commit(root->fs_info, trans)) { 14950471a38SMiao Xie ret = -EAGAIN; 15050471a38SMiao Xie goto out; 15150471a38SMiao Xie } 152ff782e0aSJosef Bacik if (!root->log_start_pid) { 153ff782e0aSJosef Bacik root->log_start_pid = current->pid; 15427cdeb70SMiao Xie clear_bit(BTRFS_ROOT_MULTI_LOG_TASKS, &root->state); 155ff782e0aSJosef Bacik } else if (root->log_start_pid != current->pid) { 15627cdeb70SMiao Xie set_bit(BTRFS_ROOT_MULTI_LOG_TASKS, &root->state); 157ff782e0aSJosef Bacik } 158ff782e0aSJosef Bacik 1592ecb7923SMiao Xie atomic_inc(&root->log_batch); 1607237f183SYan Zheng atomic_inc(&root->log_writers); 1618b050d35SMiao Xie if (ctx) { 1628b050d35SMiao Xie index = root->log_transid % 2; 1638b050d35SMiao Xie list_add_tail(&ctx->list, &root->log_ctxs[index]); 164d1433debSMiao Xie ctx->log_transid = root->log_transid; 1658b050d35SMiao Xie } 1667237f183SYan Zheng mutex_unlock(&root->log_mutex); 1677237f183SYan Zheng return 0; 1687237f183SYan Zheng } 169e87ac136SMiao Xie 170e87ac136SMiao Xie ret = 0; 171e02119d5SChris Mason mutex_lock(&root->fs_info->tree_log_mutex); 172e87ac136SMiao Xie if (!root->fs_info->log_root_tree) 173e02119d5SChris Mason ret = btrfs_init_log_root_tree(trans, root->fs_info); 174e87ac136SMiao Xie 
mutex_unlock(&root->fs_info->tree_log_mutex); 1754a500fd1SYan, Zheng if (ret) 176e87ac136SMiao Xie goto out; 177e87ac136SMiao Xie 178e87ac136SMiao Xie if (!root->log_root) { 179e02119d5SChris Mason ret = btrfs_add_log_tree(trans, root); 1804a500fd1SYan, Zheng if (ret) 181e87ac136SMiao Xie goto out; 182e02119d5SChris Mason } 18327cdeb70SMiao Xie clear_bit(BTRFS_ROOT_MULTI_LOG_TASKS, &root->state); 184e87ac136SMiao Xie root->log_start_pid = current->pid; 1852ecb7923SMiao Xie atomic_inc(&root->log_batch); 1867237f183SYan Zheng atomic_inc(&root->log_writers); 1878b050d35SMiao Xie if (ctx) { 1888b050d35SMiao Xie index = root->log_transid % 2; 1898b050d35SMiao Xie list_add_tail(&ctx->list, &root->log_ctxs[index]); 190d1433debSMiao Xie ctx->log_transid = root->log_transid; 1918b050d35SMiao Xie } 192e87ac136SMiao Xie out: 1937237f183SYan Zheng mutex_unlock(&root->log_mutex); 194e87ac136SMiao Xie return ret; 195e02119d5SChris Mason } 196e02119d5SChris Mason 197e02119d5SChris Mason /* 198e02119d5SChris Mason * returns 0 if there was a log transaction running and we were able 199e02119d5SChris Mason * to join, or returns -ENOENT if there were not transactions 200e02119d5SChris Mason * in progress 201e02119d5SChris Mason */ 202e02119d5SChris Mason static int join_running_log_trans(struct btrfs_root *root) 203e02119d5SChris Mason { 204e02119d5SChris Mason int ret = -ENOENT; 205e02119d5SChris Mason 206e02119d5SChris Mason smp_mb(); 207e02119d5SChris Mason if (!root->log_root) 208e02119d5SChris Mason return -ENOENT; 209e02119d5SChris Mason 2107237f183SYan Zheng mutex_lock(&root->log_mutex); 211e02119d5SChris Mason if (root->log_root) { 212e02119d5SChris Mason ret = 0; 2137237f183SYan Zheng atomic_inc(&root->log_writers); 214e02119d5SChris Mason } 2157237f183SYan Zheng mutex_unlock(&root->log_mutex); 216e02119d5SChris Mason return ret; 217e02119d5SChris Mason } 218e02119d5SChris Mason 219e02119d5SChris Mason /* 22012fcfd22SChris Mason * This either makes the current running log 
transaction wait 22112fcfd22SChris Mason * until you call btrfs_end_log_trans() or it makes any future 22212fcfd22SChris Mason * log transactions wait until you call btrfs_end_log_trans() 22312fcfd22SChris Mason */ 22412fcfd22SChris Mason int btrfs_pin_log_trans(struct btrfs_root *root) 22512fcfd22SChris Mason { 22612fcfd22SChris Mason int ret = -ENOENT; 22712fcfd22SChris Mason 22812fcfd22SChris Mason mutex_lock(&root->log_mutex); 22912fcfd22SChris Mason atomic_inc(&root->log_writers); 23012fcfd22SChris Mason mutex_unlock(&root->log_mutex); 23112fcfd22SChris Mason return ret; 23212fcfd22SChris Mason } 23312fcfd22SChris Mason 23412fcfd22SChris Mason /* 235e02119d5SChris Mason * indicate we're done making changes to the log tree 236e02119d5SChris Mason * and wake up anyone waiting to do a sync 237e02119d5SChris Mason */ 238143bede5SJeff Mahoney void btrfs_end_log_trans(struct btrfs_root *root) 239e02119d5SChris Mason { 2407237f183SYan Zheng if (atomic_dec_and_test(&root->log_writers)) { 241e02119d5SChris Mason smp_mb(); 2427237f183SYan Zheng if (waitqueue_active(&root->log_writer_wait)) 2437237f183SYan Zheng wake_up(&root->log_writer_wait); 2447237f183SYan Zheng } 245e02119d5SChris Mason } 246e02119d5SChris Mason 247e02119d5SChris Mason 248e02119d5SChris Mason /* 249e02119d5SChris Mason * the walk control struct is used to pass state down the chain when 250e02119d5SChris Mason * processing the log tree. The stage field tells us which part 251e02119d5SChris Mason * of the log tree processing we are currently doing. The others 252e02119d5SChris Mason * are state fields used for that specific part 253e02119d5SChris Mason */ 254e02119d5SChris Mason struct walk_control { 255e02119d5SChris Mason /* should we free the extent on disk when done? 
This is used 256e02119d5SChris Mason * at transaction commit time while freeing a log tree 257e02119d5SChris Mason */ 258e02119d5SChris Mason int free; 259e02119d5SChris Mason 260e02119d5SChris Mason /* should we write out the extent buffer? This is used 261e02119d5SChris Mason * while flushing the log tree to disk during a sync 262e02119d5SChris Mason */ 263e02119d5SChris Mason int write; 264e02119d5SChris Mason 265e02119d5SChris Mason /* should we wait for the extent buffer io to finish? Also used 266e02119d5SChris Mason * while flushing the log tree to disk for a sync 267e02119d5SChris Mason */ 268e02119d5SChris Mason int wait; 269e02119d5SChris Mason 270e02119d5SChris Mason /* pin only walk, we record which extents on disk belong to the 271e02119d5SChris Mason * log trees 272e02119d5SChris Mason */ 273e02119d5SChris Mason int pin; 274e02119d5SChris Mason 275e02119d5SChris Mason /* what stage of the replay code we're currently in */ 276e02119d5SChris Mason int stage; 277e02119d5SChris Mason 278e02119d5SChris Mason /* the root we are currently replaying */ 279e02119d5SChris Mason struct btrfs_root *replay_dest; 280e02119d5SChris Mason 281e02119d5SChris Mason /* the trans handle for the current replay */ 282e02119d5SChris Mason struct btrfs_trans_handle *trans; 283e02119d5SChris Mason 284e02119d5SChris Mason /* the function that gets used to process blocks we find in the 285e02119d5SChris Mason * tree. 
Note the extent_buffer might not be up to date when it is 286e02119d5SChris Mason * passed in, and it must be checked or read if you need the data 287e02119d5SChris Mason * inside it 288e02119d5SChris Mason */ 289e02119d5SChris Mason int (*process_func)(struct btrfs_root *log, struct extent_buffer *eb, 290e02119d5SChris Mason struct walk_control *wc, u64 gen); 291e02119d5SChris Mason }; 292e02119d5SChris Mason 293e02119d5SChris Mason /* 294e02119d5SChris Mason * process_func used to pin down extents, write them or wait on them 295e02119d5SChris Mason */ 296e02119d5SChris Mason static int process_one_buffer(struct btrfs_root *log, 297e02119d5SChris Mason struct extent_buffer *eb, 298e02119d5SChris Mason struct walk_control *wc, u64 gen) 299e02119d5SChris Mason { 300b50c6e25SJosef Bacik int ret = 0; 301b50c6e25SJosef Bacik 3028c2a1a30SJosef Bacik /* 3038c2a1a30SJosef Bacik * If this fs is mixed then we need to be able to process the leaves to 3048c2a1a30SJosef Bacik * pin down any logged extents, so we have to read the block. 
3058c2a1a30SJosef Bacik */ 3068c2a1a30SJosef Bacik if (btrfs_fs_incompat(log->fs_info, MIXED_GROUPS)) { 3078c2a1a30SJosef Bacik ret = btrfs_read_buffer(eb, gen); 3088c2a1a30SJosef Bacik if (ret) 3098c2a1a30SJosef Bacik return ret; 3108c2a1a30SJosef Bacik } 3118c2a1a30SJosef Bacik 31204018de5SJosef Bacik if (wc->pin) 313b50c6e25SJosef Bacik ret = btrfs_pin_extent_for_log_replay(log->fs_info->extent_root, 314e688b725SChris Mason eb->start, eb->len); 315e02119d5SChris Mason 316b50c6e25SJosef Bacik if (!ret && btrfs_buffer_uptodate(eb, gen, 0)) { 3178c2a1a30SJosef Bacik if (wc->pin && btrfs_header_level(eb) == 0) 3188c2a1a30SJosef Bacik ret = btrfs_exclude_logged_extents(log, eb); 319e02119d5SChris Mason if (wc->write) 320e02119d5SChris Mason btrfs_write_tree_block(eb); 321e02119d5SChris Mason if (wc->wait) 322e02119d5SChris Mason btrfs_wait_tree_block_writeback(eb); 323e02119d5SChris Mason } 324b50c6e25SJosef Bacik return ret; 325e02119d5SChris Mason } 326e02119d5SChris Mason 327e02119d5SChris Mason /* 328e02119d5SChris Mason * Item overwrite used by replay and tree logging. eb, slot and key all refer 329e02119d5SChris Mason * to the src data we are copying out. 330e02119d5SChris Mason * 331e02119d5SChris Mason * root is the tree we are copying into, and path is a scratch 332e02119d5SChris Mason * path for use in this function (it should be released on entry and 333e02119d5SChris Mason * will be released on exit). 334e02119d5SChris Mason * 335e02119d5SChris Mason * If the key is already in the destination tree the existing item is 336e02119d5SChris Mason * overwritten. If the existing item isn't big enough, it is extended. 337e02119d5SChris Mason * If it is too large, it is truncated. 338e02119d5SChris Mason * 339e02119d5SChris Mason * If the key isn't in the destination yet, a new item is inserted. 
340e02119d5SChris Mason */ 341e02119d5SChris Mason static noinline int overwrite_item(struct btrfs_trans_handle *trans, 342e02119d5SChris Mason struct btrfs_root *root, 343e02119d5SChris Mason struct btrfs_path *path, 344e02119d5SChris Mason struct extent_buffer *eb, int slot, 345e02119d5SChris Mason struct btrfs_key *key) 346e02119d5SChris Mason { 347e02119d5SChris Mason int ret; 348e02119d5SChris Mason u32 item_size; 349e02119d5SChris Mason u64 saved_i_size = 0; 350e02119d5SChris Mason int save_old_i_size = 0; 351e02119d5SChris Mason unsigned long src_ptr; 352e02119d5SChris Mason unsigned long dst_ptr; 353e02119d5SChris Mason int overwrite_root = 0; 3544bc4bee4SJosef Bacik bool inode_item = key->type == BTRFS_INODE_ITEM_KEY; 355e02119d5SChris Mason 356e02119d5SChris Mason if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) 357e02119d5SChris Mason overwrite_root = 1; 358e02119d5SChris Mason 359e02119d5SChris Mason item_size = btrfs_item_size_nr(eb, slot); 360e02119d5SChris Mason src_ptr = btrfs_item_ptr_offset(eb, slot); 361e02119d5SChris Mason 362e02119d5SChris Mason /* look for the key in the destination tree */ 363e02119d5SChris Mason ret = btrfs_search_slot(NULL, root, key, path, 0, 0); 3644bc4bee4SJosef Bacik if (ret < 0) 3654bc4bee4SJosef Bacik return ret; 3664bc4bee4SJosef Bacik 367e02119d5SChris Mason if (ret == 0) { 368e02119d5SChris Mason char *src_copy; 369e02119d5SChris Mason char *dst_copy; 370e02119d5SChris Mason u32 dst_size = btrfs_item_size_nr(path->nodes[0], 371e02119d5SChris Mason path->slots[0]); 372e02119d5SChris Mason if (dst_size != item_size) 373e02119d5SChris Mason goto insert; 374e02119d5SChris Mason 375e02119d5SChris Mason if (item_size == 0) { 376b3b4aa74SDavid Sterba btrfs_release_path(path); 377e02119d5SChris Mason return 0; 378e02119d5SChris Mason } 379e02119d5SChris Mason dst_copy = kmalloc(item_size, GFP_NOFS); 380e02119d5SChris Mason src_copy = kmalloc(item_size, GFP_NOFS); 3812a29edc6Sliubo if (!dst_copy || !src_copy) { 
382b3b4aa74SDavid Sterba btrfs_release_path(path); 3832a29edc6Sliubo kfree(dst_copy); 3842a29edc6Sliubo kfree(src_copy); 3852a29edc6Sliubo return -ENOMEM; 3862a29edc6Sliubo } 387e02119d5SChris Mason 388e02119d5SChris Mason read_extent_buffer(eb, src_copy, src_ptr, item_size); 389e02119d5SChris Mason 390e02119d5SChris Mason dst_ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]); 391e02119d5SChris Mason read_extent_buffer(path->nodes[0], dst_copy, dst_ptr, 392e02119d5SChris Mason item_size); 393e02119d5SChris Mason ret = memcmp(dst_copy, src_copy, item_size); 394e02119d5SChris Mason 395e02119d5SChris Mason kfree(dst_copy); 396e02119d5SChris Mason kfree(src_copy); 397e02119d5SChris Mason /* 398e02119d5SChris Mason * they have the same contents, just return, this saves 399e02119d5SChris Mason * us from cowing blocks in the destination tree and doing 400e02119d5SChris Mason * extra writes that may not have been done by a previous 401e02119d5SChris Mason * sync 402e02119d5SChris Mason */ 403e02119d5SChris Mason if (ret == 0) { 404b3b4aa74SDavid Sterba btrfs_release_path(path); 405e02119d5SChris Mason return 0; 406e02119d5SChris Mason } 407e02119d5SChris Mason 4084bc4bee4SJosef Bacik /* 4094bc4bee4SJosef Bacik * We need to load the old nbytes into the inode so when we 4104bc4bee4SJosef Bacik * replay the extents we've logged we get the right nbytes. 
4114bc4bee4SJosef Bacik */ 4124bc4bee4SJosef Bacik if (inode_item) { 4134bc4bee4SJosef Bacik struct btrfs_inode_item *item; 4144bc4bee4SJosef Bacik u64 nbytes; 415d555438bSJosef Bacik u32 mode; 4164bc4bee4SJosef Bacik 4174bc4bee4SJosef Bacik item = btrfs_item_ptr(path->nodes[0], path->slots[0], 4184bc4bee4SJosef Bacik struct btrfs_inode_item); 4194bc4bee4SJosef Bacik nbytes = btrfs_inode_nbytes(path->nodes[0], item); 4204bc4bee4SJosef Bacik item = btrfs_item_ptr(eb, slot, 4214bc4bee4SJosef Bacik struct btrfs_inode_item); 4224bc4bee4SJosef Bacik btrfs_set_inode_nbytes(eb, item, nbytes); 423d555438bSJosef Bacik 424d555438bSJosef Bacik /* 425d555438bSJosef Bacik * If this is a directory we need to reset the i_size to 426d555438bSJosef Bacik * 0 so that we can set it up properly when replaying 427d555438bSJosef Bacik * the rest of the items in this log. 428d555438bSJosef Bacik */ 429d555438bSJosef Bacik mode = btrfs_inode_mode(eb, item); 430d555438bSJosef Bacik if (S_ISDIR(mode)) 431d555438bSJosef Bacik btrfs_set_inode_size(eb, item, 0); 4324bc4bee4SJosef Bacik } 4334bc4bee4SJosef Bacik } else if (inode_item) { 4344bc4bee4SJosef Bacik struct btrfs_inode_item *item; 435d555438bSJosef Bacik u32 mode; 4364bc4bee4SJosef Bacik 4374bc4bee4SJosef Bacik /* 4384bc4bee4SJosef Bacik * New inode, set nbytes to 0 so that the nbytes comes out 4394bc4bee4SJosef Bacik * properly when we replay the extents. 4404bc4bee4SJosef Bacik */ 4414bc4bee4SJosef Bacik item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item); 4424bc4bee4SJosef Bacik btrfs_set_inode_nbytes(eb, item, 0); 443d555438bSJosef Bacik 444d555438bSJosef Bacik /* 445d555438bSJosef Bacik * If this is a directory we need to reset the i_size to 0 so 446d555438bSJosef Bacik * that we can set it up properly when replaying the rest of 447d555438bSJosef Bacik * the items in this log. 
448d555438bSJosef Bacik */ 449d555438bSJosef Bacik mode = btrfs_inode_mode(eb, item); 450d555438bSJosef Bacik if (S_ISDIR(mode)) 451d555438bSJosef Bacik btrfs_set_inode_size(eb, item, 0); 452e02119d5SChris Mason } 453e02119d5SChris Mason insert: 454b3b4aa74SDavid Sterba btrfs_release_path(path); 455e02119d5SChris Mason /* try to insert the key into the destination tree */ 456e02119d5SChris Mason ret = btrfs_insert_empty_item(trans, root, path, 457e02119d5SChris Mason key, item_size); 458e02119d5SChris Mason 459e02119d5SChris Mason /* make sure any existing item is the correct size */ 460e02119d5SChris Mason if (ret == -EEXIST) { 461e02119d5SChris Mason u32 found_size; 462e02119d5SChris Mason found_size = btrfs_item_size_nr(path->nodes[0], 463e02119d5SChris Mason path->slots[0]); 464143bede5SJeff Mahoney if (found_size > item_size) 465afe5fea7STsutomu Itoh btrfs_truncate_item(root, path, item_size, 1); 466143bede5SJeff Mahoney else if (found_size < item_size) 4674b90c680STsutomu Itoh btrfs_extend_item(root, path, 46887b29b20SYan Zheng item_size - found_size); 469e02119d5SChris Mason } else if (ret) { 4704a500fd1SYan, Zheng return ret; 471e02119d5SChris Mason } 472e02119d5SChris Mason dst_ptr = btrfs_item_ptr_offset(path->nodes[0], 473e02119d5SChris Mason path->slots[0]); 474e02119d5SChris Mason 475e02119d5SChris Mason /* don't overwrite an existing inode if the generation number 476e02119d5SChris Mason * was logged as zero. This is done when the tree logging code 477e02119d5SChris Mason * is just logging an inode to make sure it exists after recovery. 478e02119d5SChris Mason * 479e02119d5SChris Mason * Also, don't overwrite i_size on directories during replay. 
480e02119d5SChris Mason * log replay inserts and removes directory items based on the 481e02119d5SChris Mason * state of the tree found in the subvolume, and i_size is modified 482e02119d5SChris Mason * as it goes 483e02119d5SChris Mason */ 484e02119d5SChris Mason if (key->type == BTRFS_INODE_ITEM_KEY && ret == -EEXIST) { 485e02119d5SChris Mason struct btrfs_inode_item *src_item; 486e02119d5SChris Mason struct btrfs_inode_item *dst_item; 487e02119d5SChris Mason 488e02119d5SChris Mason src_item = (struct btrfs_inode_item *)src_ptr; 489e02119d5SChris Mason dst_item = (struct btrfs_inode_item *)dst_ptr; 490e02119d5SChris Mason 491e02119d5SChris Mason if (btrfs_inode_generation(eb, src_item) == 0) 492e02119d5SChris Mason goto no_copy; 493e02119d5SChris Mason 494e02119d5SChris Mason if (overwrite_root && 495e02119d5SChris Mason S_ISDIR(btrfs_inode_mode(eb, src_item)) && 496e02119d5SChris Mason S_ISDIR(btrfs_inode_mode(path->nodes[0], dst_item))) { 497e02119d5SChris Mason save_old_i_size = 1; 498e02119d5SChris Mason saved_i_size = btrfs_inode_size(path->nodes[0], 499e02119d5SChris Mason dst_item); 500e02119d5SChris Mason } 501e02119d5SChris Mason } 502e02119d5SChris Mason 503e02119d5SChris Mason copy_extent_buffer(path->nodes[0], eb, dst_ptr, 504e02119d5SChris Mason src_ptr, item_size); 505e02119d5SChris Mason 506e02119d5SChris Mason if (save_old_i_size) { 507e02119d5SChris Mason struct btrfs_inode_item *dst_item; 508e02119d5SChris Mason dst_item = (struct btrfs_inode_item *)dst_ptr; 509e02119d5SChris Mason btrfs_set_inode_size(path->nodes[0], dst_item, saved_i_size); 510e02119d5SChris Mason } 511e02119d5SChris Mason 512e02119d5SChris Mason /* make sure the generation is filled in */ 513e02119d5SChris Mason if (key->type == BTRFS_INODE_ITEM_KEY) { 514e02119d5SChris Mason struct btrfs_inode_item *dst_item; 515e02119d5SChris Mason dst_item = (struct btrfs_inode_item *)dst_ptr; 516e02119d5SChris Mason if (btrfs_inode_generation(path->nodes[0], dst_item) == 0) { 
517e02119d5SChris Mason btrfs_set_inode_generation(path->nodes[0], dst_item, 518e02119d5SChris Mason trans->transid); 519e02119d5SChris Mason } 520e02119d5SChris Mason } 521e02119d5SChris Mason no_copy: 522e02119d5SChris Mason btrfs_mark_buffer_dirty(path->nodes[0]); 523b3b4aa74SDavid Sterba btrfs_release_path(path); 524e02119d5SChris Mason return 0; 525e02119d5SChris Mason } 526e02119d5SChris Mason 527e02119d5SChris Mason /* 528e02119d5SChris Mason * simple helper to read an inode off the disk from a given root 529e02119d5SChris Mason * This can only be called for subvolume roots and not for the log 530e02119d5SChris Mason */ 531e02119d5SChris Mason static noinline struct inode *read_one_inode(struct btrfs_root *root, 532e02119d5SChris Mason u64 objectid) 533e02119d5SChris Mason { 5345d4f98a2SYan Zheng struct btrfs_key key; 535e02119d5SChris Mason struct inode *inode; 536e02119d5SChris Mason 5375d4f98a2SYan Zheng key.objectid = objectid; 5385d4f98a2SYan Zheng key.type = BTRFS_INODE_ITEM_KEY; 5395d4f98a2SYan Zheng key.offset = 0; 54073f73415SJosef Bacik inode = btrfs_iget(root->fs_info->sb, &key, root, NULL); 5415d4f98a2SYan Zheng if (IS_ERR(inode)) { 5425d4f98a2SYan Zheng inode = NULL; 5435d4f98a2SYan Zheng } else if (is_bad_inode(inode)) { 544e02119d5SChris Mason iput(inode); 545e02119d5SChris Mason inode = NULL; 546e02119d5SChris Mason } 547e02119d5SChris Mason return inode; 548e02119d5SChris Mason } 549e02119d5SChris Mason 550e02119d5SChris Mason /* replays a single extent in 'eb' at 'slot' with 'key' into the 551e02119d5SChris Mason * subvolume 'root'. path is released on entry and should be released 552e02119d5SChris Mason * on exit. 553e02119d5SChris Mason * 554e02119d5SChris Mason * extents in the log tree have not been allocated out of the extent 555e02119d5SChris Mason * tree yet. 
So, this completes the allocation, taking a reference 556e02119d5SChris Mason * as required if the extent already exists or creating a new extent 557e02119d5SChris Mason * if it isn't in the extent allocation tree yet. 558e02119d5SChris Mason * 559e02119d5SChris Mason * The extent is inserted into the file, dropping any existing extents 560e02119d5SChris Mason * from the file that overlap the new one. 561e02119d5SChris Mason */ 562e02119d5SChris Mason static noinline int replay_one_extent(struct btrfs_trans_handle *trans, 563e02119d5SChris Mason struct btrfs_root *root, 564e02119d5SChris Mason struct btrfs_path *path, 565e02119d5SChris Mason struct extent_buffer *eb, int slot, 566e02119d5SChris Mason struct btrfs_key *key) 567e02119d5SChris Mason { 568e02119d5SChris Mason int found_type; 569e02119d5SChris Mason u64 extent_end; 570e02119d5SChris Mason u64 start = key->offset; 5714bc4bee4SJosef Bacik u64 nbytes = 0; 572e02119d5SChris Mason struct btrfs_file_extent_item *item; 573e02119d5SChris Mason struct inode *inode = NULL; 574e02119d5SChris Mason unsigned long size; 575e02119d5SChris Mason int ret = 0; 576e02119d5SChris Mason 577e02119d5SChris Mason item = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item); 578e02119d5SChris Mason found_type = btrfs_file_extent_type(eb, item); 579e02119d5SChris Mason 580d899e052SYan Zheng if (found_type == BTRFS_FILE_EXTENT_REG || 5814bc4bee4SJosef Bacik found_type == BTRFS_FILE_EXTENT_PREALLOC) { 5824bc4bee4SJosef Bacik nbytes = btrfs_file_extent_num_bytes(eb, item); 5834bc4bee4SJosef Bacik extent_end = start + nbytes; 5844bc4bee4SJosef Bacik 5854bc4bee4SJosef Bacik /* 5864bc4bee4SJosef Bacik * We don't add to the inodes nbytes if we are prealloc or a 5874bc4bee4SJosef Bacik * hole. 
5884bc4bee4SJosef Bacik */ 5894bc4bee4SJosef Bacik if (btrfs_file_extent_disk_bytenr(eb, item) == 0) 5904bc4bee4SJosef Bacik nbytes = 0; 5914bc4bee4SJosef Bacik } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { 592514ac8adSChris Mason size = btrfs_file_extent_inline_len(eb, slot, item); 5934bc4bee4SJosef Bacik nbytes = btrfs_file_extent_ram_bytes(eb, item); 594fda2832fSQu Wenruo extent_end = ALIGN(start + size, root->sectorsize); 595e02119d5SChris Mason } else { 596e02119d5SChris Mason ret = 0; 597e02119d5SChris Mason goto out; 598e02119d5SChris Mason } 599e02119d5SChris Mason 600e02119d5SChris Mason inode = read_one_inode(root, key->objectid); 601e02119d5SChris Mason if (!inode) { 602e02119d5SChris Mason ret = -EIO; 603e02119d5SChris Mason goto out; 604e02119d5SChris Mason } 605e02119d5SChris Mason 606e02119d5SChris Mason /* 607e02119d5SChris Mason * first check to see if we already have this extent in the 608e02119d5SChris Mason * file. This must be done before the btrfs_drop_extents run 609e02119d5SChris Mason * so we don't try to drop this extent. 
610e02119d5SChris Mason */ 61133345d01SLi Zefan ret = btrfs_lookup_file_extent(trans, root, path, btrfs_ino(inode), 612e02119d5SChris Mason start, 0); 613e02119d5SChris Mason 614d899e052SYan Zheng if (ret == 0 && 615d899e052SYan Zheng (found_type == BTRFS_FILE_EXTENT_REG || 616d899e052SYan Zheng found_type == BTRFS_FILE_EXTENT_PREALLOC)) { 617e02119d5SChris Mason struct btrfs_file_extent_item cmp1; 618e02119d5SChris Mason struct btrfs_file_extent_item cmp2; 619e02119d5SChris Mason struct btrfs_file_extent_item *existing; 620e02119d5SChris Mason struct extent_buffer *leaf; 621e02119d5SChris Mason 622e02119d5SChris Mason leaf = path->nodes[0]; 623e02119d5SChris Mason existing = btrfs_item_ptr(leaf, path->slots[0], 624e02119d5SChris Mason struct btrfs_file_extent_item); 625e02119d5SChris Mason 626e02119d5SChris Mason read_extent_buffer(eb, &cmp1, (unsigned long)item, 627e02119d5SChris Mason sizeof(cmp1)); 628e02119d5SChris Mason read_extent_buffer(leaf, &cmp2, (unsigned long)existing, 629e02119d5SChris Mason sizeof(cmp2)); 630e02119d5SChris Mason 631e02119d5SChris Mason /* 632e02119d5SChris Mason * we already have a pointer to this exact extent, 633e02119d5SChris Mason * we don't have to do anything 634e02119d5SChris Mason */ 635e02119d5SChris Mason if (memcmp(&cmp1, &cmp2, sizeof(cmp1)) == 0) { 636b3b4aa74SDavid Sterba btrfs_release_path(path); 637e02119d5SChris Mason goto out; 638e02119d5SChris Mason } 639e02119d5SChris Mason } 640b3b4aa74SDavid Sterba btrfs_release_path(path); 641e02119d5SChris Mason 642e02119d5SChris Mason /* drop any overlapping extents */ 6432671485dSJosef Bacik ret = btrfs_drop_extents(trans, root, inode, start, extent_end, 1); 6443650860bSJosef Bacik if (ret) 6453650860bSJosef Bacik goto out; 646e02119d5SChris Mason 64707d400a6SYan Zheng if (found_type == BTRFS_FILE_EXTENT_REG || 64807d400a6SYan Zheng found_type == BTRFS_FILE_EXTENT_PREALLOC) { 6495d4f98a2SYan Zheng u64 offset; 65007d400a6SYan Zheng unsigned long dest_offset; 65107d400a6SYan 
Zheng struct btrfs_key ins; 65207d400a6SYan Zheng 65307d400a6SYan Zheng ret = btrfs_insert_empty_item(trans, root, path, key, 65407d400a6SYan Zheng sizeof(*item)); 6553650860bSJosef Bacik if (ret) 6563650860bSJosef Bacik goto out; 65707d400a6SYan Zheng dest_offset = btrfs_item_ptr_offset(path->nodes[0], 65807d400a6SYan Zheng path->slots[0]); 65907d400a6SYan Zheng copy_extent_buffer(path->nodes[0], eb, dest_offset, 66007d400a6SYan Zheng (unsigned long)item, sizeof(*item)); 66107d400a6SYan Zheng 66207d400a6SYan Zheng ins.objectid = btrfs_file_extent_disk_bytenr(eb, item); 66307d400a6SYan Zheng ins.offset = btrfs_file_extent_disk_num_bytes(eb, item); 66407d400a6SYan Zheng ins.type = BTRFS_EXTENT_ITEM_KEY; 6655d4f98a2SYan Zheng offset = key->offset - btrfs_file_extent_offset(eb, item); 66607d400a6SYan Zheng 66707d400a6SYan Zheng if (ins.objectid > 0) { 66807d400a6SYan Zheng u64 csum_start; 66907d400a6SYan Zheng u64 csum_end; 67007d400a6SYan Zheng LIST_HEAD(ordered_sums); 67107d400a6SYan Zheng /* 67207d400a6SYan Zheng * is this extent already allocated in the extent 67307d400a6SYan Zheng * allocation tree? 
If so, just add a reference 67407d400a6SYan Zheng */ 6751a4ed8fdSFilipe Manana ret = btrfs_lookup_data_extent(root, ins.objectid, 67607d400a6SYan Zheng ins.offset); 67707d400a6SYan Zheng if (ret == 0) { 67807d400a6SYan Zheng ret = btrfs_inc_extent_ref(trans, root, 67907d400a6SYan Zheng ins.objectid, ins.offset, 6805d4f98a2SYan Zheng 0, root->root_key.objectid, 68166d7e7f0SArne Jansen key->objectid, offset, 0); 682b50c6e25SJosef Bacik if (ret) 683b50c6e25SJosef Bacik goto out; 68407d400a6SYan Zheng } else { 68507d400a6SYan Zheng /* 68607d400a6SYan Zheng * insert the extent pointer in the extent 68707d400a6SYan Zheng * allocation tree 68807d400a6SYan Zheng */ 6895d4f98a2SYan Zheng ret = btrfs_alloc_logged_file_extent(trans, 6905d4f98a2SYan Zheng root, root->root_key.objectid, 6915d4f98a2SYan Zheng key->objectid, offset, &ins); 692b50c6e25SJosef Bacik if (ret) 693b50c6e25SJosef Bacik goto out; 69407d400a6SYan Zheng } 695b3b4aa74SDavid Sterba btrfs_release_path(path); 69607d400a6SYan Zheng 69707d400a6SYan Zheng if (btrfs_file_extent_compression(eb, item)) { 69807d400a6SYan Zheng csum_start = ins.objectid; 69907d400a6SYan Zheng csum_end = csum_start + ins.offset; 70007d400a6SYan Zheng } else { 70107d400a6SYan Zheng csum_start = ins.objectid + 70207d400a6SYan Zheng btrfs_file_extent_offset(eb, item); 70307d400a6SYan Zheng csum_end = csum_start + 70407d400a6SYan Zheng btrfs_file_extent_num_bytes(eb, item); 70507d400a6SYan Zheng } 70607d400a6SYan Zheng 70707d400a6SYan Zheng ret = btrfs_lookup_csums_range(root->log_root, 70807d400a6SYan Zheng csum_start, csum_end - 1, 709a2de733cSArne Jansen &ordered_sums, 0); 7103650860bSJosef Bacik if (ret) 7113650860bSJosef Bacik goto out; 71207d400a6SYan Zheng while (!list_empty(&ordered_sums)) { 71307d400a6SYan Zheng struct btrfs_ordered_sum *sums; 71407d400a6SYan Zheng sums = list_entry(ordered_sums.next, 71507d400a6SYan Zheng struct btrfs_ordered_sum, 71607d400a6SYan Zheng list); 7173650860bSJosef Bacik if (!ret) 71807d400a6SYan 
Zheng ret = btrfs_csum_file_blocks(trans, 71907d400a6SYan Zheng root->fs_info->csum_root, 72007d400a6SYan Zheng sums); 72107d400a6SYan Zheng list_del(&sums->list); 72207d400a6SYan Zheng kfree(sums); 72307d400a6SYan Zheng } 7243650860bSJosef Bacik if (ret) 7253650860bSJosef Bacik goto out; 72607d400a6SYan Zheng } else { 727b3b4aa74SDavid Sterba btrfs_release_path(path); 72807d400a6SYan Zheng } 72907d400a6SYan Zheng } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { 73007d400a6SYan Zheng /* inline extents are easy, we just overwrite them */ 731e02119d5SChris Mason ret = overwrite_item(trans, root, path, eb, slot, key); 7323650860bSJosef Bacik if (ret) 7333650860bSJosef Bacik goto out; 73407d400a6SYan Zheng } 735e02119d5SChris Mason 7364bc4bee4SJosef Bacik inode_add_bytes(inode, nbytes); 737b9959295STsutomu Itoh ret = btrfs_update_inode(trans, root, inode); 738e02119d5SChris Mason out: 739e02119d5SChris Mason if (inode) 740e02119d5SChris Mason iput(inode); 741e02119d5SChris Mason return ret; 742e02119d5SChris Mason } 743e02119d5SChris Mason 744e02119d5SChris Mason /* 745e02119d5SChris Mason * when cleaning up conflicts between the directory names in the 746e02119d5SChris Mason * subvolume, directory names in the log and directory names in the 747e02119d5SChris Mason * inode back references, we may have to unlink inodes from directories. 
748e02119d5SChris Mason * 749e02119d5SChris Mason * This is a helper function to do the unlink of a specific directory 750e02119d5SChris Mason * item 751e02119d5SChris Mason */ 752e02119d5SChris Mason static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans, 753e02119d5SChris Mason struct btrfs_root *root, 754e02119d5SChris Mason struct btrfs_path *path, 755e02119d5SChris Mason struct inode *dir, 756e02119d5SChris Mason struct btrfs_dir_item *di) 757e02119d5SChris Mason { 758e02119d5SChris Mason struct inode *inode; 759e02119d5SChris Mason char *name; 760e02119d5SChris Mason int name_len; 761e02119d5SChris Mason struct extent_buffer *leaf; 762e02119d5SChris Mason struct btrfs_key location; 763e02119d5SChris Mason int ret; 764e02119d5SChris Mason 765e02119d5SChris Mason leaf = path->nodes[0]; 766e02119d5SChris Mason 767e02119d5SChris Mason btrfs_dir_item_key_to_cpu(leaf, di, &location); 768e02119d5SChris Mason name_len = btrfs_dir_name_len(leaf, di); 769e02119d5SChris Mason name = kmalloc(name_len, GFP_NOFS); 7702a29edc6Sliubo if (!name) 7712a29edc6Sliubo return -ENOMEM; 7722a29edc6Sliubo 773e02119d5SChris Mason read_extent_buffer(leaf, name, (unsigned long)(di + 1), name_len); 774b3b4aa74SDavid Sterba btrfs_release_path(path); 775e02119d5SChris Mason 776e02119d5SChris Mason inode = read_one_inode(root, location.objectid); 777c00e9493STsutomu Itoh if (!inode) { 7783650860bSJosef Bacik ret = -EIO; 7793650860bSJosef Bacik goto out; 780c00e9493STsutomu Itoh } 781e02119d5SChris Mason 782ec051c0fSYan Zheng ret = link_to_fixup_dir(trans, root, path, location.objectid); 7833650860bSJosef Bacik if (ret) 7843650860bSJosef Bacik goto out; 78512fcfd22SChris Mason 786e02119d5SChris Mason ret = btrfs_unlink_inode(trans, root, dir, inode, name, name_len); 7873650860bSJosef Bacik if (ret) 7883650860bSJosef Bacik goto out; 789ada9af21SFilipe David Borba Manana else 790ada9af21SFilipe David Borba Manana ret = btrfs_run_delayed_items(trans, root); 7913650860bSJosef 
Bacik out: 7923650860bSJosef Bacik kfree(name); 7933650860bSJosef Bacik iput(inode); 794e02119d5SChris Mason return ret; 795e02119d5SChris Mason } 796e02119d5SChris Mason 797e02119d5SChris Mason /* 798e02119d5SChris Mason * helper function to see if a given name and sequence number found 799e02119d5SChris Mason * in an inode back reference are already in a directory and correctly 800e02119d5SChris Mason * point to this inode 801e02119d5SChris Mason */ 802e02119d5SChris Mason static noinline int inode_in_dir(struct btrfs_root *root, 803e02119d5SChris Mason struct btrfs_path *path, 804e02119d5SChris Mason u64 dirid, u64 objectid, u64 index, 805e02119d5SChris Mason const char *name, int name_len) 806e02119d5SChris Mason { 807e02119d5SChris Mason struct btrfs_dir_item *di; 808e02119d5SChris Mason struct btrfs_key location; 809e02119d5SChris Mason int match = 0; 810e02119d5SChris Mason 811e02119d5SChris Mason di = btrfs_lookup_dir_index_item(NULL, root, path, dirid, 812e02119d5SChris Mason index, name, name_len, 0); 813e02119d5SChris Mason if (di && !IS_ERR(di)) { 814e02119d5SChris Mason btrfs_dir_item_key_to_cpu(path->nodes[0], di, &location); 815e02119d5SChris Mason if (location.objectid != objectid) 816e02119d5SChris Mason goto out; 817e02119d5SChris Mason } else 818e02119d5SChris Mason goto out; 819b3b4aa74SDavid Sterba btrfs_release_path(path); 820e02119d5SChris Mason 821e02119d5SChris Mason di = btrfs_lookup_dir_item(NULL, root, path, dirid, name, name_len, 0); 822e02119d5SChris Mason if (di && !IS_ERR(di)) { 823e02119d5SChris Mason btrfs_dir_item_key_to_cpu(path->nodes[0], di, &location); 824e02119d5SChris Mason if (location.objectid != objectid) 825e02119d5SChris Mason goto out; 826e02119d5SChris Mason } else 827e02119d5SChris Mason goto out; 828e02119d5SChris Mason match = 1; 829e02119d5SChris Mason out: 830b3b4aa74SDavid Sterba btrfs_release_path(path); 831e02119d5SChris Mason return match; 832e02119d5SChris Mason } 833e02119d5SChris Mason 834e02119d5SChris 
Mason /* 835e02119d5SChris Mason * helper function to check a log tree for a named back reference in 836e02119d5SChris Mason * an inode. This is used to decide if a back reference that is 837e02119d5SChris Mason * found in the subvolume conflicts with what we find in the log. 838e02119d5SChris Mason * 839e02119d5SChris Mason * inode backreferences may have multiple refs in a single item, 840e02119d5SChris Mason * during replay we process one reference at a time, and we don't 841e02119d5SChris Mason * want to delete valid links to a file from the subvolume if that 842e02119d5SChris Mason * link is also in the log. 843e02119d5SChris Mason */ 844e02119d5SChris Mason static noinline int backref_in_log(struct btrfs_root *log, 845e02119d5SChris Mason struct btrfs_key *key, 846f186373fSMark Fasheh u64 ref_objectid, 847e02119d5SChris Mason char *name, int namelen) 848e02119d5SChris Mason { 849e02119d5SChris Mason struct btrfs_path *path; 850e02119d5SChris Mason struct btrfs_inode_ref *ref; 851e02119d5SChris Mason unsigned long ptr; 852e02119d5SChris Mason unsigned long ptr_end; 853e02119d5SChris Mason unsigned long name_ptr; 854e02119d5SChris Mason int found_name_len; 855e02119d5SChris Mason int item_size; 856e02119d5SChris Mason int ret; 857e02119d5SChris Mason int match = 0; 858e02119d5SChris Mason 859e02119d5SChris Mason path = btrfs_alloc_path(); 8602a29edc6Sliubo if (!path) 8612a29edc6Sliubo return -ENOMEM; 8622a29edc6Sliubo 863e02119d5SChris Mason ret = btrfs_search_slot(NULL, log, key, path, 0, 0); 864e02119d5SChris Mason if (ret != 0) 865e02119d5SChris Mason goto out; 866e02119d5SChris Mason 867e02119d5SChris Mason ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]); 868f186373fSMark Fasheh 869f186373fSMark Fasheh if (key->type == BTRFS_INODE_EXTREF_KEY) { 870f186373fSMark Fasheh if (btrfs_find_name_in_ext_backref(path, ref_objectid, 871f186373fSMark Fasheh name, namelen, NULL)) 872f186373fSMark Fasheh match = 1; 873f186373fSMark Fasheh 874f186373fSMark 
Fasheh goto out; 875f186373fSMark Fasheh } 876f186373fSMark Fasheh 877f186373fSMark Fasheh item_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]); 878e02119d5SChris Mason ptr_end = ptr + item_size; 879e02119d5SChris Mason while (ptr < ptr_end) { 880e02119d5SChris Mason ref = (struct btrfs_inode_ref *)ptr; 881e02119d5SChris Mason found_name_len = btrfs_inode_ref_name_len(path->nodes[0], ref); 882e02119d5SChris Mason if (found_name_len == namelen) { 883e02119d5SChris Mason name_ptr = (unsigned long)(ref + 1); 884e02119d5SChris Mason ret = memcmp_extent_buffer(path->nodes[0], name, 885e02119d5SChris Mason name_ptr, namelen); 886e02119d5SChris Mason if (ret == 0) { 887e02119d5SChris Mason match = 1; 888e02119d5SChris Mason goto out; 889e02119d5SChris Mason } 890e02119d5SChris Mason } 891e02119d5SChris Mason ptr = (unsigned long)(ref + 1) + found_name_len; 892e02119d5SChris Mason } 893e02119d5SChris Mason out: 894e02119d5SChris Mason btrfs_free_path(path); 895e02119d5SChris Mason return match; 896e02119d5SChris Mason } 897e02119d5SChris Mason 8985a1d7843SJan Schmidt static inline int __add_inode_ref(struct btrfs_trans_handle *trans, 8995a1d7843SJan Schmidt struct btrfs_root *root, 9005a1d7843SJan Schmidt struct btrfs_path *path, 9015a1d7843SJan Schmidt struct btrfs_root *log_root, 9025a1d7843SJan Schmidt struct inode *dir, struct inode *inode, 9035a1d7843SJan Schmidt struct extent_buffer *eb, 904f186373fSMark Fasheh u64 inode_objectid, u64 parent_objectid, 905f186373fSMark Fasheh u64 ref_index, char *name, int namelen, 906f186373fSMark Fasheh int *search_done) 9075a1d7843SJan Schmidt { 9085a1d7843SJan Schmidt int ret; 9095a1d7843SJan Schmidt char *victim_name; 9105a1d7843SJan Schmidt int victim_name_len; 911f186373fSMark Fasheh struct extent_buffer *leaf; 912f186373fSMark Fasheh struct btrfs_dir_item *di; 913f186373fSMark Fasheh struct btrfs_key search_key; 914f186373fSMark Fasheh struct btrfs_inode_extref *extref; 915f186373fSMark Fasheh 916f186373fSMark Fasheh 
again: 917f186373fSMark Fasheh /* Search old style refs */ 918f186373fSMark Fasheh search_key.objectid = inode_objectid; 919f186373fSMark Fasheh search_key.type = BTRFS_INODE_REF_KEY; 920f186373fSMark Fasheh search_key.offset = parent_objectid; 921f186373fSMark Fasheh ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0); 922f186373fSMark Fasheh if (ret == 0) { 9235a1d7843SJan Schmidt struct btrfs_inode_ref *victim_ref; 9245a1d7843SJan Schmidt unsigned long ptr; 9255a1d7843SJan Schmidt unsigned long ptr_end; 926f186373fSMark Fasheh 927f186373fSMark Fasheh leaf = path->nodes[0]; 9285a1d7843SJan Schmidt 9295a1d7843SJan Schmidt /* are we trying to overwrite a back ref for the root directory 9305a1d7843SJan Schmidt * if so, just jump out, we're done 9315a1d7843SJan Schmidt */ 932f186373fSMark Fasheh if (search_key.objectid == search_key.offset) 9335a1d7843SJan Schmidt return 1; 9345a1d7843SJan Schmidt 9355a1d7843SJan Schmidt /* check all the names in this back reference to see 9365a1d7843SJan Schmidt * if they are in the log. 
if so, we allow them to stay 9375a1d7843SJan Schmidt * otherwise they must be unlinked as a conflict 9385a1d7843SJan Schmidt */ 9395a1d7843SJan Schmidt ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); 9405a1d7843SJan Schmidt ptr_end = ptr + btrfs_item_size_nr(leaf, path->slots[0]); 9415a1d7843SJan Schmidt while (ptr < ptr_end) { 9425a1d7843SJan Schmidt victim_ref = (struct btrfs_inode_ref *)ptr; 9435a1d7843SJan Schmidt victim_name_len = btrfs_inode_ref_name_len(leaf, 9445a1d7843SJan Schmidt victim_ref); 9455a1d7843SJan Schmidt victim_name = kmalloc(victim_name_len, GFP_NOFS); 9463650860bSJosef Bacik if (!victim_name) 9473650860bSJosef Bacik return -ENOMEM; 9485a1d7843SJan Schmidt 9495a1d7843SJan Schmidt read_extent_buffer(leaf, victim_name, 9505a1d7843SJan Schmidt (unsigned long)(victim_ref + 1), 9515a1d7843SJan Schmidt victim_name_len); 9525a1d7843SJan Schmidt 953f186373fSMark Fasheh if (!backref_in_log(log_root, &search_key, 954f186373fSMark Fasheh parent_objectid, 955f186373fSMark Fasheh victim_name, 9565a1d7843SJan Schmidt victim_name_len)) { 9578b558c5fSZach Brown inc_nlink(inode); 9585a1d7843SJan Schmidt btrfs_release_path(path); 9595a1d7843SJan Schmidt 9605a1d7843SJan Schmidt ret = btrfs_unlink_inode(trans, root, dir, 9615a1d7843SJan Schmidt inode, victim_name, 9625a1d7843SJan Schmidt victim_name_len); 963f186373fSMark Fasheh kfree(victim_name); 9643650860bSJosef Bacik if (ret) 9653650860bSJosef Bacik return ret; 966ada9af21SFilipe David Borba Manana ret = btrfs_run_delayed_items(trans, root); 967ada9af21SFilipe David Borba Manana if (ret) 968ada9af21SFilipe David Borba Manana return ret; 969f186373fSMark Fasheh *search_done = 1; 970f186373fSMark Fasheh goto again; 9715a1d7843SJan Schmidt } 9725a1d7843SJan Schmidt kfree(victim_name); 973f186373fSMark Fasheh 9745a1d7843SJan Schmidt ptr = (unsigned long)(victim_ref + 1) + victim_name_len; 9755a1d7843SJan Schmidt } 9765a1d7843SJan Schmidt 9775a1d7843SJan Schmidt /* 9785a1d7843SJan Schmidt * NOTE: we have 
searched root tree and checked the 9795a1d7843SJan Schmidt * coresponding ref, it does not need to check again. 9805a1d7843SJan Schmidt */ 9815a1d7843SJan Schmidt *search_done = 1; 9825a1d7843SJan Schmidt } 9835a1d7843SJan Schmidt btrfs_release_path(path); 9845a1d7843SJan Schmidt 985f186373fSMark Fasheh /* Same search but for extended refs */ 986f186373fSMark Fasheh extref = btrfs_lookup_inode_extref(NULL, root, path, name, namelen, 987f186373fSMark Fasheh inode_objectid, parent_objectid, 0, 988f186373fSMark Fasheh 0); 989f186373fSMark Fasheh if (!IS_ERR_OR_NULL(extref)) { 990f186373fSMark Fasheh u32 item_size; 991f186373fSMark Fasheh u32 cur_offset = 0; 992f186373fSMark Fasheh unsigned long base; 993f186373fSMark Fasheh struct inode *victim_parent; 994f186373fSMark Fasheh 995f186373fSMark Fasheh leaf = path->nodes[0]; 996f186373fSMark Fasheh 997f186373fSMark Fasheh item_size = btrfs_item_size_nr(leaf, path->slots[0]); 998f186373fSMark Fasheh base = btrfs_item_ptr_offset(leaf, path->slots[0]); 999f186373fSMark Fasheh 1000f186373fSMark Fasheh while (cur_offset < item_size) { 1001f186373fSMark Fasheh extref = (struct btrfs_inode_extref *)base + cur_offset; 1002f186373fSMark Fasheh 1003f186373fSMark Fasheh victim_name_len = btrfs_inode_extref_name_len(leaf, extref); 1004f186373fSMark Fasheh 1005f186373fSMark Fasheh if (btrfs_inode_extref_parent(leaf, extref) != parent_objectid) 1006f186373fSMark Fasheh goto next; 1007f186373fSMark Fasheh 1008f186373fSMark Fasheh victim_name = kmalloc(victim_name_len, GFP_NOFS); 10093650860bSJosef Bacik if (!victim_name) 10103650860bSJosef Bacik return -ENOMEM; 1011f186373fSMark Fasheh read_extent_buffer(leaf, victim_name, (unsigned long)&extref->name, 1012f186373fSMark Fasheh victim_name_len); 1013f186373fSMark Fasheh 1014f186373fSMark Fasheh search_key.objectid = inode_objectid; 1015f186373fSMark Fasheh search_key.type = BTRFS_INODE_EXTREF_KEY; 1016f186373fSMark Fasheh search_key.offset = btrfs_extref_hash(parent_objectid, 
1017f186373fSMark Fasheh victim_name, 1018f186373fSMark Fasheh victim_name_len); 1019f186373fSMark Fasheh ret = 0; 1020f186373fSMark Fasheh if (!backref_in_log(log_root, &search_key, 1021f186373fSMark Fasheh parent_objectid, victim_name, 1022f186373fSMark Fasheh victim_name_len)) { 1023f186373fSMark Fasheh ret = -ENOENT; 1024f186373fSMark Fasheh victim_parent = read_one_inode(root, 1025f186373fSMark Fasheh parent_objectid); 1026f186373fSMark Fasheh if (victim_parent) { 10278b558c5fSZach Brown inc_nlink(inode); 1028f186373fSMark Fasheh btrfs_release_path(path); 1029f186373fSMark Fasheh 1030f186373fSMark Fasheh ret = btrfs_unlink_inode(trans, root, 1031f186373fSMark Fasheh victim_parent, 1032f186373fSMark Fasheh inode, 1033f186373fSMark Fasheh victim_name, 1034f186373fSMark Fasheh victim_name_len); 1035ada9af21SFilipe David Borba Manana if (!ret) 1036ada9af21SFilipe David Borba Manana ret = btrfs_run_delayed_items( 1037ada9af21SFilipe David Borba Manana trans, root); 1038f186373fSMark Fasheh } 1039f186373fSMark Fasheh iput(victim_parent); 1040f186373fSMark Fasheh kfree(victim_name); 10413650860bSJosef Bacik if (ret) 10423650860bSJosef Bacik return ret; 1043f186373fSMark Fasheh *search_done = 1; 1044f186373fSMark Fasheh goto again; 1045f186373fSMark Fasheh } 1046f186373fSMark Fasheh kfree(victim_name); 10473650860bSJosef Bacik if (ret) 10483650860bSJosef Bacik return ret; 1049f186373fSMark Fasheh next: 1050f186373fSMark Fasheh cur_offset += victim_name_len + sizeof(*extref); 1051f186373fSMark Fasheh } 1052f186373fSMark Fasheh *search_done = 1; 1053f186373fSMark Fasheh } 1054f186373fSMark Fasheh btrfs_release_path(path); 1055f186373fSMark Fasheh 10565a1d7843SJan Schmidt /* look for a conflicting sequence number */ 10575a1d7843SJan Schmidt di = btrfs_lookup_dir_index_item(trans, root, path, btrfs_ino(dir), 1058f186373fSMark Fasheh ref_index, name, namelen, 0); 10595a1d7843SJan Schmidt if (di && !IS_ERR(di)) { 10605a1d7843SJan Schmidt ret = drop_one_dir_item(trans, root, 
path, dir, di); 10613650860bSJosef Bacik if (ret) 10623650860bSJosef Bacik return ret; 10635a1d7843SJan Schmidt } 10645a1d7843SJan Schmidt btrfs_release_path(path); 10655a1d7843SJan Schmidt 10665a1d7843SJan Schmidt /* look for a conflicing name */ 10675a1d7843SJan Schmidt di = btrfs_lookup_dir_item(trans, root, path, btrfs_ino(dir), 10685a1d7843SJan Schmidt name, namelen, 0); 10695a1d7843SJan Schmidt if (di && !IS_ERR(di)) { 10705a1d7843SJan Schmidt ret = drop_one_dir_item(trans, root, path, dir, di); 10713650860bSJosef Bacik if (ret) 10723650860bSJosef Bacik return ret; 10735a1d7843SJan Schmidt } 10745a1d7843SJan Schmidt btrfs_release_path(path); 10755a1d7843SJan Schmidt 10765a1d7843SJan Schmidt return 0; 10775a1d7843SJan Schmidt } 1078e02119d5SChris Mason 1079f186373fSMark Fasheh static int extref_get_fields(struct extent_buffer *eb, unsigned long ref_ptr, 1080f186373fSMark Fasheh u32 *namelen, char **name, u64 *index, 1081f186373fSMark Fasheh u64 *parent_objectid) 1082f186373fSMark Fasheh { 1083f186373fSMark Fasheh struct btrfs_inode_extref *extref; 1084f186373fSMark Fasheh 1085f186373fSMark Fasheh extref = (struct btrfs_inode_extref *)ref_ptr; 1086f186373fSMark Fasheh 1087f186373fSMark Fasheh *namelen = btrfs_inode_extref_name_len(eb, extref); 1088f186373fSMark Fasheh *name = kmalloc(*namelen, GFP_NOFS); 1089f186373fSMark Fasheh if (*name == NULL) 1090f186373fSMark Fasheh return -ENOMEM; 1091f186373fSMark Fasheh 1092f186373fSMark Fasheh read_extent_buffer(eb, *name, (unsigned long)&extref->name, 1093f186373fSMark Fasheh *namelen); 1094f186373fSMark Fasheh 1095f186373fSMark Fasheh *index = btrfs_inode_extref_index(eb, extref); 1096f186373fSMark Fasheh if (parent_objectid) 1097f186373fSMark Fasheh *parent_objectid = btrfs_inode_extref_parent(eb, extref); 1098f186373fSMark Fasheh 1099f186373fSMark Fasheh return 0; 1100f186373fSMark Fasheh } 1101f186373fSMark Fasheh 1102f186373fSMark Fasheh static int ref_get_fields(struct extent_buffer *eb, unsigned long ref_ptr, 
1103f186373fSMark Fasheh u32 *namelen, char **name, u64 *index) 1104f186373fSMark Fasheh { 1105f186373fSMark Fasheh struct btrfs_inode_ref *ref; 1106f186373fSMark Fasheh 1107f186373fSMark Fasheh ref = (struct btrfs_inode_ref *)ref_ptr; 1108f186373fSMark Fasheh 1109f186373fSMark Fasheh *namelen = btrfs_inode_ref_name_len(eb, ref); 1110f186373fSMark Fasheh *name = kmalloc(*namelen, GFP_NOFS); 1111f186373fSMark Fasheh if (*name == NULL) 1112f186373fSMark Fasheh return -ENOMEM; 1113f186373fSMark Fasheh 1114f186373fSMark Fasheh read_extent_buffer(eb, *name, (unsigned long)(ref + 1), *namelen); 1115f186373fSMark Fasheh 1116f186373fSMark Fasheh *index = btrfs_inode_ref_index(eb, ref); 1117f186373fSMark Fasheh 1118f186373fSMark Fasheh return 0; 1119f186373fSMark Fasheh } 1120f186373fSMark Fasheh 1121e02119d5SChris Mason /* 1122e02119d5SChris Mason * replay one inode back reference item found in the log tree. 1123e02119d5SChris Mason * eb, slot and key refer to the buffer and key found in the log tree. 1124e02119d5SChris Mason * root is the destination we are replaying into, and path is for temp 1125e02119d5SChris Mason * use by this function. (it should be released on return). 1126e02119d5SChris Mason */ 1127e02119d5SChris Mason static noinline int add_inode_ref(struct btrfs_trans_handle *trans, 1128e02119d5SChris Mason struct btrfs_root *root, 1129e02119d5SChris Mason struct btrfs_root *log, 1130e02119d5SChris Mason struct btrfs_path *path, 1131e02119d5SChris Mason struct extent_buffer *eb, int slot, 1132e02119d5SChris Mason struct btrfs_key *key) 1133e02119d5SChris Mason { 113403b2f08bSGeyslan G. Bem struct inode *dir = NULL; 113503b2f08bSGeyslan G. Bem struct inode *inode = NULL; 1136e02119d5SChris Mason unsigned long ref_ptr; 1137e02119d5SChris Mason unsigned long ref_end; 113803b2f08bSGeyslan G. 
Bem char *name = NULL; 113934f3e4f2Sliubo int namelen; 114034f3e4f2Sliubo int ret; 1141c622ae60Sliubo int search_done = 0; 1142f186373fSMark Fasheh int log_ref_ver = 0; 1143f186373fSMark Fasheh u64 parent_objectid; 1144f186373fSMark Fasheh u64 inode_objectid; 1145f46dbe3dSChris Mason u64 ref_index = 0; 1146f186373fSMark Fasheh int ref_struct_size; 1147f186373fSMark Fasheh 1148f186373fSMark Fasheh ref_ptr = btrfs_item_ptr_offset(eb, slot); 1149f186373fSMark Fasheh ref_end = ref_ptr + btrfs_item_size_nr(eb, slot); 1150f186373fSMark Fasheh 1151f186373fSMark Fasheh if (key->type == BTRFS_INODE_EXTREF_KEY) { 1152f186373fSMark Fasheh struct btrfs_inode_extref *r; 1153f186373fSMark Fasheh 1154f186373fSMark Fasheh ref_struct_size = sizeof(struct btrfs_inode_extref); 1155f186373fSMark Fasheh log_ref_ver = 1; 1156f186373fSMark Fasheh r = (struct btrfs_inode_extref *)ref_ptr; 1157f186373fSMark Fasheh parent_objectid = btrfs_inode_extref_parent(eb, r); 1158f186373fSMark Fasheh } else { 1159f186373fSMark Fasheh ref_struct_size = sizeof(struct btrfs_inode_ref); 1160f186373fSMark Fasheh parent_objectid = key->offset; 1161f186373fSMark Fasheh } 1162f186373fSMark Fasheh inode_objectid = key->objectid; 1163e02119d5SChris Mason 1164e02119d5SChris Mason /* 1165e02119d5SChris Mason * it is possible that we didn't log all the parent directories 1166e02119d5SChris Mason * for a given inode. If we don't find the dir, just don't 1167e02119d5SChris Mason * copy the back ref in. The link count fixup code will take 1168e02119d5SChris Mason * care of the rest 1169e02119d5SChris Mason */ 1170f186373fSMark Fasheh dir = read_one_inode(root, parent_objectid); 117103b2f08bSGeyslan G. Bem if (!dir) { 117203b2f08bSGeyslan G. Bem ret = -ENOENT; 117303b2f08bSGeyslan G. Bem goto out; 117403b2f08bSGeyslan G. Bem } 1175e02119d5SChris Mason 1176f186373fSMark Fasheh inode = read_one_inode(root, inode_objectid); 1177c00e9493STsutomu Itoh if (!inode) { 117803b2f08bSGeyslan G. 
Bem ret = -EIO; 117903b2f08bSGeyslan G. Bem goto out; 1180c00e9493STsutomu Itoh } 1181e02119d5SChris Mason 11825a1d7843SJan Schmidt while (ref_ptr < ref_end) { 1183f186373fSMark Fasheh if (log_ref_ver) { 1184f186373fSMark Fasheh ret = extref_get_fields(eb, ref_ptr, &namelen, &name, 1185f186373fSMark Fasheh &ref_index, &parent_objectid); 1186f186373fSMark Fasheh /* 1187f186373fSMark Fasheh * parent object can change from one array 1188f186373fSMark Fasheh * item to another. 1189f186373fSMark Fasheh */ 1190f186373fSMark Fasheh if (!dir) 1191f186373fSMark Fasheh dir = read_one_inode(root, parent_objectid); 119203b2f08bSGeyslan G. Bem if (!dir) { 119303b2f08bSGeyslan G. Bem ret = -ENOENT; 119403b2f08bSGeyslan G. Bem goto out; 119503b2f08bSGeyslan G. Bem } 1196f186373fSMark Fasheh } else { 1197f186373fSMark Fasheh ret = ref_get_fields(eb, ref_ptr, &namelen, &name, 1198f186373fSMark Fasheh &ref_index); 1199f186373fSMark Fasheh } 1200f186373fSMark Fasheh if (ret) 120103b2f08bSGeyslan G. Bem goto out; 1202e02119d5SChris Mason 1203e02119d5SChris Mason /* if we already have a perfect match, we're done */ 12045a1d7843SJan Schmidt if (!inode_in_dir(root, path, btrfs_ino(dir), btrfs_ino(inode), 1205f186373fSMark Fasheh ref_index, name, namelen)) { 12065a1d7843SJan Schmidt /* 12075a1d7843SJan Schmidt * look for a conflicting back reference in the 12085a1d7843SJan Schmidt * metadata. if we find one we have to unlink that name 12095a1d7843SJan Schmidt * of the file before we add our new link. Later on, we 12105a1d7843SJan Schmidt * overwrite any existing back reference, and we don't 12115a1d7843SJan Schmidt * want to create dangling pointers in the directory. 
12125a1d7843SJan Schmidt */ 12135a1d7843SJan Schmidt 12145a1d7843SJan Schmidt if (!search_done) { 12155a1d7843SJan Schmidt ret = __add_inode_ref(trans, root, path, log, 1216f186373fSMark Fasheh dir, inode, eb, 1217f186373fSMark Fasheh inode_objectid, 1218f186373fSMark Fasheh parent_objectid, 1219f186373fSMark Fasheh ref_index, name, namelen, 12205a1d7843SJan Schmidt &search_done); 122103b2f08bSGeyslan G. Bem if (ret) { 122203b2f08bSGeyslan G. Bem if (ret == 1) 12233650860bSJosef Bacik ret = 0; 1224e02119d5SChris Mason goto out; 12253650860bSJosef Bacik } 122634f3e4f2Sliubo } 122734f3e4f2Sliubo 1228e02119d5SChris Mason /* insert our name */ 12295a1d7843SJan Schmidt ret = btrfs_add_link(trans, dir, inode, name, namelen, 1230f186373fSMark Fasheh 0, ref_index); 12313650860bSJosef Bacik if (ret) 12323650860bSJosef Bacik goto out; 1233e02119d5SChris Mason 1234e02119d5SChris Mason btrfs_update_inode(trans, root, inode); 12355a1d7843SJan Schmidt } 1236e02119d5SChris Mason 1237f186373fSMark Fasheh ref_ptr = (unsigned long)(ref_ptr + ref_struct_size) + namelen; 1238e02119d5SChris Mason kfree(name); 123903b2f08bSGeyslan G. Bem name = NULL; 1240f186373fSMark Fasheh if (log_ref_ver) { 1241f186373fSMark Fasheh iput(dir); 1242f186373fSMark Fasheh dir = NULL; 1243f186373fSMark Fasheh } 12445a1d7843SJan Schmidt } 1245e02119d5SChris Mason 1246e02119d5SChris Mason /* finally write the back reference in the inode */ 1247e02119d5SChris Mason ret = overwrite_item(trans, root, path, eb, slot, key); 12485a1d7843SJan Schmidt out: 1249b3b4aa74SDavid Sterba btrfs_release_path(path); 125003b2f08bSGeyslan G. 
Bem kfree(name); 1251e02119d5SChris Mason iput(dir); 1252e02119d5SChris Mason iput(inode); 12533650860bSJosef Bacik return ret; 1254e02119d5SChris Mason } 1255e02119d5SChris Mason 1256c71bf099SYan, Zheng static int insert_orphan_item(struct btrfs_trans_handle *trans, 1257c71bf099SYan, Zheng struct btrfs_root *root, u64 offset) 1258c71bf099SYan, Zheng { 1259c71bf099SYan, Zheng int ret; 12603f870c28SKelley Nielsen ret = btrfs_find_item(root, NULL, BTRFS_ORPHAN_OBJECTID, 12613f870c28SKelley Nielsen offset, BTRFS_ORPHAN_ITEM_KEY, NULL); 1262c71bf099SYan, Zheng if (ret > 0) 1263c71bf099SYan, Zheng ret = btrfs_insert_orphan_item(trans, root, offset); 1264c71bf099SYan, Zheng return ret; 1265c71bf099SYan, Zheng } 1266c71bf099SYan, Zheng 1267f186373fSMark Fasheh static int count_inode_extrefs(struct btrfs_root *root, 1268f186373fSMark Fasheh struct inode *inode, struct btrfs_path *path) 1269e02119d5SChris Mason { 1270f186373fSMark Fasheh int ret = 0; 1271f186373fSMark Fasheh int name_len; 1272f186373fSMark Fasheh unsigned int nlink = 0; 1273f186373fSMark Fasheh u32 item_size; 1274f186373fSMark Fasheh u32 cur_offset = 0; 1275f186373fSMark Fasheh u64 inode_objectid = btrfs_ino(inode); 1276f186373fSMark Fasheh u64 offset = 0; 1277f186373fSMark Fasheh unsigned long ptr; 1278f186373fSMark Fasheh struct btrfs_inode_extref *extref; 1279f186373fSMark Fasheh struct extent_buffer *leaf; 1280f186373fSMark Fasheh 1281f186373fSMark Fasheh while (1) { 1282f186373fSMark Fasheh ret = btrfs_find_one_extref(root, inode_objectid, offset, path, 1283f186373fSMark Fasheh &extref, &offset); 1284f186373fSMark Fasheh if (ret) 1285f186373fSMark Fasheh break; 1286f186373fSMark Fasheh 1287f186373fSMark Fasheh leaf = path->nodes[0]; 1288f186373fSMark Fasheh item_size = btrfs_item_size_nr(leaf, path->slots[0]); 1289f186373fSMark Fasheh ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); 1290f186373fSMark Fasheh 1291f186373fSMark Fasheh while (cur_offset < item_size) { 1292f186373fSMark Fasheh extref = 
(struct btrfs_inode_extref *) (ptr + cur_offset); 1293f186373fSMark Fasheh name_len = btrfs_inode_extref_name_len(leaf, extref); 1294f186373fSMark Fasheh 1295f186373fSMark Fasheh nlink++; 1296f186373fSMark Fasheh 1297f186373fSMark Fasheh cur_offset += name_len + sizeof(*extref); 1298f186373fSMark Fasheh } 1299f186373fSMark Fasheh 1300f186373fSMark Fasheh offset++; 1301f186373fSMark Fasheh btrfs_release_path(path); 1302f186373fSMark Fasheh } 1303f186373fSMark Fasheh btrfs_release_path(path); 1304f186373fSMark Fasheh 1305f186373fSMark Fasheh if (ret < 0) 1306f186373fSMark Fasheh return ret; 1307f186373fSMark Fasheh return nlink; 1308f186373fSMark Fasheh } 1309f186373fSMark Fasheh 1310f186373fSMark Fasheh static int count_inode_refs(struct btrfs_root *root, 1311f186373fSMark Fasheh struct inode *inode, struct btrfs_path *path) 1312f186373fSMark Fasheh { 1313e02119d5SChris Mason int ret; 1314e02119d5SChris Mason struct btrfs_key key; 1315f186373fSMark Fasheh unsigned int nlink = 0; 1316e02119d5SChris Mason unsigned long ptr; 1317e02119d5SChris Mason unsigned long ptr_end; 1318e02119d5SChris Mason int name_len; 131933345d01SLi Zefan u64 ino = btrfs_ino(inode); 1320e02119d5SChris Mason 132133345d01SLi Zefan key.objectid = ino; 1322e02119d5SChris Mason key.type = BTRFS_INODE_REF_KEY; 1323e02119d5SChris Mason key.offset = (u64)-1; 1324e02119d5SChris Mason 1325e02119d5SChris Mason while (1) { 1326e02119d5SChris Mason ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 1327e02119d5SChris Mason if (ret < 0) 1328e02119d5SChris Mason break; 1329e02119d5SChris Mason if (ret > 0) { 1330e02119d5SChris Mason if (path->slots[0] == 0) 1331e02119d5SChris Mason break; 1332e02119d5SChris Mason path->slots[0]--; 1333e02119d5SChris Mason } 1334e93ae26fSFilipe David Borba Manana process_slot: 1335e02119d5SChris Mason btrfs_item_key_to_cpu(path->nodes[0], &key, 1336e02119d5SChris Mason path->slots[0]); 133733345d01SLi Zefan if (key.objectid != ino || 1338e02119d5SChris Mason key.type != 
BTRFS_INODE_REF_KEY) 1339e02119d5SChris Mason break; 1340e02119d5SChris Mason ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]); 1341e02119d5SChris Mason ptr_end = ptr + btrfs_item_size_nr(path->nodes[0], 1342e02119d5SChris Mason path->slots[0]); 1343e02119d5SChris Mason while (ptr < ptr_end) { 1344e02119d5SChris Mason struct btrfs_inode_ref *ref; 1345e02119d5SChris Mason 1346e02119d5SChris Mason ref = (struct btrfs_inode_ref *)ptr; 1347e02119d5SChris Mason name_len = btrfs_inode_ref_name_len(path->nodes[0], 1348e02119d5SChris Mason ref); 1349e02119d5SChris Mason ptr = (unsigned long)(ref + 1) + name_len; 1350e02119d5SChris Mason nlink++; 1351e02119d5SChris Mason } 1352e02119d5SChris Mason 1353e02119d5SChris Mason if (key.offset == 0) 1354e02119d5SChris Mason break; 1355e93ae26fSFilipe David Borba Manana if (path->slots[0] > 0) { 1356e93ae26fSFilipe David Borba Manana path->slots[0]--; 1357e93ae26fSFilipe David Borba Manana goto process_slot; 1358e93ae26fSFilipe David Borba Manana } 1359e02119d5SChris Mason key.offset--; 1360b3b4aa74SDavid Sterba btrfs_release_path(path); 1361e02119d5SChris Mason } 1362b3b4aa74SDavid Sterba btrfs_release_path(path); 1363f186373fSMark Fasheh 1364f186373fSMark Fasheh return nlink; 1365f186373fSMark Fasheh } 1366f186373fSMark Fasheh 1367f186373fSMark Fasheh /* 1368f186373fSMark Fasheh * There are a few corners where the link count of the file can't 1369f186373fSMark Fasheh * be properly maintained during replay. So, instead of adding 1370f186373fSMark Fasheh * lots of complexity to the log code, we just scan the backrefs 1371f186373fSMark Fasheh * for any file that has been through replay. 1372f186373fSMark Fasheh * 1373f186373fSMark Fasheh * The scan will update the link count on the inode to reflect the 1374f186373fSMark Fasheh * number of back refs found. If it goes down to zero, the iput 1375f186373fSMark Fasheh * will free the inode. 
1376f186373fSMark Fasheh */ 1377f186373fSMark Fasheh static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans, 1378f186373fSMark Fasheh struct btrfs_root *root, 1379f186373fSMark Fasheh struct inode *inode) 1380f186373fSMark Fasheh { 1381f186373fSMark Fasheh struct btrfs_path *path; 1382f186373fSMark Fasheh int ret; 1383f186373fSMark Fasheh u64 nlink = 0; 1384f186373fSMark Fasheh u64 ino = btrfs_ino(inode); 1385f186373fSMark Fasheh 1386f186373fSMark Fasheh path = btrfs_alloc_path(); 1387f186373fSMark Fasheh if (!path) 1388f186373fSMark Fasheh return -ENOMEM; 1389f186373fSMark Fasheh 1390f186373fSMark Fasheh ret = count_inode_refs(root, inode, path); 1391f186373fSMark Fasheh if (ret < 0) 1392f186373fSMark Fasheh goto out; 1393f186373fSMark Fasheh 1394f186373fSMark Fasheh nlink = ret; 1395f186373fSMark Fasheh 1396f186373fSMark Fasheh ret = count_inode_extrefs(root, inode, path); 1397f186373fSMark Fasheh if (ret == -ENOENT) 1398f186373fSMark Fasheh ret = 0; 1399f186373fSMark Fasheh 1400f186373fSMark Fasheh if (ret < 0) 1401f186373fSMark Fasheh goto out; 1402f186373fSMark Fasheh 1403f186373fSMark Fasheh nlink += ret; 1404f186373fSMark Fasheh 1405f186373fSMark Fasheh ret = 0; 1406f186373fSMark Fasheh 1407e02119d5SChris Mason if (nlink != inode->i_nlink) { 1408bfe86848SMiklos Szeredi set_nlink(inode, nlink); 1409e02119d5SChris Mason btrfs_update_inode(trans, root, inode); 1410e02119d5SChris Mason } 14118d5bf1cbSChris Mason BTRFS_I(inode)->index_cnt = (u64)-1; 1412e02119d5SChris Mason 1413c71bf099SYan, Zheng if (inode->i_nlink == 0) { 1414c71bf099SYan, Zheng if (S_ISDIR(inode->i_mode)) { 141512fcfd22SChris Mason ret = replay_dir_deletes(trans, root, NULL, path, 141633345d01SLi Zefan ino, 1); 14173650860bSJosef Bacik if (ret) 14183650860bSJosef Bacik goto out; 141912fcfd22SChris Mason } 142033345d01SLi Zefan ret = insert_orphan_item(trans, root, ino); 1421c71bf099SYan, Zheng } 142212fcfd22SChris Mason 1423f186373fSMark Fasheh out: 1424f186373fSMark 
Fasheh btrfs_free_path(path); 1425f186373fSMark Fasheh return ret; 1426e02119d5SChris Mason } 1427e02119d5SChris Mason 1428e02119d5SChris Mason static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans, 1429e02119d5SChris Mason struct btrfs_root *root, 1430e02119d5SChris Mason struct btrfs_path *path) 1431e02119d5SChris Mason { 1432e02119d5SChris Mason int ret; 1433e02119d5SChris Mason struct btrfs_key key; 1434e02119d5SChris Mason struct inode *inode; 1435e02119d5SChris Mason 1436e02119d5SChris Mason key.objectid = BTRFS_TREE_LOG_FIXUP_OBJECTID; 1437e02119d5SChris Mason key.type = BTRFS_ORPHAN_ITEM_KEY; 1438e02119d5SChris Mason key.offset = (u64)-1; 1439e02119d5SChris Mason while (1) { 1440e02119d5SChris Mason ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 1441e02119d5SChris Mason if (ret < 0) 1442e02119d5SChris Mason break; 1443e02119d5SChris Mason 1444e02119d5SChris Mason if (ret == 1) { 1445e02119d5SChris Mason if (path->slots[0] == 0) 1446e02119d5SChris Mason break; 1447e02119d5SChris Mason path->slots[0]--; 1448e02119d5SChris Mason } 1449e02119d5SChris Mason 1450e02119d5SChris Mason btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 1451e02119d5SChris Mason if (key.objectid != BTRFS_TREE_LOG_FIXUP_OBJECTID || 1452e02119d5SChris Mason key.type != BTRFS_ORPHAN_ITEM_KEY) 1453e02119d5SChris Mason break; 1454e02119d5SChris Mason 1455e02119d5SChris Mason ret = btrfs_del_item(trans, root, path); 145665a246c5STsutomu Itoh if (ret) 145765a246c5STsutomu Itoh goto out; 1458e02119d5SChris Mason 1459b3b4aa74SDavid Sterba btrfs_release_path(path); 1460e02119d5SChris Mason inode = read_one_inode(root, key.offset); 1461c00e9493STsutomu Itoh if (!inode) 1462c00e9493STsutomu Itoh return -EIO; 1463e02119d5SChris Mason 1464e02119d5SChris Mason ret = fixup_inode_link_count(trans, root, inode); 1465e02119d5SChris Mason iput(inode); 14663650860bSJosef Bacik if (ret) 14673650860bSJosef Bacik goto out; 1468e02119d5SChris Mason 146912fcfd22SChris 
Mason /* 147012fcfd22SChris Mason * fixup on a directory may create new entries, 147112fcfd22SChris Mason * make sure we always look for the highset possible 147212fcfd22SChris Mason * offset 147312fcfd22SChris Mason */ 147412fcfd22SChris Mason key.offset = (u64)-1; 1475e02119d5SChris Mason } 147665a246c5STsutomu Itoh ret = 0; 147765a246c5STsutomu Itoh out: 1478b3b4aa74SDavid Sterba btrfs_release_path(path); 147965a246c5STsutomu Itoh return ret; 1480e02119d5SChris Mason } 1481e02119d5SChris Mason 1482e02119d5SChris Mason 1483e02119d5SChris Mason /* 1484e02119d5SChris Mason * record a given inode in the fixup dir so we can check its link 1485e02119d5SChris Mason * count when replay is done. The link count is incremented here 1486e02119d5SChris Mason * so the inode won't go away until we check it 1487e02119d5SChris Mason */ 1488e02119d5SChris Mason static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans, 1489e02119d5SChris Mason struct btrfs_root *root, 1490e02119d5SChris Mason struct btrfs_path *path, 1491e02119d5SChris Mason u64 objectid) 1492e02119d5SChris Mason { 1493e02119d5SChris Mason struct btrfs_key key; 1494e02119d5SChris Mason int ret = 0; 1495e02119d5SChris Mason struct inode *inode; 1496e02119d5SChris Mason 1497e02119d5SChris Mason inode = read_one_inode(root, objectid); 1498c00e9493STsutomu Itoh if (!inode) 1499c00e9493STsutomu Itoh return -EIO; 1500e02119d5SChris Mason 1501e02119d5SChris Mason key.objectid = BTRFS_TREE_LOG_FIXUP_OBJECTID; 1502962a298fSDavid Sterba key.type = BTRFS_ORPHAN_ITEM_KEY; 1503e02119d5SChris Mason key.offset = objectid; 1504e02119d5SChris Mason 1505e02119d5SChris Mason ret = btrfs_insert_empty_item(trans, root, path, &key, 0); 1506e02119d5SChris Mason 1507b3b4aa74SDavid Sterba btrfs_release_path(path); 1508e02119d5SChris Mason if (ret == 0) { 15099bf7a489SJosef Bacik if (!inode->i_nlink) 15109bf7a489SJosef Bacik set_nlink(inode, 1); 15119bf7a489SJosef Bacik else 15128b558c5fSZach Brown inc_nlink(inode); 
1513b9959295STsutomu Itoh ret = btrfs_update_inode(trans, root, inode); 1514e02119d5SChris Mason } else if (ret == -EEXIST) { 1515e02119d5SChris Mason ret = 0; 1516e02119d5SChris Mason } else { 15173650860bSJosef Bacik BUG(); /* Logic Error */ 1518e02119d5SChris Mason } 1519e02119d5SChris Mason iput(inode); 1520e02119d5SChris Mason 1521e02119d5SChris Mason return ret; 1522e02119d5SChris Mason } 1523e02119d5SChris Mason 1524e02119d5SChris Mason /* 1525e02119d5SChris Mason * when replaying the log for a directory, we only insert names 1526e02119d5SChris Mason * for inodes that actually exist. This means an fsync on a directory 1527e02119d5SChris Mason * does not implicitly fsync all the new files in it 1528e02119d5SChris Mason */ 1529e02119d5SChris Mason static noinline int insert_one_name(struct btrfs_trans_handle *trans, 1530e02119d5SChris Mason struct btrfs_root *root, 1531e02119d5SChris Mason struct btrfs_path *path, 1532e02119d5SChris Mason u64 dirid, u64 index, 1533e02119d5SChris Mason char *name, int name_len, u8 type, 1534e02119d5SChris Mason struct btrfs_key *location) 1535e02119d5SChris Mason { 1536e02119d5SChris Mason struct inode *inode; 1537e02119d5SChris Mason struct inode *dir; 1538e02119d5SChris Mason int ret; 1539e02119d5SChris Mason 1540e02119d5SChris Mason inode = read_one_inode(root, location->objectid); 1541e02119d5SChris Mason if (!inode) 1542e02119d5SChris Mason return -ENOENT; 1543e02119d5SChris Mason 1544e02119d5SChris Mason dir = read_one_inode(root, dirid); 1545e02119d5SChris Mason if (!dir) { 1546e02119d5SChris Mason iput(inode); 1547e02119d5SChris Mason return -EIO; 1548e02119d5SChris Mason } 1549d555438bSJosef Bacik 1550e02119d5SChris Mason ret = btrfs_add_link(trans, dir, inode, name, name_len, 1, index); 1551e02119d5SChris Mason 1552e02119d5SChris Mason /* FIXME, put inode into FIXUP list */ 1553e02119d5SChris Mason 1554e02119d5SChris Mason iput(inode); 1555e02119d5SChris Mason iput(dir); 1556e02119d5SChris Mason return ret; 
1557e02119d5SChris Mason } 1558e02119d5SChris Mason 1559e02119d5SChris Mason /* 1560e02119d5SChris Mason * take a single entry in a log directory item and replay it into 1561e02119d5SChris Mason * the subvolume. 1562e02119d5SChris Mason * 1563e02119d5SChris Mason * if a conflicting item exists in the subdirectory already, 1564e02119d5SChris Mason * the inode it points to is unlinked and put into the link count 1565e02119d5SChris Mason * fix up tree. 1566e02119d5SChris Mason * 1567e02119d5SChris Mason * If a name from the log points to a file or directory that does 1568e02119d5SChris Mason * not exist in the FS, it is skipped. fsyncs on directories 1569e02119d5SChris Mason * do not force down inodes inside that directory, just changes to the 1570e02119d5SChris Mason * names or unlinks in a directory. 1571e02119d5SChris Mason */ 1572e02119d5SChris Mason static noinline int replay_one_name(struct btrfs_trans_handle *trans, 1573e02119d5SChris Mason struct btrfs_root *root, 1574e02119d5SChris Mason struct btrfs_path *path, 1575e02119d5SChris Mason struct extent_buffer *eb, 1576e02119d5SChris Mason struct btrfs_dir_item *di, 1577e02119d5SChris Mason struct btrfs_key *key) 1578e02119d5SChris Mason { 1579e02119d5SChris Mason char *name; 1580e02119d5SChris Mason int name_len; 1581e02119d5SChris Mason struct btrfs_dir_item *dst_di; 1582e02119d5SChris Mason struct btrfs_key found_key; 1583e02119d5SChris Mason struct btrfs_key log_key; 1584e02119d5SChris Mason struct inode *dir; 1585e02119d5SChris Mason u8 log_type; 15864bef0848SChris Mason int exists; 15873650860bSJosef Bacik int ret = 0; 1588d555438bSJosef Bacik bool update_size = (key->type == BTRFS_DIR_INDEX_KEY); 1589e02119d5SChris Mason 1590e02119d5SChris Mason dir = read_one_inode(root, key->objectid); 1591c00e9493STsutomu Itoh if (!dir) 1592c00e9493STsutomu Itoh return -EIO; 1593e02119d5SChris Mason 1594e02119d5SChris Mason name_len = btrfs_dir_name_len(eb, di); 1595e02119d5SChris Mason name = kmalloc(name_len, 
GFP_NOFS); 15962bac325eSFilipe David Borba Manana if (!name) { 15972bac325eSFilipe David Borba Manana ret = -ENOMEM; 15982bac325eSFilipe David Borba Manana goto out; 15992bac325eSFilipe David Borba Manana } 16002a29edc6Sliubo 1601e02119d5SChris Mason log_type = btrfs_dir_type(eb, di); 1602e02119d5SChris Mason read_extent_buffer(eb, name, (unsigned long)(di + 1), 1603e02119d5SChris Mason name_len); 1604e02119d5SChris Mason 1605e02119d5SChris Mason btrfs_dir_item_key_to_cpu(eb, di, &log_key); 16064bef0848SChris Mason exists = btrfs_lookup_inode(trans, root, path, &log_key, 0); 16074bef0848SChris Mason if (exists == 0) 16084bef0848SChris Mason exists = 1; 16094bef0848SChris Mason else 16104bef0848SChris Mason exists = 0; 1611b3b4aa74SDavid Sterba btrfs_release_path(path); 16124bef0848SChris Mason 1613e02119d5SChris Mason if (key->type == BTRFS_DIR_ITEM_KEY) { 1614e02119d5SChris Mason dst_di = btrfs_lookup_dir_item(trans, root, path, key->objectid, 1615e02119d5SChris Mason name, name_len, 1); 1616d397712bSChris Mason } else if (key->type == BTRFS_DIR_INDEX_KEY) { 1617e02119d5SChris Mason dst_di = btrfs_lookup_dir_index_item(trans, root, path, 1618e02119d5SChris Mason key->objectid, 1619e02119d5SChris Mason key->offset, name, 1620e02119d5SChris Mason name_len, 1); 1621e02119d5SChris Mason } else { 16223650860bSJosef Bacik /* Corruption */ 16233650860bSJosef Bacik ret = -EINVAL; 16243650860bSJosef Bacik goto out; 1625e02119d5SChris Mason } 1626c704005dSDavid Sterba if (IS_ERR_OR_NULL(dst_di)) { 1627e02119d5SChris Mason /* we need a sequence number to insert, so we only 1628e02119d5SChris Mason * do inserts for the BTRFS_DIR_INDEX_KEY types 1629e02119d5SChris Mason */ 1630e02119d5SChris Mason if (key->type != BTRFS_DIR_INDEX_KEY) 1631e02119d5SChris Mason goto out; 1632e02119d5SChris Mason goto insert; 1633e02119d5SChris Mason } 1634e02119d5SChris Mason 1635e02119d5SChris Mason btrfs_dir_item_key_to_cpu(path->nodes[0], dst_di, &found_key); 1636e02119d5SChris Mason /* the 
existing item matches the logged item */ 1637e02119d5SChris Mason if (found_key.objectid == log_key.objectid && 1638e02119d5SChris Mason found_key.type == log_key.type && 1639e02119d5SChris Mason found_key.offset == log_key.offset && 1640e02119d5SChris Mason btrfs_dir_type(path->nodes[0], dst_di) == log_type) { 1641a2cc11dbSFilipe Manana update_size = false; 1642e02119d5SChris Mason goto out; 1643e02119d5SChris Mason } 1644e02119d5SChris Mason 1645e02119d5SChris Mason /* 1646e02119d5SChris Mason * don't drop the conflicting directory entry if the inode 1647e02119d5SChris Mason * for the new entry doesn't exist 1648e02119d5SChris Mason */ 16494bef0848SChris Mason if (!exists) 1650e02119d5SChris Mason goto out; 1651e02119d5SChris Mason 1652e02119d5SChris Mason ret = drop_one_dir_item(trans, root, path, dir, dst_di); 16533650860bSJosef Bacik if (ret) 16543650860bSJosef Bacik goto out; 1655e02119d5SChris Mason 1656e02119d5SChris Mason if (key->type == BTRFS_DIR_INDEX_KEY) 1657e02119d5SChris Mason goto insert; 1658e02119d5SChris Mason out: 1659b3b4aa74SDavid Sterba btrfs_release_path(path); 1660d555438bSJosef Bacik if (!ret && update_size) { 1661d555438bSJosef Bacik btrfs_i_size_write(dir, dir->i_size + name_len * 2); 1662d555438bSJosef Bacik ret = btrfs_update_inode(trans, root, dir); 1663d555438bSJosef Bacik } 1664e02119d5SChris Mason kfree(name); 1665e02119d5SChris Mason iput(dir); 16663650860bSJosef Bacik return ret; 1667e02119d5SChris Mason 1668e02119d5SChris Mason insert: 1669b3b4aa74SDavid Sterba btrfs_release_path(path); 1670e02119d5SChris Mason ret = insert_one_name(trans, root, path, key->objectid, key->offset, 1671e02119d5SChris Mason name, name_len, log_type, &log_key); 16723650860bSJosef Bacik if (ret && ret != -ENOENT) 16733650860bSJosef Bacik goto out; 1674d555438bSJosef Bacik update_size = false; 16753650860bSJosef Bacik ret = 0; 1676e02119d5SChris Mason goto out; 1677e02119d5SChris Mason } 1678e02119d5SChris Mason 1679e02119d5SChris Mason /* 
1680e02119d5SChris Mason * find all the names in a directory item and reconcile them into 1681e02119d5SChris Mason * the subvolume. Only BTRFS_DIR_ITEM_KEY types will have more than 1682e02119d5SChris Mason * one name in a directory item, but the same code gets used for 1683e02119d5SChris Mason * both directory index types 1684e02119d5SChris Mason */ 1685e02119d5SChris Mason static noinline int replay_one_dir_item(struct btrfs_trans_handle *trans, 1686e02119d5SChris Mason struct btrfs_root *root, 1687e02119d5SChris Mason struct btrfs_path *path, 1688e02119d5SChris Mason struct extent_buffer *eb, int slot, 1689e02119d5SChris Mason struct btrfs_key *key) 1690e02119d5SChris Mason { 1691e02119d5SChris Mason int ret; 1692e02119d5SChris Mason u32 item_size = btrfs_item_size_nr(eb, slot); 1693e02119d5SChris Mason struct btrfs_dir_item *di; 1694e02119d5SChris Mason int name_len; 1695e02119d5SChris Mason unsigned long ptr; 1696e02119d5SChris Mason unsigned long ptr_end; 1697e02119d5SChris Mason 1698e02119d5SChris Mason ptr = btrfs_item_ptr_offset(eb, slot); 1699e02119d5SChris Mason ptr_end = ptr + item_size; 1700e02119d5SChris Mason while (ptr < ptr_end) { 1701e02119d5SChris Mason di = (struct btrfs_dir_item *)ptr; 170222a94d44SJosef Bacik if (verify_dir_item(root, eb, di)) 170322a94d44SJosef Bacik return -EIO; 1704e02119d5SChris Mason name_len = btrfs_dir_name_len(eb, di); 1705e02119d5SChris Mason ret = replay_one_name(trans, root, path, eb, di, key); 17063650860bSJosef Bacik if (ret) 17073650860bSJosef Bacik return ret; 1708e02119d5SChris Mason ptr = (unsigned long)(di + 1); 1709e02119d5SChris Mason ptr += name_len; 1710e02119d5SChris Mason } 1711e02119d5SChris Mason return 0; 1712e02119d5SChris Mason } 1713e02119d5SChris Mason 1714e02119d5SChris Mason /* 1715e02119d5SChris Mason * directory replay has two parts. 
There are the standard directory 1716e02119d5SChris Mason * items in the log copied from the subvolume, and range items 1717e02119d5SChris Mason * created in the log while the subvolume was logged. 1718e02119d5SChris Mason * 1719e02119d5SChris Mason * The range items tell us which parts of the key space the log 1720e02119d5SChris Mason * is authoritative for. During replay, if a key in the subvolume 1721e02119d5SChris Mason * directory is in a logged range item, but not actually in the log 1722e02119d5SChris Mason * that means it was deleted from the directory before the fsync 1723e02119d5SChris Mason * and should be removed. 1724e02119d5SChris Mason */ 1725e02119d5SChris Mason static noinline int find_dir_range(struct btrfs_root *root, 1726e02119d5SChris Mason struct btrfs_path *path, 1727e02119d5SChris Mason u64 dirid, int key_type, 1728e02119d5SChris Mason u64 *start_ret, u64 *end_ret) 1729e02119d5SChris Mason { 1730e02119d5SChris Mason struct btrfs_key key; 1731e02119d5SChris Mason u64 found_end; 1732e02119d5SChris Mason struct btrfs_dir_log_item *item; 1733e02119d5SChris Mason int ret; 1734e02119d5SChris Mason int nritems; 1735e02119d5SChris Mason 1736e02119d5SChris Mason if (*start_ret == (u64)-1) 1737e02119d5SChris Mason return 1; 1738e02119d5SChris Mason 1739e02119d5SChris Mason key.objectid = dirid; 1740e02119d5SChris Mason key.type = key_type; 1741e02119d5SChris Mason key.offset = *start_ret; 1742e02119d5SChris Mason 1743e02119d5SChris Mason ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 1744e02119d5SChris Mason if (ret < 0) 1745e02119d5SChris Mason goto out; 1746e02119d5SChris Mason if (ret > 0) { 1747e02119d5SChris Mason if (path->slots[0] == 0) 1748e02119d5SChris Mason goto out; 1749e02119d5SChris Mason path->slots[0]--; 1750e02119d5SChris Mason } 1751e02119d5SChris Mason if (ret != 0) 1752e02119d5SChris Mason btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 1753e02119d5SChris Mason 1754e02119d5SChris Mason if (key.type != key_type 
|| key.objectid != dirid) { 1755e02119d5SChris Mason ret = 1; 1756e02119d5SChris Mason goto next; 1757e02119d5SChris Mason } 1758e02119d5SChris Mason item = btrfs_item_ptr(path->nodes[0], path->slots[0], 1759e02119d5SChris Mason struct btrfs_dir_log_item); 1760e02119d5SChris Mason found_end = btrfs_dir_log_end(path->nodes[0], item); 1761e02119d5SChris Mason 1762e02119d5SChris Mason if (*start_ret >= key.offset && *start_ret <= found_end) { 1763e02119d5SChris Mason ret = 0; 1764e02119d5SChris Mason *start_ret = key.offset; 1765e02119d5SChris Mason *end_ret = found_end; 1766e02119d5SChris Mason goto out; 1767e02119d5SChris Mason } 1768e02119d5SChris Mason ret = 1; 1769e02119d5SChris Mason next: 1770e02119d5SChris Mason /* check the next slot in the tree to see if it is a valid item */ 1771e02119d5SChris Mason nritems = btrfs_header_nritems(path->nodes[0]); 1772e02119d5SChris Mason if (path->slots[0] >= nritems) { 1773e02119d5SChris Mason ret = btrfs_next_leaf(root, path); 1774e02119d5SChris Mason if (ret) 1775e02119d5SChris Mason goto out; 1776e02119d5SChris Mason } else { 1777e02119d5SChris Mason path->slots[0]++; 1778e02119d5SChris Mason } 1779e02119d5SChris Mason 1780e02119d5SChris Mason btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 1781e02119d5SChris Mason 1782e02119d5SChris Mason if (key.type != key_type || key.objectid != dirid) { 1783e02119d5SChris Mason ret = 1; 1784e02119d5SChris Mason goto out; 1785e02119d5SChris Mason } 1786e02119d5SChris Mason item = btrfs_item_ptr(path->nodes[0], path->slots[0], 1787e02119d5SChris Mason struct btrfs_dir_log_item); 1788e02119d5SChris Mason found_end = btrfs_dir_log_end(path->nodes[0], item); 1789e02119d5SChris Mason *start_ret = key.offset; 1790e02119d5SChris Mason *end_ret = found_end; 1791e02119d5SChris Mason ret = 0; 1792e02119d5SChris Mason out: 1793b3b4aa74SDavid Sterba btrfs_release_path(path); 1794e02119d5SChris Mason return ret; 1795e02119d5SChris Mason } 1796e02119d5SChris Mason 1797e02119d5SChris 
Mason /* 1798e02119d5SChris Mason * this looks for a given directory item in the log. If the directory 1799e02119d5SChris Mason * item is not in the log, the item is removed and the inode it points 1800e02119d5SChris Mason * to is unlinked 1801e02119d5SChris Mason */ 1802e02119d5SChris Mason static noinline int check_item_in_log(struct btrfs_trans_handle *trans, 1803e02119d5SChris Mason struct btrfs_root *root, 1804e02119d5SChris Mason struct btrfs_root *log, 1805e02119d5SChris Mason struct btrfs_path *path, 1806e02119d5SChris Mason struct btrfs_path *log_path, 1807e02119d5SChris Mason struct inode *dir, 1808e02119d5SChris Mason struct btrfs_key *dir_key) 1809e02119d5SChris Mason { 1810e02119d5SChris Mason int ret; 1811e02119d5SChris Mason struct extent_buffer *eb; 1812e02119d5SChris Mason int slot; 1813e02119d5SChris Mason u32 item_size; 1814e02119d5SChris Mason struct btrfs_dir_item *di; 1815e02119d5SChris Mason struct btrfs_dir_item *log_di; 1816e02119d5SChris Mason int name_len; 1817e02119d5SChris Mason unsigned long ptr; 1818e02119d5SChris Mason unsigned long ptr_end; 1819e02119d5SChris Mason char *name; 1820e02119d5SChris Mason struct inode *inode; 1821e02119d5SChris Mason struct btrfs_key location; 1822e02119d5SChris Mason 1823e02119d5SChris Mason again: 1824e02119d5SChris Mason eb = path->nodes[0]; 1825e02119d5SChris Mason slot = path->slots[0]; 1826e02119d5SChris Mason item_size = btrfs_item_size_nr(eb, slot); 1827e02119d5SChris Mason ptr = btrfs_item_ptr_offset(eb, slot); 1828e02119d5SChris Mason ptr_end = ptr + item_size; 1829e02119d5SChris Mason while (ptr < ptr_end) { 1830e02119d5SChris Mason di = (struct btrfs_dir_item *)ptr; 183122a94d44SJosef Bacik if (verify_dir_item(root, eb, di)) { 183222a94d44SJosef Bacik ret = -EIO; 183322a94d44SJosef Bacik goto out; 183422a94d44SJosef Bacik } 183522a94d44SJosef Bacik 1836e02119d5SChris Mason name_len = btrfs_dir_name_len(eb, di); 1837e02119d5SChris Mason name = kmalloc(name_len, GFP_NOFS); 1838e02119d5SChris 
Mason if (!name) { 1839e02119d5SChris Mason ret = -ENOMEM; 1840e02119d5SChris Mason goto out; 1841e02119d5SChris Mason } 1842e02119d5SChris Mason read_extent_buffer(eb, name, (unsigned long)(di + 1), 1843e02119d5SChris Mason name_len); 1844e02119d5SChris Mason log_di = NULL; 184512fcfd22SChris Mason if (log && dir_key->type == BTRFS_DIR_ITEM_KEY) { 1846e02119d5SChris Mason log_di = btrfs_lookup_dir_item(trans, log, log_path, 1847e02119d5SChris Mason dir_key->objectid, 1848e02119d5SChris Mason name, name_len, 0); 184912fcfd22SChris Mason } else if (log && dir_key->type == BTRFS_DIR_INDEX_KEY) { 1850e02119d5SChris Mason log_di = btrfs_lookup_dir_index_item(trans, log, 1851e02119d5SChris Mason log_path, 1852e02119d5SChris Mason dir_key->objectid, 1853e02119d5SChris Mason dir_key->offset, 1854e02119d5SChris Mason name, name_len, 0); 1855e02119d5SChris Mason } 1856269d040fSFilipe David Borba Manana if (!log_di || (IS_ERR(log_di) && PTR_ERR(log_di) == -ENOENT)) { 1857e02119d5SChris Mason btrfs_dir_item_key_to_cpu(eb, di, &location); 1858b3b4aa74SDavid Sterba btrfs_release_path(path); 1859b3b4aa74SDavid Sterba btrfs_release_path(log_path); 1860e02119d5SChris Mason inode = read_one_inode(root, location.objectid); 1861c00e9493STsutomu Itoh if (!inode) { 1862c00e9493STsutomu Itoh kfree(name); 1863c00e9493STsutomu Itoh return -EIO; 1864c00e9493STsutomu Itoh } 1865e02119d5SChris Mason 1866e02119d5SChris Mason ret = link_to_fixup_dir(trans, root, 1867e02119d5SChris Mason path, location.objectid); 18683650860bSJosef Bacik if (ret) { 18693650860bSJosef Bacik kfree(name); 18703650860bSJosef Bacik iput(inode); 18713650860bSJosef Bacik goto out; 18723650860bSJosef Bacik } 18733650860bSJosef Bacik 18748b558c5fSZach Brown inc_nlink(inode); 1875e02119d5SChris Mason ret = btrfs_unlink_inode(trans, root, dir, inode, 1876e02119d5SChris Mason name, name_len); 18773650860bSJosef Bacik if (!ret) 1878ada9af21SFilipe David Borba Manana ret = btrfs_run_delayed_items(trans, root); 
1879e02119d5SChris Mason kfree(name); 1880e02119d5SChris Mason iput(inode); 18813650860bSJosef Bacik if (ret) 18823650860bSJosef Bacik goto out; 1883e02119d5SChris Mason 1884e02119d5SChris Mason /* there might still be more names under this key 1885e02119d5SChris Mason * check and repeat if required 1886e02119d5SChris Mason */ 1887e02119d5SChris Mason ret = btrfs_search_slot(NULL, root, dir_key, path, 1888e02119d5SChris Mason 0, 0); 1889e02119d5SChris Mason if (ret == 0) 1890e02119d5SChris Mason goto again; 1891e02119d5SChris Mason ret = 0; 1892e02119d5SChris Mason goto out; 1893269d040fSFilipe David Borba Manana } else if (IS_ERR(log_di)) { 1894269d040fSFilipe David Borba Manana kfree(name); 1895269d040fSFilipe David Borba Manana return PTR_ERR(log_di); 1896e02119d5SChris Mason } 1897b3b4aa74SDavid Sterba btrfs_release_path(log_path); 1898e02119d5SChris Mason kfree(name); 1899e02119d5SChris Mason 1900e02119d5SChris Mason ptr = (unsigned long)(di + 1); 1901e02119d5SChris Mason ptr += name_len; 1902e02119d5SChris Mason } 1903e02119d5SChris Mason ret = 0; 1904e02119d5SChris Mason out: 1905b3b4aa74SDavid Sterba btrfs_release_path(path); 1906b3b4aa74SDavid Sterba btrfs_release_path(log_path); 1907e02119d5SChris Mason return ret; 1908e02119d5SChris Mason } 1909e02119d5SChris Mason 1910e02119d5SChris Mason /* 1911e02119d5SChris Mason * deletion replay happens before we copy any new directory items 1912e02119d5SChris Mason * out of the log or out of backreferences from inodes. It 1913e02119d5SChris Mason * scans the log to find ranges of keys that log is authoritative for, 1914e02119d5SChris Mason * and then scans the directory to find items in those ranges that are 1915e02119d5SChris Mason * not present in the log. 1916e02119d5SChris Mason * 1917e02119d5SChris Mason * Anything we don't find in the log is unlinked and removed from the 1918e02119d5SChris Mason * directory. 
1919e02119d5SChris Mason */ 1920e02119d5SChris Mason static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans, 1921e02119d5SChris Mason struct btrfs_root *root, 1922e02119d5SChris Mason struct btrfs_root *log, 1923e02119d5SChris Mason struct btrfs_path *path, 192412fcfd22SChris Mason u64 dirid, int del_all) 1925e02119d5SChris Mason { 1926e02119d5SChris Mason u64 range_start; 1927e02119d5SChris Mason u64 range_end; 1928e02119d5SChris Mason int key_type = BTRFS_DIR_LOG_ITEM_KEY; 1929e02119d5SChris Mason int ret = 0; 1930e02119d5SChris Mason struct btrfs_key dir_key; 1931e02119d5SChris Mason struct btrfs_key found_key; 1932e02119d5SChris Mason struct btrfs_path *log_path; 1933e02119d5SChris Mason struct inode *dir; 1934e02119d5SChris Mason 1935e02119d5SChris Mason dir_key.objectid = dirid; 1936e02119d5SChris Mason dir_key.type = BTRFS_DIR_ITEM_KEY; 1937e02119d5SChris Mason log_path = btrfs_alloc_path(); 1938e02119d5SChris Mason if (!log_path) 1939e02119d5SChris Mason return -ENOMEM; 1940e02119d5SChris Mason 1941e02119d5SChris Mason dir = read_one_inode(root, dirid); 1942e02119d5SChris Mason /* it isn't an error if the inode isn't there, that can happen 1943e02119d5SChris Mason * because we replay the deletes before we copy in the inode item 1944e02119d5SChris Mason * from the log 1945e02119d5SChris Mason */ 1946e02119d5SChris Mason if (!dir) { 1947e02119d5SChris Mason btrfs_free_path(log_path); 1948e02119d5SChris Mason return 0; 1949e02119d5SChris Mason } 1950e02119d5SChris Mason again: 1951e02119d5SChris Mason range_start = 0; 1952e02119d5SChris Mason range_end = 0; 1953e02119d5SChris Mason while (1) { 195412fcfd22SChris Mason if (del_all) 195512fcfd22SChris Mason range_end = (u64)-1; 195612fcfd22SChris Mason else { 1957e02119d5SChris Mason ret = find_dir_range(log, path, dirid, key_type, 1958e02119d5SChris Mason &range_start, &range_end); 1959e02119d5SChris Mason if (ret != 0) 1960e02119d5SChris Mason break; 196112fcfd22SChris Mason } 
1962e02119d5SChris Mason 1963e02119d5SChris Mason dir_key.offset = range_start; 1964e02119d5SChris Mason while (1) { 1965e02119d5SChris Mason int nritems; 1966e02119d5SChris Mason ret = btrfs_search_slot(NULL, root, &dir_key, path, 1967e02119d5SChris Mason 0, 0); 1968e02119d5SChris Mason if (ret < 0) 1969e02119d5SChris Mason goto out; 1970e02119d5SChris Mason 1971e02119d5SChris Mason nritems = btrfs_header_nritems(path->nodes[0]); 1972e02119d5SChris Mason if (path->slots[0] >= nritems) { 1973e02119d5SChris Mason ret = btrfs_next_leaf(root, path); 1974e02119d5SChris Mason if (ret) 1975e02119d5SChris Mason break; 1976e02119d5SChris Mason } 1977e02119d5SChris Mason btrfs_item_key_to_cpu(path->nodes[0], &found_key, 1978e02119d5SChris Mason path->slots[0]); 1979e02119d5SChris Mason if (found_key.objectid != dirid || 1980e02119d5SChris Mason found_key.type != dir_key.type) 1981e02119d5SChris Mason goto next_type; 1982e02119d5SChris Mason 1983e02119d5SChris Mason if (found_key.offset > range_end) 1984e02119d5SChris Mason break; 1985e02119d5SChris Mason 1986e02119d5SChris Mason ret = check_item_in_log(trans, root, log, path, 198712fcfd22SChris Mason log_path, dir, 198812fcfd22SChris Mason &found_key); 19893650860bSJosef Bacik if (ret) 19903650860bSJosef Bacik goto out; 1991e02119d5SChris Mason if (found_key.offset == (u64)-1) 1992e02119d5SChris Mason break; 1993e02119d5SChris Mason dir_key.offset = found_key.offset + 1; 1994e02119d5SChris Mason } 1995b3b4aa74SDavid Sterba btrfs_release_path(path); 1996e02119d5SChris Mason if (range_end == (u64)-1) 1997e02119d5SChris Mason break; 1998e02119d5SChris Mason range_start = range_end + 1; 1999e02119d5SChris Mason } 2000e02119d5SChris Mason 2001e02119d5SChris Mason next_type: 2002e02119d5SChris Mason ret = 0; 2003e02119d5SChris Mason if (key_type == BTRFS_DIR_LOG_ITEM_KEY) { 2004e02119d5SChris Mason key_type = BTRFS_DIR_LOG_INDEX_KEY; 2005e02119d5SChris Mason dir_key.type = BTRFS_DIR_INDEX_KEY; 2006b3b4aa74SDavid Sterba 
btrfs_release_path(path); 2007e02119d5SChris Mason goto again; 2008e02119d5SChris Mason } 2009e02119d5SChris Mason out: 2010b3b4aa74SDavid Sterba btrfs_release_path(path); 2011e02119d5SChris Mason btrfs_free_path(log_path); 2012e02119d5SChris Mason iput(dir); 2013e02119d5SChris Mason return ret; 2014e02119d5SChris Mason } 2015e02119d5SChris Mason 2016e02119d5SChris Mason /* 2017e02119d5SChris Mason * the process_func used to replay items from the log tree. This 2018e02119d5SChris Mason * gets called in two different stages. The first stage just looks 2019e02119d5SChris Mason * for inodes and makes sure they are all copied into the subvolume. 2020e02119d5SChris Mason * 2021e02119d5SChris Mason * The second stage copies all the other item types from the log into 2022e02119d5SChris Mason * the subvolume. The two stage approach is slower, but gets rid of 2023e02119d5SChris Mason * lots of complexity around inodes referencing other inodes that exist 2024e02119d5SChris Mason * only in the log (references come from either directory items or inode 2025e02119d5SChris Mason * back refs). 
2026e02119d5SChris Mason */ 2027e02119d5SChris Mason static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, 2028e02119d5SChris Mason struct walk_control *wc, u64 gen) 2029e02119d5SChris Mason { 2030e02119d5SChris Mason int nritems; 2031e02119d5SChris Mason struct btrfs_path *path; 2032e02119d5SChris Mason struct btrfs_root *root = wc->replay_dest; 2033e02119d5SChris Mason struct btrfs_key key; 2034e02119d5SChris Mason int level; 2035e02119d5SChris Mason int i; 2036e02119d5SChris Mason int ret; 2037e02119d5SChris Mason 2038018642a1STsutomu Itoh ret = btrfs_read_buffer(eb, gen); 2039018642a1STsutomu Itoh if (ret) 2040018642a1STsutomu Itoh return ret; 2041e02119d5SChris Mason 2042e02119d5SChris Mason level = btrfs_header_level(eb); 2043e02119d5SChris Mason 2044e02119d5SChris Mason if (level != 0) 2045e02119d5SChris Mason return 0; 2046e02119d5SChris Mason 2047e02119d5SChris Mason path = btrfs_alloc_path(); 20481e5063d0SMark Fasheh if (!path) 20491e5063d0SMark Fasheh return -ENOMEM; 2050e02119d5SChris Mason 2051e02119d5SChris Mason nritems = btrfs_header_nritems(eb); 2052e02119d5SChris Mason for (i = 0; i < nritems; i++) { 2053e02119d5SChris Mason btrfs_item_key_to_cpu(eb, &key, i); 2054e02119d5SChris Mason 2055e02119d5SChris Mason /* inode keys are done during the first stage */ 2056e02119d5SChris Mason if (key.type == BTRFS_INODE_ITEM_KEY && 2057e02119d5SChris Mason wc->stage == LOG_WALK_REPLAY_INODES) { 2058e02119d5SChris Mason struct btrfs_inode_item *inode_item; 2059e02119d5SChris Mason u32 mode; 2060e02119d5SChris Mason 2061e02119d5SChris Mason inode_item = btrfs_item_ptr(eb, i, 2062e02119d5SChris Mason struct btrfs_inode_item); 2063e02119d5SChris Mason mode = btrfs_inode_mode(eb, inode_item); 2064e02119d5SChris Mason if (S_ISDIR(mode)) { 2065e02119d5SChris Mason ret = replay_dir_deletes(wc->trans, 206612fcfd22SChris Mason root, log, path, key.objectid, 0); 2067b50c6e25SJosef Bacik if (ret) 2068b50c6e25SJosef Bacik break; 
2069e02119d5SChris Mason } 2070e02119d5SChris Mason ret = overwrite_item(wc->trans, root, path, 2071e02119d5SChris Mason eb, i, &key); 2072b50c6e25SJosef Bacik if (ret) 2073b50c6e25SJosef Bacik break; 2074e02119d5SChris Mason 2075c71bf099SYan, Zheng /* for regular files, make sure corresponding 2076c71bf099SYan, Zheng * orhpan item exist. extents past the new EOF 2077c71bf099SYan, Zheng * will be truncated later by orphan cleanup. 2078e02119d5SChris Mason */ 2079e02119d5SChris Mason if (S_ISREG(mode)) { 2080c71bf099SYan, Zheng ret = insert_orphan_item(wc->trans, root, 2081e02119d5SChris Mason key.objectid); 2082b50c6e25SJosef Bacik if (ret) 2083b50c6e25SJosef Bacik break; 2084c71bf099SYan, Zheng } 2085a74ac322SChris Mason 2086e02119d5SChris Mason ret = link_to_fixup_dir(wc->trans, root, 2087e02119d5SChris Mason path, key.objectid); 2088b50c6e25SJosef Bacik if (ret) 2089b50c6e25SJosef Bacik break; 2090e02119d5SChris Mason } 2091dd8e7217SJosef Bacik 2092dd8e7217SJosef Bacik if (key.type == BTRFS_DIR_INDEX_KEY && 2093dd8e7217SJosef Bacik wc->stage == LOG_WALK_REPLAY_DIR_INDEX) { 2094dd8e7217SJosef Bacik ret = replay_one_dir_item(wc->trans, root, path, 2095dd8e7217SJosef Bacik eb, i, &key); 2096dd8e7217SJosef Bacik if (ret) 2097dd8e7217SJosef Bacik break; 2098dd8e7217SJosef Bacik } 2099dd8e7217SJosef Bacik 2100e02119d5SChris Mason if (wc->stage < LOG_WALK_REPLAY_ALL) 2101e02119d5SChris Mason continue; 2102e02119d5SChris Mason 2103e02119d5SChris Mason /* these keys are simply copied */ 2104e02119d5SChris Mason if (key.type == BTRFS_XATTR_ITEM_KEY) { 2105e02119d5SChris Mason ret = overwrite_item(wc->trans, root, path, 2106e02119d5SChris Mason eb, i, &key); 2107b50c6e25SJosef Bacik if (ret) 2108b50c6e25SJosef Bacik break; 21092da1c669SLiu Bo } else if (key.type == BTRFS_INODE_REF_KEY || 21102da1c669SLiu Bo key.type == BTRFS_INODE_EXTREF_KEY) { 2111f186373fSMark Fasheh ret = add_inode_ref(wc->trans, root, log, path, 2112f186373fSMark Fasheh eb, i, &key); 2113b50c6e25SJosef 
Bacik if (ret && ret != -ENOENT) 2114b50c6e25SJosef Bacik break; 2115b50c6e25SJosef Bacik ret = 0; 2116e02119d5SChris Mason } else if (key.type == BTRFS_EXTENT_DATA_KEY) { 2117e02119d5SChris Mason ret = replay_one_extent(wc->trans, root, path, 2118e02119d5SChris Mason eb, i, &key); 2119b50c6e25SJosef Bacik if (ret) 2120b50c6e25SJosef Bacik break; 2121dd8e7217SJosef Bacik } else if (key.type == BTRFS_DIR_ITEM_KEY) { 2122e02119d5SChris Mason ret = replay_one_dir_item(wc->trans, root, path, 2123e02119d5SChris Mason eb, i, &key); 2124b50c6e25SJosef Bacik if (ret) 2125b50c6e25SJosef Bacik break; 2126e02119d5SChris Mason } 2127e02119d5SChris Mason } 2128e02119d5SChris Mason btrfs_free_path(path); 2129b50c6e25SJosef Bacik return ret; 2130e02119d5SChris Mason } 2131e02119d5SChris Mason 2132d397712bSChris Mason static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, 2133e02119d5SChris Mason struct btrfs_root *root, 2134e02119d5SChris Mason struct btrfs_path *path, int *level, 2135e02119d5SChris Mason struct walk_control *wc) 2136e02119d5SChris Mason { 2137e02119d5SChris Mason u64 root_owner; 2138e02119d5SChris Mason u64 bytenr; 2139e02119d5SChris Mason u64 ptr_gen; 2140e02119d5SChris Mason struct extent_buffer *next; 2141e02119d5SChris Mason struct extent_buffer *cur; 2142e02119d5SChris Mason struct extent_buffer *parent; 2143e02119d5SChris Mason u32 blocksize; 2144e02119d5SChris Mason int ret = 0; 2145e02119d5SChris Mason 2146e02119d5SChris Mason WARN_ON(*level < 0); 2147e02119d5SChris Mason WARN_ON(*level >= BTRFS_MAX_LEVEL); 2148e02119d5SChris Mason 2149e02119d5SChris Mason while (*level > 0) { 2150e02119d5SChris Mason WARN_ON(*level < 0); 2151e02119d5SChris Mason WARN_ON(*level >= BTRFS_MAX_LEVEL); 2152e02119d5SChris Mason cur = path->nodes[*level]; 2153e02119d5SChris Mason 2154fae7f21cSDulshani Gunawardhana WARN_ON(btrfs_header_level(cur) != *level); 2155e02119d5SChris Mason 2156e02119d5SChris Mason if (path->slots[*level] >= 2157e02119d5SChris Mason 
btrfs_header_nritems(cur)) 2158e02119d5SChris Mason break; 2159e02119d5SChris Mason 2160e02119d5SChris Mason bytenr = btrfs_node_blockptr(cur, path->slots[*level]); 2161e02119d5SChris Mason ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); 2162707e8a07SDavid Sterba blocksize = root->nodesize; 2163e02119d5SChris Mason 2164e02119d5SChris Mason parent = path->nodes[*level]; 2165e02119d5SChris Mason root_owner = btrfs_header_owner(parent); 2166e02119d5SChris Mason 2167e02119d5SChris Mason next = btrfs_find_create_tree_block(root, bytenr, blocksize); 21682a29edc6Sliubo if (!next) 21692a29edc6Sliubo return -ENOMEM; 2170e02119d5SChris Mason 21714a500fd1SYan, Zheng if (*level == 1) { 21721e5063d0SMark Fasheh ret = wc->process_func(root, next, wc, ptr_gen); 2173b50c6e25SJosef Bacik if (ret) { 2174b50c6e25SJosef Bacik free_extent_buffer(next); 21751e5063d0SMark Fasheh return ret; 2176b50c6e25SJosef Bacik } 2177e02119d5SChris Mason 2178e02119d5SChris Mason path->slots[*level]++; 2179e02119d5SChris Mason if (wc->free) { 2180018642a1STsutomu Itoh ret = btrfs_read_buffer(next, ptr_gen); 2181018642a1STsutomu Itoh if (ret) { 2182018642a1STsutomu Itoh free_extent_buffer(next); 2183018642a1STsutomu Itoh return ret; 2184018642a1STsutomu Itoh } 2185e02119d5SChris Mason 2186681ae509SJosef Bacik if (trans) { 2187e02119d5SChris Mason btrfs_tree_lock(next); 2188b4ce94deSChris Mason btrfs_set_lock_blocking(next); 2189bd681513SChris Mason clean_tree_block(trans, root, next); 2190e02119d5SChris Mason btrfs_wait_tree_block_writeback(next); 2191e02119d5SChris Mason btrfs_tree_unlock(next); 2192681ae509SJosef Bacik } 2193e02119d5SChris Mason 2194e02119d5SChris Mason WARN_ON(root_owner != 2195e02119d5SChris Mason BTRFS_TREE_LOG_OBJECTID); 2196e688b725SChris Mason ret = btrfs_free_and_pin_reserved_extent(root, 2197d00aff00SChris Mason bytenr, blocksize); 21983650860bSJosef Bacik if (ret) { 21993650860bSJosef Bacik free_extent_buffer(next); 22003650860bSJosef Bacik return ret; 
22013650860bSJosef Bacik } 2202e02119d5SChris Mason } 2203e02119d5SChris Mason free_extent_buffer(next); 2204e02119d5SChris Mason continue; 2205e02119d5SChris Mason } 2206018642a1STsutomu Itoh ret = btrfs_read_buffer(next, ptr_gen); 2207018642a1STsutomu Itoh if (ret) { 2208018642a1STsutomu Itoh free_extent_buffer(next); 2209018642a1STsutomu Itoh return ret; 2210018642a1STsutomu Itoh } 2211e02119d5SChris Mason 2212e02119d5SChris Mason WARN_ON(*level <= 0); 2213e02119d5SChris Mason if (path->nodes[*level-1]) 2214e02119d5SChris Mason free_extent_buffer(path->nodes[*level-1]); 2215e02119d5SChris Mason path->nodes[*level-1] = next; 2216e02119d5SChris Mason *level = btrfs_header_level(next); 2217e02119d5SChris Mason path->slots[*level] = 0; 2218e02119d5SChris Mason cond_resched(); 2219e02119d5SChris Mason } 2220e02119d5SChris Mason WARN_ON(*level < 0); 2221e02119d5SChris Mason WARN_ON(*level >= BTRFS_MAX_LEVEL); 2222e02119d5SChris Mason 22234a500fd1SYan, Zheng path->slots[*level] = btrfs_header_nritems(path->nodes[*level]); 2224e02119d5SChris Mason 2225e02119d5SChris Mason cond_resched(); 2226e02119d5SChris Mason return 0; 2227e02119d5SChris Mason } 2228e02119d5SChris Mason 2229d397712bSChris Mason static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans, 2230e02119d5SChris Mason struct btrfs_root *root, 2231e02119d5SChris Mason struct btrfs_path *path, int *level, 2232e02119d5SChris Mason struct walk_control *wc) 2233e02119d5SChris Mason { 2234e02119d5SChris Mason u64 root_owner; 2235e02119d5SChris Mason int i; 2236e02119d5SChris Mason int slot; 2237e02119d5SChris Mason int ret; 2238e02119d5SChris Mason 2239e02119d5SChris Mason for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) { 2240e02119d5SChris Mason slot = path->slots[i]; 22414a500fd1SYan, Zheng if (slot + 1 < btrfs_header_nritems(path->nodes[i])) { 2242e02119d5SChris Mason path->slots[i]++; 2243e02119d5SChris Mason *level = i; 2244e02119d5SChris Mason WARN_ON(*level == 0); 
2245e02119d5SChris Mason return 0; 2246e02119d5SChris Mason } else { 224731840ae1SZheng Yan struct extent_buffer *parent; 224831840ae1SZheng Yan if (path->nodes[*level] == root->node) 224931840ae1SZheng Yan parent = path->nodes[*level]; 225031840ae1SZheng Yan else 225131840ae1SZheng Yan parent = path->nodes[*level + 1]; 225231840ae1SZheng Yan 225331840ae1SZheng Yan root_owner = btrfs_header_owner(parent); 22541e5063d0SMark Fasheh ret = wc->process_func(root, path->nodes[*level], wc, 2255e02119d5SChris Mason btrfs_header_generation(path->nodes[*level])); 22561e5063d0SMark Fasheh if (ret) 22571e5063d0SMark Fasheh return ret; 22581e5063d0SMark Fasheh 2259e02119d5SChris Mason if (wc->free) { 2260e02119d5SChris Mason struct extent_buffer *next; 2261e02119d5SChris Mason 2262e02119d5SChris Mason next = path->nodes[*level]; 2263e02119d5SChris Mason 2264681ae509SJosef Bacik if (trans) { 2265e02119d5SChris Mason btrfs_tree_lock(next); 2266b4ce94deSChris Mason btrfs_set_lock_blocking(next); 2267bd681513SChris Mason clean_tree_block(trans, root, next); 2268e02119d5SChris Mason btrfs_wait_tree_block_writeback(next); 2269e02119d5SChris Mason btrfs_tree_unlock(next); 2270681ae509SJosef Bacik } 2271e02119d5SChris Mason 2272e02119d5SChris Mason WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID); 2273e688b725SChris Mason ret = btrfs_free_and_pin_reserved_extent(root, 2274e02119d5SChris Mason path->nodes[*level]->start, 2275d00aff00SChris Mason path->nodes[*level]->len); 22763650860bSJosef Bacik if (ret) 22773650860bSJosef Bacik return ret; 2278e02119d5SChris Mason } 2279e02119d5SChris Mason free_extent_buffer(path->nodes[*level]); 2280e02119d5SChris Mason path->nodes[*level] = NULL; 2281e02119d5SChris Mason *level = i + 1; 2282e02119d5SChris Mason } 2283e02119d5SChris Mason } 2284e02119d5SChris Mason return 1; 2285e02119d5SChris Mason } 2286e02119d5SChris Mason 2287e02119d5SChris Mason /* 2288e02119d5SChris Mason * drop the reference count on the tree rooted at 'snap'. 
This traverses 2289e02119d5SChris Mason * the tree freeing any blocks that have a ref count of zero after being 2290e02119d5SChris Mason * decremented. 2291e02119d5SChris Mason */ 2292e02119d5SChris Mason static int walk_log_tree(struct btrfs_trans_handle *trans, 2293e02119d5SChris Mason struct btrfs_root *log, struct walk_control *wc) 2294e02119d5SChris Mason { 2295e02119d5SChris Mason int ret = 0; 2296e02119d5SChris Mason int wret; 2297e02119d5SChris Mason int level; 2298e02119d5SChris Mason struct btrfs_path *path; 2299e02119d5SChris Mason int orig_level; 2300e02119d5SChris Mason 2301e02119d5SChris Mason path = btrfs_alloc_path(); 2302db5b493aSTsutomu Itoh if (!path) 2303db5b493aSTsutomu Itoh return -ENOMEM; 2304e02119d5SChris Mason 2305e02119d5SChris Mason level = btrfs_header_level(log->node); 2306e02119d5SChris Mason orig_level = level; 2307e02119d5SChris Mason path->nodes[level] = log->node; 2308e02119d5SChris Mason extent_buffer_get(log->node); 2309e02119d5SChris Mason path->slots[level] = 0; 2310e02119d5SChris Mason 2311e02119d5SChris Mason while (1) { 2312e02119d5SChris Mason wret = walk_down_log_tree(trans, log, path, &level, wc); 2313e02119d5SChris Mason if (wret > 0) 2314e02119d5SChris Mason break; 231579787eaaSJeff Mahoney if (wret < 0) { 2316e02119d5SChris Mason ret = wret; 231779787eaaSJeff Mahoney goto out; 231879787eaaSJeff Mahoney } 2319e02119d5SChris Mason 2320e02119d5SChris Mason wret = walk_up_log_tree(trans, log, path, &level, wc); 2321e02119d5SChris Mason if (wret > 0) 2322e02119d5SChris Mason break; 232379787eaaSJeff Mahoney if (wret < 0) { 2324e02119d5SChris Mason ret = wret; 232579787eaaSJeff Mahoney goto out; 232679787eaaSJeff Mahoney } 2327e02119d5SChris Mason } 2328e02119d5SChris Mason 2329e02119d5SChris Mason /* was the root node processed? 
if not, catch it here */ 2330e02119d5SChris Mason if (path->nodes[orig_level]) { 233179787eaaSJeff Mahoney ret = wc->process_func(log, path->nodes[orig_level], wc, 2332e02119d5SChris Mason btrfs_header_generation(path->nodes[orig_level])); 233379787eaaSJeff Mahoney if (ret) 233479787eaaSJeff Mahoney goto out; 2335e02119d5SChris Mason if (wc->free) { 2336e02119d5SChris Mason struct extent_buffer *next; 2337e02119d5SChris Mason 2338e02119d5SChris Mason next = path->nodes[orig_level]; 2339e02119d5SChris Mason 2340681ae509SJosef Bacik if (trans) { 2341e02119d5SChris Mason btrfs_tree_lock(next); 2342b4ce94deSChris Mason btrfs_set_lock_blocking(next); 2343bd681513SChris Mason clean_tree_block(trans, log, next); 2344e02119d5SChris Mason btrfs_wait_tree_block_writeback(next); 2345e02119d5SChris Mason btrfs_tree_unlock(next); 2346681ae509SJosef Bacik } 2347e02119d5SChris Mason 2348e02119d5SChris Mason WARN_ON(log->root_key.objectid != 2349e02119d5SChris Mason BTRFS_TREE_LOG_OBJECTID); 2350e688b725SChris Mason ret = btrfs_free_and_pin_reserved_extent(log, next->start, 2351d00aff00SChris Mason next->len); 23523650860bSJosef Bacik if (ret) 23533650860bSJosef Bacik goto out; 2354e02119d5SChris Mason } 2355e02119d5SChris Mason } 2356e02119d5SChris Mason 235779787eaaSJeff Mahoney out: 2358e02119d5SChris Mason btrfs_free_path(path); 2359e02119d5SChris Mason return ret; 2360e02119d5SChris Mason } 2361e02119d5SChris Mason 23627237f183SYan Zheng /* 23637237f183SYan Zheng * helper function to update the item for a given subvolumes log root 23647237f183SYan Zheng * in the tree of log roots 23657237f183SYan Zheng */ 23667237f183SYan Zheng static int update_log_root(struct btrfs_trans_handle *trans, 23677237f183SYan Zheng struct btrfs_root *log) 23687237f183SYan Zheng { 23697237f183SYan Zheng int ret; 23707237f183SYan Zheng 23717237f183SYan Zheng if (log->log_transid == 1) { 23727237f183SYan Zheng /* insert root item on the first sync */ 23737237f183SYan Zheng ret = 
btrfs_insert_root(trans, log->fs_info->log_root_tree, 23747237f183SYan Zheng &log->root_key, &log->root_item); 23757237f183SYan Zheng } else { 23767237f183SYan Zheng ret = btrfs_update_root(trans, log->fs_info->log_root_tree, 23777237f183SYan Zheng &log->root_key, &log->root_item); 23787237f183SYan Zheng } 23797237f183SYan Zheng return ret; 23807237f183SYan Zheng } 23817237f183SYan Zheng 23828b050d35SMiao Xie static void wait_log_commit(struct btrfs_trans_handle *trans, 2383bb14a59bSMiao Xie struct btrfs_root *root, int transid) 2384e02119d5SChris Mason { 2385e02119d5SChris Mason DEFINE_WAIT(wait); 23867237f183SYan Zheng int index = transid % 2; 2387e02119d5SChris Mason 23887237f183SYan Zheng /* 23897237f183SYan Zheng * we only allow two pending log transactions at a time, 23907237f183SYan Zheng * so we know that if ours is more than 2 older than the 23917237f183SYan Zheng * current transaction, we're done 23927237f183SYan Zheng */ 2393e02119d5SChris Mason do { 23947237f183SYan Zheng prepare_to_wait(&root->log_commit_wait[index], 23957237f183SYan Zheng &wait, TASK_UNINTERRUPTIBLE); 23967237f183SYan Zheng mutex_unlock(&root->log_mutex); 239712fcfd22SChris Mason 2398d1433debSMiao Xie if (root->log_transid_committed < transid && 23997237f183SYan Zheng atomic_read(&root->log_commit[index])) 2400e02119d5SChris Mason schedule(); 240112fcfd22SChris Mason 24027237f183SYan Zheng finish_wait(&root->log_commit_wait[index], &wait); 24037237f183SYan Zheng mutex_lock(&root->log_mutex); 2404d1433debSMiao Xie } while (root->log_transid_committed < transid && 24057237f183SYan Zheng atomic_read(&root->log_commit[index])); 24067237f183SYan Zheng } 24077237f183SYan Zheng 2408143bede5SJeff Mahoney static void wait_for_writer(struct btrfs_trans_handle *trans, 240912fcfd22SChris Mason struct btrfs_root *root) 24107237f183SYan Zheng { 24117237f183SYan Zheng DEFINE_WAIT(wait); 24128b050d35SMiao Xie 24138b050d35SMiao Xie while (atomic_read(&root->log_writers)) { 24147237f183SYan Zheng 
prepare_to_wait(&root->log_writer_wait, 24157237f183SYan Zheng &wait, TASK_UNINTERRUPTIBLE); 24167237f183SYan Zheng mutex_unlock(&root->log_mutex); 24178b050d35SMiao Xie if (atomic_read(&root->log_writers)) 24187237f183SYan Zheng schedule(); 24197237f183SYan Zheng mutex_lock(&root->log_mutex); 24207237f183SYan Zheng finish_wait(&root->log_writer_wait, &wait); 24217237f183SYan Zheng } 2422e02119d5SChris Mason } 2423e02119d5SChris Mason 24248b050d35SMiao Xie static inline void btrfs_remove_log_ctx(struct btrfs_root *root, 24258b050d35SMiao Xie struct btrfs_log_ctx *ctx) 24268b050d35SMiao Xie { 24278b050d35SMiao Xie if (!ctx) 24288b050d35SMiao Xie return; 24298b050d35SMiao Xie 24308b050d35SMiao Xie mutex_lock(&root->log_mutex); 24318b050d35SMiao Xie list_del_init(&ctx->list); 24328b050d35SMiao Xie mutex_unlock(&root->log_mutex); 24338b050d35SMiao Xie } 24348b050d35SMiao Xie 24358b050d35SMiao Xie /* 24368b050d35SMiao Xie * Invoked in log mutex context, or be sure there is no other task which 24378b050d35SMiao Xie * can access the list. 24388b050d35SMiao Xie */ 24398b050d35SMiao Xie static inline void btrfs_remove_all_log_ctxs(struct btrfs_root *root, 24408b050d35SMiao Xie int index, int error) 24418b050d35SMiao Xie { 24428b050d35SMiao Xie struct btrfs_log_ctx *ctx; 24438b050d35SMiao Xie 24448b050d35SMiao Xie if (!error) { 24458b050d35SMiao Xie INIT_LIST_HEAD(&root->log_ctxs[index]); 24468b050d35SMiao Xie return; 24478b050d35SMiao Xie } 24488b050d35SMiao Xie 24498b050d35SMiao Xie list_for_each_entry(ctx, &root->log_ctxs[index], list) 24508b050d35SMiao Xie ctx->log_ret = error; 24518b050d35SMiao Xie 24528b050d35SMiao Xie INIT_LIST_HEAD(&root->log_ctxs[index]); 24538b050d35SMiao Xie } 24548b050d35SMiao Xie 2455e02119d5SChris Mason /* 2456e02119d5SChris Mason * btrfs_sync_log does sends a given tree log down to the disk and 2457e02119d5SChris Mason * updates the super blocks to record it. 
When this call is done, 245812fcfd22SChris Mason * you know that any inodes previously logged are safely on disk only 245912fcfd22SChris Mason * if it returns 0. 246012fcfd22SChris Mason * 246112fcfd22SChris Mason * Any other return value means you need to call btrfs_commit_transaction. 246212fcfd22SChris Mason * Some of the edge cases for fsyncing directories that have had unlinks 246312fcfd22SChris Mason * or renames done in the past mean that sometimes the only safe 246412fcfd22SChris Mason * fsync is to commit the whole FS. When btrfs_sync_log returns -EAGAIN, 246512fcfd22SChris Mason * that has happened. 2466e02119d5SChris Mason */ 2467e02119d5SChris Mason int btrfs_sync_log(struct btrfs_trans_handle *trans, 24688b050d35SMiao Xie struct btrfs_root *root, struct btrfs_log_ctx *ctx) 2469e02119d5SChris Mason { 24707237f183SYan Zheng int index1; 24717237f183SYan Zheng int index2; 24728cef4e16SYan, Zheng int mark; 2473e02119d5SChris Mason int ret; 2474e02119d5SChris Mason struct btrfs_root *log = root->log_root; 24757237f183SYan Zheng struct btrfs_root *log_root_tree = root->fs_info->log_root_tree; 2476bb14a59bSMiao Xie int log_transid = 0; 24778b050d35SMiao Xie struct btrfs_log_ctx root_log_ctx; 2478c6adc9ccSMiao Xie struct blk_plug plug; 2479e02119d5SChris Mason 24807237f183SYan Zheng mutex_lock(&root->log_mutex); 2481d1433debSMiao Xie log_transid = ctx->log_transid; 2482d1433debSMiao Xie if (root->log_transid_committed >= log_transid) { 24837237f183SYan Zheng mutex_unlock(&root->log_mutex); 24848b050d35SMiao Xie return ctx->log_ret; 2485e02119d5SChris Mason } 2486d1433debSMiao Xie 2487d1433debSMiao Xie index1 = log_transid % 2; 2488d1433debSMiao Xie if (atomic_read(&root->log_commit[index1])) { 2489d1433debSMiao Xie wait_log_commit(trans, root, log_transid); 2490d1433debSMiao Xie mutex_unlock(&root->log_mutex); 2491d1433debSMiao Xie return ctx->log_ret; 2492d1433debSMiao Xie } 2493d1433debSMiao Xie ASSERT(log_transid == root->log_transid); 24947237f183SYan Zheng 
atomic_set(&root->log_commit[index1], 1); 24957237f183SYan Zheng 24967237f183SYan Zheng /* wait for previous tree log sync to complete */ 24977237f183SYan Zheng if (atomic_read(&root->log_commit[(index1 + 1) % 2])) 2498d1433debSMiao Xie wait_log_commit(trans, root, log_transid - 1); 249948cab2e0SMiao Xie 250086df7eb9SYan, Zheng while (1) { 25012ecb7923SMiao Xie int batch = atomic_read(&root->log_batch); 2502cd354ad6SChris Mason /* when we're on an ssd, just kick the log commit out */ 250327cdeb70SMiao Xie if (!btrfs_test_opt(root, SSD) && 250427cdeb70SMiao Xie test_bit(BTRFS_ROOT_MULTI_LOG_TASKS, &root->state)) { 25057237f183SYan Zheng mutex_unlock(&root->log_mutex); 2506e02119d5SChris Mason schedule_timeout_uninterruptible(1); 25077237f183SYan Zheng mutex_lock(&root->log_mutex); 250886df7eb9SYan, Zheng } 250912fcfd22SChris Mason wait_for_writer(trans, root); 25102ecb7923SMiao Xie if (batch == atomic_read(&root->log_batch)) 2511e02119d5SChris Mason break; 2512e02119d5SChris Mason } 2513d0c803c4SChris Mason 251412fcfd22SChris Mason /* bail out if we need to do a full commit */ 2515995946ddSMiao Xie if (btrfs_need_log_full_commit(root->fs_info, trans)) { 251612fcfd22SChris Mason ret = -EAGAIN; 25172ab28f32SJosef Bacik btrfs_free_logged_extents(log, log_transid); 251812fcfd22SChris Mason mutex_unlock(&root->log_mutex); 251912fcfd22SChris Mason goto out; 252012fcfd22SChris Mason } 252112fcfd22SChris Mason 25228cef4e16SYan, Zheng if (log_transid % 2 == 0) 25238cef4e16SYan, Zheng mark = EXTENT_DIRTY; 25248cef4e16SYan, Zheng else 25258cef4e16SYan, Zheng mark = EXTENT_NEW; 25268cef4e16SYan, Zheng 2527690587d1SChris Mason /* we start IO on all the marked extents here, but we don't actually 2528690587d1SChris Mason * wait for them until later. 
2529690587d1SChris Mason */ 2530c6adc9ccSMiao Xie blk_start_plug(&plug); 25318cef4e16SYan, Zheng ret = btrfs_write_marked_extents(log, &log->dirty_log_pages, mark); 253279787eaaSJeff Mahoney if (ret) { 2533c6adc9ccSMiao Xie blk_finish_plug(&plug); 253479787eaaSJeff Mahoney btrfs_abort_transaction(trans, root, ret); 25352ab28f32SJosef Bacik btrfs_free_logged_extents(log, log_transid); 2536995946ddSMiao Xie btrfs_set_log_full_commit(root->fs_info, trans); 253779787eaaSJeff Mahoney mutex_unlock(&root->log_mutex); 253879787eaaSJeff Mahoney goto out; 253979787eaaSJeff Mahoney } 25407237f183SYan Zheng 25415d4f98a2SYan Zheng btrfs_set_root_node(&log->root_item, log->node); 25427237f183SYan Zheng 25437237f183SYan Zheng root->log_transid++; 25447237f183SYan Zheng log->log_transid = root->log_transid; 2545ff782e0aSJosef Bacik root->log_start_pid = 0; 25467237f183SYan Zheng /* 25478cef4e16SYan, Zheng * IO has been started, blocks of the log tree have WRITTEN flag set 25488cef4e16SYan, Zheng * in their headers. new modifications of the log will be written to 25498cef4e16SYan, Zheng * new positions. so it's safe to allow log writers to go in. 
25507237f183SYan Zheng */ 25517237f183SYan Zheng mutex_unlock(&root->log_mutex); 25527237f183SYan Zheng 2553d1433debSMiao Xie btrfs_init_log_ctx(&root_log_ctx); 2554d1433debSMiao Xie 25557237f183SYan Zheng mutex_lock(&log_root_tree->log_mutex); 25562ecb7923SMiao Xie atomic_inc(&log_root_tree->log_batch); 25577237f183SYan Zheng atomic_inc(&log_root_tree->log_writers); 2558d1433debSMiao Xie 2559d1433debSMiao Xie index2 = log_root_tree->log_transid % 2; 2560d1433debSMiao Xie list_add_tail(&root_log_ctx.list, &log_root_tree->log_ctxs[index2]); 2561d1433debSMiao Xie root_log_ctx.log_transid = log_root_tree->log_transid; 2562d1433debSMiao Xie 25637237f183SYan Zheng mutex_unlock(&log_root_tree->log_mutex); 25647237f183SYan Zheng 25657237f183SYan Zheng ret = update_log_root(trans, log); 25667237f183SYan Zheng 25677237f183SYan Zheng mutex_lock(&log_root_tree->log_mutex); 25687237f183SYan Zheng if (atomic_dec_and_test(&log_root_tree->log_writers)) { 25697237f183SYan Zheng smp_mb(); 25707237f183SYan Zheng if (waitqueue_active(&log_root_tree->log_writer_wait)) 25717237f183SYan Zheng wake_up(&log_root_tree->log_writer_wait); 25727237f183SYan Zheng } 25737237f183SYan Zheng 25744a500fd1SYan, Zheng if (ret) { 2575d1433debSMiao Xie if (!list_empty(&root_log_ctx.list)) 2576d1433debSMiao Xie list_del_init(&root_log_ctx.list); 2577d1433debSMiao Xie 2578c6adc9ccSMiao Xie blk_finish_plug(&plug); 2579995946ddSMiao Xie btrfs_set_log_full_commit(root->fs_info, trans); 2580995946ddSMiao Xie 258179787eaaSJeff Mahoney if (ret != -ENOSPC) { 258279787eaaSJeff Mahoney btrfs_abort_transaction(trans, root, ret); 258379787eaaSJeff Mahoney mutex_unlock(&log_root_tree->log_mutex); 258479787eaaSJeff Mahoney goto out; 258579787eaaSJeff Mahoney } 25864a500fd1SYan, Zheng btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); 25872ab28f32SJosef Bacik btrfs_free_logged_extents(log, log_transid); 25884a500fd1SYan, Zheng mutex_unlock(&log_root_tree->log_mutex); 25894a500fd1SYan, Zheng ret = -EAGAIN; 
25904a500fd1SYan, Zheng goto out; 25914a500fd1SYan, Zheng } 25924a500fd1SYan, Zheng 2593d1433debSMiao Xie if (log_root_tree->log_transid_committed >= root_log_ctx.log_transid) { 2594d1433debSMiao Xie mutex_unlock(&log_root_tree->log_mutex); 2595d1433debSMiao Xie ret = root_log_ctx.log_ret; 2596d1433debSMiao Xie goto out; 2597d1433debSMiao Xie } 25988b050d35SMiao Xie 2599d1433debSMiao Xie index2 = root_log_ctx.log_transid % 2; 26007237f183SYan Zheng if (atomic_read(&log_root_tree->log_commit[index2])) { 2601c6adc9ccSMiao Xie blk_finish_plug(&plug); 2602*5ab5e44aSFilipe Manana ret = btrfs_wait_marked_extents(log, &log->dirty_log_pages, 2603*5ab5e44aSFilipe Manana mark); 260450d9aa99SJosef Bacik btrfs_wait_logged_extents(trans, log, log_transid); 26058b050d35SMiao Xie wait_log_commit(trans, log_root_tree, 2606d1433debSMiao Xie root_log_ctx.log_transid); 26077237f183SYan Zheng mutex_unlock(&log_root_tree->log_mutex); 2608*5ab5e44aSFilipe Manana if (!ret) 26098b050d35SMiao Xie ret = root_log_ctx.log_ret; 26107237f183SYan Zheng goto out; 26117237f183SYan Zheng } 2612d1433debSMiao Xie ASSERT(root_log_ctx.log_transid == log_root_tree->log_transid); 26137237f183SYan Zheng atomic_set(&log_root_tree->log_commit[index2], 1); 26147237f183SYan Zheng 261512fcfd22SChris Mason if (atomic_read(&log_root_tree->log_commit[(index2 + 1) % 2])) { 261612fcfd22SChris Mason wait_log_commit(trans, log_root_tree, 2617d1433debSMiao Xie root_log_ctx.log_transid - 1); 261812fcfd22SChris Mason } 26197237f183SYan Zheng 262012fcfd22SChris Mason wait_for_writer(trans, log_root_tree); 262112fcfd22SChris Mason 262212fcfd22SChris Mason /* 262312fcfd22SChris Mason * now that we've moved on to the tree of log tree roots, 262412fcfd22SChris Mason * check the full commit flag again 262512fcfd22SChris Mason */ 2626995946ddSMiao Xie if (btrfs_need_log_full_commit(root->fs_info, trans)) { 2627c6adc9ccSMiao Xie blk_finish_plug(&plug); 26288cef4e16SYan, Zheng btrfs_wait_marked_extents(log, 
&log->dirty_log_pages, mark); 26292ab28f32SJosef Bacik btrfs_free_logged_extents(log, log_transid); 263012fcfd22SChris Mason mutex_unlock(&log_root_tree->log_mutex); 263112fcfd22SChris Mason ret = -EAGAIN; 263212fcfd22SChris Mason goto out_wake_log_root; 263312fcfd22SChris Mason } 26347237f183SYan Zheng 2635c6adc9ccSMiao Xie ret = btrfs_write_marked_extents(log_root_tree, 26368cef4e16SYan, Zheng &log_root_tree->dirty_log_pages, 26378cef4e16SYan, Zheng EXTENT_DIRTY | EXTENT_NEW); 2638c6adc9ccSMiao Xie blk_finish_plug(&plug); 263979787eaaSJeff Mahoney if (ret) { 2640995946ddSMiao Xie btrfs_set_log_full_commit(root->fs_info, trans); 264179787eaaSJeff Mahoney btrfs_abort_transaction(trans, root, ret); 26422ab28f32SJosef Bacik btrfs_free_logged_extents(log, log_transid); 264379787eaaSJeff Mahoney mutex_unlock(&log_root_tree->log_mutex); 264479787eaaSJeff Mahoney goto out_wake_log_root; 264579787eaaSJeff Mahoney } 2646*5ab5e44aSFilipe Manana ret = btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); 2647*5ab5e44aSFilipe Manana if (!ret) 2648*5ab5e44aSFilipe Manana ret = btrfs_wait_marked_extents(log_root_tree, 2649c6adc9ccSMiao Xie &log_root_tree->dirty_log_pages, 2650c6adc9ccSMiao Xie EXTENT_NEW | EXTENT_DIRTY); 2651*5ab5e44aSFilipe Manana if (ret) { 2652*5ab5e44aSFilipe Manana btrfs_set_log_full_commit(root->fs_info, trans); 2653*5ab5e44aSFilipe Manana btrfs_free_logged_extents(log, log_transid); 2654*5ab5e44aSFilipe Manana mutex_unlock(&log_root_tree->log_mutex); 2655*5ab5e44aSFilipe Manana goto out_wake_log_root; 2656*5ab5e44aSFilipe Manana } 265750d9aa99SJosef Bacik btrfs_wait_logged_extents(trans, log, log_transid); 2658e02119d5SChris Mason 26596c41761fSDavid Sterba btrfs_set_super_log_root(root->fs_info->super_for_commit, 26607237f183SYan Zheng log_root_tree->node->start); 26616c41761fSDavid Sterba btrfs_set_super_log_root_level(root->fs_info->super_for_commit, 26627237f183SYan Zheng btrfs_header_level(log_root_tree->node)); 2663e02119d5SChris Mason 
26647237f183SYan Zheng log_root_tree->log_transid++; 26657237f183SYan Zheng mutex_unlock(&log_root_tree->log_mutex); 26667237f183SYan Zheng 26677237f183SYan Zheng /* 26687237f183SYan Zheng * nobody else is going to jump in and write the the ctree 26697237f183SYan Zheng * super here because the log_commit atomic below is protecting 26707237f183SYan Zheng * us. We must be called with a transaction handle pinning 26717237f183SYan Zheng * the running transaction open, so a full commit can't hop 26727237f183SYan Zheng * in and cause problems either. 26737237f183SYan Zheng */ 26745af3e8ccSStefan Behrens ret = write_ctree_super(trans, root->fs_info->tree_root, 1); 26755af3e8ccSStefan Behrens if (ret) { 2676995946ddSMiao Xie btrfs_set_log_full_commit(root->fs_info, trans); 26775af3e8ccSStefan Behrens btrfs_abort_transaction(trans, root, ret); 26785af3e8ccSStefan Behrens goto out_wake_log_root; 26795af3e8ccSStefan Behrens } 26807237f183SYan Zheng 2681257c62e1SChris Mason mutex_lock(&root->log_mutex); 2682257c62e1SChris Mason if (root->last_log_commit < log_transid) 2683257c62e1SChris Mason root->last_log_commit = log_transid; 2684257c62e1SChris Mason mutex_unlock(&root->log_mutex); 2685257c62e1SChris Mason 268612fcfd22SChris Mason out_wake_log_root: 26878b050d35SMiao Xie /* 26888b050d35SMiao Xie * We needn't get log_mutex here because we are sure all 26898b050d35SMiao Xie * the other tasks are blocked. 
26908b050d35SMiao Xie */ 26918b050d35SMiao Xie btrfs_remove_all_log_ctxs(log_root_tree, index2, ret); 26928b050d35SMiao Xie 2693d1433debSMiao Xie mutex_lock(&log_root_tree->log_mutex); 2694d1433debSMiao Xie log_root_tree->log_transid_committed++; 26957237f183SYan Zheng atomic_set(&log_root_tree->log_commit[index2], 0); 2696d1433debSMiao Xie mutex_unlock(&log_root_tree->log_mutex); 2697d1433debSMiao Xie 26987237f183SYan Zheng if (waitqueue_active(&log_root_tree->log_commit_wait[index2])) 26997237f183SYan Zheng wake_up(&log_root_tree->log_commit_wait[index2]); 2700e02119d5SChris Mason out: 27018b050d35SMiao Xie /* See above. */ 27028b050d35SMiao Xie btrfs_remove_all_log_ctxs(root, index1, ret); 27038b050d35SMiao Xie 2704d1433debSMiao Xie mutex_lock(&root->log_mutex); 2705d1433debSMiao Xie root->log_transid_committed++; 27067237f183SYan Zheng atomic_set(&root->log_commit[index1], 0); 2707d1433debSMiao Xie mutex_unlock(&root->log_mutex); 27088b050d35SMiao Xie 27097237f183SYan Zheng if (waitqueue_active(&root->log_commit_wait[index1])) 27107237f183SYan Zheng wake_up(&root->log_commit_wait[index1]); 2711b31eabd8SChris Mason return ret; 2712e02119d5SChris Mason } 2713e02119d5SChris Mason 27144a500fd1SYan, Zheng static void free_log_tree(struct btrfs_trans_handle *trans, 27154a500fd1SYan, Zheng struct btrfs_root *log) 2716e02119d5SChris Mason { 2717e02119d5SChris Mason int ret; 2718d0c803c4SChris Mason u64 start; 2719d0c803c4SChris Mason u64 end; 2720e02119d5SChris Mason struct walk_control wc = { 2721e02119d5SChris Mason .free = 1, 2722e02119d5SChris Mason .process_func = process_one_buffer 2723e02119d5SChris Mason }; 2724e02119d5SChris Mason 2725e02119d5SChris Mason ret = walk_log_tree(trans, log, &wc); 27263650860bSJosef Bacik /* I don't think this can happen but just in case */ 27273650860bSJosef Bacik if (ret) 27283650860bSJosef Bacik btrfs_abort_transaction(trans, log, ret); 2729e02119d5SChris Mason 2730d0c803c4SChris Mason while (1) { 2731d0c803c4SChris Mason ret = 
find_first_extent_bit(&log->dirty_log_pages, 2732e6138876SJosef Bacik 0, &start, &end, EXTENT_DIRTY | EXTENT_NEW, 2733e6138876SJosef Bacik NULL); 2734d0c803c4SChris Mason if (ret) 2735d0c803c4SChris Mason break; 2736d0c803c4SChris Mason 27378cef4e16SYan, Zheng clear_extent_bits(&log->dirty_log_pages, start, end, 27388cef4e16SYan, Zheng EXTENT_DIRTY | EXTENT_NEW, GFP_NOFS); 2739d0c803c4SChris Mason } 2740d0c803c4SChris Mason 27412ab28f32SJosef Bacik /* 27422ab28f32SJosef Bacik * We may have short-circuited the log tree with the full commit logic 27432ab28f32SJosef Bacik * and left ordered extents on our list, so clear these out to keep us 27442ab28f32SJosef Bacik * from leaking inodes and memory. 27452ab28f32SJosef Bacik */ 27462ab28f32SJosef Bacik btrfs_free_logged_extents(log, 0); 27472ab28f32SJosef Bacik btrfs_free_logged_extents(log, 1); 27482ab28f32SJosef Bacik 27497237f183SYan Zheng free_extent_buffer(log->node); 27507237f183SYan Zheng kfree(log); 27514a500fd1SYan, Zheng } 27524a500fd1SYan, Zheng 27534a500fd1SYan, Zheng /* 27544a500fd1SYan, Zheng * free all the extents used by the tree log. 
This should be called 27554a500fd1SYan, Zheng * at commit time of the full transaction 27564a500fd1SYan, Zheng */ 27574a500fd1SYan, Zheng int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root) 27584a500fd1SYan, Zheng { 27594a500fd1SYan, Zheng if (root->log_root) { 27604a500fd1SYan, Zheng free_log_tree(trans, root->log_root); 27614a500fd1SYan, Zheng root->log_root = NULL; 27624a500fd1SYan, Zheng } 27634a500fd1SYan, Zheng return 0; 27644a500fd1SYan, Zheng } 27654a500fd1SYan, Zheng 27664a500fd1SYan, Zheng int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, 27674a500fd1SYan, Zheng struct btrfs_fs_info *fs_info) 27684a500fd1SYan, Zheng { 27694a500fd1SYan, Zheng if (fs_info->log_root_tree) { 27704a500fd1SYan, Zheng free_log_tree(trans, fs_info->log_root_tree); 27714a500fd1SYan, Zheng fs_info->log_root_tree = NULL; 27724a500fd1SYan, Zheng } 2773e02119d5SChris Mason return 0; 2774e02119d5SChris Mason } 2775e02119d5SChris Mason 2776e02119d5SChris Mason /* 2777e02119d5SChris Mason * If both a file and directory are logged, and unlinks or renames are 2778e02119d5SChris Mason * mixed in, we have a few interesting corners: 2779e02119d5SChris Mason * 2780e02119d5SChris Mason * create file X in dir Y 2781e02119d5SChris Mason * link file X to X.link in dir Y 2782e02119d5SChris Mason * fsync file X 2783e02119d5SChris Mason * unlink file X but leave X.link 2784e02119d5SChris Mason * fsync dir Y 2785e02119d5SChris Mason * 2786e02119d5SChris Mason * After a crash we would expect only X.link to exist. But file X 2787e02119d5SChris Mason * didn't get fsync'd again so the log has back refs for X and X.link. 2788e02119d5SChris Mason * 2789e02119d5SChris Mason * We solve this by removing directory entries and inode backrefs from the 2790e02119d5SChris Mason * log when a file that was logged in the current transaction is 2791e02119d5SChris Mason * unlinked. 
Any later fsync will include the updated log entries, and 2792e02119d5SChris Mason * we'll be able to reconstruct the proper directory items from backrefs. 2793e02119d5SChris Mason * 2794e02119d5SChris Mason * This optimizations allows us to avoid relogging the entire inode 2795e02119d5SChris Mason * or the entire directory. 2796e02119d5SChris Mason */ 2797e02119d5SChris Mason int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, 2798e02119d5SChris Mason struct btrfs_root *root, 2799e02119d5SChris Mason const char *name, int name_len, 2800e02119d5SChris Mason struct inode *dir, u64 index) 2801e02119d5SChris Mason { 2802e02119d5SChris Mason struct btrfs_root *log; 2803e02119d5SChris Mason struct btrfs_dir_item *di; 2804e02119d5SChris Mason struct btrfs_path *path; 2805e02119d5SChris Mason int ret; 28064a500fd1SYan, Zheng int err = 0; 2807e02119d5SChris Mason int bytes_del = 0; 280833345d01SLi Zefan u64 dir_ino = btrfs_ino(dir); 2809e02119d5SChris Mason 28103a5f1d45SChris Mason if (BTRFS_I(dir)->logged_trans < trans->transid) 28113a5f1d45SChris Mason return 0; 28123a5f1d45SChris Mason 2813e02119d5SChris Mason ret = join_running_log_trans(root); 2814e02119d5SChris Mason if (ret) 2815e02119d5SChris Mason return 0; 2816e02119d5SChris Mason 2817e02119d5SChris Mason mutex_lock(&BTRFS_I(dir)->log_mutex); 2818e02119d5SChris Mason 2819e02119d5SChris Mason log = root->log_root; 2820e02119d5SChris Mason path = btrfs_alloc_path(); 2821a62f44a5STsutomu Itoh if (!path) { 2822a62f44a5STsutomu Itoh err = -ENOMEM; 2823a62f44a5STsutomu Itoh goto out_unlock; 2824a62f44a5STsutomu Itoh } 28252a29edc6Sliubo 282633345d01SLi Zefan di = btrfs_lookup_dir_item(trans, log, path, dir_ino, 2827e02119d5SChris Mason name, name_len, -1); 28284a500fd1SYan, Zheng if (IS_ERR(di)) { 28294a500fd1SYan, Zheng err = PTR_ERR(di); 28304a500fd1SYan, Zheng goto fail; 28314a500fd1SYan, Zheng } 28324a500fd1SYan, Zheng if (di) { 2833e02119d5SChris Mason ret = btrfs_delete_one_dir_name(trans, log, 
path, di); 2834e02119d5SChris Mason bytes_del += name_len; 28353650860bSJosef Bacik if (ret) { 28363650860bSJosef Bacik err = ret; 28373650860bSJosef Bacik goto fail; 28383650860bSJosef Bacik } 2839e02119d5SChris Mason } 2840b3b4aa74SDavid Sterba btrfs_release_path(path); 284133345d01SLi Zefan di = btrfs_lookup_dir_index_item(trans, log, path, dir_ino, 2842e02119d5SChris Mason index, name, name_len, -1); 28434a500fd1SYan, Zheng if (IS_ERR(di)) { 28444a500fd1SYan, Zheng err = PTR_ERR(di); 28454a500fd1SYan, Zheng goto fail; 28464a500fd1SYan, Zheng } 28474a500fd1SYan, Zheng if (di) { 2848e02119d5SChris Mason ret = btrfs_delete_one_dir_name(trans, log, path, di); 2849e02119d5SChris Mason bytes_del += name_len; 28503650860bSJosef Bacik if (ret) { 28513650860bSJosef Bacik err = ret; 28523650860bSJosef Bacik goto fail; 28533650860bSJosef Bacik } 2854e02119d5SChris Mason } 2855e02119d5SChris Mason 2856e02119d5SChris Mason /* update the directory size in the log to reflect the names 2857e02119d5SChris Mason * we have removed 2858e02119d5SChris Mason */ 2859e02119d5SChris Mason if (bytes_del) { 2860e02119d5SChris Mason struct btrfs_key key; 2861e02119d5SChris Mason 286233345d01SLi Zefan key.objectid = dir_ino; 2863e02119d5SChris Mason key.offset = 0; 2864e02119d5SChris Mason key.type = BTRFS_INODE_ITEM_KEY; 2865b3b4aa74SDavid Sterba btrfs_release_path(path); 2866e02119d5SChris Mason 2867e02119d5SChris Mason ret = btrfs_search_slot(trans, log, &key, path, 0, 1); 28684a500fd1SYan, Zheng if (ret < 0) { 28694a500fd1SYan, Zheng err = ret; 28704a500fd1SYan, Zheng goto fail; 28714a500fd1SYan, Zheng } 2872e02119d5SChris Mason if (ret == 0) { 2873e02119d5SChris Mason struct btrfs_inode_item *item; 2874e02119d5SChris Mason u64 i_size; 2875e02119d5SChris Mason 2876e02119d5SChris Mason item = btrfs_item_ptr(path->nodes[0], path->slots[0], 2877e02119d5SChris Mason struct btrfs_inode_item); 2878e02119d5SChris Mason i_size = btrfs_inode_size(path->nodes[0], item); 2879e02119d5SChris Mason 
if (i_size > bytes_del) 2880e02119d5SChris Mason i_size -= bytes_del; 2881e02119d5SChris Mason else 2882e02119d5SChris Mason i_size = 0; 2883e02119d5SChris Mason btrfs_set_inode_size(path->nodes[0], item, i_size); 2884e02119d5SChris Mason btrfs_mark_buffer_dirty(path->nodes[0]); 2885e02119d5SChris Mason } else 2886e02119d5SChris Mason ret = 0; 2887b3b4aa74SDavid Sterba btrfs_release_path(path); 2888e02119d5SChris Mason } 28894a500fd1SYan, Zheng fail: 2890e02119d5SChris Mason btrfs_free_path(path); 2891a62f44a5STsutomu Itoh out_unlock: 2892e02119d5SChris Mason mutex_unlock(&BTRFS_I(dir)->log_mutex); 28934a500fd1SYan, Zheng if (ret == -ENOSPC) { 2894995946ddSMiao Xie btrfs_set_log_full_commit(root->fs_info, trans); 28954a500fd1SYan, Zheng ret = 0; 289679787eaaSJeff Mahoney } else if (ret < 0) 289779787eaaSJeff Mahoney btrfs_abort_transaction(trans, root, ret); 289879787eaaSJeff Mahoney 289912fcfd22SChris Mason btrfs_end_log_trans(root); 2900e02119d5SChris Mason 2901411fc6bcSAndi Kleen return err; 2902e02119d5SChris Mason } 2903e02119d5SChris Mason 2904e02119d5SChris Mason /* see comments for btrfs_del_dir_entries_in_log */ 2905e02119d5SChris Mason int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans, 2906e02119d5SChris Mason struct btrfs_root *root, 2907e02119d5SChris Mason const char *name, int name_len, 2908e02119d5SChris Mason struct inode *inode, u64 dirid) 2909e02119d5SChris Mason { 2910e02119d5SChris Mason struct btrfs_root *log; 2911e02119d5SChris Mason u64 index; 2912e02119d5SChris Mason int ret; 2913e02119d5SChris Mason 29143a5f1d45SChris Mason if (BTRFS_I(inode)->logged_trans < trans->transid) 29153a5f1d45SChris Mason return 0; 29163a5f1d45SChris Mason 2917e02119d5SChris Mason ret = join_running_log_trans(root); 2918e02119d5SChris Mason if (ret) 2919e02119d5SChris Mason return 0; 2920e02119d5SChris Mason log = root->log_root; 2921e02119d5SChris Mason mutex_lock(&BTRFS_I(inode)->log_mutex); 2922e02119d5SChris Mason 292333345d01SLi Zefan ret = 
btrfs_del_inode_ref(trans, log, name, name_len, btrfs_ino(inode), 2924e02119d5SChris Mason dirid, &index); 2925e02119d5SChris Mason mutex_unlock(&BTRFS_I(inode)->log_mutex); 29264a500fd1SYan, Zheng if (ret == -ENOSPC) { 2927995946ddSMiao Xie btrfs_set_log_full_commit(root->fs_info, trans); 29284a500fd1SYan, Zheng ret = 0; 292979787eaaSJeff Mahoney } else if (ret < 0 && ret != -ENOENT) 293079787eaaSJeff Mahoney btrfs_abort_transaction(trans, root, ret); 293112fcfd22SChris Mason btrfs_end_log_trans(root); 2932e02119d5SChris Mason 2933e02119d5SChris Mason return ret; 2934e02119d5SChris Mason } 2935e02119d5SChris Mason 2936e02119d5SChris Mason /* 2937e02119d5SChris Mason * creates a range item in the log for 'dirid'. first_offset and 2938e02119d5SChris Mason * last_offset tell us which parts of the key space the log should 2939e02119d5SChris Mason * be considered authoritative for. 2940e02119d5SChris Mason */ 2941e02119d5SChris Mason static noinline int insert_dir_log_key(struct btrfs_trans_handle *trans, 2942e02119d5SChris Mason struct btrfs_root *log, 2943e02119d5SChris Mason struct btrfs_path *path, 2944e02119d5SChris Mason int key_type, u64 dirid, 2945e02119d5SChris Mason u64 first_offset, u64 last_offset) 2946e02119d5SChris Mason { 2947e02119d5SChris Mason int ret; 2948e02119d5SChris Mason struct btrfs_key key; 2949e02119d5SChris Mason struct btrfs_dir_log_item *item; 2950e02119d5SChris Mason 2951e02119d5SChris Mason key.objectid = dirid; 2952e02119d5SChris Mason key.offset = first_offset; 2953e02119d5SChris Mason if (key_type == BTRFS_DIR_ITEM_KEY) 2954e02119d5SChris Mason key.type = BTRFS_DIR_LOG_ITEM_KEY; 2955e02119d5SChris Mason else 2956e02119d5SChris Mason key.type = BTRFS_DIR_LOG_INDEX_KEY; 2957e02119d5SChris Mason ret = btrfs_insert_empty_item(trans, log, path, &key, sizeof(*item)); 29584a500fd1SYan, Zheng if (ret) 29594a500fd1SYan, Zheng return ret; 2960e02119d5SChris Mason 2961e02119d5SChris Mason item = btrfs_item_ptr(path->nodes[0], path->slots[0], 
2962e02119d5SChris Mason struct btrfs_dir_log_item); 2963e02119d5SChris Mason btrfs_set_dir_log_end(path->nodes[0], item, last_offset); 2964e02119d5SChris Mason btrfs_mark_buffer_dirty(path->nodes[0]); 2965b3b4aa74SDavid Sterba btrfs_release_path(path); 2966e02119d5SChris Mason return 0; 2967e02119d5SChris Mason } 2968e02119d5SChris Mason 2969e02119d5SChris Mason /* 2970e02119d5SChris Mason * log all the items included in the current transaction for a given 2971e02119d5SChris Mason * directory. This also creates the range items in the log tree required 2972e02119d5SChris Mason * to replay anything deleted before the fsync 2973e02119d5SChris Mason */ 2974e02119d5SChris Mason static noinline int log_dir_items(struct btrfs_trans_handle *trans, 2975e02119d5SChris Mason struct btrfs_root *root, struct inode *inode, 2976e02119d5SChris Mason struct btrfs_path *path, 2977e02119d5SChris Mason struct btrfs_path *dst_path, int key_type, 2978e02119d5SChris Mason u64 min_offset, u64 *last_offset_ret) 2979e02119d5SChris Mason { 2980e02119d5SChris Mason struct btrfs_key min_key; 2981e02119d5SChris Mason struct btrfs_root *log = root->log_root; 2982e02119d5SChris Mason struct extent_buffer *src; 29834a500fd1SYan, Zheng int err = 0; 2984e02119d5SChris Mason int ret; 2985e02119d5SChris Mason int i; 2986e02119d5SChris Mason int nritems; 2987e02119d5SChris Mason u64 first_offset = min_offset; 2988e02119d5SChris Mason u64 last_offset = (u64)-1; 298933345d01SLi Zefan u64 ino = btrfs_ino(inode); 2990e02119d5SChris Mason 2991e02119d5SChris Mason log = root->log_root; 2992e02119d5SChris Mason 299333345d01SLi Zefan min_key.objectid = ino; 2994e02119d5SChris Mason min_key.type = key_type; 2995e02119d5SChris Mason min_key.offset = min_offset; 2996e02119d5SChris Mason 29976174d3cbSFilipe David Borba Manana ret = btrfs_search_forward(root, &min_key, path, trans->transid); 2998e02119d5SChris Mason 2999e02119d5SChris Mason /* 3000e02119d5SChris Mason * we didn't find anything from this 
transaction, see if there 3001e02119d5SChris Mason * is anything at all 3002e02119d5SChris Mason */ 300333345d01SLi Zefan if (ret != 0 || min_key.objectid != ino || min_key.type != key_type) { 300433345d01SLi Zefan min_key.objectid = ino; 3005e02119d5SChris Mason min_key.type = key_type; 3006e02119d5SChris Mason min_key.offset = (u64)-1; 3007b3b4aa74SDavid Sterba btrfs_release_path(path); 3008e02119d5SChris Mason ret = btrfs_search_slot(NULL, root, &min_key, path, 0, 0); 3009e02119d5SChris Mason if (ret < 0) { 3010b3b4aa74SDavid Sterba btrfs_release_path(path); 3011e02119d5SChris Mason return ret; 3012e02119d5SChris Mason } 301333345d01SLi Zefan ret = btrfs_previous_item(root, path, ino, key_type); 3014e02119d5SChris Mason 3015e02119d5SChris Mason /* if ret == 0 there are items for this type, 3016e02119d5SChris Mason * create a range to tell us the last key of this type. 3017e02119d5SChris Mason * otherwise, there are no items in this directory after 3018e02119d5SChris Mason * *min_offset, and we create a range to indicate that. 
3019e02119d5SChris Mason */ 3020e02119d5SChris Mason if (ret == 0) { 3021e02119d5SChris Mason struct btrfs_key tmp; 3022e02119d5SChris Mason btrfs_item_key_to_cpu(path->nodes[0], &tmp, 3023e02119d5SChris Mason path->slots[0]); 3024d397712bSChris Mason if (key_type == tmp.type) 3025e02119d5SChris Mason first_offset = max(min_offset, tmp.offset) + 1; 3026e02119d5SChris Mason } 3027e02119d5SChris Mason goto done; 3028e02119d5SChris Mason } 3029e02119d5SChris Mason 3030e02119d5SChris Mason /* go backward to find any previous key */ 303133345d01SLi Zefan ret = btrfs_previous_item(root, path, ino, key_type); 3032e02119d5SChris Mason if (ret == 0) { 3033e02119d5SChris Mason struct btrfs_key tmp; 3034e02119d5SChris Mason btrfs_item_key_to_cpu(path->nodes[0], &tmp, path->slots[0]); 3035e02119d5SChris Mason if (key_type == tmp.type) { 3036e02119d5SChris Mason first_offset = tmp.offset; 3037e02119d5SChris Mason ret = overwrite_item(trans, log, dst_path, 3038e02119d5SChris Mason path->nodes[0], path->slots[0], 3039e02119d5SChris Mason &tmp); 30404a500fd1SYan, Zheng if (ret) { 30414a500fd1SYan, Zheng err = ret; 30424a500fd1SYan, Zheng goto done; 30434a500fd1SYan, Zheng } 3044e02119d5SChris Mason } 3045e02119d5SChris Mason } 3046b3b4aa74SDavid Sterba btrfs_release_path(path); 3047e02119d5SChris Mason 3048e02119d5SChris Mason /* find the first key from this transaction again */ 3049e02119d5SChris Mason ret = btrfs_search_slot(NULL, root, &min_key, path, 0, 0); 3050fae7f21cSDulshani Gunawardhana if (WARN_ON(ret != 0)) 3051e02119d5SChris Mason goto done; 3052e02119d5SChris Mason 3053e02119d5SChris Mason /* 3054e02119d5SChris Mason * we have a block from this transaction, log every item in it 3055e02119d5SChris Mason * from our directory 3056e02119d5SChris Mason */ 3057e02119d5SChris Mason while (1) { 3058e02119d5SChris Mason struct btrfs_key tmp; 3059e02119d5SChris Mason src = path->nodes[0]; 3060e02119d5SChris Mason nritems = btrfs_header_nritems(src); 3061e02119d5SChris Mason for 
(i = path->slots[0]; i < nritems; i++) { 3062e02119d5SChris Mason btrfs_item_key_to_cpu(src, &min_key, i); 3063e02119d5SChris Mason 306433345d01SLi Zefan if (min_key.objectid != ino || min_key.type != key_type) 3065e02119d5SChris Mason goto done; 3066e02119d5SChris Mason ret = overwrite_item(trans, log, dst_path, src, i, 3067e02119d5SChris Mason &min_key); 30684a500fd1SYan, Zheng if (ret) { 30694a500fd1SYan, Zheng err = ret; 30704a500fd1SYan, Zheng goto done; 30714a500fd1SYan, Zheng } 3072e02119d5SChris Mason } 3073e02119d5SChris Mason path->slots[0] = nritems; 3074e02119d5SChris Mason 3075e02119d5SChris Mason /* 3076e02119d5SChris Mason * look ahead to the next item and see if it is also 3077e02119d5SChris Mason * from this directory and from this transaction 3078e02119d5SChris Mason */ 3079e02119d5SChris Mason ret = btrfs_next_leaf(root, path); 3080e02119d5SChris Mason if (ret == 1) { 3081e02119d5SChris Mason last_offset = (u64)-1; 3082e02119d5SChris Mason goto done; 3083e02119d5SChris Mason } 3084e02119d5SChris Mason btrfs_item_key_to_cpu(path->nodes[0], &tmp, path->slots[0]); 308533345d01SLi Zefan if (tmp.objectid != ino || tmp.type != key_type) { 3086e02119d5SChris Mason last_offset = (u64)-1; 3087e02119d5SChris Mason goto done; 3088e02119d5SChris Mason } 3089e02119d5SChris Mason if (btrfs_header_generation(path->nodes[0]) != trans->transid) { 3090e02119d5SChris Mason ret = overwrite_item(trans, log, dst_path, 3091e02119d5SChris Mason path->nodes[0], path->slots[0], 3092e02119d5SChris Mason &tmp); 30934a500fd1SYan, Zheng if (ret) 30944a500fd1SYan, Zheng err = ret; 30954a500fd1SYan, Zheng else 3096e02119d5SChris Mason last_offset = tmp.offset; 3097e02119d5SChris Mason goto done; 3098e02119d5SChris Mason } 3099e02119d5SChris Mason } 3100e02119d5SChris Mason done: 3101b3b4aa74SDavid Sterba btrfs_release_path(path); 3102b3b4aa74SDavid Sterba btrfs_release_path(dst_path); 3103e02119d5SChris Mason 31044a500fd1SYan, Zheng if (err == 0) { 31054a500fd1SYan, Zheng 
*last_offset_ret = last_offset; 31064a500fd1SYan, Zheng /* 31074a500fd1SYan, Zheng * insert the log range keys to indicate where the log 31084a500fd1SYan, Zheng * is valid 31094a500fd1SYan, Zheng */ 31104a500fd1SYan, Zheng ret = insert_dir_log_key(trans, log, path, key_type, 311133345d01SLi Zefan ino, first_offset, last_offset); 31124a500fd1SYan, Zheng if (ret) 31134a500fd1SYan, Zheng err = ret; 31144a500fd1SYan, Zheng } 31154a500fd1SYan, Zheng return err; 3116e02119d5SChris Mason } 3117e02119d5SChris Mason 3118e02119d5SChris Mason /* 3119e02119d5SChris Mason * logging directories is very similar to logging inodes, We find all the items 3120e02119d5SChris Mason * from the current transaction and write them to the log. 3121e02119d5SChris Mason * 3122e02119d5SChris Mason * The recovery code scans the directory in the subvolume, and if it finds a 3123e02119d5SChris Mason * key in the range logged that is not present in the log tree, then it means 3124e02119d5SChris Mason * that dir entry was unlinked during the transaction. 3125e02119d5SChris Mason * 3126e02119d5SChris Mason * In order for that scan to work, we must include one key smaller than 3127e02119d5SChris Mason * the smallest logged by this transaction and one key larger than the largest 3128e02119d5SChris Mason * key logged by this transaction. 
3129e02119d5SChris Mason */ 3130e02119d5SChris Mason static noinline int log_directory_changes(struct btrfs_trans_handle *trans, 3131e02119d5SChris Mason struct btrfs_root *root, struct inode *inode, 3132e02119d5SChris Mason struct btrfs_path *path, 3133e02119d5SChris Mason struct btrfs_path *dst_path) 3134e02119d5SChris Mason { 3135e02119d5SChris Mason u64 min_key; 3136e02119d5SChris Mason u64 max_key; 3137e02119d5SChris Mason int ret; 3138e02119d5SChris Mason int key_type = BTRFS_DIR_ITEM_KEY; 3139e02119d5SChris Mason 3140e02119d5SChris Mason again: 3141e02119d5SChris Mason min_key = 0; 3142e02119d5SChris Mason max_key = 0; 3143e02119d5SChris Mason while (1) { 3144e02119d5SChris Mason ret = log_dir_items(trans, root, inode, path, 3145e02119d5SChris Mason dst_path, key_type, min_key, 3146e02119d5SChris Mason &max_key); 31474a500fd1SYan, Zheng if (ret) 31484a500fd1SYan, Zheng return ret; 3149e02119d5SChris Mason if (max_key == (u64)-1) 3150e02119d5SChris Mason break; 3151e02119d5SChris Mason min_key = max_key + 1; 3152e02119d5SChris Mason } 3153e02119d5SChris Mason 3154e02119d5SChris Mason if (key_type == BTRFS_DIR_ITEM_KEY) { 3155e02119d5SChris Mason key_type = BTRFS_DIR_INDEX_KEY; 3156e02119d5SChris Mason goto again; 3157e02119d5SChris Mason } 3158e02119d5SChris Mason return 0; 3159e02119d5SChris Mason } 3160e02119d5SChris Mason 3161e02119d5SChris Mason /* 3162e02119d5SChris Mason * a helper function to drop items from the log before we relog an 3163e02119d5SChris Mason * inode. max_key_type indicates the highest item type to remove. 3164e02119d5SChris Mason * This cannot be run for file data extents because it does not 3165e02119d5SChris Mason * free the extents they point to. 
3166e02119d5SChris Mason */ 3167e02119d5SChris Mason static int drop_objectid_items(struct btrfs_trans_handle *trans, 3168e02119d5SChris Mason struct btrfs_root *log, 3169e02119d5SChris Mason struct btrfs_path *path, 3170e02119d5SChris Mason u64 objectid, int max_key_type) 3171e02119d5SChris Mason { 3172e02119d5SChris Mason int ret; 3173e02119d5SChris Mason struct btrfs_key key; 3174e02119d5SChris Mason struct btrfs_key found_key; 317518ec90d6SJosef Bacik int start_slot; 3176e02119d5SChris Mason 3177e02119d5SChris Mason key.objectid = objectid; 3178e02119d5SChris Mason key.type = max_key_type; 3179e02119d5SChris Mason key.offset = (u64)-1; 3180e02119d5SChris Mason 3181e02119d5SChris Mason while (1) { 3182e02119d5SChris Mason ret = btrfs_search_slot(trans, log, &key, path, -1, 1); 31833650860bSJosef Bacik BUG_ON(ret == 0); /* Logic error */ 31844a500fd1SYan, Zheng if (ret < 0) 3185e02119d5SChris Mason break; 3186e02119d5SChris Mason 3187e02119d5SChris Mason if (path->slots[0] == 0) 3188e02119d5SChris Mason break; 3189e02119d5SChris Mason 3190e02119d5SChris Mason path->slots[0]--; 3191e02119d5SChris Mason btrfs_item_key_to_cpu(path->nodes[0], &found_key, 3192e02119d5SChris Mason path->slots[0]); 3193e02119d5SChris Mason 3194e02119d5SChris Mason if (found_key.objectid != objectid) 3195e02119d5SChris Mason break; 3196e02119d5SChris Mason 319718ec90d6SJosef Bacik found_key.offset = 0; 319818ec90d6SJosef Bacik found_key.type = 0; 319918ec90d6SJosef Bacik ret = btrfs_bin_search(path->nodes[0], &found_key, 0, 320018ec90d6SJosef Bacik &start_slot); 320118ec90d6SJosef Bacik 320218ec90d6SJosef Bacik ret = btrfs_del_items(trans, log, path, start_slot, 320318ec90d6SJosef Bacik path->slots[0] - start_slot + 1); 320418ec90d6SJosef Bacik /* 320518ec90d6SJosef Bacik * If start slot isn't 0 then we don't need to re-search, we've 320618ec90d6SJosef Bacik * found the last guy with the objectid in this tree. 
320718ec90d6SJosef Bacik */ 320818ec90d6SJosef Bacik if (ret || start_slot != 0) 320965a246c5STsutomu Itoh break; 3210b3b4aa74SDavid Sterba btrfs_release_path(path); 3211e02119d5SChris Mason } 3212b3b4aa74SDavid Sterba btrfs_release_path(path); 32135bdbeb21SJosef Bacik if (ret > 0) 32145bdbeb21SJosef Bacik ret = 0; 32154a500fd1SYan, Zheng return ret; 3216e02119d5SChris Mason } 3217e02119d5SChris Mason 321894edf4aeSJosef Bacik static void fill_inode_item(struct btrfs_trans_handle *trans, 321994edf4aeSJosef Bacik struct extent_buffer *leaf, 322094edf4aeSJosef Bacik struct btrfs_inode_item *item, 322194edf4aeSJosef Bacik struct inode *inode, int log_inode_only) 322294edf4aeSJosef Bacik { 32230b1c6ccaSJosef Bacik struct btrfs_map_token token; 322494edf4aeSJosef Bacik 32250b1c6ccaSJosef Bacik btrfs_init_map_token(&token); 322694edf4aeSJosef Bacik 322794edf4aeSJosef Bacik if (log_inode_only) { 322894edf4aeSJosef Bacik /* set the generation to zero so the recover code 322994edf4aeSJosef Bacik * can tell the difference between an logging 323094edf4aeSJosef Bacik * just to say 'this inode exists' and a logging 323194edf4aeSJosef Bacik * to say 'update this inode with these values' 323294edf4aeSJosef Bacik */ 32330b1c6ccaSJosef Bacik btrfs_set_token_inode_generation(leaf, item, 0, &token); 32340b1c6ccaSJosef Bacik btrfs_set_token_inode_size(leaf, item, 0, &token); 323594edf4aeSJosef Bacik } else { 32360b1c6ccaSJosef Bacik btrfs_set_token_inode_generation(leaf, item, 32370b1c6ccaSJosef Bacik BTRFS_I(inode)->generation, 32380b1c6ccaSJosef Bacik &token); 32390b1c6ccaSJosef Bacik btrfs_set_token_inode_size(leaf, item, inode->i_size, &token); 324094edf4aeSJosef Bacik } 324194edf4aeSJosef Bacik 32420b1c6ccaSJosef Bacik btrfs_set_token_inode_uid(leaf, item, i_uid_read(inode), &token); 32430b1c6ccaSJosef Bacik btrfs_set_token_inode_gid(leaf, item, i_gid_read(inode), &token); 32440b1c6ccaSJosef Bacik btrfs_set_token_inode_mode(leaf, item, inode->i_mode, &token); 32450b1c6ccaSJosef 
Bacik btrfs_set_token_inode_nlink(leaf, item, inode->i_nlink, &token); 32460b1c6ccaSJosef Bacik 32470b1c6ccaSJosef Bacik btrfs_set_token_timespec_sec(leaf, btrfs_inode_atime(item), 32480b1c6ccaSJosef Bacik inode->i_atime.tv_sec, &token); 32490b1c6ccaSJosef Bacik btrfs_set_token_timespec_nsec(leaf, btrfs_inode_atime(item), 32500b1c6ccaSJosef Bacik inode->i_atime.tv_nsec, &token); 32510b1c6ccaSJosef Bacik 32520b1c6ccaSJosef Bacik btrfs_set_token_timespec_sec(leaf, btrfs_inode_mtime(item), 32530b1c6ccaSJosef Bacik inode->i_mtime.tv_sec, &token); 32540b1c6ccaSJosef Bacik btrfs_set_token_timespec_nsec(leaf, btrfs_inode_mtime(item), 32550b1c6ccaSJosef Bacik inode->i_mtime.tv_nsec, &token); 32560b1c6ccaSJosef Bacik 32570b1c6ccaSJosef Bacik btrfs_set_token_timespec_sec(leaf, btrfs_inode_ctime(item), 32580b1c6ccaSJosef Bacik inode->i_ctime.tv_sec, &token); 32590b1c6ccaSJosef Bacik btrfs_set_token_timespec_nsec(leaf, btrfs_inode_ctime(item), 32600b1c6ccaSJosef Bacik inode->i_ctime.tv_nsec, &token); 32610b1c6ccaSJosef Bacik 32620b1c6ccaSJosef Bacik btrfs_set_token_inode_nbytes(leaf, item, inode_get_bytes(inode), 32630b1c6ccaSJosef Bacik &token); 32640b1c6ccaSJosef Bacik 32650b1c6ccaSJosef Bacik btrfs_set_token_inode_sequence(leaf, item, inode->i_version, &token); 32660b1c6ccaSJosef Bacik btrfs_set_token_inode_transid(leaf, item, trans->transid, &token); 32670b1c6ccaSJosef Bacik btrfs_set_token_inode_rdev(leaf, item, inode->i_rdev, &token); 32680b1c6ccaSJosef Bacik btrfs_set_token_inode_flags(leaf, item, BTRFS_I(inode)->flags, &token); 32690b1c6ccaSJosef Bacik btrfs_set_token_inode_block_group(leaf, item, 0, &token); 327094edf4aeSJosef Bacik } 327194edf4aeSJosef Bacik 3272a95249b3SJosef Bacik static int log_inode_item(struct btrfs_trans_handle *trans, 3273a95249b3SJosef Bacik struct btrfs_root *log, struct btrfs_path *path, 3274a95249b3SJosef Bacik struct inode *inode) 3275a95249b3SJosef Bacik { 3276a95249b3SJosef Bacik struct btrfs_inode_item *inode_item; 3277a95249b3SJosef 
Bacik int ret; 3278a95249b3SJosef Bacik 3279efd0c405SFilipe David Borba Manana ret = btrfs_insert_empty_item(trans, log, path, 3280efd0c405SFilipe David Borba Manana &BTRFS_I(inode)->location, 3281a95249b3SJosef Bacik sizeof(*inode_item)); 3282a95249b3SJosef Bacik if (ret && ret != -EEXIST) 3283a95249b3SJosef Bacik return ret; 3284a95249b3SJosef Bacik inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0], 3285a95249b3SJosef Bacik struct btrfs_inode_item); 3286a95249b3SJosef Bacik fill_inode_item(trans, path->nodes[0], inode_item, inode, 0); 3287a95249b3SJosef Bacik btrfs_release_path(path); 3288a95249b3SJosef Bacik return 0; 3289a95249b3SJosef Bacik } 3290a95249b3SJosef Bacik 329131ff1cd2SChris Mason static noinline int copy_items(struct btrfs_trans_handle *trans, 3292d2794405SLiu Bo struct inode *inode, 329331ff1cd2SChris Mason struct btrfs_path *dst_path, 329416e7549fSJosef Bacik struct btrfs_path *src_path, u64 *last_extent, 329531ff1cd2SChris Mason int start_slot, int nr, int inode_only) 329631ff1cd2SChris Mason { 329731ff1cd2SChris Mason unsigned long src_offset; 329831ff1cd2SChris Mason unsigned long dst_offset; 3299d2794405SLiu Bo struct btrfs_root *log = BTRFS_I(inode)->root->log_root; 330031ff1cd2SChris Mason struct btrfs_file_extent_item *extent; 330131ff1cd2SChris Mason struct btrfs_inode_item *inode_item; 330216e7549fSJosef Bacik struct extent_buffer *src = src_path->nodes[0]; 330316e7549fSJosef Bacik struct btrfs_key first_key, last_key, key; 330431ff1cd2SChris Mason int ret; 330531ff1cd2SChris Mason struct btrfs_key *ins_keys; 330631ff1cd2SChris Mason u32 *ins_sizes; 330731ff1cd2SChris Mason char *ins_data; 330831ff1cd2SChris Mason int i; 3309d20f7043SChris Mason struct list_head ordered_sums; 3310d2794405SLiu Bo int skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; 331116e7549fSJosef Bacik bool has_extents = false; 331274121f7cSFilipe Manana bool need_find_last_extent = true; 331316e7549fSJosef Bacik bool done = false; 
3314d20f7043SChris Mason 3315d20f7043SChris Mason INIT_LIST_HEAD(&ordered_sums); 331631ff1cd2SChris Mason 331731ff1cd2SChris Mason ins_data = kmalloc(nr * sizeof(struct btrfs_key) + 331831ff1cd2SChris Mason nr * sizeof(u32), GFP_NOFS); 33192a29edc6Sliubo if (!ins_data) 33202a29edc6Sliubo return -ENOMEM; 33212a29edc6Sliubo 332216e7549fSJosef Bacik first_key.objectid = (u64)-1; 332316e7549fSJosef Bacik 332431ff1cd2SChris Mason ins_sizes = (u32 *)ins_data; 332531ff1cd2SChris Mason ins_keys = (struct btrfs_key *)(ins_data + nr * sizeof(u32)); 332631ff1cd2SChris Mason 332731ff1cd2SChris Mason for (i = 0; i < nr; i++) { 332831ff1cd2SChris Mason ins_sizes[i] = btrfs_item_size_nr(src, i + start_slot); 332931ff1cd2SChris Mason btrfs_item_key_to_cpu(src, ins_keys + i, i + start_slot); 333031ff1cd2SChris Mason } 333131ff1cd2SChris Mason ret = btrfs_insert_empty_items(trans, log, dst_path, 333231ff1cd2SChris Mason ins_keys, ins_sizes, nr); 33334a500fd1SYan, Zheng if (ret) { 33344a500fd1SYan, Zheng kfree(ins_data); 33354a500fd1SYan, Zheng return ret; 33364a500fd1SYan, Zheng } 333731ff1cd2SChris Mason 33385d4f98a2SYan Zheng for (i = 0; i < nr; i++, dst_path->slots[0]++) { 333931ff1cd2SChris Mason dst_offset = btrfs_item_ptr_offset(dst_path->nodes[0], 334031ff1cd2SChris Mason dst_path->slots[0]); 334131ff1cd2SChris Mason 334231ff1cd2SChris Mason src_offset = btrfs_item_ptr_offset(src, start_slot + i); 334331ff1cd2SChris Mason 334416e7549fSJosef Bacik if ((i == (nr - 1))) 334516e7549fSJosef Bacik last_key = ins_keys[i]; 334616e7549fSJosef Bacik 334794edf4aeSJosef Bacik if (ins_keys[i].type == BTRFS_INODE_ITEM_KEY) { 334831ff1cd2SChris Mason inode_item = btrfs_item_ptr(dst_path->nodes[0], 334931ff1cd2SChris Mason dst_path->slots[0], 335031ff1cd2SChris Mason struct btrfs_inode_item); 335194edf4aeSJosef Bacik fill_inode_item(trans, dst_path->nodes[0], inode_item, 335294edf4aeSJosef Bacik inode, inode_only == LOG_INODE_EXISTS); 335394edf4aeSJosef Bacik } else { 335494edf4aeSJosef 
Bacik copy_extent_buffer(dst_path->nodes[0], src, dst_offset, 335594edf4aeSJosef Bacik src_offset, ins_sizes[i]); 335631ff1cd2SChris Mason } 335794edf4aeSJosef Bacik 335816e7549fSJosef Bacik /* 335916e7549fSJosef Bacik * We set need_find_last_extent here in case we know we were 336016e7549fSJosef Bacik * processing other items and then walk into the first extent in 336116e7549fSJosef Bacik * the inode. If we don't hit an extent then nothing changes, 336216e7549fSJosef Bacik * we'll do the last search the next time around. 336316e7549fSJosef Bacik */ 336416e7549fSJosef Bacik if (ins_keys[i].type == BTRFS_EXTENT_DATA_KEY) { 336516e7549fSJosef Bacik has_extents = true; 336674121f7cSFilipe Manana if (first_key.objectid == (u64)-1) 336716e7549fSJosef Bacik first_key = ins_keys[i]; 336816e7549fSJosef Bacik } else { 336916e7549fSJosef Bacik need_find_last_extent = false; 337016e7549fSJosef Bacik } 337116e7549fSJosef Bacik 337231ff1cd2SChris Mason /* take a reference on file data extents so that truncates 337331ff1cd2SChris Mason * or deletes of this inode don't have to relog the inode 337431ff1cd2SChris Mason * again 337531ff1cd2SChris Mason */ 3376962a298fSDavid Sterba if (ins_keys[i].type == BTRFS_EXTENT_DATA_KEY && 3377d2794405SLiu Bo !skip_csum) { 337831ff1cd2SChris Mason int found_type; 337931ff1cd2SChris Mason extent = btrfs_item_ptr(src, start_slot + i, 338031ff1cd2SChris Mason struct btrfs_file_extent_item); 338131ff1cd2SChris Mason 33828e531cdfSliubo if (btrfs_file_extent_generation(src, extent) < trans->transid) 33838e531cdfSliubo continue; 33848e531cdfSliubo 338531ff1cd2SChris Mason found_type = btrfs_file_extent_type(src, extent); 33866f1fed77SJosef Bacik if (found_type == BTRFS_FILE_EXTENT_REG) { 33875d4f98a2SYan Zheng u64 ds, dl, cs, cl; 33885d4f98a2SYan Zheng ds = btrfs_file_extent_disk_bytenr(src, 338931ff1cd2SChris Mason extent); 33905d4f98a2SYan Zheng /* ds == 0 is a hole */ 33915d4f98a2SYan Zheng if (ds == 0) 33925d4f98a2SYan Zheng continue; 
33935d4f98a2SYan Zheng 33945d4f98a2SYan Zheng dl = btrfs_file_extent_disk_num_bytes(src, 339531ff1cd2SChris Mason extent); 33965d4f98a2SYan Zheng cs = btrfs_file_extent_offset(src, extent); 33975d4f98a2SYan Zheng cl = btrfs_file_extent_num_bytes(src, 3398a419aef8SJoe Perches extent); 3399580afd76SChris Mason if (btrfs_file_extent_compression(src, 3400580afd76SChris Mason extent)) { 3401580afd76SChris Mason cs = 0; 3402580afd76SChris Mason cl = dl; 3403580afd76SChris Mason } 34045d4f98a2SYan Zheng 340507d400a6SYan Zheng ret = btrfs_lookup_csums_range( 3406d20f7043SChris Mason log->fs_info->csum_root, 340707d400a6SYan Zheng ds + cs, ds + cs + cl - 1, 3408a2de733cSArne Jansen &ordered_sums, 0); 34093650860bSJosef Bacik if (ret) { 34103650860bSJosef Bacik btrfs_release_path(dst_path); 34113650860bSJosef Bacik kfree(ins_data); 34123650860bSJosef Bacik return ret; 34133650860bSJosef Bacik } 341431ff1cd2SChris Mason } 341531ff1cd2SChris Mason } 341631ff1cd2SChris Mason } 341731ff1cd2SChris Mason 341831ff1cd2SChris Mason btrfs_mark_buffer_dirty(dst_path->nodes[0]); 3419b3b4aa74SDavid Sterba btrfs_release_path(dst_path); 342031ff1cd2SChris Mason kfree(ins_data); 3421d20f7043SChris Mason 3422d20f7043SChris Mason /* 3423d20f7043SChris Mason * we have to do this after the loop above to avoid changing the 3424d20f7043SChris Mason * log tree while trying to change the log tree. 
3425d20f7043SChris Mason */ 34264a500fd1SYan, Zheng ret = 0; 3427d20f7043SChris Mason while (!list_empty(&ordered_sums)) { 3428d20f7043SChris Mason struct btrfs_ordered_sum *sums = list_entry(ordered_sums.next, 3429d20f7043SChris Mason struct btrfs_ordered_sum, 3430d20f7043SChris Mason list); 34314a500fd1SYan, Zheng if (!ret) 3432d20f7043SChris Mason ret = btrfs_csum_file_blocks(trans, log, sums); 3433d20f7043SChris Mason list_del(&sums->list); 3434d20f7043SChris Mason kfree(sums); 3435d20f7043SChris Mason } 343616e7549fSJosef Bacik 343716e7549fSJosef Bacik if (!has_extents) 343816e7549fSJosef Bacik return ret; 343916e7549fSJosef Bacik 344074121f7cSFilipe Manana if (need_find_last_extent && *last_extent == first_key.offset) { 344174121f7cSFilipe Manana /* 344274121f7cSFilipe Manana * We don't have any leafs between our current one and the one 344374121f7cSFilipe Manana * we processed before that can have file extent items for our 344474121f7cSFilipe Manana * inode (and have a generation number smaller than our current 344574121f7cSFilipe Manana * transaction id). 344674121f7cSFilipe Manana */ 344774121f7cSFilipe Manana need_find_last_extent = false; 344874121f7cSFilipe Manana } 344974121f7cSFilipe Manana 345016e7549fSJosef Bacik /* 345116e7549fSJosef Bacik * Because we use btrfs_search_forward we could skip leaves that were 345216e7549fSJosef Bacik * not modified and then assume *last_extent is valid when it really 345316e7549fSJosef Bacik * isn't. So back up to the previous leaf and read the end of the last 345416e7549fSJosef Bacik * extent before we go and fill in holes. 
345516e7549fSJosef Bacik */ 345616e7549fSJosef Bacik if (need_find_last_extent) { 345716e7549fSJosef Bacik u64 len; 345816e7549fSJosef Bacik 345916e7549fSJosef Bacik ret = btrfs_prev_leaf(BTRFS_I(inode)->root, src_path); 346016e7549fSJosef Bacik if (ret < 0) 346116e7549fSJosef Bacik return ret; 346216e7549fSJosef Bacik if (ret) 346316e7549fSJosef Bacik goto fill_holes; 346416e7549fSJosef Bacik if (src_path->slots[0]) 346516e7549fSJosef Bacik src_path->slots[0]--; 346616e7549fSJosef Bacik src = src_path->nodes[0]; 346716e7549fSJosef Bacik btrfs_item_key_to_cpu(src, &key, src_path->slots[0]); 346816e7549fSJosef Bacik if (key.objectid != btrfs_ino(inode) || 346916e7549fSJosef Bacik key.type != BTRFS_EXTENT_DATA_KEY) 347016e7549fSJosef Bacik goto fill_holes; 347116e7549fSJosef Bacik extent = btrfs_item_ptr(src, src_path->slots[0], 347216e7549fSJosef Bacik struct btrfs_file_extent_item); 347316e7549fSJosef Bacik if (btrfs_file_extent_type(src, extent) == 347416e7549fSJosef Bacik BTRFS_FILE_EXTENT_INLINE) { 3475514ac8adSChris Mason len = btrfs_file_extent_inline_len(src, 3476514ac8adSChris Mason src_path->slots[0], 3477514ac8adSChris Mason extent); 347816e7549fSJosef Bacik *last_extent = ALIGN(key.offset + len, 347916e7549fSJosef Bacik log->sectorsize); 348016e7549fSJosef Bacik } else { 348116e7549fSJosef Bacik len = btrfs_file_extent_num_bytes(src, extent); 348216e7549fSJosef Bacik *last_extent = key.offset + len; 348316e7549fSJosef Bacik } 348416e7549fSJosef Bacik } 348516e7549fSJosef Bacik fill_holes: 348616e7549fSJosef Bacik /* So we did prev_leaf, now we need to move to the next leaf, but a few 348716e7549fSJosef Bacik * things could have happened 348816e7549fSJosef Bacik * 348916e7549fSJosef Bacik * 1) A merge could have happened, so we could currently be on a leaf 349016e7549fSJosef Bacik * that holds what we were copying in the first place. 
349116e7549fSJosef Bacik * 2) A split could have happened, and now not all of the items we want 349216e7549fSJosef Bacik * are on the same leaf. 349316e7549fSJosef Bacik * 349416e7549fSJosef Bacik * So we need to adjust how we search for holes, we need to drop the 349516e7549fSJosef Bacik * path and re-search for the first extent key we found, and then walk 349616e7549fSJosef Bacik * forward until we hit the last one we copied. 349716e7549fSJosef Bacik */ 349816e7549fSJosef Bacik if (need_find_last_extent) { 349916e7549fSJosef Bacik /* btrfs_prev_leaf could return 1 without releasing the path */ 350016e7549fSJosef Bacik btrfs_release_path(src_path); 350116e7549fSJosef Bacik ret = btrfs_search_slot(NULL, BTRFS_I(inode)->root, &first_key, 350216e7549fSJosef Bacik src_path, 0, 0); 350316e7549fSJosef Bacik if (ret < 0) 350416e7549fSJosef Bacik return ret; 350516e7549fSJosef Bacik ASSERT(ret == 0); 350616e7549fSJosef Bacik src = src_path->nodes[0]; 350716e7549fSJosef Bacik i = src_path->slots[0]; 350816e7549fSJosef Bacik } else { 350916e7549fSJosef Bacik i = start_slot; 351016e7549fSJosef Bacik } 351116e7549fSJosef Bacik 351216e7549fSJosef Bacik /* 351316e7549fSJosef Bacik * Ok so here we need to go through and fill in any holes we may have 351416e7549fSJosef Bacik * to make sure that holes are punched for those areas in case they had 351516e7549fSJosef Bacik * extents previously. 
351616e7549fSJosef Bacik */ 351716e7549fSJosef Bacik while (!done) { 351816e7549fSJosef Bacik u64 offset, len; 351916e7549fSJosef Bacik u64 extent_end; 352016e7549fSJosef Bacik 352116e7549fSJosef Bacik if (i >= btrfs_header_nritems(src_path->nodes[0])) { 352216e7549fSJosef Bacik ret = btrfs_next_leaf(BTRFS_I(inode)->root, src_path); 352316e7549fSJosef Bacik if (ret < 0) 352416e7549fSJosef Bacik return ret; 352516e7549fSJosef Bacik ASSERT(ret == 0); 352616e7549fSJosef Bacik src = src_path->nodes[0]; 352716e7549fSJosef Bacik i = 0; 352816e7549fSJosef Bacik } 352916e7549fSJosef Bacik 353016e7549fSJosef Bacik btrfs_item_key_to_cpu(src, &key, i); 353116e7549fSJosef Bacik if (!btrfs_comp_cpu_keys(&key, &last_key)) 353216e7549fSJosef Bacik done = true; 353316e7549fSJosef Bacik if (key.objectid != btrfs_ino(inode) || 353416e7549fSJosef Bacik key.type != BTRFS_EXTENT_DATA_KEY) { 353516e7549fSJosef Bacik i++; 353616e7549fSJosef Bacik continue; 353716e7549fSJosef Bacik } 353816e7549fSJosef Bacik extent = btrfs_item_ptr(src, i, struct btrfs_file_extent_item); 353916e7549fSJosef Bacik if (btrfs_file_extent_type(src, extent) == 354016e7549fSJosef Bacik BTRFS_FILE_EXTENT_INLINE) { 3541514ac8adSChris Mason len = btrfs_file_extent_inline_len(src, i, extent); 354216e7549fSJosef Bacik extent_end = ALIGN(key.offset + len, log->sectorsize); 354316e7549fSJosef Bacik } else { 354416e7549fSJosef Bacik len = btrfs_file_extent_num_bytes(src, extent); 354516e7549fSJosef Bacik extent_end = key.offset + len; 354616e7549fSJosef Bacik } 354716e7549fSJosef Bacik i++; 354816e7549fSJosef Bacik 354916e7549fSJosef Bacik if (*last_extent == key.offset) { 355016e7549fSJosef Bacik *last_extent = extent_end; 355116e7549fSJosef Bacik continue; 355216e7549fSJosef Bacik } 355316e7549fSJosef Bacik offset = *last_extent; 355416e7549fSJosef Bacik len = key.offset - *last_extent; 355516e7549fSJosef Bacik ret = btrfs_insert_file_extent(trans, log, btrfs_ino(inode), 355616e7549fSJosef Bacik offset, 0, 0, len, 0, 
len, 0, 355716e7549fSJosef Bacik 0, 0); 355816e7549fSJosef Bacik if (ret) 355916e7549fSJosef Bacik break; 356074121f7cSFilipe Manana *last_extent = extent_end; 356116e7549fSJosef Bacik } 356216e7549fSJosef Bacik /* 356316e7549fSJosef Bacik * Need to let the callers know we dropped the path so they should 356416e7549fSJosef Bacik * re-search. 356516e7549fSJosef Bacik */ 356616e7549fSJosef Bacik if (!ret && need_find_last_extent) 356716e7549fSJosef Bacik ret = 1; 35684a500fd1SYan, Zheng return ret; 356931ff1cd2SChris Mason } 357031ff1cd2SChris Mason 35715dc562c5SJosef Bacik static int extent_cmp(void *priv, struct list_head *a, struct list_head *b) 35725dc562c5SJosef Bacik { 35735dc562c5SJosef Bacik struct extent_map *em1, *em2; 35745dc562c5SJosef Bacik 35755dc562c5SJosef Bacik em1 = list_entry(a, struct extent_map, list); 35765dc562c5SJosef Bacik em2 = list_entry(b, struct extent_map, list); 35775dc562c5SJosef Bacik 35785dc562c5SJosef Bacik if (em1->start < em2->start) 35795dc562c5SJosef Bacik return -1; 35805dc562c5SJosef Bacik else if (em1->start > em2->start) 35815dc562c5SJosef Bacik return 1; 35825dc562c5SJosef Bacik return 0; 35835dc562c5SJosef Bacik } 35845dc562c5SJosef Bacik 35858407f553SFilipe Manana static int wait_ordered_extents(struct btrfs_trans_handle *trans, 35868407f553SFilipe Manana struct inode *inode, 35878407f553SFilipe Manana struct btrfs_root *root, 35888407f553SFilipe Manana const struct extent_map *em, 35898407f553SFilipe Manana const struct list_head *logged_list, 35908407f553SFilipe Manana bool *ordered_io_error) 35915dc562c5SJosef Bacik { 35922ab28f32SJosef Bacik struct btrfs_ordered_extent *ordered; 35938407f553SFilipe Manana struct btrfs_root *log = root->log_root; 35942ab28f32SJosef Bacik u64 mod_start = em->mod_start; 35952ab28f32SJosef Bacik u64 mod_len = em->mod_len; 35968407f553SFilipe Manana const bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; 35972ab28f32SJosef Bacik u64 csum_offset; 35982ab28f32SJosef Bacik u64 
csum_len; 35998407f553SFilipe Manana LIST_HEAD(ordered_sums); 36008407f553SFilipe Manana int ret = 0; 360109a2a8f9SJosef Bacik 36028407f553SFilipe Manana *ordered_io_error = false; 36031acae57bSFilipe David Borba Manana 36048407f553SFilipe Manana if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) || 36058407f553SFilipe Manana em->block_start == EXTENT_MAP_HOLE) 360670c8a91cSJosef Bacik return 0; 360770c8a91cSJosef Bacik 36082ab28f32SJosef Bacik /* 36098407f553SFilipe Manana * Wait far any ordered extent that covers our extent map. If it 36108407f553SFilipe Manana * finishes without an error, first check and see if our csums are on 36118407f553SFilipe Manana * our outstanding ordered extents. 36122ab28f32SJosef Bacik */ 3613827463c4SMiao Xie list_for_each_entry(ordered, logged_list, log_list) { 36142ab28f32SJosef Bacik struct btrfs_ordered_sum *sum; 36152ab28f32SJosef Bacik 36162ab28f32SJosef Bacik if (!mod_len) 36172ab28f32SJosef Bacik break; 36182ab28f32SJosef Bacik 36192ab28f32SJosef Bacik if (ordered->file_offset + ordered->len <= mod_start || 36202ab28f32SJosef Bacik mod_start + mod_len <= ordered->file_offset) 36212ab28f32SJosef Bacik continue; 36222ab28f32SJosef Bacik 36238407f553SFilipe Manana if (!test_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags) && 36248407f553SFilipe Manana !test_bit(BTRFS_ORDERED_IOERR, &ordered->flags) && 36258407f553SFilipe Manana !test_bit(BTRFS_ORDERED_DIRECT, &ordered->flags)) { 36268407f553SFilipe Manana const u64 start = ordered->file_offset; 36278407f553SFilipe Manana const u64 end = ordered->file_offset + ordered->len - 1; 36288407f553SFilipe Manana 36298407f553SFilipe Manana WARN_ON(ordered->inode != inode); 36308407f553SFilipe Manana filemap_fdatawrite_range(inode->i_mapping, start, end); 36318407f553SFilipe Manana } 36328407f553SFilipe Manana 36338407f553SFilipe Manana wait_event(ordered->wait, 36348407f553SFilipe Manana (test_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags) || 36358407f553SFilipe Manana test_bit(BTRFS_ORDERED_IOERR, 
&ordered->flags))); 36368407f553SFilipe Manana 36378407f553SFilipe Manana if (test_bit(BTRFS_ORDERED_IOERR, &ordered->flags)) { 36388407f553SFilipe Manana *ordered_io_error = true; 36398407f553SFilipe Manana break; 36408407f553SFilipe Manana } 36412ab28f32SJosef Bacik /* 36422ab28f32SJosef Bacik * We are going to copy all the csums on this ordered extent, so 36432ab28f32SJosef Bacik * go ahead and adjust mod_start and mod_len in case this 36442ab28f32SJosef Bacik * ordered extent has already been logged. 36452ab28f32SJosef Bacik */ 36462ab28f32SJosef Bacik if (ordered->file_offset > mod_start) { 36472ab28f32SJosef Bacik if (ordered->file_offset + ordered->len >= 36482ab28f32SJosef Bacik mod_start + mod_len) 36492ab28f32SJosef Bacik mod_len = ordered->file_offset - mod_start; 36502ab28f32SJosef Bacik /* 36512ab28f32SJosef Bacik * If we have this case 36522ab28f32SJosef Bacik * 36532ab28f32SJosef Bacik * |--------- logged extent ---------| 36542ab28f32SJosef Bacik * |----- ordered extent ----| 36552ab28f32SJosef Bacik * 36562ab28f32SJosef Bacik * Just don't mess with mod_start and mod_len, we'll 36572ab28f32SJosef Bacik * just end up logging more csums than we need and it 36582ab28f32SJosef Bacik * will be ok. 
36592ab28f32SJosef Bacik */ 36602ab28f32SJosef Bacik } else { 36612ab28f32SJosef Bacik if (ordered->file_offset + ordered->len < 36622ab28f32SJosef Bacik mod_start + mod_len) { 36632ab28f32SJosef Bacik mod_len = (mod_start + mod_len) - 36642ab28f32SJosef Bacik (ordered->file_offset + ordered->len); 36652ab28f32SJosef Bacik mod_start = ordered->file_offset + 36662ab28f32SJosef Bacik ordered->len; 36672ab28f32SJosef Bacik } else { 36682ab28f32SJosef Bacik mod_len = 0; 36692ab28f32SJosef Bacik } 36702ab28f32SJosef Bacik } 36712ab28f32SJosef Bacik 36728407f553SFilipe Manana if (skip_csum) 36738407f553SFilipe Manana continue; 36748407f553SFilipe Manana 36752ab28f32SJosef Bacik /* 36762ab28f32SJosef Bacik * To keep us from looping for the above case of an ordered 36772ab28f32SJosef Bacik * extent that falls inside of the logged extent. 36782ab28f32SJosef Bacik */ 36792ab28f32SJosef Bacik if (test_and_set_bit(BTRFS_ORDERED_LOGGED_CSUM, 36802ab28f32SJosef Bacik &ordered->flags)) 36812ab28f32SJosef Bacik continue; 36822ab28f32SJosef Bacik 368323c671a5SMiao Xie if (ordered->csum_bytes_left) { 368423c671a5SMiao Xie btrfs_start_ordered_extent(inode, ordered, 0); 368523c671a5SMiao Xie wait_event(ordered->wait, 368623c671a5SMiao Xie ordered->csum_bytes_left == 0); 368723c671a5SMiao Xie } 36882ab28f32SJosef Bacik 36892ab28f32SJosef Bacik list_for_each_entry(sum, &ordered->list, list) { 36902ab28f32SJosef Bacik ret = btrfs_csum_file_blocks(trans, log, sum); 3691827463c4SMiao Xie if (ret) 36928407f553SFilipe Manana break; 36938407f553SFilipe Manana } 36942ab28f32SJosef Bacik } 36952ab28f32SJosef Bacik 36968407f553SFilipe Manana if (*ordered_io_error || !mod_len || ret || skip_csum) 36972ab28f32SJosef Bacik return ret; 36982ab28f32SJosef Bacik 3699488111aaSFilipe David Borba Manana if (em->compress_type) { 3700488111aaSFilipe David Borba Manana csum_offset = 0; 37018407f553SFilipe Manana csum_len = max(em->block_len, em->orig_block_len); 3702488111aaSFilipe David Borba Manana } else 
{ 37032ab28f32SJosef Bacik csum_offset = mod_start - em->start; 37042ab28f32SJosef Bacik csum_len = mod_len; 3705488111aaSFilipe David Borba Manana } 37062ab28f32SJosef Bacik 370770c8a91cSJosef Bacik /* block start is already adjusted for the file extent offset. */ 370870c8a91cSJosef Bacik ret = btrfs_lookup_csums_range(log->fs_info->csum_root, 370970c8a91cSJosef Bacik em->block_start + csum_offset, 371070c8a91cSJosef Bacik em->block_start + csum_offset + 371170c8a91cSJosef Bacik csum_len - 1, &ordered_sums, 0); 37125dc562c5SJosef Bacik if (ret) 37135dc562c5SJosef Bacik return ret; 371470c8a91cSJosef Bacik 371570c8a91cSJosef Bacik while (!list_empty(&ordered_sums)) { 371670c8a91cSJosef Bacik struct btrfs_ordered_sum *sums = list_entry(ordered_sums.next, 371770c8a91cSJosef Bacik struct btrfs_ordered_sum, 371870c8a91cSJosef Bacik list); 371970c8a91cSJosef Bacik if (!ret) 372070c8a91cSJosef Bacik ret = btrfs_csum_file_blocks(trans, log, sums); 372170c8a91cSJosef Bacik list_del(&sums->list); 372270c8a91cSJosef Bacik kfree(sums); 37235dc562c5SJosef Bacik } 37245dc562c5SJosef Bacik 372570c8a91cSJosef Bacik return ret; 37265dc562c5SJosef Bacik } 37275dc562c5SJosef Bacik 37288407f553SFilipe Manana static int log_one_extent(struct btrfs_trans_handle *trans, 37298407f553SFilipe Manana struct inode *inode, struct btrfs_root *root, 37308407f553SFilipe Manana const struct extent_map *em, 37318407f553SFilipe Manana struct btrfs_path *path, 37328407f553SFilipe Manana const struct list_head *logged_list, 37338407f553SFilipe Manana struct btrfs_log_ctx *ctx) 37348407f553SFilipe Manana { 37358407f553SFilipe Manana struct btrfs_root *log = root->log_root; 37368407f553SFilipe Manana struct btrfs_file_extent_item *fi; 37378407f553SFilipe Manana struct extent_buffer *leaf; 37388407f553SFilipe Manana struct btrfs_map_token token; 37398407f553SFilipe Manana struct btrfs_key key; 37408407f553SFilipe Manana u64 extent_offset = em->start - em->orig_start; 37418407f553SFilipe Manana u64 
block_len; 37428407f553SFilipe Manana int ret; 37438407f553SFilipe Manana int extent_inserted = 0; 37448407f553SFilipe Manana bool ordered_io_err = false; 37458407f553SFilipe Manana 37468407f553SFilipe Manana ret = wait_ordered_extents(trans, inode, root, em, logged_list, 37478407f553SFilipe Manana &ordered_io_err); 37488407f553SFilipe Manana if (ret) 37498407f553SFilipe Manana return ret; 37508407f553SFilipe Manana 37518407f553SFilipe Manana if (ordered_io_err) { 37528407f553SFilipe Manana ctx->io_err = -EIO; 37538407f553SFilipe Manana return 0; 37548407f553SFilipe Manana } 37558407f553SFilipe Manana 37568407f553SFilipe Manana btrfs_init_map_token(&token); 37578407f553SFilipe Manana 37588407f553SFilipe Manana ret = __btrfs_drop_extents(trans, log, inode, path, em->start, 37598407f553SFilipe Manana em->start + em->len, NULL, 0, 1, 37608407f553SFilipe Manana sizeof(*fi), &extent_inserted); 37618407f553SFilipe Manana if (ret) 37628407f553SFilipe Manana return ret; 37638407f553SFilipe Manana 37648407f553SFilipe Manana if (!extent_inserted) { 37658407f553SFilipe Manana key.objectid = btrfs_ino(inode); 37668407f553SFilipe Manana key.type = BTRFS_EXTENT_DATA_KEY; 37678407f553SFilipe Manana key.offset = em->start; 37688407f553SFilipe Manana 37698407f553SFilipe Manana ret = btrfs_insert_empty_item(trans, log, path, &key, 37708407f553SFilipe Manana sizeof(*fi)); 37718407f553SFilipe Manana if (ret) 37728407f553SFilipe Manana return ret; 37738407f553SFilipe Manana } 37748407f553SFilipe Manana leaf = path->nodes[0]; 37758407f553SFilipe Manana fi = btrfs_item_ptr(leaf, path->slots[0], 37768407f553SFilipe Manana struct btrfs_file_extent_item); 37778407f553SFilipe Manana 377850d9aa99SJosef Bacik btrfs_set_token_file_extent_generation(leaf, fi, trans->transid, 37798407f553SFilipe Manana &token); 37808407f553SFilipe Manana if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) 37818407f553SFilipe Manana btrfs_set_token_file_extent_type(leaf, fi, 37828407f553SFilipe Manana 
BTRFS_FILE_EXTENT_PREALLOC, 37838407f553SFilipe Manana &token); 37848407f553SFilipe Manana else 37858407f553SFilipe Manana btrfs_set_token_file_extent_type(leaf, fi, 37868407f553SFilipe Manana BTRFS_FILE_EXTENT_REG, 37878407f553SFilipe Manana &token); 37888407f553SFilipe Manana 37898407f553SFilipe Manana block_len = max(em->block_len, em->orig_block_len); 37908407f553SFilipe Manana if (em->compress_type != BTRFS_COMPRESS_NONE) { 37918407f553SFilipe Manana btrfs_set_token_file_extent_disk_bytenr(leaf, fi, 37928407f553SFilipe Manana em->block_start, 37938407f553SFilipe Manana &token); 37948407f553SFilipe Manana btrfs_set_token_file_extent_disk_num_bytes(leaf, fi, block_len, 37958407f553SFilipe Manana &token); 37968407f553SFilipe Manana } else if (em->block_start < EXTENT_MAP_LAST_BYTE) { 37978407f553SFilipe Manana btrfs_set_token_file_extent_disk_bytenr(leaf, fi, 37988407f553SFilipe Manana em->block_start - 37998407f553SFilipe Manana extent_offset, &token); 38008407f553SFilipe Manana btrfs_set_token_file_extent_disk_num_bytes(leaf, fi, block_len, 38018407f553SFilipe Manana &token); 38028407f553SFilipe Manana } else { 38038407f553SFilipe Manana btrfs_set_token_file_extent_disk_bytenr(leaf, fi, 0, &token); 38048407f553SFilipe Manana btrfs_set_token_file_extent_disk_num_bytes(leaf, fi, 0, 38058407f553SFilipe Manana &token); 38068407f553SFilipe Manana } 38078407f553SFilipe Manana 38088407f553SFilipe Manana btrfs_set_token_file_extent_offset(leaf, fi, extent_offset, &token); 38098407f553SFilipe Manana btrfs_set_token_file_extent_num_bytes(leaf, fi, em->len, &token); 38108407f553SFilipe Manana btrfs_set_token_file_extent_ram_bytes(leaf, fi, em->ram_bytes, &token); 38118407f553SFilipe Manana btrfs_set_token_file_extent_compression(leaf, fi, em->compress_type, 38128407f553SFilipe Manana &token); 38138407f553SFilipe Manana btrfs_set_token_file_extent_encryption(leaf, fi, 0, &token); 38148407f553SFilipe Manana btrfs_set_token_file_extent_other_encoding(leaf, fi, 0, &token); 
38158407f553SFilipe Manana btrfs_mark_buffer_dirty(leaf); 38168407f553SFilipe Manana 38178407f553SFilipe Manana btrfs_release_path(path); 38188407f553SFilipe Manana 38198407f553SFilipe Manana return ret; 38208407f553SFilipe Manana } 38218407f553SFilipe Manana 38225dc562c5SJosef Bacik static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, 38235dc562c5SJosef Bacik struct btrfs_root *root, 38245dc562c5SJosef Bacik struct inode *inode, 3825827463c4SMiao Xie struct btrfs_path *path, 38268407f553SFilipe Manana struct list_head *logged_list, 38278407f553SFilipe Manana struct btrfs_log_ctx *ctx) 38285dc562c5SJosef Bacik { 38295dc562c5SJosef Bacik struct extent_map *em, *n; 38305dc562c5SJosef Bacik struct list_head extents; 38315dc562c5SJosef Bacik struct extent_map_tree *tree = &BTRFS_I(inode)->extent_tree; 38325dc562c5SJosef Bacik u64 test_gen; 38335dc562c5SJosef Bacik int ret = 0; 38342ab28f32SJosef Bacik int num = 0; 38355dc562c5SJosef Bacik 38365dc562c5SJosef Bacik INIT_LIST_HEAD(&extents); 38375dc562c5SJosef Bacik 38385dc562c5SJosef Bacik write_lock(&tree->lock); 38395dc562c5SJosef Bacik test_gen = root->fs_info->last_trans_committed; 38405dc562c5SJosef Bacik 38415dc562c5SJosef Bacik list_for_each_entry_safe(em, n, &tree->modified_extents, list) { 38425dc562c5SJosef Bacik list_del_init(&em->list); 38432ab28f32SJosef Bacik 38442ab28f32SJosef Bacik /* 38452ab28f32SJosef Bacik * Just an arbitrary number, this can be really CPU intensive 38462ab28f32SJosef Bacik * once we start getting a lot of extents, and really once we 38472ab28f32SJosef Bacik * have a bunch of extents we just want to commit since it will 38482ab28f32SJosef Bacik * be faster. 
38492ab28f32SJosef Bacik */ 38502ab28f32SJosef Bacik if (++num > 32768) { 38512ab28f32SJosef Bacik list_del_init(&tree->modified_extents); 38522ab28f32SJosef Bacik ret = -EFBIG; 38532ab28f32SJosef Bacik goto process; 38542ab28f32SJosef Bacik } 38552ab28f32SJosef Bacik 38565dc562c5SJosef Bacik if (em->generation <= test_gen) 38575dc562c5SJosef Bacik continue; 3858ff44c6e3SJosef Bacik /* Need a ref to keep it from getting evicted from cache */ 3859ff44c6e3SJosef Bacik atomic_inc(&em->refs); 3860ff44c6e3SJosef Bacik set_bit(EXTENT_FLAG_LOGGING, &em->flags); 38615dc562c5SJosef Bacik list_add_tail(&em->list, &extents); 38622ab28f32SJosef Bacik num++; 38635dc562c5SJosef Bacik } 38645dc562c5SJosef Bacik 38655dc562c5SJosef Bacik list_sort(NULL, &extents, extent_cmp); 38665dc562c5SJosef Bacik 38672ab28f32SJosef Bacik process: 38685dc562c5SJosef Bacik while (!list_empty(&extents)) { 38695dc562c5SJosef Bacik em = list_entry(extents.next, struct extent_map, list); 38705dc562c5SJosef Bacik 38715dc562c5SJosef Bacik list_del_init(&em->list); 38725dc562c5SJosef Bacik 38735dc562c5SJosef Bacik /* 38745dc562c5SJosef Bacik * If we had an error we just need to delete everybody from our 38755dc562c5SJosef Bacik * private list. 
38765dc562c5SJosef Bacik */ 3877ff44c6e3SJosef Bacik if (ret) { 3878201a9038SJosef Bacik clear_em_logging(tree, em); 3879ff44c6e3SJosef Bacik free_extent_map(em); 38805dc562c5SJosef Bacik continue; 3881ff44c6e3SJosef Bacik } 3882ff44c6e3SJosef Bacik 3883ff44c6e3SJosef Bacik write_unlock(&tree->lock); 38845dc562c5SJosef Bacik 38858407f553SFilipe Manana ret = log_one_extent(trans, inode, root, em, path, logged_list, 38868407f553SFilipe Manana ctx); 3887ff44c6e3SJosef Bacik write_lock(&tree->lock); 3888201a9038SJosef Bacik clear_em_logging(tree, em); 3889201a9038SJosef Bacik free_extent_map(em); 38905dc562c5SJosef Bacik } 3891ff44c6e3SJosef Bacik WARN_ON(!list_empty(&extents)); 3892ff44c6e3SJosef Bacik write_unlock(&tree->lock); 38935dc562c5SJosef Bacik 38945dc562c5SJosef Bacik btrfs_release_path(path); 38955dc562c5SJosef Bacik return ret; 38965dc562c5SJosef Bacik } 38975dc562c5SJosef Bacik 3898e02119d5SChris Mason /* log a single inode in the tree log. 3899e02119d5SChris Mason * At least one parent directory for this inode must exist in the tree 3900e02119d5SChris Mason * or be logged already. 3901e02119d5SChris Mason * 3902e02119d5SChris Mason * Any items from this inode changed by the current transaction are copied 3903e02119d5SChris Mason * to the log tree. An extra reference is taken on any extents in this 3904e02119d5SChris Mason * file, allowing us to avoid a whole pile of corner cases around logging 3905e02119d5SChris Mason * blocks that have been removed from the tree. 3906e02119d5SChris Mason * 3907e02119d5SChris Mason * See LOG_INODE_ALL and related defines for a description of what inode_only 3908e02119d5SChris Mason * does. 3909e02119d5SChris Mason * 3910e02119d5SChris Mason * This handles both files and directories. 
3911e02119d5SChris Mason */ 391212fcfd22SChris Mason static int btrfs_log_inode(struct btrfs_trans_handle *trans, 3913e02119d5SChris Mason struct btrfs_root *root, struct inode *inode, 391449dae1bcSFilipe Manana int inode_only, 391549dae1bcSFilipe Manana const loff_t start, 39168407f553SFilipe Manana const loff_t end, 39178407f553SFilipe Manana struct btrfs_log_ctx *ctx) 3918e02119d5SChris Mason { 3919e02119d5SChris Mason struct btrfs_path *path; 3920e02119d5SChris Mason struct btrfs_path *dst_path; 3921e02119d5SChris Mason struct btrfs_key min_key; 3922e02119d5SChris Mason struct btrfs_key max_key; 3923e02119d5SChris Mason struct btrfs_root *log = root->log_root; 392431ff1cd2SChris Mason struct extent_buffer *src = NULL; 3925827463c4SMiao Xie LIST_HEAD(logged_list); 392616e7549fSJosef Bacik u64 last_extent = 0; 39274a500fd1SYan, Zheng int err = 0; 3928e02119d5SChris Mason int ret; 39293a5f1d45SChris Mason int nritems; 393031ff1cd2SChris Mason int ins_start_slot = 0; 393131ff1cd2SChris Mason int ins_nr; 39325dc562c5SJosef Bacik bool fast_search = false; 393333345d01SLi Zefan u64 ino = btrfs_ino(inode); 393449dae1bcSFilipe Manana struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; 3935e02119d5SChris Mason 3936e02119d5SChris Mason path = btrfs_alloc_path(); 39375df67083STsutomu Itoh if (!path) 39385df67083STsutomu Itoh return -ENOMEM; 3939e02119d5SChris Mason dst_path = btrfs_alloc_path(); 39405df67083STsutomu Itoh if (!dst_path) { 39415df67083STsutomu Itoh btrfs_free_path(path); 39425df67083STsutomu Itoh return -ENOMEM; 39435df67083STsutomu Itoh } 3944e02119d5SChris Mason 394533345d01SLi Zefan min_key.objectid = ino; 3946e02119d5SChris Mason min_key.type = BTRFS_INODE_ITEM_KEY; 3947e02119d5SChris Mason min_key.offset = 0; 3948e02119d5SChris Mason 394933345d01SLi Zefan max_key.objectid = ino; 395012fcfd22SChris Mason 395112fcfd22SChris Mason 39525dc562c5SJosef Bacik /* today the code can only do partial logging of directories */ 39535269b67eSMiao Xie if 
(S_ISDIR(inode->i_mode) || 39545269b67eSMiao Xie (!test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, 39555269b67eSMiao Xie &BTRFS_I(inode)->runtime_flags) && 39565269b67eSMiao Xie inode_only == LOG_INODE_EXISTS)) 3957e02119d5SChris Mason max_key.type = BTRFS_XATTR_ITEM_KEY; 3958e02119d5SChris Mason else 3959e02119d5SChris Mason max_key.type = (u8)-1; 3960e02119d5SChris Mason max_key.offset = (u64)-1; 3961e02119d5SChris Mason 396294edf4aeSJosef Bacik /* Only run delayed items if we are a dir or a new file */ 396394edf4aeSJosef Bacik if (S_ISDIR(inode->i_mode) || 396494edf4aeSJosef Bacik BTRFS_I(inode)->generation > root->fs_info->last_trans_committed) { 396516cdcec7SMiao Xie ret = btrfs_commit_inode_delayed_items(trans, inode); 396616cdcec7SMiao Xie if (ret) { 396716cdcec7SMiao Xie btrfs_free_path(path); 396816cdcec7SMiao Xie btrfs_free_path(dst_path); 396916cdcec7SMiao Xie return ret; 397016cdcec7SMiao Xie } 397194edf4aeSJosef Bacik } 397216cdcec7SMiao Xie 3973e02119d5SChris Mason mutex_lock(&BTRFS_I(inode)->log_mutex); 3974e02119d5SChris Mason 3975827463c4SMiao Xie btrfs_get_logged_extents(inode, &logged_list); 39762ab28f32SJosef Bacik 3977e02119d5SChris Mason /* 3978e02119d5SChris Mason * a brute force approach to making sure we get the most uptodate 3979e02119d5SChris Mason * copies of everything. 
3980e02119d5SChris Mason */ 3981e02119d5SChris Mason if (S_ISDIR(inode->i_mode)) { 3982e02119d5SChris Mason int max_key_type = BTRFS_DIR_LOG_INDEX_KEY; 3983e02119d5SChris Mason 3984e02119d5SChris Mason if (inode_only == LOG_INODE_EXISTS) 3985e02119d5SChris Mason max_key_type = BTRFS_XATTR_ITEM_KEY; 398633345d01SLi Zefan ret = drop_objectid_items(trans, log, path, ino, max_key_type); 3987e02119d5SChris Mason } else { 39885dc562c5SJosef Bacik if (test_and_clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC, 39895dc562c5SJosef Bacik &BTRFS_I(inode)->runtime_flags)) { 3990e9976151SJosef Bacik clear_bit(BTRFS_INODE_COPY_EVERYTHING, 3991e9976151SJosef Bacik &BTRFS_I(inode)->runtime_flags); 39925dc562c5SJosef Bacik ret = btrfs_truncate_inode_items(trans, log, 39935dc562c5SJosef Bacik inode, 0, 0); 3994a95249b3SJosef Bacik } else if (test_and_clear_bit(BTRFS_INODE_COPY_EVERYTHING, 39956cfab851SJosef Bacik &BTRFS_I(inode)->runtime_flags) || 39966cfab851SJosef Bacik inode_only == LOG_INODE_EXISTS) { 3997a95249b3SJosef Bacik if (inode_only == LOG_INODE_ALL) 3998a95249b3SJosef Bacik fast_search = true; 3999a95249b3SJosef Bacik max_key.type = BTRFS_XATTR_ITEM_KEY; 4000a95249b3SJosef Bacik ret = drop_objectid_items(trans, log, path, ino, 4001a95249b3SJosef Bacik max_key.type); 40025dc562c5SJosef Bacik } else { 4003183f37faSLiu Bo if (inode_only == LOG_INODE_ALL) 40045dc562c5SJosef Bacik fast_search = true; 4005a95249b3SJosef Bacik ret = log_inode_item(trans, log, dst_path, inode); 4006a95249b3SJosef Bacik if (ret) { 4007a95249b3SJosef Bacik err = ret; 4008a95249b3SJosef Bacik goto out_unlock; 40095dc562c5SJosef Bacik } 4010a95249b3SJosef Bacik goto log_extents; 4011a95249b3SJosef Bacik } 4012a95249b3SJosef Bacik 4013e02119d5SChris Mason } 40144a500fd1SYan, Zheng if (ret) { 40154a500fd1SYan, Zheng err = ret; 40164a500fd1SYan, Zheng goto out_unlock; 40174a500fd1SYan, Zheng } 4018e02119d5SChris Mason 4019e02119d5SChris Mason while (1) { 402031ff1cd2SChris Mason ins_nr = 0; 40216174d3cbSFilipe 
David Borba Manana ret = btrfs_search_forward(root, &min_key, 4022de78b51aSEric Sandeen path, trans->transid); 4023e02119d5SChris Mason if (ret != 0) 4024e02119d5SChris Mason break; 40253a5f1d45SChris Mason again: 402631ff1cd2SChris Mason /* note, ins_nr might be > 0 here, cleanup outside the loop */ 402733345d01SLi Zefan if (min_key.objectid != ino) 4028e02119d5SChris Mason break; 4029e02119d5SChris Mason if (min_key.type > max_key.type) 4030e02119d5SChris Mason break; 403131ff1cd2SChris Mason 4032e02119d5SChris Mason src = path->nodes[0]; 403331ff1cd2SChris Mason if (ins_nr && ins_start_slot + ins_nr == path->slots[0]) { 403431ff1cd2SChris Mason ins_nr++; 403531ff1cd2SChris Mason goto next_slot; 403631ff1cd2SChris Mason } else if (!ins_nr) { 403731ff1cd2SChris Mason ins_start_slot = path->slots[0]; 403831ff1cd2SChris Mason ins_nr = 1; 403931ff1cd2SChris Mason goto next_slot; 4040e02119d5SChris Mason } 4041e02119d5SChris Mason 404216e7549fSJosef Bacik ret = copy_items(trans, inode, dst_path, path, &last_extent, 404316e7549fSJosef Bacik ins_start_slot, ins_nr, inode_only); 404416e7549fSJosef Bacik if (ret < 0) { 40454a500fd1SYan, Zheng err = ret; 40464a500fd1SYan, Zheng goto out_unlock; 4047a71db86eSRasmus Villemoes } 4048a71db86eSRasmus Villemoes if (ret) { 404916e7549fSJosef Bacik ins_nr = 0; 405016e7549fSJosef Bacik btrfs_release_path(path); 405116e7549fSJosef Bacik continue; 40524a500fd1SYan, Zheng } 405331ff1cd2SChris Mason ins_nr = 1; 405431ff1cd2SChris Mason ins_start_slot = path->slots[0]; 405531ff1cd2SChris Mason next_slot: 4056e02119d5SChris Mason 40573a5f1d45SChris Mason nritems = btrfs_header_nritems(path->nodes[0]); 40583a5f1d45SChris Mason path->slots[0]++; 40593a5f1d45SChris Mason if (path->slots[0] < nritems) { 40603a5f1d45SChris Mason btrfs_item_key_to_cpu(path->nodes[0], &min_key, 40613a5f1d45SChris Mason path->slots[0]); 40623a5f1d45SChris Mason goto again; 40633a5f1d45SChris Mason } 406431ff1cd2SChris Mason if (ins_nr) { 406516e7549fSJosef Bacik 
ret = copy_items(trans, inode, dst_path, path, 406616e7549fSJosef Bacik &last_extent, ins_start_slot, 406731ff1cd2SChris Mason ins_nr, inode_only); 406816e7549fSJosef Bacik if (ret < 0) { 40694a500fd1SYan, Zheng err = ret; 40704a500fd1SYan, Zheng goto out_unlock; 40714a500fd1SYan, Zheng } 407216e7549fSJosef Bacik ret = 0; 407331ff1cd2SChris Mason ins_nr = 0; 407431ff1cd2SChris Mason } 4075b3b4aa74SDavid Sterba btrfs_release_path(path); 40763a5f1d45SChris Mason 40773d41d702SFilipe David Borba Manana if (min_key.offset < (u64)-1) { 4078e02119d5SChris Mason min_key.offset++; 40793d41d702SFilipe David Borba Manana } else if (min_key.type < max_key.type) { 4080e02119d5SChris Mason min_key.type++; 40813d41d702SFilipe David Borba Manana min_key.offset = 0; 40823d41d702SFilipe David Borba Manana } else { 4083e02119d5SChris Mason break; 4084e02119d5SChris Mason } 40853d41d702SFilipe David Borba Manana } 408631ff1cd2SChris Mason if (ins_nr) { 408716e7549fSJosef Bacik ret = copy_items(trans, inode, dst_path, path, &last_extent, 408816e7549fSJosef Bacik ins_start_slot, ins_nr, inode_only); 408916e7549fSJosef Bacik if (ret < 0) { 40904a500fd1SYan, Zheng err = ret; 40914a500fd1SYan, Zheng goto out_unlock; 40924a500fd1SYan, Zheng } 409316e7549fSJosef Bacik ret = 0; 409431ff1cd2SChris Mason ins_nr = 0; 409531ff1cd2SChris Mason } 40965dc562c5SJosef Bacik 4097a95249b3SJosef Bacik log_extents: 4098f3b15ccdSJosef Bacik btrfs_release_path(path); 40995dc562c5SJosef Bacik btrfs_release_path(dst_path); 4100f3b15ccdSJosef Bacik if (fast_search) { 4101827463c4SMiao Xie ret = btrfs_log_changed_extents(trans, root, inode, dst_path, 41028407f553SFilipe Manana &logged_list, ctx); 41035dc562c5SJosef Bacik if (ret) { 41045dc562c5SJosef Bacik err = ret; 41055dc562c5SJosef Bacik goto out_unlock; 41065dc562c5SJosef Bacik } 4107d006a048SJosef Bacik } else if (inode_only == LOG_INODE_ALL) { 410806d3d22bSLiu Bo struct extent_map *em, *n; 410906d3d22bSLiu Bo 411049dae1bcSFilipe Manana 
write_lock(&em_tree->lock); 411149dae1bcSFilipe Manana /* 411249dae1bcSFilipe Manana * We can't just remove every em if we're called for a ranged 411349dae1bcSFilipe Manana * fsync - that is, one that doesn't cover the whole possible 411449dae1bcSFilipe Manana * file range (0 to LLONG_MAX). This is because we can have 411549dae1bcSFilipe Manana * em's that fall outside the range we're logging and therefore 411649dae1bcSFilipe Manana * their ordered operations haven't completed yet 411749dae1bcSFilipe Manana * (btrfs_finish_ordered_io() not invoked yet). This means we 411849dae1bcSFilipe Manana * didn't get their respective file extent item in the fs/subvol 411949dae1bcSFilipe Manana * tree yet, and need to let the next fast fsync (one which 412049dae1bcSFilipe Manana * consults the list of modified extent maps) find the em so 412149dae1bcSFilipe Manana * that it logs a matching file extent item and waits for the 412249dae1bcSFilipe Manana * respective ordered operation to complete (if it's still 412349dae1bcSFilipe Manana * running). 412449dae1bcSFilipe Manana * 412549dae1bcSFilipe Manana * Removing every em outside the range we're logging would make 412649dae1bcSFilipe Manana * the next fast fsync not log their matching file extent items, 412749dae1bcSFilipe Manana * therefore making us lose data after a log replay. 
412849dae1bcSFilipe Manana */ 412949dae1bcSFilipe Manana list_for_each_entry_safe(em, n, &em_tree->modified_extents, 413049dae1bcSFilipe Manana list) { 413149dae1bcSFilipe Manana const u64 mod_end = em->mod_start + em->mod_len - 1; 413249dae1bcSFilipe Manana 413349dae1bcSFilipe Manana if (em->mod_start >= start && mod_end <= end) 413406d3d22bSLiu Bo list_del_init(&em->list); 413549dae1bcSFilipe Manana } 413649dae1bcSFilipe Manana write_unlock(&em_tree->lock); 41375dc562c5SJosef Bacik } 41385dc562c5SJosef Bacik 41399623f9a3SChris Mason if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) { 4140e02119d5SChris Mason ret = log_directory_changes(trans, root, inode, path, dst_path); 41414a500fd1SYan, Zheng if (ret) { 41424a500fd1SYan, Zheng err = ret; 41434a500fd1SYan, Zheng goto out_unlock; 41444a500fd1SYan, Zheng } 4145e02119d5SChris Mason } 414649dae1bcSFilipe Manana 41473a5f1d45SChris Mason BTRFS_I(inode)->logged_trans = trans->transid; 4148125c4cf9SFilipe Manana BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->last_sub_trans; 41494a500fd1SYan, Zheng out_unlock: 4150827463c4SMiao Xie if (unlikely(err)) 4151827463c4SMiao Xie btrfs_put_logged_extents(&logged_list); 4152827463c4SMiao Xie else 4153827463c4SMiao Xie btrfs_submit_logged_extents(&logged_list, log); 4154e02119d5SChris Mason mutex_unlock(&BTRFS_I(inode)->log_mutex); 4155e02119d5SChris Mason 4156e02119d5SChris Mason btrfs_free_path(path); 4157e02119d5SChris Mason btrfs_free_path(dst_path); 41584a500fd1SYan, Zheng return err; 4159e02119d5SChris Mason } 4160e02119d5SChris Mason 416112fcfd22SChris Mason /* 416212fcfd22SChris Mason * follow the dentry parent pointers up the chain and see if any 416312fcfd22SChris Mason * of the directories in it require a full commit before they can 416412fcfd22SChris Mason * be logged. Returns zero if nothing special needs to be done or 1 if 416512fcfd22SChris Mason * a full commit is required. 
416612fcfd22SChris Mason */ 416712fcfd22SChris Mason static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans, 416812fcfd22SChris Mason struct inode *inode, 416912fcfd22SChris Mason struct dentry *parent, 417012fcfd22SChris Mason struct super_block *sb, 417112fcfd22SChris Mason u64 last_committed) 4172e02119d5SChris Mason { 417312fcfd22SChris Mason int ret = 0; 417412fcfd22SChris Mason struct btrfs_root *root; 41756a912213SJosef Bacik struct dentry *old_parent = NULL; 4176de2b530bSJosef Bacik struct inode *orig_inode = inode; 4177e02119d5SChris Mason 4178af4176b4SChris Mason /* 4179af4176b4SChris Mason * for regular files, if its inode is already on disk, we don't 4180af4176b4SChris Mason * have to worry about the parents at all. This is because 4181af4176b4SChris Mason * we can use the last_unlink_trans field to record renames 4182af4176b4SChris Mason * and other fun in this file. 4183af4176b4SChris Mason */ 4184af4176b4SChris Mason if (S_ISREG(inode->i_mode) && 4185af4176b4SChris Mason BTRFS_I(inode)->generation <= last_committed && 4186af4176b4SChris Mason BTRFS_I(inode)->last_unlink_trans <= last_committed) 4187af4176b4SChris Mason goto out; 4188af4176b4SChris Mason 418912fcfd22SChris Mason if (!S_ISDIR(inode->i_mode)) { 419012fcfd22SChris Mason if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb) 419112fcfd22SChris Mason goto out; 419212fcfd22SChris Mason inode = parent->d_inode; 419312fcfd22SChris Mason } 419412fcfd22SChris Mason 419512fcfd22SChris Mason while (1) { 4196de2b530bSJosef Bacik /* 4197de2b530bSJosef Bacik * If we are logging a directory then we start with our inode, 4198de2b530bSJosef Bacik * not our parents inode, so we need to skipp setting the 4199de2b530bSJosef Bacik * logged_trans so that further down in the log code we don't 4200de2b530bSJosef Bacik * think this inode has already been logged. 
4201de2b530bSJosef Bacik */ 4202de2b530bSJosef Bacik if (inode != orig_inode) 420312fcfd22SChris Mason BTRFS_I(inode)->logged_trans = trans->transid; 420412fcfd22SChris Mason smp_mb(); 420512fcfd22SChris Mason 420612fcfd22SChris Mason if (BTRFS_I(inode)->last_unlink_trans > last_committed) { 420712fcfd22SChris Mason root = BTRFS_I(inode)->root; 420812fcfd22SChris Mason 420912fcfd22SChris Mason /* 421012fcfd22SChris Mason * make sure any commits to the log are forced 421112fcfd22SChris Mason * to be full commits 421212fcfd22SChris Mason */ 4213995946ddSMiao Xie btrfs_set_log_full_commit(root->fs_info, trans); 421412fcfd22SChris Mason ret = 1; 421512fcfd22SChris Mason break; 421612fcfd22SChris Mason } 421712fcfd22SChris Mason 421812fcfd22SChris Mason if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb) 421912fcfd22SChris Mason break; 422012fcfd22SChris Mason 422176dda93cSYan, Zheng if (IS_ROOT(parent)) 422212fcfd22SChris Mason break; 422312fcfd22SChris Mason 42246a912213SJosef Bacik parent = dget_parent(parent); 42256a912213SJosef Bacik dput(old_parent); 42266a912213SJosef Bacik old_parent = parent; 422712fcfd22SChris Mason inode = parent->d_inode; 422812fcfd22SChris Mason 422912fcfd22SChris Mason } 42306a912213SJosef Bacik dput(old_parent); 423112fcfd22SChris Mason out: 4232e02119d5SChris Mason return ret; 4233e02119d5SChris Mason } 4234e02119d5SChris Mason 4235e02119d5SChris Mason /* 4236e02119d5SChris Mason * helper function around btrfs_log_inode to make sure newly created 4237e02119d5SChris Mason * parent directories also end up in the log. 
A minimal inode and backref 4238e02119d5SChris Mason * only logging is done of any parent directories that are older than 4239e02119d5SChris Mason * the last committed transaction 4240e02119d5SChris Mason */ 424148a3b636SEric Sandeen static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, 424212fcfd22SChris Mason struct btrfs_root *root, struct inode *inode, 424349dae1bcSFilipe Manana struct dentry *parent, 424449dae1bcSFilipe Manana const loff_t start, 424549dae1bcSFilipe Manana const loff_t end, 424649dae1bcSFilipe Manana int exists_only, 42478b050d35SMiao Xie struct btrfs_log_ctx *ctx) 4248e02119d5SChris Mason { 424912fcfd22SChris Mason int inode_only = exists_only ? LOG_INODE_EXISTS : LOG_INODE_ALL; 4250e02119d5SChris Mason struct super_block *sb; 42516a912213SJosef Bacik struct dentry *old_parent = NULL; 425212fcfd22SChris Mason int ret = 0; 425312fcfd22SChris Mason u64 last_committed = root->fs_info->last_trans_committed; 425412fcfd22SChris Mason 425512fcfd22SChris Mason sb = inode->i_sb; 425612fcfd22SChris Mason 42573a5e1404SSage Weil if (btrfs_test_opt(root, NOTREELOG)) { 42583a5e1404SSage Weil ret = 1; 42593a5e1404SSage Weil goto end_no_trans; 42603a5e1404SSage Weil } 42613a5e1404SSage Weil 4262995946ddSMiao Xie /* 4263995946ddSMiao Xie * The prev transaction commit doesn't complete, we need do 4264995946ddSMiao Xie * full commit by ourselves. 
4265995946ddSMiao Xie */ 426612fcfd22SChris Mason if (root->fs_info->last_trans_log_full_commit > 426712fcfd22SChris Mason root->fs_info->last_trans_committed) { 426812fcfd22SChris Mason ret = 1; 426912fcfd22SChris Mason goto end_no_trans; 427012fcfd22SChris Mason } 427112fcfd22SChris Mason 427276dda93cSYan, Zheng if (root != BTRFS_I(inode)->root || 427376dda93cSYan, Zheng btrfs_root_refs(&root->root_item) == 0) { 427476dda93cSYan, Zheng ret = 1; 427576dda93cSYan, Zheng goto end_no_trans; 427676dda93cSYan, Zheng } 427776dda93cSYan, Zheng 427812fcfd22SChris Mason ret = check_parent_dirs_for_sync(trans, inode, parent, 427912fcfd22SChris Mason sb, last_committed); 428012fcfd22SChris Mason if (ret) 428112fcfd22SChris Mason goto end_no_trans; 4282e02119d5SChris Mason 428322ee6985SJosef Bacik if (btrfs_inode_in_log(inode, trans->transid)) { 4284257c62e1SChris Mason ret = BTRFS_NO_LOG_SYNC; 4285257c62e1SChris Mason goto end_no_trans; 4286257c62e1SChris Mason } 4287257c62e1SChris Mason 42888b050d35SMiao Xie ret = start_log_trans(trans, root, ctx); 42894a500fd1SYan, Zheng if (ret) 4290e87ac136SMiao Xie goto end_no_trans; 429112fcfd22SChris Mason 42928407f553SFilipe Manana ret = btrfs_log_inode(trans, root, inode, inode_only, start, end, ctx); 42934a500fd1SYan, Zheng if (ret) 42944a500fd1SYan, Zheng goto end_trans; 4295e02119d5SChris Mason 4296af4176b4SChris Mason /* 4297af4176b4SChris Mason * for regular files, if its inode is already on disk, we don't 4298af4176b4SChris Mason * have to worry about the parents at all. This is because 4299af4176b4SChris Mason * we can use the last_unlink_trans field to record renames 4300af4176b4SChris Mason * and other fun in this file. 
4301af4176b4SChris Mason */ 4302af4176b4SChris Mason if (S_ISREG(inode->i_mode) && 4303af4176b4SChris Mason BTRFS_I(inode)->generation <= last_committed && 43044a500fd1SYan, Zheng BTRFS_I(inode)->last_unlink_trans <= last_committed) { 43054a500fd1SYan, Zheng ret = 0; 43064a500fd1SYan, Zheng goto end_trans; 43074a500fd1SYan, Zheng } 4308af4176b4SChris Mason 4309af4176b4SChris Mason inode_only = LOG_INODE_EXISTS; 431012fcfd22SChris Mason while (1) { 431112fcfd22SChris Mason if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb) 4312e02119d5SChris Mason break; 4313e02119d5SChris Mason 431412fcfd22SChris Mason inode = parent->d_inode; 431576dda93cSYan, Zheng if (root != BTRFS_I(inode)->root) 431676dda93cSYan, Zheng break; 431776dda93cSYan, Zheng 431812fcfd22SChris Mason if (BTRFS_I(inode)->generation > 431912fcfd22SChris Mason root->fs_info->last_trans_committed) { 432049dae1bcSFilipe Manana ret = btrfs_log_inode(trans, root, inode, inode_only, 43218407f553SFilipe Manana 0, LLONG_MAX, ctx); 43224a500fd1SYan, Zheng if (ret) 43234a500fd1SYan, Zheng goto end_trans; 4324e02119d5SChris Mason } 432576dda93cSYan, Zheng if (IS_ROOT(parent)) 432612fcfd22SChris Mason break; 432712fcfd22SChris Mason 43286a912213SJosef Bacik parent = dget_parent(parent); 43296a912213SJosef Bacik dput(old_parent); 43306a912213SJosef Bacik old_parent = parent; 433112fcfd22SChris Mason } 433212fcfd22SChris Mason ret = 0; 43334a500fd1SYan, Zheng end_trans: 43346a912213SJosef Bacik dput(old_parent); 43354a500fd1SYan, Zheng if (ret < 0) { 4336995946ddSMiao Xie btrfs_set_log_full_commit(root->fs_info, trans); 43374a500fd1SYan, Zheng ret = 1; 43384a500fd1SYan, Zheng } 43398b050d35SMiao Xie 43408b050d35SMiao Xie if (ret) 43418b050d35SMiao Xie btrfs_remove_log_ctx(root, ctx); 434212fcfd22SChris Mason btrfs_end_log_trans(root); 434312fcfd22SChris Mason end_no_trans: 434412fcfd22SChris Mason return ret; 4345e02119d5SChris Mason } 4346e02119d5SChris Mason 4347e02119d5SChris Mason /* 4348e02119d5SChris 
Mason * it is not safe to log dentry if the chunk root has added new 4349e02119d5SChris Mason * chunks. This returns 0 if the dentry was logged, and 1 otherwise. 4350e02119d5SChris Mason * If this returns 1, you must commit the transaction to safely get your 4351e02119d5SChris Mason * data on disk. 4352e02119d5SChris Mason */ 4353e02119d5SChris Mason int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, 43548b050d35SMiao Xie struct btrfs_root *root, struct dentry *dentry, 435549dae1bcSFilipe Manana const loff_t start, 435649dae1bcSFilipe Manana const loff_t end, 43578b050d35SMiao Xie struct btrfs_log_ctx *ctx) 4358e02119d5SChris Mason { 43596a912213SJosef Bacik struct dentry *parent = dget_parent(dentry); 43606a912213SJosef Bacik int ret; 43616a912213SJosef Bacik 43628b050d35SMiao Xie ret = btrfs_log_inode_parent(trans, root, dentry->d_inode, parent, 436349dae1bcSFilipe Manana start, end, 0, ctx); 43646a912213SJosef Bacik dput(parent); 43656a912213SJosef Bacik 43666a912213SJosef Bacik return ret; 4367e02119d5SChris Mason } 4368e02119d5SChris Mason 4369e02119d5SChris Mason /* 4370e02119d5SChris Mason * should be called during mount to recover any replay any log trees 4371e02119d5SChris Mason * from the FS 4372e02119d5SChris Mason */ 4373e02119d5SChris Mason int btrfs_recover_log_trees(struct btrfs_root *log_root_tree) 4374e02119d5SChris Mason { 4375e02119d5SChris Mason int ret; 4376e02119d5SChris Mason struct btrfs_path *path; 4377e02119d5SChris Mason struct btrfs_trans_handle *trans; 4378e02119d5SChris Mason struct btrfs_key key; 4379e02119d5SChris Mason struct btrfs_key found_key; 4380e02119d5SChris Mason struct btrfs_key tmp_key; 4381e02119d5SChris Mason struct btrfs_root *log; 4382e02119d5SChris Mason struct btrfs_fs_info *fs_info = log_root_tree->fs_info; 4383e02119d5SChris Mason struct walk_control wc = { 4384e02119d5SChris Mason .process_func = process_one_buffer, 4385e02119d5SChris Mason .stage = 0, 4386e02119d5SChris Mason }; 4387e02119d5SChris Mason 
4388e02119d5SChris Mason path = btrfs_alloc_path(); 4389db5b493aSTsutomu Itoh if (!path) 4390db5b493aSTsutomu Itoh return -ENOMEM; 4391db5b493aSTsutomu Itoh 4392db5b493aSTsutomu Itoh fs_info->log_root_recovering = 1; 4393e02119d5SChris Mason 43944a500fd1SYan, Zheng trans = btrfs_start_transaction(fs_info->tree_root, 0); 439579787eaaSJeff Mahoney if (IS_ERR(trans)) { 439679787eaaSJeff Mahoney ret = PTR_ERR(trans); 439779787eaaSJeff Mahoney goto error; 439879787eaaSJeff Mahoney } 4399e02119d5SChris Mason 4400e02119d5SChris Mason wc.trans = trans; 4401e02119d5SChris Mason wc.pin = 1; 4402e02119d5SChris Mason 4403db5b493aSTsutomu Itoh ret = walk_log_tree(trans, log_root_tree, &wc); 440479787eaaSJeff Mahoney if (ret) { 440579787eaaSJeff Mahoney btrfs_error(fs_info, ret, "Failed to pin buffers while " 440679787eaaSJeff Mahoney "recovering log root tree."); 440779787eaaSJeff Mahoney goto error; 440879787eaaSJeff Mahoney } 4409e02119d5SChris Mason 4410e02119d5SChris Mason again: 4411e02119d5SChris Mason key.objectid = BTRFS_TREE_LOG_OBJECTID; 4412e02119d5SChris Mason key.offset = (u64)-1; 4413962a298fSDavid Sterba key.type = BTRFS_ROOT_ITEM_KEY; 4414e02119d5SChris Mason 4415e02119d5SChris Mason while (1) { 4416e02119d5SChris Mason ret = btrfs_search_slot(NULL, log_root_tree, &key, path, 0, 0); 441779787eaaSJeff Mahoney 441879787eaaSJeff Mahoney if (ret < 0) { 441979787eaaSJeff Mahoney btrfs_error(fs_info, ret, 442079787eaaSJeff Mahoney "Couldn't find tree log root."); 442179787eaaSJeff Mahoney goto error; 442279787eaaSJeff Mahoney } 4423e02119d5SChris Mason if (ret > 0) { 4424e02119d5SChris Mason if (path->slots[0] == 0) 4425e02119d5SChris Mason break; 4426e02119d5SChris Mason path->slots[0]--; 4427e02119d5SChris Mason } 4428e02119d5SChris Mason btrfs_item_key_to_cpu(path->nodes[0], &found_key, 4429e02119d5SChris Mason path->slots[0]); 4430b3b4aa74SDavid Sterba btrfs_release_path(path); 4431e02119d5SChris Mason if (found_key.objectid != BTRFS_TREE_LOG_OBJECTID) 
4432e02119d5SChris Mason break; 4433e02119d5SChris Mason 4434cb517eabSMiao Xie log = btrfs_read_fs_root(log_root_tree, &found_key); 443579787eaaSJeff Mahoney if (IS_ERR(log)) { 443679787eaaSJeff Mahoney ret = PTR_ERR(log); 443779787eaaSJeff Mahoney btrfs_error(fs_info, ret, 443879787eaaSJeff Mahoney "Couldn't read tree log root."); 443979787eaaSJeff Mahoney goto error; 444079787eaaSJeff Mahoney } 4441e02119d5SChris Mason 4442e02119d5SChris Mason tmp_key.objectid = found_key.offset; 4443e02119d5SChris Mason tmp_key.type = BTRFS_ROOT_ITEM_KEY; 4444e02119d5SChris Mason tmp_key.offset = (u64)-1; 4445e02119d5SChris Mason 4446e02119d5SChris Mason wc.replay_dest = btrfs_read_fs_root_no_name(fs_info, &tmp_key); 444779787eaaSJeff Mahoney if (IS_ERR(wc.replay_dest)) { 444879787eaaSJeff Mahoney ret = PTR_ERR(wc.replay_dest); 4449b50c6e25SJosef Bacik free_extent_buffer(log->node); 4450b50c6e25SJosef Bacik free_extent_buffer(log->commit_root); 4451b50c6e25SJosef Bacik kfree(log); 445279787eaaSJeff Mahoney btrfs_error(fs_info, ret, "Couldn't read target root " 445379787eaaSJeff Mahoney "for tree log recovery."); 445479787eaaSJeff Mahoney goto error; 445579787eaaSJeff Mahoney } 4456e02119d5SChris Mason 445707d400a6SYan Zheng wc.replay_dest->log_root = log; 44585d4f98a2SYan Zheng btrfs_record_root_in_trans(trans, wc.replay_dest); 4459e02119d5SChris Mason ret = walk_log_tree(trans, log, &wc); 4460e02119d5SChris Mason 4461b50c6e25SJosef Bacik if (!ret && wc.stage == LOG_WALK_REPLAY_ALL) { 4462e02119d5SChris Mason ret = fixup_inode_link_counts(trans, wc.replay_dest, 4463e02119d5SChris Mason path); 4464e02119d5SChris Mason } 4465e02119d5SChris Mason 4466e02119d5SChris Mason key.offset = found_key.offset - 1; 446707d400a6SYan Zheng wc.replay_dest->log_root = NULL; 4468e02119d5SChris Mason free_extent_buffer(log->node); 4469b263c2c8SChris Mason free_extent_buffer(log->commit_root); 4470e02119d5SChris Mason kfree(log); 4471e02119d5SChris Mason 4472b50c6e25SJosef Bacik if (ret) 
4473b50c6e25SJosef Bacik goto error; 4474b50c6e25SJosef Bacik 4475e02119d5SChris Mason if (found_key.offset == 0) 4476e02119d5SChris Mason break; 4477e02119d5SChris Mason } 4478b3b4aa74SDavid Sterba btrfs_release_path(path); 4479e02119d5SChris Mason 4480e02119d5SChris Mason /* step one is to pin it all, step two is to replay just inodes */ 4481e02119d5SChris Mason if (wc.pin) { 4482e02119d5SChris Mason wc.pin = 0; 4483e02119d5SChris Mason wc.process_func = replay_one_buffer; 4484e02119d5SChris Mason wc.stage = LOG_WALK_REPLAY_INODES; 4485e02119d5SChris Mason goto again; 4486e02119d5SChris Mason } 4487e02119d5SChris Mason /* step three is to replay everything */ 4488e02119d5SChris Mason if (wc.stage < LOG_WALK_REPLAY_ALL) { 4489e02119d5SChris Mason wc.stage++; 4490e02119d5SChris Mason goto again; 4491e02119d5SChris Mason } 4492e02119d5SChris Mason 4493e02119d5SChris Mason btrfs_free_path(path); 4494e02119d5SChris Mason 4495abefa55aSJosef Bacik /* step 4: commit the transaction, which also unpins the blocks */ 4496abefa55aSJosef Bacik ret = btrfs_commit_transaction(trans, fs_info->tree_root); 4497abefa55aSJosef Bacik if (ret) 4498abefa55aSJosef Bacik return ret; 4499abefa55aSJosef Bacik 4500e02119d5SChris Mason free_extent_buffer(log_root_tree->node); 4501e02119d5SChris Mason log_root_tree->log_root = NULL; 4502e02119d5SChris Mason fs_info->log_root_recovering = 0; 4503e02119d5SChris Mason kfree(log_root_tree); 450479787eaaSJeff Mahoney 4505abefa55aSJosef Bacik return 0; 450679787eaaSJeff Mahoney error: 4507b50c6e25SJosef Bacik if (wc.trans) 4508b50c6e25SJosef Bacik btrfs_end_transaction(wc.trans, fs_info->tree_root); 450979787eaaSJeff Mahoney btrfs_free_path(path); 451079787eaaSJeff Mahoney return ret; 4511e02119d5SChris Mason } 451212fcfd22SChris Mason 451312fcfd22SChris Mason /* 451412fcfd22SChris Mason * there are some corner cases where we want to force a full 451512fcfd22SChris Mason * commit instead of allowing a directory to be logged. 
451612fcfd22SChris Mason * 451712fcfd22SChris Mason * They revolve around files there were unlinked from the directory, and 451812fcfd22SChris Mason * this function updates the parent directory so that a full commit is 451912fcfd22SChris Mason * properly done if it is fsync'd later after the unlinks are done. 452012fcfd22SChris Mason */ 452112fcfd22SChris Mason void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans, 452212fcfd22SChris Mason struct inode *dir, struct inode *inode, 452312fcfd22SChris Mason int for_rename) 452412fcfd22SChris Mason { 452512fcfd22SChris Mason /* 4526af4176b4SChris Mason * when we're logging a file, if it hasn't been renamed 4527af4176b4SChris Mason * or unlinked, and its inode is fully committed on disk, 4528af4176b4SChris Mason * we don't have to worry about walking up the directory chain 4529af4176b4SChris Mason * to log its parents. 4530af4176b4SChris Mason * 4531af4176b4SChris Mason * So, we use the last_unlink_trans field to put this transid 4532af4176b4SChris Mason * into the file. When the file is logged we check it and 4533af4176b4SChris Mason * don't log the parents if the file is fully on disk. 
4534af4176b4SChris Mason */ 4535af4176b4SChris Mason if (S_ISREG(inode->i_mode)) 4536af4176b4SChris Mason BTRFS_I(inode)->last_unlink_trans = trans->transid; 4537af4176b4SChris Mason 4538af4176b4SChris Mason /* 453912fcfd22SChris Mason * if this directory was already logged any new 454012fcfd22SChris Mason * names for this file/dir will get recorded 454112fcfd22SChris Mason */ 454212fcfd22SChris Mason smp_mb(); 454312fcfd22SChris Mason if (BTRFS_I(dir)->logged_trans == trans->transid) 454412fcfd22SChris Mason return; 454512fcfd22SChris Mason 454612fcfd22SChris Mason /* 454712fcfd22SChris Mason * if the inode we're about to unlink was logged, 454812fcfd22SChris Mason * the log will be properly updated for any new names 454912fcfd22SChris Mason */ 455012fcfd22SChris Mason if (BTRFS_I(inode)->logged_trans == trans->transid) 455112fcfd22SChris Mason return; 455212fcfd22SChris Mason 455312fcfd22SChris Mason /* 455412fcfd22SChris Mason * when renaming files across directories, if the directory 455512fcfd22SChris Mason * there we're unlinking from gets fsync'd later on, there's 455612fcfd22SChris Mason * no way to find the destination directory later and fsync it 455712fcfd22SChris Mason * properly. So, we have to be conservative and force commits 455812fcfd22SChris Mason * so the new name gets discovered. 455912fcfd22SChris Mason */ 456012fcfd22SChris Mason if (for_rename) 456112fcfd22SChris Mason goto record; 456212fcfd22SChris Mason 456312fcfd22SChris Mason /* we can safely do the unlink without any special recording */ 456412fcfd22SChris Mason return; 456512fcfd22SChris Mason 456612fcfd22SChris Mason record: 456712fcfd22SChris Mason BTRFS_I(dir)->last_unlink_trans = trans->transid; 456812fcfd22SChris Mason } 456912fcfd22SChris Mason 457012fcfd22SChris Mason /* 457112fcfd22SChris Mason * Call this after adding a new name for a file and it will properly 457212fcfd22SChris Mason * update the log to reflect the new name. 
457312fcfd22SChris Mason * 457412fcfd22SChris Mason * It will return zero if all goes well, and it will return 1 if a 457512fcfd22SChris Mason * full transaction commit is required. 457612fcfd22SChris Mason */ 457712fcfd22SChris Mason int btrfs_log_new_name(struct btrfs_trans_handle *trans, 457812fcfd22SChris Mason struct inode *inode, struct inode *old_dir, 457912fcfd22SChris Mason struct dentry *parent) 458012fcfd22SChris Mason { 458112fcfd22SChris Mason struct btrfs_root * root = BTRFS_I(inode)->root; 458212fcfd22SChris Mason 458312fcfd22SChris Mason /* 4584af4176b4SChris Mason * this will force the logging code to walk the dentry chain 4585af4176b4SChris Mason * up for the file 4586af4176b4SChris Mason */ 4587af4176b4SChris Mason if (S_ISREG(inode->i_mode)) 4588af4176b4SChris Mason BTRFS_I(inode)->last_unlink_trans = trans->transid; 4589af4176b4SChris Mason 4590af4176b4SChris Mason /* 459112fcfd22SChris Mason * if this inode hasn't been logged and directory we're renaming it 459212fcfd22SChris Mason * from hasn't been logged, we don't need to log it 459312fcfd22SChris Mason */ 459412fcfd22SChris Mason if (BTRFS_I(inode)->logged_trans <= 459512fcfd22SChris Mason root->fs_info->last_trans_committed && 459612fcfd22SChris Mason (!old_dir || BTRFS_I(old_dir)->logged_trans <= 459712fcfd22SChris Mason root->fs_info->last_trans_committed)) 459812fcfd22SChris Mason return 0; 459912fcfd22SChris Mason 460049dae1bcSFilipe Manana return btrfs_log_inode_parent(trans, root, inode, parent, 0, 460149dae1bcSFilipe Manana LLONG_MAX, 1, NULL); 460212fcfd22SChris Mason } 460312fcfd22SChris Mason 4604