1e02119d5SChris Mason /* 2e02119d5SChris Mason * Copyright (C) 2008 Oracle. All rights reserved. 3e02119d5SChris Mason * 4e02119d5SChris Mason * This program is free software; you can redistribute it and/or 5e02119d5SChris Mason * modify it under the terms of the GNU General Public 6e02119d5SChris Mason * License v2 as published by the Free Software Foundation. 7e02119d5SChris Mason * 8e02119d5SChris Mason * This program is distributed in the hope that it will be useful, 9e02119d5SChris Mason * but WITHOUT ANY WARRANTY; without even the implied warranty of 10e02119d5SChris Mason * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11e02119d5SChris Mason * General Public License for more details. 12e02119d5SChris Mason * 13e02119d5SChris Mason * You should have received a copy of the GNU General Public 14e02119d5SChris Mason * License along with this program; if not, write to the 15e02119d5SChris Mason * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 16e02119d5SChris Mason * Boston, MA 021110-1307, USA. 
17e02119d5SChris Mason */ 18e02119d5SChris Mason 19e02119d5SChris Mason #include <linux/sched.h> 205a0e3ad6STejun Heo #include <linux/slab.h> 215dc562c5SJosef Bacik #include <linux/list_sort.h> 22e02119d5SChris Mason #include "ctree.h" 23e02119d5SChris Mason #include "transaction.h" 24e02119d5SChris Mason #include "disk-io.h" 25e02119d5SChris Mason #include "locking.h" 26e02119d5SChris Mason #include "print-tree.h" 27f186373fSMark Fasheh #include "backref.h" 28e02119d5SChris Mason #include "compat.h" 29b2950863SChristoph Hellwig #include "tree-log.h" 30f186373fSMark Fasheh #include "hash.h" 31e02119d5SChris Mason 32e02119d5SChris Mason /* magic values for the inode_only field in btrfs_log_inode: 33e02119d5SChris Mason * 34e02119d5SChris Mason * LOG_INODE_ALL means to log everything 35e02119d5SChris Mason * LOG_INODE_EXISTS means to log just enough to recreate the inode 36e02119d5SChris Mason * during log replay 37e02119d5SChris Mason */ 38e02119d5SChris Mason #define LOG_INODE_ALL 0 39e02119d5SChris Mason #define LOG_INODE_EXISTS 1 40e02119d5SChris Mason 41e02119d5SChris Mason /* 4212fcfd22SChris Mason * directory trouble cases 4312fcfd22SChris Mason * 4412fcfd22SChris Mason * 1) on rename or unlink, if the inode being unlinked isn't in the fsync 4512fcfd22SChris Mason * log, we must force a full commit before doing an fsync of the directory 4612fcfd22SChris Mason * where the unlink was done. 4712fcfd22SChris Mason * ---> record transid of last unlink/rename per directory 4812fcfd22SChris Mason * 4912fcfd22SChris Mason * mkdir foo/some_dir 5012fcfd22SChris Mason * normal commit 5112fcfd22SChris Mason * rename foo/some_dir foo2/some_dir 5212fcfd22SChris Mason * mkdir foo/some_dir 5312fcfd22SChris Mason * fsync foo/some_dir/some_file 5412fcfd22SChris Mason * 5512fcfd22SChris Mason * The fsync above will unlink the original some_dir without recording 5612fcfd22SChris Mason * it in its new location (foo2). 
After a crash, some_dir will be gone
 * unless the fsync of some_file forces a full commit
 *
 * 2) we must log any new names for any file or dir that is in the fsync
 * log. ---> check inode while renaming/linking.
 *
 * 2a) we must log any new names for any file or dir during rename
 * when the directory they are being removed from was logged.
 * ---> check inode and old parent dir during rename
 *
 *  2a is actually the more important variant.  Without the extra logging
 *  a crash might unlink the old name without recreating the new one
 *
 * 3) after a crash, we must go through any directories with a link count
 * of zero and redo the rm -rf
 *
 * mkdir f1/foo
 * normal commit
 * rm -rf f1/foo
 * fsync(f1)
 *
 * The directory f1/foo was fully removed from the FS, but fsync was never
 * called on f1/foo, only its parent dir.  After a crash the rm -rf must
 * be replayed.  This must be able to recurse down the entire
 * directory tree.  The inode link count fixup code takes care of the
 * ugly details.
 */

/*
 * stages for the tree walking.  The first
 * stage (0) is to only pin down the blocks we find
 * the second stage (1) is to make sure that all the inodes
 * we find in the log are created in the subvolume.
89e02119d5SChris Mason * 90e02119d5SChris Mason * The last stage is to deal with directories and links and extents 91e02119d5SChris Mason * and all the other fun semantics 92e02119d5SChris Mason */ 93e02119d5SChris Mason #define LOG_WALK_PIN_ONLY 0 94e02119d5SChris Mason #define LOG_WALK_REPLAY_INODES 1 95e02119d5SChris Mason #define LOG_WALK_REPLAY_ALL 2 96e02119d5SChris Mason 9712fcfd22SChris Mason static int btrfs_log_inode(struct btrfs_trans_handle *trans, 98e02119d5SChris Mason struct btrfs_root *root, struct inode *inode, 99e02119d5SChris Mason int inode_only); 100ec051c0fSYan Zheng static int link_to_fixup_dir(struct btrfs_trans_handle *trans, 101ec051c0fSYan Zheng struct btrfs_root *root, 102ec051c0fSYan Zheng struct btrfs_path *path, u64 objectid); 10312fcfd22SChris Mason static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans, 10412fcfd22SChris Mason struct btrfs_root *root, 10512fcfd22SChris Mason struct btrfs_root *log, 10612fcfd22SChris Mason struct btrfs_path *path, 10712fcfd22SChris Mason u64 dirid, int del_all); 108e02119d5SChris Mason 109e02119d5SChris Mason /* 110e02119d5SChris Mason * tree logging is a special write ahead log used to make sure that 111e02119d5SChris Mason * fsyncs and O_SYNCs can happen without doing full tree commits. 112e02119d5SChris Mason * 113e02119d5SChris Mason * Full tree commits are expensive because they require commonly 114e02119d5SChris Mason * modified blocks to be recowed, creating many dirty pages in the 115e02119d5SChris Mason * extent tree an 4x-6x higher write load than ext3. 116e02119d5SChris Mason * 117e02119d5SChris Mason * Instead of doing a tree commit on every fsync, we use the 118e02119d5SChris Mason * key ranges and transaction ids to find items for a given file or directory 119e02119d5SChris Mason * that have changed in this transaction. 
Those items are copied into 120e02119d5SChris Mason * a special tree (one per subvolume root), that tree is written to disk 121e02119d5SChris Mason * and then the fsync is considered complete. 122e02119d5SChris Mason * 123e02119d5SChris Mason * After a crash, items are copied out of the log-tree back into the 124e02119d5SChris Mason * subvolume tree. Any file data extents found are recorded in the extent 125e02119d5SChris Mason * allocation tree, and the log-tree freed. 126e02119d5SChris Mason * 127e02119d5SChris Mason * The log tree is read three times, once to pin down all the extents it is 128e02119d5SChris Mason * using in ram and once, once to create all the inodes logged in the tree 129e02119d5SChris Mason * and once to do all the other items. 130e02119d5SChris Mason */ 131e02119d5SChris Mason 132e02119d5SChris Mason /* 133e02119d5SChris Mason * start a sub transaction and setup the log tree 134e02119d5SChris Mason * this increments the log tree writer count to make the people 135e02119d5SChris Mason * syncing the tree wait for us to finish 136e02119d5SChris Mason */ 137e02119d5SChris Mason static int start_log_trans(struct btrfs_trans_handle *trans, 138e02119d5SChris Mason struct btrfs_root *root) 139e02119d5SChris Mason { 140e02119d5SChris Mason int ret; 1414a500fd1SYan, Zheng int err = 0; 1427237f183SYan Zheng 1437237f183SYan Zheng mutex_lock(&root->log_mutex); 1447237f183SYan Zheng if (root->log_root) { 145ff782e0aSJosef Bacik if (!root->log_start_pid) { 146ff782e0aSJosef Bacik root->log_start_pid = current->pid; 147ff782e0aSJosef Bacik root->log_multiple_pids = false; 148ff782e0aSJosef Bacik } else if (root->log_start_pid != current->pid) { 149ff782e0aSJosef Bacik root->log_multiple_pids = true; 150ff782e0aSJosef Bacik } 151ff782e0aSJosef Bacik 1522ecb7923SMiao Xie atomic_inc(&root->log_batch); 1537237f183SYan Zheng atomic_inc(&root->log_writers); 1547237f183SYan Zheng mutex_unlock(&root->log_mutex); 1557237f183SYan Zheng return 0; 1567237f183SYan Zheng 
} 157ff782e0aSJosef Bacik root->log_multiple_pids = false; 158ff782e0aSJosef Bacik root->log_start_pid = current->pid; 159e02119d5SChris Mason mutex_lock(&root->fs_info->tree_log_mutex); 160e02119d5SChris Mason if (!root->fs_info->log_root_tree) { 161e02119d5SChris Mason ret = btrfs_init_log_root_tree(trans, root->fs_info); 1624a500fd1SYan, Zheng if (ret) 1634a500fd1SYan, Zheng err = ret; 164e02119d5SChris Mason } 1654a500fd1SYan, Zheng if (err == 0 && !root->log_root) { 166e02119d5SChris Mason ret = btrfs_add_log_tree(trans, root); 1674a500fd1SYan, Zheng if (ret) 1684a500fd1SYan, Zheng err = ret; 169e02119d5SChris Mason } 170e02119d5SChris Mason mutex_unlock(&root->fs_info->tree_log_mutex); 1712ecb7923SMiao Xie atomic_inc(&root->log_batch); 1727237f183SYan Zheng atomic_inc(&root->log_writers); 1737237f183SYan Zheng mutex_unlock(&root->log_mutex); 1744a500fd1SYan, Zheng return err; 175e02119d5SChris Mason } 176e02119d5SChris Mason 177e02119d5SChris Mason /* 178e02119d5SChris Mason * returns 0 if there was a log transaction running and we were able 179e02119d5SChris Mason * to join, or returns -ENOENT if there were not transactions 180e02119d5SChris Mason * in progress 181e02119d5SChris Mason */ 182e02119d5SChris Mason static int join_running_log_trans(struct btrfs_root *root) 183e02119d5SChris Mason { 184e02119d5SChris Mason int ret = -ENOENT; 185e02119d5SChris Mason 186e02119d5SChris Mason smp_mb(); 187e02119d5SChris Mason if (!root->log_root) 188e02119d5SChris Mason return -ENOENT; 189e02119d5SChris Mason 1907237f183SYan Zheng mutex_lock(&root->log_mutex); 191e02119d5SChris Mason if (root->log_root) { 192e02119d5SChris Mason ret = 0; 1937237f183SYan Zheng atomic_inc(&root->log_writers); 194e02119d5SChris Mason } 1957237f183SYan Zheng mutex_unlock(&root->log_mutex); 196e02119d5SChris Mason return ret; 197e02119d5SChris Mason } 198e02119d5SChris Mason 199e02119d5SChris Mason /* 20012fcfd22SChris Mason * This either makes the current running log transaction wait 
20112fcfd22SChris Mason * until you call btrfs_end_log_trans() or it makes any future 20212fcfd22SChris Mason * log transactions wait until you call btrfs_end_log_trans() 20312fcfd22SChris Mason */ 20412fcfd22SChris Mason int btrfs_pin_log_trans(struct btrfs_root *root) 20512fcfd22SChris Mason { 20612fcfd22SChris Mason int ret = -ENOENT; 20712fcfd22SChris Mason 20812fcfd22SChris Mason mutex_lock(&root->log_mutex); 20912fcfd22SChris Mason atomic_inc(&root->log_writers); 21012fcfd22SChris Mason mutex_unlock(&root->log_mutex); 21112fcfd22SChris Mason return ret; 21212fcfd22SChris Mason } 21312fcfd22SChris Mason 21412fcfd22SChris Mason /* 215e02119d5SChris Mason * indicate we're done making changes to the log tree 216e02119d5SChris Mason * and wake up anyone waiting to do a sync 217e02119d5SChris Mason */ 218143bede5SJeff Mahoney void btrfs_end_log_trans(struct btrfs_root *root) 219e02119d5SChris Mason { 2207237f183SYan Zheng if (atomic_dec_and_test(&root->log_writers)) { 221e02119d5SChris Mason smp_mb(); 2227237f183SYan Zheng if (waitqueue_active(&root->log_writer_wait)) 2237237f183SYan Zheng wake_up(&root->log_writer_wait); 2247237f183SYan Zheng } 225e02119d5SChris Mason } 226e02119d5SChris Mason 227e02119d5SChris Mason 228e02119d5SChris Mason /* 229e02119d5SChris Mason * the walk control struct is used to pass state down the chain when 230e02119d5SChris Mason * processing the log tree. The stage field tells us which part 231e02119d5SChris Mason * of the log tree processing we are currently doing. The others 232e02119d5SChris Mason * are state fields used for that specific part 233e02119d5SChris Mason */ 234e02119d5SChris Mason struct walk_control { 235e02119d5SChris Mason /* should we free the extent on disk when done? This is used 236e02119d5SChris Mason * at transaction commit time while freeing a log tree 237e02119d5SChris Mason */ 238e02119d5SChris Mason int free; 239e02119d5SChris Mason 240e02119d5SChris Mason /* should we write out the extent buffer? 
This is used 241e02119d5SChris Mason * while flushing the log tree to disk during a sync 242e02119d5SChris Mason */ 243e02119d5SChris Mason int write; 244e02119d5SChris Mason 245e02119d5SChris Mason /* should we wait for the extent buffer io to finish? Also used 246e02119d5SChris Mason * while flushing the log tree to disk for a sync 247e02119d5SChris Mason */ 248e02119d5SChris Mason int wait; 249e02119d5SChris Mason 250e02119d5SChris Mason /* pin only walk, we record which extents on disk belong to the 251e02119d5SChris Mason * log trees 252e02119d5SChris Mason */ 253e02119d5SChris Mason int pin; 254e02119d5SChris Mason 255e02119d5SChris Mason /* what stage of the replay code we're currently in */ 256e02119d5SChris Mason int stage; 257e02119d5SChris Mason 258e02119d5SChris Mason /* the root we are currently replaying */ 259e02119d5SChris Mason struct btrfs_root *replay_dest; 260e02119d5SChris Mason 261e02119d5SChris Mason /* the trans handle for the current replay */ 262e02119d5SChris Mason struct btrfs_trans_handle *trans; 263e02119d5SChris Mason 264e02119d5SChris Mason /* the function that gets used to process blocks we find in the 265e02119d5SChris Mason * tree. 
Note the extent_buffer might not be up to date when it is 266e02119d5SChris Mason * passed in, and it must be checked or read if you need the data 267e02119d5SChris Mason * inside it 268e02119d5SChris Mason */ 269e02119d5SChris Mason int (*process_func)(struct btrfs_root *log, struct extent_buffer *eb, 270e02119d5SChris Mason struct walk_control *wc, u64 gen); 271e02119d5SChris Mason }; 272e02119d5SChris Mason 273e02119d5SChris Mason /* 274e02119d5SChris Mason * process_func used to pin down extents, write them or wait on them 275e02119d5SChris Mason */ 276e02119d5SChris Mason static int process_one_buffer(struct btrfs_root *log, 277e02119d5SChris Mason struct extent_buffer *eb, 278e02119d5SChris Mason struct walk_control *wc, u64 gen) 279e02119d5SChris Mason { 280*b50c6e25SJosef Bacik int ret = 0; 281*b50c6e25SJosef Bacik 28204018de5SJosef Bacik if (wc->pin) 283*b50c6e25SJosef Bacik ret = btrfs_pin_extent_for_log_replay(log->fs_info->extent_root, 284e688b725SChris Mason eb->start, eb->len); 285e02119d5SChris Mason 286*b50c6e25SJosef Bacik if (!ret && btrfs_buffer_uptodate(eb, gen, 0)) { 287e02119d5SChris Mason if (wc->write) 288e02119d5SChris Mason btrfs_write_tree_block(eb); 289e02119d5SChris Mason if (wc->wait) 290e02119d5SChris Mason btrfs_wait_tree_block_writeback(eb); 291e02119d5SChris Mason } 292*b50c6e25SJosef Bacik return ret; 293e02119d5SChris Mason } 294e02119d5SChris Mason 295e02119d5SChris Mason /* 296e02119d5SChris Mason * Item overwrite used by replay and tree logging. eb, slot and key all refer 297e02119d5SChris Mason * to the src data we are copying out. 298e02119d5SChris Mason * 299e02119d5SChris Mason * root is the tree we are copying into, and path is a scratch 300e02119d5SChris Mason * path for use in this function (it should be released on entry and 301e02119d5SChris Mason * will be released on exit). 
302e02119d5SChris Mason * 303e02119d5SChris Mason * If the key is already in the destination tree the existing item is 304e02119d5SChris Mason * overwritten. If the existing item isn't big enough, it is extended. 305e02119d5SChris Mason * If it is too large, it is truncated. 306e02119d5SChris Mason * 307e02119d5SChris Mason * If the key isn't in the destination yet, a new item is inserted. 308e02119d5SChris Mason */ 309e02119d5SChris Mason static noinline int overwrite_item(struct btrfs_trans_handle *trans, 310e02119d5SChris Mason struct btrfs_root *root, 311e02119d5SChris Mason struct btrfs_path *path, 312e02119d5SChris Mason struct extent_buffer *eb, int slot, 313e02119d5SChris Mason struct btrfs_key *key) 314e02119d5SChris Mason { 315e02119d5SChris Mason int ret; 316e02119d5SChris Mason u32 item_size; 317e02119d5SChris Mason u64 saved_i_size = 0; 318e02119d5SChris Mason int save_old_i_size = 0; 319e02119d5SChris Mason unsigned long src_ptr; 320e02119d5SChris Mason unsigned long dst_ptr; 321e02119d5SChris Mason int overwrite_root = 0; 3224bc4bee4SJosef Bacik bool inode_item = key->type == BTRFS_INODE_ITEM_KEY; 323e02119d5SChris Mason 324e02119d5SChris Mason if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) 325e02119d5SChris Mason overwrite_root = 1; 326e02119d5SChris Mason 327e02119d5SChris Mason item_size = btrfs_item_size_nr(eb, slot); 328e02119d5SChris Mason src_ptr = btrfs_item_ptr_offset(eb, slot); 329e02119d5SChris Mason 330e02119d5SChris Mason /* look for the key in the destination tree */ 331e02119d5SChris Mason ret = btrfs_search_slot(NULL, root, key, path, 0, 0); 3324bc4bee4SJosef Bacik if (ret < 0) 3334bc4bee4SJosef Bacik return ret; 3344bc4bee4SJosef Bacik 335e02119d5SChris Mason if (ret == 0) { 336e02119d5SChris Mason char *src_copy; 337e02119d5SChris Mason char *dst_copy; 338e02119d5SChris Mason u32 dst_size = btrfs_item_size_nr(path->nodes[0], 339e02119d5SChris Mason path->slots[0]); 340e02119d5SChris Mason if (dst_size != item_size) 
341e02119d5SChris Mason goto insert; 342e02119d5SChris Mason 343e02119d5SChris Mason if (item_size == 0) { 344b3b4aa74SDavid Sterba btrfs_release_path(path); 345e02119d5SChris Mason return 0; 346e02119d5SChris Mason } 347e02119d5SChris Mason dst_copy = kmalloc(item_size, GFP_NOFS); 348e02119d5SChris Mason src_copy = kmalloc(item_size, GFP_NOFS); 3492a29edc6Sliubo if (!dst_copy || !src_copy) { 350b3b4aa74SDavid Sterba btrfs_release_path(path); 3512a29edc6Sliubo kfree(dst_copy); 3522a29edc6Sliubo kfree(src_copy); 3532a29edc6Sliubo return -ENOMEM; 3542a29edc6Sliubo } 355e02119d5SChris Mason 356e02119d5SChris Mason read_extent_buffer(eb, src_copy, src_ptr, item_size); 357e02119d5SChris Mason 358e02119d5SChris Mason dst_ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]); 359e02119d5SChris Mason read_extent_buffer(path->nodes[0], dst_copy, dst_ptr, 360e02119d5SChris Mason item_size); 361e02119d5SChris Mason ret = memcmp(dst_copy, src_copy, item_size); 362e02119d5SChris Mason 363e02119d5SChris Mason kfree(dst_copy); 364e02119d5SChris Mason kfree(src_copy); 365e02119d5SChris Mason /* 366e02119d5SChris Mason * they have the same contents, just return, this saves 367e02119d5SChris Mason * us from cowing blocks in the destination tree and doing 368e02119d5SChris Mason * extra writes that may not have been done by a previous 369e02119d5SChris Mason * sync 370e02119d5SChris Mason */ 371e02119d5SChris Mason if (ret == 0) { 372b3b4aa74SDavid Sterba btrfs_release_path(path); 373e02119d5SChris Mason return 0; 374e02119d5SChris Mason } 375e02119d5SChris Mason 3764bc4bee4SJosef Bacik /* 3774bc4bee4SJosef Bacik * We need to load the old nbytes into the inode so when we 3784bc4bee4SJosef Bacik * replay the extents we've logged we get the right nbytes. 
3794bc4bee4SJosef Bacik */ 3804bc4bee4SJosef Bacik if (inode_item) { 3814bc4bee4SJosef Bacik struct btrfs_inode_item *item; 3824bc4bee4SJosef Bacik u64 nbytes; 3834bc4bee4SJosef Bacik 3844bc4bee4SJosef Bacik item = btrfs_item_ptr(path->nodes[0], path->slots[0], 3854bc4bee4SJosef Bacik struct btrfs_inode_item); 3864bc4bee4SJosef Bacik nbytes = btrfs_inode_nbytes(path->nodes[0], item); 3874bc4bee4SJosef Bacik item = btrfs_item_ptr(eb, slot, 3884bc4bee4SJosef Bacik struct btrfs_inode_item); 3894bc4bee4SJosef Bacik btrfs_set_inode_nbytes(eb, item, nbytes); 3904bc4bee4SJosef Bacik } 3914bc4bee4SJosef Bacik } else if (inode_item) { 3924bc4bee4SJosef Bacik struct btrfs_inode_item *item; 3934bc4bee4SJosef Bacik 3944bc4bee4SJosef Bacik /* 3954bc4bee4SJosef Bacik * New inode, set nbytes to 0 so that the nbytes comes out 3964bc4bee4SJosef Bacik * properly when we replay the extents. 3974bc4bee4SJosef Bacik */ 3984bc4bee4SJosef Bacik item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item); 3994bc4bee4SJosef Bacik btrfs_set_inode_nbytes(eb, item, 0); 400e02119d5SChris Mason } 401e02119d5SChris Mason insert: 402b3b4aa74SDavid Sterba btrfs_release_path(path); 403e02119d5SChris Mason /* try to insert the key into the destination tree */ 404e02119d5SChris Mason ret = btrfs_insert_empty_item(trans, root, path, 405e02119d5SChris Mason key, item_size); 406e02119d5SChris Mason 407e02119d5SChris Mason /* make sure any existing item is the correct size */ 408e02119d5SChris Mason if (ret == -EEXIST) { 409e02119d5SChris Mason u32 found_size; 410e02119d5SChris Mason found_size = btrfs_item_size_nr(path->nodes[0], 411e02119d5SChris Mason path->slots[0]); 412143bede5SJeff Mahoney if (found_size > item_size) 413afe5fea7STsutomu Itoh btrfs_truncate_item(root, path, item_size, 1); 414143bede5SJeff Mahoney else if (found_size < item_size) 4154b90c680STsutomu Itoh btrfs_extend_item(root, path, 41687b29b20SYan Zheng item_size - found_size); 417e02119d5SChris Mason } else if (ret) { 
4184a500fd1SYan, Zheng return ret; 419e02119d5SChris Mason } 420e02119d5SChris Mason dst_ptr = btrfs_item_ptr_offset(path->nodes[0], 421e02119d5SChris Mason path->slots[0]); 422e02119d5SChris Mason 423e02119d5SChris Mason /* don't overwrite an existing inode if the generation number 424e02119d5SChris Mason * was logged as zero. This is done when the tree logging code 425e02119d5SChris Mason * is just logging an inode to make sure it exists after recovery. 426e02119d5SChris Mason * 427e02119d5SChris Mason * Also, don't overwrite i_size on directories during replay. 428e02119d5SChris Mason * log replay inserts and removes directory items based on the 429e02119d5SChris Mason * state of the tree found in the subvolume, and i_size is modified 430e02119d5SChris Mason * as it goes 431e02119d5SChris Mason */ 432e02119d5SChris Mason if (key->type == BTRFS_INODE_ITEM_KEY && ret == -EEXIST) { 433e02119d5SChris Mason struct btrfs_inode_item *src_item; 434e02119d5SChris Mason struct btrfs_inode_item *dst_item; 435e02119d5SChris Mason 436e02119d5SChris Mason src_item = (struct btrfs_inode_item *)src_ptr; 437e02119d5SChris Mason dst_item = (struct btrfs_inode_item *)dst_ptr; 438e02119d5SChris Mason 439e02119d5SChris Mason if (btrfs_inode_generation(eb, src_item) == 0) 440e02119d5SChris Mason goto no_copy; 441e02119d5SChris Mason 442e02119d5SChris Mason if (overwrite_root && 443e02119d5SChris Mason S_ISDIR(btrfs_inode_mode(eb, src_item)) && 444e02119d5SChris Mason S_ISDIR(btrfs_inode_mode(path->nodes[0], dst_item))) { 445e02119d5SChris Mason save_old_i_size = 1; 446e02119d5SChris Mason saved_i_size = btrfs_inode_size(path->nodes[0], 447e02119d5SChris Mason dst_item); 448e02119d5SChris Mason } 449e02119d5SChris Mason } 450e02119d5SChris Mason 451e02119d5SChris Mason copy_extent_buffer(path->nodes[0], eb, dst_ptr, 452e02119d5SChris Mason src_ptr, item_size); 453e02119d5SChris Mason 454e02119d5SChris Mason if (save_old_i_size) { 455e02119d5SChris Mason struct btrfs_inode_item 
*dst_item; 456e02119d5SChris Mason dst_item = (struct btrfs_inode_item *)dst_ptr; 457e02119d5SChris Mason btrfs_set_inode_size(path->nodes[0], dst_item, saved_i_size); 458e02119d5SChris Mason } 459e02119d5SChris Mason 460e02119d5SChris Mason /* make sure the generation is filled in */ 461e02119d5SChris Mason if (key->type == BTRFS_INODE_ITEM_KEY) { 462e02119d5SChris Mason struct btrfs_inode_item *dst_item; 463e02119d5SChris Mason dst_item = (struct btrfs_inode_item *)dst_ptr; 464e02119d5SChris Mason if (btrfs_inode_generation(path->nodes[0], dst_item) == 0) { 465e02119d5SChris Mason btrfs_set_inode_generation(path->nodes[0], dst_item, 466e02119d5SChris Mason trans->transid); 467e02119d5SChris Mason } 468e02119d5SChris Mason } 469e02119d5SChris Mason no_copy: 470e02119d5SChris Mason btrfs_mark_buffer_dirty(path->nodes[0]); 471b3b4aa74SDavid Sterba btrfs_release_path(path); 472e02119d5SChris Mason return 0; 473e02119d5SChris Mason } 474e02119d5SChris Mason 475e02119d5SChris Mason /* 476e02119d5SChris Mason * simple helper to read an inode off the disk from a given root 477e02119d5SChris Mason * This can only be called for subvolume roots and not for the log 478e02119d5SChris Mason */ 479e02119d5SChris Mason static noinline struct inode *read_one_inode(struct btrfs_root *root, 480e02119d5SChris Mason u64 objectid) 481e02119d5SChris Mason { 4825d4f98a2SYan Zheng struct btrfs_key key; 483e02119d5SChris Mason struct inode *inode; 484e02119d5SChris Mason 4855d4f98a2SYan Zheng key.objectid = objectid; 4865d4f98a2SYan Zheng key.type = BTRFS_INODE_ITEM_KEY; 4875d4f98a2SYan Zheng key.offset = 0; 48873f73415SJosef Bacik inode = btrfs_iget(root->fs_info->sb, &key, root, NULL); 4895d4f98a2SYan Zheng if (IS_ERR(inode)) { 4905d4f98a2SYan Zheng inode = NULL; 4915d4f98a2SYan Zheng } else if (is_bad_inode(inode)) { 492e02119d5SChris Mason iput(inode); 493e02119d5SChris Mason inode = NULL; 494e02119d5SChris Mason } 495e02119d5SChris Mason return inode; 496e02119d5SChris Mason } 
497e02119d5SChris Mason 498e02119d5SChris Mason /* replays a single extent in 'eb' at 'slot' with 'key' into the 499e02119d5SChris Mason * subvolume 'root'. path is released on entry and should be released 500e02119d5SChris Mason * on exit. 501e02119d5SChris Mason * 502e02119d5SChris Mason * extents in the log tree have not been allocated out of the extent 503e02119d5SChris Mason * tree yet. So, this completes the allocation, taking a reference 504e02119d5SChris Mason * as required if the extent already exists or creating a new extent 505e02119d5SChris Mason * if it isn't in the extent allocation tree yet. 506e02119d5SChris Mason * 507e02119d5SChris Mason * The extent is inserted into the file, dropping any existing extents 508e02119d5SChris Mason * from the file that overlap the new one. 509e02119d5SChris Mason */ 510e02119d5SChris Mason static noinline int replay_one_extent(struct btrfs_trans_handle *trans, 511e02119d5SChris Mason struct btrfs_root *root, 512e02119d5SChris Mason struct btrfs_path *path, 513e02119d5SChris Mason struct extent_buffer *eb, int slot, 514e02119d5SChris Mason struct btrfs_key *key) 515e02119d5SChris Mason { 516e02119d5SChris Mason int found_type; 517e02119d5SChris Mason u64 extent_end; 518e02119d5SChris Mason u64 start = key->offset; 5194bc4bee4SJosef Bacik u64 nbytes = 0; 520e02119d5SChris Mason struct btrfs_file_extent_item *item; 521e02119d5SChris Mason struct inode *inode = NULL; 522e02119d5SChris Mason unsigned long size; 523e02119d5SChris Mason int ret = 0; 524e02119d5SChris Mason 525e02119d5SChris Mason item = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item); 526e02119d5SChris Mason found_type = btrfs_file_extent_type(eb, item); 527e02119d5SChris Mason 528d899e052SYan Zheng if (found_type == BTRFS_FILE_EXTENT_REG || 5294bc4bee4SJosef Bacik found_type == BTRFS_FILE_EXTENT_PREALLOC) { 5304bc4bee4SJosef Bacik nbytes = btrfs_file_extent_num_bytes(eb, item); 5314bc4bee4SJosef Bacik extent_end = start + nbytes; 5324bc4bee4SJosef 
Bacik 5334bc4bee4SJosef Bacik /* 5344bc4bee4SJosef Bacik * We don't add to the inodes nbytes if we are prealloc or a 5354bc4bee4SJosef Bacik * hole. 5364bc4bee4SJosef Bacik */ 5374bc4bee4SJosef Bacik if (btrfs_file_extent_disk_bytenr(eb, item) == 0) 5384bc4bee4SJosef Bacik nbytes = 0; 5394bc4bee4SJosef Bacik } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { 540c8b97818SChris Mason size = btrfs_file_extent_inline_len(eb, item); 5414bc4bee4SJosef Bacik nbytes = btrfs_file_extent_ram_bytes(eb, item); 542fda2832fSQu Wenruo extent_end = ALIGN(start + size, root->sectorsize); 543e02119d5SChris Mason } else { 544e02119d5SChris Mason ret = 0; 545e02119d5SChris Mason goto out; 546e02119d5SChris Mason } 547e02119d5SChris Mason 548e02119d5SChris Mason inode = read_one_inode(root, key->objectid); 549e02119d5SChris Mason if (!inode) { 550e02119d5SChris Mason ret = -EIO; 551e02119d5SChris Mason goto out; 552e02119d5SChris Mason } 553e02119d5SChris Mason 554e02119d5SChris Mason /* 555e02119d5SChris Mason * first check to see if we already have this extent in the 556e02119d5SChris Mason * file. This must be done before the btrfs_drop_extents run 557e02119d5SChris Mason * so we don't try to drop this extent. 
558e02119d5SChris Mason */ 55933345d01SLi Zefan ret = btrfs_lookup_file_extent(trans, root, path, btrfs_ino(inode), 560e02119d5SChris Mason start, 0); 561e02119d5SChris Mason 562d899e052SYan Zheng if (ret == 0 && 563d899e052SYan Zheng (found_type == BTRFS_FILE_EXTENT_REG || 564d899e052SYan Zheng found_type == BTRFS_FILE_EXTENT_PREALLOC)) { 565e02119d5SChris Mason struct btrfs_file_extent_item cmp1; 566e02119d5SChris Mason struct btrfs_file_extent_item cmp2; 567e02119d5SChris Mason struct btrfs_file_extent_item *existing; 568e02119d5SChris Mason struct extent_buffer *leaf; 569e02119d5SChris Mason 570e02119d5SChris Mason leaf = path->nodes[0]; 571e02119d5SChris Mason existing = btrfs_item_ptr(leaf, path->slots[0], 572e02119d5SChris Mason struct btrfs_file_extent_item); 573e02119d5SChris Mason 574e02119d5SChris Mason read_extent_buffer(eb, &cmp1, (unsigned long)item, 575e02119d5SChris Mason sizeof(cmp1)); 576e02119d5SChris Mason read_extent_buffer(leaf, &cmp2, (unsigned long)existing, 577e02119d5SChris Mason sizeof(cmp2)); 578e02119d5SChris Mason 579e02119d5SChris Mason /* 580e02119d5SChris Mason * we already have a pointer to this exact extent, 581e02119d5SChris Mason * we don't have to do anything 582e02119d5SChris Mason */ 583e02119d5SChris Mason if (memcmp(&cmp1, &cmp2, sizeof(cmp1)) == 0) { 584b3b4aa74SDavid Sterba btrfs_release_path(path); 585e02119d5SChris Mason goto out; 586e02119d5SChris Mason } 587e02119d5SChris Mason } 588b3b4aa74SDavid Sterba btrfs_release_path(path); 589e02119d5SChris Mason 590e02119d5SChris Mason /* drop any overlapping extents */ 5912671485dSJosef Bacik ret = btrfs_drop_extents(trans, root, inode, start, extent_end, 1); 592e02119d5SChris Mason BUG_ON(ret); 593e02119d5SChris Mason 59407d400a6SYan Zheng if (found_type == BTRFS_FILE_EXTENT_REG || 59507d400a6SYan Zheng found_type == BTRFS_FILE_EXTENT_PREALLOC) { 5965d4f98a2SYan Zheng u64 offset; 59707d400a6SYan Zheng unsigned long dest_offset; 59807d400a6SYan Zheng struct btrfs_key ins; 
59907d400a6SYan Zheng 60007d400a6SYan Zheng ret = btrfs_insert_empty_item(trans, root, path, key, 60107d400a6SYan Zheng sizeof(*item)); 60207d400a6SYan Zheng BUG_ON(ret); 60307d400a6SYan Zheng dest_offset = btrfs_item_ptr_offset(path->nodes[0], 60407d400a6SYan Zheng path->slots[0]); 60507d400a6SYan Zheng copy_extent_buffer(path->nodes[0], eb, dest_offset, 60607d400a6SYan Zheng (unsigned long)item, sizeof(*item)); 60707d400a6SYan Zheng 60807d400a6SYan Zheng ins.objectid = btrfs_file_extent_disk_bytenr(eb, item); 60907d400a6SYan Zheng ins.offset = btrfs_file_extent_disk_num_bytes(eb, item); 61007d400a6SYan Zheng ins.type = BTRFS_EXTENT_ITEM_KEY; 6115d4f98a2SYan Zheng offset = key->offset - btrfs_file_extent_offset(eb, item); 61207d400a6SYan Zheng 61307d400a6SYan Zheng if (ins.objectid > 0) { 61407d400a6SYan Zheng u64 csum_start; 61507d400a6SYan Zheng u64 csum_end; 61607d400a6SYan Zheng LIST_HEAD(ordered_sums); 61707d400a6SYan Zheng /* 61807d400a6SYan Zheng * is this extent already allocated in the extent 61907d400a6SYan Zheng * allocation tree? 
If so, just add a reference 62007d400a6SYan Zheng */ 62107d400a6SYan Zheng ret = btrfs_lookup_extent(root, ins.objectid, 62207d400a6SYan Zheng ins.offset); 62307d400a6SYan Zheng if (ret == 0) { 62407d400a6SYan Zheng ret = btrfs_inc_extent_ref(trans, root, 62507d400a6SYan Zheng ins.objectid, ins.offset, 6265d4f98a2SYan Zheng 0, root->root_key.objectid, 62766d7e7f0SArne Jansen key->objectid, offset, 0); 628*b50c6e25SJosef Bacik if (ret) 629*b50c6e25SJosef Bacik goto out; 63007d400a6SYan Zheng } else { 63107d400a6SYan Zheng /* 63207d400a6SYan Zheng * insert the extent pointer in the extent 63307d400a6SYan Zheng * allocation tree 63407d400a6SYan Zheng */ 6355d4f98a2SYan Zheng ret = btrfs_alloc_logged_file_extent(trans, 6365d4f98a2SYan Zheng root, root->root_key.objectid, 6375d4f98a2SYan Zheng key->objectid, offset, &ins); 638*b50c6e25SJosef Bacik if (ret) 639*b50c6e25SJosef Bacik goto out; 64007d400a6SYan Zheng } 641b3b4aa74SDavid Sterba btrfs_release_path(path); 64207d400a6SYan Zheng 64307d400a6SYan Zheng if (btrfs_file_extent_compression(eb, item)) { 64407d400a6SYan Zheng csum_start = ins.objectid; 64507d400a6SYan Zheng csum_end = csum_start + ins.offset; 64607d400a6SYan Zheng } else { 64707d400a6SYan Zheng csum_start = ins.objectid + 64807d400a6SYan Zheng btrfs_file_extent_offset(eb, item); 64907d400a6SYan Zheng csum_end = csum_start + 65007d400a6SYan Zheng btrfs_file_extent_num_bytes(eb, item); 65107d400a6SYan Zheng } 65207d400a6SYan Zheng 65307d400a6SYan Zheng ret = btrfs_lookup_csums_range(root->log_root, 65407d400a6SYan Zheng csum_start, csum_end - 1, 655a2de733cSArne Jansen &ordered_sums, 0); 65607d400a6SYan Zheng BUG_ON(ret); 65707d400a6SYan Zheng while (!list_empty(&ordered_sums)) { 65807d400a6SYan Zheng struct btrfs_ordered_sum *sums; 65907d400a6SYan Zheng sums = list_entry(ordered_sums.next, 66007d400a6SYan Zheng struct btrfs_ordered_sum, 66107d400a6SYan Zheng list); 66207d400a6SYan Zheng ret = btrfs_csum_file_blocks(trans, 66307d400a6SYan Zheng 
root->fs_info->csum_root, 66407d400a6SYan Zheng sums); 66507d400a6SYan Zheng BUG_ON(ret); 66607d400a6SYan Zheng list_del(&sums->list); 66707d400a6SYan Zheng kfree(sums); 66807d400a6SYan Zheng } 66907d400a6SYan Zheng } else { 670b3b4aa74SDavid Sterba btrfs_release_path(path); 67107d400a6SYan Zheng } 67207d400a6SYan Zheng } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { 67307d400a6SYan Zheng /* inline extents are easy, we just overwrite them */ 674e02119d5SChris Mason ret = overwrite_item(trans, root, path, eb, slot, key); 675e02119d5SChris Mason BUG_ON(ret); 67607d400a6SYan Zheng } 677e02119d5SChris Mason 6784bc4bee4SJosef Bacik inode_add_bytes(inode, nbytes); 679b9959295STsutomu Itoh ret = btrfs_update_inode(trans, root, inode); 680e02119d5SChris Mason out: 681e02119d5SChris Mason if (inode) 682e02119d5SChris Mason iput(inode); 683e02119d5SChris Mason return ret; 684e02119d5SChris Mason } 685e02119d5SChris Mason 686e02119d5SChris Mason /* 687e02119d5SChris Mason * when cleaning up conflicts between the directory names in the 688e02119d5SChris Mason * subvolume, directory names in the log and directory names in the 689e02119d5SChris Mason * inode back references, we may have to unlink inodes from directories. 
690e02119d5SChris Mason * 691e02119d5SChris Mason * This is a helper function to do the unlink of a specific directory 692e02119d5SChris Mason * item 693e02119d5SChris Mason */ 694e02119d5SChris Mason static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans, 695e02119d5SChris Mason struct btrfs_root *root, 696e02119d5SChris Mason struct btrfs_path *path, 697e02119d5SChris Mason struct inode *dir, 698e02119d5SChris Mason struct btrfs_dir_item *di) 699e02119d5SChris Mason { 700e02119d5SChris Mason struct inode *inode; 701e02119d5SChris Mason char *name; 702e02119d5SChris Mason int name_len; 703e02119d5SChris Mason struct extent_buffer *leaf; 704e02119d5SChris Mason struct btrfs_key location; 705e02119d5SChris Mason int ret; 706e02119d5SChris Mason 707e02119d5SChris Mason leaf = path->nodes[0]; 708e02119d5SChris Mason 709e02119d5SChris Mason btrfs_dir_item_key_to_cpu(leaf, di, &location); 710e02119d5SChris Mason name_len = btrfs_dir_name_len(leaf, di); 711e02119d5SChris Mason name = kmalloc(name_len, GFP_NOFS); 7122a29edc6Sliubo if (!name) 7132a29edc6Sliubo return -ENOMEM; 7142a29edc6Sliubo 715e02119d5SChris Mason read_extent_buffer(leaf, name, (unsigned long)(di + 1), name_len); 716b3b4aa74SDavid Sterba btrfs_release_path(path); 717e02119d5SChris Mason 718e02119d5SChris Mason inode = read_one_inode(root, location.objectid); 719c00e9493STsutomu Itoh if (!inode) { 720c00e9493STsutomu Itoh kfree(name); 721c00e9493STsutomu Itoh return -EIO; 722c00e9493STsutomu Itoh } 723e02119d5SChris Mason 724ec051c0fSYan Zheng ret = link_to_fixup_dir(trans, root, path, location.objectid); 725ec051c0fSYan Zheng BUG_ON(ret); 72612fcfd22SChris Mason 727e02119d5SChris Mason ret = btrfs_unlink_inode(trans, root, dir, inode, name, name_len); 728ec051c0fSYan Zheng BUG_ON(ret); 729e02119d5SChris Mason kfree(name); 730e02119d5SChris Mason 731e02119d5SChris Mason iput(inode); 732b6305567SChris Mason 733b6305567SChris Mason btrfs_run_delayed_items(trans, root); 734e02119d5SChris 
Mason return ret; 735e02119d5SChris Mason } 736e02119d5SChris Mason 737e02119d5SChris Mason /* 738e02119d5SChris Mason * helper function to see if a given name and sequence number found 739e02119d5SChris Mason * in an inode back reference are already in a directory and correctly 740e02119d5SChris Mason * point to this inode 741e02119d5SChris Mason */ 742e02119d5SChris Mason static noinline int inode_in_dir(struct btrfs_root *root, 743e02119d5SChris Mason struct btrfs_path *path, 744e02119d5SChris Mason u64 dirid, u64 objectid, u64 index, 745e02119d5SChris Mason const char *name, int name_len) 746e02119d5SChris Mason { 747e02119d5SChris Mason struct btrfs_dir_item *di; 748e02119d5SChris Mason struct btrfs_key location; 749e02119d5SChris Mason int match = 0; 750e02119d5SChris Mason 751e02119d5SChris Mason di = btrfs_lookup_dir_index_item(NULL, root, path, dirid, 752e02119d5SChris Mason index, name, name_len, 0); 753e02119d5SChris Mason if (di && !IS_ERR(di)) { 754e02119d5SChris Mason btrfs_dir_item_key_to_cpu(path->nodes[0], di, &location); 755e02119d5SChris Mason if (location.objectid != objectid) 756e02119d5SChris Mason goto out; 757e02119d5SChris Mason } else 758e02119d5SChris Mason goto out; 759b3b4aa74SDavid Sterba btrfs_release_path(path); 760e02119d5SChris Mason 761e02119d5SChris Mason di = btrfs_lookup_dir_item(NULL, root, path, dirid, name, name_len, 0); 762e02119d5SChris Mason if (di && !IS_ERR(di)) { 763e02119d5SChris Mason btrfs_dir_item_key_to_cpu(path->nodes[0], di, &location); 764e02119d5SChris Mason if (location.objectid != objectid) 765e02119d5SChris Mason goto out; 766e02119d5SChris Mason } else 767e02119d5SChris Mason goto out; 768e02119d5SChris Mason match = 1; 769e02119d5SChris Mason out: 770b3b4aa74SDavid Sterba btrfs_release_path(path); 771e02119d5SChris Mason return match; 772e02119d5SChris Mason } 773e02119d5SChris Mason 774e02119d5SChris Mason /* 775e02119d5SChris Mason * helper function to check a log tree for a named back reference in 
776e02119d5SChris Mason * an inode. This is used to decide if a back reference that is 777e02119d5SChris Mason * found in the subvolume conflicts with what we find in the log. 778e02119d5SChris Mason * 779e02119d5SChris Mason * inode backreferences may have multiple refs in a single item, 780e02119d5SChris Mason * during replay we process one reference at a time, and we don't 781e02119d5SChris Mason * want to delete valid links to a file from the subvolume if that 782e02119d5SChris Mason * link is also in the log. 783e02119d5SChris Mason */ 784e02119d5SChris Mason static noinline int backref_in_log(struct btrfs_root *log, 785e02119d5SChris Mason struct btrfs_key *key, 786f186373fSMark Fasheh u64 ref_objectid, 787e02119d5SChris Mason char *name, int namelen) 788e02119d5SChris Mason { 789e02119d5SChris Mason struct btrfs_path *path; 790e02119d5SChris Mason struct btrfs_inode_ref *ref; 791e02119d5SChris Mason unsigned long ptr; 792e02119d5SChris Mason unsigned long ptr_end; 793e02119d5SChris Mason unsigned long name_ptr; 794e02119d5SChris Mason int found_name_len; 795e02119d5SChris Mason int item_size; 796e02119d5SChris Mason int ret; 797e02119d5SChris Mason int match = 0; 798e02119d5SChris Mason 799e02119d5SChris Mason path = btrfs_alloc_path(); 8002a29edc6Sliubo if (!path) 8012a29edc6Sliubo return -ENOMEM; 8022a29edc6Sliubo 803e02119d5SChris Mason ret = btrfs_search_slot(NULL, log, key, path, 0, 0); 804e02119d5SChris Mason if (ret != 0) 805e02119d5SChris Mason goto out; 806e02119d5SChris Mason 807e02119d5SChris Mason ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]); 808f186373fSMark Fasheh 809f186373fSMark Fasheh if (key->type == BTRFS_INODE_EXTREF_KEY) { 810f186373fSMark Fasheh if (btrfs_find_name_in_ext_backref(path, ref_objectid, 811f186373fSMark Fasheh name, namelen, NULL)) 812f186373fSMark Fasheh match = 1; 813f186373fSMark Fasheh 814f186373fSMark Fasheh goto out; 815f186373fSMark Fasheh } 816f186373fSMark Fasheh 817f186373fSMark Fasheh item_size = 
btrfs_item_size_nr(path->nodes[0], path->slots[0]); 818e02119d5SChris Mason ptr_end = ptr + item_size; 819e02119d5SChris Mason while (ptr < ptr_end) { 820e02119d5SChris Mason ref = (struct btrfs_inode_ref *)ptr; 821e02119d5SChris Mason found_name_len = btrfs_inode_ref_name_len(path->nodes[0], ref); 822e02119d5SChris Mason if (found_name_len == namelen) { 823e02119d5SChris Mason name_ptr = (unsigned long)(ref + 1); 824e02119d5SChris Mason ret = memcmp_extent_buffer(path->nodes[0], name, 825e02119d5SChris Mason name_ptr, namelen); 826e02119d5SChris Mason if (ret == 0) { 827e02119d5SChris Mason match = 1; 828e02119d5SChris Mason goto out; 829e02119d5SChris Mason } 830e02119d5SChris Mason } 831e02119d5SChris Mason ptr = (unsigned long)(ref + 1) + found_name_len; 832e02119d5SChris Mason } 833e02119d5SChris Mason out: 834e02119d5SChris Mason btrfs_free_path(path); 835e02119d5SChris Mason return match; 836e02119d5SChris Mason } 837e02119d5SChris Mason 8385a1d7843SJan Schmidt static inline int __add_inode_ref(struct btrfs_trans_handle *trans, 8395a1d7843SJan Schmidt struct btrfs_root *root, 8405a1d7843SJan Schmidt struct btrfs_path *path, 8415a1d7843SJan Schmidt struct btrfs_root *log_root, 8425a1d7843SJan Schmidt struct inode *dir, struct inode *inode, 8435a1d7843SJan Schmidt struct extent_buffer *eb, 844f186373fSMark Fasheh u64 inode_objectid, u64 parent_objectid, 845f186373fSMark Fasheh u64 ref_index, char *name, int namelen, 846f186373fSMark Fasheh int *search_done) 8475a1d7843SJan Schmidt { 8485a1d7843SJan Schmidt int ret; 8495a1d7843SJan Schmidt char *victim_name; 8505a1d7843SJan Schmidt int victim_name_len; 851f186373fSMark Fasheh struct extent_buffer *leaf; 852f186373fSMark Fasheh struct btrfs_dir_item *di; 853f186373fSMark Fasheh struct btrfs_key search_key; 854f186373fSMark Fasheh struct btrfs_inode_extref *extref; 855f186373fSMark Fasheh 856f186373fSMark Fasheh again: 857f186373fSMark Fasheh /* Search old style refs */ 858f186373fSMark Fasheh search_key.objectid 
= inode_objectid; 859f186373fSMark Fasheh search_key.type = BTRFS_INODE_REF_KEY; 860f186373fSMark Fasheh search_key.offset = parent_objectid; 861f186373fSMark Fasheh ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0); 862f186373fSMark Fasheh if (ret == 0) { 8635a1d7843SJan Schmidt struct btrfs_inode_ref *victim_ref; 8645a1d7843SJan Schmidt unsigned long ptr; 8655a1d7843SJan Schmidt unsigned long ptr_end; 866f186373fSMark Fasheh 867f186373fSMark Fasheh leaf = path->nodes[0]; 8685a1d7843SJan Schmidt 8695a1d7843SJan Schmidt /* are we trying to overwrite a back ref for the root directory 8705a1d7843SJan Schmidt * if so, just jump out, we're done 8715a1d7843SJan Schmidt */ 872f186373fSMark Fasheh if (search_key.objectid == search_key.offset) 8735a1d7843SJan Schmidt return 1; 8745a1d7843SJan Schmidt 8755a1d7843SJan Schmidt /* check all the names in this back reference to see 8765a1d7843SJan Schmidt * if they are in the log. if so, we allow them to stay 8775a1d7843SJan Schmidt * otherwise they must be unlinked as a conflict 8785a1d7843SJan Schmidt */ 8795a1d7843SJan Schmidt ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); 8805a1d7843SJan Schmidt ptr_end = ptr + btrfs_item_size_nr(leaf, path->slots[0]); 8815a1d7843SJan Schmidt while (ptr < ptr_end) { 8825a1d7843SJan Schmidt victim_ref = (struct btrfs_inode_ref *)ptr; 8835a1d7843SJan Schmidt victim_name_len = btrfs_inode_ref_name_len(leaf, 8845a1d7843SJan Schmidt victim_ref); 8855a1d7843SJan Schmidt victim_name = kmalloc(victim_name_len, GFP_NOFS); 8865a1d7843SJan Schmidt BUG_ON(!victim_name); 8875a1d7843SJan Schmidt 8885a1d7843SJan Schmidt read_extent_buffer(leaf, victim_name, 8895a1d7843SJan Schmidt (unsigned long)(victim_ref + 1), 8905a1d7843SJan Schmidt victim_name_len); 8915a1d7843SJan Schmidt 892f186373fSMark Fasheh if (!backref_in_log(log_root, &search_key, 893f186373fSMark Fasheh parent_objectid, 894f186373fSMark Fasheh victim_name, 8955a1d7843SJan Schmidt victim_name_len)) { 8965a1d7843SJan Schmidt 
btrfs_inc_nlink(inode); 8975a1d7843SJan Schmidt btrfs_release_path(path); 8985a1d7843SJan Schmidt 8995a1d7843SJan Schmidt ret = btrfs_unlink_inode(trans, root, dir, 9005a1d7843SJan Schmidt inode, victim_name, 9015a1d7843SJan Schmidt victim_name_len); 902f186373fSMark Fasheh BUG_ON(ret); 9035a1d7843SJan Schmidt btrfs_run_delayed_items(trans, root); 904f186373fSMark Fasheh kfree(victim_name); 905f186373fSMark Fasheh *search_done = 1; 906f186373fSMark Fasheh goto again; 9075a1d7843SJan Schmidt } 9085a1d7843SJan Schmidt kfree(victim_name); 909f186373fSMark Fasheh 9105a1d7843SJan Schmidt ptr = (unsigned long)(victim_ref + 1) + victim_name_len; 9115a1d7843SJan Schmidt } 9125a1d7843SJan Schmidt BUG_ON(ret); 9135a1d7843SJan Schmidt 9145a1d7843SJan Schmidt /* 9155a1d7843SJan Schmidt * NOTE: we have searched root tree and checked the 9165a1d7843SJan Schmidt * coresponding ref, it does not need to check again. 9175a1d7843SJan Schmidt */ 9185a1d7843SJan Schmidt *search_done = 1; 9195a1d7843SJan Schmidt } 9205a1d7843SJan Schmidt btrfs_release_path(path); 9215a1d7843SJan Schmidt 922f186373fSMark Fasheh /* Same search but for extended refs */ 923f186373fSMark Fasheh extref = btrfs_lookup_inode_extref(NULL, root, path, name, namelen, 924f186373fSMark Fasheh inode_objectid, parent_objectid, 0, 925f186373fSMark Fasheh 0); 926f186373fSMark Fasheh if (!IS_ERR_OR_NULL(extref)) { 927f186373fSMark Fasheh u32 item_size; 928f186373fSMark Fasheh u32 cur_offset = 0; 929f186373fSMark Fasheh unsigned long base; 930f186373fSMark Fasheh struct inode *victim_parent; 931f186373fSMark Fasheh 932f186373fSMark Fasheh leaf = path->nodes[0]; 933f186373fSMark Fasheh 934f186373fSMark Fasheh item_size = btrfs_item_size_nr(leaf, path->slots[0]); 935f186373fSMark Fasheh base = btrfs_item_ptr_offset(leaf, path->slots[0]); 936f186373fSMark Fasheh 937f186373fSMark Fasheh while (cur_offset < item_size) { 938f186373fSMark Fasheh extref = (struct btrfs_inode_extref *)base + cur_offset; 939f186373fSMark Fasheh 
940f186373fSMark Fasheh victim_name_len = btrfs_inode_extref_name_len(leaf, extref); 941f186373fSMark Fasheh 942f186373fSMark Fasheh if (btrfs_inode_extref_parent(leaf, extref) != parent_objectid) 943f186373fSMark Fasheh goto next; 944f186373fSMark Fasheh 945f186373fSMark Fasheh victim_name = kmalloc(victim_name_len, GFP_NOFS); 946f186373fSMark Fasheh read_extent_buffer(leaf, victim_name, (unsigned long)&extref->name, 947f186373fSMark Fasheh victim_name_len); 948f186373fSMark Fasheh 949f186373fSMark Fasheh search_key.objectid = inode_objectid; 950f186373fSMark Fasheh search_key.type = BTRFS_INODE_EXTREF_KEY; 951f186373fSMark Fasheh search_key.offset = btrfs_extref_hash(parent_objectid, 952f186373fSMark Fasheh victim_name, 953f186373fSMark Fasheh victim_name_len); 954f186373fSMark Fasheh ret = 0; 955f186373fSMark Fasheh if (!backref_in_log(log_root, &search_key, 956f186373fSMark Fasheh parent_objectid, victim_name, 957f186373fSMark Fasheh victim_name_len)) { 958f186373fSMark Fasheh ret = -ENOENT; 959f186373fSMark Fasheh victim_parent = read_one_inode(root, 960f186373fSMark Fasheh parent_objectid); 961f186373fSMark Fasheh if (victim_parent) { 962f186373fSMark Fasheh btrfs_inc_nlink(inode); 963f186373fSMark Fasheh btrfs_release_path(path); 964f186373fSMark Fasheh 965f186373fSMark Fasheh ret = btrfs_unlink_inode(trans, root, 966f186373fSMark Fasheh victim_parent, 967f186373fSMark Fasheh inode, 968f186373fSMark Fasheh victim_name, 969f186373fSMark Fasheh victim_name_len); 970f186373fSMark Fasheh btrfs_run_delayed_items(trans, root); 971f186373fSMark Fasheh } 972f186373fSMark Fasheh BUG_ON(ret); 973f186373fSMark Fasheh iput(victim_parent); 974f186373fSMark Fasheh kfree(victim_name); 975f186373fSMark Fasheh *search_done = 1; 976f186373fSMark Fasheh goto again; 977f186373fSMark Fasheh } 978f186373fSMark Fasheh kfree(victim_name); 979f186373fSMark Fasheh BUG_ON(ret); 980f186373fSMark Fasheh next: 981f186373fSMark Fasheh cur_offset += victim_name_len + sizeof(*extref); 
982f186373fSMark Fasheh } 983f186373fSMark Fasheh *search_done = 1; 984f186373fSMark Fasheh } 985f186373fSMark Fasheh btrfs_release_path(path); 986f186373fSMark Fasheh 9875a1d7843SJan Schmidt /* look for a conflicting sequence number */ 9885a1d7843SJan Schmidt di = btrfs_lookup_dir_index_item(trans, root, path, btrfs_ino(dir), 989f186373fSMark Fasheh ref_index, name, namelen, 0); 9905a1d7843SJan Schmidt if (di && !IS_ERR(di)) { 9915a1d7843SJan Schmidt ret = drop_one_dir_item(trans, root, path, dir, di); 9925a1d7843SJan Schmidt BUG_ON(ret); 9935a1d7843SJan Schmidt } 9945a1d7843SJan Schmidt btrfs_release_path(path); 9955a1d7843SJan Schmidt 9965a1d7843SJan Schmidt /* look for a conflicing name */ 9975a1d7843SJan Schmidt di = btrfs_lookup_dir_item(trans, root, path, btrfs_ino(dir), 9985a1d7843SJan Schmidt name, namelen, 0); 9995a1d7843SJan Schmidt if (di && !IS_ERR(di)) { 10005a1d7843SJan Schmidt ret = drop_one_dir_item(trans, root, path, dir, di); 10015a1d7843SJan Schmidt BUG_ON(ret); 10025a1d7843SJan Schmidt } 10035a1d7843SJan Schmidt btrfs_release_path(path); 10045a1d7843SJan Schmidt 10055a1d7843SJan Schmidt return 0; 10065a1d7843SJan Schmidt } 1007e02119d5SChris Mason 1008f186373fSMark Fasheh static int extref_get_fields(struct extent_buffer *eb, unsigned long ref_ptr, 1009f186373fSMark Fasheh u32 *namelen, char **name, u64 *index, 1010f186373fSMark Fasheh u64 *parent_objectid) 1011f186373fSMark Fasheh { 1012f186373fSMark Fasheh struct btrfs_inode_extref *extref; 1013f186373fSMark Fasheh 1014f186373fSMark Fasheh extref = (struct btrfs_inode_extref *)ref_ptr; 1015f186373fSMark Fasheh 1016f186373fSMark Fasheh *namelen = btrfs_inode_extref_name_len(eb, extref); 1017f186373fSMark Fasheh *name = kmalloc(*namelen, GFP_NOFS); 1018f186373fSMark Fasheh if (*name == NULL) 1019f186373fSMark Fasheh return -ENOMEM; 1020f186373fSMark Fasheh 1021f186373fSMark Fasheh read_extent_buffer(eb, *name, (unsigned long)&extref->name, 1022f186373fSMark Fasheh *namelen); 1023f186373fSMark 
Fasheh 1024f186373fSMark Fasheh *index = btrfs_inode_extref_index(eb, extref); 1025f186373fSMark Fasheh if (parent_objectid) 1026f186373fSMark Fasheh *parent_objectid = btrfs_inode_extref_parent(eb, extref); 1027f186373fSMark Fasheh 1028f186373fSMark Fasheh return 0; 1029f186373fSMark Fasheh } 1030f186373fSMark Fasheh 1031f186373fSMark Fasheh static int ref_get_fields(struct extent_buffer *eb, unsigned long ref_ptr, 1032f186373fSMark Fasheh u32 *namelen, char **name, u64 *index) 1033f186373fSMark Fasheh { 1034f186373fSMark Fasheh struct btrfs_inode_ref *ref; 1035f186373fSMark Fasheh 1036f186373fSMark Fasheh ref = (struct btrfs_inode_ref *)ref_ptr; 1037f186373fSMark Fasheh 1038f186373fSMark Fasheh *namelen = btrfs_inode_ref_name_len(eb, ref); 1039f186373fSMark Fasheh *name = kmalloc(*namelen, GFP_NOFS); 1040f186373fSMark Fasheh if (*name == NULL) 1041f186373fSMark Fasheh return -ENOMEM; 1042f186373fSMark Fasheh 1043f186373fSMark Fasheh read_extent_buffer(eb, *name, (unsigned long)(ref + 1), *namelen); 1044f186373fSMark Fasheh 1045f186373fSMark Fasheh *index = btrfs_inode_ref_index(eb, ref); 1046f186373fSMark Fasheh 1047f186373fSMark Fasheh return 0; 1048f186373fSMark Fasheh } 1049f186373fSMark Fasheh 1050e02119d5SChris Mason /* 1051e02119d5SChris Mason * replay one inode back reference item found in the log tree. 1052e02119d5SChris Mason * eb, slot and key refer to the buffer and key found in the log tree. 1053e02119d5SChris Mason * root is the destination we are replaying into, and path is for temp 1054e02119d5SChris Mason * use by this function. (it should be released on return). 
1055e02119d5SChris Mason */ 1056e02119d5SChris Mason static noinline int add_inode_ref(struct btrfs_trans_handle *trans, 1057e02119d5SChris Mason struct btrfs_root *root, 1058e02119d5SChris Mason struct btrfs_root *log, 1059e02119d5SChris Mason struct btrfs_path *path, 1060e02119d5SChris Mason struct extent_buffer *eb, int slot, 1061e02119d5SChris Mason struct btrfs_key *key) 1062e02119d5SChris Mason { 106334f3e4f2Sliubo struct inode *dir; 1064e02119d5SChris Mason struct inode *inode; 1065e02119d5SChris Mason unsigned long ref_ptr; 1066e02119d5SChris Mason unsigned long ref_end; 106734f3e4f2Sliubo char *name; 106834f3e4f2Sliubo int namelen; 106934f3e4f2Sliubo int ret; 1070c622ae60Sliubo int search_done = 0; 1071f186373fSMark Fasheh int log_ref_ver = 0; 1072f186373fSMark Fasheh u64 parent_objectid; 1073f186373fSMark Fasheh u64 inode_objectid; 1074f46dbe3dSChris Mason u64 ref_index = 0; 1075f186373fSMark Fasheh int ref_struct_size; 1076f186373fSMark Fasheh 1077f186373fSMark Fasheh ref_ptr = btrfs_item_ptr_offset(eb, slot); 1078f186373fSMark Fasheh ref_end = ref_ptr + btrfs_item_size_nr(eb, slot); 1079f186373fSMark Fasheh 1080f186373fSMark Fasheh if (key->type == BTRFS_INODE_EXTREF_KEY) { 1081f186373fSMark Fasheh struct btrfs_inode_extref *r; 1082f186373fSMark Fasheh 1083f186373fSMark Fasheh ref_struct_size = sizeof(struct btrfs_inode_extref); 1084f186373fSMark Fasheh log_ref_ver = 1; 1085f186373fSMark Fasheh r = (struct btrfs_inode_extref *)ref_ptr; 1086f186373fSMark Fasheh parent_objectid = btrfs_inode_extref_parent(eb, r); 1087f186373fSMark Fasheh } else { 1088f186373fSMark Fasheh ref_struct_size = sizeof(struct btrfs_inode_ref); 1089f186373fSMark Fasheh parent_objectid = key->offset; 1090f186373fSMark Fasheh } 1091f186373fSMark Fasheh inode_objectid = key->objectid; 1092e02119d5SChris Mason 1093e02119d5SChris Mason /* 1094e02119d5SChris Mason * it is possible that we didn't log all the parent directories 1095e02119d5SChris Mason * for a given inode. 
If we don't find the dir, just don't 1096e02119d5SChris Mason * copy the back ref in. The link count fixup code will take 1097e02119d5SChris Mason * care of the rest 1098e02119d5SChris Mason */ 1099f186373fSMark Fasheh dir = read_one_inode(root, parent_objectid); 1100e02119d5SChris Mason if (!dir) 1101e02119d5SChris Mason return -ENOENT; 1102e02119d5SChris Mason 1103f186373fSMark Fasheh inode = read_one_inode(root, inode_objectid); 1104c00e9493STsutomu Itoh if (!inode) { 1105c00e9493STsutomu Itoh iput(dir); 1106c00e9493STsutomu Itoh return -EIO; 1107c00e9493STsutomu Itoh } 1108e02119d5SChris Mason 11095a1d7843SJan Schmidt while (ref_ptr < ref_end) { 1110f186373fSMark Fasheh if (log_ref_ver) { 1111f186373fSMark Fasheh ret = extref_get_fields(eb, ref_ptr, &namelen, &name, 1112f186373fSMark Fasheh &ref_index, &parent_objectid); 1113f186373fSMark Fasheh /* 1114f186373fSMark Fasheh * parent object can change from one array 1115f186373fSMark Fasheh * item to another. 1116f186373fSMark Fasheh */ 1117f186373fSMark Fasheh if (!dir) 1118f186373fSMark Fasheh dir = read_one_inode(root, parent_objectid); 1119f186373fSMark Fasheh if (!dir) 1120f186373fSMark Fasheh return -ENOENT; 1121f186373fSMark Fasheh } else { 1122f186373fSMark Fasheh ret = ref_get_fields(eb, ref_ptr, &namelen, &name, 1123f186373fSMark Fasheh &ref_index); 1124f186373fSMark Fasheh } 1125f186373fSMark Fasheh if (ret) 1126f186373fSMark Fasheh return ret; 1127e02119d5SChris Mason 1128e02119d5SChris Mason /* if we already have a perfect match, we're done */ 11295a1d7843SJan Schmidt if (!inode_in_dir(root, path, btrfs_ino(dir), btrfs_ino(inode), 1130f186373fSMark Fasheh ref_index, name, namelen)) { 11315a1d7843SJan Schmidt /* 11325a1d7843SJan Schmidt * look for a conflicting back reference in the 11335a1d7843SJan Schmidt * metadata. if we find one we have to unlink that name 11345a1d7843SJan Schmidt * of the file before we add our new link. 
Later on, we 11355a1d7843SJan Schmidt * overwrite any existing back reference, and we don't 11365a1d7843SJan Schmidt * want to create dangling pointers in the directory. 11375a1d7843SJan Schmidt */ 11385a1d7843SJan Schmidt 11395a1d7843SJan Schmidt if (!search_done) { 11405a1d7843SJan Schmidt ret = __add_inode_ref(trans, root, path, log, 1141f186373fSMark Fasheh dir, inode, eb, 1142f186373fSMark Fasheh inode_objectid, 1143f186373fSMark Fasheh parent_objectid, 1144f186373fSMark Fasheh ref_index, name, namelen, 11455a1d7843SJan Schmidt &search_done); 11465a1d7843SJan Schmidt if (ret == 1) 1147e02119d5SChris Mason goto out; 114834f3e4f2Sliubo BUG_ON(ret); 114934f3e4f2Sliubo } 115034f3e4f2Sliubo 1151e02119d5SChris Mason /* insert our name */ 11525a1d7843SJan Schmidt ret = btrfs_add_link(trans, dir, inode, name, namelen, 1153f186373fSMark Fasheh 0, ref_index); 1154e02119d5SChris Mason BUG_ON(ret); 1155e02119d5SChris Mason 1156e02119d5SChris Mason btrfs_update_inode(trans, root, inode); 11575a1d7843SJan Schmidt } 1158e02119d5SChris Mason 1159f186373fSMark Fasheh ref_ptr = (unsigned long)(ref_ptr + ref_struct_size) + namelen; 1160e02119d5SChris Mason kfree(name); 1161f186373fSMark Fasheh if (log_ref_ver) { 1162f186373fSMark Fasheh iput(dir); 1163f186373fSMark Fasheh dir = NULL; 1164f186373fSMark Fasheh } 11655a1d7843SJan Schmidt } 1166e02119d5SChris Mason 1167e02119d5SChris Mason /* finally write the back reference in the inode */ 1168e02119d5SChris Mason ret = overwrite_item(trans, root, path, eb, slot, key); 1169e02119d5SChris Mason BUG_ON(ret); 1170e02119d5SChris Mason 11715a1d7843SJan Schmidt out: 1172b3b4aa74SDavid Sterba btrfs_release_path(path); 1173e02119d5SChris Mason iput(dir); 1174e02119d5SChris Mason iput(inode); 1175e02119d5SChris Mason return 0; 1176e02119d5SChris Mason } 1177e02119d5SChris Mason 1178c71bf099SYan, Zheng static int insert_orphan_item(struct btrfs_trans_handle *trans, 1179c71bf099SYan, Zheng struct btrfs_root *root, u64 offset) 
1180c71bf099SYan, Zheng { 1181c71bf099SYan, Zheng int ret; 1182c71bf099SYan, Zheng ret = btrfs_find_orphan_item(root, offset); 1183c71bf099SYan, Zheng if (ret > 0) 1184c71bf099SYan, Zheng ret = btrfs_insert_orphan_item(trans, root, offset); 1185c71bf099SYan, Zheng return ret; 1186c71bf099SYan, Zheng } 1187c71bf099SYan, Zheng 1188f186373fSMark Fasheh static int count_inode_extrefs(struct btrfs_root *root, 1189f186373fSMark Fasheh struct inode *inode, struct btrfs_path *path) 1190e02119d5SChris Mason { 1191f186373fSMark Fasheh int ret = 0; 1192f186373fSMark Fasheh int name_len; 1193f186373fSMark Fasheh unsigned int nlink = 0; 1194f186373fSMark Fasheh u32 item_size; 1195f186373fSMark Fasheh u32 cur_offset = 0; 1196f186373fSMark Fasheh u64 inode_objectid = btrfs_ino(inode); 1197f186373fSMark Fasheh u64 offset = 0; 1198f186373fSMark Fasheh unsigned long ptr; 1199f186373fSMark Fasheh struct btrfs_inode_extref *extref; 1200f186373fSMark Fasheh struct extent_buffer *leaf; 1201f186373fSMark Fasheh 1202f186373fSMark Fasheh while (1) { 1203f186373fSMark Fasheh ret = btrfs_find_one_extref(root, inode_objectid, offset, path, 1204f186373fSMark Fasheh &extref, &offset); 1205f186373fSMark Fasheh if (ret) 1206f186373fSMark Fasheh break; 1207f186373fSMark Fasheh 1208f186373fSMark Fasheh leaf = path->nodes[0]; 1209f186373fSMark Fasheh item_size = btrfs_item_size_nr(leaf, path->slots[0]); 1210f186373fSMark Fasheh ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); 1211f186373fSMark Fasheh 1212f186373fSMark Fasheh while (cur_offset < item_size) { 1213f186373fSMark Fasheh extref = (struct btrfs_inode_extref *) (ptr + cur_offset); 1214f186373fSMark Fasheh name_len = btrfs_inode_extref_name_len(leaf, extref); 1215f186373fSMark Fasheh 1216f186373fSMark Fasheh nlink++; 1217f186373fSMark Fasheh 1218f186373fSMark Fasheh cur_offset += name_len + sizeof(*extref); 1219f186373fSMark Fasheh } 1220f186373fSMark Fasheh 1221f186373fSMark Fasheh offset++; 1222f186373fSMark Fasheh 
btrfs_release_path(path); 1223f186373fSMark Fasheh } 1224f186373fSMark Fasheh btrfs_release_path(path); 1225f186373fSMark Fasheh 1226f186373fSMark Fasheh if (ret < 0) 1227f186373fSMark Fasheh return ret; 1228f186373fSMark Fasheh return nlink; 1229f186373fSMark Fasheh } 1230f186373fSMark Fasheh 1231f186373fSMark Fasheh static int count_inode_refs(struct btrfs_root *root, 1232f186373fSMark Fasheh struct inode *inode, struct btrfs_path *path) 1233f186373fSMark Fasheh { 1234e02119d5SChris Mason int ret; 1235e02119d5SChris Mason struct btrfs_key key; 1236f186373fSMark Fasheh unsigned int nlink = 0; 1237e02119d5SChris Mason unsigned long ptr; 1238e02119d5SChris Mason unsigned long ptr_end; 1239e02119d5SChris Mason int name_len; 124033345d01SLi Zefan u64 ino = btrfs_ino(inode); 1241e02119d5SChris Mason 124233345d01SLi Zefan key.objectid = ino; 1243e02119d5SChris Mason key.type = BTRFS_INODE_REF_KEY; 1244e02119d5SChris Mason key.offset = (u64)-1; 1245e02119d5SChris Mason 1246e02119d5SChris Mason while (1) { 1247e02119d5SChris Mason ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 1248e02119d5SChris Mason if (ret < 0) 1249e02119d5SChris Mason break; 1250e02119d5SChris Mason if (ret > 0) { 1251e02119d5SChris Mason if (path->slots[0] == 0) 1252e02119d5SChris Mason break; 1253e02119d5SChris Mason path->slots[0]--; 1254e02119d5SChris Mason } 1255e02119d5SChris Mason btrfs_item_key_to_cpu(path->nodes[0], &key, 1256e02119d5SChris Mason path->slots[0]); 125733345d01SLi Zefan if (key.objectid != ino || 1258e02119d5SChris Mason key.type != BTRFS_INODE_REF_KEY) 1259e02119d5SChris Mason break; 1260e02119d5SChris Mason ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]); 1261e02119d5SChris Mason ptr_end = ptr + btrfs_item_size_nr(path->nodes[0], 1262e02119d5SChris Mason path->slots[0]); 1263e02119d5SChris Mason while (ptr < ptr_end) { 1264e02119d5SChris Mason struct btrfs_inode_ref *ref; 1265e02119d5SChris Mason 1266e02119d5SChris Mason ref = (struct btrfs_inode_ref 
*)ptr; 1267e02119d5SChris Mason name_len = btrfs_inode_ref_name_len(path->nodes[0], 1268e02119d5SChris Mason ref); 1269e02119d5SChris Mason ptr = (unsigned long)(ref + 1) + name_len; 1270e02119d5SChris Mason nlink++; 1271e02119d5SChris Mason } 1272e02119d5SChris Mason 1273e02119d5SChris Mason if (key.offset == 0) 1274e02119d5SChris Mason break; 1275e02119d5SChris Mason key.offset--; 1276b3b4aa74SDavid Sterba btrfs_release_path(path); 1277e02119d5SChris Mason } 1278b3b4aa74SDavid Sterba btrfs_release_path(path); 1279f186373fSMark Fasheh 1280f186373fSMark Fasheh return nlink; 1281f186373fSMark Fasheh } 1282f186373fSMark Fasheh 1283f186373fSMark Fasheh /* 1284f186373fSMark Fasheh * There are a few corners where the link count of the file can't 1285f186373fSMark Fasheh * be properly maintained during replay. So, instead of adding 1286f186373fSMark Fasheh * lots of complexity to the log code, we just scan the backrefs 1287f186373fSMark Fasheh * for any file that has been through replay. 1288f186373fSMark Fasheh * 1289f186373fSMark Fasheh * The scan will update the link count on the inode to reflect the 1290f186373fSMark Fasheh * number of back refs found. If it goes down to zero, the iput 1291f186373fSMark Fasheh * will free the inode. 
1292f186373fSMark Fasheh */ 1293f186373fSMark Fasheh static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans, 1294f186373fSMark Fasheh struct btrfs_root *root, 1295f186373fSMark Fasheh struct inode *inode) 1296f186373fSMark Fasheh { 1297f186373fSMark Fasheh struct btrfs_path *path; 1298f186373fSMark Fasheh int ret; 1299f186373fSMark Fasheh u64 nlink = 0; 1300f186373fSMark Fasheh u64 ino = btrfs_ino(inode); 1301f186373fSMark Fasheh 1302f186373fSMark Fasheh path = btrfs_alloc_path(); 1303f186373fSMark Fasheh if (!path) 1304f186373fSMark Fasheh return -ENOMEM; 1305f186373fSMark Fasheh 1306f186373fSMark Fasheh ret = count_inode_refs(root, inode, path); 1307f186373fSMark Fasheh if (ret < 0) 1308f186373fSMark Fasheh goto out; 1309f186373fSMark Fasheh 1310f186373fSMark Fasheh nlink = ret; 1311f186373fSMark Fasheh 1312f186373fSMark Fasheh ret = count_inode_extrefs(root, inode, path); 1313f186373fSMark Fasheh if (ret == -ENOENT) 1314f186373fSMark Fasheh ret = 0; 1315f186373fSMark Fasheh 1316f186373fSMark Fasheh if (ret < 0) 1317f186373fSMark Fasheh goto out; 1318f186373fSMark Fasheh 1319f186373fSMark Fasheh nlink += ret; 1320f186373fSMark Fasheh 1321f186373fSMark Fasheh ret = 0; 1322f186373fSMark Fasheh 1323e02119d5SChris Mason if (nlink != inode->i_nlink) { 1324bfe86848SMiklos Szeredi set_nlink(inode, nlink); 1325e02119d5SChris Mason btrfs_update_inode(trans, root, inode); 1326e02119d5SChris Mason } 13278d5bf1cbSChris Mason BTRFS_I(inode)->index_cnt = (u64)-1; 1328e02119d5SChris Mason 1329c71bf099SYan, Zheng if (inode->i_nlink == 0) { 1330c71bf099SYan, Zheng if (S_ISDIR(inode->i_mode)) { 133112fcfd22SChris Mason ret = replay_dir_deletes(trans, root, NULL, path, 133233345d01SLi Zefan ino, 1); 133312fcfd22SChris Mason BUG_ON(ret); 133412fcfd22SChris Mason } 133533345d01SLi Zefan ret = insert_orphan_item(trans, root, ino); 1336c71bf099SYan, Zheng BUG_ON(ret); 1337c71bf099SYan, Zheng } 133812fcfd22SChris Mason 1339f186373fSMark Fasheh out: 1340f186373fSMark 
Fasheh btrfs_free_path(path); 1341f186373fSMark Fasheh return ret; 1342e02119d5SChris Mason } 1343e02119d5SChris Mason 1344e02119d5SChris Mason static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans, 1345e02119d5SChris Mason struct btrfs_root *root, 1346e02119d5SChris Mason struct btrfs_path *path) 1347e02119d5SChris Mason { 1348e02119d5SChris Mason int ret; 1349e02119d5SChris Mason struct btrfs_key key; 1350e02119d5SChris Mason struct inode *inode; 1351e02119d5SChris Mason 1352e02119d5SChris Mason key.objectid = BTRFS_TREE_LOG_FIXUP_OBJECTID; 1353e02119d5SChris Mason key.type = BTRFS_ORPHAN_ITEM_KEY; 1354e02119d5SChris Mason key.offset = (u64)-1; 1355e02119d5SChris Mason while (1) { 1356e02119d5SChris Mason ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 1357e02119d5SChris Mason if (ret < 0) 1358e02119d5SChris Mason break; 1359e02119d5SChris Mason 1360e02119d5SChris Mason if (ret == 1) { 1361e02119d5SChris Mason if (path->slots[0] == 0) 1362e02119d5SChris Mason break; 1363e02119d5SChris Mason path->slots[0]--; 1364e02119d5SChris Mason } 1365e02119d5SChris Mason 1366e02119d5SChris Mason btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 1367e02119d5SChris Mason if (key.objectid != BTRFS_TREE_LOG_FIXUP_OBJECTID || 1368e02119d5SChris Mason key.type != BTRFS_ORPHAN_ITEM_KEY) 1369e02119d5SChris Mason break; 1370e02119d5SChris Mason 1371e02119d5SChris Mason ret = btrfs_del_item(trans, root, path); 137265a246c5STsutomu Itoh if (ret) 137365a246c5STsutomu Itoh goto out; 1374e02119d5SChris Mason 1375b3b4aa74SDavid Sterba btrfs_release_path(path); 1376e02119d5SChris Mason inode = read_one_inode(root, key.offset); 1377c00e9493STsutomu Itoh if (!inode) 1378c00e9493STsutomu Itoh return -EIO; 1379e02119d5SChris Mason 1380e02119d5SChris Mason ret = fixup_inode_link_count(trans, root, inode); 1381e02119d5SChris Mason BUG_ON(ret); 1382e02119d5SChris Mason 1383e02119d5SChris Mason iput(inode); 1384e02119d5SChris Mason 138512fcfd22SChris Mason 
/* 138612fcfd22SChris Mason * fixup on a directory may create new entries, 138712fcfd22SChris Mason * make sure we always look for the highset possible 138812fcfd22SChris Mason * offset 138912fcfd22SChris Mason */ 139012fcfd22SChris Mason key.offset = (u64)-1; 1391e02119d5SChris Mason } 139265a246c5STsutomu Itoh ret = 0; 139365a246c5STsutomu Itoh out: 1394b3b4aa74SDavid Sterba btrfs_release_path(path); 139565a246c5STsutomu Itoh return ret; 1396e02119d5SChris Mason } 1397e02119d5SChris Mason 1398e02119d5SChris Mason 1399e02119d5SChris Mason /* 1400e02119d5SChris Mason * record a given inode in the fixup dir so we can check its link 1401e02119d5SChris Mason * count when replay is done. The link count is incremented here 1402e02119d5SChris Mason * so the inode won't go away until we check it 1403e02119d5SChris Mason */ 1404e02119d5SChris Mason static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans, 1405e02119d5SChris Mason struct btrfs_root *root, 1406e02119d5SChris Mason struct btrfs_path *path, 1407e02119d5SChris Mason u64 objectid) 1408e02119d5SChris Mason { 1409e02119d5SChris Mason struct btrfs_key key; 1410e02119d5SChris Mason int ret = 0; 1411e02119d5SChris Mason struct inode *inode; 1412e02119d5SChris Mason 1413e02119d5SChris Mason inode = read_one_inode(root, objectid); 1414c00e9493STsutomu Itoh if (!inode) 1415c00e9493STsutomu Itoh return -EIO; 1416e02119d5SChris Mason 1417e02119d5SChris Mason key.objectid = BTRFS_TREE_LOG_FIXUP_OBJECTID; 1418e02119d5SChris Mason btrfs_set_key_type(&key, BTRFS_ORPHAN_ITEM_KEY); 1419e02119d5SChris Mason key.offset = objectid; 1420e02119d5SChris Mason 1421e02119d5SChris Mason ret = btrfs_insert_empty_item(trans, root, path, &key, 0); 1422e02119d5SChris Mason 1423b3b4aa74SDavid Sterba btrfs_release_path(path); 1424e02119d5SChris Mason if (ret == 0) { 14259bf7a489SJosef Bacik if (!inode->i_nlink) 14269bf7a489SJosef Bacik set_nlink(inode, 1); 14279bf7a489SJosef Bacik else 1428e02119d5SChris Mason 
btrfs_inc_nlink(inode); 1429b9959295STsutomu Itoh ret = btrfs_update_inode(trans, root, inode); 1430e02119d5SChris Mason } else if (ret == -EEXIST) { 1431e02119d5SChris Mason ret = 0; 1432e02119d5SChris Mason } else { 1433e02119d5SChris Mason BUG(); 1434e02119d5SChris Mason } 1435e02119d5SChris Mason iput(inode); 1436e02119d5SChris Mason 1437e02119d5SChris Mason return ret; 1438e02119d5SChris Mason } 1439e02119d5SChris Mason 1440e02119d5SChris Mason /* 1441e02119d5SChris Mason * when replaying the log for a directory, we only insert names 1442e02119d5SChris Mason * for inodes that actually exist. This means an fsync on a directory 1443e02119d5SChris Mason * does not implicitly fsync all the new files in it 1444e02119d5SChris Mason */ 1445e02119d5SChris Mason static noinline int insert_one_name(struct btrfs_trans_handle *trans, 1446e02119d5SChris Mason struct btrfs_root *root, 1447e02119d5SChris Mason struct btrfs_path *path, 1448e02119d5SChris Mason u64 dirid, u64 index, 1449e02119d5SChris Mason char *name, int name_len, u8 type, 1450e02119d5SChris Mason struct btrfs_key *location) 1451e02119d5SChris Mason { 1452e02119d5SChris Mason struct inode *inode; 1453e02119d5SChris Mason struct inode *dir; 1454e02119d5SChris Mason int ret; 1455e02119d5SChris Mason 1456e02119d5SChris Mason inode = read_one_inode(root, location->objectid); 1457e02119d5SChris Mason if (!inode) 1458e02119d5SChris Mason return -ENOENT; 1459e02119d5SChris Mason 1460e02119d5SChris Mason dir = read_one_inode(root, dirid); 1461e02119d5SChris Mason if (!dir) { 1462e02119d5SChris Mason iput(inode); 1463e02119d5SChris Mason return -EIO; 1464e02119d5SChris Mason } 1465e02119d5SChris Mason ret = btrfs_add_link(trans, dir, inode, name, name_len, 1, index); 1466e02119d5SChris Mason 1467e02119d5SChris Mason /* FIXME, put inode into FIXUP list */ 1468e02119d5SChris Mason 1469e02119d5SChris Mason iput(inode); 1470e02119d5SChris Mason iput(dir); 1471e02119d5SChris Mason return ret; 1472e02119d5SChris Mason } 
1473e02119d5SChris Mason 1474e02119d5SChris Mason /* 1475e02119d5SChris Mason * take a single entry in a log directory item and replay it into 1476e02119d5SChris Mason * the subvolume. 1477e02119d5SChris Mason * 1478e02119d5SChris Mason * if a conflicting item exists in the subdirectory already, 1479e02119d5SChris Mason * the inode it points to is unlinked and put into the link count 1480e02119d5SChris Mason * fix up tree. 1481e02119d5SChris Mason * 1482e02119d5SChris Mason * If a name from the log points to a file or directory that does 1483e02119d5SChris Mason * not exist in the FS, it is skipped. fsyncs on directories 1484e02119d5SChris Mason * do not force down inodes inside that directory, just changes to the 1485e02119d5SChris Mason * names or unlinks in a directory. 1486e02119d5SChris Mason */ 1487e02119d5SChris Mason static noinline int replay_one_name(struct btrfs_trans_handle *trans, 1488e02119d5SChris Mason struct btrfs_root *root, 1489e02119d5SChris Mason struct btrfs_path *path, 1490e02119d5SChris Mason struct extent_buffer *eb, 1491e02119d5SChris Mason struct btrfs_dir_item *di, 1492e02119d5SChris Mason struct btrfs_key *key) 1493e02119d5SChris Mason { 1494e02119d5SChris Mason char *name; 1495e02119d5SChris Mason int name_len; 1496e02119d5SChris Mason struct btrfs_dir_item *dst_di; 1497e02119d5SChris Mason struct btrfs_key found_key; 1498e02119d5SChris Mason struct btrfs_key log_key; 1499e02119d5SChris Mason struct inode *dir; 1500e02119d5SChris Mason u8 log_type; 15014bef0848SChris Mason int exists; 1502e02119d5SChris Mason int ret; 1503e02119d5SChris Mason 1504e02119d5SChris Mason dir = read_one_inode(root, key->objectid); 1505c00e9493STsutomu Itoh if (!dir) 1506c00e9493STsutomu Itoh return -EIO; 1507e02119d5SChris Mason 1508e02119d5SChris Mason name_len = btrfs_dir_name_len(eb, di); 1509e02119d5SChris Mason name = kmalloc(name_len, GFP_NOFS); 15102a29edc6Sliubo if (!name) 15112a29edc6Sliubo return -ENOMEM; 15122a29edc6Sliubo 1513e02119d5SChris 
Mason log_type = btrfs_dir_type(eb, di); 1514e02119d5SChris Mason read_extent_buffer(eb, name, (unsigned long)(di + 1), 1515e02119d5SChris Mason name_len); 1516e02119d5SChris Mason 1517e02119d5SChris Mason btrfs_dir_item_key_to_cpu(eb, di, &log_key); 15184bef0848SChris Mason exists = btrfs_lookup_inode(trans, root, path, &log_key, 0); 15194bef0848SChris Mason if (exists == 0) 15204bef0848SChris Mason exists = 1; 15214bef0848SChris Mason else 15224bef0848SChris Mason exists = 0; 1523b3b4aa74SDavid Sterba btrfs_release_path(path); 15244bef0848SChris Mason 1525e02119d5SChris Mason if (key->type == BTRFS_DIR_ITEM_KEY) { 1526e02119d5SChris Mason dst_di = btrfs_lookup_dir_item(trans, root, path, key->objectid, 1527e02119d5SChris Mason name, name_len, 1); 1528d397712bSChris Mason } else if (key->type == BTRFS_DIR_INDEX_KEY) { 1529e02119d5SChris Mason dst_di = btrfs_lookup_dir_index_item(trans, root, path, 1530e02119d5SChris Mason key->objectid, 1531e02119d5SChris Mason key->offset, name, 1532e02119d5SChris Mason name_len, 1); 1533e02119d5SChris Mason } else { 1534e02119d5SChris Mason BUG(); 1535e02119d5SChris Mason } 1536c704005dSDavid Sterba if (IS_ERR_OR_NULL(dst_di)) { 1537e02119d5SChris Mason /* we need a sequence number to insert, so we only 1538e02119d5SChris Mason * do inserts for the BTRFS_DIR_INDEX_KEY types 1539e02119d5SChris Mason */ 1540e02119d5SChris Mason if (key->type != BTRFS_DIR_INDEX_KEY) 1541e02119d5SChris Mason goto out; 1542e02119d5SChris Mason goto insert; 1543e02119d5SChris Mason } 1544e02119d5SChris Mason 1545e02119d5SChris Mason btrfs_dir_item_key_to_cpu(path->nodes[0], dst_di, &found_key); 1546e02119d5SChris Mason /* the existing item matches the logged item */ 1547e02119d5SChris Mason if (found_key.objectid == log_key.objectid && 1548e02119d5SChris Mason found_key.type == log_key.type && 1549e02119d5SChris Mason found_key.offset == log_key.offset && 1550e02119d5SChris Mason btrfs_dir_type(path->nodes[0], dst_di) == log_type) { 1551e02119d5SChris 
Mason goto out; 1552e02119d5SChris Mason } 1553e02119d5SChris Mason 1554e02119d5SChris Mason /* 1555e02119d5SChris Mason * don't drop the conflicting directory entry if the inode 1556e02119d5SChris Mason * for the new entry doesn't exist 1557e02119d5SChris Mason */ 15584bef0848SChris Mason if (!exists) 1559e02119d5SChris Mason goto out; 1560e02119d5SChris Mason 1561e02119d5SChris Mason ret = drop_one_dir_item(trans, root, path, dir, dst_di); 1562e02119d5SChris Mason BUG_ON(ret); 1563e02119d5SChris Mason 1564e02119d5SChris Mason if (key->type == BTRFS_DIR_INDEX_KEY) 1565e02119d5SChris Mason goto insert; 1566e02119d5SChris Mason out: 1567b3b4aa74SDavid Sterba btrfs_release_path(path); 1568e02119d5SChris Mason kfree(name); 1569e02119d5SChris Mason iput(dir); 1570e02119d5SChris Mason return 0; 1571e02119d5SChris Mason 1572e02119d5SChris Mason insert: 1573b3b4aa74SDavid Sterba btrfs_release_path(path); 1574e02119d5SChris Mason ret = insert_one_name(trans, root, path, key->objectid, key->offset, 1575e02119d5SChris Mason name, name_len, log_type, &log_key); 1576e02119d5SChris Mason 1577c293498bSStoyan Gaydarov BUG_ON(ret && ret != -ENOENT); 1578e02119d5SChris Mason goto out; 1579e02119d5SChris Mason } 1580e02119d5SChris Mason 1581e02119d5SChris Mason /* 1582e02119d5SChris Mason * find all the names in a directory item and reconcile them into 1583e02119d5SChris Mason * the subvolume. 
Only BTRFS_DIR_ITEM_KEY types will have more than 1584e02119d5SChris Mason * one name in a directory item, but the same code gets used for 1585e02119d5SChris Mason * both directory index types 1586e02119d5SChris Mason */ 1587e02119d5SChris Mason static noinline int replay_one_dir_item(struct btrfs_trans_handle *trans, 1588e02119d5SChris Mason struct btrfs_root *root, 1589e02119d5SChris Mason struct btrfs_path *path, 1590e02119d5SChris Mason struct extent_buffer *eb, int slot, 1591e02119d5SChris Mason struct btrfs_key *key) 1592e02119d5SChris Mason { 1593e02119d5SChris Mason int ret; 1594e02119d5SChris Mason u32 item_size = btrfs_item_size_nr(eb, slot); 1595e02119d5SChris Mason struct btrfs_dir_item *di; 1596e02119d5SChris Mason int name_len; 1597e02119d5SChris Mason unsigned long ptr; 1598e02119d5SChris Mason unsigned long ptr_end; 1599e02119d5SChris Mason 1600e02119d5SChris Mason ptr = btrfs_item_ptr_offset(eb, slot); 1601e02119d5SChris Mason ptr_end = ptr + item_size; 1602e02119d5SChris Mason while (ptr < ptr_end) { 1603e02119d5SChris Mason di = (struct btrfs_dir_item *)ptr; 160422a94d44SJosef Bacik if (verify_dir_item(root, eb, di)) 160522a94d44SJosef Bacik return -EIO; 1606e02119d5SChris Mason name_len = btrfs_dir_name_len(eb, di); 1607e02119d5SChris Mason ret = replay_one_name(trans, root, path, eb, di, key); 1608e02119d5SChris Mason BUG_ON(ret); 1609e02119d5SChris Mason ptr = (unsigned long)(di + 1); 1610e02119d5SChris Mason ptr += name_len; 1611e02119d5SChris Mason } 1612e02119d5SChris Mason return 0; 1613e02119d5SChris Mason } 1614e02119d5SChris Mason 1615e02119d5SChris Mason /* 1616e02119d5SChris Mason * directory replay has two parts. There are the standard directory 1617e02119d5SChris Mason * items in the log copied from the subvolume, and range items 1618e02119d5SChris Mason * created in the log while the subvolume was logged. 
1619e02119d5SChris Mason * 1620e02119d5SChris Mason * The range items tell us which parts of the key space the log 1621e02119d5SChris Mason * is authoritative for. During replay, if a key in the subvolume 1622e02119d5SChris Mason * directory is in a logged range item, but not actually in the log 1623e02119d5SChris Mason * that means it was deleted from the directory before the fsync 1624e02119d5SChris Mason * and should be removed. 1625e02119d5SChris Mason */ 1626e02119d5SChris Mason static noinline int find_dir_range(struct btrfs_root *root, 1627e02119d5SChris Mason struct btrfs_path *path, 1628e02119d5SChris Mason u64 dirid, int key_type, 1629e02119d5SChris Mason u64 *start_ret, u64 *end_ret) 1630e02119d5SChris Mason { 1631e02119d5SChris Mason struct btrfs_key key; 1632e02119d5SChris Mason u64 found_end; 1633e02119d5SChris Mason struct btrfs_dir_log_item *item; 1634e02119d5SChris Mason int ret; 1635e02119d5SChris Mason int nritems; 1636e02119d5SChris Mason 1637e02119d5SChris Mason if (*start_ret == (u64)-1) 1638e02119d5SChris Mason return 1; 1639e02119d5SChris Mason 1640e02119d5SChris Mason key.objectid = dirid; 1641e02119d5SChris Mason key.type = key_type; 1642e02119d5SChris Mason key.offset = *start_ret; 1643e02119d5SChris Mason 1644e02119d5SChris Mason ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 1645e02119d5SChris Mason if (ret < 0) 1646e02119d5SChris Mason goto out; 1647e02119d5SChris Mason if (ret > 0) { 1648e02119d5SChris Mason if (path->slots[0] == 0) 1649e02119d5SChris Mason goto out; 1650e02119d5SChris Mason path->slots[0]--; 1651e02119d5SChris Mason } 1652e02119d5SChris Mason if (ret != 0) 1653e02119d5SChris Mason btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 1654e02119d5SChris Mason 1655e02119d5SChris Mason if (key.type != key_type || key.objectid != dirid) { 1656e02119d5SChris Mason ret = 1; 1657e02119d5SChris Mason goto next; 1658e02119d5SChris Mason } 1659e02119d5SChris Mason item = btrfs_item_ptr(path->nodes[0], 
path->slots[0], 1660e02119d5SChris Mason struct btrfs_dir_log_item); 1661e02119d5SChris Mason found_end = btrfs_dir_log_end(path->nodes[0], item); 1662e02119d5SChris Mason 1663e02119d5SChris Mason if (*start_ret >= key.offset && *start_ret <= found_end) { 1664e02119d5SChris Mason ret = 0; 1665e02119d5SChris Mason *start_ret = key.offset; 1666e02119d5SChris Mason *end_ret = found_end; 1667e02119d5SChris Mason goto out; 1668e02119d5SChris Mason } 1669e02119d5SChris Mason ret = 1; 1670e02119d5SChris Mason next: 1671e02119d5SChris Mason /* check the next slot in the tree to see if it is a valid item */ 1672e02119d5SChris Mason nritems = btrfs_header_nritems(path->nodes[0]); 1673e02119d5SChris Mason if (path->slots[0] >= nritems) { 1674e02119d5SChris Mason ret = btrfs_next_leaf(root, path); 1675e02119d5SChris Mason if (ret) 1676e02119d5SChris Mason goto out; 1677e02119d5SChris Mason } else { 1678e02119d5SChris Mason path->slots[0]++; 1679e02119d5SChris Mason } 1680e02119d5SChris Mason 1681e02119d5SChris Mason btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 1682e02119d5SChris Mason 1683e02119d5SChris Mason if (key.type != key_type || key.objectid != dirid) { 1684e02119d5SChris Mason ret = 1; 1685e02119d5SChris Mason goto out; 1686e02119d5SChris Mason } 1687e02119d5SChris Mason item = btrfs_item_ptr(path->nodes[0], path->slots[0], 1688e02119d5SChris Mason struct btrfs_dir_log_item); 1689e02119d5SChris Mason found_end = btrfs_dir_log_end(path->nodes[0], item); 1690e02119d5SChris Mason *start_ret = key.offset; 1691e02119d5SChris Mason *end_ret = found_end; 1692e02119d5SChris Mason ret = 0; 1693e02119d5SChris Mason out: 1694b3b4aa74SDavid Sterba btrfs_release_path(path); 1695e02119d5SChris Mason return ret; 1696e02119d5SChris Mason } 1697e02119d5SChris Mason 1698e02119d5SChris Mason /* 1699e02119d5SChris Mason * this looks for a given directory item in the log. 
If the directory 1700e02119d5SChris Mason * item is not in the log, the item is removed and the inode it points 1701e02119d5SChris Mason * to is unlinked 1702e02119d5SChris Mason */ 1703e02119d5SChris Mason static noinline int check_item_in_log(struct btrfs_trans_handle *trans, 1704e02119d5SChris Mason struct btrfs_root *root, 1705e02119d5SChris Mason struct btrfs_root *log, 1706e02119d5SChris Mason struct btrfs_path *path, 1707e02119d5SChris Mason struct btrfs_path *log_path, 1708e02119d5SChris Mason struct inode *dir, 1709e02119d5SChris Mason struct btrfs_key *dir_key) 1710e02119d5SChris Mason { 1711e02119d5SChris Mason int ret; 1712e02119d5SChris Mason struct extent_buffer *eb; 1713e02119d5SChris Mason int slot; 1714e02119d5SChris Mason u32 item_size; 1715e02119d5SChris Mason struct btrfs_dir_item *di; 1716e02119d5SChris Mason struct btrfs_dir_item *log_di; 1717e02119d5SChris Mason int name_len; 1718e02119d5SChris Mason unsigned long ptr; 1719e02119d5SChris Mason unsigned long ptr_end; 1720e02119d5SChris Mason char *name; 1721e02119d5SChris Mason struct inode *inode; 1722e02119d5SChris Mason struct btrfs_key location; 1723e02119d5SChris Mason 1724e02119d5SChris Mason again: 1725e02119d5SChris Mason eb = path->nodes[0]; 1726e02119d5SChris Mason slot = path->slots[0]; 1727e02119d5SChris Mason item_size = btrfs_item_size_nr(eb, slot); 1728e02119d5SChris Mason ptr = btrfs_item_ptr_offset(eb, slot); 1729e02119d5SChris Mason ptr_end = ptr + item_size; 1730e02119d5SChris Mason while (ptr < ptr_end) { 1731e02119d5SChris Mason di = (struct btrfs_dir_item *)ptr; 173222a94d44SJosef Bacik if (verify_dir_item(root, eb, di)) { 173322a94d44SJosef Bacik ret = -EIO; 173422a94d44SJosef Bacik goto out; 173522a94d44SJosef Bacik } 173622a94d44SJosef Bacik 1737e02119d5SChris Mason name_len = btrfs_dir_name_len(eb, di); 1738e02119d5SChris Mason name = kmalloc(name_len, GFP_NOFS); 1739e02119d5SChris Mason if (!name) { 1740e02119d5SChris Mason ret = -ENOMEM; 1741e02119d5SChris Mason 
goto out; 1742e02119d5SChris Mason } 1743e02119d5SChris Mason read_extent_buffer(eb, name, (unsigned long)(di + 1), 1744e02119d5SChris Mason name_len); 1745e02119d5SChris Mason log_di = NULL; 174612fcfd22SChris Mason if (log && dir_key->type == BTRFS_DIR_ITEM_KEY) { 1747e02119d5SChris Mason log_di = btrfs_lookup_dir_item(trans, log, log_path, 1748e02119d5SChris Mason dir_key->objectid, 1749e02119d5SChris Mason name, name_len, 0); 175012fcfd22SChris Mason } else if (log && dir_key->type == BTRFS_DIR_INDEX_KEY) { 1751e02119d5SChris Mason log_di = btrfs_lookup_dir_index_item(trans, log, 1752e02119d5SChris Mason log_path, 1753e02119d5SChris Mason dir_key->objectid, 1754e02119d5SChris Mason dir_key->offset, 1755e02119d5SChris Mason name, name_len, 0); 1756e02119d5SChris Mason } 1757c704005dSDavid Sterba if (IS_ERR_OR_NULL(log_di)) { 1758e02119d5SChris Mason btrfs_dir_item_key_to_cpu(eb, di, &location); 1759b3b4aa74SDavid Sterba btrfs_release_path(path); 1760b3b4aa74SDavid Sterba btrfs_release_path(log_path); 1761e02119d5SChris Mason inode = read_one_inode(root, location.objectid); 1762c00e9493STsutomu Itoh if (!inode) { 1763c00e9493STsutomu Itoh kfree(name); 1764c00e9493STsutomu Itoh return -EIO; 1765c00e9493STsutomu Itoh } 1766e02119d5SChris Mason 1767e02119d5SChris Mason ret = link_to_fixup_dir(trans, root, 1768e02119d5SChris Mason path, location.objectid); 1769e02119d5SChris Mason BUG_ON(ret); 1770e02119d5SChris Mason btrfs_inc_nlink(inode); 1771e02119d5SChris Mason ret = btrfs_unlink_inode(trans, root, dir, inode, 1772e02119d5SChris Mason name, name_len); 1773e02119d5SChris Mason BUG_ON(ret); 1774b6305567SChris Mason 1775b6305567SChris Mason btrfs_run_delayed_items(trans, root); 1776b6305567SChris Mason 1777e02119d5SChris Mason kfree(name); 1778e02119d5SChris Mason iput(inode); 1779e02119d5SChris Mason 1780e02119d5SChris Mason /* there might still be more names under this key 1781e02119d5SChris Mason * check and repeat if required 1782e02119d5SChris Mason */ 
1783e02119d5SChris Mason ret = btrfs_search_slot(NULL, root, dir_key, path, 1784e02119d5SChris Mason 0, 0); 1785e02119d5SChris Mason if (ret == 0) 1786e02119d5SChris Mason goto again; 1787e02119d5SChris Mason ret = 0; 1788e02119d5SChris Mason goto out; 1789e02119d5SChris Mason } 1790b3b4aa74SDavid Sterba btrfs_release_path(log_path); 1791e02119d5SChris Mason kfree(name); 1792e02119d5SChris Mason 1793e02119d5SChris Mason ptr = (unsigned long)(di + 1); 1794e02119d5SChris Mason ptr += name_len; 1795e02119d5SChris Mason } 1796e02119d5SChris Mason ret = 0; 1797e02119d5SChris Mason out: 1798b3b4aa74SDavid Sterba btrfs_release_path(path); 1799b3b4aa74SDavid Sterba btrfs_release_path(log_path); 1800e02119d5SChris Mason return ret; 1801e02119d5SChris Mason } 1802e02119d5SChris Mason 1803e02119d5SChris Mason /* 1804e02119d5SChris Mason * deletion replay happens before we copy any new directory items 1805e02119d5SChris Mason * out of the log or out of backreferences from inodes. It 1806e02119d5SChris Mason * scans the log to find ranges of keys that log is authoritative for, 1807e02119d5SChris Mason * and then scans the directory to find items in those ranges that are 1808e02119d5SChris Mason * not present in the log. 1809e02119d5SChris Mason * 1810e02119d5SChris Mason * Anything we don't find in the log is unlinked and removed from the 1811e02119d5SChris Mason * directory. 
1812e02119d5SChris Mason */ 1813e02119d5SChris Mason static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans, 1814e02119d5SChris Mason struct btrfs_root *root, 1815e02119d5SChris Mason struct btrfs_root *log, 1816e02119d5SChris Mason struct btrfs_path *path, 181712fcfd22SChris Mason u64 dirid, int del_all) 1818e02119d5SChris Mason { 1819e02119d5SChris Mason u64 range_start; 1820e02119d5SChris Mason u64 range_end; 1821e02119d5SChris Mason int key_type = BTRFS_DIR_LOG_ITEM_KEY; 1822e02119d5SChris Mason int ret = 0; 1823e02119d5SChris Mason struct btrfs_key dir_key; 1824e02119d5SChris Mason struct btrfs_key found_key; 1825e02119d5SChris Mason struct btrfs_path *log_path; 1826e02119d5SChris Mason struct inode *dir; 1827e02119d5SChris Mason 1828e02119d5SChris Mason dir_key.objectid = dirid; 1829e02119d5SChris Mason dir_key.type = BTRFS_DIR_ITEM_KEY; 1830e02119d5SChris Mason log_path = btrfs_alloc_path(); 1831e02119d5SChris Mason if (!log_path) 1832e02119d5SChris Mason return -ENOMEM; 1833e02119d5SChris Mason 1834e02119d5SChris Mason dir = read_one_inode(root, dirid); 1835e02119d5SChris Mason /* it isn't an error if the inode isn't there, that can happen 1836e02119d5SChris Mason * because we replay the deletes before we copy in the inode item 1837e02119d5SChris Mason * from the log 1838e02119d5SChris Mason */ 1839e02119d5SChris Mason if (!dir) { 1840e02119d5SChris Mason btrfs_free_path(log_path); 1841e02119d5SChris Mason return 0; 1842e02119d5SChris Mason } 1843e02119d5SChris Mason again: 1844e02119d5SChris Mason range_start = 0; 1845e02119d5SChris Mason range_end = 0; 1846e02119d5SChris Mason while (1) { 184712fcfd22SChris Mason if (del_all) 184812fcfd22SChris Mason range_end = (u64)-1; 184912fcfd22SChris Mason else { 1850e02119d5SChris Mason ret = find_dir_range(log, path, dirid, key_type, 1851e02119d5SChris Mason &range_start, &range_end); 1852e02119d5SChris Mason if (ret != 0) 1853e02119d5SChris Mason break; 185412fcfd22SChris Mason } 
1855e02119d5SChris Mason 1856e02119d5SChris Mason dir_key.offset = range_start; 1857e02119d5SChris Mason while (1) { 1858e02119d5SChris Mason int nritems; 1859e02119d5SChris Mason ret = btrfs_search_slot(NULL, root, &dir_key, path, 1860e02119d5SChris Mason 0, 0); 1861e02119d5SChris Mason if (ret < 0) 1862e02119d5SChris Mason goto out; 1863e02119d5SChris Mason 1864e02119d5SChris Mason nritems = btrfs_header_nritems(path->nodes[0]); 1865e02119d5SChris Mason if (path->slots[0] >= nritems) { 1866e02119d5SChris Mason ret = btrfs_next_leaf(root, path); 1867e02119d5SChris Mason if (ret) 1868e02119d5SChris Mason break; 1869e02119d5SChris Mason } 1870e02119d5SChris Mason btrfs_item_key_to_cpu(path->nodes[0], &found_key, 1871e02119d5SChris Mason path->slots[0]); 1872e02119d5SChris Mason if (found_key.objectid != dirid || 1873e02119d5SChris Mason found_key.type != dir_key.type) 1874e02119d5SChris Mason goto next_type; 1875e02119d5SChris Mason 1876e02119d5SChris Mason if (found_key.offset > range_end) 1877e02119d5SChris Mason break; 1878e02119d5SChris Mason 1879e02119d5SChris Mason ret = check_item_in_log(trans, root, log, path, 188012fcfd22SChris Mason log_path, dir, 188112fcfd22SChris Mason &found_key); 1882e02119d5SChris Mason BUG_ON(ret); 1883e02119d5SChris Mason if (found_key.offset == (u64)-1) 1884e02119d5SChris Mason break; 1885e02119d5SChris Mason dir_key.offset = found_key.offset + 1; 1886e02119d5SChris Mason } 1887b3b4aa74SDavid Sterba btrfs_release_path(path); 1888e02119d5SChris Mason if (range_end == (u64)-1) 1889e02119d5SChris Mason break; 1890e02119d5SChris Mason range_start = range_end + 1; 1891e02119d5SChris Mason } 1892e02119d5SChris Mason 1893e02119d5SChris Mason next_type: 1894e02119d5SChris Mason ret = 0; 1895e02119d5SChris Mason if (key_type == BTRFS_DIR_LOG_ITEM_KEY) { 1896e02119d5SChris Mason key_type = BTRFS_DIR_LOG_INDEX_KEY; 1897e02119d5SChris Mason dir_key.type = BTRFS_DIR_INDEX_KEY; 1898b3b4aa74SDavid Sterba btrfs_release_path(path); 
1899e02119d5SChris Mason goto again; 1900e02119d5SChris Mason } 1901e02119d5SChris Mason out: 1902b3b4aa74SDavid Sterba btrfs_release_path(path); 1903e02119d5SChris Mason btrfs_free_path(log_path); 1904e02119d5SChris Mason iput(dir); 1905e02119d5SChris Mason return ret; 1906e02119d5SChris Mason } 1907e02119d5SChris Mason 1908e02119d5SChris Mason /* 1909e02119d5SChris Mason * the process_func used to replay items from the log tree. This 1910e02119d5SChris Mason * gets called in two different stages. The first stage just looks 1911e02119d5SChris Mason * for inodes and makes sure they are all copied into the subvolume. 1912e02119d5SChris Mason * 1913e02119d5SChris Mason * The second stage copies all the other item types from the log into 1914e02119d5SChris Mason * the subvolume. The two stage approach is slower, but gets rid of 1915e02119d5SChris Mason * lots of complexity around inodes referencing other inodes that exist 1916e02119d5SChris Mason * only in the log (references come from either directory items or inode 1917e02119d5SChris Mason * back refs). 
1918e02119d5SChris Mason */ 1919e02119d5SChris Mason static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, 1920e02119d5SChris Mason struct walk_control *wc, u64 gen) 1921e02119d5SChris Mason { 1922e02119d5SChris Mason int nritems; 1923e02119d5SChris Mason struct btrfs_path *path; 1924e02119d5SChris Mason struct btrfs_root *root = wc->replay_dest; 1925e02119d5SChris Mason struct btrfs_key key; 1926e02119d5SChris Mason int level; 1927e02119d5SChris Mason int i; 1928e02119d5SChris Mason int ret; 1929e02119d5SChris Mason 1930018642a1STsutomu Itoh ret = btrfs_read_buffer(eb, gen); 1931018642a1STsutomu Itoh if (ret) 1932018642a1STsutomu Itoh return ret; 1933e02119d5SChris Mason 1934e02119d5SChris Mason level = btrfs_header_level(eb); 1935e02119d5SChris Mason 1936e02119d5SChris Mason if (level != 0) 1937e02119d5SChris Mason return 0; 1938e02119d5SChris Mason 1939e02119d5SChris Mason path = btrfs_alloc_path(); 19401e5063d0SMark Fasheh if (!path) 19411e5063d0SMark Fasheh return -ENOMEM; 1942e02119d5SChris Mason 1943e02119d5SChris Mason nritems = btrfs_header_nritems(eb); 1944e02119d5SChris Mason for (i = 0; i < nritems; i++) { 1945e02119d5SChris Mason btrfs_item_key_to_cpu(eb, &key, i); 1946e02119d5SChris Mason 1947e02119d5SChris Mason /* inode keys are done during the first stage */ 1948e02119d5SChris Mason if (key.type == BTRFS_INODE_ITEM_KEY && 1949e02119d5SChris Mason wc->stage == LOG_WALK_REPLAY_INODES) { 1950e02119d5SChris Mason struct btrfs_inode_item *inode_item; 1951e02119d5SChris Mason u32 mode; 1952e02119d5SChris Mason 1953e02119d5SChris Mason inode_item = btrfs_item_ptr(eb, i, 1954e02119d5SChris Mason struct btrfs_inode_item); 1955e02119d5SChris Mason mode = btrfs_inode_mode(eb, inode_item); 1956e02119d5SChris Mason if (S_ISDIR(mode)) { 1957e02119d5SChris Mason ret = replay_dir_deletes(wc->trans, 195812fcfd22SChris Mason root, log, path, key.objectid, 0); 1959*b50c6e25SJosef Bacik if (ret) 1960*b50c6e25SJosef Bacik break; 
1961e02119d5SChris Mason } 1962e02119d5SChris Mason ret = overwrite_item(wc->trans, root, path, 1963e02119d5SChris Mason eb, i, &key); 1964*b50c6e25SJosef Bacik if (ret) 1965*b50c6e25SJosef Bacik break; 1966e02119d5SChris Mason 1967c71bf099SYan, Zheng /* for regular files, make sure corresponding 1968c71bf099SYan, Zheng * orhpan item exist. extents past the new EOF 1969c71bf099SYan, Zheng * will be truncated later by orphan cleanup. 1970e02119d5SChris Mason */ 1971e02119d5SChris Mason if (S_ISREG(mode)) { 1972c71bf099SYan, Zheng ret = insert_orphan_item(wc->trans, root, 1973e02119d5SChris Mason key.objectid); 1974*b50c6e25SJosef Bacik if (ret) 1975*b50c6e25SJosef Bacik break; 1976c71bf099SYan, Zheng } 1977a74ac322SChris Mason 1978e02119d5SChris Mason ret = link_to_fixup_dir(wc->trans, root, 1979e02119d5SChris Mason path, key.objectid); 1980*b50c6e25SJosef Bacik if (ret) 1981*b50c6e25SJosef Bacik break; 1982e02119d5SChris Mason } 1983e02119d5SChris Mason if (wc->stage < LOG_WALK_REPLAY_ALL) 1984e02119d5SChris Mason continue; 1985e02119d5SChris Mason 1986e02119d5SChris Mason /* these keys are simply copied */ 1987e02119d5SChris Mason if (key.type == BTRFS_XATTR_ITEM_KEY) { 1988e02119d5SChris Mason ret = overwrite_item(wc->trans, root, path, 1989e02119d5SChris Mason eb, i, &key); 1990*b50c6e25SJosef Bacik if (ret) 1991*b50c6e25SJosef Bacik break; 1992e02119d5SChris Mason } else if (key.type == BTRFS_INODE_REF_KEY) { 1993e02119d5SChris Mason ret = add_inode_ref(wc->trans, root, log, path, 1994e02119d5SChris Mason eb, i, &key); 1995*b50c6e25SJosef Bacik if (ret && ret != -ENOENT) 1996*b50c6e25SJosef Bacik break; 1997*b50c6e25SJosef Bacik ret = 0; 1998f186373fSMark Fasheh } else if (key.type == BTRFS_INODE_EXTREF_KEY) { 1999f186373fSMark Fasheh ret = add_inode_ref(wc->trans, root, log, path, 2000f186373fSMark Fasheh eb, i, &key); 2001*b50c6e25SJosef Bacik if (ret && ret != -ENOENT) 2002*b50c6e25SJosef Bacik break; 2003*b50c6e25SJosef Bacik ret = 0; 2004e02119d5SChris 
Mason } else if (key.type == BTRFS_EXTENT_DATA_KEY) { 2005e02119d5SChris Mason ret = replay_one_extent(wc->trans, root, path, 2006e02119d5SChris Mason eb, i, &key); 2007*b50c6e25SJosef Bacik if (ret) 2008*b50c6e25SJosef Bacik break; 2009e02119d5SChris Mason } else if (key.type == BTRFS_DIR_ITEM_KEY || 2010e02119d5SChris Mason key.type == BTRFS_DIR_INDEX_KEY) { 2011e02119d5SChris Mason ret = replay_one_dir_item(wc->trans, root, path, 2012e02119d5SChris Mason eb, i, &key); 2013*b50c6e25SJosef Bacik if (ret) 2014*b50c6e25SJosef Bacik break; 2015e02119d5SChris Mason } 2016e02119d5SChris Mason } 2017e02119d5SChris Mason btrfs_free_path(path); 2018*b50c6e25SJosef Bacik return ret; 2019e02119d5SChris Mason } 2020e02119d5SChris Mason 2021d397712bSChris Mason static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, 2022e02119d5SChris Mason struct btrfs_root *root, 2023e02119d5SChris Mason struct btrfs_path *path, int *level, 2024e02119d5SChris Mason struct walk_control *wc) 2025e02119d5SChris Mason { 2026e02119d5SChris Mason u64 root_owner; 2027e02119d5SChris Mason u64 bytenr; 2028e02119d5SChris Mason u64 ptr_gen; 2029e02119d5SChris Mason struct extent_buffer *next; 2030e02119d5SChris Mason struct extent_buffer *cur; 2031e02119d5SChris Mason struct extent_buffer *parent; 2032e02119d5SChris Mason u32 blocksize; 2033e02119d5SChris Mason int ret = 0; 2034e02119d5SChris Mason 2035e02119d5SChris Mason WARN_ON(*level < 0); 2036e02119d5SChris Mason WARN_ON(*level >= BTRFS_MAX_LEVEL); 2037e02119d5SChris Mason 2038e02119d5SChris Mason while (*level > 0) { 2039e02119d5SChris Mason WARN_ON(*level < 0); 2040e02119d5SChris Mason WARN_ON(*level >= BTRFS_MAX_LEVEL); 2041e02119d5SChris Mason cur = path->nodes[*level]; 2042e02119d5SChris Mason 2043e02119d5SChris Mason if (btrfs_header_level(cur) != *level) 2044e02119d5SChris Mason WARN_ON(1); 2045e02119d5SChris Mason 2046e02119d5SChris Mason if (path->slots[*level] >= 2047e02119d5SChris Mason btrfs_header_nritems(cur)) 
2048e02119d5SChris Mason break; 2049e02119d5SChris Mason 2050e02119d5SChris Mason bytenr = btrfs_node_blockptr(cur, path->slots[*level]); 2051e02119d5SChris Mason ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); 2052e02119d5SChris Mason blocksize = btrfs_level_size(root, *level - 1); 2053e02119d5SChris Mason 2054e02119d5SChris Mason parent = path->nodes[*level]; 2055e02119d5SChris Mason root_owner = btrfs_header_owner(parent); 2056e02119d5SChris Mason 2057e02119d5SChris Mason next = btrfs_find_create_tree_block(root, bytenr, blocksize); 20582a29edc6Sliubo if (!next) 20592a29edc6Sliubo return -ENOMEM; 2060e02119d5SChris Mason 20614a500fd1SYan, Zheng if (*level == 1) { 20621e5063d0SMark Fasheh ret = wc->process_func(root, next, wc, ptr_gen); 2063*b50c6e25SJosef Bacik if (ret) { 2064*b50c6e25SJosef Bacik free_extent_buffer(next); 20651e5063d0SMark Fasheh return ret; 2066*b50c6e25SJosef Bacik } 2067e02119d5SChris Mason 2068e02119d5SChris Mason path->slots[*level]++; 2069e02119d5SChris Mason if (wc->free) { 2070018642a1STsutomu Itoh ret = btrfs_read_buffer(next, ptr_gen); 2071018642a1STsutomu Itoh if (ret) { 2072018642a1STsutomu Itoh free_extent_buffer(next); 2073018642a1STsutomu Itoh return ret; 2074018642a1STsutomu Itoh } 2075e02119d5SChris Mason 2076e02119d5SChris Mason btrfs_tree_lock(next); 2077b4ce94deSChris Mason btrfs_set_lock_blocking(next); 2078bd681513SChris Mason clean_tree_block(trans, root, next); 2079e02119d5SChris Mason btrfs_wait_tree_block_writeback(next); 2080e02119d5SChris Mason btrfs_tree_unlock(next); 2081e02119d5SChris Mason 2082e02119d5SChris Mason WARN_ON(root_owner != 2083e02119d5SChris Mason BTRFS_TREE_LOG_OBJECTID); 2084e688b725SChris Mason ret = btrfs_free_and_pin_reserved_extent(root, 2085d00aff00SChris Mason bytenr, blocksize); 208679787eaaSJeff Mahoney BUG_ON(ret); /* -ENOMEM or logic errors */ 2087e02119d5SChris Mason } 2088e02119d5SChris Mason free_extent_buffer(next); 2089e02119d5SChris Mason continue; 2090e02119d5SChris 
Mason } 2091018642a1STsutomu Itoh ret = btrfs_read_buffer(next, ptr_gen); 2092018642a1STsutomu Itoh if (ret) { 2093018642a1STsutomu Itoh free_extent_buffer(next); 2094018642a1STsutomu Itoh return ret; 2095018642a1STsutomu Itoh } 2096e02119d5SChris Mason 2097e02119d5SChris Mason WARN_ON(*level <= 0); 2098e02119d5SChris Mason if (path->nodes[*level-1]) 2099e02119d5SChris Mason free_extent_buffer(path->nodes[*level-1]); 2100e02119d5SChris Mason path->nodes[*level-1] = next; 2101e02119d5SChris Mason *level = btrfs_header_level(next); 2102e02119d5SChris Mason path->slots[*level] = 0; 2103e02119d5SChris Mason cond_resched(); 2104e02119d5SChris Mason } 2105e02119d5SChris Mason WARN_ON(*level < 0); 2106e02119d5SChris Mason WARN_ON(*level >= BTRFS_MAX_LEVEL); 2107e02119d5SChris Mason 21084a500fd1SYan, Zheng path->slots[*level] = btrfs_header_nritems(path->nodes[*level]); 2109e02119d5SChris Mason 2110e02119d5SChris Mason cond_resched(); 2111e02119d5SChris Mason return 0; 2112e02119d5SChris Mason } 2113e02119d5SChris Mason 2114d397712bSChris Mason static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans, 2115e02119d5SChris Mason struct btrfs_root *root, 2116e02119d5SChris Mason struct btrfs_path *path, int *level, 2117e02119d5SChris Mason struct walk_control *wc) 2118e02119d5SChris Mason { 2119e02119d5SChris Mason u64 root_owner; 2120e02119d5SChris Mason int i; 2121e02119d5SChris Mason int slot; 2122e02119d5SChris Mason int ret; 2123e02119d5SChris Mason 2124e02119d5SChris Mason for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) { 2125e02119d5SChris Mason slot = path->slots[i]; 21264a500fd1SYan, Zheng if (slot + 1 < btrfs_header_nritems(path->nodes[i])) { 2127e02119d5SChris Mason path->slots[i]++; 2128e02119d5SChris Mason *level = i; 2129e02119d5SChris Mason WARN_ON(*level == 0); 2130e02119d5SChris Mason return 0; 2131e02119d5SChris Mason } else { 213231840ae1SZheng Yan struct extent_buffer *parent; 213331840ae1SZheng Yan if (path->nodes[*level] == 
root->node) 213431840ae1SZheng Yan parent = path->nodes[*level]; 213531840ae1SZheng Yan else 213631840ae1SZheng Yan parent = path->nodes[*level + 1]; 213731840ae1SZheng Yan 213831840ae1SZheng Yan root_owner = btrfs_header_owner(parent); 21391e5063d0SMark Fasheh ret = wc->process_func(root, path->nodes[*level], wc, 2140e02119d5SChris Mason btrfs_header_generation(path->nodes[*level])); 21411e5063d0SMark Fasheh if (ret) 21421e5063d0SMark Fasheh return ret; 21431e5063d0SMark Fasheh 2144e02119d5SChris Mason if (wc->free) { 2145e02119d5SChris Mason struct extent_buffer *next; 2146e02119d5SChris Mason 2147e02119d5SChris Mason next = path->nodes[*level]; 2148e02119d5SChris Mason 2149e02119d5SChris Mason btrfs_tree_lock(next); 2150b4ce94deSChris Mason btrfs_set_lock_blocking(next); 2151bd681513SChris Mason clean_tree_block(trans, root, next); 2152e02119d5SChris Mason btrfs_wait_tree_block_writeback(next); 2153e02119d5SChris Mason btrfs_tree_unlock(next); 2154e02119d5SChris Mason 2155e02119d5SChris Mason WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID); 2156e688b725SChris Mason ret = btrfs_free_and_pin_reserved_extent(root, 2157e02119d5SChris Mason path->nodes[*level]->start, 2158d00aff00SChris Mason path->nodes[*level]->len); 2159e02119d5SChris Mason BUG_ON(ret); 2160e02119d5SChris Mason } 2161e02119d5SChris Mason free_extent_buffer(path->nodes[*level]); 2162e02119d5SChris Mason path->nodes[*level] = NULL; 2163e02119d5SChris Mason *level = i + 1; 2164e02119d5SChris Mason } 2165e02119d5SChris Mason } 2166e02119d5SChris Mason return 1; 2167e02119d5SChris Mason } 2168e02119d5SChris Mason 2169e02119d5SChris Mason /* 2170e02119d5SChris Mason * drop the reference count on the tree rooted at 'snap'. This traverses 2171e02119d5SChris Mason * the tree freeing any blocks that have a ref count of zero after being 2172e02119d5SChris Mason * decremented. 
2173e02119d5SChris Mason */ 2174e02119d5SChris Mason static int walk_log_tree(struct btrfs_trans_handle *trans, 2175e02119d5SChris Mason struct btrfs_root *log, struct walk_control *wc) 2176e02119d5SChris Mason { 2177e02119d5SChris Mason int ret = 0; 2178e02119d5SChris Mason int wret; 2179e02119d5SChris Mason int level; 2180e02119d5SChris Mason struct btrfs_path *path; 2181e02119d5SChris Mason int orig_level; 2182e02119d5SChris Mason 2183e02119d5SChris Mason path = btrfs_alloc_path(); 2184db5b493aSTsutomu Itoh if (!path) 2185db5b493aSTsutomu Itoh return -ENOMEM; 2186e02119d5SChris Mason 2187e02119d5SChris Mason level = btrfs_header_level(log->node); 2188e02119d5SChris Mason orig_level = level; 2189e02119d5SChris Mason path->nodes[level] = log->node; 2190e02119d5SChris Mason extent_buffer_get(log->node); 2191e02119d5SChris Mason path->slots[level] = 0; 2192e02119d5SChris Mason 2193e02119d5SChris Mason while (1) { 2194e02119d5SChris Mason wret = walk_down_log_tree(trans, log, path, &level, wc); 2195e02119d5SChris Mason if (wret > 0) 2196e02119d5SChris Mason break; 219779787eaaSJeff Mahoney if (wret < 0) { 2198e02119d5SChris Mason ret = wret; 219979787eaaSJeff Mahoney goto out; 220079787eaaSJeff Mahoney } 2201e02119d5SChris Mason 2202e02119d5SChris Mason wret = walk_up_log_tree(trans, log, path, &level, wc); 2203e02119d5SChris Mason if (wret > 0) 2204e02119d5SChris Mason break; 220579787eaaSJeff Mahoney if (wret < 0) { 2206e02119d5SChris Mason ret = wret; 220779787eaaSJeff Mahoney goto out; 220879787eaaSJeff Mahoney } 2209e02119d5SChris Mason } 2210e02119d5SChris Mason 2211e02119d5SChris Mason /* was the root node processed? 
if not, catch it here */ 2212e02119d5SChris Mason if (path->nodes[orig_level]) { 221379787eaaSJeff Mahoney ret = wc->process_func(log, path->nodes[orig_level], wc, 2214e02119d5SChris Mason btrfs_header_generation(path->nodes[orig_level])); 221579787eaaSJeff Mahoney if (ret) 221679787eaaSJeff Mahoney goto out; 2217e02119d5SChris Mason if (wc->free) { 2218e02119d5SChris Mason struct extent_buffer *next; 2219e02119d5SChris Mason 2220e02119d5SChris Mason next = path->nodes[orig_level]; 2221e02119d5SChris Mason 2222e02119d5SChris Mason btrfs_tree_lock(next); 2223b4ce94deSChris Mason btrfs_set_lock_blocking(next); 2224bd681513SChris Mason clean_tree_block(trans, log, next); 2225e02119d5SChris Mason btrfs_wait_tree_block_writeback(next); 2226e02119d5SChris Mason btrfs_tree_unlock(next); 2227e02119d5SChris Mason 2228e02119d5SChris Mason WARN_ON(log->root_key.objectid != 2229e02119d5SChris Mason BTRFS_TREE_LOG_OBJECTID); 2230e688b725SChris Mason ret = btrfs_free_and_pin_reserved_extent(log, next->start, 2231d00aff00SChris Mason next->len); 223279787eaaSJeff Mahoney BUG_ON(ret); /* -ENOMEM or logic errors */ 2233e02119d5SChris Mason } 2234e02119d5SChris Mason } 2235e02119d5SChris Mason 223679787eaaSJeff Mahoney out: 2237e02119d5SChris Mason btrfs_free_path(path); 2238e02119d5SChris Mason return ret; 2239e02119d5SChris Mason } 2240e02119d5SChris Mason 22417237f183SYan Zheng /* 22427237f183SYan Zheng * helper function to update the item for a given subvolumes log root 22437237f183SYan Zheng * in the tree of log roots 22447237f183SYan Zheng */ 22457237f183SYan Zheng static int update_log_root(struct btrfs_trans_handle *trans, 22467237f183SYan Zheng struct btrfs_root *log) 22477237f183SYan Zheng { 22487237f183SYan Zheng int ret; 22497237f183SYan Zheng 22507237f183SYan Zheng if (log->log_transid == 1) { 22517237f183SYan Zheng /* insert root item on the first sync */ 22527237f183SYan Zheng ret = btrfs_insert_root(trans, log->fs_info->log_root_tree, 22537237f183SYan Zheng 
&log->root_key, &log->root_item); 22547237f183SYan Zheng } else { 22557237f183SYan Zheng ret = btrfs_update_root(trans, log->fs_info->log_root_tree, 22567237f183SYan Zheng &log->root_key, &log->root_item); 22577237f183SYan Zheng } 22587237f183SYan Zheng return ret; 22597237f183SYan Zheng } 22607237f183SYan Zheng 226112fcfd22SChris Mason static int wait_log_commit(struct btrfs_trans_handle *trans, 226212fcfd22SChris Mason struct btrfs_root *root, unsigned long transid) 2263e02119d5SChris Mason { 2264e02119d5SChris Mason DEFINE_WAIT(wait); 22657237f183SYan Zheng int index = transid % 2; 2266e02119d5SChris Mason 22677237f183SYan Zheng /* 22687237f183SYan Zheng * we only allow two pending log transactions at a time, 22697237f183SYan Zheng * so we know that if ours is more than 2 older than the 22707237f183SYan Zheng * current transaction, we're done 22717237f183SYan Zheng */ 2272e02119d5SChris Mason do { 22737237f183SYan Zheng prepare_to_wait(&root->log_commit_wait[index], 22747237f183SYan Zheng &wait, TASK_UNINTERRUPTIBLE); 22757237f183SYan Zheng mutex_unlock(&root->log_mutex); 227612fcfd22SChris Mason 227712fcfd22SChris Mason if (root->fs_info->last_trans_log_full_commit != 227812fcfd22SChris Mason trans->transid && root->log_transid < transid + 2 && 22797237f183SYan Zheng atomic_read(&root->log_commit[index])) 2280e02119d5SChris Mason schedule(); 228112fcfd22SChris Mason 22827237f183SYan Zheng finish_wait(&root->log_commit_wait[index], &wait); 22837237f183SYan Zheng mutex_lock(&root->log_mutex); 22846dd70ce4SJan Kara } while (root->fs_info->last_trans_log_full_commit != 22856dd70ce4SJan Kara trans->transid && root->log_transid < transid + 2 && 22867237f183SYan Zheng atomic_read(&root->log_commit[index])); 22877237f183SYan Zheng return 0; 22887237f183SYan Zheng } 22897237f183SYan Zheng 2290143bede5SJeff Mahoney static void wait_for_writer(struct btrfs_trans_handle *trans, 229112fcfd22SChris Mason struct btrfs_root *root) 22927237f183SYan Zheng { 22937237f183SYan 
Zheng DEFINE_WAIT(wait); 22946dd70ce4SJan Kara while (root->fs_info->last_trans_log_full_commit != 22956dd70ce4SJan Kara trans->transid && atomic_read(&root->log_writers)) { 22967237f183SYan Zheng prepare_to_wait(&root->log_writer_wait, 22977237f183SYan Zheng &wait, TASK_UNINTERRUPTIBLE); 22987237f183SYan Zheng mutex_unlock(&root->log_mutex); 229912fcfd22SChris Mason if (root->fs_info->last_trans_log_full_commit != 230012fcfd22SChris Mason trans->transid && atomic_read(&root->log_writers)) 23017237f183SYan Zheng schedule(); 23027237f183SYan Zheng mutex_lock(&root->log_mutex); 23037237f183SYan Zheng finish_wait(&root->log_writer_wait, &wait); 23047237f183SYan Zheng } 2305e02119d5SChris Mason } 2306e02119d5SChris Mason 2307e02119d5SChris Mason /* 2308e02119d5SChris Mason * btrfs_sync_log does sends a given tree log down to the disk and 2309e02119d5SChris Mason * updates the super blocks to record it. When this call is done, 231012fcfd22SChris Mason * you know that any inodes previously logged are safely on disk only 231112fcfd22SChris Mason * if it returns 0. 231212fcfd22SChris Mason * 231312fcfd22SChris Mason * Any other return value means you need to call btrfs_commit_transaction. 231412fcfd22SChris Mason * Some of the edge cases for fsyncing directories that have had unlinks 231512fcfd22SChris Mason * or renames done in the past mean that sometimes the only safe 231612fcfd22SChris Mason * fsync is to commit the whole FS. When btrfs_sync_log returns -EAGAIN, 231712fcfd22SChris Mason * that has happened. 
2318e02119d5SChris Mason */ 2319e02119d5SChris Mason int btrfs_sync_log(struct btrfs_trans_handle *trans, 2320e02119d5SChris Mason struct btrfs_root *root) 2321e02119d5SChris Mason { 23227237f183SYan Zheng int index1; 23237237f183SYan Zheng int index2; 23248cef4e16SYan, Zheng int mark; 2325e02119d5SChris Mason int ret; 2326e02119d5SChris Mason struct btrfs_root *log = root->log_root; 23277237f183SYan Zheng struct btrfs_root *log_root_tree = root->fs_info->log_root_tree; 23288cef4e16SYan, Zheng unsigned long log_transid = 0; 2329e02119d5SChris Mason 23307237f183SYan Zheng mutex_lock(&root->log_mutex); 23312ab28f32SJosef Bacik log_transid = root->log_transid; 23327237f183SYan Zheng index1 = root->log_transid % 2; 23337237f183SYan Zheng if (atomic_read(&root->log_commit[index1])) { 233412fcfd22SChris Mason wait_log_commit(trans, root, root->log_transid); 23357237f183SYan Zheng mutex_unlock(&root->log_mutex); 23367237f183SYan Zheng return 0; 2337e02119d5SChris Mason } 23387237f183SYan Zheng atomic_set(&root->log_commit[index1], 1); 23397237f183SYan Zheng 23407237f183SYan Zheng /* wait for previous tree log sync to complete */ 23417237f183SYan Zheng if (atomic_read(&root->log_commit[(index1 + 1) % 2])) 234212fcfd22SChris Mason wait_log_commit(trans, root, root->log_transid - 1); 234386df7eb9SYan, Zheng while (1) { 23442ecb7923SMiao Xie int batch = atomic_read(&root->log_batch); 2345cd354ad6SChris Mason /* when we're on an ssd, just kick the log commit out */ 2346cd354ad6SChris Mason if (!btrfs_test_opt(root, SSD) && root->log_multiple_pids) { 23477237f183SYan Zheng mutex_unlock(&root->log_mutex); 2348e02119d5SChris Mason schedule_timeout_uninterruptible(1); 23497237f183SYan Zheng mutex_lock(&root->log_mutex); 235086df7eb9SYan, Zheng } 235112fcfd22SChris Mason wait_for_writer(trans, root); 23522ecb7923SMiao Xie if (batch == atomic_read(&root->log_batch)) 2353e02119d5SChris Mason break; 2354e02119d5SChris Mason } 2355d0c803c4SChris Mason 235612fcfd22SChris Mason /* bail 
out if we need to do a full commit */ 235712fcfd22SChris Mason if (root->fs_info->last_trans_log_full_commit == trans->transid) { 235812fcfd22SChris Mason ret = -EAGAIN; 23592ab28f32SJosef Bacik btrfs_free_logged_extents(log, log_transid); 236012fcfd22SChris Mason mutex_unlock(&root->log_mutex); 236112fcfd22SChris Mason goto out; 236212fcfd22SChris Mason } 236312fcfd22SChris Mason 23648cef4e16SYan, Zheng if (log_transid % 2 == 0) 23658cef4e16SYan, Zheng mark = EXTENT_DIRTY; 23668cef4e16SYan, Zheng else 23678cef4e16SYan, Zheng mark = EXTENT_NEW; 23688cef4e16SYan, Zheng 2369690587d1SChris Mason /* we start IO on all the marked extents here, but we don't actually 2370690587d1SChris Mason * wait for them until later. 2371690587d1SChris Mason */ 23728cef4e16SYan, Zheng ret = btrfs_write_marked_extents(log, &log->dirty_log_pages, mark); 237379787eaaSJeff Mahoney if (ret) { 237479787eaaSJeff Mahoney btrfs_abort_transaction(trans, root, ret); 23752ab28f32SJosef Bacik btrfs_free_logged_extents(log, log_transid); 237679787eaaSJeff Mahoney mutex_unlock(&root->log_mutex); 237779787eaaSJeff Mahoney goto out; 237879787eaaSJeff Mahoney } 23797237f183SYan Zheng 23805d4f98a2SYan Zheng btrfs_set_root_node(&log->root_item, log->node); 23817237f183SYan Zheng 23827237f183SYan Zheng root->log_transid++; 23837237f183SYan Zheng log->log_transid = root->log_transid; 2384ff782e0aSJosef Bacik root->log_start_pid = 0; 23857237f183SYan Zheng smp_mb(); 23867237f183SYan Zheng /* 23878cef4e16SYan, Zheng * IO has been started, blocks of the log tree have WRITTEN flag set 23888cef4e16SYan, Zheng * in their headers. new modifications of the log will be written to 23898cef4e16SYan, Zheng * new positions. so it's safe to allow log writers to go in. 
23907237f183SYan Zheng */ 23917237f183SYan Zheng mutex_unlock(&root->log_mutex); 23927237f183SYan Zheng 23937237f183SYan Zheng mutex_lock(&log_root_tree->log_mutex); 23942ecb7923SMiao Xie atomic_inc(&log_root_tree->log_batch); 23957237f183SYan Zheng atomic_inc(&log_root_tree->log_writers); 23967237f183SYan Zheng mutex_unlock(&log_root_tree->log_mutex); 23977237f183SYan Zheng 23987237f183SYan Zheng ret = update_log_root(trans, log); 23997237f183SYan Zheng 24007237f183SYan Zheng mutex_lock(&log_root_tree->log_mutex); 24017237f183SYan Zheng if (atomic_dec_and_test(&log_root_tree->log_writers)) { 24027237f183SYan Zheng smp_mb(); 24037237f183SYan Zheng if (waitqueue_active(&log_root_tree->log_writer_wait)) 24047237f183SYan Zheng wake_up(&log_root_tree->log_writer_wait); 24057237f183SYan Zheng } 24067237f183SYan Zheng 24074a500fd1SYan, Zheng if (ret) { 240879787eaaSJeff Mahoney if (ret != -ENOSPC) { 240979787eaaSJeff Mahoney btrfs_abort_transaction(trans, root, ret); 241079787eaaSJeff Mahoney mutex_unlock(&log_root_tree->log_mutex); 241179787eaaSJeff Mahoney goto out; 241279787eaaSJeff Mahoney } 24134a500fd1SYan, Zheng root->fs_info->last_trans_log_full_commit = trans->transid; 24144a500fd1SYan, Zheng btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); 24152ab28f32SJosef Bacik btrfs_free_logged_extents(log, log_transid); 24164a500fd1SYan, Zheng mutex_unlock(&log_root_tree->log_mutex); 24174a500fd1SYan, Zheng ret = -EAGAIN; 24184a500fd1SYan, Zheng goto out; 24194a500fd1SYan, Zheng } 24204a500fd1SYan, Zheng 24217237f183SYan Zheng index2 = log_root_tree->log_transid % 2; 24227237f183SYan Zheng if (atomic_read(&log_root_tree->log_commit[index2])) { 24238cef4e16SYan, Zheng btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); 242412fcfd22SChris Mason wait_log_commit(trans, log_root_tree, 242512fcfd22SChris Mason log_root_tree->log_transid); 24262ab28f32SJosef Bacik btrfs_free_logged_extents(log, log_transid); 24277237f183SYan Zheng 
mutex_unlock(&log_root_tree->log_mutex); 2428b31eabd8SChris Mason ret = 0; 24297237f183SYan Zheng goto out; 24307237f183SYan Zheng } 24317237f183SYan Zheng atomic_set(&log_root_tree->log_commit[index2], 1); 24327237f183SYan Zheng 243312fcfd22SChris Mason if (atomic_read(&log_root_tree->log_commit[(index2 + 1) % 2])) { 243412fcfd22SChris Mason wait_log_commit(trans, log_root_tree, 243512fcfd22SChris Mason log_root_tree->log_transid - 1); 243612fcfd22SChris Mason } 24377237f183SYan Zheng 243812fcfd22SChris Mason wait_for_writer(trans, log_root_tree); 243912fcfd22SChris Mason 244012fcfd22SChris Mason /* 244112fcfd22SChris Mason * now that we've moved on to the tree of log tree roots, 244212fcfd22SChris Mason * check the full commit flag again 244312fcfd22SChris Mason */ 244412fcfd22SChris Mason if (root->fs_info->last_trans_log_full_commit == trans->transid) { 24458cef4e16SYan, Zheng btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); 24462ab28f32SJosef Bacik btrfs_free_logged_extents(log, log_transid); 244712fcfd22SChris Mason mutex_unlock(&log_root_tree->log_mutex); 244812fcfd22SChris Mason ret = -EAGAIN; 244912fcfd22SChris Mason goto out_wake_log_root; 245012fcfd22SChris Mason } 24517237f183SYan Zheng 24527237f183SYan Zheng ret = btrfs_write_and_wait_marked_extents(log_root_tree, 24538cef4e16SYan, Zheng &log_root_tree->dirty_log_pages, 24548cef4e16SYan, Zheng EXTENT_DIRTY | EXTENT_NEW); 245579787eaaSJeff Mahoney if (ret) { 245679787eaaSJeff Mahoney btrfs_abort_transaction(trans, root, ret); 24572ab28f32SJosef Bacik btrfs_free_logged_extents(log, log_transid); 245879787eaaSJeff Mahoney mutex_unlock(&log_root_tree->log_mutex); 245979787eaaSJeff Mahoney goto out_wake_log_root; 246079787eaaSJeff Mahoney } 24618cef4e16SYan, Zheng btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); 24622ab28f32SJosef Bacik btrfs_wait_logged_extents(log, log_transid); 2463e02119d5SChris Mason 24646c41761fSDavid Sterba 
btrfs_set_super_log_root(root->fs_info->super_for_commit, 24657237f183SYan Zheng log_root_tree->node->start); 24666c41761fSDavid Sterba btrfs_set_super_log_root_level(root->fs_info->super_for_commit, 24677237f183SYan Zheng btrfs_header_level(log_root_tree->node)); 2468e02119d5SChris Mason 24697237f183SYan Zheng log_root_tree->log_transid++; 2470e02119d5SChris Mason smp_mb(); 24717237f183SYan Zheng 24727237f183SYan Zheng mutex_unlock(&log_root_tree->log_mutex); 24737237f183SYan Zheng 24747237f183SYan Zheng /* 24757237f183SYan Zheng * nobody else is going to jump in and write the the ctree 24767237f183SYan Zheng * super here because the log_commit atomic below is protecting 24777237f183SYan Zheng * us. We must be called with a transaction handle pinning 24787237f183SYan Zheng * the running transaction open, so a full commit can't hop 24797237f183SYan Zheng * in and cause problems either. 24807237f183SYan Zheng */ 2481a2de733cSArne Jansen btrfs_scrub_pause_super(root); 24825af3e8ccSStefan Behrens ret = write_ctree_super(trans, root->fs_info->tree_root, 1); 2483a2de733cSArne Jansen btrfs_scrub_continue_super(root); 24845af3e8ccSStefan Behrens if (ret) { 24855af3e8ccSStefan Behrens btrfs_abort_transaction(trans, root, ret); 24865af3e8ccSStefan Behrens goto out_wake_log_root; 24875af3e8ccSStefan Behrens } 24887237f183SYan Zheng 2489257c62e1SChris Mason mutex_lock(&root->log_mutex); 2490257c62e1SChris Mason if (root->last_log_commit < log_transid) 2491257c62e1SChris Mason root->last_log_commit = log_transid; 2492257c62e1SChris Mason mutex_unlock(&root->log_mutex); 2493257c62e1SChris Mason 249412fcfd22SChris Mason out_wake_log_root: 24957237f183SYan Zheng atomic_set(&log_root_tree->log_commit[index2], 0); 24967237f183SYan Zheng smp_mb(); 24977237f183SYan Zheng if (waitqueue_active(&log_root_tree->log_commit_wait[index2])) 24987237f183SYan Zheng wake_up(&log_root_tree->log_commit_wait[index2]); 2499e02119d5SChris Mason out: 25007237f183SYan Zheng 
atomic_set(&root->log_commit[index1], 0); 25017237f183SYan Zheng smp_mb(); 25027237f183SYan Zheng if (waitqueue_active(&root->log_commit_wait[index1])) 25037237f183SYan Zheng wake_up(&root->log_commit_wait[index1]); 2504b31eabd8SChris Mason return ret; 2505e02119d5SChris Mason } 2506e02119d5SChris Mason 25074a500fd1SYan, Zheng static void free_log_tree(struct btrfs_trans_handle *trans, 25084a500fd1SYan, Zheng struct btrfs_root *log) 2509e02119d5SChris Mason { 2510e02119d5SChris Mason int ret; 2511d0c803c4SChris Mason u64 start; 2512d0c803c4SChris Mason u64 end; 2513e02119d5SChris Mason struct walk_control wc = { 2514e02119d5SChris Mason .free = 1, 2515e02119d5SChris Mason .process_func = process_one_buffer 2516e02119d5SChris Mason }; 2517e02119d5SChris Mason 25183321719eSLiu Bo if (trans) { 2519e02119d5SChris Mason ret = walk_log_tree(trans, log, &wc); 2520e02119d5SChris Mason BUG_ON(ret); 25213321719eSLiu Bo } 2522e02119d5SChris Mason 2523d0c803c4SChris Mason while (1) { 2524d0c803c4SChris Mason ret = find_first_extent_bit(&log->dirty_log_pages, 2525e6138876SJosef Bacik 0, &start, &end, EXTENT_DIRTY | EXTENT_NEW, 2526e6138876SJosef Bacik NULL); 2527d0c803c4SChris Mason if (ret) 2528d0c803c4SChris Mason break; 2529d0c803c4SChris Mason 25308cef4e16SYan, Zheng clear_extent_bits(&log->dirty_log_pages, start, end, 25318cef4e16SYan, Zheng EXTENT_DIRTY | EXTENT_NEW, GFP_NOFS); 2532d0c803c4SChris Mason } 2533d0c803c4SChris Mason 25342ab28f32SJosef Bacik /* 25352ab28f32SJosef Bacik * We may have short-circuited the log tree with the full commit logic 25362ab28f32SJosef Bacik * and left ordered extents on our list, so clear these out to keep us 25372ab28f32SJosef Bacik * from leaking inodes and memory. 
25382ab28f32SJosef Bacik */ 25392ab28f32SJosef Bacik btrfs_free_logged_extents(log, 0); 25402ab28f32SJosef Bacik btrfs_free_logged_extents(log, 1); 25412ab28f32SJosef Bacik 25427237f183SYan Zheng free_extent_buffer(log->node); 25437237f183SYan Zheng kfree(log); 25444a500fd1SYan, Zheng } 25454a500fd1SYan, Zheng 25464a500fd1SYan, Zheng /* 25474a500fd1SYan, Zheng * free all the extents used by the tree log. This should be called 25484a500fd1SYan, Zheng * at commit time of the full transaction 25494a500fd1SYan, Zheng */ 25504a500fd1SYan, Zheng int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root) 25514a500fd1SYan, Zheng { 25524a500fd1SYan, Zheng if (root->log_root) { 25534a500fd1SYan, Zheng free_log_tree(trans, root->log_root); 25544a500fd1SYan, Zheng root->log_root = NULL; 25554a500fd1SYan, Zheng } 25564a500fd1SYan, Zheng return 0; 25574a500fd1SYan, Zheng } 25584a500fd1SYan, Zheng 25594a500fd1SYan, Zheng int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, 25604a500fd1SYan, Zheng struct btrfs_fs_info *fs_info) 25614a500fd1SYan, Zheng { 25624a500fd1SYan, Zheng if (fs_info->log_root_tree) { 25634a500fd1SYan, Zheng free_log_tree(trans, fs_info->log_root_tree); 25644a500fd1SYan, Zheng fs_info->log_root_tree = NULL; 25654a500fd1SYan, Zheng } 2566e02119d5SChris Mason return 0; 2567e02119d5SChris Mason } 2568e02119d5SChris Mason 2569e02119d5SChris Mason /* 2570e02119d5SChris Mason * If both a file and directory are logged, and unlinks or renames are 2571e02119d5SChris Mason * mixed in, we have a few interesting corners: 2572e02119d5SChris Mason * 2573e02119d5SChris Mason * create file X in dir Y 2574e02119d5SChris Mason * link file X to X.link in dir Y 2575e02119d5SChris Mason * fsync file X 2576e02119d5SChris Mason * unlink file X but leave X.link 2577e02119d5SChris Mason * fsync dir Y 2578e02119d5SChris Mason * 2579e02119d5SChris Mason * After a crash we would expect only X.link to exist. 
But file X 2580e02119d5SChris Mason * didn't get fsync'd again so the log has back refs for X and X.link. 2581e02119d5SChris Mason * 2582e02119d5SChris Mason * We solve this by removing directory entries and inode backrefs from the 2583e02119d5SChris Mason * log when a file that was logged in the current transaction is 2584e02119d5SChris Mason * unlinked. Any later fsync will include the updated log entries, and 2585e02119d5SChris Mason * we'll be able to reconstruct the proper directory items from backrefs. 2586e02119d5SChris Mason * 2587e02119d5SChris Mason * This optimizations allows us to avoid relogging the entire inode 2588e02119d5SChris Mason * or the entire directory. 2589e02119d5SChris Mason */ 2590e02119d5SChris Mason int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, 2591e02119d5SChris Mason struct btrfs_root *root, 2592e02119d5SChris Mason const char *name, int name_len, 2593e02119d5SChris Mason struct inode *dir, u64 index) 2594e02119d5SChris Mason { 2595e02119d5SChris Mason struct btrfs_root *log; 2596e02119d5SChris Mason struct btrfs_dir_item *di; 2597e02119d5SChris Mason struct btrfs_path *path; 2598e02119d5SChris Mason int ret; 25994a500fd1SYan, Zheng int err = 0; 2600e02119d5SChris Mason int bytes_del = 0; 260133345d01SLi Zefan u64 dir_ino = btrfs_ino(dir); 2602e02119d5SChris Mason 26033a5f1d45SChris Mason if (BTRFS_I(dir)->logged_trans < trans->transid) 26043a5f1d45SChris Mason return 0; 26053a5f1d45SChris Mason 2606e02119d5SChris Mason ret = join_running_log_trans(root); 2607e02119d5SChris Mason if (ret) 2608e02119d5SChris Mason return 0; 2609e02119d5SChris Mason 2610e02119d5SChris Mason mutex_lock(&BTRFS_I(dir)->log_mutex); 2611e02119d5SChris Mason 2612e02119d5SChris Mason log = root->log_root; 2613e02119d5SChris Mason path = btrfs_alloc_path(); 2614a62f44a5STsutomu Itoh if (!path) { 2615a62f44a5STsutomu Itoh err = -ENOMEM; 2616a62f44a5STsutomu Itoh goto out_unlock; 2617a62f44a5STsutomu Itoh } 26182a29edc6Sliubo 261933345d01SLi 
Zefan di = btrfs_lookup_dir_item(trans, log, path, dir_ino, 2620e02119d5SChris Mason name, name_len, -1); 26214a500fd1SYan, Zheng if (IS_ERR(di)) { 26224a500fd1SYan, Zheng err = PTR_ERR(di); 26234a500fd1SYan, Zheng goto fail; 26244a500fd1SYan, Zheng } 26254a500fd1SYan, Zheng if (di) { 2626e02119d5SChris Mason ret = btrfs_delete_one_dir_name(trans, log, path, di); 2627e02119d5SChris Mason bytes_del += name_len; 2628e02119d5SChris Mason BUG_ON(ret); 2629e02119d5SChris Mason } 2630b3b4aa74SDavid Sterba btrfs_release_path(path); 263133345d01SLi Zefan di = btrfs_lookup_dir_index_item(trans, log, path, dir_ino, 2632e02119d5SChris Mason index, name, name_len, -1); 26334a500fd1SYan, Zheng if (IS_ERR(di)) { 26344a500fd1SYan, Zheng err = PTR_ERR(di); 26354a500fd1SYan, Zheng goto fail; 26364a500fd1SYan, Zheng } 26374a500fd1SYan, Zheng if (di) { 2638e02119d5SChris Mason ret = btrfs_delete_one_dir_name(trans, log, path, di); 2639e02119d5SChris Mason bytes_del += name_len; 2640e02119d5SChris Mason BUG_ON(ret); 2641e02119d5SChris Mason } 2642e02119d5SChris Mason 2643e02119d5SChris Mason /* update the directory size in the log to reflect the names 2644e02119d5SChris Mason * we have removed 2645e02119d5SChris Mason */ 2646e02119d5SChris Mason if (bytes_del) { 2647e02119d5SChris Mason struct btrfs_key key; 2648e02119d5SChris Mason 264933345d01SLi Zefan key.objectid = dir_ino; 2650e02119d5SChris Mason key.offset = 0; 2651e02119d5SChris Mason key.type = BTRFS_INODE_ITEM_KEY; 2652b3b4aa74SDavid Sterba btrfs_release_path(path); 2653e02119d5SChris Mason 2654e02119d5SChris Mason ret = btrfs_search_slot(trans, log, &key, path, 0, 1); 26554a500fd1SYan, Zheng if (ret < 0) { 26564a500fd1SYan, Zheng err = ret; 26574a500fd1SYan, Zheng goto fail; 26584a500fd1SYan, Zheng } 2659e02119d5SChris Mason if (ret == 0) { 2660e02119d5SChris Mason struct btrfs_inode_item *item; 2661e02119d5SChris Mason u64 i_size; 2662e02119d5SChris Mason 2663e02119d5SChris Mason item = btrfs_item_ptr(path->nodes[0], 
path->slots[0], 2664e02119d5SChris Mason struct btrfs_inode_item); 2665e02119d5SChris Mason i_size = btrfs_inode_size(path->nodes[0], item); 2666e02119d5SChris Mason if (i_size > bytes_del) 2667e02119d5SChris Mason i_size -= bytes_del; 2668e02119d5SChris Mason else 2669e02119d5SChris Mason i_size = 0; 2670e02119d5SChris Mason btrfs_set_inode_size(path->nodes[0], item, i_size); 2671e02119d5SChris Mason btrfs_mark_buffer_dirty(path->nodes[0]); 2672e02119d5SChris Mason } else 2673e02119d5SChris Mason ret = 0; 2674b3b4aa74SDavid Sterba btrfs_release_path(path); 2675e02119d5SChris Mason } 26764a500fd1SYan, Zheng fail: 2677e02119d5SChris Mason btrfs_free_path(path); 2678a62f44a5STsutomu Itoh out_unlock: 2679e02119d5SChris Mason mutex_unlock(&BTRFS_I(dir)->log_mutex); 26804a500fd1SYan, Zheng if (ret == -ENOSPC) { 26814a500fd1SYan, Zheng root->fs_info->last_trans_log_full_commit = trans->transid; 26824a500fd1SYan, Zheng ret = 0; 268379787eaaSJeff Mahoney } else if (ret < 0) 268479787eaaSJeff Mahoney btrfs_abort_transaction(trans, root, ret); 268579787eaaSJeff Mahoney 268612fcfd22SChris Mason btrfs_end_log_trans(root); 2687e02119d5SChris Mason 2688411fc6bcSAndi Kleen return err; 2689e02119d5SChris Mason } 2690e02119d5SChris Mason 2691e02119d5SChris Mason /* see comments for btrfs_del_dir_entries_in_log */ 2692e02119d5SChris Mason int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans, 2693e02119d5SChris Mason struct btrfs_root *root, 2694e02119d5SChris Mason const char *name, int name_len, 2695e02119d5SChris Mason struct inode *inode, u64 dirid) 2696e02119d5SChris Mason { 2697e02119d5SChris Mason struct btrfs_root *log; 2698e02119d5SChris Mason u64 index; 2699e02119d5SChris Mason int ret; 2700e02119d5SChris Mason 27013a5f1d45SChris Mason if (BTRFS_I(inode)->logged_trans < trans->transid) 27023a5f1d45SChris Mason return 0; 27033a5f1d45SChris Mason 2704e02119d5SChris Mason ret = join_running_log_trans(root); 2705e02119d5SChris Mason if (ret) 2706e02119d5SChris Mason 
return 0; 2707e02119d5SChris Mason log = root->log_root; 2708e02119d5SChris Mason mutex_lock(&BTRFS_I(inode)->log_mutex); 2709e02119d5SChris Mason 271033345d01SLi Zefan ret = btrfs_del_inode_ref(trans, log, name, name_len, btrfs_ino(inode), 2711e02119d5SChris Mason dirid, &index); 2712e02119d5SChris Mason mutex_unlock(&BTRFS_I(inode)->log_mutex); 27134a500fd1SYan, Zheng if (ret == -ENOSPC) { 27144a500fd1SYan, Zheng root->fs_info->last_trans_log_full_commit = trans->transid; 27154a500fd1SYan, Zheng ret = 0; 271679787eaaSJeff Mahoney } else if (ret < 0 && ret != -ENOENT) 271779787eaaSJeff Mahoney btrfs_abort_transaction(trans, root, ret); 271812fcfd22SChris Mason btrfs_end_log_trans(root); 2719e02119d5SChris Mason 2720e02119d5SChris Mason return ret; 2721e02119d5SChris Mason } 2722e02119d5SChris Mason 2723e02119d5SChris Mason /* 2724e02119d5SChris Mason * creates a range item in the log for 'dirid'. first_offset and 2725e02119d5SChris Mason * last_offset tell us which parts of the key space the log should 2726e02119d5SChris Mason * be considered authoritative for. 
2727e02119d5SChris Mason */ 2728e02119d5SChris Mason static noinline int insert_dir_log_key(struct btrfs_trans_handle *trans, 2729e02119d5SChris Mason struct btrfs_root *log, 2730e02119d5SChris Mason struct btrfs_path *path, 2731e02119d5SChris Mason int key_type, u64 dirid, 2732e02119d5SChris Mason u64 first_offset, u64 last_offset) 2733e02119d5SChris Mason { 2734e02119d5SChris Mason int ret; 2735e02119d5SChris Mason struct btrfs_key key; 2736e02119d5SChris Mason struct btrfs_dir_log_item *item; 2737e02119d5SChris Mason 2738e02119d5SChris Mason key.objectid = dirid; 2739e02119d5SChris Mason key.offset = first_offset; 2740e02119d5SChris Mason if (key_type == BTRFS_DIR_ITEM_KEY) 2741e02119d5SChris Mason key.type = BTRFS_DIR_LOG_ITEM_KEY; 2742e02119d5SChris Mason else 2743e02119d5SChris Mason key.type = BTRFS_DIR_LOG_INDEX_KEY; 2744e02119d5SChris Mason ret = btrfs_insert_empty_item(trans, log, path, &key, sizeof(*item)); 27454a500fd1SYan, Zheng if (ret) 27464a500fd1SYan, Zheng return ret; 2747e02119d5SChris Mason 2748e02119d5SChris Mason item = btrfs_item_ptr(path->nodes[0], path->slots[0], 2749e02119d5SChris Mason struct btrfs_dir_log_item); 2750e02119d5SChris Mason btrfs_set_dir_log_end(path->nodes[0], item, last_offset); 2751e02119d5SChris Mason btrfs_mark_buffer_dirty(path->nodes[0]); 2752b3b4aa74SDavid Sterba btrfs_release_path(path); 2753e02119d5SChris Mason return 0; 2754e02119d5SChris Mason } 2755e02119d5SChris Mason 2756e02119d5SChris Mason /* 2757e02119d5SChris Mason * log all the items included in the current transaction for a given 2758e02119d5SChris Mason * directory. 
This also creates the range items in the log tree required 2759e02119d5SChris Mason * to replay anything deleted before the fsync 2760e02119d5SChris Mason */ 2761e02119d5SChris Mason static noinline int log_dir_items(struct btrfs_trans_handle *trans, 2762e02119d5SChris Mason struct btrfs_root *root, struct inode *inode, 2763e02119d5SChris Mason struct btrfs_path *path, 2764e02119d5SChris Mason struct btrfs_path *dst_path, int key_type, 2765e02119d5SChris Mason u64 min_offset, u64 *last_offset_ret) 2766e02119d5SChris Mason { 2767e02119d5SChris Mason struct btrfs_key min_key; 2768e02119d5SChris Mason struct btrfs_key max_key; 2769e02119d5SChris Mason struct btrfs_root *log = root->log_root; 2770e02119d5SChris Mason struct extent_buffer *src; 27714a500fd1SYan, Zheng int err = 0; 2772e02119d5SChris Mason int ret; 2773e02119d5SChris Mason int i; 2774e02119d5SChris Mason int nritems; 2775e02119d5SChris Mason u64 first_offset = min_offset; 2776e02119d5SChris Mason u64 last_offset = (u64)-1; 277733345d01SLi Zefan u64 ino = btrfs_ino(inode); 2778e02119d5SChris Mason 2779e02119d5SChris Mason log = root->log_root; 278033345d01SLi Zefan max_key.objectid = ino; 2781e02119d5SChris Mason max_key.offset = (u64)-1; 2782e02119d5SChris Mason max_key.type = key_type; 2783e02119d5SChris Mason 278433345d01SLi Zefan min_key.objectid = ino; 2785e02119d5SChris Mason min_key.type = key_type; 2786e02119d5SChris Mason min_key.offset = min_offset; 2787e02119d5SChris Mason 2788e02119d5SChris Mason path->keep_locks = 1; 2789e02119d5SChris Mason 2790e02119d5SChris Mason ret = btrfs_search_forward(root, &min_key, &max_key, 2791de78b51aSEric Sandeen path, trans->transid); 2792e02119d5SChris Mason 2793e02119d5SChris Mason /* 2794e02119d5SChris Mason * we didn't find anything from this transaction, see if there 2795e02119d5SChris Mason * is anything at all 2796e02119d5SChris Mason */ 279733345d01SLi Zefan if (ret != 0 || min_key.objectid != ino || min_key.type != key_type) { 279833345d01SLi Zefan 
min_key.objectid = ino; 2799e02119d5SChris Mason min_key.type = key_type; 2800e02119d5SChris Mason min_key.offset = (u64)-1; 2801b3b4aa74SDavid Sterba btrfs_release_path(path); 2802e02119d5SChris Mason ret = btrfs_search_slot(NULL, root, &min_key, path, 0, 0); 2803e02119d5SChris Mason if (ret < 0) { 2804b3b4aa74SDavid Sterba btrfs_release_path(path); 2805e02119d5SChris Mason return ret; 2806e02119d5SChris Mason } 280733345d01SLi Zefan ret = btrfs_previous_item(root, path, ino, key_type); 2808e02119d5SChris Mason 2809e02119d5SChris Mason /* if ret == 0 there are items for this type, 2810e02119d5SChris Mason * create a range to tell us the last key of this type. 2811e02119d5SChris Mason * otherwise, there are no items in this directory after 2812e02119d5SChris Mason * *min_offset, and we create a range to indicate that. 2813e02119d5SChris Mason */ 2814e02119d5SChris Mason if (ret == 0) { 2815e02119d5SChris Mason struct btrfs_key tmp; 2816e02119d5SChris Mason btrfs_item_key_to_cpu(path->nodes[0], &tmp, 2817e02119d5SChris Mason path->slots[0]); 2818d397712bSChris Mason if (key_type == tmp.type) 2819e02119d5SChris Mason first_offset = max(min_offset, tmp.offset) + 1; 2820e02119d5SChris Mason } 2821e02119d5SChris Mason goto done; 2822e02119d5SChris Mason } 2823e02119d5SChris Mason 2824e02119d5SChris Mason /* go backward to find any previous key */ 282533345d01SLi Zefan ret = btrfs_previous_item(root, path, ino, key_type); 2826e02119d5SChris Mason if (ret == 0) { 2827e02119d5SChris Mason struct btrfs_key tmp; 2828e02119d5SChris Mason btrfs_item_key_to_cpu(path->nodes[0], &tmp, path->slots[0]); 2829e02119d5SChris Mason if (key_type == tmp.type) { 2830e02119d5SChris Mason first_offset = tmp.offset; 2831e02119d5SChris Mason ret = overwrite_item(trans, log, dst_path, 2832e02119d5SChris Mason path->nodes[0], path->slots[0], 2833e02119d5SChris Mason &tmp); 28344a500fd1SYan, Zheng if (ret) { 28354a500fd1SYan, Zheng err = ret; 28364a500fd1SYan, Zheng goto done; 28374a500fd1SYan, 
Zheng } 2838e02119d5SChris Mason } 2839e02119d5SChris Mason } 2840b3b4aa74SDavid Sterba btrfs_release_path(path); 2841e02119d5SChris Mason 2842e02119d5SChris Mason /* find the first key from this transaction again */ 2843e02119d5SChris Mason ret = btrfs_search_slot(NULL, root, &min_key, path, 0, 0); 2844e02119d5SChris Mason if (ret != 0) { 2845e02119d5SChris Mason WARN_ON(1); 2846e02119d5SChris Mason goto done; 2847e02119d5SChris Mason } 2848e02119d5SChris Mason 2849e02119d5SChris Mason /* 2850e02119d5SChris Mason * we have a block from this transaction, log every item in it 2851e02119d5SChris Mason * from our directory 2852e02119d5SChris Mason */ 2853e02119d5SChris Mason while (1) { 2854e02119d5SChris Mason struct btrfs_key tmp; 2855e02119d5SChris Mason src = path->nodes[0]; 2856e02119d5SChris Mason nritems = btrfs_header_nritems(src); 2857e02119d5SChris Mason for (i = path->slots[0]; i < nritems; i++) { 2858e02119d5SChris Mason btrfs_item_key_to_cpu(src, &min_key, i); 2859e02119d5SChris Mason 286033345d01SLi Zefan if (min_key.objectid != ino || min_key.type != key_type) 2861e02119d5SChris Mason goto done; 2862e02119d5SChris Mason ret = overwrite_item(trans, log, dst_path, src, i, 2863e02119d5SChris Mason &min_key); 28644a500fd1SYan, Zheng if (ret) { 28654a500fd1SYan, Zheng err = ret; 28664a500fd1SYan, Zheng goto done; 28674a500fd1SYan, Zheng } 2868e02119d5SChris Mason } 2869e02119d5SChris Mason path->slots[0] = nritems; 2870e02119d5SChris Mason 2871e02119d5SChris Mason /* 2872e02119d5SChris Mason * look ahead to the next item and see if it is also 2873e02119d5SChris Mason * from this directory and from this transaction 2874e02119d5SChris Mason */ 2875e02119d5SChris Mason ret = btrfs_next_leaf(root, path); 2876e02119d5SChris Mason if (ret == 1) { 2877e02119d5SChris Mason last_offset = (u64)-1; 2878e02119d5SChris Mason goto done; 2879e02119d5SChris Mason } 2880e02119d5SChris Mason btrfs_item_key_to_cpu(path->nodes[0], &tmp, path->slots[0]); 288133345d01SLi Zefan if 
(tmp.objectid != ino || tmp.type != key_type) { 2882e02119d5SChris Mason last_offset = (u64)-1; 2883e02119d5SChris Mason goto done; 2884e02119d5SChris Mason } 2885e02119d5SChris Mason if (btrfs_header_generation(path->nodes[0]) != trans->transid) { 2886e02119d5SChris Mason ret = overwrite_item(trans, log, dst_path, 2887e02119d5SChris Mason path->nodes[0], path->slots[0], 2888e02119d5SChris Mason &tmp); 28894a500fd1SYan, Zheng if (ret) 28904a500fd1SYan, Zheng err = ret; 28914a500fd1SYan, Zheng else 2892e02119d5SChris Mason last_offset = tmp.offset; 2893e02119d5SChris Mason goto done; 2894e02119d5SChris Mason } 2895e02119d5SChris Mason } 2896e02119d5SChris Mason done: 2897b3b4aa74SDavid Sterba btrfs_release_path(path); 2898b3b4aa74SDavid Sterba btrfs_release_path(dst_path); 2899e02119d5SChris Mason 29004a500fd1SYan, Zheng if (err == 0) { 29014a500fd1SYan, Zheng *last_offset_ret = last_offset; 29024a500fd1SYan, Zheng /* 29034a500fd1SYan, Zheng * insert the log range keys to indicate where the log 29044a500fd1SYan, Zheng * is valid 29054a500fd1SYan, Zheng */ 29064a500fd1SYan, Zheng ret = insert_dir_log_key(trans, log, path, key_type, 290733345d01SLi Zefan ino, first_offset, last_offset); 29084a500fd1SYan, Zheng if (ret) 29094a500fd1SYan, Zheng err = ret; 29104a500fd1SYan, Zheng } 29114a500fd1SYan, Zheng return err; 2912e02119d5SChris Mason } 2913e02119d5SChris Mason 2914e02119d5SChris Mason /* 2915e02119d5SChris Mason * logging directories is very similar to logging inodes, We find all the items 2916e02119d5SChris Mason * from the current transaction and write them to the log. 2917e02119d5SChris Mason * 2918e02119d5SChris Mason * The recovery code scans the directory in the subvolume, and if it finds a 2919e02119d5SChris Mason * key in the range logged that is not present in the log tree, then it means 2920e02119d5SChris Mason * that dir entry was unlinked during the transaction. 
2921e02119d5SChris Mason * 2922e02119d5SChris Mason * In order for that scan to work, we must include one key smaller than 2923e02119d5SChris Mason * the smallest logged by this transaction and one key larger than the largest 2924e02119d5SChris Mason * key logged by this transaction. 2925e02119d5SChris Mason */ 2926e02119d5SChris Mason static noinline int log_directory_changes(struct btrfs_trans_handle *trans, 2927e02119d5SChris Mason struct btrfs_root *root, struct inode *inode, 2928e02119d5SChris Mason struct btrfs_path *path, 2929e02119d5SChris Mason struct btrfs_path *dst_path) 2930e02119d5SChris Mason { 2931e02119d5SChris Mason u64 min_key; 2932e02119d5SChris Mason u64 max_key; 2933e02119d5SChris Mason int ret; 2934e02119d5SChris Mason int key_type = BTRFS_DIR_ITEM_KEY; 2935e02119d5SChris Mason 2936e02119d5SChris Mason again: 2937e02119d5SChris Mason min_key = 0; 2938e02119d5SChris Mason max_key = 0; 2939e02119d5SChris Mason while (1) { 2940e02119d5SChris Mason ret = log_dir_items(trans, root, inode, path, 2941e02119d5SChris Mason dst_path, key_type, min_key, 2942e02119d5SChris Mason &max_key); 29434a500fd1SYan, Zheng if (ret) 29444a500fd1SYan, Zheng return ret; 2945e02119d5SChris Mason if (max_key == (u64)-1) 2946e02119d5SChris Mason break; 2947e02119d5SChris Mason min_key = max_key + 1; 2948e02119d5SChris Mason } 2949e02119d5SChris Mason 2950e02119d5SChris Mason if (key_type == BTRFS_DIR_ITEM_KEY) { 2951e02119d5SChris Mason key_type = BTRFS_DIR_INDEX_KEY; 2952e02119d5SChris Mason goto again; 2953e02119d5SChris Mason } 2954e02119d5SChris Mason return 0; 2955e02119d5SChris Mason } 2956e02119d5SChris Mason 2957e02119d5SChris Mason /* 2958e02119d5SChris Mason * a helper function to drop items from the log before we relog an 2959e02119d5SChris Mason * inode. max_key_type indicates the highest item type to remove. 2960e02119d5SChris Mason * This cannot be run for file data extents because it does not 2961e02119d5SChris Mason * free the extents they point to. 
2962e02119d5SChris Mason */ 2963e02119d5SChris Mason static int drop_objectid_items(struct btrfs_trans_handle *trans, 2964e02119d5SChris Mason struct btrfs_root *log, 2965e02119d5SChris Mason struct btrfs_path *path, 2966e02119d5SChris Mason u64 objectid, int max_key_type) 2967e02119d5SChris Mason { 2968e02119d5SChris Mason int ret; 2969e02119d5SChris Mason struct btrfs_key key; 2970e02119d5SChris Mason struct btrfs_key found_key; 297118ec90d6SJosef Bacik int start_slot; 2972e02119d5SChris Mason 2973e02119d5SChris Mason key.objectid = objectid; 2974e02119d5SChris Mason key.type = max_key_type; 2975e02119d5SChris Mason key.offset = (u64)-1; 2976e02119d5SChris Mason 2977e02119d5SChris Mason while (1) { 2978e02119d5SChris Mason ret = btrfs_search_slot(trans, log, &key, path, -1, 1); 29794a500fd1SYan, Zheng BUG_ON(ret == 0); 29804a500fd1SYan, Zheng if (ret < 0) 2981e02119d5SChris Mason break; 2982e02119d5SChris Mason 2983e02119d5SChris Mason if (path->slots[0] == 0) 2984e02119d5SChris Mason break; 2985e02119d5SChris Mason 2986e02119d5SChris Mason path->slots[0]--; 2987e02119d5SChris Mason btrfs_item_key_to_cpu(path->nodes[0], &found_key, 2988e02119d5SChris Mason path->slots[0]); 2989e02119d5SChris Mason 2990e02119d5SChris Mason if (found_key.objectid != objectid) 2991e02119d5SChris Mason break; 2992e02119d5SChris Mason 299318ec90d6SJosef Bacik found_key.offset = 0; 299418ec90d6SJosef Bacik found_key.type = 0; 299518ec90d6SJosef Bacik ret = btrfs_bin_search(path->nodes[0], &found_key, 0, 299618ec90d6SJosef Bacik &start_slot); 299718ec90d6SJosef Bacik 299818ec90d6SJosef Bacik ret = btrfs_del_items(trans, log, path, start_slot, 299918ec90d6SJosef Bacik path->slots[0] - start_slot + 1); 300018ec90d6SJosef Bacik /* 300118ec90d6SJosef Bacik * If start slot isn't 0 then we don't need to re-search, we've 300218ec90d6SJosef Bacik * found the last guy with the objectid in this tree. 
300318ec90d6SJosef Bacik */ 300418ec90d6SJosef Bacik if (ret || start_slot != 0) 300565a246c5STsutomu Itoh break; 3006b3b4aa74SDavid Sterba btrfs_release_path(path); 3007e02119d5SChris Mason } 3008b3b4aa74SDavid Sterba btrfs_release_path(path); 30095bdbeb21SJosef Bacik if (ret > 0) 30105bdbeb21SJosef Bacik ret = 0; 30114a500fd1SYan, Zheng return ret; 3012e02119d5SChris Mason } 3013e02119d5SChris Mason 301494edf4aeSJosef Bacik static void fill_inode_item(struct btrfs_trans_handle *trans, 301594edf4aeSJosef Bacik struct extent_buffer *leaf, 301694edf4aeSJosef Bacik struct btrfs_inode_item *item, 301794edf4aeSJosef Bacik struct inode *inode, int log_inode_only) 301894edf4aeSJosef Bacik { 30190b1c6ccaSJosef Bacik struct btrfs_map_token token; 302094edf4aeSJosef Bacik 30210b1c6ccaSJosef Bacik btrfs_init_map_token(&token); 302294edf4aeSJosef Bacik 302394edf4aeSJosef Bacik if (log_inode_only) { 302494edf4aeSJosef Bacik /* set the generation to zero so the recover code 302594edf4aeSJosef Bacik * can tell the difference between an logging 302694edf4aeSJosef Bacik * just to say 'this inode exists' and a logging 302794edf4aeSJosef Bacik * to say 'update this inode with these values' 302894edf4aeSJosef Bacik */ 30290b1c6ccaSJosef Bacik btrfs_set_token_inode_generation(leaf, item, 0, &token); 30300b1c6ccaSJosef Bacik btrfs_set_token_inode_size(leaf, item, 0, &token); 303194edf4aeSJosef Bacik } else { 30320b1c6ccaSJosef Bacik btrfs_set_token_inode_generation(leaf, item, 30330b1c6ccaSJosef Bacik BTRFS_I(inode)->generation, 30340b1c6ccaSJosef Bacik &token); 30350b1c6ccaSJosef Bacik btrfs_set_token_inode_size(leaf, item, inode->i_size, &token); 303694edf4aeSJosef Bacik } 303794edf4aeSJosef Bacik 30380b1c6ccaSJosef Bacik btrfs_set_token_inode_uid(leaf, item, i_uid_read(inode), &token); 30390b1c6ccaSJosef Bacik btrfs_set_token_inode_gid(leaf, item, i_gid_read(inode), &token); 30400b1c6ccaSJosef Bacik btrfs_set_token_inode_mode(leaf, item, inode->i_mode, &token); 30410b1c6ccaSJosef 
Bacik btrfs_set_token_inode_nlink(leaf, item, inode->i_nlink, &token); 30420b1c6ccaSJosef Bacik 30430b1c6ccaSJosef Bacik btrfs_set_token_timespec_sec(leaf, btrfs_inode_atime(item), 30440b1c6ccaSJosef Bacik inode->i_atime.tv_sec, &token); 30450b1c6ccaSJosef Bacik btrfs_set_token_timespec_nsec(leaf, btrfs_inode_atime(item), 30460b1c6ccaSJosef Bacik inode->i_atime.tv_nsec, &token); 30470b1c6ccaSJosef Bacik 30480b1c6ccaSJosef Bacik btrfs_set_token_timespec_sec(leaf, btrfs_inode_mtime(item), 30490b1c6ccaSJosef Bacik inode->i_mtime.tv_sec, &token); 30500b1c6ccaSJosef Bacik btrfs_set_token_timespec_nsec(leaf, btrfs_inode_mtime(item), 30510b1c6ccaSJosef Bacik inode->i_mtime.tv_nsec, &token); 30520b1c6ccaSJosef Bacik 30530b1c6ccaSJosef Bacik btrfs_set_token_timespec_sec(leaf, btrfs_inode_ctime(item), 30540b1c6ccaSJosef Bacik inode->i_ctime.tv_sec, &token); 30550b1c6ccaSJosef Bacik btrfs_set_token_timespec_nsec(leaf, btrfs_inode_ctime(item), 30560b1c6ccaSJosef Bacik inode->i_ctime.tv_nsec, &token); 30570b1c6ccaSJosef Bacik 30580b1c6ccaSJosef Bacik btrfs_set_token_inode_nbytes(leaf, item, inode_get_bytes(inode), 30590b1c6ccaSJosef Bacik &token); 30600b1c6ccaSJosef Bacik 30610b1c6ccaSJosef Bacik btrfs_set_token_inode_sequence(leaf, item, inode->i_version, &token); 30620b1c6ccaSJosef Bacik btrfs_set_token_inode_transid(leaf, item, trans->transid, &token); 30630b1c6ccaSJosef Bacik btrfs_set_token_inode_rdev(leaf, item, inode->i_rdev, &token); 30640b1c6ccaSJosef Bacik btrfs_set_token_inode_flags(leaf, item, BTRFS_I(inode)->flags, &token); 30650b1c6ccaSJosef Bacik btrfs_set_token_inode_block_group(leaf, item, 0, &token); 306694edf4aeSJosef Bacik } 306794edf4aeSJosef Bacik 3068a95249b3SJosef Bacik static int log_inode_item(struct btrfs_trans_handle *trans, 3069a95249b3SJosef Bacik struct btrfs_root *log, struct btrfs_path *path, 3070a95249b3SJosef Bacik struct inode *inode) 3071a95249b3SJosef Bacik { 3072a95249b3SJosef Bacik struct btrfs_inode_item *inode_item; 3073a95249b3SJosef 
Bacik struct btrfs_key key; 3074a95249b3SJosef Bacik int ret; 3075a95249b3SJosef Bacik 3076a95249b3SJosef Bacik memcpy(&key, &BTRFS_I(inode)->location, sizeof(key)); 3077a95249b3SJosef Bacik ret = btrfs_insert_empty_item(trans, log, path, &key, 3078a95249b3SJosef Bacik sizeof(*inode_item)); 3079a95249b3SJosef Bacik if (ret && ret != -EEXIST) 3080a95249b3SJosef Bacik return ret; 3081a95249b3SJosef Bacik inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0], 3082a95249b3SJosef Bacik struct btrfs_inode_item); 3083a95249b3SJosef Bacik fill_inode_item(trans, path->nodes[0], inode_item, inode, 0); 3084a95249b3SJosef Bacik btrfs_release_path(path); 3085a95249b3SJosef Bacik return 0; 3086a95249b3SJosef Bacik } 3087a95249b3SJosef Bacik 308831ff1cd2SChris Mason static noinline int copy_items(struct btrfs_trans_handle *trans, 3089d2794405SLiu Bo struct inode *inode, 309031ff1cd2SChris Mason struct btrfs_path *dst_path, 309131ff1cd2SChris Mason struct extent_buffer *src, 309231ff1cd2SChris Mason int start_slot, int nr, int inode_only) 309331ff1cd2SChris Mason { 309431ff1cd2SChris Mason unsigned long src_offset; 309531ff1cd2SChris Mason unsigned long dst_offset; 3096d2794405SLiu Bo struct btrfs_root *log = BTRFS_I(inode)->root->log_root; 309731ff1cd2SChris Mason struct btrfs_file_extent_item *extent; 309831ff1cd2SChris Mason struct btrfs_inode_item *inode_item; 309931ff1cd2SChris Mason int ret; 310031ff1cd2SChris Mason struct btrfs_key *ins_keys; 310131ff1cd2SChris Mason u32 *ins_sizes; 310231ff1cd2SChris Mason char *ins_data; 310331ff1cd2SChris Mason int i; 3104d20f7043SChris Mason struct list_head ordered_sums; 3105d2794405SLiu Bo int skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; 3106d20f7043SChris Mason 3107d20f7043SChris Mason INIT_LIST_HEAD(&ordered_sums); 310831ff1cd2SChris Mason 310931ff1cd2SChris Mason ins_data = kmalloc(nr * sizeof(struct btrfs_key) + 311031ff1cd2SChris Mason nr * sizeof(u32), GFP_NOFS); 31112a29edc6Sliubo if (!ins_data) 
31122a29edc6Sliubo return -ENOMEM; 31132a29edc6Sliubo 311431ff1cd2SChris Mason ins_sizes = (u32 *)ins_data; 311531ff1cd2SChris Mason ins_keys = (struct btrfs_key *)(ins_data + nr * sizeof(u32)); 311631ff1cd2SChris Mason 311731ff1cd2SChris Mason for (i = 0; i < nr; i++) { 311831ff1cd2SChris Mason ins_sizes[i] = btrfs_item_size_nr(src, i + start_slot); 311931ff1cd2SChris Mason btrfs_item_key_to_cpu(src, ins_keys + i, i + start_slot); 312031ff1cd2SChris Mason } 312131ff1cd2SChris Mason ret = btrfs_insert_empty_items(trans, log, dst_path, 312231ff1cd2SChris Mason ins_keys, ins_sizes, nr); 31234a500fd1SYan, Zheng if (ret) { 31244a500fd1SYan, Zheng kfree(ins_data); 31254a500fd1SYan, Zheng return ret; 31264a500fd1SYan, Zheng } 312731ff1cd2SChris Mason 31285d4f98a2SYan Zheng for (i = 0; i < nr; i++, dst_path->slots[0]++) { 312931ff1cd2SChris Mason dst_offset = btrfs_item_ptr_offset(dst_path->nodes[0], 313031ff1cd2SChris Mason dst_path->slots[0]); 313131ff1cd2SChris Mason 313231ff1cd2SChris Mason src_offset = btrfs_item_ptr_offset(src, start_slot + i); 313331ff1cd2SChris Mason 313494edf4aeSJosef Bacik if (ins_keys[i].type == BTRFS_INODE_ITEM_KEY) { 313531ff1cd2SChris Mason inode_item = btrfs_item_ptr(dst_path->nodes[0], 313631ff1cd2SChris Mason dst_path->slots[0], 313731ff1cd2SChris Mason struct btrfs_inode_item); 313894edf4aeSJosef Bacik fill_inode_item(trans, dst_path->nodes[0], inode_item, 313994edf4aeSJosef Bacik inode, inode_only == LOG_INODE_EXISTS); 314094edf4aeSJosef Bacik } else { 314194edf4aeSJosef Bacik copy_extent_buffer(dst_path->nodes[0], src, dst_offset, 314294edf4aeSJosef Bacik src_offset, ins_sizes[i]); 314331ff1cd2SChris Mason } 314494edf4aeSJosef Bacik 314531ff1cd2SChris Mason /* take a reference on file data extents so that truncates 314631ff1cd2SChris Mason * or deletes of this inode don't have to relog the inode 314731ff1cd2SChris Mason * again 314831ff1cd2SChris Mason */ 3149d2794405SLiu Bo if (btrfs_key_type(ins_keys + i) == BTRFS_EXTENT_DATA_KEY && 
3150d2794405SLiu Bo !skip_csum) { 315131ff1cd2SChris Mason int found_type; 315231ff1cd2SChris Mason extent = btrfs_item_ptr(src, start_slot + i, 315331ff1cd2SChris Mason struct btrfs_file_extent_item); 315431ff1cd2SChris Mason 31558e531cdfSliubo if (btrfs_file_extent_generation(src, extent) < trans->transid) 31568e531cdfSliubo continue; 31578e531cdfSliubo 315831ff1cd2SChris Mason found_type = btrfs_file_extent_type(src, extent); 31596f1fed77SJosef Bacik if (found_type == BTRFS_FILE_EXTENT_REG) { 31605d4f98a2SYan Zheng u64 ds, dl, cs, cl; 31615d4f98a2SYan Zheng ds = btrfs_file_extent_disk_bytenr(src, 316231ff1cd2SChris Mason extent); 31635d4f98a2SYan Zheng /* ds == 0 is a hole */ 31645d4f98a2SYan Zheng if (ds == 0) 31655d4f98a2SYan Zheng continue; 31665d4f98a2SYan Zheng 31675d4f98a2SYan Zheng dl = btrfs_file_extent_disk_num_bytes(src, 316831ff1cd2SChris Mason extent); 31695d4f98a2SYan Zheng cs = btrfs_file_extent_offset(src, extent); 31705d4f98a2SYan Zheng cl = btrfs_file_extent_num_bytes(src, 3171a419aef8SJoe Perches extent); 3172580afd76SChris Mason if (btrfs_file_extent_compression(src, 3173580afd76SChris Mason extent)) { 3174580afd76SChris Mason cs = 0; 3175580afd76SChris Mason cl = dl; 3176580afd76SChris Mason } 31775d4f98a2SYan Zheng 317807d400a6SYan Zheng ret = btrfs_lookup_csums_range( 3179d20f7043SChris Mason log->fs_info->csum_root, 318007d400a6SYan Zheng ds + cs, ds + cs + cl - 1, 3181a2de733cSArne Jansen &ordered_sums, 0); 3182d20f7043SChris Mason BUG_ON(ret); 318331ff1cd2SChris Mason } 318431ff1cd2SChris Mason } 318531ff1cd2SChris Mason } 318631ff1cd2SChris Mason 318731ff1cd2SChris Mason btrfs_mark_buffer_dirty(dst_path->nodes[0]); 3188b3b4aa74SDavid Sterba btrfs_release_path(dst_path); 318931ff1cd2SChris Mason kfree(ins_data); 3190d20f7043SChris Mason 3191d20f7043SChris Mason /* 3192d20f7043SChris Mason * we have to do this after the loop above to avoid changing the 3193d20f7043SChris Mason * log tree while trying to change the log tree. 
3194d20f7043SChris Mason */ 31954a500fd1SYan, Zheng ret = 0; 3196d20f7043SChris Mason while (!list_empty(&ordered_sums)) { 3197d20f7043SChris Mason struct btrfs_ordered_sum *sums = list_entry(ordered_sums.next, 3198d20f7043SChris Mason struct btrfs_ordered_sum, 3199d20f7043SChris Mason list); 32004a500fd1SYan, Zheng if (!ret) 3201d20f7043SChris Mason ret = btrfs_csum_file_blocks(trans, log, sums); 3202d20f7043SChris Mason list_del(&sums->list); 3203d20f7043SChris Mason kfree(sums); 3204d20f7043SChris Mason } 32054a500fd1SYan, Zheng return ret; 320631ff1cd2SChris Mason } 320731ff1cd2SChris Mason 32085dc562c5SJosef Bacik static int extent_cmp(void *priv, struct list_head *a, struct list_head *b) 32095dc562c5SJosef Bacik { 32105dc562c5SJosef Bacik struct extent_map *em1, *em2; 32115dc562c5SJosef Bacik 32125dc562c5SJosef Bacik em1 = list_entry(a, struct extent_map, list); 32135dc562c5SJosef Bacik em2 = list_entry(b, struct extent_map, list); 32145dc562c5SJosef Bacik 32155dc562c5SJosef Bacik if (em1->start < em2->start) 32165dc562c5SJosef Bacik return -1; 32175dc562c5SJosef Bacik else if (em1->start > em2->start) 32185dc562c5SJosef Bacik return 1; 32195dc562c5SJosef Bacik return 0; 32205dc562c5SJosef Bacik } 32215dc562c5SJosef Bacik 32225dc562c5SJosef Bacik static int log_one_extent(struct btrfs_trans_handle *trans, 32235dc562c5SJosef Bacik struct inode *inode, struct btrfs_root *root, 322470c8a91cSJosef Bacik struct extent_map *em, struct btrfs_path *path) 32255dc562c5SJosef Bacik { 32265dc562c5SJosef Bacik struct btrfs_root *log = root->log_root; 322770c8a91cSJosef Bacik struct btrfs_file_extent_item *fi; 322870c8a91cSJosef Bacik struct extent_buffer *leaf; 32292ab28f32SJosef Bacik struct btrfs_ordered_extent *ordered; 323070c8a91cSJosef Bacik struct list_head ordered_sums; 32310b1c6ccaSJosef Bacik struct btrfs_map_token token; 32325dc562c5SJosef Bacik struct btrfs_key key; 32332ab28f32SJosef Bacik u64 mod_start = em->mod_start; 32342ab28f32SJosef Bacik u64 mod_len = 
em->mod_len; 32352ab28f32SJosef Bacik u64 csum_offset; 32362ab28f32SJosef Bacik u64 csum_len; 323770c8a91cSJosef Bacik u64 extent_offset = em->start - em->orig_start; 323870c8a91cSJosef Bacik u64 block_len; 32395dc562c5SJosef Bacik int ret; 32402ab28f32SJosef Bacik int index = log->log_transid % 2; 324170c8a91cSJosef Bacik bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; 32425dc562c5SJosef Bacik 324309a2a8f9SJosef Bacik ret = __btrfs_drop_extents(trans, log, inode, path, em->start, 324409a2a8f9SJosef Bacik em->start + em->len, NULL, 0); 324509a2a8f9SJosef Bacik if (ret) 324609a2a8f9SJosef Bacik return ret; 324709a2a8f9SJosef Bacik 324870c8a91cSJosef Bacik INIT_LIST_HEAD(&ordered_sums); 32490b1c6ccaSJosef Bacik btrfs_init_map_token(&token); 32505dc562c5SJosef Bacik key.objectid = btrfs_ino(inode); 32515dc562c5SJosef Bacik key.type = BTRFS_EXTENT_DATA_KEY; 325270c8a91cSJosef Bacik key.offset = em->start; 32535dc562c5SJosef Bacik 325470c8a91cSJosef Bacik ret = btrfs_insert_empty_item(trans, log, path, &key, sizeof(*fi)); 325509a2a8f9SJosef Bacik if (ret) 32565dc562c5SJosef Bacik return ret; 325770c8a91cSJosef Bacik leaf = path->nodes[0]; 325870c8a91cSJosef Bacik fi = btrfs_item_ptr(leaf, path->slots[0], 325970c8a91cSJosef Bacik struct btrfs_file_extent_item); 3260124fe663SJosef Bacik 32610b1c6ccaSJosef Bacik btrfs_set_token_file_extent_generation(leaf, fi, em->generation, 32620b1c6ccaSJosef Bacik &token); 326370c8a91cSJosef Bacik if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) { 326470c8a91cSJosef Bacik skip_csum = true; 32650b1c6ccaSJosef Bacik btrfs_set_token_file_extent_type(leaf, fi, 32660b1c6ccaSJosef Bacik BTRFS_FILE_EXTENT_PREALLOC, 32670b1c6ccaSJosef Bacik &token); 32685dc562c5SJosef Bacik } else { 32690b1c6ccaSJosef Bacik btrfs_set_token_file_extent_type(leaf, fi, 32700b1c6ccaSJosef Bacik BTRFS_FILE_EXTENT_REG, 32710b1c6ccaSJosef Bacik &token); 327270c8a91cSJosef Bacik if (em->block_start == 0) 327370c8a91cSJosef Bacik skip_csum = true; 
32745dc562c5SJosef Bacik } 32755dc562c5SJosef Bacik 327670c8a91cSJosef Bacik block_len = max(em->block_len, em->orig_block_len); 327770c8a91cSJosef Bacik if (em->compress_type != BTRFS_COMPRESS_NONE) { 32780b1c6ccaSJosef Bacik btrfs_set_token_file_extent_disk_bytenr(leaf, fi, 32790b1c6ccaSJosef Bacik em->block_start, 32800b1c6ccaSJosef Bacik &token); 32810b1c6ccaSJosef Bacik btrfs_set_token_file_extent_disk_num_bytes(leaf, fi, block_len, 32820b1c6ccaSJosef Bacik &token); 328370c8a91cSJosef Bacik } else if (em->block_start < EXTENT_MAP_LAST_BYTE) { 32840b1c6ccaSJosef Bacik btrfs_set_token_file_extent_disk_bytenr(leaf, fi, 328570c8a91cSJosef Bacik em->block_start - 32860b1c6ccaSJosef Bacik extent_offset, &token); 32870b1c6ccaSJosef Bacik btrfs_set_token_file_extent_disk_num_bytes(leaf, fi, block_len, 32880b1c6ccaSJosef Bacik &token); 328970c8a91cSJosef Bacik } else { 32900b1c6ccaSJosef Bacik btrfs_set_token_file_extent_disk_bytenr(leaf, fi, 0, &token); 32910b1c6ccaSJosef Bacik btrfs_set_token_file_extent_disk_num_bytes(leaf, fi, 0, 32920b1c6ccaSJosef Bacik &token); 329370c8a91cSJosef Bacik } 329470c8a91cSJosef Bacik 32950b1c6ccaSJosef Bacik btrfs_set_token_file_extent_offset(leaf, fi, 32960b1c6ccaSJosef Bacik em->start - em->orig_start, 32970b1c6ccaSJosef Bacik &token); 32980b1c6ccaSJosef Bacik btrfs_set_token_file_extent_num_bytes(leaf, fi, em->len, &token); 3299cc95bef6SJosef Bacik btrfs_set_token_file_extent_ram_bytes(leaf, fi, em->ram_bytes, &token); 33000b1c6ccaSJosef Bacik btrfs_set_token_file_extent_compression(leaf, fi, em->compress_type, 33010b1c6ccaSJosef Bacik &token); 33020b1c6ccaSJosef Bacik btrfs_set_token_file_extent_encryption(leaf, fi, 0, &token); 33030b1c6ccaSJosef Bacik btrfs_set_token_file_extent_other_encoding(leaf, fi, 0, &token); 330470c8a91cSJosef Bacik btrfs_mark_buffer_dirty(leaf); 330570c8a91cSJosef Bacik 330670c8a91cSJosef Bacik btrfs_release_path(path); 330770c8a91cSJosef Bacik if (ret) { 330870c8a91cSJosef Bacik return ret; 
330970c8a91cSJosef Bacik } 331070c8a91cSJosef Bacik 331170c8a91cSJosef Bacik if (skip_csum) 331270c8a91cSJosef Bacik return 0; 331370c8a91cSJosef Bacik 3314192000ddSLiu Bo if (em->compress_type) { 3315192000ddSLiu Bo csum_offset = 0; 3316192000ddSLiu Bo csum_len = block_len; 3317192000ddSLiu Bo } 3318192000ddSLiu Bo 33192ab28f32SJosef Bacik /* 33202ab28f32SJosef Bacik * First check and see if our csums are on our outstanding ordered 33212ab28f32SJosef Bacik * extents. 33222ab28f32SJosef Bacik */ 33232ab28f32SJosef Bacik again: 33242ab28f32SJosef Bacik spin_lock_irq(&log->log_extents_lock[index]); 33252ab28f32SJosef Bacik list_for_each_entry(ordered, &log->logged_list[index], log_list) { 33262ab28f32SJosef Bacik struct btrfs_ordered_sum *sum; 33272ab28f32SJosef Bacik 33282ab28f32SJosef Bacik if (!mod_len) 33292ab28f32SJosef Bacik break; 33302ab28f32SJosef Bacik 33312ab28f32SJosef Bacik if (ordered->inode != inode) 33322ab28f32SJosef Bacik continue; 33332ab28f32SJosef Bacik 33342ab28f32SJosef Bacik if (ordered->file_offset + ordered->len <= mod_start || 33352ab28f32SJosef Bacik mod_start + mod_len <= ordered->file_offset) 33362ab28f32SJosef Bacik continue; 33372ab28f32SJosef Bacik 33382ab28f32SJosef Bacik /* 33392ab28f32SJosef Bacik * We are going to copy all the csums on this ordered extent, so 33402ab28f32SJosef Bacik * go ahead and adjust mod_start and mod_len in case this 33412ab28f32SJosef Bacik * ordered extent has already been logged. 
33422ab28f32SJosef Bacik */ 33432ab28f32SJosef Bacik if (ordered->file_offset > mod_start) { 33442ab28f32SJosef Bacik if (ordered->file_offset + ordered->len >= 33452ab28f32SJosef Bacik mod_start + mod_len) 33462ab28f32SJosef Bacik mod_len = ordered->file_offset - mod_start; 33472ab28f32SJosef Bacik /* 33482ab28f32SJosef Bacik * If we have this case 33492ab28f32SJosef Bacik * 33502ab28f32SJosef Bacik * |--------- logged extent ---------| 33512ab28f32SJosef Bacik * |----- ordered extent ----| 33522ab28f32SJosef Bacik * 33532ab28f32SJosef Bacik * Just don't mess with mod_start and mod_len, we'll 33542ab28f32SJosef Bacik * just end up logging more csums than we need and it 33552ab28f32SJosef Bacik * will be ok. 33562ab28f32SJosef Bacik */ 33572ab28f32SJosef Bacik } else { 33582ab28f32SJosef Bacik if (ordered->file_offset + ordered->len < 33592ab28f32SJosef Bacik mod_start + mod_len) { 33602ab28f32SJosef Bacik mod_len = (mod_start + mod_len) - 33612ab28f32SJosef Bacik (ordered->file_offset + ordered->len); 33622ab28f32SJosef Bacik mod_start = ordered->file_offset + 33632ab28f32SJosef Bacik ordered->len; 33642ab28f32SJosef Bacik } else { 33652ab28f32SJosef Bacik mod_len = 0; 33662ab28f32SJosef Bacik } 33672ab28f32SJosef Bacik } 33682ab28f32SJosef Bacik 33692ab28f32SJosef Bacik /* 33702ab28f32SJosef Bacik * To keep us from looping for the above case of an ordered 33712ab28f32SJosef Bacik * extent that falls inside of the logged extent. 33722ab28f32SJosef Bacik */ 33732ab28f32SJosef Bacik if (test_and_set_bit(BTRFS_ORDERED_LOGGED_CSUM, 33742ab28f32SJosef Bacik &ordered->flags)) 33752ab28f32SJosef Bacik continue; 33762ab28f32SJosef Bacik atomic_inc(&ordered->refs); 33772ab28f32SJosef Bacik spin_unlock_irq(&log->log_extents_lock[index]); 33782ab28f32SJosef Bacik /* 33792ab28f32SJosef Bacik * we've dropped the lock, we must either break or 33802ab28f32SJosef Bacik * start over after this. 
33812ab28f32SJosef Bacik */ 33822ab28f32SJosef Bacik 33832ab28f32SJosef Bacik wait_event(ordered->wait, ordered->csum_bytes_left == 0); 33842ab28f32SJosef Bacik 33852ab28f32SJosef Bacik list_for_each_entry(sum, &ordered->list, list) { 33862ab28f32SJosef Bacik ret = btrfs_csum_file_blocks(trans, log, sum); 33872ab28f32SJosef Bacik if (ret) { 33882ab28f32SJosef Bacik btrfs_put_ordered_extent(ordered); 33892ab28f32SJosef Bacik goto unlocked; 33902ab28f32SJosef Bacik } 33912ab28f32SJosef Bacik } 33922ab28f32SJosef Bacik btrfs_put_ordered_extent(ordered); 33932ab28f32SJosef Bacik goto again; 33942ab28f32SJosef Bacik 33952ab28f32SJosef Bacik } 33962ab28f32SJosef Bacik spin_unlock_irq(&log->log_extents_lock[index]); 33972ab28f32SJosef Bacik unlocked: 33982ab28f32SJosef Bacik 33992ab28f32SJosef Bacik if (!mod_len || ret) 34002ab28f32SJosef Bacik return ret; 34012ab28f32SJosef Bacik 34022ab28f32SJosef Bacik csum_offset = mod_start - em->start; 34032ab28f32SJosef Bacik csum_len = mod_len; 34042ab28f32SJosef Bacik 340570c8a91cSJosef Bacik /* block start is already adjusted for the file extent offset. 
*/ 340670c8a91cSJosef Bacik ret = btrfs_lookup_csums_range(log->fs_info->csum_root, 340770c8a91cSJosef Bacik em->block_start + csum_offset, 340870c8a91cSJosef Bacik em->block_start + csum_offset + 340970c8a91cSJosef Bacik csum_len - 1, &ordered_sums, 0); 34105dc562c5SJosef Bacik if (ret) 34115dc562c5SJosef Bacik return ret; 341270c8a91cSJosef Bacik 341370c8a91cSJosef Bacik while (!list_empty(&ordered_sums)) { 341470c8a91cSJosef Bacik struct btrfs_ordered_sum *sums = list_entry(ordered_sums.next, 341570c8a91cSJosef Bacik struct btrfs_ordered_sum, 341670c8a91cSJosef Bacik list); 341770c8a91cSJosef Bacik if (!ret) 341870c8a91cSJosef Bacik ret = btrfs_csum_file_blocks(trans, log, sums); 341970c8a91cSJosef Bacik list_del(&sums->list); 342070c8a91cSJosef Bacik kfree(sums); 34215dc562c5SJosef Bacik } 34225dc562c5SJosef Bacik 342370c8a91cSJosef Bacik return ret; 34245dc562c5SJosef Bacik } 34255dc562c5SJosef Bacik 34265dc562c5SJosef Bacik static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, 34275dc562c5SJosef Bacik struct btrfs_root *root, 34285dc562c5SJosef Bacik struct inode *inode, 342970c8a91cSJosef Bacik struct btrfs_path *path) 34305dc562c5SJosef Bacik { 34315dc562c5SJosef Bacik struct extent_map *em, *n; 34325dc562c5SJosef Bacik struct list_head extents; 34335dc562c5SJosef Bacik struct extent_map_tree *tree = &BTRFS_I(inode)->extent_tree; 34345dc562c5SJosef Bacik u64 test_gen; 34355dc562c5SJosef Bacik int ret = 0; 34362ab28f32SJosef Bacik int num = 0; 34375dc562c5SJosef Bacik 34385dc562c5SJosef Bacik INIT_LIST_HEAD(&extents); 34395dc562c5SJosef Bacik 34405dc562c5SJosef Bacik write_lock(&tree->lock); 34415dc562c5SJosef Bacik test_gen = root->fs_info->last_trans_committed; 34425dc562c5SJosef Bacik 34435dc562c5SJosef Bacik list_for_each_entry_safe(em, n, &tree->modified_extents, list) { 34445dc562c5SJosef Bacik list_del_init(&em->list); 34452ab28f32SJosef Bacik 34462ab28f32SJosef Bacik /* 34472ab28f32SJosef Bacik * Just an arbitrary number, this can be 
really CPU intensive 34482ab28f32SJosef Bacik * once we start getting a lot of extents, and really once we 34492ab28f32SJosef Bacik * have a bunch of extents we just want to commit since it will 34502ab28f32SJosef Bacik * be faster. 34512ab28f32SJosef Bacik */ 34522ab28f32SJosef Bacik if (++num > 32768) { 34532ab28f32SJosef Bacik list_del_init(&tree->modified_extents); 34542ab28f32SJosef Bacik ret = -EFBIG; 34552ab28f32SJosef Bacik goto process; 34562ab28f32SJosef Bacik } 34572ab28f32SJosef Bacik 34585dc562c5SJosef Bacik if (em->generation <= test_gen) 34595dc562c5SJosef Bacik continue; 3460ff44c6e3SJosef Bacik /* Need a ref to keep it from getting evicted from cache */ 3461ff44c6e3SJosef Bacik atomic_inc(&em->refs); 3462ff44c6e3SJosef Bacik set_bit(EXTENT_FLAG_LOGGING, &em->flags); 34635dc562c5SJosef Bacik list_add_tail(&em->list, &extents); 34642ab28f32SJosef Bacik num++; 34655dc562c5SJosef Bacik } 34665dc562c5SJosef Bacik 34675dc562c5SJosef Bacik list_sort(NULL, &extents, extent_cmp); 34685dc562c5SJosef Bacik 34692ab28f32SJosef Bacik process: 34705dc562c5SJosef Bacik while (!list_empty(&extents)) { 34715dc562c5SJosef Bacik em = list_entry(extents.next, struct extent_map, list); 34725dc562c5SJosef Bacik 34735dc562c5SJosef Bacik list_del_init(&em->list); 34745dc562c5SJosef Bacik 34755dc562c5SJosef Bacik /* 34765dc562c5SJosef Bacik * If we had an error we just need to delete everybody from our 34775dc562c5SJosef Bacik * private list. 
34785dc562c5SJosef Bacik */ 3479ff44c6e3SJosef Bacik if (ret) { 3480201a9038SJosef Bacik clear_em_logging(tree, em); 3481ff44c6e3SJosef Bacik free_extent_map(em); 34825dc562c5SJosef Bacik continue; 3483ff44c6e3SJosef Bacik } 3484ff44c6e3SJosef Bacik 3485ff44c6e3SJosef Bacik write_unlock(&tree->lock); 34865dc562c5SJosef Bacik 348770c8a91cSJosef Bacik ret = log_one_extent(trans, inode, root, em, path); 3488ff44c6e3SJosef Bacik write_lock(&tree->lock); 3489201a9038SJosef Bacik clear_em_logging(tree, em); 3490201a9038SJosef Bacik free_extent_map(em); 34915dc562c5SJosef Bacik } 3492ff44c6e3SJosef Bacik WARN_ON(!list_empty(&extents)); 3493ff44c6e3SJosef Bacik write_unlock(&tree->lock); 34945dc562c5SJosef Bacik 34955dc562c5SJosef Bacik btrfs_release_path(path); 34965dc562c5SJosef Bacik return ret; 34975dc562c5SJosef Bacik } 34985dc562c5SJosef Bacik 3499e02119d5SChris Mason /* log a single inode in the tree log. 3500e02119d5SChris Mason * At least one parent directory for this inode must exist in the tree 3501e02119d5SChris Mason * or be logged already. 3502e02119d5SChris Mason * 3503e02119d5SChris Mason * Any items from this inode changed by the current transaction are copied 3504e02119d5SChris Mason * to the log tree. An extra reference is taken on any extents in this 3505e02119d5SChris Mason * file, allowing us to avoid a whole pile of corner cases around logging 3506e02119d5SChris Mason * blocks that have been removed from the tree. 3507e02119d5SChris Mason * 3508e02119d5SChris Mason * See LOG_INODE_ALL and related defines for a description of what inode_only 3509e02119d5SChris Mason * does. 3510e02119d5SChris Mason * 3511e02119d5SChris Mason * This handles both files and directories. 
3512e02119d5SChris Mason */ 351312fcfd22SChris Mason static int btrfs_log_inode(struct btrfs_trans_handle *trans, 3514e02119d5SChris Mason struct btrfs_root *root, struct inode *inode, 3515e02119d5SChris Mason int inode_only) 3516e02119d5SChris Mason { 3517e02119d5SChris Mason struct btrfs_path *path; 3518e02119d5SChris Mason struct btrfs_path *dst_path; 3519e02119d5SChris Mason struct btrfs_key min_key; 3520e02119d5SChris Mason struct btrfs_key max_key; 3521e02119d5SChris Mason struct btrfs_root *log = root->log_root; 352231ff1cd2SChris Mason struct extent_buffer *src = NULL; 35234a500fd1SYan, Zheng int err = 0; 3524e02119d5SChris Mason int ret; 35253a5f1d45SChris Mason int nritems; 352631ff1cd2SChris Mason int ins_start_slot = 0; 352731ff1cd2SChris Mason int ins_nr; 35285dc562c5SJosef Bacik bool fast_search = false; 352933345d01SLi Zefan u64 ino = btrfs_ino(inode); 3530e02119d5SChris Mason 3531e02119d5SChris Mason path = btrfs_alloc_path(); 35325df67083STsutomu Itoh if (!path) 35335df67083STsutomu Itoh return -ENOMEM; 3534e02119d5SChris Mason dst_path = btrfs_alloc_path(); 35355df67083STsutomu Itoh if (!dst_path) { 35365df67083STsutomu Itoh btrfs_free_path(path); 35375df67083STsutomu Itoh return -ENOMEM; 35385df67083STsutomu Itoh } 3539e02119d5SChris Mason 354033345d01SLi Zefan min_key.objectid = ino; 3541e02119d5SChris Mason min_key.type = BTRFS_INODE_ITEM_KEY; 3542e02119d5SChris Mason min_key.offset = 0; 3543e02119d5SChris Mason 354433345d01SLi Zefan max_key.objectid = ino; 354512fcfd22SChris Mason 354612fcfd22SChris Mason 35475dc562c5SJosef Bacik /* today the code can only do partial logging of directories */ 35485269b67eSMiao Xie if (S_ISDIR(inode->i_mode) || 35495269b67eSMiao Xie (!test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, 35505269b67eSMiao Xie &BTRFS_I(inode)->runtime_flags) && 35515269b67eSMiao Xie inode_only == LOG_INODE_EXISTS)) 3552e02119d5SChris Mason max_key.type = BTRFS_XATTR_ITEM_KEY; 3553e02119d5SChris Mason else 3554e02119d5SChris Mason max_key.type = 
(u8)-1; 3555e02119d5SChris Mason max_key.offset = (u64)-1; 3556e02119d5SChris Mason 355794edf4aeSJosef Bacik /* Only run delayed items if we are a dir or a new file */ 355894edf4aeSJosef Bacik if (S_ISDIR(inode->i_mode) || 355994edf4aeSJosef Bacik BTRFS_I(inode)->generation > root->fs_info->last_trans_committed) { 356016cdcec7SMiao Xie ret = btrfs_commit_inode_delayed_items(trans, inode); 356116cdcec7SMiao Xie if (ret) { 356216cdcec7SMiao Xie btrfs_free_path(path); 356316cdcec7SMiao Xie btrfs_free_path(dst_path); 356416cdcec7SMiao Xie return ret; 356516cdcec7SMiao Xie } 356694edf4aeSJosef Bacik } 356716cdcec7SMiao Xie 3568e02119d5SChris Mason mutex_lock(&BTRFS_I(inode)->log_mutex); 3569e02119d5SChris Mason 35702ab28f32SJosef Bacik btrfs_get_logged_extents(log, inode); 35712ab28f32SJosef Bacik 3572e02119d5SChris Mason /* 3573e02119d5SChris Mason * a brute force approach to making sure we get the most uptodate 3574e02119d5SChris Mason * copies of everything. 3575e02119d5SChris Mason */ 3576e02119d5SChris Mason if (S_ISDIR(inode->i_mode)) { 3577e02119d5SChris Mason int max_key_type = BTRFS_DIR_LOG_INDEX_KEY; 3578e02119d5SChris Mason 3579e02119d5SChris Mason if (inode_only == LOG_INODE_EXISTS) 3580e02119d5SChris Mason max_key_type = BTRFS_XATTR_ITEM_KEY; 358133345d01SLi Zefan ret = drop_objectid_items(trans, log, path, ino, max_key_type); 3582e02119d5SChris Mason } else { 35835dc562c5SJosef Bacik if (test_and_clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC, 35845dc562c5SJosef Bacik &BTRFS_I(inode)->runtime_flags)) { 3585e9976151SJosef Bacik clear_bit(BTRFS_INODE_COPY_EVERYTHING, 3586e9976151SJosef Bacik &BTRFS_I(inode)->runtime_flags); 35875dc562c5SJosef Bacik ret = btrfs_truncate_inode_items(trans, log, 35885dc562c5SJosef Bacik inode, 0, 0); 3589a95249b3SJosef Bacik } else if (test_and_clear_bit(BTRFS_INODE_COPY_EVERYTHING, 3590a95249b3SJosef Bacik &BTRFS_I(inode)->runtime_flags)) { 3591a95249b3SJosef Bacik if (inode_only == LOG_INODE_ALL) 3592a95249b3SJosef Bacik fast_search = 
true; 3593a95249b3SJosef Bacik max_key.type = BTRFS_XATTR_ITEM_KEY; 3594a95249b3SJosef Bacik ret = drop_objectid_items(trans, log, path, ino, 3595a95249b3SJosef Bacik max_key.type); 35965dc562c5SJosef Bacik } else { 3597183f37faSLiu Bo if (inode_only == LOG_INODE_ALL) 35985dc562c5SJosef Bacik fast_search = true; 3599a95249b3SJosef Bacik ret = log_inode_item(trans, log, dst_path, inode); 3600a95249b3SJosef Bacik if (ret) { 3601a95249b3SJosef Bacik err = ret; 3602a95249b3SJosef Bacik goto out_unlock; 36035dc562c5SJosef Bacik } 3604a95249b3SJosef Bacik goto log_extents; 3605a95249b3SJosef Bacik } 3606a95249b3SJosef Bacik 3607e02119d5SChris Mason } 36084a500fd1SYan, Zheng if (ret) { 36094a500fd1SYan, Zheng err = ret; 36104a500fd1SYan, Zheng goto out_unlock; 36114a500fd1SYan, Zheng } 3612e02119d5SChris Mason path->keep_locks = 1; 3613e02119d5SChris Mason 3614e02119d5SChris Mason while (1) { 361531ff1cd2SChris Mason ins_nr = 0; 3616e02119d5SChris Mason ret = btrfs_search_forward(root, &min_key, &max_key, 3617de78b51aSEric Sandeen path, trans->transid); 3618e02119d5SChris Mason if (ret != 0) 3619e02119d5SChris Mason break; 36203a5f1d45SChris Mason again: 362131ff1cd2SChris Mason /* note, ins_nr might be > 0 here, cleanup outside the loop */ 362233345d01SLi Zefan if (min_key.objectid != ino) 3623e02119d5SChris Mason break; 3624e02119d5SChris Mason if (min_key.type > max_key.type) 3625e02119d5SChris Mason break; 362631ff1cd2SChris Mason 3627e02119d5SChris Mason src = path->nodes[0]; 362831ff1cd2SChris Mason if (ins_nr && ins_start_slot + ins_nr == path->slots[0]) { 362931ff1cd2SChris Mason ins_nr++; 363031ff1cd2SChris Mason goto next_slot; 363131ff1cd2SChris Mason } else if (!ins_nr) { 363231ff1cd2SChris Mason ins_start_slot = path->slots[0]; 363331ff1cd2SChris Mason ins_nr = 1; 363431ff1cd2SChris Mason goto next_slot; 3635e02119d5SChris Mason } 3636e02119d5SChris Mason 3637d2794405SLiu Bo ret = copy_items(trans, inode, dst_path, src, ins_start_slot, 363831ff1cd2SChris 
Mason ins_nr, inode_only); 36394a500fd1SYan, Zheng if (ret) { 36404a500fd1SYan, Zheng err = ret; 36414a500fd1SYan, Zheng goto out_unlock; 36424a500fd1SYan, Zheng } 364331ff1cd2SChris Mason ins_nr = 1; 364431ff1cd2SChris Mason ins_start_slot = path->slots[0]; 364531ff1cd2SChris Mason next_slot: 3646e02119d5SChris Mason 36473a5f1d45SChris Mason nritems = btrfs_header_nritems(path->nodes[0]); 36483a5f1d45SChris Mason path->slots[0]++; 36493a5f1d45SChris Mason if (path->slots[0] < nritems) { 36503a5f1d45SChris Mason btrfs_item_key_to_cpu(path->nodes[0], &min_key, 36513a5f1d45SChris Mason path->slots[0]); 36523a5f1d45SChris Mason goto again; 36533a5f1d45SChris Mason } 365431ff1cd2SChris Mason if (ins_nr) { 3655d2794405SLiu Bo ret = copy_items(trans, inode, dst_path, src, 365631ff1cd2SChris Mason ins_start_slot, 365731ff1cd2SChris Mason ins_nr, inode_only); 36584a500fd1SYan, Zheng if (ret) { 36594a500fd1SYan, Zheng err = ret; 36604a500fd1SYan, Zheng goto out_unlock; 36614a500fd1SYan, Zheng } 366231ff1cd2SChris Mason ins_nr = 0; 366331ff1cd2SChris Mason } 3664b3b4aa74SDavid Sterba btrfs_release_path(path); 36653a5f1d45SChris Mason 3666e02119d5SChris Mason if (min_key.offset < (u64)-1) 3667e02119d5SChris Mason min_key.offset++; 3668e02119d5SChris Mason else if (min_key.type < (u8)-1) 3669e02119d5SChris Mason min_key.type++; 3670e02119d5SChris Mason else if (min_key.objectid < (u64)-1) 3671e02119d5SChris Mason min_key.objectid++; 3672e02119d5SChris Mason else 3673e02119d5SChris Mason break; 3674e02119d5SChris Mason } 367531ff1cd2SChris Mason if (ins_nr) { 3676d2794405SLiu Bo ret = copy_items(trans, inode, dst_path, src, ins_start_slot, 367731ff1cd2SChris Mason ins_nr, inode_only); 36784a500fd1SYan, Zheng if (ret) { 36794a500fd1SYan, Zheng err = ret; 36804a500fd1SYan, Zheng goto out_unlock; 36814a500fd1SYan, Zheng } 368231ff1cd2SChris Mason ins_nr = 0; 368331ff1cd2SChris Mason } 36845dc562c5SJosef Bacik 3685a95249b3SJosef Bacik log_extents: 36865dc562c5SJosef Bacik if 
(fast_search) { 36875dc562c5SJosef Bacik btrfs_release_path(dst_path); 368870c8a91cSJosef Bacik ret = btrfs_log_changed_extents(trans, root, inode, dst_path); 36895dc562c5SJosef Bacik if (ret) { 36905dc562c5SJosef Bacik err = ret; 36915dc562c5SJosef Bacik goto out_unlock; 36925dc562c5SJosef Bacik } 369306d3d22bSLiu Bo } else { 369406d3d22bSLiu Bo struct extent_map_tree *tree = &BTRFS_I(inode)->extent_tree; 369506d3d22bSLiu Bo struct extent_map *em, *n; 369606d3d22bSLiu Bo 3697bbe14267SMiao Xie write_lock(&tree->lock); 369806d3d22bSLiu Bo list_for_each_entry_safe(em, n, &tree->modified_extents, list) 369906d3d22bSLiu Bo list_del_init(&em->list); 3700bbe14267SMiao Xie write_unlock(&tree->lock); 37015dc562c5SJosef Bacik } 37025dc562c5SJosef Bacik 37039623f9a3SChris Mason if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) { 3704b3b4aa74SDavid Sterba btrfs_release_path(path); 3705b3b4aa74SDavid Sterba btrfs_release_path(dst_path); 3706e02119d5SChris Mason ret = log_directory_changes(trans, root, inode, path, dst_path); 37074a500fd1SYan, Zheng if (ret) { 37084a500fd1SYan, Zheng err = ret; 37094a500fd1SYan, Zheng goto out_unlock; 37104a500fd1SYan, Zheng } 3711e02119d5SChris Mason } 37123a5f1d45SChris Mason BTRFS_I(inode)->logged_trans = trans->transid; 371346d8bc34SLiu Bo BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->last_sub_trans; 37144a500fd1SYan, Zheng out_unlock: 37152ab28f32SJosef Bacik if (err) 37162ab28f32SJosef Bacik btrfs_free_logged_extents(log, log->log_transid); 3717e02119d5SChris Mason mutex_unlock(&BTRFS_I(inode)->log_mutex); 3718e02119d5SChris Mason 3719e02119d5SChris Mason btrfs_free_path(path); 3720e02119d5SChris Mason btrfs_free_path(dst_path); 37214a500fd1SYan, Zheng return err; 3722e02119d5SChris Mason } 3723e02119d5SChris Mason 372412fcfd22SChris Mason /* 372512fcfd22SChris Mason * follow the dentry parent pointers up the chain and see if any 372612fcfd22SChris Mason * of the directories in it require a full commit before they can 
372712fcfd22SChris Mason * be logged. Returns zero if nothing special needs to be done or 1 if 372812fcfd22SChris Mason * a full commit is required. 372912fcfd22SChris Mason */ 373012fcfd22SChris Mason static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans, 373112fcfd22SChris Mason struct inode *inode, 373212fcfd22SChris Mason struct dentry *parent, 373312fcfd22SChris Mason struct super_block *sb, 373412fcfd22SChris Mason u64 last_committed) 3735e02119d5SChris Mason { 373612fcfd22SChris Mason int ret = 0; 373712fcfd22SChris Mason struct btrfs_root *root; 37386a912213SJosef Bacik struct dentry *old_parent = NULL; 3739e02119d5SChris Mason 3740af4176b4SChris Mason /* 3741af4176b4SChris Mason * for regular files, if its inode is already on disk, we don't 3742af4176b4SChris Mason * have to worry about the parents at all. This is because 3743af4176b4SChris Mason * we can use the last_unlink_trans field to record renames 3744af4176b4SChris Mason * and other fun in this file. 
3745af4176b4SChris Mason */ 3746af4176b4SChris Mason if (S_ISREG(inode->i_mode) && 3747af4176b4SChris Mason BTRFS_I(inode)->generation <= last_committed && 3748af4176b4SChris Mason BTRFS_I(inode)->last_unlink_trans <= last_committed) 3749af4176b4SChris Mason goto out; 3750af4176b4SChris Mason 375112fcfd22SChris Mason if (!S_ISDIR(inode->i_mode)) { 375212fcfd22SChris Mason if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb) 375312fcfd22SChris Mason goto out; 375412fcfd22SChris Mason inode = parent->d_inode; 375512fcfd22SChris Mason } 375612fcfd22SChris Mason 375712fcfd22SChris Mason while (1) { 375812fcfd22SChris Mason BTRFS_I(inode)->logged_trans = trans->transid; 375912fcfd22SChris Mason smp_mb(); 376012fcfd22SChris Mason 376112fcfd22SChris Mason if (BTRFS_I(inode)->last_unlink_trans > last_committed) { 376212fcfd22SChris Mason root = BTRFS_I(inode)->root; 376312fcfd22SChris Mason 376412fcfd22SChris Mason /* 376512fcfd22SChris Mason * make sure any commits to the log are forced 376612fcfd22SChris Mason * to be full commits 376712fcfd22SChris Mason */ 376812fcfd22SChris Mason root->fs_info->last_trans_log_full_commit = 376912fcfd22SChris Mason trans->transid; 377012fcfd22SChris Mason ret = 1; 377112fcfd22SChris Mason break; 377212fcfd22SChris Mason } 377312fcfd22SChris Mason 377412fcfd22SChris Mason if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb) 377512fcfd22SChris Mason break; 377612fcfd22SChris Mason 377776dda93cSYan, Zheng if (IS_ROOT(parent)) 377812fcfd22SChris Mason break; 377912fcfd22SChris Mason 37806a912213SJosef Bacik parent = dget_parent(parent); 37816a912213SJosef Bacik dput(old_parent); 37826a912213SJosef Bacik old_parent = parent; 378312fcfd22SChris Mason inode = parent->d_inode; 378412fcfd22SChris Mason 378512fcfd22SChris Mason } 37866a912213SJosef Bacik dput(old_parent); 378712fcfd22SChris Mason out: 3788e02119d5SChris Mason return ret; 3789e02119d5SChris Mason } 3790e02119d5SChris Mason 3791e02119d5SChris Mason /* 
3792e02119d5SChris Mason * helper function around btrfs_log_inode to make sure newly created 3793e02119d5SChris Mason * parent directories also end up in the log. A minimal inode and backref 3794e02119d5SChris Mason * only logging is done of any parent directories that are older than 3795e02119d5SChris Mason * the last committed transaction 3796e02119d5SChris Mason */ 379712fcfd22SChris Mason int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, 379812fcfd22SChris Mason struct btrfs_root *root, struct inode *inode, 379912fcfd22SChris Mason struct dentry *parent, int exists_only) 3800e02119d5SChris Mason { 380112fcfd22SChris Mason int inode_only = exists_only ? LOG_INODE_EXISTS : LOG_INODE_ALL; 3802e02119d5SChris Mason struct super_block *sb; 38036a912213SJosef Bacik struct dentry *old_parent = NULL; 380412fcfd22SChris Mason int ret = 0; 380512fcfd22SChris Mason u64 last_committed = root->fs_info->last_trans_committed; 380612fcfd22SChris Mason 380712fcfd22SChris Mason sb = inode->i_sb; 380812fcfd22SChris Mason 38093a5e1404SSage Weil if (btrfs_test_opt(root, NOTREELOG)) { 38103a5e1404SSage Weil ret = 1; 38113a5e1404SSage Weil goto end_no_trans; 38123a5e1404SSage Weil } 38133a5e1404SSage Weil 381412fcfd22SChris Mason if (root->fs_info->last_trans_log_full_commit > 381512fcfd22SChris Mason root->fs_info->last_trans_committed) { 381612fcfd22SChris Mason ret = 1; 381712fcfd22SChris Mason goto end_no_trans; 381812fcfd22SChris Mason } 381912fcfd22SChris Mason 382076dda93cSYan, Zheng if (root != BTRFS_I(inode)->root || 382176dda93cSYan, Zheng btrfs_root_refs(&root->root_item) == 0) { 382276dda93cSYan, Zheng ret = 1; 382376dda93cSYan, Zheng goto end_no_trans; 382476dda93cSYan, Zheng } 382576dda93cSYan, Zheng 382612fcfd22SChris Mason ret = check_parent_dirs_for_sync(trans, inode, parent, 382712fcfd22SChris Mason sb, last_committed); 382812fcfd22SChris Mason if (ret) 382912fcfd22SChris Mason goto end_no_trans; 3830e02119d5SChris Mason 383122ee6985SJosef Bacik if 
(btrfs_inode_in_log(inode, trans->transid)) { 3832257c62e1SChris Mason ret = BTRFS_NO_LOG_SYNC; 3833257c62e1SChris Mason goto end_no_trans; 3834257c62e1SChris Mason } 3835257c62e1SChris Mason 38364a500fd1SYan, Zheng ret = start_log_trans(trans, root); 38374a500fd1SYan, Zheng if (ret) 38384a500fd1SYan, Zheng goto end_trans; 383912fcfd22SChris Mason 384012fcfd22SChris Mason ret = btrfs_log_inode(trans, root, inode, inode_only); 38414a500fd1SYan, Zheng if (ret) 38424a500fd1SYan, Zheng goto end_trans; 3843e02119d5SChris Mason 3844af4176b4SChris Mason /* 3845af4176b4SChris Mason * for regular files, if its inode is already on disk, we don't 3846af4176b4SChris Mason * have to worry about the parents at all. This is because 3847af4176b4SChris Mason * we can use the last_unlink_trans field to record renames 3848af4176b4SChris Mason * and other fun in this file. 3849af4176b4SChris Mason */ 3850af4176b4SChris Mason if (S_ISREG(inode->i_mode) && 3851af4176b4SChris Mason BTRFS_I(inode)->generation <= last_committed && 38524a500fd1SYan, Zheng BTRFS_I(inode)->last_unlink_trans <= last_committed) { 38534a500fd1SYan, Zheng ret = 0; 38544a500fd1SYan, Zheng goto end_trans; 38554a500fd1SYan, Zheng } 3856af4176b4SChris Mason 3857af4176b4SChris Mason inode_only = LOG_INODE_EXISTS; 385812fcfd22SChris Mason while (1) { 385912fcfd22SChris Mason if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb) 3860e02119d5SChris Mason break; 3861e02119d5SChris Mason 386212fcfd22SChris Mason inode = parent->d_inode; 386376dda93cSYan, Zheng if (root != BTRFS_I(inode)->root) 386476dda93cSYan, Zheng break; 386576dda93cSYan, Zheng 386612fcfd22SChris Mason if (BTRFS_I(inode)->generation > 386712fcfd22SChris Mason root->fs_info->last_trans_committed) { 386812fcfd22SChris Mason ret = btrfs_log_inode(trans, root, inode, inode_only); 38694a500fd1SYan, Zheng if (ret) 38704a500fd1SYan, Zheng goto end_trans; 3871e02119d5SChris Mason } 387276dda93cSYan, Zheng if (IS_ROOT(parent)) 387312fcfd22SChris Mason 
break; 387412fcfd22SChris Mason 38756a912213SJosef Bacik parent = dget_parent(parent); 38766a912213SJosef Bacik dput(old_parent); 38776a912213SJosef Bacik old_parent = parent; 387812fcfd22SChris Mason } 387912fcfd22SChris Mason ret = 0; 38804a500fd1SYan, Zheng end_trans: 38816a912213SJosef Bacik dput(old_parent); 38824a500fd1SYan, Zheng if (ret < 0) { 38834a500fd1SYan, Zheng root->fs_info->last_trans_log_full_commit = trans->transid; 38844a500fd1SYan, Zheng ret = 1; 38854a500fd1SYan, Zheng } 388612fcfd22SChris Mason btrfs_end_log_trans(root); 388712fcfd22SChris Mason end_no_trans: 388812fcfd22SChris Mason return ret; 3889e02119d5SChris Mason } 3890e02119d5SChris Mason 3891e02119d5SChris Mason /* 3892e02119d5SChris Mason * it is not safe to log dentry if the chunk root has added new 3893e02119d5SChris Mason * chunks. This returns 0 if the dentry was logged, and 1 otherwise. 3894e02119d5SChris Mason * If this returns 1, you must commit the transaction to safely get your 3895e02119d5SChris Mason * data on disk. 
3896e02119d5SChris Mason */ 3897e02119d5SChris Mason int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, 3898e02119d5SChris Mason struct btrfs_root *root, struct dentry *dentry) 3899e02119d5SChris Mason { 39006a912213SJosef Bacik struct dentry *parent = dget_parent(dentry); 39016a912213SJosef Bacik int ret; 39026a912213SJosef Bacik 39036a912213SJosef Bacik ret = btrfs_log_inode_parent(trans, root, dentry->d_inode, parent, 0); 39046a912213SJosef Bacik dput(parent); 39056a912213SJosef Bacik 39066a912213SJosef Bacik return ret; 3907e02119d5SChris Mason } 3908e02119d5SChris Mason 3909e02119d5SChris Mason /* 3910e02119d5SChris Mason * should be called during mount to recover any replay any log trees 3911e02119d5SChris Mason * from the FS 3912e02119d5SChris Mason */ 3913e02119d5SChris Mason int btrfs_recover_log_trees(struct btrfs_root *log_root_tree) 3914e02119d5SChris Mason { 3915e02119d5SChris Mason int ret; 3916e02119d5SChris Mason struct btrfs_path *path; 3917e02119d5SChris Mason struct btrfs_trans_handle *trans; 3918e02119d5SChris Mason struct btrfs_key key; 3919e02119d5SChris Mason struct btrfs_key found_key; 3920e02119d5SChris Mason struct btrfs_key tmp_key; 3921e02119d5SChris Mason struct btrfs_root *log; 3922e02119d5SChris Mason struct btrfs_fs_info *fs_info = log_root_tree->fs_info; 3923e02119d5SChris Mason struct walk_control wc = { 3924e02119d5SChris Mason .process_func = process_one_buffer, 3925e02119d5SChris Mason .stage = 0, 3926e02119d5SChris Mason }; 3927e02119d5SChris Mason 3928e02119d5SChris Mason path = btrfs_alloc_path(); 3929db5b493aSTsutomu Itoh if (!path) 3930db5b493aSTsutomu Itoh return -ENOMEM; 3931db5b493aSTsutomu Itoh 3932db5b493aSTsutomu Itoh fs_info->log_root_recovering = 1; 3933e02119d5SChris Mason 39344a500fd1SYan, Zheng trans = btrfs_start_transaction(fs_info->tree_root, 0); 393579787eaaSJeff Mahoney if (IS_ERR(trans)) { 393679787eaaSJeff Mahoney ret = PTR_ERR(trans); 393779787eaaSJeff Mahoney goto error; 393879787eaaSJeff 
Mahoney } 3939e02119d5SChris Mason 3940e02119d5SChris Mason wc.trans = trans; 3941e02119d5SChris Mason wc.pin = 1; 3942e02119d5SChris Mason 3943db5b493aSTsutomu Itoh ret = walk_log_tree(trans, log_root_tree, &wc); 394479787eaaSJeff Mahoney if (ret) { 394579787eaaSJeff Mahoney btrfs_error(fs_info, ret, "Failed to pin buffers while " 394679787eaaSJeff Mahoney "recovering log root tree."); 394779787eaaSJeff Mahoney goto error; 394879787eaaSJeff Mahoney } 3949e02119d5SChris Mason 3950e02119d5SChris Mason again: 3951e02119d5SChris Mason key.objectid = BTRFS_TREE_LOG_OBJECTID; 3952e02119d5SChris Mason key.offset = (u64)-1; 3953e02119d5SChris Mason btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); 3954e02119d5SChris Mason 3955e02119d5SChris Mason while (1) { 3956e02119d5SChris Mason ret = btrfs_search_slot(NULL, log_root_tree, &key, path, 0, 0); 395779787eaaSJeff Mahoney 395879787eaaSJeff Mahoney if (ret < 0) { 395979787eaaSJeff Mahoney btrfs_error(fs_info, ret, 396079787eaaSJeff Mahoney "Couldn't find tree log root."); 396179787eaaSJeff Mahoney goto error; 396279787eaaSJeff Mahoney } 3963e02119d5SChris Mason if (ret > 0) { 3964e02119d5SChris Mason if (path->slots[0] == 0) 3965e02119d5SChris Mason break; 3966e02119d5SChris Mason path->slots[0]--; 3967e02119d5SChris Mason } 3968e02119d5SChris Mason btrfs_item_key_to_cpu(path->nodes[0], &found_key, 3969e02119d5SChris Mason path->slots[0]); 3970b3b4aa74SDavid Sterba btrfs_release_path(path); 3971e02119d5SChris Mason if (found_key.objectid != BTRFS_TREE_LOG_OBJECTID) 3972e02119d5SChris Mason break; 3973e02119d5SChris Mason 3974e02119d5SChris Mason log = btrfs_read_fs_root_no_radix(log_root_tree, 3975e02119d5SChris Mason &found_key); 397679787eaaSJeff Mahoney if (IS_ERR(log)) { 397779787eaaSJeff Mahoney ret = PTR_ERR(log); 397879787eaaSJeff Mahoney btrfs_error(fs_info, ret, 397979787eaaSJeff Mahoney "Couldn't read tree log root."); 398079787eaaSJeff Mahoney goto error; 398179787eaaSJeff Mahoney } 3982e02119d5SChris Mason 
3983e02119d5SChris Mason tmp_key.objectid = found_key.offset; 3984e02119d5SChris Mason tmp_key.type = BTRFS_ROOT_ITEM_KEY; 3985e02119d5SChris Mason tmp_key.offset = (u64)-1; 3986e02119d5SChris Mason 3987e02119d5SChris Mason wc.replay_dest = btrfs_read_fs_root_no_name(fs_info, &tmp_key); 398879787eaaSJeff Mahoney if (IS_ERR(wc.replay_dest)) { 398979787eaaSJeff Mahoney ret = PTR_ERR(wc.replay_dest); 3990*b50c6e25SJosef Bacik free_extent_buffer(log->node); 3991*b50c6e25SJosef Bacik free_extent_buffer(log->commit_root); 3992*b50c6e25SJosef Bacik kfree(log); 399379787eaaSJeff Mahoney btrfs_error(fs_info, ret, "Couldn't read target root " 399479787eaaSJeff Mahoney "for tree log recovery."); 399579787eaaSJeff Mahoney goto error; 399679787eaaSJeff Mahoney } 3997e02119d5SChris Mason 399807d400a6SYan Zheng wc.replay_dest->log_root = log; 39995d4f98a2SYan Zheng btrfs_record_root_in_trans(trans, wc.replay_dest); 4000e02119d5SChris Mason ret = walk_log_tree(trans, log, &wc); 4001e02119d5SChris Mason 4002*b50c6e25SJosef Bacik if (!ret && wc.stage == LOG_WALK_REPLAY_ALL) { 4003e02119d5SChris Mason ret = fixup_inode_link_counts(trans, wc.replay_dest, 4004e02119d5SChris Mason path); 4005e02119d5SChris Mason } 4006e02119d5SChris Mason 4007e02119d5SChris Mason key.offset = found_key.offset - 1; 400807d400a6SYan Zheng wc.replay_dest->log_root = NULL; 4009e02119d5SChris Mason free_extent_buffer(log->node); 4010b263c2c8SChris Mason free_extent_buffer(log->commit_root); 4011e02119d5SChris Mason kfree(log); 4012e02119d5SChris Mason 4013*b50c6e25SJosef Bacik if (ret) 4014*b50c6e25SJosef Bacik goto error; 4015*b50c6e25SJosef Bacik 4016e02119d5SChris Mason if (found_key.offset == 0) 4017e02119d5SChris Mason break; 4018e02119d5SChris Mason } 4019b3b4aa74SDavid Sterba btrfs_release_path(path); 4020e02119d5SChris Mason 4021e02119d5SChris Mason /* step one is to pin it all, step two is to replay just inodes */ 4022e02119d5SChris Mason if (wc.pin) { 4023e02119d5SChris Mason wc.pin = 0; 
4024e02119d5SChris Mason wc.process_func = replay_one_buffer; 4025e02119d5SChris Mason wc.stage = LOG_WALK_REPLAY_INODES; 4026e02119d5SChris Mason goto again; 4027e02119d5SChris Mason } 4028e02119d5SChris Mason /* step three is to replay everything */ 4029e02119d5SChris Mason if (wc.stage < LOG_WALK_REPLAY_ALL) { 4030e02119d5SChris Mason wc.stage++; 4031e02119d5SChris Mason goto again; 4032e02119d5SChris Mason } 4033e02119d5SChris Mason 4034e02119d5SChris Mason btrfs_free_path(path); 4035e02119d5SChris Mason 4036abefa55aSJosef Bacik /* step 4: commit the transaction, which also unpins the blocks */ 4037abefa55aSJosef Bacik ret = btrfs_commit_transaction(trans, fs_info->tree_root); 4038abefa55aSJosef Bacik if (ret) 4039abefa55aSJosef Bacik return ret; 4040abefa55aSJosef Bacik 4041e02119d5SChris Mason free_extent_buffer(log_root_tree->node); 4042e02119d5SChris Mason log_root_tree->log_root = NULL; 4043e02119d5SChris Mason fs_info->log_root_recovering = 0; 4044e02119d5SChris Mason kfree(log_root_tree); 404579787eaaSJeff Mahoney 4046abefa55aSJosef Bacik return 0; 404779787eaaSJeff Mahoney error: 4048*b50c6e25SJosef Bacik if (wc.trans) 4049*b50c6e25SJosef Bacik btrfs_end_transaction(wc.trans, fs_info->tree_root); 405079787eaaSJeff Mahoney btrfs_free_path(path); 405179787eaaSJeff Mahoney return ret; 4052e02119d5SChris Mason } 405312fcfd22SChris Mason 405412fcfd22SChris Mason /* 405512fcfd22SChris Mason * there are some corner cases where we want to force a full 405612fcfd22SChris Mason * commit instead of allowing a directory to be logged. 405712fcfd22SChris Mason * 405812fcfd22SChris Mason * They revolve around files there were unlinked from the directory, and 405912fcfd22SChris Mason * this function updates the parent directory so that a full commit is 406012fcfd22SChris Mason * properly done if it is fsync'd later after the unlinks are done. 
406112fcfd22SChris Mason */ 406212fcfd22SChris Mason void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans, 406312fcfd22SChris Mason struct inode *dir, struct inode *inode, 406412fcfd22SChris Mason int for_rename) 406512fcfd22SChris Mason { 406612fcfd22SChris Mason /* 4067af4176b4SChris Mason * when we're logging a file, if it hasn't been renamed 4068af4176b4SChris Mason * or unlinked, and its inode is fully committed on disk, 4069af4176b4SChris Mason * we don't have to worry about walking up the directory chain 4070af4176b4SChris Mason * to log its parents. 4071af4176b4SChris Mason * 4072af4176b4SChris Mason * So, we use the last_unlink_trans field to put this transid 4073af4176b4SChris Mason * into the file. When the file is logged we check it and 4074af4176b4SChris Mason * don't log the parents if the file is fully on disk. 4075af4176b4SChris Mason */ 4076af4176b4SChris Mason if (S_ISREG(inode->i_mode)) 4077af4176b4SChris Mason BTRFS_I(inode)->last_unlink_trans = trans->transid; 4078af4176b4SChris Mason 4079af4176b4SChris Mason /* 408012fcfd22SChris Mason * if this directory was already logged any new 408112fcfd22SChris Mason * names for this file/dir will get recorded 408212fcfd22SChris Mason */ 408312fcfd22SChris Mason smp_mb(); 408412fcfd22SChris Mason if (BTRFS_I(dir)->logged_trans == trans->transid) 408512fcfd22SChris Mason return; 408612fcfd22SChris Mason 408712fcfd22SChris Mason /* 408812fcfd22SChris Mason * if the inode we're about to unlink was logged, 408912fcfd22SChris Mason * the log will be properly updated for any new names 409012fcfd22SChris Mason */ 409112fcfd22SChris Mason if (BTRFS_I(inode)->logged_trans == trans->transid) 409212fcfd22SChris Mason return; 409312fcfd22SChris Mason 409412fcfd22SChris Mason /* 409512fcfd22SChris Mason * when renaming files across directories, if the directory 409612fcfd22SChris Mason * there we're unlinking from gets fsync'd later on, there's 409712fcfd22SChris Mason * no way to find the destination 
directory later and fsync it 409812fcfd22SChris Mason * properly. So, we have to be conservative and force commits 409912fcfd22SChris Mason * so the new name gets discovered. 410012fcfd22SChris Mason */ 410112fcfd22SChris Mason if (for_rename) 410212fcfd22SChris Mason goto record; 410312fcfd22SChris Mason 410412fcfd22SChris Mason /* we can safely do the unlink without any special recording */ 410512fcfd22SChris Mason return; 410612fcfd22SChris Mason 410712fcfd22SChris Mason record: 410812fcfd22SChris Mason BTRFS_I(dir)->last_unlink_trans = trans->transid; 410912fcfd22SChris Mason } 411012fcfd22SChris Mason 411112fcfd22SChris Mason /* 411212fcfd22SChris Mason * Call this after adding a new name for a file and it will properly 411312fcfd22SChris Mason * update the log to reflect the new name. 411412fcfd22SChris Mason * 411512fcfd22SChris Mason * It will return zero if all goes well, and it will return 1 if a 411612fcfd22SChris Mason * full transaction commit is required. 411712fcfd22SChris Mason */ 411812fcfd22SChris Mason int btrfs_log_new_name(struct btrfs_trans_handle *trans, 411912fcfd22SChris Mason struct inode *inode, struct inode *old_dir, 412012fcfd22SChris Mason struct dentry *parent) 412112fcfd22SChris Mason { 412212fcfd22SChris Mason struct btrfs_root * root = BTRFS_I(inode)->root; 412312fcfd22SChris Mason 412412fcfd22SChris Mason /* 4125af4176b4SChris Mason * this will force the logging code to walk the dentry chain 4126af4176b4SChris Mason * up for the file 4127af4176b4SChris Mason */ 4128af4176b4SChris Mason if (S_ISREG(inode->i_mode)) 4129af4176b4SChris Mason BTRFS_I(inode)->last_unlink_trans = trans->transid; 4130af4176b4SChris Mason 4131af4176b4SChris Mason /* 413212fcfd22SChris Mason * if this inode hasn't been logged and directory we're renaming it 413312fcfd22SChris Mason * from hasn't been logged, we don't need to log it 413412fcfd22SChris Mason */ 413512fcfd22SChris Mason if (BTRFS_I(inode)->logged_trans <= 413612fcfd22SChris Mason 
root->fs_info->last_trans_committed && 413712fcfd22SChris Mason (!old_dir || BTRFS_I(old_dir)->logged_trans <= 413812fcfd22SChris Mason root->fs_info->last_trans_committed)) 413912fcfd22SChris Mason return 0; 414012fcfd22SChris Mason 414112fcfd22SChris Mason return btrfs_log_inode_parent(trans, root, inode, parent, 1); 414212fcfd22SChris Mason } 414312fcfd22SChris Mason 4144