1e02119d5SChris Mason /* 2e02119d5SChris Mason * Copyright (C) 2008 Oracle. All rights reserved. 3e02119d5SChris Mason * 4e02119d5SChris Mason * This program is free software; you can redistribute it and/or 5e02119d5SChris Mason * modify it under the terms of the GNU General Public 6e02119d5SChris Mason * License v2 as published by the Free Software Foundation. 7e02119d5SChris Mason * 8e02119d5SChris Mason * This program is distributed in the hope that it will be useful, 9e02119d5SChris Mason * but WITHOUT ANY WARRANTY; without even the implied warranty of 10e02119d5SChris Mason * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11e02119d5SChris Mason * General Public License for more details. 12e02119d5SChris Mason * 13e02119d5SChris Mason * You should have received a copy of the GNU General Public 14e02119d5SChris Mason * License along with this program; if not, write to the 15e02119d5SChris Mason * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 16e02119d5SChris Mason * Boston, MA 021110-1307, USA. 
17e02119d5SChris Mason */ 18e02119d5SChris Mason 19e02119d5SChris Mason #include <linux/sched.h> 205a0e3ad6STejun Heo #include <linux/slab.h> 215dc562c5SJosef Bacik #include <linux/list_sort.h> 22e02119d5SChris Mason #include "ctree.h" 23e02119d5SChris Mason #include "transaction.h" 24e02119d5SChris Mason #include "disk-io.h" 25e02119d5SChris Mason #include "locking.h" 26e02119d5SChris Mason #include "print-tree.h" 27e02119d5SChris Mason #include "compat.h" 28b2950863SChristoph Hellwig #include "tree-log.h" 29e02119d5SChris Mason 30e02119d5SChris Mason /* magic values for the inode_only field in btrfs_log_inode: 31e02119d5SChris Mason * 32e02119d5SChris Mason * LOG_INODE_ALL means to log everything 33e02119d5SChris Mason * LOG_INODE_EXISTS means to log just enough to recreate the inode 34e02119d5SChris Mason * during log replay 35e02119d5SChris Mason */ 36e02119d5SChris Mason #define LOG_INODE_ALL 0 37e02119d5SChris Mason #define LOG_INODE_EXISTS 1 38e02119d5SChris Mason 39e02119d5SChris Mason /* 4012fcfd22SChris Mason * directory trouble cases 4112fcfd22SChris Mason * 4212fcfd22SChris Mason * 1) on rename or unlink, if the inode being unlinked isn't in the fsync 4312fcfd22SChris Mason * log, we must force a full commit before doing an fsync of the directory 4412fcfd22SChris Mason * where the unlink was done. 4512fcfd22SChris Mason * ---> record transid of last unlink/rename per directory 4612fcfd22SChris Mason * 4712fcfd22SChris Mason * mkdir foo/some_dir 4812fcfd22SChris Mason * normal commit 4912fcfd22SChris Mason * rename foo/some_dir foo2/some_dir 5012fcfd22SChris Mason * mkdir foo/some_dir 5112fcfd22SChris Mason * fsync foo/some_dir/some_file 5212fcfd22SChris Mason * 5312fcfd22SChris Mason * The fsync above will unlink the original some_dir without recording 5412fcfd22SChris Mason * it in its new location (foo2). 
After a crash, some_dir will be gone 5512fcfd22SChris Mason * unless the fsync of some_file forces a full commit 5612fcfd22SChris Mason * 5712fcfd22SChris Mason * 2) we must log any new names for any file or dir that is in the fsync 5812fcfd22SChris Mason * log. ---> check inode while renaming/linking. 5912fcfd22SChris Mason * 6012fcfd22SChris Mason * 2a) we must log any new names for any file or dir during rename 6112fcfd22SChris Mason * when the directory they are being removed from was logged. 6212fcfd22SChris Mason * ---> check inode and old parent dir during rename 6312fcfd22SChris Mason * 6412fcfd22SChris Mason * 2a is actually the more important variant. With the extra logging 6512fcfd22SChris Mason * a crash might unlink the old name without recreating the new one 6612fcfd22SChris Mason * 6712fcfd22SChris Mason * 3) after a crash, we must go through any directories with a link count 6812fcfd22SChris Mason * of zero and redo the rm -rf 6912fcfd22SChris Mason * 7012fcfd22SChris Mason * mkdir f1/foo 7112fcfd22SChris Mason * normal commit 7212fcfd22SChris Mason * rm -rf f1/foo 7312fcfd22SChris Mason * fsync(f1) 7412fcfd22SChris Mason * 7512fcfd22SChris Mason * The directory f1 was fully removed from the FS, but fsync was never 7612fcfd22SChris Mason * called on f1, only its parent dir. After a crash the rm -rf must 7712fcfd22SChris Mason * be replayed. This must be able to recurse down the entire 7812fcfd22SChris Mason * directory tree. The inode link count fixup code takes care of the 7912fcfd22SChris Mason * ugly details. 8012fcfd22SChris Mason */ 8112fcfd22SChris Mason 8212fcfd22SChris Mason /* 83e02119d5SChris Mason * stages for the tree walking. The first 84e02119d5SChris Mason * stage (0) is to only pin down the blocks we find 85e02119d5SChris Mason * the second stage (1) is to make sure that all the inodes 86e02119d5SChris Mason * we find in the log are created in the subvolume. 
87e02119d5SChris Mason * 88e02119d5SChris Mason * The last stage is to deal with directories and links and extents 89e02119d5SChris Mason * and all the other fun semantics 90e02119d5SChris Mason */ 91e02119d5SChris Mason #define LOG_WALK_PIN_ONLY 0 92e02119d5SChris Mason #define LOG_WALK_REPLAY_INODES 1 93e02119d5SChris Mason #define LOG_WALK_REPLAY_ALL 2 94e02119d5SChris Mason 9512fcfd22SChris Mason static int btrfs_log_inode(struct btrfs_trans_handle *trans, 96e02119d5SChris Mason struct btrfs_root *root, struct inode *inode, 97e02119d5SChris Mason int inode_only); 98ec051c0fSYan Zheng static int link_to_fixup_dir(struct btrfs_trans_handle *trans, 99ec051c0fSYan Zheng struct btrfs_root *root, 100ec051c0fSYan Zheng struct btrfs_path *path, u64 objectid); 10112fcfd22SChris Mason static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans, 10212fcfd22SChris Mason struct btrfs_root *root, 10312fcfd22SChris Mason struct btrfs_root *log, 10412fcfd22SChris Mason struct btrfs_path *path, 10512fcfd22SChris Mason u64 dirid, int del_all); 106e02119d5SChris Mason 107e02119d5SChris Mason /* 108e02119d5SChris Mason * tree logging is a special write ahead log used to make sure that 109e02119d5SChris Mason * fsyncs and O_SYNCs can happen without doing full tree commits. 110e02119d5SChris Mason * 111e02119d5SChris Mason * Full tree commits are expensive because they require commonly 112e02119d5SChris Mason * modified blocks to be recowed, creating many dirty pages in the 113e02119d5SChris Mason * extent tree an 4x-6x higher write load than ext3. 114e02119d5SChris Mason * 115e02119d5SChris Mason * Instead of doing a tree commit on every fsync, we use the 116e02119d5SChris Mason * key ranges and transaction ids to find items for a given file or directory 117e02119d5SChris Mason * that have changed in this transaction. 
Those items are copied into 118e02119d5SChris Mason * a special tree (one per subvolume root), that tree is written to disk 119e02119d5SChris Mason * and then the fsync is considered complete. 120e02119d5SChris Mason * 121e02119d5SChris Mason * After a crash, items are copied out of the log-tree back into the 122e02119d5SChris Mason * subvolume tree. Any file data extents found are recorded in the extent 123e02119d5SChris Mason * allocation tree, and the log-tree freed. 124e02119d5SChris Mason * 125e02119d5SChris Mason * The log tree is read three times, once to pin down all the extents it is 126e02119d5SChris Mason * using in ram and once, once to create all the inodes logged in the tree 127e02119d5SChris Mason * and once to do all the other items. 128e02119d5SChris Mason */ 129e02119d5SChris Mason 130e02119d5SChris Mason /* 131e02119d5SChris Mason * start a sub transaction and setup the log tree 132e02119d5SChris Mason * this increments the log tree writer count to make the people 133e02119d5SChris Mason * syncing the tree wait for us to finish 134e02119d5SChris Mason */ 135e02119d5SChris Mason static int start_log_trans(struct btrfs_trans_handle *trans, 136e02119d5SChris Mason struct btrfs_root *root) 137e02119d5SChris Mason { 138e02119d5SChris Mason int ret; 1394a500fd1SYan, Zheng int err = 0; 1407237f183SYan Zheng 1417237f183SYan Zheng mutex_lock(&root->log_mutex); 1427237f183SYan Zheng if (root->log_root) { 143ff782e0aSJosef Bacik if (!root->log_start_pid) { 144ff782e0aSJosef Bacik root->log_start_pid = current->pid; 145ff782e0aSJosef Bacik root->log_multiple_pids = false; 146ff782e0aSJosef Bacik } else if (root->log_start_pid != current->pid) { 147ff782e0aSJosef Bacik root->log_multiple_pids = true; 148ff782e0aSJosef Bacik } 149ff782e0aSJosef Bacik 150*2ecb7923SMiao Xie atomic_inc(&root->log_batch); 1517237f183SYan Zheng atomic_inc(&root->log_writers); 1527237f183SYan Zheng mutex_unlock(&root->log_mutex); 1537237f183SYan Zheng return 0; 1547237f183SYan Zheng 
} 155ff782e0aSJosef Bacik root->log_multiple_pids = false; 156ff782e0aSJosef Bacik root->log_start_pid = current->pid; 157e02119d5SChris Mason mutex_lock(&root->fs_info->tree_log_mutex); 158e02119d5SChris Mason if (!root->fs_info->log_root_tree) { 159e02119d5SChris Mason ret = btrfs_init_log_root_tree(trans, root->fs_info); 1604a500fd1SYan, Zheng if (ret) 1614a500fd1SYan, Zheng err = ret; 162e02119d5SChris Mason } 1634a500fd1SYan, Zheng if (err == 0 && !root->log_root) { 164e02119d5SChris Mason ret = btrfs_add_log_tree(trans, root); 1654a500fd1SYan, Zheng if (ret) 1664a500fd1SYan, Zheng err = ret; 167e02119d5SChris Mason } 168e02119d5SChris Mason mutex_unlock(&root->fs_info->tree_log_mutex); 169*2ecb7923SMiao Xie atomic_inc(&root->log_batch); 1707237f183SYan Zheng atomic_inc(&root->log_writers); 1717237f183SYan Zheng mutex_unlock(&root->log_mutex); 1724a500fd1SYan, Zheng return err; 173e02119d5SChris Mason } 174e02119d5SChris Mason 175e02119d5SChris Mason /* 176e02119d5SChris Mason * returns 0 if there was a log transaction running and we were able 177e02119d5SChris Mason * to join, or returns -ENOENT if there were not transactions 178e02119d5SChris Mason * in progress 179e02119d5SChris Mason */ 180e02119d5SChris Mason static int join_running_log_trans(struct btrfs_root *root) 181e02119d5SChris Mason { 182e02119d5SChris Mason int ret = -ENOENT; 183e02119d5SChris Mason 184e02119d5SChris Mason smp_mb(); 185e02119d5SChris Mason if (!root->log_root) 186e02119d5SChris Mason return -ENOENT; 187e02119d5SChris Mason 1887237f183SYan Zheng mutex_lock(&root->log_mutex); 189e02119d5SChris Mason if (root->log_root) { 190e02119d5SChris Mason ret = 0; 1917237f183SYan Zheng atomic_inc(&root->log_writers); 192e02119d5SChris Mason } 1937237f183SYan Zheng mutex_unlock(&root->log_mutex); 194e02119d5SChris Mason return ret; 195e02119d5SChris Mason } 196e02119d5SChris Mason 197e02119d5SChris Mason /* 19812fcfd22SChris Mason * This either makes the current running log transaction wait 
19912fcfd22SChris Mason * until you call btrfs_end_log_trans() or it makes any future 20012fcfd22SChris Mason * log transactions wait until you call btrfs_end_log_trans() 20112fcfd22SChris Mason */ 20212fcfd22SChris Mason int btrfs_pin_log_trans(struct btrfs_root *root) 20312fcfd22SChris Mason { 20412fcfd22SChris Mason int ret = -ENOENT; 20512fcfd22SChris Mason 20612fcfd22SChris Mason mutex_lock(&root->log_mutex); 20712fcfd22SChris Mason atomic_inc(&root->log_writers); 20812fcfd22SChris Mason mutex_unlock(&root->log_mutex); 20912fcfd22SChris Mason return ret; 21012fcfd22SChris Mason } 21112fcfd22SChris Mason 21212fcfd22SChris Mason /* 213e02119d5SChris Mason * indicate we're done making changes to the log tree 214e02119d5SChris Mason * and wake up anyone waiting to do a sync 215e02119d5SChris Mason */ 216143bede5SJeff Mahoney void btrfs_end_log_trans(struct btrfs_root *root) 217e02119d5SChris Mason { 2187237f183SYan Zheng if (atomic_dec_and_test(&root->log_writers)) { 219e02119d5SChris Mason smp_mb(); 2207237f183SYan Zheng if (waitqueue_active(&root->log_writer_wait)) 2217237f183SYan Zheng wake_up(&root->log_writer_wait); 2227237f183SYan Zheng } 223e02119d5SChris Mason } 224e02119d5SChris Mason 225e02119d5SChris Mason 226e02119d5SChris Mason /* 227e02119d5SChris Mason * the walk control struct is used to pass state down the chain when 228e02119d5SChris Mason * processing the log tree. The stage field tells us which part 229e02119d5SChris Mason * of the log tree processing we are currently doing. The others 230e02119d5SChris Mason * are state fields used for that specific part 231e02119d5SChris Mason */ 232e02119d5SChris Mason struct walk_control { 233e02119d5SChris Mason /* should we free the extent on disk when done? This is used 234e02119d5SChris Mason * at transaction commit time while freeing a log tree 235e02119d5SChris Mason */ 236e02119d5SChris Mason int free; 237e02119d5SChris Mason 238e02119d5SChris Mason /* should we write out the extent buffer? 
This is used 239e02119d5SChris Mason * while flushing the log tree to disk during a sync 240e02119d5SChris Mason */ 241e02119d5SChris Mason int write; 242e02119d5SChris Mason 243e02119d5SChris Mason /* should we wait for the extent buffer io to finish? Also used 244e02119d5SChris Mason * while flushing the log tree to disk for a sync 245e02119d5SChris Mason */ 246e02119d5SChris Mason int wait; 247e02119d5SChris Mason 248e02119d5SChris Mason /* pin only walk, we record which extents on disk belong to the 249e02119d5SChris Mason * log trees 250e02119d5SChris Mason */ 251e02119d5SChris Mason int pin; 252e02119d5SChris Mason 253e02119d5SChris Mason /* what stage of the replay code we're currently in */ 254e02119d5SChris Mason int stage; 255e02119d5SChris Mason 256e02119d5SChris Mason /* the root we are currently replaying */ 257e02119d5SChris Mason struct btrfs_root *replay_dest; 258e02119d5SChris Mason 259e02119d5SChris Mason /* the trans handle for the current replay */ 260e02119d5SChris Mason struct btrfs_trans_handle *trans; 261e02119d5SChris Mason 262e02119d5SChris Mason /* the function that gets used to process blocks we find in the 263e02119d5SChris Mason * tree. 
Note the extent_buffer might not be up to date when it is 264e02119d5SChris Mason * passed in, and it must be checked or read if you need the data 265e02119d5SChris Mason * inside it 266e02119d5SChris Mason */ 267e02119d5SChris Mason int (*process_func)(struct btrfs_root *log, struct extent_buffer *eb, 268e02119d5SChris Mason struct walk_control *wc, u64 gen); 269e02119d5SChris Mason }; 270e02119d5SChris Mason 271e02119d5SChris Mason /* 272e02119d5SChris Mason * process_func used to pin down extents, write them or wait on them 273e02119d5SChris Mason */ 274e02119d5SChris Mason static int process_one_buffer(struct btrfs_root *log, 275e02119d5SChris Mason struct extent_buffer *eb, 276e02119d5SChris Mason struct walk_control *wc, u64 gen) 277e02119d5SChris Mason { 27804018de5SJosef Bacik if (wc->pin) 279e688b725SChris Mason btrfs_pin_extent_for_log_replay(wc->trans, 280e688b725SChris Mason log->fs_info->extent_root, 281e688b725SChris Mason eb->start, eb->len); 282e02119d5SChris Mason 283b9fab919SChris Mason if (btrfs_buffer_uptodate(eb, gen, 0)) { 284e02119d5SChris Mason if (wc->write) 285e02119d5SChris Mason btrfs_write_tree_block(eb); 286e02119d5SChris Mason if (wc->wait) 287e02119d5SChris Mason btrfs_wait_tree_block_writeback(eb); 288e02119d5SChris Mason } 289e02119d5SChris Mason return 0; 290e02119d5SChris Mason } 291e02119d5SChris Mason 292e02119d5SChris Mason /* 293e02119d5SChris Mason * Item overwrite used by replay and tree logging. eb, slot and key all refer 294e02119d5SChris Mason * to the src data we are copying out. 295e02119d5SChris Mason * 296e02119d5SChris Mason * root is the tree we are copying into, and path is a scratch 297e02119d5SChris Mason * path for use in this function (it should be released on entry and 298e02119d5SChris Mason * will be released on exit). 299e02119d5SChris Mason * 300e02119d5SChris Mason * If the key is already in the destination tree the existing item is 301e02119d5SChris Mason * overwritten. 
If the existing item isn't big enough, it is extended. 302e02119d5SChris Mason * If it is too large, it is truncated. 303e02119d5SChris Mason * 304e02119d5SChris Mason * If the key isn't in the destination yet, a new item is inserted. 305e02119d5SChris Mason */ 306e02119d5SChris Mason static noinline int overwrite_item(struct btrfs_trans_handle *trans, 307e02119d5SChris Mason struct btrfs_root *root, 308e02119d5SChris Mason struct btrfs_path *path, 309e02119d5SChris Mason struct extent_buffer *eb, int slot, 310e02119d5SChris Mason struct btrfs_key *key) 311e02119d5SChris Mason { 312e02119d5SChris Mason int ret; 313e02119d5SChris Mason u32 item_size; 314e02119d5SChris Mason u64 saved_i_size = 0; 315e02119d5SChris Mason int save_old_i_size = 0; 316e02119d5SChris Mason unsigned long src_ptr; 317e02119d5SChris Mason unsigned long dst_ptr; 318e02119d5SChris Mason int overwrite_root = 0; 319e02119d5SChris Mason 320e02119d5SChris Mason if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) 321e02119d5SChris Mason overwrite_root = 1; 322e02119d5SChris Mason 323e02119d5SChris Mason item_size = btrfs_item_size_nr(eb, slot); 324e02119d5SChris Mason src_ptr = btrfs_item_ptr_offset(eb, slot); 325e02119d5SChris Mason 326e02119d5SChris Mason /* look for the key in the destination tree */ 327e02119d5SChris Mason ret = btrfs_search_slot(NULL, root, key, path, 0, 0); 328e02119d5SChris Mason if (ret == 0) { 329e02119d5SChris Mason char *src_copy; 330e02119d5SChris Mason char *dst_copy; 331e02119d5SChris Mason u32 dst_size = btrfs_item_size_nr(path->nodes[0], 332e02119d5SChris Mason path->slots[0]); 333e02119d5SChris Mason if (dst_size != item_size) 334e02119d5SChris Mason goto insert; 335e02119d5SChris Mason 336e02119d5SChris Mason if (item_size == 0) { 337b3b4aa74SDavid Sterba btrfs_release_path(path); 338e02119d5SChris Mason return 0; 339e02119d5SChris Mason } 340e02119d5SChris Mason dst_copy = kmalloc(item_size, GFP_NOFS); 341e02119d5SChris Mason src_copy = kmalloc(item_size, 
GFP_NOFS); 3422a29edc6Sliubo if (!dst_copy || !src_copy) { 343b3b4aa74SDavid Sterba btrfs_release_path(path); 3442a29edc6Sliubo kfree(dst_copy); 3452a29edc6Sliubo kfree(src_copy); 3462a29edc6Sliubo return -ENOMEM; 3472a29edc6Sliubo } 348e02119d5SChris Mason 349e02119d5SChris Mason read_extent_buffer(eb, src_copy, src_ptr, item_size); 350e02119d5SChris Mason 351e02119d5SChris Mason dst_ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]); 352e02119d5SChris Mason read_extent_buffer(path->nodes[0], dst_copy, dst_ptr, 353e02119d5SChris Mason item_size); 354e02119d5SChris Mason ret = memcmp(dst_copy, src_copy, item_size); 355e02119d5SChris Mason 356e02119d5SChris Mason kfree(dst_copy); 357e02119d5SChris Mason kfree(src_copy); 358e02119d5SChris Mason /* 359e02119d5SChris Mason * they have the same contents, just return, this saves 360e02119d5SChris Mason * us from cowing blocks in the destination tree and doing 361e02119d5SChris Mason * extra writes that may not have been done by a previous 362e02119d5SChris Mason * sync 363e02119d5SChris Mason */ 364e02119d5SChris Mason if (ret == 0) { 365b3b4aa74SDavid Sterba btrfs_release_path(path); 366e02119d5SChris Mason return 0; 367e02119d5SChris Mason } 368e02119d5SChris Mason 369e02119d5SChris Mason } 370e02119d5SChris Mason insert: 371b3b4aa74SDavid Sterba btrfs_release_path(path); 372e02119d5SChris Mason /* try to insert the key into the destination tree */ 373e02119d5SChris Mason ret = btrfs_insert_empty_item(trans, root, path, 374e02119d5SChris Mason key, item_size); 375e02119d5SChris Mason 376e02119d5SChris Mason /* make sure any existing item is the correct size */ 377e02119d5SChris Mason if (ret == -EEXIST) { 378e02119d5SChris Mason u32 found_size; 379e02119d5SChris Mason found_size = btrfs_item_size_nr(path->nodes[0], 380e02119d5SChris Mason path->slots[0]); 381143bede5SJeff Mahoney if (found_size > item_size) 382e02119d5SChris Mason btrfs_truncate_item(trans, root, path, item_size, 1); 383143bede5SJeff Mahoney 
else if (found_size < item_size) 384143bede5SJeff Mahoney btrfs_extend_item(trans, root, path, 38587b29b20SYan Zheng item_size - found_size); 386e02119d5SChris Mason } else if (ret) { 3874a500fd1SYan, Zheng return ret; 388e02119d5SChris Mason } 389e02119d5SChris Mason dst_ptr = btrfs_item_ptr_offset(path->nodes[0], 390e02119d5SChris Mason path->slots[0]); 391e02119d5SChris Mason 392e02119d5SChris Mason /* don't overwrite an existing inode if the generation number 393e02119d5SChris Mason * was logged as zero. This is done when the tree logging code 394e02119d5SChris Mason * is just logging an inode to make sure it exists after recovery. 395e02119d5SChris Mason * 396e02119d5SChris Mason * Also, don't overwrite i_size on directories during replay. 397e02119d5SChris Mason * log replay inserts and removes directory items based on the 398e02119d5SChris Mason * state of the tree found in the subvolume, and i_size is modified 399e02119d5SChris Mason * as it goes 400e02119d5SChris Mason */ 401e02119d5SChris Mason if (key->type == BTRFS_INODE_ITEM_KEY && ret == -EEXIST) { 402e02119d5SChris Mason struct btrfs_inode_item *src_item; 403e02119d5SChris Mason struct btrfs_inode_item *dst_item; 404e02119d5SChris Mason 405e02119d5SChris Mason src_item = (struct btrfs_inode_item *)src_ptr; 406e02119d5SChris Mason dst_item = (struct btrfs_inode_item *)dst_ptr; 407e02119d5SChris Mason 408e02119d5SChris Mason if (btrfs_inode_generation(eb, src_item) == 0) 409e02119d5SChris Mason goto no_copy; 410e02119d5SChris Mason 411e02119d5SChris Mason if (overwrite_root && 412e02119d5SChris Mason S_ISDIR(btrfs_inode_mode(eb, src_item)) && 413e02119d5SChris Mason S_ISDIR(btrfs_inode_mode(path->nodes[0], dst_item))) { 414e02119d5SChris Mason save_old_i_size = 1; 415e02119d5SChris Mason saved_i_size = btrfs_inode_size(path->nodes[0], 416e02119d5SChris Mason dst_item); 417e02119d5SChris Mason } 418e02119d5SChris Mason } 419e02119d5SChris Mason 420e02119d5SChris Mason copy_extent_buffer(path->nodes[0], 
eb, dst_ptr, 421e02119d5SChris Mason src_ptr, item_size); 422e02119d5SChris Mason 423e02119d5SChris Mason if (save_old_i_size) { 424e02119d5SChris Mason struct btrfs_inode_item *dst_item; 425e02119d5SChris Mason dst_item = (struct btrfs_inode_item *)dst_ptr; 426e02119d5SChris Mason btrfs_set_inode_size(path->nodes[0], dst_item, saved_i_size); 427e02119d5SChris Mason } 428e02119d5SChris Mason 429e02119d5SChris Mason /* make sure the generation is filled in */ 430e02119d5SChris Mason if (key->type == BTRFS_INODE_ITEM_KEY) { 431e02119d5SChris Mason struct btrfs_inode_item *dst_item; 432e02119d5SChris Mason dst_item = (struct btrfs_inode_item *)dst_ptr; 433e02119d5SChris Mason if (btrfs_inode_generation(path->nodes[0], dst_item) == 0) { 434e02119d5SChris Mason btrfs_set_inode_generation(path->nodes[0], dst_item, 435e02119d5SChris Mason trans->transid); 436e02119d5SChris Mason } 437e02119d5SChris Mason } 438e02119d5SChris Mason no_copy: 439e02119d5SChris Mason btrfs_mark_buffer_dirty(path->nodes[0]); 440b3b4aa74SDavid Sterba btrfs_release_path(path); 441e02119d5SChris Mason return 0; 442e02119d5SChris Mason } 443e02119d5SChris Mason 444e02119d5SChris Mason /* 445e02119d5SChris Mason * simple helper to read an inode off the disk from a given root 446e02119d5SChris Mason * This can only be called for subvolume roots and not for the log 447e02119d5SChris Mason */ 448e02119d5SChris Mason static noinline struct inode *read_one_inode(struct btrfs_root *root, 449e02119d5SChris Mason u64 objectid) 450e02119d5SChris Mason { 4515d4f98a2SYan Zheng struct btrfs_key key; 452e02119d5SChris Mason struct inode *inode; 453e02119d5SChris Mason 4545d4f98a2SYan Zheng key.objectid = objectid; 4555d4f98a2SYan Zheng key.type = BTRFS_INODE_ITEM_KEY; 4565d4f98a2SYan Zheng key.offset = 0; 45773f73415SJosef Bacik inode = btrfs_iget(root->fs_info->sb, &key, root, NULL); 4585d4f98a2SYan Zheng if (IS_ERR(inode)) { 4595d4f98a2SYan Zheng inode = NULL; 4605d4f98a2SYan Zheng } else if 
(is_bad_inode(inode)) { 461e02119d5SChris Mason iput(inode); 462e02119d5SChris Mason inode = NULL; 463e02119d5SChris Mason } 464e02119d5SChris Mason return inode; 465e02119d5SChris Mason } 466e02119d5SChris Mason 467e02119d5SChris Mason /* replays a single extent in 'eb' at 'slot' with 'key' into the 468e02119d5SChris Mason * subvolume 'root'. path is released on entry and should be released 469e02119d5SChris Mason * on exit. 470e02119d5SChris Mason * 471e02119d5SChris Mason * extents in the log tree have not been allocated out of the extent 472e02119d5SChris Mason * tree yet. So, this completes the allocation, taking a reference 473e02119d5SChris Mason * as required if the extent already exists or creating a new extent 474e02119d5SChris Mason * if it isn't in the extent allocation tree yet. 475e02119d5SChris Mason * 476e02119d5SChris Mason * The extent is inserted into the file, dropping any existing extents 477e02119d5SChris Mason * from the file that overlap the new one. 478e02119d5SChris Mason */ 479e02119d5SChris Mason static noinline int replay_one_extent(struct btrfs_trans_handle *trans, 480e02119d5SChris Mason struct btrfs_root *root, 481e02119d5SChris Mason struct btrfs_path *path, 482e02119d5SChris Mason struct extent_buffer *eb, int slot, 483e02119d5SChris Mason struct btrfs_key *key) 484e02119d5SChris Mason { 485e02119d5SChris Mason int found_type; 486e02119d5SChris Mason u64 mask = root->sectorsize - 1; 487e02119d5SChris Mason u64 extent_end; 488e02119d5SChris Mason u64 start = key->offset; 48907d400a6SYan Zheng u64 saved_nbytes; 490e02119d5SChris Mason struct btrfs_file_extent_item *item; 491e02119d5SChris Mason struct inode *inode = NULL; 492e02119d5SChris Mason unsigned long size; 493e02119d5SChris Mason int ret = 0; 494e02119d5SChris Mason 495e02119d5SChris Mason item = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item); 496e02119d5SChris Mason found_type = btrfs_file_extent_type(eb, item); 497e02119d5SChris Mason 498d899e052SYan Zheng if 
(found_type == BTRFS_FILE_EXTENT_REG || 499d899e052SYan Zheng found_type == BTRFS_FILE_EXTENT_PREALLOC) 500e02119d5SChris Mason extent_end = start + btrfs_file_extent_num_bytes(eb, item); 501e02119d5SChris Mason else if (found_type == BTRFS_FILE_EXTENT_INLINE) { 502c8b97818SChris Mason size = btrfs_file_extent_inline_len(eb, item); 503e02119d5SChris Mason extent_end = (start + size + mask) & ~mask; 504e02119d5SChris Mason } else { 505e02119d5SChris Mason ret = 0; 506e02119d5SChris Mason goto out; 507e02119d5SChris Mason } 508e02119d5SChris Mason 509e02119d5SChris Mason inode = read_one_inode(root, key->objectid); 510e02119d5SChris Mason if (!inode) { 511e02119d5SChris Mason ret = -EIO; 512e02119d5SChris Mason goto out; 513e02119d5SChris Mason } 514e02119d5SChris Mason 515e02119d5SChris Mason /* 516e02119d5SChris Mason * first check to see if we already have this extent in the 517e02119d5SChris Mason * file. This must be done before the btrfs_drop_extents run 518e02119d5SChris Mason * so we don't try to drop this extent. 
519e02119d5SChris Mason */ 52033345d01SLi Zefan ret = btrfs_lookup_file_extent(trans, root, path, btrfs_ino(inode), 521e02119d5SChris Mason start, 0); 522e02119d5SChris Mason 523d899e052SYan Zheng if (ret == 0 && 524d899e052SYan Zheng (found_type == BTRFS_FILE_EXTENT_REG || 525d899e052SYan Zheng found_type == BTRFS_FILE_EXTENT_PREALLOC)) { 526e02119d5SChris Mason struct btrfs_file_extent_item cmp1; 527e02119d5SChris Mason struct btrfs_file_extent_item cmp2; 528e02119d5SChris Mason struct btrfs_file_extent_item *existing; 529e02119d5SChris Mason struct extent_buffer *leaf; 530e02119d5SChris Mason 531e02119d5SChris Mason leaf = path->nodes[0]; 532e02119d5SChris Mason existing = btrfs_item_ptr(leaf, path->slots[0], 533e02119d5SChris Mason struct btrfs_file_extent_item); 534e02119d5SChris Mason 535e02119d5SChris Mason read_extent_buffer(eb, &cmp1, (unsigned long)item, 536e02119d5SChris Mason sizeof(cmp1)); 537e02119d5SChris Mason read_extent_buffer(leaf, &cmp2, (unsigned long)existing, 538e02119d5SChris Mason sizeof(cmp2)); 539e02119d5SChris Mason 540e02119d5SChris Mason /* 541e02119d5SChris Mason * we already have a pointer to this exact extent, 542e02119d5SChris Mason * we don't have to do anything 543e02119d5SChris Mason */ 544e02119d5SChris Mason if (memcmp(&cmp1, &cmp2, sizeof(cmp1)) == 0) { 545b3b4aa74SDavid Sterba btrfs_release_path(path); 546e02119d5SChris Mason goto out; 547e02119d5SChris Mason } 548e02119d5SChris Mason } 549b3b4aa74SDavid Sterba btrfs_release_path(path); 550e02119d5SChris Mason 55107d400a6SYan Zheng saved_nbytes = inode_get_bytes(inode); 552e02119d5SChris Mason /* drop any overlapping extents */ 5532671485dSJosef Bacik ret = btrfs_drop_extents(trans, root, inode, start, extent_end, 1); 554e02119d5SChris Mason BUG_ON(ret); 555e02119d5SChris Mason 55607d400a6SYan Zheng if (found_type == BTRFS_FILE_EXTENT_REG || 55707d400a6SYan Zheng found_type == BTRFS_FILE_EXTENT_PREALLOC) { 5585d4f98a2SYan Zheng u64 offset; 55907d400a6SYan Zheng unsigned long 
dest_offset; 56007d400a6SYan Zheng struct btrfs_key ins; 56107d400a6SYan Zheng 56207d400a6SYan Zheng ret = btrfs_insert_empty_item(trans, root, path, key, 56307d400a6SYan Zheng sizeof(*item)); 56407d400a6SYan Zheng BUG_ON(ret); 56507d400a6SYan Zheng dest_offset = btrfs_item_ptr_offset(path->nodes[0], 56607d400a6SYan Zheng path->slots[0]); 56707d400a6SYan Zheng copy_extent_buffer(path->nodes[0], eb, dest_offset, 56807d400a6SYan Zheng (unsigned long)item, sizeof(*item)); 56907d400a6SYan Zheng 57007d400a6SYan Zheng ins.objectid = btrfs_file_extent_disk_bytenr(eb, item); 57107d400a6SYan Zheng ins.offset = btrfs_file_extent_disk_num_bytes(eb, item); 57207d400a6SYan Zheng ins.type = BTRFS_EXTENT_ITEM_KEY; 5735d4f98a2SYan Zheng offset = key->offset - btrfs_file_extent_offset(eb, item); 57407d400a6SYan Zheng 57507d400a6SYan Zheng if (ins.objectid > 0) { 57607d400a6SYan Zheng u64 csum_start; 57707d400a6SYan Zheng u64 csum_end; 57807d400a6SYan Zheng LIST_HEAD(ordered_sums); 57907d400a6SYan Zheng /* 58007d400a6SYan Zheng * is this extent already allocated in the extent 58107d400a6SYan Zheng * allocation tree? 
If so, just add a reference 58207d400a6SYan Zheng */ 58307d400a6SYan Zheng ret = btrfs_lookup_extent(root, ins.objectid, 58407d400a6SYan Zheng ins.offset); 58507d400a6SYan Zheng if (ret == 0) { 58607d400a6SYan Zheng ret = btrfs_inc_extent_ref(trans, root, 58707d400a6SYan Zheng ins.objectid, ins.offset, 5885d4f98a2SYan Zheng 0, root->root_key.objectid, 58966d7e7f0SArne Jansen key->objectid, offset, 0); 59037daa4f9STsutomu Itoh BUG_ON(ret); 59107d400a6SYan Zheng } else { 59207d400a6SYan Zheng /* 59307d400a6SYan Zheng * insert the extent pointer in the extent 59407d400a6SYan Zheng * allocation tree 59507d400a6SYan Zheng */ 5965d4f98a2SYan Zheng ret = btrfs_alloc_logged_file_extent(trans, 5975d4f98a2SYan Zheng root, root->root_key.objectid, 5985d4f98a2SYan Zheng key->objectid, offset, &ins); 59907d400a6SYan Zheng BUG_ON(ret); 60007d400a6SYan Zheng } 601b3b4aa74SDavid Sterba btrfs_release_path(path); 60207d400a6SYan Zheng 60307d400a6SYan Zheng if (btrfs_file_extent_compression(eb, item)) { 60407d400a6SYan Zheng csum_start = ins.objectid; 60507d400a6SYan Zheng csum_end = csum_start + ins.offset; 60607d400a6SYan Zheng } else { 60707d400a6SYan Zheng csum_start = ins.objectid + 60807d400a6SYan Zheng btrfs_file_extent_offset(eb, item); 60907d400a6SYan Zheng csum_end = csum_start + 61007d400a6SYan Zheng btrfs_file_extent_num_bytes(eb, item); 61107d400a6SYan Zheng } 61207d400a6SYan Zheng 61307d400a6SYan Zheng ret = btrfs_lookup_csums_range(root->log_root, 61407d400a6SYan Zheng csum_start, csum_end - 1, 615a2de733cSArne Jansen &ordered_sums, 0); 61607d400a6SYan Zheng BUG_ON(ret); 61707d400a6SYan Zheng while (!list_empty(&ordered_sums)) { 61807d400a6SYan Zheng struct btrfs_ordered_sum *sums; 61907d400a6SYan Zheng sums = list_entry(ordered_sums.next, 62007d400a6SYan Zheng struct btrfs_ordered_sum, 62107d400a6SYan Zheng list); 62207d400a6SYan Zheng ret = btrfs_csum_file_blocks(trans, 62307d400a6SYan Zheng root->fs_info->csum_root, 62407d400a6SYan Zheng sums); 62507d400a6SYan Zheng 
BUG_ON(ret); 62607d400a6SYan Zheng list_del(&sums->list); 62707d400a6SYan Zheng kfree(sums); 62807d400a6SYan Zheng } 62907d400a6SYan Zheng } else { 630b3b4aa74SDavid Sterba btrfs_release_path(path); 63107d400a6SYan Zheng } 63207d400a6SYan Zheng } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { 63307d400a6SYan Zheng /* inline extents are easy, we just overwrite them */ 634e02119d5SChris Mason ret = overwrite_item(trans, root, path, eb, slot, key); 635e02119d5SChris Mason BUG_ON(ret); 63607d400a6SYan Zheng } 637e02119d5SChris Mason 63807d400a6SYan Zheng inode_set_bytes(inode, saved_nbytes); 639b9959295STsutomu Itoh ret = btrfs_update_inode(trans, root, inode); 640e02119d5SChris Mason out: 641e02119d5SChris Mason if (inode) 642e02119d5SChris Mason iput(inode); 643e02119d5SChris Mason return ret; 644e02119d5SChris Mason } 645e02119d5SChris Mason 646e02119d5SChris Mason /* 647e02119d5SChris Mason * when cleaning up conflicts between the directory names in the 648e02119d5SChris Mason * subvolume, directory names in the log and directory names in the 649e02119d5SChris Mason * inode back references, we may have to unlink inodes from directories. 
650e02119d5SChris Mason * 651e02119d5SChris Mason * This is a helper function to do the unlink of a specific directory 652e02119d5SChris Mason * item 653e02119d5SChris Mason */ 654e02119d5SChris Mason static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans, 655e02119d5SChris Mason struct btrfs_root *root, 656e02119d5SChris Mason struct btrfs_path *path, 657e02119d5SChris Mason struct inode *dir, 658e02119d5SChris Mason struct btrfs_dir_item *di) 659e02119d5SChris Mason { 660e02119d5SChris Mason struct inode *inode; 661e02119d5SChris Mason char *name; 662e02119d5SChris Mason int name_len; 663e02119d5SChris Mason struct extent_buffer *leaf; 664e02119d5SChris Mason struct btrfs_key location; 665e02119d5SChris Mason int ret; 666e02119d5SChris Mason 667e02119d5SChris Mason leaf = path->nodes[0]; 668e02119d5SChris Mason 669e02119d5SChris Mason btrfs_dir_item_key_to_cpu(leaf, di, &location); 670e02119d5SChris Mason name_len = btrfs_dir_name_len(leaf, di); 671e02119d5SChris Mason name = kmalloc(name_len, GFP_NOFS); 6722a29edc6Sliubo if (!name) 6732a29edc6Sliubo return -ENOMEM; 6742a29edc6Sliubo 675e02119d5SChris Mason read_extent_buffer(leaf, name, (unsigned long)(di + 1), name_len); 676b3b4aa74SDavid Sterba btrfs_release_path(path); 677e02119d5SChris Mason 678e02119d5SChris Mason inode = read_one_inode(root, location.objectid); 679c00e9493STsutomu Itoh if (!inode) { 680c00e9493STsutomu Itoh kfree(name); 681c00e9493STsutomu Itoh return -EIO; 682c00e9493STsutomu Itoh } 683e02119d5SChris Mason 684ec051c0fSYan Zheng ret = link_to_fixup_dir(trans, root, path, location.objectid); 685ec051c0fSYan Zheng BUG_ON(ret); 68612fcfd22SChris Mason 687e02119d5SChris Mason ret = btrfs_unlink_inode(trans, root, dir, inode, name, name_len); 688ec051c0fSYan Zheng BUG_ON(ret); 689e02119d5SChris Mason kfree(name); 690e02119d5SChris Mason 691e02119d5SChris Mason iput(inode); 692b6305567SChris Mason 693b6305567SChris Mason btrfs_run_delayed_items(trans, root); 694e02119d5SChris 
Mason return ret; 695e02119d5SChris Mason } 696e02119d5SChris Mason 697e02119d5SChris Mason /* 698e02119d5SChris Mason * helper function to see if a given name and sequence number found 699e02119d5SChris Mason * in an inode back reference are already in a directory and correctly 700e02119d5SChris Mason * point to this inode 701e02119d5SChris Mason */ 702e02119d5SChris Mason static noinline int inode_in_dir(struct btrfs_root *root, 703e02119d5SChris Mason struct btrfs_path *path, 704e02119d5SChris Mason u64 dirid, u64 objectid, u64 index, 705e02119d5SChris Mason const char *name, int name_len) 706e02119d5SChris Mason { 707e02119d5SChris Mason struct btrfs_dir_item *di; 708e02119d5SChris Mason struct btrfs_key location; 709e02119d5SChris Mason int match = 0; 710e02119d5SChris Mason 711e02119d5SChris Mason di = btrfs_lookup_dir_index_item(NULL, root, path, dirid, 712e02119d5SChris Mason index, name, name_len, 0); 713e02119d5SChris Mason if (di && !IS_ERR(di)) { 714e02119d5SChris Mason btrfs_dir_item_key_to_cpu(path->nodes[0], di, &location); 715e02119d5SChris Mason if (location.objectid != objectid) 716e02119d5SChris Mason goto out; 717e02119d5SChris Mason } else 718e02119d5SChris Mason goto out; 719b3b4aa74SDavid Sterba btrfs_release_path(path); 720e02119d5SChris Mason 721e02119d5SChris Mason di = btrfs_lookup_dir_item(NULL, root, path, dirid, name, name_len, 0); 722e02119d5SChris Mason if (di && !IS_ERR(di)) { 723e02119d5SChris Mason btrfs_dir_item_key_to_cpu(path->nodes[0], di, &location); 724e02119d5SChris Mason if (location.objectid != objectid) 725e02119d5SChris Mason goto out; 726e02119d5SChris Mason } else 727e02119d5SChris Mason goto out; 728e02119d5SChris Mason match = 1; 729e02119d5SChris Mason out: 730b3b4aa74SDavid Sterba btrfs_release_path(path); 731e02119d5SChris Mason return match; 732e02119d5SChris Mason } 733e02119d5SChris Mason 734e02119d5SChris Mason /* 735e02119d5SChris Mason * helper function to check a log tree for a named back reference in 
736e02119d5SChris Mason * an inode. This is used to decide if a back reference that is 737e02119d5SChris Mason * found in the subvolume conflicts with what we find in the log. 738e02119d5SChris Mason * 739e02119d5SChris Mason * inode backreferences may have multiple refs in a single item, 740e02119d5SChris Mason * during replay we process one reference at a time, and we don't 741e02119d5SChris Mason * want to delete valid links to a file from the subvolume if that 742e02119d5SChris Mason * link is also in the log. 743e02119d5SChris Mason */ 744e02119d5SChris Mason static noinline int backref_in_log(struct btrfs_root *log, 745e02119d5SChris Mason struct btrfs_key *key, 746e02119d5SChris Mason char *name, int namelen) 747e02119d5SChris Mason { 748e02119d5SChris Mason struct btrfs_path *path; 749e02119d5SChris Mason struct btrfs_inode_ref *ref; 750e02119d5SChris Mason unsigned long ptr; 751e02119d5SChris Mason unsigned long ptr_end; 752e02119d5SChris Mason unsigned long name_ptr; 753e02119d5SChris Mason int found_name_len; 754e02119d5SChris Mason int item_size; 755e02119d5SChris Mason int ret; 756e02119d5SChris Mason int match = 0; 757e02119d5SChris Mason 758e02119d5SChris Mason path = btrfs_alloc_path(); 7592a29edc6Sliubo if (!path) 7602a29edc6Sliubo return -ENOMEM; 7612a29edc6Sliubo 762e02119d5SChris Mason ret = btrfs_search_slot(NULL, log, key, path, 0, 0); 763e02119d5SChris Mason if (ret != 0) 764e02119d5SChris Mason goto out; 765e02119d5SChris Mason 766e02119d5SChris Mason item_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]); 767e02119d5SChris Mason ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]); 768e02119d5SChris Mason ptr_end = ptr + item_size; 769e02119d5SChris Mason while (ptr < ptr_end) { 770e02119d5SChris Mason ref = (struct btrfs_inode_ref *)ptr; 771e02119d5SChris Mason found_name_len = btrfs_inode_ref_name_len(path->nodes[0], ref); 772e02119d5SChris Mason if (found_name_len == namelen) { 773e02119d5SChris Mason name_ptr = (unsigned 
long)(ref + 1); 774e02119d5SChris Mason ret = memcmp_extent_buffer(path->nodes[0], name, 775e02119d5SChris Mason name_ptr, namelen); 776e02119d5SChris Mason if (ret == 0) { 777e02119d5SChris Mason match = 1; 778e02119d5SChris Mason goto out; 779e02119d5SChris Mason } 780e02119d5SChris Mason } 781e02119d5SChris Mason ptr = (unsigned long)(ref + 1) + found_name_len; 782e02119d5SChris Mason } 783e02119d5SChris Mason out: 784e02119d5SChris Mason btrfs_free_path(path); 785e02119d5SChris Mason return match; 786e02119d5SChris Mason } 787e02119d5SChris Mason 788e02119d5SChris Mason 789e02119d5SChris Mason /* 790e02119d5SChris Mason * replay one inode back reference item found in the log tree. 791e02119d5SChris Mason * eb, slot and key refer to the buffer and key found in the log tree. 792e02119d5SChris Mason * root is the destination we are replaying into, and path is for temp 793e02119d5SChris Mason * use by this function. (it should be released on return). 794e02119d5SChris Mason */ 795e02119d5SChris Mason static noinline int add_inode_ref(struct btrfs_trans_handle *trans, 796e02119d5SChris Mason struct btrfs_root *root, 797e02119d5SChris Mason struct btrfs_root *log, 798e02119d5SChris Mason struct btrfs_path *path, 799e02119d5SChris Mason struct extent_buffer *eb, int slot, 800e02119d5SChris Mason struct btrfs_key *key) 801e02119d5SChris Mason { 802e02119d5SChris Mason struct btrfs_inode_ref *ref; 80334f3e4f2Sliubo struct btrfs_dir_item *di; 80434f3e4f2Sliubo struct inode *dir; 805e02119d5SChris Mason struct inode *inode; 806e02119d5SChris Mason unsigned long ref_ptr; 807e02119d5SChris Mason unsigned long ref_end; 80834f3e4f2Sliubo char *name; 80934f3e4f2Sliubo int namelen; 81034f3e4f2Sliubo int ret; 811c622ae60Sliubo int search_done = 0; 812e02119d5SChris Mason 813e02119d5SChris Mason /* 814e02119d5SChris Mason * it is possible that we didn't log all the parent directories 815e02119d5SChris Mason * for a given inode. 
If we don't find the dir, just don't 816e02119d5SChris Mason * copy the back ref in. The link count fixup code will take 817e02119d5SChris Mason * care of the rest 818e02119d5SChris Mason */ 819e02119d5SChris Mason dir = read_one_inode(root, key->offset); 820e02119d5SChris Mason if (!dir) 821e02119d5SChris Mason return -ENOENT; 822e02119d5SChris Mason 823e02119d5SChris Mason inode = read_one_inode(root, key->objectid); 824c00e9493STsutomu Itoh if (!inode) { 825c00e9493STsutomu Itoh iput(dir); 826c00e9493STsutomu Itoh return -EIO; 827c00e9493STsutomu Itoh } 828e02119d5SChris Mason 829e02119d5SChris Mason ref_ptr = btrfs_item_ptr_offset(eb, slot); 830e02119d5SChris Mason ref_end = ref_ptr + btrfs_item_size_nr(eb, slot); 831e02119d5SChris Mason 832e02119d5SChris Mason again: 833e02119d5SChris Mason ref = (struct btrfs_inode_ref *)ref_ptr; 834e02119d5SChris Mason 835e02119d5SChris Mason namelen = btrfs_inode_ref_name_len(eb, ref); 836e02119d5SChris Mason name = kmalloc(namelen, GFP_NOFS); 837e02119d5SChris Mason BUG_ON(!name); 838e02119d5SChris Mason 839e02119d5SChris Mason read_extent_buffer(eb, name, (unsigned long)(ref + 1), namelen); 840e02119d5SChris Mason 841e02119d5SChris Mason /* if we already have a perfect match, we're done */ 84233345d01SLi Zefan if (inode_in_dir(root, path, btrfs_ino(dir), btrfs_ino(inode), 843e02119d5SChris Mason btrfs_inode_ref_index(eb, ref), 844e02119d5SChris Mason name, namelen)) { 845e02119d5SChris Mason goto out; 846e02119d5SChris Mason } 847e02119d5SChris Mason 848e02119d5SChris Mason /* 849e02119d5SChris Mason * look for a conflicting back reference in the metadata. 850e02119d5SChris Mason * if we find one we have to unlink that name of the file 851e02119d5SChris Mason * before we add our new link. Later on, we overwrite any 852e02119d5SChris Mason * existing back reference, and we don't want to create 853e02119d5SChris Mason * dangling pointers in the directory. 
854e02119d5SChris Mason */ 855c622ae60Sliubo 856c622ae60Sliubo if (search_done) 857c622ae60Sliubo goto insert; 858c622ae60Sliubo 859e02119d5SChris Mason ret = btrfs_search_slot(NULL, root, key, path, 0, 0); 860e02119d5SChris Mason if (ret == 0) { 861e02119d5SChris Mason char *victim_name; 862e02119d5SChris Mason int victim_name_len; 863e02119d5SChris Mason struct btrfs_inode_ref *victim_ref; 864e02119d5SChris Mason unsigned long ptr; 865e02119d5SChris Mason unsigned long ptr_end; 866e02119d5SChris Mason struct extent_buffer *leaf = path->nodes[0]; 867e02119d5SChris Mason 868e02119d5SChris Mason /* are we trying to overwrite a back ref for the root directory 869e02119d5SChris Mason * if so, just jump out, we're done 870e02119d5SChris Mason */ 871e02119d5SChris Mason if (key->objectid == key->offset) 872e02119d5SChris Mason goto out_nowrite; 873e02119d5SChris Mason 874e02119d5SChris Mason /* check all the names in this back reference to see 875e02119d5SChris Mason * if they are in the log. 
if so, we allow them to stay 876e02119d5SChris Mason * otherwise they must be unlinked as a conflict 877e02119d5SChris Mason */ 878e02119d5SChris Mason ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); 879e02119d5SChris Mason ptr_end = ptr + btrfs_item_size_nr(leaf, path->slots[0]); 880e02119d5SChris Mason while (ptr < ptr_end) { 881e02119d5SChris Mason victim_ref = (struct btrfs_inode_ref *)ptr; 882e02119d5SChris Mason victim_name_len = btrfs_inode_ref_name_len(leaf, 883e02119d5SChris Mason victim_ref); 884e02119d5SChris Mason victim_name = kmalloc(victim_name_len, GFP_NOFS); 885e02119d5SChris Mason BUG_ON(!victim_name); 886e02119d5SChris Mason 887e02119d5SChris Mason read_extent_buffer(leaf, victim_name, 888e02119d5SChris Mason (unsigned long)(victim_ref + 1), 889e02119d5SChris Mason victim_name_len); 890e02119d5SChris Mason 891e02119d5SChris Mason if (!backref_in_log(log, key, victim_name, 892e02119d5SChris Mason victim_name_len)) { 893e02119d5SChris Mason btrfs_inc_nlink(inode); 894b3b4aa74SDavid Sterba btrfs_release_path(path); 89512fcfd22SChris Mason 896e02119d5SChris Mason ret = btrfs_unlink_inode(trans, root, dir, 897e02119d5SChris Mason inode, victim_name, 898e02119d5SChris Mason victim_name_len); 899b6305567SChris Mason btrfs_run_delayed_items(trans, root); 900e02119d5SChris Mason } 901e02119d5SChris Mason kfree(victim_name); 902e02119d5SChris Mason ptr = (unsigned long)(victim_ref + 1) + victim_name_len; 903e02119d5SChris Mason } 904e02119d5SChris Mason BUG_ON(ret); 905c622ae60Sliubo 906c622ae60Sliubo /* 907c622ae60Sliubo * NOTE: we have searched root tree and checked the 908c622ae60Sliubo * coresponding ref, it does not need to check again. 
909c622ae60Sliubo */ 910c622ae60Sliubo search_done = 1; 911e02119d5SChris Mason } 912b3b4aa74SDavid Sterba btrfs_release_path(path); 913e02119d5SChris Mason 91434f3e4f2Sliubo /* look for a conflicting sequence number */ 91534f3e4f2Sliubo di = btrfs_lookup_dir_index_item(trans, root, path, btrfs_ino(dir), 91634f3e4f2Sliubo btrfs_inode_ref_index(eb, ref), 91734f3e4f2Sliubo name, namelen, 0); 91834f3e4f2Sliubo if (di && !IS_ERR(di)) { 91934f3e4f2Sliubo ret = drop_one_dir_item(trans, root, path, dir, di); 92034f3e4f2Sliubo BUG_ON(ret); 92134f3e4f2Sliubo } 92234f3e4f2Sliubo btrfs_release_path(path); 92334f3e4f2Sliubo 92434f3e4f2Sliubo /* look for a conflicing name */ 92534f3e4f2Sliubo di = btrfs_lookup_dir_item(trans, root, path, btrfs_ino(dir), 92634f3e4f2Sliubo name, namelen, 0); 92734f3e4f2Sliubo if (di && !IS_ERR(di)) { 92834f3e4f2Sliubo ret = drop_one_dir_item(trans, root, path, dir, di); 92934f3e4f2Sliubo BUG_ON(ret); 93034f3e4f2Sliubo } 93134f3e4f2Sliubo btrfs_release_path(path); 93234f3e4f2Sliubo 933c622ae60Sliubo insert: 934e02119d5SChris Mason /* insert our name */ 935e02119d5SChris Mason ret = btrfs_add_link(trans, dir, inode, name, namelen, 0, 936e02119d5SChris Mason btrfs_inode_ref_index(eb, ref)); 937e02119d5SChris Mason BUG_ON(ret); 938e02119d5SChris Mason 939e02119d5SChris Mason btrfs_update_inode(trans, root, inode); 940e02119d5SChris Mason 941e02119d5SChris Mason out: 942e02119d5SChris Mason ref_ptr = (unsigned long)(ref + 1) + namelen; 943e02119d5SChris Mason kfree(name); 944e02119d5SChris Mason if (ref_ptr < ref_end) 945e02119d5SChris Mason goto again; 946e02119d5SChris Mason 947e02119d5SChris Mason /* finally write the back reference in the inode */ 948e02119d5SChris Mason ret = overwrite_item(trans, root, path, eb, slot, key); 949e02119d5SChris Mason BUG_ON(ret); 950e02119d5SChris Mason 951e02119d5SChris Mason out_nowrite: 952b3b4aa74SDavid Sterba btrfs_release_path(path); 953e02119d5SChris Mason iput(dir); 954e02119d5SChris Mason iput(inode); 
955e02119d5SChris Mason return 0; 956e02119d5SChris Mason } 957e02119d5SChris Mason 958c71bf099SYan, Zheng static int insert_orphan_item(struct btrfs_trans_handle *trans, 959c71bf099SYan, Zheng struct btrfs_root *root, u64 offset) 960c71bf099SYan, Zheng { 961c71bf099SYan, Zheng int ret; 962c71bf099SYan, Zheng ret = btrfs_find_orphan_item(root, offset); 963c71bf099SYan, Zheng if (ret > 0) 964c71bf099SYan, Zheng ret = btrfs_insert_orphan_item(trans, root, offset); 965c71bf099SYan, Zheng return ret; 966c71bf099SYan, Zheng } 967c71bf099SYan, Zheng 968c71bf099SYan, Zheng 969e02119d5SChris Mason /* 970e02119d5SChris Mason * There are a few corners where the link count of the file can't 971e02119d5SChris Mason * be properly maintained during replay. So, instead of adding 972e02119d5SChris Mason * lots of complexity to the log code, we just scan the backrefs 973e02119d5SChris Mason * for any file that has been through replay. 974e02119d5SChris Mason * 975e02119d5SChris Mason * The scan will update the link count on the inode to reflect the 976e02119d5SChris Mason * number of back refs found. If it goes down to zero, the iput 977e02119d5SChris Mason * will free the inode. 
978e02119d5SChris Mason */ 979e02119d5SChris Mason static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans, 980e02119d5SChris Mason struct btrfs_root *root, 981e02119d5SChris Mason struct inode *inode) 982e02119d5SChris Mason { 983e02119d5SChris Mason struct btrfs_path *path; 984e02119d5SChris Mason int ret; 985e02119d5SChris Mason struct btrfs_key key; 986e02119d5SChris Mason u64 nlink = 0; 987e02119d5SChris Mason unsigned long ptr; 988e02119d5SChris Mason unsigned long ptr_end; 989e02119d5SChris Mason int name_len; 99033345d01SLi Zefan u64 ino = btrfs_ino(inode); 991e02119d5SChris Mason 99233345d01SLi Zefan key.objectid = ino; 993e02119d5SChris Mason key.type = BTRFS_INODE_REF_KEY; 994e02119d5SChris Mason key.offset = (u64)-1; 995e02119d5SChris Mason 996e02119d5SChris Mason path = btrfs_alloc_path(); 9972a29edc6Sliubo if (!path) 9982a29edc6Sliubo return -ENOMEM; 999e02119d5SChris Mason 1000e02119d5SChris Mason while (1) { 1001e02119d5SChris Mason ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 1002e02119d5SChris Mason if (ret < 0) 1003e02119d5SChris Mason break; 1004e02119d5SChris Mason if (ret > 0) { 1005e02119d5SChris Mason if (path->slots[0] == 0) 1006e02119d5SChris Mason break; 1007e02119d5SChris Mason path->slots[0]--; 1008e02119d5SChris Mason } 1009e02119d5SChris Mason btrfs_item_key_to_cpu(path->nodes[0], &key, 1010e02119d5SChris Mason path->slots[0]); 101133345d01SLi Zefan if (key.objectid != ino || 1012e02119d5SChris Mason key.type != BTRFS_INODE_REF_KEY) 1013e02119d5SChris Mason break; 1014e02119d5SChris Mason ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]); 1015e02119d5SChris Mason ptr_end = ptr + btrfs_item_size_nr(path->nodes[0], 1016e02119d5SChris Mason path->slots[0]); 1017e02119d5SChris Mason while (ptr < ptr_end) { 1018e02119d5SChris Mason struct btrfs_inode_ref *ref; 1019e02119d5SChris Mason 1020e02119d5SChris Mason ref = (struct btrfs_inode_ref *)ptr; 1021e02119d5SChris Mason name_len = 
btrfs_inode_ref_name_len(path->nodes[0], 1022e02119d5SChris Mason ref); 1023e02119d5SChris Mason ptr = (unsigned long)(ref + 1) + name_len; 1024e02119d5SChris Mason nlink++; 1025e02119d5SChris Mason } 1026e02119d5SChris Mason 1027e02119d5SChris Mason if (key.offset == 0) 1028e02119d5SChris Mason break; 1029e02119d5SChris Mason key.offset--; 1030b3b4aa74SDavid Sterba btrfs_release_path(path); 1031e02119d5SChris Mason } 1032b3b4aa74SDavid Sterba btrfs_release_path(path); 1033e02119d5SChris Mason if (nlink != inode->i_nlink) { 1034bfe86848SMiklos Szeredi set_nlink(inode, nlink); 1035e02119d5SChris Mason btrfs_update_inode(trans, root, inode); 1036e02119d5SChris Mason } 10378d5bf1cbSChris Mason BTRFS_I(inode)->index_cnt = (u64)-1; 1038e02119d5SChris Mason 1039c71bf099SYan, Zheng if (inode->i_nlink == 0) { 1040c71bf099SYan, Zheng if (S_ISDIR(inode->i_mode)) { 104112fcfd22SChris Mason ret = replay_dir_deletes(trans, root, NULL, path, 104233345d01SLi Zefan ino, 1); 104312fcfd22SChris Mason BUG_ON(ret); 104412fcfd22SChris Mason } 104533345d01SLi Zefan ret = insert_orphan_item(trans, root, ino); 1046c71bf099SYan, Zheng BUG_ON(ret); 1047c71bf099SYan, Zheng } 104812fcfd22SChris Mason btrfs_free_path(path); 104912fcfd22SChris Mason 1050e02119d5SChris Mason return 0; 1051e02119d5SChris Mason } 1052e02119d5SChris Mason 1053e02119d5SChris Mason static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans, 1054e02119d5SChris Mason struct btrfs_root *root, 1055e02119d5SChris Mason struct btrfs_path *path) 1056e02119d5SChris Mason { 1057e02119d5SChris Mason int ret; 1058e02119d5SChris Mason struct btrfs_key key; 1059e02119d5SChris Mason struct inode *inode; 1060e02119d5SChris Mason 1061e02119d5SChris Mason key.objectid = BTRFS_TREE_LOG_FIXUP_OBJECTID; 1062e02119d5SChris Mason key.type = BTRFS_ORPHAN_ITEM_KEY; 1063e02119d5SChris Mason key.offset = (u64)-1; 1064e02119d5SChris Mason while (1) { 1065e02119d5SChris Mason ret = btrfs_search_slot(trans, root, &key, path, 
-1, 1); 1066e02119d5SChris Mason if (ret < 0) 1067e02119d5SChris Mason break; 1068e02119d5SChris Mason 1069e02119d5SChris Mason if (ret == 1) { 1070e02119d5SChris Mason if (path->slots[0] == 0) 1071e02119d5SChris Mason break; 1072e02119d5SChris Mason path->slots[0]--; 1073e02119d5SChris Mason } 1074e02119d5SChris Mason 1075e02119d5SChris Mason btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 1076e02119d5SChris Mason if (key.objectid != BTRFS_TREE_LOG_FIXUP_OBJECTID || 1077e02119d5SChris Mason key.type != BTRFS_ORPHAN_ITEM_KEY) 1078e02119d5SChris Mason break; 1079e02119d5SChris Mason 1080e02119d5SChris Mason ret = btrfs_del_item(trans, root, path); 108165a246c5STsutomu Itoh if (ret) 108265a246c5STsutomu Itoh goto out; 1083e02119d5SChris Mason 1084b3b4aa74SDavid Sterba btrfs_release_path(path); 1085e02119d5SChris Mason inode = read_one_inode(root, key.offset); 1086c00e9493STsutomu Itoh if (!inode) 1087c00e9493STsutomu Itoh return -EIO; 1088e02119d5SChris Mason 1089e02119d5SChris Mason ret = fixup_inode_link_count(trans, root, inode); 1090e02119d5SChris Mason BUG_ON(ret); 1091e02119d5SChris Mason 1092e02119d5SChris Mason iput(inode); 1093e02119d5SChris Mason 109412fcfd22SChris Mason /* 109512fcfd22SChris Mason * fixup on a directory may create new entries, 109612fcfd22SChris Mason * make sure we always look for the highset possible 109712fcfd22SChris Mason * offset 109812fcfd22SChris Mason */ 109912fcfd22SChris Mason key.offset = (u64)-1; 1100e02119d5SChris Mason } 110165a246c5STsutomu Itoh ret = 0; 110265a246c5STsutomu Itoh out: 1103b3b4aa74SDavid Sterba btrfs_release_path(path); 110465a246c5STsutomu Itoh return ret; 1105e02119d5SChris Mason } 1106e02119d5SChris Mason 1107e02119d5SChris Mason 1108e02119d5SChris Mason /* 1109e02119d5SChris Mason * record a given inode in the fixup dir so we can check its link 1110e02119d5SChris Mason * count when replay is done. 
The link count is incremented here 1111e02119d5SChris Mason * so the inode won't go away until we check it 1112e02119d5SChris Mason */ 1113e02119d5SChris Mason static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans, 1114e02119d5SChris Mason struct btrfs_root *root, 1115e02119d5SChris Mason struct btrfs_path *path, 1116e02119d5SChris Mason u64 objectid) 1117e02119d5SChris Mason { 1118e02119d5SChris Mason struct btrfs_key key; 1119e02119d5SChris Mason int ret = 0; 1120e02119d5SChris Mason struct inode *inode; 1121e02119d5SChris Mason 1122e02119d5SChris Mason inode = read_one_inode(root, objectid); 1123c00e9493STsutomu Itoh if (!inode) 1124c00e9493STsutomu Itoh return -EIO; 1125e02119d5SChris Mason 1126e02119d5SChris Mason key.objectid = BTRFS_TREE_LOG_FIXUP_OBJECTID; 1127e02119d5SChris Mason btrfs_set_key_type(&key, BTRFS_ORPHAN_ITEM_KEY); 1128e02119d5SChris Mason key.offset = objectid; 1129e02119d5SChris Mason 1130e02119d5SChris Mason ret = btrfs_insert_empty_item(trans, root, path, &key, 0); 1131e02119d5SChris Mason 1132b3b4aa74SDavid Sterba btrfs_release_path(path); 1133e02119d5SChris Mason if (ret == 0) { 1134e02119d5SChris Mason btrfs_inc_nlink(inode); 1135b9959295STsutomu Itoh ret = btrfs_update_inode(trans, root, inode); 1136e02119d5SChris Mason } else if (ret == -EEXIST) { 1137e02119d5SChris Mason ret = 0; 1138e02119d5SChris Mason } else { 1139e02119d5SChris Mason BUG(); 1140e02119d5SChris Mason } 1141e02119d5SChris Mason iput(inode); 1142e02119d5SChris Mason 1143e02119d5SChris Mason return ret; 1144e02119d5SChris Mason } 1145e02119d5SChris Mason 1146e02119d5SChris Mason /* 1147e02119d5SChris Mason * when replaying the log for a directory, we only insert names 1148e02119d5SChris Mason * for inodes that actually exist. 
This means an fsync on a directory 1149e02119d5SChris Mason * does not implicitly fsync all the new files in it 1150e02119d5SChris Mason */ 1151e02119d5SChris Mason static noinline int insert_one_name(struct btrfs_trans_handle *trans, 1152e02119d5SChris Mason struct btrfs_root *root, 1153e02119d5SChris Mason struct btrfs_path *path, 1154e02119d5SChris Mason u64 dirid, u64 index, 1155e02119d5SChris Mason char *name, int name_len, u8 type, 1156e02119d5SChris Mason struct btrfs_key *location) 1157e02119d5SChris Mason { 1158e02119d5SChris Mason struct inode *inode; 1159e02119d5SChris Mason struct inode *dir; 1160e02119d5SChris Mason int ret; 1161e02119d5SChris Mason 1162e02119d5SChris Mason inode = read_one_inode(root, location->objectid); 1163e02119d5SChris Mason if (!inode) 1164e02119d5SChris Mason return -ENOENT; 1165e02119d5SChris Mason 1166e02119d5SChris Mason dir = read_one_inode(root, dirid); 1167e02119d5SChris Mason if (!dir) { 1168e02119d5SChris Mason iput(inode); 1169e02119d5SChris Mason return -EIO; 1170e02119d5SChris Mason } 1171e02119d5SChris Mason ret = btrfs_add_link(trans, dir, inode, name, name_len, 1, index); 1172e02119d5SChris Mason 1173e02119d5SChris Mason /* FIXME, put inode into FIXUP list */ 1174e02119d5SChris Mason 1175e02119d5SChris Mason iput(inode); 1176e02119d5SChris Mason iput(dir); 1177e02119d5SChris Mason return ret; 1178e02119d5SChris Mason } 1179e02119d5SChris Mason 1180e02119d5SChris Mason /* 1181e02119d5SChris Mason * take a single entry in a log directory item and replay it into 1182e02119d5SChris Mason * the subvolume. 1183e02119d5SChris Mason * 1184e02119d5SChris Mason * if a conflicting item exists in the subdirectory already, 1185e02119d5SChris Mason * the inode it points to is unlinked and put into the link count 1186e02119d5SChris Mason * fix up tree. 1187e02119d5SChris Mason * 1188e02119d5SChris Mason * If a name from the log points to a file or directory that does 1189e02119d5SChris Mason * not exist in the FS, it is skipped. 
fsyncs on directories 1190e02119d5SChris Mason * do not force down inodes inside that directory, just changes to the 1191e02119d5SChris Mason * names or unlinks in a directory. 1192e02119d5SChris Mason */ 1193e02119d5SChris Mason static noinline int replay_one_name(struct btrfs_trans_handle *trans, 1194e02119d5SChris Mason struct btrfs_root *root, 1195e02119d5SChris Mason struct btrfs_path *path, 1196e02119d5SChris Mason struct extent_buffer *eb, 1197e02119d5SChris Mason struct btrfs_dir_item *di, 1198e02119d5SChris Mason struct btrfs_key *key) 1199e02119d5SChris Mason { 1200e02119d5SChris Mason char *name; 1201e02119d5SChris Mason int name_len; 1202e02119d5SChris Mason struct btrfs_dir_item *dst_di; 1203e02119d5SChris Mason struct btrfs_key found_key; 1204e02119d5SChris Mason struct btrfs_key log_key; 1205e02119d5SChris Mason struct inode *dir; 1206e02119d5SChris Mason u8 log_type; 12074bef0848SChris Mason int exists; 1208e02119d5SChris Mason int ret; 1209e02119d5SChris Mason 1210e02119d5SChris Mason dir = read_one_inode(root, key->objectid); 1211c00e9493STsutomu Itoh if (!dir) 1212c00e9493STsutomu Itoh return -EIO; 1213e02119d5SChris Mason 1214e02119d5SChris Mason name_len = btrfs_dir_name_len(eb, di); 1215e02119d5SChris Mason name = kmalloc(name_len, GFP_NOFS); 12162a29edc6Sliubo if (!name) 12172a29edc6Sliubo return -ENOMEM; 12182a29edc6Sliubo 1219e02119d5SChris Mason log_type = btrfs_dir_type(eb, di); 1220e02119d5SChris Mason read_extent_buffer(eb, name, (unsigned long)(di + 1), 1221e02119d5SChris Mason name_len); 1222e02119d5SChris Mason 1223e02119d5SChris Mason btrfs_dir_item_key_to_cpu(eb, di, &log_key); 12244bef0848SChris Mason exists = btrfs_lookup_inode(trans, root, path, &log_key, 0); 12254bef0848SChris Mason if (exists == 0) 12264bef0848SChris Mason exists = 1; 12274bef0848SChris Mason else 12284bef0848SChris Mason exists = 0; 1229b3b4aa74SDavid Sterba btrfs_release_path(path); 12304bef0848SChris Mason 1231e02119d5SChris Mason if (key->type == 
BTRFS_DIR_ITEM_KEY) { 1232e02119d5SChris Mason dst_di = btrfs_lookup_dir_item(trans, root, path, key->objectid, 1233e02119d5SChris Mason name, name_len, 1); 1234d397712bSChris Mason } else if (key->type == BTRFS_DIR_INDEX_KEY) { 1235e02119d5SChris Mason dst_di = btrfs_lookup_dir_index_item(trans, root, path, 1236e02119d5SChris Mason key->objectid, 1237e02119d5SChris Mason key->offset, name, 1238e02119d5SChris Mason name_len, 1); 1239e02119d5SChris Mason } else { 1240e02119d5SChris Mason BUG(); 1241e02119d5SChris Mason } 1242c704005dSDavid Sterba if (IS_ERR_OR_NULL(dst_di)) { 1243e02119d5SChris Mason /* we need a sequence number to insert, so we only 1244e02119d5SChris Mason * do inserts for the BTRFS_DIR_INDEX_KEY types 1245e02119d5SChris Mason */ 1246e02119d5SChris Mason if (key->type != BTRFS_DIR_INDEX_KEY) 1247e02119d5SChris Mason goto out; 1248e02119d5SChris Mason goto insert; 1249e02119d5SChris Mason } 1250e02119d5SChris Mason 1251e02119d5SChris Mason btrfs_dir_item_key_to_cpu(path->nodes[0], dst_di, &found_key); 1252e02119d5SChris Mason /* the existing item matches the logged item */ 1253e02119d5SChris Mason if (found_key.objectid == log_key.objectid && 1254e02119d5SChris Mason found_key.type == log_key.type && 1255e02119d5SChris Mason found_key.offset == log_key.offset && 1256e02119d5SChris Mason btrfs_dir_type(path->nodes[0], dst_di) == log_type) { 1257e02119d5SChris Mason goto out; 1258e02119d5SChris Mason } 1259e02119d5SChris Mason 1260e02119d5SChris Mason /* 1261e02119d5SChris Mason * don't drop the conflicting directory entry if the inode 1262e02119d5SChris Mason * for the new entry doesn't exist 1263e02119d5SChris Mason */ 12644bef0848SChris Mason if (!exists) 1265e02119d5SChris Mason goto out; 1266e02119d5SChris Mason 1267e02119d5SChris Mason ret = drop_one_dir_item(trans, root, path, dir, dst_di); 1268e02119d5SChris Mason BUG_ON(ret); 1269e02119d5SChris Mason 1270e02119d5SChris Mason if (key->type == BTRFS_DIR_INDEX_KEY) 1271e02119d5SChris Mason goto 
insert; 1272e02119d5SChris Mason out: 1273b3b4aa74SDavid Sterba btrfs_release_path(path); 1274e02119d5SChris Mason kfree(name); 1275e02119d5SChris Mason iput(dir); 1276e02119d5SChris Mason return 0; 1277e02119d5SChris Mason 1278e02119d5SChris Mason insert: 1279b3b4aa74SDavid Sterba btrfs_release_path(path); 1280e02119d5SChris Mason ret = insert_one_name(trans, root, path, key->objectid, key->offset, 1281e02119d5SChris Mason name, name_len, log_type, &log_key); 1282e02119d5SChris Mason 1283c293498bSStoyan Gaydarov BUG_ON(ret && ret != -ENOENT); 1284e02119d5SChris Mason goto out; 1285e02119d5SChris Mason } 1286e02119d5SChris Mason 1287e02119d5SChris Mason /* 1288e02119d5SChris Mason * find all the names in a directory item and reconcile them into 1289e02119d5SChris Mason * the subvolume. Only BTRFS_DIR_ITEM_KEY types will have more than 1290e02119d5SChris Mason * one name in a directory item, but the same code gets used for 1291e02119d5SChris Mason * both directory index types 1292e02119d5SChris Mason */ 1293e02119d5SChris Mason static noinline int replay_one_dir_item(struct btrfs_trans_handle *trans, 1294e02119d5SChris Mason struct btrfs_root *root, 1295e02119d5SChris Mason struct btrfs_path *path, 1296e02119d5SChris Mason struct extent_buffer *eb, int slot, 1297e02119d5SChris Mason struct btrfs_key *key) 1298e02119d5SChris Mason { 1299e02119d5SChris Mason int ret; 1300e02119d5SChris Mason u32 item_size = btrfs_item_size_nr(eb, slot); 1301e02119d5SChris Mason struct btrfs_dir_item *di; 1302e02119d5SChris Mason int name_len; 1303e02119d5SChris Mason unsigned long ptr; 1304e02119d5SChris Mason unsigned long ptr_end; 1305e02119d5SChris Mason 1306e02119d5SChris Mason ptr = btrfs_item_ptr_offset(eb, slot); 1307e02119d5SChris Mason ptr_end = ptr + item_size; 1308e02119d5SChris Mason while (ptr < ptr_end) { 1309e02119d5SChris Mason di = (struct btrfs_dir_item *)ptr; 131022a94d44SJosef Bacik if (verify_dir_item(root, eb, di)) 131122a94d44SJosef Bacik return -EIO; 
1312e02119d5SChris Mason name_len = btrfs_dir_name_len(eb, di); 1313e02119d5SChris Mason ret = replay_one_name(trans, root, path, eb, di, key); 1314e02119d5SChris Mason BUG_ON(ret); 1315e02119d5SChris Mason ptr = (unsigned long)(di + 1); 1316e02119d5SChris Mason ptr += name_len; 1317e02119d5SChris Mason } 1318e02119d5SChris Mason return 0; 1319e02119d5SChris Mason } 1320e02119d5SChris Mason 1321e02119d5SChris Mason /* 1322e02119d5SChris Mason * directory replay has two parts. There are the standard directory 1323e02119d5SChris Mason * items in the log copied from the subvolume, and range items 1324e02119d5SChris Mason * created in the log while the subvolume was logged. 1325e02119d5SChris Mason * 1326e02119d5SChris Mason * The range items tell us which parts of the key space the log 1327e02119d5SChris Mason * is authoritative for. During replay, if a key in the subvolume 1328e02119d5SChris Mason * directory is in a logged range item, but not actually in the log 1329e02119d5SChris Mason * that means it was deleted from the directory before the fsync 1330e02119d5SChris Mason * and should be removed. 
1331e02119d5SChris Mason */ 1332e02119d5SChris Mason static noinline int find_dir_range(struct btrfs_root *root, 1333e02119d5SChris Mason struct btrfs_path *path, 1334e02119d5SChris Mason u64 dirid, int key_type, 1335e02119d5SChris Mason u64 *start_ret, u64 *end_ret) 1336e02119d5SChris Mason { 1337e02119d5SChris Mason struct btrfs_key key; 1338e02119d5SChris Mason u64 found_end; 1339e02119d5SChris Mason struct btrfs_dir_log_item *item; 1340e02119d5SChris Mason int ret; 1341e02119d5SChris Mason int nritems; 1342e02119d5SChris Mason 1343e02119d5SChris Mason if (*start_ret == (u64)-1) 1344e02119d5SChris Mason return 1; 1345e02119d5SChris Mason 1346e02119d5SChris Mason key.objectid = dirid; 1347e02119d5SChris Mason key.type = key_type; 1348e02119d5SChris Mason key.offset = *start_ret; 1349e02119d5SChris Mason 1350e02119d5SChris Mason ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 1351e02119d5SChris Mason if (ret < 0) 1352e02119d5SChris Mason goto out; 1353e02119d5SChris Mason if (ret > 0) { 1354e02119d5SChris Mason if (path->slots[0] == 0) 1355e02119d5SChris Mason goto out; 1356e02119d5SChris Mason path->slots[0]--; 1357e02119d5SChris Mason } 1358e02119d5SChris Mason if (ret != 0) 1359e02119d5SChris Mason btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 1360e02119d5SChris Mason 1361e02119d5SChris Mason if (key.type != key_type || key.objectid != dirid) { 1362e02119d5SChris Mason ret = 1; 1363e02119d5SChris Mason goto next; 1364e02119d5SChris Mason } 1365e02119d5SChris Mason item = btrfs_item_ptr(path->nodes[0], path->slots[0], 1366e02119d5SChris Mason struct btrfs_dir_log_item); 1367e02119d5SChris Mason found_end = btrfs_dir_log_end(path->nodes[0], item); 1368e02119d5SChris Mason 1369e02119d5SChris Mason if (*start_ret >= key.offset && *start_ret <= found_end) { 1370e02119d5SChris Mason ret = 0; 1371e02119d5SChris Mason *start_ret = key.offset; 1372e02119d5SChris Mason *end_ret = found_end; 1373e02119d5SChris Mason goto out; 1374e02119d5SChris Mason 
} 1375e02119d5SChris Mason ret = 1; 1376e02119d5SChris Mason next: 1377e02119d5SChris Mason /* check the next slot in the tree to see if it is a valid item */ 1378e02119d5SChris Mason nritems = btrfs_header_nritems(path->nodes[0]); 1379e02119d5SChris Mason if (path->slots[0] >= nritems) { 1380e02119d5SChris Mason ret = btrfs_next_leaf(root, path); 1381e02119d5SChris Mason if (ret) 1382e02119d5SChris Mason goto out; 1383e02119d5SChris Mason } else { 1384e02119d5SChris Mason path->slots[0]++; 1385e02119d5SChris Mason } 1386e02119d5SChris Mason 1387e02119d5SChris Mason btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 1388e02119d5SChris Mason 1389e02119d5SChris Mason if (key.type != key_type || key.objectid != dirid) { 1390e02119d5SChris Mason ret = 1; 1391e02119d5SChris Mason goto out; 1392e02119d5SChris Mason } 1393e02119d5SChris Mason item = btrfs_item_ptr(path->nodes[0], path->slots[0], 1394e02119d5SChris Mason struct btrfs_dir_log_item); 1395e02119d5SChris Mason found_end = btrfs_dir_log_end(path->nodes[0], item); 1396e02119d5SChris Mason *start_ret = key.offset; 1397e02119d5SChris Mason *end_ret = found_end; 1398e02119d5SChris Mason ret = 0; 1399e02119d5SChris Mason out: 1400b3b4aa74SDavid Sterba btrfs_release_path(path); 1401e02119d5SChris Mason return ret; 1402e02119d5SChris Mason } 1403e02119d5SChris Mason 1404e02119d5SChris Mason /* 1405e02119d5SChris Mason * this looks for a given directory item in the log. 
If the directory 1406e02119d5SChris Mason * item is not in the log, the item is removed and the inode it points 1407e02119d5SChris Mason * to is unlinked 1408e02119d5SChris Mason */ 1409e02119d5SChris Mason static noinline int check_item_in_log(struct btrfs_trans_handle *trans, 1410e02119d5SChris Mason struct btrfs_root *root, 1411e02119d5SChris Mason struct btrfs_root *log, 1412e02119d5SChris Mason struct btrfs_path *path, 1413e02119d5SChris Mason struct btrfs_path *log_path, 1414e02119d5SChris Mason struct inode *dir, 1415e02119d5SChris Mason struct btrfs_key *dir_key) 1416e02119d5SChris Mason { 1417e02119d5SChris Mason int ret; 1418e02119d5SChris Mason struct extent_buffer *eb; 1419e02119d5SChris Mason int slot; 1420e02119d5SChris Mason u32 item_size; 1421e02119d5SChris Mason struct btrfs_dir_item *di; 1422e02119d5SChris Mason struct btrfs_dir_item *log_di; 1423e02119d5SChris Mason int name_len; 1424e02119d5SChris Mason unsigned long ptr; 1425e02119d5SChris Mason unsigned long ptr_end; 1426e02119d5SChris Mason char *name; 1427e02119d5SChris Mason struct inode *inode; 1428e02119d5SChris Mason struct btrfs_key location; 1429e02119d5SChris Mason 1430e02119d5SChris Mason again: 1431e02119d5SChris Mason eb = path->nodes[0]; 1432e02119d5SChris Mason slot = path->slots[0]; 1433e02119d5SChris Mason item_size = btrfs_item_size_nr(eb, slot); 1434e02119d5SChris Mason ptr = btrfs_item_ptr_offset(eb, slot); 1435e02119d5SChris Mason ptr_end = ptr + item_size; 1436e02119d5SChris Mason while (ptr < ptr_end) { 1437e02119d5SChris Mason di = (struct btrfs_dir_item *)ptr; 143822a94d44SJosef Bacik if (verify_dir_item(root, eb, di)) { 143922a94d44SJosef Bacik ret = -EIO; 144022a94d44SJosef Bacik goto out; 144122a94d44SJosef Bacik } 144222a94d44SJosef Bacik 1443e02119d5SChris Mason name_len = btrfs_dir_name_len(eb, di); 1444e02119d5SChris Mason name = kmalloc(name_len, GFP_NOFS); 1445e02119d5SChris Mason if (!name) { 1446e02119d5SChris Mason ret = -ENOMEM; 1447e02119d5SChris Mason 
goto out; 1448e02119d5SChris Mason } 1449e02119d5SChris Mason read_extent_buffer(eb, name, (unsigned long)(di + 1), 1450e02119d5SChris Mason name_len); 1451e02119d5SChris Mason log_di = NULL; 145212fcfd22SChris Mason if (log && dir_key->type == BTRFS_DIR_ITEM_KEY) { 1453e02119d5SChris Mason log_di = btrfs_lookup_dir_item(trans, log, log_path, 1454e02119d5SChris Mason dir_key->objectid, 1455e02119d5SChris Mason name, name_len, 0); 145612fcfd22SChris Mason } else if (log && dir_key->type == BTRFS_DIR_INDEX_KEY) { 1457e02119d5SChris Mason log_di = btrfs_lookup_dir_index_item(trans, log, 1458e02119d5SChris Mason log_path, 1459e02119d5SChris Mason dir_key->objectid, 1460e02119d5SChris Mason dir_key->offset, 1461e02119d5SChris Mason name, name_len, 0); 1462e02119d5SChris Mason } 1463c704005dSDavid Sterba if (IS_ERR_OR_NULL(log_di)) { 1464e02119d5SChris Mason btrfs_dir_item_key_to_cpu(eb, di, &location); 1465b3b4aa74SDavid Sterba btrfs_release_path(path); 1466b3b4aa74SDavid Sterba btrfs_release_path(log_path); 1467e02119d5SChris Mason inode = read_one_inode(root, location.objectid); 1468c00e9493STsutomu Itoh if (!inode) { 1469c00e9493STsutomu Itoh kfree(name); 1470c00e9493STsutomu Itoh return -EIO; 1471c00e9493STsutomu Itoh } 1472e02119d5SChris Mason 1473e02119d5SChris Mason ret = link_to_fixup_dir(trans, root, 1474e02119d5SChris Mason path, location.objectid); 1475e02119d5SChris Mason BUG_ON(ret); 1476e02119d5SChris Mason btrfs_inc_nlink(inode); 1477e02119d5SChris Mason ret = btrfs_unlink_inode(trans, root, dir, inode, 1478e02119d5SChris Mason name, name_len); 1479e02119d5SChris Mason BUG_ON(ret); 1480b6305567SChris Mason 1481b6305567SChris Mason btrfs_run_delayed_items(trans, root); 1482b6305567SChris Mason 1483e02119d5SChris Mason kfree(name); 1484e02119d5SChris Mason iput(inode); 1485e02119d5SChris Mason 1486e02119d5SChris Mason /* there might still be more names under this key 1487e02119d5SChris Mason * check and repeat if required 1488e02119d5SChris Mason */ 
1489e02119d5SChris Mason ret = btrfs_search_slot(NULL, root, dir_key, path, 1490e02119d5SChris Mason 0, 0); 1491e02119d5SChris Mason if (ret == 0) 1492e02119d5SChris Mason goto again; 1493e02119d5SChris Mason ret = 0; 1494e02119d5SChris Mason goto out; 1495e02119d5SChris Mason } 1496b3b4aa74SDavid Sterba btrfs_release_path(log_path); 1497e02119d5SChris Mason kfree(name); 1498e02119d5SChris Mason 1499e02119d5SChris Mason ptr = (unsigned long)(di + 1); 1500e02119d5SChris Mason ptr += name_len; 1501e02119d5SChris Mason } 1502e02119d5SChris Mason ret = 0; 1503e02119d5SChris Mason out: 1504b3b4aa74SDavid Sterba btrfs_release_path(path); 1505b3b4aa74SDavid Sterba btrfs_release_path(log_path); 1506e02119d5SChris Mason return ret; 1507e02119d5SChris Mason } 1508e02119d5SChris Mason 1509e02119d5SChris Mason /* 1510e02119d5SChris Mason * deletion replay happens before we copy any new directory items 1511e02119d5SChris Mason * out of the log or out of backreferences from inodes. It 1512e02119d5SChris Mason * scans the log to find ranges of keys that log is authoritative for, 1513e02119d5SChris Mason * and then scans the directory to find items in those ranges that are 1514e02119d5SChris Mason * not present in the log. 1515e02119d5SChris Mason * 1516e02119d5SChris Mason * Anything we don't find in the log is unlinked and removed from the 1517e02119d5SChris Mason * directory. 
1518e02119d5SChris Mason */ 1519e02119d5SChris Mason static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans, 1520e02119d5SChris Mason struct btrfs_root *root, 1521e02119d5SChris Mason struct btrfs_root *log, 1522e02119d5SChris Mason struct btrfs_path *path, 152312fcfd22SChris Mason u64 dirid, int del_all) 1524e02119d5SChris Mason { 1525e02119d5SChris Mason u64 range_start; 1526e02119d5SChris Mason u64 range_end; 1527e02119d5SChris Mason int key_type = BTRFS_DIR_LOG_ITEM_KEY; 1528e02119d5SChris Mason int ret = 0; 1529e02119d5SChris Mason struct btrfs_key dir_key; 1530e02119d5SChris Mason struct btrfs_key found_key; 1531e02119d5SChris Mason struct btrfs_path *log_path; 1532e02119d5SChris Mason struct inode *dir; 1533e02119d5SChris Mason 1534e02119d5SChris Mason dir_key.objectid = dirid; 1535e02119d5SChris Mason dir_key.type = BTRFS_DIR_ITEM_KEY; 1536e02119d5SChris Mason log_path = btrfs_alloc_path(); 1537e02119d5SChris Mason if (!log_path) 1538e02119d5SChris Mason return -ENOMEM; 1539e02119d5SChris Mason 1540e02119d5SChris Mason dir = read_one_inode(root, dirid); 1541e02119d5SChris Mason /* it isn't an error if the inode isn't there, that can happen 1542e02119d5SChris Mason * because we replay the deletes before we copy in the inode item 1543e02119d5SChris Mason * from the log 1544e02119d5SChris Mason */ 1545e02119d5SChris Mason if (!dir) { 1546e02119d5SChris Mason btrfs_free_path(log_path); 1547e02119d5SChris Mason return 0; 1548e02119d5SChris Mason } 1549e02119d5SChris Mason again: 1550e02119d5SChris Mason range_start = 0; 1551e02119d5SChris Mason range_end = 0; 1552e02119d5SChris Mason while (1) { 155312fcfd22SChris Mason if (del_all) 155412fcfd22SChris Mason range_end = (u64)-1; 155512fcfd22SChris Mason else { 1556e02119d5SChris Mason ret = find_dir_range(log, path, dirid, key_type, 1557e02119d5SChris Mason &range_start, &range_end); 1558e02119d5SChris Mason if (ret != 0) 1559e02119d5SChris Mason break; 156012fcfd22SChris Mason } 
1561e02119d5SChris Mason 1562e02119d5SChris Mason dir_key.offset = range_start; 1563e02119d5SChris Mason while (1) { 1564e02119d5SChris Mason int nritems; 1565e02119d5SChris Mason ret = btrfs_search_slot(NULL, root, &dir_key, path, 1566e02119d5SChris Mason 0, 0); 1567e02119d5SChris Mason if (ret < 0) 1568e02119d5SChris Mason goto out; 1569e02119d5SChris Mason 1570e02119d5SChris Mason nritems = btrfs_header_nritems(path->nodes[0]); 1571e02119d5SChris Mason if (path->slots[0] >= nritems) { 1572e02119d5SChris Mason ret = btrfs_next_leaf(root, path); 1573e02119d5SChris Mason if (ret) 1574e02119d5SChris Mason break; 1575e02119d5SChris Mason } 1576e02119d5SChris Mason btrfs_item_key_to_cpu(path->nodes[0], &found_key, 1577e02119d5SChris Mason path->slots[0]); 1578e02119d5SChris Mason if (found_key.objectid != dirid || 1579e02119d5SChris Mason found_key.type != dir_key.type) 1580e02119d5SChris Mason goto next_type; 1581e02119d5SChris Mason 1582e02119d5SChris Mason if (found_key.offset > range_end) 1583e02119d5SChris Mason break; 1584e02119d5SChris Mason 1585e02119d5SChris Mason ret = check_item_in_log(trans, root, log, path, 158612fcfd22SChris Mason log_path, dir, 158712fcfd22SChris Mason &found_key); 1588e02119d5SChris Mason BUG_ON(ret); 1589e02119d5SChris Mason if (found_key.offset == (u64)-1) 1590e02119d5SChris Mason break; 1591e02119d5SChris Mason dir_key.offset = found_key.offset + 1; 1592e02119d5SChris Mason } 1593b3b4aa74SDavid Sterba btrfs_release_path(path); 1594e02119d5SChris Mason if (range_end == (u64)-1) 1595e02119d5SChris Mason break; 1596e02119d5SChris Mason range_start = range_end + 1; 1597e02119d5SChris Mason } 1598e02119d5SChris Mason 1599e02119d5SChris Mason next_type: 1600e02119d5SChris Mason ret = 0; 1601e02119d5SChris Mason if (key_type == BTRFS_DIR_LOG_ITEM_KEY) { 1602e02119d5SChris Mason key_type = BTRFS_DIR_LOG_INDEX_KEY; 1603e02119d5SChris Mason dir_key.type = BTRFS_DIR_INDEX_KEY; 1604b3b4aa74SDavid Sterba btrfs_release_path(path); 
1605e02119d5SChris Mason goto again; 1606e02119d5SChris Mason } 1607e02119d5SChris Mason out: 1608b3b4aa74SDavid Sterba btrfs_release_path(path); 1609e02119d5SChris Mason btrfs_free_path(log_path); 1610e02119d5SChris Mason iput(dir); 1611e02119d5SChris Mason return ret; 1612e02119d5SChris Mason } 1613e02119d5SChris Mason 1614e02119d5SChris Mason /* 1615e02119d5SChris Mason * the process_func used to replay items from the log tree. This 1616e02119d5SChris Mason * gets called in two different stages. The first stage just looks 1617e02119d5SChris Mason * for inodes and makes sure they are all copied into the subvolume. 1618e02119d5SChris Mason * 1619e02119d5SChris Mason * The second stage copies all the other item types from the log into 1620e02119d5SChris Mason * the subvolume. The two stage approach is slower, but gets rid of 1621e02119d5SChris Mason * lots of complexity around inodes referencing other inodes that exist 1622e02119d5SChris Mason * only in the log (references come from either directory items or inode 1623e02119d5SChris Mason * back refs). 
1624e02119d5SChris Mason */ 1625e02119d5SChris Mason static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, 1626e02119d5SChris Mason struct walk_control *wc, u64 gen) 1627e02119d5SChris Mason { 1628e02119d5SChris Mason int nritems; 1629e02119d5SChris Mason struct btrfs_path *path; 1630e02119d5SChris Mason struct btrfs_root *root = wc->replay_dest; 1631e02119d5SChris Mason struct btrfs_key key; 1632e02119d5SChris Mason int level; 1633e02119d5SChris Mason int i; 1634e02119d5SChris Mason int ret; 1635e02119d5SChris Mason 1636018642a1STsutomu Itoh ret = btrfs_read_buffer(eb, gen); 1637018642a1STsutomu Itoh if (ret) 1638018642a1STsutomu Itoh return ret; 1639e02119d5SChris Mason 1640e02119d5SChris Mason level = btrfs_header_level(eb); 1641e02119d5SChris Mason 1642e02119d5SChris Mason if (level != 0) 1643e02119d5SChris Mason return 0; 1644e02119d5SChris Mason 1645e02119d5SChris Mason path = btrfs_alloc_path(); 16461e5063d0SMark Fasheh if (!path) 16471e5063d0SMark Fasheh return -ENOMEM; 1648e02119d5SChris Mason 1649e02119d5SChris Mason nritems = btrfs_header_nritems(eb); 1650e02119d5SChris Mason for (i = 0; i < nritems; i++) { 1651e02119d5SChris Mason btrfs_item_key_to_cpu(eb, &key, i); 1652e02119d5SChris Mason 1653e02119d5SChris Mason /* inode keys are done during the first stage */ 1654e02119d5SChris Mason if (key.type == BTRFS_INODE_ITEM_KEY && 1655e02119d5SChris Mason wc->stage == LOG_WALK_REPLAY_INODES) { 1656e02119d5SChris Mason struct btrfs_inode_item *inode_item; 1657e02119d5SChris Mason u32 mode; 1658e02119d5SChris Mason 1659e02119d5SChris Mason inode_item = btrfs_item_ptr(eb, i, 1660e02119d5SChris Mason struct btrfs_inode_item); 1661e02119d5SChris Mason mode = btrfs_inode_mode(eb, inode_item); 1662e02119d5SChris Mason if (S_ISDIR(mode)) { 1663e02119d5SChris Mason ret = replay_dir_deletes(wc->trans, 166412fcfd22SChris Mason root, log, path, key.objectid, 0); 1665e02119d5SChris Mason BUG_ON(ret); 1666e02119d5SChris Mason } 1667e02119d5SChris 
Mason ret = overwrite_item(wc->trans, root, path, 1668e02119d5SChris Mason eb, i, &key); 1669e02119d5SChris Mason BUG_ON(ret); 1670e02119d5SChris Mason 1671c71bf099SYan, Zheng /* for regular files, make sure corresponding 1672c71bf099SYan, Zheng * orhpan item exist. extents past the new EOF 1673c71bf099SYan, Zheng * will be truncated later by orphan cleanup. 1674e02119d5SChris Mason */ 1675e02119d5SChris Mason if (S_ISREG(mode)) { 1676c71bf099SYan, Zheng ret = insert_orphan_item(wc->trans, root, 1677e02119d5SChris Mason key.objectid); 1678e02119d5SChris Mason BUG_ON(ret); 1679c71bf099SYan, Zheng } 1680a74ac322SChris Mason 1681e02119d5SChris Mason ret = link_to_fixup_dir(wc->trans, root, 1682e02119d5SChris Mason path, key.objectid); 1683e02119d5SChris Mason BUG_ON(ret); 1684e02119d5SChris Mason } 1685e02119d5SChris Mason if (wc->stage < LOG_WALK_REPLAY_ALL) 1686e02119d5SChris Mason continue; 1687e02119d5SChris Mason 1688e02119d5SChris Mason /* these keys are simply copied */ 1689e02119d5SChris Mason if (key.type == BTRFS_XATTR_ITEM_KEY) { 1690e02119d5SChris Mason ret = overwrite_item(wc->trans, root, path, 1691e02119d5SChris Mason eb, i, &key); 1692e02119d5SChris Mason BUG_ON(ret); 1693e02119d5SChris Mason } else if (key.type == BTRFS_INODE_REF_KEY) { 1694e02119d5SChris Mason ret = add_inode_ref(wc->trans, root, log, path, 1695e02119d5SChris Mason eb, i, &key); 1696e02119d5SChris Mason BUG_ON(ret && ret != -ENOENT); 1697e02119d5SChris Mason } else if (key.type == BTRFS_EXTENT_DATA_KEY) { 1698e02119d5SChris Mason ret = replay_one_extent(wc->trans, root, path, 1699e02119d5SChris Mason eb, i, &key); 1700e02119d5SChris Mason BUG_ON(ret); 1701e02119d5SChris Mason } else if (key.type == BTRFS_DIR_ITEM_KEY || 1702e02119d5SChris Mason key.type == BTRFS_DIR_INDEX_KEY) { 1703e02119d5SChris Mason ret = replay_one_dir_item(wc->trans, root, path, 1704e02119d5SChris Mason eb, i, &key); 1705e02119d5SChris Mason BUG_ON(ret); 1706e02119d5SChris Mason } 1707e02119d5SChris Mason } 
1708e02119d5SChris Mason btrfs_free_path(path); 1709e02119d5SChris Mason return 0; 1710e02119d5SChris Mason } 1711e02119d5SChris Mason 1712d397712bSChris Mason static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, 1713e02119d5SChris Mason struct btrfs_root *root, 1714e02119d5SChris Mason struct btrfs_path *path, int *level, 1715e02119d5SChris Mason struct walk_control *wc) 1716e02119d5SChris Mason { 1717e02119d5SChris Mason u64 root_owner; 1718e02119d5SChris Mason u64 bytenr; 1719e02119d5SChris Mason u64 ptr_gen; 1720e02119d5SChris Mason struct extent_buffer *next; 1721e02119d5SChris Mason struct extent_buffer *cur; 1722e02119d5SChris Mason struct extent_buffer *parent; 1723e02119d5SChris Mason u32 blocksize; 1724e02119d5SChris Mason int ret = 0; 1725e02119d5SChris Mason 1726e02119d5SChris Mason WARN_ON(*level < 0); 1727e02119d5SChris Mason WARN_ON(*level >= BTRFS_MAX_LEVEL); 1728e02119d5SChris Mason 1729e02119d5SChris Mason while (*level > 0) { 1730e02119d5SChris Mason WARN_ON(*level < 0); 1731e02119d5SChris Mason WARN_ON(*level >= BTRFS_MAX_LEVEL); 1732e02119d5SChris Mason cur = path->nodes[*level]; 1733e02119d5SChris Mason 1734e02119d5SChris Mason if (btrfs_header_level(cur) != *level) 1735e02119d5SChris Mason WARN_ON(1); 1736e02119d5SChris Mason 1737e02119d5SChris Mason if (path->slots[*level] >= 1738e02119d5SChris Mason btrfs_header_nritems(cur)) 1739e02119d5SChris Mason break; 1740e02119d5SChris Mason 1741e02119d5SChris Mason bytenr = btrfs_node_blockptr(cur, path->slots[*level]); 1742e02119d5SChris Mason ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); 1743e02119d5SChris Mason blocksize = btrfs_level_size(root, *level - 1); 1744e02119d5SChris Mason 1745e02119d5SChris Mason parent = path->nodes[*level]; 1746e02119d5SChris Mason root_owner = btrfs_header_owner(parent); 1747e02119d5SChris Mason 1748e02119d5SChris Mason next = btrfs_find_create_tree_block(root, bytenr, blocksize); 17492a29edc6Sliubo if (!next) 17502a29edc6Sliubo 
return -ENOMEM; 1751e02119d5SChris Mason 17524a500fd1SYan, Zheng if (*level == 1) { 17531e5063d0SMark Fasheh ret = wc->process_func(root, next, wc, ptr_gen); 17541e5063d0SMark Fasheh if (ret) 17551e5063d0SMark Fasheh return ret; 1756e02119d5SChris Mason 1757e02119d5SChris Mason path->slots[*level]++; 1758e02119d5SChris Mason if (wc->free) { 1759018642a1STsutomu Itoh ret = btrfs_read_buffer(next, ptr_gen); 1760018642a1STsutomu Itoh if (ret) { 1761018642a1STsutomu Itoh free_extent_buffer(next); 1762018642a1STsutomu Itoh return ret; 1763018642a1STsutomu Itoh } 1764e02119d5SChris Mason 1765e02119d5SChris Mason btrfs_tree_lock(next); 1766b4ce94deSChris Mason btrfs_set_lock_blocking(next); 1767bd681513SChris Mason clean_tree_block(trans, root, next); 1768e02119d5SChris Mason btrfs_wait_tree_block_writeback(next); 1769e02119d5SChris Mason btrfs_tree_unlock(next); 1770e02119d5SChris Mason 1771e02119d5SChris Mason WARN_ON(root_owner != 1772e02119d5SChris Mason BTRFS_TREE_LOG_OBJECTID); 1773e688b725SChris Mason ret = btrfs_free_and_pin_reserved_extent(root, 1774d00aff00SChris Mason bytenr, blocksize); 177579787eaaSJeff Mahoney BUG_ON(ret); /* -ENOMEM or logic errors */ 1776e02119d5SChris Mason } 1777e02119d5SChris Mason free_extent_buffer(next); 1778e02119d5SChris Mason continue; 1779e02119d5SChris Mason } 1780018642a1STsutomu Itoh ret = btrfs_read_buffer(next, ptr_gen); 1781018642a1STsutomu Itoh if (ret) { 1782018642a1STsutomu Itoh free_extent_buffer(next); 1783018642a1STsutomu Itoh return ret; 1784018642a1STsutomu Itoh } 1785e02119d5SChris Mason 1786e02119d5SChris Mason WARN_ON(*level <= 0); 1787e02119d5SChris Mason if (path->nodes[*level-1]) 1788e02119d5SChris Mason free_extent_buffer(path->nodes[*level-1]); 1789e02119d5SChris Mason path->nodes[*level-1] = next; 1790e02119d5SChris Mason *level = btrfs_header_level(next); 1791e02119d5SChris Mason path->slots[*level] = 0; 1792e02119d5SChris Mason cond_resched(); 1793e02119d5SChris Mason } 1794e02119d5SChris Mason 
WARN_ON(*level < 0); 1795e02119d5SChris Mason WARN_ON(*level >= BTRFS_MAX_LEVEL); 1796e02119d5SChris Mason 17974a500fd1SYan, Zheng path->slots[*level] = btrfs_header_nritems(path->nodes[*level]); 1798e02119d5SChris Mason 1799e02119d5SChris Mason cond_resched(); 1800e02119d5SChris Mason return 0; 1801e02119d5SChris Mason } 1802e02119d5SChris Mason 1803d397712bSChris Mason static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans, 1804e02119d5SChris Mason struct btrfs_root *root, 1805e02119d5SChris Mason struct btrfs_path *path, int *level, 1806e02119d5SChris Mason struct walk_control *wc) 1807e02119d5SChris Mason { 1808e02119d5SChris Mason u64 root_owner; 1809e02119d5SChris Mason int i; 1810e02119d5SChris Mason int slot; 1811e02119d5SChris Mason int ret; 1812e02119d5SChris Mason 1813e02119d5SChris Mason for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) { 1814e02119d5SChris Mason slot = path->slots[i]; 18154a500fd1SYan, Zheng if (slot + 1 < btrfs_header_nritems(path->nodes[i])) { 1816e02119d5SChris Mason path->slots[i]++; 1817e02119d5SChris Mason *level = i; 1818e02119d5SChris Mason WARN_ON(*level == 0); 1819e02119d5SChris Mason return 0; 1820e02119d5SChris Mason } else { 182131840ae1SZheng Yan struct extent_buffer *parent; 182231840ae1SZheng Yan if (path->nodes[*level] == root->node) 182331840ae1SZheng Yan parent = path->nodes[*level]; 182431840ae1SZheng Yan else 182531840ae1SZheng Yan parent = path->nodes[*level + 1]; 182631840ae1SZheng Yan 182731840ae1SZheng Yan root_owner = btrfs_header_owner(parent); 18281e5063d0SMark Fasheh ret = wc->process_func(root, path->nodes[*level], wc, 1829e02119d5SChris Mason btrfs_header_generation(path->nodes[*level])); 18301e5063d0SMark Fasheh if (ret) 18311e5063d0SMark Fasheh return ret; 18321e5063d0SMark Fasheh 1833e02119d5SChris Mason if (wc->free) { 1834e02119d5SChris Mason struct extent_buffer *next; 1835e02119d5SChris Mason 1836e02119d5SChris Mason next = path->nodes[*level]; 1837e02119d5SChris Mason 
1838e02119d5SChris Mason btrfs_tree_lock(next); 1839b4ce94deSChris Mason btrfs_set_lock_blocking(next); 1840bd681513SChris Mason clean_tree_block(trans, root, next); 1841e02119d5SChris Mason btrfs_wait_tree_block_writeback(next); 1842e02119d5SChris Mason btrfs_tree_unlock(next); 1843e02119d5SChris Mason 1844e02119d5SChris Mason WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID); 1845e688b725SChris Mason ret = btrfs_free_and_pin_reserved_extent(root, 1846e02119d5SChris Mason path->nodes[*level]->start, 1847d00aff00SChris Mason path->nodes[*level]->len); 1848e02119d5SChris Mason BUG_ON(ret); 1849e02119d5SChris Mason } 1850e02119d5SChris Mason free_extent_buffer(path->nodes[*level]); 1851e02119d5SChris Mason path->nodes[*level] = NULL; 1852e02119d5SChris Mason *level = i + 1; 1853e02119d5SChris Mason } 1854e02119d5SChris Mason } 1855e02119d5SChris Mason return 1; 1856e02119d5SChris Mason } 1857e02119d5SChris Mason 1858e02119d5SChris Mason /* 1859e02119d5SChris Mason * drop the reference count on the tree rooted at 'snap'. This traverses 1860e02119d5SChris Mason * the tree freeing any blocks that have a ref count of zero after being 1861e02119d5SChris Mason * decremented. 
1862e02119d5SChris Mason */ 1863e02119d5SChris Mason static int walk_log_tree(struct btrfs_trans_handle *trans, 1864e02119d5SChris Mason struct btrfs_root *log, struct walk_control *wc) 1865e02119d5SChris Mason { 1866e02119d5SChris Mason int ret = 0; 1867e02119d5SChris Mason int wret; 1868e02119d5SChris Mason int level; 1869e02119d5SChris Mason struct btrfs_path *path; 1870e02119d5SChris Mason int i; 1871e02119d5SChris Mason int orig_level; 1872e02119d5SChris Mason 1873e02119d5SChris Mason path = btrfs_alloc_path(); 1874db5b493aSTsutomu Itoh if (!path) 1875db5b493aSTsutomu Itoh return -ENOMEM; 1876e02119d5SChris Mason 1877e02119d5SChris Mason level = btrfs_header_level(log->node); 1878e02119d5SChris Mason orig_level = level; 1879e02119d5SChris Mason path->nodes[level] = log->node; 1880e02119d5SChris Mason extent_buffer_get(log->node); 1881e02119d5SChris Mason path->slots[level] = 0; 1882e02119d5SChris Mason 1883e02119d5SChris Mason while (1) { 1884e02119d5SChris Mason wret = walk_down_log_tree(trans, log, path, &level, wc); 1885e02119d5SChris Mason if (wret > 0) 1886e02119d5SChris Mason break; 188779787eaaSJeff Mahoney if (wret < 0) { 1888e02119d5SChris Mason ret = wret; 188979787eaaSJeff Mahoney goto out; 189079787eaaSJeff Mahoney } 1891e02119d5SChris Mason 1892e02119d5SChris Mason wret = walk_up_log_tree(trans, log, path, &level, wc); 1893e02119d5SChris Mason if (wret > 0) 1894e02119d5SChris Mason break; 189579787eaaSJeff Mahoney if (wret < 0) { 1896e02119d5SChris Mason ret = wret; 189779787eaaSJeff Mahoney goto out; 189879787eaaSJeff Mahoney } 1899e02119d5SChris Mason } 1900e02119d5SChris Mason 1901e02119d5SChris Mason /* was the root node processed? 
if not, catch it here */ 1902e02119d5SChris Mason if (path->nodes[orig_level]) { 190379787eaaSJeff Mahoney ret = wc->process_func(log, path->nodes[orig_level], wc, 1904e02119d5SChris Mason btrfs_header_generation(path->nodes[orig_level])); 190579787eaaSJeff Mahoney if (ret) 190679787eaaSJeff Mahoney goto out; 1907e02119d5SChris Mason if (wc->free) { 1908e02119d5SChris Mason struct extent_buffer *next; 1909e02119d5SChris Mason 1910e02119d5SChris Mason next = path->nodes[orig_level]; 1911e02119d5SChris Mason 1912e02119d5SChris Mason btrfs_tree_lock(next); 1913b4ce94deSChris Mason btrfs_set_lock_blocking(next); 1914bd681513SChris Mason clean_tree_block(trans, log, next); 1915e02119d5SChris Mason btrfs_wait_tree_block_writeback(next); 1916e02119d5SChris Mason btrfs_tree_unlock(next); 1917e02119d5SChris Mason 1918e02119d5SChris Mason WARN_ON(log->root_key.objectid != 1919e02119d5SChris Mason BTRFS_TREE_LOG_OBJECTID); 1920e688b725SChris Mason ret = btrfs_free_and_pin_reserved_extent(log, next->start, 1921d00aff00SChris Mason next->len); 192279787eaaSJeff Mahoney BUG_ON(ret); /* -ENOMEM or logic errors */ 1923e02119d5SChris Mason } 1924e02119d5SChris Mason } 1925e02119d5SChris Mason 192679787eaaSJeff Mahoney out: 1927e02119d5SChris Mason for (i = 0; i <= orig_level; i++) { 1928e02119d5SChris Mason if (path->nodes[i]) { 1929e02119d5SChris Mason free_extent_buffer(path->nodes[i]); 1930e02119d5SChris Mason path->nodes[i] = NULL; 1931e02119d5SChris Mason } 1932e02119d5SChris Mason } 1933e02119d5SChris Mason btrfs_free_path(path); 1934e02119d5SChris Mason return ret; 1935e02119d5SChris Mason } 1936e02119d5SChris Mason 19377237f183SYan Zheng /* 19387237f183SYan Zheng * helper function to update the item for a given subvolumes log root 19397237f183SYan Zheng * in the tree of log roots 19407237f183SYan Zheng */ 19417237f183SYan Zheng static int update_log_root(struct btrfs_trans_handle *trans, 19427237f183SYan Zheng struct btrfs_root *log) 19437237f183SYan Zheng { 
19447237f183SYan Zheng int ret; 19457237f183SYan Zheng 19467237f183SYan Zheng if (log->log_transid == 1) { 19477237f183SYan Zheng /* insert root item on the first sync */ 19487237f183SYan Zheng ret = btrfs_insert_root(trans, log->fs_info->log_root_tree, 19497237f183SYan Zheng &log->root_key, &log->root_item); 19507237f183SYan Zheng } else { 19517237f183SYan Zheng ret = btrfs_update_root(trans, log->fs_info->log_root_tree, 19527237f183SYan Zheng &log->root_key, &log->root_item); 19537237f183SYan Zheng } 19547237f183SYan Zheng return ret; 19557237f183SYan Zheng } 19567237f183SYan Zheng 195712fcfd22SChris Mason static int wait_log_commit(struct btrfs_trans_handle *trans, 195812fcfd22SChris Mason struct btrfs_root *root, unsigned long transid) 1959e02119d5SChris Mason { 1960e02119d5SChris Mason DEFINE_WAIT(wait); 19617237f183SYan Zheng int index = transid % 2; 1962e02119d5SChris Mason 19637237f183SYan Zheng /* 19647237f183SYan Zheng * we only allow two pending log transactions at a time, 19657237f183SYan Zheng * so we know that if ours is more than 2 older than the 19667237f183SYan Zheng * current transaction, we're done 19677237f183SYan Zheng */ 1968e02119d5SChris Mason do { 19697237f183SYan Zheng prepare_to_wait(&root->log_commit_wait[index], 19707237f183SYan Zheng &wait, TASK_UNINTERRUPTIBLE); 19717237f183SYan Zheng mutex_unlock(&root->log_mutex); 197212fcfd22SChris Mason 197312fcfd22SChris Mason if (root->fs_info->last_trans_log_full_commit != 197412fcfd22SChris Mason trans->transid && root->log_transid < transid + 2 && 19757237f183SYan Zheng atomic_read(&root->log_commit[index])) 1976e02119d5SChris Mason schedule(); 197712fcfd22SChris Mason 19787237f183SYan Zheng finish_wait(&root->log_commit_wait[index], &wait); 19797237f183SYan Zheng mutex_lock(&root->log_mutex); 19806dd70ce4SJan Kara } while (root->fs_info->last_trans_log_full_commit != 19816dd70ce4SJan Kara trans->transid && root->log_transid < transid + 2 && 19827237f183SYan Zheng 
atomic_read(&root->log_commit[index])); 19837237f183SYan Zheng return 0; 19847237f183SYan Zheng } 19857237f183SYan Zheng 1986143bede5SJeff Mahoney static void wait_for_writer(struct btrfs_trans_handle *trans, 198712fcfd22SChris Mason struct btrfs_root *root) 19887237f183SYan Zheng { 19897237f183SYan Zheng DEFINE_WAIT(wait); 19906dd70ce4SJan Kara while (root->fs_info->last_trans_log_full_commit != 19916dd70ce4SJan Kara trans->transid && atomic_read(&root->log_writers)) { 19927237f183SYan Zheng prepare_to_wait(&root->log_writer_wait, 19937237f183SYan Zheng &wait, TASK_UNINTERRUPTIBLE); 19947237f183SYan Zheng mutex_unlock(&root->log_mutex); 199512fcfd22SChris Mason if (root->fs_info->last_trans_log_full_commit != 199612fcfd22SChris Mason trans->transid && atomic_read(&root->log_writers)) 19977237f183SYan Zheng schedule(); 19987237f183SYan Zheng mutex_lock(&root->log_mutex); 19997237f183SYan Zheng finish_wait(&root->log_writer_wait, &wait); 20007237f183SYan Zheng } 2001e02119d5SChris Mason } 2002e02119d5SChris Mason 2003e02119d5SChris Mason /* 2004e02119d5SChris Mason * btrfs_sync_log does sends a given tree log down to the disk and 2005e02119d5SChris Mason * updates the super blocks to record it. When this call is done, 200612fcfd22SChris Mason * you know that any inodes previously logged are safely on disk only 200712fcfd22SChris Mason * if it returns 0. 200812fcfd22SChris Mason * 200912fcfd22SChris Mason * Any other return value means you need to call btrfs_commit_transaction. 201012fcfd22SChris Mason * Some of the edge cases for fsyncing directories that have had unlinks 201112fcfd22SChris Mason * or renames done in the past mean that sometimes the only safe 201212fcfd22SChris Mason * fsync is to commit the whole FS. When btrfs_sync_log returns -EAGAIN, 201312fcfd22SChris Mason * that has happened. 
2014e02119d5SChris Mason */ 2015e02119d5SChris Mason int btrfs_sync_log(struct btrfs_trans_handle *trans, 2016e02119d5SChris Mason struct btrfs_root *root) 2017e02119d5SChris Mason { 20187237f183SYan Zheng int index1; 20197237f183SYan Zheng int index2; 20208cef4e16SYan, Zheng int mark; 2021e02119d5SChris Mason int ret; 2022e02119d5SChris Mason struct btrfs_root *log = root->log_root; 20237237f183SYan Zheng struct btrfs_root *log_root_tree = root->fs_info->log_root_tree; 20248cef4e16SYan, Zheng unsigned long log_transid = 0; 2025e02119d5SChris Mason 20267237f183SYan Zheng mutex_lock(&root->log_mutex); 20277237f183SYan Zheng index1 = root->log_transid % 2; 20287237f183SYan Zheng if (atomic_read(&root->log_commit[index1])) { 202912fcfd22SChris Mason wait_log_commit(trans, root, root->log_transid); 20307237f183SYan Zheng mutex_unlock(&root->log_mutex); 20317237f183SYan Zheng return 0; 2032e02119d5SChris Mason } 20337237f183SYan Zheng atomic_set(&root->log_commit[index1], 1); 20347237f183SYan Zheng 20357237f183SYan Zheng /* wait for previous tree log sync to complete */ 20367237f183SYan Zheng if (atomic_read(&root->log_commit[(index1 + 1) % 2])) 203712fcfd22SChris Mason wait_log_commit(trans, root, root->log_transid - 1); 203886df7eb9SYan, Zheng while (1) { 2039*2ecb7923SMiao Xie int batch = atomic_read(&root->log_batch); 2040cd354ad6SChris Mason /* when we're on an ssd, just kick the log commit out */ 2041cd354ad6SChris Mason if (!btrfs_test_opt(root, SSD) && root->log_multiple_pids) { 20427237f183SYan Zheng mutex_unlock(&root->log_mutex); 2043e02119d5SChris Mason schedule_timeout_uninterruptible(1); 20447237f183SYan Zheng mutex_lock(&root->log_mutex); 204586df7eb9SYan, Zheng } 204612fcfd22SChris Mason wait_for_writer(trans, root); 2047*2ecb7923SMiao Xie if (batch == atomic_read(&root->log_batch)) 2048e02119d5SChris Mason break; 2049e02119d5SChris Mason } 2050d0c803c4SChris Mason 205112fcfd22SChris Mason /* bail out if we need to do a full commit */ 205212fcfd22SChris 
Mason if (root->fs_info->last_trans_log_full_commit == trans->transid) { 205312fcfd22SChris Mason ret = -EAGAIN; 205412fcfd22SChris Mason mutex_unlock(&root->log_mutex); 205512fcfd22SChris Mason goto out; 205612fcfd22SChris Mason } 205712fcfd22SChris Mason 20588cef4e16SYan, Zheng log_transid = root->log_transid; 20598cef4e16SYan, Zheng if (log_transid % 2 == 0) 20608cef4e16SYan, Zheng mark = EXTENT_DIRTY; 20618cef4e16SYan, Zheng else 20628cef4e16SYan, Zheng mark = EXTENT_NEW; 20638cef4e16SYan, Zheng 2064690587d1SChris Mason /* we start IO on all the marked extents here, but we don't actually 2065690587d1SChris Mason * wait for them until later. 2066690587d1SChris Mason */ 20678cef4e16SYan, Zheng ret = btrfs_write_marked_extents(log, &log->dirty_log_pages, mark); 206879787eaaSJeff Mahoney if (ret) { 206979787eaaSJeff Mahoney btrfs_abort_transaction(trans, root, ret); 207079787eaaSJeff Mahoney mutex_unlock(&root->log_mutex); 207179787eaaSJeff Mahoney goto out; 207279787eaaSJeff Mahoney } 20737237f183SYan Zheng 20745d4f98a2SYan Zheng btrfs_set_root_node(&log->root_item, log->node); 20757237f183SYan Zheng 20767237f183SYan Zheng root->log_transid++; 20777237f183SYan Zheng log->log_transid = root->log_transid; 2078ff782e0aSJosef Bacik root->log_start_pid = 0; 20797237f183SYan Zheng smp_mb(); 20807237f183SYan Zheng /* 20818cef4e16SYan, Zheng * IO has been started, blocks of the log tree have WRITTEN flag set 20828cef4e16SYan, Zheng * in their headers. new modifications of the log will be written to 20838cef4e16SYan, Zheng * new positions. so it's safe to allow log writers to go in. 
20847237f183SYan Zheng */ 20857237f183SYan Zheng mutex_unlock(&root->log_mutex); 20867237f183SYan Zheng 20877237f183SYan Zheng mutex_lock(&log_root_tree->log_mutex); 2088*2ecb7923SMiao Xie atomic_inc(&log_root_tree->log_batch); 20897237f183SYan Zheng atomic_inc(&log_root_tree->log_writers); 20907237f183SYan Zheng mutex_unlock(&log_root_tree->log_mutex); 20917237f183SYan Zheng 20927237f183SYan Zheng ret = update_log_root(trans, log); 20937237f183SYan Zheng 20947237f183SYan Zheng mutex_lock(&log_root_tree->log_mutex); 20957237f183SYan Zheng if (atomic_dec_and_test(&log_root_tree->log_writers)) { 20967237f183SYan Zheng smp_mb(); 20977237f183SYan Zheng if (waitqueue_active(&log_root_tree->log_writer_wait)) 20987237f183SYan Zheng wake_up(&log_root_tree->log_writer_wait); 20997237f183SYan Zheng } 21007237f183SYan Zheng 21014a500fd1SYan, Zheng if (ret) { 210279787eaaSJeff Mahoney if (ret != -ENOSPC) { 210379787eaaSJeff Mahoney btrfs_abort_transaction(trans, root, ret); 210479787eaaSJeff Mahoney mutex_unlock(&log_root_tree->log_mutex); 210579787eaaSJeff Mahoney goto out; 210679787eaaSJeff Mahoney } 21074a500fd1SYan, Zheng root->fs_info->last_trans_log_full_commit = trans->transid; 21084a500fd1SYan, Zheng btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); 21094a500fd1SYan, Zheng mutex_unlock(&log_root_tree->log_mutex); 21104a500fd1SYan, Zheng ret = -EAGAIN; 21114a500fd1SYan, Zheng goto out; 21124a500fd1SYan, Zheng } 21134a500fd1SYan, Zheng 21147237f183SYan Zheng index2 = log_root_tree->log_transid % 2; 21157237f183SYan Zheng if (atomic_read(&log_root_tree->log_commit[index2])) { 21168cef4e16SYan, Zheng btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); 211712fcfd22SChris Mason wait_log_commit(trans, log_root_tree, 211812fcfd22SChris Mason log_root_tree->log_transid); 21197237f183SYan Zheng mutex_unlock(&log_root_tree->log_mutex); 2120b31eabd8SChris Mason ret = 0; 21217237f183SYan Zheng goto out; 21227237f183SYan Zheng } 21237237f183SYan Zheng 
atomic_set(&log_root_tree->log_commit[index2], 1); 21247237f183SYan Zheng 212512fcfd22SChris Mason if (atomic_read(&log_root_tree->log_commit[(index2 + 1) % 2])) { 212612fcfd22SChris Mason wait_log_commit(trans, log_root_tree, 212712fcfd22SChris Mason log_root_tree->log_transid - 1); 212812fcfd22SChris Mason } 21297237f183SYan Zheng 213012fcfd22SChris Mason wait_for_writer(trans, log_root_tree); 213112fcfd22SChris Mason 213212fcfd22SChris Mason /* 213312fcfd22SChris Mason * now that we've moved on to the tree of log tree roots, 213412fcfd22SChris Mason * check the full commit flag again 213512fcfd22SChris Mason */ 213612fcfd22SChris Mason if (root->fs_info->last_trans_log_full_commit == trans->transid) { 21378cef4e16SYan, Zheng btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); 213812fcfd22SChris Mason mutex_unlock(&log_root_tree->log_mutex); 213912fcfd22SChris Mason ret = -EAGAIN; 214012fcfd22SChris Mason goto out_wake_log_root; 214112fcfd22SChris Mason } 21427237f183SYan Zheng 21437237f183SYan Zheng ret = btrfs_write_and_wait_marked_extents(log_root_tree, 21448cef4e16SYan, Zheng &log_root_tree->dirty_log_pages, 21458cef4e16SYan, Zheng EXTENT_DIRTY | EXTENT_NEW); 214679787eaaSJeff Mahoney if (ret) { 214779787eaaSJeff Mahoney btrfs_abort_transaction(trans, root, ret); 214879787eaaSJeff Mahoney mutex_unlock(&log_root_tree->log_mutex); 214979787eaaSJeff Mahoney goto out_wake_log_root; 215079787eaaSJeff Mahoney } 21518cef4e16SYan, Zheng btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); 2152e02119d5SChris Mason 21536c41761fSDavid Sterba btrfs_set_super_log_root(root->fs_info->super_for_commit, 21547237f183SYan Zheng log_root_tree->node->start); 21556c41761fSDavid Sterba btrfs_set_super_log_root_level(root->fs_info->super_for_commit, 21567237f183SYan Zheng btrfs_header_level(log_root_tree->node)); 2157e02119d5SChris Mason 21587237f183SYan Zheng log_root_tree->log_transid++; 2159e02119d5SChris Mason smp_mb(); 21607237f183SYan Zheng 21617237f183SYan 
Zheng mutex_unlock(&log_root_tree->log_mutex); 21627237f183SYan Zheng 21637237f183SYan Zheng /* 21647237f183SYan Zheng * nobody else is going to jump in and write the the ctree 21657237f183SYan Zheng * super here because the log_commit atomic below is protecting 21667237f183SYan Zheng * us. We must be called with a transaction handle pinning 21677237f183SYan Zheng * the running transaction open, so a full commit can't hop 21687237f183SYan Zheng * in and cause problems either. 21697237f183SYan Zheng */ 2170a2de733cSArne Jansen btrfs_scrub_pause_super(root); 21714722607dSChris Mason write_ctree_super(trans, root->fs_info->tree_root, 1); 2172a2de733cSArne Jansen btrfs_scrub_continue_super(root); 217312fcfd22SChris Mason ret = 0; 21747237f183SYan Zheng 2175257c62e1SChris Mason mutex_lock(&root->log_mutex); 2176257c62e1SChris Mason if (root->last_log_commit < log_transid) 2177257c62e1SChris Mason root->last_log_commit = log_transid; 2178257c62e1SChris Mason mutex_unlock(&root->log_mutex); 2179257c62e1SChris Mason 218012fcfd22SChris Mason out_wake_log_root: 21817237f183SYan Zheng atomic_set(&log_root_tree->log_commit[index2], 0); 21827237f183SYan Zheng smp_mb(); 21837237f183SYan Zheng if (waitqueue_active(&log_root_tree->log_commit_wait[index2])) 21847237f183SYan Zheng wake_up(&log_root_tree->log_commit_wait[index2]); 2185e02119d5SChris Mason out: 21867237f183SYan Zheng atomic_set(&root->log_commit[index1], 0); 21877237f183SYan Zheng smp_mb(); 21887237f183SYan Zheng if (waitqueue_active(&root->log_commit_wait[index1])) 21897237f183SYan Zheng wake_up(&root->log_commit_wait[index1]); 2190b31eabd8SChris Mason return ret; 2191e02119d5SChris Mason } 2192e02119d5SChris Mason 21934a500fd1SYan, Zheng static void free_log_tree(struct btrfs_trans_handle *trans, 21944a500fd1SYan, Zheng struct btrfs_root *log) 2195e02119d5SChris Mason { 2196e02119d5SChris Mason int ret; 2197d0c803c4SChris Mason u64 start; 2198d0c803c4SChris Mason u64 end; 2199e02119d5SChris Mason struct walk_control 
wc = { 2200e02119d5SChris Mason .free = 1, 2201e02119d5SChris Mason .process_func = process_one_buffer 2202e02119d5SChris Mason }; 2203e02119d5SChris Mason 2204e02119d5SChris Mason ret = walk_log_tree(trans, log, &wc); 2205e02119d5SChris Mason BUG_ON(ret); 2206e02119d5SChris Mason 2207d0c803c4SChris Mason while (1) { 2208d0c803c4SChris Mason ret = find_first_extent_bit(&log->dirty_log_pages, 22098cef4e16SYan, Zheng 0, &start, &end, EXTENT_DIRTY | EXTENT_NEW); 2210d0c803c4SChris Mason if (ret) 2211d0c803c4SChris Mason break; 2212d0c803c4SChris Mason 22138cef4e16SYan, Zheng clear_extent_bits(&log->dirty_log_pages, start, end, 22148cef4e16SYan, Zheng EXTENT_DIRTY | EXTENT_NEW, GFP_NOFS); 2215d0c803c4SChris Mason } 2216d0c803c4SChris Mason 22177237f183SYan Zheng free_extent_buffer(log->node); 22187237f183SYan Zheng kfree(log); 22194a500fd1SYan, Zheng } 22204a500fd1SYan, Zheng 22214a500fd1SYan, Zheng /* 22224a500fd1SYan, Zheng * free all the extents used by the tree log. This should be called 22234a500fd1SYan, Zheng * at commit time of the full transaction 22244a500fd1SYan, Zheng */ 22254a500fd1SYan, Zheng int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root) 22264a500fd1SYan, Zheng { 22274a500fd1SYan, Zheng if (root->log_root) { 22284a500fd1SYan, Zheng free_log_tree(trans, root->log_root); 22294a500fd1SYan, Zheng root->log_root = NULL; 22304a500fd1SYan, Zheng } 22314a500fd1SYan, Zheng return 0; 22324a500fd1SYan, Zheng } 22334a500fd1SYan, Zheng 22344a500fd1SYan, Zheng int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, 22354a500fd1SYan, Zheng struct btrfs_fs_info *fs_info) 22364a500fd1SYan, Zheng { 22374a500fd1SYan, Zheng if (fs_info->log_root_tree) { 22384a500fd1SYan, Zheng free_log_tree(trans, fs_info->log_root_tree); 22394a500fd1SYan, Zheng fs_info->log_root_tree = NULL; 22404a500fd1SYan, Zheng } 2241e02119d5SChris Mason return 0; 2242e02119d5SChris Mason } 2243e02119d5SChris Mason 2244e02119d5SChris Mason /* 2245e02119d5SChris 
 * If both a file and directory are logged, and unlinks or renames are
 * mixed in, we have a few interesting corners:
 *
 * create file X in dir Y
 * link file X to X.link in dir Y
 * fsync file X
 * unlink file X but leave X.link
 * fsync dir Y
 *
 * After a crash we would expect only X.link to exist.  But file X
 * didn't get fsync'd again so the log has back refs for X and X.link.
 *
 * We solve this by removing directory entries and inode backrefs from the
 * log when a file that was logged in the current transaction is
 * unlinked.  Any later fsync will include the updated log entries, and
 * we'll be able to reconstruct the proper directory items from backrefs.
 *
 * This optimization allows us to avoid relogging the entire inode
 * or the entire directory.
2264e02119d5SChris Mason */ 2265e02119d5SChris Mason int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, 2266e02119d5SChris Mason struct btrfs_root *root, 2267e02119d5SChris Mason const char *name, int name_len, 2268e02119d5SChris Mason struct inode *dir, u64 index) 2269e02119d5SChris Mason { 2270e02119d5SChris Mason struct btrfs_root *log; 2271e02119d5SChris Mason struct btrfs_dir_item *di; 2272e02119d5SChris Mason struct btrfs_path *path; 2273e02119d5SChris Mason int ret; 22744a500fd1SYan, Zheng int err = 0; 2275e02119d5SChris Mason int bytes_del = 0; 227633345d01SLi Zefan u64 dir_ino = btrfs_ino(dir); 2277e02119d5SChris Mason 22783a5f1d45SChris Mason if (BTRFS_I(dir)->logged_trans < trans->transid) 22793a5f1d45SChris Mason return 0; 22803a5f1d45SChris Mason 2281e02119d5SChris Mason ret = join_running_log_trans(root); 2282e02119d5SChris Mason if (ret) 2283e02119d5SChris Mason return 0; 2284e02119d5SChris Mason 2285e02119d5SChris Mason mutex_lock(&BTRFS_I(dir)->log_mutex); 2286e02119d5SChris Mason 2287e02119d5SChris Mason log = root->log_root; 2288e02119d5SChris Mason path = btrfs_alloc_path(); 2289a62f44a5STsutomu Itoh if (!path) { 2290a62f44a5STsutomu Itoh err = -ENOMEM; 2291a62f44a5STsutomu Itoh goto out_unlock; 2292a62f44a5STsutomu Itoh } 22932a29edc6Sliubo 229433345d01SLi Zefan di = btrfs_lookup_dir_item(trans, log, path, dir_ino, 2295e02119d5SChris Mason name, name_len, -1); 22964a500fd1SYan, Zheng if (IS_ERR(di)) { 22974a500fd1SYan, Zheng err = PTR_ERR(di); 22984a500fd1SYan, Zheng goto fail; 22994a500fd1SYan, Zheng } 23004a500fd1SYan, Zheng if (di) { 2301e02119d5SChris Mason ret = btrfs_delete_one_dir_name(trans, log, path, di); 2302e02119d5SChris Mason bytes_del += name_len; 2303e02119d5SChris Mason BUG_ON(ret); 2304e02119d5SChris Mason } 2305b3b4aa74SDavid Sterba btrfs_release_path(path); 230633345d01SLi Zefan di = btrfs_lookup_dir_index_item(trans, log, path, dir_ino, 2307e02119d5SChris Mason index, name, name_len, -1); 23084a500fd1SYan, 
Zheng if (IS_ERR(di)) { 23094a500fd1SYan, Zheng err = PTR_ERR(di); 23104a500fd1SYan, Zheng goto fail; 23114a500fd1SYan, Zheng } 23124a500fd1SYan, Zheng if (di) { 2313e02119d5SChris Mason ret = btrfs_delete_one_dir_name(trans, log, path, di); 2314e02119d5SChris Mason bytes_del += name_len; 2315e02119d5SChris Mason BUG_ON(ret); 2316e02119d5SChris Mason } 2317e02119d5SChris Mason 2318e02119d5SChris Mason /* update the directory size in the log to reflect the names 2319e02119d5SChris Mason * we have removed 2320e02119d5SChris Mason */ 2321e02119d5SChris Mason if (bytes_del) { 2322e02119d5SChris Mason struct btrfs_key key; 2323e02119d5SChris Mason 232433345d01SLi Zefan key.objectid = dir_ino; 2325e02119d5SChris Mason key.offset = 0; 2326e02119d5SChris Mason key.type = BTRFS_INODE_ITEM_KEY; 2327b3b4aa74SDavid Sterba btrfs_release_path(path); 2328e02119d5SChris Mason 2329e02119d5SChris Mason ret = btrfs_search_slot(trans, log, &key, path, 0, 1); 23304a500fd1SYan, Zheng if (ret < 0) { 23314a500fd1SYan, Zheng err = ret; 23324a500fd1SYan, Zheng goto fail; 23334a500fd1SYan, Zheng } 2334e02119d5SChris Mason if (ret == 0) { 2335e02119d5SChris Mason struct btrfs_inode_item *item; 2336e02119d5SChris Mason u64 i_size; 2337e02119d5SChris Mason 2338e02119d5SChris Mason item = btrfs_item_ptr(path->nodes[0], path->slots[0], 2339e02119d5SChris Mason struct btrfs_inode_item); 2340e02119d5SChris Mason i_size = btrfs_inode_size(path->nodes[0], item); 2341e02119d5SChris Mason if (i_size > bytes_del) 2342e02119d5SChris Mason i_size -= bytes_del; 2343e02119d5SChris Mason else 2344e02119d5SChris Mason i_size = 0; 2345e02119d5SChris Mason btrfs_set_inode_size(path->nodes[0], item, i_size); 2346e02119d5SChris Mason btrfs_mark_buffer_dirty(path->nodes[0]); 2347e02119d5SChris Mason } else 2348e02119d5SChris Mason ret = 0; 2349b3b4aa74SDavid Sterba btrfs_release_path(path); 2350e02119d5SChris Mason } 23514a500fd1SYan, Zheng fail: 2352e02119d5SChris Mason btrfs_free_path(path); 2353a62f44a5STsutomu 
Itoh out_unlock: 2354e02119d5SChris Mason mutex_unlock(&BTRFS_I(dir)->log_mutex); 23554a500fd1SYan, Zheng if (ret == -ENOSPC) { 23564a500fd1SYan, Zheng root->fs_info->last_trans_log_full_commit = trans->transid; 23574a500fd1SYan, Zheng ret = 0; 235879787eaaSJeff Mahoney } else if (ret < 0) 235979787eaaSJeff Mahoney btrfs_abort_transaction(trans, root, ret); 236079787eaaSJeff Mahoney 236112fcfd22SChris Mason btrfs_end_log_trans(root); 2362e02119d5SChris Mason 2363411fc6bcSAndi Kleen return err; 2364e02119d5SChris Mason } 2365e02119d5SChris Mason 2366e02119d5SChris Mason /* see comments for btrfs_del_dir_entries_in_log */ 2367e02119d5SChris Mason int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans, 2368e02119d5SChris Mason struct btrfs_root *root, 2369e02119d5SChris Mason const char *name, int name_len, 2370e02119d5SChris Mason struct inode *inode, u64 dirid) 2371e02119d5SChris Mason { 2372e02119d5SChris Mason struct btrfs_root *log; 2373e02119d5SChris Mason u64 index; 2374e02119d5SChris Mason int ret; 2375e02119d5SChris Mason 23763a5f1d45SChris Mason if (BTRFS_I(inode)->logged_trans < trans->transid) 23773a5f1d45SChris Mason return 0; 23783a5f1d45SChris Mason 2379e02119d5SChris Mason ret = join_running_log_trans(root); 2380e02119d5SChris Mason if (ret) 2381e02119d5SChris Mason return 0; 2382e02119d5SChris Mason log = root->log_root; 2383e02119d5SChris Mason mutex_lock(&BTRFS_I(inode)->log_mutex); 2384e02119d5SChris Mason 238533345d01SLi Zefan ret = btrfs_del_inode_ref(trans, log, name, name_len, btrfs_ino(inode), 2386e02119d5SChris Mason dirid, &index); 2387e02119d5SChris Mason mutex_unlock(&BTRFS_I(inode)->log_mutex); 23884a500fd1SYan, Zheng if (ret == -ENOSPC) { 23894a500fd1SYan, Zheng root->fs_info->last_trans_log_full_commit = trans->transid; 23904a500fd1SYan, Zheng ret = 0; 239179787eaaSJeff Mahoney } else if (ret < 0 && ret != -ENOENT) 239279787eaaSJeff Mahoney btrfs_abort_transaction(trans, root, ret); 239312fcfd22SChris Mason 
btrfs_end_log_trans(root); 2394e02119d5SChris Mason 2395e02119d5SChris Mason return ret; 2396e02119d5SChris Mason } 2397e02119d5SChris Mason 2398e02119d5SChris Mason /* 2399e02119d5SChris Mason * creates a range item in the log for 'dirid'. first_offset and 2400e02119d5SChris Mason * last_offset tell us which parts of the key space the log should 2401e02119d5SChris Mason * be considered authoritative for. 2402e02119d5SChris Mason */ 2403e02119d5SChris Mason static noinline int insert_dir_log_key(struct btrfs_trans_handle *trans, 2404e02119d5SChris Mason struct btrfs_root *log, 2405e02119d5SChris Mason struct btrfs_path *path, 2406e02119d5SChris Mason int key_type, u64 dirid, 2407e02119d5SChris Mason u64 first_offset, u64 last_offset) 2408e02119d5SChris Mason { 2409e02119d5SChris Mason int ret; 2410e02119d5SChris Mason struct btrfs_key key; 2411e02119d5SChris Mason struct btrfs_dir_log_item *item; 2412e02119d5SChris Mason 2413e02119d5SChris Mason key.objectid = dirid; 2414e02119d5SChris Mason key.offset = first_offset; 2415e02119d5SChris Mason if (key_type == BTRFS_DIR_ITEM_KEY) 2416e02119d5SChris Mason key.type = BTRFS_DIR_LOG_ITEM_KEY; 2417e02119d5SChris Mason else 2418e02119d5SChris Mason key.type = BTRFS_DIR_LOG_INDEX_KEY; 2419e02119d5SChris Mason ret = btrfs_insert_empty_item(trans, log, path, &key, sizeof(*item)); 24204a500fd1SYan, Zheng if (ret) 24214a500fd1SYan, Zheng return ret; 2422e02119d5SChris Mason 2423e02119d5SChris Mason item = btrfs_item_ptr(path->nodes[0], path->slots[0], 2424e02119d5SChris Mason struct btrfs_dir_log_item); 2425e02119d5SChris Mason btrfs_set_dir_log_end(path->nodes[0], item, last_offset); 2426e02119d5SChris Mason btrfs_mark_buffer_dirty(path->nodes[0]); 2427b3b4aa74SDavid Sterba btrfs_release_path(path); 2428e02119d5SChris Mason return 0; 2429e02119d5SChris Mason } 2430e02119d5SChris Mason 2431e02119d5SChris Mason /* 2432e02119d5SChris Mason * log all the items included in the current transaction for a given 2433e02119d5SChris 
Mason * directory. This also creates the range items in the log tree required 2434e02119d5SChris Mason * to replay anything deleted before the fsync 2435e02119d5SChris Mason */ 2436e02119d5SChris Mason static noinline int log_dir_items(struct btrfs_trans_handle *trans, 2437e02119d5SChris Mason struct btrfs_root *root, struct inode *inode, 2438e02119d5SChris Mason struct btrfs_path *path, 2439e02119d5SChris Mason struct btrfs_path *dst_path, int key_type, 2440e02119d5SChris Mason u64 min_offset, u64 *last_offset_ret) 2441e02119d5SChris Mason { 2442e02119d5SChris Mason struct btrfs_key min_key; 2443e02119d5SChris Mason struct btrfs_key max_key; 2444e02119d5SChris Mason struct btrfs_root *log = root->log_root; 2445e02119d5SChris Mason struct extent_buffer *src; 24464a500fd1SYan, Zheng int err = 0; 2447e02119d5SChris Mason int ret; 2448e02119d5SChris Mason int i; 2449e02119d5SChris Mason int nritems; 2450e02119d5SChris Mason u64 first_offset = min_offset; 2451e02119d5SChris Mason u64 last_offset = (u64)-1; 245233345d01SLi Zefan u64 ino = btrfs_ino(inode); 2453e02119d5SChris Mason 2454e02119d5SChris Mason log = root->log_root; 245533345d01SLi Zefan max_key.objectid = ino; 2456e02119d5SChris Mason max_key.offset = (u64)-1; 2457e02119d5SChris Mason max_key.type = key_type; 2458e02119d5SChris Mason 245933345d01SLi Zefan min_key.objectid = ino; 2460e02119d5SChris Mason min_key.type = key_type; 2461e02119d5SChris Mason min_key.offset = min_offset; 2462e02119d5SChris Mason 2463e02119d5SChris Mason path->keep_locks = 1; 2464e02119d5SChris Mason 2465e02119d5SChris Mason ret = btrfs_search_forward(root, &min_key, &max_key, 2466e02119d5SChris Mason path, 0, trans->transid); 2467e02119d5SChris Mason 2468e02119d5SChris Mason /* 2469e02119d5SChris Mason * we didn't find anything from this transaction, see if there 2470e02119d5SChris Mason * is anything at all 2471e02119d5SChris Mason */ 247233345d01SLi Zefan if (ret != 0 || min_key.objectid != ino || min_key.type != key_type) { 
247333345d01SLi Zefan min_key.objectid = ino; 2474e02119d5SChris Mason min_key.type = key_type; 2475e02119d5SChris Mason min_key.offset = (u64)-1; 2476b3b4aa74SDavid Sterba btrfs_release_path(path); 2477e02119d5SChris Mason ret = btrfs_search_slot(NULL, root, &min_key, path, 0, 0); 2478e02119d5SChris Mason if (ret < 0) { 2479b3b4aa74SDavid Sterba btrfs_release_path(path); 2480e02119d5SChris Mason return ret; 2481e02119d5SChris Mason } 248233345d01SLi Zefan ret = btrfs_previous_item(root, path, ino, key_type); 2483e02119d5SChris Mason 2484e02119d5SChris Mason /* if ret == 0 there are items for this type, 2485e02119d5SChris Mason * create a range to tell us the last key of this type. 2486e02119d5SChris Mason * otherwise, there are no items in this directory after 2487e02119d5SChris Mason * *min_offset, and we create a range to indicate that. 2488e02119d5SChris Mason */ 2489e02119d5SChris Mason if (ret == 0) { 2490e02119d5SChris Mason struct btrfs_key tmp; 2491e02119d5SChris Mason btrfs_item_key_to_cpu(path->nodes[0], &tmp, 2492e02119d5SChris Mason path->slots[0]); 2493d397712bSChris Mason if (key_type == tmp.type) 2494e02119d5SChris Mason first_offset = max(min_offset, tmp.offset) + 1; 2495e02119d5SChris Mason } 2496e02119d5SChris Mason goto done; 2497e02119d5SChris Mason } 2498e02119d5SChris Mason 2499e02119d5SChris Mason /* go backward to find any previous key */ 250033345d01SLi Zefan ret = btrfs_previous_item(root, path, ino, key_type); 2501e02119d5SChris Mason if (ret == 0) { 2502e02119d5SChris Mason struct btrfs_key tmp; 2503e02119d5SChris Mason btrfs_item_key_to_cpu(path->nodes[0], &tmp, path->slots[0]); 2504e02119d5SChris Mason if (key_type == tmp.type) { 2505e02119d5SChris Mason first_offset = tmp.offset; 2506e02119d5SChris Mason ret = overwrite_item(trans, log, dst_path, 2507e02119d5SChris Mason path->nodes[0], path->slots[0], 2508e02119d5SChris Mason &tmp); 25094a500fd1SYan, Zheng if (ret) { 25104a500fd1SYan, Zheng err = ret; 25114a500fd1SYan, Zheng goto 
done; 25124a500fd1SYan, Zheng } 2513e02119d5SChris Mason } 2514e02119d5SChris Mason } 2515b3b4aa74SDavid Sterba btrfs_release_path(path); 2516e02119d5SChris Mason 2517e02119d5SChris Mason /* find the first key from this transaction again */ 2518e02119d5SChris Mason ret = btrfs_search_slot(NULL, root, &min_key, path, 0, 0); 2519e02119d5SChris Mason if (ret != 0) { 2520e02119d5SChris Mason WARN_ON(1); 2521e02119d5SChris Mason goto done; 2522e02119d5SChris Mason } 2523e02119d5SChris Mason 2524e02119d5SChris Mason /* 2525e02119d5SChris Mason * we have a block from this transaction, log every item in it 2526e02119d5SChris Mason * from our directory 2527e02119d5SChris Mason */ 2528e02119d5SChris Mason while (1) { 2529e02119d5SChris Mason struct btrfs_key tmp; 2530e02119d5SChris Mason src = path->nodes[0]; 2531e02119d5SChris Mason nritems = btrfs_header_nritems(src); 2532e02119d5SChris Mason for (i = path->slots[0]; i < nritems; i++) { 2533e02119d5SChris Mason btrfs_item_key_to_cpu(src, &min_key, i); 2534e02119d5SChris Mason 253533345d01SLi Zefan if (min_key.objectid != ino || min_key.type != key_type) 2536e02119d5SChris Mason goto done; 2537e02119d5SChris Mason ret = overwrite_item(trans, log, dst_path, src, i, 2538e02119d5SChris Mason &min_key); 25394a500fd1SYan, Zheng if (ret) { 25404a500fd1SYan, Zheng err = ret; 25414a500fd1SYan, Zheng goto done; 25424a500fd1SYan, Zheng } 2543e02119d5SChris Mason } 2544e02119d5SChris Mason path->slots[0] = nritems; 2545e02119d5SChris Mason 2546e02119d5SChris Mason /* 2547e02119d5SChris Mason * look ahead to the next item and see if it is also 2548e02119d5SChris Mason * from this directory and from this transaction 2549e02119d5SChris Mason */ 2550e02119d5SChris Mason ret = btrfs_next_leaf(root, path); 2551e02119d5SChris Mason if (ret == 1) { 2552e02119d5SChris Mason last_offset = (u64)-1; 2553e02119d5SChris Mason goto done; 2554e02119d5SChris Mason } 2555e02119d5SChris Mason btrfs_item_key_to_cpu(path->nodes[0], &tmp, path->slots[0]); 
255633345d01SLi Zefan if (tmp.objectid != ino || tmp.type != key_type) { 2557e02119d5SChris Mason last_offset = (u64)-1; 2558e02119d5SChris Mason goto done; 2559e02119d5SChris Mason } 2560e02119d5SChris Mason if (btrfs_header_generation(path->nodes[0]) != trans->transid) { 2561e02119d5SChris Mason ret = overwrite_item(trans, log, dst_path, 2562e02119d5SChris Mason path->nodes[0], path->slots[0], 2563e02119d5SChris Mason &tmp); 25644a500fd1SYan, Zheng if (ret) 25654a500fd1SYan, Zheng err = ret; 25664a500fd1SYan, Zheng else 2567e02119d5SChris Mason last_offset = tmp.offset; 2568e02119d5SChris Mason goto done; 2569e02119d5SChris Mason } 2570e02119d5SChris Mason } 2571e02119d5SChris Mason done: 2572b3b4aa74SDavid Sterba btrfs_release_path(path); 2573b3b4aa74SDavid Sterba btrfs_release_path(dst_path); 2574e02119d5SChris Mason 25754a500fd1SYan, Zheng if (err == 0) { 25764a500fd1SYan, Zheng *last_offset_ret = last_offset; 25774a500fd1SYan, Zheng /* 25784a500fd1SYan, Zheng * insert the log range keys to indicate where the log 25794a500fd1SYan, Zheng * is valid 25804a500fd1SYan, Zheng */ 25814a500fd1SYan, Zheng ret = insert_dir_log_key(trans, log, path, key_type, 258233345d01SLi Zefan ino, first_offset, last_offset); 25834a500fd1SYan, Zheng if (ret) 25844a500fd1SYan, Zheng err = ret; 25854a500fd1SYan, Zheng } 25864a500fd1SYan, Zheng return err; 2587e02119d5SChris Mason } 2588e02119d5SChris Mason 2589e02119d5SChris Mason /* 2590e02119d5SChris Mason * logging directories is very similar to logging inodes, We find all the items 2591e02119d5SChris Mason * from the current transaction and write them to the log. 2592e02119d5SChris Mason * 2593e02119d5SChris Mason * The recovery code scans the directory in the subvolume, and if it finds a 2594e02119d5SChris Mason * key in the range logged that is not present in the log tree, then it means 2595e02119d5SChris Mason * that dir entry was unlinked during the transaction. 
2596e02119d5SChris Mason * 2597e02119d5SChris Mason * In order for that scan to work, we must include one key smaller than 2598e02119d5SChris Mason * the smallest logged by this transaction and one key larger than the largest 2599e02119d5SChris Mason * key logged by this transaction. 2600e02119d5SChris Mason */ 2601e02119d5SChris Mason static noinline int log_directory_changes(struct btrfs_trans_handle *trans, 2602e02119d5SChris Mason struct btrfs_root *root, struct inode *inode, 2603e02119d5SChris Mason struct btrfs_path *path, 2604e02119d5SChris Mason struct btrfs_path *dst_path) 2605e02119d5SChris Mason { 2606e02119d5SChris Mason u64 min_key; 2607e02119d5SChris Mason u64 max_key; 2608e02119d5SChris Mason int ret; 2609e02119d5SChris Mason int key_type = BTRFS_DIR_ITEM_KEY; 2610e02119d5SChris Mason 2611e02119d5SChris Mason again: 2612e02119d5SChris Mason min_key = 0; 2613e02119d5SChris Mason max_key = 0; 2614e02119d5SChris Mason while (1) { 2615e02119d5SChris Mason ret = log_dir_items(trans, root, inode, path, 2616e02119d5SChris Mason dst_path, key_type, min_key, 2617e02119d5SChris Mason &max_key); 26184a500fd1SYan, Zheng if (ret) 26194a500fd1SYan, Zheng return ret; 2620e02119d5SChris Mason if (max_key == (u64)-1) 2621e02119d5SChris Mason break; 2622e02119d5SChris Mason min_key = max_key + 1; 2623e02119d5SChris Mason } 2624e02119d5SChris Mason 2625e02119d5SChris Mason if (key_type == BTRFS_DIR_ITEM_KEY) { 2626e02119d5SChris Mason key_type = BTRFS_DIR_INDEX_KEY; 2627e02119d5SChris Mason goto again; 2628e02119d5SChris Mason } 2629e02119d5SChris Mason return 0; 2630e02119d5SChris Mason } 2631e02119d5SChris Mason 2632e02119d5SChris Mason /* 2633e02119d5SChris Mason * a helper function to drop items from the log before we relog an 2634e02119d5SChris Mason * inode. max_key_type indicates the highest item type to remove. 2635e02119d5SChris Mason * This cannot be run for file data extents because it does not 2636e02119d5SChris Mason * free the extents they point to. 
2637e02119d5SChris Mason */ 2638e02119d5SChris Mason static int drop_objectid_items(struct btrfs_trans_handle *trans, 2639e02119d5SChris Mason struct btrfs_root *log, 2640e02119d5SChris Mason struct btrfs_path *path, 2641e02119d5SChris Mason u64 objectid, int max_key_type) 2642e02119d5SChris Mason { 2643e02119d5SChris Mason int ret; 2644e02119d5SChris Mason struct btrfs_key key; 2645e02119d5SChris Mason struct btrfs_key found_key; 2646e02119d5SChris Mason 2647e02119d5SChris Mason key.objectid = objectid; 2648e02119d5SChris Mason key.type = max_key_type; 2649e02119d5SChris Mason key.offset = (u64)-1; 2650e02119d5SChris Mason 2651e02119d5SChris Mason while (1) { 2652e02119d5SChris Mason ret = btrfs_search_slot(trans, log, &key, path, -1, 1); 26534a500fd1SYan, Zheng BUG_ON(ret == 0); 26544a500fd1SYan, Zheng if (ret < 0) 2655e02119d5SChris Mason break; 2656e02119d5SChris Mason 2657e02119d5SChris Mason if (path->slots[0] == 0) 2658e02119d5SChris Mason break; 2659e02119d5SChris Mason 2660e02119d5SChris Mason path->slots[0]--; 2661e02119d5SChris Mason btrfs_item_key_to_cpu(path->nodes[0], &found_key, 2662e02119d5SChris Mason path->slots[0]); 2663e02119d5SChris Mason 2664e02119d5SChris Mason if (found_key.objectid != objectid) 2665e02119d5SChris Mason break; 2666e02119d5SChris Mason 2667e02119d5SChris Mason ret = btrfs_del_item(trans, log, path); 266865a246c5STsutomu Itoh if (ret) 266965a246c5STsutomu Itoh break; 2670b3b4aa74SDavid Sterba btrfs_release_path(path); 2671e02119d5SChris Mason } 2672b3b4aa74SDavid Sterba btrfs_release_path(path); 26735bdbeb21SJosef Bacik if (ret > 0) 26745bdbeb21SJosef Bacik ret = 0; 26754a500fd1SYan, Zheng return ret; 2676e02119d5SChris Mason } 2677e02119d5SChris Mason 267831ff1cd2SChris Mason static noinline int copy_items(struct btrfs_trans_handle *trans, 2679d2794405SLiu Bo struct inode *inode, 268031ff1cd2SChris Mason struct btrfs_path *dst_path, 268131ff1cd2SChris Mason struct extent_buffer *src, 268231ff1cd2SChris Mason int start_slot, 
int nr, int inode_only) 268331ff1cd2SChris Mason { 268431ff1cd2SChris Mason unsigned long src_offset; 268531ff1cd2SChris Mason unsigned long dst_offset; 2686d2794405SLiu Bo struct btrfs_root *log = BTRFS_I(inode)->root->log_root; 268731ff1cd2SChris Mason struct btrfs_file_extent_item *extent; 268831ff1cd2SChris Mason struct btrfs_inode_item *inode_item; 268931ff1cd2SChris Mason int ret; 269031ff1cd2SChris Mason struct btrfs_key *ins_keys; 269131ff1cd2SChris Mason u32 *ins_sizes; 269231ff1cd2SChris Mason char *ins_data; 269331ff1cd2SChris Mason int i; 2694d20f7043SChris Mason struct list_head ordered_sums; 2695d2794405SLiu Bo int skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; 2696d20f7043SChris Mason 2697d20f7043SChris Mason INIT_LIST_HEAD(&ordered_sums); 269831ff1cd2SChris Mason 269931ff1cd2SChris Mason ins_data = kmalloc(nr * sizeof(struct btrfs_key) + 270031ff1cd2SChris Mason nr * sizeof(u32), GFP_NOFS); 27012a29edc6Sliubo if (!ins_data) 27022a29edc6Sliubo return -ENOMEM; 27032a29edc6Sliubo 270431ff1cd2SChris Mason ins_sizes = (u32 *)ins_data; 270531ff1cd2SChris Mason ins_keys = (struct btrfs_key *)(ins_data + nr * sizeof(u32)); 270631ff1cd2SChris Mason 270731ff1cd2SChris Mason for (i = 0; i < nr; i++) { 270831ff1cd2SChris Mason ins_sizes[i] = btrfs_item_size_nr(src, i + start_slot); 270931ff1cd2SChris Mason btrfs_item_key_to_cpu(src, ins_keys + i, i + start_slot); 271031ff1cd2SChris Mason } 271131ff1cd2SChris Mason ret = btrfs_insert_empty_items(trans, log, dst_path, 271231ff1cd2SChris Mason ins_keys, ins_sizes, nr); 27134a500fd1SYan, Zheng if (ret) { 27144a500fd1SYan, Zheng kfree(ins_data); 27154a500fd1SYan, Zheng return ret; 27164a500fd1SYan, Zheng } 271731ff1cd2SChris Mason 27185d4f98a2SYan Zheng for (i = 0; i < nr; i++, dst_path->slots[0]++) { 271931ff1cd2SChris Mason dst_offset = btrfs_item_ptr_offset(dst_path->nodes[0], 272031ff1cd2SChris Mason dst_path->slots[0]); 272131ff1cd2SChris Mason 272231ff1cd2SChris Mason src_offset = 
btrfs_item_ptr_offset(src, start_slot + i); 272331ff1cd2SChris Mason 272431ff1cd2SChris Mason copy_extent_buffer(dst_path->nodes[0], src, dst_offset, 272531ff1cd2SChris Mason src_offset, ins_sizes[i]); 272631ff1cd2SChris Mason 272731ff1cd2SChris Mason if (inode_only == LOG_INODE_EXISTS && 272831ff1cd2SChris Mason ins_keys[i].type == BTRFS_INODE_ITEM_KEY) { 272931ff1cd2SChris Mason inode_item = btrfs_item_ptr(dst_path->nodes[0], 273031ff1cd2SChris Mason dst_path->slots[0], 273131ff1cd2SChris Mason struct btrfs_inode_item); 273231ff1cd2SChris Mason btrfs_set_inode_size(dst_path->nodes[0], inode_item, 0); 273331ff1cd2SChris Mason 273431ff1cd2SChris Mason /* set the generation to zero so the recover code 273531ff1cd2SChris Mason * can tell the difference between an logging 273631ff1cd2SChris Mason * just to say 'this inode exists' and a logging 273731ff1cd2SChris Mason * to say 'update this inode with these values' 273831ff1cd2SChris Mason */ 273931ff1cd2SChris Mason btrfs_set_inode_generation(dst_path->nodes[0], 274031ff1cd2SChris Mason inode_item, 0); 274131ff1cd2SChris Mason } 274231ff1cd2SChris Mason /* take a reference on file data extents so that truncates 274331ff1cd2SChris Mason * or deletes of this inode don't have to relog the inode 274431ff1cd2SChris Mason * again 274531ff1cd2SChris Mason */ 2746d2794405SLiu Bo if (btrfs_key_type(ins_keys + i) == BTRFS_EXTENT_DATA_KEY && 2747d2794405SLiu Bo !skip_csum) { 274831ff1cd2SChris Mason int found_type; 274931ff1cd2SChris Mason extent = btrfs_item_ptr(src, start_slot + i, 275031ff1cd2SChris Mason struct btrfs_file_extent_item); 275131ff1cd2SChris Mason 27528e531cdfSliubo if (btrfs_file_extent_generation(src, extent) < trans->transid) 27538e531cdfSliubo continue; 27548e531cdfSliubo 275531ff1cd2SChris Mason found_type = btrfs_file_extent_type(src, extent); 2756d899e052SYan Zheng if (found_type == BTRFS_FILE_EXTENT_REG || 2757d899e052SYan Zheng found_type == BTRFS_FILE_EXTENT_PREALLOC) { 27585d4f98a2SYan Zheng u64 ds, 
dl, cs, cl; 27595d4f98a2SYan Zheng ds = btrfs_file_extent_disk_bytenr(src, 276031ff1cd2SChris Mason extent); 27615d4f98a2SYan Zheng /* ds == 0 is a hole */ 27625d4f98a2SYan Zheng if (ds == 0) 27635d4f98a2SYan Zheng continue; 27645d4f98a2SYan Zheng 27655d4f98a2SYan Zheng dl = btrfs_file_extent_disk_num_bytes(src, 276631ff1cd2SChris Mason extent); 27675d4f98a2SYan Zheng cs = btrfs_file_extent_offset(src, extent); 27685d4f98a2SYan Zheng cl = btrfs_file_extent_num_bytes(src, 2769a419aef8SJoe Perches extent); 2770580afd76SChris Mason if (btrfs_file_extent_compression(src, 2771580afd76SChris Mason extent)) { 2772580afd76SChris Mason cs = 0; 2773580afd76SChris Mason cl = dl; 2774580afd76SChris Mason } 27755d4f98a2SYan Zheng 277607d400a6SYan Zheng ret = btrfs_lookup_csums_range( 2777d20f7043SChris Mason log->fs_info->csum_root, 277807d400a6SYan Zheng ds + cs, ds + cs + cl - 1, 2779a2de733cSArne Jansen &ordered_sums, 0); 2780d20f7043SChris Mason BUG_ON(ret); 278131ff1cd2SChris Mason } 278231ff1cd2SChris Mason } 278331ff1cd2SChris Mason } 278431ff1cd2SChris Mason 278531ff1cd2SChris Mason btrfs_mark_buffer_dirty(dst_path->nodes[0]); 2786b3b4aa74SDavid Sterba btrfs_release_path(dst_path); 278731ff1cd2SChris Mason kfree(ins_data); 2788d20f7043SChris Mason 2789d20f7043SChris Mason /* 2790d20f7043SChris Mason * we have to do this after the loop above to avoid changing the 2791d20f7043SChris Mason * log tree while trying to change the log tree. 
2792d20f7043SChris Mason */ 27934a500fd1SYan, Zheng ret = 0; 2794d20f7043SChris Mason while (!list_empty(&ordered_sums)) { 2795d20f7043SChris Mason struct btrfs_ordered_sum *sums = list_entry(ordered_sums.next, 2796d20f7043SChris Mason struct btrfs_ordered_sum, 2797d20f7043SChris Mason list); 27984a500fd1SYan, Zheng if (!ret) 2799d20f7043SChris Mason ret = btrfs_csum_file_blocks(trans, log, sums); 2800d20f7043SChris Mason list_del(&sums->list); 2801d20f7043SChris Mason kfree(sums); 2802d20f7043SChris Mason } 28034a500fd1SYan, Zheng return ret; 280431ff1cd2SChris Mason } 280531ff1cd2SChris Mason 28065dc562c5SJosef Bacik static int extent_cmp(void *priv, struct list_head *a, struct list_head *b) 28075dc562c5SJosef Bacik { 28085dc562c5SJosef Bacik struct extent_map *em1, *em2; 28095dc562c5SJosef Bacik 28105dc562c5SJosef Bacik em1 = list_entry(a, struct extent_map, list); 28115dc562c5SJosef Bacik em2 = list_entry(b, struct extent_map, list); 28125dc562c5SJosef Bacik 28135dc562c5SJosef Bacik if (em1->start < em2->start) 28145dc562c5SJosef Bacik return -1; 28155dc562c5SJosef Bacik else if (em1->start > em2->start) 28165dc562c5SJosef Bacik return 1; 28175dc562c5SJosef Bacik return 0; 28185dc562c5SJosef Bacik } 28195dc562c5SJosef Bacik 28205dc562c5SJosef Bacik struct log_args { 28215dc562c5SJosef Bacik struct extent_buffer *src; 28225dc562c5SJosef Bacik u64 next_offset; 28235dc562c5SJosef Bacik int start_slot; 28245dc562c5SJosef Bacik int nr; 28255dc562c5SJosef Bacik }; 28265dc562c5SJosef Bacik 28275dc562c5SJosef Bacik static int log_one_extent(struct btrfs_trans_handle *trans, 28285dc562c5SJosef Bacik struct inode *inode, struct btrfs_root *root, 28295dc562c5SJosef Bacik struct extent_map *em, struct btrfs_path *path, 28305dc562c5SJosef Bacik struct btrfs_path *dst_path, struct log_args *args) 28315dc562c5SJosef Bacik { 28325dc562c5SJosef Bacik struct btrfs_root *log = root->log_root; 28335dc562c5SJosef Bacik struct btrfs_file_extent_item *fi; 28345dc562c5SJosef Bacik 
struct btrfs_key key; 28354e2f84e6SLiu Bo u64 start = em->mod_start; 28364e2f84e6SLiu Bo u64 len = em->mod_len; 28375dc562c5SJosef Bacik u64 num_bytes; 28385dc562c5SJosef Bacik int nritems; 28395dc562c5SJosef Bacik int ret; 28405dc562c5SJosef Bacik 28415dc562c5SJosef Bacik if (BTRFS_I(inode)->logged_trans == trans->transid) { 28425dc562c5SJosef Bacik ret = __btrfs_drop_extents(trans, log, inode, dst_path, start, 28432aaa6655SJosef Bacik start + len, NULL, 0); 28445dc562c5SJosef Bacik if (ret) 28455dc562c5SJosef Bacik return ret; 28465dc562c5SJosef Bacik } 28475dc562c5SJosef Bacik 28485dc562c5SJosef Bacik while (len) { 28495dc562c5SJosef Bacik if (args->nr) 28505dc562c5SJosef Bacik goto next_slot; 28515dc562c5SJosef Bacik key.objectid = btrfs_ino(inode); 28525dc562c5SJosef Bacik key.type = BTRFS_EXTENT_DATA_KEY; 28535dc562c5SJosef Bacik key.offset = start; 28545dc562c5SJosef Bacik 28555dc562c5SJosef Bacik ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 28565dc562c5SJosef Bacik if (ret < 0) 28575dc562c5SJosef Bacik return ret; 28585dc562c5SJosef Bacik if (ret) { 28595dc562c5SJosef Bacik /* 28605dc562c5SJosef Bacik * This shouldn't happen, but it might so warn and 28615dc562c5SJosef Bacik * return an error. 
28625dc562c5SJosef Bacik */ 28635dc562c5SJosef Bacik WARN_ON(1); 28645dc562c5SJosef Bacik return -ENOENT; 28655dc562c5SJosef Bacik } 28665dc562c5SJosef Bacik args->src = path->nodes[0]; 28675dc562c5SJosef Bacik next_slot: 28685dc562c5SJosef Bacik fi = btrfs_item_ptr(args->src, path->slots[0], 28695dc562c5SJosef Bacik struct btrfs_file_extent_item); 28705dc562c5SJosef Bacik if (args->nr && 28715dc562c5SJosef Bacik args->start_slot + args->nr == path->slots[0]) { 28725dc562c5SJosef Bacik args->nr++; 28735dc562c5SJosef Bacik } else if (args->nr) { 2874d2794405SLiu Bo ret = copy_items(trans, inode, dst_path, args->src, 28755dc562c5SJosef Bacik args->start_slot, args->nr, 28765dc562c5SJosef Bacik LOG_INODE_ALL); 28775dc562c5SJosef Bacik if (ret) 28785dc562c5SJosef Bacik return ret; 28795dc562c5SJosef Bacik args->nr = 1; 28805dc562c5SJosef Bacik args->start_slot = path->slots[0]; 28815dc562c5SJosef Bacik } else if (!args->nr) { 28825dc562c5SJosef Bacik args->nr = 1; 28835dc562c5SJosef Bacik args->start_slot = path->slots[0]; 28845dc562c5SJosef Bacik } 28855dc562c5SJosef Bacik nritems = btrfs_header_nritems(path->nodes[0]); 28865dc562c5SJosef Bacik path->slots[0]++; 28875dc562c5SJosef Bacik num_bytes = btrfs_file_extent_num_bytes(args->src, fi); 28885dc562c5SJosef Bacik if (len < num_bytes) { 28895dc562c5SJosef Bacik /* I _think_ this is ok, envision we write to a 28905dc562c5SJosef Bacik * preallocated space that is adjacent to a previously 28915dc562c5SJosef Bacik * written preallocated space that gets merged when we 28925dc562c5SJosef Bacik * mark this preallocated space written. If we do not 28935dc562c5SJosef Bacik * have the adjacent extent in cache then when we copy 28945dc562c5SJosef Bacik * this extent it could end up being larger than our EM 28955dc562c5SJosef Bacik * thinks it is, which is a-ok, so just set len to 0. 
28965dc562c5SJosef Bacik */ 28975dc562c5SJosef Bacik len = 0; 28985dc562c5SJosef Bacik } else { 28995dc562c5SJosef Bacik len -= num_bytes; 29005dc562c5SJosef Bacik } 29015dc562c5SJosef Bacik start += btrfs_file_extent_num_bytes(args->src, fi); 29025dc562c5SJosef Bacik args->next_offset = start; 29035dc562c5SJosef Bacik 29045dc562c5SJosef Bacik if (path->slots[0] < nritems) { 29055dc562c5SJosef Bacik if (len) 29065dc562c5SJosef Bacik goto next_slot; 29075dc562c5SJosef Bacik break; 29085dc562c5SJosef Bacik } 29095dc562c5SJosef Bacik 29105dc562c5SJosef Bacik if (args->nr) { 2911d2794405SLiu Bo ret = copy_items(trans, inode, dst_path, args->src, 29125dc562c5SJosef Bacik args->start_slot, args->nr, 29135dc562c5SJosef Bacik LOG_INODE_ALL); 29145dc562c5SJosef Bacik if (ret) 29155dc562c5SJosef Bacik return ret; 29165dc562c5SJosef Bacik args->nr = 0; 29175dc562c5SJosef Bacik btrfs_release_path(path); 29185dc562c5SJosef Bacik } 29195dc562c5SJosef Bacik } 29205dc562c5SJosef Bacik 29215dc562c5SJosef Bacik return 0; 29225dc562c5SJosef Bacik } 29235dc562c5SJosef Bacik 29245dc562c5SJosef Bacik static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, 29255dc562c5SJosef Bacik struct btrfs_root *root, 29265dc562c5SJosef Bacik struct inode *inode, 29275dc562c5SJosef Bacik struct btrfs_path *path, 29285dc562c5SJosef Bacik struct btrfs_path *dst_path) 29295dc562c5SJosef Bacik { 29305dc562c5SJosef Bacik struct log_args args; 29315dc562c5SJosef Bacik struct extent_map *em, *n; 29325dc562c5SJosef Bacik struct list_head extents; 29335dc562c5SJosef Bacik struct extent_map_tree *tree = &BTRFS_I(inode)->extent_tree; 29345dc562c5SJosef Bacik u64 test_gen; 29355dc562c5SJosef Bacik int ret = 0; 29365dc562c5SJosef Bacik 29375dc562c5SJosef Bacik INIT_LIST_HEAD(&extents); 29385dc562c5SJosef Bacik 29395dc562c5SJosef Bacik memset(&args, 0, sizeof(args)); 29405dc562c5SJosef Bacik 29415dc562c5SJosef Bacik write_lock(&tree->lock); 29425dc562c5SJosef Bacik test_gen = 
root->fs_info->last_trans_committed; 29435dc562c5SJosef Bacik 29445dc562c5SJosef Bacik list_for_each_entry_safe(em, n, &tree->modified_extents, list) { 29455dc562c5SJosef Bacik list_del_init(&em->list); 29465dc562c5SJosef Bacik if (em->generation <= test_gen) 29475dc562c5SJosef Bacik continue; 29485dc562c5SJosef Bacik list_add_tail(&em->list, &extents); 29495dc562c5SJosef Bacik } 29505dc562c5SJosef Bacik 29515dc562c5SJosef Bacik list_sort(NULL, &extents, extent_cmp); 29525dc562c5SJosef Bacik 29535dc562c5SJosef Bacik while (!list_empty(&extents)) { 29545dc562c5SJosef Bacik em = list_entry(extents.next, struct extent_map, list); 29555dc562c5SJosef Bacik 29565dc562c5SJosef Bacik list_del_init(&em->list); 29575dc562c5SJosef Bacik 29585dc562c5SJosef Bacik /* 29595dc562c5SJosef Bacik * If we had an error we just need to delete everybody from our 29605dc562c5SJosef Bacik * private list. 29615dc562c5SJosef Bacik */ 29625dc562c5SJosef Bacik if (ret) 29635dc562c5SJosef Bacik continue; 29645dc562c5SJosef Bacik 29655dc562c5SJosef Bacik /* 29665dc562c5SJosef Bacik * If the previous EM and the last extent we left off on aren't 29675dc562c5SJosef Bacik * sequential then we need to copy the items we have and redo 29685dc562c5SJosef Bacik * our search 29695dc562c5SJosef Bacik */ 29704e2f84e6SLiu Bo if (args.nr && em->mod_start != args.next_offset) { 2971d2794405SLiu Bo ret = copy_items(trans, inode, dst_path, args.src, 29725dc562c5SJosef Bacik args.start_slot, args.nr, 29735dc562c5SJosef Bacik LOG_INODE_ALL); 29745dc562c5SJosef Bacik if (ret) 29755dc562c5SJosef Bacik continue; 29765dc562c5SJosef Bacik btrfs_release_path(path); 29775dc562c5SJosef Bacik args.nr = 0; 29785dc562c5SJosef Bacik } 29795dc562c5SJosef Bacik 29805dc562c5SJosef Bacik ret = log_one_extent(trans, inode, root, em, path, dst_path, &args); 29815dc562c5SJosef Bacik } 29825dc562c5SJosef Bacik 29835dc562c5SJosef Bacik if (!ret && args.nr) 2984d2794405SLiu Bo ret = copy_items(trans, inode, dst_path, args.src, 
29855dc562c5SJosef Bacik args.start_slot, args.nr, LOG_INODE_ALL); 29865dc562c5SJosef Bacik btrfs_release_path(path); 29875dc562c5SJosef Bacik WARN_ON(!list_empty(&extents)); 29885dc562c5SJosef Bacik write_unlock(&tree->lock); 29895dc562c5SJosef Bacik return ret; 29905dc562c5SJosef Bacik } 29915dc562c5SJosef Bacik 2992e02119d5SChris Mason /* log a single inode in the tree log. 2993e02119d5SChris Mason * At least one parent directory for this inode must exist in the tree 2994e02119d5SChris Mason * or be logged already. 2995e02119d5SChris Mason * 2996e02119d5SChris Mason * Any items from this inode changed by the current transaction are copied 2997e02119d5SChris Mason * to the log tree. An extra reference is taken on any extents in this 2998e02119d5SChris Mason * file, allowing us to avoid a whole pile of corner cases around logging 2999e02119d5SChris Mason * blocks that have been removed from the tree. 3000e02119d5SChris Mason * 3001e02119d5SChris Mason * See LOG_INODE_ALL and related defines for a description of what inode_only 3002e02119d5SChris Mason * does. 3003e02119d5SChris Mason * 3004e02119d5SChris Mason * This handles both files and directories. 
3005e02119d5SChris Mason */ 300612fcfd22SChris Mason static int btrfs_log_inode(struct btrfs_trans_handle *trans, 3007e02119d5SChris Mason struct btrfs_root *root, struct inode *inode, 3008e02119d5SChris Mason int inode_only) 3009e02119d5SChris Mason { 3010e02119d5SChris Mason struct btrfs_path *path; 3011e02119d5SChris Mason struct btrfs_path *dst_path; 3012e02119d5SChris Mason struct btrfs_key min_key; 3013e02119d5SChris Mason struct btrfs_key max_key; 3014e02119d5SChris Mason struct btrfs_root *log = root->log_root; 301531ff1cd2SChris Mason struct extent_buffer *src = NULL; 30164a500fd1SYan, Zheng int err = 0; 3017e02119d5SChris Mason int ret; 30183a5f1d45SChris Mason int nritems; 301931ff1cd2SChris Mason int ins_start_slot = 0; 302031ff1cd2SChris Mason int ins_nr; 30215dc562c5SJosef Bacik bool fast_search = false; 302233345d01SLi Zefan u64 ino = btrfs_ino(inode); 3023e02119d5SChris Mason 3024e02119d5SChris Mason log = root->log_root; 3025e02119d5SChris Mason 3026e02119d5SChris Mason path = btrfs_alloc_path(); 30275df67083STsutomu Itoh if (!path) 30285df67083STsutomu Itoh return -ENOMEM; 3029e02119d5SChris Mason dst_path = btrfs_alloc_path(); 30305df67083STsutomu Itoh if (!dst_path) { 30315df67083STsutomu Itoh btrfs_free_path(path); 30325df67083STsutomu Itoh return -ENOMEM; 30335df67083STsutomu Itoh } 3034e02119d5SChris Mason 303533345d01SLi Zefan min_key.objectid = ino; 3036e02119d5SChris Mason min_key.type = BTRFS_INODE_ITEM_KEY; 3037e02119d5SChris Mason min_key.offset = 0; 3038e02119d5SChris Mason 303933345d01SLi Zefan max_key.objectid = ino; 304012fcfd22SChris Mason 304112fcfd22SChris Mason 30425dc562c5SJosef Bacik /* today the code can only do partial logging of directories */ 3043e02119d5SChris Mason if (inode_only == LOG_INODE_EXISTS || S_ISDIR(inode->i_mode)) 3044e02119d5SChris Mason max_key.type = BTRFS_XATTR_ITEM_KEY; 3045e02119d5SChris Mason else 3046e02119d5SChris Mason max_key.type = (u8)-1; 3047e02119d5SChris Mason max_key.offset = (u64)-1; 
3048e02119d5SChris Mason 304916cdcec7SMiao Xie ret = btrfs_commit_inode_delayed_items(trans, inode); 305016cdcec7SMiao Xie if (ret) { 305116cdcec7SMiao Xie btrfs_free_path(path); 305216cdcec7SMiao Xie btrfs_free_path(dst_path); 305316cdcec7SMiao Xie return ret; 305416cdcec7SMiao Xie } 305516cdcec7SMiao Xie 3056e02119d5SChris Mason mutex_lock(&BTRFS_I(inode)->log_mutex); 3057e02119d5SChris Mason 3058e02119d5SChris Mason /* 3059e02119d5SChris Mason * a brute force approach to making sure we get the most uptodate 3060e02119d5SChris Mason * copies of everything. 3061e02119d5SChris Mason */ 3062e02119d5SChris Mason if (S_ISDIR(inode->i_mode)) { 3063e02119d5SChris Mason int max_key_type = BTRFS_DIR_LOG_INDEX_KEY; 3064e02119d5SChris Mason 3065e02119d5SChris Mason if (inode_only == LOG_INODE_EXISTS) 3066e02119d5SChris Mason max_key_type = BTRFS_XATTR_ITEM_KEY; 306733345d01SLi Zefan ret = drop_objectid_items(trans, log, path, ino, max_key_type); 3068e02119d5SChris Mason } else { 30695dc562c5SJosef Bacik if (test_and_clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC, 30705dc562c5SJosef Bacik &BTRFS_I(inode)->runtime_flags)) { 30715dc562c5SJosef Bacik ret = btrfs_truncate_inode_items(trans, log, 30725dc562c5SJosef Bacik inode, 0, 0); 30735dc562c5SJosef Bacik } else { 30745dc562c5SJosef Bacik fast_search = true; 30755dc562c5SJosef Bacik max_key.type = BTRFS_XATTR_ITEM_KEY; 30765dc562c5SJosef Bacik ret = drop_objectid_items(trans, log, path, ino, 30775dc562c5SJosef Bacik BTRFS_XATTR_ITEM_KEY); 30785dc562c5SJosef Bacik } 3079e02119d5SChris Mason } 30804a500fd1SYan, Zheng if (ret) { 30814a500fd1SYan, Zheng err = ret; 30824a500fd1SYan, Zheng goto out_unlock; 30834a500fd1SYan, Zheng } 3084e02119d5SChris Mason path->keep_locks = 1; 3085e02119d5SChris Mason 3086e02119d5SChris Mason while (1) { 308731ff1cd2SChris Mason ins_nr = 0; 3088e02119d5SChris Mason ret = btrfs_search_forward(root, &min_key, &max_key, 3089e02119d5SChris Mason path, 0, trans->transid); 3090e02119d5SChris Mason if (ret != 0) 
3091e02119d5SChris Mason break; 30923a5f1d45SChris Mason again: 309331ff1cd2SChris Mason /* note, ins_nr might be > 0 here, cleanup outside the loop */ 309433345d01SLi Zefan if (min_key.objectid != ino) 3095e02119d5SChris Mason break; 3096e02119d5SChris Mason if (min_key.type > max_key.type) 3097e02119d5SChris Mason break; 309831ff1cd2SChris Mason 3099e02119d5SChris Mason src = path->nodes[0]; 310031ff1cd2SChris Mason if (ins_nr && ins_start_slot + ins_nr == path->slots[0]) { 310131ff1cd2SChris Mason ins_nr++; 310231ff1cd2SChris Mason goto next_slot; 310331ff1cd2SChris Mason } else if (!ins_nr) { 310431ff1cd2SChris Mason ins_start_slot = path->slots[0]; 310531ff1cd2SChris Mason ins_nr = 1; 310631ff1cd2SChris Mason goto next_slot; 3107e02119d5SChris Mason } 3108e02119d5SChris Mason 3109d2794405SLiu Bo ret = copy_items(trans, inode, dst_path, src, ins_start_slot, 311031ff1cd2SChris Mason ins_nr, inode_only); 31114a500fd1SYan, Zheng if (ret) { 31124a500fd1SYan, Zheng err = ret; 31134a500fd1SYan, Zheng goto out_unlock; 31144a500fd1SYan, Zheng } 311531ff1cd2SChris Mason ins_nr = 1; 311631ff1cd2SChris Mason ins_start_slot = path->slots[0]; 311731ff1cd2SChris Mason next_slot: 3118e02119d5SChris Mason 31193a5f1d45SChris Mason nritems = btrfs_header_nritems(path->nodes[0]); 31203a5f1d45SChris Mason path->slots[0]++; 31213a5f1d45SChris Mason if (path->slots[0] < nritems) { 31223a5f1d45SChris Mason btrfs_item_key_to_cpu(path->nodes[0], &min_key, 31233a5f1d45SChris Mason path->slots[0]); 31243a5f1d45SChris Mason goto again; 31253a5f1d45SChris Mason } 312631ff1cd2SChris Mason if (ins_nr) { 3127d2794405SLiu Bo ret = copy_items(trans, inode, dst_path, src, 312831ff1cd2SChris Mason ins_start_slot, 312931ff1cd2SChris Mason ins_nr, inode_only); 31304a500fd1SYan, Zheng if (ret) { 31314a500fd1SYan, Zheng err = ret; 31324a500fd1SYan, Zheng goto out_unlock; 31334a500fd1SYan, Zheng } 313431ff1cd2SChris Mason ins_nr = 0; 313531ff1cd2SChris Mason } 3136b3b4aa74SDavid Sterba 
btrfs_release_path(path); 31373a5f1d45SChris Mason 3138e02119d5SChris Mason if (min_key.offset < (u64)-1) 3139e02119d5SChris Mason min_key.offset++; 3140e02119d5SChris Mason else if (min_key.type < (u8)-1) 3141e02119d5SChris Mason min_key.type++; 3142e02119d5SChris Mason else if (min_key.objectid < (u64)-1) 3143e02119d5SChris Mason min_key.objectid++; 3144e02119d5SChris Mason else 3145e02119d5SChris Mason break; 3146e02119d5SChris Mason } 314731ff1cd2SChris Mason if (ins_nr) { 3148d2794405SLiu Bo ret = copy_items(trans, inode, dst_path, src, ins_start_slot, 314931ff1cd2SChris Mason ins_nr, inode_only); 31504a500fd1SYan, Zheng if (ret) { 31514a500fd1SYan, Zheng err = ret; 31524a500fd1SYan, Zheng goto out_unlock; 31534a500fd1SYan, Zheng } 315431ff1cd2SChris Mason ins_nr = 0; 315531ff1cd2SChris Mason } 31565dc562c5SJosef Bacik 31575dc562c5SJosef Bacik if (fast_search) { 31585dc562c5SJosef Bacik btrfs_release_path(path); 31595dc562c5SJosef Bacik btrfs_release_path(dst_path); 31605dc562c5SJosef Bacik ret = btrfs_log_changed_extents(trans, root, inode, path, 31615dc562c5SJosef Bacik dst_path); 31625dc562c5SJosef Bacik if (ret) { 31635dc562c5SJosef Bacik err = ret; 31645dc562c5SJosef Bacik goto out_unlock; 31655dc562c5SJosef Bacik } 316606d3d22bSLiu Bo } else { 316706d3d22bSLiu Bo struct extent_map_tree *tree = &BTRFS_I(inode)->extent_tree; 316806d3d22bSLiu Bo struct extent_map *em, *n; 316906d3d22bSLiu Bo 317006d3d22bSLiu Bo list_for_each_entry_safe(em, n, &tree->modified_extents, list) 317106d3d22bSLiu Bo list_del_init(&em->list); 31725dc562c5SJosef Bacik } 31735dc562c5SJosef Bacik 31749623f9a3SChris Mason if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) { 3175b3b4aa74SDavid Sterba btrfs_release_path(path); 3176b3b4aa74SDavid Sterba btrfs_release_path(dst_path); 3177e02119d5SChris Mason ret = log_directory_changes(trans, root, inode, path, dst_path); 31784a500fd1SYan, Zheng if (ret) { 31794a500fd1SYan, Zheng err = ret; 31804a500fd1SYan, Zheng goto out_unlock; 
31814a500fd1SYan, Zheng } 3182e02119d5SChris Mason } 31833a5f1d45SChris Mason BTRFS_I(inode)->logged_trans = trans->transid; 318446d8bc34SLiu Bo BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->last_sub_trans; 31854a500fd1SYan, Zheng out_unlock: 3186e02119d5SChris Mason mutex_unlock(&BTRFS_I(inode)->log_mutex); 3187e02119d5SChris Mason 3188e02119d5SChris Mason btrfs_free_path(path); 3189e02119d5SChris Mason btrfs_free_path(dst_path); 31904a500fd1SYan, Zheng return err; 3191e02119d5SChris Mason } 3192e02119d5SChris Mason 319312fcfd22SChris Mason /* 319412fcfd22SChris Mason * follow the dentry parent pointers up the chain and see if any 319512fcfd22SChris Mason * of the directories in it require a full commit before they can 319612fcfd22SChris Mason * be logged. Returns zero if nothing special needs to be done or 1 if 319712fcfd22SChris Mason * a full commit is required. 319812fcfd22SChris Mason */ 319912fcfd22SChris Mason static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans, 320012fcfd22SChris Mason struct inode *inode, 320112fcfd22SChris Mason struct dentry *parent, 320212fcfd22SChris Mason struct super_block *sb, 320312fcfd22SChris Mason u64 last_committed) 3204e02119d5SChris Mason { 320512fcfd22SChris Mason int ret = 0; 320612fcfd22SChris Mason struct btrfs_root *root; 32076a912213SJosef Bacik struct dentry *old_parent = NULL; 3208e02119d5SChris Mason 3209af4176b4SChris Mason /* 3210af4176b4SChris Mason * for regular files, if its inode is already on disk, we don't 3211af4176b4SChris Mason * have to worry about the parents at all. This is because 3212af4176b4SChris Mason * we can use the last_unlink_trans field to record renames 3213af4176b4SChris Mason * and other fun in this file. 
3214af4176b4SChris Mason */ 3215af4176b4SChris Mason if (S_ISREG(inode->i_mode) && 3216af4176b4SChris Mason BTRFS_I(inode)->generation <= last_committed && 3217af4176b4SChris Mason BTRFS_I(inode)->last_unlink_trans <= last_committed) 3218af4176b4SChris Mason goto out; 3219af4176b4SChris Mason 322012fcfd22SChris Mason if (!S_ISDIR(inode->i_mode)) { 322112fcfd22SChris Mason if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb) 322212fcfd22SChris Mason goto out; 322312fcfd22SChris Mason inode = parent->d_inode; 322412fcfd22SChris Mason } 322512fcfd22SChris Mason 322612fcfd22SChris Mason while (1) { 322712fcfd22SChris Mason BTRFS_I(inode)->logged_trans = trans->transid; 322812fcfd22SChris Mason smp_mb(); 322912fcfd22SChris Mason 323012fcfd22SChris Mason if (BTRFS_I(inode)->last_unlink_trans > last_committed) { 323112fcfd22SChris Mason root = BTRFS_I(inode)->root; 323212fcfd22SChris Mason 323312fcfd22SChris Mason /* 323412fcfd22SChris Mason * make sure any commits to the log are forced 323512fcfd22SChris Mason * to be full commits 323612fcfd22SChris Mason */ 323712fcfd22SChris Mason root->fs_info->last_trans_log_full_commit = 323812fcfd22SChris Mason trans->transid; 323912fcfd22SChris Mason ret = 1; 324012fcfd22SChris Mason break; 324112fcfd22SChris Mason } 324212fcfd22SChris Mason 324312fcfd22SChris Mason if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb) 324412fcfd22SChris Mason break; 324512fcfd22SChris Mason 324676dda93cSYan, Zheng if (IS_ROOT(parent)) 324712fcfd22SChris Mason break; 324812fcfd22SChris Mason 32496a912213SJosef Bacik parent = dget_parent(parent); 32506a912213SJosef Bacik dput(old_parent); 32516a912213SJosef Bacik old_parent = parent; 325212fcfd22SChris Mason inode = parent->d_inode; 325312fcfd22SChris Mason 325412fcfd22SChris Mason } 32556a912213SJosef Bacik dput(old_parent); 325612fcfd22SChris Mason out: 3257e02119d5SChris Mason return ret; 3258e02119d5SChris Mason } 3259e02119d5SChris Mason 3260e02119d5SChris Mason /* 
3261e02119d5SChris Mason * helper function around btrfs_log_inode to make sure newly created 3262e02119d5SChris Mason * parent directories also end up in the log. A minimal inode and backref 3263e02119d5SChris Mason * only logging is done of any parent directories that are older than 3264e02119d5SChris Mason * the last committed transaction 3265e02119d5SChris Mason */ 326612fcfd22SChris Mason int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, 326712fcfd22SChris Mason struct btrfs_root *root, struct inode *inode, 326812fcfd22SChris Mason struct dentry *parent, int exists_only) 3269e02119d5SChris Mason { 327012fcfd22SChris Mason int inode_only = exists_only ? LOG_INODE_EXISTS : LOG_INODE_ALL; 3271e02119d5SChris Mason struct super_block *sb; 32726a912213SJosef Bacik struct dentry *old_parent = NULL; 327312fcfd22SChris Mason int ret = 0; 327412fcfd22SChris Mason u64 last_committed = root->fs_info->last_trans_committed; 327512fcfd22SChris Mason 327612fcfd22SChris Mason sb = inode->i_sb; 327712fcfd22SChris Mason 32783a5e1404SSage Weil if (btrfs_test_opt(root, NOTREELOG)) { 32793a5e1404SSage Weil ret = 1; 32803a5e1404SSage Weil goto end_no_trans; 32813a5e1404SSage Weil } 32823a5e1404SSage Weil 328312fcfd22SChris Mason if (root->fs_info->last_trans_log_full_commit > 328412fcfd22SChris Mason root->fs_info->last_trans_committed) { 328512fcfd22SChris Mason ret = 1; 328612fcfd22SChris Mason goto end_no_trans; 328712fcfd22SChris Mason } 328812fcfd22SChris Mason 328976dda93cSYan, Zheng if (root != BTRFS_I(inode)->root || 329076dda93cSYan, Zheng btrfs_root_refs(&root->root_item) == 0) { 329176dda93cSYan, Zheng ret = 1; 329276dda93cSYan, Zheng goto end_no_trans; 329376dda93cSYan, Zheng } 329476dda93cSYan, Zheng 329512fcfd22SChris Mason ret = check_parent_dirs_for_sync(trans, inode, parent, 329612fcfd22SChris Mason sb, last_committed); 329712fcfd22SChris Mason if (ret) 329812fcfd22SChris Mason goto end_no_trans; 3299e02119d5SChris Mason 330022ee6985SJosef Bacik if 
(btrfs_inode_in_log(inode, trans->transid)) { 3301257c62e1SChris Mason ret = BTRFS_NO_LOG_SYNC; 3302257c62e1SChris Mason goto end_no_trans; 3303257c62e1SChris Mason } 3304257c62e1SChris Mason 33054a500fd1SYan, Zheng ret = start_log_trans(trans, root); 33064a500fd1SYan, Zheng if (ret) 33074a500fd1SYan, Zheng goto end_trans; 330812fcfd22SChris Mason 330912fcfd22SChris Mason ret = btrfs_log_inode(trans, root, inode, inode_only); 33104a500fd1SYan, Zheng if (ret) 33114a500fd1SYan, Zheng goto end_trans; 3312e02119d5SChris Mason 3313af4176b4SChris Mason /* 3314af4176b4SChris Mason * for regular files, if its inode is already on disk, we don't 3315af4176b4SChris Mason * have to worry about the parents at all. This is because 3316af4176b4SChris Mason * we can use the last_unlink_trans field to record renames 3317af4176b4SChris Mason * and other fun in this file. 3318af4176b4SChris Mason */ 3319af4176b4SChris Mason if (S_ISREG(inode->i_mode) && 3320af4176b4SChris Mason BTRFS_I(inode)->generation <= last_committed && 33214a500fd1SYan, Zheng BTRFS_I(inode)->last_unlink_trans <= last_committed) { 33224a500fd1SYan, Zheng ret = 0; 33234a500fd1SYan, Zheng goto end_trans; 33244a500fd1SYan, Zheng } 3325af4176b4SChris Mason 3326af4176b4SChris Mason inode_only = LOG_INODE_EXISTS; 332712fcfd22SChris Mason while (1) { 332812fcfd22SChris Mason if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb) 3329e02119d5SChris Mason break; 3330e02119d5SChris Mason 333112fcfd22SChris Mason inode = parent->d_inode; 333276dda93cSYan, Zheng if (root != BTRFS_I(inode)->root) 333376dda93cSYan, Zheng break; 333476dda93cSYan, Zheng 333512fcfd22SChris Mason if (BTRFS_I(inode)->generation > 333612fcfd22SChris Mason root->fs_info->last_trans_committed) { 333712fcfd22SChris Mason ret = btrfs_log_inode(trans, root, inode, inode_only); 33384a500fd1SYan, Zheng if (ret) 33394a500fd1SYan, Zheng goto end_trans; 3340e02119d5SChris Mason } 334176dda93cSYan, Zheng if (IS_ROOT(parent)) 334212fcfd22SChris Mason 
break; 334312fcfd22SChris Mason 33446a912213SJosef Bacik parent = dget_parent(parent); 33456a912213SJosef Bacik dput(old_parent); 33466a912213SJosef Bacik old_parent = parent; 334712fcfd22SChris Mason } 334812fcfd22SChris Mason ret = 0; 33494a500fd1SYan, Zheng end_trans: 33506a912213SJosef Bacik dput(old_parent); 33514a500fd1SYan, Zheng if (ret < 0) { 33520fa83cdbSJosef Bacik WARN_ON(ret != -ENOSPC); 33534a500fd1SYan, Zheng root->fs_info->last_trans_log_full_commit = trans->transid; 33544a500fd1SYan, Zheng ret = 1; 33554a500fd1SYan, Zheng } 335612fcfd22SChris Mason btrfs_end_log_trans(root); 335712fcfd22SChris Mason end_no_trans: 335812fcfd22SChris Mason return ret; 3359e02119d5SChris Mason } 3360e02119d5SChris Mason 3361e02119d5SChris Mason /* 3362e02119d5SChris Mason * it is not safe to log dentry if the chunk root has added new 3363e02119d5SChris Mason * chunks. This returns 0 if the dentry was logged, and 1 otherwise. 3364e02119d5SChris Mason * If this returns 1, you must commit the transaction to safely get your 3365e02119d5SChris Mason * data on disk. 
3366e02119d5SChris Mason */ 3367e02119d5SChris Mason int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, 3368e02119d5SChris Mason struct btrfs_root *root, struct dentry *dentry) 3369e02119d5SChris Mason { 33706a912213SJosef Bacik struct dentry *parent = dget_parent(dentry); 33716a912213SJosef Bacik int ret; 33726a912213SJosef Bacik 33736a912213SJosef Bacik ret = btrfs_log_inode_parent(trans, root, dentry->d_inode, parent, 0); 33746a912213SJosef Bacik dput(parent); 33756a912213SJosef Bacik 33766a912213SJosef Bacik return ret; 3377e02119d5SChris Mason } 3378e02119d5SChris Mason 3379e02119d5SChris Mason /* 3380e02119d5SChris Mason * should be called during mount to recover any replay any log trees 3381e02119d5SChris Mason * from the FS 3382e02119d5SChris Mason */ 3383e02119d5SChris Mason int btrfs_recover_log_trees(struct btrfs_root *log_root_tree) 3384e02119d5SChris Mason { 3385e02119d5SChris Mason int ret; 3386e02119d5SChris Mason struct btrfs_path *path; 3387e02119d5SChris Mason struct btrfs_trans_handle *trans; 3388e02119d5SChris Mason struct btrfs_key key; 3389e02119d5SChris Mason struct btrfs_key found_key; 3390e02119d5SChris Mason struct btrfs_key tmp_key; 3391e02119d5SChris Mason struct btrfs_root *log; 3392e02119d5SChris Mason struct btrfs_fs_info *fs_info = log_root_tree->fs_info; 3393e02119d5SChris Mason struct walk_control wc = { 3394e02119d5SChris Mason .process_func = process_one_buffer, 3395e02119d5SChris Mason .stage = 0, 3396e02119d5SChris Mason }; 3397e02119d5SChris Mason 3398e02119d5SChris Mason path = btrfs_alloc_path(); 3399db5b493aSTsutomu Itoh if (!path) 3400db5b493aSTsutomu Itoh return -ENOMEM; 3401db5b493aSTsutomu Itoh 3402db5b493aSTsutomu Itoh fs_info->log_root_recovering = 1; 3403e02119d5SChris Mason 34044a500fd1SYan, Zheng trans = btrfs_start_transaction(fs_info->tree_root, 0); 340579787eaaSJeff Mahoney if (IS_ERR(trans)) { 340679787eaaSJeff Mahoney ret = PTR_ERR(trans); 340779787eaaSJeff Mahoney goto error; 340879787eaaSJeff 
Mahoney } 3409e02119d5SChris Mason 3410e02119d5SChris Mason wc.trans = trans; 3411e02119d5SChris Mason wc.pin = 1; 3412e02119d5SChris Mason 3413db5b493aSTsutomu Itoh ret = walk_log_tree(trans, log_root_tree, &wc); 341479787eaaSJeff Mahoney if (ret) { 341579787eaaSJeff Mahoney btrfs_error(fs_info, ret, "Failed to pin buffers while " 341679787eaaSJeff Mahoney "recovering log root tree."); 341779787eaaSJeff Mahoney goto error; 341879787eaaSJeff Mahoney } 3419e02119d5SChris Mason 3420e02119d5SChris Mason again: 3421e02119d5SChris Mason key.objectid = BTRFS_TREE_LOG_OBJECTID; 3422e02119d5SChris Mason key.offset = (u64)-1; 3423e02119d5SChris Mason btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); 3424e02119d5SChris Mason 3425e02119d5SChris Mason while (1) { 3426e02119d5SChris Mason ret = btrfs_search_slot(NULL, log_root_tree, &key, path, 0, 0); 342779787eaaSJeff Mahoney 342879787eaaSJeff Mahoney if (ret < 0) { 342979787eaaSJeff Mahoney btrfs_error(fs_info, ret, 343079787eaaSJeff Mahoney "Couldn't find tree log root."); 343179787eaaSJeff Mahoney goto error; 343279787eaaSJeff Mahoney } 3433e02119d5SChris Mason if (ret > 0) { 3434e02119d5SChris Mason if (path->slots[0] == 0) 3435e02119d5SChris Mason break; 3436e02119d5SChris Mason path->slots[0]--; 3437e02119d5SChris Mason } 3438e02119d5SChris Mason btrfs_item_key_to_cpu(path->nodes[0], &found_key, 3439e02119d5SChris Mason path->slots[0]); 3440b3b4aa74SDavid Sterba btrfs_release_path(path); 3441e02119d5SChris Mason if (found_key.objectid != BTRFS_TREE_LOG_OBJECTID) 3442e02119d5SChris Mason break; 3443e02119d5SChris Mason 3444e02119d5SChris Mason log = btrfs_read_fs_root_no_radix(log_root_tree, 3445e02119d5SChris Mason &found_key); 344679787eaaSJeff Mahoney if (IS_ERR(log)) { 344779787eaaSJeff Mahoney ret = PTR_ERR(log); 344879787eaaSJeff Mahoney btrfs_error(fs_info, ret, 344979787eaaSJeff Mahoney "Couldn't read tree log root."); 345079787eaaSJeff Mahoney goto error; 345179787eaaSJeff Mahoney } 3452e02119d5SChris Mason 
3453e02119d5SChris Mason tmp_key.objectid = found_key.offset; 3454e02119d5SChris Mason tmp_key.type = BTRFS_ROOT_ITEM_KEY; 3455e02119d5SChris Mason tmp_key.offset = (u64)-1; 3456e02119d5SChris Mason 3457e02119d5SChris Mason wc.replay_dest = btrfs_read_fs_root_no_name(fs_info, &tmp_key); 345879787eaaSJeff Mahoney if (IS_ERR(wc.replay_dest)) { 345979787eaaSJeff Mahoney ret = PTR_ERR(wc.replay_dest); 346079787eaaSJeff Mahoney btrfs_error(fs_info, ret, "Couldn't read target root " 346179787eaaSJeff Mahoney "for tree log recovery."); 346279787eaaSJeff Mahoney goto error; 346379787eaaSJeff Mahoney } 3464e02119d5SChris Mason 346507d400a6SYan Zheng wc.replay_dest->log_root = log; 34665d4f98a2SYan Zheng btrfs_record_root_in_trans(trans, wc.replay_dest); 3467e02119d5SChris Mason ret = walk_log_tree(trans, log, &wc); 3468e02119d5SChris Mason BUG_ON(ret); 3469e02119d5SChris Mason 3470e02119d5SChris Mason if (wc.stage == LOG_WALK_REPLAY_ALL) { 3471e02119d5SChris Mason ret = fixup_inode_link_counts(trans, wc.replay_dest, 3472e02119d5SChris Mason path); 3473e02119d5SChris Mason BUG_ON(ret); 3474e02119d5SChris Mason } 3475e02119d5SChris Mason 3476e02119d5SChris Mason key.offset = found_key.offset - 1; 347707d400a6SYan Zheng wc.replay_dest->log_root = NULL; 3478e02119d5SChris Mason free_extent_buffer(log->node); 3479b263c2c8SChris Mason free_extent_buffer(log->commit_root); 3480e02119d5SChris Mason kfree(log); 3481e02119d5SChris Mason 3482e02119d5SChris Mason if (found_key.offset == 0) 3483e02119d5SChris Mason break; 3484e02119d5SChris Mason } 3485b3b4aa74SDavid Sterba btrfs_release_path(path); 3486e02119d5SChris Mason 3487e02119d5SChris Mason /* step one is to pin it all, step two is to replay just inodes */ 3488e02119d5SChris Mason if (wc.pin) { 3489e02119d5SChris Mason wc.pin = 0; 3490e02119d5SChris Mason wc.process_func = replay_one_buffer; 3491e02119d5SChris Mason wc.stage = LOG_WALK_REPLAY_INODES; 3492e02119d5SChris Mason goto again; 3493e02119d5SChris Mason } 
3494e02119d5SChris Mason /* step three is to replay everything */ 3495e02119d5SChris Mason if (wc.stage < LOG_WALK_REPLAY_ALL) { 3496e02119d5SChris Mason wc.stage++; 3497e02119d5SChris Mason goto again; 3498e02119d5SChris Mason } 3499e02119d5SChris Mason 3500e02119d5SChris Mason btrfs_free_path(path); 3501e02119d5SChris Mason 3502e02119d5SChris Mason free_extent_buffer(log_root_tree->node); 3503e02119d5SChris Mason log_root_tree->log_root = NULL; 3504e02119d5SChris Mason fs_info->log_root_recovering = 0; 3505e02119d5SChris Mason 3506e02119d5SChris Mason /* step 4: commit the transaction, which also unpins the blocks */ 3507e02119d5SChris Mason btrfs_commit_transaction(trans, fs_info->tree_root); 3508e02119d5SChris Mason 3509e02119d5SChris Mason kfree(log_root_tree); 3510e02119d5SChris Mason return 0; 351179787eaaSJeff Mahoney 351279787eaaSJeff Mahoney error: 351379787eaaSJeff Mahoney btrfs_free_path(path); 351479787eaaSJeff Mahoney return ret; 3515e02119d5SChris Mason } 351612fcfd22SChris Mason 351712fcfd22SChris Mason /* 351812fcfd22SChris Mason * there are some corner cases where we want to force a full 351912fcfd22SChris Mason * commit instead of allowing a directory to be logged. 352012fcfd22SChris Mason * 352112fcfd22SChris Mason * They revolve around files there were unlinked from the directory, and 352212fcfd22SChris Mason * this function updates the parent directory so that a full commit is 352312fcfd22SChris Mason * properly done if it is fsync'd later after the unlinks are done. 
352412fcfd22SChris Mason */ 352512fcfd22SChris Mason void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans, 352612fcfd22SChris Mason struct inode *dir, struct inode *inode, 352712fcfd22SChris Mason int for_rename) 352812fcfd22SChris Mason { 352912fcfd22SChris Mason /* 3530af4176b4SChris Mason * when we're logging a file, if it hasn't been renamed 3531af4176b4SChris Mason * or unlinked, and its inode is fully committed on disk, 3532af4176b4SChris Mason * we don't have to worry about walking up the directory chain 3533af4176b4SChris Mason * to log its parents. 3534af4176b4SChris Mason * 3535af4176b4SChris Mason * So, we use the last_unlink_trans field to put this transid 3536af4176b4SChris Mason * into the file. When the file is logged we check it and 3537af4176b4SChris Mason * don't log the parents if the file is fully on disk. 3538af4176b4SChris Mason */ 3539af4176b4SChris Mason if (S_ISREG(inode->i_mode)) 3540af4176b4SChris Mason BTRFS_I(inode)->last_unlink_trans = trans->transid; 3541af4176b4SChris Mason 3542af4176b4SChris Mason /* 354312fcfd22SChris Mason * if this directory was already logged any new 354412fcfd22SChris Mason * names for this file/dir will get recorded 354512fcfd22SChris Mason */ 354612fcfd22SChris Mason smp_mb(); 354712fcfd22SChris Mason if (BTRFS_I(dir)->logged_trans == trans->transid) 354812fcfd22SChris Mason return; 354912fcfd22SChris Mason 355012fcfd22SChris Mason /* 355112fcfd22SChris Mason * if the inode we're about to unlink was logged, 355212fcfd22SChris Mason * the log will be properly updated for any new names 355312fcfd22SChris Mason */ 355412fcfd22SChris Mason if (BTRFS_I(inode)->logged_trans == trans->transid) 355512fcfd22SChris Mason return; 355612fcfd22SChris Mason 355712fcfd22SChris Mason /* 355812fcfd22SChris Mason * when renaming files across directories, if the directory 355912fcfd22SChris Mason * there we're unlinking from gets fsync'd later on, there's 356012fcfd22SChris Mason * no way to find the destination 
directory later and fsync it 356112fcfd22SChris Mason * properly. So, we have to be conservative and force commits 356212fcfd22SChris Mason * so the new name gets discovered. 356312fcfd22SChris Mason */ 356412fcfd22SChris Mason if (for_rename) 356512fcfd22SChris Mason goto record; 356612fcfd22SChris Mason 356712fcfd22SChris Mason /* we can safely do the unlink without any special recording */ 356812fcfd22SChris Mason return; 356912fcfd22SChris Mason 357012fcfd22SChris Mason record: 357112fcfd22SChris Mason BTRFS_I(dir)->last_unlink_trans = trans->transid; 357212fcfd22SChris Mason } 357312fcfd22SChris Mason 357412fcfd22SChris Mason /* 357512fcfd22SChris Mason * Call this after adding a new name for a file and it will properly 357612fcfd22SChris Mason * update the log to reflect the new name. 357712fcfd22SChris Mason * 357812fcfd22SChris Mason * It will return zero if all goes well, and it will return 1 if a 357912fcfd22SChris Mason * full transaction commit is required. 358012fcfd22SChris Mason */ 358112fcfd22SChris Mason int btrfs_log_new_name(struct btrfs_trans_handle *trans, 358212fcfd22SChris Mason struct inode *inode, struct inode *old_dir, 358312fcfd22SChris Mason struct dentry *parent) 358412fcfd22SChris Mason { 358512fcfd22SChris Mason struct btrfs_root * root = BTRFS_I(inode)->root; 358612fcfd22SChris Mason 358712fcfd22SChris Mason /* 3588af4176b4SChris Mason * this will force the logging code to walk the dentry chain 3589af4176b4SChris Mason * up for the file 3590af4176b4SChris Mason */ 3591af4176b4SChris Mason if (S_ISREG(inode->i_mode)) 3592af4176b4SChris Mason BTRFS_I(inode)->last_unlink_trans = trans->transid; 3593af4176b4SChris Mason 3594af4176b4SChris Mason /* 359512fcfd22SChris Mason * if this inode hasn't been logged and directory we're renaming it 359612fcfd22SChris Mason * from hasn't been logged, we don't need to log it 359712fcfd22SChris Mason */ 359812fcfd22SChris Mason if (BTRFS_I(inode)->logged_trans <= 359912fcfd22SChris Mason 
root->fs_info->last_trans_committed && 360012fcfd22SChris Mason (!old_dir || BTRFS_I(old_dir)->logged_trans <= 360112fcfd22SChris Mason root->fs_info->last_trans_committed)) 360212fcfd22SChris Mason return 0; 360312fcfd22SChris Mason 360412fcfd22SChris Mason return btrfs_log_inode_parent(trans, root, inode, parent, 1); 360512fcfd22SChris Mason } 360612fcfd22SChris Mason 3607