1c1d7c514SDavid Sterba // SPDX-License-Identifier: GPL-2.0 2e02119d5SChris Mason /* 3e02119d5SChris Mason * Copyright (C) 2008 Oracle. All rights reserved. 4e02119d5SChris Mason */ 5e02119d5SChris Mason 6e02119d5SChris Mason #include <linux/sched.h> 75a0e3ad6STejun Heo #include <linux/slab.h> 8c6adc9ccSMiao Xie #include <linux/blkdev.h> 95dc562c5SJosef Bacik #include <linux/list_sort.h> 10c7f88c4eSJeff Layton #include <linux/iversion.h> 119678c543SNikolay Borisov #include "ctree.h" 12995946ddSMiao Xie #include "tree-log.h" 13e02119d5SChris Mason #include "disk-io.h" 14e02119d5SChris Mason #include "locking.h" 15e02119d5SChris Mason #include "print-tree.h" 16f186373fSMark Fasheh #include "backref.h" 17ebb8765bSAnand Jain #include "compression.h" 18df2c95f3SQu Wenruo #include "qgroup.h" 19900c9981SLiu Bo #include "inode-map.h" 20e02119d5SChris Mason 21e02119d5SChris Mason /* magic values for the inode_only field in btrfs_log_inode: 22e02119d5SChris Mason * 23e02119d5SChris Mason * LOG_INODE_ALL means to log everything 24e02119d5SChris Mason * LOG_INODE_EXISTS means to log just enough to recreate the inode 25e02119d5SChris Mason * during log replay 26e02119d5SChris Mason */ 27e02119d5SChris Mason #define LOG_INODE_ALL 0 28e02119d5SChris Mason #define LOG_INODE_EXISTS 1 29781feef7SLiu Bo #define LOG_OTHER_INODE 2 30e02119d5SChris Mason 31e02119d5SChris Mason /* 3212fcfd22SChris Mason * directory trouble cases 3312fcfd22SChris Mason * 3412fcfd22SChris Mason * 1) on rename or unlink, if the inode being unlinked isn't in the fsync 3512fcfd22SChris Mason * log, we must force a full commit before doing an fsync of the directory 3612fcfd22SChris Mason * where the unlink was done. 
 * ---> record transid of last unlink/rename per directory
 *
 * mkdir foo/some_dir
 * normal commit
 * rename foo/some_dir foo2/some_dir
 * mkdir foo/some_dir
 * fsync foo/some_dir/some_file
 *
 * The fsync above will unlink the original some_dir without recording
 * it in its new location (foo2).  After a crash, some_dir will be gone
 * unless the fsync of some_file forces a full commit.
 *
 * 2) we must log any new names for any file or dir that is in the fsync
 * log. ---> check inode while renaming/linking.
 *
 * 2a) we must log any new names for any file or dir during rename
 * when the directory they are being removed from was logged.
 * ---> check inode and old parent dir during rename
 *
 * 2a is actually the more important variant.  Without the extra logging,
 * a crash might unlink the old name without recreating the new one.
 *
 * 3) after a crash, we must go through any directories with a link count
 * of zero and redo the rm -rf
 *
 * mkdir f1/foo
 * normal commit
 * rm -rf f1/foo
 * fsync(f1)
 *
 * The directory f1/foo was fully removed from the FS, but fsync was never
 * called on f1/foo, only on its parent dir (f1).  After a crash the rm -rf
 * must be replayed.  This must be able to recurse down the entire
 * directory tree.
The inode link count fixup code takes care of the 7112fcfd22SChris Mason * ugly details. 7212fcfd22SChris Mason */ 7312fcfd22SChris Mason 7412fcfd22SChris Mason /* 75e02119d5SChris Mason * stages for the tree walking. The first 76e02119d5SChris Mason * stage (0) is to only pin down the blocks we find 77e02119d5SChris Mason * the second stage (1) is to make sure that all the inodes 78e02119d5SChris Mason * we find in the log are created in the subvolume. 79e02119d5SChris Mason * 80e02119d5SChris Mason * The last stage is to deal with directories and links and extents 81e02119d5SChris Mason * and all the other fun semantics 82e02119d5SChris Mason */ 83e02119d5SChris Mason #define LOG_WALK_PIN_ONLY 0 84e02119d5SChris Mason #define LOG_WALK_REPLAY_INODES 1 85dd8e7217SJosef Bacik #define LOG_WALK_REPLAY_DIR_INDEX 2 86dd8e7217SJosef Bacik #define LOG_WALK_REPLAY_ALL 3 87e02119d5SChris Mason 8812fcfd22SChris Mason static int btrfs_log_inode(struct btrfs_trans_handle *trans, 89a59108a7SNikolay Borisov struct btrfs_root *root, struct btrfs_inode *inode, 9049dae1bcSFilipe Manana int inode_only, 9149dae1bcSFilipe Manana const loff_t start, 928407f553SFilipe Manana const loff_t end, 938407f553SFilipe Manana struct btrfs_log_ctx *ctx); 94ec051c0fSYan Zheng static int link_to_fixup_dir(struct btrfs_trans_handle *trans, 95ec051c0fSYan Zheng struct btrfs_root *root, 96ec051c0fSYan Zheng struct btrfs_path *path, u64 objectid); 9712fcfd22SChris Mason static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans, 9812fcfd22SChris Mason struct btrfs_root *root, 9912fcfd22SChris Mason struct btrfs_root *log, 10012fcfd22SChris Mason struct btrfs_path *path, 10112fcfd22SChris Mason u64 dirid, int del_all); 102e02119d5SChris Mason 103e02119d5SChris Mason /* 104e02119d5SChris Mason * tree logging is a special write ahead log used to make sure that 105e02119d5SChris Mason * fsyncs and O_SYNCs can happen without doing full tree commits. 
 *
 * Full tree commits are expensive because they require commonly
 * modified blocks to be recowed, creating many dirty pages in the
 * extent tree and a 4x-6x higher write load than ext3.
 *
 * Instead of doing a tree commit on every fsync, we use the
 * key ranges and transaction ids to find items for a given file or directory
 * that have changed in this transaction.  Those items are copied into
 * a special tree (one per subvolume root), that tree is written to disk
 * and then the fsync is considered complete.
 *
 * After a crash, items are copied out of the log-tree back into the
 * subvolume tree.  Any file data extents found are recorded in the extent
 * allocation tree, and the log-tree freed.
 *
 * The log tree is read three times: once to pin down all the extents it is
 * using in ram, once to create all the inodes logged in the tree
 * and once to do all the other items.
124e02119d5SChris Mason */ 125e02119d5SChris Mason 126e02119d5SChris Mason /* 127e02119d5SChris Mason * start a sub transaction and setup the log tree 128e02119d5SChris Mason * this increments the log tree writer count to make the people 129e02119d5SChris Mason * syncing the tree wait for us to finish 130e02119d5SChris Mason */ 131e02119d5SChris Mason static int start_log_trans(struct btrfs_trans_handle *trans, 1328b050d35SMiao Xie struct btrfs_root *root, 1338b050d35SMiao Xie struct btrfs_log_ctx *ctx) 134e02119d5SChris Mason { 1350b246afaSJeff Mahoney struct btrfs_fs_info *fs_info = root->fs_info; 13634eb2a52SZhaolei int ret = 0; 1377237f183SYan Zheng 1387237f183SYan Zheng mutex_lock(&root->log_mutex); 13934eb2a52SZhaolei 1407237f183SYan Zheng if (root->log_root) { 1410b246afaSJeff Mahoney if (btrfs_need_log_full_commit(fs_info, trans)) { 14250471a38SMiao Xie ret = -EAGAIN; 14350471a38SMiao Xie goto out; 14450471a38SMiao Xie } 14534eb2a52SZhaolei 146ff782e0aSJosef Bacik if (!root->log_start_pid) { 14727cdeb70SMiao Xie clear_bit(BTRFS_ROOT_MULTI_LOG_TASKS, &root->state); 14834eb2a52SZhaolei root->log_start_pid = current->pid; 149ff782e0aSJosef Bacik } else if (root->log_start_pid != current->pid) { 15027cdeb70SMiao Xie set_bit(BTRFS_ROOT_MULTI_LOG_TASKS, &root->state); 151ff782e0aSJosef Bacik } 15234eb2a52SZhaolei } else { 1530b246afaSJeff Mahoney mutex_lock(&fs_info->tree_log_mutex); 1540b246afaSJeff Mahoney if (!fs_info->log_root_tree) 1550b246afaSJeff Mahoney ret = btrfs_init_log_root_tree(trans, fs_info); 1560b246afaSJeff Mahoney mutex_unlock(&fs_info->tree_log_mutex); 1574a500fd1SYan, Zheng if (ret) 158e87ac136SMiao Xie goto out; 159e87ac136SMiao Xie 160e02119d5SChris Mason ret = btrfs_add_log_tree(trans, root); 1614a500fd1SYan, Zheng if (ret) 162e87ac136SMiao Xie goto out; 16334eb2a52SZhaolei 16427cdeb70SMiao Xie clear_bit(BTRFS_ROOT_MULTI_LOG_TASKS, &root->state); 165e87ac136SMiao Xie root->log_start_pid = current->pid; 16634eb2a52SZhaolei } 
16734eb2a52SZhaolei 1682ecb7923SMiao Xie atomic_inc(&root->log_batch); 1697237f183SYan Zheng atomic_inc(&root->log_writers); 1708b050d35SMiao Xie if (ctx) { 17134eb2a52SZhaolei int index = root->log_transid % 2; 1728b050d35SMiao Xie list_add_tail(&ctx->list, &root->log_ctxs[index]); 173d1433debSMiao Xie ctx->log_transid = root->log_transid; 1748b050d35SMiao Xie } 17534eb2a52SZhaolei 176e87ac136SMiao Xie out: 1777237f183SYan Zheng mutex_unlock(&root->log_mutex); 178e87ac136SMiao Xie return ret; 179e02119d5SChris Mason } 180e02119d5SChris Mason 181e02119d5SChris Mason /* 182e02119d5SChris Mason * returns 0 if there was a log transaction running and we were able 183e02119d5SChris Mason * to join, or returns -ENOENT if there were not transactions 184e02119d5SChris Mason * in progress 185e02119d5SChris Mason */ 186e02119d5SChris Mason static int join_running_log_trans(struct btrfs_root *root) 187e02119d5SChris Mason { 188e02119d5SChris Mason int ret = -ENOENT; 189e02119d5SChris Mason 190e02119d5SChris Mason smp_mb(); 191e02119d5SChris Mason if (!root->log_root) 192e02119d5SChris Mason return -ENOENT; 193e02119d5SChris Mason 1947237f183SYan Zheng mutex_lock(&root->log_mutex); 195e02119d5SChris Mason if (root->log_root) { 196e02119d5SChris Mason ret = 0; 1977237f183SYan Zheng atomic_inc(&root->log_writers); 198e02119d5SChris Mason } 1997237f183SYan Zheng mutex_unlock(&root->log_mutex); 200e02119d5SChris Mason return ret; 201e02119d5SChris Mason } 202e02119d5SChris Mason 203e02119d5SChris Mason /* 20412fcfd22SChris Mason * This either makes the current running log transaction wait 20512fcfd22SChris Mason * until you call btrfs_end_log_trans() or it makes any future 20612fcfd22SChris Mason * log transactions wait until you call btrfs_end_log_trans() 20712fcfd22SChris Mason */ 20812fcfd22SChris Mason int btrfs_pin_log_trans(struct btrfs_root *root) 20912fcfd22SChris Mason { 21012fcfd22SChris Mason int ret = -ENOENT; 21112fcfd22SChris Mason 21212fcfd22SChris Mason 
mutex_lock(&root->log_mutex); 21312fcfd22SChris Mason atomic_inc(&root->log_writers); 21412fcfd22SChris Mason mutex_unlock(&root->log_mutex); 21512fcfd22SChris Mason return ret; 21612fcfd22SChris Mason } 21712fcfd22SChris Mason 21812fcfd22SChris Mason /* 219e02119d5SChris Mason * indicate we're done making changes to the log tree 220e02119d5SChris Mason * and wake up anyone waiting to do a sync 221e02119d5SChris Mason */ 222143bede5SJeff Mahoney void btrfs_end_log_trans(struct btrfs_root *root) 223e02119d5SChris Mason { 2247237f183SYan Zheng if (atomic_dec_and_test(&root->log_writers)) { 225093258e6SDavid Sterba /* atomic_dec_and_test implies a barrier */ 226093258e6SDavid Sterba cond_wake_up_nomb(&root->log_writer_wait); 2277237f183SYan Zheng } 228e02119d5SChris Mason } 229e02119d5SChris Mason 230e02119d5SChris Mason 231e02119d5SChris Mason /* 232e02119d5SChris Mason * the walk control struct is used to pass state down the chain when 233e02119d5SChris Mason * processing the log tree. The stage field tells us which part 234e02119d5SChris Mason * of the log tree processing we are currently doing. The others 235e02119d5SChris Mason * are state fields used for that specific part 236e02119d5SChris Mason */ 237e02119d5SChris Mason struct walk_control { 238e02119d5SChris Mason /* should we free the extent on disk when done? This is used 239e02119d5SChris Mason * at transaction commit time while freeing a log tree 240e02119d5SChris Mason */ 241e02119d5SChris Mason int free; 242e02119d5SChris Mason 243e02119d5SChris Mason /* should we write out the extent buffer? This is used 244e02119d5SChris Mason * while flushing the log tree to disk during a sync 245e02119d5SChris Mason */ 246e02119d5SChris Mason int write; 247e02119d5SChris Mason 248e02119d5SChris Mason /* should we wait for the extent buffer io to finish? 
Also used 249e02119d5SChris Mason * while flushing the log tree to disk for a sync 250e02119d5SChris Mason */ 251e02119d5SChris Mason int wait; 252e02119d5SChris Mason 253e02119d5SChris Mason /* pin only walk, we record which extents on disk belong to the 254e02119d5SChris Mason * log trees 255e02119d5SChris Mason */ 256e02119d5SChris Mason int pin; 257e02119d5SChris Mason 258e02119d5SChris Mason /* what stage of the replay code we're currently in */ 259e02119d5SChris Mason int stage; 260e02119d5SChris Mason 261e02119d5SChris Mason /* the root we are currently replaying */ 262e02119d5SChris Mason struct btrfs_root *replay_dest; 263e02119d5SChris Mason 264e02119d5SChris Mason /* the trans handle for the current replay */ 265e02119d5SChris Mason struct btrfs_trans_handle *trans; 266e02119d5SChris Mason 267e02119d5SChris Mason /* the function that gets used to process blocks we find in the 268e02119d5SChris Mason * tree. Note the extent_buffer might not be up to date when it is 269e02119d5SChris Mason * passed in, and it must be checked or read if you need the data 270e02119d5SChris Mason * inside it 271e02119d5SChris Mason */ 272e02119d5SChris Mason int (*process_func)(struct btrfs_root *log, struct extent_buffer *eb, 273581c1760SQu Wenruo struct walk_control *wc, u64 gen, int level); 274e02119d5SChris Mason }; 275e02119d5SChris Mason 276e02119d5SChris Mason /* 277e02119d5SChris Mason * process_func used to pin down extents, write them or wait on them 278e02119d5SChris Mason */ 279e02119d5SChris Mason static int process_one_buffer(struct btrfs_root *log, 280e02119d5SChris Mason struct extent_buffer *eb, 281581c1760SQu Wenruo struct walk_control *wc, u64 gen, int level) 282e02119d5SChris Mason { 2830b246afaSJeff Mahoney struct btrfs_fs_info *fs_info = log->fs_info; 284b50c6e25SJosef Bacik int ret = 0; 285b50c6e25SJosef Bacik 2868c2a1a30SJosef Bacik /* 2878c2a1a30SJosef Bacik * If this fs is mixed then we need to be able to process the leaves to 2888c2a1a30SJosef Bacik 
* pin down any logged extents, so we have to read the block. 2898c2a1a30SJosef Bacik */ 2900b246afaSJeff Mahoney if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) { 291581c1760SQu Wenruo ret = btrfs_read_buffer(eb, gen, level, NULL); 2928c2a1a30SJosef Bacik if (ret) 2938c2a1a30SJosef Bacik return ret; 2948c2a1a30SJosef Bacik } 2958c2a1a30SJosef Bacik 29604018de5SJosef Bacik if (wc->pin) 2972ff7e61eSJeff Mahoney ret = btrfs_pin_extent_for_log_replay(fs_info, eb->start, 2982ff7e61eSJeff Mahoney eb->len); 299e02119d5SChris Mason 300b50c6e25SJosef Bacik if (!ret && btrfs_buffer_uptodate(eb, gen, 0)) { 3018c2a1a30SJosef Bacik if (wc->pin && btrfs_header_level(eb) == 0) 3022ff7e61eSJeff Mahoney ret = btrfs_exclude_logged_extents(fs_info, eb); 303e02119d5SChris Mason if (wc->write) 304e02119d5SChris Mason btrfs_write_tree_block(eb); 305e02119d5SChris Mason if (wc->wait) 306e02119d5SChris Mason btrfs_wait_tree_block_writeback(eb); 307e02119d5SChris Mason } 308b50c6e25SJosef Bacik return ret; 309e02119d5SChris Mason } 310e02119d5SChris Mason 311e02119d5SChris Mason /* 312e02119d5SChris Mason * Item overwrite used by replay and tree logging. eb, slot and key all refer 313e02119d5SChris Mason * to the src data we are copying out. 314e02119d5SChris Mason * 315e02119d5SChris Mason * root is the tree we are copying into, and path is a scratch 316e02119d5SChris Mason * path for use in this function (it should be released on entry and 317e02119d5SChris Mason * will be released on exit). 318e02119d5SChris Mason * 319e02119d5SChris Mason * If the key is already in the destination tree the existing item is 320e02119d5SChris Mason * overwritten. If the existing item isn't big enough, it is extended. 321e02119d5SChris Mason * If it is too large, it is truncated. 322e02119d5SChris Mason * 323e02119d5SChris Mason * If the key isn't in the destination yet, a new item is inserted. 
324e02119d5SChris Mason */ 325e02119d5SChris Mason static noinline int overwrite_item(struct btrfs_trans_handle *trans, 326e02119d5SChris Mason struct btrfs_root *root, 327e02119d5SChris Mason struct btrfs_path *path, 328e02119d5SChris Mason struct extent_buffer *eb, int slot, 329e02119d5SChris Mason struct btrfs_key *key) 330e02119d5SChris Mason { 3312ff7e61eSJeff Mahoney struct btrfs_fs_info *fs_info = root->fs_info; 332e02119d5SChris Mason int ret; 333e02119d5SChris Mason u32 item_size; 334e02119d5SChris Mason u64 saved_i_size = 0; 335e02119d5SChris Mason int save_old_i_size = 0; 336e02119d5SChris Mason unsigned long src_ptr; 337e02119d5SChris Mason unsigned long dst_ptr; 338e02119d5SChris Mason int overwrite_root = 0; 3394bc4bee4SJosef Bacik bool inode_item = key->type == BTRFS_INODE_ITEM_KEY; 340e02119d5SChris Mason 341e02119d5SChris Mason if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) 342e02119d5SChris Mason overwrite_root = 1; 343e02119d5SChris Mason 344e02119d5SChris Mason item_size = btrfs_item_size_nr(eb, slot); 345e02119d5SChris Mason src_ptr = btrfs_item_ptr_offset(eb, slot); 346e02119d5SChris Mason 347e02119d5SChris Mason /* look for the key in the destination tree */ 348e02119d5SChris Mason ret = btrfs_search_slot(NULL, root, key, path, 0, 0); 3494bc4bee4SJosef Bacik if (ret < 0) 3504bc4bee4SJosef Bacik return ret; 3514bc4bee4SJosef Bacik 352e02119d5SChris Mason if (ret == 0) { 353e02119d5SChris Mason char *src_copy; 354e02119d5SChris Mason char *dst_copy; 355e02119d5SChris Mason u32 dst_size = btrfs_item_size_nr(path->nodes[0], 356e02119d5SChris Mason path->slots[0]); 357e02119d5SChris Mason if (dst_size != item_size) 358e02119d5SChris Mason goto insert; 359e02119d5SChris Mason 360e02119d5SChris Mason if (item_size == 0) { 361b3b4aa74SDavid Sterba btrfs_release_path(path); 362e02119d5SChris Mason return 0; 363e02119d5SChris Mason } 364e02119d5SChris Mason dst_copy = kmalloc(item_size, GFP_NOFS); 365e02119d5SChris Mason src_copy = 
kmalloc(item_size, GFP_NOFS); 3662a29edc6Sliubo if (!dst_copy || !src_copy) { 367b3b4aa74SDavid Sterba btrfs_release_path(path); 3682a29edc6Sliubo kfree(dst_copy); 3692a29edc6Sliubo kfree(src_copy); 3702a29edc6Sliubo return -ENOMEM; 3712a29edc6Sliubo } 372e02119d5SChris Mason 373e02119d5SChris Mason read_extent_buffer(eb, src_copy, src_ptr, item_size); 374e02119d5SChris Mason 375e02119d5SChris Mason dst_ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]); 376e02119d5SChris Mason read_extent_buffer(path->nodes[0], dst_copy, dst_ptr, 377e02119d5SChris Mason item_size); 378e02119d5SChris Mason ret = memcmp(dst_copy, src_copy, item_size); 379e02119d5SChris Mason 380e02119d5SChris Mason kfree(dst_copy); 381e02119d5SChris Mason kfree(src_copy); 382e02119d5SChris Mason /* 383e02119d5SChris Mason * they have the same contents, just return, this saves 384e02119d5SChris Mason * us from cowing blocks in the destination tree and doing 385e02119d5SChris Mason * extra writes that may not have been done by a previous 386e02119d5SChris Mason * sync 387e02119d5SChris Mason */ 388e02119d5SChris Mason if (ret == 0) { 389b3b4aa74SDavid Sterba btrfs_release_path(path); 390e02119d5SChris Mason return 0; 391e02119d5SChris Mason } 392e02119d5SChris Mason 3934bc4bee4SJosef Bacik /* 3944bc4bee4SJosef Bacik * We need to load the old nbytes into the inode so when we 3954bc4bee4SJosef Bacik * replay the extents we've logged we get the right nbytes. 
3964bc4bee4SJosef Bacik */ 3974bc4bee4SJosef Bacik if (inode_item) { 3984bc4bee4SJosef Bacik struct btrfs_inode_item *item; 3994bc4bee4SJosef Bacik u64 nbytes; 400d555438bSJosef Bacik u32 mode; 4014bc4bee4SJosef Bacik 4024bc4bee4SJosef Bacik item = btrfs_item_ptr(path->nodes[0], path->slots[0], 4034bc4bee4SJosef Bacik struct btrfs_inode_item); 4044bc4bee4SJosef Bacik nbytes = btrfs_inode_nbytes(path->nodes[0], item); 4054bc4bee4SJosef Bacik item = btrfs_item_ptr(eb, slot, 4064bc4bee4SJosef Bacik struct btrfs_inode_item); 4074bc4bee4SJosef Bacik btrfs_set_inode_nbytes(eb, item, nbytes); 408d555438bSJosef Bacik 409d555438bSJosef Bacik /* 410d555438bSJosef Bacik * If this is a directory we need to reset the i_size to 411d555438bSJosef Bacik * 0 so that we can set it up properly when replaying 412d555438bSJosef Bacik * the rest of the items in this log. 413d555438bSJosef Bacik */ 414d555438bSJosef Bacik mode = btrfs_inode_mode(eb, item); 415d555438bSJosef Bacik if (S_ISDIR(mode)) 416d555438bSJosef Bacik btrfs_set_inode_size(eb, item, 0); 4174bc4bee4SJosef Bacik } 4184bc4bee4SJosef Bacik } else if (inode_item) { 4194bc4bee4SJosef Bacik struct btrfs_inode_item *item; 420d555438bSJosef Bacik u32 mode; 4214bc4bee4SJosef Bacik 4224bc4bee4SJosef Bacik /* 4234bc4bee4SJosef Bacik * New inode, set nbytes to 0 so that the nbytes comes out 4244bc4bee4SJosef Bacik * properly when we replay the extents. 4254bc4bee4SJosef Bacik */ 4264bc4bee4SJosef Bacik item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item); 4274bc4bee4SJosef Bacik btrfs_set_inode_nbytes(eb, item, 0); 428d555438bSJosef Bacik 429d555438bSJosef Bacik /* 430d555438bSJosef Bacik * If this is a directory we need to reset the i_size to 0 so 431d555438bSJosef Bacik * that we can set it up properly when replaying the rest of 432d555438bSJosef Bacik * the items in this log. 
433d555438bSJosef Bacik */ 434d555438bSJosef Bacik mode = btrfs_inode_mode(eb, item); 435d555438bSJosef Bacik if (S_ISDIR(mode)) 436d555438bSJosef Bacik btrfs_set_inode_size(eb, item, 0); 437e02119d5SChris Mason } 438e02119d5SChris Mason insert: 439b3b4aa74SDavid Sterba btrfs_release_path(path); 440e02119d5SChris Mason /* try to insert the key into the destination tree */ 441df8d116fSFilipe Manana path->skip_release_on_error = 1; 442e02119d5SChris Mason ret = btrfs_insert_empty_item(trans, root, path, 443e02119d5SChris Mason key, item_size); 444df8d116fSFilipe Manana path->skip_release_on_error = 0; 445e02119d5SChris Mason 446e02119d5SChris Mason /* make sure any existing item is the correct size */ 447df8d116fSFilipe Manana if (ret == -EEXIST || ret == -EOVERFLOW) { 448e02119d5SChris Mason u32 found_size; 449e02119d5SChris Mason found_size = btrfs_item_size_nr(path->nodes[0], 450e02119d5SChris Mason path->slots[0]); 451143bede5SJeff Mahoney if (found_size > item_size) 4522ff7e61eSJeff Mahoney btrfs_truncate_item(fs_info, path, item_size, 1); 453143bede5SJeff Mahoney else if (found_size < item_size) 4542ff7e61eSJeff Mahoney btrfs_extend_item(fs_info, path, 45587b29b20SYan Zheng item_size - found_size); 456e02119d5SChris Mason } else if (ret) { 4574a500fd1SYan, Zheng return ret; 458e02119d5SChris Mason } 459e02119d5SChris Mason dst_ptr = btrfs_item_ptr_offset(path->nodes[0], 460e02119d5SChris Mason path->slots[0]); 461e02119d5SChris Mason 462e02119d5SChris Mason /* don't overwrite an existing inode if the generation number 463e02119d5SChris Mason * was logged as zero. This is done when the tree logging code 464e02119d5SChris Mason * is just logging an inode to make sure it exists after recovery. 465e02119d5SChris Mason * 466e02119d5SChris Mason * Also, don't overwrite i_size on directories during replay. 
467e02119d5SChris Mason * log replay inserts and removes directory items based on the 468e02119d5SChris Mason * state of the tree found in the subvolume, and i_size is modified 469e02119d5SChris Mason * as it goes 470e02119d5SChris Mason */ 471e02119d5SChris Mason if (key->type == BTRFS_INODE_ITEM_KEY && ret == -EEXIST) { 472e02119d5SChris Mason struct btrfs_inode_item *src_item; 473e02119d5SChris Mason struct btrfs_inode_item *dst_item; 474e02119d5SChris Mason 475e02119d5SChris Mason src_item = (struct btrfs_inode_item *)src_ptr; 476e02119d5SChris Mason dst_item = (struct btrfs_inode_item *)dst_ptr; 477e02119d5SChris Mason 4781a4bcf47SFilipe Manana if (btrfs_inode_generation(eb, src_item) == 0) { 4791a4bcf47SFilipe Manana struct extent_buffer *dst_eb = path->nodes[0]; 4802f2ff0eeSFilipe Manana const u64 ino_size = btrfs_inode_size(eb, src_item); 4811a4bcf47SFilipe Manana 4822f2ff0eeSFilipe Manana /* 4832f2ff0eeSFilipe Manana * For regular files an ino_size == 0 is used only when 4842f2ff0eeSFilipe Manana * logging that an inode exists, as part of a directory 4852f2ff0eeSFilipe Manana * fsync, and the inode wasn't fsynced before. In this 4862f2ff0eeSFilipe Manana * case don't set the size of the inode in the fs/subvol 4872f2ff0eeSFilipe Manana * tree, otherwise we would be throwing valid data away. 
4882f2ff0eeSFilipe Manana */ 4891a4bcf47SFilipe Manana if (S_ISREG(btrfs_inode_mode(eb, src_item)) && 4902f2ff0eeSFilipe Manana S_ISREG(btrfs_inode_mode(dst_eb, dst_item)) && 4912f2ff0eeSFilipe Manana ino_size != 0) { 4921a4bcf47SFilipe Manana struct btrfs_map_token token; 4931a4bcf47SFilipe Manana 4941a4bcf47SFilipe Manana btrfs_init_map_token(&token); 4951a4bcf47SFilipe Manana btrfs_set_token_inode_size(dst_eb, dst_item, 4961a4bcf47SFilipe Manana ino_size, &token); 4971a4bcf47SFilipe Manana } 498e02119d5SChris Mason goto no_copy; 4991a4bcf47SFilipe Manana } 500e02119d5SChris Mason 501e02119d5SChris Mason if (overwrite_root && 502e02119d5SChris Mason S_ISDIR(btrfs_inode_mode(eb, src_item)) && 503e02119d5SChris Mason S_ISDIR(btrfs_inode_mode(path->nodes[0], dst_item))) { 504e02119d5SChris Mason save_old_i_size = 1; 505e02119d5SChris Mason saved_i_size = btrfs_inode_size(path->nodes[0], 506e02119d5SChris Mason dst_item); 507e02119d5SChris Mason } 508e02119d5SChris Mason } 509e02119d5SChris Mason 510e02119d5SChris Mason copy_extent_buffer(path->nodes[0], eb, dst_ptr, 511e02119d5SChris Mason src_ptr, item_size); 512e02119d5SChris Mason 513e02119d5SChris Mason if (save_old_i_size) { 514e02119d5SChris Mason struct btrfs_inode_item *dst_item; 515e02119d5SChris Mason dst_item = (struct btrfs_inode_item *)dst_ptr; 516e02119d5SChris Mason btrfs_set_inode_size(path->nodes[0], dst_item, saved_i_size); 517e02119d5SChris Mason } 518e02119d5SChris Mason 519e02119d5SChris Mason /* make sure the generation is filled in */ 520e02119d5SChris Mason if (key->type == BTRFS_INODE_ITEM_KEY) { 521e02119d5SChris Mason struct btrfs_inode_item *dst_item; 522e02119d5SChris Mason dst_item = (struct btrfs_inode_item *)dst_ptr; 523e02119d5SChris Mason if (btrfs_inode_generation(path->nodes[0], dst_item) == 0) { 524e02119d5SChris Mason btrfs_set_inode_generation(path->nodes[0], dst_item, 525e02119d5SChris Mason trans->transid); 526e02119d5SChris Mason } 527e02119d5SChris Mason } 528e02119d5SChris 
Mason no_copy: 529e02119d5SChris Mason btrfs_mark_buffer_dirty(path->nodes[0]); 530b3b4aa74SDavid Sterba btrfs_release_path(path); 531e02119d5SChris Mason return 0; 532e02119d5SChris Mason } 533e02119d5SChris Mason 534e02119d5SChris Mason /* 535e02119d5SChris Mason * simple helper to read an inode off the disk from a given root 536e02119d5SChris Mason * This can only be called for subvolume roots and not for the log 537e02119d5SChris Mason */ 538e02119d5SChris Mason static noinline struct inode *read_one_inode(struct btrfs_root *root, 539e02119d5SChris Mason u64 objectid) 540e02119d5SChris Mason { 5415d4f98a2SYan Zheng struct btrfs_key key; 542e02119d5SChris Mason struct inode *inode; 543e02119d5SChris Mason 5445d4f98a2SYan Zheng key.objectid = objectid; 5455d4f98a2SYan Zheng key.type = BTRFS_INODE_ITEM_KEY; 5465d4f98a2SYan Zheng key.offset = 0; 54773f73415SJosef Bacik inode = btrfs_iget(root->fs_info->sb, &key, root, NULL); 5485d4f98a2SYan Zheng if (IS_ERR(inode)) { 5495d4f98a2SYan Zheng inode = NULL; 5505d4f98a2SYan Zheng } else if (is_bad_inode(inode)) { 551e02119d5SChris Mason iput(inode); 552e02119d5SChris Mason inode = NULL; 553e02119d5SChris Mason } 554e02119d5SChris Mason return inode; 555e02119d5SChris Mason } 556e02119d5SChris Mason 557e02119d5SChris Mason /* replays a single extent in 'eb' at 'slot' with 'key' into the 558e02119d5SChris Mason * subvolume 'root'. path is released on entry and should be released 559e02119d5SChris Mason * on exit. 560e02119d5SChris Mason * 561e02119d5SChris Mason * extents in the log tree have not been allocated out of the extent 562e02119d5SChris Mason * tree yet. So, this completes the allocation, taking a reference 563e02119d5SChris Mason * as required if the extent already exists or creating a new extent 564e02119d5SChris Mason * if it isn't in the extent allocation tree yet. 
565e02119d5SChris Mason * 566e02119d5SChris Mason * The extent is inserted into the file, dropping any existing extents 567e02119d5SChris Mason * from the file that overlap the new one. 568e02119d5SChris Mason */ 569e02119d5SChris Mason static noinline int replay_one_extent(struct btrfs_trans_handle *trans, 570e02119d5SChris Mason struct btrfs_root *root, 571e02119d5SChris Mason struct btrfs_path *path, 572e02119d5SChris Mason struct extent_buffer *eb, int slot, 573e02119d5SChris Mason struct btrfs_key *key) 574e02119d5SChris Mason { 5750b246afaSJeff Mahoney struct btrfs_fs_info *fs_info = root->fs_info; 576e02119d5SChris Mason int found_type; 577e02119d5SChris Mason u64 extent_end; 578e02119d5SChris Mason u64 start = key->offset; 5794bc4bee4SJosef Bacik u64 nbytes = 0; 580e02119d5SChris Mason struct btrfs_file_extent_item *item; 581e02119d5SChris Mason struct inode *inode = NULL; 582e02119d5SChris Mason unsigned long size; 583e02119d5SChris Mason int ret = 0; 584e02119d5SChris Mason 585e02119d5SChris Mason item = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item); 586e02119d5SChris Mason found_type = btrfs_file_extent_type(eb, item); 587e02119d5SChris Mason 588d899e052SYan Zheng if (found_type == BTRFS_FILE_EXTENT_REG || 5894bc4bee4SJosef Bacik found_type == BTRFS_FILE_EXTENT_PREALLOC) { 5904bc4bee4SJosef Bacik nbytes = btrfs_file_extent_num_bytes(eb, item); 5914bc4bee4SJosef Bacik extent_end = start + nbytes; 5924bc4bee4SJosef Bacik 5934bc4bee4SJosef Bacik /* 5944bc4bee4SJosef Bacik * We don't add to the inodes nbytes if we are prealloc or a 5954bc4bee4SJosef Bacik * hole. 
5964bc4bee4SJosef Bacik */ 5974bc4bee4SJosef Bacik if (btrfs_file_extent_disk_bytenr(eb, item) == 0) 5984bc4bee4SJosef Bacik nbytes = 0; 5994bc4bee4SJosef Bacik } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { 600e41ca589SQu Wenruo size = btrfs_file_extent_ram_bytes(eb, item); 6014bc4bee4SJosef Bacik nbytes = btrfs_file_extent_ram_bytes(eb, item); 602da17066cSJeff Mahoney extent_end = ALIGN(start + size, 6030b246afaSJeff Mahoney fs_info->sectorsize); 604e02119d5SChris Mason } else { 605e02119d5SChris Mason ret = 0; 606e02119d5SChris Mason goto out; 607e02119d5SChris Mason } 608e02119d5SChris Mason 609e02119d5SChris Mason inode = read_one_inode(root, key->objectid); 610e02119d5SChris Mason if (!inode) { 611e02119d5SChris Mason ret = -EIO; 612e02119d5SChris Mason goto out; 613e02119d5SChris Mason } 614e02119d5SChris Mason 615e02119d5SChris Mason /* 616e02119d5SChris Mason * first check to see if we already have this extent in the 617e02119d5SChris Mason * file. This must be done before the btrfs_drop_extents run 618e02119d5SChris Mason * so we don't try to drop this extent. 
619e02119d5SChris Mason */ 620f85b7379SDavid Sterba ret = btrfs_lookup_file_extent(trans, root, path, 621f85b7379SDavid Sterba btrfs_ino(BTRFS_I(inode)), start, 0); 622e02119d5SChris Mason 623d899e052SYan Zheng if (ret == 0 && 624d899e052SYan Zheng (found_type == BTRFS_FILE_EXTENT_REG || 625d899e052SYan Zheng found_type == BTRFS_FILE_EXTENT_PREALLOC)) { 626e02119d5SChris Mason struct btrfs_file_extent_item cmp1; 627e02119d5SChris Mason struct btrfs_file_extent_item cmp2; 628e02119d5SChris Mason struct btrfs_file_extent_item *existing; 629e02119d5SChris Mason struct extent_buffer *leaf; 630e02119d5SChris Mason 631e02119d5SChris Mason leaf = path->nodes[0]; 632e02119d5SChris Mason existing = btrfs_item_ptr(leaf, path->slots[0], 633e02119d5SChris Mason struct btrfs_file_extent_item); 634e02119d5SChris Mason 635e02119d5SChris Mason read_extent_buffer(eb, &cmp1, (unsigned long)item, 636e02119d5SChris Mason sizeof(cmp1)); 637e02119d5SChris Mason read_extent_buffer(leaf, &cmp2, (unsigned long)existing, 638e02119d5SChris Mason sizeof(cmp2)); 639e02119d5SChris Mason 640e02119d5SChris Mason /* 641e02119d5SChris Mason * we already have a pointer to this exact extent, 642e02119d5SChris Mason * we don't have to do anything 643e02119d5SChris Mason */ 644e02119d5SChris Mason if (memcmp(&cmp1, &cmp2, sizeof(cmp1)) == 0) { 645b3b4aa74SDavid Sterba btrfs_release_path(path); 646e02119d5SChris Mason goto out; 647e02119d5SChris Mason } 648e02119d5SChris Mason } 649b3b4aa74SDavid Sterba btrfs_release_path(path); 650e02119d5SChris Mason 651e02119d5SChris Mason /* drop any overlapping extents */ 6522671485dSJosef Bacik ret = btrfs_drop_extents(trans, root, inode, start, extent_end, 1); 6533650860bSJosef Bacik if (ret) 6543650860bSJosef Bacik goto out; 655e02119d5SChris Mason 65607d400a6SYan Zheng if (found_type == BTRFS_FILE_EXTENT_REG || 65707d400a6SYan Zheng found_type == BTRFS_FILE_EXTENT_PREALLOC) { 6585d4f98a2SYan Zheng u64 offset; 65907d400a6SYan Zheng unsigned long dest_offset; 
66007d400a6SYan Zheng struct btrfs_key ins; 66107d400a6SYan Zheng 6623168021cSFilipe Manana if (btrfs_file_extent_disk_bytenr(eb, item) == 0 && 6633168021cSFilipe Manana btrfs_fs_incompat(fs_info, NO_HOLES)) 6643168021cSFilipe Manana goto update_inode; 6653168021cSFilipe Manana 66607d400a6SYan Zheng ret = btrfs_insert_empty_item(trans, root, path, key, 66707d400a6SYan Zheng sizeof(*item)); 6683650860bSJosef Bacik if (ret) 6693650860bSJosef Bacik goto out; 67007d400a6SYan Zheng dest_offset = btrfs_item_ptr_offset(path->nodes[0], 67107d400a6SYan Zheng path->slots[0]); 67207d400a6SYan Zheng copy_extent_buffer(path->nodes[0], eb, dest_offset, 67307d400a6SYan Zheng (unsigned long)item, sizeof(*item)); 67407d400a6SYan Zheng 67507d400a6SYan Zheng ins.objectid = btrfs_file_extent_disk_bytenr(eb, item); 67607d400a6SYan Zheng ins.offset = btrfs_file_extent_disk_num_bytes(eb, item); 67707d400a6SYan Zheng ins.type = BTRFS_EXTENT_ITEM_KEY; 6785d4f98a2SYan Zheng offset = key->offset - btrfs_file_extent_offset(eb, item); 67907d400a6SYan Zheng 680df2c95f3SQu Wenruo /* 681df2c95f3SQu Wenruo * Manually record dirty extent, as here we did a shallow 682df2c95f3SQu Wenruo * file extent item copy and skip normal backref update, 683df2c95f3SQu Wenruo * but modifying extent tree all by ourselves. 
684df2c95f3SQu Wenruo * So need to manually record dirty extent for qgroup, 685df2c95f3SQu Wenruo * as the owner of the file extent changed from log tree 686df2c95f3SQu Wenruo * (doesn't affect qgroup) to fs/file tree(affects qgroup) 687df2c95f3SQu Wenruo */ 688*a95f3aafSLu Fengqi ret = btrfs_qgroup_trace_extent(trans, 689df2c95f3SQu Wenruo btrfs_file_extent_disk_bytenr(eb, item), 690df2c95f3SQu Wenruo btrfs_file_extent_disk_num_bytes(eb, item), 691df2c95f3SQu Wenruo GFP_NOFS); 692df2c95f3SQu Wenruo if (ret < 0) 693df2c95f3SQu Wenruo goto out; 694df2c95f3SQu Wenruo 69507d400a6SYan Zheng if (ins.objectid > 0) { 69607d400a6SYan Zheng u64 csum_start; 69707d400a6SYan Zheng u64 csum_end; 69807d400a6SYan Zheng LIST_HEAD(ordered_sums); 69907d400a6SYan Zheng /* 70007d400a6SYan Zheng * is this extent already allocated in the extent 70107d400a6SYan Zheng * allocation tree? If so, just add a reference 70207d400a6SYan Zheng */ 7032ff7e61eSJeff Mahoney ret = btrfs_lookup_data_extent(fs_info, ins.objectid, 70407d400a6SYan Zheng ins.offset); 70507d400a6SYan Zheng if (ret == 0) { 70684f7d8e6SJosef Bacik ret = btrfs_inc_extent_ref(trans, root, 70707d400a6SYan Zheng ins.objectid, ins.offset, 7085d4f98a2SYan Zheng 0, root->root_key.objectid, 709b06c4bf5SFilipe Manana key->objectid, offset); 710b50c6e25SJosef Bacik if (ret) 711b50c6e25SJosef Bacik goto out; 71207d400a6SYan Zheng } else { 71307d400a6SYan Zheng /* 71407d400a6SYan Zheng * insert the extent pointer in the extent 71507d400a6SYan Zheng * allocation tree 71607d400a6SYan Zheng */ 7175d4f98a2SYan Zheng ret = btrfs_alloc_logged_file_extent(trans, 7182ff7e61eSJeff Mahoney root->root_key.objectid, 7195d4f98a2SYan Zheng key->objectid, offset, &ins); 720b50c6e25SJosef Bacik if (ret) 721b50c6e25SJosef Bacik goto out; 72207d400a6SYan Zheng } 723b3b4aa74SDavid Sterba btrfs_release_path(path); 72407d400a6SYan Zheng 72507d400a6SYan Zheng if (btrfs_file_extent_compression(eb, item)) { 72607d400a6SYan Zheng csum_start = ins.objectid; 
72707d400a6SYan Zheng csum_end = csum_start + ins.offset; 72807d400a6SYan Zheng } else { 72907d400a6SYan Zheng csum_start = ins.objectid + 73007d400a6SYan Zheng btrfs_file_extent_offset(eb, item); 73107d400a6SYan Zheng csum_end = csum_start + 73207d400a6SYan Zheng btrfs_file_extent_num_bytes(eb, item); 73307d400a6SYan Zheng } 73407d400a6SYan Zheng 73507d400a6SYan Zheng ret = btrfs_lookup_csums_range(root->log_root, 73607d400a6SYan Zheng csum_start, csum_end - 1, 737a2de733cSArne Jansen &ordered_sums, 0); 7383650860bSJosef Bacik if (ret) 7393650860bSJosef Bacik goto out; 740b84b8390SFilipe Manana /* 741b84b8390SFilipe Manana * Now delete all existing cums in the csum root that 742b84b8390SFilipe Manana * cover our range. We do this because we can have an 743b84b8390SFilipe Manana * extent that is completely referenced by one file 744b84b8390SFilipe Manana * extent item and partially referenced by another 745b84b8390SFilipe Manana * file extent item (like after using the clone or 746b84b8390SFilipe Manana * extent_same ioctls). In this case if we end up doing 747b84b8390SFilipe Manana * the replay of the one that partially references the 748b84b8390SFilipe Manana * extent first, and we do not do the csum deletion 749b84b8390SFilipe Manana * below, we can get 2 csum items in the csum tree that 750b84b8390SFilipe Manana * overlap each other. 
For example, imagine our log has 751b84b8390SFilipe Manana * the two following file extent items: 752b84b8390SFilipe Manana * 753b84b8390SFilipe Manana * key (257 EXTENT_DATA 409600) 754b84b8390SFilipe Manana * extent data disk byte 12845056 nr 102400 755b84b8390SFilipe Manana * extent data offset 20480 nr 20480 ram 102400 756b84b8390SFilipe Manana * 757b84b8390SFilipe Manana * key (257 EXTENT_DATA 819200) 758b84b8390SFilipe Manana * extent data disk byte 12845056 nr 102400 759b84b8390SFilipe Manana * extent data offset 0 nr 102400 ram 102400 760b84b8390SFilipe Manana * 761b84b8390SFilipe Manana * Where the second one fully references the 100K extent 762b84b8390SFilipe Manana * that starts at disk byte 12845056, and the log tree 763b84b8390SFilipe Manana * has a single csum item that covers the entire range 764b84b8390SFilipe Manana * of the extent: 765b84b8390SFilipe Manana * 766b84b8390SFilipe Manana * key (EXTENT_CSUM EXTENT_CSUM 12845056) itemsize 100 767b84b8390SFilipe Manana * 768b84b8390SFilipe Manana * After the first file extent item is replayed, the 769b84b8390SFilipe Manana * csum tree gets the following csum item: 770b84b8390SFilipe Manana * 771b84b8390SFilipe Manana * key (EXTENT_CSUM EXTENT_CSUM 12865536) itemsize 20 772b84b8390SFilipe Manana * 773b84b8390SFilipe Manana * Which covers the 20K sub-range starting at offset 20K 774b84b8390SFilipe Manana * of our extent. 
Now when we replay the second file 775b84b8390SFilipe Manana * extent item, if we do not delete existing csum items 776b84b8390SFilipe Manana * that cover any of its blocks, we end up getting two 777b84b8390SFilipe Manana * csum items in our csum tree that overlap each other: 778b84b8390SFilipe Manana * 779b84b8390SFilipe Manana * key (EXTENT_CSUM EXTENT_CSUM 12845056) itemsize 100 780b84b8390SFilipe Manana * key (EXTENT_CSUM EXTENT_CSUM 12865536) itemsize 20 781b84b8390SFilipe Manana * 782b84b8390SFilipe Manana * Which is a problem, because after this anyone trying 783b84b8390SFilipe Manana * to lookup up for the checksum of any block of our 784b84b8390SFilipe Manana * extent starting at an offset of 40K or higher, will 785b84b8390SFilipe Manana * end up looking at the second csum item only, which 786b84b8390SFilipe Manana * does not contain the checksum for any block starting 787b84b8390SFilipe Manana * at offset 40K or higher of our extent. 788b84b8390SFilipe Manana */ 78907d400a6SYan Zheng while (!list_empty(&ordered_sums)) { 79007d400a6SYan Zheng struct btrfs_ordered_sum *sums; 79107d400a6SYan Zheng sums = list_entry(ordered_sums.next, 79207d400a6SYan Zheng struct btrfs_ordered_sum, 79307d400a6SYan Zheng list); 7943650860bSJosef Bacik if (!ret) 7950b246afaSJeff Mahoney ret = btrfs_del_csums(trans, fs_info, 796b84b8390SFilipe Manana sums->bytenr, 797b84b8390SFilipe Manana sums->len); 798b84b8390SFilipe Manana if (!ret) 79907d400a6SYan Zheng ret = btrfs_csum_file_blocks(trans, 8000b246afaSJeff Mahoney fs_info->csum_root, sums); 80107d400a6SYan Zheng list_del(&sums->list); 80207d400a6SYan Zheng kfree(sums); 80307d400a6SYan Zheng } 8043650860bSJosef Bacik if (ret) 8053650860bSJosef Bacik goto out; 80607d400a6SYan Zheng } else { 807b3b4aa74SDavid Sterba btrfs_release_path(path); 80807d400a6SYan Zheng } 80907d400a6SYan Zheng } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { 81007d400a6SYan Zheng /* inline extents are easy, we just overwrite them */ 
811e02119d5SChris Mason ret = overwrite_item(trans, root, path, eb, slot, key); 8123650860bSJosef Bacik if (ret) 8133650860bSJosef Bacik goto out; 81407d400a6SYan Zheng } 815e02119d5SChris Mason 8164bc4bee4SJosef Bacik inode_add_bytes(inode, nbytes); 8173168021cSFilipe Manana update_inode: 818b9959295STsutomu Itoh ret = btrfs_update_inode(trans, root, inode); 819e02119d5SChris Mason out: 820e02119d5SChris Mason if (inode) 821e02119d5SChris Mason iput(inode); 822e02119d5SChris Mason return ret; 823e02119d5SChris Mason } 824e02119d5SChris Mason 825e02119d5SChris Mason /* 826e02119d5SChris Mason * when cleaning up conflicts between the directory names in the 827e02119d5SChris Mason * subvolume, directory names in the log and directory names in the 828e02119d5SChris Mason * inode back references, we may have to unlink inodes from directories. 829e02119d5SChris Mason * 830e02119d5SChris Mason * This is a helper function to do the unlink of a specific directory 831e02119d5SChris Mason * item 832e02119d5SChris Mason */ 833e02119d5SChris Mason static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans, 834e02119d5SChris Mason struct btrfs_root *root, 835e02119d5SChris Mason struct btrfs_path *path, 836207e7d92SNikolay Borisov struct btrfs_inode *dir, 837e02119d5SChris Mason struct btrfs_dir_item *di) 838e02119d5SChris Mason { 839e02119d5SChris Mason struct inode *inode; 840e02119d5SChris Mason char *name; 841e02119d5SChris Mason int name_len; 842e02119d5SChris Mason struct extent_buffer *leaf; 843e02119d5SChris Mason struct btrfs_key location; 844e02119d5SChris Mason int ret; 845e02119d5SChris Mason 846e02119d5SChris Mason leaf = path->nodes[0]; 847e02119d5SChris Mason 848e02119d5SChris Mason btrfs_dir_item_key_to_cpu(leaf, di, &location); 849e02119d5SChris Mason name_len = btrfs_dir_name_len(leaf, di); 850e02119d5SChris Mason name = kmalloc(name_len, GFP_NOFS); 8512a29edc6Sliubo if (!name) 8522a29edc6Sliubo return -ENOMEM; 8532a29edc6Sliubo 854e02119d5SChris 
Mason read_extent_buffer(leaf, name, (unsigned long)(di + 1), name_len); 855b3b4aa74SDavid Sterba btrfs_release_path(path); 856e02119d5SChris Mason 857e02119d5SChris Mason inode = read_one_inode(root, location.objectid); 858c00e9493STsutomu Itoh if (!inode) { 8593650860bSJosef Bacik ret = -EIO; 8603650860bSJosef Bacik goto out; 861c00e9493STsutomu Itoh } 862e02119d5SChris Mason 863ec051c0fSYan Zheng ret = link_to_fixup_dir(trans, root, path, location.objectid); 8643650860bSJosef Bacik if (ret) 8653650860bSJosef Bacik goto out; 86612fcfd22SChris Mason 867207e7d92SNikolay Borisov ret = btrfs_unlink_inode(trans, root, dir, BTRFS_I(inode), name, 868207e7d92SNikolay Borisov name_len); 8693650860bSJosef Bacik if (ret) 8703650860bSJosef Bacik goto out; 871ada9af21SFilipe David Borba Manana else 872e5c304e6SNikolay Borisov ret = btrfs_run_delayed_items(trans); 8733650860bSJosef Bacik out: 8743650860bSJosef Bacik kfree(name); 8753650860bSJosef Bacik iput(inode); 876e02119d5SChris Mason return ret; 877e02119d5SChris Mason } 878e02119d5SChris Mason 879e02119d5SChris Mason /* 880e02119d5SChris Mason * helper function to see if a given name and sequence number found 881e02119d5SChris Mason * in an inode back reference are already in a directory and correctly 882e02119d5SChris Mason * point to this inode 883e02119d5SChris Mason */ 884e02119d5SChris Mason static noinline int inode_in_dir(struct btrfs_root *root, 885e02119d5SChris Mason struct btrfs_path *path, 886e02119d5SChris Mason u64 dirid, u64 objectid, u64 index, 887e02119d5SChris Mason const char *name, int name_len) 888e02119d5SChris Mason { 889e02119d5SChris Mason struct btrfs_dir_item *di; 890e02119d5SChris Mason struct btrfs_key location; 891e02119d5SChris Mason int match = 0; 892e02119d5SChris Mason 893e02119d5SChris Mason di = btrfs_lookup_dir_index_item(NULL, root, path, dirid, 894e02119d5SChris Mason index, name, name_len, 0); 895e02119d5SChris Mason if (di && !IS_ERR(di)) { 896e02119d5SChris Mason 
btrfs_dir_item_key_to_cpu(path->nodes[0], di, &location); 897e02119d5SChris Mason if (location.objectid != objectid) 898e02119d5SChris Mason goto out; 899e02119d5SChris Mason } else 900e02119d5SChris Mason goto out; 901b3b4aa74SDavid Sterba btrfs_release_path(path); 902e02119d5SChris Mason 903e02119d5SChris Mason di = btrfs_lookup_dir_item(NULL, root, path, dirid, name, name_len, 0); 904e02119d5SChris Mason if (di && !IS_ERR(di)) { 905e02119d5SChris Mason btrfs_dir_item_key_to_cpu(path->nodes[0], di, &location); 906e02119d5SChris Mason if (location.objectid != objectid) 907e02119d5SChris Mason goto out; 908e02119d5SChris Mason } else 909e02119d5SChris Mason goto out; 910e02119d5SChris Mason match = 1; 911e02119d5SChris Mason out: 912b3b4aa74SDavid Sterba btrfs_release_path(path); 913e02119d5SChris Mason return match; 914e02119d5SChris Mason } 915e02119d5SChris Mason 916e02119d5SChris Mason /* 917e02119d5SChris Mason * helper function to check a log tree for a named back reference in 918e02119d5SChris Mason * an inode. This is used to decide if a back reference that is 919e02119d5SChris Mason * found in the subvolume conflicts with what we find in the log. 920e02119d5SChris Mason * 921e02119d5SChris Mason * inode backreferences may have multiple refs in a single item, 922e02119d5SChris Mason * during replay we process one reference at a time, and we don't 923e02119d5SChris Mason * want to delete valid links to a file from the subvolume if that 924e02119d5SChris Mason * link is also in the log. 
925e02119d5SChris Mason */ 926e02119d5SChris Mason static noinline int backref_in_log(struct btrfs_root *log, 927e02119d5SChris Mason struct btrfs_key *key, 928f186373fSMark Fasheh u64 ref_objectid, 929df8d116fSFilipe Manana const char *name, int namelen) 930e02119d5SChris Mason { 931e02119d5SChris Mason struct btrfs_path *path; 932e02119d5SChris Mason struct btrfs_inode_ref *ref; 933e02119d5SChris Mason unsigned long ptr; 934e02119d5SChris Mason unsigned long ptr_end; 935e02119d5SChris Mason unsigned long name_ptr; 936e02119d5SChris Mason int found_name_len; 937e02119d5SChris Mason int item_size; 938e02119d5SChris Mason int ret; 939e02119d5SChris Mason int match = 0; 940e02119d5SChris Mason 941e02119d5SChris Mason path = btrfs_alloc_path(); 9422a29edc6Sliubo if (!path) 9432a29edc6Sliubo return -ENOMEM; 9442a29edc6Sliubo 945e02119d5SChris Mason ret = btrfs_search_slot(NULL, log, key, path, 0, 0); 946e02119d5SChris Mason if (ret != 0) 947e02119d5SChris Mason goto out; 948e02119d5SChris Mason 949e02119d5SChris Mason ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]); 950f186373fSMark Fasheh 951f186373fSMark Fasheh if (key->type == BTRFS_INODE_EXTREF_KEY) { 9521f250e92SFilipe Manana if (btrfs_find_name_in_ext_backref(path->nodes[0], 9531f250e92SFilipe Manana path->slots[0], 9541f250e92SFilipe Manana ref_objectid, 955f186373fSMark Fasheh name, namelen, NULL)) 956f186373fSMark Fasheh match = 1; 957f186373fSMark Fasheh 958f186373fSMark Fasheh goto out; 959f186373fSMark Fasheh } 960f186373fSMark Fasheh 961f186373fSMark Fasheh item_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]); 962e02119d5SChris Mason ptr_end = ptr + item_size; 963e02119d5SChris Mason while (ptr < ptr_end) { 964e02119d5SChris Mason ref = (struct btrfs_inode_ref *)ptr; 965e02119d5SChris Mason found_name_len = btrfs_inode_ref_name_len(path->nodes[0], ref); 966e02119d5SChris Mason if (found_name_len == namelen) { 967e02119d5SChris Mason name_ptr = (unsigned long)(ref + 1); 
968e02119d5SChris Mason ret = memcmp_extent_buffer(path->nodes[0], name, 969e02119d5SChris Mason name_ptr, namelen); 970e02119d5SChris Mason if (ret == 0) { 971e02119d5SChris Mason match = 1; 972e02119d5SChris Mason goto out; 973e02119d5SChris Mason } 974e02119d5SChris Mason } 975e02119d5SChris Mason ptr = (unsigned long)(ref + 1) + found_name_len; 976e02119d5SChris Mason } 977e02119d5SChris Mason out: 978e02119d5SChris Mason btrfs_free_path(path); 979e02119d5SChris Mason return match; 980e02119d5SChris Mason } 981e02119d5SChris Mason 9825a1d7843SJan Schmidt static inline int __add_inode_ref(struct btrfs_trans_handle *trans, 9835a1d7843SJan Schmidt struct btrfs_root *root, 9845a1d7843SJan Schmidt struct btrfs_path *path, 9855a1d7843SJan Schmidt struct btrfs_root *log_root, 98694c91a1fSNikolay Borisov struct btrfs_inode *dir, 98794c91a1fSNikolay Borisov struct btrfs_inode *inode, 988f186373fSMark Fasheh u64 inode_objectid, u64 parent_objectid, 989f186373fSMark Fasheh u64 ref_index, char *name, int namelen, 990f186373fSMark Fasheh int *search_done) 9915a1d7843SJan Schmidt { 9925a1d7843SJan Schmidt int ret; 9935a1d7843SJan Schmidt char *victim_name; 9945a1d7843SJan Schmidt int victim_name_len; 995f186373fSMark Fasheh struct extent_buffer *leaf; 996f186373fSMark Fasheh struct btrfs_dir_item *di; 997f186373fSMark Fasheh struct btrfs_key search_key; 998f186373fSMark Fasheh struct btrfs_inode_extref *extref; 999f186373fSMark Fasheh 1000f186373fSMark Fasheh again: 1001f186373fSMark Fasheh /* Search old style refs */ 1002f186373fSMark Fasheh search_key.objectid = inode_objectid; 1003f186373fSMark Fasheh search_key.type = BTRFS_INODE_REF_KEY; 1004f186373fSMark Fasheh search_key.offset = parent_objectid; 1005f186373fSMark Fasheh ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0); 1006f186373fSMark Fasheh if (ret == 0) { 10075a1d7843SJan Schmidt struct btrfs_inode_ref *victim_ref; 10085a1d7843SJan Schmidt unsigned long ptr; 10095a1d7843SJan Schmidt unsigned long 
ptr_end; 1010f186373fSMark Fasheh 1011f186373fSMark Fasheh leaf = path->nodes[0]; 10125a1d7843SJan Schmidt 10135a1d7843SJan Schmidt /* are we trying to overwrite a back ref for the root directory 10145a1d7843SJan Schmidt * if so, just jump out, we're done 10155a1d7843SJan Schmidt */ 1016f186373fSMark Fasheh if (search_key.objectid == search_key.offset) 10175a1d7843SJan Schmidt return 1; 10185a1d7843SJan Schmidt 10195a1d7843SJan Schmidt /* check all the names in this back reference to see 10205a1d7843SJan Schmidt * if they are in the log. if so, we allow them to stay 10215a1d7843SJan Schmidt * otherwise they must be unlinked as a conflict 10225a1d7843SJan Schmidt */ 10235a1d7843SJan Schmidt ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); 10245a1d7843SJan Schmidt ptr_end = ptr + btrfs_item_size_nr(leaf, path->slots[0]); 10255a1d7843SJan Schmidt while (ptr < ptr_end) { 10265a1d7843SJan Schmidt victim_ref = (struct btrfs_inode_ref *)ptr; 10275a1d7843SJan Schmidt victim_name_len = btrfs_inode_ref_name_len(leaf, 10285a1d7843SJan Schmidt victim_ref); 10295a1d7843SJan Schmidt victim_name = kmalloc(victim_name_len, GFP_NOFS); 10303650860bSJosef Bacik if (!victim_name) 10313650860bSJosef Bacik return -ENOMEM; 10325a1d7843SJan Schmidt 10335a1d7843SJan Schmidt read_extent_buffer(leaf, victim_name, 10345a1d7843SJan Schmidt (unsigned long)(victim_ref + 1), 10355a1d7843SJan Schmidt victim_name_len); 10365a1d7843SJan Schmidt 1037f186373fSMark Fasheh if (!backref_in_log(log_root, &search_key, 1038f186373fSMark Fasheh parent_objectid, 1039f186373fSMark Fasheh victim_name, 10405a1d7843SJan Schmidt victim_name_len)) { 104194c91a1fSNikolay Borisov inc_nlink(&inode->vfs_inode); 10425a1d7843SJan Schmidt btrfs_release_path(path); 10435a1d7843SJan Schmidt 104494c91a1fSNikolay Borisov ret = btrfs_unlink_inode(trans, root, dir, inode, 10454ec5934eSNikolay Borisov victim_name, victim_name_len); 1046f186373fSMark Fasheh kfree(victim_name); 10473650860bSJosef Bacik if (ret) 
10483650860bSJosef Bacik return ret; 1049e5c304e6SNikolay Borisov ret = btrfs_run_delayed_items(trans); 1050ada9af21SFilipe David Borba Manana if (ret) 1051ada9af21SFilipe David Borba Manana return ret; 1052f186373fSMark Fasheh *search_done = 1; 1053f186373fSMark Fasheh goto again; 10545a1d7843SJan Schmidt } 10555a1d7843SJan Schmidt kfree(victim_name); 1056f186373fSMark Fasheh 10575a1d7843SJan Schmidt ptr = (unsigned long)(victim_ref + 1) + victim_name_len; 10585a1d7843SJan Schmidt } 10595a1d7843SJan Schmidt 10605a1d7843SJan Schmidt /* 10615a1d7843SJan Schmidt * NOTE: we have searched root tree and checked the 1062bb7ab3b9SAdam Buchbinder * corresponding ref, it does not need to check again. 10635a1d7843SJan Schmidt */ 10645a1d7843SJan Schmidt *search_done = 1; 10655a1d7843SJan Schmidt } 10665a1d7843SJan Schmidt btrfs_release_path(path); 10675a1d7843SJan Schmidt 1068f186373fSMark Fasheh /* Same search but for extended refs */ 1069f186373fSMark Fasheh extref = btrfs_lookup_inode_extref(NULL, root, path, name, namelen, 1070f186373fSMark Fasheh inode_objectid, parent_objectid, 0, 1071f186373fSMark Fasheh 0); 1072f186373fSMark Fasheh if (!IS_ERR_OR_NULL(extref)) { 1073f186373fSMark Fasheh u32 item_size; 1074f186373fSMark Fasheh u32 cur_offset = 0; 1075f186373fSMark Fasheh unsigned long base; 1076f186373fSMark Fasheh struct inode *victim_parent; 1077f186373fSMark Fasheh 1078f186373fSMark Fasheh leaf = path->nodes[0]; 1079f186373fSMark Fasheh 1080f186373fSMark Fasheh item_size = btrfs_item_size_nr(leaf, path->slots[0]); 1081f186373fSMark Fasheh base = btrfs_item_ptr_offset(leaf, path->slots[0]); 1082f186373fSMark Fasheh 1083f186373fSMark Fasheh while (cur_offset < item_size) { 1084dd9ef135SQuentin Casasnovas extref = (struct btrfs_inode_extref *)(base + cur_offset); 1085f186373fSMark Fasheh 1086f186373fSMark Fasheh victim_name_len = btrfs_inode_extref_name_len(leaf, extref); 1087f186373fSMark Fasheh 1088f186373fSMark Fasheh if (btrfs_inode_extref_parent(leaf, extref) != 
parent_objectid) 1089f186373fSMark Fasheh goto next; 1090f186373fSMark Fasheh 1091f186373fSMark Fasheh victim_name = kmalloc(victim_name_len, GFP_NOFS); 10923650860bSJosef Bacik if (!victim_name) 10933650860bSJosef Bacik return -ENOMEM; 1094f186373fSMark Fasheh read_extent_buffer(leaf, victim_name, (unsigned long)&extref->name, 1095f186373fSMark Fasheh victim_name_len); 1096f186373fSMark Fasheh 1097f186373fSMark Fasheh search_key.objectid = inode_objectid; 1098f186373fSMark Fasheh search_key.type = BTRFS_INODE_EXTREF_KEY; 1099f186373fSMark Fasheh search_key.offset = btrfs_extref_hash(parent_objectid, 1100f186373fSMark Fasheh victim_name, 1101f186373fSMark Fasheh victim_name_len); 1102f186373fSMark Fasheh ret = 0; 1103f186373fSMark Fasheh if (!backref_in_log(log_root, &search_key, 1104f186373fSMark Fasheh parent_objectid, victim_name, 1105f186373fSMark Fasheh victim_name_len)) { 1106f186373fSMark Fasheh ret = -ENOENT; 1107f186373fSMark Fasheh victim_parent = read_one_inode(root, 1108f186373fSMark Fasheh parent_objectid); 1109f186373fSMark Fasheh if (victim_parent) { 111094c91a1fSNikolay Borisov inc_nlink(&inode->vfs_inode); 1111f186373fSMark Fasheh btrfs_release_path(path); 1112f186373fSMark Fasheh 1113f186373fSMark Fasheh ret = btrfs_unlink_inode(trans, root, 11144ec5934eSNikolay Borisov BTRFS_I(victim_parent), 111594c91a1fSNikolay Borisov inode, 1116f186373fSMark Fasheh victim_name, 1117f186373fSMark Fasheh victim_name_len); 1118ada9af21SFilipe David Borba Manana if (!ret) 1119ada9af21SFilipe David Borba Manana ret = btrfs_run_delayed_items( 1120e5c304e6SNikolay Borisov trans); 1121f186373fSMark Fasheh } 1122f186373fSMark Fasheh iput(victim_parent); 1123f186373fSMark Fasheh kfree(victim_name); 11243650860bSJosef Bacik if (ret) 11253650860bSJosef Bacik return ret; 1126f186373fSMark Fasheh *search_done = 1; 1127f186373fSMark Fasheh goto again; 1128f186373fSMark Fasheh } 1129f186373fSMark Fasheh kfree(victim_name); 1130f186373fSMark Fasheh next: 1131f186373fSMark 
Fasheh cur_offset += victim_name_len + sizeof(*extref); 1132f186373fSMark Fasheh } 1133f186373fSMark Fasheh *search_done = 1; 1134f186373fSMark Fasheh } 1135f186373fSMark Fasheh btrfs_release_path(path); 1136f186373fSMark Fasheh 11375a1d7843SJan Schmidt /* look for a conflicting sequence number */ 113894c91a1fSNikolay Borisov di = btrfs_lookup_dir_index_item(trans, root, path, btrfs_ino(dir), 1139f186373fSMark Fasheh ref_index, name, namelen, 0); 11405a1d7843SJan Schmidt if (di && !IS_ERR(di)) { 114194c91a1fSNikolay Borisov ret = drop_one_dir_item(trans, root, path, dir, di); 11423650860bSJosef Bacik if (ret) 11433650860bSJosef Bacik return ret; 11445a1d7843SJan Schmidt } 11455a1d7843SJan Schmidt btrfs_release_path(path); 11465a1d7843SJan Schmidt 11475a1d7843SJan Schmidt /* look for a conflicing name */ 114894c91a1fSNikolay Borisov di = btrfs_lookup_dir_item(trans, root, path, btrfs_ino(dir), 11495a1d7843SJan Schmidt name, namelen, 0); 11505a1d7843SJan Schmidt if (di && !IS_ERR(di)) { 115194c91a1fSNikolay Borisov ret = drop_one_dir_item(trans, root, path, dir, di); 11523650860bSJosef Bacik if (ret) 11533650860bSJosef Bacik return ret; 11545a1d7843SJan Schmidt } 11555a1d7843SJan Schmidt btrfs_release_path(path); 11565a1d7843SJan Schmidt 11575a1d7843SJan Schmidt return 0; 11585a1d7843SJan Schmidt } 1159e02119d5SChris Mason 1160bae15d95SQu Wenruo static int extref_get_fields(struct extent_buffer *eb, unsigned long ref_ptr, 1161bae15d95SQu Wenruo u32 *namelen, char **name, u64 *index, 1162bae15d95SQu Wenruo u64 *parent_objectid) 1163f186373fSMark Fasheh { 1164f186373fSMark Fasheh struct btrfs_inode_extref *extref; 1165f186373fSMark Fasheh 1166f186373fSMark Fasheh extref = (struct btrfs_inode_extref *)ref_ptr; 1167f186373fSMark Fasheh 1168f186373fSMark Fasheh *namelen = btrfs_inode_extref_name_len(eb, extref); 1169f186373fSMark Fasheh *name = kmalloc(*namelen, GFP_NOFS); 1170f186373fSMark Fasheh if (*name == NULL) 1171f186373fSMark Fasheh return -ENOMEM; 
1172f186373fSMark Fasheh 1173f186373fSMark Fasheh read_extent_buffer(eb, *name, (unsigned long)&extref->name, 1174f186373fSMark Fasheh *namelen); 1175f186373fSMark Fasheh 11761f250e92SFilipe Manana if (index) 1177f186373fSMark Fasheh *index = btrfs_inode_extref_index(eb, extref); 1178f186373fSMark Fasheh if (parent_objectid) 1179f186373fSMark Fasheh *parent_objectid = btrfs_inode_extref_parent(eb, extref); 1180f186373fSMark Fasheh 1181f186373fSMark Fasheh return 0; 1182f186373fSMark Fasheh } 1183f186373fSMark Fasheh 1184bae15d95SQu Wenruo static int ref_get_fields(struct extent_buffer *eb, unsigned long ref_ptr, 1185bae15d95SQu Wenruo u32 *namelen, char **name, u64 *index) 1186f186373fSMark Fasheh { 1187f186373fSMark Fasheh struct btrfs_inode_ref *ref; 1188f186373fSMark Fasheh 1189f186373fSMark Fasheh ref = (struct btrfs_inode_ref *)ref_ptr; 1190f186373fSMark Fasheh 1191f186373fSMark Fasheh *namelen = btrfs_inode_ref_name_len(eb, ref); 1192f186373fSMark Fasheh *name = kmalloc(*namelen, GFP_NOFS); 1193f186373fSMark Fasheh if (*name == NULL) 1194f186373fSMark Fasheh return -ENOMEM; 1195f186373fSMark Fasheh 1196f186373fSMark Fasheh read_extent_buffer(eb, *name, (unsigned long)(ref + 1), *namelen); 1197f186373fSMark Fasheh 11981f250e92SFilipe Manana if (index) 1199f186373fSMark Fasheh *index = btrfs_inode_ref_index(eb, ref); 1200f186373fSMark Fasheh 1201f186373fSMark Fasheh return 0; 1202f186373fSMark Fasheh } 1203f186373fSMark Fasheh 1204e02119d5SChris Mason /* 12051f250e92SFilipe Manana * Take an inode reference item from the log tree and iterate all names from the 12061f250e92SFilipe Manana * inode reference item in the subvolume tree with the same key (if it exists). 12071f250e92SFilipe Manana * For any name that is not in the inode reference item from the log tree, do a 12081f250e92SFilipe Manana * proper unlink of that name (that is, remove its entry from the inode 12091f250e92SFilipe Manana * reference item and both dir index keys). 
12101f250e92SFilipe Manana */ 12111f250e92SFilipe Manana static int unlink_old_inode_refs(struct btrfs_trans_handle *trans, 12121f250e92SFilipe Manana struct btrfs_root *root, 12131f250e92SFilipe Manana struct btrfs_path *path, 12141f250e92SFilipe Manana struct btrfs_inode *inode, 12151f250e92SFilipe Manana struct extent_buffer *log_eb, 12161f250e92SFilipe Manana int log_slot, 12171f250e92SFilipe Manana struct btrfs_key *key) 12181f250e92SFilipe Manana { 12191f250e92SFilipe Manana int ret; 12201f250e92SFilipe Manana unsigned long ref_ptr; 12211f250e92SFilipe Manana unsigned long ref_end; 12221f250e92SFilipe Manana struct extent_buffer *eb; 12231f250e92SFilipe Manana 12241f250e92SFilipe Manana again: 12251f250e92SFilipe Manana btrfs_release_path(path); 12261f250e92SFilipe Manana ret = btrfs_search_slot(NULL, root, key, path, 0, 0); 12271f250e92SFilipe Manana if (ret > 0) { 12281f250e92SFilipe Manana ret = 0; 12291f250e92SFilipe Manana goto out; 12301f250e92SFilipe Manana } 12311f250e92SFilipe Manana if (ret < 0) 12321f250e92SFilipe Manana goto out; 12331f250e92SFilipe Manana 12341f250e92SFilipe Manana eb = path->nodes[0]; 12351f250e92SFilipe Manana ref_ptr = btrfs_item_ptr_offset(eb, path->slots[0]); 12361f250e92SFilipe Manana ref_end = ref_ptr + btrfs_item_size_nr(eb, path->slots[0]); 12371f250e92SFilipe Manana while (ref_ptr < ref_end) { 12381f250e92SFilipe Manana char *name = NULL; 12391f250e92SFilipe Manana int namelen; 12401f250e92SFilipe Manana u64 parent_id; 12411f250e92SFilipe Manana 12421f250e92SFilipe Manana if (key->type == BTRFS_INODE_EXTREF_KEY) { 12431f250e92SFilipe Manana ret = extref_get_fields(eb, ref_ptr, &namelen, &name, 12441f250e92SFilipe Manana NULL, &parent_id); 12451f250e92SFilipe Manana } else { 12461f250e92SFilipe Manana parent_id = key->offset; 12471f250e92SFilipe Manana ret = ref_get_fields(eb, ref_ptr, &namelen, &name, 12481f250e92SFilipe Manana NULL); 12491f250e92SFilipe Manana } 12501f250e92SFilipe Manana if (ret) 12511f250e92SFilipe 
Manana goto out; 12521f250e92SFilipe Manana 12531f250e92SFilipe Manana if (key->type == BTRFS_INODE_EXTREF_KEY) 12541f250e92SFilipe Manana ret = btrfs_find_name_in_ext_backref(log_eb, log_slot, 12551f250e92SFilipe Manana parent_id, name, 12561f250e92SFilipe Manana namelen, NULL); 12571f250e92SFilipe Manana else 12581f250e92SFilipe Manana ret = btrfs_find_name_in_backref(log_eb, log_slot, name, 12591f250e92SFilipe Manana namelen, NULL); 12601f250e92SFilipe Manana 12611f250e92SFilipe Manana if (!ret) { 12621f250e92SFilipe Manana struct inode *dir; 12631f250e92SFilipe Manana 12641f250e92SFilipe Manana btrfs_release_path(path); 12651f250e92SFilipe Manana dir = read_one_inode(root, parent_id); 12661f250e92SFilipe Manana if (!dir) { 12671f250e92SFilipe Manana ret = -ENOENT; 12681f250e92SFilipe Manana kfree(name); 12691f250e92SFilipe Manana goto out; 12701f250e92SFilipe Manana } 12711f250e92SFilipe Manana ret = btrfs_unlink_inode(trans, root, BTRFS_I(dir), 12721f250e92SFilipe Manana inode, name, namelen); 12731f250e92SFilipe Manana kfree(name); 12741f250e92SFilipe Manana iput(dir); 12751f250e92SFilipe Manana if (ret) 12761f250e92SFilipe Manana goto out; 12771f250e92SFilipe Manana goto again; 12781f250e92SFilipe Manana } 12791f250e92SFilipe Manana 12801f250e92SFilipe Manana kfree(name); 12811f250e92SFilipe Manana ref_ptr += namelen; 12821f250e92SFilipe Manana if (key->type == BTRFS_INODE_EXTREF_KEY) 12831f250e92SFilipe Manana ref_ptr += sizeof(struct btrfs_inode_extref); 12841f250e92SFilipe Manana else 12851f250e92SFilipe Manana ref_ptr += sizeof(struct btrfs_inode_ref); 12861f250e92SFilipe Manana } 12871f250e92SFilipe Manana ret = 0; 12881f250e92SFilipe Manana out: 12891f250e92SFilipe Manana btrfs_release_path(path); 12901f250e92SFilipe Manana return ret; 12911f250e92SFilipe Manana } 12921f250e92SFilipe Manana 12931f250e92SFilipe Manana /* 1294e02119d5SChris Mason * replay one inode back reference item found in the log tree. 
1295e02119d5SChris Mason * eb, slot and key refer to the buffer and key found in the log tree. 1296e02119d5SChris Mason * root is the destination we are replaying into, and path is for temp 1297e02119d5SChris Mason * use by this function. (it should be released on return). 1298e02119d5SChris Mason */ 1299e02119d5SChris Mason static noinline int add_inode_ref(struct btrfs_trans_handle *trans, 1300e02119d5SChris Mason struct btrfs_root *root, 1301e02119d5SChris Mason struct btrfs_root *log, 1302e02119d5SChris Mason struct btrfs_path *path, 1303e02119d5SChris Mason struct extent_buffer *eb, int slot, 1304e02119d5SChris Mason struct btrfs_key *key) 1305e02119d5SChris Mason { 130603b2f08bSGeyslan G. Bem struct inode *dir = NULL; 130703b2f08bSGeyslan G. Bem struct inode *inode = NULL; 1308e02119d5SChris Mason unsigned long ref_ptr; 1309e02119d5SChris Mason unsigned long ref_end; 131003b2f08bSGeyslan G. Bem char *name = NULL; 131134f3e4f2Sliubo int namelen; 131234f3e4f2Sliubo int ret; 1313c622ae60Sliubo int search_done = 0; 1314f186373fSMark Fasheh int log_ref_ver = 0; 1315f186373fSMark Fasheh u64 parent_objectid; 1316f186373fSMark Fasheh u64 inode_objectid; 1317f46dbe3dSChris Mason u64 ref_index = 0; 1318f186373fSMark Fasheh int ref_struct_size; 1319f186373fSMark Fasheh 1320f186373fSMark Fasheh ref_ptr = btrfs_item_ptr_offset(eb, slot); 1321f186373fSMark Fasheh ref_end = ref_ptr + btrfs_item_size_nr(eb, slot); 1322f186373fSMark Fasheh 1323f186373fSMark Fasheh if (key->type == BTRFS_INODE_EXTREF_KEY) { 1324f186373fSMark Fasheh struct btrfs_inode_extref *r; 1325f186373fSMark Fasheh 1326f186373fSMark Fasheh ref_struct_size = sizeof(struct btrfs_inode_extref); 1327f186373fSMark Fasheh log_ref_ver = 1; 1328f186373fSMark Fasheh r = (struct btrfs_inode_extref *)ref_ptr; 1329f186373fSMark Fasheh parent_objectid = btrfs_inode_extref_parent(eb, r); 1330f186373fSMark Fasheh } else { 1331f186373fSMark Fasheh ref_struct_size = sizeof(struct btrfs_inode_ref); 1332f186373fSMark Fasheh 
parent_objectid = key->offset; 1333f186373fSMark Fasheh } 1334f186373fSMark Fasheh inode_objectid = key->objectid; 1335e02119d5SChris Mason 1336e02119d5SChris Mason /* 1337e02119d5SChris Mason * it is possible that we didn't log all the parent directories 1338e02119d5SChris Mason * for a given inode. If we don't find the dir, just don't 1339e02119d5SChris Mason * copy the back ref in. The link count fixup code will take 1340e02119d5SChris Mason * care of the rest 1341e02119d5SChris Mason */ 1342f186373fSMark Fasheh dir = read_one_inode(root, parent_objectid); 134303b2f08bSGeyslan G. Bem if (!dir) { 134403b2f08bSGeyslan G. Bem ret = -ENOENT; 134503b2f08bSGeyslan G. Bem goto out; 134603b2f08bSGeyslan G. Bem } 1347e02119d5SChris Mason 1348f186373fSMark Fasheh inode = read_one_inode(root, inode_objectid); 1349c00e9493STsutomu Itoh if (!inode) { 135003b2f08bSGeyslan G. Bem ret = -EIO; 135103b2f08bSGeyslan G. Bem goto out; 1352c00e9493STsutomu Itoh } 1353e02119d5SChris Mason 13545a1d7843SJan Schmidt while (ref_ptr < ref_end) { 1355f186373fSMark Fasheh if (log_ref_ver) { 1356bae15d95SQu Wenruo ret = extref_get_fields(eb, ref_ptr, &namelen, &name, 1357bae15d95SQu Wenruo &ref_index, &parent_objectid); 1358f186373fSMark Fasheh /* 1359f186373fSMark Fasheh * parent object can change from one array 1360f186373fSMark Fasheh * item to another. 1361f186373fSMark Fasheh */ 1362f186373fSMark Fasheh if (!dir) 1363f186373fSMark Fasheh dir = read_one_inode(root, parent_objectid); 136403b2f08bSGeyslan G. Bem if (!dir) { 136503b2f08bSGeyslan G. Bem ret = -ENOENT; 136603b2f08bSGeyslan G. Bem goto out; 136703b2f08bSGeyslan G. Bem } 1368f186373fSMark Fasheh } else { 1369bae15d95SQu Wenruo ret = ref_get_fields(eb, ref_ptr, &namelen, &name, 1370bae15d95SQu Wenruo &ref_index); 1371f186373fSMark Fasheh } 1372f186373fSMark Fasheh if (ret) 137303b2f08bSGeyslan G. 
Bem goto out; 1374e02119d5SChris Mason 1375e02119d5SChris Mason /* if we already have a perfect match, we're done */ 1376f85b7379SDavid Sterba if (!inode_in_dir(root, path, btrfs_ino(BTRFS_I(dir)), 1377f85b7379SDavid Sterba btrfs_ino(BTRFS_I(inode)), ref_index, 1378f85b7379SDavid Sterba name, namelen)) { 13795a1d7843SJan Schmidt /* 13805a1d7843SJan Schmidt * look for a conflicting back reference in the 13815a1d7843SJan Schmidt * metadata. if we find one we have to unlink that name 13825a1d7843SJan Schmidt * of the file before we add our new link. Later on, we 13835a1d7843SJan Schmidt * overwrite any existing back reference, and we don't 13845a1d7843SJan Schmidt * want to create dangling pointers in the directory. 13855a1d7843SJan Schmidt */ 13865a1d7843SJan Schmidt 13875a1d7843SJan Schmidt if (!search_done) { 13885a1d7843SJan Schmidt ret = __add_inode_ref(trans, root, path, log, 138994c91a1fSNikolay Borisov BTRFS_I(dir), 1390d75eefdfSDavid Sterba BTRFS_I(inode), 1391f186373fSMark Fasheh inode_objectid, 1392f186373fSMark Fasheh parent_objectid, 1393f186373fSMark Fasheh ref_index, name, namelen, 13945a1d7843SJan Schmidt &search_done); 139503b2f08bSGeyslan G. Bem if (ret) { 139603b2f08bSGeyslan G. Bem if (ret == 1) 13973650860bSJosef Bacik ret = 0; 1398e02119d5SChris Mason goto out; 13993650860bSJosef Bacik } 140034f3e4f2Sliubo } 140134f3e4f2Sliubo 1402e02119d5SChris Mason /* insert our name */ 1403db0a669fSNikolay Borisov ret = btrfs_add_link(trans, BTRFS_I(dir), 1404db0a669fSNikolay Borisov BTRFS_I(inode), 1405db0a669fSNikolay Borisov name, namelen, 0, ref_index); 14063650860bSJosef Bacik if (ret) 14073650860bSJosef Bacik goto out; 1408e02119d5SChris Mason 1409e02119d5SChris Mason btrfs_update_inode(trans, root, inode); 14105a1d7843SJan Schmidt } 1411e02119d5SChris Mason 1412f186373fSMark Fasheh ref_ptr = (unsigned long)(ref_ptr + ref_struct_size) + namelen; 1413e02119d5SChris Mason kfree(name); 141403b2f08bSGeyslan G. 
Bem name = NULL; 1415f186373fSMark Fasheh if (log_ref_ver) { 1416f186373fSMark Fasheh iput(dir); 1417f186373fSMark Fasheh dir = NULL; 1418f186373fSMark Fasheh } 14195a1d7843SJan Schmidt } 1420e02119d5SChris Mason 14211f250e92SFilipe Manana /* 14221f250e92SFilipe Manana * Before we overwrite the inode reference item in the subvolume tree 14231f250e92SFilipe Manana * with the item from the log tree, we must unlink all names from the 14241f250e92SFilipe Manana * parent directory that are in the subvolume's tree inode reference 14251f250e92SFilipe Manana * item, otherwise we end up with an inconsistent subvolume tree where 14261f250e92SFilipe Manana * dir index entries exist for a name but there is no inode reference 14271f250e92SFilipe Manana * item with the same name. 14281f250e92SFilipe Manana */ 14291f250e92SFilipe Manana ret = unlink_old_inode_refs(trans, root, path, BTRFS_I(inode), eb, slot, 14301f250e92SFilipe Manana key); 14311f250e92SFilipe Manana if (ret) 14321f250e92SFilipe Manana goto out; 14331f250e92SFilipe Manana 1434e02119d5SChris Mason /* finally write the back reference in the inode */ 1435e02119d5SChris Mason ret = overwrite_item(trans, root, path, eb, slot, key); 14365a1d7843SJan Schmidt out: 1437b3b4aa74SDavid Sterba btrfs_release_path(path); 143803b2f08bSGeyslan G. 
Bem kfree(name); 1439e02119d5SChris Mason iput(dir); 1440e02119d5SChris Mason iput(inode); 14413650860bSJosef Bacik return ret; 1442e02119d5SChris Mason } 1443e02119d5SChris Mason 1444c71bf099SYan, Zheng static int insert_orphan_item(struct btrfs_trans_handle *trans, 14459c4f61f0SDavid Sterba struct btrfs_root *root, u64 ino) 1446c71bf099SYan, Zheng { 1447c71bf099SYan, Zheng int ret; 1448381cf658SDavid Sterba 14499c4f61f0SDavid Sterba ret = btrfs_insert_orphan_item(trans, root, ino); 14509c4f61f0SDavid Sterba if (ret == -EEXIST) 14519c4f61f0SDavid Sterba ret = 0; 1452381cf658SDavid Sterba 1453c71bf099SYan, Zheng return ret; 1454c71bf099SYan, Zheng } 1455c71bf099SYan, Zheng 1456f186373fSMark Fasheh static int count_inode_extrefs(struct btrfs_root *root, 145736283658SNikolay Borisov struct btrfs_inode *inode, struct btrfs_path *path) 1458e02119d5SChris Mason { 1459f186373fSMark Fasheh int ret = 0; 1460f186373fSMark Fasheh int name_len; 1461f186373fSMark Fasheh unsigned int nlink = 0; 1462f186373fSMark Fasheh u32 item_size; 1463f186373fSMark Fasheh u32 cur_offset = 0; 146436283658SNikolay Borisov u64 inode_objectid = btrfs_ino(inode); 1465f186373fSMark Fasheh u64 offset = 0; 1466f186373fSMark Fasheh unsigned long ptr; 1467f186373fSMark Fasheh struct btrfs_inode_extref *extref; 1468f186373fSMark Fasheh struct extent_buffer *leaf; 1469f186373fSMark Fasheh 1470f186373fSMark Fasheh while (1) { 1471f186373fSMark Fasheh ret = btrfs_find_one_extref(root, inode_objectid, offset, path, 1472f186373fSMark Fasheh &extref, &offset); 1473f186373fSMark Fasheh if (ret) 1474f186373fSMark Fasheh break; 1475f186373fSMark Fasheh 1476f186373fSMark Fasheh leaf = path->nodes[0]; 1477f186373fSMark Fasheh item_size = btrfs_item_size_nr(leaf, path->slots[0]); 1478f186373fSMark Fasheh ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); 14792c2c452bSFilipe Manana cur_offset = 0; 1480f186373fSMark Fasheh 1481f186373fSMark Fasheh while (cur_offset < item_size) { 1482f186373fSMark Fasheh extref = 
(struct btrfs_inode_extref *) (ptr + cur_offset); 1483f186373fSMark Fasheh name_len = btrfs_inode_extref_name_len(leaf, extref); 1484f186373fSMark Fasheh 1485f186373fSMark Fasheh nlink++; 1486f186373fSMark Fasheh 1487f186373fSMark Fasheh cur_offset += name_len + sizeof(*extref); 1488f186373fSMark Fasheh } 1489f186373fSMark Fasheh 1490f186373fSMark Fasheh offset++; 1491f186373fSMark Fasheh btrfs_release_path(path); 1492f186373fSMark Fasheh } 1493f186373fSMark Fasheh btrfs_release_path(path); 1494f186373fSMark Fasheh 14952c2c452bSFilipe Manana if (ret < 0 && ret != -ENOENT) 1496f186373fSMark Fasheh return ret; 1497f186373fSMark Fasheh return nlink; 1498f186373fSMark Fasheh } 1499f186373fSMark Fasheh 1500f186373fSMark Fasheh static int count_inode_refs(struct btrfs_root *root, 1501f329e319SNikolay Borisov struct btrfs_inode *inode, struct btrfs_path *path) 1502f186373fSMark Fasheh { 1503e02119d5SChris Mason int ret; 1504e02119d5SChris Mason struct btrfs_key key; 1505f186373fSMark Fasheh unsigned int nlink = 0; 1506e02119d5SChris Mason unsigned long ptr; 1507e02119d5SChris Mason unsigned long ptr_end; 1508e02119d5SChris Mason int name_len; 1509f329e319SNikolay Borisov u64 ino = btrfs_ino(inode); 1510e02119d5SChris Mason 151133345d01SLi Zefan key.objectid = ino; 1512e02119d5SChris Mason key.type = BTRFS_INODE_REF_KEY; 1513e02119d5SChris Mason key.offset = (u64)-1; 1514e02119d5SChris Mason 1515e02119d5SChris Mason while (1) { 1516e02119d5SChris Mason ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 1517e02119d5SChris Mason if (ret < 0) 1518e02119d5SChris Mason break; 1519e02119d5SChris Mason if (ret > 0) { 1520e02119d5SChris Mason if (path->slots[0] == 0) 1521e02119d5SChris Mason break; 1522e02119d5SChris Mason path->slots[0]--; 1523e02119d5SChris Mason } 1524e93ae26fSFilipe David Borba Manana process_slot: 1525e02119d5SChris Mason btrfs_item_key_to_cpu(path->nodes[0], &key, 1526e02119d5SChris Mason path->slots[0]); 152733345d01SLi Zefan if (key.objectid != ino || 
1528e02119d5SChris Mason key.type != BTRFS_INODE_REF_KEY) 1529e02119d5SChris Mason break; 1530e02119d5SChris Mason ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]); 1531e02119d5SChris Mason ptr_end = ptr + btrfs_item_size_nr(path->nodes[0], 1532e02119d5SChris Mason path->slots[0]); 1533e02119d5SChris Mason while (ptr < ptr_end) { 1534e02119d5SChris Mason struct btrfs_inode_ref *ref; 1535e02119d5SChris Mason 1536e02119d5SChris Mason ref = (struct btrfs_inode_ref *)ptr; 1537e02119d5SChris Mason name_len = btrfs_inode_ref_name_len(path->nodes[0], 1538e02119d5SChris Mason ref); 1539e02119d5SChris Mason ptr = (unsigned long)(ref + 1) + name_len; 1540e02119d5SChris Mason nlink++; 1541e02119d5SChris Mason } 1542e02119d5SChris Mason 1543e02119d5SChris Mason if (key.offset == 0) 1544e02119d5SChris Mason break; 1545e93ae26fSFilipe David Borba Manana if (path->slots[0] > 0) { 1546e93ae26fSFilipe David Borba Manana path->slots[0]--; 1547e93ae26fSFilipe David Borba Manana goto process_slot; 1548e93ae26fSFilipe David Borba Manana } 1549e02119d5SChris Mason key.offset--; 1550b3b4aa74SDavid Sterba btrfs_release_path(path); 1551e02119d5SChris Mason } 1552b3b4aa74SDavid Sterba btrfs_release_path(path); 1553f186373fSMark Fasheh 1554f186373fSMark Fasheh return nlink; 1555f186373fSMark Fasheh } 1556f186373fSMark Fasheh 1557f186373fSMark Fasheh /* 1558f186373fSMark Fasheh * There are a few corners where the link count of the file can't 1559f186373fSMark Fasheh * be properly maintained during replay. So, instead of adding 1560f186373fSMark Fasheh * lots of complexity to the log code, we just scan the backrefs 1561f186373fSMark Fasheh * for any file that has been through replay. 1562f186373fSMark Fasheh * 1563f186373fSMark Fasheh * The scan will update the link count on the inode to reflect the 1564f186373fSMark Fasheh * number of back refs found. If it goes down to zero, the iput 1565f186373fSMark Fasheh * will free the inode. 
1566f186373fSMark Fasheh */ 1567f186373fSMark Fasheh static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans, 1568f186373fSMark Fasheh struct btrfs_root *root, 1569f186373fSMark Fasheh struct inode *inode) 1570f186373fSMark Fasheh { 1571f186373fSMark Fasheh struct btrfs_path *path; 1572f186373fSMark Fasheh int ret; 1573f186373fSMark Fasheh u64 nlink = 0; 15744a0cc7caSNikolay Borisov u64 ino = btrfs_ino(BTRFS_I(inode)); 1575f186373fSMark Fasheh 1576f186373fSMark Fasheh path = btrfs_alloc_path(); 1577f186373fSMark Fasheh if (!path) 1578f186373fSMark Fasheh return -ENOMEM; 1579f186373fSMark Fasheh 1580f329e319SNikolay Borisov ret = count_inode_refs(root, BTRFS_I(inode), path); 1581f186373fSMark Fasheh if (ret < 0) 1582f186373fSMark Fasheh goto out; 1583f186373fSMark Fasheh 1584f186373fSMark Fasheh nlink = ret; 1585f186373fSMark Fasheh 158636283658SNikolay Borisov ret = count_inode_extrefs(root, BTRFS_I(inode), path); 1587f186373fSMark Fasheh if (ret < 0) 1588f186373fSMark Fasheh goto out; 1589f186373fSMark Fasheh 1590f186373fSMark Fasheh nlink += ret; 1591f186373fSMark Fasheh 1592f186373fSMark Fasheh ret = 0; 1593f186373fSMark Fasheh 1594e02119d5SChris Mason if (nlink != inode->i_nlink) { 1595bfe86848SMiklos Szeredi set_nlink(inode, nlink); 1596e02119d5SChris Mason btrfs_update_inode(trans, root, inode); 1597e02119d5SChris Mason } 15988d5bf1cbSChris Mason BTRFS_I(inode)->index_cnt = (u64)-1; 1599e02119d5SChris Mason 1600c71bf099SYan, Zheng if (inode->i_nlink == 0) { 1601c71bf099SYan, Zheng if (S_ISDIR(inode->i_mode)) { 160212fcfd22SChris Mason ret = replay_dir_deletes(trans, root, NULL, path, 160333345d01SLi Zefan ino, 1); 16043650860bSJosef Bacik if (ret) 16053650860bSJosef Bacik goto out; 160612fcfd22SChris Mason } 160733345d01SLi Zefan ret = insert_orphan_item(trans, root, ino); 1608c71bf099SYan, Zheng } 160912fcfd22SChris Mason 1610f186373fSMark Fasheh out: 1611f186373fSMark Fasheh btrfs_free_path(path); 1612f186373fSMark Fasheh return ret; 
1613e02119d5SChris Mason } 1614e02119d5SChris Mason 1615e02119d5SChris Mason static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans, 1616e02119d5SChris Mason struct btrfs_root *root, 1617e02119d5SChris Mason struct btrfs_path *path) 1618e02119d5SChris Mason { 1619e02119d5SChris Mason int ret; 1620e02119d5SChris Mason struct btrfs_key key; 1621e02119d5SChris Mason struct inode *inode; 1622e02119d5SChris Mason 1623e02119d5SChris Mason key.objectid = BTRFS_TREE_LOG_FIXUP_OBJECTID; 1624e02119d5SChris Mason key.type = BTRFS_ORPHAN_ITEM_KEY; 1625e02119d5SChris Mason key.offset = (u64)-1; 1626e02119d5SChris Mason while (1) { 1627e02119d5SChris Mason ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 1628e02119d5SChris Mason if (ret < 0) 1629e02119d5SChris Mason break; 1630e02119d5SChris Mason 1631e02119d5SChris Mason if (ret == 1) { 1632e02119d5SChris Mason if (path->slots[0] == 0) 1633e02119d5SChris Mason break; 1634e02119d5SChris Mason path->slots[0]--; 1635e02119d5SChris Mason } 1636e02119d5SChris Mason 1637e02119d5SChris Mason btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 1638e02119d5SChris Mason if (key.objectid != BTRFS_TREE_LOG_FIXUP_OBJECTID || 1639e02119d5SChris Mason key.type != BTRFS_ORPHAN_ITEM_KEY) 1640e02119d5SChris Mason break; 1641e02119d5SChris Mason 1642e02119d5SChris Mason ret = btrfs_del_item(trans, root, path); 164365a246c5STsutomu Itoh if (ret) 164465a246c5STsutomu Itoh goto out; 1645e02119d5SChris Mason 1646b3b4aa74SDavid Sterba btrfs_release_path(path); 1647e02119d5SChris Mason inode = read_one_inode(root, key.offset); 1648c00e9493STsutomu Itoh if (!inode) 1649c00e9493STsutomu Itoh return -EIO; 1650e02119d5SChris Mason 1651e02119d5SChris Mason ret = fixup_inode_link_count(trans, root, inode); 1652e02119d5SChris Mason iput(inode); 16533650860bSJosef Bacik if (ret) 16543650860bSJosef Bacik goto out; 1655e02119d5SChris Mason 165612fcfd22SChris Mason /* 165712fcfd22SChris Mason * fixup on a directory may 
create new entries, 165812fcfd22SChris Mason * make sure we always look for the highset possible 165912fcfd22SChris Mason * offset 166012fcfd22SChris Mason */ 166112fcfd22SChris Mason key.offset = (u64)-1; 1662e02119d5SChris Mason } 166365a246c5STsutomu Itoh ret = 0; 166465a246c5STsutomu Itoh out: 1665b3b4aa74SDavid Sterba btrfs_release_path(path); 166665a246c5STsutomu Itoh return ret; 1667e02119d5SChris Mason } 1668e02119d5SChris Mason 1669e02119d5SChris Mason 1670e02119d5SChris Mason /* 1671e02119d5SChris Mason * record a given inode in the fixup dir so we can check its link 1672e02119d5SChris Mason * count when replay is done. The link count is incremented here 1673e02119d5SChris Mason * so the inode won't go away until we check it 1674e02119d5SChris Mason */ 1675e02119d5SChris Mason static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans, 1676e02119d5SChris Mason struct btrfs_root *root, 1677e02119d5SChris Mason struct btrfs_path *path, 1678e02119d5SChris Mason u64 objectid) 1679e02119d5SChris Mason { 1680e02119d5SChris Mason struct btrfs_key key; 1681e02119d5SChris Mason int ret = 0; 1682e02119d5SChris Mason struct inode *inode; 1683e02119d5SChris Mason 1684e02119d5SChris Mason inode = read_one_inode(root, objectid); 1685c00e9493STsutomu Itoh if (!inode) 1686c00e9493STsutomu Itoh return -EIO; 1687e02119d5SChris Mason 1688e02119d5SChris Mason key.objectid = BTRFS_TREE_LOG_FIXUP_OBJECTID; 1689962a298fSDavid Sterba key.type = BTRFS_ORPHAN_ITEM_KEY; 1690e02119d5SChris Mason key.offset = objectid; 1691e02119d5SChris Mason 1692e02119d5SChris Mason ret = btrfs_insert_empty_item(trans, root, path, &key, 0); 1693e02119d5SChris Mason 1694b3b4aa74SDavid Sterba btrfs_release_path(path); 1695e02119d5SChris Mason if (ret == 0) { 16969bf7a489SJosef Bacik if (!inode->i_nlink) 16979bf7a489SJosef Bacik set_nlink(inode, 1); 16989bf7a489SJosef Bacik else 16998b558c5fSZach Brown inc_nlink(inode); 1700b9959295STsutomu Itoh ret = btrfs_update_inode(trans, root, 
inode); 1701e02119d5SChris Mason } else if (ret == -EEXIST) { 1702e02119d5SChris Mason ret = 0; 1703e02119d5SChris Mason } else { 17043650860bSJosef Bacik BUG(); /* Logic Error */ 1705e02119d5SChris Mason } 1706e02119d5SChris Mason iput(inode); 1707e02119d5SChris Mason 1708e02119d5SChris Mason return ret; 1709e02119d5SChris Mason } 1710e02119d5SChris Mason 1711e02119d5SChris Mason /* 1712e02119d5SChris Mason * when replaying the log for a directory, we only insert names 1713e02119d5SChris Mason * for inodes that actually exist. This means an fsync on a directory 1714e02119d5SChris Mason * does not implicitly fsync all the new files in it 1715e02119d5SChris Mason */ 1716e02119d5SChris Mason static noinline int insert_one_name(struct btrfs_trans_handle *trans, 1717e02119d5SChris Mason struct btrfs_root *root, 1718e02119d5SChris Mason u64 dirid, u64 index, 171960d53eb3SZhaolei char *name, int name_len, 1720e02119d5SChris Mason struct btrfs_key *location) 1721e02119d5SChris Mason { 1722e02119d5SChris Mason struct inode *inode; 1723e02119d5SChris Mason struct inode *dir; 1724e02119d5SChris Mason int ret; 1725e02119d5SChris Mason 1726e02119d5SChris Mason inode = read_one_inode(root, location->objectid); 1727e02119d5SChris Mason if (!inode) 1728e02119d5SChris Mason return -ENOENT; 1729e02119d5SChris Mason 1730e02119d5SChris Mason dir = read_one_inode(root, dirid); 1731e02119d5SChris Mason if (!dir) { 1732e02119d5SChris Mason iput(inode); 1733e02119d5SChris Mason return -EIO; 1734e02119d5SChris Mason } 1735d555438bSJosef Bacik 1736db0a669fSNikolay Borisov ret = btrfs_add_link(trans, BTRFS_I(dir), BTRFS_I(inode), name, 1737db0a669fSNikolay Borisov name_len, 1, index); 1738e02119d5SChris Mason 1739e02119d5SChris Mason /* FIXME, put inode into FIXUP list */ 1740e02119d5SChris Mason 1741e02119d5SChris Mason iput(inode); 1742e02119d5SChris Mason iput(dir); 1743e02119d5SChris Mason return ret; 1744e02119d5SChris Mason } 1745e02119d5SChris Mason 1746e02119d5SChris Mason /* 
1747df8d116fSFilipe Manana * Return true if an inode reference exists in the log for the given name, 1748df8d116fSFilipe Manana * inode and parent inode. 1749df8d116fSFilipe Manana */ 1750df8d116fSFilipe Manana static bool name_in_log_ref(struct btrfs_root *log_root, 1751df8d116fSFilipe Manana const char *name, const int name_len, 1752df8d116fSFilipe Manana const u64 dirid, const u64 ino) 1753df8d116fSFilipe Manana { 1754df8d116fSFilipe Manana struct btrfs_key search_key; 1755df8d116fSFilipe Manana 1756df8d116fSFilipe Manana search_key.objectid = ino; 1757df8d116fSFilipe Manana search_key.type = BTRFS_INODE_REF_KEY; 1758df8d116fSFilipe Manana search_key.offset = dirid; 1759df8d116fSFilipe Manana if (backref_in_log(log_root, &search_key, dirid, name, name_len)) 1760df8d116fSFilipe Manana return true; 1761df8d116fSFilipe Manana 1762df8d116fSFilipe Manana search_key.type = BTRFS_INODE_EXTREF_KEY; 1763df8d116fSFilipe Manana search_key.offset = btrfs_extref_hash(dirid, name, name_len); 1764df8d116fSFilipe Manana if (backref_in_log(log_root, &search_key, dirid, name, name_len)) 1765df8d116fSFilipe Manana return true; 1766df8d116fSFilipe Manana 1767df8d116fSFilipe Manana return false; 1768df8d116fSFilipe Manana } 1769df8d116fSFilipe Manana 1770df8d116fSFilipe Manana /* 1771e02119d5SChris Mason * take a single entry in a log directory item and replay it into 1772e02119d5SChris Mason * the subvolume. 1773e02119d5SChris Mason * 1774e02119d5SChris Mason * if a conflicting item exists in the subdirectory already, 1775e02119d5SChris Mason * the inode it points to is unlinked and put into the link count 1776e02119d5SChris Mason * fix up tree. 1777e02119d5SChris Mason * 1778e02119d5SChris Mason * If a name from the log points to a file or directory that does 1779e02119d5SChris Mason * not exist in the FS, it is skipped. 
fsyncs on directories 1780e02119d5SChris Mason * do not force down inodes inside that directory, just changes to the 1781e02119d5SChris Mason * names or unlinks in a directory. 1782bb53eda9SFilipe Manana * 1783bb53eda9SFilipe Manana * Returns < 0 on error, 0 if the name wasn't replayed (dentry points to a 1784bb53eda9SFilipe Manana * non-existing inode) and 1 if the name was replayed. 1785e02119d5SChris Mason */ 1786e02119d5SChris Mason static noinline int replay_one_name(struct btrfs_trans_handle *trans, 1787e02119d5SChris Mason struct btrfs_root *root, 1788e02119d5SChris Mason struct btrfs_path *path, 1789e02119d5SChris Mason struct extent_buffer *eb, 1790e02119d5SChris Mason struct btrfs_dir_item *di, 1791e02119d5SChris Mason struct btrfs_key *key) 1792e02119d5SChris Mason { 1793e02119d5SChris Mason char *name; 1794e02119d5SChris Mason int name_len; 1795e02119d5SChris Mason struct btrfs_dir_item *dst_di; 1796e02119d5SChris Mason struct btrfs_key found_key; 1797e02119d5SChris Mason struct btrfs_key log_key; 1798e02119d5SChris Mason struct inode *dir; 1799e02119d5SChris Mason u8 log_type; 18004bef0848SChris Mason int exists; 18013650860bSJosef Bacik int ret = 0; 1802d555438bSJosef Bacik bool update_size = (key->type == BTRFS_DIR_INDEX_KEY); 1803bb53eda9SFilipe Manana bool name_added = false; 1804e02119d5SChris Mason 1805e02119d5SChris Mason dir = read_one_inode(root, key->objectid); 1806c00e9493STsutomu Itoh if (!dir) 1807c00e9493STsutomu Itoh return -EIO; 1808e02119d5SChris Mason 1809e02119d5SChris Mason name_len = btrfs_dir_name_len(eb, di); 1810e02119d5SChris Mason name = kmalloc(name_len, GFP_NOFS); 18112bac325eSFilipe David Borba Manana if (!name) { 18122bac325eSFilipe David Borba Manana ret = -ENOMEM; 18132bac325eSFilipe David Borba Manana goto out; 18142bac325eSFilipe David Borba Manana } 18152a29edc6Sliubo 1816e02119d5SChris Mason log_type = btrfs_dir_type(eb, di); 1817e02119d5SChris Mason read_extent_buffer(eb, name, (unsigned long)(di + 1), 
1818e02119d5SChris Mason name_len); 1819e02119d5SChris Mason 1820e02119d5SChris Mason btrfs_dir_item_key_to_cpu(eb, di, &log_key); 18214bef0848SChris Mason exists = btrfs_lookup_inode(trans, root, path, &log_key, 0); 18224bef0848SChris Mason if (exists == 0) 18234bef0848SChris Mason exists = 1; 18244bef0848SChris Mason else 18254bef0848SChris Mason exists = 0; 1826b3b4aa74SDavid Sterba btrfs_release_path(path); 18274bef0848SChris Mason 1828e02119d5SChris Mason if (key->type == BTRFS_DIR_ITEM_KEY) { 1829e02119d5SChris Mason dst_di = btrfs_lookup_dir_item(trans, root, path, key->objectid, 1830e02119d5SChris Mason name, name_len, 1); 1831d397712bSChris Mason } else if (key->type == BTRFS_DIR_INDEX_KEY) { 1832e02119d5SChris Mason dst_di = btrfs_lookup_dir_index_item(trans, root, path, 1833e02119d5SChris Mason key->objectid, 1834e02119d5SChris Mason key->offset, name, 1835e02119d5SChris Mason name_len, 1); 1836e02119d5SChris Mason } else { 18373650860bSJosef Bacik /* Corruption */ 18383650860bSJosef Bacik ret = -EINVAL; 18393650860bSJosef Bacik goto out; 1840e02119d5SChris Mason } 1841c704005dSDavid Sterba if (IS_ERR_OR_NULL(dst_di)) { 1842e02119d5SChris Mason /* we need a sequence number to insert, so we only 1843e02119d5SChris Mason * do inserts for the BTRFS_DIR_INDEX_KEY types 1844e02119d5SChris Mason */ 1845e02119d5SChris Mason if (key->type != BTRFS_DIR_INDEX_KEY) 1846e02119d5SChris Mason goto out; 1847e02119d5SChris Mason goto insert; 1848e02119d5SChris Mason } 1849e02119d5SChris Mason 1850e02119d5SChris Mason btrfs_dir_item_key_to_cpu(path->nodes[0], dst_di, &found_key); 1851e02119d5SChris Mason /* the existing item matches the logged item */ 1852e02119d5SChris Mason if (found_key.objectid == log_key.objectid && 1853e02119d5SChris Mason found_key.type == log_key.type && 1854e02119d5SChris Mason found_key.offset == log_key.offset && 1855e02119d5SChris Mason btrfs_dir_type(path->nodes[0], dst_di) == log_type) { 1856a2cc11dbSFilipe Manana update_size = false; 
1857e02119d5SChris Mason goto out; 1858e02119d5SChris Mason } 1859e02119d5SChris Mason 1860e02119d5SChris Mason /* 1861e02119d5SChris Mason * don't drop the conflicting directory entry if the inode 1862e02119d5SChris Mason * for the new entry doesn't exist 1863e02119d5SChris Mason */ 18644bef0848SChris Mason if (!exists) 1865e02119d5SChris Mason goto out; 1866e02119d5SChris Mason 1867207e7d92SNikolay Borisov ret = drop_one_dir_item(trans, root, path, BTRFS_I(dir), dst_di); 18683650860bSJosef Bacik if (ret) 18693650860bSJosef Bacik goto out; 1870e02119d5SChris Mason 1871e02119d5SChris Mason if (key->type == BTRFS_DIR_INDEX_KEY) 1872e02119d5SChris Mason goto insert; 1873e02119d5SChris Mason out: 1874b3b4aa74SDavid Sterba btrfs_release_path(path); 1875d555438bSJosef Bacik if (!ret && update_size) { 18766ef06d27SNikolay Borisov btrfs_i_size_write(BTRFS_I(dir), dir->i_size + name_len * 2); 1877d555438bSJosef Bacik ret = btrfs_update_inode(trans, root, dir); 1878d555438bSJosef Bacik } 1879e02119d5SChris Mason kfree(name); 1880e02119d5SChris Mason iput(dir); 1881bb53eda9SFilipe Manana if (!ret && name_added) 1882bb53eda9SFilipe Manana ret = 1; 18833650860bSJosef Bacik return ret; 1884e02119d5SChris Mason 1885e02119d5SChris Mason insert: 1886df8d116fSFilipe Manana if (name_in_log_ref(root->log_root, name, name_len, 1887df8d116fSFilipe Manana key->objectid, log_key.objectid)) { 1888df8d116fSFilipe Manana /* The dentry will be added later. 
*/ 1889df8d116fSFilipe Manana ret = 0; 1890df8d116fSFilipe Manana update_size = false; 1891df8d116fSFilipe Manana goto out; 1892df8d116fSFilipe Manana } 1893b3b4aa74SDavid Sterba btrfs_release_path(path); 189460d53eb3SZhaolei ret = insert_one_name(trans, root, key->objectid, key->offset, 189560d53eb3SZhaolei name, name_len, &log_key); 1896df8d116fSFilipe Manana if (ret && ret != -ENOENT && ret != -EEXIST) 18973650860bSJosef Bacik goto out; 1898bb53eda9SFilipe Manana if (!ret) 1899bb53eda9SFilipe Manana name_added = true; 1900d555438bSJosef Bacik update_size = false; 19013650860bSJosef Bacik ret = 0; 1902e02119d5SChris Mason goto out; 1903e02119d5SChris Mason } 1904e02119d5SChris Mason 1905e02119d5SChris Mason /* 1906e02119d5SChris Mason * find all the names in a directory item and reconcile them into 1907e02119d5SChris Mason * the subvolume. Only BTRFS_DIR_ITEM_KEY types will have more than 1908e02119d5SChris Mason * one name in a directory item, but the same code gets used for 1909e02119d5SChris Mason * both directory index types 1910e02119d5SChris Mason */ 1911e02119d5SChris Mason static noinline int replay_one_dir_item(struct btrfs_trans_handle *trans, 1912e02119d5SChris Mason struct btrfs_root *root, 1913e02119d5SChris Mason struct btrfs_path *path, 1914e02119d5SChris Mason struct extent_buffer *eb, int slot, 1915e02119d5SChris Mason struct btrfs_key *key) 1916e02119d5SChris Mason { 1917bb53eda9SFilipe Manana int ret = 0; 1918e02119d5SChris Mason u32 item_size = btrfs_item_size_nr(eb, slot); 1919e02119d5SChris Mason struct btrfs_dir_item *di; 1920e02119d5SChris Mason int name_len; 1921e02119d5SChris Mason unsigned long ptr; 1922e02119d5SChris Mason unsigned long ptr_end; 1923bb53eda9SFilipe Manana struct btrfs_path *fixup_path = NULL; 1924e02119d5SChris Mason 1925e02119d5SChris Mason ptr = btrfs_item_ptr_offset(eb, slot); 1926e02119d5SChris Mason ptr_end = ptr + item_size; 1927e02119d5SChris Mason while (ptr < ptr_end) { 1928e02119d5SChris Mason di = (struct 
btrfs_dir_item *)ptr; 1929e02119d5SChris Mason name_len = btrfs_dir_name_len(eb, di); 1930e02119d5SChris Mason ret = replay_one_name(trans, root, path, eb, di, key); 1931bb53eda9SFilipe Manana if (ret < 0) 1932bb53eda9SFilipe Manana break; 1933e02119d5SChris Mason ptr = (unsigned long)(di + 1); 1934e02119d5SChris Mason ptr += name_len; 1935bb53eda9SFilipe Manana 1936bb53eda9SFilipe Manana /* 1937bb53eda9SFilipe Manana * If this entry refers to a non-directory (directories can not 1938bb53eda9SFilipe Manana * have a link count > 1) and it was added in the transaction 1939bb53eda9SFilipe Manana * that was not committed, make sure we fixup the link count of 1940bb53eda9SFilipe Manana * the inode it the entry points to. Otherwise something like 1941bb53eda9SFilipe Manana * the following would result in a directory pointing to an 1942bb53eda9SFilipe Manana * inode with a wrong link that does not account for this dir 1943bb53eda9SFilipe Manana * entry: 1944bb53eda9SFilipe Manana * 1945bb53eda9SFilipe Manana * mkdir testdir 1946bb53eda9SFilipe Manana * touch testdir/foo 1947bb53eda9SFilipe Manana * touch testdir/bar 1948bb53eda9SFilipe Manana * sync 1949bb53eda9SFilipe Manana * 1950bb53eda9SFilipe Manana * ln testdir/bar testdir/bar_link 1951bb53eda9SFilipe Manana * ln testdir/foo testdir/foo_link 1952bb53eda9SFilipe Manana * xfs_io -c "fsync" testdir/bar 1953bb53eda9SFilipe Manana * 1954bb53eda9SFilipe Manana * <power failure> 1955bb53eda9SFilipe Manana * 1956bb53eda9SFilipe Manana * mount fs, log replay happens 1957bb53eda9SFilipe Manana * 1958bb53eda9SFilipe Manana * File foo would remain with a link count of 1 when it has two 1959bb53eda9SFilipe Manana * entries pointing to it in the directory testdir. This would 1960bb53eda9SFilipe Manana * make it impossible to ever delete the parent directory has 1961bb53eda9SFilipe Manana * it would result in stale dentries that can never be deleted. 
1962bb53eda9SFilipe Manana */ 1963bb53eda9SFilipe Manana if (ret == 1 && btrfs_dir_type(eb, di) != BTRFS_FT_DIR) { 1964bb53eda9SFilipe Manana struct btrfs_key di_key; 1965bb53eda9SFilipe Manana 1966bb53eda9SFilipe Manana if (!fixup_path) { 1967bb53eda9SFilipe Manana fixup_path = btrfs_alloc_path(); 1968bb53eda9SFilipe Manana if (!fixup_path) { 1969bb53eda9SFilipe Manana ret = -ENOMEM; 1970bb53eda9SFilipe Manana break; 1971e02119d5SChris Mason } 1972bb53eda9SFilipe Manana } 1973bb53eda9SFilipe Manana 1974bb53eda9SFilipe Manana btrfs_dir_item_key_to_cpu(eb, di, &di_key); 1975bb53eda9SFilipe Manana ret = link_to_fixup_dir(trans, root, fixup_path, 1976bb53eda9SFilipe Manana di_key.objectid); 1977bb53eda9SFilipe Manana if (ret) 1978bb53eda9SFilipe Manana break; 1979bb53eda9SFilipe Manana } 1980bb53eda9SFilipe Manana ret = 0; 1981bb53eda9SFilipe Manana } 1982bb53eda9SFilipe Manana btrfs_free_path(fixup_path); 1983bb53eda9SFilipe Manana return ret; 1984e02119d5SChris Mason } 1985e02119d5SChris Mason 1986e02119d5SChris Mason /* 1987e02119d5SChris Mason * directory replay has two parts. There are the standard directory 1988e02119d5SChris Mason * items in the log copied from the subvolume, and range items 1989e02119d5SChris Mason * created in the log while the subvolume was logged. 1990e02119d5SChris Mason * 1991e02119d5SChris Mason * The range items tell us which parts of the key space the log 1992e02119d5SChris Mason * is authoritative for. During replay, if a key in the subvolume 1993e02119d5SChris Mason * directory is in a logged range item, but not actually in the log 1994e02119d5SChris Mason * that means it was deleted from the directory before the fsync 1995e02119d5SChris Mason * and should be removed. 
1996e02119d5SChris Mason */ 1997e02119d5SChris Mason static noinline int find_dir_range(struct btrfs_root *root, 1998e02119d5SChris Mason struct btrfs_path *path, 1999e02119d5SChris Mason u64 dirid, int key_type, 2000e02119d5SChris Mason u64 *start_ret, u64 *end_ret) 2001e02119d5SChris Mason { 2002e02119d5SChris Mason struct btrfs_key key; 2003e02119d5SChris Mason u64 found_end; 2004e02119d5SChris Mason struct btrfs_dir_log_item *item; 2005e02119d5SChris Mason int ret; 2006e02119d5SChris Mason int nritems; 2007e02119d5SChris Mason 2008e02119d5SChris Mason if (*start_ret == (u64)-1) 2009e02119d5SChris Mason return 1; 2010e02119d5SChris Mason 2011e02119d5SChris Mason key.objectid = dirid; 2012e02119d5SChris Mason key.type = key_type; 2013e02119d5SChris Mason key.offset = *start_ret; 2014e02119d5SChris Mason 2015e02119d5SChris Mason ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 2016e02119d5SChris Mason if (ret < 0) 2017e02119d5SChris Mason goto out; 2018e02119d5SChris Mason if (ret > 0) { 2019e02119d5SChris Mason if (path->slots[0] == 0) 2020e02119d5SChris Mason goto out; 2021e02119d5SChris Mason path->slots[0]--; 2022e02119d5SChris Mason } 2023e02119d5SChris Mason if (ret != 0) 2024e02119d5SChris Mason btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 2025e02119d5SChris Mason 2026e02119d5SChris Mason if (key.type != key_type || key.objectid != dirid) { 2027e02119d5SChris Mason ret = 1; 2028e02119d5SChris Mason goto next; 2029e02119d5SChris Mason } 2030e02119d5SChris Mason item = btrfs_item_ptr(path->nodes[0], path->slots[0], 2031e02119d5SChris Mason struct btrfs_dir_log_item); 2032e02119d5SChris Mason found_end = btrfs_dir_log_end(path->nodes[0], item); 2033e02119d5SChris Mason 2034e02119d5SChris Mason if (*start_ret >= key.offset && *start_ret <= found_end) { 2035e02119d5SChris Mason ret = 0; 2036e02119d5SChris Mason *start_ret = key.offset; 2037e02119d5SChris Mason *end_ret = found_end; 2038e02119d5SChris Mason goto out; 2039e02119d5SChris Mason 
} 2040e02119d5SChris Mason ret = 1; 2041e02119d5SChris Mason next: 2042e02119d5SChris Mason /* check the next slot in the tree to see if it is a valid item */ 2043e02119d5SChris Mason nritems = btrfs_header_nritems(path->nodes[0]); 20442a7bf53fSRobbie Ko path->slots[0]++; 2045e02119d5SChris Mason if (path->slots[0] >= nritems) { 2046e02119d5SChris Mason ret = btrfs_next_leaf(root, path); 2047e02119d5SChris Mason if (ret) 2048e02119d5SChris Mason goto out; 2049e02119d5SChris Mason } 2050e02119d5SChris Mason 2051e02119d5SChris Mason btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 2052e02119d5SChris Mason 2053e02119d5SChris Mason if (key.type != key_type || key.objectid != dirid) { 2054e02119d5SChris Mason ret = 1; 2055e02119d5SChris Mason goto out; 2056e02119d5SChris Mason } 2057e02119d5SChris Mason item = btrfs_item_ptr(path->nodes[0], path->slots[0], 2058e02119d5SChris Mason struct btrfs_dir_log_item); 2059e02119d5SChris Mason found_end = btrfs_dir_log_end(path->nodes[0], item); 2060e02119d5SChris Mason *start_ret = key.offset; 2061e02119d5SChris Mason *end_ret = found_end; 2062e02119d5SChris Mason ret = 0; 2063e02119d5SChris Mason out: 2064b3b4aa74SDavid Sterba btrfs_release_path(path); 2065e02119d5SChris Mason return ret; 2066e02119d5SChris Mason } 2067e02119d5SChris Mason 2068e02119d5SChris Mason /* 2069e02119d5SChris Mason * this looks for a given directory item in the log. 
If the directory 2070e02119d5SChris Mason * item is not in the log, the item is removed and the inode it points 2071e02119d5SChris Mason * to is unlinked 2072e02119d5SChris Mason */ 2073e02119d5SChris Mason static noinline int check_item_in_log(struct btrfs_trans_handle *trans, 2074e02119d5SChris Mason struct btrfs_root *root, 2075e02119d5SChris Mason struct btrfs_root *log, 2076e02119d5SChris Mason struct btrfs_path *path, 2077e02119d5SChris Mason struct btrfs_path *log_path, 2078e02119d5SChris Mason struct inode *dir, 2079e02119d5SChris Mason struct btrfs_key *dir_key) 2080e02119d5SChris Mason { 2081e02119d5SChris Mason int ret; 2082e02119d5SChris Mason struct extent_buffer *eb; 2083e02119d5SChris Mason int slot; 2084e02119d5SChris Mason u32 item_size; 2085e02119d5SChris Mason struct btrfs_dir_item *di; 2086e02119d5SChris Mason struct btrfs_dir_item *log_di; 2087e02119d5SChris Mason int name_len; 2088e02119d5SChris Mason unsigned long ptr; 2089e02119d5SChris Mason unsigned long ptr_end; 2090e02119d5SChris Mason char *name; 2091e02119d5SChris Mason struct inode *inode; 2092e02119d5SChris Mason struct btrfs_key location; 2093e02119d5SChris Mason 2094e02119d5SChris Mason again: 2095e02119d5SChris Mason eb = path->nodes[0]; 2096e02119d5SChris Mason slot = path->slots[0]; 2097e02119d5SChris Mason item_size = btrfs_item_size_nr(eb, slot); 2098e02119d5SChris Mason ptr = btrfs_item_ptr_offset(eb, slot); 2099e02119d5SChris Mason ptr_end = ptr + item_size; 2100e02119d5SChris Mason while (ptr < ptr_end) { 2101e02119d5SChris Mason di = (struct btrfs_dir_item *)ptr; 2102e02119d5SChris Mason name_len = btrfs_dir_name_len(eb, di); 2103e02119d5SChris Mason name = kmalloc(name_len, GFP_NOFS); 2104e02119d5SChris Mason if (!name) { 2105e02119d5SChris Mason ret = -ENOMEM; 2106e02119d5SChris Mason goto out; 2107e02119d5SChris Mason } 2108e02119d5SChris Mason read_extent_buffer(eb, name, (unsigned long)(di + 1), 2109e02119d5SChris Mason name_len); 2110e02119d5SChris Mason log_di = 
NULL; 211112fcfd22SChris Mason if (log && dir_key->type == BTRFS_DIR_ITEM_KEY) { 2112e02119d5SChris Mason log_di = btrfs_lookup_dir_item(trans, log, log_path, 2113e02119d5SChris Mason dir_key->objectid, 2114e02119d5SChris Mason name, name_len, 0); 211512fcfd22SChris Mason } else if (log && dir_key->type == BTRFS_DIR_INDEX_KEY) { 2116e02119d5SChris Mason log_di = btrfs_lookup_dir_index_item(trans, log, 2117e02119d5SChris Mason log_path, 2118e02119d5SChris Mason dir_key->objectid, 2119e02119d5SChris Mason dir_key->offset, 2120e02119d5SChris Mason name, name_len, 0); 2121e02119d5SChris Mason } 2122269d040fSFilipe David Borba Manana if (!log_di || (IS_ERR(log_di) && PTR_ERR(log_di) == -ENOENT)) { 2123e02119d5SChris Mason btrfs_dir_item_key_to_cpu(eb, di, &location); 2124b3b4aa74SDavid Sterba btrfs_release_path(path); 2125b3b4aa74SDavid Sterba btrfs_release_path(log_path); 2126e02119d5SChris Mason inode = read_one_inode(root, location.objectid); 2127c00e9493STsutomu Itoh if (!inode) { 2128c00e9493STsutomu Itoh kfree(name); 2129c00e9493STsutomu Itoh return -EIO; 2130c00e9493STsutomu Itoh } 2131e02119d5SChris Mason 2132e02119d5SChris Mason ret = link_to_fixup_dir(trans, root, 2133e02119d5SChris Mason path, location.objectid); 21343650860bSJosef Bacik if (ret) { 21353650860bSJosef Bacik kfree(name); 21363650860bSJosef Bacik iput(inode); 21373650860bSJosef Bacik goto out; 21383650860bSJosef Bacik } 21393650860bSJosef Bacik 21408b558c5fSZach Brown inc_nlink(inode); 21414ec5934eSNikolay Borisov ret = btrfs_unlink_inode(trans, root, BTRFS_I(dir), 21424ec5934eSNikolay Borisov BTRFS_I(inode), name, name_len); 21433650860bSJosef Bacik if (!ret) 2144e5c304e6SNikolay Borisov ret = btrfs_run_delayed_items(trans); 2145e02119d5SChris Mason kfree(name); 2146e02119d5SChris Mason iput(inode); 21473650860bSJosef Bacik if (ret) 21483650860bSJosef Bacik goto out; 2149e02119d5SChris Mason 2150e02119d5SChris Mason /* there might still be more names under this key 2151e02119d5SChris Mason * 
check and repeat if required 2152e02119d5SChris Mason */ 2153e02119d5SChris Mason ret = btrfs_search_slot(NULL, root, dir_key, path, 2154e02119d5SChris Mason 0, 0); 2155e02119d5SChris Mason if (ret == 0) 2156e02119d5SChris Mason goto again; 2157e02119d5SChris Mason ret = 0; 2158e02119d5SChris Mason goto out; 2159269d040fSFilipe David Borba Manana } else if (IS_ERR(log_di)) { 2160269d040fSFilipe David Borba Manana kfree(name); 2161269d040fSFilipe David Borba Manana return PTR_ERR(log_di); 2162e02119d5SChris Mason } 2163b3b4aa74SDavid Sterba btrfs_release_path(log_path); 2164e02119d5SChris Mason kfree(name); 2165e02119d5SChris Mason 2166e02119d5SChris Mason ptr = (unsigned long)(di + 1); 2167e02119d5SChris Mason ptr += name_len; 2168e02119d5SChris Mason } 2169e02119d5SChris Mason ret = 0; 2170e02119d5SChris Mason out: 2171b3b4aa74SDavid Sterba btrfs_release_path(path); 2172b3b4aa74SDavid Sterba btrfs_release_path(log_path); 2173e02119d5SChris Mason return ret; 2174e02119d5SChris Mason } 2175e02119d5SChris Mason 21764f764e51SFilipe Manana static int replay_xattr_deletes(struct btrfs_trans_handle *trans, 21774f764e51SFilipe Manana struct btrfs_root *root, 21784f764e51SFilipe Manana struct btrfs_root *log, 21794f764e51SFilipe Manana struct btrfs_path *path, 21804f764e51SFilipe Manana const u64 ino) 21814f764e51SFilipe Manana { 21824f764e51SFilipe Manana struct btrfs_key search_key; 21834f764e51SFilipe Manana struct btrfs_path *log_path; 21844f764e51SFilipe Manana int i; 21854f764e51SFilipe Manana int nritems; 21864f764e51SFilipe Manana int ret; 21874f764e51SFilipe Manana 21884f764e51SFilipe Manana log_path = btrfs_alloc_path(); 21894f764e51SFilipe Manana if (!log_path) 21904f764e51SFilipe Manana return -ENOMEM; 21914f764e51SFilipe Manana 21924f764e51SFilipe Manana search_key.objectid = ino; 21934f764e51SFilipe Manana search_key.type = BTRFS_XATTR_ITEM_KEY; 21944f764e51SFilipe Manana search_key.offset = 0; 21954f764e51SFilipe Manana again: 21964f764e51SFilipe Manana ret 
= btrfs_search_slot(NULL, root, &search_key, path, 0, 0); 21974f764e51SFilipe Manana if (ret < 0) 21984f764e51SFilipe Manana goto out; 21994f764e51SFilipe Manana process_leaf: 22004f764e51SFilipe Manana nritems = btrfs_header_nritems(path->nodes[0]); 22014f764e51SFilipe Manana for (i = path->slots[0]; i < nritems; i++) { 22024f764e51SFilipe Manana struct btrfs_key key; 22034f764e51SFilipe Manana struct btrfs_dir_item *di; 22044f764e51SFilipe Manana struct btrfs_dir_item *log_di; 22054f764e51SFilipe Manana u32 total_size; 22064f764e51SFilipe Manana u32 cur; 22074f764e51SFilipe Manana 22084f764e51SFilipe Manana btrfs_item_key_to_cpu(path->nodes[0], &key, i); 22094f764e51SFilipe Manana if (key.objectid != ino || key.type != BTRFS_XATTR_ITEM_KEY) { 22104f764e51SFilipe Manana ret = 0; 22114f764e51SFilipe Manana goto out; 22124f764e51SFilipe Manana } 22134f764e51SFilipe Manana 22144f764e51SFilipe Manana di = btrfs_item_ptr(path->nodes[0], i, struct btrfs_dir_item); 22154f764e51SFilipe Manana total_size = btrfs_item_size_nr(path->nodes[0], i); 22164f764e51SFilipe Manana cur = 0; 22174f764e51SFilipe Manana while (cur < total_size) { 22184f764e51SFilipe Manana u16 name_len = btrfs_dir_name_len(path->nodes[0], di); 22194f764e51SFilipe Manana u16 data_len = btrfs_dir_data_len(path->nodes[0], di); 22204f764e51SFilipe Manana u32 this_len = sizeof(*di) + name_len + data_len; 22214f764e51SFilipe Manana char *name; 22224f764e51SFilipe Manana 22234f764e51SFilipe Manana name = kmalloc(name_len, GFP_NOFS); 22244f764e51SFilipe Manana if (!name) { 22254f764e51SFilipe Manana ret = -ENOMEM; 22264f764e51SFilipe Manana goto out; 22274f764e51SFilipe Manana } 22284f764e51SFilipe Manana read_extent_buffer(path->nodes[0], name, 22294f764e51SFilipe Manana (unsigned long)(di + 1), name_len); 22304f764e51SFilipe Manana 22314f764e51SFilipe Manana log_di = btrfs_lookup_xattr(NULL, log, log_path, ino, 22324f764e51SFilipe Manana name, name_len, 0); 22334f764e51SFilipe Manana 
btrfs_release_path(log_path); 22344f764e51SFilipe Manana if (!log_di) { 22354f764e51SFilipe Manana /* Doesn't exist in log tree, so delete it. */ 22364f764e51SFilipe Manana btrfs_release_path(path); 22374f764e51SFilipe Manana di = btrfs_lookup_xattr(trans, root, path, ino, 22384f764e51SFilipe Manana name, name_len, -1); 22394f764e51SFilipe Manana kfree(name); 22404f764e51SFilipe Manana if (IS_ERR(di)) { 22414f764e51SFilipe Manana ret = PTR_ERR(di); 22424f764e51SFilipe Manana goto out; 22434f764e51SFilipe Manana } 22444f764e51SFilipe Manana ASSERT(di); 22454f764e51SFilipe Manana ret = btrfs_delete_one_dir_name(trans, root, 22464f764e51SFilipe Manana path, di); 22474f764e51SFilipe Manana if (ret) 22484f764e51SFilipe Manana goto out; 22494f764e51SFilipe Manana btrfs_release_path(path); 22504f764e51SFilipe Manana search_key = key; 22514f764e51SFilipe Manana goto again; 22524f764e51SFilipe Manana } 22534f764e51SFilipe Manana kfree(name); 22544f764e51SFilipe Manana if (IS_ERR(log_di)) { 22554f764e51SFilipe Manana ret = PTR_ERR(log_di); 22564f764e51SFilipe Manana goto out; 22574f764e51SFilipe Manana } 22584f764e51SFilipe Manana cur += this_len; 22594f764e51SFilipe Manana di = (struct btrfs_dir_item *)((char *)di + this_len); 22604f764e51SFilipe Manana } 22614f764e51SFilipe Manana } 22624f764e51SFilipe Manana ret = btrfs_next_leaf(root, path); 22634f764e51SFilipe Manana if (ret > 0) 22644f764e51SFilipe Manana ret = 0; 22654f764e51SFilipe Manana else if (ret == 0) 22664f764e51SFilipe Manana goto process_leaf; 22674f764e51SFilipe Manana out: 22684f764e51SFilipe Manana btrfs_free_path(log_path); 22694f764e51SFilipe Manana btrfs_release_path(path); 22704f764e51SFilipe Manana return ret; 22714f764e51SFilipe Manana } 22724f764e51SFilipe Manana 22734f764e51SFilipe Manana 2274e02119d5SChris Mason /* 2275e02119d5SChris Mason * deletion replay happens before we copy any new directory items 2276e02119d5SChris Mason * out of the log or out of backreferences from inodes. 
It 2277e02119d5SChris Mason * scans the log to find ranges of keys that log is authoritative for, 2278e02119d5SChris Mason * and then scans the directory to find items in those ranges that are 2279e02119d5SChris Mason * not present in the log. 2280e02119d5SChris Mason * 2281e02119d5SChris Mason * Anything we don't find in the log is unlinked and removed from the 2282e02119d5SChris Mason * directory. 2283e02119d5SChris Mason */ 2284e02119d5SChris Mason static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans, 2285e02119d5SChris Mason struct btrfs_root *root, 2286e02119d5SChris Mason struct btrfs_root *log, 2287e02119d5SChris Mason struct btrfs_path *path, 228812fcfd22SChris Mason u64 dirid, int del_all) 2289e02119d5SChris Mason { 2290e02119d5SChris Mason u64 range_start; 2291e02119d5SChris Mason u64 range_end; 2292e02119d5SChris Mason int key_type = BTRFS_DIR_LOG_ITEM_KEY; 2293e02119d5SChris Mason int ret = 0; 2294e02119d5SChris Mason struct btrfs_key dir_key; 2295e02119d5SChris Mason struct btrfs_key found_key; 2296e02119d5SChris Mason struct btrfs_path *log_path; 2297e02119d5SChris Mason struct inode *dir; 2298e02119d5SChris Mason 2299e02119d5SChris Mason dir_key.objectid = dirid; 2300e02119d5SChris Mason dir_key.type = BTRFS_DIR_ITEM_KEY; 2301e02119d5SChris Mason log_path = btrfs_alloc_path(); 2302e02119d5SChris Mason if (!log_path) 2303e02119d5SChris Mason return -ENOMEM; 2304e02119d5SChris Mason 2305e02119d5SChris Mason dir = read_one_inode(root, dirid); 2306e02119d5SChris Mason /* it isn't an error if the inode isn't there, that can happen 2307e02119d5SChris Mason * because we replay the deletes before we copy in the inode item 2308e02119d5SChris Mason * from the log 2309e02119d5SChris Mason */ 2310e02119d5SChris Mason if (!dir) { 2311e02119d5SChris Mason btrfs_free_path(log_path); 2312e02119d5SChris Mason return 0; 2313e02119d5SChris Mason } 2314e02119d5SChris Mason again: 2315e02119d5SChris Mason range_start = 0; 2316e02119d5SChris Mason 
range_end = 0; 2317e02119d5SChris Mason while (1) { 231812fcfd22SChris Mason if (del_all) 231912fcfd22SChris Mason range_end = (u64)-1; 232012fcfd22SChris Mason else { 2321e02119d5SChris Mason ret = find_dir_range(log, path, dirid, key_type, 2322e02119d5SChris Mason &range_start, &range_end); 2323e02119d5SChris Mason if (ret != 0) 2324e02119d5SChris Mason break; 232512fcfd22SChris Mason } 2326e02119d5SChris Mason 2327e02119d5SChris Mason dir_key.offset = range_start; 2328e02119d5SChris Mason while (1) { 2329e02119d5SChris Mason int nritems; 2330e02119d5SChris Mason ret = btrfs_search_slot(NULL, root, &dir_key, path, 2331e02119d5SChris Mason 0, 0); 2332e02119d5SChris Mason if (ret < 0) 2333e02119d5SChris Mason goto out; 2334e02119d5SChris Mason 2335e02119d5SChris Mason nritems = btrfs_header_nritems(path->nodes[0]); 2336e02119d5SChris Mason if (path->slots[0] >= nritems) { 2337e02119d5SChris Mason ret = btrfs_next_leaf(root, path); 2338b98def7cSLiu Bo if (ret == 1) 2339e02119d5SChris Mason break; 2340b98def7cSLiu Bo else if (ret < 0) 2341b98def7cSLiu Bo goto out; 2342e02119d5SChris Mason } 2343e02119d5SChris Mason btrfs_item_key_to_cpu(path->nodes[0], &found_key, 2344e02119d5SChris Mason path->slots[0]); 2345e02119d5SChris Mason if (found_key.objectid != dirid || 2346e02119d5SChris Mason found_key.type != dir_key.type) 2347e02119d5SChris Mason goto next_type; 2348e02119d5SChris Mason 2349e02119d5SChris Mason if (found_key.offset > range_end) 2350e02119d5SChris Mason break; 2351e02119d5SChris Mason 2352e02119d5SChris Mason ret = check_item_in_log(trans, root, log, path, 235312fcfd22SChris Mason log_path, dir, 235412fcfd22SChris Mason &found_key); 23553650860bSJosef Bacik if (ret) 23563650860bSJosef Bacik goto out; 2357e02119d5SChris Mason if (found_key.offset == (u64)-1) 2358e02119d5SChris Mason break; 2359e02119d5SChris Mason dir_key.offset = found_key.offset + 1; 2360e02119d5SChris Mason } 2361b3b4aa74SDavid Sterba btrfs_release_path(path); 2362e02119d5SChris Mason 
if (range_end == (u64)-1) 2363e02119d5SChris Mason break; 2364e02119d5SChris Mason range_start = range_end + 1; 2365e02119d5SChris Mason } 2366e02119d5SChris Mason 2367e02119d5SChris Mason next_type: 2368e02119d5SChris Mason ret = 0; 2369e02119d5SChris Mason if (key_type == BTRFS_DIR_LOG_ITEM_KEY) { 2370e02119d5SChris Mason key_type = BTRFS_DIR_LOG_INDEX_KEY; 2371e02119d5SChris Mason dir_key.type = BTRFS_DIR_INDEX_KEY; 2372b3b4aa74SDavid Sterba btrfs_release_path(path); 2373e02119d5SChris Mason goto again; 2374e02119d5SChris Mason } 2375e02119d5SChris Mason out: 2376b3b4aa74SDavid Sterba btrfs_release_path(path); 2377e02119d5SChris Mason btrfs_free_path(log_path); 2378e02119d5SChris Mason iput(dir); 2379e02119d5SChris Mason return ret; 2380e02119d5SChris Mason } 2381e02119d5SChris Mason 2382e02119d5SChris Mason /* 2383e02119d5SChris Mason * the process_func used to replay items from the log tree. This 2384e02119d5SChris Mason * gets called in two different stages. The first stage just looks 2385e02119d5SChris Mason * for inodes and makes sure they are all copied into the subvolume. 2386e02119d5SChris Mason * 2387e02119d5SChris Mason * The second stage copies all the other item types from the log into 2388e02119d5SChris Mason * the subvolume. The two stage approach is slower, but gets rid of 2389e02119d5SChris Mason * lots of complexity around inodes referencing other inodes that exist 2390e02119d5SChris Mason * only in the log (references come from either directory items or inode 2391e02119d5SChris Mason * back refs). 
2392e02119d5SChris Mason */ 2393e02119d5SChris Mason static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, 2394581c1760SQu Wenruo struct walk_control *wc, u64 gen, int level) 2395e02119d5SChris Mason { 2396e02119d5SChris Mason int nritems; 2397e02119d5SChris Mason struct btrfs_path *path; 2398e02119d5SChris Mason struct btrfs_root *root = wc->replay_dest; 2399e02119d5SChris Mason struct btrfs_key key; 2400e02119d5SChris Mason int i; 2401e02119d5SChris Mason int ret; 2402e02119d5SChris Mason 2403581c1760SQu Wenruo ret = btrfs_read_buffer(eb, gen, level, NULL); 2404018642a1STsutomu Itoh if (ret) 2405018642a1STsutomu Itoh return ret; 2406e02119d5SChris Mason 2407e02119d5SChris Mason level = btrfs_header_level(eb); 2408e02119d5SChris Mason 2409e02119d5SChris Mason if (level != 0) 2410e02119d5SChris Mason return 0; 2411e02119d5SChris Mason 2412e02119d5SChris Mason path = btrfs_alloc_path(); 24131e5063d0SMark Fasheh if (!path) 24141e5063d0SMark Fasheh return -ENOMEM; 2415e02119d5SChris Mason 2416e02119d5SChris Mason nritems = btrfs_header_nritems(eb); 2417e02119d5SChris Mason for (i = 0; i < nritems; i++) { 2418e02119d5SChris Mason btrfs_item_key_to_cpu(eb, &key, i); 2419e02119d5SChris Mason 2420e02119d5SChris Mason /* inode keys are done during the first stage */ 2421e02119d5SChris Mason if (key.type == BTRFS_INODE_ITEM_KEY && 2422e02119d5SChris Mason wc->stage == LOG_WALK_REPLAY_INODES) { 2423e02119d5SChris Mason struct btrfs_inode_item *inode_item; 2424e02119d5SChris Mason u32 mode; 2425e02119d5SChris Mason 2426e02119d5SChris Mason inode_item = btrfs_item_ptr(eb, i, 2427e02119d5SChris Mason struct btrfs_inode_item); 24284f764e51SFilipe Manana ret = replay_xattr_deletes(wc->trans, root, log, 24294f764e51SFilipe Manana path, key.objectid); 24304f764e51SFilipe Manana if (ret) 24314f764e51SFilipe Manana break; 2432e02119d5SChris Mason mode = btrfs_inode_mode(eb, inode_item); 2433e02119d5SChris Mason if (S_ISDIR(mode)) { 2434e02119d5SChris Mason 
ret = replay_dir_deletes(wc->trans, 243512fcfd22SChris Mason root, log, path, key.objectid, 0); 2436b50c6e25SJosef Bacik if (ret) 2437b50c6e25SJosef Bacik break; 2438e02119d5SChris Mason } 2439e02119d5SChris Mason ret = overwrite_item(wc->trans, root, path, 2440e02119d5SChris Mason eb, i, &key); 2441b50c6e25SJosef Bacik if (ret) 2442b50c6e25SJosef Bacik break; 2443e02119d5SChris Mason 2444471d557aSFilipe Manana /* 2445471d557aSFilipe Manana * Before replaying extents, truncate the inode to its 2446471d557aSFilipe Manana * size. We need to do it now and not after log replay 2447471d557aSFilipe Manana * because before an fsync we can have prealloc extents 2448471d557aSFilipe Manana * added beyond the inode's i_size. If we did it after, 2449471d557aSFilipe Manana * through orphan cleanup for example, we would drop 2450471d557aSFilipe Manana * those prealloc extents just after replaying them. 2451e02119d5SChris Mason */ 2452e02119d5SChris Mason if (S_ISREG(mode)) { 2453471d557aSFilipe Manana struct inode *inode; 2454471d557aSFilipe Manana u64 from; 2455471d557aSFilipe Manana 2456471d557aSFilipe Manana inode = read_one_inode(root, key.objectid); 2457471d557aSFilipe Manana if (!inode) { 2458471d557aSFilipe Manana ret = -EIO; 2459471d557aSFilipe Manana break; 2460471d557aSFilipe Manana } 2461471d557aSFilipe Manana from = ALIGN(i_size_read(inode), 2462471d557aSFilipe Manana root->fs_info->sectorsize); 2463471d557aSFilipe Manana ret = btrfs_drop_extents(wc->trans, root, inode, 2464471d557aSFilipe Manana from, (u64)-1, 1); 2465471d557aSFilipe Manana /* 2466471d557aSFilipe Manana * If the nlink count is zero here, the iput 2467471d557aSFilipe Manana * will free the inode. We bump it to make 2468471d557aSFilipe Manana * sure it doesn't get freed until the link 2469471d557aSFilipe Manana * count fixup is done. 
2470471d557aSFilipe Manana */ 2471471d557aSFilipe Manana if (!ret) { 2472471d557aSFilipe Manana if (inode->i_nlink == 0) 2473471d557aSFilipe Manana inc_nlink(inode); 2474471d557aSFilipe Manana /* Update link count and nbytes. */ 2475471d557aSFilipe Manana ret = btrfs_update_inode(wc->trans, 2476471d557aSFilipe Manana root, inode); 2477471d557aSFilipe Manana } 2478471d557aSFilipe Manana iput(inode); 2479b50c6e25SJosef Bacik if (ret) 2480b50c6e25SJosef Bacik break; 2481c71bf099SYan, Zheng } 2482a74ac322SChris Mason 2483e02119d5SChris Mason ret = link_to_fixup_dir(wc->trans, root, 2484e02119d5SChris Mason path, key.objectid); 2485b50c6e25SJosef Bacik if (ret) 2486b50c6e25SJosef Bacik break; 2487e02119d5SChris Mason } 2488dd8e7217SJosef Bacik 2489dd8e7217SJosef Bacik if (key.type == BTRFS_DIR_INDEX_KEY && 2490dd8e7217SJosef Bacik wc->stage == LOG_WALK_REPLAY_DIR_INDEX) { 2491dd8e7217SJosef Bacik ret = replay_one_dir_item(wc->trans, root, path, 2492dd8e7217SJosef Bacik eb, i, &key); 2493dd8e7217SJosef Bacik if (ret) 2494dd8e7217SJosef Bacik break; 2495dd8e7217SJosef Bacik } 2496dd8e7217SJosef Bacik 2497e02119d5SChris Mason if (wc->stage < LOG_WALK_REPLAY_ALL) 2498e02119d5SChris Mason continue; 2499e02119d5SChris Mason 2500e02119d5SChris Mason /* these keys are simply copied */ 2501e02119d5SChris Mason if (key.type == BTRFS_XATTR_ITEM_KEY) { 2502e02119d5SChris Mason ret = overwrite_item(wc->trans, root, path, 2503e02119d5SChris Mason eb, i, &key); 2504b50c6e25SJosef Bacik if (ret) 2505b50c6e25SJosef Bacik break; 25062da1c669SLiu Bo } else if (key.type == BTRFS_INODE_REF_KEY || 25072da1c669SLiu Bo key.type == BTRFS_INODE_EXTREF_KEY) { 2508f186373fSMark Fasheh ret = add_inode_ref(wc->trans, root, log, path, 2509f186373fSMark Fasheh eb, i, &key); 2510b50c6e25SJosef Bacik if (ret && ret != -ENOENT) 2511b50c6e25SJosef Bacik break; 2512b50c6e25SJosef Bacik ret = 0; 2513e02119d5SChris Mason } else if (key.type == BTRFS_EXTENT_DATA_KEY) { 2514e02119d5SChris Mason ret = 
replay_one_extent(wc->trans, root, path, 2515e02119d5SChris Mason eb, i, &key); 2516b50c6e25SJosef Bacik if (ret) 2517b50c6e25SJosef Bacik break; 2518dd8e7217SJosef Bacik } else if (key.type == BTRFS_DIR_ITEM_KEY) { 2519e02119d5SChris Mason ret = replay_one_dir_item(wc->trans, root, path, 2520e02119d5SChris Mason eb, i, &key); 2521b50c6e25SJosef Bacik if (ret) 2522b50c6e25SJosef Bacik break; 2523e02119d5SChris Mason } 2524e02119d5SChris Mason } 2525e02119d5SChris Mason btrfs_free_path(path); 2526b50c6e25SJosef Bacik return ret; 2527e02119d5SChris Mason } 2528e02119d5SChris Mason 2529d397712bSChris Mason static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, 2530e02119d5SChris Mason struct btrfs_root *root, 2531e02119d5SChris Mason struct btrfs_path *path, int *level, 2532e02119d5SChris Mason struct walk_control *wc) 2533e02119d5SChris Mason { 25340b246afaSJeff Mahoney struct btrfs_fs_info *fs_info = root->fs_info; 2535e02119d5SChris Mason u64 root_owner; 2536e02119d5SChris Mason u64 bytenr; 2537e02119d5SChris Mason u64 ptr_gen; 2538e02119d5SChris Mason struct extent_buffer *next; 2539e02119d5SChris Mason struct extent_buffer *cur; 2540e02119d5SChris Mason struct extent_buffer *parent; 2541e02119d5SChris Mason u32 blocksize; 2542e02119d5SChris Mason int ret = 0; 2543e02119d5SChris Mason 2544e02119d5SChris Mason WARN_ON(*level < 0); 2545e02119d5SChris Mason WARN_ON(*level >= BTRFS_MAX_LEVEL); 2546e02119d5SChris Mason 2547e02119d5SChris Mason while (*level > 0) { 2548581c1760SQu Wenruo struct btrfs_key first_key; 2549581c1760SQu Wenruo 2550e02119d5SChris Mason WARN_ON(*level < 0); 2551e02119d5SChris Mason WARN_ON(*level >= BTRFS_MAX_LEVEL); 2552e02119d5SChris Mason cur = path->nodes[*level]; 2553e02119d5SChris Mason 2554fae7f21cSDulshani Gunawardhana WARN_ON(btrfs_header_level(cur) != *level); 2555e02119d5SChris Mason 2556e02119d5SChris Mason if (path->slots[*level] >= 2557e02119d5SChris Mason btrfs_header_nritems(cur)) 2558e02119d5SChris Mason 
break; 2559e02119d5SChris Mason 2560e02119d5SChris Mason bytenr = btrfs_node_blockptr(cur, path->slots[*level]); 2561e02119d5SChris Mason ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); 2562581c1760SQu Wenruo btrfs_node_key_to_cpu(cur, &first_key, path->slots[*level]); 25630b246afaSJeff Mahoney blocksize = fs_info->nodesize; 2564e02119d5SChris Mason 2565e02119d5SChris Mason parent = path->nodes[*level]; 2566e02119d5SChris Mason root_owner = btrfs_header_owner(parent); 2567e02119d5SChris Mason 25682ff7e61eSJeff Mahoney next = btrfs_find_create_tree_block(fs_info, bytenr); 2569c871b0f2SLiu Bo if (IS_ERR(next)) 2570c871b0f2SLiu Bo return PTR_ERR(next); 2571e02119d5SChris Mason 25724a500fd1SYan, Zheng if (*level == 1) { 2573581c1760SQu Wenruo ret = wc->process_func(root, next, wc, ptr_gen, 2574581c1760SQu Wenruo *level - 1); 2575b50c6e25SJosef Bacik if (ret) { 2576b50c6e25SJosef Bacik free_extent_buffer(next); 25771e5063d0SMark Fasheh return ret; 2578b50c6e25SJosef Bacik } 2579e02119d5SChris Mason 2580e02119d5SChris Mason path->slots[*level]++; 2581e02119d5SChris Mason if (wc->free) { 2582581c1760SQu Wenruo ret = btrfs_read_buffer(next, ptr_gen, 2583581c1760SQu Wenruo *level - 1, &first_key); 2584018642a1STsutomu Itoh if (ret) { 2585018642a1STsutomu Itoh free_extent_buffer(next); 2586018642a1STsutomu Itoh return ret; 2587018642a1STsutomu Itoh } 2588e02119d5SChris Mason 2589681ae509SJosef Bacik if (trans) { 2590e02119d5SChris Mason btrfs_tree_lock(next); 2591b4ce94deSChris Mason btrfs_set_lock_blocking(next); 25927c302b49SDavid Sterba clean_tree_block(fs_info, next); 2593e02119d5SChris Mason btrfs_wait_tree_block_writeback(next); 2594e02119d5SChris Mason btrfs_tree_unlock(next); 25951846430cSLiu Bo } else { 25961846430cSLiu Bo if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &next->bflags)) 25971846430cSLiu Bo clear_extent_buffer_dirty(next); 2598681ae509SJosef Bacik } 2599e02119d5SChris Mason 2600e02119d5SChris Mason WARN_ON(root_owner != 2601e02119d5SChris 
Mason BTRFS_TREE_LOG_OBJECTID); 26022ff7e61eSJeff Mahoney ret = btrfs_free_and_pin_reserved_extent( 26032ff7e61eSJeff Mahoney fs_info, bytenr, 26042ff7e61eSJeff Mahoney blocksize); 26053650860bSJosef Bacik if (ret) { 26063650860bSJosef Bacik free_extent_buffer(next); 26073650860bSJosef Bacik return ret; 26083650860bSJosef Bacik } 2609e02119d5SChris Mason } 2610e02119d5SChris Mason free_extent_buffer(next); 2611e02119d5SChris Mason continue; 2612e02119d5SChris Mason } 2613581c1760SQu Wenruo ret = btrfs_read_buffer(next, ptr_gen, *level - 1, &first_key); 2614018642a1STsutomu Itoh if (ret) { 2615018642a1STsutomu Itoh free_extent_buffer(next); 2616018642a1STsutomu Itoh return ret; 2617018642a1STsutomu Itoh } 2618e02119d5SChris Mason 2619e02119d5SChris Mason WARN_ON(*level <= 0); 2620e02119d5SChris Mason if (path->nodes[*level-1]) 2621e02119d5SChris Mason free_extent_buffer(path->nodes[*level-1]); 2622e02119d5SChris Mason path->nodes[*level-1] = next; 2623e02119d5SChris Mason *level = btrfs_header_level(next); 2624e02119d5SChris Mason path->slots[*level] = 0; 2625e02119d5SChris Mason cond_resched(); 2626e02119d5SChris Mason } 2627e02119d5SChris Mason WARN_ON(*level < 0); 2628e02119d5SChris Mason WARN_ON(*level >= BTRFS_MAX_LEVEL); 2629e02119d5SChris Mason 26304a500fd1SYan, Zheng path->slots[*level] = btrfs_header_nritems(path->nodes[*level]); 2631e02119d5SChris Mason 2632e02119d5SChris Mason cond_resched(); 2633e02119d5SChris Mason return 0; 2634e02119d5SChris Mason } 2635e02119d5SChris Mason 2636d397712bSChris Mason static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans, 2637e02119d5SChris Mason struct btrfs_root *root, 2638e02119d5SChris Mason struct btrfs_path *path, int *level, 2639e02119d5SChris Mason struct walk_control *wc) 2640e02119d5SChris Mason { 26410b246afaSJeff Mahoney struct btrfs_fs_info *fs_info = root->fs_info; 2642e02119d5SChris Mason u64 root_owner; 2643e02119d5SChris Mason int i; 2644e02119d5SChris Mason int slot; 2645e02119d5SChris 
Mason int ret; 2646e02119d5SChris Mason 2647e02119d5SChris Mason for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) { 2648e02119d5SChris Mason slot = path->slots[i]; 26494a500fd1SYan, Zheng if (slot + 1 < btrfs_header_nritems(path->nodes[i])) { 2650e02119d5SChris Mason path->slots[i]++; 2651e02119d5SChris Mason *level = i; 2652e02119d5SChris Mason WARN_ON(*level == 0); 2653e02119d5SChris Mason return 0; 2654e02119d5SChris Mason } else { 265531840ae1SZheng Yan struct extent_buffer *parent; 265631840ae1SZheng Yan if (path->nodes[*level] == root->node) 265731840ae1SZheng Yan parent = path->nodes[*level]; 265831840ae1SZheng Yan else 265931840ae1SZheng Yan parent = path->nodes[*level + 1]; 266031840ae1SZheng Yan 266131840ae1SZheng Yan root_owner = btrfs_header_owner(parent); 26621e5063d0SMark Fasheh ret = wc->process_func(root, path->nodes[*level], wc, 2663581c1760SQu Wenruo btrfs_header_generation(path->nodes[*level]), 2664581c1760SQu Wenruo *level); 26651e5063d0SMark Fasheh if (ret) 26661e5063d0SMark Fasheh return ret; 26671e5063d0SMark Fasheh 2668e02119d5SChris Mason if (wc->free) { 2669e02119d5SChris Mason struct extent_buffer *next; 2670e02119d5SChris Mason 2671e02119d5SChris Mason next = path->nodes[*level]; 2672e02119d5SChris Mason 2673681ae509SJosef Bacik if (trans) { 2674e02119d5SChris Mason btrfs_tree_lock(next); 2675b4ce94deSChris Mason btrfs_set_lock_blocking(next); 26767c302b49SDavid Sterba clean_tree_block(fs_info, next); 2677e02119d5SChris Mason btrfs_wait_tree_block_writeback(next); 2678e02119d5SChris Mason btrfs_tree_unlock(next); 26791846430cSLiu Bo } else { 26801846430cSLiu Bo if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &next->bflags)) 26811846430cSLiu Bo clear_extent_buffer_dirty(next); 2682681ae509SJosef Bacik } 2683e02119d5SChris Mason 2684e02119d5SChris Mason WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID); 26852ff7e61eSJeff Mahoney ret = btrfs_free_and_pin_reserved_extent( 26862ff7e61eSJeff Mahoney fs_info, 2687e02119d5SChris Mason 
path->nodes[*level]->start, 2688d00aff00SChris Mason path->nodes[*level]->len); 26893650860bSJosef Bacik if (ret) 26903650860bSJosef Bacik return ret; 2691e02119d5SChris Mason } 2692e02119d5SChris Mason free_extent_buffer(path->nodes[*level]); 2693e02119d5SChris Mason path->nodes[*level] = NULL; 2694e02119d5SChris Mason *level = i + 1; 2695e02119d5SChris Mason } 2696e02119d5SChris Mason } 2697e02119d5SChris Mason return 1; 2698e02119d5SChris Mason } 2699e02119d5SChris Mason 2700e02119d5SChris Mason /* 2701e02119d5SChris Mason * drop the reference count on the tree rooted at 'snap'. This traverses 2702e02119d5SChris Mason * the tree freeing any blocks that have a ref count of zero after being 2703e02119d5SChris Mason * decremented. 2704e02119d5SChris Mason */ 2705e02119d5SChris Mason static int walk_log_tree(struct btrfs_trans_handle *trans, 2706e02119d5SChris Mason struct btrfs_root *log, struct walk_control *wc) 2707e02119d5SChris Mason { 27082ff7e61eSJeff Mahoney struct btrfs_fs_info *fs_info = log->fs_info; 2709e02119d5SChris Mason int ret = 0; 2710e02119d5SChris Mason int wret; 2711e02119d5SChris Mason int level; 2712e02119d5SChris Mason struct btrfs_path *path; 2713e02119d5SChris Mason int orig_level; 2714e02119d5SChris Mason 2715e02119d5SChris Mason path = btrfs_alloc_path(); 2716db5b493aSTsutomu Itoh if (!path) 2717db5b493aSTsutomu Itoh return -ENOMEM; 2718e02119d5SChris Mason 2719e02119d5SChris Mason level = btrfs_header_level(log->node); 2720e02119d5SChris Mason orig_level = level; 2721e02119d5SChris Mason path->nodes[level] = log->node; 2722e02119d5SChris Mason extent_buffer_get(log->node); 2723e02119d5SChris Mason path->slots[level] = 0; 2724e02119d5SChris Mason 2725e02119d5SChris Mason while (1) { 2726e02119d5SChris Mason wret = walk_down_log_tree(trans, log, path, &level, wc); 2727e02119d5SChris Mason if (wret > 0) 2728e02119d5SChris Mason break; 272979787eaaSJeff Mahoney if (wret < 0) { 2730e02119d5SChris Mason ret = wret; 273179787eaaSJeff Mahoney 
goto out; 273279787eaaSJeff Mahoney } 2733e02119d5SChris Mason 2734e02119d5SChris Mason wret = walk_up_log_tree(trans, log, path, &level, wc); 2735e02119d5SChris Mason if (wret > 0) 2736e02119d5SChris Mason break; 273779787eaaSJeff Mahoney if (wret < 0) { 2738e02119d5SChris Mason ret = wret; 273979787eaaSJeff Mahoney goto out; 274079787eaaSJeff Mahoney } 2741e02119d5SChris Mason } 2742e02119d5SChris Mason 2743e02119d5SChris Mason /* was the root node processed? if not, catch it here */ 2744e02119d5SChris Mason if (path->nodes[orig_level]) { 274579787eaaSJeff Mahoney ret = wc->process_func(log, path->nodes[orig_level], wc, 2746581c1760SQu Wenruo btrfs_header_generation(path->nodes[orig_level]), 2747581c1760SQu Wenruo orig_level); 274879787eaaSJeff Mahoney if (ret) 274979787eaaSJeff Mahoney goto out; 2750e02119d5SChris Mason if (wc->free) { 2751e02119d5SChris Mason struct extent_buffer *next; 2752e02119d5SChris Mason 2753e02119d5SChris Mason next = path->nodes[orig_level]; 2754e02119d5SChris Mason 2755681ae509SJosef Bacik if (trans) { 2756e02119d5SChris Mason btrfs_tree_lock(next); 2757b4ce94deSChris Mason btrfs_set_lock_blocking(next); 27587c302b49SDavid Sterba clean_tree_block(fs_info, next); 2759e02119d5SChris Mason btrfs_wait_tree_block_writeback(next); 2760e02119d5SChris Mason btrfs_tree_unlock(next); 27611846430cSLiu Bo } else { 27621846430cSLiu Bo if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &next->bflags)) 27631846430cSLiu Bo clear_extent_buffer_dirty(next); 2764681ae509SJosef Bacik } 2765e02119d5SChris Mason 2766e02119d5SChris Mason WARN_ON(log->root_key.objectid != 2767e02119d5SChris Mason BTRFS_TREE_LOG_OBJECTID); 27682ff7e61eSJeff Mahoney ret = btrfs_free_and_pin_reserved_extent(fs_info, 27692ff7e61eSJeff Mahoney next->start, next->len); 27703650860bSJosef Bacik if (ret) 27713650860bSJosef Bacik goto out; 2772e02119d5SChris Mason } 2773e02119d5SChris Mason } 2774e02119d5SChris Mason 277579787eaaSJeff Mahoney out: 2776e02119d5SChris Mason 
btrfs_free_path(path); 2777e02119d5SChris Mason return ret; 2778e02119d5SChris Mason } 2779e02119d5SChris Mason 27807237f183SYan Zheng /* 27817237f183SYan Zheng * helper function to update the item for a given subvolumes log root 27827237f183SYan Zheng * in the tree of log roots 27837237f183SYan Zheng */ 27847237f183SYan Zheng static int update_log_root(struct btrfs_trans_handle *trans, 27857237f183SYan Zheng struct btrfs_root *log) 27867237f183SYan Zheng { 27870b246afaSJeff Mahoney struct btrfs_fs_info *fs_info = log->fs_info; 27887237f183SYan Zheng int ret; 27897237f183SYan Zheng 27907237f183SYan Zheng if (log->log_transid == 1) { 27917237f183SYan Zheng /* insert root item on the first sync */ 27920b246afaSJeff Mahoney ret = btrfs_insert_root(trans, fs_info->log_root_tree, 27937237f183SYan Zheng &log->root_key, &log->root_item); 27947237f183SYan Zheng } else { 27950b246afaSJeff Mahoney ret = btrfs_update_root(trans, fs_info->log_root_tree, 27967237f183SYan Zheng &log->root_key, &log->root_item); 27977237f183SYan Zheng } 27987237f183SYan Zheng return ret; 27997237f183SYan Zheng } 28007237f183SYan Zheng 280160d53eb3SZhaolei static void wait_log_commit(struct btrfs_root *root, int transid) 2802e02119d5SChris Mason { 2803e02119d5SChris Mason DEFINE_WAIT(wait); 28047237f183SYan Zheng int index = transid % 2; 2805e02119d5SChris Mason 28067237f183SYan Zheng /* 28077237f183SYan Zheng * we only allow two pending log transactions at a time, 28087237f183SYan Zheng * so we know that if ours is more than 2 older than the 28097237f183SYan Zheng * current transaction, we're done 28107237f183SYan Zheng */ 281149e83f57SLiu Bo for (;;) { 28127237f183SYan Zheng prepare_to_wait(&root->log_commit_wait[index], 28137237f183SYan Zheng &wait, TASK_UNINTERRUPTIBLE); 281449e83f57SLiu Bo 281549e83f57SLiu Bo if (!(root->log_transid_committed < transid && 281649e83f57SLiu Bo atomic_read(&root->log_commit[index]))) 281749e83f57SLiu Bo break; 281849e83f57SLiu Bo 28197237f183SYan Zheng 
mutex_unlock(&root->log_mutex); 2820e02119d5SChris Mason schedule(); 28217237f183SYan Zheng mutex_lock(&root->log_mutex); 282249e83f57SLiu Bo } 282349e83f57SLiu Bo finish_wait(&root->log_commit_wait[index], &wait); 28247237f183SYan Zheng } 28257237f183SYan Zheng 282660d53eb3SZhaolei static void wait_for_writer(struct btrfs_root *root) 28277237f183SYan Zheng { 28287237f183SYan Zheng DEFINE_WAIT(wait); 28298b050d35SMiao Xie 283049e83f57SLiu Bo for (;;) { 283149e83f57SLiu Bo prepare_to_wait(&root->log_writer_wait, &wait, 283249e83f57SLiu Bo TASK_UNINTERRUPTIBLE); 283349e83f57SLiu Bo if (!atomic_read(&root->log_writers)) 283449e83f57SLiu Bo break; 283549e83f57SLiu Bo 28367237f183SYan Zheng mutex_unlock(&root->log_mutex); 28377237f183SYan Zheng schedule(); 2838575849ecSFilipe Manana mutex_lock(&root->log_mutex); 28397237f183SYan Zheng } 284049e83f57SLiu Bo finish_wait(&root->log_writer_wait, &wait); 2841e02119d5SChris Mason } 2842e02119d5SChris Mason 28438b050d35SMiao Xie static inline void btrfs_remove_log_ctx(struct btrfs_root *root, 28448b050d35SMiao Xie struct btrfs_log_ctx *ctx) 28458b050d35SMiao Xie { 28468b050d35SMiao Xie if (!ctx) 28478b050d35SMiao Xie return; 28488b050d35SMiao Xie 28498b050d35SMiao Xie mutex_lock(&root->log_mutex); 28508b050d35SMiao Xie list_del_init(&ctx->list); 28518b050d35SMiao Xie mutex_unlock(&root->log_mutex); 28528b050d35SMiao Xie } 28538b050d35SMiao Xie 28548b050d35SMiao Xie /* 28558b050d35SMiao Xie * Invoked in log mutex context, or be sure there is no other task which 28568b050d35SMiao Xie * can access the list. 
28578b050d35SMiao Xie */ 28588b050d35SMiao Xie static inline void btrfs_remove_all_log_ctxs(struct btrfs_root *root, 28598b050d35SMiao Xie int index, int error) 28608b050d35SMiao Xie { 28618b050d35SMiao Xie struct btrfs_log_ctx *ctx; 2862570dd450SChris Mason struct btrfs_log_ctx *safe; 28638b050d35SMiao Xie 2864570dd450SChris Mason list_for_each_entry_safe(ctx, safe, &root->log_ctxs[index], list) { 2865570dd450SChris Mason list_del_init(&ctx->list); 28668b050d35SMiao Xie ctx->log_ret = error; 2867570dd450SChris Mason } 28688b050d35SMiao Xie 28698b050d35SMiao Xie INIT_LIST_HEAD(&root->log_ctxs[index]); 28708b050d35SMiao Xie } 28718b050d35SMiao Xie 2872e02119d5SChris Mason /* 2873e02119d5SChris Mason * btrfs_sync_log does sends a given tree log down to the disk and 2874e02119d5SChris Mason * updates the super blocks to record it. When this call is done, 287512fcfd22SChris Mason * you know that any inodes previously logged are safely on disk only 287612fcfd22SChris Mason * if it returns 0. 287712fcfd22SChris Mason * 287812fcfd22SChris Mason * Any other return value means you need to call btrfs_commit_transaction. 287912fcfd22SChris Mason * Some of the edge cases for fsyncing directories that have had unlinks 288012fcfd22SChris Mason * or renames done in the past mean that sometimes the only safe 288112fcfd22SChris Mason * fsync is to commit the whole FS. When btrfs_sync_log returns -EAGAIN, 288212fcfd22SChris Mason * that has happened. 
2883e02119d5SChris Mason */ 2884e02119d5SChris Mason int btrfs_sync_log(struct btrfs_trans_handle *trans, 28858b050d35SMiao Xie struct btrfs_root *root, struct btrfs_log_ctx *ctx) 2886e02119d5SChris Mason { 28877237f183SYan Zheng int index1; 28887237f183SYan Zheng int index2; 28898cef4e16SYan, Zheng int mark; 2890e02119d5SChris Mason int ret; 28910b246afaSJeff Mahoney struct btrfs_fs_info *fs_info = root->fs_info; 2892e02119d5SChris Mason struct btrfs_root *log = root->log_root; 28930b246afaSJeff Mahoney struct btrfs_root *log_root_tree = fs_info->log_root_tree; 2894bb14a59bSMiao Xie int log_transid = 0; 28958b050d35SMiao Xie struct btrfs_log_ctx root_log_ctx; 2896c6adc9ccSMiao Xie struct blk_plug plug; 2897e02119d5SChris Mason 28987237f183SYan Zheng mutex_lock(&root->log_mutex); 2899d1433debSMiao Xie log_transid = ctx->log_transid; 2900d1433debSMiao Xie if (root->log_transid_committed >= log_transid) { 29017237f183SYan Zheng mutex_unlock(&root->log_mutex); 29028b050d35SMiao Xie return ctx->log_ret; 2903e02119d5SChris Mason } 2904d1433debSMiao Xie 2905d1433debSMiao Xie index1 = log_transid % 2; 2906d1433debSMiao Xie if (atomic_read(&root->log_commit[index1])) { 290760d53eb3SZhaolei wait_log_commit(root, log_transid); 2908d1433debSMiao Xie mutex_unlock(&root->log_mutex); 2909d1433debSMiao Xie return ctx->log_ret; 2910d1433debSMiao Xie } 2911d1433debSMiao Xie ASSERT(log_transid == root->log_transid); 29127237f183SYan Zheng atomic_set(&root->log_commit[index1], 1); 29137237f183SYan Zheng 29147237f183SYan Zheng /* wait for previous tree log sync to complete */ 29157237f183SYan Zheng if (atomic_read(&root->log_commit[(index1 + 1) % 2])) 291660d53eb3SZhaolei wait_log_commit(root, log_transid - 1); 291748cab2e0SMiao Xie 291886df7eb9SYan, Zheng while (1) { 29192ecb7923SMiao Xie int batch = atomic_read(&root->log_batch); 2920cd354ad6SChris Mason /* when we're on an ssd, just kick the log commit out */ 29210b246afaSJeff Mahoney if (!btrfs_test_opt(fs_info, SSD) && 
292227cdeb70SMiao Xie test_bit(BTRFS_ROOT_MULTI_LOG_TASKS, &root->state)) { 29237237f183SYan Zheng mutex_unlock(&root->log_mutex); 2924e02119d5SChris Mason schedule_timeout_uninterruptible(1); 29257237f183SYan Zheng mutex_lock(&root->log_mutex); 292686df7eb9SYan, Zheng } 292760d53eb3SZhaolei wait_for_writer(root); 29282ecb7923SMiao Xie if (batch == atomic_read(&root->log_batch)) 2929e02119d5SChris Mason break; 2930e02119d5SChris Mason } 2931d0c803c4SChris Mason 293212fcfd22SChris Mason /* bail out if we need to do a full commit */ 29330b246afaSJeff Mahoney if (btrfs_need_log_full_commit(fs_info, trans)) { 293412fcfd22SChris Mason ret = -EAGAIN; 293512fcfd22SChris Mason mutex_unlock(&root->log_mutex); 293612fcfd22SChris Mason goto out; 293712fcfd22SChris Mason } 293812fcfd22SChris Mason 29398cef4e16SYan, Zheng if (log_transid % 2 == 0) 29408cef4e16SYan, Zheng mark = EXTENT_DIRTY; 29418cef4e16SYan, Zheng else 29428cef4e16SYan, Zheng mark = EXTENT_NEW; 29438cef4e16SYan, Zheng 2944690587d1SChris Mason /* we start IO on all the marked extents here, but we don't actually 2945690587d1SChris Mason * wait for them until later. 
2946690587d1SChris Mason */ 2947c6adc9ccSMiao Xie blk_start_plug(&plug); 29482ff7e61eSJeff Mahoney ret = btrfs_write_marked_extents(fs_info, &log->dirty_log_pages, mark); 294979787eaaSJeff Mahoney if (ret) { 2950c6adc9ccSMiao Xie blk_finish_plug(&plug); 295166642832SJeff Mahoney btrfs_abort_transaction(trans, ret); 29520b246afaSJeff Mahoney btrfs_set_log_full_commit(fs_info, trans); 295379787eaaSJeff Mahoney mutex_unlock(&root->log_mutex); 295479787eaaSJeff Mahoney goto out; 295579787eaaSJeff Mahoney } 29567237f183SYan Zheng 29575d4f98a2SYan Zheng btrfs_set_root_node(&log->root_item, log->node); 29587237f183SYan Zheng 29597237f183SYan Zheng root->log_transid++; 29607237f183SYan Zheng log->log_transid = root->log_transid; 2961ff782e0aSJosef Bacik root->log_start_pid = 0; 29627237f183SYan Zheng /* 29638cef4e16SYan, Zheng * IO has been started, blocks of the log tree have WRITTEN flag set 29648cef4e16SYan, Zheng * in their headers. new modifications of the log will be written to 29658cef4e16SYan, Zheng * new positions. so it's safe to allow log writers to go in. 
29667237f183SYan Zheng */ 29677237f183SYan Zheng mutex_unlock(&root->log_mutex); 29687237f183SYan Zheng 296928a23593SFilipe Manana btrfs_init_log_ctx(&root_log_ctx, NULL); 2970d1433debSMiao Xie 29717237f183SYan Zheng mutex_lock(&log_root_tree->log_mutex); 29722ecb7923SMiao Xie atomic_inc(&log_root_tree->log_batch); 29737237f183SYan Zheng atomic_inc(&log_root_tree->log_writers); 2974d1433debSMiao Xie 2975d1433debSMiao Xie index2 = log_root_tree->log_transid % 2; 2976d1433debSMiao Xie list_add_tail(&root_log_ctx.list, &log_root_tree->log_ctxs[index2]); 2977d1433debSMiao Xie root_log_ctx.log_transid = log_root_tree->log_transid; 2978d1433debSMiao Xie 29797237f183SYan Zheng mutex_unlock(&log_root_tree->log_mutex); 29807237f183SYan Zheng 29817237f183SYan Zheng ret = update_log_root(trans, log); 29827237f183SYan Zheng 29837237f183SYan Zheng mutex_lock(&log_root_tree->log_mutex); 29847237f183SYan Zheng if (atomic_dec_and_test(&log_root_tree->log_writers)) { 2985093258e6SDavid Sterba /* atomic_dec_and_test implies a barrier */ 2986093258e6SDavid Sterba cond_wake_up_nomb(&log_root_tree->log_writer_wait); 29877237f183SYan Zheng } 29887237f183SYan Zheng 29894a500fd1SYan, Zheng if (ret) { 2990d1433debSMiao Xie if (!list_empty(&root_log_ctx.list)) 2991d1433debSMiao Xie list_del_init(&root_log_ctx.list); 2992d1433debSMiao Xie 2993c6adc9ccSMiao Xie blk_finish_plug(&plug); 29940b246afaSJeff Mahoney btrfs_set_log_full_commit(fs_info, trans); 2995995946ddSMiao Xie 299679787eaaSJeff Mahoney if (ret != -ENOSPC) { 299766642832SJeff Mahoney btrfs_abort_transaction(trans, ret); 299879787eaaSJeff Mahoney mutex_unlock(&log_root_tree->log_mutex); 299979787eaaSJeff Mahoney goto out; 300079787eaaSJeff Mahoney } 3001bf89d38fSJeff Mahoney btrfs_wait_tree_log_extents(log, mark); 30024a500fd1SYan, Zheng mutex_unlock(&log_root_tree->log_mutex); 30034a500fd1SYan, Zheng ret = -EAGAIN; 30044a500fd1SYan, Zheng goto out; 30054a500fd1SYan, Zheng } 30064a500fd1SYan, Zheng 3007d1433debSMiao Xie if 
(log_root_tree->log_transid_committed >= root_log_ctx.log_transid) { 30083da5ab56SForrest Liu blk_finish_plug(&plug); 3009cbd60aa7SChris Mason list_del_init(&root_log_ctx.list); 3010d1433debSMiao Xie mutex_unlock(&log_root_tree->log_mutex); 3011d1433debSMiao Xie ret = root_log_ctx.log_ret; 3012d1433debSMiao Xie goto out; 3013d1433debSMiao Xie } 30148b050d35SMiao Xie 3015d1433debSMiao Xie index2 = root_log_ctx.log_transid % 2; 30167237f183SYan Zheng if (atomic_read(&log_root_tree->log_commit[index2])) { 3017c6adc9ccSMiao Xie blk_finish_plug(&plug); 3018bf89d38fSJeff Mahoney ret = btrfs_wait_tree_log_extents(log, mark); 301960d53eb3SZhaolei wait_log_commit(log_root_tree, 3020d1433debSMiao Xie root_log_ctx.log_transid); 30217237f183SYan Zheng mutex_unlock(&log_root_tree->log_mutex); 30225ab5e44aSFilipe Manana if (!ret) 30238b050d35SMiao Xie ret = root_log_ctx.log_ret; 30247237f183SYan Zheng goto out; 30257237f183SYan Zheng } 3026d1433debSMiao Xie ASSERT(root_log_ctx.log_transid == log_root_tree->log_transid); 30277237f183SYan Zheng atomic_set(&log_root_tree->log_commit[index2], 1); 30287237f183SYan Zheng 302912fcfd22SChris Mason if (atomic_read(&log_root_tree->log_commit[(index2 + 1) % 2])) { 303060d53eb3SZhaolei wait_log_commit(log_root_tree, 3031d1433debSMiao Xie root_log_ctx.log_transid - 1); 303212fcfd22SChris Mason } 30337237f183SYan Zheng 303460d53eb3SZhaolei wait_for_writer(log_root_tree); 303512fcfd22SChris Mason 303612fcfd22SChris Mason /* 303712fcfd22SChris Mason * now that we've moved on to the tree of log tree roots, 303812fcfd22SChris Mason * check the full commit flag again 303912fcfd22SChris Mason */ 30400b246afaSJeff Mahoney if (btrfs_need_log_full_commit(fs_info, trans)) { 3041c6adc9ccSMiao Xie blk_finish_plug(&plug); 3042bf89d38fSJeff Mahoney btrfs_wait_tree_log_extents(log, mark); 304312fcfd22SChris Mason mutex_unlock(&log_root_tree->log_mutex); 304412fcfd22SChris Mason ret = -EAGAIN; 304512fcfd22SChris Mason goto out_wake_log_root; 
304612fcfd22SChris Mason } 30477237f183SYan Zheng 30482ff7e61eSJeff Mahoney ret = btrfs_write_marked_extents(fs_info, 30498cef4e16SYan, Zheng &log_root_tree->dirty_log_pages, 30508cef4e16SYan, Zheng EXTENT_DIRTY | EXTENT_NEW); 3051c6adc9ccSMiao Xie blk_finish_plug(&plug); 305279787eaaSJeff Mahoney if (ret) { 30530b246afaSJeff Mahoney btrfs_set_log_full_commit(fs_info, trans); 305466642832SJeff Mahoney btrfs_abort_transaction(trans, ret); 305579787eaaSJeff Mahoney mutex_unlock(&log_root_tree->log_mutex); 305679787eaaSJeff Mahoney goto out_wake_log_root; 305779787eaaSJeff Mahoney } 3058bf89d38fSJeff Mahoney ret = btrfs_wait_tree_log_extents(log, mark); 30595ab5e44aSFilipe Manana if (!ret) 3060bf89d38fSJeff Mahoney ret = btrfs_wait_tree_log_extents(log_root_tree, 3061c6adc9ccSMiao Xie EXTENT_NEW | EXTENT_DIRTY); 30625ab5e44aSFilipe Manana if (ret) { 30630b246afaSJeff Mahoney btrfs_set_log_full_commit(fs_info, trans); 30645ab5e44aSFilipe Manana mutex_unlock(&log_root_tree->log_mutex); 30655ab5e44aSFilipe Manana goto out_wake_log_root; 30665ab5e44aSFilipe Manana } 3067e02119d5SChris Mason 30680b246afaSJeff Mahoney btrfs_set_super_log_root(fs_info->super_for_commit, 30697237f183SYan Zheng log_root_tree->node->start); 30700b246afaSJeff Mahoney btrfs_set_super_log_root_level(fs_info->super_for_commit, 30717237f183SYan Zheng btrfs_header_level(log_root_tree->node)); 3072e02119d5SChris Mason 30737237f183SYan Zheng log_root_tree->log_transid++; 30747237f183SYan Zheng mutex_unlock(&log_root_tree->log_mutex); 30757237f183SYan Zheng 30767237f183SYan Zheng /* 30777237f183SYan Zheng * nobody else is going to jump in and write the the ctree 30787237f183SYan Zheng * super here because the log_commit atomic below is protecting 30797237f183SYan Zheng * us. We must be called with a transaction handle pinning 30807237f183SYan Zheng * the running transaction open, so a full commit can't hop 30817237f183SYan Zheng * in and cause problems either. 
30827237f183SYan Zheng */ 3083eece6a9cSDavid Sterba ret = write_all_supers(fs_info, 1); 30845af3e8ccSStefan Behrens if (ret) { 30850b246afaSJeff Mahoney btrfs_set_log_full_commit(fs_info, trans); 308666642832SJeff Mahoney btrfs_abort_transaction(trans, ret); 30875af3e8ccSStefan Behrens goto out_wake_log_root; 30885af3e8ccSStefan Behrens } 30897237f183SYan Zheng 3090257c62e1SChris Mason mutex_lock(&root->log_mutex); 3091257c62e1SChris Mason if (root->last_log_commit < log_transid) 3092257c62e1SChris Mason root->last_log_commit = log_transid; 3093257c62e1SChris Mason mutex_unlock(&root->log_mutex); 3094257c62e1SChris Mason 309512fcfd22SChris Mason out_wake_log_root: 3096570dd450SChris Mason mutex_lock(&log_root_tree->log_mutex); 30978b050d35SMiao Xie btrfs_remove_all_log_ctxs(log_root_tree, index2, ret); 30988b050d35SMiao Xie 3099d1433debSMiao Xie log_root_tree->log_transid_committed++; 31007237f183SYan Zheng atomic_set(&log_root_tree->log_commit[index2], 0); 3101d1433debSMiao Xie mutex_unlock(&log_root_tree->log_mutex); 3102d1433debSMiao Xie 310333a9eca7SDavid Sterba /* 3104093258e6SDavid Sterba * The barrier before waitqueue_active (in cond_wake_up) is needed so 3105093258e6SDavid Sterba * all the updates above are seen by the woken threads. It might not be 3106093258e6SDavid Sterba * necessary, but proving that seems to be hard. 
310733a9eca7SDavid Sterba */ 3108093258e6SDavid Sterba cond_wake_up(&log_root_tree->log_commit_wait[index2]); 3109e02119d5SChris Mason out: 3110d1433debSMiao Xie mutex_lock(&root->log_mutex); 3111570dd450SChris Mason btrfs_remove_all_log_ctxs(root, index1, ret); 3112d1433debSMiao Xie root->log_transid_committed++; 31137237f183SYan Zheng atomic_set(&root->log_commit[index1], 0); 3114d1433debSMiao Xie mutex_unlock(&root->log_mutex); 31158b050d35SMiao Xie 311633a9eca7SDavid Sterba /* 3117093258e6SDavid Sterba * The barrier before waitqueue_active (in cond_wake_up) is needed so 3118093258e6SDavid Sterba * all the updates above are seen by the woken threads. It might not be 3119093258e6SDavid Sterba * necessary, but proving that seems to be hard. 312033a9eca7SDavid Sterba */ 3121093258e6SDavid Sterba cond_wake_up(&root->log_commit_wait[index1]); 3122b31eabd8SChris Mason return ret; 3123e02119d5SChris Mason } 3124e02119d5SChris Mason 31254a500fd1SYan, Zheng static void free_log_tree(struct btrfs_trans_handle *trans, 31264a500fd1SYan, Zheng struct btrfs_root *log) 3127e02119d5SChris Mason { 3128e02119d5SChris Mason int ret; 3129d0c803c4SChris Mason u64 start; 3130d0c803c4SChris Mason u64 end; 3131e02119d5SChris Mason struct walk_control wc = { 3132e02119d5SChris Mason .free = 1, 3133e02119d5SChris Mason .process_func = process_one_buffer 3134e02119d5SChris Mason }; 3135e02119d5SChris Mason 3136e02119d5SChris Mason ret = walk_log_tree(trans, log, &wc); 31373650860bSJosef Bacik /* I don't think this can happen but just in case */ 31383650860bSJosef Bacik if (ret) 313966642832SJeff Mahoney btrfs_abort_transaction(trans, ret); 3140e02119d5SChris Mason 3141d0c803c4SChris Mason while (1) { 3142d0c803c4SChris Mason ret = find_first_extent_bit(&log->dirty_log_pages, 314355237a5fSLiu Bo 0, &start, &end, 314455237a5fSLiu Bo EXTENT_DIRTY | EXTENT_NEW | EXTENT_NEED_WAIT, 3145e6138876SJosef Bacik NULL); 3146d0c803c4SChris Mason if (ret) 3147d0c803c4SChris Mason break; 
3148d0c803c4SChris Mason 31498cef4e16SYan, Zheng clear_extent_bits(&log->dirty_log_pages, start, end, 315055237a5fSLiu Bo EXTENT_DIRTY | EXTENT_NEW | EXTENT_NEED_WAIT); 3151d0c803c4SChris Mason } 3152d0c803c4SChris Mason 31537237f183SYan Zheng free_extent_buffer(log->node); 31547237f183SYan Zheng kfree(log); 31554a500fd1SYan, Zheng } 31564a500fd1SYan, Zheng 31574a500fd1SYan, Zheng /* 31584a500fd1SYan, Zheng * free all the extents used by the tree log. This should be called 31594a500fd1SYan, Zheng * at commit time of the full transaction 31604a500fd1SYan, Zheng */ 31614a500fd1SYan, Zheng int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root) 31624a500fd1SYan, Zheng { 31634a500fd1SYan, Zheng if (root->log_root) { 31644a500fd1SYan, Zheng free_log_tree(trans, root->log_root); 31654a500fd1SYan, Zheng root->log_root = NULL; 31664a500fd1SYan, Zheng } 31674a500fd1SYan, Zheng return 0; 31684a500fd1SYan, Zheng } 31694a500fd1SYan, Zheng 31704a500fd1SYan, Zheng int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, 31714a500fd1SYan, Zheng struct btrfs_fs_info *fs_info) 31724a500fd1SYan, Zheng { 31734a500fd1SYan, Zheng if (fs_info->log_root_tree) { 31744a500fd1SYan, Zheng free_log_tree(trans, fs_info->log_root_tree); 31754a500fd1SYan, Zheng fs_info->log_root_tree = NULL; 31764a500fd1SYan, Zheng } 3177e02119d5SChris Mason return 0; 3178e02119d5SChris Mason } 3179e02119d5SChris Mason 3180e02119d5SChris Mason /* 3181e02119d5SChris Mason * If both a file and directory are logged, and unlinks or renames are 3182e02119d5SChris Mason * mixed in, we have a few interesting corners: 3183e02119d5SChris Mason * 3184e02119d5SChris Mason * create file X in dir Y 3185e02119d5SChris Mason * link file X to X.link in dir Y 3186e02119d5SChris Mason * fsync file X 3187e02119d5SChris Mason * unlink file X but leave X.link 3188e02119d5SChris Mason * fsync dir Y 3189e02119d5SChris Mason * 3190e02119d5SChris Mason * After a crash we would expect only X.link to exist. 
But file X 3191e02119d5SChris Mason * didn't get fsync'd again so the log has back refs for X and X.link. 3192e02119d5SChris Mason * 3193e02119d5SChris Mason * We solve this by removing directory entries and inode backrefs from the 3194e02119d5SChris Mason * log when a file that was logged in the current transaction is 3195e02119d5SChris Mason * unlinked. Any later fsync will include the updated log entries, and 3196e02119d5SChris Mason * we'll be able to reconstruct the proper directory items from backrefs. 3197e02119d5SChris Mason * 3198e02119d5SChris Mason * This optimizations allows us to avoid relogging the entire inode 3199e02119d5SChris Mason * or the entire directory. 3200e02119d5SChris Mason */ 3201e02119d5SChris Mason int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, 3202e02119d5SChris Mason struct btrfs_root *root, 3203e02119d5SChris Mason const char *name, int name_len, 320449f34d1fSNikolay Borisov struct btrfs_inode *dir, u64 index) 3205e02119d5SChris Mason { 3206e02119d5SChris Mason struct btrfs_root *log; 3207e02119d5SChris Mason struct btrfs_dir_item *di; 3208e02119d5SChris Mason struct btrfs_path *path; 3209e02119d5SChris Mason int ret; 32104a500fd1SYan, Zheng int err = 0; 3211e02119d5SChris Mason int bytes_del = 0; 321249f34d1fSNikolay Borisov u64 dir_ino = btrfs_ino(dir); 3213e02119d5SChris Mason 321449f34d1fSNikolay Borisov if (dir->logged_trans < trans->transid) 32153a5f1d45SChris Mason return 0; 32163a5f1d45SChris Mason 3217e02119d5SChris Mason ret = join_running_log_trans(root); 3218e02119d5SChris Mason if (ret) 3219e02119d5SChris Mason return 0; 3220e02119d5SChris Mason 322149f34d1fSNikolay Borisov mutex_lock(&dir->log_mutex); 3222e02119d5SChris Mason 3223e02119d5SChris Mason log = root->log_root; 3224e02119d5SChris Mason path = btrfs_alloc_path(); 3225a62f44a5STsutomu Itoh if (!path) { 3226a62f44a5STsutomu Itoh err = -ENOMEM; 3227a62f44a5STsutomu Itoh goto out_unlock; 3228a62f44a5STsutomu Itoh } 32292a29edc6Sliubo 
323033345d01SLi Zefan di = btrfs_lookup_dir_item(trans, log, path, dir_ino, 3231e02119d5SChris Mason name, name_len, -1); 32324a500fd1SYan, Zheng if (IS_ERR(di)) { 32334a500fd1SYan, Zheng err = PTR_ERR(di); 32344a500fd1SYan, Zheng goto fail; 32354a500fd1SYan, Zheng } 32364a500fd1SYan, Zheng if (di) { 3237e02119d5SChris Mason ret = btrfs_delete_one_dir_name(trans, log, path, di); 3238e02119d5SChris Mason bytes_del += name_len; 32393650860bSJosef Bacik if (ret) { 32403650860bSJosef Bacik err = ret; 32413650860bSJosef Bacik goto fail; 32423650860bSJosef Bacik } 3243e02119d5SChris Mason } 3244b3b4aa74SDavid Sterba btrfs_release_path(path); 324533345d01SLi Zefan di = btrfs_lookup_dir_index_item(trans, log, path, dir_ino, 3246e02119d5SChris Mason index, name, name_len, -1); 32474a500fd1SYan, Zheng if (IS_ERR(di)) { 32484a500fd1SYan, Zheng err = PTR_ERR(di); 32494a500fd1SYan, Zheng goto fail; 32504a500fd1SYan, Zheng } 32514a500fd1SYan, Zheng if (di) { 3252e02119d5SChris Mason ret = btrfs_delete_one_dir_name(trans, log, path, di); 3253e02119d5SChris Mason bytes_del += name_len; 32543650860bSJosef Bacik if (ret) { 32553650860bSJosef Bacik err = ret; 32563650860bSJosef Bacik goto fail; 32573650860bSJosef Bacik } 3258e02119d5SChris Mason } 3259e02119d5SChris Mason 3260e02119d5SChris Mason /* update the directory size in the log to reflect the names 3261e02119d5SChris Mason * we have removed 3262e02119d5SChris Mason */ 3263e02119d5SChris Mason if (bytes_del) { 3264e02119d5SChris Mason struct btrfs_key key; 3265e02119d5SChris Mason 326633345d01SLi Zefan key.objectid = dir_ino; 3267e02119d5SChris Mason key.offset = 0; 3268e02119d5SChris Mason key.type = BTRFS_INODE_ITEM_KEY; 3269b3b4aa74SDavid Sterba btrfs_release_path(path); 3270e02119d5SChris Mason 3271e02119d5SChris Mason ret = btrfs_search_slot(trans, log, &key, path, 0, 1); 32724a500fd1SYan, Zheng if (ret < 0) { 32734a500fd1SYan, Zheng err = ret; 32744a500fd1SYan, Zheng goto fail; 32754a500fd1SYan, Zheng } 
3276e02119d5SChris Mason if (ret == 0) { 3277e02119d5SChris Mason struct btrfs_inode_item *item; 3278e02119d5SChris Mason u64 i_size; 3279e02119d5SChris Mason 3280e02119d5SChris Mason item = btrfs_item_ptr(path->nodes[0], path->slots[0], 3281e02119d5SChris Mason struct btrfs_inode_item); 3282e02119d5SChris Mason i_size = btrfs_inode_size(path->nodes[0], item); 3283e02119d5SChris Mason if (i_size > bytes_del) 3284e02119d5SChris Mason i_size -= bytes_del; 3285e02119d5SChris Mason else 3286e02119d5SChris Mason i_size = 0; 3287e02119d5SChris Mason btrfs_set_inode_size(path->nodes[0], item, i_size); 3288e02119d5SChris Mason btrfs_mark_buffer_dirty(path->nodes[0]); 3289e02119d5SChris Mason } else 3290e02119d5SChris Mason ret = 0; 3291b3b4aa74SDavid Sterba btrfs_release_path(path); 3292e02119d5SChris Mason } 32934a500fd1SYan, Zheng fail: 3294e02119d5SChris Mason btrfs_free_path(path); 3295a62f44a5STsutomu Itoh out_unlock: 329649f34d1fSNikolay Borisov mutex_unlock(&dir->log_mutex); 32974a500fd1SYan, Zheng if (ret == -ENOSPC) { 3298995946ddSMiao Xie btrfs_set_log_full_commit(root->fs_info, trans); 32994a500fd1SYan, Zheng ret = 0; 330079787eaaSJeff Mahoney } else if (ret < 0) 330166642832SJeff Mahoney btrfs_abort_transaction(trans, ret); 330279787eaaSJeff Mahoney 330312fcfd22SChris Mason btrfs_end_log_trans(root); 3304e02119d5SChris Mason 3305411fc6bcSAndi Kleen return err; 3306e02119d5SChris Mason } 3307e02119d5SChris Mason 3308e02119d5SChris Mason /* see comments for btrfs_del_dir_entries_in_log */ 3309e02119d5SChris Mason int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans, 3310e02119d5SChris Mason struct btrfs_root *root, 3311e02119d5SChris Mason const char *name, int name_len, 3312a491abb2SNikolay Borisov struct btrfs_inode *inode, u64 dirid) 3313e02119d5SChris Mason { 33140b246afaSJeff Mahoney struct btrfs_fs_info *fs_info = root->fs_info; 3315e02119d5SChris Mason struct btrfs_root *log; 3316e02119d5SChris Mason u64 index; 3317e02119d5SChris Mason int ret; 
3318e02119d5SChris Mason 3319a491abb2SNikolay Borisov if (inode->logged_trans < trans->transid) 33203a5f1d45SChris Mason return 0; 33213a5f1d45SChris Mason 3322e02119d5SChris Mason ret = join_running_log_trans(root); 3323e02119d5SChris Mason if (ret) 3324e02119d5SChris Mason return 0; 3325e02119d5SChris Mason log = root->log_root; 3326a491abb2SNikolay Borisov mutex_lock(&inode->log_mutex); 3327e02119d5SChris Mason 3328a491abb2SNikolay Borisov ret = btrfs_del_inode_ref(trans, log, name, name_len, btrfs_ino(inode), 3329e02119d5SChris Mason dirid, &index); 3330a491abb2SNikolay Borisov mutex_unlock(&inode->log_mutex); 33314a500fd1SYan, Zheng if (ret == -ENOSPC) { 33320b246afaSJeff Mahoney btrfs_set_log_full_commit(fs_info, trans); 33334a500fd1SYan, Zheng ret = 0; 333479787eaaSJeff Mahoney } else if (ret < 0 && ret != -ENOENT) 333566642832SJeff Mahoney btrfs_abort_transaction(trans, ret); 333612fcfd22SChris Mason btrfs_end_log_trans(root); 3337e02119d5SChris Mason 3338e02119d5SChris Mason return ret; 3339e02119d5SChris Mason } 3340e02119d5SChris Mason 3341e02119d5SChris Mason /* 3342e02119d5SChris Mason * creates a range item in the log for 'dirid'. first_offset and 3343e02119d5SChris Mason * last_offset tell us which parts of the key space the log should 3344e02119d5SChris Mason * be considered authoritative for. 
3345e02119d5SChris Mason */ 3346e02119d5SChris Mason static noinline int insert_dir_log_key(struct btrfs_trans_handle *trans, 3347e02119d5SChris Mason struct btrfs_root *log, 3348e02119d5SChris Mason struct btrfs_path *path, 3349e02119d5SChris Mason int key_type, u64 dirid, 3350e02119d5SChris Mason u64 first_offset, u64 last_offset) 3351e02119d5SChris Mason { 3352e02119d5SChris Mason int ret; 3353e02119d5SChris Mason struct btrfs_key key; 3354e02119d5SChris Mason struct btrfs_dir_log_item *item; 3355e02119d5SChris Mason 3356e02119d5SChris Mason key.objectid = dirid; 3357e02119d5SChris Mason key.offset = first_offset; 3358e02119d5SChris Mason if (key_type == BTRFS_DIR_ITEM_KEY) 3359e02119d5SChris Mason key.type = BTRFS_DIR_LOG_ITEM_KEY; 3360e02119d5SChris Mason else 3361e02119d5SChris Mason key.type = BTRFS_DIR_LOG_INDEX_KEY; 3362e02119d5SChris Mason ret = btrfs_insert_empty_item(trans, log, path, &key, sizeof(*item)); 33634a500fd1SYan, Zheng if (ret) 33644a500fd1SYan, Zheng return ret; 3365e02119d5SChris Mason 3366e02119d5SChris Mason item = btrfs_item_ptr(path->nodes[0], path->slots[0], 3367e02119d5SChris Mason struct btrfs_dir_log_item); 3368e02119d5SChris Mason btrfs_set_dir_log_end(path->nodes[0], item, last_offset); 3369e02119d5SChris Mason btrfs_mark_buffer_dirty(path->nodes[0]); 3370b3b4aa74SDavid Sterba btrfs_release_path(path); 3371e02119d5SChris Mason return 0; 3372e02119d5SChris Mason } 3373e02119d5SChris Mason 3374e02119d5SChris Mason /* 3375e02119d5SChris Mason * log all the items included in the current transaction for a given 3376e02119d5SChris Mason * directory. 
This also creates the range items in the log tree required 3377e02119d5SChris Mason * to replay anything deleted before the fsync 3378e02119d5SChris Mason */ 3379e02119d5SChris Mason static noinline int log_dir_items(struct btrfs_trans_handle *trans, 3380684a5773SNikolay Borisov struct btrfs_root *root, struct btrfs_inode *inode, 3381e02119d5SChris Mason struct btrfs_path *path, 3382e02119d5SChris Mason struct btrfs_path *dst_path, int key_type, 33832f2ff0eeSFilipe Manana struct btrfs_log_ctx *ctx, 3384e02119d5SChris Mason u64 min_offset, u64 *last_offset_ret) 3385e02119d5SChris Mason { 3386e02119d5SChris Mason struct btrfs_key min_key; 3387e02119d5SChris Mason struct btrfs_root *log = root->log_root; 3388e02119d5SChris Mason struct extent_buffer *src; 33894a500fd1SYan, Zheng int err = 0; 3390e02119d5SChris Mason int ret; 3391e02119d5SChris Mason int i; 3392e02119d5SChris Mason int nritems; 3393e02119d5SChris Mason u64 first_offset = min_offset; 3394e02119d5SChris Mason u64 last_offset = (u64)-1; 3395684a5773SNikolay Borisov u64 ino = btrfs_ino(inode); 3396e02119d5SChris Mason 3397e02119d5SChris Mason log = root->log_root; 3398e02119d5SChris Mason 339933345d01SLi Zefan min_key.objectid = ino; 3400e02119d5SChris Mason min_key.type = key_type; 3401e02119d5SChris Mason min_key.offset = min_offset; 3402e02119d5SChris Mason 34036174d3cbSFilipe David Borba Manana ret = btrfs_search_forward(root, &min_key, path, trans->transid); 3404e02119d5SChris Mason 3405e02119d5SChris Mason /* 3406e02119d5SChris Mason * we didn't find anything from this transaction, see if there 3407e02119d5SChris Mason * is anything at all 3408e02119d5SChris Mason */ 340933345d01SLi Zefan if (ret != 0 || min_key.objectid != ino || min_key.type != key_type) { 341033345d01SLi Zefan min_key.objectid = ino; 3411e02119d5SChris Mason min_key.type = key_type; 3412e02119d5SChris Mason min_key.offset = (u64)-1; 3413b3b4aa74SDavid Sterba btrfs_release_path(path); 3414e02119d5SChris Mason ret = 
btrfs_search_slot(NULL, root, &min_key, path, 0, 0); 3415e02119d5SChris Mason if (ret < 0) { 3416b3b4aa74SDavid Sterba btrfs_release_path(path); 3417e02119d5SChris Mason return ret; 3418e02119d5SChris Mason } 341933345d01SLi Zefan ret = btrfs_previous_item(root, path, ino, key_type); 3420e02119d5SChris Mason 3421e02119d5SChris Mason /* if ret == 0 there are items for this type, 3422e02119d5SChris Mason * create a range to tell us the last key of this type. 3423e02119d5SChris Mason * otherwise, there are no items in this directory after 3424e02119d5SChris Mason * *min_offset, and we create a range to indicate that. 3425e02119d5SChris Mason */ 3426e02119d5SChris Mason if (ret == 0) { 3427e02119d5SChris Mason struct btrfs_key tmp; 3428e02119d5SChris Mason btrfs_item_key_to_cpu(path->nodes[0], &tmp, 3429e02119d5SChris Mason path->slots[0]); 3430d397712bSChris Mason if (key_type == tmp.type) 3431e02119d5SChris Mason first_offset = max(min_offset, tmp.offset) + 1; 3432e02119d5SChris Mason } 3433e02119d5SChris Mason goto done; 3434e02119d5SChris Mason } 3435e02119d5SChris Mason 3436e02119d5SChris Mason /* go backward to find any previous key */ 343733345d01SLi Zefan ret = btrfs_previous_item(root, path, ino, key_type); 3438e02119d5SChris Mason if (ret == 0) { 3439e02119d5SChris Mason struct btrfs_key tmp; 3440e02119d5SChris Mason btrfs_item_key_to_cpu(path->nodes[0], &tmp, path->slots[0]); 3441e02119d5SChris Mason if (key_type == tmp.type) { 3442e02119d5SChris Mason first_offset = tmp.offset; 3443e02119d5SChris Mason ret = overwrite_item(trans, log, dst_path, 3444e02119d5SChris Mason path->nodes[0], path->slots[0], 3445e02119d5SChris Mason &tmp); 34464a500fd1SYan, Zheng if (ret) { 34474a500fd1SYan, Zheng err = ret; 34484a500fd1SYan, Zheng goto done; 34494a500fd1SYan, Zheng } 3450e02119d5SChris Mason } 3451e02119d5SChris Mason } 3452b3b4aa74SDavid Sterba btrfs_release_path(path); 3453e02119d5SChris Mason 3454e02119d5SChris Mason /* find the first key from this transaction 
again */ 3455e02119d5SChris Mason ret = btrfs_search_slot(NULL, root, &min_key, path, 0, 0); 3456fae7f21cSDulshani Gunawardhana if (WARN_ON(ret != 0)) 3457e02119d5SChris Mason goto done; 3458e02119d5SChris Mason 3459e02119d5SChris Mason /* 3460e02119d5SChris Mason * we have a block from this transaction, log every item in it 3461e02119d5SChris Mason * from our directory 3462e02119d5SChris Mason */ 3463e02119d5SChris Mason while (1) { 3464e02119d5SChris Mason struct btrfs_key tmp; 3465e02119d5SChris Mason src = path->nodes[0]; 3466e02119d5SChris Mason nritems = btrfs_header_nritems(src); 3467e02119d5SChris Mason for (i = path->slots[0]; i < nritems; i++) { 34682f2ff0eeSFilipe Manana struct btrfs_dir_item *di; 34692f2ff0eeSFilipe Manana 3470e02119d5SChris Mason btrfs_item_key_to_cpu(src, &min_key, i); 3471e02119d5SChris Mason 347233345d01SLi Zefan if (min_key.objectid != ino || min_key.type != key_type) 3473e02119d5SChris Mason goto done; 3474e02119d5SChris Mason ret = overwrite_item(trans, log, dst_path, src, i, 3475e02119d5SChris Mason &min_key); 34764a500fd1SYan, Zheng if (ret) { 34774a500fd1SYan, Zheng err = ret; 34784a500fd1SYan, Zheng goto done; 34794a500fd1SYan, Zheng } 34802f2ff0eeSFilipe Manana 34812f2ff0eeSFilipe Manana /* 34822f2ff0eeSFilipe Manana * We must make sure that when we log a directory entry, 34832f2ff0eeSFilipe Manana * the corresponding inode, after log replay, has a 34842f2ff0eeSFilipe Manana * matching link count. 
For example: 34852f2ff0eeSFilipe Manana * 34862f2ff0eeSFilipe Manana * touch foo 34872f2ff0eeSFilipe Manana * mkdir mydir 34882f2ff0eeSFilipe Manana * sync 34892f2ff0eeSFilipe Manana * ln foo mydir/bar 34902f2ff0eeSFilipe Manana * xfs_io -c "fsync" mydir 34912f2ff0eeSFilipe Manana * <crash> 34922f2ff0eeSFilipe Manana * <mount fs and log replay> 34932f2ff0eeSFilipe Manana * 34942f2ff0eeSFilipe Manana * Would result in a fsync log that when replayed, our 34952f2ff0eeSFilipe Manana * file inode would have a link count of 1, but we get 34962f2ff0eeSFilipe Manana * two directory entries pointing to the same inode. 34972f2ff0eeSFilipe Manana * After removing one of the names, it would not be 34982f2ff0eeSFilipe Manana * possible to remove the other name, which resulted 34992f2ff0eeSFilipe Manana * always in stale file handle errors, and would not 35002f2ff0eeSFilipe Manana * be possible to rmdir the parent directory, since 35012f2ff0eeSFilipe Manana * its i_size could never decrement to the value 35022f2ff0eeSFilipe Manana * BTRFS_EMPTY_DIR_SIZE, resulting in -ENOTEMPTY errors. 
35032f2ff0eeSFilipe Manana */ 35042f2ff0eeSFilipe Manana di = btrfs_item_ptr(src, i, struct btrfs_dir_item); 35052f2ff0eeSFilipe Manana btrfs_dir_item_key_to_cpu(src, di, &tmp); 35062f2ff0eeSFilipe Manana if (ctx && 35072f2ff0eeSFilipe Manana (btrfs_dir_transid(src, di) == trans->transid || 35082f2ff0eeSFilipe Manana btrfs_dir_type(src, di) == BTRFS_FT_DIR) && 35092f2ff0eeSFilipe Manana tmp.type != BTRFS_ROOT_ITEM_KEY) 35102f2ff0eeSFilipe Manana ctx->log_new_dentries = true; 3511e02119d5SChris Mason } 3512e02119d5SChris Mason path->slots[0] = nritems; 3513e02119d5SChris Mason 3514e02119d5SChris Mason /* 3515e02119d5SChris Mason * look ahead to the next item and see if it is also 3516e02119d5SChris Mason * from this directory and from this transaction 3517e02119d5SChris Mason */ 3518e02119d5SChris Mason ret = btrfs_next_leaf(root, path); 351980c0b421SLiu Bo if (ret) { 352080c0b421SLiu Bo if (ret == 1) 3521e02119d5SChris Mason last_offset = (u64)-1; 352280c0b421SLiu Bo else 352380c0b421SLiu Bo err = ret; 3524e02119d5SChris Mason goto done; 3525e02119d5SChris Mason } 3526e02119d5SChris Mason btrfs_item_key_to_cpu(path->nodes[0], &tmp, path->slots[0]); 352733345d01SLi Zefan if (tmp.objectid != ino || tmp.type != key_type) { 3528e02119d5SChris Mason last_offset = (u64)-1; 3529e02119d5SChris Mason goto done; 3530e02119d5SChris Mason } 3531e02119d5SChris Mason if (btrfs_header_generation(path->nodes[0]) != trans->transid) { 3532e02119d5SChris Mason ret = overwrite_item(trans, log, dst_path, 3533e02119d5SChris Mason path->nodes[0], path->slots[0], 3534e02119d5SChris Mason &tmp); 35354a500fd1SYan, Zheng if (ret) 35364a500fd1SYan, Zheng err = ret; 35374a500fd1SYan, Zheng else 3538e02119d5SChris Mason last_offset = tmp.offset; 3539e02119d5SChris Mason goto done; 3540e02119d5SChris Mason } 3541e02119d5SChris Mason } 3542e02119d5SChris Mason done: 3543b3b4aa74SDavid Sterba btrfs_release_path(path); 3544b3b4aa74SDavid Sterba btrfs_release_path(dst_path); 3545e02119d5SChris Mason 
35464a500fd1SYan, Zheng if (err == 0) { 35474a500fd1SYan, Zheng *last_offset_ret = last_offset; 35484a500fd1SYan, Zheng /* 35494a500fd1SYan, Zheng * insert the log range keys to indicate where the log 35504a500fd1SYan, Zheng * is valid 35514a500fd1SYan, Zheng */ 35524a500fd1SYan, Zheng ret = insert_dir_log_key(trans, log, path, key_type, 355333345d01SLi Zefan ino, first_offset, last_offset); 35544a500fd1SYan, Zheng if (ret) 35554a500fd1SYan, Zheng err = ret; 35564a500fd1SYan, Zheng } 35574a500fd1SYan, Zheng return err; 3558e02119d5SChris Mason } 3559e02119d5SChris Mason 3560e02119d5SChris Mason /* 3561e02119d5SChris Mason * logging directories is very similar to logging inodes, We find all the items 3562e02119d5SChris Mason * from the current transaction and write them to the log. 3563e02119d5SChris Mason * 3564e02119d5SChris Mason * The recovery code scans the directory in the subvolume, and if it finds a 3565e02119d5SChris Mason * key in the range logged that is not present in the log tree, then it means 3566e02119d5SChris Mason * that dir entry was unlinked during the transaction. 3567e02119d5SChris Mason * 3568e02119d5SChris Mason * In order for that scan to work, we must include one key smaller than 3569e02119d5SChris Mason * the smallest logged by this transaction and one key larger than the largest 3570e02119d5SChris Mason * key logged by this transaction. 
3571e02119d5SChris Mason */ 3572e02119d5SChris Mason static noinline int log_directory_changes(struct btrfs_trans_handle *trans, 3573dbf39ea4SNikolay Borisov struct btrfs_root *root, struct btrfs_inode *inode, 3574e02119d5SChris Mason struct btrfs_path *path, 35752f2ff0eeSFilipe Manana struct btrfs_path *dst_path, 35762f2ff0eeSFilipe Manana struct btrfs_log_ctx *ctx) 3577e02119d5SChris Mason { 3578e02119d5SChris Mason u64 min_key; 3579e02119d5SChris Mason u64 max_key; 3580e02119d5SChris Mason int ret; 3581e02119d5SChris Mason int key_type = BTRFS_DIR_ITEM_KEY; 3582e02119d5SChris Mason 3583e02119d5SChris Mason again: 3584e02119d5SChris Mason min_key = 0; 3585e02119d5SChris Mason max_key = 0; 3586e02119d5SChris Mason while (1) { 3587dbf39ea4SNikolay Borisov ret = log_dir_items(trans, root, inode, path, dst_path, key_type, 3588dbf39ea4SNikolay Borisov ctx, min_key, &max_key); 35894a500fd1SYan, Zheng if (ret) 35904a500fd1SYan, Zheng return ret; 3591e02119d5SChris Mason if (max_key == (u64)-1) 3592e02119d5SChris Mason break; 3593e02119d5SChris Mason min_key = max_key + 1; 3594e02119d5SChris Mason } 3595e02119d5SChris Mason 3596e02119d5SChris Mason if (key_type == BTRFS_DIR_ITEM_KEY) { 3597e02119d5SChris Mason key_type = BTRFS_DIR_INDEX_KEY; 3598e02119d5SChris Mason goto again; 3599e02119d5SChris Mason } 3600e02119d5SChris Mason return 0; 3601e02119d5SChris Mason } 3602e02119d5SChris Mason 3603e02119d5SChris Mason /* 3604e02119d5SChris Mason * a helper function to drop items from the log before we relog an 3605e02119d5SChris Mason * inode. max_key_type indicates the highest item type to remove. 3606e02119d5SChris Mason * This cannot be run for file data extents because it does not 3607e02119d5SChris Mason * free the extents they point to. 
3608e02119d5SChris Mason */ 3609e02119d5SChris Mason static int drop_objectid_items(struct btrfs_trans_handle *trans, 3610e02119d5SChris Mason struct btrfs_root *log, 3611e02119d5SChris Mason struct btrfs_path *path, 3612e02119d5SChris Mason u64 objectid, int max_key_type) 3613e02119d5SChris Mason { 3614e02119d5SChris Mason int ret; 3615e02119d5SChris Mason struct btrfs_key key; 3616e02119d5SChris Mason struct btrfs_key found_key; 361718ec90d6SJosef Bacik int start_slot; 3618e02119d5SChris Mason 3619e02119d5SChris Mason key.objectid = objectid; 3620e02119d5SChris Mason key.type = max_key_type; 3621e02119d5SChris Mason key.offset = (u64)-1; 3622e02119d5SChris Mason 3623e02119d5SChris Mason while (1) { 3624e02119d5SChris Mason ret = btrfs_search_slot(trans, log, &key, path, -1, 1); 36253650860bSJosef Bacik BUG_ON(ret == 0); /* Logic error */ 36264a500fd1SYan, Zheng if (ret < 0) 3627e02119d5SChris Mason break; 3628e02119d5SChris Mason 3629e02119d5SChris Mason if (path->slots[0] == 0) 3630e02119d5SChris Mason break; 3631e02119d5SChris Mason 3632e02119d5SChris Mason path->slots[0]--; 3633e02119d5SChris Mason btrfs_item_key_to_cpu(path->nodes[0], &found_key, 3634e02119d5SChris Mason path->slots[0]); 3635e02119d5SChris Mason 3636e02119d5SChris Mason if (found_key.objectid != objectid) 3637e02119d5SChris Mason break; 3638e02119d5SChris Mason 363918ec90d6SJosef Bacik found_key.offset = 0; 364018ec90d6SJosef Bacik found_key.type = 0; 364118ec90d6SJosef Bacik ret = btrfs_bin_search(path->nodes[0], &found_key, 0, 364218ec90d6SJosef Bacik &start_slot); 364318ec90d6SJosef Bacik 364418ec90d6SJosef Bacik ret = btrfs_del_items(trans, log, path, start_slot, 364518ec90d6SJosef Bacik path->slots[0] - start_slot + 1); 364618ec90d6SJosef Bacik /* 364718ec90d6SJosef Bacik * If start slot isn't 0 then we don't need to re-search, we've 364818ec90d6SJosef Bacik * found the last guy with the objectid in this tree. 
364918ec90d6SJosef Bacik */ 365018ec90d6SJosef Bacik if (ret || start_slot != 0) 365165a246c5STsutomu Itoh break; 3652b3b4aa74SDavid Sterba btrfs_release_path(path); 3653e02119d5SChris Mason } 3654b3b4aa74SDavid Sterba btrfs_release_path(path); 36555bdbeb21SJosef Bacik if (ret > 0) 36565bdbeb21SJosef Bacik ret = 0; 36574a500fd1SYan, Zheng return ret; 3658e02119d5SChris Mason } 3659e02119d5SChris Mason 366094edf4aeSJosef Bacik static void fill_inode_item(struct btrfs_trans_handle *trans, 366194edf4aeSJosef Bacik struct extent_buffer *leaf, 366294edf4aeSJosef Bacik struct btrfs_inode_item *item, 36631a4bcf47SFilipe Manana struct inode *inode, int log_inode_only, 36641a4bcf47SFilipe Manana u64 logged_isize) 366594edf4aeSJosef Bacik { 36660b1c6ccaSJosef Bacik struct btrfs_map_token token; 366794edf4aeSJosef Bacik 36680b1c6ccaSJosef Bacik btrfs_init_map_token(&token); 366994edf4aeSJosef Bacik 367094edf4aeSJosef Bacik if (log_inode_only) { 367194edf4aeSJosef Bacik /* set the generation to zero so the recover code 367294edf4aeSJosef Bacik * can tell the difference between an logging 367394edf4aeSJosef Bacik * just to say 'this inode exists' and a logging 367494edf4aeSJosef Bacik * to say 'update this inode with these values' 367594edf4aeSJosef Bacik */ 36760b1c6ccaSJosef Bacik btrfs_set_token_inode_generation(leaf, item, 0, &token); 36771a4bcf47SFilipe Manana btrfs_set_token_inode_size(leaf, item, logged_isize, &token); 367894edf4aeSJosef Bacik } else { 36790b1c6ccaSJosef Bacik btrfs_set_token_inode_generation(leaf, item, 36800b1c6ccaSJosef Bacik BTRFS_I(inode)->generation, 36810b1c6ccaSJosef Bacik &token); 36820b1c6ccaSJosef Bacik btrfs_set_token_inode_size(leaf, item, inode->i_size, &token); 368394edf4aeSJosef Bacik } 368494edf4aeSJosef Bacik 36850b1c6ccaSJosef Bacik btrfs_set_token_inode_uid(leaf, item, i_uid_read(inode), &token); 36860b1c6ccaSJosef Bacik btrfs_set_token_inode_gid(leaf, item, i_gid_read(inode), &token); 36870b1c6ccaSJosef Bacik 
btrfs_set_token_inode_mode(leaf, item, inode->i_mode, &token); 36880b1c6ccaSJosef Bacik btrfs_set_token_inode_nlink(leaf, item, inode->i_nlink, &token); 36890b1c6ccaSJosef Bacik 3690a937b979SDavid Sterba btrfs_set_token_timespec_sec(leaf, &item->atime, 36910b1c6ccaSJosef Bacik inode->i_atime.tv_sec, &token); 3692a937b979SDavid Sterba btrfs_set_token_timespec_nsec(leaf, &item->atime, 36930b1c6ccaSJosef Bacik inode->i_atime.tv_nsec, &token); 36940b1c6ccaSJosef Bacik 3695a937b979SDavid Sterba btrfs_set_token_timespec_sec(leaf, &item->mtime, 36960b1c6ccaSJosef Bacik inode->i_mtime.tv_sec, &token); 3697a937b979SDavid Sterba btrfs_set_token_timespec_nsec(leaf, &item->mtime, 36980b1c6ccaSJosef Bacik inode->i_mtime.tv_nsec, &token); 36990b1c6ccaSJosef Bacik 3700a937b979SDavid Sterba btrfs_set_token_timespec_sec(leaf, &item->ctime, 37010b1c6ccaSJosef Bacik inode->i_ctime.tv_sec, &token); 3702a937b979SDavid Sterba btrfs_set_token_timespec_nsec(leaf, &item->ctime, 37030b1c6ccaSJosef Bacik inode->i_ctime.tv_nsec, &token); 37040b1c6ccaSJosef Bacik 37050b1c6ccaSJosef Bacik btrfs_set_token_inode_nbytes(leaf, item, inode_get_bytes(inode), 37060b1c6ccaSJosef Bacik &token); 37070b1c6ccaSJosef Bacik 3708c7f88c4eSJeff Layton btrfs_set_token_inode_sequence(leaf, item, 3709c7f88c4eSJeff Layton inode_peek_iversion(inode), &token); 37100b1c6ccaSJosef Bacik btrfs_set_token_inode_transid(leaf, item, trans->transid, &token); 37110b1c6ccaSJosef Bacik btrfs_set_token_inode_rdev(leaf, item, inode->i_rdev, &token); 37120b1c6ccaSJosef Bacik btrfs_set_token_inode_flags(leaf, item, BTRFS_I(inode)->flags, &token); 37130b1c6ccaSJosef Bacik btrfs_set_token_inode_block_group(leaf, item, 0, &token); 371494edf4aeSJosef Bacik } 371594edf4aeSJosef Bacik 3716a95249b3SJosef Bacik static int log_inode_item(struct btrfs_trans_handle *trans, 3717a95249b3SJosef Bacik struct btrfs_root *log, struct btrfs_path *path, 37186d889a3bSNikolay Borisov struct btrfs_inode *inode) 3719a95249b3SJosef Bacik { 
3720a95249b3SJosef Bacik struct btrfs_inode_item *inode_item; 3721a95249b3SJosef Bacik int ret; 3722a95249b3SJosef Bacik 3723efd0c405SFilipe David Borba Manana ret = btrfs_insert_empty_item(trans, log, path, 37246d889a3bSNikolay Borisov &inode->location, sizeof(*inode_item)); 3725a95249b3SJosef Bacik if (ret && ret != -EEXIST) 3726a95249b3SJosef Bacik return ret; 3727a95249b3SJosef Bacik inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0], 3728a95249b3SJosef Bacik struct btrfs_inode_item); 37296d889a3bSNikolay Borisov fill_inode_item(trans, path->nodes[0], inode_item, &inode->vfs_inode, 37306d889a3bSNikolay Borisov 0, 0); 3731a95249b3SJosef Bacik btrfs_release_path(path); 3732a95249b3SJosef Bacik return 0; 3733a95249b3SJosef Bacik } 3734a95249b3SJosef Bacik 373531ff1cd2SChris Mason static noinline int copy_items(struct btrfs_trans_handle *trans, 373644d70e19SNikolay Borisov struct btrfs_inode *inode, 373731ff1cd2SChris Mason struct btrfs_path *dst_path, 373816e7549fSJosef Bacik struct btrfs_path *src_path, u64 *last_extent, 37391a4bcf47SFilipe Manana int start_slot, int nr, int inode_only, 37401a4bcf47SFilipe Manana u64 logged_isize) 374131ff1cd2SChris Mason { 37423ffbd68cSDavid Sterba struct btrfs_fs_info *fs_info = trans->fs_info; 374331ff1cd2SChris Mason unsigned long src_offset; 374431ff1cd2SChris Mason unsigned long dst_offset; 374544d70e19SNikolay Borisov struct btrfs_root *log = inode->root->log_root; 374631ff1cd2SChris Mason struct btrfs_file_extent_item *extent; 374731ff1cd2SChris Mason struct btrfs_inode_item *inode_item; 374816e7549fSJosef Bacik struct extent_buffer *src = src_path->nodes[0]; 374916e7549fSJosef Bacik struct btrfs_key first_key, last_key, key; 375031ff1cd2SChris Mason int ret; 375131ff1cd2SChris Mason struct btrfs_key *ins_keys; 375231ff1cd2SChris Mason u32 *ins_sizes; 375331ff1cd2SChris Mason char *ins_data; 375431ff1cd2SChris Mason int i; 3755d20f7043SChris Mason struct list_head ordered_sums; 375644d70e19SNikolay Borisov int 
skip_csum = inode->flags & BTRFS_INODE_NODATASUM; 375716e7549fSJosef Bacik bool has_extents = false; 375874121f7cSFilipe Manana bool need_find_last_extent = true; 375916e7549fSJosef Bacik bool done = false; 3760d20f7043SChris Mason 3761d20f7043SChris Mason INIT_LIST_HEAD(&ordered_sums); 376231ff1cd2SChris Mason 376331ff1cd2SChris Mason ins_data = kmalloc(nr * sizeof(struct btrfs_key) + 376431ff1cd2SChris Mason nr * sizeof(u32), GFP_NOFS); 37652a29edc6Sliubo if (!ins_data) 37662a29edc6Sliubo return -ENOMEM; 37672a29edc6Sliubo 376816e7549fSJosef Bacik first_key.objectid = (u64)-1; 376916e7549fSJosef Bacik 377031ff1cd2SChris Mason ins_sizes = (u32 *)ins_data; 377131ff1cd2SChris Mason ins_keys = (struct btrfs_key *)(ins_data + nr * sizeof(u32)); 377231ff1cd2SChris Mason 377331ff1cd2SChris Mason for (i = 0; i < nr; i++) { 377431ff1cd2SChris Mason ins_sizes[i] = btrfs_item_size_nr(src, i + start_slot); 377531ff1cd2SChris Mason btrfs_item_key_to_cpu(src, ins_keys + i, i + start_slot); 377631ff1cd2SChris Mason } 377731ff1cd2SChris Mason ret = btrfs_insert_empty_items(trans, log, dst_path, 377831ff1cd2SChris Mason ins_keys, ins_sizes, nr); 37794a500fd1SYan, Zheng if (ret) { 37804a500fd1SYan, Zheng kfree(ins_data); 37814a500fd1SYan, Zheng return ret; 37824a500fd1SYan, Zheng } 378331ff1cd2SChris Mason 37845d4f98a2SYan Zheng for (i = 0; i < nr; i++, dst_path->slots[0]++) { 378531ff1cd2SChris Mason dst_offset = btrfs_item_ptr_offset(dst_path->nodes[0], 378631ff1cd2SChris Mason dst_path->slots[0]); 378731ff1cd2SChris Mason 378831ff1cd2SChris Mason src_offset = btrfs_item_ptr_offset(src, start_slot + i); 378931ff1cd2SChris Mason 37900dde10beSMatthias Kaehlcke if (i == nr - 1) 379116e7549fSJosef Bacik last_key = ins_keys[i]; 379216e7549fSJosef Bacik 379394edf4aeSJosef Bacik if (ins_keys[i].type == BTRFS_INODE_ITEM_KEY) { 379431ff1cd2SChris Mason inode_item = btrfs_item_ptr(dst_path->nodes[0], 379531ff1cd2SChris Mason dst_path->slots[0], 379631ff1cd2SChris Mason struct 
btrfs_inode_item); 379794edf4aeSJosef Bacik fill_inode_item(trans, dst_path->nodes[0], inode_item, 3798f85b7379SDavid Sterba &inode->vfs_inode, 3799f85b7379SDavid Sterba inode_only == LOG_INODE_EXISTS, 38001a4bcf47SFilipe Manana logged_isize); 380194edf4aeSJosef Bacik } else { 380294edf4aeSJosef Bacik copy_extent_buffer(dst_path->nodes[0], src, dst_offset, 380394edf4aeSJosef Bacik src_offset, ins_sizes[i]); 380431ff1cd2SChris Mason } 380594edf4aeSJosef Bacik 380616e7549fSJosef Bacik /* 380716e7549fSJosef Bacik * We set need_find_last_extent here in case we know we were 380816e7549fSJosef Bacik * processing other items and then walk into the first extent in 380916e7549fSJosef Bacik * the inode. If we don't hit an extent then nothing changes, 381016e7549fSJosef Bacik * we'll do the last search the next time around. 381116e7549fSJosef Bacik */ 381216e7549fSJosef Bacik if (ins_keys[i].type == BTRFS_EXTENT_DATA_KEY) { 381316e7549fSJosef Bacik has_extents = true; 381474121f7cSFilipe Manana if (first_key.objectid == (u64)-1) 381516e7549fSJosef Bacik first_key = ins_keys[i]; 381616e7549fSJosef Bacik } else { 381716e7549fSJosef Bacik need_find_last_extent = false; 381816e7549fSJosef Bacik } 381916e7549fSJosef Bacik 382031ff1cd2SChris Mason /* take a reference on file data extents so that truncates 382131ff1cd2SChris Mason * or deletes of this inode don't have to relog the inode 382231ff1cd2SChris Mason * again 382331ff1cd2SChris Mason */ 3824962a298fSDavid Sterba if (ins_keys[i].type == BTRFS_EXTENT_DATA_KEY && 3825d2794405SLiu Bo !skip_csum) { 382631ff1cd2SChris Mason int found_type; 382731ff1cd2SChris Mason extent = btrfs_item_ptr(src, start_slot + i, 382831ff1cd2SChris Mason struct btrfs_file_extent_item); 382931ff1cd2SChris Mason 38308e531cdfSliubo if (btrfs_file_extent_generation(src, extent) < trans->transid) 38318e531cdfSliubo continue; 38328e531cdfSliubo 383331ff1cd2SChris Mason found_type = btrfs_file_extent_type(src, extent); 38346f1fed77SJosef Bacik if 
(found_type == BTRFS_FILE_EXTENT_REG) { 38355d4f98a2SYan Zheng u64 ds, dl, cs, cl; 38365d4f98a2SYan Zheng ds = btrfs_file_extent_disk_bytenr(src, 383731ff1cd2SChris Mason extent); 38385d4f98a2SYan Zheng /* ds == 0 is a hole */ 38395d4f98a2SYan Zheng if (ds == 0) 38405d4f98a2SYan Zheng continue; 38415d4f98a2SYan Zheng 38425d4f98a2SYan Zheng dl = btrfs_file_extent_disk_num_bytes(src, 384331ff1cd2SChris Mason extent); 38445d4f98a2SYan Zheng cs = btrfs_file_extent_offset(src, extent); 38455d4f98a2SYan Zheng cl = btrfs_file_extent_num_bytes(src, 3846a419aef8SJoe Perches extent); 3847580afd76SChris Mason if (btrfs_file_extent_compression(src, 3848580afd76SChris Mason extent)) { 3849580afd76SChris Mason cs = 0; 3850580afd76SChris Mason cl = dl; 3851580afd76SChris Mason } 38525d4f98a2SYan Zheng 385307d400a6SYan Zheng ret = btrfs_lookup_csums_range( 38540b246afaSJeff Mahoney fs_info->csum_root, 385507d400a6SYan Zheng ds + cs, ds + cs + cl - 1, 3856a2de733cSArne Jansen &ordered_sums, 0); 38573650860bSJosef Bacik if (ret) { 38583650860bSJosef Bacik btrfs_release_path(dst_path); 38593650860bSJosef Bacik kfree(ins_data); 38603650860bSJosef Bacik return ret; 38613650860bSJosef Bacik } 386231ff1cd2SChris Mason } 386331ff1cd2SChris Mason } 386431ff1cd2SChris Mason } 386531ff1cd2SChris Mason 386631ff1cd2SChris Mason btrfs_mark_buffer_dirty(dst_path->nodes[0]); 3867b3b4aa74SDavid Sterba btrfs_release_path(dst_path); 386831ff1cd2SChris Mason kfree(ins_data); 3869d20f7043SChris Mason 3870d20f7043SChris Mason /* 3871d20f7043SChris Mason * we have to do this after the loop above to avoid changing the 3872d20f7043SChris Mason * log tree while trying to change the log tree. 
3873d20f7043SChris Mason */ 38744a500fd1SYan, Zheng ret = 0; 3875d20f7043SChris Mason while (!list_empty(&ordered_sums)) { 3876d20f7043SChris Mason struct btrfs_ordered_sum *sums = list_entry(ordered_sums.next, 3877d20f7043SChris Mason struct btrfs_ordered_sum, 3878d20f7043SChris Mason list); 38794a500fd1SYan, Zheng if (!ret) 3880d20f7043SChris Mason ret = btrfs_csum_file_blocks(trans, log, sums); 3881d20f7043SChris Mason list_del(&sums->list); 3882d20f7043SChris Mason kfree(sums); 3883d20f7043SChris Mason } 388416e7549fSJosef Bacik 388516e7549fSJosef Bacik if (!has_extents) 388616e7549fSJosef Bacik return ret; 388716e7549fSJosef Bacik 388874121f7cSFilipe Manana if (need_find_last_extent && *last_extent == first_key.offset) { 388974121f7cSFilipe Manana /* 389074121f7cSFilipe Manana * We don't have any leafs between our current one and the one 389174121f7cSFilipe Manana * we processed before that can have file extent items for our 389274121f7cSFilipe Manana * inode (and have a generation number smaller than our current 389374121f7cSFilipe Manana * transaction id). 389474121f7cSFilipe Manana */ 389574121f7cSFilipe Manana need_find_last_extent = false; 389674121f7cSFilipe Manana } 389774121f7cSFilipe Manana 389816e7549fSJosef Bacik /* 389916e7549fSJosef Bacik * Because we use btrfs_search_forward we could skip leaves that were 390016e7549fSJosef Bacik * not modified and then assume *last_extent is valid when it really 390116e7549fSJosef Bacik * isn't. So back up to the previous leaf and read the end of the last 390216e7549fSJosef Bacik * extent before we go and fill in holes. 
390316e7549fSJosef Bacik */ 390416e7549fSJosef Bacik if (need_find_last_extent) { 390516e7549fSJosef Bacik u64 len; 390616e7549fSJosef Bacik 390744d70e19SNikolay Borisov ret = btrfs_prev_leaf(inode->root, src_path); 390816e7549fSJosef Bacik if (ret < 0) 390916e7549fSJosef Bacik return ret; 391016e7549fSJosef Bacik if (ret) 391116e7549fSJosef Bacik goto fill_holes; 391216e7549fSJosef Bacik if (src_path->slots[0]) 391316e7549fSJosef Bacik src_path->slots[0]--; 391416e7549fSJosef Bacik src = src_path->nodes[0]; 391516e7549fSJosef Bacik btrfs_item_key_to_cpu(src, &key, src_path->slots[0]); 391644d70e19SNikolay Borisov if (key.objectid != btrfs_ino(inode) || 391716e7549fSJosef Bacik key.type != BTRFS_EXTENT_DATA_KEY) 391816e7549fSJosef Bacik goto fill_holes; 391916e7549fSJosef Bacik extent = btrfs_item_ptr(src, src_path->slots[0], 392016e7549fSJosef Bacik struct btrfs_file_extent_item); 392116e7549fSJosef Bacik if (btrfs_file_extent_type(src, extent) == 392216e7549fSJosef Bacik BTRFS_FILE_EXTENT_INLINE) { 3923e41ca589SQu Wenruo len = btrfs_file_extent_ram_bytes(src, extent); 392416e7549fSJosef Bacik *last_extent = ALIGN(key.offset + len, 39250b246afaSJeff Mahoney fs_info->sectorsize); 392616e7549fSJosef Bacik } else { 392716e7549fSJosef Bacik len = btrfs_file_extent_num_bytes(src, extent); 392816e7549fSJosef Bacik *last_extent = key.offset + len; 392916e7549fSJosef Bacik } 393016e7549fSJosef Bacik } 393116e7549fSJosef Bacik fill_holes: 393216e7549fSJosef Bacik /* So we did prev_leaf, now we need to move to the next leaf, but a few 393316e7549fSJosef Bacik * things could have happened 393416e7549fSJosef Bacik * 393516e7549fSJosef Bacik * 1) A merge could have happened, so we could currently be on a leaf 393616e7549fSJosef Bacik * that holds what we were copying in the first place. 393716e7549fSJosef Bacik * 2) A split could have happened, and now not all of the items we want 393816e7549fSJosef Bacik * are on the same leaf. 
393916e7549fSJosef Bacik * 394016e7549fSJosef Bacik * So we need to adjust how we search for holes, we need to drop the 394116e7549fSJosef Bacik * path and re-search for the first extent key we found, and then walk 394216e7549fSJosef Bacik * forward until we hit the last one we copied. 394316e7549fSJosef Bacik */ 394416e7549fSJosef Bacik if (need_find_last_extent) { 394516e7549fSJosef Bacik /* btrfs_prev_leaf could return 1 without releasing the path */ 394616e7549fSJosef Bacik btrfs_release_path(src_path); 3947f85b7379SDavid Sterba ret = btrfs_search_slot(NULL, inode->root, &first_key, 3948f85b7379SDavid Sterba src_path, 0, 0); 394916e7549fSJosef Bacik if (ret < 0) 395016e7549fSJosef Bacik return ret; 395116e7549fSJosef Bacik ASSERT(ret == 0); 395216e7549fSJosef Bacik src = src_path->nodes[0]; 395316e7549fSJosef Bacik i = src_path->slots[0]; 395416e7549fSJosef Bacik } else { 395516e7549fSJosef Bacik i = start_slot; 395616e7549fSJosef Bacik } 395716e7549fSJosef Bacik 395816e7549fSJosef Bacik /* 395916e7549fSJosef Bacik * Ok so here we need to go through and fill in any holes we may have 396016e7549fSJosef Bacik * to make sure that holes are punched for those areas in case they had 396116e7549fSJosef Bacik * extents previously. 
396216e7549fSJosef Bacik */ 396316e7549fSJosef Bacik while (!done) { 396416e7549fSJosef Bacik u64 offset, len; 396516e7549fSJosef Bacik u64 extent_end; 396616e7549fSJosef Bacik 396716e7549fSJosef Bacik if (i >= btrfs_header_nritems(src_path->nodes[0])) { 396844d70e19SNikolay Borisov ret = btrfs_next_leaf(inode->root, src_path); 396916e7549fSJosef Bacik if (ret < 0) 397016e7549fSJosef Bacik return ret; 397116e7549fSJosef Bacik ASSERT(ret == 0); 397216e7549fSJosef Bacik src = src_path->nodes[0]; 397316e7549fSJosef Bacik i = 0; 39748434ec46SFilipe Manana need_find_last_extent = true; 397516e7549fSJosef Bacik } 397616e7549fSJosef Bacik 397716e7549fSJosef Bacik btrfs_item_key_to_cpu(src, &key, i); 397816e7549fSJosef Bacik if (!btrfs_comp_cpu_keys(&key, &last_key)) 397916e7549fSJosef Bacik done = true; 398044d70e19SNikolay Borisov if (key.objectid != btrfs_ino(inode) || 398116e7549fSJosef Bacik key.type != BTRFS_EXTENT_DATA_KEY) { 398216e7549fSJosef Bacik i++; 398316e7549fSJosef Bacik continue; 398416e7549fSJosef Bacik } 398516e7549fSJosef Bacik extent = btrfs_item_ptr(src, i, struct btrfs_file_extent_item); 398616e7549fSJosef Bacik if (btrfs_file_extent_type(src, extent) == 398716e7549fSJosef Bacik BTRFS_FILE_EXTENT_INLINE) { 3988e41ca589SQu Wenruo len = btrfs_file_extent_ram_bytes(src, extent); 3989da17066cSJeff Mahoney extent_end = ALIGN(key.offset + len, 39900b246afaSJeff Mahoney fs_info->sectorsize); 399116e7549fSJosef Bacik } else { 399216e7549fSJosef Bacik len = btrfs_file_extent_num_bytes(src, extent); 399316e7549fSJosef Bacik extent_end = key.offset + len; 399416e7549fSJosef Bacik } 399516e7549fSJosef Bacik i++; 399616e7549fSJosef Bacik 399716e7549fSJosef Bacik if (*last_extent == key.offset) { 399816e7549fSJosef Bacik *last_extent = extent_end; 399916e7549fSJosef Bacik continue; 400016e7549fSJosef Bacik } 400116e7549fSJosef Bacik offset = *last_extent; 400216e7549fSJosef Bacik len = key.offset - *last_extent; 400344d70e19SNikolay Borisov ret = 
btrfs_insert_file_extent(trans, log, btrfs_ino(inode), 400444d70e19SNikolay Borisov offset, 0, 0, len, 0, len, 0, 0, 0); 400516e7549fSJosef Bacik if (ret) 400616e7549fSJosef Bacik break; 400774121f7cSFilipe Manana *last_extent = extent_end; 400816e7549fSJosef Bacik } 40094ee3fad3SFilipe Manana 40104ee3fad3SFilipe Manana /* 40114ee3fad3SFilipe Manana * Check if there is a hole between the last extent found in our leaf 40124ee3fad3SFilipe Manana * and the first extent in the next leaf. If there is one, we need to 40134ee3fad3SFilipe Manana * log an explicit hole so that at replay time we can punch the hole. 40144ee3fad3SFilipe Manana */ 40154ee3fad3SFilipe Manana if (ret == 0 && 40164ee3fad3SFilipe Manana key.objectid == btrfs_ino(inode) && 40174ee3fad3SFilipe Manana key.type == BTRFS_EXTENT_DATA_KEY && 40184ee3fad3SFilipe Manana i == btrfs_header_nritems(src_path->nodes[0])) { 40194ee3fad3SFilipe Manana ret = btrfs_next_leaf(inode->root, src_path); 40204ee3fad3SFilipe Manana need_find_last_extent = true; 40214ee3fad3SFilipe Manana if (ret > 0) { 40224ee3fad3SFilipe Manana ret = 0; 40234ee3fad3SFilipe Manana } else if (ret == 0) { 40244ee3fad3SFilipe Manana btrfs_item_key_to_cpu(src_path->nodes[0], &key, 40254ee3fad3SFilipe Manana src_path->slots[0]); 40264ee3fad3SFilipe Manana if (key.objectid == btrfs_ino(inode) && 40274ee3fad3SFilipe Manana key.type == BTRFS_EXTENT_DATA_KEY && 40284ee3fad3SFilipe Manana *last_extent < key.offset) { 40294ee3fad3SFilipe Manana const u64 len = key.offset - *last_extent; 40304ee3fad3SFilipe Manana 40314ee3fad3SFilipe Manana ret = btrfs_insert_file_extent(trans, log, 40324ee3fad3SFilipe Manana btrfs_ino(inode), 40334ee3fad3SFilipe Manana *last_extent, 0, 40344ee3fad3SFilipe Manana 0, len, 0, len, 40354ee3fad3SFilipe Manana 0, 0, 0); 40364ee3fad3SFilipe Manana } 40374ee3fad3SFilipe Manana } 40384ee3fad3SFilipe Manana } 403916e7549fSJosef Bacik /* 404016e7549fSJosef Bacik * Need to let the callers know we dropped the path so they should 
404116e7549fSJosef Bacik * re-search. 404216e7549fSJosef Bacik */ 404316e7549fSJosef Bacik if (!ret && need_find_last_extent) 404416e7549fSJosef Bacik ret = 1; 40454a500fd1SYan, Zheng return ret; 404631ff1cd2SChris Mason } 404731ff1cd2SChris Mason 40485dc562c5SJosef Bacik static int extent_cmp(void *priv, struct list_head *a, struct list_head *b) 40495dc562c5SJosef Bacik { 40505dc562c5SJosef Bacik struct extent_map *em1, *em2; 40515dc562c5SJosef Bacik 40525dc562c5SJosef Bacik em1 = list_entry(a, struct extent_map, list); 40535dc562c5SJosef Bacik em2 = list_entry(b, struct extent_map, list); 40545dc562c5SJosef Bacik 40555dc562c5SJosef Bacik if (em1->start < em2->start) 40565dc562c5SJosef Bacik return -1; 40575dc562c5SJosef Bacik else if (em1->start > em2->start) 40585dc562c5SJosef Bacik return 1; 40595dc562c5SJosef Bacik return 0; 40605dc562c5SJosef Bacik } 40615dc562c5SJosef Bacik 4062e7175a69SJosef Bacik static int log_extent_csums(struct btrfs_trans_handle *trans, 4063e7175a69SJosef Bacik struct btrfs_inode *inode, 4064a9ecb653SNikolay Borisov struct btrfs_root *log_root, 4065e7175a69SJosef Bacik const struct extent_map *em) 40665dc562c5SJosef Bacik { 40672ab28f32SJosef Bacik u64 csum_offset; 40682ab28f32SJosef Bacik u64 csum_len; 40698407f553SFilipe Manana LIST_HEAD(ordered_sums); 40708407f553SFilipe Manana int ret = 0; 407109a2a8f9SJosef Bacik 4072e7175a69SJosef Bacik if (inode->flags & BTRFS_INODE_NODATASUM || 4073e7175a69SJosef Bacik test_bit(EXTENT_FLAG_PREALLOC, &em->flags) || 40748407f553SFilipe Manana em->block_start == EXTENT_MAP_HOLE) 407570c8a91cSJosef Bacik return 0; 407670c8a91cSJosef Bacik 4077e7175a69SJosef Bacik /* If we're compressed we have to save the entire range of csums. 
*/ 4078488111aaSFilipe David Borba Manana if (em->compress_type) { 4079488111aaSFilipe David Borba Manana csum_offset = 0; 40808407f553SFilipe Manana csum_len = max(em->block_len, em->orig_block_len); 4081488111aaSFilipe David Borba Manana } else { 4082e7175a69SJosef Bacik csum_offset = em->mod_start - em->start; 4083e7175a69SJosef Bacik csum_len = em->mod_len; 4084488111aaSFilipe David Borba Manana } 40852ab28f32SJosef Bacik 408670c8a91cSJosef Bacik /* block start is already adjusted for the file extent offset. */ 4087a9ecb653SNikolay Borisov ret = btrfs_lookup_csums_range(trans->fs_info->csum_root, 408870c8a91cSJosef Bacik em->block_start + csum_offset, 408970c8a91cSJosef Bacik em->block_start + csum_offset + 409070c8a91cSJosef Bacik csum_len - 1, &ordered_sums, 0); 40915dc562c5SJosef Bacik if (ret) 40925dc562c5SJosef Bacik return ret; 409370c8a91cSJosef Bacik 409470c8a91cSJosef Bacik while (!list_empty(&ordered_sums)) { 409570c8a91cSJosef Bacik struct btrfs_ordered_sum *sums = list_entry(ordered_sums.next, 409670c8a91cSJosef Bacik struct btrfs_ordered_sum, 409770c8a91cSJosef Bacik list); 409870c8a91cSJosef Bacik if (!ret) 4099a9ecb653SNikolay Borisov ret = btrfs_csum_file_blocks(trans, log_root, sums); 410070c8a91cSJosef Bacik list_del(&sums->list); 410170c8a91cSJosef Bacik kfree(sums); 41025dc562c5SJosef Bacik } 41035dc562c5SJosef Bacik 410470c8a91cSJosef Bacik return ret; 41055dc562c5SJosef Bacik } 41065dc562c5SJosef Bacik 41078407f553SFilipe Manana static int log_one_extent(struct btrfs_trans_handle *trans, 41089d122629SNikolay Borisov struct btrfs_inode *inode, struct btrfs_root *root, 41098407f553SFilipe Manana const struct extent_map *em, 41108407f553SFilipe Manana struct btrfs_path *path, 41118407f553SFilipe Manana struct btrfs_log_ctx *ctx) 41128407f553SFilipe Manana { 41138407f553SFilipe Manana struct btrfs_root *log = root->log_root; 41148407f553SFilipe Manana struct btrfs_file_extent_item *fi; 41158407f553SFilipe Manana struct extent_buffer *leaf; 
41168407f553SFilipe Manana struct btrfs_map_token token; 41178407f553SFilipe Manana struct btrfs_key key; 41188407f553SFilipe Manana u64 extent_offset = em->start - em->orig_start; 41198407f553SFilipe Manana u64 block_len; 41208407f553SFilipe Manana int ret; 41218407f553SFilipe Manana int extent_inserted = 0; 41228407f553SFilipe Manana 4123a9ecb653SNikolay Borisov ret = log_extent_csums(trans, inode, log, em); 41248407f553SFilipe Manana if (ret) 41258407f553SFilipe Manana return ret; 41268407f553SFilipe Manana 41278407f553SFilipe Manana btrfs_init_map_token(&token); 41288407f553SFilipe Manana 41299d122629SNikolay Borisov ret = __btrfs_drop_extents(trans, log, &inode->vfs_inode, path, em->start, 41308407f553SFilipe Manana em->start + em->len, NULL, 0, 1, 41318407f553SFilipe Manana sizeof(*fi), &extent_inserted); 41328407f553SFilipe Manana if (ret) 41338407f553SFilipe Manana return ret; 41348407f553SFilipe Manana 41358407f553SFilipe Manana if (!extent_inserted) { 41369d122629SNikolay Borisov key.objectid = btrfs_ino(inode); 41378407f553SFilipe Manana key.type = BTRFS_EXTENT_DATA_KEY; 41388407f553SFilipe Manana key.offset = em->start; 41398407f553SFilipe Manana 41408407f553SFilipe Manana ret = btrfs_insert_empty_item(trans, log, path, &key, 41418407f553SFilipe Manana sizeof(*fi)); 41428407f553SFilipe Manana if (ret) 41438407f553SFilipe Manana return ret; 41448407f553SFilipe Manana } 41458407f553SFilipe Manana leaf = path->nodes[0]; 41468407f553SFilipe Manana fi = btrfs_item_ptr(leaf, path->slots[0], 41478407f553SFilipe Manana struct btrfs_file_extent_item); 41488407f553SFilipe Manana 414950d9aa99SJosef Bacik btrfs_set_token_file_extent_generation(leaf, fi, trans->transid, 41508407f553SFilipe Manana &token); 41518407f553SFilipe Manana if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) 41528407f553SFilipe Manana btrfs_set_token_file_extent_type(leaf, fi, 41538407f553SFilipe Manana BTRFS_FILE_EXTENT_PREALLOC, 41548407f553SFilipe Manana &token); 41558407f553SFilipe Manana 
else 41568407f553SFilipe Manana btrfs_set_token_file_extent_type(leaf, fi, 41578407f553SFilipe Manana BTRFS_FILE_EXTENT_REG, 41588407f553SFilipe Manana &token); 41598407f553SFilipe Manana 41608407f553SFilipe Manana block_len = max(em->block_len, em->orig_block_len); 41618407f553SFilipe Manana if (em->compress_type != BTRFS_COMPRESS_NONE) { 41628407f553SFilipe Manana btrfs_set_token_file_extent_disk_bytenr(leaf, fi, 41638407f553SFilipe Manana em->block_start, 41648407f553SFilipe Manana &token); 41658407f553SFilipe Manana btrfs_set_token_file_extent_disk_num_bytes(leaf, fi, block_len, 41668407f553SFilipe Manana &token); 41678407f553SFilipe Manana } else if (em->block_start < EXTENT_MAP_LAST_BYTE) { 41688407f553SFilipe Manana btrfs_set_token_file_extent_disk_bytenr(leaf, fi, 41698407f553SFilipe Manana em->block_start - 41708407f553SFilipe Manana extent_offset, &token); 41718407f553SFilipe Manana btrfs_set_token_file_extent_disk_num_bytes(leaf, fi, block_len, 41728407f553SFilipe Manana &token); 41738407f553SFilipe Manana } else { 41748407f553SFilipe Manana btrfs_set_token_file_extent_disk_bytenr(leaf, fi, 0, &token); 41758407f553SFilipe Manana btrfs_set_token_file_extent_disk_num_bytes(leaf, fi, 0, 41768407f553SFilipe Manana &token); 41778407f553SFilipe Manana } 41788407f553SFilipe Manana 41798407f553SFilipe Manana btrfs_set_token_file_extent_offset(leaf, fi, extent_offset, &token); 41808407f553SFilipe Manana btrfs_set_token_file_extent_num_bytes(leaf, fi, em->len, &token); 41818407f553SFilipe Manana btrfs_set_token_file_extent_ram_bytes(leaf, fi, em->ram_bytes, &token); 41828407f553SFilipe Manana btrfs_set_token_file_extent_compression(leaf, fi, em->compress_type, 41838407f553SFilipe Manana &token); 41848407f553SFilipe Manana btrfs_set_token_file_extent_encryption(leaf, fi, 0, &token); 41858407f553SFilipe Manana btrfs_set_token_file_extent_other_encoding(leaf, fi, 0, &token); 41868407f553SFilipe Manana btrfs_mark_buffer_dirty(leaf); 41878407f553SFilipe Manana 
41888407f553SFilipe Manana btrfs_release_path(path); 41898407f553SFilipe Manana 41908407f553SFilipe Manana return ret; 41918407f553SFilipe Manana } 41928407f553SFilipe Manana 419331d11b83SFilipe Manana /* 419431d11b83SFilipe Manana * Log all prealloc extents beyond the inode's i_size to make sure we do not 419531d11b83SFilipe Manana * lose them after doing a fast fsync and replaying the log. We scan the 419631d11b83SFilipe Manana * subvolume's root instead of iterating the inode's extent map tree because 419731d11b83SFilipe Manana * otherwise we can log incorrect extent items based on extent map conversion. 419831d11b83SFilipe Manana * That can happen due to the fact that extent maps are merged when they 419931d11b83SFilipe Manana * are not in the extent map tree's list of modified extents. 420031d11b83SFilipe Manana */ 420131d11b83SFilipe Manana static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans, 420231d11b83SFilipe Manana struct btrfs_inode *inode, 420331d11b83SFilipe Manana struct btrfs_path *path) 420431d11b83SFilipe Manana { 420531d11b83SFilipe Manana struct btrfs_root *root = inode->root; 420631d11b83SFilipe Manana struct btrfs_key key; 420731d11b83SFilipe Manana const u64 i_size = i_size_read(&inode->vfs_inode); 420831d11b83SFilipe Manana const u64 ino = btrfs_ino(inode); 420931d11b83SFilipe Manana struct btrfs_path *dst_path = NULL; 421031d11b83SFilipe Manana u64 last_extent = (u64)-1; 421131d11b83SFilipe Manana int ins_nr = 0; 421231d11b83SFilipe Manana int start_slot; 421331d11b83SFilipe Manana int ret; 421431d11b83SFilipe Manana 421531d11b83SFilipe Manana if (!(inode->flags & BTRFS_INODE_PREALLOC)) 421631d11b83SFilipe Manana return 0; 421731d11b83SFilipe Manana 421831d11b83SFilipe Manana key.objectid = ino; 421931d11b83SFilipe Manana key.type = BTRFS_EXTENT_DATA_KEY; 422031d11b83SFilipe Manana key.offset = i_size; 422131d11b83SFilipe Manana ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 422231d11b83SFilipe Manana if (ret < 0) 
422331d11b83SFilipe Manana goto out; 422431d11b83SFilipe Manana 422531d11b83SFilipe Manana while (true) { 422631d11b83SFilipe Manana struct extent_buffer *leaf = path->nodes[0]; 422731d11b83SFilipe Manana int slot = path->slots[0]; 422831d11b83SFilipe Manana 422931d11b83SFilipe Manana if (slot >= btrfs_header_nritems(leaf)) { 423031d11b83SFilipe Manana if (ins_nr > 0) { 423131d11b83SFilipe Manana ret = copy_items(trans, inode, dst_path, path, 423231d11b83SFilipe Manana &last_extent, start_slot, 423331d11b83SFilipe Manana ins_nr, 1, 0); 423431d11b83SFilipe Manana if (ret < 0) 423531d11b83SFilipe Manana goto out; 423631d11b83SFilipe Manana ins_nr = 0; 423731d11b83SFilipe Manana } 423831d11b83SFilipe Manana ret = btrfs_next_leaf(root, path); 423931d11b83SFilipe Manana if (ret < 0) 424031d11b83SFilipe Manana goto out; 424131d11b83SFilipe Manana if (ret > 0) { 424231d11b83SFilipe Manana ret = 0; 424331d11b83SFilipe Manana break; 424431d11b83SFilipe Manana } 424531d11b83SFilipe Manana continue; 424631d11b83SFilipe Manana } 424731d11b83SFilipe Manana 424831d11b83SFilipe Manana btrfs_item_key_to_cpu(leaf, &key, slot); 424931d11b83SFilipe Manana if (key.objectid > ino) 425031d11b83SFilipe Manana break; 425131d11b83SFilipe Manana if (WARN_ON_ONCE(key.objectid < ino) || 425231d11b83SFilipe Manana key.type < BTRFS_EXTENT_DATA_KEY || 425331d11b83SFilipe Manana key.offset < i_size) { 425431d11b83SFilipe Manana path->slots[0]++; 425531d11b83SFilipe Manana continue; 425631d11b83SFilipe Manana } 425731d11b83SFilipe Manana if (last_extent == (u64)-1) { 425831d11b83SFilipe Manana last_extent = key.offset; 425931d11b83SFilipe Manana /* 426031d11b83SFilipe Manana * Avoid logging extent items logged in past fsync calls 426131d11b83SFilipe Manana * and leading to duplicate keys in the log tree. 
426231d11b83SFilipe Manana */ 426331d11b83SFilipe Manana do { 426431d11b83SFilipe Manana ret = btrfs_truncate_inode_items(trans, 426531d11b83SFilipe Manana root->log_root, 426631d11b83SFilipe Manana &inode->vfs_inode, 426731d11b83SFilipe Manana i_size, 426831d11b83SFilipe Manana BTRFS_EXTENT_DATA_KEY); 426931d11b83SFilipe Manana } while (ret == -EAGAIN); 427031d11b83SFilipe Manana if (ret) 427131d11b83SFilipe Manana goto out; 427231d11b83SFilipe Manana } 427331d11b83SFilipe Manana if (ins_nr == 0) 427431d11b83SFilipe Manana start_slot = slot; 427531d11b83SFilipe Manana ins_nr++; 427631d11b83SFilipe Manana path->slots[0]++; 427731d11b83SFilipe Manana if (!dst_path) { 427831d11b83SFilipe Manana dst_path = btrfs_alloc_path(); 427931d11b83SFilipe Manana if (!dst_path) { 428031d11b83SFilipe Manana ret = -ENOMEM; 428131d11b83SFilipe Manana goto out; 428231d11b83SFilipe Manana } 428331d11b83SFilipe Manana } 428431d11b83SFilipe Manana } 428531d11b83SFilipe Manana if (ins_nr > 0) { 428631d11b83SFilipe Manana ret = copy_items(trans, inode, dst_path, path, &last_extent, 428731d11b83SFilipe Manana start_slot, ins_nr, 1, 0); 428831d11b83SFilipe Manana if (ret > 0) 428931d11b83SFilipe Manana ret = 0; 429031d11b83SFilipe Manana } 429131d11b83SFilipe Manana out: 429231d11b83SFilipe Manana btrfs_release_path(path); 429331d11b83SFilipe Manana btrfs_free_path(dst_path); 429431d11b83SFilipe Manana return ret; 429531d11b83SFilipe Manana } 429631d11b83SFilipe Manana 42975dc562c5SJosef Bacik static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, 42985dc562c5SJosef Bacik struct btrfs_root *root, 42999d122629SNikolay Borisov struct btrfs_inode *inode, 4300827463c4SMiao Xie struct btrfs_path *path, 4301de0ee0edSFilipe Manana struct btrfs_log_ctx *ctx, 4302de0ee0edSFilipe Manana const u64 start, 4303de0ee0edSFilipe Manana const u64 end) 43045dc562c5SJosef Bacik { 43055dc562c5SJosef Bacik struct extent_map *em, *n; 43065dc562c5SJosef Bacik struct list_head extents; 
43079d122629SNikolay Borisov struct extent_map_tree *tree = &inode->extent_tree; 43088c6c5928SJosef Bacik u64 logged_start, logged_end; 43095dc562c5SJosef Bacik u64 test_gen; 43105dc562c5SJosef Bacik int ret = 0; 43112ab28f32SJosef Bacik int num = 0; 43125dc562c5SJosef Bacik 43135dc562c5SJosef Bacik INIT_LIST_HEAD(&extents); 43145dc562c5SJosef Bacik 43159d122629SNikolay Borisov down_write(&inode->dio_sem); 43165dc562c5SJosef Bacik write_lock(&tree->lock); 43175dc562c5SJosef Bacik test_gen = root->fs_info->last_trans_committed; 43188c6c5928SJosef Bacik logged_start = start; 43198c6c5928SJosef Bacik logged_end = end; 43205dc562c5SJosef Bacik 43215dc562c5SJosef Bacik list_for_each_entry_safe(em, n, &tree->modified_extents, list) { 43225dc562c5SJosef Bacik list_del_init(&em->list); 43232ab28f32SJosef Bacik /* 43242ab28f32SJosef Bacik * Just an arbitrary number, this can be really CPU intensive 43252ab28f32SJosef Bacik * once we start getting a lot of extents, and really once we 43262ab28f32SJosef Bacik * have a bunch of extents we just want to commit since it will 43272ab28f32SJosef Bacik * be faster. 43282ab28f32SJosef Bacik */ 43292ab28f32SJosef Bacik if (++num > 32768) { 43302ab28f32SJosef Bacik list_del_init(&tree->modified_extents); 43312ab28f32SJosef Bacik ret = -EFBIG; 43322ab28f32SJosef Bacik goto process; 43332ab28f32SJosef Bacik } 43342ab28f32SJosef Bacik 43355dc562c5SJosef Bacik if (em->generation <= test_gen) 43365dc562c5SJosef Bacik continue; 43378c6c5928SJosef Bacik 433831d11b83SFilipe Manana /* We log prealloc extents beyond eof later. 
*/ 433931d11b83SFilipe Manana if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) && 434031d11b83SFilipe Manana em->start >= i_size_read(&inode->vfs_inode)) 434131d11b83SFilipe Manana continue; 434231d11b83SFilipe Manana 43438c6c5928SJosef Bacik if (em->start < logged_start) 43448c6c5928SJosef Bacik logged_start = em->start; 43458c6c5928SJosef Bacik if ((em->start + em->len - 1) > logged_end) 43468c6c5928SJosef Bacik logged_end = em->start + em->len - 1; 43478c6c5928SJosef Bacik 4348ff44c6e3SJosef Bacik /* Need a ref to keep it from getting evicted from cache */ 4349490b54d6SElena Reshetova refcount_inc(&em->refs); 4350ff44c6e3SJosef Bacik set_bit(EXTENT_FLAG_LOGGING, &em->flags); 43515dc562c5SJosef Bacik list_add_tail(&em->list, &extents); 43522ab28f32SJosef Bacik num++; 43535dc562c5SJosef Bacik } 43545dc562c5SJosef Bacik 43555dc562c5SJosef Bacik list_sort(NULL, &extents, extent_cmp); 43562ab28f32SJosef Bacik process: 43575dc562c5SJosef Bacik while (!list_empty(&extents)) { 43585dc562c5SJosef Bacik em = list_entry(extents.next, struct extent_map, list); 43595dc562c5SJosef Bacik 43605dc562c5SJosef Bacik list_del_init(&em->list); 43615dc562c5SJosef Bacik 43625dc562c5SJosef Bacik /* 43635dc562c5SJosef Bacik * If we had an error we just need to delete everybody from our 43645dc562c5SJosef Bacik * private list. 
43655dc562c5SJosef Bacik */ 4366ff44c6e3SJosef Bacik if (ret) { 4367201a9038SJosef Bacik clear_em_logging(tree, em); 4368ff44c6e3SJosef Bacik free_extent_map(em); 43695dc562c5SJosef Bacik continue; 4370ff44c6e3SJosef Bacik } 4371ff44c6e3SJosef Bacik 4372ff44c6e3SJosef Bacik write_unlock(&tree->lock); 43735dc562c5SJosef Bacik 4374a2120a47SJosef Bacik ret = log_one_extent(trans, inode, root, em, path, ctx); 4375ff44c6e3SJosef Bacik write_lock(&tree->lock); 4376201a9038SJosef Bacik clear_em_logging(tree, em); 4377201a9038SJosef Bacik free_extent_map(em); 43785dc562c5SJosef Bacik } 4379ff44c6e3SJosef Bacik WARN_ON(!list_empty(&extents)); 4380ff44c6e3SJosef Bacik write_unlock(&tree->lock); 43819d122629SNikolay Borisov up_write(&inode->dio_sem); 43825dc562c5SJosef Bacik 43835dc562c5SJosef Bacik btrfs_release_path(path); 438431d11b83SFilipe Manana if (!ret) 438531d11b83SFilipe Manana ret = btrfs_log_prealloc_extents(trans, inode, path); 438631d11b83SFilipe Manana 43875dc562c5SJosef Bacik return ret; 43885dc562c5SJosef Bacik } 43895dc562c5SJosef Bacik 4390481b01c0SNikolay Borisov static int logged_inode_size(struct btrfs_root *log, struct btrfs_inode *inode, 43911a4bcf47SFilipe Manana struct btrfs_path *path, u64 *size_ret) 43921a4bcf47SFilipe Manana { 43931a4bcf47SFilipe Manana struct btrfs_key key; 43941a4bcf47SFilipe Manana int ret; 43951a4bcf47SFilipe Manana 4396481b01c0SNikolay Borisov key.objectid = btrfs_ino(inode); 43971a4bcf47SFilipe Manana key.type = BTRFS_INODE_ITEM_KEY; 43981a4bcf47SFilipe Manana key.offset = 0; 43991a4bcf47SFilipe Manana 44001a4bcf47SFilipe Manana ret = btrfs_search_slot(NULL, log, &key, path, 0, 0); 44011a4bcf47SFilipe Manana if (ret < 0) { 44021a4bcf47SFilipe Manana return ret; 44031a4bcf47SFilipe Manana } else if (ret > 0) { 44042f2ff0eeSFilipe Manana *size_ret = 0; 44051a4bcf47SFilipe Manana } else { 44061a4bcf47SFilipe Manana struct btrfs_inode_item *item; 44071a4bcf47SFilipe Manana 44081a4bcf47SFilipe Manana item = 
btrfs_item_ptr(path->nodes[0], path->slots[0], 44091a4bcf47SFilipe Manana struct btrfs_inode_item); 44101a4bcf47SFilipe Manana *size_ret = btrfs_inode_size(path->nodes[0], item); 44111a4bcf47SFilipe Manana } 44121a4bcf47SFilipe Manana 44131a4bcf47SFilipe Manana btrfs_release_path(path); 44141a4bcf47SFilipe Manana return 0; 44151a4bcf47SFilipe Manana } 44161a4bcf47SFilipe Manana 441736283bf7SFilipe Manana /* 441836283bf7SFilipe Manana * At the moment we always log all xattrs. This is to figure out at log replay 441936283bf7SFilipe Manana * time which xattrs must have their deletion replayed. If a xattr is missing 442036283bf7SFilipe Manana * in the log tree and exists in the fs/subvol tree, we delete it. This is 442136283bf7SFilipe Manana * because if a xattr is deleted, the inode is fsynced and a power failure 442236283bf7SFilipe Manana * happens, causing the log to be replayed the next time the fs is mounted, 442336283bf7SFilipe Manana * we want the xattr to not exist anymore (same behaviour as other filesystems 442436283bf7SFilipe Manana * with a journal, ext3/4, xfs, f2fs, etc). 
442536283bf7SFilipe Manana */ 442636283bf7SFilipe Manana static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans, 442736283bf7SFilipe Manana struct btrfs_root *root, 44281a93c36aSNikolay Borisov struct btrfs_inode *inode, 442936283bf7SFilipe Manana struct btrfs_path *path, 443036283bf7SFilipe Manana struct btrfs_path *dst_path) 443136283bf7SFilipe Manana { 443236283bf7SFilipe Manana int ret; 443336283bf7SFilipe Manana struct btrfs_key key; 44341a93c36aSNikolay Borisov const u64 ino = btrfs_ino(inode); 443536283bf7SFilipe Manana int ins_nr = 0; 443636283bf7SFilipe Manana int start_slot = 0; 443736283bf7SFilipe Manana 443836283bf7SFilipe Manana key.objectid = ino; 443936283bf7SFilipe Manana key.type = BTRFS_XATTR_ITEM_KEY; 444036283bf7SFilipe Manana key.offset = 0; 444136283bf7SFilipe Manana 444236283bf7SFilipe Manana ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 444336283bf7SFilipe Manana if (ret < 0) 444436283bf7SFilipe Manana return ret; 444536283bf7SFilipe Manana 444636283bf7SFilipe Manana while (true) { 444736283bf7SFilipe Manana int slot = path->slots[0]; 444836283bf7SFilipe Manana struct extent_buffer *leaf = path->nodes[0]; 444936283bf7SFilipe Manana int nritems = btrfs_header_nritems(leaf); 445036283bf7SFilipe Manana 445136283bf7SFilipe Manana if (slot >= nritems) { 445236283bf7SFilipe Manana if (ins_nr > 0) { 445336283bf7SFilipe Manana u64 last_extent = 0; 445436283bf7SFilipe Manana 44551a93c36aSNikolay Borisov ret = copy_items(trans, inode, dst_path, path, 445636283bf7SFilipe Manana &last_extent, start_slot, 445736283bf7SFilipe Manana ins_nr, 1, 0); 445836283bf7SFilipe Manana /* can't be 1, extent items aren't processed */ 445936283bf7SFilipe Manana ASSERT(ret <= 0); 446036283bf7SFilipe Manana if (ret < 0) 446136283bf7SFilipe Manana return ret; 446236283bf7SFilipe Manana ins_nr = 0; 446336283bf7SFilipe Manana } 446436283bf7SFilipe Manana ret = btrfs_next_leaf(root, path); 446536283bf7SFilipe Manana if (ret < 0) 446636283bf7SFilipe Manana 
return ret; 446736283bf7SFilipe Manana else if (ret > 0) 446836283bf7SFilipe Manana break; 446936283bf7SFilipe Manana continue; 447036283bf7SFilipe Manana } 447136283bf7SFilipe Manana 447236283bf7SFilipe Manana btrfs_item_key_to_cpu(leaf, &key, slot); 447336283bf7SFilipe Manana if (key.objectid != ino || key.type != BTRFS_XATTR_ITEM_KEY) 447436283bf7SFilipe Manana break; 447536283bf7SFilipe Manana 447636283bf7SFilipe Manana if (ins_nr == 0) 447736283bf7SFilipe Manana start_slot = slot; 447836283bf7SFilipe Manana ins_nr++; 447936283bf7SFilipe Manana path->slots[0]++; 448036283bf7SFilipe Manana cond_resched(); 448136283bf7SFilipe Manana } 448236283bf7SFilipe Manana if (ins_nr > 0) { 448336283bf7SFilipe Manana u64 last_extent = 0; 448436283bf7SFilipe Manana 44851a93c36aSNikolay Borisov ret = copy_items(trans, inode, dst_path, path, 448636283bf7SFilipe Manana &last_extent, start_slot, 448736283bf7SFilipe Manana ins_nr, 1, 0); 448836283bf7SFilipe Manana /* can't be 1, extent items aren't processed */ 448936283bf7SFilipe Manana ASSERT(ret <= 0); 449036283bf7SFilipe Manana if (ret < 0) 449136283bf7SFilipe Manana return ret; 449236283bf7SFilipe Manana } 449336283bf7SFilipe Manana 449436283bf7SFilipe Manana return 0; 449536283bf7SFilipe Manana } 449636283bf7SFilipe Manana 4497a89ca6f2SFilipe Manana /* 4498a89ca6f2SFilipe Manana * If the no holes feature is enabled we need to make sure any hole between the 4499a89ca6f2SFilipe Manana * last extent and the i_size of our inode is explicitly marked in the log. 
This 4500a89ca6f2SFilipe Manana * is to make sure that doing something like: 4501a89ca6f2SFilipe Manana * 4502a89ca6f2SFilipe Manana * 1) create file with 128Kb of data 4503a89ca6f2SFilipe Manana * 2) truncate file to 64Kb 4504a89ca6f2SFilipe Manana * 3) truncate file to 256Kb 4505a89ca6f2SFilipe Manana * 4) fsync file 4506a89ca6f2SFilipe Manana * 5) <crash/power failure> 4507a89ca6f2SFilipe Manana * 6) mount fs and trigger log replay 4508a89ca6f2SFilipe Manana * 4509a89ca6f2SFilipe Manana * Will give us a file with a size of 256Kb, the first 64Kb of data match what 4510a89ca6f2SFilipe Manana * the file had in its first 64Kb of data at step 1 and the last 192Kb of the 4511a89ca6f2SFilipe Manana * file correspond to a hole. The presence of explicit holes in a log tree is 4512a89ca6f2SFilipe Manana * what guarantees that log replay will remove/adjust file extent items in the 4513a89ca6f2SFilipe Manana * fs/subvol tree. 4514a89ca6f2SFilipe Manana * 4515a89ca6f2SFilipe Manana * Here we do not need to care about holes between extents, that is already done 4516a89ca6f2SFilipe Manana * by copy_items(). We also only need to do this in the full sync path, where we 4517a89ca6f2SFilipe Manana * lookup for extents from the fs/subvol tree only. In the fast path case, we 4518a89ca6f2SFilipe Manana * lookup the list of modified extent maps and if any represents a hole, we 4519a89ca6f2SFilipe Manana * insert a corresponding extent representing a hole in the log tree. 
4520a89ca6f2SFilipe Manana */ 4521a89ca6f2SFilipe Manana static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans, 4522a89ca6f2SFilipe Manana struct btrfs_root *root, 4523a0308dd7SNikolay Borisov struct btrfs_inode *inode, 4524a89ca6f2SFilipe Manana struct btrfs_path *path) 4525a89ca6f2SFilipe Manana { 45260b246afaSJeff Mahoney struct btrfs_fs_info *fs_info = root->fs_info; 4527a89ca6f2SFilipe Manana int ret; 4528a89ca6f2SFilipe Manana struct btrfs_key key; 4529a89ca6f2SFilipe Manana u64 hole_start; 4530a89ca6f2SFilipe Manana u64 hole_size; 4531a89ca6f2SFilipe Manana struct extent_buffer *leaf; 4532a89ca6f2SFilipe Manana struct btrfs_root *log = root->log_root; 4533a0308dd7SNikolay Borisov const u64 ino = btrfs_ino(inode); 4534a0308dd7SNikolay Borisov const u64 i_size = i_size_read(&inode->vfs_inode); 4535a89ca6f2SFilipe Manana 45360b246afaSJeff Mahoney if (!btrfs_fs_incompat(fs_info, NO_HOLES)) 4537a89ca6f2SFilipe Manana return 0; 4538a89ca6f2SFilipe Manana 4539a89ca6f2SFilipe Manana key.objectid = ino; 4540a89ca6f2SFilipe Manana key.type = BTRFS_EXTENT_DATA_KEY; 4541a89ca6f2SFilipe Manana key.offset = (u64)-1; 4542a89ca6f2SFilipe Manana 4543a89ca6f2SFilipe Manana ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 4544a89ca6f2SFilipe Manana ASSERT(ret != 0); 4545a89ca6f2SFilipe Manana if (ret < 0) 4546a89ca6f2SFilipe Manana return ret; 4547a89ca6f2SFilipe Manana 4548a89ca6f2SFilipe Manana ASSERT(path->slots[0] > 0); 4549a89ca6f2SFilipe Manana path->slots[0]--; 4550a89ca6f2SFilipe Manana leaf = path->nodes[0]; 4551a89ca6f2SFilipe Manana btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 4552a89ca6f2SFilipe Manana 4553a89ca6f2SFilipe Manana if (key.objectid != ino || key.type != BTRFS_EXTENT_DATA_KEY) { 4554a89ca6f2SFilipe Manana /* inode does not have any extents */ 4555a89ca6f2SFilipe Manana hole_start = 0; 4556a89ca6f2SFilipe Manana hole_size = i_size; 4557a89ca6f2SFilipe Manana } else { 4558a89ca6f2SFilipe Manana struct btrfs_file_extent_item 
*extent; 4559a89ca6f2SFilipe Manana u64 len; 4560a89ca6f2SFilipe Manana 4561a89ca6f2SFilipe Manana /* 4562a89ca6f2SFilipe Manana * If there's an extent beyond i_size, an explicit hole was 4563a89ca6f2SFilipe Manana * already inserted by copy_items(). 4564a89ca6f2SFilipe Manana */ 4565a89ca6f2SFilipe Manana if (key.offset >= i_size) 4566a89ca6f2SFilipe Manana return 0; 4567a89ca6f2SFilipe Manana 4568a89ca6f2SFilipe Manana extent = btrfs_item_ptr(leaf, path->slots[0], 4569a89ca6f2SFilipe Manana struct btrfs_file_extent_item); 4570a89ca6f2SFilipe Manana 4571a89ca6f2SFilipe Manana if (btrfs_file_extent_type(leaf, extent) == 4572a89ca6f2SFilipe Manana BTRFS_FILE_EXTENT_INLINE) { 4573e41ca589SQu Wenruo len = btrfs_file_extent_ram_bytes(leaf, extent); 45746399fb5aSFilipe Manana ASSERT(len == i_size || 45756399fb5aSFilipe Manana (len == fs_info->sectorsize && 45766399fb5aSFilipe Manana btrfs_file_extent_compression(leaf, extent) != 45776399fb5aSFilipe Manana BTRFS_COMPRESS_NONE)); 4578a89ca6f2SFilipe Manana return 0; 4579a89ca6f2SFilipe Manana } 4580a89ca6f2SFilipe Manana 4581a89ca6f2SFilipe Manana len = btrfs_file_extent_num_bytes(leaf, extent); 4582a89ca6f2SFilipe Manana /* Last extent goes beyond i_size, no need to log a hole. */ 4583a89ca6f2SFilipe Manana if (key.offset + len > i_size) 4584a89ca6f2SFilipe Manana return 0; 4585a89ca6f2SFilipe Manana hole_start = key.offset + len; 4586a89ca6f2SFilipe Manana hole_size = i_size - hole_start; 4587a89ca6f2SFilipe Manana } 4588a89ca6f2SFilipe Manana btrfs_release_path(path); 4589a89ca6f2SFilipe Manana 4590a89ca6f2SFilipe Manana /* Last extent ends at i_size. 
*/ 4591a89ca6f2SFilipe Manana if (hole_size == 0) 4592a89ca6f2SFilipe Manana return 0; 4593a89ca6f2SFilipe Manana 45940b246afaSJeff Mahoney hole_size = ALIGN(hole_size, fs_info->sectorsize); 4595a89ca6f2SFilipe Manana ret = btrfs_insert_file_extent(trans, log, ino, hole_start, 0, 0, 4596a89ca6f2SFilipe Manana hole_size, 0, hole_size, 0, 0, 0); 4597a89ca6f2SFilipe Manana return ret; 4598a89ca6f2SFilipe Manana } 4599a89ca6f2SFilipe Manana 460056f23fdbSFilipe Manana /* 460156f23fdbSFilipe Manana * When we are logging a new inode X, check if it doesn't have a reference that 460256f23fdbSFilipe Manana * matches the reference from some other inode Y created in a past transaction 460356f23fdbSFilipe Manana * and that was renamed in the current transaction. If we don't do this, then at 460456f23fdbSFilipe Manana * log replay time we can lose inode Y (and all its files if it's a directory): 460556f23fdbSFilipe Manana * 460656f23fdbSFilipe Manana * mkdir /mnt/x 460756f23fdbSFilipe Manana * echo "hello world" > /mnt/x/foobar 460856f23fdbSFilipe Manana * sync 460956f23fdbSFilipe Manana * mv /mnt/x /mnt/y 461056f23fdbSFilipe Manana * mkdir /mnt/x # or touch /mnt/x 461156f23fdbSFilipe Manana * xfs_io -c fsync /mnt/x 461256f23fdbSFilipe Manana * <power fail> 461356f23fdbSFilipe Manana * mount fs, trigger log replay 461456f23fdbSFilipe Manana * 461556f23fdbSFilipe Manana * After the log replay procedure, we would lose the first directory and all its 461656f23fdbSFilipe Manana * files (file foobar). 
461756f23fdbSFilipe Manana * For the case where inode Y is not a directory we simply end up losing it: 461856f23fdbSFilipe Manana * 461956f23fdbSFilipe Manana * echo "123" > /mnt/foo 462056f23fdbSFilipe Manana * sync 462156f23fdbSFilipe Manana * mv /mnt/foo /mnt/bar 462256f23fdbSFilipe Manana * echo "abc" > /mnt/foo 462356f23fdbSFilipe Manana * xfs_io -c fsync /mnt/foo 462456f23fdbSFilipe Manana * <power fail> 462556f23fdbSFilipe Manana * 462656f23fdbSFilipe Manana * We also need this for cases where a snapshot entry is replaced by some other 462756f23fdbSFilipe Manana * entry (file or directory) otherwise we end up with an unreplayable log due to 462856f23fdbSFilipe Manana * attempts to delete the snapshot entry (entry of type BTRFS_ROOT_ITEM_KEY) as 462956f23fdbSFilipe Manana * if it were a regular entry: 463056f23fdbSFilipe Manana * 463156f23fdbSFilipe Manana * mkdir /mnt/x 463256f23fdbSFilipe Manana * btrfs subvolume snapshot /mnt /mnt/x/snap 463356f23fdbSFilipe Manana * btrfs subvolume delete /mnt/x/snap 463456f23fdbSFilipe Manana * rmdir /mnt/x 463556f23fdbSFilipe Manana * mkdir /mnt/x 463656f23fdbSFilipe Manana * fsync /mnt/x or fsync some new file inside it 463756f23fdbSFilipe Manana * <power fail> 463856f23fdbSFilipe Manana * 463956f23fdbSFilipe Manana * The snapshot delete, rmdir of x, mkdir of a new x and the fsync all happen in 464056f23fdbSFilipe Manana * the same transaction. 
464156f23fdbSFilipe Manana */ 464256f23fdbSFilipe Manana static int btrfs_check_ref_name_override(struct extent_buffer *eb, 464356f23fdbSFilipe Manana const int slot, 464456f23fdbSFilipe Manana const struct btrfs_key *key, 46454791c8f1SNikolay Borisov struct btrfs_inode *inode, 464644f714daSFilipe Manana u64 *other_ino) 464756f23fdbSFilipe Manana { 464856f23fdbSFilipe Manana int ret; 464956f23fdbSFilipe Manana struct btrfs_path *search_path; 465056f23fdbSFilipe Manana char *name = NULL; 465156f23fdbSFilipe Manana u32 name_len = 0; 465256f23fdbSFilipe Manana u32 item_size = btrfs_item_size_nr(eb, slot); 465356f23fdbSFilipe Manana u32 cur_offset = 0; 465456f23fdbSFilipe Manana unsigned long ptr = btrfs_item_ptr_offset(eb, slot); 465556f23fdbSFilipe Manana 465656f23fdbSFilipe Manana search_path = btrfs_alloc_path(); 465756f23fdbSFilipe Manana if (!search_path) 465856f23fdbSFilipe Manana return -ENOMEM; 465956f23fdbSFilipe Manana search_path->search_commit_root = 1; 466056f23fdbSFilipe Manana search_path->skip_locking = 1; 466156f23fdbSFilipe Manana 466256f23fdbSFilipe Manana while (cur_offset < item_size) { 466356f23fdbSFilipe Manana u64 parent; 466456f23fdbSFilipe Manana u32 this_name_len; 466556f23fdbSFilipe Manana u32 this_len; 466656f23fdbSFilipe Manana unsigned long name_ptr; 466756f23fdbSFilipe Manana struct btrfs_dir_item *di; 466856f23fdbSFilipe Manana 466956f23fdbSFilipe Manana if (key->type == BTRFS_INODE_REF_KEY) { 467056f23fdbSFilipe Manana struct btrfs_inode_ref *iref; 467156f23fdbSFilipe Manana 467256f23fdbSFilipe Manana iref = (struct btrfs_inode_ref *)(ptr + cur_offset); 467356f23fdbSFilipe Manana parent = key->offset; 467456f23fdbSFilipe Manana this_name_len = btrfs_inode_ref_name_len(eb, iref); 467556f23fdbSFilipe Manana name_ptr = (unsigned long)(iref + 1); 467656f23fdbSFilipe Manana this_len = sizeof(*iref) + this_name_len; 467756f23fdbSFilipe Manana } else { 467856f23fdbSFilipe Manana struct btrfs_inode_extref *extref; 467956f23fdbSFilipe Manana 
468056f23fdbSFilipe Manana extref = (struct btrfs_inode_extref *)(ptr + 468156f23fdbSFilipe Manana cur_offset); 468256f23fdbSFilipe Manana parent = btrfs_inode_extref_parent(eb, extref); 468356f23fdbSFilipe Manana this_name_len = btrfs_inode_extref_name_len(eb, extref); 468456f23fdbSFilipe Manana name_ptr = (unsigned long)&extref->name; 468556f23fdbSFilipe Manana this_len = sizeof(*extref) + this_name_len; 468656f23fdbSFilipe Manana } 468756f23fdbSFilipe Manana 468856f23fdbSFilipe Manana if (this_name_len > name_len) { 468956f23fdbSFilipe Manana char *new_name; 469056f23fdbSFilipe Manana 469156f23fdbSFilipe Manana new_name = krealloc(name, this_name_len, GFP_NOFS); 469256f23fdbSFilipe Manana if (!new_name) { 469356f23fdbSFilipe Manana ret = -ENOMEM; 469456f23fdbSFilipe Manana goto out; 469556f23fdbSFilipe Manana } 469656f23fdbSFilipe Manana name_len = this_name_len; 469756f23fdbSFilipe Manana name = new_name; 469856f23fdbSFilipe Manana } 469956f23fdbSFilipe Manana 470056f23fdbSFilipe Manana read_extent_buffer(eb, name, name_ptr, this_name_len); 47014791c8f1SNikolay Borisov di = btrfs_lookup_dir_item(NULL, inode->root, search_path, 47024791c8f1SNikolay Borisov parent, name, this_name_len, 0); 470356f23fdbSFilipe Manana if (di && !IS_ERR(di)) { 470444f714daSFilipe Manana struct btrfs_key di_key; 470544f714daSFilipe Manana 470644f714daSFilipe Manana btrfs_dir_item_key_to_cpu(search_path->nodes[0], 470744f714daSFilipe Manana di, &di_key); 470844f714daSFilipe Manana if (di_key.type == BTRFS_INODE_ITEM_KEY) { 470956f23fdbSFilipe Manana ret = 1; 471044f714daSFilipe Manana *other_ino = di_key.objectid; 471144f714daSFilipe Manana } else { 471244f714daSFilipe Manana ret = -EAGAIN; 471344f714daSFilipe Manana } 471456f23fdbSFilipe Manana goto out; 471556f23fdbSFilipe Manana } else if (IS_ERR(di)) { 471656f23fdbSFilipe Manana ret = PTR_ERR(di); 471756f23fdbSFilipe Manana goto out; 471856f23fdbSFilipe Manana } 471956f23fdbSFilipe Manana btrfs_release_path(search_path); 
472056f23fdbSFilipe Manana 472156f23fdbSFilipe Manana cur_offset += this_len; 472256f23fdbSFilipe Manana } 472356f23fdbSFilipe Manana ret = 0; 472456f23fdbSFilipe Manana out: 472556f23fdbSFilipe Manana btrfs_free_path(search_path); 472656f23fdbSFilipe Manana kfree(name); 472756f23fdbSFilipe Manana return ret; 472856f23fdbSFilipe Manana } 472956f23fdbSFilipe Manana 4730e02119d5SChris Mason /* log a single inode in the tree log. 4731e02119d5SChris Mason * At least one parent directory for this inode must exist in the tree 4732e02119d5SChris Mason * or be logged already. 4733e02119d5SChris Mason * 4734e02119d5SChris Mason * Any items from this inode changed by the current transaction are copied 4735e02119d5SChris Mason * to the log tree. An extra reference is taken on any extents in this 4736e02119d5SChris Mason * file, allowing us to avoid a whole pile of corner cases around logging 4737e02119d5SChris Mason * blocks that have been removed from the tree. 4738e02119d5SChris Mason * 4739e02119d5SChris Mason * See LOG_INODE_ALL and related defines for a description of what inode_only 4740e02119d5SChris Mason * does. 4741e02119d5SChris Mason * 4742e02119d5SChris Mason * This handles both files and directories. 
*/
/*
 * @trans:      transaction handle; its transid bounds the forward search and
 *              is recorded in inode->logged_trans on success.
 * @root:       fs/subvol root the items are read from; root->log_root is the
 *              log tree that items are dropped from and copied into.
 * @inode:      inode being logged; inode->log_mutex is held for the duration.
 * @inode_only: LOG_INODE_ALL, LOG_INODE_EXISTS or LOG_OTHER_INODE.  The last
 *              one is turned into LOG_INODE_EXISTS and only differs in taking
 *              log_mutex with mutex_lock_nested().
 * @start/@end: file range, passed to btrfs_log_changed_extents() and used to
 *              decide which modified extent maps may be removed from the list.
 * @ctx:        log context handed to the callees; when NULL the handling of
 *              conflicting names (logging of the other inode) is skipped.
 *
 * Returns 0 on success or a negative errno from the first step that failed
 * (-ENOMEM if a path can not be allocated).
 */
static int btrfs_log_inode(struct btrfs_trans_handle *trans,
			   struct btrfs_root *root, struct btrfs_inode *inode,
			   int inode_only,
			   const loff_t start,
			   const loff_t end,
			   struct btrfs_log_ctx *ctx)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_path *path;
	struct btrfs_path *dst_path;
	struct btrfs_key min_key;
	struct btrfs_key max_key;
	struct btrfs_root *log = root->log_root;
	u64 last_extent = 0;
	int err = 0;
	int ret;
	int nritems;
	int ins_start_slot = 0;
	int ins_nr;
	bool fast_search = false;
	u64 ino = btrfs_ino(inode);
	struct extent_map_tree *em_tree = &inode->extent_tree;
	u64 logged_isize = 0;
	bool need_log_inode_item = true;
	bool xattrs_logged = false;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
	dst_path = btrfs_alloc_path();
	if (!dst_path) {
		btrfs_free_path(path);
		return -ENOMEM;
	}

	/* [min_key, max_key] is the key range of this inode that gets scanned */
	min_key.objectid = ino;
	min_key.type = BTRFS_INODE_ITEM_KEY;
	min_key.offset = 0;

	max_key.objectid = ino;


	/* today the code can only do partial logging of directories */
	if (S_ISDIR(inode->vfs_inode.i_mode) ||
	    (!test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
		       &inode->runtime_flags) &&
	     inode_only >= LOG_INODE_EXISTS))
		max_key.type = BTRFS_XATTR_ITEM_KEY;
	else
		max_key.type = (u8)-1;
	max_key.offset = (u64)-1;

	/*
	 * Only run delayed items if we are a dir or a new file.
	 * Otherwise commit the delayed inode only, which is needed in
	 * order for the log replay code to mark inodes for link count
	 * fixup (create temporary BTRFS_TREE_LOG_FIXUP_OBJECTID items).
	 */
	if (S_ISDIR(inode->vfs_inode.i_mode) ||
	    inode->generation > fs_info->last_trans_committed)
		ret = btrfs_commit_inode_delayed_items(trans, inode);
	else
		ret = btrfs_commit_inode_delayed_inode(inode);

	/* log_mutex is not held yet, so free the paths and return directly */
	if (ret) {
		btrfs_free_path(path);
		btrfs_free_path(dst_path);
		return ret;
	}

	/*
	 * LOG_OTHER_INODE is used by the recursive call further below, made
	 * while the caller's own log_mutex is still held, hence the nested
	 * lock annotation.  From here on it behaves as LOG_INODE_EXISTS.
	 */
	if (inode_only == LOG_OTHER_INODE) {
		inode_only = LOG_INODE_EXISTS;
		mutex_lock_nested(&inode->log_mutex, SINGLE_DEPTH_NESTING);
	} else {
		mutex_lock(&inode->log_mutex);
	}

	/*
	 * a brute force approach to making sure we get the most uptodate
	 * copies of everything.
	 */
	if (S_ISDIR(inode->vfs_inode.i_mode)) {
		int max_key_type = BTRFS_DIR_LOG_INDEX_KEY;

		if (inode_only == LOG_INODE_EXISTS)
			max_key_type = BTRFS_XATTR_ITEM_KEY;
		ret = drop_objectid_items(trans, log, path, ino, max_key_type);
	} else {
		if (inode_only == LOG_INODE_EXISTS) {
			/*
			 * Make sure the new inode item we write to the log has
			 * the same isize as the current one (if it exists).
			 * This is necessary to prevent data loss after log
			 * replay, and also to prevent doing a wrong expanding
			 * truncate - for e.g. create file, write 4K into offset
			 * 0, fsync, write 4K into offset 4096, add hard link,
			 * fsync some other file (to sync log), power fail - if
			 * we use the inode's current i_size, after log replay
			 * we get a 8Kb file, with the last 4Kb extent as a hole
			 * (zeroes), as if an expanding truncate happened,
			 * instead of getting a file of 4Kb only.
			 */
			err = logged_inode_size(log, inode, path, &logged_isize);
			if (err)
				goto out_unlock;
		}
		if (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
			     &inode->runtime_flags)) {
			if (inode_only == LOG_INODE_EXISTS) {
				max_key.type = BTRFS_XATTR_ITEM_KEY;
				ret = drop_objectid_items(trans, log, path, ino,
							  max_key.type);
			} else {
				clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
					  &inode->runtime_flags);
				clear_bit(BTRFS_INODE_COPY_EVERYTHING,
					  &inode->runtime_flags);
				/* keep truncating the log copy while we get -EAGAIN */
				while(1) {
					ret = btrfs_truncate_inode_items(trans,
						log, &inode->vfs_inode, 0, 0);
					if (ret != -EAGAIN)
						break;
				}
			}
		} else if (test_and_clear_bit(BTRFS_INODE_COPY_EVERYTHING,
					      &inode->runtime_flags) ||
			   inode_only == LOG_INODE_EXISTS) {
			if (inode_only == LOG_INODE_ALL)
				fast_search = true;
			max_key.type = BTRFS_XATTR_ITEM_KEY;
			ret = drop_objectid_items(trans, log, path, ino,
						  max_key.type);
		} else {
			/*
			 * Neither flag is set and this is not LOG_INODE_EXISTS:
			 * nothing is dropped from the log and the item copy
			 * loop below is skipped entirely.
			 */
			if (inode_only == LOG_INODE_ALL)
				fast_search = true;
			goto log_extents;
		}

	}
	if (ret) {
		err = ret;
		goto out_unlock;
	}

	/*
	 * Copy the inode's items in [min_key, max_key] into the log.  Runs of
	 * adjacent slots in a leaf are batched (ins_start_slot, ins_nr) so
	 * that each run is handed to copy_items() in a single call.
	 */
	while (1) {
		ins_nr = 0;
		ret = btrfs_search_forward(root, &min_key,
					   path, trans->transid);
		if (ret < 0) {
			err = ret;
			goto out_unlock;
		}
		if (ret != 0)
			break;
again:
		/* note, ins_nr might be > 0 here, cleanup outside the loop */
		if (min_key.objectid != ino)
			break;
		if (min_key.type > max_key.type)
			break;

		/* the inode item is part of what gets copied, no need to log it again */
		if (min_key.type == BTRFS_INODE_ITEM_KEY)
			need_log_inode_item = false;

		/*
		 * For an inode created in this transaction, check whether any
		 * of its names is still in use in the commit root.
		 */
		if ((min_key.type == BTRFS_INODE_REF_KEY ||
		     min_key.type == BTRFS_INODE_EXTREF_KEY) &&
		    inode->generation == trans->transid) {
			u64 other_ino = 0;

			ret = btrfs_check_ref_name_override(path->nodes[0],
					path->slots[0], &min_key, inode,
					&other_ino);
			if (ret < 0) {
				err = ret;
				goto out_unlock;
			} else if (ret > 0 && ctx &&
				   other_ino != btrfs_ino(BTRFS_I(ctx->inode))) {
				struct btrfs_key inode_key;
				struct inode *other_inode;

				/*
				 * Include the current slot in the pending
				 * batch and flush it, since the path is
				 * released right below.
				 */
				if (ins_nr > 0) {
					ins_nr++;
				} else {
					ins_nr = 1;
					ins_start_slot = path->slots[0];
				}
				ret = copy_items(trans, inode, dst_path, path,
						 &last_extent, ins_start_slot,
						 ins_nr, inode_only,
						 logged_isize);
				if (ret < 0) {
					err = ret;
					goto out_unlock;
				}
				ins_nr = 0;
				btrfs_release_path(path);
				inode_key.objectid = other_ino;
				inode_key.type = BTRFS_INODE_ITEM_KEY;
				inode_key.offset = 0;
				other_inode = btrfs_iget(fs_info->sb,
							 &inode_key, root,
							 NULL);
				/*
				 * If the other inode that had a conflicting dir
				 * entry was deleted in the current transaction,
				 * we don't need to do more work nor fallback to
				 * a transaction commit.
				 */
				if (IS_ERR(other_inode) &&
				    PTR_ERR(other_inode) == -ENOENT) {
					goto next_key;
				} else if (IS_ERR(other_inode)) {
					err = PTR_ERR(other_inode);
					goto out_unlock;
				}
				/*
				 * We are safe logging the other inode without
				 * acquiring its i_mutex as long as we log with
				 * the LOG_INODE_EXISTS mode. We're safe against
				 * concurrent renames of the other inode as well
				 * because during a rename we pin the log and
				 * update the log with the new name before we
				 * unpin it.
				 */
				err = btrfs_log_inode(trans, root,
						BTRFS_I(other_inode),
						LOG_OTHER_INODE, 0, LLONG_MAX,
						ctx);
				iput(other_inode);
				if (err)
					goto out_unlock;
				else
					goto next_key;
			}
		}

		/* Skip xattrs, we log them later with btrfs_log_all_xattrs() */
		if (min_key.type == BTRFS_XATTR_ITEM_KEY) {
			if (ins_nr == 0)
				goto next_slot;
			/* an xattr ends the current run of slots, flush it */
			ret = copy_items(trans, inode, dst_path, path,
					 &last_extent, ins_start_slot,
					 ins_nr, inode_only, logged_isize);
			if (ret < 0) {
				err = ret;
				goto out_unlock;
			}
			ins_nr = 0;
			/* positive return: drop the path and search again from min_key */
			if (ret) {
				btrfs_release_path(path);
				continue;
			}
			goto next_slot;
		}

		/* extend the run if this slot is adjacent to it, or start a new one */
		if (ins_nr && ins_start_slot + ins_nr == path->slots[0]) {
			ins_nr++;
			goto next_slot;
		} else if (!ins_nr) {
			ins_start_slot = path->slots[0];
			ins_nr = 1;
			goto next_slot;
		}

		/* not adjacent: flush the pending run, then start one at this slot */
		ret = copy_items(trans, inode, dst_path, path, &last_extent,
				 ins_start_slot, ins_nr, inode_only,
				 logged_isize);
		if (ret < 0) {
			err = ret;
			goto out_unlock;
		}
		if (ret) {
			ins_nr = 0;
			btrfs_release_path(path);
			continue;
		}
		ins_nr = 1;
		ins_start_slot = path->slots[0];
next_slot:

		/* stay in the same leaf while it has more items */
		nritems = btrfs_header_nritems(path->nodes[0]);
		path->slots[0]++;
		if (path->slots[0] < nritems) {
			btrfs_item_key_to_cpu(path->nodes[0], &min_key,
					      path->slots[0]);
			goto again;
		}
		/* end of the leaf: flush what is pending before releasing the path */
		if (ins_nr) {
			ret = copy_items(trans, inode, dst_path, path,
					 &last_extent, ins_start_slot,
					 ins_nr, inode_only, logged_isize);
			if (ret < 0) {
				err = ret;
				goto out_unlock;
			}
			ret = 0;
			ins_nr = 0;
		}
		btrfs_release_path(path);
next_key:
		/* advance min_key past the last key seen, stop when exhausted */
		if (min_key.offset < (u64)-1) {
			min_key.offset++;
		} else if (min_key.type < max_key.type) {
			min_key.type++;
			min_key.offset = 0;
		} else {
			break;
		}
	}
	/* flush a run left pending by one of the breaks out of the loop */
	if (ins_nr) {
		ret = copy_items(trans, inode, dst_path, path, &last_extent,
				 ins_start_slot, ins_nr, inode_only,
				 logged_isize);
		if (ret < 0) {
			err = ret;
			goto out_unlock;
		}
		ret = 0;
		ins_nr = 0;
	}

	btrfs_release_path(path);
	btrfs_release_path(dst_path);
	err = btrfs_log_all_xattrs(trans, root, inode, path, dst_path);
	if (err)
		goto out_unlock;
	xattrs_logged = true;
	if (max_key.type >= BTRFS_EXTENT_DATA_KEY && !fast_search) {
		btrfs_release_path(path);
		btrfs_release_path(dst_path);
		err = btrfs_log_trailing_hole(trans, root, inode, path);
		if (err)
			goto out_unlock;
	}
log_extents:
	btrfs_release_path(path);
	btrfs_release_path(dst_path);
	/*
	 * The copy loop did not come across the inode item (or was skipped),
	 * so log the inode item explicitly, and the xattrs too if the code
	 * above did not already do so.
	 */
	if (need_log_inode_item) {
		err = log_inode_item(trans, log, dst_path, inode);
		if (!err && !xattrs_logged) {
			err = btrfs_log_all_xattrs(trans, root, inode, path,
						   dst_path);
			btrfs_release_path(path);
		}
		if (err)
			goto out_unlock;
	}
	if (fast_search) {
		ret = btrfs_log_changed_extents(trans, root, inode, dst_path,
						ctx, start, end);
		if (ret) {
			err = ret;
			goto out_unlock;
		}
	} else if (inode_only == LOG_INODE_ALL) {
		struct extent_map *em, *n;

		write_lock(&em_tree->lock);
		/*
		 * We can't just remove every em if we're called for a ranged
		 * fsync - that is, one that doesn't cover the whole possible
		 * file range (0 to LLONG_MAX). This is because we can have
		 * em's that fall outside the range we're logging and therefore
		 * their ordered operations haven't completed yet
		 * (btrfs_finish_ordered_io() not invoked yet). This means we
		 * didn't get their respective file extent item in the fs/subvol
		 * tree yet, and need to let the next fast fsync (one which
		 * consults the list of modified extent maps) find the em so
		 * that it logs a matching file extent item and waits for the
		 * respective ordered operation to complete (if it's still
		 * running).
		 *
		 * Removing every em outside the range we're logging would make
		 * the next fast fsync not log their matching file extent items,
		 * therefore making us lose data after a log replay.
		 */
		list_for_each_entry_safe(em, n, &em_tree->modified_extents,
					 list) {
			const u64 mod_end = em->mod_start + em->mod_len - 1;

			if (em->mod_start >= start && mod_end <= end)
				list_del_init(&em->list);
		}
		write_unlock(&em_tree->lock);
	}

	if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->vfs_inode.i_mode)) {
		ret = log_directory_changes(trans, root, inode, path, dst_path,
					    ctx);
		if (ret) {
			err = ret;
			goto out_unlock;
		}
	}

	/* only reached on success: record the transaction this inode was logged in */
	spin_lock(&inode->lock);
	inode->logged_trans = trans->transid;
	inode->last_log_commit = inode->last_sub_trans;
	spin_unlock(&inode->lock);
out_unlock:
	mutex_unlock(&inode->log_mutex);

	btrfs_free_path(path);
	btrfs_free_path(dst_path);
	return err;
}

/*
 * Check if we must fallback to a transaction commit when logging an inode.
 * This must be called after logging the inode and is used only in the context
 * where fsyncing an inode requires logging some other inode - in which case we
 * can't lock the i_mutex of each other inode we need to log as that can lead
 * to deadlocks with concurrent fsync against other inodes (as we can log
 * inodes up or down in the hierarchy) or rename operations for example. So
 * we take the log_mutex of the inode after we have logged it and then check for
 * its last_unlink_trans value - this is safe because any task setting
 * last_unlink_trans must take the log_mutex and it must do this before it does
 * the actual unlink operation, so if we do this check before a concurrent task
 * sets last_unlink_trans it means we've logged a consistent version/state of
 * all the inode items, otherwise we are not sure and must do a transaction
 * commit (the concurrent task might have only updated last_unlink_trans before
 * we logged the inode or it might have also done the unlink).
51662be63d5cSFilipe Manana */ 51672be63d5cSFilipe Manana static bool btrfs_must_commit_transaction(struct btrfs_trans_handle *trans, 5168ab1717b2SNikolay Borisov struct btrfs_inode *inode) 51692be63d5cSFilipe Manana { 5170ab1717b2SNikolay Borisov struct btrfs_fs_info *fs_info = inode->root->fs_info; 51712be63d5cSFilipe Manana bool ret = false; 51722be63d5cSFilipe Manana 5173ab1717b2SNikolay Borisov mutex_lock(&inode->log_mutex); 5174ab1717b2SNikolay Borisov if (inode->last_unlink_trans > fs_info->last_trans_committed) { 51752be63d5cSFilipe Manana /* 51762be63d5cSFilipe Manana * Make sure any commits to the log are forced to be full 51772be63d5cSFilipe Manana * commits. 51782be63d5cSFilipe Manana */ 51792be63d5cSFilipe Manana btrfs_set_log_full_commit(fs_info, trans); 51802be63d5cSFilipe Manana ret = true; 51812be63d5cSFilipe Manana } 5182ab1717b2SNikolay Borisov mutex_unlock(&inode->log_mutex); 51832be63d5cSFilipe Manana 51842be63d5cSFilipe Manana return ret; 51852be63d5cSFilipe Manana } 51862be63d5cSFilipe Manana 51872be63d5cSFilipe Manana /* 518812fcfd22SChris Mason * follow the dentry parent pointers up the chain and see if any 518912fcfd22SChris Mason * of the directories in it require a full commit before they can 519012fcfd22SChris Mason * be logged. Returns zero if nothing special needs to be done or 1 if 519112fcfd22SChris Mason * a full commit is required. 
519212fcfd22SChris Mason */ 519312fcfd22SChris Mason static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans, 5194aefa6115SNikolay Borisov struct btrfs_inode *inode, 519512fcfd22SChris Mason struct dentry *parent, 519612fcfd22SChris Mason struct super_block *sb, 519712fcfd22SChris Mason u64 last_committed) 5198e02119d5SChris Mason { 519912fcfd22SChris Mason int ret = 0; 52006a912213SJosef Bacik struct dentry *old_parent = NULL; 5201aefa6115SNikolay Borisov struct btrfs_inode *orig_inode = inode; 5202e02119d5SChris Mason 5203af4176b4SChris Mason /* 5204af4176b4SChris Mason * for regular files, if its inode is already on disk, we don't 5205af4176b4SChris Mason * have to worry about the parents at all. This is because 5206af4176b4SChris Mason * we can use the last_unlink_trans field to record renames 5207af4176b4SChris Mason * and other fun in this file. 5208af4176b4SChris Mason */ 5209aefa6115SNikolay Borisov if (S_ISREG(inode->vfs_inode.i_mode) && 5210aefa6115SNikolay Borisov inode->generation <= last_committed && 5211aefa6115SNikolay Borisov inode->last_unlink_trans <= last_committed) 5212af4176b4SChris Mason goto out; 5213af4176b4SChris Mason 5214aefa6115SNikolay Borisov if (!S_ISDIR(inode->vfs_inode.i_mode)) { 5215fc64005cSAl Viro if (!parent || d_really_is_negative(parent) || sb != parent->d_sb) 521612fcfd22SChris Mason goto out; 5217aefa6115SNikolay Borisov inode = BTRFS_I(d_inode(parent)); 521812fcfd22SChris Mason } 521912fcfd22SChris Mason 522012fcfd22SChris Mason while (1) { 5221de2b530bSJosef Bacik /* 5222de2b530bSJosef Bacik * If we are logging a directory then we start with our inode, 522301327610SNicholas D Steeves * not our parent's inode, so we need to skip setting the 5224de2b530bSJosef Bacik * logged_trans so that further down in the log code we don't 5225de2b530bSJosef Bacik * think this inode has already been logged. 
5226de2b530bSJosef Bacik */ 5227de2b530bSJosef Bacik if (inode != orig_inode) 5228aefa6115SNikolay Borisov inode->logged_trans = trans->transid; 522912fcfd22SChris Mason smp_mb(); 523012fcfd22SChris Mason 5231aefa6115SNikolay Borisov if (btrfs_must_commit_transaction(trans, inode)) { 523212fcfd22SChris Mason ret = 1; 523312fcfd22SChris Mason break; 523412fcfd22SChris Mason } 523512fcfd22SChris Mason 5236fc64005cSAl Viro if (!parent || d_really_is_negative(parent) || sb != parent->d_sb) 523712fcfd22SChris Mason break; 523812fcfd22SChris Mason 523944f714daSFilipe Manana if (IS_ROOT(parent)) { 5240aefa6115SNikolay Borisov inode = BTRFS_I(d_inode(parent)); 5241aefa6115SNikolay Borisov if (btrfs_must_commit_transaction(trans, inode)) 524244f714daSFilipe Manana ret = 1; 524312fcfd22SChris Mason break; 524444f714daSFilipe Manana } 524512fcfd22SChris Mason 52466a912213SJosef Bacik parent = dget_parent(parent); 52476a912213SJosef Bacik dput(old_parent); 52486a912213SJosef Bacik old_parent = parent; 5249aefa6115SNikolay Borisov inode = BTRFS_I(d_inode(parent)); 525012fcfd22SChris Mason 525112fcfd22SChris Mason } 52526a912213SJosef Bacik dput(old_parent); 525312fcfd22SChris Mason out: 5254e02119d5SChris Mason return ret; 5255e02119d5SChris Mason } 5256e02119d5SChris Mason 52572f2ff0eeSFilipe Manana struct btrfs_dir_list { 52582f2ff0eeSFilipe Manana u64 ino; 52592f2ff0eeSFilipe Manana struct list_head list; 52602f2ff0eeSFilipe Manana }; 52612f2ff0eeSFilipe Manana 52622f2ff0eeSFilipe Manana /* 52632f2ff0eeSFilipe Manana * Log the inodes of the new dentries of a directory. See log_dir_items() for 52642f2ff0eeSFilipe Manana * details about the why it is needed. 52652f2ff0eeSFilipe Manana * This is a recursive operation - if an existing dentry corresponds to a 52662f2ff0eeSFilipe Manana * directory, that directory's new entries are logged too (same behaviour as 52672f2ff0eeSFilipe Manana * ext3/4, xfs, f2fs, reiserfs, nilfs2). 
 * Note that when logging the inodes the dentries point to we do not lock
 * their i_mutex, otherwise lockdep complains about the following circular
 * lock dependency / possible deadlock:
 *
 *        CPU0                                        CPU1
 *        ----                                        ----
 * lock(&type->i_mutex_dir_key#3/2);
 *                                            lock(sb_internal#2);
 *                                            lock(&type->i_mutex_dir_key#3/2);
 * lock(&sb->s_type->i_mutex_key#14);
 *
 * Where sb_internal is the lock (a counter that works as a lock) acquired by
 * sb_start_intwrite() in btrfs_start_transaction().
 * Not locking i_mutex of the inodes is still safe because:
 *
 * 1) For regular files we log with a mode of LOG_INODE_EXISTS. It's possible
 *    that while logging the inode new references (names) are added or removed
 *    from the inode, leaving the logged inode item with a link count that does
 *    not match the number of logged inode reference items. This is fine because
 *    at log replay time we compute the real number of links and correct the
 *    link count in the inode item (see replay_one_buffer() and
 *    link_to_fixup_dir());
 *
 * 2) For directories we log with a mode of LOG_INODE_ALL. It's possible that
 *    while logging the inode's items new items with keys BTRFS_DIR_ITEM_KEY and
 *    BTRFS_DIR_INDEX_KEY are added to fs/subvol tree and the logged inode item
 *    has a size that doesn't match the sum of the lengths of all the logged
 *    names. This does not result in a problem because if a dir_item key is
 *    logged but its matching dir_index key is not logged, at log replay time we
 *    don't use it to replay the respective name (see replay_one_name()). On the
 *    other hand if only the dir_index key ends up being logged, the respective
 *    name is added to the fs/subvol tree with both the dir_item and dir_index
 *    keys created (see replay_one_name()).
 *    The directory's inode item with a wrong i_size is not a problem as well,
 *    since we don't use it at log replay time to set the i_size in the inode
 *    item of the fs/subvol tree (see overwrite_item()).
53032f2ff0eeSFilipe Manana */ 53042f2ff0eeSFilipe Manana static int log_new_dir_dentries(struct btrfs_trans_handle *trans, 53052f2ff0eeSFilipe Manana struct btrfs_root *root, 530651cc0d32SNikolay Borisov struct btrfs_inode *start_inode, 53072f2ff0eeSFilipe Manana struct btrfs_log_ctx *ctx) 53082f2ff0eeSFilipe Manana { 53090b246afaSJeff Mahoney struct btrfs_fs_info *fs_info = root->fs_info; 53102f2ff0eeSFilipe Manana struct btrfs_root *log = root->log_root; 53112f2ff0eeSFilipe Manana struct btrfs_path *path; 53122f2ff0eeSFilipe Manana LIST_HEAD(dir_list); 53132f2ff0eeSFilipe Manana struct btrfs_dir_list *dir_elem; 53142f2ff0eeSFilipe Manana int ret = 0; 53152f2ff0eeSFilipe Manana 53162f2ff0eeSFilipe Manana path = btrfs_alloc_path(); 53172f2ff0eeSFilipe Manana if (!path) 53182f2ff0eeSFilipe Manana return -ENOMEM; 53192f2ff0eeSFilipe Manana 53202f2ff0eeSFilipe Manana dir_elem = kmalloc(sizeof(*dir_elem), GFP_NOFS); 53212f2ff0eeSFilipe Manana if (!dir_elem) { 53222f2ff0eeSFilipe Manana btrfs_free_path(path); 53232f2ff0eeSFilipe Manana return -ENOMEM; 53242f2ff0eeSFilipe Manana } 532551cc0d32SNikolay Borisov dir_elem->ino = btrfs_ino(start_inode); 53262f2ff0eeSFilipe Manana list_add_tail(&dir_elem->list, &dir_list); 53272f2ff0eeSFilipe Manana 53282f2ff0eeSFilipe Manana while (!list_empty(&dir_list)) { 53292f2ff0eeSFilipe Manana struct extent_buffer *leaf; 53302f2ff0eeSFilipe Manana struct btrfs_key min_key; 53312f2ff0eeSFilipe Manana int nritems; 53322f2ff0eeSFilipe Manana int i; 53332f2ff0eeSFilipe Manana 53342f2ff0eeSFilipe Manana dir_elem = list_first_entry(&dir_list, struct btrfs_dir_list, 53352f2ff0eeSFilipe Manana list); 53362f2ff0eeSFilipe Manana if (ret) 53372f2ff0eeSFilipe Manana goto next_dir_inode; 53382f2ff0eeSFilipe Manana 53392f2ff0eeSFilipe Manana min_key.objectid = dir_elem->ino; 53402f2ff0eeSFilipe Manana min_key.type = BTRFS_DIR_ITEM_KEY; 53412f2ff0eeSFilipe Manana min_key.offset = 0; 53422f2ff0eeSFilipe Manana again: 53432f2ff0eeSFilipe Manana 
btrfs_release_path(path); 53442f2ff0eeSFilipe Manana ret = btrfs_search_forward(log, &min_key, path, trans->transid); 53452f2ff0eeSFilipe Manana if (ret < 0) { 53462f2ff0eeSFilipe Manana goto next_dir_inode; 53472f2ff0eeSFilipe Manana } else if (ret > 0) { 53482f2ff0eeSFilipe Manana ret = 0; 53492f2ff0eeSFilipe Manana goto next_dir_inode; 53502f2ff0eeSFilipe Manana } 53512f2ff0eeSFilipe Manana 53522f2ff0eeSFilipe Manana process_leaf: 53532f2ff0eeSFilipe Manana leaf = path->nodes[0]; 53542f2ff0eeSFilipe Manana nritems = btrfs_header_nritems(leaf); 53552f2ff0eeSFilipe Manana for (i = path->slots[0]; i < nritems; i++) { 53562f2ff0eeSFilipe Manana struct btrfs_dir_item *di; 53572f2ff0eeSFilipe Manana struct btrfs_key di_key; 53582f2ff0eeSFilipe Manana struct inode *di_inode; 53592f2ff0eeSFilipe Manana struct btrfs_dir_list *new_dir_elem; 53602f2ff0eeSFilipe Manana int log_mode = LOG_INODE_EXISTS; 53612f2ff0eeSFilipe Manana int type; 53622f2ff0eeSFilipe Manana 53632f2ff0eeSFilipe Manana btrfs_item_key_to_cpu(leaf, &min_key, i); 53642f2ff0eeSFilipe Manana if (min_key.objectid != dir_elem->ino || 53652f2ff0eeSFilipe Manana min_key.type != BTRFS_DIR_ITEM_KEY) 53662f2ff0eeSFilipe Manana goto next_dir_inode; 53672f2ff0eeSFilipe Manana 53682f2ff0eeSFilipe Manana di = btrfs_item_ptr(leaf, i, struct btrfs_dir_item); 53692f2ff0eeSFilipe Manana type = btrfs_dir_type(leaf, di); 53702f2ff0eeSFilipe Manana if (btrfs_dir_transid(leaf, di) < trans->transid && 53712f2ff0eeSFilipe Manana type != BTRFS_FT_DIR) 53722f2ff0eeSFilipe Manana continue; 53732f2ff0eeSFilipe Manana btrfs_dir_item_key_to_cpu(leaf, di, &di_key); 53742f2ff0eeSFilipe Manana if (di_key.type == BTRFS_ROOT_ITEM_KEY) 53752f2ff0eeSFilipe Manana continue; 53762f2ff0eeSFilipe Manana 5377ec125cfbSRobbie Ko btrfs_release_path(path); 53780b246afaSJeff Mahoney di_inode = btrfs_iget(fs_info->sb, &di_key, root, NULL); 53792f2ff0eeSFilipe Manana if (IS_ERR(di_inode)) { 53802f2ff0eeSFilipe Manana ret = PTR_ERR(di_inode); 
53812f2ff0eeSFilipe Manana goto next_dir_inode; 53822f2ff0eeSFilipe Manana } 53832f2ff0eeSFilipe Manana 53840f8939b8SNikolay Borisov if (btrfs_inode_in_log(BTRFS_I(di_inode), trans->transid)) { 53852f2ff0eeSFilipe Manana iput(di_inode); 5386ec125cfbSRobbie Ko break; 53872f2ff0eeSFilipe Manana } 53882f2ff0eeSFilipe Manana 53892f2ff0eeSFilipe Manana ctx->log_new_dentries = false; 53903f9749f6SFilipe Manana if (type == BTRFS_FT_DIR || type == BTRFS_FT_SYMLINK) 53912f2ff0eeSFilipe Manana log_mode = LOG_INODE_ALL; 5392a59108a7SNikolay Borisov ret = btrfs_log_inode(trans, root, BTRFS_I(di_inode), 53932f2ff0eeSFilipe Manana log_mode, 0, LLONG_MAX, ctx); 53942be63d5cSFilipe Manana if (!ret && 5395ab1717b2SNikolay Borisov btrfs_must_commit_transaction(trans, BTRFS_I(di_inode))) 53962be63d5cSFilipe Manana ret = 1; 53972f2ff0eeSFilipe Manana iput(di_inode); 53982f2ff0eeSFilipe Manana if (ret) 53992f2ff0eeSFilipe Manana goto next_dir_inode; 54002f2ff0eeSFilipe Manana if (ctx->log_new_dentries) { 54012f2ff0eeSFilipe Manana new_dir_elem = kmalloc(sizeof(*new_dir_elem), 54022f2ff0eeSFilipe Manana GFP_NOFS); 54032f2ff0eeSFilipe Manana if (!new_dir_elem) { 54042f2ff0eeSFilipe Manana ret = -ENOMEM; 54052f2ff0eeSFilipe Manana goto next_dir_inode; 54062f2ff0eeSFilipe Manana } 54072f2ff0eeSFilipe Manana new_dir_elem->ino = di_key.objectid; 54082f2ff0eeSFilipe Manana list_add_tail(&new_dir_elem->list, &dir_list); 54092f2ff0eeSFilipe Manana } 54102f2ff0eeSFilipe Manana break; 54112f2ff0eeSFilipe Manana } 54122f2ff0eeSFilipe Manana if (i == nritems) { 54132f2ff0eeSFilipe Manana ret = btrfs_next_leaf(log, path); 54142f2ff0eeSFilipe Manana if (ret < 0) { 54152f2ff0eeSFilipe Manana goto next_dir_inode; 54162f2ff0eeSFilipe Manana } else if (ret > 0) { 54172f2ff0eeSFilipe Manana ret = 0; 54182f2ff0eeSFilipe Manana goto next_dir_inode; 54192f2ff0eeSFilipe Manana } 54202f2ff0eeSFilipe Manana goto process_leaf; 54212f2ff0eeSFilipe Manana } 54222f2ff0eeSFilipe Manana if (min_key.offset < (u64)-1) 
{ 54232f2ff0eeSFilipe Manana min_key.offset++; 54242f2ff0eeSFilipe Manana goto again; 54252f2ff0eeSFilipe Manana } 54262f2ff0eeSFilipe Manana next_dir_inode: 54272f2ff0eeSFilipe Manana list_del(&dir_elem->list); 54282f2ff0eeSFilipe Manana kfree(dir_elem); 54292f2ff0eeSFilipe Manana } 54302f2ff0eeSFilipe Manana 54312f2ff0eeSFilipe Manana btrfs_free_path(path); 54322f2ff0eeSFilipe Manana return ret; 54332f2ff0eeSFilipe Manana } 54342f2ff0eeSFilipe Manana 543518aa0922SFilipe Manana static int btrfs_log_all_parents(struct btrfs_trans_handle *trans, 5436d0a0b78dSNikolay Borisov struct btrfs_inode *inode, 543718aa0922SFilipe Manana struct btrfs_log_ctx *ctx) 543818aa0922SFilipe Manana { 54393ffbd68cSDavid Sterba struct btrfs_fs_info *fs_info = trans->fs_info; 544018aa0922SFilipe Manana int ret; 544118aa0922SFilipe Manana struct btrfs_path *path; 544218aa0922SFilipe Manana struct btrfs_key key; 5443d0a0b78dSNikolay Borisov struct btrfs_root *root = inode->root; 5444d0a0b78dSNikolay Borisov const u64 ino = btrfs_ino(inode); 544518aa0922SFilipe Manana 544618aa0922SFilipe Manana path = btrfs_alloc_path(); 544718aa0922SFilipe Manana if (!path) 544818aa0922SFilipe Manana return -ENOMEM; 544918aa0922SFilipe Manana path->skip_locking = 1; 545018aa0922SFilipe Manana path->search_commit_root = 1; 545118aa0922SFilipe Manana 545218aa0922SFilipe Manana key.objectid = ino; 545318aa0922SFilipe Manana key.type = BTRFS_INODE_REF_KEY; 545418aa0922SFilipe Manana key.offset = 0; 545518aa0922SFilipe Manana ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 545618aa0922SFilipe Manana if (ret < 0) 545718aa0922SFilipe Manana goto out; 545818aa0922SFilipe Manana 545918aa0922SFilipe Manana while (true) { 546018aa0922SFilipe Manana struct extent_buffer *leaf = path->nodes[0]; 546118aa0922SFilipe Manana int slot = path->slots[0]; 546218aa0922SFilipe Manana u32 cur_offset = 0; 546318aa0922SFilipe Manana u32 item_size; 546418aa0922SFilipe Manana unsigned long ptr; 546518aa0922SFilipe Manana 
546618aa0922SFilipe Manana if (slot >= btrfs_header_nritems(leaf)) { 546718aa0922SFilipe Manana ret = btrfs_next_leaf(root, path); 546818aa0922SFilipe Manana if (ret < 0) 546918aa0922SFilipe Manana goto out; 547018aa0922SFilipe Manana else if (ret > 0) 547118aa0922SFilipe Manana break; 547218aa0922SFilipe Manana continue; 547318aa0922SFilipe Manana } 547418aa0922SFilipe Manana 547518aa0922SFilipe Manana btrfs_item_key_to_cpu(leaf, &key, slot); 547618aa0922SFilipe Manana /* BTRFS_INODE_EXTREF_KEY is BTRFS_INODE_REF_KEY + 1 */ 547718aa0922SFilipe Manana if (key.objectid != ino || key.type > BTRFS_INODE_EXTREF_KEY) 547818aa0922SFilipe Manana break; 547918aa0922SFilipe Manana 548018aa0922SFilipe Manana item_size = btrfs_item_size_nr(leaf, slot); 548118aa0922SFilipe Manana ptr = btrfs_item_ptr_offset(leaf, slot); 548218aa0922SFilipe Manana while (cur_offset < item_size) { 548318aa0922SFilipe Manana struct btrfs_key inode_key; 548418aa0922SFilipe Manana struct inode *dir_inode; 548518aa0922SFilipe Manana 548618aa0922SFilipe Manana inode_key.type = BTRFS_INODE_ITEM_KEY; 548718aa0922SFilipe Manana inode_key.offset = 0; 548818aa0922SFilipe Manana 548918aa0922SFilipe Manana if (key.type == BTRFS_INODE_EXTREF_KEY) { 549018aa0922SFilipe Manana struct btrfs_inode_extref *extref; 549118aa0922SFilipe Manana 549218aa0922SFilipe Manana extref = (struct btrfs_inode_extref *) 549318aa0922SFilipe Manana (ptr + cur_offset); 549418aa0922SFilipe Manana inode_key.objectid = btrfs_inode_extref_parent( 549518aa0922SFilipe Manana leaf, extref); 549618aa0922SFilipe Manana cur_offset += sizeof(*extref); 549718aa0922SFilipe Manana cur_offset += btrfs_inode_extref_name_len(leaf, 549818aa0922SFilipe Manana extref); 549918aa0922SFilipe Manana } else { 550018aa0922SFilipe Manana inode_key.objectid = key.offset; 550118aa0922SFilipe Manana cur_offset = item_size; 550218aa0922SFilipe Manana } 550318aa0922SFilipe Manana 55040b246afaSJeff Mahoney dir_inode = btrfs_iget(fs_info->sb, &inode_key, 
550518aa0922SFilipe Manana root, NULL); 550618aa0922SFilipe Manana /* If parent inode was deleted, skip it. */ 550718aa0922SFilipe Manana if (IS_ERR(dir_inode)) 550818aa0922SFilipe Manana continue; 550918aa0922SFilipe Manana 5510657ed1aaSFilipe Manana if (ctx) 5511657ed1aaSFilipe Manana ctx->log_new_dentries = false; 5512a59108a7SNikolay Borisov ret = btrfs_log_inode(trans, root, BTRFS_I(dir_inode), 551318aa0922SFilipe Manana LOG_INODE_ALL, 0, LLONG_MAX, ctx); 55142be63d5cSFilipe Manana if (!ret && 5515ab1717b2SNikolay Borisov btrfs_must_commit_transaction(trans, BTRFS_I(dir_inode))) 55162be63d5cSFilipe Manana ret = 1; 5517657ed1aaSFilipe Manana if (!ret && ctx && ctx->log_new_dentries) 5518657ed1aaSFilipe Manana ret = log_new_dir_dentries(trans, root, 551951cc0d32SNikolay Borisov BTRFS_I(dir_inode), ctx); 552018aa0922SFilipe Manana iput(dir_inode); 552118aa0922SFilipe Manana if (ret) 552218aa0922SFilipe Manana goto out; 552318aa0922SFilipe Manana } 552418aa0922SFilipe Manana path->slots[0]++; 552518aa0922SFilipe Manana } 552618aa0922SFilipe Manana ret = 0; 552718aa0922SFilipe Manana out: 552818aa0922SFilipe Manana btrfs_free_path(path); 552918aa0922SFilipe Manana return ret; 553018aa0922SFilipe Manana } 553118aa0922SFilipe Manana 5532e02119d5SChris Mason /* 5533e02119d5SChris Mason * helper function around btrfs_log_inode to make sure newly created 5534e02119d5SChris Mason * parent directories also end up in the log. 
A minimal inode and backref 5535e02119d5SChris Mason * only logging is done of any parent directories that are older than 5536e02119d5SChris Mason * the last committed transaction 5537e02119d5SChris Mason */ 553848a3b636SEric Sandeen static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, 553919df27a9SNikolay Borisov struct btrfs_inode *inode, 554049dae1bcSFilipe Manana struct dentry *parent, 554149dae1bcSFilipe Manana const loff_t start, 554249dae1bcSFilipe Manana const loff_t end, 554341a1eadaSEdmund Nadolski int inode_only, 55448b050d35SMiao Xie struct btrfs_log_ctx *ctx) 5545e02119d5SChris Mason { 5546f882274bSNikolay Borisov struct btrfs_root *root = inode->root; 55470b246afaSJeff Mahoney struct btrfs_fs_info *fs_info = root->fs_info; 5548e02119d5SChris Mason struct super_block *sb; 55496a912213SJosef Bacik struct dentry *old_parent = NULL; 555012fcfd22SChris Mason int ret = 0; 55510b246afaSJeff Mahoney u64 last_committed = fs_info->last_trans_committed; 55522f2ff0eeSFilipe Manana bool log_dentries = false; 555319df27a9SNikolay Borisov struct btrfs_inode *orig_inode = inode; 555412fcfd22SChris Mason 555519df27a9SNikolay Borisov sb = inode->vfs_inode.i_sb; 555612fcfd22SChris Mason 55570b246afaSJeff Mahoney if (btrfs_test_opt(fs_info, NOTREELOG)) { 55583a5e1404SSage Weil ret = 1; 55593a5e1404SSage Weil goto end_no_trans; 55603a5e1404SSage Weil } 55613a5e1404SSage Weil 5562995946ddSMiao Xie /* 5563995946ddSMiao Xie * The prev transaction commit doesn't complete, we need do 5564995946ddSMiao Xie * full commit by ourselves. 
5565995946ddSMiao Xie */ 55660b246afaSJeff Mahoney if (fs_info->last_trans_log_full_commit > 55670b246afaSJeff Mahoney fs_info->last_trans_committed) { 556812fcfd22SChris Mason ret = 1; 556912fcfd22SChris Mason goto end_no_trans; 557012fcfd22SChris Mason } 557112fcfd22SChris Mason 5572f882274bSNikolay Borisov if (btrfs_root_refs(&root->root_item) == 0) { 557376dda93cSYan, Zheng ret = 1; 557476dda93cSYan, Zheng goto end_no_trans; 557576dda93cSYan, Zheng } 557676dda93cSYan, Zheng 557719df27a9SNikolay Borisov ret = check_parent_dirs_for_sync(trans, inode, parent, sb, 557819df27a9SNikolay Borisov last_committed); 557912fcfd22SChris Mason if (ret) 558012fcfd22SChris Mason goto end_no_trans; 5581e02119d5SChris Mason 558219df27a9SNikolay Borisov if (btrfs_inode_in_log(inode, trans->transid)) { 5583257c62e1SChris Mason ret = BTRFS_NO_LOG_SYNC; 5584257c62e1SChris Mason goto end_no_trans; 5585257c62e1SChris Mason } 5586257c62e1SChris Mason 55878b050d35SMiao Xie ret = start_log_trans(trans, root, ctx); 55884a500fd1SYan, Zheng if (ret) 5589e87ac136SMiao Xie goto end_no_trans; 559012fcfd22SChris Mason 559119df27a9SNikolay Borisov ret = btrfs_log_inode(trans, root, inode, inode_only, start, end, ctx); 55924a500fd1SYan, Zheng if (ret) 55934a500fd1SYan, Zheng goto end_trans; 5594e02119d5SChris Mason 5595af4176b4SChris Mason /* 5596af4176b4SChris Mason * for regular files, if its inode is already on disk, we don't 5597af4176b4SChris Mason * have to worry about the parents at all. This is because 5598af4176b4SChris Mason * we can use the last_unlink_trans field to record renames 5599af4176b4SChris Mason * and other fun in this file. 
5600af4176b4SChris Mason */ 560119df27a9SNikolay Borisov if (S_ISREG(inode->vfs_inode.i_mode) && 560219df27a9SNikolay Borisov inode->generation <= last_committed && 560319df27a9SNikolay Borisov inode->last_unlink_trans <= last_committed) { 56044a500fd1SYan, Zheng ret = 0; 56054a500fd1SYan, Zheng goto end_trans; 56064a500fd1SYan, Zheng } 5607af4176b4SChris Mason 560819df27a9SNikolay Borisov if (S_ISDIR(inode->vfs_inode.i_mode) && ctx && ctx->log_new_dentries) 56092f2ff0eeSFilipe Manana log_dentries = true; 56102f2ff0eeSFilipe Manana 561118aa0922SFilipe Manana /* 561201327610SNicholas D Steeves * On unlink we must make sure all our current and old parent directory 561318aa0922SFilipe Manana * inodes are fully logged. This is to prevent leaving dangling 561418aa0922SFilipe Manana * directory index entries in directories that were our parents but are 561518aa0922SFilipe Manana * not anymore. Not doing this results in old parent directory being 561618aa0922SFilipe Manana * impossible to delete after log replay (rmdir will always fail with 561718aa0922SFilipe Manana * error -ENOTEMPTY). 561818aa0922SFilipe Manana * 561918aa0922SFilipe Manana * Example 1: 562018aa0922SFilipe Manana * 562118aa0922SFilipe Manana * mkdir testdir 562218aa0922SFilipe Manana * touch testdir/foo 562318aa0922SFilipe Manana * ln testdir/foo testdir/bar 562418aa0922SFilipe Manana * sync 562518aa0922SFilipe Manana * unlink testdir/bar 562618aa0922SFilipe Manana * xfs_io -c fsync testdir/foo 562718aa0922SFilipe Manana * <power failure> 562818aa0922SFilipe Manana * mount fs, triggers log replay 562918aa0922SFilipe Manana * 563018aa0922SFilipe Manana * If we don't log the parent directory (testdir), after log replay the 563118aa0922SFilipe Manana * directory still has an entry pointing to the file inode using the bar 563218aa0922SFilipe Manana * name, but a matching BTRFS_INODE_[REF|EXTREF]_KEY does not exist and 563318aa0922SFilipe Manana * the file inode has a link count of 1. 
563418aa0922SFilipe Manana * 563518aa0922SFilipe Manana * Example 2: 563618aa0922SFilipe Manana * 563718aa0922SFilipe Manana * mkdir testdir 563818aa0922SFilipe Manana * touch foo 563918aa0922SFilipe Manana * ln foo testdir/foo2 564018aa0922SFilipe Manana * ln foo testdir/foo3 564118aa0922SFilipe Manana * sync 564218aa0922SFilipe Manana * unlink testdir/foo3 564318aa0922SFilipe Manana * xfs_io -c fsync foo 564418aa0922SFilipe Manana * <power failure> 564518aa0922SFilipe Manana * mount fs, triggers log replay 564618aa0922SFilipe Manana * 564718aa0922SFilipe Manana * Similar as the first example, after log replay the parent directory 564818aa0922SFilipe Manana * testdir still has an entry pointing to the inode file with name foo3 564918aa0922SFilipe Manana * but the file inode does not have a matching BTRFS_INODE_REF_KEY item 565018aa0922SFilipe Manana * and has a link count of 2. 565118aa0922SFilipe Manana */ 565219df27a9SNikolay Borisov if (inode->last_unlink_trans > last_committed) { 565318aa0922SFilipe Manana ret = btrfs_log_all_parents(trans, orig_inode, ctx); 565418aa0922SFilipe Manana if (ret) 565518aa0922SFilipe Manana goto end_trans; 565618aa0922SFilipe Manana } 565718aa0922SFilipe Manana 565812fcfd22SChris Mason while (1) { 5659fc64005cSAl Viro if (!parent || d_really_is_negative(parent) || sb != parent->d_sb) 5660e02119d5SChris Mason break; 5661e02119d5SChris Mason 566219df27a9SNikolay Borisov inode = BTRFS_I(d_inode(parent)); 566319df27a9SNikolay Borisov if (root != inode->root) 566476dda93cSYan, Zheng break; 566576dda93cSYan, Zheng 566619df27a9SNikolay Borisov if (inode->generation > last_committed) { 566719df27a9SNikolay Borisov ret = btrfs_log_inode(trans, root, inode, 566819df27a9SNikolay Borisov LOG_INODE_EXISTS, 0, LLONG_MAX, ctx); 56694a500fd1SYan, Zheng if (ret) 56704a500fd1SYan, Zheng goto end_trans; 5671e02119d5SChris Mason } 567276dda93cSYan, Zheng if (IS_ROOT(parent)) 567312fcfd22SChris Mason break; 567412fcfd22SChris Mason 56756a912213SJosef 
Bacik parent = dget_parent(parent); 56766a912213SJosef Bacik dput(old_parent); 56776a912213SJosef Bacik old_parent = parent; 567812fcfd22SChris Mason } 56792f2ff0eeSFilipe Manana if (log_dentries) 568019df27a9SNikolay Borisov ret = log_new_dir_dentries(trans, root, orig_inode, ctx); 56812f2ff0eeSFilipe Manana else 568212fcfd22SChris Mason ret = 0; 56834a500fd1SYan, Zheng end_trans: 56846a912213SJosef Bacik dput(old_parent); 56854a500fd1SYan, Zheng if (ret < 0) { 56860b246afaSJeff Mahoney btrfs_set_log_full_commit(fs_info, trans); 56874a500fd1SYan, Zheng ret = 1; 56884a500fd1SYan, Zheng } 56898b050d35SMiao Xie 56908b050d35SMiao Xie if (ret) 56918b050d35SMiao Xie btrfs_remove_log_ctx(root, ctx); 569212fcfd22SChris Mason btrfs_end_log_trans(root); 569312fcfd22SChris Mason end_no_trans: 569412fcfd22SChris Mason return ret; 5695e02119d5SChris Mason } 5696e02119d5SChris Mason 5697e02119d5SChris Mason /* 5698e02119d5SChris Mason * it is not safe to log dentry if the chunk root has added new 5699e02119d5SChris Mason * chunks. This returns 0 if the dentry was logged, and 1 otherwise. 5700e02119d5SChris Mason * If this returns 1, you must commit the transaction to safely get your 5701e02119d5SChris Mason * data on disk. 
5702e02119d5SChris Mason */ 5703e02119d5SChris Mason int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, 5704e5b84f7aSNikolay Borisov struct dentry *dentry, 570549dae1bcSFilipe Manana const loff_t start, 570649dae1bcSFilipe Manana const loff_t end, 57078b050d35SMiao Xie struct btrfs_log_ctx *ctx) 5708e02119d5SChris Mason { 57096a912213SJosef Bacik struct dentry *parent = dget_parent(dentry); 57106a912213SJosef Bacik int ret; 57116a912213SJosef Bacik 5712f882274bSNikolay Borisov ret = btrfs_log_inode_parent(trans, BTRFS_I(d_inode(dentry)), parent, 5713f882274bSNikolay Borisov start, end, LOG_INODE_ALL, ctx); 57146a912213SJosef Bacik dput(parent); 57156a912213SJosef Bacik 57166a912213SJosef Bacik return ret; 5717e02119d5SChris Mason } 5718e02119d5SChris Mason 5719e02119d5SChris Mason /* 5720e02119d5SChris Mason * should be called during mount to recover any replay any log trees 5721e02119d5SChris Mason * from the FS 5722e02119d5SChris Mason */ 5723e02119d5SChris Mason int btrfs_recover_log_trees(struct btrfs_root *log_root_tree) 5724e02119d5SChris Mason { 5725e02119d5SChris Mason int ret; 5726e02119d5SChris Mason struct btrfs_path *path; 5727e02119d5SChris Mason struct btrfs_trans_handle *trans; 5728e02119d5SChris Mason struct btrfs_key key; 5729e02119d5SChris Mason struct btrfs_key found_key; 5730e02119d5SChris Mason struct btrfs_key tmp_key; 5731e02119d5SChris Mason struct btrfs_root *log; 5732e02119d5SChris Mason struct btrfs_fs_info *fs_info = log_root_tree->fs_info; 5733e02119d5SChris Mason struct walk_control wc = { 5734e02119d5SChris Mason .process_func = process_one_buffer, 5735e02119d5SChris Mason .stage = 0, 5736e02119d5SChris Mason }; 5737e02119d5SChris Mason 5738e02119d5SChris Mason path = btrfs_alloc_path(); 5739db5b493aSTsutomu Itoh if (!path) 5740db5b493aSTsutomu Itoh return -ENOMEM; 5741db5b493aSTsutomu Itoh 5742afcdd129SJosef Bacik set_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags); 5743e02119d5SChris Mason 57444a500fd1SYan, Zheng trans = 
btrfs_start_transaction(fs_info->tree_root, 0); 574579787eaaSJeff Mahoney if (IS_ERR(trans)) { 574679787eaaSJeff Mahoney ret = PTR_ERR(trans); 574779787eaaSJeff Mahoney goto error; 574879787eaaSJeff Mahoney } 5749e02119d5SChris Mason 5750e02119d5SChris Mason wc.trans = trans; 5751e02119d5SChris Mason wc.pin = 1; 5752e02119d5SChris Mason 5753db5b493aSTsutomu Itoh ret = walk_log_tree(trans, log_root_tree, &wc); 575479787eaaSJeff Mahoney if (ret) { 57555d163e0eSJeff Mahoney btrfs_handle_fs_error(fs_info, ret, 57565d163e0eSJeff Mahoney "Failed to pin buffers while recovering log root tree."); 575779787eaaSJeff Mahoney goto error; 575879787eaaSJeff Mahoney } 5759e02119d5SChris Mason 5760e02119d5SChris Mason again: 5761e02119d5SChris Mason key.objectid = BTRFS_TREE_LOG_OBJECTID; 5762e02119d5SChris Mason key.offset = (u64)-1; 5763962a298fSDavid Sterba key.type = BTRFS_ROOT_ITEM_KEY; 5764e02119d5SChris Mason 5765e02119d5SChris Mason while (1) { 5766e02119d5SChris Mason ret = btrfs_search_slot(NULL, log_root_tree, &key, path, 0, 0); 576779787eaaSJeff Mahoney 576879787eaaSJeff Mahoney if (ret < 0) { 576934d97007SAnand Jain btrfs_handle_fs_error(fs_info, ret, 577079787eaaSJeff Mahoney "Couldn't find tree log root."); 577179787eaaSJeff Mahoney goto error; 577279787eaaSJeff Mahoney } 5773e02119d5SChris Mason if (ret > 0) { 5774e02119d5SChris Mason if (path->slots[0] == 0) 5775e02119d5SChris Mason break; 5776e02119d5SChris Mason path->slots[0]--; 5777e02119d5SChris Mason } 5778e02119d5SChris Mason btrfs_item_key_to_cpu(path->nodes[0], &found_key, 5779e02119d5SChris Mason path->slots[0]); 5780b3b4aa74SDavid Sterba btrfs_release_path(path); 5781e02119d5SChris Mason if (found_key.objectid != BTRFS_TREE_LOG_OBJECTID) 5782e02119d5SChris Mason break; 5783e02119d5SChris Mason 5784cb517eabSMiao Xie log = btrfs_read_fs_root(log_root_tree, &found_key); 578579787eaaSJeff Mahoney if (IS_ERR(log)) { 578679787eaaSJeff Mahoney ret = PTR_ERR(log); 578734d97007SAnand Jain 
btrfs_handle_fs_error(fs_info, ret, 578879787eaaSJeff Mahoney "Couldn't read tree log root."); 578979787eaaSJeff Mahoney goto error; 579079787eaaSJeff Mahoney } 5791e02119d5SChris Mason 5792e02119d5SChris Mason tmp_key.objectid = found_key.offset; 5793e02119d5SChris Mason tmp_key.type = BTRFS_ROOT_ITEM_KEY; 5794e02119d5SChris Mason tmp_key.offset = (u64)-1; 5795e02119d5SChris Mason 5796e02119d5SChris Mason wc.replay_dest = btrfs_read_fs_root_no_name(fs_info, &tmp_key); 579779787eaaSJeff Mahoney if (IS_ERR(wc.replay_dest)) { 579879787eaaSJeff Mahoney ret = PTR_ERR(wc.replay_dest); 5799b50c6e25SJosef Bacik free_extent_buffer(log->node); 5800b50c6e25SJosef Bacik free_extent_buffer(log->commit_root); 5801b50c6e25SJosef Bacik kfree(log); 58025d163e0eSJeff Mahoney btrfs_handle_fs_error(fs_info, ret, 58035d163e0eSJeff Mahoney "Couldn't read target root for tree log recovery."); 580479787eaaSJeff Mahoney goto error; 580579787eaaSJeff Mahoney } 5806e02119d5SChris Mason 580707d400a6SYan Zheng wc.replay_dest->log_root = log; 58085d4f98a2SYan Zheng btrfs_record_root_in_trans(trans, wc.replay_dest); 5809e02119d5SChris Mason ret = walk_log_tree(trans, log, &wc); 5810e02119d5SChris Mason 5811b50c6e25SJosef Bacik if (!ret && wc.stage == LOG_WALK_REPLAY_ALL) { 5812e02119d5SChris Mason ret = fixup_inode_link_counts(trans, wc.replay_dest, 5813e02119d5SChris Mason path); 5814e02119d5SChris Mason } 5815e02119d5SChris Mason 5816900c9981SLiu Bo if (!ret && wc.stage == LOG_WALK_REPLAY_ALL) { 5817900c9981SLiu Bo struct btrfs_root *root = wc.replay_dest; 5818900c9981SLiu Bo 5819900c9981SLiu Bo btrfs_release_path(path); 5820900c9981SLiu Bo 5821900c9981SLiu Bo /* 5822900c9981SLiu Bo * We have just replayed everything, and the highest 5823900c9981SLiu Bo * objectid of fs roots probably has changed in case 5824900c9981SLiu Bo * some inode_item's got replayed. 
5825900c9981SLiu Bo * 5826900c9981SLiu Bo * root->objectid_mutex is not acquired as log replay 5827900c9981SLiu Bo * could only happen during mount. 5828900c9981SLiu Bo */ 5829900c9981SLiu Bo ret = btrfs_find_highest_objectid(root, 5830900c9981SLiu Bo &root->highest_objectid); 5831900c9981SLiu Bo } 5832900c9981SLiu Bo 5833e02119d5SChris Mason key.offset = found_key.offset - 1; 583407d400a6SYan Zheng wc.replay_dest->log_root = NULL; 5835e02119d5SChris Mason free_extent_buffer(log->node); 5836b263c2c8SChris Mason free_extent_buffer(log->commit_root); 5837e02119d5SChris Mason kfree(log); 5838e02119d5SChris Mason 5839b50c6e25SJosef Bacik if (ret) 5840b50c6e25SJosef Bacik goto error; 5841b50c6e25SJosef Bacik 5842e02119d5SChris Mason if (found_key.offset == 0) 5843e02119d5SChris Mason break; 5844e02119d5SChris Mason } 5845b3b4aa74SDavid Sterba btrfs_release_path(path); 5846e02119d5SChris Mason 5847e02119d5SChris Mason /* step one is to pin it all, step two is to replay just inodes */ 5848e02119d5SChris Mason if (wc.pin) { 5849e02119d5SChris Mason wc.pin = 0; 5850e02119d5SChris Mason wc.process_func = replay_one_buffer; 5851e02119d5SChris Mason wc.stage = LOG_WALK_REPLAY_INODES; 5852e02119d5SChris Mason goto again; 5853e02119d5SChris Mason } 5854e02119d5SChris Mason /* step three is to replay everything */ 5855e02119d5SChris Mason if (wc.stage < LOG_WALK_REPLAY_ALL) { 5856e02119d5SChris Mason wc.stage++; 5857e02119d5SChris Mason goto again; 5858e02119d5SChris Mason } 5859e02119d5SChris Mason 5860e02119d5SChris Mason btrfs_free_path(path); 5861e02119d5SChris Mason 5862abefa55aSJosef Bacik /* step 4: commit the transaction, which also unpins the blocks */ 58633a45bb20SJeff Mahoney ret = btrfs_commit_transaction(trans); 5864abefa55aSJosef Bacik if (ret) 5865abefa55aSJosef Bacik return ret; 5866abefa55aSJosef Bacik 5867e02119d5SChris Mason free_extent_buffer(log_root_tree->node); 5868e02119d5SChris Mason log_root_tree->log_root = NULL; 5869afcdd129SJosef Bacik 
clear_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags); 5870e02119d5SChris Mason kfree(log_root_tree); 587179787eaaSJeff Mahoney 5872abefa55aSJosef Bacik return 0; 587379787eaaSJeff Mahoney error: 5874b50c6e25SJosef Bacik if (wc.trans) 58753a45bb20SJeff Mahoney btrfs_end_transaction(wc.trans); 587679787eaaSJeff Mahoney btrfs_free_path(path); 587779787eaaSJeff Mahoney return ret; 5878e02119d5SChris Mason } 587912fcfd22SChris Mason 588012fcfd22SChris Mason /* 588112fcfd22SChris Mason * there are some corner cases where we want to force a full 588212fcfd22SChris Mason * commit instead of allowing a directory to be logged. 588312fcfd22SChris Mason * 588412fcfd22SChris Mason * They revolve around files there were unlinked from the directory, and 588512fcfd22SChris Mason * this function updates the parent directory so that a full commit is 588612fcfd22SChris Mason * properly done if it is fsync'd later after the unlinks are done. 58872be63d5cSFilipe Manana * 58882be63d5cSFilipe Manana * Must be called before the unlink operations (updates to the subvolume tree, 58892be63d5cSFilipe Manana * inodes, etc) are done. 589012fcfd22SChris Mason */ 589112fcfd22SChris Mason void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans, 58924176bdbfSNikolay Borisov struct btrfs_inode *dir, struct btrfs_inode *inode, 589312fcfd22SChris Mason int for_rename) 589412fcfd22SChris Mason { 589512fcfd22SChris Mason /* 5896af4176b4SChris Mason * when we're logging a file, if it hasn't been renamed 5897af4176b4SChris Mason * or unlinked, and its inode is fully committed on disk, 5898af4176b4SChris Mason * we don't have to worry about walking up the directory chain 5899af4176b4SChris Mason * to log its parents. 5900af4176b4SChris Mason * 5901af4176b4SChris Mason * So, we use the last_unlink_trans field to put this transid 5902af4176b4SChris Mason * into the file. When the file is logged we check it and 5903af4176b4SChris Mason * don't log the parents if the file is fully on disk. 
5904af4176b4SChris Mason */ 59054176bdbfSNikolay Borisov mutex_lock(&inode->log_mutex); 59064176bdbfSNikolay Borisov inode->last_unlink_trans = trans->transid; 59074176bdbfSNikolay Borisov mutex_unlock(&inode->log_mutex); 5908af4176b4SChris Mason 5909af4176b4SChris Mason /* 591012fcfd22SChris Mason * if this directory was already logged any new 591112fcfd22SChris Mason * names for this file/dir will get recorded 591212fcfd22SChris Mason */ 591312fcfd22SChris Mason smp_mb(); 59144176bdbfSNikolay Borisov if (dir->logged_trans == trans->transid) 591512fcfd22SChris Mason return; 591612fcfd22SChris Mason 591712fcfd22SChris Mason /* 591812fcfd22SChris Mason * if the inode we're about to unlink was logged, 591912fcfd22SChris Mason * the log will be properly updated for any new names 592012fcfd22SChris Mason */ 59214176bdbfSNikolay Borisov if (inode->logged_trans == trans->transid) 592212fcfd22SChris Mason return; 592312fcfd22SChris Mason 592412fcfd22SChris Mason /* 592512fcfd22SChris Mason * when renaming files across directories, if the directory 592612fcfd22SChris Mason * there we're unlinking from gets fsync'd later on, there's 592712fcfd22SChris Mason * no way to find the destination directory later and fsync it 592812fcfd22SChris Mason * properly. So, we have to be conservative and force commits 592912fcfd22SChris Mason * so the new name gets discovered. 
593012fcfd22SChris Mason */ 593112fcfd22SChris Mason if (for_rename) 593212fcfd22SChris Mason goto record; 593312fcfd22SChris Mason 593412fcfd22SChris Mason /* we can safely do the unlink without any special recording */ 593512fcfd22SChris Mason return; 593612fcfd22SChris Mason 593712fcfd22SChris Mason record: 59384176bdbfSNikolay Borisov mutex_lock(&dir->log_mutex); 59394176bdbfSNikolay Borisov dir->last_unlink_trans = trans->transid; 59404176bdbfSNikolay Borisov mutex_unlock(&dir->log_mutex); 594112fcfd22SChris Mason } 594212fcfd22SChris Mason 594312fcfd22SChris Mason /* 59441ec9a1aeSFilipe Manana * Make sure that if someone attempts to fsync the parent directory of a deleted 59451ec9a1aeSFilipe Manana * snapshot, it ends up triggering a transaction commit. This is to guarantee 59461ec9a1aeSFilipe Manana * that after replaying the log tree of the parent directory's root we will not 59471ec9a1aeSFilipe Manana * see the snapshot anymore and at log replay time we will not see any log tree 59481ec9a1aeSFilipe Manana * corresponding to the deleted snapshot's root, which could lead to replaying 59491ec9a1aeSFilipe Manana * it after replaying the log tree of the parent directory (which would replay 59501ec9a1aeSFilipe Manana * the snapshot delete operation). 59512be63d5cSFilipe Manana * 59522be63d5cSFilipe Manana * Must be called before the actual snapshot destroy operation (updates to the 59532be63d5cSFilipe Manana * parent root and tree of tree roots trees, etc) are done. 
59541ec9a1aeSFilipe Manana */ 59551ec9a1aeSFilipe Manana void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans, 595643663557SNikolay Borisov struct btrfs_inode *dir) 59571ec9a1aeSFilipe Manana { 595843663557SNikolay Borisov mutex_lock(&dir->log_mutex); 595943663557SNikolay Borisov dir->last_unlink_trans = trans->transid; 596043663557SNikolay Borisov mutex_unlock(&dir->log_mutex); 59611ec9a1aeSFilipe Manana } 59621ec9a1aeSFilipe Manana 59631ec9a1aeSFilipe Manana /* 596412fcfd22SChris Mason * Call this after adding a new name for a file and it will properly 596512fcfd22SChris Mason * update the log to reflect the new name. 596612fcfd22SChris Mason * 596712fcfd22SChris Mason * It will return zero if all goes well, and it will return 1 if a 596812fcfd22SChris Mason * full transaction commit is required. 596912fcfd22SChris Mason */ 597012fcfd22SChris Mason int btrfs_log_new_name(struct btrfs_trans_handle *trans, 59719ca5fbfbSNikolay Borisov struct btrfs_inode *inode, struct btrfs_inode *old_dir, 597212fcfd22SChris Mason struct dentry *parent) 597312fcfd22SChris Mason { 59743ffbd68cSDavid Sterba struct btrfs_fs_info *fs_info = trans->fs_info; 597512fcfd22SChris Mason 597612fcfd22SChris Mason /* 5977af4176b4SChris Mason * this will force the logging code to walk the dentry chain 5978af4176b4SChris Mason * up for the file 5979af4176b4SChris Mason */ 59809a6509c4SFilipe Manana if (!S_ISDIR(inode->vfs_inode.i_mode)) 59819ca5fbfbSNikolay Borisov inode->last_unlink_trans = trans->transid; 5982af4176b4SChris Mason 5983af4176b4SChris Mason /* 598412fcfd22SChris Mason * if this inode hasn't been logged and directory we're renaming it 598512fcfd22SChris Mason * from hasn't been logged, we don't need to log it 598612fcfd22SChris Mason */ 59879ca5fbfbSNikolay Borisov if (inode->logged_trans <= fs_info->last_trans_committed && 59889ca5fbfbSNikolay Borisov (!old_dir || old_dir->logged_trans <= fs_info->last_trans_committed)) 598912fcfd22SChris Mason return 0; 
599012fcfd22SChris Mason 5991f882274bSNikolay Borisov return btrfs_log_inode_parent(trans, inode, parent, 0, LLONG_MAX, 5992f882274bSNikolay Borisov LOG_INODE_EXISTS, NULL); 599312fcfd22SChris Mason } 599412fcfd22SChris Mason 5995