1c1d7c514SDavid Sterba // SPDX-License-Identifier: GPL-2.0 26cbd5570SChris Mason /* 3d352ac68SChris Mason * Copyright (C) 2007,2008 Oracle. All rights reserved. 46cbd5570SChris Mason */ 56cbd5570SChris Mason 6a6b6e75eSChris Mason #include <linux/sched.h> 75a0e3ad6STejun Heo #include <linux/slab.h> 8bd989ba3SJan Schmidt #include <linux/rbtree.h> 9adf02123SDavid Sterba #include <linux/mm.h> 10eb60ceacSChris Mason #include "ctree.h" 11eb60ceacSChris Mason #include "disk-io.h" 127f5c1516SChris Mason #include "transaction.h" 135f39d397SChris Mason #include "print-tree.h" 14925baeddSChris Mason #include "locking.h" 15de37aa51SNikolay Borisov #include "volumes.h" 16f616f5cdSQu Wenruo #include "qgroup.h" 179a8dd150SChris Mason 18e089f05cSChris Mason static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root 19e089f05cSChris Mason *root, struct btrfs_path *path, int level); 20310712b2SOmar Sandoval static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root *root, 21310712b2SOmar Sandoval const struct btrfs_key *ins_key, struct btrfs_path *path, 22310712b2SOmar Sandoval int data_size, int extend); 235f39d397SChris Mason static int push_node_left(struct btrfs_trans_handle *trans, 242ff7e61eSJeff Mahoney struct extent_buffer *dst, 25971a1f66SChris Mason struct extent_buffer *src, int empty); 265f39d397SChris Mason static int balance_node_right(struct btrfs_trans_handle *trans, 275f39d397SChris Mason struct extent_buffer *dst_buf, 285f39d397SChris Mason struct extent_buffer *src_buf); 29afe5fea7STsutomu Itoh static void del_ptr(struct btrfs_root *root, struct btrfs_path *path, 30afe5fea7STsutomu Itoh int level, int slot); 31d97e63b6SChris Mason 32af024ed2SJohannes Thumshirn static const struct btrfs_csums { 33af024ed2SJohannes Thumshirn u16 size; 3459a0fcdbSDavid Sterba const char name[10]; 3559a0fcdbSDavid Sterba const char driver[12]; 36af024ed2SJohannes Thumshirn } btrfs_csums[] = { 37af024ed2SJohannes Thumshirn [BTRFS_CSUM_TYPE_CRC32] = { .size 
= 4, .name = "crc32c" }, 383951e7f0SJohannes Thumshirn [BTRFS_CSUM_TYPE_XXHASH] = { .size = 8, .name = "xxhash64" }, 393831bf00SJohannes Thumshirn [BTRFS_CSUM_TYPE_SHA256] = { .size = 32, .name = "sha256" }, 40352ae07bSDavid Sterba [BTRFS_CSUM_TYPE_BLAKE2] = { .size = 32, .name = "blake2b", 41352ae07bSDavid Sterba .driver = "blake2b-256" }, 42af024ed2SJohannes Thumshirn }; 43af024ed2SJohannes Thumshirn 44af024ed2SJohannes Thumshirn int btrfs_super_csum_size(const struct btrfs_super_block *s) 45af024ed2SJohannes Thumshirn { 46af024ed2SJohannes Thumshirn u16 t = btrfs_super_csum_type(s); 47af024ed2SJohannes Thumshirn /* 48af024ed2SJohannes Thumshirn * csum type is validated at mount time 49af024ed2SJohannes Thumshirn */ 50af024ed2SJohannes Thumshirn return btrfs_csums[t].size; 51af024ed2SJohannes Thumshirn } 52af024ed2SJohannes Thumshirn 53af024ed2SJohannes Thumshirn const char *btrfs_super_csum_name(u16 csum_type) 54af024ed2SJohannes Thumshirn { 55af024ed2SJohannes Thumshirn /* csum type is validated at mount time */ 56af024ed2SJohannes Thumshirn return btrfs_csums[csum_type].name; 57af024ed2SJohannes Thumshirn } 58af024ed2SJohannes Thumshirn 59b4e967beSDavid Sterba /* 60b4e967beSDavid Sterba * Return driver name if defined, otherwise the name that's also a valid driver 61b4e967beSDavid Sterba * name 62b4e967beSDavid Sterba */ 63b4e967beSDavid Sterba const char *btrfs_super_csum_driver(u16 csum_type) 64b4e967beSDavid Sterba { 65b4e967beSDavid Sterba /* csum type is validated at mount time */ 6659a0fcdbSDavid Sterba return btrfs_csums[csum_type].driver[0] ? 
6759a0fcdbSDavid Sterba btrfs_csums[csum_type].driver : 68b4e967beSDavid Sterba btrfs_csums[csum_type].name; 69b4e967beSDavid Sterba } 70b4e967beSDavid Sterba 71604997b4SDavid Sterba size_t __attribute_const__ btrfs_get_num_csums(void) 72f7cea56cSDavid Sterba { 73f7cea56cSDavid Sterba return ARRAY_SIZE(btrfs_csums); 74f7cea56cSDavid Sterba } 75f7cea56cSDavid Sterba 762c90e5d6SChris Mason struct btrfs_path *btrfs_alloc_path(void) 772c90e5d6SChris Mason { 78e2c89907SMasahiro Yamada return kmem_cache_zalloc(btrfs_path_cachep, GFP_NOFS); 792c90e5d6SChris Mason } 802c90e5d6SChris Mason 81d352ac68SChris Mason /* this also releases the path */ 822c90e5d6SChris Mason void btrfs_free_path(struct btrfs_path *p) 832c90e5d6SChris Mason { 84ff175d57SJesper Juhl if (!p) 85ff175d57SJesper Juhl return; 86b3b4aa74SDavid Sterba btrfs_release_path(p); 872c90e5d6SChris Mason kmem_cache_free(btrfs_path_cachep, p); 882c90e5d6SChris Mason } 892c90e5d6SChris Mason 90d352ac68SChris Mason /* 91d352ac68SChris Mason * path release drops references on the extent buffers in the path 92d352ac68SChris Mason * and it drops any locks held by this path 93d352ac68SChris Mason * 94d352ac68SChris Mason * It is safe to call this on paths that no locks or extent buffers held. 
95d352ac68SChris Mason */ 96b3b4aa74SDavid Sterba noinline void btrfs_release_path(struct btrfs_path *p) 97eb60ceacSChris Mason { 98eb60ceacSChris Mason int i; 99a2135011SChris Mason 100234b63a0SChris Mason for (i = 0; i < BTRFS_MAX_LEVEL; i++) { 1013f157a2fSChris Mason p->slots[i] = 0; 102eb60ceacSChris Mason if (!p->nodes[i]) 103925baeddSChris Mason continue; 104925baeddSChris Mason if (p->locks[i]) { 105bd681513SChris Mason btrfs_tree_unlock_rw(p->nodes[i], p->locks[i]); 106925baeddSChris Mason p->locks[i] = 0; 107925baeddSChris Mason } 1085f39d397SChris Mason free_extent_buffer(p->nodes[i]); 1093f157a2fSChris Mason p->nodes[i] = NULL; 110eb60ceacSChris Mason } 111eb60ceacSChris Mason } 112eb60ceacSChris Mason 113d352ac68SChris Mason /* 114d352ac68SChris Mason * safely gets a reference on the root node of a tree. A lock 115d352ac68SChris Mason * is not taken, so a concurrent writer may put a different node 116d352ac68SChris Mason * at the root of the tree. See btrfs_lock_root_node for the 117d352ac68SChris Mason * looping required. 118d352ac68SChris Mason * 119d352ac68SChris Mason * The extent buffer returned by this has a reference taken, so 120d352ac68SChris Mason * it won't disappear. It may stop being the root of the tree 121d352ac68SChris Mason * at any time because there are no locks held. 
122d352ac68SChris Mason */ 123925baeddSChris Mason struct extent_buffer *btrfs_root_node(struct btrfs_root *root) 124925baeddSChris Mason { 125925baeddSChris Mason struct extent_buffer *eb; 126240f62c8SChris Mason 1273083ee2eSJosef Bacik while (1) { 128240f62c8SChris Mason rcu_read_lock(); 129240f62c8SChris Mason eb = rcu_dereference(root->node); 1303083ee2eSJosef Bacik 1313083ee2eSJosef Bacik /* 1323083ee2eSJosef Bacik * RCU really hurts here, we could free up the root node because 13301327610SNicholas D Steeves * it was COWed but we may not get the new root node yet so do 1343083ee2eSJosef Bacik * the inc_not_zero dance and if it doesn't work then 1353083ee2eSJosef Bacik * synchronize_rcu and try again. 1363083ee2eSJosef Bacik */ 1373083ee2eSJosef Bacik if (atomic_inc_not_zero(&eb->refs)) { 138240f62c8SChris Mason rcu_read_unlock(); 1393083ee2eSJosef Bacik break; 1403083ee2eSJosef Bacik } 1413083ee2eSJosef Bacik rcu_read_unlock(); 1423083ee2eSJosef Bacik synchronize_rcu(); 1433083ee2eSJosef Bacik } 144925baeddSChris Mason return eb; 145925baeddSChris Mason } 146925baeddSChris Mason 14792a7cc42SQu Wenruo /* 14892a7cc42SQu Wenruo * Cowonly root (not-shareable trees, everything not subvolume or reloc roots), 14992a7cc42SQu Wenruo * just get put onto a simple dirty list. Transaction walks this list to make 15092a7cc42SQu Wenruo * sure they get properly updated on disk. 
151d352ac68SChris Mason */ 1520b86a832SChris Mason static void add_root_to_dirty_list(struct btrfs_root *root) 1530b86a832SChris Mason { 1540b246afaSJeff Mahoney struct btrfs_fs_info *fs_info = root->fs_info; 1550b246afaSJeff Mahoney 156e7070be1SJosef Bacik if (test_bit(BTRFS_ROOT_DIRTY, &root->state) || 157e7070be1SJosef Bacik !test_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state)) 158e7070be1SJosef Bacik return; 159e7070be1SJosef Bacik 1600b246afaSJeff Mahoney spin_lock(&fs_info->trans_lock); 161e7070be1SJosef Bacik if (!test_and_set_bit(BTRFS_ROOT_DIRTY, &root->state)) { 162e7070be1SJosef Bacik /* Want the extent tree to be the last on the list */ 1634fd786e6SMisono Tomohiro if (root->root_key.objectid == BTRFS_EXTENT_TREE_OBJECTID) 164e7070be1SJosef Bacik list_move_tail(&root->dirty_list, 1650b246afaSJeff Mahoney &fs_info->dirty_cowonly_roots); 166e7070be1SJosef Bacik else 167e7070be1SJosef Bacik list_move(&root->dirty_list, 1680b246afaSJeff Mahoney &fs_info->dirty_cowonly_roots); 1690b86a832SChris Mason } 1700b246afaSJeff Mahoney spin_unlock(&fs_info->trans_lock); 1710b86a832SChris Mason } 1720b86a832SChris Mason 173d352ac68SChris Mason /* 174d352ac68SChris Mason * used by snapshot creation to make a copy of a root for a tree with 175d352ac68SChris Mason * a given objectid. The buffer with the new root node is returned in 176d352ac68SChris Mason * cow_ret, and this func returns zero on success or a negative error code. 
177d352ac68SChris Mason */ 178be20aa9dSChris Mason int btrfs_copy_root(struct btrfs_trans_handle *trans, 179be20aa9dSChris Mason struct btrfs_root *root, 180be20aa9dSChris Mason struct extent_buffer *buf, 181be20aa9dSChris Mason struct extent_buffer **cow_ret, u64 new_root_objectid) 182be20aa9dSChris Mason { 1830b246afaSJeff Mahoney struct btrfs_fs_info *fs_info = root->fs_info; 184be20aa9dSChris Mason struct extent_buffer *cow; 185be20aa9dSChris Mason int ret = 0; 186be20aa9dSChris Mason int level; 1875d4f98a2SYan Zheng struct btrfs_disk_key disk_key; 188be20aa9dSChris Mason 18992a7cc42SQu Wenruo WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) && 1900b246afaSJeff Mahoney trans->transid != fs_info->running_transaction->transid); 19192a7cc42SQu Wenruo WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) && 19227cdeb70SMiao Xie trans->transid != root->last_trans); 193be20aa9dSChris Mason 194be20aa9dSChris Mason level = btrfs_header_level(buf); 1955d4f98a2SYan Zheng if (level == 0) 1965d4f98a2SYan Zheng btrfs_item_key(buf, &disk_key, 0); 1975d4f98a2SYan Zheng else 1985d4f98a2SYan Zheng btrfs_node_key(buf, &disk_key, 0); 19931840ae1SZheng Yan 2004d75f8a9SDavid Sterba cow = btrfs_alloc_tree_block(trans, root, 0, new_root_objectid, 2019631e4ccSJosef Bacik &disk_key, level, buf->start, 0, BTRFS_NESTING_NORMAL); 2025d4f98a2SYan Zheng if (IS_ERR(cow)) 203be20aa9dSChris Mason return PTR_ERR(cow); 204be20aa9dSChris Mason 20558e8012cSDavid Sterba copy_extent_buffer_full(cow, buf); 206be20aa9dSChris Mason btrfs_set_header_bytenr(cow, cow->start); 207be20aa9dSChris Mason btrfs_set_header_generation(cow, trans->transid); 2085d4f98a2SYan Zheng btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV); 2095d4f98a2SYan Zheng btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN | 2105d4f98a2SYan Zheng BTRFS_HEADER_FLAG_RELOC); 2115d4f98a2SYan Zheng if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID) 2125d4f98a2SYan Zheng btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC); 
2135d4f98a2SYan Zheng else 214be20aa9dSChris Mason btrfs_set_header_owner(cow, new_root_objectid); 215be20aa9dSChris Mason 216de37aa51SNikolay Borisov write_extent_buffer_fsid(cow, fs_info->fs_devices->metadata_uuid); 2172b82032cSYan Zheng 218be20aa9dSChris Mason WARN_ON(btrfs_header_generation(buf) > trans->transid); 2195d4f98a2SYan Zheng if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID) 220e339a6b0SJosef Bacik ret = btrfs_inc_ref(trans, root, cow, 1); 2215d4f98a2SYan Zheng else 222e339a6b0SJosef Bacik ret = btrfs_inc_ref(trans, root, cow, 0); 2234aec2b52SChris Mason 224be20aa9dSChris Mason if (ret) 225be20aa9dSChris Mason return ret; 226be20aa9dSChris Mason 227be20aa9dSChris Mason btrfs_mark_buffer_dirty(cow); 228be20aa9dSChris Mason *cow_ret = cow; 229be20aa9dSChris Mason return 0; 230be20aa9dSChris Mason } 231be20aa9dSChris Mason 232bd989ba3SJan Schmidt enum mod_log_op { 233bd989ba3SJan Schmidt MOD_LOG_KEY_REPLACE, 234bd989ba3SJan Schmidt MOD_LOG_KEY_ADD, 235bd989ba3SJan Schmidt MOD_LOG_KEY_REMOVE, 236bd989ba3SJan Schmidt MOD_LOG_KEY_REMOVE_WHILE_FREEING, 237bd989ba3SJan Schmidt MOD_LOG_KEY_REMOVE_WHILE_MOVING, 238bd989ba3SJan Schmidt MOD_LOG_MOVE_KEYS, 239bd989ba3SJan Schmidt MOD_LOG_ROOT_REPLACE, 240bd989ba3SJan Schmidt }; 241bd989ba3SJan Schmidt 242bd989ba3SJan Schmidt struct tree_mod_root { 243bd989ba3SJan Schmidt u64 logical; 244bd989ba3SJan Schmidt u8 level; 245bd989ba3SJan Schmidt }; 246bd989ba3SJan Schmidt 247bd989ba3SJan Schmidt struct tree_mod_elem { 248bd989ba3SJan Schmidt struct rb_node node; 249298cfd36SChandan Rajendra u64 logical; 250097b8a7cSJan Schmidt u64 seq; 251bd989ba3SJan Schmidt enum mod_log_op op; 252bd989ba3SJan Schmidt 253bd989ba3SJan Schmidt /* this is used for MOD_LOG_KEY_* and MOD_LOG_MOVE_KEYS operations */ 254bd989ba3SJan Schmidt int slot; 255bd989ba3SJan Schmidt 256bd989ba3SJan Schmidt /* this is used for MOD_LOG_KEY* and MOD_LOG_ROOT_REPLACE */ 257bd989ba3SJan Schmidt u64 generation; 258bd989ba3SJan Schmidt 259bd989ba3SJan 
Schmidt /* those are used for op == MOD_LOG_KEY_{REPLACE,REMOVE} */ 260bd989ba3SJan Schmidt struct btrfs_disk_key key; 261bd989ba3SJan Schmidt u64 blockptr; 262bd989ba3SJan Schmidt 263bd989ba3SJan Schmidt /* this is used for op == MOD_LOG_MOVE_KEYS */ 264b6dfa35bSDavid Sterba struct { 265b6dfa35bSDavid Sterba int dst_slot; 266b6dfa35bSDavid Sterba int nr_items; 267b6dfa35bSDavid Sterba } move; 268bd989ba3SJan Schmidt 269bd989ba3SJan Schmidt /* this is used for op == MOD_LOG_ROOT_REPLACE */ 270bd989ba3SJan Schmidt struct tree_mod_root old_root; 271bd989ba3SJan Schmidt }; 272bd989ba3SJan Schmidt 273097b8a7cSJan Schmidt /* 274fcebe456SJosef Bacik * Pull a new tree mod seq number for our operation. 275fc36ed7eSJan Schmidt */ 276fcebe456SJosef Bacik static inline u64 btrfs_inc_tree_mod_seq(struct btrfs_fs_info *fs_info) 277fc36ed7eSJan Schmidt { 278fc36ed7eSJan Schmidt return atomic64_inc_return(&fs_info->tree_mod_seq); 279fc36ed7eSJan Schmidt } 280fc36ed7eSJan Schmidt 281fc36ed7eSJan Schmidt /* 282097b8a7cSJan Schmidt * This adds a new blocker to the tree mod log's blocker list if the @elem 283097b8a7cSJan Schmidt * passed does not already have a sequence number set. So when a caller expects 284097b8a7cSJan Schmidt * to record tree modifications, it should ensure to set elem->seq to zero 285097b8a7cSJan Schmidt * before calling btrfs_get_tree_mod_seq. 286097b8a7cSJan Schmidt * Returns a fresh, unused tree log modification sequence number, even if no new 287097b8a7cSJan Schmidt * blocker was added. 
288097b8a7cSJan Schmidt */ 289097b8a7cSJan Schmidt u64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info, 290bd989ba3SJan Schmidt struct seq_list *elem) 291bd989ba3SJan Schmidt { 292b1a09f1eSDavid Sterba write_lock(&fs_info->tree_mod_log_lock); 293097b8a7cSJan Schmidt if (!elem->seq) { 294fcebe456SJosef Bacik elem->seq = btrfs_inc_tree_mod_seq(fs_info); 295097b8a7cSJan Schmidt list_add_tail(&elem->list, &fs_info->tree_mod_seq_list); 296097b8a7cSJan Schmidt } 297b1a09f1eSDavid Sterba write_unlock(&fs_info->tree_mod_log_lock); 298097b8a7cSJan Schmidt 299fcebe456SJosef Bacik return elem->seq; 300bd989ba3SJan Schmidt } 301bd989ba3SJan Schmidt 302bd989ba3SJan Schmidt void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info, 303bd989ba3SJan Schmidt struct seq_list *elem) 304bd989ba3SJan Schmidt { 305bd989ba3SJan Schmidt struct rb_root *tm_root; 306bd989ba3SJan Schmidt struct rb_node *node; 307bd989ba3SJan Schmidt struct rb_node *next; 308bd989ba3SJan Schmidt struct tree_mod_elem *tm; 309bd989ba3SJan Schmidt u64 min_seq = (u64)-1; 310bd989ba3SJan Schmidt u64 seq_putting = elem->seq; 311bd989ba3SJan Schmidt 312bd989ba3SJan Schmidt if (!seq_putting) 313bd989ba3SJan Schmidt return; 314bd989ba3SJan Schmidt 3157227ff4dSFilipe Manana write_lock(&fs_info->tree_mod_log_lock); 316bd989ba3SJan Schmidt list_del(&elem->list); 317097b8a7cSJan Schmidt elem->seq = 0; 318bd989ba3SJan Schmidt 31942836cf4SFilipe Manana if (!list_empty(&fs_info->tree_mod_seq_list)) { 32042836cf4SFilipe Manana struct seq_list *first; 32142836cf4SFilipe Manana 32242836cf4SFilipe Manana first = list_first_entry(&fs_info->tree_mod_seq_list, 32342836cf4SFilipe Manana struct seq_list, list); 32442836cf4SFilipe Manana if (seq_putting > first->seq) { 325bd989ba3SJan Schmidt /* 32642836cf4SFilipe Manana * Blocker with lower sequence number exists, we 32742836cf4SFilipe Manana * cannot remove anything from the log. 
328bd989ba3SJan Schmidt */ 3297227ff4dSFilipe Manana write_unlock(&fs_info->tree_mod_log_lock); 330097b8a7cSJan Schmidt return; 331bd989ba3SJan Schmidt } 33242836cf4SFilipe Manana min_seq = first->seq; 333bd989ba3SJan Schmidt } 334097b8a7cSJan Schmidt 335097b8a7cSJan Schmidt /* 336bd989ba3SJan Schmidt * anything that's lower than the lowest existing (read: blocked) 337bd989ba3SJan Schmidt * sequence number can be removed from the tree. 338bd989ba3SJan Schmidt */ 339bd989ba3SJan Schmidt tm_root = &fs_info->tree_mod_log; 340bd989ba3SJan Schmidt for (node = rb_first(tm_root); node; node = next) { 341bd989ba3SJan Schmidt next = rb_next(node); 3426b4df8b6SGeliang Tang tm = rb_entry(node, struct tree_mod_elem, node); 3436609fee8SFilipe Manana if (tm->seq >= min_seq) 344bd989ba3SJan Schmidt continue; 345bd989ba3SJan Schmidt rb_erase(node, tm_root); 346bd989ba3SJan Schmidt kfree(tm); 347bd989ba3SJan Schmidt } 348b1a09f1eSDavid Sterba write_unlock(&fs_info->tree_mod_log_lock); 349bd989ba3SJan Schmidt } 350bd989ba3SJan Schmidt 351bd989ba3SJan Schmidt /* 352bd989ba3SJan Schmidt * key order of the log: 353298cfd36SChandan Rajendra * node/leaf start address -> sequence 354bd989ba3SJan Schmidt * 355298cfd36SChandan Rajendra * The 'start address' is the logical address of the *new* root node 356298cfd36SChandan Rajendra * for root replace operations, or the logical address of the affected 357298cfd36SChandan Rajendra * block for all other operations. 
358bd989ba3SJan Schmidt */ 359bd989ba3SJan Schmidt static noinline int 360bd989ba3SJan Schmidt __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm) 361bd989ba3SJan Schmidt { 362bd989ba3SJan Schmidt struct rb_root *tm_root; 363bd989ba3SJan Schmidt struct rb_node **new; 364bd989ba3SJan Schmidt struct rb_node *parent = NULL; 365bd989ba3SJan Schmidt struct tree_mod_elem *cur; 366bd989ba3SJan Schmidt 36773e82fe4SDavid Sterba lockdep_assert_held_write(&fs_info->tree_mod_log_lock); 36873e82fe4SDavid Sterba 369fcebe456SJosef Bacik tm->seq = btrfs_inc_tree_mod_seq(fs_info); 370bd989ba3SJan Schmidt 371bd989ba3SJan Schmidt tm_root = &fs_info->tree_mod_log; 372bd989ba3SJan Schmidt new = &tm_root->rb_node; 373bd989ba3SJan Schmidt while (*new) { 3746b4df8b6SGeliang Tang cur = rb_entry(*new, struct tree_mod_elem, node); 375bd989ba3SJan Schmidt parent = *new; 376298cfd36SChandan Rajendra if (cur->logical < tm->logical) 377bd989ba3SJan Schmidt new = &((*new)->rb_left); 378298cfd36SChandan Rajendra else if (cur->logical > tm->logical) 379bd989ba3SJan Schmidt new = &((*new)->rb_right); 380097b8a7cSJan Schmidt else if (cur->seq < tm->seq) 381bd989ba3SJan Schmidt new = &((*new)->rb_left); 382097b8a7cSJan Schmidt else if (cur->seq > tm->seq) 383bd989ba3SJan Schmidt new = &((*new)->rb_right); 3845de865eeSFilipe David Borba Manana else 3855de865eeSFilipe David Borba Manana return -EEXIST; 386bd989ba3SJan Schmidt } 387bd989ba3SJan Schmidt 388bd989ba3SJan Schmidt rb_link_node(&tm->node, parent, new); 389bd989ba3SJan Schmidt rb_insert_color(&tm->node, tm_root); 3905de865eeSFilipe David Borba Manana return 0; 391bd989ba3SJan Schmidt } 392bd989ba3SJan Schmidt 393097b8a7cSJan Schmidt /* 394097b8a7cSJan Schmidt * Determines if logging can be omitted. Returns 1 if it can. Otherwise, it 395097b8a7cSJan Schmidt * returns zero with the tree_mod_log_lock acquired. 
The caller must hold 396097b8a7cSJan Schmidt * this until all tree mod log insertions are recorded in the rb tree and then 397b1a09f1eSDavid Sterba * write unlock fs_info::tree_mod_log_lock. 398097b8a7cSJan Schmidt */ 399e9b7fd4dSJan Schmidt static inline int tree_mod_dont_log(struct btrfs_fs_info *fs_info, 400e9b7fd4dSJan Schmidt struct extent_buffer *eb) { 401e9b7fd4dSJan Schmidt smp_mb(); 402e9b7fd4dSJan Schmidt if (list_empty(&(fs_info)->tree_mod_seq_list)) 403e9b7fd4dSJan Schmidt return 1; 404097b8a7cSJan Schmidt if (eb && btrfs_header_level(eb) == 0) 405e9b7fd4dSJan Schmidt return 1; 4065de865eeSFilipe David Borba Manana 407b1a09f1eSDavid Sterba write_lock(&fs_info->tree_mod_log_lock); 4085de865eeSFilipe David Borba Manana if (list_empty(&(fs_info)->tree_mod_seq_list)) { 409b1a09f1eSDavid Sterba write_unlock(&fs_info->tree_mod_log_lock); 4105de865eeSFilipe David Borba Manana return 1; 4115de865eeSFilipe David Borba Manana } 4125de865eeSFilipe David Borba Manana 413e9b7fd4dSJan Schmidt return 0; 414e9b7fd4dSJan Schmidt } 415e9b7fd4dSJan Schmidt 4165de865eeSFilipe David Borba Manana /* Similar to tree_mod_dont_log, but doesn't acquire any locks. 
*/ 4175de865eeSFilipe David Borba Manana static inline int tree_mod_need_log(const struct btrfs_fs_info *fs_info, 4185de865eeSFilipe David Borba Manana struct extent_buffer *eb) 4195de865eeSFilipe David Borba Manana { 4205de865eeSFilipe David Borba Manana smp_mb(); 4215de865eeSFilipe David Borba Manana if (list_empty(&(fs_info)->tree_mod_seq_list)) 4225de865eeSFilipe David Borba Manana return 0; 4235de865eeSFilipe David Borba Manana if (eb && btrfs_header_level(eb) == 0) 4245de865eeSFilipe David Borba Manana return 0; 4255de865eeSFilipe David Borba Manana 4265de865eeSFilipe David Borba Manana return 1; 4275de865eeSFilipe David Borba Manana } 4285de865eeSFilipe David Borba Manana 4295de865eeSFilipe David Borba Manana static struct tree_mod_elem * 4305de865eeSFilipe David Borba Manana alloc_tree_mod_elem(struct extent_buffer *eb, int slot, 431bd989ba3SJan Schmidt enum mod_log_op op, gfp_t flags) 432bd989ba3SJan Schmidt { 433097b8a7cSJan Schmidt struct tree_mod_elem *tm; 434bd989ba3SJan Schmidt 435c8cc6341SJosef Bacik tm = kzalloc(sizeof(*tm), flags); 436c8cc6341SJosef Bacik if (!tm) 4375de865eeSFilipe David Borba Manana return NULL; 438bd989ba3SJan Schmidt 439298cfd36SChandan Rajendra tm->logical = eb->start; 440bd989ba3SJan Schmidt if (op != MOD_LOG_KEY_ADD) { 441bd989ba3SJan Schmidt btrfs_node_key(eb, &tm->key, slot); 442bd989ba3SJan Schmidt tm->blockptr = btrfs_node_blockptr(eb, slot); 443bd989ba3SJan Schmidt } 444bd989ba3SJan Schmidt tm->op = op; 445bd989ba3SJan Schmidt tm->slot = slot; 446bd989ba3SJan Schmidt tm->generation = btrfs_node_ptr_generation(eb, slot); 4475de865eeSFilipe David Borba Manana RB_CLEAR_NODE(&tm->node); 448bd989ba3SJan Schmidt 4495de865eeSFilipe David Borba Manana return tm; 450097b8a7cSJan Schmidt } 451097b8a7cSJan Schmidt 452e09c2efeSDavid Sterba static noinline int tree_mod_log_insert_key(struct extent_buffer *eb, int slot, 453097b8a7cSJan Schmidt enum mod_log_op op, gfp_t flags) 454097b8a7cSJan Schmidt { 4555de865eeSFilipe David Borba 
Manana struct tree_mod_elem *tm; 4565de865eeSFilipe David Borba Manana int ret; 4575de865eeSFilipe David Borba Manana 458e09c2efeSDavid Sterba if (!tree_mod_need_log(eb->fs_info, eb)) 459097b8a7cSJan Schmidt return 0; 460097b8a7cSJan Schmidt 4615de865eeSFilipe David Borba Manana tm = alloc_tree_mod_elem(eb, slot, op, flags); 4625de865eeSFilipe David Borba Manana if (!tm) 4635de865eeSFilipe David Borba Manana return -ENOMEM; 4645de865eeSFilipe David Borba Manana 465e09c2efeSDavid Sterba if (tree_mod_dont_log(eb->fs_info, eb)) { 4665de865eeSFilipe David Borba Manana kfree(tm); 4675de865eeSFilipe David Borba Manana return 0; 4685de865eeSFilipe David Borba Manana } 4695de865eeSFilipe David Borba Manana 470e09c2efeSDavid Sterba ret = __tree_mod_log_insert(eb->fs_info, tm); 471b1a09f1eSDavid Sterba write_unlock(&eb->fs_info->tree_mod_log_lock); 4725de865eeSFilipe David Borba Manana if (ret) 4735de865eeSFilipe David Borba Manana kfree(tm); 4745de865eeSFilipe David Borba Manana 4755de865eeSFilipe David Borba Manana return ret; 476097b8a7cSJan Schmidt } 477097b8a7cSJan Schmidt 4786074d45fSDavid Sterba static noinline int tree_mod_log_insert_move(struct extent_buffer *eb, 4796074d45fSDavid Sterba int dst_slot, int src_slot, int nr_items) 480bd989ba3SJan Schmidt { 4815de865eeSFilipe David Borba Manana struct tree_mod_elem *tm = NULL; 4825de865eeSFilipe David Borba Manana struct tree_mod_elem **tm_list = NULL; 4835de865eeSFilipe David Borba Manana int ret = 0; 484bd989ba3SJan Schmidt int i; 4855de865eeSFilipe David Borba Manana int locked = 0; 486bd989ba3SJan Schmidt 4876074d45fSDavid Sterba if (!tree_mod_need_log(eb->fs_info, eb)) 488f395694cSJan Schmidt return 0; 489bd989ba3SJan Schmidt 490176ef8f5SDavid Sterba tm_list = kcalloc(nr_items, sizeof(struct tree_mod_elem *), GFP_NOFS); 4915de865eeSFilipe David Borba Manana if (!tm_list) 4925de865eeSFilipe David Borba Manana return -ENOMEM; 493bd989ba3SJan Schmidt 494176ef8f5SDavid Sterba tm = kzalloc(sizeof(*tm), GFP_NOFS); 
4955de865eeSFilipe David Borba Manana if (!tm) { 4965de865eeSFilipe David Borba Manana ret = -ENOMEM; 4975de865eeSFilipe David Borba Manana goto free_tms; 4985de865eeSFilipe David Borba Manana } 499f395694cSJan Schmidt 500298cfd36SChandan Rajendra tm->logical = eb->start; 501bd989ba3SJan Schmidt tm->slot = src_slot; 502bd989ba3SJan Schmidt tm->move.dst_slot = dst_slot; 503bd989ba3SJan Schmidt tm->move.nr_items = nr_items; 504bd989ba3SJan Schmidt tm->op = MOD_LOG_MOVE_KEYS; 505bd989ba3SJan Schmidt 5065de865eeSFilipe David Borba Manana for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) { 5075de865eeSFilipe David Borba Manana tm_list[i] = alloc_tree_mod_elem(eb, i + dst_slot, 508176ef8f5SDavid Sterba MOD_LOG_KEY_REMOVE_WHILE_MOVING, GFP_NOFS); 5095de865eeSFilipe David Borba Manana if (!tm_list[i]) { 5105de865eeSFilipe David Borba Manana ret = -ENOMEM; 5115de865eeSFilipe David Borba Manana goto free_tms; 5125de865eeSFilipe David Borba Manana } 513bd989ba3SJan Schmidt } 514bd989ba3SJan Schmidt 5156074d45fSDavid Sterba if (tree_mod_dont_log(eb->fs_info, eb)) 5165de865eeSFilipe David Borba Manana goto free_tms; 5175de865eeSFilipe David Borba Manana locked = 1; 5185de865eeSFilipe David Borba Manana 5195de865eeSFilipe David Borba Manana /* 5205de865eeSFilipe David Borba Manana * When we override something during the move, we log these removals. 5215de865eeSFilipe David Borba Manana * This can only happen when we move towards the beginning of the 5225de865eeSFilipe David Borba Manana * buffer, i.e. dst_slot < src_slot. 
5235de865eeSFilipe David Borba Manana */ 5245de865eeSFilipe David Borba Manana for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) { 5256074d45fSDavid Sterba ret = __tree_mod_log_insert(eb->fs_info, tm_list[i]); 5265de865eeSFilipe David Borba Manana if (ret) 5275de865eeSFilipe David Borba Manana goto free_tms; 5285de865eeSFilipe David Borba Manana } 5295de865eeSFilipe David Borba Manana 5306074d45fSDavid Sterba ret = __tree_mod_log_insert(eb->fs_info, tm); 5315de865eeSFilipe David Borba Manana if (ret) 5325de865eeSFilipe David Borba Manana goto free_tms; 533b1a09f1eSDavid Sterba write_unlock(&eb->fs_info->tree_mod_log_lock); 5345de865eeSFilipe David Borba Manana kfree(tm_list); 5355de865eeSFilipe David Borba Manana 5365de865eeSFilipe David Borba Manana return 0; 5375de865eeSFilipe David Borba Manana free_tms: 5385de865eeSFilipe David Borba Manana for (i = 0; i < nr_items; i++) { 5395de865eeSFilipe David Borba Manana if (tm_list[i] && !RB_EMPTY_NODE(&tm_list[i]->node)) 5406074d45fSDavid Sterba rb_erase(&tm_list[i]->node, &eb->fs_info->tree_mod_log); 5415de865eeSFilipe David Borba Manana kfree(tm_list[i]); 5425de865eeSFilipe David Borba Manana } 5435de865eeSFilipe David Borba Manana if (locked) 544b1a09f1eSDavid Sterba write_unlock(&eb->fs_info->tree_mod_log_lock); 5455de865eeSFilipe David Borba Manana kfree(tm_list); 5465de865eeSFilipe David Borba Manana kfree(tm); 5475de865eeSFilipe David Borba Manana 5485de865eeSFilipe David Borba Manana return ret; 5495de865eeSFilipe David Borba Manana } 5505de865eeSFilipe David Borba Manana 5515de865eeSFilipe David Borba Manana static inline int 5525de865eeSFilipe David Borba Manana __tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, 5535de865eeSFilipe David Borba Manana struct tree_mod_elem **tm_list, 5545de865eeSFilipe David Borba Manana int nritems) 555097b8a7cSJan Schmidt { 5565de865eeSFilipe David Borba Manana int i, j; 557097b8a7cSJan Schmidt int ret; 558097b8a7cSJan Schmidt 559097b8a7cSJan Schmidt for (i = nritems 
- 1; i >= 0; i--) { 5605de865eeSFilipe David Borba Manana ret = __tree_mod_log_insert(fs_info, tm_list[i]); 5615de865eeSFilipe David Borba Manana if (ret) { 5625de865eeSFilipe David Borba Manana for (j = nritems - 1; j > i; j--) 5635de865eeSFilipe David Borba Manana rb_erase(&tm_list[j]->node, 5645de865eeSFilipe David Borba Manana &fs_info->tree_mod_log); 5655de865eeSFilipe David Borba Manana return ret; 566097b8a7cSJan Schmidt } 567097b8a7cSJan Schmidt } 568097b8a7cSJan Schmidt 5695de865eeSFilipe David Borba Manana return 0; 5705de865eeSFilipe David Borba Manana } 5715de865eeSFilipe David Borba Manana 57295b757c1SDavid Sterba static noinline int tree_mod_log_insert_root(struct extent_buffer *old_root, 57395b757c1SDavid Sterba struct extent_buffer *new_root, int log_removal) 574bd989ba3SJan Schmidt { 57595b757c1SDavid Sterba struct btrfs_fs_info *fs_info = old_root->fs_info; 5765de865eeSFilipe David Borba Manana struct tree_mod_elem *tm = NULL; 5775de865eeSFilipe David Borba Manana struct tree_mod_elem **tm_list = NULL; 5785de865eeSFilipe David Borba Manana int nritems = 0; 5795de865eeSFilipe David Borba Manana int ret = 0; 5805de865eeSFilipe David Borba Manana int i; 581bd989ba3SJan Schmidt 5825de865eeSFilipe David Borba Manana if (!tree_mod_need_log(fs_info, NULL)) 583097b8a7cSJan Schmidt return 0; 584097b8a7cSJan Schmidt 5855de865eeSFilipe David Borba Manana if (log_removal && btrfs_header_level(old_root) > 0) { 5865de865eeSFilipe David Borba Manana nritems = btrfs_header_nritems(old_root); 58731e818feSDavid Sterba tm_list = kcalloc(nritems, sizeof(struct tree_mod_elem *), 588bcc8e07fSDavid Sterba GFP_NOFS); 5895de865eeSFilipe David Borba Manana if (!tm_list) { 5905de865eeSFilipe David Borba Manana ret = -ENOMEM; 5915de865eeSFilipe David Borba Manana goto free_tms; 5925de865eeSFilipe David Borba Manana } 5935de865eeSFilipe David Borba Manana for (i = 0; i < nritems; i++) { 5945de865eeSFilipe David Borba Manana tm_list[i] = alloc_tree_mod_elem(old_root, i, 
595bcc8e07fSDavid Sterba MOD_LOG_KEY_REMOVE_WHILE_FREEING, GFP_NOFS); 5965de865eeSFilipe David Borba Manana if (!tm_list[i]) { 5975de865eeSFilipe David Borba Manana ret = -ENOMEM; 5985de865eeSFilipe David Borba Manana goto free_tms; 5995de865eeSFilipe David Borba Manana } 6005de865eeSFilipe David Borba Manana } 6015de865eeSFilipe David Borba Manana } 602d9abbf1cSJan Schmidt 603bcc8e07fSDavid Sterba tm = kzalloc(sizeof(*tm), GFP_NOFS); 6045de865eeSFilipe David Borba Manana if (!tm) { 6055de865eeSFilipe David Borba Manana ret = -ENOMEM; 6065de865eeSFilipe David Borba Manana goto free_tms; 6075de865eeSFilipe David Borba Manana } 608bd989ba3SJan Schmidt 609298cfd36SChandan Rajendra tm->logical = new_root->start; 610bd989ba3SJan Schmidt tm->old_root.logical = old_root->start; 611bd989ba3SJan Schmidt tm->old_root.level = btrfs_header_level(old_root); 612bd989ba3SJan Schmidt tm->generation = btrfs_header_generation(old_root); 613bd989ba3SJan Schmidt tm->op = MOD_LOG_ROOT_REPLACE; 614bd989ba3SJan Schmidt 6155de865eeSFilipe David Borba Manana if (tree_mod_dont_log(fs_info, NULL)) 6165de865eeSFilipe David Borba Manana goto free_tms; 6175de865eeSFilipe David Borba Manana 6185de865eeSFilipe David Borba Manana if (tm_list) 6195de865eeSFilipe David Borba Manana ret = __tree_mod_log_free_eb(fs_info, tm_list, nritems); 6205de865eeSFilipe David Borba Manana if (!ret) 6215de865eeSFilipe David Borba Manana ret = __tree_mod_log_insert(fs_info, tm); 6225de865eeSFilipe David Borba Manana 623b1a09f1eSDavid Sterba write_unlock(&fs_info->tree_mod_log_lock); 6245de865eeSFilipe David Borba Manana if (ret) 6255de865eeSFilipe David Borba Manana goto free_tms; 6265de865eeSFilipe David Borba Manana kfree(tm_list); 6275de865eeSFilipe David Borba Manana 6285de865eeSFilipe David Borba Manana return ret; 6295de865eeSFilipe David Borba Manana 6305de865eeSFilipe David Borba Manana free_tms: 6315de865eeSFilipe David Borba Manana if (tm_list) { 6325de865eeSFilipe David Borba Manana for (i = 0; i < 
nritems; i++) 6335de865eeSFilipe David Borba Manana kfree(tm_list[i]); 6345de865eeSFilipe David Borba Manana kfree(tm_list); 6355de865eeSFilipe David Borba Manana } 6365de865eeSFilipe David Borba Manana kfree(tm); 6375de865eeSFilipe David Borba Manana 6385de865eeSFilipe David Borba Manana return ret; 639bd989ba3SJan Schmidt } 640bd989ba3SJan Schmidt 641bd989ba3SJan Schmidt static struct tree_mod_elem * 642bd989ba3SJan Schmidt __tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq, 643bd989ba3SJan Schmidt int smallest) 644bd989ba3SJan Schmidt { 645bd989ba3SJan Schmidt struct rb_root *tm_root; 646bd989ba3SJan Schmidt struct rb_node *node; 647bd989ba3SJan Schmidt struct tree_mod_elem *cur = NULL; 648bd989ba3SJan Schmidt struct tree_mod_elem *found = NULL; 649bd989ba3SJan Schmidt 650b1a09f1eSDavid Sterba read_lock(&fs_info->tree_mod_log_lock); 651bd989ba3SJan Schmidt tm_root = &fs_info->tree_mod_log; 652bd989ba3SJan Schmidt node = tm_root->rb_node; 653bd989ba3SJan Schmidt while (node) { 6546b4df8b6SGeliang Tang cur = rb_entry(node, struct tree_mod_elem, node); 655298cfd36SChandan Rajendra if (cur->logical < start) { 656bd989ba3SJan Schmidt node = node->rb_left; 657298cfd36SChandan Rajendra } else if (cur->logical > start) { 658bd989ba3SJan Schmidt node = node->rb_right; 659097b8a7cSJan Schmidt } else if (cur->seq < min_seq) { 660bd989ba3SJan Schmidt node = node->rb_left; 661bd989ba3SJan Schmidt } else if (!smallest) { 662bd989ba3SJan Schmidt /* we want the node with the highest seq */ 663bd989ba3SJan Schmidt if (found) 664097b8a7cSJan Schmidt BUG_ON(found->seq > cur->seq); 665bd989ba3SJan Schmidt found = cur; 666bd989ba3SJan Schmidt node = node->rb_left; 667097b8a7cSJan Schmidt } else if (cur->seq > min_seq) { 668bd989ba3SJan Schmidt /* we want the node with the smallest seq */ 669bd989ba3SJan Schmidt if (found) 670097b8a7cSJan Schmidt BUG_ON(found->seq < cur->seq); 671bd989ba3SJan Schmidt found = cur; 672bd989ba3SJan Schmidt node = node->rb_right; 
673bd989ba3SJan Schmidt } else { 674bd989ba3SJan Schmidt found = cur; 675bd989ba3SJan Schmidt break; 676bd989ba3SJan Schmidt } 677bd989ba3SJan Schmidt } 678b1a09f1eSDavid Sterba read_unlock(&fs_info->tree_mod_log_lock); 679bd989ba3SJan Schmidt 680bd989ba3SJan Schmidt return found; 681bd989ba3SJan Schmidt } 682bd989ba3SJan Schmidt 683bd989ba3SJan Schmidt /* 684bd989ba3SJan Schmidt * this returns the element from the log with the smallest time sequence 685bd989ba3SJan Schmidt * value that's in the log (the oldest log item). any element with a time 686bd989ba3SJan Schmidt * sequence lower than min_seq will be ignored. 687bd989ba3SJan Schmidt */ 688bd989ba3SJan Schmidt static struct tree_mod_elem * 689bd989ba3SJan Schmidt tree_mod_log_search_oldest(struct btrfs_fs_info *fs_info, u64 start, 690bd989ba3SJan Schmidt u64 min_seq) 691bd989ba3SJan Schmidt { 692bd989ba3SJan Schmidt return __tree_mod_log_search(fs_info, start, min_seq, 1); 693bd989ba3SJan Schmidt } 694bd989ba3SJan Schmidt 695bd989ba3SJan Schmidt /* 696bd989ba3SJan Schmidt * this returns the element from the log with the largest time sequence 697bd989ba3SJan Schmidt * value that's in the log (the most recent log item). any element with 698bd989ba3SJan Schmidt * a time sequence lower than min_seq will be ignored. 
699bd989ba3SJan Schmidt */ 700bd989ba3SJan Schmidt static struct tree_mod_elem * 701bd989ba3SJan Schmidt tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq) 702bd989ba3SJan Schmidt { 703bd989ba3SJan Schmidt return __tree_mod_log_search(fs_info, start, min_seq, 0); 704bd989ba3SJan Schmidt } 705bd989ba3SJan Schmidt 706ed874f0dSDavid Sterba static noinline int tree_mod_log_eb_copy(struct extent_buffer *dst, 707bd989ba3SJan Schmidt struct extent_buffer *src, unsigned long dst_offset, 70890f8d62eSJan Schmidt unsigned long src_offset, int nr_items) 709bd989ba3SJan Schmidt { 710ed874f0dSDavid Sterba struct btrfs_fs_info *fs_info = dst->fs_info; 7115de865eeSFilipe David Borba Manana int ret = 0; 7125de865eeSFilipe David Borba Manana struct tree_mod_elem **tm_list = NULL; 7135de865eeSFilipe David Borba Manana struct tree_mod_elem **tm_list_add, **tm_list_rem; 714bd989ba3SJan Schmidt int i; 7155de865eeSFilipe David Borba Manana int locked = 0; 716bd989ba3SJan Schmidt 7175de865eeSFilipe David Borba Manana if (!tree_mod_need_log(fs_info, NULL)) 7185de865eeSFilipe David Borba Manana return 0; 719bd989ba3SJan Schmidt 720c8cc6341SJosef Bacik if (btrfs_header_level(dst) == 0 && btrfs_header_level(src) == 0) 7215de865eeSFilipe David Borba Manana return 0; 7225de865eeSFilipe David Borba Manana 72331e818feSDavid Sterba tm_list = kcalloc(nr_items * 2, sizeof(struct tree_mod_elem *), 7245de865eeSFilipe David Borba Manana GFP_NOFS); 7255de865eeSFilipe David Borba Manana if (!tm_list) 7265de865eeSFilipe David Borba Manana return -ENOMEM; 7275de865eeSFilipe David Borba Manana 7285de865eeSFilipe David Borba Manana tm_list_add = tm_list; 7295de865eeSFilipe David Borba Manana tm_list_rem = tm_list + nr_items; 7305de865eeSFilipe David Borba Manana for (i = 0; i < nr_items; i++) { 7315de865eeSFilipe David Borba Manana tm_list_rem[i] = alloc_tree_mod_elem(src, i + src_offset, 7325de865eeSFilipe David Borba Manana MOD_LOG_KEY_REMOVE, GFP_NOFS); 7335de865eeSFilipe David 
Borba Manana if (!tm_list_rem[i]) { 7345de865eeSFilipe David Borba Manana ret = -ENOMEM; 7355de865eeSFilipe David Borba Manana goto free_tms; 7365de865eeSFilipe David Borba Manana } 7375de865eeSFilipe David Borba Manana 7385de865eeSFilipe David Borba Manana tm_list_add[i] = alloc_tree_mod_elem(dst, i + dst_offset, 7395de865eeSFilipe David Borba Manana MOD_LOG_KEY_ADD, GFP_NOFS); 7405de865eeSFilipe David Borba Manana if (!tm_list_add[i]) { 7415de865eeSFilipe David Borba Manana ret = -ENOMEM; 7425de865eeSFilipe David Borba Manana goto free_tms; 7435de865eeSFilipe David Borba Manana } 7445de865eeSFilipe David Borba Manana } 7455de865eeSFilipe David Borba Manana 7465de865eeSFilipe David Borba Manana if (tree_mod_dont_log(fs_info, NULL)) 7475de865eeSFilipe David Borba Manana goto free_tms; 7485de865eeSFilipe David Borba Manana locked = 1; 749bd989ba3SJan Schmidt 750bd989ba3SJan Schmidt for (i = 0; i < nr_items; i++) { 7515de865eeSFilipe David Borba Manana ret = __tree_mod_log_insert(fs_info, tm_list_rem[i]); 7525de865eeSFilipe David Borba Manana if (ret) 7535de865eeSFilipe David Borba Manana goto free_tms; 7545de865eeSFilipe David Borba Manana ret = __tree_mod_log_insert(fs_info, tm_list_add[i]); 7555de865eeSFilipe David Borba Manana if (ret) 7565de865eeSFilipe David Borba Manana goto free_tms; 757bd989ba3SJan Schmidt } 7585de865eeSFilipe David Borba Manana 759b1a09f1eSDavid Sterba write_unlock(&fs_info->tree_mod_log_lock); 7605de865eeSFilipe David Borba Manana kfree(tm_list); 7615de865eeSFilipe David Borba Manana 7625de865eeSFilipe David Borba Manana return 0; 7635de865eeSFilipe David Borba Manana 7645de865eeSFilipe David Borba Manana free_tms: 7655de865eeSFilipe David Borba Manana for (i = 0; i < nr_items * 2; i++) { 7665de865eeSFilipe David Borba Manana if (tm_list[i] && !RB_EMPTY_NODE(&tm_list[i]->node)) 7675de865eeSFilipe David Borba Manana rb_erase(&tm_list[i]->node, &fs_info->tree_mod_log); 7685de865eeSFilipe David Borba Manana kfree(tm_list[i]); 
7695de865eeSFilipe David Borba Manana } 7705de865eeSFilipe David Borba Manana if (locked) 771b1a09f1eSDavid Sterba write_unlock(&fs_info->tree_mod_log_lock); 7725de865eeSFilipe David Borba Manana kfree(tm_list); 7735de865eeSFilipe David Borba Manana 7745de865eeSFilipe David Borba Manana return ret; 775bd989ba3SJan Schmidt } 776bd989ba3SJan Schmidt 777db7279a2SDavid Sterba static noinline int tree_mod_log_free_eb(struct extent_buffer *eb) 778bd989ba3SJan Schmidt { 7795de865eeSFilipe David Borba Manana struct tree_mod_elem **tm_list = NULL; 7805de865eeSFilipe David Borba Manana int nritems = 0; 7815de865eeSFilipe David Borba Manana int i; 7825de865eeSFilipe David Borba Manana int ret = 0; 7835de865eeSFilipe David Borba Manana 7845de865eeSFilipe David Borba Manana if (btrfs_header_level(eb) == 0) 7855de865eeSFilipe David Borba Manana return 0; 7865de865eeSFilipe David Borba Manana 787db7279a2SDavid Sterba if (!tree_mod_need_log(eb->fs_info, NULL)) 7885de865eeSFilipe David Borba Manana return 0; 7895de865eeSFilipe David Borba Manana 7905de865eeSFilipe David Borba Manana nritems = btrfs_header_nritems(eb); 79131e818feSDavid Sterba tm_list = kcalloc(nritems, sizeof(struct tree_mod_elem *), GFP_NOFS); 7925de865eeSFilipe David Borba Manana if (!tm_list) 7935de865eeSFilipe David Borba Manana return -ENOMEM; 7945de865eeSFilipe David Borba Manana 7955de865eeSFilipe David Borba Manana for (i = 0; i < nritems; i++) { 7965de865eeSFilipe David Borba Manana tm_list[i] = alloc_tree_mod_elem(eb, i, 7975de865eeSFilipe David Borba Manana MOD_LOG_KEY_REMOVE_WHILE_FREEING, GFP_NOFS); 7985de865eeSFilipe David Borba Manana if (!tm_list[i]) { 7995de865eeSFilipe David Borba Manana ret = -ENOMEM; 8005de865eeSFilipe David Borba Manana goto free_tms; 8015de865eeSFilipe David Borba Manana } 8025de865eeSFilipe David Borba Manana } 8035de865eeSFilipe David Borba Manana 804db7279a2SDavid Sterba if (tree_mod_dont_log(eb->fs_info, eb)) 8055de865eeSFilipe David Borba Manana goto free_tms; 
8065de865eeSFilipe David Borba Manana 807db7279a2SDavid Sterba ret = __tree_mod_log_free_eb(eb->fs_info, tm_list, nritems); 808b1a09f1eSDavid Sterba write_unlock(&eb->fs_info->tree_mod_log_lock); 8095de865eeSFilipe David Borba Manana if (ret) 8105de865eeSFilipe David Borba Manana goto free_tms; 8115de865eeSFilipe David Borba Manana kfree(tm_list); 8125de865eeSFilipe David Borba Manana 8135de865eeSFilipe David Borba Manana return 0; 8145de865eeSFilipe David Borba Manana 8155de865eeSFilipe David Borba Manana free_tms: 8165de865eeSFilipe David Borba Manana for (i = 0; i < nritems; i++) 8175de865eeSFilipe David Borba Manana kfree(tm_list[i]); 8185de865eeSFilipe David Borba Manana kfree(tm_list); 8195de865eeSFilipe David Borba Manana 8205de865eeSFilipe David Borba Manana return ret; 821bd989ba3SJan Schmidt } 822bd989ba3SJan Schmidt 823d352ac68SChris Mason /* 8245d4f98a2SYan Zheng * check if the tree block can be shared by multiple trees 8255d4f98a2SYan Zheng */ 8265d4f98a2SYan Zheng int btrfs_block_can_be_shared(struct btrfs_root *root, 8275d4f98a2SYan Zheng struct extent_buffer *buf) 8285d4f98a2SYan Zheng { 8295d4f98a2SYan Zheng /* 83092a7cc42SQu Wenruo * Tree blocks not in shareable trees and tree roots are never shared. 83192a7cc42SQu Wenruo * If a block was allocated after the last snapshot and the block was 83292a7cc42SQu Wenruo * not allocated by tree relocation, we know the block is not shared. 
8335d4f98a2SYan Zheng */ 83492a7cc42SQu Wenruo if (test_bit(BTRFS_ROOT_SHAREABLE, &root->state) && 8355d4f98a2SYan Zheng buf != root->node && buf != root->commit_root && 8365d4f98a2SYan Zheng (btrfs_header_generation(buf) <= 8375d4f98a2SYan Zheng btrfs_root_last_snapshot(&root->root_item) || 8385d4f98a2SYan Zheng btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) 8395d4f98a2SYan Zheng return 1; 840a79865c6SNikolay Borisov 8415d4f98a2SYan Zheng return 0; 8425d4f98a2SYan Zheng } 8435d4f98a2SYan Zheng 8445d4f98a2SYan Zheng static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans, 8455d4f98a2SYan Zheng struct btrfs_root *root, 8465d4f98a2SYan Zheng struct extent_buffer *buf, 847f0486c68SYan, Zheng struct extent_buffer *cow, 848f0486c68SYan, Zheng int *last_ref) 8495d4f98a2SYan Zheng { 8500b246afaSJeff Mahoney struct btrfs_fs_info *fs_info = root->fs_info; 8515d4f98a2SYan Zheng u64 refs; 8525d4f98a2SYan Zheng u64 owner; 8535d4f98a2SYan Zheng u64 flags; 8545d4f98a2SYan Zheng u64 new_flags = 0; 8555d4f98a2SYan Zheng int ret; 8565d4f98a2SYan Zheng 8575d4f98a2SYan Zheng /* 8585d4f98a2SYan Zheng * Backrefs update rules: 8595d4f98a2SYan Zheng * 8605d4f98a2SYan Zheng * Always use full backrefs for extent pointers in tree block 8615d4f98a2SYan Zheng * allocated by tree relocation. 8625d4f98a2SYan Zheng * 8635d4f98a2SYan Zheng * If a shared tree block is no longer referenced by its owner 8645d4f98a2SYan Zheng * tree (btrfs_header_owner(buf) == root->root_key.objectid), 8655d4f98a2SYan Zheng * use full backrefs for extent pointers in tree block. 8665d4f98a2SYan Zheng * 8675d4f98a2SYan Zheng * If a tree block is been relocating 8685d4f98a2SYan Zheng * (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID), 8695d4f98a2SYan Zheng * use full backrefs for extent pointers in tree block. 8705d4f98a2SYan Zheng * The reason for this is some operations (such as drop tree) 8715d4f98a2SYan Zheng * are only allowed for blocks use full backrefs. 
8725d4f98a2SYan Zheng */ 8735d4f98a2SYan Zheng 8745d4f98a2SYan Zheng if (btrfs_block_can_be_shared(root, buf)) { 8752ff7e61eSJeff Mahoney ret = btrfs_lookup_extent_info(trans, fs_info, buf->start, 8763173a18fSJosef Bacik btrfs_header_level(buf), 1, 8773173a18fSJosef Bacik &refs, &flags); 878be1a5564SMark Fasheh if (ret) 879be1a5564SMark Fasheh return ret; 880e5df9573SMark Fasheh if (refs == 0) { 881e5df9573SMark Fasheh ret = -EROFS; 8820b246afaSJeff Mahoney btrfs_handle_fs_error(fs_info, ret, NULL); 883e5df9573SMark Fasheh return ret; 884e5df9573SMark Fasheh } 8855d4f98a2SYan Zheng } else { 8865d4f98a2SYan Zheng refs = 1; 8875d4f98a2SYan Zheng if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID || 8885d4f98a2SYan Zheng btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV) 8895d4f98a2SYan Zheng flags = BTRFS_BLOCK_FLAG_FULL_BACKREF; 8905d4f98a2SYan Zheng else 8915d4f98a2SYan Zheng flags = 0; 8925d4f98a2SYan Zheng } 8935d4f98a2SYan Zheng 8945d4f98a2SYan Zheng owner = btrfs_header_owner(buf); 8955d4f98a2SYan Zheng BUG_ON(owner == BTRFS_TREE_RELOC_OBJECTID && 8965d4f98a2SYan Zheng !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)); 8975d4f98a2SYan Zheng 8985d4f98a2SYan Zheng if (refs > 1) { 8995d4f98a2SYan Zheng if ((owner == root->root_key.objectid || 9005d4f98a2SYan Zheng root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) && 9015d4f98a2SYan Zheng !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) { 902e339a6b0SJosef Bacik ret = btrfs_inc_ref(trans, root, buf, 1); 903692826b2SJeff Mahoney if (ret) 904692826b2SJeff Mahoney return ret; 9055d4f98a2SYan Zheng 9065d4f98a2SYan Zheng if (root->root_key.objectid == 9075d4f98a2SYan Zheng BTRFS_TREE_RELOC_OBJECTID) { 908e339a6b0SJosef Bacik ret = btrfs_dec_ref(trans, root, buf, 0); 909692826b2SJeff Mahoney if (ret) 910692826b2SJeff Mahoney return ret; 911e339a6b0SJosef Bacik ret = btrfs_inc_ref(trans, root, cow, 1); 912692826b2SJeff Mahoney if (ret) 913692826b2SJeff Mahoney return ret; 9145d4f98a2SYan Zheng } 9155d4f98a2SYan Zheng 
new_flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; 9165d4f98a2SYan Zheng } else { 9175d4f98a2SYan Zheng 9185d4f98a2SYan Zheng if (root->root_key.objectid == 9195d4f98a2SYan Zheng BTRFS_TREE_RELOC_OBJECTID) 920e339a6b0SJosef Bacik ret = btrfs_inc_ref(trans, root, cow, 1); 9215d4f98a2SYan Zheng else 922e339a6b0SJosef Bacik ret = btrfs_inc_ref(trans, root, cow, 0); 923692826b2SJeff Mahoney if (ret) 924692826b2SJeff Mahoney return ret; 9255d4f98a2SYan Zheng } 9265d4f98a2SYan Zheng if (new_flags != 0) { 927b1c79e09SJosef Bacik int level = btrfs_header_level(buf); 928b1c79e09SJosef Bacik 92942c9d0b5SDavid Sterba ret = btrfs_set_disk_extent_flags(trans, buf, 930b1c79e09SJosef Bacik new_flags, level, 0); 931be1a5564SMark Fasheh if (ret) 932be1a5564SMark Fasheh return ret; 9335d4f98a2SYan Zheng } 9345d4f98a2SYan Zheng } else { 9355d4f98a2SYan Zheng if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) { 9365d4f98a2SYan Zheng if (root->root_key.objectid == 9375d4f98a2SYan Zheng BTRFS_TREE_RELOC_OBJECTID) 938e339a6b0SJosef Bacik ret = btrfs_inc_ref(trans, root, cow, 1); 9395d4f98a2SYan Zheng else 940e339a6b0SJosef Bacik ret = btrfs_inc_ref(trans, root, cow, 0); 941692826b2SJeff Mahoney if (ret) 942692826b2SJeff Mahoney return ret; 943e339a6b0SJosef Bacik ret = btrfs_dec_ref(trans, root, buf, 1); 944692826b2SJeff Mahoney if (ret) 945692826b2SJeff Mahoney return ret; 9465d4f98a2SYan Zheng } 9476a884d7dSDavid Sterba btrfs_clean_tree_block(buf); 948f0486c68SYan, Zheng *last_ref = 1; 9495d4f98a2SYan Zheng } 9505d4f98a2SYan Zheng return 0; 9515d4f98a2SYan Zheng } 9525d4f98a2SYan Zheng 953a6279470SFilipe Manana static struct extent_buffer *alloc_tree_block_no_bg_flush( 954a6279470SFilipe Manana struct btrfs_trans_handle *trans, 955a6279470SFilipe Manana struct btrfs_root *root, 956a6279470SFilipe Manana u64 parent_start, 957a6279470SFilipe Manana const struct btrfs_disk_key *disk_key, 958a6279470SFilipe Manana int level, 959a6279470SFilipe Manana u64 hint, 9609631e4ccSJosef Bacik u64 empty_size, 
9619631e4ccSJosef Bacik enum btrfs_lock_nesting nest) 962a6279470SFilipe Manana { 963a6279470SFilipe Manana struct btrfs_fs_info *fs_info = root->fs_info; 964a6279470SFilipe Manana struct extent_buffer *ret; 965a6279470SFilipe Manana 966a6279470SFilipe Manana /* 967a6279470SFilipe Manana * If we are COWing a node/leaf from the extent, chunk, device or free 968a6279470SFilipe Manana * space trees, make sure that we do not finish block group creation of 969a6279470SFilipe Manana * pending block groups. We do this to avoid a deadlock. 970a6279470SFilipe Manana * COWing can result in allocation of a new chunk, and flushing pending 971a6279470SFilipe Manana * block groups (btrfs_create_pending_block_groups()) can be triggered 972a6279470SFilipe Manana * when finishing allocation of a new chunk. Creation of a pending block 973a6279470SFilipe Manana * group modifies the extent, chunk, device and free space trees, 974a6279470SFilipe Manana * therefore we could deadlock with ourselves since we are holding a 975a6279470SFilipe Manana * lock on an extent buffer that btrfs_create_pending_block_groups() may 976a6279470SFilipe Manana * try to COW later. 977a6279470SFilipe Manana * For similar reasons, we also need to delay flushing pending block 978a6279470SFilipe Manana * groups when splitting a leaf or node, from one of those trees, since 979a6279470SFilipe Manana * we are holding a write lock on it and its parent or when inserting a 980a6279470SFilipe Manana * new root node for one of those trees. 
981a6279470SFilipe Manana */ 982a6279470SFilipe Manana if (root == fs_info->extent_root || 983a6279470SFilipe Manana root == fs_info->chunk_root || 984a6279470SFilipe Manana root == fs_info->dev_root || 985a6279470SFilipe Manana root == fs_info->free_space_root) 986a6279470SFilipe Manana trans->can_flush_pending_bgs = false; 987a6279470SFilipe Manana 988a6279470SFilipe Manana ret = btrfs_alloc_tree_block(trans, root, parent_start, 989a6279470SFilipe Manana root->root_key.objectid, disk_key, level, 9909631e4ccSJosef Bacik hint, empty_size, nest); 991a6279470SFilipe Manana trans->can_flush_pending_bgs = true; 992a6279470SFilipe Manana 993a6279470SFilipe Manana return ret; 994a6279470SFilipe Manana } 995a6279470SFilipe Manana 9965d4f98a2SYan Zheng /* 997d397712bSChris Mason * does the dirty work in cow of a single block. The parent block (if 998d397712bSChris Mason * supplied) is updated to point to the new cow copy. The new buffer is marked 999d397712bSChris Mason * dirty and returned locked. If you modify the block it needs to be marked 1000d397712bSChris Mason * dirty again. 1001d352ac68SChris Mason * 1002d352ac68SChris Mason * search_start -- an allocation hint for the new block 1003d352ac68SChris Mason * 1004d397712bSChris Mason * empty_size -- a hint that you plan on doing more cow. This is the size in 1005d397712bSChris Mason * bytes the allocator should try to find free next to the block it returns. 1006d397712bSChris Mason * This is just a hint and may be ignored by the allocator. 
1007d352ac68SChris Mason */ 1008d397712bSChris Mason static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, 10095f39d397SChris Mason struct btrfs_root *root, 10105f39d397SChris Mason struct extent_buffer *buf, 10115f39d397SChris Mason struct extent_buffer *parent, int parent_slot, 10125f39d397SChris Mason struct extent_buffer **cow_ret, 10139631e4ccSJosef Bacik u64 search_start, u64 empty_size, 10149631e4ccSJosef Bacik enum btrfs_lock_nesting nest) 10156702ed49SChris Mason { 10160b246afaSJeff Mahoney struct btrfs_fs_info *fs_info = root->fs_info; 10175d4f98a2SYan Zheng struct btrfs_disk_key disk_key; 10185f39d397SChris Mason struct extent_buffer *cow; 1019be1a5564SMark Fasheh int level, ret; 1020f0486c68SYan, Zheng int last_ref = 0; 1021925baeddSChris Mason int unlock_orig = 0; 10220f5053ebSGoldwyn Rodrigues u64 parent_start = 0; 10236702ed49SChris Mason 1024925baeddSChris Mason if (*cow_ret == buf) 1025925baeddSChris Mason unlock_orig = 1; 1026925baeddSChris Mason 1027b9447ef8SChris Mason btrfs_assert_tree_locked(buf); 1028925baeddSChris Mason 102992a7cc42SQu Wenruo WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) && 10300b246afaSJeff Mahoney trans->transid != fs_info->running_transaction->transid); 103192a7cc42SQu Wenruo WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) && 103227cdeb70SMiao Xie trans->transid != root->last_trans); 10335f39d397SChris Mason 10347bb86316SChris Mason level = btrfs_header_level(buf); 103531840ae1SZheng Yan 10365d4f98a2SYan Zheng if (level == 0) 10375d4f98a2SYan Zheng btrfs_item_key(buf, &disk_key, 0); 10385d4f98a2SYan Zheng else 10395d4f98a2SYan Zheng btrfs_node_key(buf, &disk_key, 0); 10405d4f98a2SYan Zheng 10410f5053ebSGoldwyn Rodrigues if ((root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) && parent) 10425d4f98a2SYan Zheng parent_start = parent->start; 10435d4f98a2SYan Zheng 1044a6279470SFilipe Manana cow = alloc_tree_block_no_bg_flush(trans, root, parent_start, &disk_key, 10459631e4ccSJosef Bacik level, 
search_start, empty_size, nest); 10466702ed49SChris Mason if (IS_ERR(cow)) 10476702ed49SChris Mason return PTR_ERR(cow); 10486702ed49SChris Mason 1049b4ce94deSChris Mason /* cow is set to blocking by btrfs_init_new_buffer */ 1050b4ce94deSChris Mason 105158e8012cSDavid Sterba copy_extent_buffer_full(cow, buf); 1052db94535dSChris Mason btrfs_set_header_bytenr(cow, cow->start); 10535f39d397SChris Mason btrfs_set_header_generation(cow, trans->transid); 10545d4f98a2SYan Zheng btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV); 10555d4f98a2SYan Zheng btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN | 10565d4f98a2SYan Zheng BTRFS_HEADER_FLAG_RELOC); 10575d4f98a2SYan Zheng if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) 10585d4f98a2SYan Zheng btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC); 10595d4f98a2SYan Zheng else 10605f39d397SChris Mason btrfs_set_header_owner(cow, root->root_key.objectid); 10616702ed49SChris Mason 1062de37aa51SNikolay Borisov write_extent_buffer_fsid(cow, fs_info->fs_devices->metadata_uuid); 10632b82032cSYan Zheng 1064be1a5564SMark Fasheh ret = update_ref_for_cow(trans, root, buf, cow, &last_ref); 1065b68dc2a9SMark Fasheh if (ret) { 106666642832SJeff Mahoney btrfs_abort_transaction(trans, ret); 1067b68dc2a9SMark Fasheh return ret; 1068b68dc2a9SMark Fasheh } 10691a40e23bSZheng Yan 107092a7cc42SQu Wenruo if (test_bit(BTRFS_ROOT_SHAREABLE, &root->state)) { 107183d4cfd4SJosef Bacik ret = btrfs_reloc_cow_block(trans, root, buf, cow); 107293314e3bSZhaolei if (ret) { 107366642832SJeff Mahoney btrfs_abort_transaction(trans, ret); 107483d4cfd4SJosef Bacik return ret; 107583d4cfd4SJosef Bacik } 107693314e3bSZhaolei } 10773fd0a558SYan, Zheng 10786702ed49SChris Mason if (buf == root->node) { 1079925baeddSChris Mason WARN_ON(parent && parent != buf); 10805d4f98a2SYan Zheng if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID || 10815d4f98a2SYan Zheng btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV) 10825d4f98a2SYan Zheng 
parent_start = buf->start; 1083925baeddSChris Mason 108467439dadSDavid Sterba atomic_inc(&cow->refs); 1085d9d19a01SDavid Sterba ret = tree_mod_log_insert_root(root->node, cow, 1); 1086d9d19a01SDavid Sterba BUG_ON(ret < 0); 1087240f62c8SChris Mason rcu_assign_pointer(root->node, cow); 1088925baeddSChris Mason 1089f0486c68SYan, Zheng btrfs_free_tree_block(trans, root, buf, parent_start, 10905581a51aSJan Schmidt last_ref); 10915f39d397SChris Mason free_extent_buffer(buf); 10920b86a832SChris Mason add_root_to_dirty_list(root); 10936702ed49SChris Mason } else { 10945d4f98a2SYan Zheng WARN_ON(trans->transid != btrfs_header_generation(parent)); 1095e09c2efeSDavid Sterba tree_mod_log_insert_key(parent, parent_slot, 1096c8cc6341SJosef Bacik MOD_LOG_KEY_REPLACE, GFP_NOFS); 10975f39d397SChris Mason btrfs_set_node_blockptr(parent, parent_slot, 1098db94535dSChris Mason cow->start); 109974493f7aSChris Mason btrfs_set_node_ptr_generation(parent, parent_slot, 110074493f7aSChris Mason trans->transid); 11016702ed49SChris Mason btrfs_mark_buffer_dirty(parent); 11025de865eeSFilipe David Borba Manana if (last_ref) { 1103db7279a2SDavid Sterba ret = tree_mod_log_free_eb(buf); 11045de865eeSFilipe David Borba Manana if (ret) { 110566642832SJeff Mahoney btrfs_abort_transaction(trans, ret); 11065de865eeSFilipe David Borba Manana return ret; 11075de865eeSFilipe David Borba Manana } 11085de865eeSFilipe David Borba Manana } 1109f0486c68SYan, Zheng btrfs_free_tree_block(trans, root, buf, parent_start, 11105581a51aSJan Schmidt last_ref); 11116702ed49SChris Mason } 1112925baeddSChris Mason if (unlock_orig) 1113925baeddSChris Mason btrfs_tree_unlock(buf); 11143083ee2eSJosef Bacik free_extent_buffer_stale(buf); 11156702ed49SChris Mason btrfs_mark_buffer_dirty(cow); 11166702ed49SChris Mason *cow_ret = cow; 11176702ed49SChris Mason return 0; 11186702ed49SChris Mason } 11196702ed49SChris Mason 11205d9e75c4SJan Schmidt /* 11215d9e75c4SJan Schmidt * returns the logical address of the oldest predecessor 
of the given root. 11225d9e75c4SJan Schmidt * entries older than time_seq are ignored. 11235d9e75c4SJan Schmidt */ 1124bcd24dabSDavid Sterba static struct tree_mod_elem *__tree_mod_log_oldest_root( 112530b0463aSJan Schmidt struct extent_buffer *eb_root, u64 time_seq) 11265d9e75c4SJan Schmidt { 11275d9e75c4SJan Schmidt struct tree_mod_elem *tm; 11285d9e75c4SJan Schmidt struct tree_mod_elem *found = NULL; 112930b0463aSJan Schmidt u64 root_logical = eb_root->start; 11305d9e75c4SJan Schmidt int looped = 0; 11315d9e75c4SJan Schmidt 11325d9e75c4SJan Schmidt if (!time_seq) 113335a3621bSStefan Behrens return NULL; 11345d9e75c4SJan Schmidt 11355d9e75c4SJan Schmidt /* 1136298cfd36SChandan Rajendra * the very last operation that's logged for a root is the 1137298cfd36SChandan Rajendra * replacement operation (if it is replaced at all). this has 1138298cfd36SChandan Rajendra * the logical address of the *new* root, making it the very 1139298cfd36SChandan Rajendra * first operation that's logged for this root. 11405d9e75c4SJan Schmidt */ 11415d9e75c4SJan Schmidt while (1) { 1142bcd24dabSDavid Sterba tm = tree_mod_log_search_oldest(eb_root->fs_info, root_logical, 11435d9e75c4SJan Schmidt time_seq); 11445d9e75c4SJan Schmidt if (!looped && !tm) 114535a3621bSStefan Behrens return NULL; 11465d9e75c4SJan Schmidt /* 114728da9fb4SJan Schmidt * if there are no tree operation for the oldest root, we simply 114828da9fb4SJan Schmidt * return it. this should only happen if that (old) root is at 114928da9fb4SJan Schmidt * level 0. 11505d9e75c4SJan Schmidt */ 115128da9fb4SJan Schmidt if (!tm) 115228da9fb4SJan Schmidt break; 11535d9e75c4SJan Schmidt 115428da9fb4SJan Schmidt /* 115528da9fb4SJan Schmidt * if there's an operation that's not a root replacement, we 115628da9fb4SJan Schmidt * found the oldest version of our root. normally, we'll find a 115728da9fb4SJan Schmidt * MOD_LOG_KEY_REMOVE_WHILE_FREEING operation here. 
115828da9fb4SJan Schmidt */ 11595d9e75c4SJan Schmidt if (tm->op != MOD_LOG_ROOT_REPLACE) 11605d9e75c4SJan Schmidt break; 11615d9e75c4SJan Schmidt 11625d9e75c4SJan Schmidt found = tm; 11635d9e75c4SJan Schmidt root_logical = tm->old_root.logical; 11645d9e75c4SJan Schmidt looped = 1; 11655d9e75c4SJan Schmidt } 11665d9e75c4SJan Schmidt 1167a95236d9SJan Schmidt /* if there's no old root to return, return what we found instead */ 1168a95236d9SJan Schmidt if (!found) 1169a95236d9SJan Schmidt found = tm; 1170a95236d9SJan Schmidt 11715d9e75c4SJan Schmidt return found; 11725d9e75c4SJan Schmidt } 11735d9e75c4SJan Schmidt 11745d9e75c4SJan Schmidt /* 11755d9e75c4SJan Schmidt * tm is a pointer to the first operation to rewind within eb. then, all 117601327610SNicholas D Steeves * previous operations will be rewound (until we reach something older than 11775d9e75c4SJan Schmidt * time_seq). 11785d9e75c4SJan Schmidt */ 11795d9e75c4SJan Schmidt static void 1180f1ca7e98SJosef Bacik __tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, 1181f1ca7e98SJosef Bacik u64 time_seq, struct tree_mod_elem *first_tm) 11825d9e75c4SJan Schmidt { 11835d9e75c4SJan Schmidt u32 n; 11845d9e75c4SJan Schmidt struct rb_node *next; 11855d9e75c4SJan Schmidt struct tree_mod_elem *tm = first_tm; 11865d9e75c4SJan Schmidt unsigned long o_dst; 11875d9e75c4SJan Schmidt unsigned long o_src; 11885d9e75c4SJan Schmidt unsigned long p_size = sizeof(struct btrfs_key_ptr); 11895d9e75c4SJan Schmidt 11905d9e75c4SJan Schmidt n = btrfs_header_nritems(eb); 1191b1a09f1eSDavid Sterba read_lock(&fs_info->tree_mod_log_lock); 1192097b8a7cSJan Schmidt while (tm && tm->seq >= time_seq) { 11935d9e75c4SJan Schmidt /* 11945d9e75c4SJan Schmidt * all the operations are recorded with the operator used for 11955d9e75c4SJan Schmidt * the modification. as we're going backwards, we do the 11965d9e75c4SJan Schmidt * opposite of each operation here. 
11975d9e75c4SJan Schmidt */ 11985d9e75c4SJan Schmidt switch (tm->op) { 11995d9e75c4SJan Schmidt case MOD_LOG_KEY_REMOVE_WHILE_FREEING: 12005d9e75c4SJan Schmidt BUG_ON(tm->slot < n); 1201c730ae0cSMarcos Paulo de Souza fallthrough; 120295c80bb1SLiu Bo case MOD_LOG_KEY_REMOVE_WHILE_MOVING: 12034c3e6969SChris Mason case MOD_LOG_KEY_REMOVE: 12045d9e75c4SJan Schmidt btrfs_set_node_key(eb, &tm->key, tm->slot); 12055d9e75c4SJan Schmidt btrfs_set_node_blockptr(eb, tm->slot, tm->blockptr); 12065d9e75c4SJan Schmidt btrfs_set_node_ptr_generation(eb, tm->slot, 12075d9e75c4SJan Schmidt tm->generation); 12084c3e6969SChris Mason n++; 12095d9e75c4SJan Schmidt break; 12105d9e75c4SJan Schmidt case MOD_LOG_KEY_REPLACE: 12115d9e75c4SJan Schmidt BUG_ON(tm->slot >= n); 12125d9e75c4SJan Schmidt btrfs_set_node_key(eb, &tm->key, tm->slot); 12135d9e75c4SJan Schmidt btrfs_set_node_blockptr(eb, tm->slot, tm->blockptr); 12145d9e75c4SJan Schmidt btrfs_set_node_ptr_generation(eb, tm->slot, 12155d9e75c4SJan Schmidt tm->generation); 12165d9e75c4SJan Schmidt break; 12175d9e75c4SJan Schmidt case MOD_LOG_KEY_ADD: 121819956c7eSJan Schmidt /* if a move operation is needed it's in the log */ 12195d9e75c4SJan Schmidt n--; 12205d9e75c4SJan Schmidt break; 12215d9e75c4SJan Schmidt case MOD_LOG_MOVE_KEYS: 1222c3193108SJan Schmidt o_dst = btrfs_node_key_ptr_offset(tm->slot); 1223c3193108SJan Schmidt o_src = btrfs_node_key_ptr_offset(tm->move.dst_slot); 1224c3193108SJan Schmidt memmove_extent_buffer(eb, o_dst, o_src, 12255d9e75c4SJan Schmidt tm->move.nr_items * p_size); 12265d9e75c4SJan Schmidt break; 12275d9e75c4SJan Schmidt case MOD_LOG_ROOT_REPLACE: 12285d9e75c4SJan Schmidt /* 12295d9e75c4SJan Schmidt * this operation is special. for roots, this must be 12305d9e75c4SJan Schmidt * handled explicitly before rewinding. 
12315d9e75c4SJan Schmidt * for non-roots, this operation may exist if the node 12325d9e75c4SJan Schmidt * was a root: root A -> child B; then A gets empty and 12335d9e75c4SJan Schmidt * B is promoted to the new root. in the mod log, we'll 12345d9e75c4SJan Schmidt * have a root-replace operation for B, a tree block 12355d9e75c4SJan Schmidt * that is no root. we simply ignore that operation. 12365d9e75c4SJan Schmidt */ 12375d9e75c4SJan Schmidt break; 12385d9e75c4SJan Schmidt } 12395d9e75c4SJan Schmidt next = rb_next(&tm->node); 12405d9e75c4SJan Schmidt if (!next) 12415d9e75c4SJan Schmidt break; 12426b4df8b6SGeliang Tang tm = rb_entry(next, struct tree_mod_elem, node); 1243298cfd36SChandan Rajendra if (tm->logical != first_tm->logical) 12445d9e75c4SJan Schmidt break; 12455d9e75c4SJan Schmidt } 1246b1a09f1eSDavid Sterba read_unlock(&fs_info->tree_mod_log_lock); 12475d9e75c4SJan Schmidt btrfs_set_header_nritems(eb, n); 12485d9e75c4SJan Schmidt } 12495d9e75c4SJan Schmidt 125047fb091fSJan Schmidt /* 125101327610SNicholas D Steeves * Called with eb read locked. If the buffer cannot be rewound, the same buffer 125247fb091fSJan Schmidt * is returned. If rewind operations happen, a fresh buffer is returned. The 125347fb091fSJan Schmidt * returned buffer is always read-locked. If the returned buffer is not the 125447fb091fSJan Schmidt * input buffer, the lock on the input buffer is released and the input buffer 125547fb091fSJan Schmidt * is freed (its refcount is decremented). 
125647fb091fSJan Schmidt */ 12575d9e75c4SJan Schmidt static struct extent_buffer * 12589ec72677SJosef Bacik tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct btrfs_path *path, 12599ec72677SJosef Bacik struct extent_buffer *eb, u64 time_seq) 12605d9e75c4SJan Schmidt { 12615d9e75c4SJan Schmidt struct extent_buffer *eb_rewin; 12625d9e75c4SJan Schmidt struct tree_mod_elem *tm; 12635d9e75c4SJan Schmidt 12645d9e75c4SJan Schmidt if (!time_seq) 12655d9e75c4SJan Schmidt return eb; 12665d9e75c4SJan Schmidt 12675d9e75c4SJan Schmidt if (btrfs_header_level(eb) == 0) 12685d9e75c4SJan Schmidt return eb; 12695d9e75c4SJan Schmidt 12705d9e75c4SJan Schmidt tm = tree_mod_log_search(fs_info, eb->start, time_seq); 12715d9e75c4SJan Schmidt if (!tm) 12725d9e75c4SJan Schmidt return eb; 12735d9e75c4SJan Schmidt 12749ec72677SJosef Bacik btrfs_set_path_blocking(path); 1275300aa896SDavid Sterba btrfs_set_lock_blocking_read(eb); 12769ec72677SJosef Bacik 12775d9e75c4SJan Schmidt if (tm->op == MOD_LOG_KEY_REMOVE_WHILE_FREEING) { 12785d9e75c4SJan Schmidt BUG_ON(tm->slot != 0); 1279da17066cSJeff Mahoney eb_rewin = alloc_dummy_extent_buffer(fs_info, eb->start); 1280db7f3436SJosef Bacik if (!eb_rewin) { 12819ec72677SJosef Bacik btrfs_tree_read_unlock_blocking(eb); 1282db7f3436SJosef Bacik free_extent_buffer(eb); 1283db7f3436SJosef Bacik return NULL; 1284db7f3436SJosef Bacik } 12855d9e75c4SJan Schmidt btrfs_set_header_bytenr(eb_rewin, eb->start); 12865d9e75c4SJan Schmidt btrfs_set_header_backref_rev(eb_rewin, 12875d9e75c4SJan Schmidt btrfs_header_backref_rev(eb)); 12885d9e75c4SJan Schmidt btrfs_set_header_owner(eb_rewin, btrfs_header_owner(eb)); 1289c3193108SJan Schmidt btrfs_set_header_level(eb_rewin, btrfs_header_level(eb)); 12905d9e75c4SJan Schmidt } else { 12915d9e75c4SJan Schmidt eb_rewin = btrfs_clone_extent_buffer(eb); 1292db7f3436SJosef Bacik if (!eb_rewin) { 12939ec72677SJosef Bacik btrfs_tree_read_unlock_blocking(eb); 1294db7f3436SJosef Bacik free_extent_buffer(eb); 
1295db7f3436SJosef Bacik return NULL; 1296db7f3436SJosef Bacik } 12975d9e75c4SJan Schmidt } 12985d9e75c4SJan Schmidt 12999ec72677SJosef Bacik btrfs_tree_read_unlock_blocking(eb); 13005d9e75c4SJan Schmidt free_extent_buffer(eb); 13015d9e75c4SJan Schmidt 1302d3beaa25SJosef Bacik btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb_rewin), 1303d3beaa25SJosef Bacik eb_rewin, btrfs_header_level(eb_rewin)); 130447fb091fSJan Schmidt btrfs_tree_read_lock(eb_rewin); 1305f1ca7e98SJosef Bacik __tree_mod_log_rewind(fs_info, eb_rewin, time_seq, tm); 130657911b8bSJan Schmidt WARN_ON(btrfs_header_nritems(eb_rewin) > 1307da17066cSJeff Mahoney BTRFS_NODEPTRS_PER_BLOCK(fs_info)); 13085d9e75c4SJan Schmidt 13095d9e75c4SJan Schmidt return eb_rewin; 13105d9e75c4SJan Schmidt } 13115d9e75c4SJan Schmidt 13128ba97a15SJan Schmidt /* 13138ba97a15SJan Schmidt * get_old_root() rewinds the state of @root's root node to the given @time_seq 13148ba97a15SJan Schmidt * value. If there are no changes, the current root->root_node is returned. If 13158ba97a15SJan Schmidt * anything changed in between, there's a fresh buffer allocated on which the 13168ba97a15SJan Schmidt * rewind operations are done. In any case, the returned buffer is read locked. 13178ba97a15SJan Schmidt * Returns NULL on error (with no locks held). 
13188ba97a15SJan Schmidt */ 13195d9e75c4SJan Schmidt static inline struct extent_buffer * 13205d9e75c4SJan Schmidt get_old_root(struct btrfs_root *root, u64 time_seq) 13215d9e75c4SJan Schmidt { 13220b246afaSJeff Mahoney struct btrfs_fs_info *fs_info = root->fs_info; 13235d9e75c4SJan Schmidt struct tree_mod_elem *tm; 132430b0463aSJan Schmidt struct extent_buffer *eb = NULL; 132530b0463aSJan Schmidt struct extent_buffer *eb_root; 1326efad8a85SFilipe Manana u64 eb_root_owner = 0; 13277bfdcf7fSLiu Bo struct extent_buffer *old; 1328a95236d9SJan Schmidt struct tree_mod_root *old_root = NULL; 13294325edd0SChris Mason u64 old_generation = 0; 1330a95236d9SJan Schmidt u64 logical; 1331581c1760SQu Wenruo int level; 13325d9e75c4SJan Schmidt 133330b0463aSJan Schmidt eb_root = btrfs_read_lock_root_node(root); 1334bcd24dabSDavid Sterba tm = __tree_mod_log_oldest_root(eb_root, time_seq); 13355d9e75c4SJan Schmidt if (!tm) 133630b0463aSJan Schmidt return eb_root; 13375d9e75c4SJan Schmidt 1338a95236d9SJan Schmidt if (tm->op == MOD_LOG_ROOT_REPLACE) { 13395d9e75c4SJan Schmidt old_root = &tm->old_root; 13405d9e75c4SJan Schmidt old_generation = tm->generation; 1341a95236d9SJan Schmidt logical = old_root->logical; 1342581c1760SQu Wenruo level = old_root->level; 1343a95236d9SJan Schmidt } else { 134430b0463aSJan Schmidt logical = eb_root->start; 1345581c1760SQu Wenruo level = btrfs_header_level(eb_root); 1346a95236d9SJan Schmidt } 13475d9e75c4SJan Schmidt 13480b246afaSJeff Mahoney tm = tree_mod_log_search(fs_info, logical, time_seq); 1349834328a8SJan Schmidt if (old_root && tm && tm->op != MOD_LOG_KEY_REMOVE_WHILE_FREEING) { 135030b0463aSJan Schmidt btrfs_tree_read_unlock(eb_root); 135130b0463aSJan Schmidt free_extent_buffer(eb_root); 1352581c1760SQu Wenruo old = read_tree_block(fs_info, logical, 0, level, NULL); 135364c043deSLiu Bo if (WARN_ON(IS_ERR(old) || !extent_buffer_uptodate(old))) { 135464c043deSLiu Bo if (!IS_ERR(old)) 1355416bc658SJosef Bacik free_extent_buffer(old); 
13560b246afaSJeff Mahoney btrfs_warn(fs_info, 13570b246afaSJeff Mahoney "failed to read tree block %llu from get_old_root", 13580b246afaSJeff Mahoney logical); 1359834328a8SJan Schmidt } else { 13607bfdcf7fSLiu Bo eb = btrfs_clone_extent_buffer(old); 13617bfdcf7fSLiu Bo free_extent_buffer(old); 1362834328a8SJan Schmidt } 1363834328a8SJan Schmidt } else if (old_root) { 1364efad8a85SFilipe Manana eb_root_owner = btrfs_header_owner(eb_root); 136530b0463aSJan Schmidt btrfs_tree_read_unlock(eb_root); 136630b0463aSJan Schmidt free_extent_buffer(eb_root); 13670b246afaSJeff Mahoney eb = alloc_dummy_extent_buffer(fs_info, logical); 1368834328a8SJan Schmidt } else { 1369300aa896SDavid Sterba btrfs_set_lock_blocking_read(eb_root); 137030b0463aSJan Schmidt eb = btrfs_clone_extent_buffer(eb_root); 13719ec72677SJosef Bacik btrfs_tree_read_unlock_blocking(eb_root); 137230b0463aSJan Schmidt free_extent_buffer(eb_root); 1373834328a8SJan Schmidt } 1374834328a8SJan Schmidt 13758ba97a15SJan Schmidt if (!eb) 13768ba97a15SJan Schmidt return NULL; 1377a95236d9SJan Schmidt if (old_root) { 13785d9e75c4SJan Schmidt btrfs_set_header_bytenr(eb, eb->start); 13795d9e75c4SJan Schmidt btrfs_set_header_backref_rev(eb, BTRFS_MIXED_BACKREF_REV); 1380efad8a85SFilipe Manana btrfs_set_header_owner(eb, eb_root_owner); 13815d9e75c4SJan Schmidt btrfs_set_header_level(eb, old_root->level); 13825d9e75c4SJan Schmidt btrfs_set_header_generation(eb, old_generation); 1383a95236d9SJan Schmidt } 1384d3beaa25SJosef Bacik btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb), eb, 1385d3beaa25SJosef Bacik btrfs_header_level(eb)); 1386d3beaa25SJosef Bacik btrfs_tree_read_lock(eb); 138728da9fb4SJan Schmidt if (tm) 13880b246afaSJeff Mahoney __tree_mod_log_rewind(fs_info, eb, time_seq, tm); 138928da9fb4SJan Schmidt else 139028da9fb4SJan Schmidt WARN_ON(btrfs_header_level(eb) != 0); 13910b246afaSJeff Mahoney WARN_ON(btrfs_header_nritems(eb) > BTRFS_NODEPTRS_PER_BLOCK(fs_info)); 13925d9e75c4SJan Schmidt 13935d9e75c4SJan 
Schmidt return eb; 13945d9e75c4SJan Schmidt } 13955d9e75c4SJan Schmidt 13965b6602e7SJan Schmidt int btrfs_old_root_level(struct btrfs_root *root, u64 time_seq) 13975b6602e7SJan Schmidt { 13985b6602e7SJan Schmidt struct tree_mod_elem *tm; 13995b6602e7SJan Schmidt int level; 140030b0463aSJan Schmidt struct extent_buffer *eb_root = btrfs_root_node(root); 14015b6602e7SJan Schmidt 1402bcd24dabSDavid Sterba tm = __tree_mod_log_oldest_root(eb_root, time_seq); 14035b6602e7SJan Schmidt if (tm && tm->op == MOD_LOG_ROOT_REPLACE) { 14045b6602e7SJan Schmidt level = tm->old_root.level; 14055b6602e7SJan Schmidt } else { 140630b0463aSJan Schmidt level = btrfs_header_level(eb_root); 14075b6602e7SJan Schmidt } 140830b0463aSJan Schmidt free_extent_buffer(eb_root); 14095b6602e7SJan Schmidt 14105b6602e7SJan Schmidt return level; 14115b6602e7SJan Schmidt } 14125b6602e7SJan Schmidt 14135d4f98a2SYan Zheng static inline int should_cow_block(struct btrfs_trans_handle *trans, 14145d4f98a2SYan Zheng struct btrfs_root *root, 14155d4f98a2SYan Zheng struct extent_buffer *buf) 14165d4f98a2SYan Zheng { 1417f5ee5c9aSJeff Mahoney if (btrfs_is_testing(root->fs_info)) 1418faa2dbf0SJosef Bacik return 0; 1419fccb84c9SDavid Sterba 1420d1980131SDavid Sterba /* Ensure we can see the FORCE_COW bit */ 1421d1980131SDavid Sterba smp_mb__before_atomic(); 1422f1ebcc74SLiu Bo 1423f1ebcc74SLiu Bo /* 1424f1ebcc74SLiu Bo * We do not need to cow a block if 1425f1ebcc74SLiu Bo * 1) this block is not created or changed in this transaction; 1426f1ebcc74SLiu Bo * 2) this block does not belong to TREE_RELOC tree; 1427f1ebcc74SLiu Bo * 3) the root is not forced COW. 1428f1ebcc74SLiu Bo * 1429f1ebcc74SLiu Bo * What is forced COW: 143001327610SNicholas D Steeves * when we create snapshot during committing the transaction, 143152042d8eSAndrea Gelmini * after we've finished copying src root, we must COW the shared 1432f1ebcc74SLiu Bo * block to ensure the metadata consistency. 
1433f1ebcc74SLiu Bo */ 14345d4f98a2SYan Zheng if (btrfs_header_generation(buf) == trans->transid && 14355d4f98a2SYan Zheng !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN) && 14365d4f98a2SYan Zheng !(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID && 1437f1ebcc74SLiu Bo btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)) && 143827cdeb70SMiao Xie !test_bit(BTRFS_ROOT_FORCE_COW, &root->state)) 14395d4f98a2SYan Zheng return 0; 14405d4f98a2SYan Zheng return 1; 14415d4f98a2SYan Zheng } 14425d4f98a2SYan Zheng 1443d352ac68SChris Mason /* 1444d352ac68SChris Mason * cows a single block, see __btrfs_cow_block for the real work. 144501327610SNicholas D Steeves * This version of it has extra checks so that a block isn't COWed more than 1446d352ac68SChris Mason * once per transaction, as long as it hasn't been written yet 1447d352ac68SChris Mason */ 1448d397712bSChris Mason noinline int btrfs_cow_block(struct btrfs_trans_handle *trans, 14495f39d397SChris Mason struct btrfs_root *root, struct extent_buffer *buf, 14505f39d397SChris Mason struct extent_buffer *parent, int parent_slot, 14519631e4ccSJosef Bacik struct extent_buffer **cow_ret, 14529631e4ccSJosef Bacik enum btrfs_lock_nesting nest) 145302217ed2SChris Mason { 14540b246afaSJeff Mahoney struct btrfs_fs_info *fs_info = root->fs_info; 14556702ed49SChris Mason u64 search_start; 1456f510cfecSChris Mason int ret; 1457dc17ff8fSChris Mason 145883354f07SJosef Bacik if (test_bit(BTRFS_ROOT_DELETING, &root->state)) 145983354f07SJosef Bacik btrfs_err(fs_info, 146083354f07SJosef Bacik "COW'ing blocks on a fs root that's being dropped"); 146183354f07SJosef Bacik 14620b246afaSJeff Mahoney if (trans->transaction != fs_info->running_transaction) 146331b1a2bdSJulia Lawall WARN(1, KERN_CRIT "trans %llu running %llu\n", 1464c1c9ff7cSGeert Uytterhoeven trans->transid, 14650b246afaSJeff Mahoney fs_info->running_transaction->transid); 146631b1a2bdSJulia Lawall 14670b246afaSJeff Mahoney if (trans->transid != fs_info->generation) 
146831b1a2bdSJulia Lawall WARN(1, KERN_CRIT "trans %llu running %llu\n", 14690b246afaSJeff Mahoney trans->transid, fs_info->generation); 1470dc17ff8fSChris Mason 14715d4f98a2SYan Zheng if (!should_cow_block(trans, root, buf)) { 147264c12921SJeff Mahoney trans->dirty = true; 147302217ed2SChris Mason *cow_ret = buf; 147402217ed2SChris Mason return 0; 147502217ed2SChris Mason } 1476c487685dSChris Mason 1477ee22184bSByongho Lee search_start = buf->start & ~((u64)SZ_1G - 1); 1478b4ce94deSChris Mason 1479b4ce94deSChris Mason if (parent) 14808bead258SDavid Sterba btrfs_set_lock_blocking_write(parent); 14818bead258SDavid Sterba btrfs_set_lock_blocking_write(buf); 1482b4ce94deSChris Mason 1483f616f5cdSQu Wenruo /* 1484f616f5cdSQu Wenruo * Before CoWing this block for later modification, check if it's 1485f616f5cdSQu Wenruo * the subtree root and do the delayed subtree trace if needed. 1486f616f5cdSQu Wenruo * 1487f616f5cdSQu Wenruo * Also We don't care about the error, as it's handled internally. 
1488f616f5cdSQu Wenruo */ 1489f616f5cdSQu Wenruo btrfs_qgroup_trace_subtree_after_cow(trans, root, buf); 1490f510cfecSChris Mason ret = __btrfs_cow_block(trans, root, buf, parent, 14919631e4ccSJosef Bacik parent_slot, cow_ret, search_start, 0, nest); 14921abe9b8aSliubo 14931abe9b8aSliubo trace_btrfs_cow_block(root, buf, *cow_ret); 14941abe9b8aSliubo 1495f510cfecSChris Mason return ret; 14962c90e5d6SChris Mason } 14976702ed49SChris Mason 1498d352ac68SChris Mason /* 1499d352ac68SChris Mason * helper function for defrag to decide if two blocks pointed to by a 1500d352ac68SChris Mason * node are actually close by 1501d352ac68SChris Mason */ 15026b80053dSChris Mason static int close_blocks(u64 blocknr, u64 other, u32 blocksize) 15036702ed49SChris Mason { 15046b80053dSChris Mason if (blocknr < other && other - (blocknr + blocksize) < 32768) 15056702ed49SChris Mason return 1; 15066b80053dSChris Mason if (blocknr > other && blocknr - (other + blocksize) < 32768) 15076702ed49SChris Mason return 1; 150802217ed2SChris Mason return 0; 150902217ed2SChris Mason } 151002217ed2SChris Mason 1511ce6ef5abSDavid Sterba #ifdef __LITTLE_ENDIAN 1512ce6ef5abSDavid Sterba 1513ce6ef5abSDavid Sterba /* 1514ce6ef5abSDavid Sterba * Compare two keys, on little-endian the disk order is same as CPU order and 1515ce6ef5abSDavid Sterba * we can avoid the conversion. 
1516ce6ef5abSDavid Sterba */ 1517ce6ef5abSDavid Sterba static int comp_keys(const struct btrfs_disk_key *disk_key, 1518ce6ef5abSDavid Sterba const struct btrfs_key *k2) 1519ce6ef5abSDavid Sterba { 1520ce6ef5abSDavid Sterba const struct btrfs_key *k1 = (const struct btrfs_key *)disk_key; 1521ce6ef5abSDavid Sterba 1522ce6ef5abSDavid Sterba return btrfs_comp_cpu_keys(k1, k2); 1523ce6ef5abSDavid Sterba } 1524ce6ef5abSDavid Sterba 1525ce6ef5abSDavid Sterba #else 1526ce6ef5abSDavid Sterba 1527081e9573SChris Mason /* 1528081e9573SChris Mason * compare two keys in a memcmp fashion 1529081e9573SChris Mason */ 1530310712b2SOmar Sandoval static int comp_keys(const struct btrfs_disk_key *disk, 1531310712b2SOmar Sandoval const struct btrfs_key *k2) 1532081e9573SChris Mason { 1533081e9573SChris Mason struct btrfs_key k1; 1534081e9573SChris Mason 1535081e9573SChris Mason btrfs_disk_key_to_cpu(&k1, disk); 1536081e9573SChris Mason 153720736abaSDiego Calleja return btrfs_comp_cpu_keys(&k1, k2); 1538081e9573SChris Mason } 1539ce6ef5abSDavid Sterba #endif 1540081e9573SChris Mason 1541f3465ca4SJosef Bacik /* 1542f3465ca4SJosef Bacik * same as comp_keys only with two btrfs_key's 1543f3465ca4SJosef Bacik */ 1544e1f60a65SDavid Sterba int __pure btrfs_comp_cpu_keys(const struct btrfs_key *k1, const struct btrfs_key *k2) 1545f3465ca4SJosef Bacik { 1546f3465ca4SJosef Bacik if (k1->objectid > k2->objectid) 1547f3465ca4SJosef Bacik return 1; 1548f3465ca4SJosef Bacik if (k1->objectid < k2->objectid) 1549f3465ca4SJosef Bacik return -1; 1550f3465ca4SJosef Bacik if (k1->type > k2->type) 1551f3465ca4SJosef Bacik return 1; 1552f3465ca4SJosef Bacik if (k1->type < k2->type) 1553f3465ca4SJosef Bacik return -1; 1554f3465ca4SJosef Bacik if (k1->offset > k2->offset) 1555f3465ca4SJosef Bacik return 1; 1556f3465ca4SJosef Bacik if (k1->offset < k2->offset) 1557f3465ca4SJosef Bacik return -1; 1558f3465ca4SJosef Bacik return 0; 1559f3465ca4SJosef Bacik } 1560081e9573SChris Mason 1561d352ac68SChris Mason /* 
1562d352ac68SChris Mason * this is used by the defrag code to go through all the 1563d352ac68SChris Mason * leaves pointed to by a node and reallocate them so that 1564d352ac68SChris Mason * disk order is close to key order 1565d352ac68SChris Mason */ 15666702ed49SChris Mason int btrfs_realloc_node(struct btrfs_trans_handle *trans, 15675f39d397SChris Mason struct btrfs_root *root, struct extent_buffer *parent, 1568de78b51aSEric Sandeen int start_slot, u64 *last_ret, 1569a6b6e75eSChris Mason struct btrfs_key *progress) 15706702ed49SChris Mason { 15710b246afaSJeff Mahoney struct btrfs_fs_info *fs_info = root->fs_info; 15726b80053dSChris Mason struct extent_buffer *cur; 15736702ed49SChris Mason u64 blocknr; 1574ca7a79adSChris Mason u64 gen; 1575e9d0b13bSChris Mason u64 search_start = *last_ret; 1576e9d0b13bSChris Mason u64 last_block = 0; 15776702ed49SChris Mason u64 other; 15786702ed49SChris Mason u32 parent_nritems; 15796702ed49SChris Mason int end_slot; 15806702ed49SChris Mason int i; 15816702ed49SChris Mason int err = 0; 1582f2183bdeSChris Mason int parent_level; 15836b80053dSChris Mason int uptodate; 15846b80053dSChris Mason u32 blocksize; 1585081e9573SChris Mason int progress_passed = 0; 1586081e9573SChris Mason struct btrfs_disk_key disk_key; 15876702ed49SChris Mason 15885708b959SChris Mason parent_level = btrfs_header_level(parent); 15895708b959SChris Mason 15900b246afaSJeff Mahoney WARN_ON(trans->transaction != fs_info->running_transaction); 15910b246afaSJeff Mahoney WARN_ON(trans->transid != fs_info->generation); 159286479a04SChris Mason 15936b80053dSChris Mason parent_nritems = btrfs_header_nritems(parent); 15940b246afaSJeff Mahoney blocksize = fs_info->nodesize; 15955dfe2be7SFilipe Manana end_slot = parent_nritems - 1; 15966702ed49SChris Mason 15975dfe2be7SFilipe Manana if (parent_nritems <= 1) 15986702ed49SChris Mason return 0; 15996702ed49SChris Mason 16008bead258SDavid Sterba btrfs_set_lock_blocking_write(parent); 1601b4ce94deSChris Mason 
16025dfe2be7SFilipe Manana for (i = start_slot; i <= end_slot; i++) { 1603581c1760SQu Wenruo struct btrfs_key first_key; 16046702ed49SChris Mason int close = 1; 1605a6b6e75eSChris Mason 1606081e9573SChris Mason btrfs_node_key(parent, &disk_key, i); 1607081e9573SChris Mason if (!progress_passed && comp_keys(&disk_key, progress) < 0) 1608081e9573SChris Mason continue; 1609081e9573SChris Mason 1610081e9573SChris Mason progress_passed = 1; 16116b80053dSChris Mason blocknr = btrfs_node_blockptr(parent, i); 1612ca7a79adSChris Mason gen = btrfs_node_ptr_generation(parent, i); 1613581c1760SQu Wenruo btrfs_node_key_to_cpu(parent, &first_key, i); 1614e9d0b13bSChris Mason if (last_block == 0) 1615e9d0b13bSChris Mason last_block = blocknr; 16165708b959SChris Mason 16176702ed49SChris Mason if (i > 0) { 16186b80053dSChris Mason other = btrfs_node_blockptr(parent, i - 1); 16196b80053dSChris Mason close = close_blocks(blocknr, other, blocksize); 16206702ed49SChris Mason } 16215dfe2be7SFilipe Manana if (!close && i < end_slot) { 16226b80053dSChris Mason other = btrfs_node_blockptr(parent, i + 1); 16236b80053dSChris Mason close = close_blocks(blocknr, other, blocksize); 16246702ed49SChris Mason } 1625e9d0b13bSChris Mason if (close) { 1626e9d0b13bSChris Mason last_block = blocknr; 16276702ed49SChris Mason continue; 1628e9d0b13bSChris Mason } 16296702ed49SChris Mason 16300b246afaSJeff Mahoney cur = find_extent_buffer(fs_info, blocknr); 16316b80053dSChris Mason if (cur) 1632b9fab919SChris Mason uptodate = btrfs_buffer_uptodate(cur, gen, 0); 16336b80053dSChris Mason else 16346b80053dSChris Mason uptodate = 0; 16355708b959SChris Mason if (!cur || !uptodate) { 16366b80053dSChris Mason if (!cur) { 1637581c1760SQu Wenruo cur = read_tree_block(fs_info, blocknr, gen, 1638581c1760SQu Wenruo parent_level - 1, 1639581c1760SQu Wenruo &first_key); 164064c043deSLiu Bo if (IS_ERR(cur)) { 164164c043deSLiu Bo return PTR_ERR(cur); 164264c043deSLiu Bo } else if (!extent_buffer_uptodate(cur)) { 
1643416bc658SJosef Bacik free_extent_buffer(cur); 164497d9a8a4STsutomu Itoh return -EIO; 1645416bc658SJosef Bacik } 16466b80053dSChris Mason } else if (!uptodate) { 1647581c1760SQu Wenruo err = btrfs_read_buffer(cur, gen, 1648581c1760SQu Wenruo parent_level - 1,&first_key); 1649018642a1STsutomu Itoh if (err) { 1650018642a1STsutomu Itoh free_extent_buffer(cur); 1651018642a1STsutomu Itoh return err; 1652018642a1STsutomu Itoh } 16536702ed49SChris Mason } 1654f2183bdeSChris Mason } 1655e9d0b13bSChris Mason if (search_start == 0) 16566b80053dSChris Mason search_start = last_block; 1657e9d0b13bSChris Mason 1658e7a84565SChris Mason btrfs_tree_lock(cur); 16598bead258SDavid Sterba btrfs_set_lock_blocking_write(cur); 16606b80053dSChris Mason err = __btrfs_cow_block(trans, root, cur, parent, i, 1661e7a84565SChris Mason &cur, search_start, 16626b80053dSChris Mason min(16 * blocksize, 16639631e4ccSJosef Bacik (end_slot - i) * blocksize), 16649631e4ccSJosef Bacik BTRFS_NESTING_COW); 1665252c38f0SYan if (err) { 1666e7a84565SChris Mason btrfs_tree_unlock(cur); 16676b80053dSChris Mason free_extent_buffer(cur); 16686702ed49SChris Mason break; 1669252c38f0SYan } 1670e7a84565SChris Mason search_start = cur->start; 1671e7a84565SChris Mason last_block = cur->start; 1672f2183bdeSChris Mason *last_ret = search_start; 1673e7a84565SChris Mason btrfs_tree_unlock(cur); 1674e7a84565SChris Mason free_extent_buffer(cur); 16756702ed49SChris Mason } 16766702ed49SChris Mason return err; 16776702ed49SChris Mason } 16786702ed49SChris Mason 167974123bd7SChris Mason /* 16805f39d397SChris Mason * search for key in the extent_buffer. The items start at offset p, 16815f39d397SChris Mason * and they are item_size apart. There are 'max' items in p. 16825f39d397SChris Mason * 168374123bd7SChris Mason * the slot in the array is returned via slot, and it points to 168474123bd7SChris Mason * the place where you would insert key if it is not found in 168574123bd7SChris Mason * the array. 
168674123bd7SChris Mason * 168774123bd7SChris Mason * slot may point to max if the key is bigger than all of the keys 168874123bd7SChris Mason */ 1689e02119d5SChris Mason static noinline int generic_bin_search(struct extent_buffer *eb, 1690310712b2SOmar Sandoval unsigned long p, int item_size, 1691310712b2SOmar Sandoval const struct btrfs_key *key, 1692be0e5c09SChris Mason int max, int *slot) 1693be0e5c09SChris Mason { 1694be0e5c09SChris Mason int low = 0; 1695be0e5c09SChris Mason int high = max; 1696be0e5c09SChris Mason int ret; 16975cd17f34SDavid Sterba const int key_size = sizeof(struct btrfs_disk_key); 1698be0e5c09SChris Mason 16995e24e9afSLiu Bo if (low > high) { 17005e24e9afSLiu Bo btrfs_err(eb->fs_info, 17015e24e9afSLiu Bo "%s: low (%d) > high (%d) eb %llu owner %llu level %d", 17025e24e9afSLiu Bo __func__, low, high, eb->start, 17035e24e9afSLiu Bo btrfs_header_owner(eb), btrfs_header_level(eb)); 17045e24e9afSLiu Bo return -EINVAL; 17055e24e9afSLiu Bo } 17065e24e9afSLiu Bo 1707be0e5c09SChris Mason while (low < high) { 17085cd17f34SDavid Sterba unsigned long oip; 17095cd17f34SDavid Sterba unsigned long offset; 17105cd17f34SDavid Sterba struct btrfs_disk_key *tmp; 17115cd17f34SDavid Sterba struct btrfs_disk_key unaligned; 17125cd17f34SDavid Sterba int mid; 17135cd17f34SDavid Sterba 1714be0e5c09SChris Mason mid = (low + high) / 2; 17155f39d397SChris Mason offset = p + mid * item_size; 17165cd17f34SDavid Sterba oip = offset_in_page(offset); 17175f39d397SChris Mason 17185cd17f34SDavid Sterba if (oip + key_size <= PAGE_SIZE) { 17195cd17f34SDavid Sterba const unsigned long idx = offset >> PAGE_SHIFT; 17205cd17f34SDavid Sterba char *kaddr = page_address(eb->pages[idx]); 1721934d375bSChris Mason 17225cd17f34SDavid Sterba tmp = (struct btrfs_disk_key *)(kaddr + oip); 17235cd17f34SDavid Sterba } else { 17245cd17f34SDavid Sterba read_extent_buffer(eb, &unaligned, offset, key_size); 17255f39d397SChris Mason tmp = &unaligned; 1726479965d6SChris Mason } 1727479965d6SChris 
Mason 1728be0e5c09SChris Mason ret = comp_keys(tmp, key); 1729be0e5c09SChris Mason 1730be0e5c09SChris Mason if (ret < 0) 1731be0e5c09SChris Mason low = mid + 1; 1732be0e5c09SChris Mason else if (ret > 0) 1733be0e5c09SChris Mason high = mid; 1734be0e5c09SChris Mason else { 1735be0e5c09SChris Mason *slot = mid; 1736be0e5c09SChris Mason return 0; 1737be0e5c09SChris Mason } 1738be0e5c09SChris Mason } 1739be0e5c09SChris Mason *slot = low; 1740be0e5c09SChris Mason return 1; 1741be0e5c09SChris Mason } 1742be0e5c09SChris Mason 174397571fd0SChris Mason /* 174497571fd0SChris Mason * simple bin_search frontend that does the right thing for 174597571fd0SChris Mason * leaves vs nodes 174697571fd0SChris Mason */ 1747a74b35ecSNikolay Borisov int btrfs_bin_search(struct extent_buffer *eb, const struct btrfs_key *key, 1748e3b83361SQu Wenruo int *slot) 1749be0e5c09SChris Mason { 1750e3b83361SQu Wenruo if (btrfs_header_level(eb) == 0) 17515f39d397SChris Mason return generic_bin_search(eb, 17525f39d397SChris Mason offsetof(struct btrfs_leaf, items), 17530783fcfcSChris Mason sizeof(struct btrfs_item), 17545f39d397SChris Mason key, btrfs_header_nritems(eb), 17557518a238SChris Mason slot); 1756f775738fSWang Sheng-Hui else 17575f39d397SChris Mason return generic_bin_search(eb, 17585f39d397SChris Mason offsetof(struct btrfs_node, ptrs), 1759123abc88SChris Mason sizeof(struct btrfs_key_ptr), 17605f39d397SChris Mason key, btrfs_header_nritems(eb), 17617518a238SChris Mason slot); 1762be0e5c09SChris Mason } 1763be0e5c09SChris Mason 1764f0486c68SYan, Zheng static void root_add_used(struct btrfs_root *root, u32 size) 1765f0486c68SYan, Zheng { 1766f0486c68SYan, Zheng spin_lock(&root->accounting_lock); 1767f0486c68SYan, Zheng btrfs_set_root_used(&root->root_item, 1768f0486c68SYan, Zheng btrfs_root_used(&root->root_item) + size); 1769f0486c68SYan, Zheng spin_unlock(&root->accounting_lock); 1770f0486c68SYan, Zheng } 1771f0486c68SYan, Zheng 1772f0486c68SYan, Zheng static void root_sub_used(struct 
btrfs_root *root, u32 size) 1773f0486c68SYan, Zheng { 1774f0486c68SYan, Zheng spin_lock(&root->accounting_lock); 1775f0486c68SYan, Zheng btrfs_set_root_used(&root->root_item, 1776f0486c68SYan, Zheng btrfs_root_used(&root->root_item) - size); 1777f0486c68SYan, Zheng spin_unlock(&root->accounting_lock); 1778f0486c68SYan, Zheng } 1779f0486c68SYan, Zheng 1780d352ac68SChris Mason /* given a node and slot number, this reads the blocks it points to. The 1781d352ac68SChris Mason * extent buffer is returned with a reference taken (but unlocked). 1782d352ac68SChris Mason */ 17834b231ae4SDavid Sterba struct extent_buffer *btrfs_read_node_slot(struct extent_buffer *parent, 17844b231ae4SDavid Sterba int slot) 1785bb803951SChris Mason { 1786ca7a79adSChris Mason int level = btrfs_header_level(parent); 1787416bc658SJosef Bacik struct extent_buffer *eb; 1788581c1760SQu Wenruo struct btrfs_key first_key; 1789416bc658SJosef Bacik 1790fb770ae4SLiu Bo if (slot < 0 || slot >= btrfs_header_nritems(parent)) 1791fb770ae4SLiu Bo return ERR_PTR(-ENOENT); 1792ca7a79adSChris Mason 1793ca7a79adSChris Mason BUG_ON(level == 0); 1794ca7a79adSChris Mason 1795581c1760SQu Wenruo btrfs_node_key_to_cpu(parent, &first_key, slot); 1796d0d20b0fSDavid Sterba eb = read_tree_block(parent->fs_info, btrfs_node_blockptr(parent, slot), 1797581c1760SQu Wenruo btrfs_node_ptr_generation(parent, slot), 1798581c1760SQu Wenruo level - 1, &first_key); 1799fb770ae4SLiu Bo if (!IS_ERR(eb) && !extent_buffer_uptodate(eb)) { 1800416bc658SJosef Bacik free_extent_buffer(eb); 1801fb770ae4SLiu Bo eb = ERR_PTR(-EIO); 1802416bc658SJosef Bacik } 1803416bc658SJosef Bacik 1804416bc658SJosef Bacik return eb; 1805bb803951SChris Mason } 1806bb803951SChris Mason 1807d352ac68SChris Mason /* 1808d352ac68SChris Mason * node level balancing, used to make sure nodes are in proper order for 1809d352ac68SChris Mason * item deletion. 
We balance from the top down, so we have to make sure 1810d352ac68SChris Mason * that a deletion won't leave an node completely empty later on. 1811d352ac68SChris Mason */ 1812e02119d5SChris Mason static noinline int balance_level(struct btrfs_trans_handle *trans, 181398ed5174SChris Mason struct btrfs_root *root, 181498ed5174SChris Mason struct btrfs_path *path, int level) 1815bb803951SChris Mason { 18160b246afaSJeff Mahoney struct btrfs_fs_info *fs_info = root->fs_info; 18175f39d397SChris Mason struct extent_buffer *right = NULL; 18185f39d397SChris Mason struct extent_buffer *mid; 18195f39d397SChris Mason struct extent_buffer *left = NULL; 18205f39d397SChris Mason struct extent_buffer *parent = NULL; 1821bb803951SChris Mason int ret = 0; 1822bb803951SChris Mason int wret; 1823bb803951SChris Mason int pslot; 1824bb803951SChris Mason int orig_slot = path->slots[level]; 182579f95c82SChris Mason u64 orig_ptr; 1826bb803951SChris Mason 182798e6b1ebSLiu Bo ASSERT(level > 0); 1828bb803951SChris Mason 18295f39d397SChris Mason mid = path->nodes[level]; 1830b4ce94deSChris Mason 1831bd681513SChris Mason WARN_ON(path->locks[level] != BTRFS_WRITE_LOCK && 1832bd681513SChris Mason path->locks[level] != BTRFS_WRITE_LOCK_BLOCKING); 18337bb86316SChris Mason WARN_ON(btrfs_header_generation(mid) != trans->transid); 18347bb86316SChris Mason 18351d4f8a0cSChris Mason orig_ptr = btrfs_node_blockptr(mid, orig_slot); 183679f95c82SChris Mason 1837a05a9bb1SLi Zefan if (level < BTRFS_MAX_LEVEL - 1) { 18385f39d397SChris Mason parent = path->nodes[level + 1]; 1839bb803951SChris Mason pslot = path->slots[level + 1]; 1840a05a9bb1SLi Zefan } 1841bb803951SChris Mason 184240689478SChris Mason /* 184340689478SChris Mason * deal with the case where there is only one pointer in the root 184440689478SChris Mason * by promoting the node below to a root 184540689478SChris Mason */ 18465f39d397SChris Mason if (!parent) { 18475f39d397SChris Mason struct extent_buffer *child; 1848bb803951SChris Mason 
18495f39d397SChris Mason if (btrfs_header_nritems(mid) != 1) 1850bb803951SChris Mason return 0; 1851bb803951SChris Mason 1852bb803951SChris Mason /* promote the child to a root */ 18534b231ae4SDavid Sterba child = btrfs_read_node_slot(mid, 0); 1854fb770ae4SLiu Bo if (IS_ERR(child)) { 1855fb770ae4SLiu Bo ret = PTR_ERR(child); 18560b246afaSJeff Mahoney btrfs_handle_fs_error(fs_info, ret, NULL); 1857305a26afSMark Fasheh goto enospc; 1858305a26afSMark Fasheh } 1859305a26afSMark Fasheh 1860925baeddSChris Mason btrfs_tree_lock(child); 18618bead258SDavid Sterba btrfs_set_lock_blocking_write(child); 18629631e4ccSJosef Bacik ret = btrfs_cow_block(trans, root, child, mid, 0, &child, 18639631e4ccSJosef Bacik BTRFS_NESTING_COW); 1864f0486c68SYan, Zheng if (ret) { 1865f0486c68SYan, Zheng btrfs_tree_unlock(child); 1866f0486c68SYan, Zheng free_extent_buffer(child); 1867f0486c68SYan, Zheng goto enospc; 1868f0486c68SYan, Zheng } 18692f375ab9SYan 1870d9d19a01SDavid Sterba ret = tree_mod_log_insert_root(root->node, child, 1); 1871d9d19a01SDavid Sterba BUG_ON(ret < 0); 1872240f62c8SChris Mason rcu_assign_pointer(root->node, child); 1873925baeddSChris Mason 18740b86a832SChris Mason add_root_to_dirty_list(root); 1875925baeddSChris Mason btrfs_tree_unlock(child); 1876b4ce94deSChris Mason 1877925baeddSChris Mason path->locks[level] = 0; 1878bb803951SChris Mason path->nodes[level] = NULL; 18796a884d7dSDavid Sterba btrfs_clean_tree_block(mid); 1880925baeddSChris Mason btrfs_tree_unlock(mid); 1881bb803951SChris Mason /* once for the path */ 18825f39d397SChris Mason free_extent_buffer(mid); 1883f0486c68SYan, Zheng 1884f0486c68SYan, Zheng root_sub_used(root, mid->len); 18855581a51aSJan Schmidt btrfs_free_tree_block(trans, root, mid, 0, 1); 1886bb803951SChris Mason /* once for the root ptr */ 18873083ee2eSJosef Bacik free_extent_buffer_stale(mid); 1888f0486c68SYan, Zheng return 0; 1889bb803951SChris Mason } 18905f39d397SChris Mason if (btrfs_header_nritems(mid) > 18910b246afaSJeff Mahoney 
BTRFS_NODEPTRS_PER_BLOCK(fs_info) / 4) 1892bb803951SChris Mason return 0; 1893bb803951SChris Mason 18944b231ae4SDavid Sterba left = btrfs_read_node_slot(parent, pslot - 1); 1895fb770ae4SLiu Bo if (IS_ERR(left)) 1896fb770ae4SLiu Bo left = NULL; 1897fb770ae4SLiu Bo 18985f39d397SChris Mason if (left) { 1899*bf77467aSJosef Bacik __btrfs_tree_lock(left, BTRFS_NESTING_LEFT); 19008bead258SDavid Sterba btrfs_set_lock_blocking_write(left); 19015f39d397SChris Mason wret = btrfs_cow_block(trans, root, left, 19029631e4ccSJosef Bacik parent, pslot - 1, &left, 19039631e4ccSJosef Bacik BTRFS_NESTING_COW); 190454aa1f4dSChris Mason if (wret) { 190554aa1f4dSChris Mason ret = wret; 190654aa1f4dSChris Mason goto enospc; 190754aa1f4dSChris Mason } 19082cc58cf2SChris Mason } 1909fb770ae4SLiu Bo 19104b231ae4SDavid Sterba right = btrfs_read_node_slot(parent, pslot + 1); 1911fb770ae4SLiu Bo if (IS_ERR(right)) 1912fb770ae4SLiu Bo right = NULL; 1913fb770ae4SLiu Bo 19145f39d397SChris Mason if (right) { 1915*bf77467aSJosef Bacik __btrfs_tree_lock(right, BTRFS_NESTING_RIGHT); 19168bead258SDavid Sterba btrfs_set_lock_blocking_write(right); 19175f39d397SChris Mason wret = btrfs_cow_block(trans, root, right, 19189631e4ccSJosef Bacik parent, pslot + 1, &right, 19199631e4ccSJosef Bacik BTRFS_NESTING_COW); 19202cc58cf2SChris Mason if (wret) { 19212cc58cf2SChris Mason ret = wret; 19222cc58cf2SChris Mason goto enospc; 19232cc58cf2SChris Mason } 19242cc58cf2SChris Mason } 19252cc58cf2SChris Mason 19262cc58cf2SChris Mason /* first, try to make some room in the middle buffer */ 19275f39d397SChris Mason if (left) { 19285f39d397SChris Mason orig_slot += btrfs_header_nritems(left); 1929d30a668fSDavid Sterba wret = push_node_left(trans, left, mid, 1); 193079f95c82SChris Mason if (wret < 0) 193179f95c82SChris Mason ret = wret; 1932bb803951SChris Mason } 193379f95c82SChris Mason 193479f95c82SChris Mason /* 193579f95c82SChris Mason * then try to empty the right most buffer into the middle 193679f95c82SChris 
Mason */ 19375f39d397SChris Mason if (right) { 1938d30a668fSDavid Sterba wret = push_node_left(trans, mid, right, 1); 193954aa1f4dSChris Mason if (wret < 0 && wret != -ENOSPC) 194079f95c82SChris Mason ret = wret; 19415f39d397SChris Mason if (btrfs_header_nritems(right) == 0) { 19426a884d7dSDavid Sterba btrfs_clean_tree_block(right); 1943925baeddSChris Mason btrfs_tree_unlock(right); 1944afe5fea7STsutomu Itoh del_ptr(root, path, level + 1, pslot + 1); 1945f0486c68SYan, Zheng root_sub_used(root, right->len); 19465581a51aSJan Schmidt btrfs_free_tree_block(trans, root, right, 0, 1); 19473083ee2eSJosef Bacik free_extent_buffer_stale(right); 1948f0486c68SYan, Zheng right = NULL; 1949bb803951SChris Mason } else { 19505f39d397SChris Mason struct btrfs_disk_key right_key; 19515f39d397SChris Mason btrfs_node_key(right, &right_key, 0); 19520e82bcfeSDavid Sterba ret = tree_mod_log_insert_key(parent, pslot + 1, 19530e82bcfeSDavid Sterba MOD_LOG_KEY_REPLACE, GFP_NOFS); 19540e82bcfeSDavid Sterba BUG_ON(ret < 0); 19555f39d397SChris Mason btrfs_set_node_key(parent, &right_key, pslot + 1); 19565f39d397SChris Mason btrfs_mark_buffer_dirty(parent); 1957bb803951SChris Mason } 1958bb803951SChris Mason } 19595f39d397SChris Mason if (btrfs_header_nritems(mid) == 1) { 196079f95c82SChris Mason /* 196179f95c82SChris Mason * we're not allowed to leave a node with one item in the 196279f95c82SChris Mason * tree during a delete. A deletion from lower in the tree 196379f95c82SChris Mason * could try to delete the only pointer in this node. 196479f95c82SChris Mason * So, pull some keys from the left. 
196579f95c82SChris Mason * There has to be a left pointer at this point because 196679f95c82SChris Mason * otherwise we would have pulled some pointers from the 196779f95c82SChris Mason * right 196879f95c82SChris Mason */ 1969305a26afSMark Fasheh if (!left) { 1970305a26afSMark Fasheh ret = -EROFS; 19710b246afaSJeff Mahoney btrfs_handle_fs_error(fs_info, ret, NULL); 1972305a26afSMark Fasheh goto enospc; 1973305a26afSMark Fasheh } 197455d32ed8SDavid Sterba wret = balance_node_right(trans, mid, left); 197554aa1f4dSChris Mason if (wret < 0) { 197679f95c82SChris Mason ret = wret; 197754aa1f4dSChris Mason goto enospc; 197854aa1f4dSChris Mason } 1979bce4eae9SChris Mason if (wret == 1) { 1980d30a668fSDavid Sterba wret = push_node_left(trans, left, mid, 1); 1981bce4eae9SChris Mason if (wret < 0) 1982bce4eae9SChris Mason ret = wret; 1983bce4eae9SChris Mason } 198479f95c82SChris Mason BUG_ON(wret == 1); 198579f95c82SChris Mason } 19865f39d397SChris Mason if (btrfs_header_nritems(mid) == 0) { 19876a884d7dSDavid Sterba btrfs_clean_tree_block(mid); 1988925baeddSChris Mason btrfs_tree_unlock(mid); 1989afe5fea7STsutomu Itoh del_ptr(root, path, level + 1, pslot); 1990f0486c68SYan, Zheng root_sub_used(root, mid->len); 19915581a51aSJan Schmidt btrfs_free_tree_block(trans, root, mid, 0, 1); 19923083ee2eSJosef Bacik free_extent_buffer_stale(mid); 1993f0486c68SYan, Zheng mid = NULL; 199479f95c82SChris Mason } else { 199579f95c82SChris Mason /* update the parent key to reflect our changes */ 19965f39d397SChris Mason struct btrfs_disk_key mid_key; 19975f39d397SChris Mason btrfs_node_key(mid, &mid_key, 0); 19980e82bcfeSDavid Sterba ret = tree_mod_log_insert_key(parent, pslot, 19990e82bcfeSDavid Sterba MOD_LOG_KEY_REPLACE, GFP_NOFS); 20000e82bcfeSDavid Sterba BUG_ON(ret < 0); 20015f39d397SChris Mason btrfs_set_node_key(parent, &mid_key, pslot); 20025f39d397SChris Mason btrfs_mark_buffer_dirty(parent); 200379f95c82SChris Mason } 2004bb803951SChris Mason 200579f95c82SChris Mason /* update the 
path */ 20065f39d397SChris Mason if (left) { 20075f39d397SChris Mason if (btrfs_header_nritems(left) > orig_slot) { 200867439dadSDavid Sterba atomic_inc(&left->refs); 2009925baeddSChris Mason /* left was locked after cow */ 20105f39d397SChris Mason path->nodes[level] = left; 2011bb803951SChris Mason path->slots[level + 1] -= 1; 2012bb803951SChris Mason path->slots[level] = orig_slot; 2013925baeddSChris Mason if (mid) { 2014925baeddSChris Mason btrfs_tree_unlock(mid); 20155f39d397SChris Mason free_extent_buffer(mid); 2016925baeddSChris Mason } 2017bb803951SChris Mason } else { 20185f39d397SChris Mason orig_slot -= btrfs_header_nritems(left); 2019bb803951SChris Mason path->slots[level] = orig_slot; 2020bb803951SChris Mason } 2021bb803951SChris Mason } 202279f95c82SChris Mason /* double check we haven't messed things up */ 2023e20d96d6SChris Mason if (orig_ptr != 20245f39d397SChris Mason btrfs_node_blockptr(path->nodes[level], path->slots[level])) 202579f95c82SChris Mason BUG(); 202654aa1f4dSChris Mason enospc: 2027925baeddSChris Mason if (right) { 2028925baeddSChris Mason btrfs_tree_unlock(right); 20295f39d397SChris Mason free_extent_buffer(right); 2030925baeddSChris Mason } 2031925baeddSChris Mason if (left) { 2032925baeddSChris Mason if (path->nodes[level] != left) 2033925baeddSChris Mason btrfs_tree_unlock(left); 20345f39d397SChris Mason free_extent_buffer(left); 2035925baeddSChris Mason } 2036bb803951SChris Mason return ret; 2037bb803951SChris Mason } 2038bb803951SChris Mason 2039d352ac68SChris Mason /* Node balancing for insertion. Here we only split or push nodes around 2040d352ac68SChris Mason * when they are completely full. This is also done top down, so we 2041d352ac68SChris Mason * have to be pessimistic. 
2042d352ac68SChris Mason */ 2043d397712bSChris Mason static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans, 2044e66f709bSChris Mason struct btrfs_root *root, 2045e66f709bSChris Mason struct btrfs_path *path, int level) 2046e66f709bSChris Mason { 20470b246afaSJeff Mahoney struct btrfs_fs_info *fs_info = root->fs_info; 20485f39d397SChris Mason struct extent_buffer *right = NULL; 20495f39d397SChris Mason struct extent_buffer *mid; 20505f39d397SChris Mason struct extent_buffer *left = NULL; 20515f39d397SChris Mason struct extent_buffer *parent = NULL; 2052e66f709bSChris Mason int ret = 0; 2053e66f709bSChris Mason int wret; 2054e66f709bSChris Mason int pslot; 2055e66f709bSChris Mason int orig_slot = path->slots[level]; 2056e66f709bSChris Mason 2057e66f709bSChris Mason if (level == 0) 2058e66f709bSChris Mason return 1; 2059e66f709bSChris Mason 20605f39d397SChris Mason mid = path->nodes[level]; 20617bb86316SChris Mason WARN_ON(btrfs_header_generation(mid) != trans->transid); 2062e66f709bSChris Mason 2063a05a9bb1SLi Zefan if (level < BTRFS_MAX_LEVEL - 1) { 20645f39d397SChris Mason parent = path->nodes[level + 1]; 2065e66f709bSChris Mason pslot = path->slots[level + 1]; 2066a05a9bb1SLi Zefan } 2067e66f709bSChris Mason 20685f39d397SChris Mason if (!parent) 2069e66f709bSChris Mason return 1; 2070e66f709bSChris Mason 20714b231ae4SDavid Sterba left = btrfs_read_node_slot(parent, pslot - 1); 2072fb770ae4SLiu Bo if (IS_ERR(left)) 2073fb770ae4SLiu Bo left = NULL; 2074e66f709bSChris Mason 2075e66f709bSChris Mason /* first, try to make some room in the middle buffer */ 20765f39d397SChris Mason if (left) { 2077e66f709bSChris Mason u32 left_nr; 2078925baeddSChris Mason 2079*bf77467aSJosef Bacik __btrfs_tree_lock(left, BTRFS_NESTING_LEFT); 20808bead258SDavid Sterba btrfs_set_lock_blocking_write(left); 2081b4ce94deSChris Mason 20825f39d397SChris Mason left_nr = btrfs_header_nritems(left); 20830b246afaSJeff Mahoney if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 
1) { 208433ade1f8SChris Mason wret = 1; 208533ade1f8SChris Mason } else { 20865f39d397SChris Mason ret = btrfs_cow_block(trans, root, left, parent, 20879631e4ccSJosef Bacik pslot - 1, &left, 20889631e4ccSJosef Bacik BTRFS_NESTING_COW); 208954aa1f4dSChris Mason if (ret) 209054aa1f4dSChris Mason wret = 1; 209154aa1f4dSChris Mason else { 2092d30a668fSDavid Sterba wret = push_node_left(trans, left, mid, 0); 209354aa1f4dSChris Mason } 209433ade1f8SChris Mason } 2095e66f709bSChris Mason if (wret < 0) 2096e66f709bSChris Mason ret = wret; 2097e66f709bSChris Mason if (wret == 0) { 20985f39d397SChris Mason struct btrfs_disk_key disk_key; 2099e66f709bSChris Mason orig_slot += left_nr; 21005f39d397SChris Mason btrfs_node_key(mid, &disk_key, 0); 21010e82bcfeSDavid Sterba ret = tree_mod_log_insert_key(parent, pslot, 21020e82bcfeSDavid Sterba MOD_LOG_KEY_REPLACE, GFP_NOFS); 21030e82bcfeSDavid Sterba BUG_ON(ret < 0); 21045f39d397SChris Mason btrfs_set_node_key(parent, &disk_key, pslot); 21055f39d397SChris Mason btrfs_mark_buffer_dirty(parent); 21065f39d397SChris Mason if (btrfs_header_nritems(left) > orig_slot) { 21075f39d397SChris Mason path->nodes[level] = left; 2108e66f709bSChris Mason path->slots[level + 1] -= 1; 2109e66f709bSChris Mason path->slots[level] = orig_slot; 2110925baeddSChris Mason btrfs_tree_unlock(mid); 21115f39d397SChris Mason free_extent_buffer(mid); 2112e66f709bSChris Mason } else { 2113e66f709bSChris Mason orig_slot -= 21145f39d397SChris Mason btrfs_header_nritems(left); 2115e66f709bSChris Mason path->slots[level] = orig_slot; 2116925baeddSChris Mason btrfs_tree_unlock(left); 21175f39d397SChris Mason free_extent_buffer(left); 2118e66f709bSChris Mason } 2119e66f709bSChris Mason return 0; 2120e66f709bSChris Mason } 2121925baeddSChris Mason btrfs_tree_unlock(left); 21225f39d397SChris Mason free_extent_buffer(left); 2123e66f709bSChris Mason } 21244b231ae4SDavid Sterba right = btrfs_read_node_slot(parent, pslot + 1); 2125fb770ae4SLiu Bo if (IS_ERR(right)) 
2126fb770ae4SLiu Bo right = NULL; 2127e66f709bSChris Mason 2128e66f709bSChris Mason /* 2129e66f709bSChris Mason * then try to empty the right most buffer into the middle 2130e66f709bSChris Mason */ 21315f39d397SChris Mason if (right) { 213233ade1f8SChris Mason u32 right_nr; 2133b4ce94deSChris Mason 2134*bf77467aSJosef Bacik __btrfs_tree_lock(right, BTRFS_NESTING_RIGHT); 21358bead258SDavid Sterba btrfs_set_lock_blocking_write(right); 2136b4ce94deSChris Mason 21375f39d397SChris Mason right_nr = btrfs_header_nritems(right); 21380b246afaSJeff Mahoney if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 1) { 213933ade1f8SChris Mason wret = 1; 214033ade1f8SChris Mason } else { 21415f39d397SChris Mason ret = btrfs_cow_block(trans, root, right, 21425f39d397SChris Mason parent, pslot + 1, 21439631e4ccSJosef Bacik &right, BTRFS_NESTING_COW); 214454aa1f4dSChris Mason if (ret) 214554aa1f4dSChris Mason wret = 1; 214654aa1f4dSChris Mason else { 214755d32ed8SDavid Sterba wret = balance_node_right(trans, right, mid); 214833ade1f8SChris Mason } 214954aa1f4dSChris Mason } 2150e66f709bSChris Mason if (wret < 0) 2151e66f709bSChris Mason ret = wret; 2152e66f709bSChris Mason if (wret == 0) { 21535f39d397SChris Mason struct btrfs_disk_key disk_key; 21545f39d397SChris Mason 21555f39d397SChris Mason btrfs_node_key(right, &disk_key, 0); 21560e82bcfeSDavid Sterba ret = tree_mod_log_insert_key(parent, pslot + 1, 21570e82bcfeSDavid Sterba MOD_LOG_KEY_REPLACE, GFP_NOFS); 21580e82bcfeSDavid Sterba BUG_ON(ret < 0); 21595f39d397SChris Mason btrfs_set_node_key(parent, &disk_key, pslot + 1); 21605f39d397SChris Mason btrfs_mark_buffer_dirty(parent); 21615f39d397SChris Mason 21625f39d397SChris Mason if (btrfs_header_nritems(mid) <= orig_slot) { 21635f39d397SChris Mason path->nodes[level] = right; 2164e66f709bSChris Mason path->slots[level + 1] += 1; 2165e66f709bSChris Mason path->slots[level] = orig_slot - 21665f39d397SChris Mason btrfs_header_nritems(mid); 2167925baeddSChris Mason 
btrfs_tree_unlock(mid); 21685f39d397SChris Mason free_extent_buffer(mid); 2169e66f709bSChris Mason } else { 2170925baeddSChris Mason btrfs_tree_unlock(right); 21715f39d397SChris Mason free_extent_buffer(right); 2172e66f709bSChris Mason } 2173e66f709bSChris Mason return 0; 2174e66f709bSChris Mason } 2175925baeddSChris Mason btrfs_tree_unlock(right); 21765f39d397SChris Mason free_extent_buffer(right); 2177e66f709bSChris Mason } 2178e66f709bSChris Mason return 1; 2179e66f709bSChris Mason } 2180e66f709bSChris Mason 218174123bd7SChris Mason /* 2182d352ac68SChris Mason * readahead one full node of leaves, finding things that are close 2183d352ac68SChris Mason * to the block in 'slot', and triggering ra on them. 21843c69faecSChris Mason */ 21852ff7e61eSJeff Mahoney static void reada_for_search(struct btrfs_fs_info *fs_info, 2186e02119d5SChris Mason struct btrfs_path *path, 218701f46658SChris Mason int level, int slot, u64 objectid) 21883c69faecSChris Mason { 21895f39d397SChris Mason struct extent_buffer *node; 219001f46658SChris Mason struct btrfs_disk_key disk_key; 21913c69faecSChris Mason u32 nritems; 21923c69faecSChris Mason u64 search; 2193a7175319SChris Mason u64 target; 21946b80053dSChris Mason u64 nread = 0; 21955f39d397SChris Mason struct extent_buffer *eb; 21966b80053dSChris Mason u32 nr; 21976b80053dSChris Mason u32 blocksize; 21986b80053dSChris Mason u32 nscan = 0; 2199db94535dSChris Mason 2200a6b6e75eSChris Mason if (level != 1) 22013c69faecSChris Mason return; 22023c69faecSChris Mason 22036702ed49SChris Mason if (!path->nodes[level]) 22046702ed49SChris Mason return; 22056702ed49SChris Mason 22065f39d397SChris Mason node = path->nodes[level]; 2207925baeddSChris Mason 22083c69faecSChris Mason search = btrfs_node_blockptr(node, slot); 22090b246afaSJeff Mahoney blocksize = fs_info->nodesize; 22100b246afaSJeff Mahoney eb = find_extent_buffer(fs_info, search); 22115f39d397SChris Mason if (eb) { 22125f39d397SChris Mason free_extent_buffer(eb); 22133c69faecSChris 
Mason return; 22143c69faecSChris Mason } 22153c69faecSChris Mason 2216a7175319SChris Mason target = search; 22176b80053dSChris Mason 22185f39d397SChris Mason nritems = btrfs_header_nritems(node); 22196b80053dSChris Mason nr = slot; 222025b8b936SJosef Bacik 22213c69faecSChris Mason while (1) { 2222e4058b54SDavid Sterba if (path->reada == READA_BACK) { 22236b80053dSChris Mason if (nr == 0) 22243c69faecSChris Mason break; 22256b80053dSChris Mason nr--; 2226e4058b54SDavid Sterba } else if (path->reada == READA_FORWARD) { 22276b80053dSChris Mason nr++; 22286b80053dSChris Mason if (nr >= nritems) 22296b80053dSChris Mason break; 22303c69faecSChris Mason } 2231e4058b54SDavid Sterba if (path->reada == READA_BACK && objectid) { 223201f46658SChris Mason btrfs_node_key(node, &disk_key, nr); 223301f46658SChris Mason if (btrfs_disk_key_objectid(&disk_key) != objectid) 223401f46658SChris Mason break; 223501f46658SChris Mason } 22366b80053dSChris Mason search = btrfs_node_blockptr(node, nr); 2237a7175319SChris Mason if ((search <= target && target - search <= 65536) || 2238a7175319SChris Mason (search > target && search - target <= 65536)) { 22392ff7e61eSJeff Mahoney readahead_tree_block(fs_info, search); 22406b80053dSChris Mason nread += blocksize; 22413c69faecSChris Mason } 22426b80053dSChris Mason nscan++; 2243a7175319SChris Mason if ((nread > 65536 || nscan > 32)) 22446b80053dSChris Mason break; 22453c69faecSChris Mason } 22463c69faecSChris Mason } 2247925baeddSChris Mason 22482ff7e61eSJeff Mahoney static noinline void reada_for_balance(struct btrfs_fs_info *fs_info, 2249b4ce94deSChris Mason struct btrfs_path *path, int level) 2250b4ce94deSChris Mason { 2251b4ce94deSChris Mason int slot; 2252b4ce94deSChris Mason int nritems; 2253b4ce94deSChris Mason struct extent_buffer *parent; 2254b4ce94deSChris Mason struct extent_buffer *eb; 2255b4ce94deSChris Mason u64 gen; 2256b4ce94deSChris Mason u64 block1 = 0; 2257b4ce94deSChris Mason u64 block2 = 0; 2258b4ce94deSChris Mason 
22598c594ea8SChris Mason parent = path->nodes[level + 1]; 2260b4ce94deSChris Mason if (!parent) 22610b08851fSJosef Bacik return; 2262b4ce94deSChris Mason 2263b4ce94deSChris Mason nritems = btrfs_header_nritems(parent); 22648c594ea8SChris Mason slot = path->slots[level + 1]; 2265b4ce94deSChris Mason 2266b4ce94deSChris Mason if (slot > 0) { 2267b4ce94deSChris Mason block1 = btrfs_node_blockptr(parent, slot - 1); 2268b4ce94deSChris Mason gen = btrfs_node_ptr_generation(parent, slot - 1); 22690b246afaSJeff Mahoney eb = find_extent_buffer(fs_info, block1); 2270b9fab919SChris Mason /* 2271b9fab919SChris Mason * if we get -eagain from btrfs_buffer_uptodate, we 2272b9fab919SChris Mason * don't want to return eagain here. That will loop 2273b9fab919SChris Mason * forever 2274b9fab919SChris Mason */ 2275b9fab919SChris Mason if (eb && btrfs_buffer_uptodate(eb, gen, 1) != 0) 2276b4ce94deSChris Mason block1 = 0; 2277b4ce94deSChris Mason free_extent_buffer(eb); 2278b4ce94deSChris Mason } 22798c594ea8SChris Mason if (slot + 1 < nritems) { 2280b4ce94deSChris Mason block2 = btrfs_node_blockptr(parent, slot + 1); 2281b4ce94deSChris Mason gen = btrfs_node_ptr_generation(parent, slot + 1); 22820b246afaSJeff Mahoney eb = find_extent_buffer(fs_info, block2); 2283b9fab919SChris Mason if (eb && btrfs_buffer_uptodate(eb, gen, 1) != 0) 2284b4ce94deSChris Mason block2 = 0; 2285b4ce94deSChris Mason free_extent_buffer(eb); 2286b4ce94deSChris Mason } 22878c594ea8SChris Mason 2288b4ce94deSChris Mason if (block1) 22892ff7e61eSJeff Mahoney readahead_tree_block(fs_info, block1); 2290b4ce94deSChris Mason if (block2) 22912ff7e61eSJeff Mahoney readahead_tree_block(fs_info, block2); 2292b4ce94deSChris Mason } 2293b4ce94deSChris Mason 2294b4ce94deSChris Mason 2295b4ce94deSChris Mason /* 2296d397712bSChris Mason * when we walk down the tree, it is usually safe to unlock the higher layers 2297d397712bSChris Mason * in the tree. 
The exceptions are when our path goes through slot 0, because 2298d397712bSChris Mason * operations on the tree might require changing key pointers higher up in the 2299d397712bSChris Mason * tree. 2300d352ac68SChris Mason * 2301d397712bSChris Mason * callers might also have set path->keep_locks, which tells this code to keep 2302d397712bSChris Mason * the lock if the path points to the last slot in the block. This is part of 2303d397712bSChris Mason * walking through the tree, and selecting the next slot in the higher block. 2304d352ac68SChris Mason * 2305d397712bSChris Mason * lowest_unlock sets the lowest level in the tree we're allowed to unlock. so 2306d397712bSChris Mason * if lowest_unlock is 1, level 0 won't be unlocked 2307d352ac68SChris Mason */ 2308e02119d5SChris Mason static noinline void unlock_up(struct btrfs_path *path, int level, 2309f7c79f30SChris Mason int lowest_unlock, int min_write_lock_level, 2310f7c79f30SChris Mason int *write_lock_level) 2311925baeddSChris Mason { 2312925baeddSChris Mason int i; 2313925baeddSChris Mason int skip_level = level; 2314051e1b9fSChris Mason int no_skips = 0; 2315925baeddSChris Mason struct extent_buffer *t; 2316925baeddSChris Mason 2317925baeddSChris Mason for (i = level; i < BTRFS_MAX_LEVEL; i++) { 2318925baeddSChris Mason if (!path->nodes[i]) 2319925baeddSChris Mason break; 2320925baeddSChris Mason if (!path->locks[i]) 2321925baeddSChris Mason break; 2322051e1b9fSChris Mason if (!no_skips && path->slots[i] == 0) { 2323925baeddSChris Mason skip_level = i + 1; 2324925baeddSChris Mason continue; 2325925baeddSChris Mason } 2326051e1b9fSChris Mason if (!no_skips && path->keep_locks) { 2327925baeddSChris Mason u32 nritems; 2328925baeddSChris Mason t = path->nodes[i]; 2329925baeddSChris Mason nritems = btrfs_header_nritems(t); 2330051e1b9fSChris Mason if (nritems < 1 || path->slots[i] >= nritems - 1) { 2331925baeddSChris Mason skip_level = i + 1; 2332925baeddSChris Mason continue; 2333925baeddSChris Mason } 
2334925baeddSChris Mason } 2335051e1b9fSChris Mason if (skip_level < i && i >= lowest_unlock) 2336051e1b9fSChris Mason no_skips = 1; 2337051e1b9fSChris Mason 2338925baeddSChris Mason t = path->nodes[i]; 2339d80bb3f9SLiu Bo if (i >= lowest_unlock && i > skip_level) { 2340bd681513SChris Mason btrfs_tree_unlock_rw(t, path->locks[i]); 2341925baeddSChris Mason path->locks[i] = 0; 2342f7c79f30SChris Mason if (write_lock_level && 2343f7c79f30SChris Mason i > min_write_lock_level && 2344f7c79f30SChris Mason i <= *write_lock_level) { 2345f7c79f30SChris Mason *write_lock_level = i - 1; 2346f7c79f30SChris Mason } 2347925baeddSChris Mason } 2348925baeddSChris Mason } 2349925baeddSChris Mason } 2350925baeddSChris Mason 23513c69faecSChris Mason /* 2352c8c42864SChris Mason * helper function for btrfs_search_slot. The goal is to find a block 2353c8c42864SChris Mason * in cache without setting the path to blocking. If we find the block 2354c8c42864SChris Mason * we return zero and the path is unchanged. 2355c8c42864SChris Mason * 2356c8c42864SChris Mason * If we can't find the block, we set the path blocking and do some 2357c8c42864SChris Mason * reada. -EAGAIN is returned and the search must be repeated. 
2358c8c42864SChris Mason */ 2359c8c42864SChris Mason static int 2360d07b8528SLiu Bo read_block_for_search(struct btrfs_root *root, struct btrfs_path *p, 2361c8c42864SChris Mason struct extent_buffer **eb_ret, int level, int slot, 2362cda79c54SDavid Sterba const struct btrfs_key *key) 2363c8c42864SChris Mason { 23640b246afaSJeff Mahoney struct btrfs_fs_info *fs_info = root->fs_info; 2365c8c42864SChris Mason u64 blocknr; 2366c8c42864SChris Mason u64 gen; 2367c8c42864SChris Mason struct extent_buffer *tmp; 2368581c1760SQu Wenruo struct btrfs_key first_key; 236976a05b35SChris Mason int ret; 2370581c1760SQu Wenruo int parent_level; 2371c8c42864SChris Mason 2372213ff4b7SNikolay Borisov blocknr = btrfs_node_blockptr(*eb_ret, slot); 2373213ff4b7SNikolay Borisov gen = btrfs_node_ptr_generation(*eb_ret, slot); 2374213ff4b7SNikolay Borisov parent_level = btrfs_header_level(*eb_ret); 2375213ff4b7SNikolay Borisov btrfs_node_key_to_cpu(*eb_ret, &first_key, slot); 2376c8c42864SChris Mason 23770b246afaSJeff Mahoney tmp = find_extent_buffer(fs_info, blocknr); 2378cb44921aSChris Mason if (tmp) { 2379b9fab919SChris Mason /* first we do an atomic uptodate check */ 2380b9fab919SChris Mason if (btrfs_buffer_uptodate(tmp, gen, 1) > 0) { 2381448de471SQu Wenruo /* 2382448de471SQu Wenruo * Do extra check for first_key, eb can be stale due to 2383448de471SQu Wenruo * being cached, read from scrub, or have multiple 2384448de471SQu Wenruo * parents (shared tree blocks). 2385448de471SQu Wenruo */ 2386e064d5e9SDavid Sterba if (btrfs_verify_level_key(tmp, 2387448de471SQu Wenruo parent_level - 1, &first_key, gen)) { 2388448de471SQu Wenruo free_extent_buffer(tmp); 2389448de471SQu Wenruo return -EUCLEAN; 2390448de471SQu Wenruo } 2391c8c42864SChris Mason *eb_ret = tmp; 2392c8c42864SChris Mason return 0; 2393c8c42864SChris Mason } 2394bdf7c00eSJosef Bacik 2395cb44921aSChris Mason /* the pages were up to date, but we failed 2396cb44921aSChris Mason * the generation number check. 
Do a full 2397cb44921aSChris Mason * read for the generation number that is correct. 2398cb44921aSChris Mason * We must do this without dropping locks so 2399cb44921aSChris Mason * we can trust our generation number 2400cb44921aSChris Mason */ 2401bd681513SChris Mason btrfs_set_path_blocking(p); 2402bd681513SChris Mason 2403b9fab919SChris Mason /* now we're allowed to do a blocking uptodate check */ 2404581c1760SQu Wenruo ret = btrfs_read_buffer(tmp, gen, parent_level - 1, &first_key); 2405bdf7c00eSJosef Bacik if (!ret) { 2406cb44921aSChris Mason *eb_ret = tmp; 2407cb44921aSChris Mason return 0; 2408cb44921aSChris Mason } 2409cb44921aSChris Mason free_extent_buffer(tmp); 2410b3b4aa74SDavid Sterba btrfs_release_path(p); 2411cb44921aSChris Mason return -EIO; 2412cb44921aSChris Mason } 2413c8c42864SChris Mason 2414c8c42864SChris Mason /* 2415c8c42864SChris Mason * reduce lock contention at high levels 2416c8c42864SChris Mason * of the btree by dropping locks before 241776a05b35SChris Mason * we read. Don't release the lock on the current 241876a05b35SChris Mason * level because we need to walk this node to figure 241976a05b35SChris Mason * out which blocks to read. 2420c8c42864SChris Mason */ 24218c594ea8SChris Mason btrfs_unlock_up_safe(p, level + 1); 24228c594ea8SChris Mason btrfs_set_path_blocking(p); 24238c594ea8SChris Mason 2424e4058b54SDavid Sterba if (p->reada != READA_NONE) 24252ff7e61eSJeff Mahoney reada_for_search(fs_info, p, level, slot, key->objectid); 2426c8c42864SChris Mason 242776a05b35SChris Mason ret = -EAGAIN; 242802a3307aSLiu Bo tmp = read_tree_block(fs_info, blocknr, gen, parent_level - 1, 2429581c1760SQu Wenruo &first_key); 243064c043deSLiu Bo if (!IS_ERR(tmp)) { 243176a05b35SChris Mason /* 243276a05b35SChris Mason * If the read above didn't mark this buffer up to date, 243376a05b35SChris Mason * it will never end up being up to date. 
Set ret to EIO now 243476a05b35SChris Mason * and give up so that our caller doesn't loop forever 243576a05b35SChris Mason * on our EAGAINs. 243676a05b35SChris Mason */ 2437e6a1d6fdSLiu Bo if (!extent_buffer_uptodate(tmp)) 243876a05b35SChris Mason ret = -EIO; 2439c8c42864SChris Mason free_extent_buffer(tmp); 2440c871b0f2SLiu Bo } else { 2441c871b0f2SLiu Bo ret = PTR_ERR(tmp); 244276a05b35SChris Mason } 244302a3307aSLiu Bo 244402a3307aSLiu Bo btrfs_release_path(p); 244576a05b35SChris Mason return ret; 2446c8c42864SChris Mason } 2447c8c42864SChris Mason 2448c8c42864SChris Mason /* 2449c8c42864SChris Mason * helper function for btrfs_search_slot. This does all of the checks 2450c8c42864SChris Mason * for node-level blocks and does any balancing required based on 2451c8c42864SChris Mason * the ins_len. 2452c8c42864SChris Mason * 2453c8c42864SChris Mason * If no extra work was required, zero is returned. If we had to 2454c8c42864SChris Mason * drop the path, -EAGAIN is returned and btrfs_search_slot must 2455c8c42864SChris Mason * start over 2456c8c42864SChris Mason */ 2457c8c42864SChris Mason static int 2458c8c42864SChris Mason setup_nodes_for_search(struct btrfs_trans_handle *trans, 2459c8c42864SChris Mason struct btrfs_root *root, struct btrfs_path *p, 2460bd681513SChris Mason struct extent_buffer *b, int level, int ins_len, 2461bd681513SChris Mason int *write_lock_level) 2462c8c42864SChris Mason { 24630b246afaSJeff Mahoney struct btrfs_fs_info *fs_info = root->fs_info; 2464c8c42864SChris Mason int ret; 24650b246afaSJeff Mahoney 2466c8c42864SChris Mason if ((p->search_for_split || ins_len > 0) && btrfs_header_nritems(b) >= 24670b246afaSJeff Mahoney BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 3) { 2468c8c42864SChris Mason int sret; 2469c8c42864SChris Mason 2470bd681513SChris Mason if (*write_lock_level < level + 1) { 2471bd681513SChris Mason *write_lock_level = level + 1; 2472bd681513SChris Mason btrfs_release_path(p); 2473bd681513SChris Mason goto again; 2474bd681513SChris 
Mason } 2475bd681513SChris Mason 2476c8c42864SChris Mason btrfs_set_path_blocking(p); 24772ff7e61eSJeff Mahoney reada_for_balance(fs_info, p, level); 2478c8c42864SChris Mason sret = split_node(trans, root, p, level); 2479c8c42864SChris Mason 2480c8c42864SChris Mason BUG_ON(sret > 0); 2481c8c42864SChris Mason if (sret) { 2482c8c42864SChris Mason ret = sret; 2483c8c42864SChris Mason goto done; 2484c8c42864SChris Mason } 2485c8c42864SChris Mason b = p->nodes[level]; 2486c8c42864SChris Mason } else if (ins_len < 0 && btrfs_header_nritems(b) < 24870b246afaSJeff Mahoney BTRFS_NODEPTRS_PER_BLOCK(fs_info) / 2) { 2488c8c42864SChris Mason int sret; 2489c8c42864SChris Mason 2490bd681513SChris Mason if (*write_lock_level < level + 1) { 2491bd681513SChris Mason *write_lock_level = level + 1; 2492bd681513SChris Mason btrfs_release_path(p); 2493bd681513SChris Mason goto again; 2494bd681513SChris Mason } 2495bd681513SChris Mason 2496c8c42864SChris Mason btrfs_set_path_blocking(p); 24972ff7e61eSJeff Mahoney reada_for_balance(fs_info, p, level); 2498c8c42864SChris Mason sret = balance_level(trans, root, p, level); 2499c8c42864SChris Mason 2500c8c42864SChris Mason if (sret) { 2501c8c42864SChris Mason ret = sret; 2502c8c42864SChris Mason goto done; 2503c8c42864SChris Mason } 2504c8c42864SChris Mason b = p->nodes[level]; 2505c8c42864SChris Mason if (!b) { 2506b3b4aa74SDavid Sterba btrfs_release_path(p); 2507c8c42864SChris Mason goto again; 2508c8c42864SChris Mason } 2509c8c42864SChris Mason BUG_ON(btrfs_header_nritems(b) == 1); 2510c8c42864SChris Mason } 2511c8c42864SChris Mason return 0; 2512c8c42864SChris Mason 2513c8c42864SChris Mason again: 2514c8c42864SChris Mason ret = -EAGAIN; 2515c8c42864SChris Mason done: 2516c8c42864SChris Mason return ret; 2517c8c42864SChris Mason } 2518c8c42864SChris Mason 2519381cf658SDavid Sterba int btrfs_find_item(struct btrfs_root *fs_root, struct btrfs_path *path, 2520e33d5c3dSKelley Nielsen u64 iobjectid, u64 ioff, u8 key_type, 2521e33d5c3dSKelley 
Nielsen struct btrfs_key *found_key) 2522e33d5c3dSKelley Nielsen { 2523e33d5c3dSKelley Nielsen int ret; 2524e33d5c3dSKelley Nielsen struct btrfs_key key; 2525e33d5c3dSKelley Nielsen struct extent_buffer *eb; 2526381cf658SDavid Sterba 2527381cf658SDavid Sterba ASSERT(path); 25281d4c08e0SDavid Sterba ASSERT(found_key); 2529e33d5c3dSKelley Nielsen 2530e33d5c3dSKelley Nielsen key.type = key_type; 2531e33d5c3dSKelley Nielsen key.objectid = iobjectid; 2532e33d5c3dSKelley Nielsen key.offset = ioff; 2533e33d5c3dSKelley Nielsen 2534e33d5c3dSKelley Nielsen ret = btrfs_search_slot(NULL, fs_root, &key, path, 0, 0); 25351d4c08e0SDavid Sterba if (ret < 0) 2536e33d5c3dSKelley Nielsen return ret; 2537e33d5c3dSKelley Nielsen 2538e33d5c3dSKelley Nielsen eb = path->nodes[0]; 2539e33d5c3dSKelley Nielsen if (ret && path->slots[0] >= btrfs_header_nritems(eb)) { 2540e33d5c3dSKelley Nielsen ret = btrfs_next_leaf(fs_root, path); 2541e33d5c3dSKelley Nielsen if (ret) 2542e33d5c3dSKelley Nielsen return ret; 2543e33d5c3dSKelley Nielsen eb = path->nodes[0]; 2544e33d5c3dSKelley Nielsen } 2545e33d5c3dSKelley Nielsen 2546e33d5c3dSKelley Nielsen btrfs_item_key_to_cpu(eb, found_key, path->slots[0]); 2547e33d5c3dSKelley Nielsen if (found_key->type != key.type || 2548e33d5c3dSKelley Nielsen found_key->objectid != key.objectid) 2549e33d5c3dSKelley Nielsen return 1; 2550e33d5c3dSKelley Nielsen 2551e33d5c3dSKelley Nielsen return 0; 2552e33d5c3dSKelley Nielsen } 2553e33d5c3dSKelley Nielsen 25541fc28d8eSLiu Bo static struct extent_buffer *btrfs_search_slot_get_root(struct btrfs_root *root, 25551fc28d8eSLiu Bo struct btrfs_path *p, 25561fc28d8eSLiu Bo int write_lock_level) 25571fc28d8eSLiu Bo { 25581fc28d8eSLiu Bo struct btrfs_fs_info *fs_info = root->fs_info; 25591fc28d8eSLiu Bo struct extent_buffer *b; 25601fc28d8eSLiu Bo int root_lock; 25611fc28d8eSLiu Bo int level = 0; 25621fc28d8eSLiu Bo 25631fc28d8eSLiu Bo /* We try very hard to do read locks on the root */ 25641fc28d8eSLiu Bo root_lock = 
BTRFS_READ_LOCK; 25651fc28d8eSLiu Bo 25661fc28d8eSLiu Bo if (p->search_commit_root) { 2567be6821f8SFilipe Manana /* 2568be6821f8SFilipe Manana * The commit roots are read only so we always do read locks, 2569be6821f8SFilipe Manana * and we always must hold the commit_root_sem when doing 2570be6821f8SFilipe Manana * searches on them, the only exception is send where we don't 2571be6821f8SFilipe Manana * want to block transaction commits for a long time, so 2572be6821f8SFilipe Manana * we need to clone the commit root in order to avoid races 2573be6821f8SFilipe Manana * with transaction commits that create a snapshot of one of 2574be6821f8SFilipe Manana * the roots used by a send operation. 2575be6821f8SFilipe Manana */ 2576be6821f8SFilipe Manana if (p->need_commit_sem) { 25771fc28d8eSLiu Bo down_read(&fs_info->commit_root_sem); 2578be6821f8SFilipe Manana b = btrfs_clone_extent_buffer(root->commit_root); 2579be6821f8SFilipe Manana up_read(&fs_info->commit_root_sem); 2580be6821f8SFilipe Manana if (!b) 2581be6821f8SFilipe Manana return ERR_PTR(-ENOMEM); 2582be6821f8SFilipe Manana 2583be6821f8SFilipe Manana } else { 25841fc28d8eSLiu Bo b = root->commit_root; 258567439dadSDavid Sterba atomic_inc(&b->refs); 2586be6821f8SFilipe Manana } 25871fc28d8eSLiu Bo level = btrfs_header_level(b); 2588f9ddfd05SLiu Bo /* 2589f9ddfd05SLiu Bo * Ensure that all callers have set skip_locking when 2590f9ddfd05SLiu Bo * p->search_commit_root = 1. 2591f9ddfd05SLiu Bo */ 2592f9ddfd05SLiu Bo ASSERT(p->skip_locking == 1); 25931fc28d8eSLiu Bo 25941fc28d8eSLiu Bo goto out; 25951fc28d8eSLiu Bo } 25961fc28d8eSLiu Bo 25971fc28d8eSLiu Bo if (p->skip_locking) { 25981fc28d8eSLiu Bo b = btrfs_root_node(root); 25991fc28d8eSLiu Bo level = btrfs_header_level(b); 26001fc28d8eSLiu Bo goto out; 26011fc28d8eSLiu Bo } 26021fc28d8eSLiu Bo 26031fc28d8eSLiu Bo /* 2604662c653bSLiu Bo * If the level is set to maximum, we can skip trying to get the read 2605662c653bSLiu Bo * lock. 
2606662c653bSLiu Bo */ 2607662c653bSLiu Bo if (write_lock_level < BTRFS_MAX_LEVEL) { 2608662c653bSLiu Bo /* 2609662c653bSLiu Bo * We don't know the level of the root node until we actually 2610662c653bSLiu Bo * have it read locked 26111fc28d8eSLiu Bo */ 261251899412SJosef Bacik b = __btrfs_read_lock_root_node(root, p->recurse); 26131fc28d8eSLiu Bo level = btrfs_header_level(b); 26141fc28d8eSLiu Bo if (level > write_lock_level) 26151fc28d8eSLiu Bo goto out; 26161fc28d8eSLiu Bo 2617662c653bSLiu Bo /* Whoops, must trade for write lock */ 26181fc28d8eSLiu Bo btrfs_tree_read_unlock(b); 26191fc28d8eSLiu Bo free_extent_buffer(b); 2620662c653bSLiu Bo } 2621662c653bSLiu Bo 26221fc28d8eSLiu Bo b = btrfs_lock_root_node(root); 26231fc28d8eSLiu Bo root_lock = BTRFS_WRITE_LOCK; 26241fc28d8eSLiu Bo 26251fc28d8eSLiu Bo /* The level might have changed, check again */ 26261fc28d8eSLiu Bo level = btrfs_header_level(b); 26271fc28d8eSLiu Bo 26281fc28d8eSLiu Bo out: 26291fc28d8eSLiu Bo p->nodes[level] = b; 26301fc28d8eSLiu Bo if (!p->skip_locking) 26311fc28d8eSLiu Bo p->locks[level] = root_lock; 26321fc28d8eSLiu Bo /* 26331fc28d8eSLiu Bo * Callers are responsible for dropping b's references. 26341fc28d8eSLiu Bo */ 26351fc28d8eSLiu Bo return b; 26361fc28d8eSLiu Bo } 26371fc28d8eSLiu Bo 26381fc28d8eSLiu Bo 2639c8c42864SChris Mason /* 26404271eceaSNikolay Borisov * btrfs_search_slot - look for a key in a tree and perform necessary 26414271eceaSNikolay Borisov * modifications to preserve tree invariants. 264274123bd7SChris Mason * 26434271eceaSNikolay Borisov * @trans: Handle of transaction, used when modifying the tree 26444271eceaSNikolay Borisov * @p: Holds all btree nodes along the search path 26454271eceaSNikolay Borisov * @root: The root node of the tree 26464271eceaSNikolay Borisov * @key: The key we are looking for 26474271eceaSNikolay Borisov * @ins_len: Indicates purpose of search, for inserts it is 1, for 26484271eceaSNikolay Borisov * deletions it's -1. 
0 for plain searches 26494271eceaSNikolay Borisov * @cow: boolean should CoW operations be performed. Must always be 1 26504271eceaSNikolay Borisov * when modifying the tree. 265197571fd0SChris Mason * 26524271eceaSNikolay Borisov * If @ins_len > 0, nodes and leaves will be split as we walk down the tree. 26534271eceaSNikolay Borisov * If @ins_len < 0, nodes will be merged as we walk down the tree (if possible) 26544271eceaSNikolay Borisov * 26554271eceaSNikolay Borisov * If @key is found, 0 is returned and you can find the item in the leaf level 26564271eceaSNikolay Borisov * of the path (level 0) 26574271eceaSNikolay Borisov * 26584271eceaSNikolay Borisov * If @key isn't found, 1 is returned and the leaf level of the path (level 0) 26594271eceaSNikolay Borisov * points to the slot where it should be inserted 26604271eceaSNikolay Borisov * 26614271eceaSNikolay Borisov * If an error is encountered while searching the tree a negative error number 26624271eceaSNikolay Borisov * is returned 266374123bd7SChris Mason */ 2664310712b2SOmar Sandoval int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root, 2665310712b2SOmar Sandoval const struct btrfs_key *key, struct btrfs_path *p, 2666310712b2SOmar Sandoval int ins_len, int cow) 2667be0e5c09SChris Mason { 26685f39d397SChris Mason struct extent_buffer *b; 2669be0e5c09SChris Mason int slot; 2670be0e5c09SChris Mason int ret; 267133c66f43SYan Zheng int err; 2672be0e5c09SChris Mason int level; 2673925baeddSChris Mason int lowest_unlock = 1; 2674bd681513SChris Mason /* everything at write_lock_level or lower must be write locked */ 2675bd681513SChris Mason int write_lock_level = 0; 26769f3a7427SChris Mason u8 lowest_level = 0; 2677f7c79f30SChris Mason int min_write_lock_level; 2678d7396f07SFilipe David Borba Manana int prev_cmp; 26799f3a7427SChris Mason 26806702ed49SChris Mason lowest_level = p->lowest_level; 2681323ac95bSChris Mason WARN_ON(lowest_level && ins_len > 0); 268222b0ebdaSChris Mason 
WARN_ON(p->nodes[0] != NULL); 2683eb653de1SFilipe David Borba Manana BUG_ON(!cow && ins_len); 268425179201SJosef Bacik 2685bd681513SChris Mason if (ins_len < 0) { 2686925baeddSChris Mason lowest_unlock = 2; 268765b51a00SChris Mason 2688bd681513SChris Mason /* when we are removing items, we might have to go up to level 2689bd681513SChris Mason * two as we update tree pointers Make sure we keep write 2690bd681513SChris Mason * for those levels as well 2691bd681513SChris Mason */ 2692bd681513SChris Mason write_lock_level = 2; 2693bd681513SChris Mason } else if (ins_len > 0) { 2694bd681513SChris Mason /* 2695bd681513SChris Mason * for inserting items, make sure we have a write lock on 2696bd681513SChris Mason * level 1 so we can update keys 2697bd681513SChris Mason */ 2698bd681513SChris Mason write_lock_level = 1; 2699bd681513SChris Mason } 2700bd681513SChris Mason 2701bd681513SChris Mason if (!cow) 2702bd681513SChris Mason write_lock_level = -1; 2703bd681513SChris Mason 270409a2a8f9SJosef Bacik if (cow && (p->keep_locks || p->lowest_level)) 2705bd681513SChris Mason write_lock_level = BTRFS_MAX_LEVEL; 2706bd681513SChris Mason 2707f7c79f30SChris Mason min_write_lock_level = write_lock_level; 2708f7c79f30SChris Mason 2709bb803951SChris Mason again: 2710d7396f07SFilipe David Borba Manana prev_cmp = -1; 27111fc28d8eSLiu Bo b = btrfs_search_slot_get_root(root, p, write_lock_level); 2712be6821f8SFilipe Manana if (IS_ERR(b)) { 2713be6821f8SFilipe Manana ret = PTR_ERR(b); 2714be6821f8SFilipe Manana goto done; 2715be6821f8SFilipe Manana } 2716925baeddSChris Mason 2717eb60ceacSChris Mason while (b) { 2718f624d976SQu Wenruo int dec = 0; 2719f624d976SQu Wenruo 27205f39d397SChris Mason level = btrfs_header_level(b); 272165b51a00SChris Mason 272202217ed2SChris Mason if (cow) { 27239ea2c7c9SNikolay Borisov bool last_level = (level == (BTRFS_MAX_LEVEL - 1)); 27249ea2c7c9SNikolay Borisov 2725c8c42864SChris Mason /* 2726c8c42864SChris Mason * if we don't really need to cow this block 
2727c8c42864SChris Mason * then we don't want to set the path blocking, 2728c8c42864SChris Mason * so we test it here 2729c8c42864SChris Mason */ 273064c12921SJeff Mahoney if (!should_cow_block(trans, root, b)) { 273164c12921SJeff Mahoney trans->dirty = true; 273265b51a00SChris Mason goto cow_done; 273364c12921SJeff Mahoney } 27345d4f98a2SYan Zheng 2735bd681513SChris Mason /* 2736bd681513SChris Mason * must have write locks on this node and the 2737bd681513SChris Mason * parent 2738bd681513SChris Mason */ 27395124e00eSJosef Bacik if (level > write_lock_level || 27405124e00eSJosef Bacik (level + 1 > write_lock_level && 27415124e00eSJosef Bacik level + 1 < BTRFS_MAX_LEVEL && 27425124e00eSJosef Bacik p->nodes[level + 1])) { 2743bd681513SChris Mason write_lock_level = level + 1; 2744bd681513SChris Mason btrfs_release_path(p); 2745bd681513SChris Mason goto again; 2746bd681513SChris Mason } 2747bd681513SChris Mason 2748160f4089SFilipe Manana btrfs_set_path_blocking(p); 27499ea2c7c9SNikolay Borisov if (last_level) 27509ea2c7c9SNikolay Borisov err = btrfs_cow_block(trans, root, b, NULL, 0, 27519631e4ccSJosef Bacik &b, 27529631e4ccSJosef Bacik BTRFS_NESTING_COW); 27539ea2c7c9SNikolay Borisov else 275433c66f43SYan Zheng err = btrfs_cow_block(trans, root, b, 2755e20d96d6SChris Mason p->nodes[level + 1], 27569631e4ccSJosef Bacik p->slots[level + 1], &b, 27579631e4ccSJosef Bacik BTRFS_NESTING_COW); 275833c66f43SYan Zheng if (err) { 275933c66f43SYan Zheng ret = err; 276065b51a00SChris Mason goto done; 276154aa1f4dSChris Mason } 276202217ed2SChris Mason } 276365b51a00SChris Mason cow_done: 2764eb60ceacSChris Mason p->nodes[level] = b; 276552398340SLiu Bo /* 276652398340SLiu Bo * Leave path with blocking locks to avoid massive 276752398340SLiu Bo * lock context switch, this is made on purpose. 
276852398340SLiu Bo */ 2769b4ce94deSChris Mason 2770b4ce94deSChris Mason /* 2771b4ce94deSChris Mason * we have a lock on b and as long as we aren't changing 2772b4ce94deSChris Mason * the tree, there is no way to for the items in b to change. 2773b4ce94deSChris Mason * It is safe to drop the lock on our parent before we 2774b4ce94deSChris Mason * go through the expensive btree search on b. 2775b4ce94deSChris Mason * 2776eb653de1SFilipe David Borba Manana * If we're inserting or deleting (ins_len != 0), then we might 2777eb653de1SFilipe David Borba Manana * be changing slot zero, which may require changing the parent. 2778eb653de1SFilipe David Borba Manana * So, we can't drop the lock until after we know which slot 2779eb653de1SFilipe David Borba Manana * we're operating on. 2780b4ce94deSChris Mason */ 2781eb653de1SFilipe David Borba Manana if (!ins_len && !p->keep_locks) { 2782eb653de1SFilipe David Borba Manana int u = level + 1; 2783eb653de1SFilipe David Borba Manana 2784eb653de1SFilipe David Borba Manana if (u < BTRFS_MAX_LEVEL && p->locks[u]) { 2785eb653de1SFilipe David Borba Manana btrfs_tree_unlock_rw(p->nodes[u], p->locks[u]); 2786eb653de1SFilipe David Borba Manana p->locks[u] = 0; 2787eb653de1SFilipe David Borba Manana } 2788eb653de1SFilipe David Borba Manana } 2789b4ce94deSChris Mason 2790995e9a16SNikolay Borisov /* 2791995e9a16SNikolay Borisov * If btrfs_bin_search returns an exact match (prev_cmp == 0) 2792995e9a16SNikolay Borisov * we can safely assume the target key will always be in slot 0 2793995e9a16SNikolay Borisov * on lower levels due to the invariants BTRFS' btree provides, 2794995e9a16SNikolay Borisov * namely that a btrfs_key_ptr entry always points to the 2795995e9a16SNikolay Borisov * lowest key in the child node, thus we can skip searching 2796995e9a16SNikolay Borisov * lower levels 2797995e9a16SNikolay Borisov */ 2798995e9a16SNikolay Borisov if (prev_cmp == 0) { 2799995e9a16SNikolay Borisov slot = 0; 2800995e9a16SNikolay Borisov ret = 0; 
2801995e9a16SNikolay Borisov } else { 2802995e9a16SNikolay Borisov ret = btrfs_bin_search(b, key, &slot); 2803995e9a16SNikolay Borisov prev_cmp = ret; 2804415b35a5SLiu Bo if (ret < 0) 2805415b35a5SLiu Bo goto done; 2806995e9a16SNikolay Borisov } 2807b4ce94deSChris Mason 2808f624d976SQu Wenruo if (level == 0) { 2809be0e5c09SChris Mason p->slots[level] = slot; 281087b29b20SYan Zheng if (ins_len > 0 && 2811e902baacSDavid Sterba btrfs_leaf_free_space(b) < ins_len) { 2812bd681513SChris Mason if (write_lock_level < 1) { 2813bd681513SChris Mason write_lock_level = 1; 2814bd681513SChris Mason btrfs_release_path(p); 2815bd681513SChris Mason goto again; 2816bd681513SChris Mason } 2817bd681513SChris Mason 2818b4ce94deSChris Mason btrfs_set_path_blocking(p); 281933c66f43SYan Zheng err = split_leaf(trans, root, key, 2820cc0c5538SChris Mason p, ins_len, ret == 0); 2821b4ce94deSChris Mason 282233c66f43SYan Zheng BUG_ON(err > 0); 282333c66f43SYan Zheng if (err) { 282433c66f43SYan Zheng ret = err; 282565b51a00SChris Mason goto done; 282665b51a00SChris Mason } 28275c680ed6SChris Mason } 2828459931ecSChris Mason if (!p->search_for_split) 2829f7c79f30SChris Mason unlock_up(p, level, lowest_unlock, 28304b6f8e96SLiu Bo min_write_lock_level, NULL); 283165b51a00SChris Mason goto done; 283265b51a00SChris Mason } 2833f624d976SQu Wenruo if (ret && slot > 0) { 2834f624d976SQu Wenruo dec = 1; 2835f624d976SQu Wenruo slot--; 2836f624d976SQu Wenruo } 2837f624d976SQu Wenruo p->slots[level] = slot; 2838f624d976SQu Wenruo err = setup_nodes_for_search(trans, root, p, b, level, ins_len, 2839f624d976SQu Wenruo &write_lock_level); 2840f624d976SQu Wenruo if (err == -EAGAIN) 2841f624d976SQu Wenruo goto again; 2842f624d976SQu Wenruo if (err) { 2843f624d976SQu Wenruo ret = err; 2844f624d976SQu Wenruo goto done; 2845f624d976SQu Wenruo } 2846f624d976SQu Wenruo b = p->nodes[level]; 2847f624d976SQu Wenruo slot = p->slots[level]; 2848f624d976SQu Wenruo 2849f624d976SQu Wenruo /* 2850f624d976SQu Wenruo * Slot 0 is 
special, if we change the key we have to update 2851f624d976SQu Wenruo * the parent pointer which means we must have a write lock on 2852f624d976SQu Wenruo * the parent 2853f624d976SQu Wenruo */ 2854f624d976SQu Wenruo if (slot == 0 && ins_len && write_lock_level < level + 1) { 2855f624d976SQu Wenruo write_lock_level = level + 1; 2856f624d976SQu Wenruo btrfs_release_path(p); 2857f624d976SQu Wenruo goto again; 2858f624d976SQu Wenruo } 2859f624d976SQu Wenruo 2860f624d976SQu Wenruo unlock_up(p, level, lowest_unlock, min_write_lock_level, 2861f624d976SQu Wenruo &write_lock_level); 2862f624d976SQu Wenruo 2863f624d976SQu Wenruo if (level == lowest_level) { 2864f624d976SQu Wenruo if (dec) 2865f624d976SQu Wenruo p->slots[level]++; 2866f624d976SQu Wenruo goto done; 2867f624d976SQu Wenruo } 2868f624d976SQu Wenruo 2869f624d976SQu Wenruo err = read_block_for_search(root, p, &b, level, slot, key); 2870f624d976SQu Wenruo if (err == -EAGAIN) 2871f624d976SQu Wenruo goto again; 2872f624d976SQu Wenruo if (err) { 2873f624d976SQu Wenruo ret = err; 2874f624d976SQu Wenruo goto done; 2875f624d976SQu Wenruo } 2876f624d976SQu Wenruo 2877f624d976SQu Wenruo if (!p->skip_locking) { 2878f624d976SQu Wenruo level = btrfs_header_level(b); 2879f624d976SQu Wenruo if (level <= write_lock_level) { 2880f624d976SQu Wenruo if (!btrfs_try_tree_write_lock(b)) { 2881f624d976SQu Wenruo btrfs_set_path_blocking(p); 2882f624d976SQu Wenruo btrfs_tree_lock(b); 2883f624d976SQu Wenruo } 2884f624d976SQu Wenruo p->locks[level] = BTRFS_WRITE_LOCK; 2885f624d976SQu Wenruo } else { 2886f624d976SQu Wenruo if (!btrfs_tree_read_lock_atomic(b)) { 2887f624d976SQu Wenruo btrfs_set_path_blocking(p); 2888fd7ba1c1SJosef Bacik __btrfs_tree_read_lock(b, BTRFS_NESTING_NORMAL, 2889fd7ba1c1SJosef Bacik p->recurse); 2890f624d976SQu Wenruo } 2891f624d976SQu Wenruo p->locks[level] = BTRFS_READ_LOCK; 2892f624d976SQu Wenruo } 2893f624d976SQu Wenruo p->nodes[level] = b; 2894f624d976SQu Wenruo } 289565b51a00SChris Mason } 289665b51a00SChris 
Mason ret = 1; 289765b51a00SChris Mason done: 2898b4ce94deSChris Mason /* 2899b4ce94deSChris Mason * we don't really know what they plan on doing with the path 2900b4ce94deSChris Mason * from here on, so for now just mark it as blocking 2901b4ce94deSChris Mason */ 2902b9473439SChris Mason if (!p->leave_spinning) 2903b4ce94deSChris Mason btrfs_set_path_blocking(p); 29045f5bc6b1SFilipe Manana if (ret < 0 && !p->skip_release_on_error) 2905b3b4aa74SDavid Sterba btrfs_release_path(p); 2906be0e5c09SChris Mason return ret; 2907be0e5c09SChris Mason } 2908be0e5c09SChris Mason 290974123bd7SChris Mason /* 29105d9e75c4SJan Schmidt * Like btrfs_search_slot, this looks for a key in the given tree. It uses the 29115d9e75c4SJan Schmidt * current state of the tree together with the operations recorded in the tree 29125d9e75c4SJan Schmidt * modification log to search for the key in a previous version of this tree, as 29135d9e75c4SJan Schmidt * denoted by the time_seq parameter. 29145d9e75c4SJan Schmidt * 29155d9e75c4SJan Schmidt * Naturally, there is no support for insert, delete or cow operations. 29165d9e75c4SJan Schmidt * 29175d9e75c4SJan Schmidt * The resulting path and return value will be set up as if we called 29185d9e75c4SJan Schmidt * btrfs_search_slot at that point in time with ins_len and cow both set to 0. 
29195d9e75c4SJan Schmidt */ 2920310712b2SOmar Sandoval int btrfs_search_old_slot(struct btrfs_root *root, const struct btrfs_key *key, 29215d9e75c4SJan Schmidt struct btrfs_path *p, u64 time_seq) 29225d9e75c4SJan Schmidt { 29230b246afaSJeff Mahoney struct btrfs_fs_info *fs_info = root->fs_info; 29245d9e75c4SJan Schmidt struct extent_buffer *b; 29255d9e75c4SJan Schmidt int slot; 29265d9e75c4SJan Schmidt int ret; 29275d9e75c4SJan Schmidt int err; 29285d9e75c4SJan Schmidt int level; 29295d9e75c4SJan Schmidt int lowest_unlock = 1; 29305d9e75c4SJan Schmidt u8 lowest_level = 0; 29315d9e75c4SJan Schmidt 29325d9e75c4SJan Schmidt lowest_level = p->lowest_level; 29335d9e75c4SJan Schmidt WARN_ON(p->nodes[0] != NULL); 29345d9e75c4SJan Schmidt 29355d9e75c4SJan Schmidt if (p->search_commit_root) { 29365d9e75c4SJan Schmidt BUG_ON(time_seq); 29375d9e75c4SJan Schmidt return btrfs_search_slot(NULL, root, key, p, 0, 0); 29385d9e75c4SJan Schmidt } 29395d9e75c4SJan Schmidt 29405d9e75c4SJan Schmidt again: 29415d9e75c4SJan Schmidt b = get_old_root(root, time_seq); 2942315bed43SNikolay Borisov if (!b) { 2943315bed43SNikolay Borisov ret = -EIO; 2944315bed43SNikolay Borisov goto done; 2945315bed43SNikolay Borisov } 29465d9e75c4SJan Schmidt level = btrfs_header_level(b); 29475d9e75c4SJan Schmidt p->locks[level] = BTRFS_READ_LOCK; 29485d9e75c4SJan Schmidt 29495d9e75c4SJan Schmidt while (b) { 2950abe9339dSQu Wenruo int dec = 0; 2951abe9339dSQu Wenruo 29525d9e75c4SJan Schmidt level = btrfs_header_level(b); 29535d9e75c4SJan Schmidt p->nodes[level] = b; 29545d9e75c4SJan Schmidt 29555d9e75c4SJan Schmidt /* 29565d9e75c4SJan Schmidt * we have a lock on b and as long as we aren't changing 29575d9e75c4SJan Schmidt * the tree, there is no way to for the items in b to change. 29585d9e75c4SJan Schmidt * It is safe to drop the lock on our parent before we 29595d9e75c4SJan Schmidt * go through the expensive btree search on b. 
29605d9e75c4SJan Schmidt */ 29615d9e75c4SJan Schmidt btrfs_unlock_up_safe(p, level + 1); 29625d9e75c4SJan Schmidt 2963995e9a16SNikolay Borisov ret = btrfs_bin_search(b, key, &slot); 2964cbca7d59SFilipe Manana if (ret < 0) 2965cbca7d59SFilipe Manana goto done; 29665d9e75c4SJan Schmidt 2967abe9339dSQu Wenruo if (level == 0) { 2968abe9339dSQu Wenruo p->slots[level] = slot; 2969abe9339dSQu Wenruo unlock_up(p, level, lowest_unlock, 0, NULL); 2970abe9339dSQu Wenruo goto done; 2971abe9339dSQu Wenruo } 2972abe9339dSQu Wenruo 29735d9e75c4SJan Schmidt if (ret && slot > 0) { 29745d9e75c4SJan Schmidt dec = 1; 2975abe9339dSQu Wenruo slot--; 29765d9e75c4SJan Schmidt } 29775d9e75c4SJan Schmidt p->slots[level] = slot; 29785d9e75c4SJan Schmidt unlock_up(p, level, lowest_unlock, 0, NULL); 29795d9e75c4SJan Schmidt 29805d9e75c4SJan Schmidt if (level == lowest_level) { 29815d9e75c4SJan Schmidt if (dec) 29825d9e75c4SJan Schmidt p->slots[level]++; 29835d9e75c4SJan Schmidt goto done; 29845d9e75c4SJan Schmidt } 29855d9e75c4SJan Schmidt 2986abe9339dSQu Wenruo err = read_block_for_search(root, p, &b, level, slot, key); 29875d9e75c4SJan Schmidt if (err == -EAGAIN) 29885d9e75c4SJan Schmidt goto again; 29895d9e75c4SJan Schmidt if (err) { 29905d9e75c4SJan Schmidt ret = err; 29915d9e75c4SJan Schmidt goto done; 29925d9e75c4SJan Schmidt } 29935d9e75c4SJan Schmidt 29945d9e75c4SJan Schmidt level = btrfs_header_level(b); 299565e99c43SNikolay Borisov if (!btrfs_tree_read_lock_atomic(b)) { 29965d9e75c4SJan Schmidt btrfs_set_path_blocking(p); 29975d9e75c4SJan Schmidt btrfs_tree_read_lock(b); 29985d9e75c4SJan Schmidt } 29990b246afaSJeff Mahoney b = tree_mod_log_rewind(fs_info, p, b, time_seq); 3000db7f3436SJosef Bacik if (!b) { 3001db7f3436SJosef Bacik ret = -ENOMEM; 3002db7f3436SJosef Bacik goto done; 3003db7f3436SJosef Bacik } 30045d9e75c4SJan Schmidt p->locks[level] = BTRFS_READ_LOCK; 30055d9e75c4SJan Schmidt p->nodes[level] = b; 30065d9e75c4SJan Schmidt } 30075d9e75c4SJan Schmidt ret = 1; 
30085d9e75c4SJan Schmidt done: 30095d9e75c4SJan Schmidt if (!p->leave_spinning) 30105d9e75c4SJan Schmidt btrfs_set_path_blocking(p); 30115d9e75c4SJan Schmidt if (ret < 0) 30125d9e75c4SJan Schmidt btrfs_release_path(p); 30135d9e75c4SJan Schmidt 30145d9e75c4SJan Schmidt return ret; 30155d9e75c4SJan Schmidt } 30165d9e75c4SJan Schmidt 30175d9e75c4SJan Schmidt /* 30182f38b3e1SArne Jansen * helper to use instead of search slot if no exact match is needed but 30192f38b3e1SArne Jansen * instead the next or previous item should be returned. 30202f38b3e1SArne Jansen * When find_higher is true, the next higher item is returned, the next lower 30212f38b3e1SArne Jansen * otherwise. 30222f38b3e1SArne Jansen * When return_any and find_higher are both true, and no higher item is found, 30232f38b3e1SArne Jansen * return the next lower instead. 30242f38b3e1SArne Jansen * When return_any is true and find_higher is false, and no lower item is found, 30252f38b3e1SArne Jansen * return the next higher instead. 30262f38b3e1SArne Jansen * It returns 0 if any item is found, 1 if none is found (tree empty), and 30272f38b3e1SArne Jansen * < 0 on error 30282f38b3e1SArne Jansen */ 30292f38b3e1SArne Jansen int btrfs_search_slot_for_read(struct btrfs_root *root, 3030310712b2SOmar Sandoval const struct btrfs_key *key, 3031310712b2SOmar Sandoval struct btrfs_path *p, int find_higher, 3032310712b2SOmar Sandoval int return_any) 30332f38b3e1SArne Jansen { 30342f38b3e1SArne Jansen int ret; 30352f38b3e1SArne Jansen struct extent_buffer *leaf; 30362f38b3e1SArne Jansen 30372f38b3e1SArne Jansen again: 30382f38b3e1SArne Jansen ret = btrfs_search_slot(NULL, root, key, p, 0, 0); 30392f38b3e1SArne Jansen if (ret <= 0) 30402f38b3e1SArne Jansen return ret; 30412f38b3e1SArne Jansen /* 30422f38b3e1SArne Jansen * a return value of 1 means the path is at the position where the 30432f38b3e1SArne Jansen * item should be inserted. 
Normally this is the next bigger item, 30442f38b3e1SArne Jansen * but in case the previous item is the last in a leaf, path points 30452f38b3e1SArne Jansen * to the first free slot in the previous leaf, i.e. at an invalid 30462f38b3e1SArne Jansen * item. 30472f38b3e1SArne Jansen */ 30482f38b3e1SArne Jansen leaf = p->nodes[0]; 30492f38b3e1SArne Jansen 30502f38b3e1SArne Jansen if (find_higher) { 30512f38b3e1SArne Jansen if (p->slots[0] >= btrfs_header_nritems(leaf)) { 30522f38b3e1SArne Jansen ret = btrfs_next_leaf(root, p); 30532f38b3e1SArne Jansen if (ret <= 0) 30542f38b3e1SArne Jansen return ret; 30552f38b3e1SArne Jansen if (!return_any) 30562f38b3e1SArne Jansen return 1; 30572f38b3e1SArne Jansen /* 30582f38b3e1SArne Jansen * no higher item found, return the next 30592f38b3e1SArne Jansen * lower instead 30602f38b3e1SArne Jansen */ 30612f38b3e1SArne Jansen return_any = 0; 30622f38b3e1SArne Jansen find_higher = 0; 30632f38b3e1SArne Jansen btrfs_release_path(p); 30642f38b3e1SArne Jansen goto again; 30652f38b3e1SArne Jansen } 30662f38b3e1SArne Jansen } else { 30672f38b3e1SArne Jansen if (p->slots[0] == 0) { 30682f38b3e1SArne Jansen ret = btrfs_prev_leaf(root, p); 3069e6793769SArne Jansen if (ret < 0) 30702f38b3e1SArne Jansen return ret; 3071e6793769SArne Jansen if (!ret) { 307223c6bf6aSFilipe David Borba Manana leaf = p->nodes[0]; 307323c6bf6aSFilipe David Borba Manana if (p->slots[0] == btrfs_header_nritems(leaf)) 307423c6bf6aSFilipe David Borba Manana p->slots[0]--; 3075e6793769SArne Jansen return 0; 3076e6793769SArne Jansen } 30772f38b3e1SArne Jansen if (!return_any) 30782f38b3e1SArne Jansen return 1; 30792f38b3e1SArne Jansen /* 30802f38b3e1SArne Jansen * no lower item found, return the next 30812f38b3e1SArne Jansen * higher instead 30822f38b3e1SArne Jansen */ 30832f38b3e1SArne Jansen return_any = 0; 30842f38b3e1SArne Jansen find_higher = 1; 30852f38b3e1SArne Jansen btrfs_release_path(p); 30862f38b3e1SArne Jansen goto again; 3087e6793769SArne Jansen } else { 
30882f38b3e1SArne Jansen --p->slots[0]; 30892f38b3e1SArne Jansen } 30902f38b3e1SArne Jansen } 30912f38b3e1SArne Jansen return 0; 30922f38b3e1SArne Jansen } 30932f38b3e1SArne Jansen 30942f38b3e1SArne Jansen /* 309574123bd7SChris Mason * adjust the pointers going up the tree, starting at level 309674123bd7SChris Mason * making sure the right key of each node is points to 'key'. 309774123bd7SChris Mason * This is used after shifting pointers to the left, so it stops 309874123bd7SChris Mason * fixing up pointers when a given leaf/node is not in slot 0 of the 309974123bd7SChris Mason * higher levels 3100aa5d6bedSChris Mason * 310174123bd7SChris Mason */ 3102b167fa91SNikolay Borisov static void fixup_low_keys(struct btrfs_path *path, 31035f39d397SChris Mason struct btrfs_disk_key *key, int level) 3104be0e5c09SChris Mason { 3105be0e5c09SChris Mason int i; 31065f39d397SChris Mason struct extent_buffer *t; 31070e82bcfeSDavid Sterba int ret; 31085f39d397SChris Mason 3109234b63a0SChris Mason for (i = level; i < BTRFS_MAX_LEVEL; i++) { 3110be0e5c09SChris Mason int tslot = path->slots[i]; 31110e82bcfeSDavid Sterba 3112eb60ceacSChris Mason if (!path->nodes[i]) 3113be0e5c09SChris Mason break; 31145f39d397SChris Mason t = path->nodes[i]; 31150e82bcfeSDavid Sterba ret = tree_mod_log_insert_key(t, tslot, MOD_LOG_KEY_REPLACE, 31160e82bcfeSDavid Sterba GFP_ATOMIC); 31170e82bcfeSDavid Sterba BUG_ON(ret < 0); 31185f39d397SChris Mason btrfs_set_node_key(t, key, tslot); 3119d6025579SChris Mason btrfs_mark_buffer_dirty(path->nodes[i]); 3120be0e5c09SChris Mason if (tslot != 0) 3121be0e5c09SChris Mason break; 3122be0e5c09SChris Mason } 3123be0e5c09SChris Mason } 3124be0e5c09SChris Mason 312574123bd7SChris Mason /* 312631840ae1SZheng Yan * update item key. 312731840ae1SZheng Yan * 312831840ae1SZheng Yan * This function isn't completely safe. 
It's the caller's responsibility 312931840ae1SZheng Yan * that the new key won't break the order 313031840ae1SZheng Yan */ 3131b7a0365eSDaniel Dressler void btrfs_set_item_key_safe(struct btrfs_fs_info *fs_info, 3132b7a0365eSDaniel Dressler struct btrfs_path *path, 3133310712b2SOmar Sandoval const struct btrfs_key *new_key) 313431840ae1SZheng Yan { 313531840ae1SZheng Yan struct btrfs_disk_key disk_key; 313631840ae1SZheng Yan struct extent_buffer *eb; 313731840ae1SZheng Yan int slot; 313831840ae1SZheng Yan 313931840ae1SZheng Yan eb = path->nodes[0]; 314031840ae1SZheng Yan slot = path->slots[0]; 314131840ae1SZheng Yan if (slot > 0) { 314231840ae1SZheng Yan btrfs_item_key(eb, &disk_key, slot - 1); 31437c15d410SQu Wenruo if (unlikely(comp_keys(&disk_key, new_key) >= 0)) { 31447c15d410SQu Wenruo btrfs_crit(fs_info, 31457c15d410SQu Wenruo "slot %u key (%llu %u %llu) new key (%llu %u %llu)", 31467c15d410SQu Wenruo slot, btrfs_disk_key_objectid(&disk_key), 31477c15d410SQu Wenruo btrfs_disk_key_type(&disk_key), 31487c15d410SQu Wenruo btrfs_disk_key_offset(&disk_key), 31497c15d410SQu Wenruo new_key->objectid, new_key->type, 31507c15d410SQu Wenruo new_key->offset); 31517c15d410SQu Wenruo btrfs_print_leaf(eb); 31527c15d410SQu Wenruo BUG(); 31537c15d410SQu Wenruo } 315431840ae1SZheng Yan } 315531840ae1SZheng Yan if (slot < btrfs_header_nritems(eb) - 1) { 315631840ae1SZheng Yan btrfs_item_key(eb, &disk_key, slot + 1); 31577c15d410SQu Wenruo if (unlikely(comp_keys(&disk_key, new_key) <= 0)) { 31587c15d410SQu Wenruo btrfs_crit(fs_info, 31597c15d410SQu Wenruo "slot %u key (%llu %u %llu) new key (%llu %u %llu)", 31607c15d410SQu Wenruo slot, btrfs_disk_key_objectid(&disk_key), 31617c15d410SQu Wenruo btrfs_disk_key_type(&disk_key), 31627c15d410SQu Wenruo btrfs_disk_key_offset(&disk_key), 31637c15d410SQu Wenruo new_key->objectid, new_key->type, 31647c15d410SQu Wenruo new_key->offset); 31657c15d410SQu Wenruo btrfs_print_leaf(eb); 31667c15d410SQu Wenruo BUG(); 31677c15d410SQu Wenruo } 
316831840ae1SZheng Yan } 316931840ae1SZheng Yan 317031840ae1SZheng Yan btrfs_cpu_key_to_disk(&disk_key, new_key); 317131840ae1SZheng Yan btrfs_set_item_key(eb, &disk_key, slot); 317231840ae1SZheng Yan btrfs_mark_buffer_dirty(eb); 317331840ae1SZheng Yan if (slot == 0) 3174b167fa91SNikolay Borisov fixup_low_keys(path, &disk_key, 1); 317531840ae1SZheng Yan } 317631840ae1SZheng Yan 317731840ae1SZheng Yan /* 3178d16c702fSQu Wenruo * Check key order of two sibling extent buffers. 3179d16c702fSQu Wenruo * 3180d16c702fSQu Wenruo * Return true if something is wrong. 3181d16c702fSQu Wenruo * Return false if everything is fine. 3182d16c702fSQu Wenruo * 3183d16c702fSQu Wenruo * Tree-checker only works inside one tree block, thus the following 3184d16c702fSQu Wenruo * corruption can not be detected by tree-checker: 3185d16c702fSQu Wenruo * 3186d16c702fSQu Wenruo * Leaf @left | Leaf @right 3187d16c702fSQu Wenruo * -------------------------------------------------------------- 3188d16c702fSQu Wenruo * | 1 | 2 | 3 | 4 | 5 | f6 | | 7 | 8 | 3189d16c702fSQu Wenruo * 3190d16c702fSQu Wenruo * Key f6 in leaf @left itself is valid, but not valid when the next 3191d16c702fSQu Wenruo * key in leaf @right is 7. 3192d16c702fSQu Wenruo * This can only be checked at tree block merge time. 3193d16c702fSQu Wenruo * And since tree checker has ensured all key order in each tree block 3194d16c702fSQu Wenruo * is correct, we only need to bother the last key of @left and the first 3195d16c702fSQu Wenruo * key of @right. 
3196d16c702fSQu Wenruo */ 3197d16c702fSQu Wenruo static bool check_sibling_keys(struct extent_buffer *left, 3198d16c702fSQu Wenruo struct extent_buffer *right) 3199d16c702fSQu Wenruo { 3200d16c702fSQu Wenruo struct btrfs_key left_last; 3201d16c702fSQu Wenruo struct btrfs_key right_first; 3202d16c702fSQu Wenruo int level = btrfs_header_level(left); 3203d16c702fSQu Wenruo int nr_left = btrfs_header_nritems(left); 3204d16c702fSQu Wenruo int nr_right = btrfs_header_nritems(right); 3205d16c702fSQu Wenruo 3206d16c702fSQu Wenruo /* No key to check in one of the tree blocks */ 3207d16c702fSQu Wenruo if (!nr_left || !nr_right) 3208d16c702fSQu Wenruo return false; 3209d16c702fSQu Wenruo 3210d16c702fSQu Wenruo if (level) { 3211d16c702fSQu Wenruo btrfs_node_key_to_cpu(left, &left_last, nr_left - 1); 3212d16c702fSQu Wenruo btrfs_node_key_to_cpu(right, &right_first, 0); 3213d16c702fSQu Wenruo } else { 3214d16c702fSQu Wenruo btrfs_item_key_to_cpu(left, &left_last, nr_left - 1); 3215d16c702fSQu Wenruo btrfs_item_key_to_cpu(right, &right_first, 0); 3216d16c702fSQu Wenruo } 3217d16c702fSQu Wenruo 3218d16c702fSQu Wenruo if (btrfs_comp_cpu_keys(&left_last, &right_first) >= 0) { 3219d16c702fSQu Wenruo btrfs_crit(left->fs_info, 3220d16c702fSQu Wenruo "bad key order, sibling blocks, left last (%llu %u %llu) right first (%llu %u %llu)", 3221d16c702fSQu Wenruo left_last.objectid, left_last.type, 3222d16c702fSQu Wenruo left_last.offset, right_first.objectid, 3223d16c702fSQu Wenruo right_first.type, right_first.offset); 3224d16c702fSQu Wenruo return true; 3225d16c702fSQu Wenruo } 3226d16c702fSQu Wenruo return false; 3227d16c702fSQu Wenruo } 3228d16c702fSQu Wenruo 3229d16c702fSQu Wenruo /* 323074123bd7SChris Mason * try to push data from one node into the next node left in the 323179f95c82SChris Mason * tree. 
3232aa5d6bedSChris Mason * 3233aa5d6bedSChris Mason * returns 0 if some ptrs were pushed left, < 0 if there was some horrible 3234aa5d6bedSChris Mason * error, and > 0 if there was no room in the left hand block. 323574123bd7SChris Mason */ 323698ed5174SChris Mason static int push_node_left(struct btrfs_trans_handle *trans, 32372ff7e61eSJeff Mahoney struct extent_buffer *dst, 3238971a1f66SChris Mason struct extent_buffer *src, int empty) 3239be0e5c09SChris Mason { 3240d30a668fSDavid Sterba struct btrfs_fs_info *fs_info = trans->fs_info; 3241be0e5c09SChris Mason int push_items = 0; 3242bb803951SChris Mason int src_nritems; 3243bb803951SChris Mason int dst_nritems; 3244aa5d6bedSChris Mason int ret = 0; 3245be0e5c09SChris Mason 32465f39d397SChris Mason src_nritems = btrfs_header_nritems(src); 32475f39d397SChris Mason dst_nritems = btrfs_header_nritems(dst); 32480b246afaSJeff Mahoney push_items = BTRFS_NODEPTRS_PER_BLOCK(fs_info) - dst_nritems; 32497bb86316SChris Mason WARN_ON(btrfs_header_generation(src) != trans->transid); 32507bb86316SChris Mason WARN_ON(btrfs_header_generation(dst) != trans->transid); 325154aa1f4dSChris Mason 3252bce4eae9SChris Mason if (!empty && src_nritems <= 8) 3253971a1f66SChris Mason return 1; 3254971a1f66SChris Mason 3255d397712bSChris Mason if (push_items <= 0) 3256be0e5c09SChris Mason return 1; 3257be0e5c09SChris Mason 3258bce4eae9SChris Mason if (empty) { 3259971a1f66SChris Mason push_items = min(src_nritems, push_items); 3260bce4eae9SChris Mason if (push_items < src_nritems) { 3261bce4eae9SChris Mason /* leave at least 8 pointers in the node if 3262bce4eae9SChris Mason * we aren't going to empty it 3263bce4eae9SChris Mason */ 3264bce4eae9SChris Mason if (src_nritems - push_items < 8) { 3265bce4eae9SChris Mason if (push_items <= 8) 3266bce4eae9SChris Mason return 1; 3267bce4eae9SChris Mason push_items -= 8; 3268bce4eae9SChris Mason } 3269bce4eae9SChris Mason } 3270bce4eae9SChris Mason } else 3271bce4eae9SChris Mason push_items = 
min(src_nritems - 8, push_items); 327279f95c82SChris Mason 3273d16c702fSQu Wenruo /* dst is the left eb, src is the middle eb */ 3274d16c702fSQu Wenruo if (check_sibling_keys(dst, src)) { 3275d16c702fSQu Wenruo ret = -EUCLEAN; 3276d16c702fSQu Wenruo btrfs_abort_transaction(trans, ret); 3277d16c702fSQu Wenruo return ret; 3278d16c702fSQu Wenruo } 3279ed874f0dSDavid Sterba ret = tree_mod_log_eb_copy(dst, src, dst_nritems, 0, push_items); 32805de865eeSFilipe David Borba Manana if (ret) { 328166642832SJeff Mahoney btrfs_abort_transaction(trans, ret); 32825de865eeSFilipe David Borba Manana return ret; 32835de865eeSFilipe David Borba Manana } 32845f39d397SChris Mason copy_extent_buffer(dst, src, 32855f39d397SChris Mason btrfs_node_key_ptr_offset(dst_nritems), 32865f39d397SChris Mason btrfs_node_key_ptr_offset(0), 3287123abc88SChris Mason push_items * sizeof(struct btrfs_key_ptr)); 32885f39d397SChris Mason 3289bb803951SChris Mason if (push_items < src_nritems) { 329057911b8bSJan Schmidt /* 3291bf1d3425SDavid Sterba * Don't call tree_mod_log_insert_move here, key removal was 3292bf1d3425SDavid Sterba * already fully logged by tree_mod_log_eb_copy above. 
329357911b8bSJan Schmidt */ 32945f39d397SChris Mason memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0), 32955f39d397SChris Mason btrfs_node_key_ptr_offset(push_items), 3296e2fa7227SChris Mason (src_nritems - push_items) * 3297123abc88SChris Mason sizeof(struct btrfs_key_ptr)); 3298bb803951SChris Mason } 32995f39d397SChris Mason btrfs_set_header_nritems(src, src_nritems - push_items); 33005f39d397SChris Mason btrfs_set_header_nritems(dst, dst_nritems + push_items); 33015f39d397SChris Mason btrfs_mark_buffer_dirty(src); 33025f39d397SChris Mason btrfs_mark_buffer_dirty(dst); 330331840ae1SZheng Yan 3304bb803951SChris Mason return ret; 3305be0e5c09SChris Mason } 3306be0e5c09SChris Mason 330797571fd0SChris Mason /* 330879f95c82SChris Mason * try to push data from one node into the next node right in the 330979f95c82SChris Mason * tree. 331079f95c82SChris Mason * 331179f95c82SChris Mason * returns 0 if some ptrs were pushed, < 0 if there was some horrible 331279f95c82SChris Mason * error, and > 0 if there was no room in the right hand block. 
331379f95c82SChris Mason * 331479f95c82SChris Mason * this will only push up to 1/2 the contents of the left node over 331579f95c82SChris Mason */ 33165f39d397SChris Mason static int balance_node_right(struct btrfs_trans_handle *trans, 33175f39d397SChris Mason struct extent_buffer *dst, 33185f39d397SChris Mason struct extent_buffer *src) 331979f95c82SChris Mason { 332055d32ed8SDavid Sterba struct btrfs_fs_info *fs_info = trans->fs_info; 332179f95c82SChris Mason int push_items = 0; 332279f95c82SChris Mason int max_push; 332379f95c82SChris Mason int src_nritems; 332479f95c82SChris Mason int dst_nritems; 332579f95c82SChris Mason int ret = 0; 332679f95c82SChris Mason 33277bb86316SChris Mason WARN_ON(btrfs_header_generation(src) != trans->transid); 33287bb86316SChris Mason WARN_ON(btrfs_header_generation(dst) != trans->transid); 33297bb86316SChris Mason 33305f39d397SChris Mason src_nritems = btrfs_header_nritems(src); 33315f39d397SChris Mason dst_nritems = btrfs_header_nritems(dst); 33320b246afaSJeff Mahoney push_items = BTRFS_NODEPTRS_PER_BLOCK(fs_info) - dst_nritems; 3333d397712bSChris Mason if (push_items <= 0) 333479f95c82SChris Mason return 1; 3335bce4eae9SChris Mason 3336d397712bSChris Mason if (src_nritems < 4) 3337bce4eae9SChris Mason return 1; 333879f95c82SChris Mason 333979f95c82SChris Mason max_push = src_nritems / 2 + 1; 334079f95c82SChris Mason /* don't try to empty the node */ 3341d397712bSChris Mason if (max_push >= src_nritems) 334279f95c82SChris Mason return 1; 3343252c38f0SYan 334479f95c82SChris Mason if (max_push < push_items) 334579f95c82SChris Mason push_items = max_push; 334679f95c82SChris Mason 3347d16c702fSQu Wenruo /* dst is the right eb, src is the middle eb */ 3348d16c702fSQu Wenruo if (check_sibling_keys(src, dst)) { 3349d16c702fSQu Wenruo ret = -EUCLEAN; 3350d16c702fSQu Wenruo btrfs_abort_transaction(trans, ret); 3351d16c702fSQu Wenruo return ret; 3352d16c702fSQu Wenruo } 3353bf1d3425SDavid Sterba ret = tree_mod_log_insert_move(dst, 
push_items, 0, dst_nritems); 3354bf1d3425SDavid Sterba BUG_ON(ret < 0); 33555f39d397SChris Mason memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items), 33565f39d397SChris Mason btrfs_node_key_ptr_offset(0), 33575f39d397SChris Mason (dst_nritems) * 33585f39d397SChris Mason sizeof(struct btrfs_key_ptr)); 3359d6025579SChris Mason 3360ed874f0dSDavid Sterba ret = tree_mod_log_eb_copy(dst, src, 0, src_nritems - push_items, 3361ed874f0dSDavid Sterba push_items); 33625de865eeSFilipe David Borba Manana if (ret) { 336366642832SJeff Mahoney btrfs_abort_transaction(trans, ret); 33645de865eeSFilipe David Borba Manana return ret; 33655de865eeSFilipe David Borba Manana } 33665f39d397SChris Mason copy_extent_buffer(dst, src, 33675f39d397SChris Mason btrfs_node_key_ptr_offset(0), 33685f39d397SChris Mason btrfs_node_key_ptr_offset(src_nritems - push_items), 3369123abc88SChris Mason push_items * sizeof(struct btrfs_key_ptr)); 337079f95c82SChris Mason 33715f39d397SChris Mason btrfs_set_header_nritems(src, src_nritems - push_items); 33725f39d397SChris Mason btrfs_set_header_nritems(dst, dst_nritems + push_items); 337379f95c82SChris Mason 33745f39d397SChris Mason btrfs_mark_buffer_dirty(src); 33755f39d397SChris Mason btrfs_mark_buffer_dirty(dst); 337631840ae1SZheng Yan 337779f95c82SChris Mason return ret; 337879f95c82SChris Mason } 337979f95c82SChris Mason 338079f95c82SChris Mason /* 338197571fd0SChris Mason * helper function to insert a new root level in the tree. 338297571fd0SChris Mason * A new node is allocated, and a single item is inserted to 338397571fd0SChris Mason * point to the existing root 3384aa5d6bedSChris Mason * 3385aa5d6bedSChris Mason * returns zero on success or < 0 on failure. 
338697571fd0SChris Mason */ 3387d397712bSChris Mason static noinline int insert_new_root(struct btrfs_trans_handle *trans, 33885f39d397SChris Mason struct btrfs_root *root, 3389fdd99c72SLiu Bo struct btrfs_path *path, int level) 339074123bd7SChris Mason { 33910b246afaSJeff Mahoney struct btrfs_fs_info *fs_info = root->fs_info; 33927bb86316SChris Mason u64 lower_gen; 33935f39d397SChris Mason struct extent_buffer *lower; 33945f39d397SChris Mason struct extent_buffer *c; 3395925baeddSChris Mason struct extent_buffer *old; 33965f39d397SChris Mason struct btrfs_disk_key lower_key; 3397d9d19a01SDavid Sterba int ret; 33985c680ed6SChris Mason 33995c680ed6SChris Mason BUG_ON(path->nodes[level]); 34005c680ed6SChris Mason BUG_ON(path->nodes[level-1] != root->node); 34015c680ed6SChris Mason 34027bb86316SChris Mason lower = path->nodes[level-1]; 34037bb86316SChris Mason if (level == 1) 34047bb86316SChris Mason btrfs_item_key(lower, &lower_key, 0); 34057bb86316SChris Mason else 34067bb86316SChris Mason btrfs_node_key(lower, &lower_key, 0); 34077bb86316SChris Mason 3408a6279470SFilipe Manana c = alloc_tree_block_no_bg_flush(trans, root, 0, &lower_key, level, 34099631e4ccSJosef Bacik root->node->start, 0, 34109631e4ccSJosef Bacik BTRFS_NESTING_NORMAL); 34115f39d397SChris Mason if (IS_ERR(c)) 34125f39d397SChris Mason return PTR_ERR(c); 3413925baeddSChris Mason 34140b246afaSJeff Mahoney root_add_used(root, fs_info->nodesize); 3415f0486c68SYan, Zheng 34165f39d397SChris Mason btrfs_set_header_nritems(c, 1); 34175f39d397SChris Mason btrfs_set_node_key(c, &lower_key, 0); 3418db94535dSChris Mason btrfs_set_node_blockptr(c, 0, lower->start); 34197bb86316SChris Mason lower_gen = btrfs_header_generation(lower); 342031840ae1SZheng Yan WARN_ON(lower_gen != trans->transid); 34217bb86316SChris Mason 34227bb86316SChris Mason btrfs_set_node_ptr_generation(c, 0, lower_gen); 34235f39d397SChris Mason 34245f39d397SChris Mason btrfs_mark_buffer_dirty(c); 3425d5719762SChris Mason 3426925baeddSChris 
Mason old = root->node; 3427d9d19a01SDavid Sterba ret = tree_mod_log_insert_root(root->node, c, 0); 3428d9d19a01SDavid Sterba BUG_ON(ret < 0); 3429240f62c8SChris Mason rcu_assign_pointer(root->node, c); 3430925baeddSChris Mason 3431925baeddSChris Mason /* the super has an extra ref to root->node */ 3432925baeddSChris Mason free_extent_buffer(old); 3433925baeddSChris Mason 34340b86a832SChris Mason add_root_to_dirty_list(root); 343567439dadSDavid Sterba atomic_inc(&c->refs); 34365f39d397SChris Mason path->nodes[level] = c; 343795449a16Schandan path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING; 343874123bd7SChris Mason path->slots[level] = 0; 343974123bd7SChris Mason return 0; 344074123bd7SChris Mason } 34415c680ed6SChris Mason 34425c680ed6SChris Mason /* 34435c680ed6SChris Mason * worker function to insert a single pointer in a node. 34445c680ed6SChris Mason * the node should have enough room for the pointer already 344597571fd0SChris Mason * 34465c680ed6SChris Mason * slot and level indicate where you want the key to go, and 34475c680ed6SChris Mason * blocknr is the block the key points to. 
34485c680ed6SChris Mason */ 3449143bede5SJeff Mahoney static void insert_ptr(struct btrfs_trans_handle *trans, 34506ad3cf6dSDavid Sterba struct btrfs_path *path, 3451143bede5SJeff Mahoney struct btrfs_disk_key *key, u64 bytenr, 3452c3e06965SJan Schmidt int slot, int level) 34535c680ed6SChris Mason { 34545f39d397SChris Mason struct extent_buffer *lower; 34555c680ed6SChris Mason int nritems; 3456f3ea38daSJan Schmidt int ret; 34575c680ed6SChris Mason 34585c680ed6SChris Mason BUG_ON(!path->nodes[level]); 3459f0486c68SYan, Zheng btrfs_assert_tree_locked(path->nodes[level]); 34605f39d397SChris Mason lower = path->nodes[level]; 34615f39d397SChris Mason nritems = btrfs_header_nritems(lower); 3462c293498bSStoyan Gaydarov BUG_ON(slot > nritems); 34636ad3cf6dSDavid Sterba BUG_ON(nritems == BTRFS_NODEPTRS_PER_BLOCK(trans->fs_info)); 346474123bd7SChris Mason if (slot != nritems) { 3465bf1d3425SDavid Sterba if (level) { 3466bf1d3425SDavid Sterba ret = tree_mod_log_insert_move(lower, slot + 1, slot, 3467a446a979SDavid Sterba nritems - slot); 3468bf1d3425SDavid Sterba BUG_ON(ret < 0); 3469bf1d3425SDavid Sterba } 34705f39d397SChris Mason memmove_extent_buffer(lower, 34715f39d397SChris Mason btrfs_node_key_ptr_offset(slot + 1), 34725f39d397SChris Mason btrfs_node_key_ptr_offset(slot), 3473123abc88SChris Mason (nritems - slot) * sizeof(struct btrfs_key_ptr)); 347474123bd7SChris Mason } 3475c3e06965SJan Schmidt if (level) { 3476e09c2efeSDavid Sterba ret = tree_mod_log_insert_key(lower, slot, MOD_LOG_KEY_ADD, 3477e09c2efeSDavid Sterba GFP_NOFS); 3478f3ea38daSJan Schmidt BUG_ON(ret < 0); 3479f3ea38daSJan Schmidt } 34805f39d397SChris Mason btrfs_set_node_key(lower, key, slot); 3481db94535dSChris Mason btrfs_set_node_blockptr(lower, slot, bytenr); 348274493f7aSChris Mason WARN_ON(trans->transid == 0); 348374493f7aSChris Mason btrfs_set_node_ptr_generation(lower, slot, trans->transid); 34845f39d397SChris Mason btrfs_set_header_nritems(lower, nritems + 1); 34855f39d397SChris Mason 
btrfs_mark_buffer_dirty(lower); 348674123bd7SChris Mason } 348774123bd7SChris Mason 348897571fd0SChris Mason /* 348997571fd0SChris Mason * split the node at the specified level in path in two. 349097571fd0SChris Mason * The path is corrected to point to the appropriate node after the split 349197571fd0SChris Mason * 349297571fd0SChris Mason * Before splitting this tries to make some room in the node by pushing 349397571fd0SChris Mason * left and right, if either one works, it returns right away. 3494aa5d6bedSChris Mason * 3495aa5d6bedSChris Mason * returns 0 on success and < 0 on failure 349697571fd0SChris Mason */ 3497e02119d5SChris Mason static noinline int split_node(struct btrfs_trans_handle *trans, 3498e02119d5SChris Mason struct btrfs_root *root, 3499e02119d5SChris Mason struct btrfs_path *path, int level) 3500be0e5c09SChris Mason { 35010b246afaSJeff Mahoney struct btrfs_fs_info *fs_info = root->fs_info; 35025f39d397SChris Mason struct extent_buffer *c; 35035f39d397SChris Mason struct extent_buffer *split; 35045f39d397SChris Mason struct btrfs_disk_key disk_key; 3505be0e5c09SChris Mason int mid; 35065c680ed6SChris Mason int ret; 35077518a238SChris Mason u32 c_nritems; 3508be0e5c09SChris Mason 35095f39d397SChris Mason c = path->nodes[level]; 35107bb86316SChris Mason WARN_ON(btrfs_header_generation(c) != trans->transid); 35115f39d397SChris Mason if (c == root->node) { 3512d9abbf1cSJan Schmidt /* 351390f8d62eSJan Schmidt * trying to split the root, lets make a new one 351490f8d62eSJan Schmidt * 3515fdd99c72SLiu Bo * tree mod log: We don't log_removal old root in 351690f8d62eSJan Schmidt * insert_new_root, because that root buffer will be kept as a 351790f8d62eSJan Schmidt * normal node. We are going to log removal of half of the 351890f8d62eSJan Schmidt * elements below with tree_mod_log_eb_copy. We're holding a 351990f8d62eSJan Schmidt * tree lock on the buffer, which is why we cannot race with 352090f8d62eSJan Schmidt * other tree_mod_log users. 
3521d9abbf1cSJan Schmidt */ 3522fdd99c72SLiu Bo ret = insert_new_root(trans, root, path, level + 1); 35235c680ed6SChris Mason if (ret) 35245c680ed6SChris Mason return ret; 3525b3612421SChris Mason } else { 3526e66f709bSChris Mason ret = push_nodes_for_insert(trans, root, path, level); 35275f39d397SChris Mason c = path->nodes[level]; 35285f39d397SChris Mason if (!ret && btrfs_header_nritems(c) < 35290b246afaSJeff Mahoney BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 3) 3530e66f709bSChris Mason return 0; 353154aa1f4dSChris Mason if (ret < 0) 353254aa1f4dSChris Mason return ret; 35335c680ed6SChris Mason } 3534e66f709bSChris Mason 35355f39d397SChris Mason c_nritems = btrfs_header_nritems(c); 35365d4f98a2SYan Zheng mid = (c_nritems + 1) / 2; 35375d4f98a2SYan Zheng btrfs_node_key(c, &disk_key, mid); 35387bb86316SChris Mason 3539a6279470SFilipe Manana split = alloc_tree_block_no_bg_flush(trans, root, 0, &disk_key, level, 35409631e4ccSJosef Bacik c->start, 0, BTRFS_NESTING_NORMAL); 35415f39d397SChris Mason if (IS_ERR(split)) 35425f39d397SChris Mason return PTR_ERR(split); 354354aa1f4dSChris Mason 35440b246afaSJeff Mahoney root_add_used(root, fs_info->nodesize); 3545bc877d28SNikolay Borisov ASSERT(btrfs_header_level(c) == level); 35465f39d397SChris Mason 3547ed874f0dSDavid Sterba ret = tree_mod_log_eb_copy(split, c, 0, mid, c_nritems - mid); 35485de865eeSFilipe David Borba Manana if (ret) { 354966642832SJeff Mahoney btrfs_abort_transaction(trans, ret); 35505de865eeSFilipe David Borba Manana return ret; 35515de865eeSFilipe David Borba Manana } 35525f39d397SChris Mason copy_extent_buffer(split, c, 35535f39d397SChris Mason btrfs_node_key_ptr_offset(0), 35545f39d397SChris Mason btrfs_node_key_ptr_offset(mid), 3555123abc88SChris Mason (c_nritems - mid) * sizeof(struct btrfs_key_ptr)); 35565f39d397SChris Mason btrfs_set_header_nritems(split, c_nritems - mid); 35575f39d397SChris Mason btrfs_set_header_nritems(c, mid); 3558aa5d6bedSChris Mason ret = 0; 3559aa5d6bedSChris Mason 
35605f39d397SChris Mason btrfs_mark_buffer_dirty(c); 35615f39d397SChris Mason btrfs_mark_buffer_dirty(split); 35625f39d397SChris Mason 35636ad3cf6dSDavid Sterba insert_ptr(trans, path, &disk_key, split->start, 3564c3e06965SJan Schmidt path->slots[level + 1] + 1, level + 1); 3565aa5d6bedSChris Mason 35665de08d7dSChris Mason if (path->slots[level] >= mid) { 35675c680ed6SChris Mason path->slots[level] -= mid; 3568925baeddSChris Mason btrfs_tree_unlock(c); 35695f39d397SChris Mason free_extent_buffer(c); 35705f39d397SChris Mason path->nodes[level] = split; 35715c680ed6SChris Mason path->slots[level + 1] += 1; 3572eb60ceacSChris Mason } else { 3573925baeddSChris Mason btrfs_tree_unlock(split); 35745f39d397SChris Mason free_extent_buffer(split); 3575be0e5c09SChris Mason } 3576aa5d6bedSChris Mason return ret; 3577be0e5c09SChris Mason } 3578be0e5c09SChris Mason 357974123bd7SChris Mason /* 358074123bd7SChris Mason * how many bytes are required to store the items in a leaf. start 358174123bd7SChris Mason * and nr indicate which items in the leaf to check. 
This totals up the 358274123bd7SChris Mason * space used both by the item structs and the item data 358374123bd7SChris Mason */ 35845f39d397SChris Mason static int leaf_space_used(struct extent_buffer *l, int start, int nr) 3585be0e5c09SChris Mason { 358641be1f3bSJosef Bacik struct btrfs_item *start_item; 358741be1f3bSJosef Bacik struct btrfs_item *end_item; 3588be0e5c09SChris Mason int data_len; 35895f39d397SChris Mason int nritems = btrfs_header_nritems(l); 3590d4dbff95SChris Mason int end = min(nritems, start + nr) - 1; 3591be0e5c09SChris Mason 3592be0e5c09SChris Mason if (!nr) 3593be0e5c09SChris Mason return 0; 3594dd3cc16bSRoss Kirk start_item = btrfs_item_nr(start); 3595dd3cc16bSRoss Kirk end_item = btrfs_item_nr(end); 3596a31356b9SDavid Sterba data_len = btrfs_item_offset(l, start_item) + 3597a31356b9SDavid Sterba btrfs_item_size(l, start_item); 3598a31356b9SDavid Sterba data_len = data_len - btrfs_item_offset(l, end_item); 35990783fcfcSChris Mason data_len += sizeof(struct btrfs_item) * nr; 3600d4dbff95SChris Mason WARN_ON(data_len < 0); 3601be0e5c09SChris Mason return data_len; 3602be0e5c09SChris Mason } 3603be0e5c09SChris Mason 360474123bd7SChris Mason /* 3605d4dbff95SChris Mason * The space between the end of the leaf items and 3606d4dbff95SChris Mason * the start of the leaf data. 
IOW, how much room 3607d4dbff95SChris Mason * the leaf has left for both items and data 3608d4dbff95SChris Mason */ 3609e902baacSDavid Sterba noinline int btrfs_leaf_free_space(struct extent_buffer *leaf) 3610d4dbff95SChris Mason { 3611e902baacSDavid Sterba struct btrfs_fs_info *fs_info = leaf->fs_info; 36125f39d397SChris Mason int nritems = btrfs_header_nritems(leaf); 36135f39d397SChris Mason int ret; 36140b246afaSJeff Mahoney 36150b246afaSJeff Mahoney ret = BTRFS_LEAF_DATA_SIZE(fs_info) - leaf_space_used(leaf, 0, nritems); 36165f39d397SChris Mason if (ret < 0) { 36170b246afaSJeff Mahoney btrfs_crit(fs_info, 3618efe120a0SFrank Holton "leaf free space ret %d, leaf data size %lu, used %d nritems %d", 3619da17066cSJeff Mahoney ret, 36200b246afaSJeff Mahoney (unsigned long) BTRFS_LEAF_DATA_SIZE(fs_info), 36215f39d397SChris Mason leaf_space_used(leaf, 0, nritems), nritems); 36225f39d397SChris Mason } 36235f39d397SChris Mason return ret; 3624d4dbff95SChris Mason } 3625d4dbff95SChris Mason 362699d8f83cSChris Mason /* 362799d8f83cSChris Mason * min slot controls the lowest index we're willing to push to the 362899d8f83cSChris Mason * right. 
We'll push up to and including min_slot, but no lower 362999d8f83cSChris Mason */ 3630f72f0010SDavid Sterba static noinline int __push_leaf_right(struct btrfs_path *path, 363144871b1bSChris Mason int data_size, int empty, 363244871b1bSChris Mason struct extent_buffer *right, 363399d8f83cSChris Mason int free_space, u32 left_nritems, 363499d8f83cSChris Mason u32 min_slot) 363500ec4c51SChris Mason { 3636f72f0010SDavid Sterba struct btrfs_fs_info *fs_info = right->fs_info; 36375f39d397SChris Mason struct extent_buffer *left = path->nodes[0]; 363844871b1bSChris Mason struct extent_buffer *upper = path->nodes[1]; 3639cfed81a0SChris Mason struct btrfs_map_token token; 36405f39d397SChris Mason struct btrfs_disk_key disk_key; 364100ec4c51SChris Mason int slot; 364234a38218SChris Mason u32 i; 364300ec4c51SChris Mason int push_space = 0; 364400ec4c51SChris Mason int push_items = 0; 36450783fcfcSChris Mason struct btrfs_item *item; 364634a38218SChris Mason u32 nr; 36477518a238SChris Mason u32 right_nritems; 36485f39d397SChris Mason u32 data_end; 3649db94535dSChris Mason u32 this_item_size; 365000ec4c51SChris Mason 365134a38218SChris Mason if (empty) 365234a38218SChris Mason nr = 0; 365334a38218SChris Mason else 365499d8f83cSChris Mason nr = max_t(u32, 1, min_slot); 365534a38218SChris Mason 365631840ae1SZheng Yan if (path->slots[0] >= left_nritems) 365787b29b20SYan Zheng push_space += data_size; 365831840ae1SZheng Yan 365944871b1bSChris Mason slot = path->slots[1]; 366034a38218SChris Mason i = left_nritems - 1; 366134a38218SChris Mason while (i >= nr) { 3662dd3cc16bSRoss Kirk item = btrfs_item_nr(i); 3663db94535dSChris Mason 366431840ae1SZheng Yan if (!empty && push_items > 0) { 366531840ae1SZheng Yan if (path->slots[0] > i) 366631840ae1SZheng Yan break; 366731840ae1SZheng Yan if (path->slots[0] == i) { 3668e902baacSDavid Sterba int space = btrfs_leaf_free_space(left); 3669e902baacSDavid Sterba 367031840ae1SZheng Yan if (space + push_space * 2 > free_space) 367131840ae1SZheng 
Yan break; 367231840ae1SZheng Yan } 367331840ae1SZheng Yan } 367431840ae1SZheng Yan 367500ec4c51SChris Mason if (path->slots[0] == i) 367687b29b20SYan Zheng push_space += data_size; 3677db94535dSChris Mason 3678db94535dSChris Mason this_item_size = btrfs_item_size(left, item); 3679db94535dSChris Mason if (this_item_size + sizeof(*item) + push_space > free_space) 368000ec4c51SChris Mason break; 368131840ae1SZheng Yan 368200ec4c51SChris Mason push_items++; 3683db94535dSChris Mason push_space += this_item_size + sizeof(*item); 368434a38218SChris Mason if (i == 0) 368534a38218SChris Mason break; 368634a38218SChris Mason i--; 3687db94535dSChris Mason } 36885f39d397SChris Mason 3689925baeddSChris Mason if (push_items == 0) 3690925baeddSChris Mason goto out_unlock; 36915f39d397SChris Mason 36926c1500f2SJulia Lawall WARN_ON(!empty && push_items == left_nritems); 36935f39d397SChris Mason 369400ec4c51SChris Mason /* push left to right */ 36955f39d397SChris Mason right_nritems = btrfs_header_nritems(right); 369634a38218SChris Mason 36975f39d397SChris Mason push_space = btrfs_item_end_nr(left, left_nritems - push_items); 36988f881e8cSDavid Sterba push_space -= leaf_data_end(left); 36995f39d397SChris Mason 370000ec4c51SChris Mason /* make room in the right data area */ 37018f881e8cSDavid Sterba data_end = leaf_data_end(right); 37025f39d397SChris Mason memmove_extent_buffer(right, 37033d9ec8c4SNikolay Borisov BTRFS_LEAF_DATA_OFFSET + data_end - push_space, 37043d9ec8c4SNikolay Borisov BTRFS_LEAF_DATA_OFFSET + data_end, 37050b246afaSJeff Mahoney BTRFS_LEAF_DATA_SIZE(fs_info) - data_end); 37065f39d397SChris Mason 370700ec4c51SChris Mason /* copy from the left data area */ 37083d9ec8c4SNikolay Borisov copy_extent_buffer(right, left, BTRFS_LEAF_DATA_OFFSET + 37090b246afaSJeff Mahoney BTRFS_LEAF_DATA_SIZE(fs_info) - push_space, 37108f881e8cSDavid Sterba BTRFS_LEAF_DATA_OFFSET + leaf_data_end(left), 3711d6025579SChris Mason push_space); 37125f39d397SChris Mason 37135f39d397SChris 
Mason memmove_extent_buffer(right, btrfs_item_nr_offset(push_items), 37145f39d397SChris Mason btrfs_item_nr_offset(0), 37150783fcfcSChris Mason right_nritems * sizeof(struct btrfs_item)); 37165f39d397SChris Mason 371700ec4c51SChris Mason /* copy the items from left to right */ 37185f39d397SChris Mason copy_extent_buffer(right, left, btrfs_item_nr_offset(0), 37195f39d397SChris Mason btrfs_item_nr_offset(left_nritems - push_items), 37200783fcfcSChris Mason push_items * sizeof(struct btrfs_item)); 372100ec4c51SChris Mason 372200ec4c51SChris Mason /* update the item pointers */ 3723c82f823cSDavid Sterba btrfs_init_map_token(&token, right); 37247518a238SChris Mason right_nritems += push_items; 37255f39d397SChris Mason btrfs_set_header_nritems(right, right_nritems); 37260b246afaSJeff Mahoney push_space = BTRFS_LEAF_DATA_SIZE(fs_info); 37277518a238SChris Mason for (i = 0; i < right_nritems; i++) { 3728dd3cc16bSRoss Kirk item = btrfs_item_nr(i); 3729cc4c13d5SDavid Sterba push_space -= btrfs_token_item_size(&token, item); 3730cc4c13d5SDavid Sterba btrfs_set_token_item_offset(&token, item, push_space); 3731db94535dSChris Mason } 3732db94535dSChris Mason 37337518a238SChris Mason left_nritems -= push_items; 37345f39d397SChris Mason btrfs_set_header_nritems(left, left_nritems); 373500ec4c51SChris Mason 373634a38218SChris Mason if (left_nritems) 37375f39d397SChris Mason btrfs_mark_buffer_dirty(left); 3738f0486c68SYan, Zheng else 37396a884d7dSDavid Sterba btrfs_clean_tree_block(left); 3740f0486c68SYan, Zheng 37415f39d397SChris Mason btrfs_mark_buffer_dirty(right); 3742a429e513SChris Mason 37435f39d397SChris Mason btrfs_item_key(right, &disk_key, 0); 37445f39d397SChris Mason btrfs_set_node_key(upper, &disk_key, slot + 1); 3745d6025579SChris Mason btrfs_mark_buffer_dirty(upper); 374602217ed2SChris Mason 374700ec4c51SChris Mason /* then fixup the leaf pointer in the path */ 37487518a238SChris Mason if (path->slots[0] >= left_nritems) { 37497518a238SChris Mason path->slots[0] -= 
left_nritems; 3750925baeddSChris Mason if (btrfs_header_nritems(path->nodes[0]) == 0) 37516a884d7dSDavid Sterba btrfs_clean_tree_block(path->nodes[0]); 3752925baeddSChris Mason btrfs_tree_unlock(path->nodes[0]); 37535f39d397SChris Mason free_extent_buffer(path->nodes[0]); 37545f39d397SChris Mason path->nodes[0] = right; 375500ec4c51SChris Mason path->slots[1] += 1; 375600ec4c51SChris Mason } else { 3757925baeddSChris Mason btrfs_tree_unlock(right); 37585f39d397SChris Mason free_extent_buffer(right); 375900ec4c51SChris Mason } 376000ec4c51SChris Mason return 0; 3761925baeddSChris Mason 3762925baeddSChris Mason out_unlock: 3763925baeddSChris Mason btrfs_tree_unlock(right); 3764925baeddSChris Mason free_extent_buffer(right); 3765925baeddSChris Mason return 1; 376600ec4c51SChris Mason } 3767925baeddSChris Mason 376800ec4c51SChris Mason /* 376944871b1bSChris Mason * push some data in the path leaf to the right, trying to free up at 377074123bd7SChris Mason * least data_size bytes. returns zero if the push worked, nonzero otherwise 377144871b1bSChris Mason * 377244871b1bSChris Mason * returns 1 if the push failed because the other node didn't have enough 377344871b1bSChris Mason * room, 0 if everything worked out and < 0 if there were major errors. 377499d8f83cSChris Mason * 377599d8f83cSChris Mason * this will push starting from min_slot to the end of the leaf. 
It won't 377699d8f83cSChris Mason * push any slot lower than min_slot 377774123bd7SChris Mason */ 377844871b1bSChris Mason static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root 377999d8f83cSChris Mason *root, struct btrfs_path *path, 378099d8f83cSChris Mason int min_data_size, int data_size, 378199d8f83cSChris Mason int empty, u32 min_slot) 3782be0e5c09SChris Mason { 378344871b1bSChris Mason struct extent_buffer *left = path->nodes[0]; 378444871b1bSChris Mason struct extent_buffer *right; 378544871b1bSChris Mason struct extent_buffer *upper; 378644871b1bSChris Mason int slot; 378744871b1bSChris Mason int free_space; 378844871b1bSChris Mason u32 left_nritems; 378944871b1bSChris Mason int ret; 379044871b1bSChris Mason 379144871b1bSChris Mason if (!path->nodes[1]) 379244871b1bSChris Mason return 1; 379344871b1bSChris Mason 379444871b1bSChris Mason slot = path->slots[1]; 379544871b1bSChris Mason upper = path->nodes[1]; 379644871b1bSChris Mason if (slot >= btrfs_header_nritems(upper) - 1) 379744871b1bSChris Mason return 1; 379844871b1bSChris Mason 379944871b1bSChris Mason btrfs_assert_tree_locked(path->nodes[1]); 380044871b1bSChris Mason 38014b231ae4SDavid Sterba right = btrfs_read_node_slot(upper, slot + 1); 3802fb770ae4SLiu Bo /* 3803fb770ae4SLiu Bo * slot + 1 is not valid or we fail to read the right node, 3804fb770ae4SLiu Bo * no big deal, just return. 
3805fb770ae4SLiu Bo */ 3806fb770ae4SLiu Bo if (IS_ERR(right)) 380791ca338dSTsutomu Itoh return 1; 380891ca338dSTsutomu Itoh 3809*bf77467aSJosef Bacik __btrfs_tree_lock(right, BTRFS_NESTING_RIGHT); 38108bead258SDavid Sterba btrfs_set_lock_blocking_write(right); 381144871b1bSChris Mason 3812e902baacSDavid Sterba free_space = btrfs_leaf_free_space(right); 381344871b1bSChris Mason if (free_space < data_size) 381444871b1bSChris Mason goto out_unlock; 381544871b1bSChris Mason 381644871b1bSChris Mason /* cow and double check */ 381744871b1bSChris Mason ret = btrfs_cow_block(trans, root, right, upper, 38189631e4ccSJosef Bacik slot + 1, &right, BTRFS_NESTING_COW); 381944871b1bSChris Mason if (ret) 382044871b1bSChris Mason goto out_unlock; 382144871b1bSChris Mason 3822e902baacSDavid Sterba free_space = btrfs_leaf_free_space(right); 382344871b1bSChris Mason if (free_space < data_size) 382444871b1bSChris Mason goto out_unlock; 382544871b1bSChris Mason 382644871b1bSChris Mason left_nritems = btrfs_header_nritems(left); 382744871b1bSChris Mason if (left_nritems == 0) 382844871b1bSChris Mason goto out_unlock; 382944871b1bSChris Mason 3830d16c702fSQu Wenruo if (check_sibling_keys(left, right)) { 3831d16c702fSQu Wenruo ret = -EUCLEAN; 3832d16c702fSQu Wenruo btrfs_tree_unlock(right); 3833d16c702fSQu Wenruo free_extent_buffer(right); 3834d16c702fSQu Wenruo return ret; 3835d16c702fSQu Wenruo } 38362ef1fed2SFilipe David Borba Manana if (path->slots[0] == left_nritems && !empty) { 38372ef1fed2SFilipe David Borba Manana /* Key greater than all keys in the leaf, right neighbor has 38382ef1fed2SFilipe David Borba Manana * enough room for it and we're not emptying our leaf to delete 38392ef1fed2SFilipe David Borba Manana * it, therefore use right neighbor to insert the new item and 384052042d8eSAndrea Gelmini * no need to touch/dirty our left leaf. 
*/ 38412ef1fed2SFilipe David Borba Manana btrfs_tree_unlock(left); 38422ef1fed2SFilipe David Borba Manana free_extent_buffer(left); 38432ef1fed2SFilipe David Borba Manana path->nodes[0] = right; 38442ef1fed2SFilipe David Borba Manana path->slots[0] = 0; 38452ef1fed2SFilipe David Borba Manana path->slots[1]++; 38462ef1fed2SFilipe David Borba Manana return 0; 38472ef1fed2SFilipe David Borba Manana } 38482ef1fed2SFilipe David Borba Manana 3849f72f0010SDavid Sterba return __push_leaf_right(path, min_data_size, empty, 385099d8f83cSChris Mason right, free_space, left_nritems, min_slot); 385144871b1bSChris Mason out_unlock: 385244871b1bSChris Mason btrfs_tree_unlock(right); 385344871b1bSChris Mason free_extent_buffer(right); 385444871b1bSChris Mason return 1; 385544871b1bSChris Mason } 385644871b1bSChris Mason 385744871b1bSChris Mason /* 385844871b1bSChris Mason * push some data in the path leaf to the left, trying to free up at 385944871b1bSChris Mason * least data_size bytes. returns zero if the push worked, nonzero otherwise 386099d8f83cSChris Mason * 386199d8f83cSChris Mason * max_slot can put a limit on how far into the leaf we'll push items. The 386299d8f83cSChris Mason * item at 'max_slot' won't be touched. 
Use (u32)-1 to make us do all the 386399d8f83cSChris Mason * items 386444871b1bSChris Mason */ 38658087c193SDavid Sterba static noinline int __push_leaf_left(struct btrfs_path *path, int data_size, 386644871b1bSChris Mason int empty, struct extent_buffer *left, 386799d8f83cSChris Mason int free_space, u32 right_nritems, 386899d8f83cSChris Mason u32 max_slot) 386944871b1bSChris Mason { 38708087c193SDavid Sterba struct btrfs_fs_info *fs_info = left->fs_info; 38715f39d397SChris Mason struct btrfs_disk_key disk_key; 38725f39d397SChris Mason struct extent_buffer *right = path->nodes[0]; 3873be0e5c09SChris Mason int i; 3874be0e5c09SChris Mason int push_space = 0; 3875be0e5c09SChris Mason int push_items = 0; 38760783fcfcSChris Mason struct btrfs_item *item; 38777518a238SChris Mason u32 old_left_nritems; 387834a38218SChris Mason u32 nr; 3879aa5d6bedSChris Mason int ret = 0; 3880db94535dSChris Mason u32 this_item_size; 3881db94535dSChris Mason u32 old_left_item_size; 3882cfed81a0SChris Mason struct btrfs_map_token token; 3883cfed81a0SChris Mason 388434a38218SChris Mason if (empty) 388599d8f83cSChris Mason nr = min(right_nritems, max_slot); 388634a38218SChris Mason else 388799d8f83cSChris Mason nr = min(right_nritems - 1, max_slot); 388834a38218SChris Mason 388934a38218SChris Mason for (i = 0; i < nr; i++) { 3890dd3cc16bSRoss Kirk item = btrfs_item_nr(i); 3891db94535dSChris Mason 389231840ae1SZheng Yan if (!empty && push_items > 0) { 389331840ae1SZheng Yan if (path->slots[0] < i) 389431840ae1SZheng Yan break; 389531840ae1SZheng Yan if (path->slots[0] == i) { 3896e902baacSDavid Sterba int space = btrfs_leaf_free_space(right); 3897e902baacSDavid Sterba 389831840ae1SZheng Yan if (space + push_space * 2 > free_space) 389931840ae1SZheng Yan break; 390031840ae1SZheng Yan } 390131840ae1SZheng Yan } 390231840ae1SZheng Yan 3903be0e5c09SChris Mason if (path->slots[0] == i) 390487b29b20SYan Zheng push_space += data_size; 3905db94535dSChris Mason 3906db94535dSChris Mason this_item_size 
= btrfs_item_size(right, item); 3907db94535dSChris Mason if (this_item_size + sizeof(*item) + push_space > free_space) 3908be0e5c09SChris Mason break; 3909db94535dSChris Mason 3910be0e5c09SChris Mason push_items++; 3911db94535dSChris Mason push_space += this_item_size + sizeof(*item); 3912be0e5c09SChris Mason } 3913db94535dSChris Mason 3914be0e5c09SChris Mason if (push_items == 0) { 3915925baeddSChris Mason ret = 1; 3916925baeddSChris Mason goto out; 3917be0e5c09SChris Mason } 3918fae7f21cSDulshani Gunawardhana WARN_ON(!empty && push_items == btrfs_header_nritems(right)); 39195f39d397SChris Mason 3920be0e5c09SChris Mason /* push data from right to left */ 39215f39d397SChris Mason copy_extent_buffer(left, right, 39225f39d397SChris Mason btrfs_item_nr_offset(btrfs_header_nritems(left)), 39235f39d397SChris Mason btrfs_item_nr_offset(0), 39245f39d397SChris Mason push_items * sizeof(struct btrfs_item)); 39255f39d397SChris Mason 39260b246afaSJeff Mahoney push_space = BTRFS_LEAF_DATA_SIZE(fs_info) - 39275f39d397SChris Mason btrfs_item_offset_nr(right, push_items - 1); 39285f39d397SChris Mason 39293d9ec8c4SNikolay Borisov copy_extent_buffer(left, right, BTRFS_LEAF_DATA_OFFSET + 39308f881e8cSDavid Sterba leaf_data_end(left) - push_space, 39313d9ec8c4SNikolay Borisov BTRFS_LEAF_DATA_OFFSET + 39325f39d397SChris Mason btrfs_item_offset_nr(right, push_items - 1), 3933be0e5c09SChris Mason push_space); 39345f39d397SChris Mason old_left_nritems = btrfs_header_nritems(left); 393587b29b20SYan Zheng BUG_ON(old_left_nritems <= 0); 3936eb60ceacSChris Mason 3937c82f823cSDavid Sterba btrfs_init_map_token(&token, left); 3938db94535dSChris Mason old_left_item_size = btrfs_item_offset_nr(left, old_left_nritems - 1); 3939be0e5c09SChris Mason for (i = old_left_nritems; i < old_left_nritems + push_items; i++) { 39405f39d397SChris Mason u32 ioff; 3941db94535dSChris Mason 3942dd3cc16bSRoss Kirk item = btrfs_item_nr(i); 3943db94535dSChris Mason 3944cc4c13d5SDavid Sterba ioff = 
btrfs_token_item_offset(&token, item); 3945cc4c13d5SDavid Sterba btrfs_set_token_item_offset(&token, item, 3946cc4c13d5SDavid Sterba ioff - (BTRFS_LEAF_DATA_SIZE(fs_info) - old_left_item_size)); 3947be0e5c09SChris Mason } 39485f39d397SChris Mason btrfs_set_header_nritems(left, old_left_nritems + push_items); 3949be0e5c09SChris Mason 3950be0e5c09SChris Mason /* fixup right node */ 395131b1a2bdSJulia Lawall if (push_items > right_nritems) 395231b1a2bdSJulia Lawall WARN(1, KERN_CRIT "push items %d nr %u\n", push_items, 3953d397712bSChris Mason right_nritems); 395434a38218SChris Mason 395534a38218SChris Mason if (push_items < right_nritems) { 39565f39d397SChris Mason push_space = btrfs_item_offset_nr(right, push_items - 1) - 39578f881e8cSDavid Sterba leaf_data_end(right); 39583d9ec8c4SNikolay Borisov memmove_extent_buffer(right, BTRFS_LEAF_DATA_OFFSET + 39590b246afaSJeff Mahoney BTRFS_LEAF_DATA_SIZE(fs_info) - push_space, 39603d9ec8c4SNikolay Borisov BTRFS_LEAF_DATA_OFFSET + 39618f881e8cSDavid Sterba leaf_data_end(right), push_space); 39625f39d397SChris Mason 39635f39d397SChris Mason memmove_extent_buffer(right, btrfs_item_nr_offset(0), 39645f39d397SChris Mason btrfs_item_nr_offset(push_items), 39655f39d397SChris Mason (btrfs_header_nritems(right) - push_items) * 39660783fcfcSChris Mason sizeof(struct btrfs_item)); 396734a38218SChris Mason } 3968c82f823cSDavid Sterba 3969c82f823cSDavid Sterba btrfs_init_map_token(&token, right); 3970eef1c494SYan right_nritems -= push_items; 3971eef1c494SYan btrfs_set_header_nritems(right, right_nritems); 39720b246afaSJeff Mahoney push_space = BTRFS_LEAF_DATA_SIZE(fs_info); 39735f39d397SChris Mason for (i = 0; i < right_nritems; i++) { 3974dd3cc16bSRoss Kirk item = btrfs_item_nr(i); 3975db94535dSChris Mason 3976cc4c13d5SDavid Sterba push_space = push_space - btrfs_token_item_size(&token, item); 3977cc4c13d5SDavid Sterba btrfs_set_token_item_offset(&token, item, push_space); 3978db94535dSChris Mason } 3979eb60ceacSChris Mason 
39805f39d397SChris Mason btrfs_mark_buffer_dirty(left); 398134a38218SChris Mason if (right_nritems) 39825f39d397SChris Mason btrfs_mark_buffer_dirty(right); 3983f0486c68SYan, Zheng else 39846a884d7dSDavid Sterba btrfs_clean_tree_block(right); 3985098f59c2SChris Mason 39865f39d397SChris Mason btrfs_item_key(right, &disk_key, 0); 3987b167fa91SNikolay Borisov fixup_low_keys(path, &disk_key, 1); 3988be0e5c09SChris Mason 3989be0e5c09SChris Mason /* then fixup the leaf pointer in the path */ 3990be0e5c09SChris Mason if (path->slots[0] < push_items) { 3991be0e5c09SChris Mason path->slots[0] += old_left_nritems; 3992925baeddSChris Mason btrfs_tree_unlock(path->nodes[0]); 39935f39d397SChris Mason free_extent_buffer(path->nodes[0]); 39945f39d397SChris Mason path->nodes[0] = left; 3995be0e5c09SChris Mason path->slots[1] -= 1; 3996be0e5c09SChris Mason } else { 3997925baeddSChris Mason btrfs_tree_unlock(left); 39985f39d397SChris Mason free_extent_buffer(left); 3999be0e5c09SChris Mason path->slots[0] -= push_items; 4000be0e5c09SChris Mason } 4001eb60ceacSChris Mason BUG_ON(path->slots[0] < 0); 4002aa5d6bedSChris Mason return ret; 4003925baeddSChris Mason out: 4004925baeddSChris Mason btrfs_tree_unlock(left); 4005925baeddSChris Mason free_extent_buffer(left); 4006925baeddSChris Mason return ret; 4007be0e5c09SChris Mason } 4008be0e5c09SChris Mason 400974123bd7SChris Mason /* 401044871b1bSChris Mason * push some data in the path leaf to the left, trying to free up at 401144871b1bSChris Mason * least data_size bytes. returns zero if the push worked, nonzero otherwise 401299d8f83cSChris Mason * 401399d8f83cSChris Mason * max_slot can put a limit on how far into the leaf we'll push items. The 401499d8f83cSChris Mason * item at 'max_slot' won't be touched. 
Use (u32)-1 to make us push all the 401599d8f83cSChris Mason * items 401644871b1bSChris Mason */ 401744871b1bSChris Mason static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root 401899d8f83cSChris Mason *root, struct btrfs_path *path, int min_data_size, 401999d8f83cSChris Mason int data_size, int empty, u32 max_slot) 402044871b1bSChris Mason { 402144871b1bSChris Mason struct extent_buffer *right = path->nodes[0]; 402244871b1bSChris Mason struct extent_buffer *left; 402344871b1bSChris Mason int slot; 402444871b1bSChris Mason int free_space; 402544871b1bSChris Mason u32 right_nritems; 402644871b1bSChris Mason int ret = 0; 402744871b1bSChris Mason 402844871b1bSChris Mason slot = path->slots[1]; 402944871b1bSChris Mason if (slot == 0) 403044871b1bSChris Mason return 1; 403144871b1bSChris Mason if (!path->nodes[1]) 403244871b1bSChris Mason return 1; 403344871b1bSChris Mason 403444871b1bSChris Mason right_nritems = btrfs_header_nritems(right); 403544871b1bSChris Mason if (right_nritems == 0) 403644871b1bSChris Mason return 1; 403744871b1bSChris Mason 403844871b1bSChris Mason btrfs_assert_tree_locked(path->nodes[1]); 403944871b1bSChris Mason 40404b231ae4SDavid Sterba left = btrfs_read_node_slot(path->nodes[1], slot - 1); 4041fb770ae4SLiu Bo /* 4042fb770ae4SLiu Bo * slot - 1 is not valid or we fail to read the left node, 4043fb770ae4SLiu Bo * no big deal, just return. 
4044fb770ae4SLiu Bo */ 4045fb770ae4SLiu Bo if (IS_ERR(left)) 404691ca338dSTsutomu Itoh return 1; 404791ca338dSTsutomu Itoh 4048*bf77467aSJosef Bacik __btrfs_tree_lock(left, BTRFS_NESTING_LEFT); 40498bead258SDavid Sterba btrfs_set_lock_blocking_write(left); 405044871b1bSChris Mason 4051e902baacSDavid Sterba free_space = btrfs_leaf_free_space(left); 405244871b1bSChris Mason if (free_space < data_size) { 405344871b1bSChris Mason ret = 1; 405444871b1bSChris Mason goto out; 405544871b1bSChris Mason } 405644871b1bSChris Mason 405744871b1bSChris Mason /* cow and double check */ 405844871b1bSChris Mason ret = btrfs_cow_block(trans, root, left, 40599631e4ccSJosef Bacik path->nodes[1], slot - 1, &left, 40609631e4ccSJosef Bacik BTRFS_NESTING_COW); 406144871b1bSChris Mason if (ret) { 406244871b1bSChris Mason /* we hit -ENOSPC, but it isn't fatal here */ 406379787eaaSJeff Mahoney if (ret == -ENOSPC) 406444871b1bSChris Mason ret = 1; 406544871b1bSChris Mason goto out; 406644871b1bSChris Mason } 406744871b1bSChris Mason 4068e902baacSDavid Sterba free_space = btrfs_leaf_free_space(left); 406944871b1bSChris Mason if (free_space < data_size) { 407044871b1bSChris Mason ret = 1; 407144871b1bSChris Mason goto out; 407244871b1bSChris Mason } 407344871b1bSChris Mason 4074d16c702fSQu Wenruo if (check_sibling_keys(left, right)) { 4075d16c702fSQu Wenruo ret = -EUCLEAN; 4076d16c702fSQu Wenruo goto out; 4077d16c702fSQu Wenruo } 40788087c193SDavid Sterba return __push_leaf_left(path, min_data_size, 407999d8f83cSChris Mason empty, left, free_space, right_nritems, 408099d8f83cSChris Mason max_slot); 408144871b1bSChris Mason out: 408244871b1bSChris Mason btrfs_tree_unlock(left); 408344871b1bSChris Mason free_extent_buffer(left); 408444871b1bSChris Mason return ret; 408544871b1bSChris Mason } 408644871b1bSChris Mason 408744871b1bSChris Mason /* 408874123bd7SChris Mason * split the path's leaf in two, making sure there is at least data_size 408974123bd7SChris Mason * available for the resulting 
leaf level of the path. 409074123bd7SChris Mason */ 4091143bede5SJeff Mahoney static noinline void copy_for_split(struct btrfs_trans_handle *trans, 409244871b1bSChris Mason struct btrfs_path *path, 409344871b1bSChris Mason struct extent_buffer *l, 409444871b1bSChris Mason struct extent_buffer *right, 409544871b1bSChris Mason int slot, int mid, int nritems) 4096be0e5c09SChris Mason { 409794f94ad9SDavid Sterba struct btrfs_fs_info *fs_info = trans->fs_info; 4098be0e5c09SChris Mason int data_copy_size; 4099be0e5c09SChris Mason int rt_data_off; 4100be0e5c09SChris Mason int i; 4101d4dbff95SChris Mason struct btrfs_disk_key disk_key; 4102cfed81a0SChris Mason struct btrfs_map_token token; 4103cfed81a0SChris Mason 41045f39d397SChris Mason nritems = nritems - mid; 41055f39d397SChris Mason btrfs_set_header_nritems(right, nritems); 41068f881e8cSDavid Sterba data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(l); 41075f39d397SChris Mason 41085f39d397SChris Mason copy_extent_buffer(right, l, btrfs_item_nr_offset(0), 41095f39d397SChris Mason btrfs_item_nr_offset(mid), 41105f39d397SChris Mason nritems * sizeof(struct btrfs_item)); 41115f39d397SChris Mason 41125f39d397SChris Mason copy_extent_buffer(right, l, 41133d9ec8c4SNikolay Borisov BTRFS_LEAF_DATA_OFFSET + BTRFS_LEAF_DATA_SIZE(fs_info) - 41143d9ec8c4SNikolay Borisov data_copy_size, BTRFS_LEAF_DATA_OFFSET + 41158f881e8cSDavid Sterba leaf_data_end(l), data_copy_size); 411674123bd7SChris Mason 41170b246afaSJeff Mahoney rt_data_off = BTRFS_LEAF_DATA_SIZE(fs_info) - btrfs_item_end_nr(l, mid); 41185f39d397SChris Mason 4119c82f823cSDavid Sterba btrfs_init_map_token(&token, right); 41205f39d397SChris Mason for (i = 0; i < nritems; i++) { 4121dd3cc16bSRoss Kirk struct btrfs_item *item = btrfs_item_nr(i); 4122db94535dSChris Mason u32 ioff; 4123db94535dSChris Mason 4124cc4c13d5SDavid Sterba ioff = btrfs_token_item_offset(&token, item); 4125cc4c13d5SDavid Sterba btrfs_set_token_item_offset(&token, item, ioff + rt_data_off); 
41260783fcfcSChris Mason } 412774123bd7SChris Mason 41285f39d397SChris Mason btrfs_set_header_nritems(l, mid); 41295f39d397SChris Mason btrfs_item_key(right, &disk_key, 0); 41306ad3cf6dSDavid Sterba insert_ptr(trans, path, &disk_key, right->start, path->slots[1] + 1, 1); 41315f39d397SChris Mason 41325f39d397SChris Mason btrfs_mark_buffer_dirty(right); 41335f39d397SChris Mason btrfs_mark_buffer_dirty(l); 4134eb60ceacSChris Mason BUG_ON(path->slots[0] != slot); 41355f39d397SChris Mason 4136be0e5c09SChris Mason if (mid <= slot) { 4137925baeddSChris Mason btrfs_tree_unlock(path->nodes[0]); 41385f39d397SChris Mason free_extent_buffer(path->nodes[0]); 41395f39d397SChris Mason path->nodes[0] = right; 4140be0e5c09SChris Mason path->slots[0] -= mid; 4141be0e5c09SChris Mason path->slots[1] += 1; 4142925baeddSChris Mason } else { 4143925baeddSChris Mason btrfs_tree_unlock(right); 41445f39d397SChris Mason free_extent_buffer(right); 4145925baeddSChris Mason } 41465f39d397SChris Mason 4147eb60ceacSChris Mason BUG_ON(path->slots[0] < 0); 414844871b1bSChris Mason } 414944871b1bSChris Mason 415044871b1bSChris Mason /* 415199d8f83cSChris Mason * double splits happen when we need to insert a big item in the middle 415299d8f83cSChris Mason * of a leaf. A double split can leave us with 3 mostly empty leaves: 415399d8f83cSChris Mason * leaf: [ slots 0 - N] [ our target ] [ N + 1 - total in leaf ] 415499d8f83cSChris Mason * A B C 415599d8f83cSChris Mason * 415699d8f83cSChris Mason * We avoid this by trying to push the items on either side of our target 415799d8f83cSChris Mason * into the adjacent leaves. If all goes well we can avoid the double split 415899d8f83cSChris Mason * completely. 
415999d8f83cSChris Mason */ 416099d8f83cSChris Mason static noinline int push_for_double_split(struct btrfs_trans_handle *trans, 416199d8f83cSChris Mason struct btrfs_root *root, 416299d8f83cSChris Mason struct btrfs_path *path, 416399d8f83cSChris Mason int data_size) 416499d8f83cSChris Mason { 416599d8f83cSChris Mason int ret; 416699d8f83cSChris Mason int progress = 0; 416799d8f83cSChris Mason int slot; 416899d8f83cSChris Mason u32 nritems; 41695a4267caSFilipe David Borba Manana int space_needed = data_size; 417099d8f83cSChris Mason 417199d8f83cSChris Mason slot = path->slots[0]; 41725a4267caSFilipe David Borba Manana if (slot < btrfs_header_nritems(path->nodes[0])) 4173e902baacSDavid Sterba space_needed -= btrfs_leaf_free_space(path->nodes[0]); 417499d8f83cSChris Mason 417599d8f83cSChris Mason /* 417699d8f83cSChris Mason * try to push all the items after our slot into the 417799d8f83cSChris Mason * right leaf 417899d8f83cSChris Mason */ 41795a4267caSFilipe David Borba Manana ret = push_leaf_right(trans, root, path, 1, space_needed, 0, slot); 418099d8f83cSChris Mason if (ret < 0) 418199d8f83cSChris Mason return ret; 418299d8f83cSChris Mason 418399d8f83cSChris Mason if (ret == 0) 418499d8f83cSChris Mason progress++; 418599d8f83cSChris Mason 418699d8f83cSChris Mason nritems = btrfs_header_nritems(path->nodes[0]); 418799d8f83cSChris Mason /* 418899d8f83cSChris Mason * our goal is to get our slot at the start or end of a leaf. 
If 418999d8f83cSChris Mason * we've done so we're done 419099d8f83cSChris Mason */ 419199d8f83cSChris Mason if (path->slots[0] == 0 || path->slots[0] == nritems) 419299d8f83cSChris Mason return 0; 419399d8f83cSChris Mason 4194e902baacSDavid Sterba if (btrfs_leaf_free_space(path->nodes[0]) >= data_size) 419599d8f83cSChris Mason return 0; 419699d8f83cSChris Mason 419799d8f83cSChris Mason /* try to push all the items before our slot into the next leaf */ 419899d8f83cSChris Mason slot = path->slots[0]; 4199263d3995SFilipe Manana space_needed = data_size; 4200263d3995SFilipe Manana if (slot > 0) 4201e902baacSDavid Sterba space_needed -= btrfs_leaf_free_space(path->nodes[0]); 42025a4267caSFilipe David Borba Manana ret = push_leaf_left(trans, root, path, 1, space_needed, 0, slot); 420399d8f83cSChris Mason if (ret < 0) 420499d8f83cSChris Mason return ret; 420599d8f83cSChris Mason 420699d8f83cSChris Mason if (ret == 0) 420799d8f83cSChris Mason progress++; 420899d8f83cSChris Mason 420999d8f83cSChris Mason if (progress) 421099d8f83cSChris Mason return 0; 421199d8f83cSChris Mason return 1; 421299d8f83cSChris Mason } 421399d8f83cSChris Mason 421499d8f83cSChris Mason /* 421544871b1bSChris Mason * split the path's leaf in two, making sure there is at least data_size 421644871b1bSChris Mason * available for the resulting leaf level of the path. 421744871b1bSChris Mason * 421844871b1bSChris Mason * returns 0 if all went well and < 0 on failure. 
421944871b1bSChris Mason */ 422044871b1bSChris Mason static noinline int split_leaf(struct btrfs_trans_handle *trans, 422144871b1bSChris Mason struct btrfs_root *root, 4222310712b2SOmar Sandoval const struct btrfs_key *ins_key, 422344871b1bSChris Mason struct btrfs_path *path, int data_size, 422444871b1bSChris Mason int extend) 422544871b1bSChris Mason { 42265d4f98a2SYan Zheng struct btrfs_disk_key disk_key; 422744871b1bSChris Mason struct extent_buffer *l; 422844871b1bSChris Mason u32 nritems; 422944871b1bSChris Mason int mid; 423044871b1bSChris Mason int slot; 423144871b1bSChris Mason struct extent_buffer *right; 4232b7a0365eSDaniel Dressler struct btrfs_fs_info *fs_info = root->fs_info; 423344871b1bSChris Mason int ret = 0; 423444871b1bSChris Mason int wret; 42355d4f98a2SYan Zheng int split; 423644871b1bSChris Mason int num_doubles = 0; 423799d8f83cSChris Mason int tried_avoid_double = 0; 423844871b1bSChris Mason 4239a5719521SYan, Zheng l = path->nodes[0]; 4240a5719521SYan, Zheng slot = path->slots[0]; 4241a5719521SYan, Zheng if (extend && data_size + btrfs_item_size_nr(l, slot) + 42420b246afaSJeff Mahoney sizeof(struct btrfs_item) > BTRFS_LEAF_DATA_SIZE(fs_info)) 4243a5719521SYan, Zheng return -EOVERFLOW; 4244a5719521SYan, Zheng 424544871b1bSChris Mason /* first try to make some room by pushing left and right */ 424633157e05SLiu Bo if (data_size && path->nodes[1]) { 42475a4267caSFilipe David Borba Manana int space_needed = data_size; 42485a4267caSFilipe David Borba Manana 42495a4267caSFilipe David Borba Manana if (slot < btrfs_header_nritems(l)) 4250e902baacSDavid Sterba space_needed -= btrfs_leaf_free_space(l); 42515a4267caSFilipe David Borba Manana 42525a4267caSFilipe David Borba Manana wret = push_leaf_right(trans, root, path, space_needed, 42535a4267caSFilipe David Borba Manana space_needed, 0, 0); 425444871b1bSChris Mason if (wret < 0) 425544871b1bSChris Mason return wret; 425644871b1bSChris Mason if (wret) { 4257263d3995SFilipe Manana space_needed = 
data_size; 4258263d3995SFilipe Manana if (slot > 0) 4259e902baacSDavid Sterba space_needed -= btrfs_leaf_free_space(l); 42605a4267caSFilipe David Borba Manana wret = push_leaf_left(trans, root, path, space_needed, 42615a4267caSFilipe David Borba Manana space_needed, 0, (u32)-1); 426244871b1bSChris Mason if (wret < 0) 426344871b1bSChris Mason return wret; 426444871b1bSChris Mason } 426544871b1bSChris Mason l = path->nodes[0]; 426644871b1bSChris Mason 426744871b1bSChris Mason /* did the pushes work? */ 4268e902baacSDavid Sterba if (btrfs_leaf_free_space(l) >= data_size) 426944871b1bSChris Mason return 0; 427044871b1bSChris Mason } 427144871b1bSChris Mason 427244871b1bSChris Mason if (!path->nodes[1]) { 4273fdd99c72SLiu Bo ret = insert_new_root(trans, root, path, 1); 427444871b1bSChris Mason if (ret) 427544871b1bSChris Mason return ret; 427644871b1bSChris Mason } 427744871b1bSChris Mason again: 42785d4f98a2SYan Zheng split = 1; 427944871b1bSChris Mason l = path->nodes[0]; 428044871b1bSChris Mason slot = path->slots[0]; 428144871b1bSChris Mason nritems = btrfs_header_nritems(l); 428244871b1bSChris Mason mid = (nritems + 1) / 2; 428344871b1bSChris Mason 42845d4f98a2SYan Zheng if (mid <= slot) { 42855d4f98a2SYan Zheng if (nritems == 1 || 42865d4f98a2SYan Zheng leaf_space_used(l, mid, nritems - mid) + data_size > 42870b246afaSJeff Mahoney BTRFS_LEAF_DATA_SIZE(fs_info)) { 42885d4f98a2SYan Zheng if (slot >= nritems) { 42895d4f98a2SYan Zheng split = 0; 42905d4f98a2SYan Zheng } else { 42915d4f98a2SYan Zheng mid = slot; 42925d4f98a2SYan Zheng if (mid != nritems && 42935d4f98a2SYan Zheng leaf_space_used(l, mid, nritems - mid) + 42940b246afaSJeff Mahoney data_size > BTRFS_LEAF_DATA_SIZE(fs_info)) { 429599d8f83cSChris Mason if (data_size && !tried_avoid_double) 429699d8f83cSChris Mason goto push_for_double; 42975d4f98a2SYan Zheng split = 2; 42985d4f98a2SYan Zheng } 42995d4f98a2SYan Zheng } 43005d4f98a2SYan Zheng } 43015d4f98a2SYan Zheng } else { 43025d4f98a2SYan Zheng if 
(leaf_space_used(l, 0, mid) + data_size > 43030b246afaSJeff Mahoney BTRFS_LEAF_DATA_SIZE(fs_info)) { 43045d4f98a2SYan Zheng if (!extend && data_size && slot == 0) { 43055d4f98a2SYan Zheng split = 0; 43065d4f98a2SYan Zheng } else if ((extend || !data_size) && slot == 0) { 43075d4f98a2SYan Zheng mid = 1; 43085d4f98a2SYan Zheng } else { 43095d4f98a2SYan Zheng mid = slot; 43105d4f98a2SYan Zheng if (mid != nritems && 43115d4f98a2SYan Zheng leaf_space_used(l, mid, nritems - mid) + 43120b246afaSJeff Mahoney data_size > BTRFS_LEAF_DATA_SIZE(fs_info)) { 431399d8f83cSChris Mason if (data_size && !tried_avoid_double) 431499d8f83cSChris Mason goto push_for_double; 43155d4f98a2SYan Zheng split = 2; 43165d4f98a2SYan Zheng } 43175d4f98a2SYan Zheng } 43185d4f98a2SYan Zheng } 43195d4f98a2SYan Zheng } 43205d4f98a2SYan Zheng 43215d4f98a2SYan Zheng if (split == 0) 43225d4f98a2SYan Zheng btrfs_cpu_key_to_disk(&disk_key, ins_key); 43235d4f98a2SYan Zheng else 43245d4f98a2SYan Zheng btrfs_item_key(l, &disk_key, mid); 43255d4f98a2SYan Zheng 4326a6279470SFilipe Manana right = alloc_tree_block_no_bg_flush(trans, root, 0, &disk_key, 0, 43279631e4ccSJosef Bacik l->start, 0, BTRFS_NESTING_NORMAL); 4328f0486c68SYan, Zheng if (IS_ERR(right)) 432944871b1bSChris Mason return PTR_ERR(right); 4330f0486c68SYan, Zheng 43310b246afaSJeff Mahoney root_add_used(root, fs_info->nodesize); 433244871b1bSChris Mason 43335d4f98a2SYan Zheng if (split == 0) { 433444871b1bSChris Mason if (mid <= slot) { 433544871b1bSChris Mason btrfs_set_header_nritems(right, 0); 43366ad3cf6dSDavid Sterba insert_ptr(trans, path, &disk_key, 43372ff7e61eSJeff Mahoney right->start, path->slots[1] + 1, 1); 433844871b1bSChris Mason btrfs_tree_unlock(path->nodes[0]); 433944871b1bSChris Mason free_extent_buffer(path->nodes[0]); 434044871b1bSChris Mason path->nodes[0] = right; 434144871b1bSChris Mason path->slots[0] = 0; 434244871b1bSChris Mason path->slots[1] += 1; 434344871b1bSChris Mason } else { 434444871b1bSChris Mason 
btrfs_set_header_nritems(right, 0); 43456ad3cf6dSDavid Sterba insert_ptr(trans, path, &disk_key, 43462ff7e61eSJeff Mahoney right->start, path->slots[1], 1); 434744871b1bSChris Mason btrfs_tree_unlock(path->nodes[0]); 434844871b1bSChris Mason free_extent_buffer(path->nodes[0]); 434944871b1bSChris Mason path->nodes[0] = right; 435044871b1bSChris Mason path->slots[0] = 0; 4351143bede5SJeff Mahoney if (path->slots[1] == 0) 4352b167fa91SNikolay Borisov fixup_low_keys(path, &disk_key, 1); 43535d4f98a2SYan Zheng } 4354196e0249SLiu Bo /* 4355196e0249SLiu Bo * We create a new leaf 'right' for the required ins_len and 4356196e0249SLiu Bo * we'll do btrfs_mark_buffer_dirty() on this leaf after copying 4357196e0249SLiu Bo * the content of ins_len to 'right'. 4358196e0249SLiu Bo */ 435944871b1bSChris Mason return ret; 436044871b1bSChris Mason } 436144871b1bSChris Mason 436294f94ad9SDavid Sterba copy_for_split(trans, path, l, right, slot, mid, nritems); 436344871b1bSChris Mason 43645d4f98a2SYan Zheng if (split == 2) { 4365cc0c5538SChris Mason BUG_ON(num_doubles != 0); 4366cc0c5538SChris Mason num_doubles++; 4367cc0c5538SChris Mason goto again; 43683326d1b0SChris Mason } 436944871b1bSChris Mason 4370143bede5SJeff Mahoney return 0; 437199d8f83cSChris Mason 437299d8f83cSChris Mason push_for_double: 437399d8f83cSChris Mason push_for_double_split(trans, root, path, data_size); 437499d8f83cSChris Mason tried_avoid_double = 1; 4375e902baacSDavid Sterba if (btrfs_leaf_free_space(path->nodes[0]) >= data_size) 437699d8f83cSChris Mason return 0; 437799d8f83cSChris Mason goto again; 4378be0e5c09SChris Mason } 4379be0e5c09SChris Mason 4380ad48fd75SYan, Zheng static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans, 4381ad48fd75SYan, Zheng struct btrfs_root *root, 4382ad48fd75SYan, Zheng struct btrfs_path *path, int ins_len) 4383ad48fd75SYan, Zheng { 4384ad48fd75SYan, Zheng struct btrfs_key key; 4385ad48fd75SYan, Zheng struct extent_buffer *leaf; 4386ad48fd75SYan, Zheng 
struct btrfs_file_extent_item *fi; 4387ad48fd75SYan, Zheng u64 extent_len = 0; 4388ad48fd75SYan, Zheng u32 item_size; 4389ad48fd75SYan, Zheng int ret; 4390ad48fd75SYan, Zheng 4391ad48fd75SYan, Zheng leaf = path->nodes[0]; 4392ad48fd75SYan, Zheng btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 4393ad48fd75SYan, Zheng 4394ad48fd75SYan, Zheng BUG_ON(key.type != BTRFS_EXTENT_DATA_KEY && 4395ad48fd75SYan, Zheng key.type != BTRFS_EXTENT_CSUM_KEY); 4396ad48fd75SYan, Zheng 4397e902baacSDavid Sterba if (btrfs_leaf_free_space(leaf) >= ins_len) 4398ad48fd75SYan, Zheng return 0; 4399ad48fd75SYan, Zheng 4400ad48fd75SYan, Zheng item_size = btrfs_item_size_nr(leaf, path->slots[0]); 4401ad48fd75SYan, Zheng if (key.type == BTRFS_EXTENT_DATA_KEY) { 4402ad48fd75SYan, Zheng fi = btrfs_item_ptr(leaf, path->slots[0], 4403ad48fd75SYan, Zheng struct btrfs_file_extent_item); 4404ad48fd75SYan, Zheng extent_len = btrfs_file_extent_num_bytes(leaf, fi); 4405ad48fd75SYan, Zheng } 4406b3b4aa74SDavid Sterba btrfs_release_path(path); 4407ad48fd75SYan, Zheng 4408ad48fd75SYan, Zheng path->keep_locks = 1; 4409ad48fd75SYan, Zheng path->search_for_split = 1; 4410ad48fd75SYan, Zheng ret = btrfs_search_slot(trans, root, &key, path, 0, 1); 4411ad48fd75SYan, Zheng path->search_for_split = 0; 4412a8df6fe6SFilipe Manana if (ret > 0) 4413a8df6fe6SFilipe Manana ret = -EAGAIN; 4414ad48fd75SYan, Zheng if (ret < 0) 4415ad48fd75SYan, Zheng goto err; 4416ad48fd75SYan, Zheng 4417ad48fd75SYan, Zheng ret = -EAGAIN; 4418ad48fd75SYan, Zheng leaf = path->nodes[0]; 4419a8df6fe6SFilipe Manana /* if our item isn't there, return now */ 4420a8df6fe6SFilipe Manana if (item_size != btrfs_item_size_nr(leaf, path->slots[0])) 4421ad48fd75SYan, Zheng goto err; 4422ad48fd75SYan, Zheng 4423109f6aefSChris Mason /* the leaf has changed, it now has room. 
return now */ 4424e902baacSDavid Sterba if (btrfs_leaf_free_space(path->nodes[0]) >= ins_len) 4425109f6aefSChris Mason goto err; 4426109f6aefSChris Mason 4427ad48fd75SYan, Zheng if (key.type == BTRFS_EXTENT_DATA_KEY) { 4428ad48fd75SYan, Zheng fi = btrfs_item_ptr(leaf, path->slots[0], 4429ad48fd75SYan, Zheng struct btrfs_file_extent_item); 4430ad48fd75SYan, Zheng if (extent_len != btrfs_file_extent_num_bytes(leaf, fi)) 4431ad48fd75SYan, Zheng goto err; 4432ad48fd75SYan, Zheng } 4433ad48fd75SYan, Zheng 4434ad48fd75SYan, Zheng btrfs_set_path_blocking(path); 4435ad48fd75SYan, Zheng ret = split_leaf(trans, root, &key, path, ins_len, 1); 4436f0486c68SYan, Zheng if (ret) 4437f0486c68SYan, Zheng goto err; 4438ad48fd75SYan, Zheng 4439ad48fd75SYan, Zheng path->keep_locks = 0; 4440ad48fd75SYan, Zheng btrfs_unlock_up_safe(path, 1); 4441ad48fd75SYan, Zheng return 0; 4442ad48fd75SYan, Zheng err: 4443ad48fd75SYan, Zheng path->keep_locks = 0; 4444ad48fd75SYan, Zheng return ret; 4445ad48fd75SYan, Zheng } 4446ad48fd75SYan, Zheng 444725263cd7SDavid Sterba static noinline int split_item(struct btrfs_path *path, 4448310712b2SOmar Sandoval const struct btrfs_key *new_key, 4449459931ecSChris Mason unsigned long split_offset) 4450459931ecSChris Mason { 4451459931ecSChris Mason struct extent_buffer *leaf; 4452459931ecSChris Mason struct btrfs_item *item; 4453459931ecSChris Mason struct btrfs_item *new_item; 4454459931ecSChris Mason int slot; 4455ad48fd75SYan, Zheng char *buf; 4456459931ecSChris Mason u32 nritems; 4457ad48fd75SYan, Zheng u32 item_size; 4458459931ecSChris Mason u32 orig_offset; 4459459931ecSChris Mason struct btrfs_disk_key disk_key; 4460459931ecSChris Mason 4461459931ecSChris Mason leaf = path->nodes[0]; 4462e902baacSDavid Sterba BUG_ON(btrfs_leaf_free_space(leaf) < sizeof(struct btrfs_item)); 4463b9473439SChris Mason 4464b4ce94deSChris Mason btrfs_set_path_blocking(path); 4465b4ce94deSChris Mason 4466dd3cc16bSRoss Kirk item = btrfs_item_nr(path->slots[0]); 
4467459931ecSChris Mason orig_offset = btrfs_item_offset(leaf, item); 4468459931ecSChris Mason item_size = btrfs_item_size(leaf, item); 4469459931ecSChris Mason 4470459931ecSChris Mason buf = kmalloc(item_size, GFP_NOFS); 4471ad48fd75SYan, Zheng if (!buf) 4472ad48fd75SYan, Zheng return -ENOMEM; 4473ad48fd75SYan, Zheng 4474459931ecSChris Mason read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf, 4475459931ecSChris Mason path->slots[0]), item_size); 4476ad48fd75SYan, Zheng 4477459931ecSChris Mason slot = path->slots[0] + 1; 4478459931ecSChris Mason nritems = btrfs_header_nritems(leaf); 4479459931ecSChris Mason if (slot != nritems) { 4480459931ecSChris Mason /* shift the items */ 4481459931ecSChris Mason memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1), 4482459931ecSChris Mason btrfs_item_nr_offset(slot), 4483459931ecSChris Mason (nritems - slot) * sizeof(struct btrfs_item)); 4484459931ecSChris Mason } 4485459931ecSChris Mason 4486459931ecSChris Mason btrfs_cpu_key_to_disk(&disk_key, new_key); 4487459931ecSChris Mason btrfs_set_item_key(leaf, &disk_key, slot); 4488459931ecSChris Mason 4489dd3cc16bSRoss Kirk new_item = btrfs_item_nr(slot); 4490459931ecSChris Mason 4491459931ecSChris Mason btrfs_set_item_offset(leaf, new_item, orig_offset); 4492459931ecSChris Mason btrfs_set_item_size(leaf, new_item, item_size - split_offset); 4493459931ecSChris Mason 4494459931ecSChris Mason btrfs_set_item_offset(leaf, item, 4495459931ecSChris Mason orig_offset + item_size - split_offset); 4496459931ecSChris Mason btrfs_set_item_size(leaf, item, split_offset); 4497459931ecSChris Mason 4498459931ecSChris Mason btrfs_set_header_nritems(leaf, nritems + 1); 4499459931ecSChris Mason 4500459931ecSChris Mason /* write the data for the start of the original item */ 4501459931ecSChris Mason write_extent_buffer(leaf, buf, 4502459931ecSChris Mason btrfs_item_ptr_offset(leaf, path->slots[0]), 4503459931ecSChris Mason split_offset); 4504459931ecSChris Mason 4505459931ecSChris Mason 
/* write the data for the new item */ 4506459931ecSChris Mason write_extent_buffer(leaf, buf + split_offset, 4507459931ecSChris Mason btrfs_item_ptr_offset(leaf, slot), 4508459931ecSChris Mason item_size - split_offset); 4509459931ecSChris Mason btrfs_mark_buffer_dirty(leaf); 4510459931ecSChris Mason 4511e902baacSDavid Sterba BUG_ON(btrfs_leaf_free_space(leaf) < 0); 4512459931ecSChris Mason kfree(buf); 4513ad48fd75SYan, Zheng return 0; 4514ad48fd75SYan, Zheng } 4515ad48fd75SYan, Zheng 4516ad48fd75SYan, Zheng /* 4517ad48fd75SYan, Zheng * This function splits a single item into two items, 4518ad48fd75SYan, Zheng * giving 'new_key' to the new item and splitting the 4519ad48fd75SYan, Zheng * old one at split_offset (from the start of the item). 4520ad48fd75SYan, Zheng * 4521ad48fd75SYan, Zheng * The path may be released by this operation. After 4522ad48fd75SYan, Zheng * the split, the path is pointing to the old item. The 4523ad48fd75SYan, Zheng * new item is going to be in the same node as the old one. 4524ad48fd75SYan, Zheng * 4525ad48fd75SYan, Zheng * Note, the item being split must be smaller enough to live alone on 4526ad48fd75SYan, Zheng * a tree block with room for one extra struct btrfs_item 4527ad48fd75SYan, Zheng * 4528ad48fd75SYan, Zheng * This allows us to split the item in place, keeping a lock on the 4529ad48fd75SYan, Zheng * leaf the entire time. 
4530ad48fd75SYan, Zheng */ 4531ad48fd75SYan, Zheng int btrfs_split_item(struct btrfs_trans_handle *trans, 4532ad48fd75SYan, Zheng struct btrfs_root *root, 4533ad48fd75SYan, Zheng struct btrfs_path *path, 4534310712b2SOmar Sandoval const struct btrfs_key *new_key, 4535ad48fd75SYan, Zheng unsigned long split_offset) 4536ad48fd75SYan, Zheng { 4537ad48fd75SYan, Zheng int ret; 4538ad48fd75SYan, Zheng ret = setup_leaf_for_split(trans, root, path, 4539ad48fd75SYan, Zheng sizeof(struct btrfs_item)); 4540ad48fd75SYan, Zheng if (ret) 4541459931ecSChris Mason return ret; 4542ad48fd75SYan, Zheng 454325263cd7SDavid Sterba ret = split_item(path, new_key, split_offset); 4544ad48fd75SYan, Zheng return ret; 4545ad48fd75SYan, Zheng } 4546ad48fd75SYan, Zheng 4547ad48fd75SYan, Zheng /* 4548ad48fd75SYan, Zheng * This function duplicate a item, giving 'new_key' to the new item. 4549ad48fd75SYan, Zheng * It guarantees both items live in the same tree leaf and the new item 4550ad48fd75SYan, Zheng * is contiguous with the original item. 4551ad48fd75SYan, Zheng * 4552ad48fd75SYan, Zheng * This allows us to split file extent in place, keeping a lock on the 4553ad48fd75SYan, Zheng * leaf the entire time. 
4554ad48fd75SYan, Zheng */ 4555ad48fd75SYan, Zheng int btrfs_duplicate_item(struct btrfs_trans_handle *trans, 4556ad48fd75SYan, Zheng struct btrfs_root *root, 4557ad48fd75SYan, Zheng struct btrfs_path *path, 4558310712b2SOmar Sandoval const struct btrfs_key *new_key) 4559ad48fd75SYan, Zheng { 4560ad48fd75SYan, Zheng struct extent_buffer *leaf; 4561ad48fd75SYan, Zheng int ret; 4562ad48fd75SYan, Zheng u32 item_size; 4563ad48fd75SYan, Zheng 4564ad48fd75SYan, Zheng leaf = path->nodes[0]; 4565ad48fd75SYan, Zheng item_size = btrfs_item_size_nr(leaf, path->slots[0]); 4566ad48fd75SYan, Zheng ret = setup_leaf_for_split(trans, root, path, 4567ad48fd75SYan, Zheng item_size + sizeof(struct btrfs_item)); 4568ad48fd75SYan, Zheng if (ret) 4569ad48fd75SYan, Zheng return ret; 4570ad48fd75SYan, Zheng 4571ad48fd75SYan, Zheng path->slots[0]++; 4572afe5fea7STsutomu Itoh setup_items_for_insert(root, path, new_key, &item_size, 4573ad48fd75SYan, Zheng item_size, item_size + 4574ad48fd75SYan, Zheng sizeof(struct btrfs_item), 1); 4575ad48fd75SYan, Zheng leaf = path->nodes[0]; 4576ad48fd75SYan, Zheng memcpy_extent_buffer(leaf, 4577ad48fd75SYan, Zheng btrfs_item_ptr_offset(leaf, path->slots[0]), 4578ad48fd75SYan, Zheng btrfs_item_ptr_offset(leaf, path->slots[0] - 1), 4579ad48fd75SYan, Zheng item_size); 4580ad48fd75SYan, Zheng return 0; 4581459931ecSChris Mason } 4582459931ecSChris Mason 4583459931ecSChris Mason /* 4584d352ac68SChris Mason * make the item pointed to by the path smaller. new_size indicates 4585d352ac68SChris Mason * how small to make it, and from_end tells us if we just chop bytes 4586d352ac68SChris Mason * off the end of the item or if we shift the item to chop bytes off 4587d352ac68SChris Mason * the front. 
/*
 * Make the item pointed to by the path smaller.
 *
 * @path:     path whose nodes[0]/slots[0] identify the leaf and the item
 * @new_size: size the item should have afterwards
 * @from_end: non-zero to chop bytes off the end of the item; zero to chop
 *            bytes off the front, which shifts the item and also bumps
 *            the item's key offset by the number of bytes removed
 *
 * Nothing is done when the item already has new_size bytes.  The leaf is
 * marked dirty; if the leaf free space ends up negative the leaf is
 * printed and BUG() is hit.
 */
void btrfs_truncate_item(struct btrfs_path *path, u32 new_size, int from_end)
{
	int slot;
	struct extent_buffer *leaf;
	struct btrfs_item *item;
	u32 nritems;
	unsigned int data_end;
	unsigned int old_data_start;
	unsigned int old_size;
	unsigned int size_diff;
	int i;
	struct btrfs_map_token token;

	leaf = path->nodes[0];
	slot = path->slots[0];

	old_size = btrfs_item_size_nr(leaf, slot);
	if (old_size == new_size)
		return;

	nritems = btrfs_header_nritems(leaf);
	data_end = leaf_data_end(leaf);

	old_data_start = btrfs_item_offset_nr(leaf, slot);

	/* number of bytes being removed from the item */
	size_diff = old_size - new_size;

	/*
	 * NOTE(review): slot has already been used above to read the item
	 * size and offset by the time these sanity checks run.
	 */
	BUG_ON(slot < 0);
	BUG_ON(slot >= nritems);

	/*
	 * item0..itemN ... dataN.offset..dataN.size .. data0.size
	 */
	/*
	 * first correct the data pointers: every item from slot to the last
	 * one has its data offset raised by size_diff
	 */
	btrfs_init_map_token(&token, leaf);
	for (i = slot; i < nritems; i++) {
		u32 ioff;
		item = btrfs_item_nr(i);

		ioff = btrfs_token_item_offset(&token, item);
		btrfs_set_token_item_offset(&token, item, ioff + size_diff);
	}

	/* shift the data */
	if (from_end) {
		/*
		 * move everything from data_end up to the first new_size
		 * bytes of this item (inclusive) forward by size_diff
		 */
		memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
			      data_end + size_diff, BTRFS_LEAF_DATA_OFFSET +
			      data_end, old_data_start + new_size - data_end);
	} else {
		struct btrfs_disk_key disk_key;
		u64 offset;

		btrfs_item_key(leaf, &disk_key, slot);

		if (btrfs_disk_key_type(&disk_key) == BTRFS_EXTENT_DATA_KEY) {
			unsigned long ptr;
			struct btrfs_file_extent_item *fi;

			/*
			 * The item offset was already raised by size_diff in
			 * the loop above, so step back by size_diff to reach
			 * the position the item data started at before.
			 */
			fi = btrfs_item_ptr(leaf, slot,
					    struct btrfs_file_extent_item);
			fi = (struct btrfs_file_extent_item *)(
			     (unsigned long)fi - size_diff);

			/*
			 * for inline extents, copy the leading
			 * BTRFS_FILE_EXTENT_INLINE_DATA_START bytes from the
			 * old start of the item to its new start
			 */
			if (btrfs_file_extent_type(leaf, fi) ==
			    BTRFS_FILE_EXTENT_INLINE) {
				ptr = btrfs_item_ptr_offset(leaf, slot);
				memmove_extent_buffer(leaf, ptr,
				      (unsigned long)fi,
				      BTRFS_FILE_EXTENT_INLINE_DATA_START);
			}
		}

		/*
		 * move the data that sits between data_end and the old start
		 * of this item forward by size_diff
		 */
		memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
			      data_end + size_diff, BTRFS_LEAF_DATA_OFFSET +
			      data_end, old_data_start - data_end);

		/* bytes were chopped off the front: advance the key offset */
		offset = btrfs_disk_key_offset(&disk_key);
		btrfs_set_disk_key_offset(&disk_key, offset + size_diff);
		btrfs_set_item_key(leaf, &disk_key, slot);
		/* the first key of the leaf changed, propagate it upwards */
		if (slot == 0)
			fixup_low_keys(path, &disk_key, 1);
	}

	item = btrfs_item_nr(slot);
	btrfs_set_item_size(leaf, item, new_size);
	btrfs_mark_buffer_dirty(leaf);

	if (btrfs_leaf_free_space(leaf) < 0) {
		btrfs_print_leaf(leaf);
		BUG();
	}
}
/*
 * Make the item pointed to by the path bigger.
 *
 * @path:      path whose nodes[0]/slots[0] identify the leaf and the item
 * @data_size: number of bytes to add to the item
 *
 * The leaf must already have at least data_size bytes of free space and
 * the slot must refer to an existing item; otherwise the leaf is printed
 * and BUG() is hit.  The leaf is marked dirty.
 */
void btrfs_extend_item(struct btrfs_path *path, u32 data_size)
{
	int slot;
	struct extent_buffer *leaf;
	struct btrfs_item *item;
	u32 nritems;
	unsigned int data_end;
	unsigned int old_data;
	unsigned int old_size;
	int i;
	struct btrfs_map_token token;

	leaf = path->nodes[0];

	nritems = btrfs_header_nritems(leaf);
	data_end = leaf_data_end(leaf);

	/*
	 * NOTE(review): btrfs_leaf_free_space() is compared with < 0 and
	 * printed with %d elsewhere in this file, so it presumably returns a
	 * signed int; if so this comparison against a u32 is carried out
	 * unsigned - confirm.
	 */
	if (btrfs_leaf_free_space(leaf) < data_size) {
		btrfs_print_leaf(leaf);
		BUG();
	}
	slot = path->slots[0];
	old_data = btrfs_item_end_nr(leaf, slot);

	BUG_ON(slot < 0);
	if (slot >= nritems) {
		btrfs_print_leaf(leaf);
		btrfs_crit(leaf->fs_info, "slot %d too large, nritems %d",
			   slot, nritems);
		BUG();
	}

	/*
	 * item0..itemN ... dataN.offset..dataN.size .. data0.size
	 */
	/*
	 * first correct the data pointers: every item from slot to the last
	 * one has its data offset lowered by data_size
	 */
	btrfs_init_map_token(&token, leaf);
	for (i = slot; i < nritems; i++) {
		u32 ioff;
		item = btrfs_item_nr(i);

		ioff = btrfs_token_item_offset(&token, item);
		btrfs_set_token_item_offset(&token, item, ioff - data_size);
	}

	/*
	 * shift the data: everything between data_end and the end of this
	 * item moves down by data_size
	 */
	memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
		      data_end - data_size, BTRFS_LEAF_DATA_OFFSET +
		      data_end, old_data - data_end);

	/* NOTE(review): data_end is not read again after this assignment */
	data_end = old_data;
	old_size = btrfs_item_size_nr(leaf, slot);
	item = btrfs_item_nr(slot);
	btrfs_set_item_size(leaf, item, old_size + data_size);
	btrfs_mark_buffer_dirty(leaf);

	if (btrfs_leaf_free_space(leaf) < 0) {
		btrfs_print_leaf(leaf);
		BUG();
	}
}

/*
 * this is a helper for btrfs_insert_empty_items, the main goal here is
 * to save stack depth by doing the bulk of the work in a function
 * that doesn't call btrfs_search_slot
 *
 * @root:       tree root, used here only to reach fs_info for messages
 * @path:       path whose nodes[0]/slots[0] give the leaf and the slot
 *              where the first new item goes
 * @cpu_key:    array of nr keys for the new items
 * @data_size:  array of nr data sizes for the new items
 * @total_data: amount by which existing item data is shifted to make room
 * @total_size: free space the leaf must have; btrfs_insert_empty_items()
 *              passes total_data plus nr item headers
 * @nr:         number of items to insert
 *
 * Only item headers and offsets are set up; the data areas of the new
 * items are not written here.  Inconsistencies are fatal: the leaf is
 * printed and BUG() is hit.
 */
void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
			    const struct btrfs_key *cpu_key, u32 *data_size,
			    u32 total_data, u32 total_size, int nr)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_item *item;
	int i;
	u32 nritems;
	unsigned int data_end;
	struct btrfs_disk_key disk_key;
	struct extent_buffer *leaf;
	int slot;
	struct btrfs_map_token token;

	/* inserting at slot 0 changes the first key of the leaf */
	if (path->slots[0] == 0) {
		btrfs_cpu_key_to_disk(&disk_key, cpu_key);
		fixup_low_keys(path, &disk_key, 1);
	}
	btrfs_unlock_up_safe(path, 1);

	leaf = path->nodes[0];
	slot = path->slots[0];

	nritems = btrfs_header_nritems(leaf);
	data_end = leaf_data_end(leaf);

	/*
	 * NOTE(review): the free space is printed with %d below, so it is
	 * presumably a signed int; if so this comparison against a u32 is
	 * carried out unsigned - confirm.
	 */
	if (btrfs_leaf_free_space(leaf) < total_size) {
		btrfs_print_leaf(leaf);
		btrfs_crit(fs_info, "not enough freespace need %u have %d",
			   total_size, btrfs_leaf_free_space(leaf));
		BUG();
	}

	btrfs_init_map_token(&token, leaf);
	/* not appending at the end: existing items have to make room */
	if (slot != nritems) {
		unsigned int old_data = btrfs_item_end_nr(leaf, slot);

		if (old_data < data_end) {
			btrfs_print_leaf(leaf);
			btrfs_crit(fs_info, "slot %d old_data %d data_end %d",
				   slot, old_data, data_end);
			BUG();
		}
		/*
		 * item0..itemN ... dataN.offset..dataN.size .. data0.size
		 */
		/* first correct the data pointers */
		for (i = slot; i < nritems; i++) {
			u32 ioff;

			item = btrfs_item_nr(i);
			ioff = btrfs_token_item_offset(&token, item);
			btrfs_set_token_item_offset(&token, item,
						    ioff - total_data);
		}
		/* shift the items */
		memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
			      btrfs_item_nr_offset(slot),
			      (nritems - slot) * sizeof(struct btrfs_item));

		/* shift the data */
		memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
			      data_end - total_data, BTRFS_LEAF_DATA_OFFSET +
			      data_end, old_data - data_end);
		/* the new items' data is laid out downwards from here */
		data_end = old_data;
	}

	/* setup the item for the new data */
	for (i = 0; i < nr; i++) {
		btrfs_cpu_key_to_disk(&disk_key, cpu_key + i);
		btrfs_set_item_key(leaf, &disk_key, slot + i);
		item = btrfs_item_nr(slot + i);
		btrfs_set_token_item_offset(&token, item, data_end - data_size[i]);
		data_end -= data_size[i];
		btrfs_set_token_item_size(&token, item, data_size[i]);
	}

	btrfs_set_header_nritems(leaf, nritems + nr);
	btrfs_mark_buffer_dirty(leaf);

	if (btrfs_leaf_free_space(leaf) < 0) {
		btrfs_print_leaf(leaf);
		BUG();
	}
}

/*
 * Given a key and some data, insert items into the tree.
 * This does all the path init required, making room in the tree if needed.
 *
 * @cpu_key:   array of nr keys; the search is done with the first one
 * @data_size: array of nr data sizes
 * @nr:        number of items to insert
 *
 * Returns 0 on success, -EEXIST if btrfs_search_slot() found the key
 * already present, or the negative error returned by btrfs_search_slot().
 */
int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
			    struct btrfs_root *root,
			    struct btrfs_path *path,
			    const struct btrfs_key *cpu_key, u32 *data_size,
			    int nr)
{
	int ret = 0;
	int slot;
	int i;
	u32 total_size = 0;
	u32 total_data = 0;

	for (i = 0; i < nr; i++)
		total_data += data_size[i];

	/* payload plus one item header per new item */
	total_size = total_data + (nr * sizeof(struct btrfs_item));
	ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1);
	if (ret == 0)
		return -EEXIST;
	if (ret < 0)
		return ret;

	slot = path->slots[0];
	BUG_ON(slot < 0);

	setup_items_for_insert(root, path, cpu_key, data_size,
			       total_data, total_size, nr);
	return 0;
}
487562e2749eSChris Mason */ 4876310712b2SOmar Sandoval int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, 4877310712b2SOmar Sandoval const struct btrfs_key *cpu_key, void *data, 4878310712b2SOmar Sandoval u32 data_size) 487962e2749eSChris Mason { 488062e2749eSChris Mason int ret = 0; 48812c90e5d6SChris Mason struct btrfs_path *path; 48825f39d397SChris Mason struct extent_buffer *leaf; 48835f39d397SChris Mason unsigned long ptr; 488462e2749eSChris Mason 48852c90e5d6SChris Mason path = btrfs_alloc_path(); 4886db5b493aSTsutomu Itoh if (!path) 4887db5b493aSTsutomu Itoh return -ENOMEM; 48882c90e5d6SChris Mason ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size); 488962e2749eSChris Mason if (!ret) { 48905f39d397SChris Mason leaf = path->nodes[0]; 48915f39d397SChris Mason ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); 48925f39d397SChris Mason write_extent_buffer(leaf, data, ptr, data_size); 48935f39d397SChris Mason btrfs_mark_buffer_dirty(leaf); 489462e2749eSChris Mason } 48952c90e5d6SChris Mason btrfs_free_path(path); 4896aa5d6bedSChris Mason return ret; 4897be0e5c09SChris Mason } 4898be0e5c09SChris Mason 489974123bd7SChris Mason /* 49005de08d7dSChris Mason * delete the pointer from a given node. 490174123bd7SChris Mason * 4902d352ac68SChris Mason * the tree should have been previously balanced so the deletion does not 4903d352ac68SChris Mason * empty a node. 
/*
 * delete the pointer from a given node.
 *
 * the tree should have been previously balanced so the deletion does not
 * empty a node.
 *
 * @root:  tree the node belongs to
 * @path:  path whose nodes[level] is the node to modify
 * @level: level of that node in the path
 * @slot:  index of the key pointer to remove
 *
 * A failure to record the change in the tree mod log is fatal (BUG_ON).
 */
static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
		    int level, int slot)
{
	struct extent_buffer *parent = path->nodes[level];
	u32 nritems;
	int ret;

	nritems = btrfs_header_nritems(parent);
	if (slot != nritems - 1) {
		/* not the last pointer: close the gap left by the removal */
		if (level) {
			ret = tree_mod_log_insert_move(parent, slot, slot + 1,
					nritems - slot - 1);
			BUG_ON(ret < 0);
		}
		memmove_extent_buffer(parent,
			      btrfs_node_key_ptr_offset(slot),
			      btrfs_node_key_ptr_offset(slot + 1),
			      sizeof(struct btrfs_key_ptr) *
			      (nritems - slot - 1));
	} else if (level) {
		/* last pointer: nothing to move, only log the removal */
		ret = tree_mod_log_insert_key(parent, slot, MOD_LOG_KEY_REMOVE,
				GFP_NOFS);
		BUG_ON(ret < 0);
	}

	nritems--;
	btrfs_set_header_nritems(parent, nritems);
	if (nritems == 0 && parent == root->node) {
		BUG_ON(btrfs_header_level(root->node) != 1);
		/* just turn the root into a leaf and break */
		btrfs_set_header_level(root->node, 0);
	} else if (slot == 0) {
		struct btrfs_disk_key disk_key;

		/* the node's first key changed, propagate it upwards */
		btrfs_node_key(parent, &disk_key, 0);
		fixup_low_keys(path, &disk_key, level + 1);
	}
	btrfs_mark_buffer_dirty(parent);
}

/*
 * a helper function to delete the leaf pointed to by path->slots[1] and
 * path->nodes[1].
 *
 * This deletes the pointer in path->nodes[1] and frees the leaf
 * block extent.  Nothing is returned.
 *
 * The path must have already been setup for deleting the leaf, including
 * all the proper balancing.  path->nodes[1] must be locked.
 */
static noinline void btrfs_del_leaf(struct btrfs_trans_handle *trans,
				    struct btrfs_root *root,
				    struct btrfs_path *path,
				    struct extent_buffer *leaf)
{
	WARN_ON(btrfs_header_generation(leaf) != trans->transid);
	del_ptr(root, path, 1, path->slots[1]);

	/*
	 * btrfs_free_extent is expensive, we want to make sure we
	 * aren't holding any locks when we call it
	 */
	btrfs_unlock_up_safe(path, 0);

	root_sub_used(root, leaf->len);

	/*
	 * take an extra reference on the leaf for the calls below; it is
	 * dropped again by free_extent_buffer_stale()
	 * NOTE(review): presumably this keeps the buffer alive across
	 * btrfs_free_tree_block() - confirm.
	 */
	atomic_inc(&leaf->refs);
	btrfs_free_tree_block(trans, root, leaf, 0, 1);
	free_extent_buffer_stale(leaf);
}
/*
 * delete the item at the leaf level in path.  If that empties
 * the leaf, remove it from the tree
 *
 * @path: path whose nodes[0] is the leaf to delete from
 * @slot: index of the first item to delete
 * @nr:   number of consecutive items to delete
 *
 * When the leaf ends up using less than a third of the leaf data size,
 * its remaining items are pushed to the neighbouring leaves and the leaf
 * is deleted if that empties it.
 *
 * Returns 0, or the negative value returned by push_leaf_left() or
 * push_leaf_right() when it is not -ENOSPC and the leaf was not emptied
 * and deleted afterwards.
 */
int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		    struct btrfs_path *path, int slot, int nr)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct extent_buffer *leaf;
	struct btrfs_item *item;
	u32 last_off;
	u32 dsize = 0;
	int ret = 0;
	int wret;
	int i;
	u32 nritems;

	leaf = path->nodes[0];
	/* data offset of the last item being deleted */
	last_off = btrfs_item_offset_nr(leaf, slot + nr - 1);

	/* total number of data bytes freed by the deletion */
	for (i = 0; i < nr; i++)
		dsize += btrfs_item_size_nr(leaf, slot + i);

	nritems = btrfs_header_nritems(leaf);

	/* items follow the deleted range: close the gap in data and headers */
	if (slot + nr != nritems) {
		int data_end = leaf_data_end(leaf);
		struct btrfs_map_token token;

		/* move the data of the following items up by dsize */
		memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
			      data_end + dsize,
			      BTRFS_LEAF_DATA_OFFSET + data_end,
			      last_off - data_end);

		/* and adjust their recorded data offsets to match */
		btrfs_init_map_token(&token, leaf);
		for (i = slot + nr; i < nritems; i++) {
			u32 ioff;

			item = btrfs_item_nr(i);
			ioff = btrfs_token_item_offset(&token, item);
			btrfs_set_token_item_offset(&token, item, ioff + dsize);
		}

		/* move the following item headers over the deleted ones */
		memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot),
			      btrfs_item_nr_offset(slot + nr),
			      sizeof(struct btrfs_item) *
			      (nritems - slot - nr));
	}
	btrfs_set_header_nritems(leaf, nritems - nr);
	nritems -= nr;

	/* delete the leaf if we've emptied it */
	if (nritems == 0) {
		if (leaf == root->node) {
			/* the root itself is kept and stays a level 0 block */
			btrfs_set_header_level(leaf, 0);
		} else {
			btrfs_set_path_blocking(path);
			btrfs_clean_tree_block(leaf);
			btrfs_del_leaf(trans, root, path, leaf);
		}
	} else {
		int used = leaf_space_used(leaf, 0, nritems);
		/* the first key of the leaf changed, propagate it upwards */
		if (slot == 0) {
			struct btrfs_disk_key disk_key;

			btrfs_item_key(leaf, &disk_key, 0);
			fixup_low_keys(path, &disk_key, 1);
		}

		/* delete the leaf if it is mostly empty */
		if (used < BTRFS_LEAF_DATA_SIZE(fs_info) / 3) {
			/* push_leaf_left fixes the path.
			 * make sure the path still points to our leaf
			 * for possible call to del_ptr below
			 */
			slot = path->slots[1];
			/*
			 * extra reference on the leaf, dropped by one of the
			 * free_extent_buffer() calls below
			 */
			atomic_inc(&leaf->refs);

			btrfs_set_path_blocking(path);
			wret = push_leaf_left(trans, root, path, 1, 1,
					      1, (u32)-1);
			/* -ENOSPC from the push is not reported to the caller */
			if (wret < 0 && wret != -ENOSPC)
				ret = wret;

			/* still our leaf and not empty yet: try the right side */
			if (path->nodes[0] == leaf &&
			    btrfs_header_nritems(leaf)) {
				wret = push_leaf_right(trans, root, path, 1,
						       1, 1, 0);
				if (wret < 0 && wret != -ENOSPC)
					ret = wret;
			}

			if (btrfs_header_nritems(leaf) == 0) {
				/* restore the parent slot saved above */
				path->slots[1] = slot;
				btrfs_del_leaf(trans, root, path, leaf);
				free_extent_buffer(leaf);
				/* an earlier push error is discarded here */
				ret = 0;
			} else {
				/* if we're still in the path, make sure
				 * we're dirty.  Otherwise, one of the
				 * push_leaf functions must have already
				 * dirtied this buffer
				 */
				if (path->nodes[0] == leaf)
					btrfs_mark_buffer_dirty(leaf);
				free_extent_buffer(leaf);
			}
		} else {
			btrfs_mark_buffer_dirty(leaf);
		}
	}
	return ret;
}
50967bb86316SChris Mason */ 509716e7549fSJosef Bacik int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) 50987bb86316SChris Mason { 5099925baeddSChris Mason struct btrfs_key key; 5100925baeddSChris Mason struct btrfs_disk_key found_key; 5101925baeddSChris Mason int ret; 51027bb86316SChris Mason 5103925baeddSChris Mason btrfs_item_key_to_cpu(path->nodes[0], &key, 0); 5104925baeddSChris Mason 5105e8b0d724SFilipe David Borba Manana if (key.offset > 0) { 5106925baeddSChris Mason key.offset--; 5107e8b0d724SFilipe David Borba Manana } else if (key.type > 0) { 5108925baeddSChris Mason key.type--; 5109e8b0d724SFilipe David Borba Manana key.offset = (u64)-1; 5110e8b0d724SFilipe David Borba Manana } else if (key.objectid > 0) { 5111925baeddSChris Mason key.objectid--; 5112e8b0d724SFilipe David Borba Manana key.type = (u8)-1; 5113e8b0d724SFilipe David Borba Manana key.offset = (u64)-1; 5114e8b0d724SFilipe David Borba Manana } else { 51157bb86316SChris Mason return 1; 5116e8b0d724SFilipe David Borba Manana } 51177bb86316SChris Mason 5118b3b4aa74SDavid Sterba btrfs_release_path(path); 5119925baeddSChris Mason ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 5120925baeddSChris Mason if (ret < 0) 5121925baeddSChris Mason return ret; 5122925baeddSChris Mason btrfs_item_key(path->nodes[0], &found_key, 0); 5123925baeddSChris Mason ret = comp_keys(&found_key, &key); 5124337c6f68SFilipe Manana /* 5125337c6f68SFilipe Manana * We might have had an item with the previous key in the tree right 5126337c6f68SFilipe Manana * before we released our path. And after we released our path, that 5127337c6f68SFilipe Manana * item might have been pushed to the first slot (0) of the leaf we 5128337c6f68SFilipe Manana * were holding due to a tree balance. Alternatively, an item with the 5129337c6f68SFilipe Manana * previous key can exist as the only element of a leaf (big fat item). 
5130337c6f68SFilipe Manana * Therefore account for these 2 cases, so that our callers (like 5131337c6f68SFilipe Manana * btrfs_previous_item) don't miss an existing item with a key matching 5132337c6f68SFilipe Manana * the previous key we computed above. 5133337c6f68SFilipe Manana */ 5134337c6f68SFilipe Manana if (ret <= 0) 51357bb86316SChris Mason return 0; 5136925baeddSChris Mason return 1; 51377bb86316SChris Mason } 51387bb86316SChris Mason 51393f157a2fSChris Mason /* 51403f157a2fSChris Mason * A helper function to walk down the tree starting at min_key, and looking 5141de78b51aSEric Sandeen * for nodes or leaves that are have a minimum transaction id. 5142de78b51aSEric Sandeen * This is used by the btree defrag code, and tree logging 51433f157a2fSChris Mason * 51443f157a2fSChris Mason * This does not cow, but it does stuff the starting key it finds back 51453f157a2fSChris Mason * into min_key, so you can call btrfs_search_slot with cow=1 on the 51463f157a2fSChris Mason * key and get a writable path. 51473f157a2fSChris Mason * 51483f157a2fSChris Mason * This honors path->lowest_level to prevent descent past a given level 51493f157a2fSChris Mason * of the tree. 51503f157a2fSChris Mason * 5151d352ac68SChris Mason * min_trans indicates the oldest transaction that you are interested 5152d352ac68SChris Mason * in walking through. Any nodes or leaves older than min_trans are 5153d352ac68SChris Mason * skipped over (without reading them). 5154d352ac68SChris Mason * 51553f157a2fSChris Mason * returns zero if something useful was found, < 0 on error and 1 if there 51563f157a2fSChris Mason * was nothing in the tree that matched the search criteria. 
/*
 * A helper function to walk down the tree starting at min_key, and looking
 * for nodes or leaves that have a minimum transaction id.
 * This is used by the btree defrag code, and tree logging
 *
 * This does not cow, but it does stuff the starting key it finds back
 * into min_key, so you can call btrfs_search_slot with cow=1 on the
 * key and get a writable path.
 *
 * This honors path->lowest_level to prevent descent past a given level
 * of the tree.
 *
 * min_trans indicates the oldest transaction that you are interested
 * in walking through.  Any nodes or leaves older than min_trans are
 * skipped over (without reading them).
 *
 * path->keep_locks is forced to 1 for the duration of the walk and put
 * back to the caller's value before returning.
 *
 * returns zero if something useful was found, < 0 on error and 1 if there
 * was nothing in the tree that matched the search criteria.
 */
int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
			 struct btrfs_path *path,
			 u64 min_trans)
{
	struct extent_buffer *cur;
	struct btrfs_key found_key;
	int slot;
	int sret;
	u32 nritems;
	int level;
	int ret = 1;
	int keep_locks = path->keep_locks;

	path->keep_locks = 1;
again:
	/* (re)start the walk from the read-locked root node */
	cur = btrfs_read_lock_root_node(root);
	level = btrfs_header_level(cur);
	WARN_ON(path->nodes[level]);
	path->nodes[level] = cur;
	path->locks[level] = BTRFS_READ_LOCK;

	/* the root itself is older than min_trans: nothing to find */
	if (btrfs_header_generation(cur) < min_trans) {
		ret = 1;
		goto out;
	}
	while (1) {
		nritems = btrfs_header_nritems(cur);
		level = btrfs_header_level(cur);
		sret = btrfs_bin_search(cur, min_key, &slot);
		if (sret < 0) {
			ret = sret;
			goto out;
		}

		/* at the lowest level, we're done, setup the path and exit */
		if (level == path->lowest_level) {
			if (slot >= nritems)
				goto find_next_key;
			ret = 0;
			path->slots[level] = slot;
			btrfs_item_key_to_cpu(cur, &found_key, slot);
			goto out;
		}
		/* no exact match in this node: step back one slot */
		if (sret && slot > 0)
			slot--;
		/*
		 * check this node pointer against the min_trans parameters.
		 * If it is too old, skip to the next one.
		 */
		while (slot < nritems) {
			u64 gen;

			gen = btrfs_node_ptr_generation(cur, slot);
			if (gen < min_trans) {
				slot++;
				continue;
			}
			break;
		}
find_next_key:
		/*
		 * we didn't find a candidate key in this node, walk forward
		 * and find another one
		 */
		if (slot >= nritems) {
			path->slots[level] = slot;
			btrfs_set_path_blocking(path);
			sret = btrfs_find_next_key(root, path, min_key, level,
						  min_trans);
			if (sret == 0) {
				/* min_key was updated: search again from the root */
				btrfs_release_path(path);
				goto again;
			} else {
				/* ret keeps the value it had before this call */
				goto out;
			}
		}
		/* save our key for returning back */
		btrfs_node_key_to_cpu(cur, &found_key, slot);
		path->slots[level] = slot;
		if (level == path->lowest_level) {
			ret = 0;
			goto out;
		}
		/* descend into the child the chosen slot points at */
		btrfs_set_path_blocking(path);
		cur = btrfs_read_node_slot(cur, slot);
		if (IS_ERR(cur)) {
			ret = PTR_ERR(cur);
			goto out;
		}

		btrfs_tree_read_lock(cur);

		path->locks[level - 1] = BTRFS_READ_LOCK;
		path->nodes[level - 1] = cur;
		unlock_up(path, level, 1, 0, NULL);
	}
out:
	path->keep_locks = keep_locks;
	if (ret == 0) {
		btrfs_unlock_up_safe(path, path->lowest_level + 1);
		btrfs_set_path_blocking(path);
		/* hand the key that was found back to the caller */
		memcpy(min_key, &found_key, sizeof(found_key));
	}
	return ret;
}

/*
 * this is similar to btrfs_next_leaf, but does not try to preserve
 * and fixup the path.  It looks for and returns the next key in the
 * tree based on the current path and the min_trans parameters.
 *
 * 0 is returned if another key is found, < 0 if there are any errors
 * and 1 is returned if there are no higher keys in the tree
 *
 * path->keep_locks should be set to 1 on the search made before
 * calling this function.
52743f157a2fSChris Mason */ 5275e7a84565SChris Mason int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path, 5276de78b51aSEric Sandeen struct btrfs_key *key, int level, u64 min_trans) 5277e7a84565SChris Mason { 5278e7a84565SChris Mason int slot; 5279e7a84565SChris Mason struct extent_buffer *c; 5280e7a84565SChris Mason 52816a9fb468SJosef Bacik WARN_ON(!path->keep_locks && !path->skip_locking); 5282e7a84565SChris Mason while (level < BTRFS_MAX_LEVEL) { 5283e7a84565SChris Mason if (!path->nodes[level]) 5284e7a84565SChris Mason return 1; 5285e7a84565SChris Mason 5286e7a84565SChris Mason slot = path->slots[level] + 1; 5287e7a84565SChris Mason c = path->nodes[level]; 52883f157a2fSChris Mason next: 5289e7a84565SChris Mason if (slot >= btrfs_header_nritems(c)) { 529033c66f43SYan Zheng int ret; 529133c66f43SYan Zheng int orig_lowest; 529233c66f43SYan Zheng struct btrfs_key cur_key; 529333c66f43SYan Zheng if (level + 1 >= BTRFS_MAX_LEVEL || 529433c66f43SYan Zheng !path->nodes[level + 1]) 5295e7a84565SChris Mason return 1; 529633c66f43SYan Zheng 52976a9fb468SJosef Bacik if (path->locks[level + 1] || path->skip_locking) { 529833c66f43SYan Zheng level++; 5299e7a84565SChris Mason continue; 5300e7a84565SChris Mason } 530133c66f43SYan Zheng 530233c66f43SYan Zheng slot = btrfs_header_nritems(c) - 1; 530333c66f43SYan Zheng if (level == 0) 530433c66f43SYan Zheng btrfs_item_key_to_cpu(c, &cur_key, slot); 530533c66f43SYan Zheng else 530633c66f43SYan Zheng btrfs_node_key_to_cpu(c, &cur_key, slot); 530733c66f43SYan Zheng 530833c66f43SYan Zheng orig_lowest = path->lowest_level; 5309b3b4aa74SDavid Sterba btrfs_release_path(path); 531033c66f43SYan Zheng path->lowest_level = level; 531133c66f43SYan Zheng ret = btrfs_search_slot(NULL, root, &cur_key, path, 531233c66f43SYan Zheng 0, 0); 531333c66f43SYan Zheng path->lowest_level = orig_lowest; 531433c66f43SYan Zheng if (ret < 0) 531533c66f43SYan Zheng return ret; 531633c66f43SYan Zheng 531733c66f43SYan Zheng c = 
path->nodes[level]; 531833c66f43SYan Zheng slot = path->slots[level]; 531933c66f43SYan Zheng if (ret == 0) 532033c66f43SYan Zheng slot++; 532133c66f43SYan Zheng goto next; 532233c66f43SYan Zheng } 532333c66f43SYan Zheng 5324e7a84565SChris Mason if (level == 0) 5325e7a84565SChris Mason btrfs_item_key_to_cpu(c, key, slot); 53263f157a2fSChris Mason else { 53273f157a2fSChris Mason u64 gen = btrfs_node_ptr_generation(c, slot); 53283f157a2fSChris Mason 53293f157a2fSChris Mason if (gen < min_trans) { 53303f157a2fSChris Mason slot++; 53313f157a2fSChris Mason goto next; 53323f157a2fSChris Mason } 5333e7a84565SChris Mason btrfs_node_key_to_cpu(c, key, slot); 53343f157a2fSChris Mason } 5335e7a84565SChris Mason return 0; 5336e7a84565SChris Mason } 5337e7a84565SChris Mason return 1; 5338e7a84565SChris Mason } 5339e7a84565SChris Mason 53407bb86316SChris Mason /* 5341925baeddSChris Mason * search the tree again to find a leaf with greater keys 53420f70abe2SChris Mason * returns 0 if it found something or 1 if there are no greater leaves. 53430f70abe2SChris Mason * returns < 0 on io errors. 
534497571fd0SChris Mason */ 5345234b63a0SChris Mason int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) 5346d97e63b6SChris Mason { 53473d7806ecSJan Schmidt return btrfs_next_old_leaf(root, path, 0); 53483d7806ecSJan Schmidt } 53493d7806ecSJan Schmidt 53503d7806ecSJan Schmidt int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path, 53513d7806ecSJan Schmidt u64 time_seq) 53523d7806ecSJan Schmidt { 5353d97e63b6SChris Mason int slot; 53548e73f275SChris Mason int level; 53555f39d397SChris Mason struct extent_buffer *c; 53568e73f275SChris Mason struct extent_buffer *next; 5357925baeddSChris Mason struct btrfs_key key; 5358925baeddSChris Mason u32 nritems; 5359925baeddSChris Mason int ret; 53608e73f275SChris Mason int old_spinning = path->leave_spinning; 5361bd681513SChris Mason int next_rw_lock = 0; 5362925baeddSChris Mason 5363925baeddSChris Mason nritems = btrfs_header_nritems(path->nodes[0]); 5364d397712bSChris Mason if (nritems == 0) 5365925baeddSChris Mason return 1; 5366925baeddSChris Mason 53678e73f275SChris Mason btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1); 53688e73f275SChris Mason again: 53698e73f275SChris Mason level = 1; 53708e73f275SChris Mason next = NULL; 5371bd681513SChris Mason next_rw_lock = 0; 5372b3b4aa74SDavid Sterba btrfs_release_path(path); 53738e73f275SChris Mason 5374a2135011SChris Mason path->keep_locks = 1; 53758e73f275SChris Mason path->leave_spinning = 1; 53768e73f275SChris Mason 53773d7806ecSJan Schmidt if (time_seq) 53783d7806ecSJan Schmidt ret = btrfs_search_old_slot(root, &key, path, time_seq); 53793d7806ecSJan Schmidt else 5380925baeddSChris Mason ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 5381925baeddSChris Mason path->keep_locks = 0; 5382925baeddSChris Mason 5383925baeddSChris Mason if (ret < 0) 5384925baeddSChris Mason return ret; 5385925baeddSChris Mason 5386a2135011SChris Mason nritems = btrfs_header_nritems(path->nodes[0]); 5387168fd7d2SChris Mason /* 5388168fd7d2SChris 
Mason * by releasing the path above we dropped all our locks. A balance 5389168fd7d2SChris Mason * could have added more items next to the key that used to be 5390168fd7d2SChris Mason * at the very end of the block. So, check again here and 5391168fd7d2SChris Mason * advance the path if there are now more items available. 5392168fd7d2SChris Mason */ 5393a2135011SChris Mason if (nritems > 0 && path->slots[0] < nritems - 1) { 5394e457afecSYan Zheng if (ret == 0) 5395168fd7d2SChris Mason path->slots[0]++; 53968e73f275SChris Mason ret = 0; 5397925baeddSChris Mason goto done; 5398925baeddSChris Mason } 53990b43e04fSLiu Bo /* 54000b43e04fSLiu Bo * So the above check misses one case: 54010b43e04fSLiu Bo * - after releasing the path above, someone has removed the item that 54020b43e04fSLiu Bo * used to be at the very end of the block, and balance between leafs 54030b43e04fSLiu Bo * gets another one with bigger key.offset to replace it. 54040b43e04fSLiu Bo * 54050b43e04fSLiu Bo * This one should be returned as well, or we can get leaf corruption 54060b43e04fSLiu Bo * later(esp. in __btrfs_drop_extents()). 54070b43e04fSLiu Bo * 54080b43e04fSLiu Bo * And a bit more explanation about this check, 54090b43e04fSLiu Bo * with ret > 0, the key isn't found, the path points to the slot 54100b43e04fSLiu Bo * where it should be inserted, so the path->slots[0] item must be the 54110b43e04fSLiu Bo * bigger one. 
54120b43e04fSLiu Bo */ 54130b43e04fSLiu Bo if (nritems > 0 && ret > 0 && path->slots[0] == nritems - 1) { 54140b43e04fSLiu Bo ret = 0; 54150b43e04fSLiu Bo goto done; 54160b43e04fSLiu Bo } 5417d97e63b6SChris Mason 5418234b63a0SChris Mason while (level < BTRFS_MAX_LEVEL) { 54198e73f275SChris Mason if (!path->nodes[level]) { 54208e73f275SChris Mason ret = 1; 54218e73f275SChris Mason goto done; 54228e73f275SChris Mason } 54235f39d397SChris Mason 5424d97e63b6SChris Mason slot = path->slots[level] + 1; 5425d97e63b6SChris Mason c = path->nodes[level]; 54265f39d397SChris Mason if (slot >= btrfs_header_nritems(c)) { 5427d97e63b6SChris Mason level++; 54288e73f275SChris Mason if (level == BTRFS_MAX_LEVEL) { 54298e73f275SChris Mason ret = 1; 54308e73f275SChris Mason goto done; 54318e73f275SChris Mason } 5432d97e63b6SChris Mason continue; 5433d97e63b6SChris Mason } 54345f39d397SChris Mason 5435925baeddSChris Mason if (next) { 5436bd681513SChris Mason btrfs_tree_unlock_rw(next, next_rw_lock); 54375f39d397SChris Mason free_extent_buffer(next); 5438925baeddSChris Mason } 54395f39d397SChris Mason 54408e73f275SChris Mason next = c; 5441bd681513SChris Mason next_rw_lock = path->locks[level]; 5442d07b8528SLiu Bo ret = read_block_for_search(root, path, &next, level, 5443cda79c54SDavid Sterba slot, &key); 54448e73f275SChris Mason if (ret == -EAGAIN) 54458e73f275SChris Mason goto again; 54465f39d397SChris Mason 544776a05b35SChris Mason if (ret < 0) { 5448b3b4aa74SDavid Sterba btrfs_release_path(path); 544976a05b35SChris Mason goto done; 545076a05b35SChris Mason } 545176a05b35SChris Mason 54525cd57b2cSChris Mason if (!path->skip_locking) { 5453bd681513SChris Mason ret = btrfs_try_tree_read_lock(next); 5454d42244a0SJan Schmidt if (!ret && time_seq) { 5455d42244a0SJan Schmidt /* 5456d42244a0SJan Schmidt * If we don't get the lock, we may be racing 5457d42244a0SJan Schmidt * with push_leaf_left, holding that lock while 5458d42244a0SJan Schmidt * itself waiting for the leaf we've currently 
5459d42244a0SJan Schmidt * locked. To solve this situation, we give up 5460d42244a0SJan Schmidt * on our lock and cycle. 5461d42244a0SJan Schmidt */ 5462cf538830SJan Schmidt free_extent_buffer(next); 5463d42244a0SJan Schmidt btrfs_release_path(path); 5464d42244a0SJan Schmidt cond_resched(); 5465d42244a0SJan Schmidt goto again; 5466d42244a0SJan Schmidt } 54678e73f275SChris Mason if (!ret) { 54688e73f275SChris Mason btrfs_set_path_blocking(path); 5469fd7ba1c1SJosef Bacik __btrfs_tree_read_lock(next, 5470*bf77467aSJosef Bacik BTRFS_NESTING_RIGHT, 5471fd7ba1c1SJosef Bacik path->recurse); 54728e73f275SChris Mason } 5473bd681513SChris Mason next_rw_lock = BTRFS_READ_LOCK; 5474bd681513SChris Mason } 5475d97e63b6SChris Mason break; 5476d97e63b6SChris Mason } 5477d97e63b6SChris Mason path->slots[level] = slot; 5478d97e63b6SChris Mason while (1) { 5479d97e63b6SChris Mason level--; 5480d97e63b6SChris Mason c = path->nodes[level]; 5481925baeddSChris Mason if (path->locks[level]) 5482bd681513SChris Mason btrfs_tree_unlock_rw(c, path->locks[level]); 54838e73f275SChris Mason 54845f39d397SChris Mason free_extent_buffer(c); 5485d97e63b6SChris Mason path->nodes[level] = next; 5486d97e63b6SChris Mason path->slots[level] = 0; 5487a74a4b97SChris Mason if (!path->skip_locking) 5488bd681513SChris Mason path->locks[level] = next_rw_lock; 5489d97e63b6SChris Mason if (!level) 5490d97e63b6SChris Mason break; 5491b4ce94deSChris Mason 5492d07b8528SLiu Bo ret = read_block_for_search(root, path, &next, level, 5493cda79c54SDavid Sterba 0, &key); 54948e73f275SChris Mason if (ret == -EAGAIN) 54958e73f275SChris Mason goto again; 54968e73f275SChris Mason 549776a05b35SChris Mason if (ret < 0) { 5498b3b4aa74SDavid Sterba btrfs_release_path(path); 549976a05b35SChris Mason goto done; 550076a05b35SChris Mason } 550176a05b35SChris Mason 55025cd57b2cSChris Mason if (!path->skip_locking) { 5503bd681513SChris Mason ret = btrfs_try_tree_read_lock(next); 55048e73f275SChris Mason if (!ret) { 55058e73f275SChris 
Mason btrfs_set_path_blocking(path); 5506fd7ba1c1SJosef Bacik __btrfs_tree_read_lock(next, 5507*bf77467aSJosef Bacik BTRFS_NESTING_RIGHT, 5508fd7ba1c1SJosef Bacik path->recurse); 55098e73f275SChris Mason } 5510bd681513SChris Mason next_rw_lock = BTRFS_READ_LOCK; 5511bd681513SChris Mason } 5512d97e63b6SChris Mason } 55138e73f275SChris Mason ret = 0; 5514925baeddSChris Mason done: 5515f7c79f30SChris Mason unlock_up(path, 0, 1, 0, NULL); 55168e73f275SChris Mason path->leave_spinning = old_spinning; 55178e73f275SChris Mason if (!old_spinning) 55188e73f275SChris Mason btrfs_set_path_blocking(path); 55198e73f275SChris Mason 55208e73f275SChris Mason return ret; 5521d97e63b6SChris Mason } 55220b86a832SChris Mason 55233f157a2fSChris Mason /* 55243f157a2fSChris Mason * this uses btrfs_prev_leaf to walk backwards in the tree, and keeps 55253f157a2fSChris Mason * searching until it gets past min_objectid or finds an item of 'type' 55263f157a2fSChris Mason * 55273f157a2fSChris Mason * returns 0 if something is found, 1 if nothing was found and < 0 on error 55283f157a2fSChris Mason */ 55290b86a832SChris Mason int btrfs_previous_item(struct btrfs_root *root, 55300b86a832SChris Mason struct btrfs_path *path, u64 min_objectid, 55310b86a832SChris Mason int type) 55320b86a832SChris Mason { 55330b86a832SChris Mason struct btrfs_key found_key; 55340b86a832SChris Mason struct extent_buffer *leaf; 5535e02119d5SChris Mason u32 nritems; 55360b86a832SChris Mason int ret; 55370b86a832SChris Mason 55380b86a832SChris Mason while (1) { 55390b86a832SChris Mason if (path->slots[0] == 0) { 5540b4ce94deSChris Mason btrfs_set_path_blocking(path); 55410b86a832SChris Mason ret = btrfs_prev_leaf(root, path); 55420b86a832SChris Mason if (ret != 0) 55430b86a832SChris Mason return ret; 55440b86a832SChris Mason } else { 55450b86a832SChris Mason path->slots[0]--; 55460b86a832SChris Mason } 55470b86a832SChris Mason leaf = path->nodes[0]; 5548e02119d5SChris Mason nritems = btrfs_header_nritems(leaf); 
5549e02119d5SChris Mason if (nritems == 0) 5550e02119d5SChris Mason return 1; 5551e02119d5SChris Mason if (path->slots[0] == nritems) 5552e02119d5SChris Mason path->slots[0]--; 5553e02119d5SChris Mason 55540b86a832SChris Mason btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 5555e02119d5SChris Mason if (found_key.objectid < min_objectid) 5556e02119d5SChris Mason break; 55570a4eefbbSYan Zheng if (found_key.type == type) 55580a4eefbbSYan Zheng return 0; 5559e02119d5SChris Mason if (found_key.objectid == min_objectid && 5560e02119d5SChris Mason found_key.type < type) 5561e02119d5SChris Mason break; 55620b86a832SChris Mason } 55630b86a832SChris Mason return 1; 55640b86a832SChris Mason } 5565ade2e0b3SWang Shilong 5566ade2e0b3SWang Shilong /* 5567ade2e0b3SWang Shilong * search in extent tree to find a previous Metadata/Data extent item with 5568ade2e0b3SWang Shilong * min objecitd. 5569ade2e0b3SWang Shilong * 5570ade2e0b3SWang Shilong * returns 0 if something is found, 1 if nothing was found and < 0 on error 5571ade2e0b3SWang Shilong */ 5572ade2e0b3SWang Shilong int btrfs_previous_extent_item(struct btrfs_root *root, 5573ade2e0b3SWang Shilong struct btrfs_path *path, u64 min_objectid) 5574ade2e0b3SWang Shilong { 5575ade2e0b3SWang Shilong struct btrfs_key found_key; 5576ade2e0b3SWang Shilong struct extent_buffer *leaf; 5577ade2e0b3SWang Shilong u32 nritems; 5578ade2e0b3SWang Shilong int ret; 5579ade2e0b3SWang Shilong 5580ade2e0b3SWang Shilong while (1) { 5581ade2e0b3SWang Shilong if (path->slots[0] == 0) { 5582ade2e0b3SWang Shilong btrfs_set_path_blocking(path); 5583ade2e0b3SWang Shilong ret = btrfs_prev_leaf(root, path); 5584ade2e0b3SWang Shilong if (ret != 0) 5585ade2e0b3SWang Shilong return ret; 5586ade2e0b3SWang Shilong } else { 5587ade2e0b3SWang Shilong path->slots[0]--; 5588ade2e0b3SWang Shilong } 5589ade2e0b3SWang Shilong leaf = path->nodes[0]; 5590ade2e0b3SWang Shilong nritems = btrfs_header_nritems(leaf); 5591ade2e0b3SWang Shilong if (nritems == 0) 
5592ade2e0b3SWang Shilong return 1; 5593ade2e0b3SWang Shilong if (path->slots[0] == nritems) 5594ade2e0b3SWang Shilong path->slots[0]--; 5595ade2e0b3SWang Shilong 5596ade2e0b3SWang Shilong btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 5597ade2e0b3SWang Shilong if (found_key.objectid < min_objectid) 5598ade2e0b3SWang Shilong break; 5599ade2e0b3SWang Shilong if (found_key.type == BTRFS_EXTENT_ITEM_KEY || 5600ade2e0b3SWang Shilong found_key.type == BTRFS_METADATA_ITEM_KEY) 5601ade2e0b3SWang Shilong return 0; 5602ade2e0b3SWang Shilong if (found_key.objectid == min_objectid && 5603ade2e0b3SWang Shilong found_key.type < BTRFS_EXTENT_ITEM_KEY) 5604ade2e0b3SWang Shilong break; 5605ade2e0b3SWang Shilong } 5606ade2e0b3SWang Shilong return 1; 5607ade2e0b3SWang Shilong } 5608